[opaque pointer type] Add textual IR support for explicit type parameter to load instruction

Essentially the same as the GEP change in r230786.
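Concretely, the loaded type is now spelled out explicitly before the pointer
operand, e.g. (as in the test updates below):

  %X = load i32* %A        ; old syntax
  %X = load i32, i32* %A   ; new syntax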

A similar migration script can be used to update test cases, though a few more
test-case improvements/changes were required this time around (r229269-r229278):

import sys
import re

# Matches a textual "load" (optionally atomic/volatile), capturing everything
# up to the pointee type, any addrspace qualifier, and the rest of the line,
# so the pointee type can be repeated as the new explicit first operand.
pat = re.compile(r"((?:=|:|^)\s*load (?:atomic )?(?:volatile )?(.*?))(| addrspace\(\d+\) *)\*($| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$)")

for line in sys.stdin:
  sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line))
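
The script reads IR on stdin and writes the migrated IR to stdout, so it can
be applied per file, e.g. (script name hypothetical):

  python update-load-syntax.py < test.ll > test.ll.migrated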

Reviewers: rafael, dexonsmith, grosser

Differential Revision: http://reviews.llvm.org/D7649

llvm-svn: 230794
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 4244679..cad7c6d 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -5241,7 +5241,11 @@
     Lex.Lex();
   }
 
-  if (ParseTypeAndValue(Val, Loc, PFS) ||
+  Type *Ty = nullptr;
+  LocTy ExplicitTypeLoc = Lex.getLoc();
+  if (ParseType(Ty) ||
+      ParseToken(lltok::comma, "expected comma after load's type") ||
+      ParseTypeAndValue(Val, Loc, PFS) ||
       ParseScopeAndOrdering(isAtomic, Scope, Ordering) ||
       ParseOptionalCommaAlign(Alignment, AteExtraComma))
     return true;
@@ -5254,6 +5258,10 @@
   if (Ordering == Release || Ordering == AcquireRelease)
     return Error(Loc, "atomic load cannot use Release ordering");
 
+  if (Ty != cast<PointerType>(Val->getType())->getElementType())
+    return Error(ExplicitTypeLoc,
+                 "explicit pointee type doesn't match operand's pointee type");
+
   Inst = new LoadInst(Val, "", isVolatile, Alignment, Ordering, Scope);
   return AteExtraComma ? InstExtraComma : InstNormal;
 }
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 8f3e5ec..51be1b0 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -2898,10 +2898,14 @@
     Out << ", ";
     TypePrinter.print(I.getType(), Out);
   } else if (Operand) {   // Print the normal way.
-    if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+    if (const auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
       Out << ' ';
       TypePrinter.print(GEP->getSourceElementType(), Out);
       Out << ',';
+    } else if (const auto *LI = dyn_cast<LoadInst>(&I)) {
+      Out << ' ';
+      TypePrinter.print(LI->getType(), Out);
+      Out << ", ";
     }
 
     // PrintAllTypes - Instructions who have operands of all the same type
diff --git a/llvm/test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll b/llvm/test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll
index b597ff8..d712e33 100644
--- a/llvm/test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll
+++ b/llvm/test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll
@@ -5,15 +5,15 @@
 ; RUN: opt < %s -basicaa -gvn -instcombine -S | FileCheck %s
 
 define i32 @test() {
-; CHECK: %Y.DONOTREMOVE = load i32* %A
+; CHECK: %Y.DONOTREMOVE = load i32, i32* %A
 ; CHECK: %Z = sub i32 0, %Y.DONOTREMOVE
   %A = alloca i32
   store i32 0, i32* %A
-  %X = load i32* %A
+  %X = load i32, i32* %A
   %B = bitcast i32* %A to i8*
   %C = getelementptr i8, i8* %B, i64 1
   store i8 1, i8* %C    ; Aliases %A
-  %Y.DONOTREMOVE = load i32* %A
+  %Y.DONOTREMOVE = load i32, i32* %A
   %Z = sub i32 %X, %Y.DONOTREMOVE
   ret i32 %Z
 }
diff --git a/llvm/test/Analysis/BasicAA/2003-04-22-GEPProblem.ll b/llvm/test/Analysis/BasicAA/2003-04-22-GEPProblem.ll
index c72ec81..96ca071 100644
--- a/llvm/test/Analysis/BasicAA/2003-04-22-GEPProblem.ll
+++ b/llvm/test/Analysis/BasicAA/2003-04-22-GEPProblem.ll
@@ -6,9 +6,9 @@
 ; CHECK: sub i32 %X, %Y
   %P2 = getelementptr i32, i32* %Ptr, i64 1
   %P1 = getelementptr i32, i32* %Ptr, i64 %V
-  %X = load i32* %P1
+  %X = load i32, i32* %P1
   store i32 5, i32* %P2
-  %Y = load i32* %P1
+  %Y = load i32, i32* %P1
   %Z = sub i32 %X, %Y
   ret i32 %Z
 }
diff --git a/llvm/test/Analysis/BasicAA/2003-05-21-GEP-Problem.ll b/llvm/test/Analysis/BasicAA/2003-05-21-GEP-Problem.ll
index dbda954..fb5b3bb 100644
--- a/llvm/test/Analysis/BasicAA/2003-05-21-GEP-Problem.ll
+++ b/llvm/test/Analysis/BasicAA/2003-05-21-GEP-Problem.ll
@@ -7,7 +7,7 @@
 
 loopentry:		; preds = %0, %no_exit
 	%tmp.101 = getelementptr %struct..apr_table_t, %struct..apr_table_t* %t.1, i64 0, i32 0, i32 2
-	%tmp.11 = load i32* %tmp.101		; <i32> [#uses=0]
+	%tmp.11 = load i32, i32* %tmp.101		; <i32> [#uses=0]
 	br i1 false, label %no_exit, label %UnifiedExitNode
 
 no_exit:		; preds = %loopentry
diff --git a/llvm/test/Analysis/BasicAA/2003-06-01-AliasCrash.ll b/llvm/test/Analysis/BasicAA/2003-06-01-AliasCrash.ll
index 305546b..ace5982 100644
--- a/llvm/test/Analysis/BasicAA/2003-06-01-AliasCrash.ll
+++ b/llvm/test/Analysis/BasicAA/2003-06-01-AliasCrash.ll
@@ -2,10 +2,10 @@
 
 define i32 @MTConcat([3 x i32]* %a.1) {
 	%tmp.961 = getelementptr [3 x i32], [3 x i32]* %a.1, i64 0, i64 4
-	%tmp.97 = load i32* %tmp.961
+	%tmp.97 = load i32, i32* %tmp.961
 	%tmp.119 = getelementptr [3 x i32], [3 x i32]* %a.1, i64 1, i64 0
-	%tmp.120 = load i32* %tmp.119
+	%tmp.120 = load i32, i32* %tmp.119
 	%tmp.1541 = getelementptr [3 x i32], [3 x i32]* %a.1, i64 0, i64 4
-	%tmp.155 = load i32* %tmp.1541
+	%tmp.155 = load i32, i32* %tmp.1541
 	ret i32 0
 }
diff --git a/llvm/test/Analysis/BasicAA/2003-09-19-LocalArgument.ll b/llvm/test/Analysis/BasicAA/2003-09-19-LocalArgument.ll
index fd4c239..1e75d64 100644
--- a/llvm/test/Analysis/BasicAA/2003-09-19-LocalArgument.ll
+++ b/llvm/test/Analysis/BasicAA/2003-09-19-LocalArgument.ll
@@ -7,9 +7,9 @@
 
 define i32 @test(i32* %P) {
 	%X = alloca i32
-	%V1 = load i32* %P
+	%V1 = load i32, i32* %P
 	store i32 0, i32* %X
-	%V2 = load i32* %P
+	%V2 = load i32, i32* %P
 	%Diff = sub i32 %V1, %V2
 	ret i32 %Diff
 }
diff --git a/llvm/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll b/llvm/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll
index 104d2bf..eb05e1e 100644
--- a/llvm/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll
+++ b/llvm/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll
@@ -15,9 +15,9 @@
 	%tmp.6 = getelementptr [3 x [3 x i32]], [3 x [3 x i32]]* %X, i32 0, i32 0, i32 %i.0.0		; <i32*> [#uses=1]
 	store i32 1, i32* %tmp.6
 	%tmp.8 = getelementptr [3 x [3 x i32]], [3 x [3 x i32]]* %X, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp.9 = load i32* %tmp.8		; <i32> [#uses=1]
+	%tmp.9 = load i32, i32* %tmp.8		; <i32> [#uses=1]
 	%tmp.11 = getelementptr [3 x [3 x i32]], [3 x [3 x i32]]* %X, i32 0, i32 1, i32 0		; <i32*> [#uses=1]
-	%tmp.12 = load i32* %tmp.11		; <i32> [#uses=1]
+	%tmp.12 = load i32, i32* %tmp.11		; <i32> [#uses=1]
 	%tmp.13 = add i32 %tmp.12, %tmp.9		; <i32> [#uses=1]
 	%inc = add i32 %i.0.0, 1		; <i32> [#uses=2]
 	%tmp.2 = icmp slt i32 %inc, %N		; <i1> [#uses=1]
diff --git a/llvm/test/Analysis/BasicAA/2007-01-13-BasePointerBadNoAlias.ll b/llvm/test/Analysis/BasicAA/2007-01-13-BasePointerBadNoAlias.ll
index 14d7f58..86bbd44 100644
--- a/llvm/test/Analysis/BasicAA/2007-01-13-BasePointerBadNoAlias.ll
+++ b/llvm/test/Analysis/BasicAA/2007-01-13-BasePointerBadNoAlias.ll
@@ -23,12 +23,12 @@
 define i32 @test(%struct.closure_type* %tmp18169) {
 	%tmp18174 = getelementptr %struct.closure_type, %struct.closure_type* %tmp18169, i32 0, i32 4, i32 0, i32 0		; <i32*> [#uses=2]
 	%tmp18269 = bitcast i32* %tmp18174  to %struct.STYLE*		; <%struct.STYLE*> [#uses=1]
-	%A = load i32* %tmp18174		; <i32> [#uses=1]
+	%A = load i32, i32* %tmp18174		; <i32> [#uses=1]
 
         %tmp18272 = getelementptr %struct.STYLE, %struct.STYLE* %tmp18269, i32 0, i32 0, i32 0, i32 2          ; <i16*> [#uses=1]
         store i16 123, i16* %tmp18272
 
-	%Q = load i32* %tmp18174		; <i32> [#uses=1]
+	%Q = load i32, i32* %tmp18174		; <i32> [#uses=1]
 	%Z = sub i32 %A, %Q		; <i32> [#uses=1]
 	ret i32 %Z
 }
diff --git a/llvm/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll b/llvm/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll
index ec0e2bd..5f0e117 100644
--- a/llvm/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll
+++ b/llvm/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll
@@ -5,10 +5,10 @@
 
 define i32 @test(i32* %P, i16* %Q) {
 ; CHECK: ret i32 0
-        %A = load i16* %Q               ; <i16> [#uses=1]
-        %x = load i32* %P               ; <i32> [#uses=1]
+        %A = load i16, i16* %Q               ; <i16> [#uses=1]
+        %x = load i32, i32* %P               ; <i32> [#uses=1]
         %B = call i16 @llvm.cttz.i16( i16 %A, i1 true )          ; <i16> [#uses=1]
-        %y = load i32* %P               ; <i32> [#uses=1]
+        %y = load i32, i32* %P               ; <i32> [#uses=1]
         store i16 %B, i16* %Q
         %z = sub i32 %x, %y             ; <i32> [#uses=1]
         ret i32 %z
diff --git a/llvm/test/Analysis/BasicAA/2007-10-24-ArgumentsGlobals.ll b/llvm/test/Analysis/BasicAA/2007-10-24-ArgumentsGlobals.ll
index e0e64fb..9e37457 100644
--- a/llvm/test/Analysis/BasicAA/2007-10-24-ArgumentsGlobals.ll
+++ b/llvm/test/Analysis/BasicAA/2007-10-24-ArgumentsGlobals.ll
@@ -11,6 +11,6 @@
         store i32 1, i32* getelementptr (%struct.B* @a, i32 0, i32 0, i32 0), align 8
         %tmp4 = getelementptr %struct.A, %struct.A* %b, i32 0, i32 0               ;<i32*> [#uses=1]
         store i32 0, i32* %tmp4, align 4
-        %tmp7 = load i32* getelementptr (%struct.B* @a, i32 0, i32 0, i32 0), align 8           ; <i32> [#uses=1]
+        %tmp7 = load i32, i32* getelementptr (%struct.B* @a, i32 0, i32 0, i32 0), align 8           ; <i32> [#uses=1]
         ret i32 %tmp7
 }
diff --git a/llvm/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll b/llvm/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
index 8014a24..069bd0b 100644
--- a/llvm/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
+++ b/llvm/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
@@ -17,7 +17,7 @@
         %tmp17 = getelementptr %struct.usb_hcd, %struct.usb_hcd* %hcd, i32 0, i32 2, i64 1      
         ; <i64*> [#uses=1]
         %tmp1718 = bitcast i64* %tmp17 to i32*          ; <i32*> [#uses=1]
-        %tmp19 = load i32* %tmp1718, align 4            ; <i32> [#uses=0]
+        %tmp19 = load i32, i32* %tmp1718, align 4            ; <i32> [#uses=0]
         br i1 false, label %cond_true34, label %done_okay
 
 cond_true34:            ; preds = %entry
@@ -25,7 +25,7 @@
 2305843009213693950            ; <i64*> [#uses=1]
         %tmp70 = bitcast i64* %tmp631 to %struct.device**
 
-        %tmp71 = load %struct.device** %tmp70, align 8
+        %tmp71 = load %struct.device*, %struct.device** %tmp70, align 8
 
         ret i32 undef
 
diff --git a/llvm/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll b/llvm/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
index ceba1d2..20be13d 100644
--- a/llvm/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
+++ b/llvm/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
@@ -14,7 +14,7 @@
 define i32 @ehci_pci_setup(%struct.usb_hcd* %hcd) {
 entry:
 	%tmp14 = getelementptr %struct.usb_hcd, %struct.usb_hcd* %hcd, i32 0, i32 0, i32 0		; <%struct.device**> [#uses=1]
-	%tmp15 = load %struct.device** %tmp14, align 8		; <%struct.device*> [#uses=0]
+	%tmp15 = load %struct.device*, %struct.device** %tmp14, align 8		; <%struct.device*> [#uses=0]
 	br i1 false, label %bb25, label %return
 
 bb25:		; preds = %entry
@@ -23,7 +23,7 @@
 cond_true:		; preds = %bb25
 	%tmp601 = getelementptr %struct.usb_hcd, %struct.usb_hcd* %hcd, i32 0, i32 1, i64 2305843009213693951		; <i64*> [#uses=1]
 	%tmp67 = bitcast i64* %tmp601 to %struct.device**		; <%struct.device**> [#uses=1]
-	%tmp68 = load %struct.device** %tmp67, align 8		; <%struct.device*> [#uses=0]
+	%tmp68 = load %struct.device*, %struct.device** %tmp67, align 8		; <%struct.device*> [#uses=0]
 	ret i32 undef
 
 return:		; preds = %bb25, %entry
diff --git a/llvm/test/Analysis/BasicAA/2008-06-02-GEPTailCrash.ll b/llvm/test/Analysis/BasicAA/2008-06-02-GEPTailCrash.ll
index 1709144..9b6dbec 100644
--- a/llvm/test/Analysis/BasicAA/2008-06-02-GEPTailCrash.ll
+++ b/llvm/test/Analysis/BasicAA/2008-06-02-GEPTailCrash.ll
@@ -10,6 +10,6 @@
 define void @test291() nounwind  {
 entry:
 	store i32 1138410269, i32* getelementptr ([5 x %struct.S291]* @a291, i32 0, i32 2, i32 1)
-	%tmp54 = load i32* bitcast (%struct.S291* getelementptr ([5 x %struct.S291]* @a291, i32 0, i32 2) to i32*), align 4		; <i32> [#uses=0]
+	%tmp54 = load i32, i32* bitcast (%struct.S291* getelementptr ([5 x %struct.S291]* @a291, i32 0, i32 2) to i32*), align 4		; <i32> [#uses=0]
 	unreachable
 }
diff --git a/llvm/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll b/llvm/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll
index 3db9a3f..49a742c 100644
--- a/llvm/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll
+++ b/llvm/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll
@@ -9,6 +9,6 @@
   %B = call i32* @_Znwj(i32 4)
   store i32 1, i32* %A
   store i32 2, i32* %B
-  %C = load i32* %A
+  %C = load i32, i32* %A
   ret i32 %C
 }
diff --git a/llvm/test/Analysis/BasicAA/2009-03-04-GEPNoalias.ll b/llvm/test/Analysis/BasicAA/2009-03-04-GEPNoalias.ll
index 643d54d..65dcf5ce 100644
--- a/llvm/test/Analysis/BasicAA/2009-03-04-GEPNoalias.ll
+++ b/llvm/test/Analysis/BasicAA/2009-03-04-GEPNoalias.ll
@@ -3,12 +3,12 @@
 declare noalias i32* @noalias()
 
 define i32 @test(i32 %x) {
-; CHECK: load i32* %a
+; CHECK: load i32, i32* %a
   %a = call i32* @noalias()
   store i32 1, i32* %a
   %b = getelementptr i32, i32* %a, i32 %x
   store i32 2, i32* %b
 
-  %c = load i32* %a
+  %c = load i32, i32* %a
   ret i32 %c
 }
diff --git a/llvm/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll b/llvm/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll
index 8704d19..97a9251 100644
--- a/llvm/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll
+++ b/llvm/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll
@@ -5,9 +5,9 @@
   %P = getelementptr i8, i8* %ptr, i32 0
   %Q = getelementptr i8, i8* %ptr, i32 1
 ; CHECK: getelementptr
-  %X = load i8* %P
+  %X = load i8, i8* %P
   %Y = atomicrmw add i8* %Q, i8 1 monotonic
-  %Z = load i8* %P
+  %Z = load i8, i8* %P
 ; CHECK-NOT: = load
   %A = sub i8 %X, %Z
   ret i8 %A
diff --git a/llvm/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll b/llvm/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll
index a2515a6..43ee96c 100644
--- a/llvm/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll
+++ b/llvm/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll
@@ -23,9 +23,9 @@
 
 bb2:
   %P = phi i32* [ %b, %bb ], [ @Y, %bb1 ]
-  %tmp1 = load i32* @Z, align 4
+  %tmp1 = load i32, i32* @Z, align 4
   store i32 123, i32* %P, align 4
-  %tmp2 = load i32* @Z, align 4
+  %tmp2 = load i32, i32* @Z, align 4
   br label %return
 
 return:
diff --git a/llvm/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll b/llvm/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll
index 40c65af..b2e7a60 100644
--- a/llvm/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll
+++ b/llvm/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll
@@ -9,9 +9,9 @@
   %tmp31 = mul i32 %indvar, -2
   %tmp32 = add i32 %tmp31, 30
   %t.5 = getelementptr i32, i32* %tab, i32 %tmp32
-  %loada = load i32* %tab
+  %loada = load i32, i32* %tab
   store i32 0, i32* %t.5
-  %loadb = load i32* %tab
+  %loadb = load i32, i32* %tab
   %rval = add i32 %loada, %loadb
   ret i32 %rval
 }
diff --git a/llvm/test/Analysis/BasicAA/2014-03-18-Maxlookup-reached.ll b/llvm/test/Analysis/BasicAA/2014-03-18-Maxlookup-reached.ll
index 82e8044..08db5ec 100644
--- a/llvm/test/Analysis/BasicAA/2014-03-18-Maxlookup-reached.ll
+++ b/llvm/test/Analysis/BasicAA/2014-03-18-Maxlookup-reached.ll
@@ -30,7 +30,7 @@
   store i8 0, i8* %10
   %11 = getelementptr inbounds i8, i8* %10, i32 -1
   store i8 0, i8* %11
-  %12 = load i32* %1, align 4
+  %12 = load i32, i32* %1, align 4
   ret i32 %12
 ; CHECK: ret i32 %12
 }
diff --git a/llvm/test/Analysis/BasicAA/aligned-overread.ll b/llvm/test/Analysis/BasicAA/aligned-overread.ll
index b05f8eb..47588e7 100644
--- a/llvm/test/Analysis/BasicAA/aligned-overread.ll
+++ b/llvm/test/Analysis/BasicAA/aligned-overread.ll
@@ -9,10 +9,10 @@
 
 define i32 @main() nounwind uwtable ssp {
 entry:
-  %tmp = load i8* getelementptr inbounds ({ i8, i8, i8, i8, i8 }* @a, i64 0, i32 4), align 4
+  %tmp = load i8, i8* getelementptr inbounds ({ i8, i8, i8, i8, i8 }* @a, i64 0, i32 4), align 4
   %tmp1 = or i8 %tmp, -128
   store i8 %tmp1, i8* getelementptr inbounds ({ i8, i8, i8, i8, i8 }* @a, i64 0, i32 4), align 4
-  %tmp2 = load i64* bitcast ({ i8, i8, i8, i8, i8 }* @a to i64*), align 8
+  %tmp2 = load i64, i64* bitcast ({ i8, i8, i8, i8, i8 }* @a to i64*), align 8
   store i8 11, i8* getelementptr inbounds ({ i8, i8, i8, i8, i8 }* @a, i64 0, i32 4), align 4
   %tmp3 = trunc i64 %tmp2 to i32
   ret i32 %tmp3
diff --git a/llvm/test/Analysis/BasicAA/args-rets-allocas-loads.ll b/llvm/test/Analysis/BasicAA/args-rets-allocas-loads.ll
index 066f46b..05b56a0 100644
--- a/llvm/test/Analysis/BasicAA/args-rets-allocas-loads.ll
+++ b/llvm/test/Analysis/BasicAA/args-rets-allocas-loads.ll
@@ -22,8 +22,8 @@
   %noalias_ret_a0 = call double* @noalias_returner()
   %noalias_ret_a1 = call double* @noalias_returner()
 
-  %loaded_a0 = load double** %indirect_a0
-  %loaded_a1 = load double** %indirect_a1
+  %loaded_a0 = load double*, double** %indirect_a0
+  %loaded_a1 = load double*, double** %indirect_a1
 
   call void @callee(double* %escape_alloca_a0)
   call void @callee(double* %escape_alloca_a1)
diff --git a/llvm/test/Analysis/BasicAA/byval.ll b/llvm/test/Analysis/BasicAA/byval.ll
index 260aebe..edbe7b3 100644
--- a/llvm/test/Analysis/BasicAA/byval.ll
+++ b/llvm/test/Analysis/BasicAA/byval.ll
@@ -10,7 +10,7 @@
   %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0		; <i32*> [#uses=2]
   store i32 1, i32* %tmp2, align 4
   store i32 2, i32* @g, align 4
-  %tmp4 = load i32* %tmp2, align 4		; <i32> [#uses=1]
+  %tmp4 = load i32, i32* %tmp2, align 4		; <i32> [#uses=1]
   ret i32 %tmp4
 }
 
diff --git a/llvm/test/Analysis/BasicAA/cas.ll b/llvm/test/Analysis/BasicAA/cas.ll
index d0cd9f4..b770cb7 100644
--- a/llvm/test/Analysis/BasicAA/cas.ll
+++ b/llvm/test/Analysis/BasicAA/cas.ll
@@ -6,9 +6,9 @@
 ; CHECK: ret i32 0
 
 define i32 @main() {
-  %a = load i32* @flag0
+  %a = load i32, i32* @flag0
   %b = atomicrmw xchg i32* @turn, i32 1 monotonic
-  %c = load i32* @flag0
+  %c = load i32, i32* @flag0
   %d = sub i32 %a, %c
   ret i32 %d
 }
diff --git a/llvm/test/Analysis/BasicAA/dag.ll b/llvm/test/Analysis/BasicAA/dag.ll
index 1d2f6f1..63e2c1a 100644
--- a/llvm/test/Analysis/BasicAA/dag.ll
+++ b/llvm/test/Analysis/BasicAA/dag.ll
@@ -36,6 +36,6 @@
   %bigbase = bitcast i8* %base to i16*
   store i16 -1, i16* %bigbase
 
-  %loaded = load i8* %phi
+  %loaded = load i8, i8* %phi
   ret i8 %loaded
 }
diff --git a/llvm/test/Analysis/BasicAA/featuretest.ll b/llvm/test/Analysis/BasicAA/featuretest.ll
index 19e9b16..97e97f5 100644
--- a/llvm/test/Analysis/BasicAA/featuretest.ll
+++ b/llvm/test/Analysis/BasicAA/featuretest.ll
@@ -19,12 +19,12 @@
         call void @external(i32* %Array2)
 
 	%pointer = getelementptr i32, i32* %Array1, i64 %A
-	%val = load i32* %pointer
+	%val = load i32, i32* %pointer
 
 	%pointer2 = getelementptr i32, i32* %Array2, i64 %B
 	store i32 7, i32* %pointer2
 
-	%REMOVE = load i32* %pointer ; redundant with above load
+	%REMOVE = load i32, i32* %pointer ; redundant with above load
 	%retval = sub i32 %REMOVE, %val
 	ret i32 %retval
 ; CHECK: @different_array_test
@@ -41,9 +41,9 @@
 	%P1 = getelementptr i32, i32* %Array, i64 7
 	%P2 = getelementptr i32, i32* %Array, i64 6
 	
-	%A = load i32* %P1
+	%A = load i32, i32* %P1
 	store i32 1, i32* %P2   ; Should not invalidate load
-	%BREMOVE = load i32* %P1
+	%BREMOVE = load i32, i32* %P1
 	%Val = sub i32 %A, %BREMOVE
 	ret i32 %Val
 ; CHECK: @constant_array_index_test
@@ -53,10 +53,10 @@
 ; Test that if two pointers are spaced out by a constant getelementptr, that 
 ; they cannot alias.
 define i32 @gep_distance_test(i32* %A) {
-        %REMOVEu = load i32* %A
+        %REMOVEu = load i32, i32* %A
         %B = getelementptr i32, i32* %A, i64 2  ; Cannot alias A
         store i32 7, i32* %B
-        %REMOVEv = load i32* %A
+        %REMOVEv = load i32, i32* %A
         %r = sub i32 %REMOVEu, %REMOVEv
         ret i32 %r
 ; CHECK: @gep_distance_test
@@ -67,10 +67,10 @@
 ; cannot alias, even if there is a variable offset between them...
 define i32 @gep_distance_test2({i32,i32}* %A, i64 %distance) {
 	%A1 = getelementptr {i32,i32}, {i32,i32}* %A, i64 0, i32 0
-	%REMOVEu = load i32* %A1
+	%REMOVEu = load i32, i32* %A1
 	%B = getelementptr {i32,i32}, {i32,i32}* %A, i64 %distance, i32 1
 	store i32 7, i32* %B    ; B cannot alias A, it's at least 4 bytes away
-	%REMOVEv = load i32* %A1
+	%REMOVEv = load i32, i32* %A1
         %r = sub i32 %REMOVEu, %REMOVEv
         ret i32 %r
 ; CHECK: @gep_distance_test2
@@ -80,11 +80,11 @@
 ; Test that we can do funny pointer things and that distance calc will still 
 ; work.
 define i32 @gep_distance_test3(i32 * %A) {
-	%X = load i32* %A
+	%X = load i32, i32* %A
 	%B = bitcast i32* %A to i8*
 	%C = getelementptr i8, i8* %B, i64 4
         store i8 42, i8* %C
-	%Y = load i32* %A
+	%Y = load i32, i32* %A
         %R = sub i32 %X, %Y
 	ret i32 %R
 ; CHECK: @gep_distance_test3
@@ -96,9 +96,9 @@
    %X = alloca i32
    call void @external(i32* %X)
 
-   %Y = load i32* %X
+   %Y = load i32, i32* %X
    store i32 5, i32* getelementptr ({ i32 }* @Global, i64 0, i32 0)
-   %REMOVE = load i32* %X
+   %REMOVE = load i32, i32* %X
    %retval = sub i32 %Y, %REMOVE
    ret i32 %retval
 ; CHECK: @constexpr_test
@@ -113,12 +113,12 @@
 entry:
   %sum5.cast = zext i5 %j to i64             ; <i64> [#uses=1]
   %P1 = getelementptr i16, i16* %row2col, i64 %sum5.cast
-  %row2col.load.1.2 = load i16* %P1, align 1 ; <i16> [#uses=1]
+  %row2col.load.1.2 = load i16, i16* %P1, align 1 ; <i16> [#uses=1]
   
   %sum13.cast31 = sext i5 %j to i6          ; <i6> [#uses=1]
   %sum13.cast = zext i6 %sum13.cast31 to i64      ; <i64> [#uses=1]
   %P2 = getelementptr i16, i16* %row2col, i64 %sum13.cast
-  %row2col.load.1.6 = load i16* %P2, align 1 ; <i16> [#uses=1]
+  %row2col.load.1.6 = load i16, i16* %P2, align 1 ; <i16> [#uses=1]
   
   %.ret = sub i16 %row2col.load.1.6, %row2col.load.1.2 ; <i16> [#uses=1]
   ret i16 %.ret
diff --git a/llvm/test/Analysis/BasicAA/full-store-partial-alias.ll b/llvm/test/Analysis/BasicAA/full-store-partial-alias.ll
index e046e13..341f6ba 100644
--- a/llvm/test/Analysis/BasicAA/full-store-partial-alias.ll
+++ b/llvm/test/Analysis/BasicAA/full-store-partial-alias.ll
@@ -20,11 +20,11 @@
   %u = alloca %union.anon, align 8
   %tmp9 = getelementptr inbounds %union.anon, %union.anon* %u, i64 0, i32 0
   store double %x, double* %tmp9, align 8, !tbaa !0
-  %tmp2 = load i32* bitcast (i64* @endianness_test to i32*), align 8, !tbaa !3
+  %tmp2 = load i32, i32* bitcast (i64* @endianness_test to i32*), align 8, !tbaa !3
   %idxprom = sext i32 %tmp2 to i64
   %tmp4 = bitcast %union.anon* %u to [2 x i32]*
   %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %tmp4, i64 0, i64 %idxprom
-  %tmp5 = load i32* %arrayidx, align 4, !tbaa !3
+  %tmp5 = load i32, i32* %arrayidx, align 4, !tbaa !3
   %tmp5.lobit = lshr i32 %tmp5, 31
   ret i32 %tmp5.lobit
 }
diff --git a/llvm/test/Analysis/BasicAA/gcsetest.ll b/llvm/test/Analysis/BasicAA/gcsetest.ll
index 64792eb..cf6ab71 100644
--- a/llvm/test/Analysis/BasicAA/gcsetest.ll
+++ b/llvm/test/Analysis/BasicAA/gcsetest.ll
@@ -12,11 +12,11 @@
 ; CHECK-NEXT:   ret i32 0
 
 define i32 @test() {
-	%A1 = load i32* @A
+	%A1 = load i32, i32* @A
 
 	store i32 123, i32* @B  ; Store cannot alias @A
 
-	%A2 = load i32* @A
+	%A2 = load i32, i32* @A
 	%X = sub i32 %A1, %A2
 	ret i32 %X
 }
@@ -30,13 +30,13 @@
 ; CHECK-NEXT:   ret i32 0
 
 define i32 @test2() {
-        %A1 = load i32* @A
+        %A1 = load i32, i32* @A
         br label %Loop
 Loop:
         %AP = phi i32 [0, %0], [%X, %Loop]
         store i32 %AP, i32* @B  ; Store cannot alias @A
 
-        %A2 = load i32* @A
+        %A2 = load i32, i32* @A
         %X = sub i32 %A1, %A2
         %c = icmp eq i32 %X, 0
         br i1 %c, label %out, label %Loop
@@ -55,7 +55,7 @@
 	%X = alloca i32
 	store i32 7, i32* %X
 	call void @external()
-	%V = load i32* %X
+	%V = load i32, i32* %X
 	ret i32 %V
 }
 
diff --git a/llvm/test/Analysis/BasicAA/gep-alias.ll b/llvm/test/Analysis/BasicAA/gep-alias.ll
index 3f2e88a..f686010 100644
--- a/llvm/test/Analysis/BasicAA/gep-alias.ll
+++ b/llvm/test/Analysis/BasicAA/gep-alias.ll
@@ -7,11 +7,11 @@
 entry:
 	%Q = bitcast i8* %P to {i32, i32}*
 	%R = getelementptr {i32, i32}, {i32, i32}* %Q, i32 0, i32 1
-	%S = load i32* %R
+	%S = load i32, i32* %R
 
 	%q = bitcast i8* %P to {i32, i32}*
 	%r = getelementptr {i32, i32}, {i32, i32}* %q, i32 0, i32 1
-	%s = load i32* %r
+	%s = load i32, i32* %r
 
 	%t = sub i32 %S, %s
 	ret i32 %t
@@ -23,12 +23,12 @@
 entry:
 	%Q = bitcast i8* %P to {i32, i32, i32}*
 	%R = getelementptr {i32, i32, i32}, {i32, i32, i32}* %Q, i32 0, i32 1
-	%S = load i32* %R
+	%S = load i32, i32* %R
 
 	%r = getelementptr {i32, i32, i32}, {i32, i32, i32}* %Q, i32 0, i32 2
   store i32 42, i32* %r
 
-	%s = load i32* %R
+	%s = load i32, i32* %R
 
 	%t = sub i32 %S, %s
 	ret i32 %t
@@ -42,12 +42,12 @@
 entry:
   %P2 = getelementptr {float, {i32, i32, i32}}, {float, {i32, i32, i32}}* %P, i32 0, i32 1
 	%R = getelementptr {i32, i32, i32}, {i32, i32, i32}* %P2, i32 0, i32 1
-	%S = load i32* %R
+	%S = load i32, i32* %R
 
 	%r = getelementptr {i32, i32, i32}, {i32, i32, i32}* %P2, i32 0, i32 2
   store i32 42, i32* %r
 
-	%s = load i32* %R
+	%s = load i32, i32* %R
 
 	%t = sub i32 %S, %s
 	ret i32 %t
@@ -66,7 +66,7 @@
   store i32 64, i32* %tmp2, align 8
   %tmp3 = getelementptr inbounds %SmallPtrSet64, %SmallPtrSet64* %P, i64 0, i32 0, i32 4, i64 64
   store i8* null, i8** %tmp3, align 8
-  %tmp4 = load i32* %tmp2, align 8
+  %tmp4 = load i32, i32* %tmp2, align 8
 	ret i32 %tmp4
 ; CHECK-LABEL: @test4(
 ; CHECK: ret i32 64
@@ -77,9 +77,9 @@
   %pi = getelementptr i32, i32* %p, i64 %i
   %i.next = add i64 %i, 1
   %pi.next = getelementptr i32, i32* %p, i64 %i.next
-  %x = load i32* %pi
+  %x = load i32, i32* %pi
   store i32 42, i32* %pi.next
-  %y = load i32* %pi
+  %y = load i32, i32* %pi
   %z = sub i32 %x, %y
   ret i32 %z
 ; CHECK-LABEL: @test5(
@@ -90,9 +90,9 @@
   %pi = getelementptr i32, i32 addrspace(1)* %p, i8 %i
   %i.next = add i8 %i, 1
   %pi.next = getelementptr i32, i32 addrspace(1)* %p, i8 %i.next
-  %x = load i32 addrspace(1)* %pi
+  %x = load i32, i32 addrspace(1)* %pi
   store i32 42, i32 addrspace(1)* %pi.next
-  %y = load i32 addrspace(1)* %pi
+  %y = load i32, i32 addrspace(1)* %pi
   %z = sub i32 %x, %y
   ret i32 %z
 ; CHECK-LABEL: @test5_as1_smaller_size(
@@ -104,9 +104,9 @@
   %pi = getelementptr i32, i32 addrspace(1)* %p, i16 %i
   %i.next = add i16 %i, 1
   %pi.next = getelementptr i32, i32 addrspace(1)* %p, i16 %i.next
-  %x = load i32 addrspace(1)* %pi
+  %x = load i32, i32 addrspace(1)* %pi
   store i32 42, i32 addrspace(1)* %pi.next
-  %y = load i32 addrspace(1)* %pi
+  %y = load i32, i32 addrspace(1)* %pi
   %z = sub i32 %x, %y
   ret i32 %z
 ; CHECK-LABEL: @test5_as1_same_size(
@@ -119,9 +119,9 @@
   %pi = getelementptr i32, i32* %p, i64 %i
   %i.next = or i64 %i, 1
   %pi.next = getelementptr i32, i32* %p, i64 %i.next
-  %x = load i32* %pi
+  %x = load i32, i32* %pi
   store i32 42, i32* %pi.next
-  %y = load i32* %pi
+  %y = load i32, i32* %pi
   %z = sub i32 %x, %y
   ret i32 %z
 ; CHECK-LABEL: @test6(
@@ -133,9 +133,9 @@
   %pi = getelementptr i32, i32* %p, i64 1
   %i.next = shl i64 %i, 2
   %pi.next = getelementptr i32, i32* %p, i64 %i.next
-  %x = load i32* %pi
+  %x = load i32, i32* %pi
   store i32 42, i32* %pi.next
-  %y = load i32* %pi
+  %y = load i32, i32* %pi
   %z = sub i32 %x, %y
   ret i32 %z
 ; CHECK-LABEL: @test7(
@@ -150,9 +150,9 @@
   %i.next = add i16 %i, 1
   %i.next2 = zext i16 %i.next to i32
   %pi.next = getelementptr i32, i32* %p, i32 %i.next2
-  %x = load i32* %pi
+  %x = load i32, i32* %pi
   store i32 42, i32* %pi.next
-  %y = load i32* %pi
+  %y = load i32, i32* %pi
   %z = sub i32 %x, %y
   ret i32 %z
 ; CHECK-LABEL: @test8(
@@ -170,9 +170,9 @@
   ; P4 = P + 4*j
   %P4 = getelementptr [4 x i8], [4 x i8]* %P, i32 0, i32 %j2
 
-  %x = load i8* %P2
+  %x = load i8, i8* %P2
   store i8 42, i8* %P4
-  %y = load i8* %P2
+  %y = load i8, i8* %P2
   %z = sub i8 %x, %y
   ret i8 %z
 ; CHECK-LABEL: @test9(
@@ -188,9 +188,9 @@
   ; P4 = P + 4*i
   %P4 = getelementptr [4 x i8], [4 x i8]* %P, i32 0, i32 %i2
 
-  %x = load i8* %P2
+  %x = load i8, i8* %P2
   store i8 42, i8* %P4
-  %y = load i8* %P2
+  %y = load i8, i8* %P2
   %z = sub i8 %x, %y
   ret i8 %z
 ; CHECK-LABEL: @test10(
@@ -207,7 +207,7 @@
   %y29 = getelementptr inbounds [2 x float], [2 x float]* %arrayidx28, i32 0, i32 1
   store float 1.0, float* %y29, align 4
   store i64 0, i64* %scevgep35, align 4
-  %tmp30 = load float* %y29, align 4
+  %tmp30 = load float, float* %y29, align 4
   ret float %tmp30
 ; CHECK-LABEL: @test11(
 ; CHECK: ret float %tmp30
@@ -223,7 +223,7 @@
   %castp = bitcast i8* %p to i32*
   store i32 1, i32* %castp
   store i32 0, i32* %castd
-  %r = load i32* %castp
+  %r = load i32, i32* %castp
   ret i32 %r
 ; CHECK-LABEL: @test12(
 ; CHECK: ret i32 %r
diff --git a/llvm/test/Analysis/BasicAA/global-size.ll b/llvm/test/Analysis/BasicAA/global-size.ll
index 6d06698..bacf3bc 100644
--- a/llvm/test/Analysis/BasicAA/global-size.ll
+++ b/llvm/test/Analysis/BasicAA/global-size.ll
@@ -8,9 +8,9 @@
 
 ; CHECK-LABEL: @test1(
 define i16 @test1(i32* %P) {
-        %X = load i16* @B
+        %X = load i16, i16* @B
         store i32 7, i32* %P
-        %Y = load i16* @B
+        %Y = load i16, i16* @B
         %Z = sub i16 %Y, %X
         ret i16 %Z
 ; CHECK: ret i16 0
@@ -21,9 +21,9 @@
 define i16 @test1_as1(i32 addrspace(1)* %P) {
 ; CHECK-LABEL: @test1_as1(
 ; CHECK: ret i16 0
-  %X = load i16 addrspace(1)* @B_as1
+  %X = load i16, i16 addrspace(1)* @B_as1
   store i32 7, i32 addrspace(1)* %P
-  %Y = load i16 addrspace(1)* @B_as1
+  %Y = load i16, i16 addrspace(1)* @B_as1
   %Z = sub i16 %Y, %X
   ret i16 %Z
 }
@@ -39,10 +39,10 @@
   %tmp93 = add i32 %w.2, %indvar89
   %arrayidx416 = getelementptr [0 x i8], [0 x i8]* @window, i32 0, i32 %tmp93
 
-  %A = load i8* %arrayidx412, align 1
+  %A = load i8, i8* %arrayidx412, align 1
   store i8 4, i8* %arrayidx416, align 1
 
-  %B = load i8* %arrayidx412, align 1
+  %B = load i8, i8* %arrayidx412, align 1
   %C = sub i8 %A, %B
   ret i8 %C
 
diff --git a/llvm/test/Analysis/BasicAA/invariant_load.ll b/llvm/test/Analysis/BasicAA/invariant_load.ll
index bc629cd..722fb5b 100644
--- a/llvm/test/Analysis/BasicAA/invariant_load.ll
+++ b/llvm/test/Analysis/BasicAA/invariant_load.ll
@@ -10,15 +10,15 @@
 
 define i32 @foo(i32* nocapture %p, i8* nocapture %q) {
 entry:
-  %0 = load i32* %p, align 4, !invariant.load !3
+  %0 = load i32, i32* %p, align 4, !invariant.load !3
   %conv = trunc i32 %0 to i8
   store i8 %conv, i8* %q, align 1
-  %1 = load i32* %p, align 4, !invariant.load !3
+  %1 = load i32, i32* %p, align 4, !invariant.load !3
   %add = add nsw i32 %1, 1
   ret i32 %add
 
 ; CHECK: foo
-; CHECK: %0 = load i32* %p
+; CHECK: %0 = load i32, i32* %p
 ; CHECK: store i8 %conv, i8* %q,
 ; CHECK: %add = add nsw i32 %0, 1
 }
diff --git a/llvm/test/Analysis/BasicAA/memset_pattern.ll b/llvm/test/Analysis/BasicAA/memset_pattern.ll
index 590664c..25bdb2e 100644
--- a/llvm/test/Analysis/BasicAA/memset_pattern.ll
+++ b/llvm/test/Analysis/BasicAA/memset_pattern.ll
@@ -13,7 +13,7 @@
   store i32 1, i32* @z
   tail call void @memset_pattern16(i8* bitcast (i32* @y to i8*), i8* bitcast (i32* @x to i8*), i64 4) nounwind
 ; CHECK-NOT: load
-  %l = load i32* @z
+  %l = load i32, i32* @z
 ; CHECK: ret i32 1
   ret i32 %l
 }
diff --git a/llvm/test/Analysis/BasicAA/modref.ll b/llvm/test/Analysis/BasicAA/modref.ll
index 39747f9..e124d6c 100644
--- a/llvm/test/Analysis/BasicAA/modref.ll
+++ b/llvm/test/Analysis/BasicAA/modref.ll
@@ -13,7 +13,7 @@
 
   call void @llvm.memset.p0i8.i32(i8* %P, i8 0, i32 42, i32 1, i1 false)
 
-  %B = load i32* %A
+  %B = load i32, i32* %A
   ret i32 %B
 
 ; CHECK-LABEL: @test0
@@ -29,7 +29,7 @@
 
   call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i32 0, i1 false)
 
-  %C = load i8* %B
+  %C = load i8, i8* %B
   ret i8 %C
 ; CHECK: ret i8 2
 }
@@ -39,7 +39,7 @@
   %P2 = getelementptr i8, i8* %P, i32 127
   store i8 1, i8* %P2  ;; Not dead across memset
   call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i32 0, i1 false)
-  %A = load i8* %P2
+  %A = load i8, i8* %P2
   ret i8 %A
 ; CHECK: ret i8 1
 }
@@ -52,7 +52,7 @@
   store i8 1, i8* %P2  ;; Dead, clobbered by memset.
 
   call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i32 0, i1 false)
-  %A = load i8* %P2
+  %A = load i8, i8* %P2
   ret i8 %A
 ; CHECK-NOT: load
 ; CHECK: ret i8 2
@@ -90,9 +90,9 @@
 @G2 = external global [4000 x i32]
 
 define i32 @test4(i8* %P) {
-  %tmp = load i32* @G1
+  %tmp = load i32, i32* @G1
   call void @llvm.memset.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8 0, i32 4000, i32 1, i1 false)
-  %tmp2 = load i32* @G1
+  %tmp2 = load i32, i32* @G1
   %sub = sub i32 %tmp2, %tmp
   ret i32 %sub
 ; CHECK-LABEL: @test4
@@ -105,9 +105,9 @@
 ; Verify that basicaa is handling variable length memcpy, knowing it doesn't
 ; write to G1.
 define i32 @test5(i8* %P, i32 %Len) {
-  %tmp = load i32* @G1
+  %tmp = load i32, i32* @G1
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8* bitcast (i32* @G1 to i8*), i32 %Len, i32 1, i1 false)
-  %tmp2 = load i32* @G1
+  %tmp2 = load i32, i32* @G1
   %sub = sub i32 %tmp2, %tmp
   ret i32 %sub
 ; CHECK: @test5
@@ -118,13 +118,13 @@
 }
 
 define i8 @test6(i8* %p, i8* noalias %a) {
-  %x = load i8* %a
+  %x = load i8, i8* %a
   %t = va_arg i8* %p, float
-  %y = load i8* %a
+  %y = load i8, i8* %a
   %z = add i8 %x, %y
   ret i8 %z
 ; CHECK-LABEL: @test6
-; CHECK: load i8* %a
+; CHECK: load i8, i8* %a
 ; CHECK-NOT: load
 ; CHECK: ret
 }
@@ -137,12 +137,12 @@
   store i32 0, i32* %x, align 4
   %add.ptr = getelementptr inbounds i32, i32* %x, i64 1
   call void @test7decl(i32* %add.ptr)
-  %tmp = load i32* %x, align 4
+  %tmp = load i32, i32* %x, align 4
   ret i32 %tmp
 ; CHECK-LABEL: @test7(
 ; CHECK: store i32 0
 ; CHECK: call void @test7decl
-; CHECK: load i32*
+; CHECK: load i32, i32*
 }
 
 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/llvm/test/Analysis/BasicAA/must-and-partial.ll b/llvm/test/Analysis/BasicAA/must-and-partial.ll
index e8dc1de..3b4c84a 100644
--- a/llvm/test/Analysis/BasicAA/must-and-partial.ll
+++ b/llvm/test/Analysis/BasicAA/must-and-partial.ll
@@ -20,7 +20,7 @@
   %bigbase0 = bitcast i8* %base to i16*
   store i16 -1, i16* %bigbase0
 
-  %loaded = load i8* %phi
+  %loaded = load i8, i8* %phi
   ret i8 %loaded
 }
 
@@ -34,6 +34,6 @@
   %bigbase1 = bitcast i8* %base to i16*
   store i16 -1, i16* %bigbase1
 
-  %loaded = load i8* %sel
+  %loaded = load i8, i8* %sel
   ret i8 %loaded
 }
diff --git a/llvm/test/Analysis/BasicAA/no-escape-call.ll b/llvm/test/Analysis/BasicAA/no-escape-call.ll
index 072575c..ea33532 100644
--- a/llvm/test/Analysis/BasicAA/no-escape-call.ll
+++ b/llvm/test/Analysis/BasicAA/no-escape-call.ll
@@ -12,9 +12,9 @@
 	store i8* %tmp2, i8** %tmp4, align 4
 	%tmp10 = getelementptr i8, i8* %tmp2, i32 10		; <i8*> [#uses=1]
 	store i8 42, i8* %tmp10, align 1
-	%tmp14 = load i8** %tmp4, align 4		; <i8*> [#uses=1]
+	%tmp14 = load i8*, i8** %tmp4, align 4		; <i8*> [#uses=1]
 	%tmp16 = getelementptr i8, i8* %tmp14, i32 10		; <i8*> [#uses=1]
-	%tmp17 = load i8* %tmp16, align 1		; <i8> [#uses=1]
+	%tmp17 = load i8, i8* %tmp16, align 1		; <i8> [#uses=1]
 	%tmp19 = icmp eq i8 %tmp17, 42		; <i1> [#uses=1]
 	ret i1 %tmp19
 }
diff --git a/llvm/test/Analysis/BasicAA/noalias-bugs.ll b/llvm/test/Analysis/BasicAA/noalias-bugs.ll
index 2ae76609..acb230c 100644
--- a/llvm/test/Analysis/BasicAA/noalias-bugs.ll
+++ b/llvm/test/Analysis/BasicAA/noalias-bugs.ll
@@ -27,7 +27,7 @@
 ; CHECK; store i64 1
 
   store i64 2, i64* %ptr.64, align 8
-  %r = load i64* %either_ptr.64, align 8
+  %r = load i64, i64* %either_ptr.64, align 8
   store i64 1, i64* %ptr.64, align 8
   ret i64 %r
 }
diff --git a/llvm/test/Analysis/BasicAA/noalias-param.ll b/llvm/test/Analysis/BasicAA/noalias-param.ll
index 6494771..c5b1ebf 100644
--- a/llvm/test/Analysis/BasicAA/noalias-param.ll
+++ b/llvm/test/Analysis/BasicAA/noalias-param.ll
@@ -6,7 +6,7 @@
 entry:
   store i32 1, i32* %a 
   %cap = call i32* @captures(i32* %a) nounwind readonly
-  %l = load i32* %b
+  %l = load i32, i32* %b
   ret void
 }
 
@@ -16,7 +16,7 @@
 entry:
   store i32 1, i32* %c 
   %cap = call i32* @captures(i32* %c) nounwind readonly
-  %l = load i32* %d
+  %l = load i32, i32* %d
   ret void
 }
 
diff --git a/llvm/test/Analysis/BasicAA/nocapture.ll b/llvm/test/Analysis/BasicAA/nocapture.ll
index ffc0a09..26cb69b 100644
--- a/llvm/test/Analysis/BasicAA/nocapture.ll
+++ b/llvm/test/Analysis/BasicAA/nocapture.ll
@@ -6,9 +6,9 @@
 ; CHECK: ret i32 0
        %P = alloca i32
        %Q = call i32* @test(i32* %P)
-       %a = load i32* %P
+       %a = load i32, i32* %P
        store i32 4, i32* %Q   ;; cannot clobber P since it is nocapture.
-       %b = load i32* %P
+       %b = load i32, i32* %P
        %c = sub i32 %a, %b
        ret i32 %c
 }
@@ -19,7 +19,7 @@
 ; CHECK: call void @test3
 ; CHECK: store i32 0, i32* %p
 ; CHECK: store i32 1, i32* %x
-; CHECK: %y = load i32* %p
+; CHECK: %y = load i32, i32* %p
 ; CHECK: ret i32 %y
 entry:
        %q = alloca i32*
@@ -27,10 +27,10 @@
        ; attribute since the copy doesn't outlive the function.
        call void @test3(i32** %q, i32* %p) nounwind
        store i32 0, i32* %p
-       %x = load i32** %q
+       %x = load i32*, i32** %q
        ; This store might write to %p and so we can't eliminate the subsequent
        ; load
        store i32 1, i32* %x
-       %y = load i32* %p
+       %y = load i32, i32* %p
        ret i32 %y
 }
diff --git a/llvm/test/Analysis/BasicAA/phi-aa.ll b/llvm/test/Analysis/BasicAA/phi-aa.ll
index 1b3341e..3944e9e 100644
--- a/llvm/test/Analysis/BasicAA/phi-aa.ll
+++ b/llvm/test/Analysis/BasicAA/phi-aa.ll
@@ -25,9 +25,9 @@
 
 bb2:
   %P = phi i32* [ @X, %bb ], [ @Y, %bb1 ]
-  %tmp1 = load i32* @Z, align 4
+  %tmp1 = load i32, i32* @Z, align 4
   store i32 123, i32* %P, align 4
-  %tmp2 = load i32* @Z, align 4
+  %tmp2 = load i32, i32* @Z, align 4
   br label %return
 
 return:
@@ -52,14 +52,14 @@
   br i1 %targetBlock, label %for.body, label %bye
 
 for.body:
-  %1 = load i32* %jj7, align 4
+  %1 = load i32, i32* %jj7, align 4
   %idxprom4 = zext i32 %1 to i64
   %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* %oa5, i64 0, i64 %idxprom4
-  %2 = load i32* %arrayidx5, align 4
+  %2 = load i32, i32* %arrayidx5, align 4
   %sub6 = sub i32 %2, 6
   store i32 %sub6, i32* %arrayidx5, align 4
   ; %0 and %arrayidx5 can alias! It is not safe to DSE the above store.
-  %3 = load i32* %0, align 4
+  %3 = load i32, i32* %0, align 4
   store i32 %3, i32* %arrayidx5, align 4
   %sub11 = add i32 %1, -1
   %idxprom12 = zext i32 %sub11 to i64
@@ -68,7 +68,7 @@
   br label %codeRepl
 
 bye:
-  %.reload = load i32* %jj7, align 4
+  %.reload = load i32, i32* %jj7, align 4
   ret i32 %.reload
 }
 
diff --git a/llvm/test/Analysis/BasicAA/phi-spec-order.ll b/llvm/test/Analysis/BasicAA/phi-spec-order.ll
index 0d1a6f4..30aff8c 100644
--- a/llvm/test/Analysis/BasicAA/phi-spec-order.ll
+++ b/llvm/test/Analysis/BasicAA/phi-spec-order.ll
@@ -24,20 +24,20 @@
   %lsr.iv46 = bitcast [16000 x double]* %lsr.iv4 to <4 x double>*
   %lsr.iv12 = bitcast [16000 x double]* %lsr.iv1 to <4 x double>*
   %scevgep11 = getelementptr <4 x double>, <4 x double>* %lsr.iv46, i64 -2
-  %i6 = load <4 x double>* %scevgep11, align 32
+  %i6 = load <4 x double>, <4 x double>* %scevgep11, align 32
   %add = fadd <4 x double> %i6, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
   store <4 x double> %add, <4 x double>* %lsr.iv12, align 32
   %scevgep10 = getelementptr <4 x double>, <4 x double>* %lsr.iv46, i64 -1
-  %i7 = load <4 x double>* %scevgep10, align 32
+  %i7 = load <4 x double>, <4 x double>* %scevgep10, align 32
   %add.4 = fadd <4 x double> %i7, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
   %scevgep9 = getelementptr <4 x double>, <4 x double>* %lsr.iv12, i64 1
   store <4 x double> %add.4, <4 x double>* %scevgep9, align 32
-  %i8 = load <4 x double>* %lsr.iv46, align 32
+  %i8 = load <4 x double>, <4 x double>* %lsr.iv46, align 32
   %add.8 = fadd <4 x double> %i8, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
   %scevgep8 = getelementptr <4 x double>, <4 x double>* %lsr.iv12, i64 2
   store <4 x double> %add.8, <4 x double>* %scevgep8, align 32
   %scevgep7 = getelementptr <4 x double>, <4 x double>* %lsr.iv46, i64 1
-  %i9 = load <4 x double>* %scevgep7, align 32
+  %i9 = load <4 x double>, <4 x double>* %scevgep7, align 32
   %add.12 = fadd <4 x double> %i9, <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
   %scevgep3 = getelementptr <4 x double>, <4 x double>* %lsr.iv12, i64 3
   store <4 x double> %add.12, <4 x double>* %scevgep3, align 32
diff --git a/llvm/test/Analysis/BasicAA/phi-speculation.ll b/llvm/test/Analysis/BasicAA/phi-speculation.ll
index 8965056..ed0d49b 100644
--- a/llvm/test/Analysis/BasicAA/phi-speculation.ll
+++ b/llvm/test/Analysis/BasicAA/phi-speculation.ll
@@ -17,10 +17,10 @@
   %ptr2_phi = phi i32* [ %ptr2, %entry ], [ %ptr2_inc, %while.body ]
   %result.09 = phi i32 [ 0 , %entry ], [ %add, %while.body ]
   %dec = add nsw i32 %num, -1
-  %0 = load i32* %ptr_phi, align 4
+  %0 = load i32, i32* %ptr_phi, align 4
   store i32 %0, i32* %ptr2_phi, align 4
-  %1 = load i32* %coeff, align 4
-  %2 = load i32* %ptr_phi, align 4
+  %1 = load i32, i32* %coeff, align 4
+  %2 = load i32, i32* %ptr_phi, align 4
   %mul = mul nsw i32 %1, %2
   %add = add nsw i32 %mul, %result.09
   %tobool = icmp eq i32 %dec, 0
@@ -52,10 +52,10 @@
   %ptr2_phi = phi i32* [ %ptr_outer_phi2, %outer.while.header ], [ %ptr2_inc, %while.body ]
   %result.09 = phi i32 [ 0 , %outer.while.header ], [ %add, %while.body ]
   %dec = add nsw i32 %num, -1
-  %0 = load i32* %ptr_phi, align 4
+  %0 = load i32, i32* %ptr_phi, align 4
   store i32 %0, i32* %ptr2_phi, align 4
-  %1 = load i32* %coeff, align 4
-  %2 = load i32* %ptr_phi, align 4
+  %1 = load i32, i32* %coeff, align 4
+  %2 = load i32, i32* %ptr_phi, align 4
   %mul = mul nsw i32 %1, %2
   %add = add nsw i32 %mul, %result.09
   %tobool = icmp eq i32 %dec, 0
diff --git a/llvm/test/Analysis/BasicAA/pr18573.ll b/llvm/test/Analysis/BasicAA/pr18573.ll
index 25f9d94..ea5e4a2 100644
--- a/llvm/test/Analysis/BasicAA/pr18573.ll
+++ b/llvm/test/Analysis/BasicAA/pr18573.ll
@@ -10,7 +10,7 @@
 ; Function Attrs: nounwind
 define <8 x float> @foo1(i8* noalias readonly %arr.ptr, <8 x i32>* noalias readonly %vix.ptr, i8* noalias %t2.ptr) #1 {
 allocas:
-  %vix = load <8 x i32>* %vix.ptr, align 4
+  %vix = load <8 x i32>, <8 x i32>* %vix.ptr, align 4
   %t1.ptr = getelementptr i8, i8* %arr.ptr, i8 4
   
   %v1 = tail call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, i8* %arr.ptr, <8 x i32> %vix, <8 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, i8 1) #2
@@ -31,7 +31,7 @@
 ; Function Attrs: nounwind
 define <8 x float> @foo2(i8* noalias readonly %arr.ptr, <8 x i32>* noalias readonly %vix.ptr, i8* noalias %t2.ptr) #1 {
 allocas:
-  %vix = load <8 x i32>* %vix.ptr, align 4
+  %vix = load <8 x i32>, <8 x i32>* %vix.ptr, align 4
   %t1.ptr = getelementptr i8, i8* %arr.ptr, i8 4
   
   %v1 = tail call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> undef, i8* %arr.ptr, <8 x i32> %vix, <8 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, i8 1) #2
diff --git a/llvm/test/Analysis/BasicAA/store-promote.ll b/llvm/test/Analysis/BasicAA/store-promote.ll
index bb4258ff..afe11c2 100644
--- a/llvm/test/Analysis/BasicAA/store-promote.ll
+++ b/llvm/test/Analysis/BasicAA/store-promote.ll
@@ -10,11 +10,11 @@
 @C = global [2 x i32] [ i32 4, i32 8 ]          ; <[2 x i32]*> [#uses=2]
 
 define i32 @test1(i1 %c) {
-        %Atmp = load i32* @A            ; <i32> [#uses=2]
+        %Atmp = load i32, i32* @A            ; <i32> [#uses=2]
         br label %Loop
 
 Loop:           ; preds = %Loop, %0
-        %ToRemove = load i32* @A                ; <i32> [#uses=1]
+        %ToRemove = load i32, i32* @A                ; <i32> [#uses=1]
         store i32 %Atmp, i32* @B
         br i1 %c, label %Out, label %Loop
 
@@ -24,7 +24,7 @@
         
 ; The Loop block should be empty after the load/store are promoted.
 ; CHECK:     @test1
-; CHECK:        load i32* @A
+; CHECK:        load i32, i32* @A
 ; CHECK:      Loop:
 ; CHECK-NEXT:   br i1 %c, label %Out, label %Loop
 ; CHECK:      Out:
@@ -35,10 +35,10 @@
         br label %Loop
 
 Loop:           ; preds = %Loop, %0
-        %AVal = load i32* @A            ; <i32> [#uses=2]
+        %AVal = load i32, i32* @A            ; <i32> [#uses=2]
         %C0 = getelementptr [2 x i32], [2 x i32]* @C, i64 0, i64 0         ; <i32*> [#uses=1]
         store i32 %AVal, i32* %C0
-        %BVal = load i32* @B            ; <i32> [#uses=2]
+        %BVal = load i32, i32* @B            ; <i32> [#uses=2]
         %C1 = getelementptr [2 x i32], [2 x i32]* @C, i64 0, i64 1         ; <i32*> [#uses=1]
         store i32 %BVal, i32* %C1
         br i1 %c, label %Out, label %Loop
diff --git a/llvm/test/Analysis/BasicAA/tailcall-modref.ll b/llvm/test/Analysis/BasicAA/tailcall-modref.ll
index ebeb28c..5857e68 100644
--- a/llvm/test/Analysis/BasicAA/tailcall-modref.ll
+++ b/llvm/test/Analysis/BasicAA/tailcall-modref.ll
@@ -4,9 +4,9 @@
 ; CHECK: ret i32 0
         %A = alloca i32         ; <i32*> [#uses=3]
         call void @foo( i32* %A )
-        %X = load i32* %A               ; <i32> [#uses=1]
+        %X = load i32, i32* %A               ; <i32> [#uses=1]
         tail call void @bar( )
-        %Y = load i32* %A               ; <i32> [#uses=1]
+        %Y = load i32, i32* %A               ; <i32> [#uses=1]
         %Z = sub i32 %X, %Y             ; <i32> [#uses=1]
         ret i32 %Z
 }
diff --git a/llvm/test/Analysis/BasicAA/underlying-value.ll b/llvm/test/Analysis/BasicAA/underlying-value.ll
index b0d2261..0cfbdb8 100644
--- a/llvm/test/Analysis/BasicAA/underlying-value.ll
+++ b/llvm/test/Analysis/BasicAA/underlying-value.ll
@@ -15,9 +15,9 @@
 
 for.body5:                                        ; preds = %for.cond2
   %arrayidx = getelementptr inbounds [2 x i64], [2 x i64]* undef, i32 0, i64 0
-  %tmp7 = load i64* %arrayidx, align 8
+  %tmp7 = load i64, i64* %arrayidx, align 8
   %arrayidx9 = getelementptr inbounds [2 x i64], [2 x i64]* undef, i32 0, i64 undef
-  %tmp10 = load i64* %arrayidx9, align 8
+  %tmp10 = load i64, i64* %arrayidx9, align 8
   br label %for.cond2
 
 for.end22:                                        ; preds = %for.cond
diff --git a/llvm/test/Analysis/BasicAA/zext.ll b/llvm/test/Analysis/BasicAA/zext.ll
index bf35a52..ed35656 100644
--- a/llvm/test/Analysis/BasicAA/zext.ll
+++ b/llvm/test/Analysis/BasicAA/zext.ll
@@ -112,7 +112,7 @@
 
 define void @test_spec2006() {
   %h = alloca [1 x [2 x i32*]], align 16
-  %d.val = load i32* @d, align 4
+  %d.val = load i32, i32* @d, align 4
   %d.promoted = sext i32 %d.val to i64
   %1 = icmp slt i32 %d.val, 2
   br i1 %1, label %.lr.ph, label %3
@@ -168,7 +168,7 @@
 
 define void @test_modulo_analysis_with_global() {
   %h = alloca [1 x [2 x i32*]], align 16
-  %b = load i32* @b, align 4
+  %b = load i32, i32* @b, align 4
   %b.promoted = sext i32 %b to i64
   br label %for.loop
 
diff --git a/llvm/test/Analysis/BlockFrequencyInfo/basic.ll b/llvm/test/Analysis/BlockFrequencyInfo/basic.ll
index 8701bbd..728adf0 100644
--- a/llvm/test/Analysis/BlockFrequencyInfo/basic.ll
+++ b/llvm/test/Analysis/BlockFrequencyInfo/basic.ll
@@ -13,7 +13,7 @@
   %iv = phi i32 [ 0, %entry ], [ %next, %body ]
   %base = phi i32 [ 0, %entry ], [ %sum, %body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
-  %0 = load i32* %arrayidx
+  %0 = load i32, i32* %arrayidx
   %sum = add nsw i32 %0, %base
   %next = add i32 %iv, 1
   %exitcond = icmp eq i32 %next, %i
diff --git a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll
index 29cfc4e..0f66911 100644
--- a/llvm/test/Analysis/BranchProbabilityInfo/basic.ll
+++ b/llvm/test/Analysis/BranchProbabilityInfo/basic.ll
@@ -10,7 +10,7 @@
   %iv = phi i32 [ 0, %entry ], [ %next, %body ]
   %base = phi i32 [ 0, %entry ], [ %sum, %body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
-  %0 = load i32* %arrayidx
+  %0 = load i32, i32* %arrayidx
   %sum = add nsw i32 %0, %base
   %next = add i32 %iv, 1
   %exitcond = icmp eq i32 %next, %i
@@ -154,7 +154,7 @@
 
 entry:
   %gep1 = getelementptr i32, i32* %a, i32 1
-  %val1 = load i32* %gep1
+  %val1 = load i32, i32* %gep1
   %cond1 = icmp ugt i32 %val1, 1
   br i1 %cond1, label %then, label %else
 
@@ -165,7 +165,7 @@
 
 else:
   %gep2 = getelementptr i32, i32* %a, i32 2
-  %val2 = load i32* %gep2
+  %val2 = load i32, i32* %gep2
   %val3 = call i32 @regular_function(i32 %val2)
   br label %exit
 
diff --git a/llvm/test/Analysis/BranchProbabilityInfo/loop.ll b/llvm/test/Analysis/BranchProbabilityInfo/loop.ll
index d072778..e792790 100644
--- a/llvm/test/Analysis/BranchProbabilityInfo/loop.ll
+++ b/llvm/test/Analysis/BranchProbabilityInfo/loop.ll
@@ -88,7 +88,7 @@
 do.body:
   %i.0 = phi i32 [ 0, %entry ], [ %inc4, %if.end ]
   call void @g1()
-  %0 = load i32* %c, align 4
+  %0 = load i32, i32* %c, align 4
   %cmp = icmp slt i32 %0, 42
   br i1 %cmp, label %do.body1, label %if.end
 ; CHECK: edge do.body -> do.body1 probability is 16 / 32 = 50%
@@ -124,7 +124,7 @@
 do.body:
   %i.0 = phi i32 [ 0, %entry ], [ %inc4, %do.end ]
   call void @g1()
-  %0 = load i32* %c, align 4
+  %0 = load i32, i32* %c, align 4
   %cmp = icmp slt i32 %0, 42
   br i1 %cmp, label %return, label %do.body1
 ; CHECK: edge do.body -> return probability is 4 / 128
@@ -169,7 +169,7 @@
 
 do.body1:
   %j.0 = phi i32 [ 0, %do.body ], [ %inc, %if.end ]
-  %0 = load i32* %c, align 4
+  %0 = load i32, i32* %c, align 4
   %cmp = icmp slt i32 %0, 42
   br i1 %cmp, label %return, label %if.end
 ; CHECK: edge do.body1 -> return probability is 4 / 128
@@ -214,7 +214,7 @@
 do.body1:
   %j.0 = phi i32 [ 0, %do.body ], [ %inc, %do.cond ]
   call void @g2()
-  %0 = load i32* %c, align 4
+  %0 = load i32, i32* %c, align 4
   %cmp = icmp slt i32 %0, 42
   br i1 %cmp, label %return, label %do.cond
 ; CHECK: edge do.body1 -> return probability is 4 / 128
@@ -258,7 +258,7 @@
 
 for.body:
   %i.011 = phi i32 [ 0, %for.body.lr.ph ], [ %inc6, %for.inc5 ]
-  %0 = load i32* %c, align 4
+  %0 = load i32, i32* %c, align 4
   %cmp1 = icmp eq i32 %0, %i.011
   br i1 %cmp1, label %for.inc5, label %if.end
 ; CHECK: edge for.body -> for.inc5 probability is 16 / 32 = 50%
@@ -319,21 +319,21 @@
 
 for.body3:
   %j.017 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
-  %0 = load i32* %c, align 4
+  %0 = load i32, i32* %c, align 4
   %cmp4 = icmp eq i32 %0, %j.017
   br i1 %cmp4, label %for.inc, label %if.end
 ; CHECK: edge for.body3 -> for.inc probability is 16 / 32 = 50%
 ; CHECK: edge for.body3 -> if.end probability is 16 / 32 = 50%
 
 if.end:
-  %1 = load i32* %arrayidx5, align 4
+  %1 = load i32, i32* %arrayidx5, align 4
   %cmp6 = icmp eq i32 %1, %j.017
   br i1 %cmp6, label %for.inc, label %if.end8
 ; CHECK: edge if.end -> for.inc probability is 16 / 32 = 50%
 ; CHECK: edge if.end -> if.end8 probability is 16 / 32 = 50%
 
 if.end8:
-  %2 = load i32* %arrayidx9, align 4
+  %2 = load i32, i32* %arrayidx9, align 4
   %cmp10 = icmp eq i32 %2, %j.017
   br i1 %cmp10, label %for.inc, label %if.end12
 ; CHECK: edge if.end8 -> for.inc probability is 16 / 32 = 50%
diff --git a/llvm/test/Analysis/BranchProbabilityInfo/pr18705.ll b/llvm/test/Analysis/BranchProbabilityInfo/pr18705.ll
index fa300d1..aff08a6 100644
--- a/llvm/test/Analysis/BranchProbabilityInfo/pr18705.ll
+++ b/llvm/test/Analysis/BranchProbabilityInfo/pr18705.ll
@@ -23,22 +23,22 @@
   %c.addr.09 = phi i32* [ %c, %while.body.lr.ph ], [ %c.addr.1, %if.end ]
   %indvars.iv.next = add nsw i64 %indvars.iv, -1
   %arrayidx = getelementptr inbounds float, float* %f0, i64 %indvars.iv.next
-  %1 = load float* %arrayidx, align 4
+  %1 = load float, float* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds float, float* %f1, i64 %indvars.iv.next
-  %2 = load float* %arrayidx2, align 4
+  %2 = load float, float* %arrayidx2, align 4
   %cmp = fcmp une float %1, %2
   br i1 %cmp, label %if.then, label %if.else
 
 if.then:
   %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.011, i64 1
-  %3 = load i32* %b.addr.011, align 4
+  %3 = load i32, i32* %b.addr.011, align 4
   %add = add nsw i32 %3, 12
   store i32 %add, i32* %b.addr.011, align 4
   br label %if.end
 
 if.else:
   %incdec.ptr3 = getelementptr inbounds i32, i32* %c.addr.09, i64 1
-  %4 = load i32* %c.addr.09, align 4
+  %4 = load i32, i32* %c.addr.09, align 4
   %sub = add nsw i32 %4, -13
   store i32 %sub, i32* %c.addr.09, align 4
   br label %if.end
diff --git a/llvm/test/Analysis/CFLAliasAnalysis/full-store-partial-alias.ll b/llvm/test/Analysis/CFLAliasAnalysis/full-store-partial-alias.ll
index 245a060..adacf04 100644
--- a/llvm/test/Analysis/CFLAliasAnalysis/full-store-partial-alias.ll
+++ b/llvm/test/Analysis/CFLAliasAnalysis/full-store-partial-alias.ll
@@ -22,11 +22,11 @@
   %u = alloca %union.anon, align 8
   %tmp9 = getelementptr inbounds %union.anon, %union.anon* %u, i64 0, i32 0
   store double %x, double* %tmp9, align 8, !tbaa !0
-  %tmp2 = load i32* bitcast (i64* @endianness_test to i32*), align 8, !tbaa !3
+  %tmp2 = load i32, i32* bitcast (i64* @endianness_test to i32*), align 8, !tbaa !3
   %idxprom = sext i32 %tmp2 to i64
   %tmp4 = bitcast %union.anon* %u to [2 x i32]*
   %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %tmp4, i64 0, i64 %idxprom
-  %tmp5 = load i32* %arrayidx, align 4, !tbaa !3
+  %tmp5 = load i32, i32* %arrayidx, align 4, !tbaa !3
   %tmp5.lobit = lshr i32 %tmp5, 31
   ret i32 %tmp5.lobit
 }
diff --git a/llvm/test/Analysis/CFLAliasAnalysis/gep-signed-arithmetic.ll b/llvm/test/Analysis/CFLAliasAnalysis/gep-signed-arithmetic.ll
index eeb4237..c2fcf32 100644
--- a/llvm/test/Analysis/CFLAliasAnalysis/gep-signed-arithmetic.ll
+++ b/llvm/test/Analysis/CFLAliasAnalysis/gep-signed-arithmetic.ll
@@ -11,9 +11,9 @@
   %tmp31 = mul i32 %indvar, -2
   %tmp32 = add i32 %tmp31, 30
   %t.5 = getelementptr i32, i32* %tab, i32 %tmp32
-  %loada = load i32* %tab
+  %loada = load i32, i32* %tab
   store i32 0, i32* %t.5
-  %loadb = load i32* %tab
+  %loadb = load i32, i32* %tab
   %rval = add i32 %loada, %loadb
   ret i32 %rval
 }
diff --git a/llvm/test/Analysis/CFLAliasAnalysis/multilevel-combine.ll b/llvm/test/Analysis/CFLAliasAnalysis/multilevel-combine.ll
index 9bbc721..e997374 100644
--- a/llvm/test/Analysis/CFLAliasAnalysis/multilevel-combine.ll
+++ b/llvm/test/Analysis/CFLAliasAnalysis/multilevel-combine.ll
@@ -25,7 +25,7 @@
 
   store %T* %MS, %T** %M
 
-  %AP = load %T** %M ; PartialAlias with %A, %B
+  %AP = load %T*, %T** %M ; PartialAlias with %A, %B
 
   ret void
 }
diff --git a/llvm/test/Analysis/CFLAliasAnalysis/multilevel.ll b/llvm/test/Analysis/CFLAliasAnalysis/multilevel.ll
index 9c9eb9a..d42dca4 100644
--- a/llvm/test/Analysis/CFLAliasAnalysis/multilevel.ll
+++ b/llvm/test/Analysis/CFLAliasAnalysis/multilevel.ll
@@ -23,8 +23,8 @@
   store %T* %A, %T** %M
   store %T* %B, %T** %N
 
-  %AP = load %T** %M ; PartialAlias with %A
-  %BP = load %T** %N ; PartialAlias with %B
+  %AP = load %T*, %T** %M ; PartialAlias with %A
+  %BP = load %T*, %T** %N ; PartialAlias with %B
 
   ret void
 }
diff --git a/llvm/test/Analysis/CFLAliasAnalysis/must-and-partial.ll b/llvm/test/Analysis/CFLAliasAnalysis/must-and-partial.ll
index bf1e66c..9deacf8 100644
--- a/llvm/test/Analysis/CFLAliasAnalysis/must-and-partial.ll
+++ b/llvm/test/Analysis/CFLAliasAnalysis/must-and-partial.ll
@@ -21,7 +21,7 @@
   %bigbase0 = bitcast i8* %base to i16*
   store i16 -1, i16* %bigbase0
 
-  %loaded = load i8* %phi
+  %loaded = load i8, i8* %phi
   ret i8 %loaded
 }
 
@@ -37,7 +37,7 @@
   %bigbase1 = bitcast i8* %base to i16*
   store i16 -1, i16* %bigbase1
 
-  %loaded = load i8* %sel
+  %loaded = load i8, i8* %sel
   ret i8 %loaded
 }
 
@@ -46,9 +46,9 @@
 ; CHECK: MayAlias:  double* %A, double* %Index
 define void @testr2(double* nocapture readonly %A, double* nocapture readonly %Index) {
   %arrayidx22 = getelementptr inbounds double, double* %Index, i64 2
-  %1 = load double* %arrayidx22
+  %1 = load double, double* %arrayidx22
   %arrayidx25 = getelementptr inbounds double, double* %A, i64 2
-  %2 = load double* %arrayidx25
+  %2 = load double, double* %arrayidx25
   %mul26 = fmul double %1, %2
   ret void
 }
diff --git a/llvm/test/Analysis/CostModel/AArch64/store.ll b/llvm/test/Analysis/CostModel/AArch64/store.ll
index 0c9883c..307f8f8 100644
--- a/llvm/test/Analysis/CostModel/AArch64/store.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/store.ll
@@ -14,9 +14,9 @@
     ; CHECK: cost of 64 {{.*}} store
     store <4 x i8> undef, <4 x i8> * undef
     ; CHECK: cost of 16 {{.*}} load
-    load <2 x i8> * undef
+    load <2 x i8> , <2 x i8> * undef
     ; CHECK: cost of 64 {{.*}} load
-    load <4 x i8> * undef
+    load <4 x i8> , <4 x i8> * undef
 
     ret void
 }
diff --git a/llvm/test/Analysis/CostModel/ARM/insertelement.ll b/llvm/test/Analysis/CostModel/ARM/insertelement.ll
index f951b08..bd1467ef 100644
--- a/llvm/test/Analysis/CostModel/ARM/insertelement.ll
+++ b/llvm/test/Analysis/CostModel/ARM/insertelement.ll
@@ -10,8 +10,8 @@
 ; CHECK: insertelement_i8
 define void @insertelement_i8(%T_i8* %saddr,
                            %T_i8v* %vaddr) {
-  %v0 = load %T_i8v* %vaddr
-  %v1 = load %T_i8* %saddr
+  %v0 = load %T_i8v, %T_i8v* %vaddr
+  %v1 = load %T_i8, %T_i8* %saddr
 ;CHECK: estimated cost of 3 for {{.*}} insertelement <8 x i8>
   %v2 = insertelement %T_i8v %v0, %T_i8 %v1, i32 1
   store %T_i8v %v2, %T_i8v* %vaddr
@@ -24,8 +24,8 @@
 ; CHECK: insertelement_i16
 define void @insertelement_i16(%T_i16* %saddr,
                            %T_i16v* %vaddr) {
-  %v0 = load %T_i16v* %vaddr
-  %v1 = load %T_i16* %saddr
+  %v0 = load %T_i16v, %T_i16v* %vaddr
+  %v1 = load %T_i16, %T_i16* %saddr
 ;CHECK: estimated cost of 3 for {{.*}} insertelement <4 x i16>
   %v2 = insertelement %T_i16v %v0, %T_i16 %v1, i32 1
   store %T_i16v %v2, %T_i16v* %vaddr
@@ -37,8 +37,8 @@
 ; CHECK: insertelement_i32
 define void @insertelement_i32(%T_i32* %saddr,
                            %T_i32v* %vaddr) {
-  %v0 = load %T_i32v* %vaddr
-  %v1 = load %T_i32* %saddr
+  %v0 = load %T_i32v, %T_i32v* %vaddr
+  %v1 = load %T_i32, %T_i32* %saddr
 ;CHECK: estimated cost of 3 for {{.*}} insertelement <2 x i32>
   %v2 = insertelement %T_i32v %v0, %T_i32 %v1, i32 1
   store %T_i32v %v2, %T_i32v* %vaddr
diff --git a/llvm/test/Analysis/CostModel/PowerPC/load_store.ll b/llvm/test/Analysis/CostModel/PowerPC/load_store.ll
index 368f0a7..1e50f16 100644
--- a/llvm/test/Analysis/CostModel/PowerPC/load_store.ll
+++ b/llvm/test/Analysis/CostModel/PowerPC/load_store.ll
@@ -19,26 +19,26 @@
 }
 define i32 @loads(i32 %arg) {
   ; CHECK: cost of 1 {{.*}} load
-  load i8* undef, align 4
+  load i8, i8* undef, align 4
   ; CHECK: cost of 1 {{.*}} load
-  load i16* undef, align 4
+  load i16, i16* undef, align 4
   ; CHECK: cost of 1 {{.*}} load
-  load i32* undef, align 4
+  load i32, i32* undef, align 4
   ; CHECK: cost of 2 {{.*}} load
-  load i64* undef, align 4
+  load i64, i64* undef, align 4
   ; CHECK: cost of 4 {{.*}} load
-  load i128* undef, align 4
+  load i128, i128* undef, align 4
 
   ; FIXME: There actually are sub-vector Altivec loads, and so we could handle
   ; this with a small expense, but we don't currently.
   ; CHECK: cost of 48 {{.*}} load
-  load <4 x i16>* undef, align 2
+  load <4 x i16>, <4 x i16>* undef, align 2
 
   ; CHECK: cost of 1 {{.*}} load
-  load <4 x i32>* undef, align 4
+  load <4 x i32>, <4 x i32>* undef, align 4
 
   ; CHECK: cost of 46 {{.*}} load
-  load <3 x float>* undef, align 1
+  load <3 x float>, <3 x float>* undef, align 1
 
   ret i32 undef
 }
diff --git a/llvm/test/Analysis/CostModel/X86/intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/intrinsic-cost.ll
index 1964881..cbe409d 100644
--- a/llvm/test/Analysis/CostModel/X86/intrinsic-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/intrinsic-cost.ll
@@ -11,7 +11,7 @@
   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   %0 = getelementptr inbounds float, float* %f, i64 %index
   %1 = bitcast float* %0 to <4 x float>*
-  %wide.load = load <4 x float>* %1, align 4
+  %wide.load = load <4 x float>, <4 x float>* %1, align 4
   %2 = call <4 x float> @llvm.ceil.v4f32(<4 x float> %wide.load)
   store <4 x float> %2, <4 x float>* %1, align 4
   %index.next = add i64 %index, 4
@@ -39,7 +39,7 @@
   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   %0 = getelementptr inbounds float, float* %f, i64 %index
   %1 = bitcast float* %0 to <4 x float>*
-  %wide.load = load <4 x float>* %1, align 4
+  %wide.load = load <4 x float>, <4 x float>* %1, align 4
   %2 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %wide.load)
   store <4 x float> %2, <4 x float>* %1, align 4
   %index.next = add i64 %index, 4
@@ -67,7 +67,7 @@
   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   %0 = getelementptr inbounds float, float* %f, i64 %index
   %1 = bitcast float* %0 to <4 x float>*
-  %wide.load = load <4 x float>* %1, align 4
+  %wide.load = load <4 x float>, <4 x float>* %1, align 4
   %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %b, <4 x float> %c)
   store <4 x float> %2, <4 x float>* %1, align 4
   %index.next = add i64 %index, 4
diff --git a/llvm/test/Analysis/CostModel/X86/load_store.ll b/llvm/test/Analysis/CostModel/X86/load_store.ll
index a53d0bd..ccf110a 100644
--- a/llvm/test/Analysis/CostModel/X86/load_store.ll
+++ b/llvm/test/Analysis/CostModel/X86/load_store.ll
@@ -34,49 +34,49 @@
 }
 define i32 @loads(i32 %arg) {
   ;CHECK: cost of 1 {{.*}} load
-  load i8* undef, align 4
+  load i8, i8* undef, align 4
   ;CHECK: cost of 1 {{.*}} load
-  load i16* undef, align 4
+  load i16, i16* undef, align 4
   ;CHECK: cost of 1 {{.*}} load
-  load i32* undef, align 4
+  load i32, i32* undef, align 4
   ;CHECK: cost of 1 {{.*}} load
-  load i64* undef, align 4
+  load i64, i64* undef, align 4
   ;CHECK: cost of 2 {{.*}} load
-  load i128* undef, align 4
+  load i128, i128* undef, align 4
 
   ;CHECK: cost of 1 {{.*}} load
-  load <2 x i32>* undef, align 4
+  load <2 x i32>, <2 x i32>* undef, align 4
   ;CHECK: cost of 1 {{.*}} load
-  load <4 x i32>* undef, align 4
+  load <4 x i32>, <4 x i32>* undef, align 4
   ;CHECK: cost of 2 {{.*}} load
-  load <8 x i32>* undef, align 4
+  load <8 x i32>, <8 x i32>* undef, align 4
 
 
   ;CHECK: cost of 1 {{.*}} load
-  load <2 x i64>* undef, align 4
+  load <2 x i64>, <2 x i64>* undef, align 4
   ;CHECK: cost of 2 {{.*}} load
-  load <4 x i64>* undef, align 4
+  load <4 x i64>, <4 x i64>* undef, align 4
   ;CHECK: cost of 4 {{.*}} load
-  load <8 x i64>* undef, align 4
+  load <8 x i64>, <8 x i64>* undef, align 4
 
 
   ;CHECK: cost of 3 {{.*}} load
-  load <3 x float>* undef, align 4
+  load <3 x float>, <3 x float>* undef, align 4
 
   ;CHECK: cost of 3 {{.*}} load
-  load <3 x double>* undef, align 4
+  load <3 x double>, <3 x double>* undef, align 4
 
   ;CHECK: cost of 3 {{.*}} load
-  load <3 x i32>* undef, align 4
+  load <3 x i32>, <3 x i32>* undef, align 4
 
   ;CHECK: cost of 3 {{.*}} load
-  load <3 x i64>* undef, align 4
+  load <3 x i64>, <3 x i64>* undef, align 4
 
   ;CHECK: cost of 10 {{.*}} load
-  load <5 x i32>* undef, align 4
+  load <5 x i32>, <5 x i32>* undef, align 4
 
   ;CHECK: cost of 10 {{.*}} load
-  load <5 x i64>* undef, align 4
+  load <5 x i64>, <5 x i64>* undef, align 4
 
   ret i32 undef
 }
diff --git a/llvm/test/Analysis/CostModel/X86/loop_v2.ll b/llvm/test/Analysis/CostModel/X86/loop_v2.ll
index bd56512..9283310 100644
--- a/llvm/test/Analysis/CostModel/X86/loop_v2.ll
+++ b/llvm/test/Analysis/CostModel/X86/loop_v2.ll
@@ -12,7 +12,7 @@
   %vec.phi = phi <2 x i32> [ zeroinitializer, %vector.ph ], [ %12, %vector.body ]
   %0 = getelementptr inbounds i32, i32* %A, i64 %index
   %1 = bitcast i32* %0 to <2 x i32>*
-  %2 = load <2 x i32>* %1, align 4
+  %2 = load <2 x i32>, <2 x i32>* %1, align 4
   %3 = sext <2 x i32> %2 to <2 x i64>
   ;CHECK: cost of 1 {{.*}} extract
   %4 = extractelement <2 x i64> %3, i32 0
@@ -20,10 +20,10 @@
   ;CHECK: cost of 1 {{.*}} extract
   %6 = extractelement <2 x i64> %3, i32 1
   %7 = getelementptr inbounds i32, i32* %A, i64 %6
-  %8 = load i32* %5, align 4
+  %8 = load i32, i32* %5, align 4
   ;CHECK: cost of 1 {{.*}} insert
   %9 = insertelement <2 x i32> undef, i32 %8, i32 0
-  %10 = load i32* %7, align 4
+  %10 = load i32, i32* %7, align 4
   ;CHECK: cost of 1 {{.*}} insert
   %11 = insertelement <2 x i32> %9, i32 %10, i32 1
   %12 = add nsw <2 x i32> %11, %vec.phi
diff --git a/llvm/test/Analysis/CostModel/X86/vectorized-loop.ll b/llvm/test/Analysis/CostModel/X86/vectorized-loop.ll
index a311f72..2dd52a0 100644
--- a/llvm/test/Analysis/CostModel/X86/vectorized-loop.ll
+++ b/llvm/test/Analysis/CostModel/X86/vectorized-loop.ll
@@ -29,13 +29,13 @@
   ;CHECK: cost of 0 {{.*}} bitcast
   %5 = bitcast i32* %4 to <8 x i32>*
   ;CHECK: cost of 2 {{.*}} load
-  %6 = load <8 x i32>* %5, align 4
+  %6 = load <8 x i32>, <8 x i32>* %5, align 4
   ;CHECK: cost of 4 {{.*}} mul
   %7 = mul nsw <8 x i32> %6, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   %8 = getelementptr inbounds i32, i32* %A, i64 %index
   %9 = bitcast i32* %8 to <8 x i32>*
   ;CHECK: cost of 2 {{.*}} load
-  %10 = load <8 x i32>* %9, align 4
+  %10 = load <8 x i32>, <8 x i32>* %9, align 4
   ;CHECK: cost of 4 {{.*}} add
   %11 = add nsw <8 x i32> %10, %7
   ;CHECK: cost of 2 {{.*}} store
@@ -54,12 +54,12 @@
   %13 = add nsw i64 %indvars.iv, 2
   %arrayidx = getelementptr inbounds i32, i32* %B, i64 %13
   ;CHECK: cost of 1 {{.*}} load
-  %14 = load i32* %arrayidx, align 4
+  %14 = load i32, i32* %arrayidx, align 4
   ;CHECK: cost of 1 {{.*}} mul
   %mul = mul nsw i32 %14, 5
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
   ;CHECK: cost of 1 {{.*}} load
-  %15 = load i32* %arrayidx2, align 4
+  %15 = load i32, i32* %arrayidx2, align 4
   %add3 = add nsw i32 %15, %mul
   store i32 %add3, i32* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
diff --git a/llvm/test/Analysis/Delinearization/gcd_multiply_expr.ll b/llvm/test/Analysis/Delinearization/gcd_multiply_expr.ll
index e1db0d2..c30a672 100644
--- a/llvm/test/Analysis/Delinearization/gcd_multiply_expr.ll
+++ b/llvm/test/Analysis/Delinearization/gcd_multiply_expr.ll
@@ -27,7 +27,7 @@
 
 define i32 @fn2() {
 entry:
-  %.pr = load i32* @d, align 4
+  %.pr = load i32, i32* @d, align 4
   %phitmp = icmp eq i32 %.pr, 0
   br label %for.cond
 
@@ -36,11 +36,11 @@
   br i1 %0, label %for.cond, label %for.cond2thread-pre-split.preheader.i
 
 for.cond2thread-pre-split.preheader.i:
-  %1 = load i32* @g, align 4
-  %2 = load i32* @h, align 4
+  %1 = load i32, i32* @g, align 4
+  %2 = load i32, i32* @h, align 4
   %mul = mul nsw i32 %2, %1
-  %3 = load i8** @f, align 4
-  %.pr.pre.i = load i32* @b, align 4
+  %3 = load i8*, i8** @f, align 4
+  %.pr.pre.i = load i32, i32* @b, align 4
   br label %for.cond2thread-pre-split.i
 
 for.cond2thread-pre-split.i:
@@ -65,56 +65,56 @@
   %8 = phi i32 [ %inc.7.i, %for.body4.i ], [ %.pr.i, %for.body4.i.preheader ]
   %arrayidx.sum1 = add i32 %add.i, %8
   %arrayidx.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum1
-  %9 = load i8* %arrayidx.i, align 1
+  %9 = load i8, i8* %arrayidx.i, align 1
   %conv.i = sext i8 %9 to i32
   store i32 %conv.i, i32* @c, align 4
   %inc.i = add nsw i32 %8, 1
   store i32 %inc.i, i32* @b, align 4
   %arrayidx.sum2 = add i32 %add.i, %inc.i
   %arrayidx.1.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum2
-  %10 = load i8* %arrayidx.1.i, align 1
+  %10 = load i8, i8* %arrayidx.1.i, align 1
   %conv.1.i = sext i8 %10 to i32
   store i32 %conv.1.i, i32* @c, align 4
   %inc.1.i = add nsw i32 %8, 2
   store i32 %inc.1.i, i32* @b, align 4
   %arrayidx.sum3 = add i32 %add.i, %inc.1.i
   %arrayidx.2.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum3
-  %11 = load i8* %arrayidx.2.i, align 1
+  %11 = load i8, i8* %arrayidx.2.i, align 1
   %conv.2.i = sext i8 %11 to i32
   store i32 %conv.2.i, i32* @c, align 4
   %inc.2.i = add nsw i32 %8, 3
   store i32 %inc.2.i, i32* @b, align 4
   %arrayidx.sum4 = add i32 %add.i, %inc.2.i
   %arrayidx.3.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum4
-  %12 = load i8* %arrayidx.3.i, align 1
+  %12 = load i8, i8* %arrayidx.3.i, align 1
   %conv.3.i = sext i8 %12 to i32
   store i32 %conv.3.i, i32* @c, align 4
   %inc.3.i = add nsw i32 %8, 4
   store i32 %inc.3.i, i32* @b, align 4
   %arrayidx.sum5 = add i32 %add.i, %inc.3.i
   %arrayidx.4.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum5
-  %13 = load i8* %arrayidx.4.i, align 1
+  %13 = load i8, i8* %arrayidx.4.i, align 1
   %conv.4.i = sext i8 %13 to i32
   store i32 %conv.4.i, i32* @c, align 4
   %inc.4.i = add nsw i32 %8, 5
   store i32 %inc.4.i, i32* @b, align 4
   %arrayidx.sum6 = add i32 %add.i, %inc.4.i
   %arrayidx.5.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum6
-  %14 = load i8* %arrayidx.5.i, align 1
+  %14 = load i8, i8* %arrayidx.5.i, align 1
   %conv.5.i = sext i8 %14 to i32
   store i32 %conv.5.i, i32* @c, align 4
   %inc.5.i = add nsw i32 %8, 6
   store i32 %inc.5.i, i32* @b, align 4
   %arrayidx.sum7 = add i32 %add.i, %inc.5.i
   %arrayidx.6.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum7
-  %15 = load i8* %arrayidx.6.i, align 1
+  %15 = load i8, i8* %arrayidx.6.i, align 1
   %conv.6.i = sext i8 %15 to i32
   store i32 %conv.6.i, i32* @c, align 4
   %inc.6.i = add nsw i32 %8, 7
   store i32 %inc.6.i, i32* @b, align 4
   %arrayidx.sum8 = add i32 %add.i, %inc.6.i
   %arrayidx.7.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum8
-  %16 = load i8* %arrayidx.7.i, align 1
+  %16 = load i8, i8* %arrayidx.7.i, align 1
   %conv.7.i = sext i8 %16 to i32
   store i32 %conv.7.i, i32* @c, align 4
   %inc.7.i = add nsw i32 %8, 8
@@ -136,7 +136,7 @@
   %20 = phi i32 [ %inc.ur.i, %for.body4.ur.i ], [ %.ph, %for.body4.ur.i.preheader ]
   %arrayidx.sum = add i32 %add.i, %20
   %arrayidx.ur.i = getelementptr inbounds i8, i8* %3, i32 %arrayidx.sum
-  %21 = load i8* %arrayidx.ur.i, align 1
+  %21 = load i8, i8* %arrayidx.ur.i, align 1
   %conv.ur.i = sext i8 %21 to i32
   store i32 %conv.ur.i, i32* @c, align 4
   %inc.ur.i = add nsw i32 %20, 1
diff --git a/llvm/test/Analysis/Delinearization/himeno_1.ll b/llvm/test/Analysis/Delinearization/himeno_1.ll
index b2e2f95..bba7b4c 100644
--- a/llvm/test/Analysis/Delinearization/himeno_1.ll
+++ b/llvm/test/Analysis/Delinearization/himeno_1.ll
@@ -36,23 +36,23 @@
 define void @jacobi(i32 %nn, %struct.Mat* nocapture %a, %struct.Mat* nocapture %p) nounwind uwtable {
 entry:
   %p.rows.ptr = getelementptr inbounds %struct.Mat, %struct.Mat* %p, i64 0, i32 2
-  %p.rows = load i32* %p.rows.ptr
+  %p.rows = load i32, i32* %p.rows.ptr
   %p.rows.sub = add i32 %p.rows, -1
   %p.rows.sext = sext i32 %p.rows.sub to i64
   %p.cols.ptr = getelementptr inbounds %struct.Mat, %struct.Mat* %p, i64 0, i32 3
-  %p.cols = load i32* %p.cols.ptr
+  %p.cols = load i32, i32* %p.cols.ptr
   %p.cols.sub = add i32 %p.cols, -1
   %p.cols.sext = sext i32 %p.cols.sub to i64
   %p.deps.ptr = getelementptr inbounds %struct.Mat, %struct.Mat* %p, i64 0, i32 4
-  %p.deps = load i32* %p.deps.ptr
+  %p.deps = load i32, i32* %p.deps.ptr
   %p.deps.sub = add i32 %p.deps, -1
   %p.deps.sext = sext i32 %p.deps.sub to i64
   %a.cols.ptr = getelementptr inbounds %struct.Mat, %struct.Mat* %a, i64 0, i32 3
-  %a.cols = load i32* %a.cols.ptr
+  %a.cols = load i32, i32* %a.cols.ptr
   %a.deps.ptr = getelementptr inbounds %struct.Mat, %struct.Mat* %a, i64 0, i32 4
-  %a.deps = load i32* %a.deps.ptr
+  %a.deps = load i32, i32* %a.deps.ptr
   %a.base.ptr = getelementptr inbounds %struct.Mat, %struct.Mat* %a, i64 0, i32 0
-  %a.base = load float** %a.base.ptr, align 8
+  %a.base = load float*, float** %a.base.ptr, align 8
   br label %for.i
 
 for.i:                                            ; preds = %for.i.inc, %entry
diff --git a/llvm/test/Analysis/Delinearization/himeno_2.ll b/llvm/test/Analysis/Delinearization/himeno_2.ll
index 56662f5..2cf8ebc 100644
--- a/llvm/test/Analysis/Delinearization/himeno_2.ll
+++ b/llvm/test/Analysis/Delinearization/himeno_2.ll
@@ -36,25 +36,25 @@
 define void @jacobi(i32 %nn, %struct.Mat* nocapture %a, %struct.Mat* nocapture %p) nounwind uwtable {
 entry:
   %p.rows.ptr = getelementptr inbounds %struct.Mat, %struct.Mat* %p, i64 0, i32 2
-  %p.rows = load i32* %p.rows.ptr
+  %p.rows = load i32, i32* %p.rows.ptr
   %p.rows.sub = add i32 %p.rows, -1
   %p.rows.sext = sext i32 %p.rows.sub to i64
   %p.cols.ptr = getelementptr inbounds %struct.Mat, %struct.Mat* %p, i64 0, i32 3
-  %p.cols = load i32* %p.cols.ptr
+  %p.cols = load i32, i32* %p.cols.ptr
   %p.cols.sub = add i32 %p.cols, -1
   %p.cols.sext = sext i32 %p.cols.sub to i64
   %p.deps.ptr = getelementptr inbounds %struct.Mat, %struct.Mat* %p, i64 0, i32 4
-  %p.deps = load i32* %p.deps.ptr
+  %p.deps = load i32, i32* %p.deps.ptr
   %p.deps.sub = add i32 %p.deps, -1
   %p.deps.sext = sext i32 %p.deps.sub to i64
   %a.cols.ptr = getelementptr inbounds %struct.Mat, %struct.Mat* %a, i64 0, i32 3
-  %a.cols = load i32* %a.cols.ptr
+  %a.cols = load i32, i32* %a.cols.ptr
   %a.cols.sext = sext i32 %a.cols to i64
   %a.deps.ptr = getelementptr inbounds %struct.Mat, %struct.Mat* %a, i64 0, i32 4
-  %a.deps = load i32* %a.deps.ptr
+  %a.deps = load i32, i32* %a.deps.ptr
   %a.deps.sext = sext i32 %a.deps to i64
   %a.base.ptr = getelementptr inbounds %struct.Mat, %struct.Mat* %a, i64 0, i32 0
-  %a.base = load float** %a.base.ptr, align 8
+  %a.base = load float*, float** %a.base.ptr, align 8
   br label %for.i
 
 for.i:                                            ; preds = %for.i.inc, %entry
diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
index a947c07..9df7109 100644
--- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
@@ -8,7 +8,7 @@
 ;       A[i][j] = 1.0;
 ; }
 
-; Inst:  %val = load double* %arrayidx
+; Inst:  %val = load double, double* %arrayidx
 ; In Loop with Header: for.j
 ; AddRec: {{0,+,(%m * sizeof(double))}<%for.i>,+,sizeof(double)}<%for.j>
 ; Base offset: %A
@@ -35,7 +35,7 @@
   %j = phi i64 [ 0, %for.i ], [ %j.inc, %for.j ]
   %vlaarrayidx.sum = add i64 %j, %tmp
   %arrayidx = getelementptr inbounds double, double* %A, i64 %vlaarrayidx.sum
-  %val = load double* %arrayidx
+  %val = load double, double* %arrayidx
   store double %val, double* %arrayidx
   %j.inc = add nsw i64 %j, 1
   %j.exitcond = icmp eq i64 %j.inc, %m
diff --git a/llvm/test/Analysis/Delinearization/undef.ll b/llvm/test/Analysis/Delinearization/undef.ll
index 71bce89e..399ff27 100644
--- a/llvm/test/Analysis/Delinearization/undef.ll
+++ b/llvm/test/Analysis/Delinearization/undef.ll
@@ -21,7 +21,7 @@
   %tmp6 = mul i64 %tmp5, undef
   %arrayidx69.sum = add i64 undef, %tmp6
   %arrayidx70 = getelementptr inbounds double, double* %Ey, i64 %arrayidx69.sum
-  %1 = load double* %arrayidx70, align 8
+  %1 = load double, double* %arrayidx70, align 8
   %inc = add nsw i64 %ix.062, 1
   br i1 false, label %for.body60, label %for.end
 
diff --git a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll
index 12e03bb..84459b2 100644
--- a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll
@@ -46,7 +46,7 @@
   %add5 = add nsw i64 %mul4, %j.02
   %sub = add nsw i64 %add5, -1
   %arrayidx6 = getelementptr inbounds i64, i64* %A, i64 %sub
-  %0 = load i64* %arrayidx6, align 8
+  %0 = load i64, i64* %arrayidx6, align 8
   %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.11, i64 1
   store i64 %0, i64* %B.addr.11, align 8
   %inc = add nsw i64 %j.02, 1
@@ -115,7 +115,7 @@
   %add5 = add nsw i64 %mul4, %j.03
   %sub = add nsw i64 %add5, -1
   %arrayidx6 = getelementptr inbounds i64, i64* %A, i64 %sub
-  %2 = load i64* %arrayidx6, align 8
+  %2 = load i64, i64* %arrayidx6, align 8
   %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.12, i64 1
   store i64 %2, i64* %B.addr.12, align 8
   %inc = add nsw i64 %j.03, 1
@@ -181,7 +181,7 @@
   %add5 = add nsw i64 %mul4, %j.02
   %add6 = add nsw i64 %add5, 100
   %arrayidx7 = getelementptr inbounds i64, i64* %A, i64 %add6
-  %0 = load i64* %arrayidx7, align 8
+  %0 = load i64, i64* %arrayidx7, align 8
   %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.11, i64 1
   store i64 %0, i64* %B.addr.11, align 8
   %inc = add nsw i64 %j.02, 1
@@ -240,7 +240,7 @@
   %add5 = add nsw i64 %mul4, %j.02
   %add6 = add nsw i64 %add5, 99
   %arrayidx7 = getelementptr inbounds i64, i64* %A, i64 %add6
-  %0 = load i64* %arrayidx7, align 8
+  %0 = load i64, i64* %arrayidx7, align 8
   %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.11, i64 1
   store i64 %0, i64* %B.addr.11, align 8
   %inc = add nsw i64 %j.02, 1
@@ -299,7 +299,7 @@
   %add5 = add nsw i64 %mul4, %j.02
   %sub = add nsw i64 %add5, -100
   %arrayidx6 = getelementptr inbounds i64, i64* %A, i64 %sub
-  %0 = load i64* %arrayidx6, align 8
+  %0 = load i64, i64* %arrayidx6, align 8
   %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.11, i64 1
   store i64 %0, i64* %B.addr.11, align 8
   %inc = add nsw i64 %j.02, 1
@@ -358,7 +358,7 @@
   %add5 = add nsw i64 %mul4, %j.02
   %sub = add nsw i64 %add5, -99
   %arrayidx6 = getelementptr inbounds i64, i64* %A, i64 %sub
-  %0 = load i64* %arrayidx6, align 8
+  %0 = load i64, i64* %arrayidx6, align 8
   %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.11, i64 1
   store i64 %0, i64* %B.addr.11, align 8
   %inc = add nsw i64 %j.02, 1
@@ -417,7 +417,7 @@
   %add5 = add nsw i64 %mul4, %j.02
   %add6 = add nsw i64 %add5, 9
   %arrayidx7 = getelementptr inbounds i64, i64* %A, i64 %add6
-  %0 = load i64* %arrayidx7, align 8
+  %0 = load i64, i64* %arrayidx7, align 8
   %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.11, i64 1
   store i64 %0, i64* %B.addr.11, align 8
   %inc = add nsw i64 %j.02, 1
@@ -476,7 +476,7 @@
   %add5 = add nsw i64 %mul4, %j.02
   %add6 = add nsw i64 %add5, 10
   %arrayidx7 = getelementptr inbounds i64, i64* %A, i64 %add6
-  %0 = load i64* %arrayidx7, align 8
+  %0 = load i64, i64* %arrayidx7, align 8
   %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.11, i64 1
   store i64 %0, i64* %B.addr.11, align 8
   %inc = add nsw i64 %j.02, 1
@@ -535,7 +535,7 @@
   %add5 = add nsw i64 %mul4, %j.02
   %add6 = add nsw i64 %add5, 11
   %arrayidx7 = getelementptr inbounds i64, i64* %A, i64 %add6
-  %0 = load i64* %arrayidx7, align 8
+  %0 = load i64, i64* %arrayidx7, align 8
   %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.11, i64 1
   store i64 %0, i64* %B.addr.11, align 8
   %inc = add nsw i64 %j.02, 1
@@ -595,7 +595,7 @@
   %sub = add i64 %i.03, %0
   %add6 = add nsw i64 %sub, 11
   %arrayidx7 = getelementptr inbounds i64, i64* %A, i64 %add6
-  %1 = load i64* %arrayidx7, align 8
+  %1 = load i64, i64* %arrayidx7, align 8
   %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.11, i64 1
   store i64 %1, i64* %B.addr.11, align 8
   %inc = add nsw i64 %j.02, 1
@@ -654,7 +654,7 @@
   %sub = add i64 %i.03, %0
   %add5 = add nsw i64 %sub, 11
   %arrayidx6 = getelementptr inbounds i64, i64* %A, i64 %add5
-  %1 = load i64* %arrayidx6, align 8
+  %1 = load i64, i64* %arrayidx6, align 8
   %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.11, i64 1
   store i64 %1, i64* %B.addr.11, align 8
   %inc = add nsw i64 %j.02, 1
@@ -713,7 +713,7 @@
   %sub = sub nsw i64 %mul4, %j.02
   %add5 = add nsw i64 %sub, 11
   %arrayidx6 = getelementptr inbounds i64, i64* %A, i64 %add5
-  %0 = load i64* %arrayidx6, align 8
+  %0 = load i64, i64* %arrayidx6, align 8
   %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.11, i64 1
   store i64 %0, i64* %B.addr.11, align 8
   %inc = add nsw i64 %j.02, 1
@@ -772,7 +772,7 @@
   %sub = sub nsw i64 %mul4, %j.02
   %add5 = add nsw i64 %sub, 11
   %arrayidx6 = getelementptr inbounds i64, i64* %A, i64 %add5
-  %0 = load i64* %arrayidx6, align 8
+  %0 = load i64, i64* %arrayidx6, align 8
   %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.11, i64 1
   store i64 %0, i64* %B.addr.11, align 8
   %inc = add nsw i64 %j.02, 1
diff --git a/llvm/test/Analysis/DependenceAnalysis/Coupled.ll b/llvm/test/Analysis/DependenceAnalysis/Coupled.ll
index 0315475..096add6 100644
--- a/llvm/test/Analysis/DependenceAnalysis/Coupled.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/Coupled.ll
@@ -29,7 +29,7 @@
   %add = add nsw i64 %i.02, 9
   %add2 = add nsw i64 %i.02, 10
   %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %add2, i64 %add
-  %0 = load i32* %arrayidx4, align 4
+  %0 = load i32, i32* %arrayidx4, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add nsw i64 %i.02, 1
@@ -65,7 +65,7 @@
   %add = add nsw i64 %i.02, 9
   %add2 = add nsw i64 %i.02, 9
   %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %add2, i64 %add
-  %0 = load i32* %arrayidx4, align 4
+  %0 = load i32, i32* %arrayidx4, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add nsw i64 %i.02, 1
@@ -103,7 +103,7 @@
   %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %sub2, i64 %sub
   store i32 %conv, i32* %arrayidx3, align 4
   %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %i.02, i64 %i.02
-  %0 = load i32* %arrayidx5, align 4
+  %0 = load i32, i32* %arrayidx5, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add nsw i64 %i.02, 1
@@ -141,7 +141,7 @@
   %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %sub2, i64 %sub
   store i32 %conv, i32* %arrayidx3, align 4
   %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %i.02, i64 %i.02
-  %0 = load i32* %arrayidx5, align 4
+  %0 = load i32, i32* %arrayidx5, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add nsw i64 %i.02, 1
@@ -180,7 +180,7 @@
   %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %sub3, i64 %sub
   store i32 %conv, i32* %arrayidx4, align 4
   %arrayidx6 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %i.02, i64 %i.02
-  %0 = load i32* %arrayidx6, align 4
+  %0 = load i32, i32* %arrayidx6, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add nsw i64 %i.02, 1
@@ -221,7 +221,7 @@
   %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %add, i64 %sub
   store i32 %conv, i32* %arrayidx5, align 4
   %arrayidx7 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %i.02, i64 %i.02
-  %0 = load i32* %arrayidx7, align 4
+  %0 = load i32, i32* %arrayidx7, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add nsw i64 %i.02, 1
@@ -257,7 +257,7 @@
   %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %i.02, i64 %sub
   store i32 %conv, i32* %arrayidx1, align 4
   %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %i.02, i64 %i.02
-  %0 = load i32* %arrayidx3, align 4
+  %0 = load i32, i32* %arrayidx3, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add nsw i64 %i.02, 1
@@ -293,7 +293,7 @@
   %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %i.02, i64 %sub
   store i32 %conv, i32* %arrayidx1, align 4
   %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %i.02, i64 %i.02
-  %0 = load i32* %arrayidx3, align 4
+  %0 = load i32, i32* %arrayidx3, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add nsw i64 %i.02, 1
@@ -330,7 +330,7 @@
   %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %sub1, i64 %sub
   store i32 %conv, i32* %arrayidx2, align 4
   %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %i.02, i64 %i.02
-  %0 = load i32* %arrayidx4, align 4
+  %0 = load i32, i32* %arrayidx4, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add nsw i64 %i.02, 1
@@ -367,7 +367,7 @@
   %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %sub1, i64 %sub
   store i32 %conv, i32* %arrayidx2, align 4
   %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %i.02, i64 %i.02
-  %0 = load i32* %arrayidx4, align 4
+  %0 = load i32, i32* %arrayidx4, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add nsw i64 %i.02, 1
@@ -405,7 +405,7 @@
   %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %sub1, i64 %sub
   store i32 %conv, i32* %arrayidx2, align 4
   %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %i.02, i64 %i.02
-  %0 = load i32* %arrayidx4, align 4
+  %0 = load i32, i32* %arrayidx4, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add nsw i64 %i.02, 1
@@ -443,7 +443,7 @@
   %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %sub1, i64 %sub
   store i32 %conv, i32* %arrayidx2, align 4
   %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %i.02, i64 %i.02
-  %0 = load i32* %arrayidx4, align 4
+  %0 = load i32, i32* %arrayidx4, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add nsw i64 %i.02, 1
@@ -481,7 +481,7 @@
   %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %sub1, i64 %sub
   store i32 %conv, i32* %arrayidx2, align 4
   %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %i.02, i64 %i.02
-  %0 = load i32* %arrayidx4, align 4
+  %0 = load i32, i32* %arrayidx4, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add nsw i64 %i.02, 1
@@ -518,7 +518,7 @@
   %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %sub1, i64 %sub
   store i32 %conv, i32* %arrayidx2, align 4
   %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %i.02, i64 %i.02
-  %0 = load i32* %arrayidx4, align 4
+  %0 = load i32, i32* %arrayidx4, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add nsw i64 %i.02, 1
@@ -555,7 +555,7 @@
   %arrayidx3 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %A, i64 %sub1, i64 %sub, i64 %i.02
   store i32 %conv, i32* %arrayidx3, align 4
   %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %A, i64 %i.02, i64 %i.02, i64 %i.02
-  %0 = load i32* %arrayidx6, align 4
+  %0 = load i32, i32* %arrayidx6, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add nsw i64 %i.02, 1
@@ -592,7 +592,7 @@
   %arrayidx3 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %A, i64 %sub1, i64 %sub, i64 %i.02
   store i32 %conv, i32* %arrayidx3, align 4
   %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %A, i64 %i.02, i64 %i.02, i64 %i.02
-  %0 = load i32* %arrayidx6, align 4
+  %0 = load i32, i32* %arrayidx6, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add nsw i64 %i.02, 1
diff --git a/llvm/test/Analysis/DependenceAnalysis/ExactRDIV.ll b/llvm/test/Analysis/DependenceAnalysis/ExactRDIV.ll
index a30d012..5b2488c 100644
--- a/llvm/test/Analysis/DependenceAnalysis/ExactRDIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/ExactRDIV.ll
@@ -41,7 +41,7 @@
   %mul5 = shl nsw i64 %j.02, 1
   %add64 = or i64 %mul5, 1
   %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %add64
-  %0 = load i32* %arrayidx7, align 4
+  %0 = load i32, i32* %arrayidx7, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc9 = add nsw i64 %j.02, 1
@@ -87,7 +87,7 @@
   %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
   %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
   %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %j.02
-  %0 = load i32* %arrayidx5, align 4
+  %0 = load i32, i32* %arrayidx5, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc7 = add nsw i64 %j.02, 1
@@ -133,7 +133,7 @@
   %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
   %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
   %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %j.02
-  %0 = load i32* %arrayidx5, align 4
+  %0 = load i32, i32* %arrayidx5, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc7 = add nsw i64 %j.02, 1
@@ -179,7 +179,7 @@
   %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
   %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
   %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %j.02
-  %0 = load i32* %arrayidx5, align 4
+  %0 = load i32, i32* %arrayidx5, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc7 = add nsw i64 %j.02, 1
@@ -225,7 +225,7 @@
   %j.02 = phi i64 [ %inc7, %for.body4 ], [ 0, %for.body4.preheader ]
   %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
   %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %j.02
-  %0 = load i32* %arrayidx5, align 4
+  %0 = load i32, i32* %arrayidx5, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc7 = add nsw i64 %j.02, 1
@@ -272,7 +272,7 @@
   %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
   %sub = sub nsw i64 0, %j.02
   %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %sub
-  %0 = load i32* %arrayidx5, align 4
+  %0 = load i32, i32* %arrayidx5, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc7 = add nsw i64 %j.02, 1
@@ -319,7 +319,7 @@
   %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
   %sub = sub nsw i64 0, %j.02
   %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %sub
-  %0 = load i32* %arrayidx5, align 4
+  %0 = load i32, i32* %arrayidx5, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc7 = add nsw i64 %j.02, 1
@@ -366,7 +366,7 @@
   %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
   %sub = sub nsw i64 0, %j.02
   %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %sub
-  %0 = load i32* %arrayidx5, align 4
+  %0 = load i32, i32* %arrayidx5, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc7 = add nsw i64 %j.02, 1
@@ -413,7 +413,7 @@
   %B.addr.01 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
   %sub = sub nsw i64 0, %j.02
   %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %sub
-  %0 = load i32* %arrayidx5, align 4
+  %0 = load i32, i32* %arrayidx5, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc7 = add nsw i64 %j.02, 1
@@ -455,7 +455,7 @@
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %sub
   store i32 %conv, i32* %arrayidx, align 4
   %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 45
-  %0 = load i32* %arrayidx4, align 4
+  %0 = load i32, i32* %arrayidx4, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.11, i64 1
   store i32 %0, i32* %B.addr.11, align 4
   %inc = add nsw i64 %j.02, 1
@@ -504,7 +504,7 @@
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %sub
   store i32 %conv, i32* %arrayidx, align 4
   %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 45
-  %0 = load i32* %arrayidx4, align 4
+  %0 = load i32, i32* %arrayidx4, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.11, i64 1
   store i32 %0, i32* %B.addr.11, align 4
   %inc = add nsw i64 %j.02, 1
@@ -552,7 +552,7 @@
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %sub
   store i32 %conv, i32* %arrayidx, align 4
   %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 45
-  %0 = load i32* %arrayidx4, align 4
+  %0 = load i32, i32* %arrayidx4, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.11, i64 1
   store i32 %0, i32* %B.addr.11, align 4
   %inc = add nsw i64 %j.02, 1
@@ -600,7 +600,7 @@
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %sub
   store i32 %conv, i32* %arrayidx, align 4
   %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 45
-  %0 = load i32* %arrayidx4, align 4
+  %0 = load i32, i32* %arrayidx4, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.11, i64 1
   store i32 %0, i32* %B.addr.11, align 4
   %inc = add nsw i64 %j.02, 1
diff --git a/llvm/test/Analysis/DependenceAnalysis/ExactSIV.ll b/llvm/test/Analysis/DependenceAnalysis/ExactSIV.ll
index c3fddbb..d84cd05 100644
--- a/llvm/test/Analysis/DependenceAnalysis/ExactSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/ExactSIV.ll
@@ -30,7 +30,7 @@
   %mul = shl i64 %i.02, 1
   %add13 = or i64 %mul, 1
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %add13
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -68,7 +68,7 @@
   %mul1 = shl i64 %i.02, 1
   %add23 = or i64 %mul1, 1
   %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %add23
-  %0 = load i32* %arrayidx3, align 4
+  %0 = load i32, i32* %arrayidx3, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -104,7 +104,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %add = add i64 %i.02, 60
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %add
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -140,7 +140,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %add = add i64 %i.02, 60
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %add
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -176,7 +176,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %add = add i64 %i.02, 60
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %add
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -212,7 +212,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %add = add i64 %i.02, 60
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %add
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -248,7 +248,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %add = add i64 %i.02, 60
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %add
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -284,7 +284,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %add = add i64 %i.02, 60
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %add
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -320,7 +320,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %sub1 = sub i64 -60, %i.02
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %sub1
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -356,7 +356,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %sub1 = sub i64 -60, %i.02
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %sub1
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -392,7 +392,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %sub1 = sub i64 -60, %i.02
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %sub1
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -428,7 +428,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %sub1 = sub i64 -60, %i.02
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %sub1
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -464,7 +464,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %sub1 = sub i64 -60, %i.02
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %sub1
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -500,7 +500,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %sub1 = sub i64 -60, %i.02
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %sub1
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
diff --git a/llvm/test/Analysis/DependenceAnalysis/GCD.ll b/llvm/test/Analysis/DependenceAnalysis/GCD.ll
index f2b5860..81d05a1 100644
--- a/llvm/test/Analysis/DependenceAnalysis/GCD.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/GCD.ll
@@ -49,7 +49,7 @@
   %mul6 = shl nsw i64 %j.02, 3
   %add = add nsw i64 %mul5, %mul6
   %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %add
-  %0 = load i32* %arrayidx7, align 4
+  %0 = load i32, i32* %arrayidx7, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.11, i64 1
   store i32 %0, i32* %B.addr.11, align 4
   %inc = add nsw i64 %j.02, 1
@@ -111,7 +111,7 @@
   %add = add nsw i64 %mul5, %mul6
   %add7 = or i64 %add, 1
   %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %add7
-  %0 = load i32* %arrayidx8, align 4
+  %0 = load i32, i32* %arrayidx8, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.11, i64 1
   store i32 %0, i32* %B.addr.11, align 4
   %inc = add nsw i64 %j.02, 1
@@ -173,7 +173,7 @@
   %mul6 = shl nsw i64 %j.02, 3
   %add7 = add nsw i64 %mul5, %mul6
   %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %add7
-  %0 = load i32* %arrayidx8, align 4
+  %0 = load i32, i32* %arrayidx8, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.11, i64 1
   store i32 %0, i32* %B.addr.11, align 4
   %inc = add nsw i64 %j.02, 1
@@ -233,7 +233,7 @@
   %add5 = add nsw i64 %i.03, %mul4
   %sub = add nsw i64 %add5, -1
   %arrayidx6 = getelementptr inbounds i32, i32* %A, i64 %sub
-  %0 = load i32* %arrayidx6, align 4
+  %0 = load i32, i32* %arrayidx6, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.11, i64 1
   store i32 %0, i32* %B.addr.11, align 4
   %inc = add nsw i64 %j.02, 1
@@ -303,7 +303,7 @@
   %sub = sub nsw i64 %add12, %mul14
   %add15 = add nsw i64 %sub, 4
   %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 %add15
-  %0 = load i32* %arrayidx16, align 4
+  %0 = load i32, i32* %arrayidx16, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.11, i64 1
   store i32 %0, i32* %B.addr.11, align 4
   %inc = add nsw i64 %j.02, 1
@@ -373,7 +373,7 @@
   %sub = sub nsw i64 %add12, %mul14
   %add15 = add nsw i64 %sub, 5
   %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 %add15
-  %0 = load i32* %arrayidx16, align 4
+  %0 = load i32, i32* %arrayidx16, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.11, i64 1
   store i32 %0, i32* %B.addr.11, align 4
   %inc = add nsw i64 %j.02, 1
@@ -445,7 +445,7 @@
   %1 = mul nsw i64 %mul7, %n
   %arrayidx8.sum = add i64 %1, %add7
   %arrayidx9 = getelementptr inbounds i32, i32* %A, i64 %arrayidx8.sum
-  %2 = load i32* %arrayidx9, align 4
+  %2 = load i32, i32* %arrayidx9, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.12, i64 1
   store i32 %2, i32* %B.addr.12, align 4
   %inc = add nsw i64 %j.03, 1
@@ -536,7 +536,7 @@
   %10 = mul nsw i64 %idxprom10, %0
   %arrayidx11.sum = add i64 %10, %idxprom8
   %arrayidx12 = getelementptr inbounds i32, i32* %A, i64 %arrayidx11.sum
-  %11 = load i32* %arrayidx12, align 4
+  %11 = load i32, i32* %arrayidx12, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.12, i64 1
   store i32 %11, i32* %B.addr.12, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -623,7 +623,7 @@
   %add10 = or i32 %add9, 1
   %idxprom11 = sext i32 %add10 to i64
   %arrayidx12 = getelementptr inbounds i32, i32* %A, i64 %idxprom11
-  %5 = load i32* %arrayidx12, align 4
+  %5 = load i32, i32* %arrayidx12, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.12, i64 1
   store i32 %5, i32* %B.addr.12, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -715,7 +715,7 @@
   %10 = mul nsw i64 %idxprom10, %0
   %arrayidx11.sum = add i64 %10, %idxprom8
   %arrayidx12 = getelementptr inbounds i32, i32* %A, i64 %arrayidx11.sum
-  %11 = load i32* %arrayidx12, align 4
+  %11 = load i32, i32* %arrayidx12, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.12, i64 1
   store i32 %11, i32* %B.addr.12, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
diff --git a/llvm/test/Analysis/DependenceAnalysis/Invariant.ll b/llvm/test/Analysis/DependenceAnalysis/Invariant.ll
index cd878bf..9fdb4d9 100644
--- a/llvm/test/Analysis/DependenceAnalysis/Invariant.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/Invariant.ll
@@ -20,9 +20,9 @@
   %j.02 = phi i32 [ 0, %for.cond1.preheader ], [ %add8, %for.body3 ]
   %res.11 = phi float [ %res.03, %for.cond1.preheader ], [ %add.res.1, %for.body3 ]
   %arrayidx4 = getelementptr inbounds [40 x float], [40 x float]* %rr, i32 %j.02, i32 %j.02
-  %0 = load float* %arrayidx4, align 4
+  %0 = load float, float* %arrayidx4, align 4
   %arrayidx6 = getelementptr inbounds [40 x float], [40 x float]* %rr, i32 %i.04, i32 %j.02
-  %1 = load float* %arrayidx6, align 4
+  %1 = load float, float* %arrayidx6, align 4
   %add = fadd float %0, %1
   %cmp7 = fcmp ogt float %add, %g
   %add.res.1 = select i1 %cmp7, float %add, float %res.11
diff --git a/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll b/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll
index 77c1f3d..1b47341 100644
--- a/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll
@@ -28,7 +28,7 @@
   %i = phi i64 [ 0, %entry ], [ %i.inc, %for.body ]
   %a.addr = getelementptr [100 x [100 x i32]], [100 x [100 x i32]]* %a, i64 0, i64 %i, i64 %i
   %a.addr.2 = getelementptr [100 x [100 x i32]], [100 x [100 x i32]]* %a, i64 0, i64 %i, i32 5
-  %0 = load i32* %a.addr, align 4
+  %0 = load i32, i32* %a.addr, align 4
   %1 = add i32 %0, 1
   store i32 %1, i32* %a.addr.2, align 4
   %i.inc = add nsw i64 %i, 1
diff --git a/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll b/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll
index cfe21f3..d6500cc 100644
--- a/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll
@@ -18,7 +18,7 @@
 ; CHECK: da analyze - none!
 
   %arrayidx1 = getelementptr inbounds i32, i32* %B, i64 1
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   ret i32 %0
 }
 
@@ -36,7 +36,7 @@
 ; CHECK: da analyze - none!
 
   %arrayidx1 = getelementptr inbounds i32, i32* %B, i64 1
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   ret i32 %0
 }
 
@@ -107,7 +107,7 @@
   %add13 = add nsw i64 %j.07, 2
   %add14 = add nsw i64 %i.011, 3
   %arrayidx17 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %A, i64 %add14, i64 %add13, i64 %add
-  %0 = load i64* %arrayidx17, align 8
+  %0 = load i64, i64* %arrayidx17, align 8
   %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.24, i64 1
   store i64 %0, i64* %B.addr.24, align 8
   %inc19 = add nsw i64 %k9.05, 1
@@ -290,7 +290,7 @@
   %sub48 = sub nsw i64 1, %k.037
   %add49 = add nsw i64 %i.045, 3
   %arrayidx57 = getelementptr inbounds [100 x [100 x [100 x [100 x [100 x [100 x [100 x i64]]]]]]], [100 x [100 x [100 x [100 x [100 x [100 x [100 x i64]]]]]]]* %A, i64 %add49, i64 2, i64 %u.06, i64 %sub48, i64 %sub47, i64 %o.025, i64 %add45, i64 %add44
-  %0 = load i64* %arrayidx57, align 8
+  %0 = load i64, i64* %arrayidx57, align 8
   %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.112, i64 1
   store i64 %0, i64* %B.addr.112, align 8
   %inc = add nsw i64 %t.03, 1
@@ -445,7 +445,7 @@
   store i32 %conv2, i32* %arrayidx, align 4
   %idxprom4 = sext i8 %i.03 to i64
   %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %idxprom4
-  %0 = load i32* %arrayidx5, align 4
+  %0 = load i32, i32* %arrayidx5, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc = add i8 %i.03, 1
@@ -491,7 +491,7 @@
   store i32 %conv2, i32* %arrayidx, align 4
   %idxprom4 = sext i16 %i.03 to i64
   %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %idxprom4
-  %0 = load i32* %arrayidx5, align 4
+  %0 = load i32, i32* %arrayidx5, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc = add i16 %i.03, 1
@@ -535,7 +535,7 @@
   %1 = trunc i64 %indvars.iv to i32
   store i32 %1, i32* %arrayidx, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %2 = load i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx3, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %2, i32* %B.addr.02, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -570,7 +570,7 @@
   %conv = sext i8 %n to i64
   %add = add i64 %conv, 1
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %add
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   store i32 %0, i32* %B, align 4
   ret void
 }
@@ -596,7 +596,7 @@
   %conv = sext i16 %n to i64
   %add = add i64 %conv, 1
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %add
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   store i32 %0, i32* %B, align 4
   ret void
 }
@@ -622,7 +622,7 @@
   %add = add nsw i32 %n, 1
   %idxprom1 = sext i32 %add to i64
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   store i32 %0, i32* %B, align 4
   ret void
 }
@@ -648,7 +648,7 @@
   %add = add i32 %n, 1
   %idxprom1 = zext i32 %add to i64
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   store i32 %0, i32* %B, align 4
   ret void
 }
@@ -682,7 +682,7 @@
 while.body:                                       ; preds = %while.body.preheader, %while.body
   %i.02 = phi %struct.S* [ %incdec.ptr, %while.body ], [ %s, %while.body.preheader ]
   %0 = getelementptr inbounds %struct.S, %struct.S* %i.02, i64 1, i32 0
-  %1 = load i32* %0, align 4
+  %1 = load i32, i32* %0, align 4
   %2 = getelementptr inbounds %struct.S, %struct.S* %i.02, i64 0, i32 0
   store i32 %1, i32* %2, align 4
   %incdec.ptr = getelementptr inbounds %struct.S, %struct.S* %i.02, i64 1
diff --git a/llvm/test/Analysis/DependenceAnalysis/Propagating.ll b/llvm/test/Analysis/DependenceAnalysis/Propagating.ll
index 5677eed..5a97b99 100644
--- a/llvm/test/Analysis/DependenceAnalysis/Propagating.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/Propagating.ll
@@ -36,7 +36,7 @@
   store i32 %conv, i32* %arrayidx5, align 4
   %add6 = add nsw i64 %i.03, %j.02
   %arrayidx8 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %i.03, i64 %add6
-  %0 = load i32* %arrayidx8, align 4
+  %0 = load i32, i32* %arrayidx8, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.11, i64 1
   store i32 %0, i32* %B.addr.11, align 4
   %inc = add nsw i64 %j.02, 1
@@ -93,7 +93,7 @@
   %add10 = add nsw i64 %j.03, %k.02
   %sub11 = sub nsw i64 %j.03, %i.05
   %arrayidx14 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %A, i64 %sub11, i64 %i.05, i64 %add10
-  %0 = load i32* %arrayidx14, align 4
+  %0 = load i32, i32* %arrayidx14, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.21, i64 1
   store i32 %0, i32* %B.addr.21, align 4
   %inc = add nsw i64 %k.02, 1
@@ -149,7 +149,7 @@
   %add = add nsw i64 %i.03, %j.02
   %add5 = add nsw i64 %add, 110
   %arrayidx7 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %i.03, i64 %add5
-  %0 = load i32* %arrayidx7, align 4
+  %0 = load i32, i32* %arrayidx7, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.11, i64 1
   store i32 %0, i32* %B.addr.11, align 4
   %inc = add nsw i64 %j.02, 1
@@ -200,7 +200,7 @@
   %sub = sub nsw i64 %mul5, %i.03
   %add6 = add nsw i64 %sub, 5
   %arrayidx8 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %i.03, i64 %add6
-  %0 = load i32* %arrayidx8, align 4
+  %0 = load i32, i32* %arrayidx8, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.11, i64 1
   store i32 %0, i32* %B.addr.11, align 4
   %inc = add nsw i64 %j.02, 1
@@ -252,7 +252,7 @@
   %mul7 = shl nsw i64 %i.03, 1
   %add8 = add nsw i64 %mul7, %j.02
   %arrayidx10 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %i.03, i64 %add8
-  %0 = load i32* %arrayidx10, align 4
+  %0 = load i32, i32* %arrayidx10, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.11, i64 1
   store i32 %0, i32* %B.addr.11, align 4
   %inc = add nsw i64 %j.02, 1
@@ -306,7 +306,7 @@
   %mul8 = mul nsw i64 %i.03, 3
   %add9 = add nsw i64 %mul8, %j.02
   %arrayidx12 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %A, i64 %i.03, i64 %i.03, i64 %add9
-  %0 = load i32* %arrayidx12, align 4
+  %0 = load i32, i32* %arrayidx12, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.11, i64 1
   store i32 %0, i32* %B.addr.11, align 4
   %inc = add nsw i64 %j.02, 1
@@ -359,7 +359,7 @@
   %add8 = add nsw i64 %mul7, %j.02
   %mul9 = shl nsw i64 %i.03, 1
   %arrayidx11 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %mul9, i64 %add8
-  %0 = load i32* %arrayidx11, align 4
+  %0 = load i32, i32* %arrayidx11, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.11, i64 1
   store i32 %0, i32* %B.addr.11, align 4
   %inc = add nsw i64 %j.02, 1
@@ -415,7 +415,7 @@
   %mul10 = mul nsw i64 %i.03, -2
   %add11 = add nsw i64 %mul10, 20
   %arrayidx13 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %add11, i64 %add9
-  %0 = load i32* %arrayidx13, align 4
+  %0 = load i32, i32* %arrayidx13, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.11, i64 1
   store i32 %0, i32* %B.addr.11, align 4
   %inc = add nsw i64 %j.02, 1
@@ -466,7 +466,7 @@
   %mul6 = mul nsw i64 %i.03, -2
   %add7 = add nsw i64 %mul6, 4
   %arrayidx9 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %add7, i64 %add5
-  %0 = load i32* %arrayidx9, align 4
+  %0 = load i32, i32* %arrayidx9, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.11, i64 1
   store i32 %0, i32* %B.addr.11, align 4
   %inc = add nsw i64 %j.02, 1
@@ -517,7 +517,7 @@
   %arrayidx7 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 %add6, i64 %add4
   store i32 %conv, i32* %arrayidx7, align 4
   %arrayidx9 = getelementptr inbounds [100 x i32], [100 x i32]* %A, i64 4, i64 %j.02
-  %0 = load i32* %arrayidx9, align 4
+  %0 = load i32, i32* %arrayidx9, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.11, i64 1
   store i32 %0, i32* %B.addr.11, align 4
   %inc = add nsw i64 %j.02, 1
diff --git a/llvm/test/Analysis/DependenceAnalysis/Separability.ll b/llvm/test/Analysis/DependenceAnalysis/Separability.ll
index 8df18b3..e56e741 100644
--- a/llvm/test/Analysis/DependenceAnalysis/Separability.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/Separability.ll
@@ -50,7 +50,7 @@
   %sub = sub nsw i64 %mul, %l.02
   %add12 = add nsw i64 %i.07, 10
   %arrayidx15 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %A, i64 10, i64 %add12, i64 %sub
-  %0 = load i32* %arrayidx15, align 4
+  %0 = load i32, i32* %arrayidx15, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.31, i64 1
   store i32 %0, i32* %B.addr.31, align 4
   %inc = add nsw i64 %l.02, 1
@@ -124,7 +124,7 @@
   %sub = sub nsw i64 %mul, %l.02
   %add12 = add nsw i64 %i.07, 10
   %arrayidx15 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %A, i64 10, i64 %add12, i64 %sub
-  %0 = load i32* %arrayidx15, align 4
+  %0 = load i32, i32* %arrayidx15, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.31, i64 1
   store i32 %0, i32* %B.addr.31, align 4
   %inc = add nsw i64 %l.02, 1
@@ -198,7 +198,7 @@
   %add14 = add nsw i64 %j.05, %k.03
   %add15 = add nsw i64 %i.07, 10
   %arrayidx19 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* %A, i64 10, i64 %add15, i64 %add14, i64 %add13
-  %0 = load i32* %arrayidx19, align 4
+  %0 = load i32, i32* %arrayidx19, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.31, i64 1
   store i32 %0, i32* %B.addr.31, align 4
   %inc = add nsw i64 %l.02, 1
@@ -273,7 +273,7 @@
   %add15 = add nsw i64 %j.05, %k.03
   %add16 = add nsw i64 %i.07, 10
   %arrayidx20 = getelementptr inbounds [100 x [100 x [100 x i32]]], [100 x [100 x [100 x i32]]]* %A, i64 10, i64 %add16, i64 %add15, i64 %add14
-  %0 = load i32* %arrayidx20, align 4
+  %0 = load i32, i32* %arrayidx20, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.31, i64 1
   store i32 %0, i32* %B.addr.31, align 4
   %inc = add nsw i64 %l.02, 1
diff --git a/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll b/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll
index 9a5ab9b..78befa5 100644
--- a/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll
@@ -32,7 +32,7 @@
   %1 = trunc i64 %indvars.iv to i32
   store i32 %1, i32* %arrayidx, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %2 = load i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx3, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %2, i32* %B.addr.02, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -75,7 +75,7 @@
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %add
   store i32 %conv2, i32* %arrayidx, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %i.03
-  %1 = load i32* %arrayidx3, align 4
+  %1 = load i32, i32* %arrayidx3, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %1, i32* %B.addr.02, align 4
   %inc = add nsw i64 %i.03, 1
@@ -117,7 +117,7 @@
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %add
   store i32 %conv, i32* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %i.03
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc = add i64 %i.03, 1
@@ -159,7 +159,7 @@
   %1 = trunc i64 %indvars.iv to i32
   store i32 %1, i32* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %2 = load i32* %arrayidx2, align 4
+  %2 = load i32, i32* %arrayidx2, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %2, i32* %B.addr.02, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -198,7 +198,7 @@
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %add
   store i32 %conv, i32* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %i.02
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -233,7 +233,7 @@
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %add
   store i32 %conv, i32* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %i.02
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -270,7 +270,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %mul1 = shl i64 %i.02, 1
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %mul1
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -307,7 +307,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %mul1 = shl i64 %i.02, 1
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %mul1
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -342,7 +342,7 @@
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %add
   store i32 %conv, i32* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %i.02
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -383,7 +383,7 @@
   %mul = shl i64 %n, 1
   %add1 = add i64 %i.03, %mul
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %add1
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc = add i64 %i.03, 1
@@ -424,7 +424,7 @@
   %mul1 = mul i64 %i.02, %n
   %add2 = add i64 %mul1, 5
   %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %add2
-  %0 = load i32* %arrayidx3, align 4
+  %0 = load i32, i32* %arrayidx3, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll b/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
index cde1e8d..6e8b98c 100644
--- a/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
@@ -53,7 +53,7 @@
   %mul56 = add i64 %j.03, %n1
   %add7 = mul i64 %mul56, 3
   %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %add7
-  %0 = load i32* %arrayidx8, align 4
+  %0 = load i32, i32* %arrayidx8, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc10 = add nsw i64 %j.03, 1
@@ -118,7 +118,7 @@
   %mul7 = shl i64 %n2, 1
   %add8 = add i64 %mul6, %mul7
   %arrayidx9 = getelementptr inbounds i32, i32* %A, i64 %add8
-  %0 = load i32* %arrayidx9, align 4
+  %0 = load i32, i32* %arrayidx9, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc11 = add nsw i64 %j.03, 1
@@ -181,7 +181,7 @@
   %mul6 = shl i64 %n1, 1
   %add = sub i64 %mul6, %j.03
   %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %add
-  %0 = load i32* %arrayidx7, align 4
+  %0 = load i32, i32* %arrayidx7, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc9 = add nsw i64 %j.03, 1
@@ -242,7 +242,7 @@
   %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
   %sub5 = sub i64 %j.03, %n1
   %arrayidx6 = getelementptr inbounds i32, i32* %A, i64 %sub5
-  %0 = load i32* %arrayidx6, align 4
+  %0 = load i32, i32* %arrayidx6, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc8 = add nsw i64 %j.03, 1
@@ -304,7 +304,7 @@
   %B.addr.02 = phi i32* [ %incdec.ptr, %for.body4 ], [ %B, %for.body4.preheader ]
   %add6 = sub i64 %n1, %j.03
   %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %add6
-  %0 = load i32* %arrayidx7, align 4
+  %0 = load i32, i32* %arrayidx7, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc9 = add nsw i64 %j.03, 1
@@ -366,7 +366,7 @@
   %mul = shl i64 %n2, 1
   %add6 = sub i64 %mul, %j.03
   %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %add6
-  %0 = load i32* %arrayidx7, align 4
+  %0 = load i32, i32* %arrayidx7, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc9 = add nsw i64 %j.03, 1
@@ -421,7 +421,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %mul = shl i64 %n2, 1
   %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %mul
-  %0 = load i32* %arrayidx4, align 4
+  %0 = load i32, i32* %arrayidx4, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.12, i64 1
   store i32 %0, i32* %B.addr.12, align 4
   %inc = add nsw i64 %j.03, 1
diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
index aa51483..711d0fa 100644
--- a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
@@ -35,7 +35,7 @@
   %mul14 = add i64 %i.03, %n
   %add3 = mul i64 %mul14, 3
   %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %add3
-  %0 = load i32* %arrayidx4, align 4
+  %0 = load i32, i32* %arrayidx4, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc = add nsw i64 %i.03, 1
@@ -82,7 +82,7 @@
   %mul3 = shl i64 %n, 1
   %add4 = add i64 %mul2, %mul3
   %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %add4
-  %0 = load i32* %arrayidx5, align 4
+  %0 = load i32, i32* %arrayidx5, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc = add nsw i64 %i.03, 1
@@ -127,7 +127,7 @@
   %mul2 = shl i64 %n, 1
   %add = sub i64 %mul2, %i.03
   %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %add
-  %0 = load i32* %arrayidx3, align 4
+  %0 = load i32, i32* %arrayidx3, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc = add nsw i64 %i.03, 1
@@ -173,7 +173,7 @@
   %mul2 = shl i64 %n, 1
   %sub = sub i64 %i.03, %mul2
   %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %sub
-  %0 = load i32* %arrayidx3, align 4
+  %0 = load i32, i32* %arrayidx3, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc = add nsw i64 %i.03, 1
@@ -218,7 +218,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %add2 = sub i64 %n, %i.03
   %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %add2
-  %0 = load i32* %arrayidx3, align 4
+  %0 = load i32, i32* %arrayidx3, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc = add nsw i64 %i.03, 1
@@ -264,7 +264,7 @@
   %sub2 = sub nsw i64 0, %i.03
   %sub3 = sub i64 %sub2, %n
   %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %sub3
-  %0 = load i32* %arrayidx4, align 4
+  %0 = load i32, i32* %arrayidx4, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc = add nsw i64 %i.03, 1
@@ -310,7 +310,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %sub = sub i64 0, %i.03
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %sub
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc = add i64 %i.03, 1
@@ -359,7 +359,7 @@
   %add5 = add i64 %mul3, %mul4
   %add6 = add i64 %add5, 1
   %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %add6
-  %0 = load i32* %arrayidx7, align 4
+  %0 = load i32, i32* %arrayidx7, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc = add nsw i64 %i.03, 1
@@ -408,7 +408,7 @@
   %sub = add i64 %mul3, %0
   %add5 = add i64 %sub, 2
   %arrayidx6 = getelementptr inbounds i32, i32* %A, i64 %add5
-  %1 = load i32* %arrayidx6, align 4
+  %1 = load i32, i32* %arrayidx6, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %1, i32* %B.addr.02, align 4
   %inc = add nsw i64 %i.03, 1
diff --git a/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll b/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
index be2d035..5b81ec1 100644
--- a/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
@@ -35,7 +35,7 @@
   %mul1 = mul i64 %i.03, %n
   %sub = sub i64 1, %mul1
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %sub
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc = add i64 %i.03, 1
@@ -80,7 +80,7 @@
   %add1 = add i64 %n, 1
   %sub = sub i64 %add1, %i.03
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %sub
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc = add i64 %i.03, 1
@@ -118,7 +118,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %sub = sub i64 6, %i.02
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %sub
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -153,7 +153,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %sub = sub i64 6, %i.02
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %sub
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -188,7 +188,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %sub = sub i64 -6, %i.02
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %sub
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -229,7 +229,7 @@
   %0 = mul i64 %i.03, -3
   %sub = add i64 %0, 5
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %sub
-  %1 = load i32* %arrayidx2, align 4
+  %1 = load i32, i32* %arrayidx2, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %1, i32* %B.addr.02, align 4
   %inc = add i64 %i.03, 1
@@ -268,7 +268,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %sub = sub i64 5, %i.02
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %sub
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
diff --git a/llvm/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll b/llvm/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll
index fa77fc0..8adb7f7 100644
--- a/llvm/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll
@@ -29,7 +29,7 @@
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %add
   store i32 %conv, i32* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 10
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -69,7 +69,7 @@
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %add
   store i32 %conv, i32* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 10
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc = add i64 %i.03, 1
@@ -107,7 +107,7 @@
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %mul
   store i32 %conv, i32* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 10
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -142,7 +142,7 @@
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %mul
   store i32 %conv, i32* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 10
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -177,7 +177,7 @@
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %mul
   store i32 %conv, i32* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 10
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -212,7 +212,7 @@
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %mul
   store i32 %conv, i32* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 -10
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -251,7 +251,7 @@
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %mul
   store i32 %conv, i32* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 10
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc = add i64 %i.03, 1
diff --git a/llvm/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll b/llvm/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll
index 40e714f..ac261b0 100644
--- a/llvm/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll
@@ -29,7 +29,7 @@
   %mul = shl i64 %i.02, 1
   %add = add i64 %mul, 10
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %add
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -69,7 +69,7 @@
   %mul = mul i64 %i.03, %n
   %add = add i64 %mul, 10
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %add
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc = add i64 %i.03, 1
@@ -107,7 +107,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %mul = shl i64 %i.02, 1
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %mul
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -142,7 +142,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %mul = shl i64 %i.02, 1
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %mul
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -177,7 +177,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %mul = shl i64 %i.02, 1
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %mul
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -212,7 +212,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %mul = shl i64 %i.02, 1
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %mul
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.01, i64 1
   store i32 %0, i32* %B.addr.01, align 4
   %inc = add i64 %i.02, 1
@@ -251,7 +251,7 @@
   store i32 %conv, i32* %arrayidx, align 4
   %mul = mul i64 %i.03, 3
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %mul
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   %incdec.ptr = getelementptr inbounds i32, i32* %B.addr.02, i64 1
   store i32 %0, i32* %B.addr.02, align 4
   %inc = add i64 %i.03, 1
diff --git a/llvm/test/Analysis/DependenceAnalysis/ZIV.ll b/llvm/test/Analysis/DependenceAnalysis/ZIV.ll
index 700c51e..b321641 100644
--- a/llvm/test/Analysis/DependenceAnalysis/ZIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/ZIV.ll
@@ -23,7 +23,7 @@
 
   %add1 = add i64 %n, 1
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %add1
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   store i32 %0, i32* %B, align 4
   ret void
 }
@@ -46,7 +46,7 @@
 
   %add = add i64 %n, 1
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %add
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   store i32 %0, i32* %B, align 4
   ret void
 }
@@ -68,7 +68,7 @@
 ; CHECK: da analyze - none!
 
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 %m
-  %0 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx1, align 4
   store i32 %0, i32* %B, align 4
   ret void
 }
diff --git a/llvm/test/Analysis/GlobalsModRef/2008-09-03-ReadGlobals.ll b/llvm/test/Analysis/GlobalsModRef/2008-09-03-ReadGlobals.ll
index d51c159..513ec86 100644
--- a/llvm/test/Analysis/GlobalsModRef/2008-09-03-ReadGlobals.ll
+++ b/llvm/test/Analysis/GlobalsModRef/2008-09-03-ReadGlobals.ll
@@ -3,7 +3,7 @@
 @g = internal global i32 0		; <i32*> [#uses=2]
 
 define i32 @r() {
-	%tmp = load i32* @g		; <i32> [#uses=1]
+	%tmp = load i32, i32* @g		; <i32> [#uses=1]
 	ret i32 %tmp
 }
 
diff --git a/llvm/test/Analysis/GlobalsModRef/aliastest.ll b/llvm/test/Analysis/GlobalsModRef/aliastest.ll
index 4cfed71..3474e13 100644
--- a/llvm/test/Analysis/GlobalsModRef/aliastest.ll
+++ b/llvm/test/Analysis/GlobalsModRef/aliastest.ll
@@ -9,6 +9,6 @@
 ; CHECK-NEXT: ret i32 7
 	store i32 7, i32* %P
 	store i32 12, i32* @X
-	%V = load i32* %P		; <i32> [#uses=1]
+	%V = load i32, i32* %P		; <i32> [#uses=1]
 	ret i32 %V
 }
diff --git a/llvm/test/Analysis/GlobalsModRef/chaining-analysis.ll b/llvm/test/Analysis/GlobalsModRef/chaining-analysis.ll
index aeb76e4..26671da 100644
--- a/llvm/test/Analysis/GlobalsModRef/chaining-analysis.ll
+++ b/llvm/test/Analysis/GlobalsModRef/chaining-analysis.ll
@@ -14,7 +14,7 @@
 ; CHECK-NEXT: ret i32 12
 	store i32 12, i32* @X
 	call double @doesnotmodX( double 1.000000e+00 )		; <double>:1 [#uses=0]
-	%V = load i32* @X		; <i32> [#uses=1]
+	%V = load i32, i32* @X		; <i32> [#uses=1]
 	ret i32 %V
 }
 
diff --git a/llvm/test/Analysis/GlobalsModRef/indirect-global.ll b/llvm/test/Analysis/GlobalsModRef/indirect-global.ll
index 48ac6dd..0281323 100644
--- a/llvm/test/Analysis/GlobalsModRef/indirect-global.ll
+++ b/llvm/test/Analysis/GlobalsModRef/indirect-global.ll
@@ -12,11 +12,11 @@
 
 define i32 @test1(i32* %P) {
 ; CHECK: ret i32 0
-	%g1 = load i32** @G		; <i32*> [#uses=2]
-	%h1 = load i32* %g1		; <i32> [#uses=1]
+	%g1 = load i32*, i32** @G		; <i32*> [#uses=2]
+	%h1 = load i32, i32* %g1		; <i32> [#uses=1]
 	store i32 123, i32* %P
-	%g2 = load i32** @G		; <i32*> [#uses=0]
-	%h2 = load i32* %g1		; <i32> [#uses=1]
+	%g2 = load i32*, i32** @G		; <i32*> [#uses=0]
+	%h2 = load i32, i32* %g1		; <i32> [#uses=1]
 	%X = sub i32 %h1, %h2		; <i32> [#uses=1]
 	ret i32 %X
 }
diff --git a/llvm/test/Analysis/GlobalsModRef/modreftest.ll b/llvm/test/Analysis/GlobalsModRef/modreftest.ll
index 3eed916e..74101e2 100644
--- a/llvm/test/Analysis/GlobalsModRef/modreftest.ll
+++ b/llvm/test/Analysis/GlobalsModRef/modreftest.ll
@@ -9,7 +9,7 @@
 ; CHECK-NEXT: ret i32 12
 	store i32 12, i32* @X
 	call void @doesnotmodX( )
-	%V = load i32* @X		; <i32> [#uses=1]
+	%V = load i32, i32* @X		; <i32> [#uses=1]
 	ret i32 %V
 }
 
diff --git a/llvm/test/Analysis/GlobalsModRef/pr12351.ll b/llvm/test/Analysis/GlobalsModRef/pr12351.ll
index c221f4c..8f92277 100644
--- a/llvm/test/Analysis/GlobalsModRef/pr12351.ll
+++ b/llvm/test/Analysis/GlobalsModRef/pr12351.ll
@@ -9,7 +9,7 @@
 define void @bar(i8* %y, i8* %z) {
   %x = alloca i8
   call void @foo(i8* %x, i8* %y)
-  %t = load i8* %x
+  %t = load i8, i8* %x
   store i8 %t, i8* %y
 ; CHECK: store i8 %t, i8* %y
   ret void
@@ -19,8 +19,8 @@
 define i32 @foo2() {
   %foo = alloca i32
   call void @bar2(i32* %foo)
-  %t0 = load i32* %foo, align 4
-; CHECK: %t0 = load i32* %foo, align 4
+  %t0 = load i32, i32* %foo, align 4
+; CHECK: %t0 = load i32, i32* %foo, align 4
   ret i32 %t0
 }
 
diff --git a/llvm/test/Analysis/GlobalsModRef/volatile-instrs.ll b/llvm/test/Analysis/GlobalsModRef/volatile-instrs.ll
index 46d3d76..df49b4b 100644
--- a/llvm/test/Analysis/GlobalsModRef/volatile-instrs.ll
+++ b/llvm/test/Analysis/GlobalsModRef/volatile-instrs.ll
@@ -22,7 +22,7 @@
 define i32 @main() nounwind uwtable ssp {
 main_entry:
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false)
-  %0 = load volatile i32* getelementptr inbounds (%struct.anon* @b, i64 0, i32 0), align 4
+  %0 = load volatile i32, i32* getelementptr inbounds (%struct.anon* @b, i64 0, i32 0), align 4
   store i32 %0, i32* @c, align 4
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%struct.anon* @b to i8*), i8* bitcast (%struct.anon* @a to i8*), i64 12, i32 4, i1 false) nounwind
   %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %0) nounwind
diff --git a/llvm/test/Analysis/LazyCallGraph/basic.ll b/llvm/test/Analysis/LazyCallGraph/basic.ll
index b8108d9..d011602 100644
--- a/llvm/test/Analysis/LazyCallGraph/basic.ll
+++ b/llvm/test/Analysis/LazyCallGraph/basic.ll
@@ -118,10 +118,10 @@
 ; CHECK-NEXT: -> f1
 ; CHECK-NOT: ->
 
-  load i8** bitcast (void ()** @g to i8**)
-  load i8** bitcast (void ()** getelementptr ([4 x void ()*]* @g1, i32 0, i32 2) to i8**)
-  load i8** bitcast (void ()** getelementptr ({i8, void ()*, i8}* @g2, i32 0, i32 1) to i8**)
-  load i8** bitcast (void ()** @h to i8**)
+  load i8*, i8** bitcast (void ()** @g to i8**)
+  load i8*, i8** bitcast (void ()** getelementptr ([4 x void ()*]* @g1, i32 0, i32 2) to i8**)
+  load i8*, i8** bitcast (void ()** getelementptr ({i8, void ()*, i8}* @g2, i32 0, i32 1) to i8**)
+  load i8*, i8** bitcast (void ()** @h to i8**)
   ret void
 }
 
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/backward-dep-different-types.ll b/llvm/test/Analysis/LoopAccessAnalysis/backward-dep-different-types.ll
index 5ca4b31..238f3f4 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/backward-dep-different-types.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/backward-dep-different-types.ll
@@ -20,18 +20,18 @@
 
 define void @f() {
 entry:
-  %a = load i32** @A, align 8
-  %b = load i32** @B, align 8
+  %a = load i32*, i32** @A, align 8
+  %b = load i32*, i32** @B, align 8
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %storemerge3 = phi i64 [ 0, %entry ], [ %add, %for.body ]
 
   %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %storemerge3
-  %loadA = load i32* %arrayidxA, align 2
+  %loadA = load i32, i32* %arrayidxA, align 2
 
   %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %storemerge3
-  %loadB = load i32* %arrayidxB, align 2
+  %loadB = load i32, i32* %arrayidxB, align 2
 
   %mul = mul i32 %loadB, %loadA
 
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks-no-dbg.ll b/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks-no-dbg.ll
index f0203c5..6770f92 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks-no-dbg.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks-no-dbg.ll
@@ -28,22 +28,22 @@
 
 define void @f() {
 entry:
-  %a = load i16** @A, align 8
-  %b = load i16** @B, align 8
-  %c = load i16** @C, align 8
+  %a = load i16*, i16** @A, align 8
+  %b = load i16*, i16** @B, align 8
+  %c = load i16*, i16** @C, align 8
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %storemerge3 = phi i64 [ 0, %entry ], [ %add, %for.body ]
 
   %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %storemerge3
-  %loadA = load i16* %arrayidxA, align 2
+  %loadA = load i16, i16* %arrayidxA, align 2
 
   %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %storemerge3
-  %loadB = load i16* %arrayidxB, align 2
+  %loadB = load i16, i16* %arrayidxB, align 2
 
   %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %storemerge3
-  %loadC = load i16* %arrayidxC, align 2
+  %loadC = load i16, i16* %arrayidxC, align 2
 
   %mul = mul i16 %loadB, %loadA
   %mul1 = mul i16 %mul, %loadC
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll b/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll
index f452b32..a7a324b 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll
@@ -11,7 +11,7 @@
 
 ; CHECK: Report: unsafe dependent memory operations in loop
 
-; DEBUG: LAA: Distance for   %loadA = load i16* %arrayidxA, align 2 to   store i16 %mul1, i16* %arrayidxA_plus_2, align 2: 2
+; DEBUG: LAA: Distance for   %loadA = load i16, i16* %arrayidxA, align 2 to   store i16 %mul1, i16* %arrayidxA_plus_2, align 2: 2
 ; DEBUG-NEXT: LAA: Failure because of Positive distance 2
 
 ; CHECK: Run-time memory checks:
@@ -29,22 +29,22 @@
 
 define void @f() {
 entry:
-  %a = load i16** @A, align 8
-  %b = load i16** @B, align 8
-  %c = load i16** @C, align 8
+  %a = load i16*, i16** @A, align 8
+  %b = load i16*, i16** @B, align 8
+  %c = load i16*, i16** @C, align 8
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
   %storemerge3 = phi i64 [ 0, %entry ], [ %add, %for.body ]
 
   %arrayidxA = getelementptr inbounds i16, i16* %a, i64 %storemerge3
-  %loadA = load i16* %arrayidxA, align 2
+  %loadA = load i16, i16* %arrayidxA, align 2
 
   %arrayidxB = getelementptr inbounds i16, i16* %b, i64 %storemerge3
-  %loadB = load i16* %arrayidxB, align 2
+  %loadB = load i16, i16* %arrayidxB, align 2
 
   %arrayidxC = getelementptr inbounds i16, i16* %c, i64 %storemerge3
-  %loadC = load i16* %arrayidxC, align 2
+  %loadC = load i16, i16* %arrayidxC, align 2
 
   %mul = mul i16 %loadB, %loadA
   %mul1 = mul i16 %mul, %loadC
diff --git a/llvm/test/Analysis/MemoryDependenceAnalysis/memdep_requires_dominator_tree.ll b/llvm/test/Analysis/MemoryDependenceAnalysis/memdep_requires_dominator_tree.ll
index b0725ec..d472f7c 100644
--- a/llvm/test/Analysis/MemoryDependenceAnalysis/memdep_requires_dominator_tree.ll
+++ b/llvm/test/Analysis/MemoryDependenceAnalysis/memdep_requires_dominator_tree.ll
@@ -11,7 +11,7 @@
   %i.01 = phi i32 [ 0, %entry ], [ %tmp8.7, %for.body ]
   %arrayidx = getelementptr i32, i32* %bufUInt, i32 %i.01
   %arrayidx5 = getelementptr i32, i32* %pattern, i32 %i.01
-  %tmp6 = load i32* %arrayidx5, align 4
+  %tmp6 = load i32, i32* %arrayidx5, align 4
   store i32 %tmp6, i32* %arrayidx, align 4
   %tmp8.7 = add i32 %i.01, 8
   %cmp.7 = icmp ult i32 %tmp8.7, 1024
diff --git a/llvm/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll b/llvm/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll
index 6896e7a4..7e42530 100644
--- a/llvm/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll
+++ b/llvm/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll
@@ -16,11 +16,11 @@
 
 bb:		; preds = %bb1, %bb.nph
 	%j.01 = phi i32 [ %8, %bb1 ], [ 0, %bb.nph ]		; <i32> [#uses=1]
-	load i32* %srcptr, align 4		; <i32>:1 [#uses=2]
+	load i32, i32* %srcptr, align 4		; <i32>:1 [#uses=2]
 	and i32 %1, 255		; <i32>:2 [#uses=1]
 	and i32 %1, -256		; <i32>:3 [#uses=1]
 	getelementptr [256 x i8], [256 x i8]* @lut, i32 0, i32 %2		; <i8*>:4 [#uses=1]
-	load i8* %4, align 1		; <i8>:5 [#uses=1]
+	load i8, i8* %4, align 1		; <i8>:5 [#uses=1]
 	zext i8 %5 to i32		; <i32>:6 [#uses=1]
 	or i32 %6, %3		; <i32>:7 [#uses=1]
 	store i32 %7, i32* %dstptr, align 4
diff --git a/llvm/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll b/llvm/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll
index 1d4a27c..0c24ee4 100644
--- a/llvm/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll
+++ b/llvm/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll
@@ -10,7 +10,7 @@
 	%indvar = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ]		; <i32> [#uses=4]
 	%i.0.reg2mem.0 = sub i32 255, %indvar		; <i32> [#uses=2]
 	%0 = getelementptr i32, i32* %alp, i32 %i.0.reg2mem.0		; <i32*> [#uses=1]
-	%1 = load i32* %0, align 4		; <i32> [#uses=1]
+	%1 = load i32, i32* %0, align 4		; <i32> [#uses=1]
 	%2 = getelementptr i32, i32* %lam, i32 %i.0.reg2mem.0		; <i32*> [#uses=1]
 	store i32 %1, i32* %2, align 4
 	%3 = sub i32 254, %indvar		; <i32> [#uses=1]
diff --git a/llvm/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll b/llvm/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll
index f19d18c..ebcecbf 100644
--- a/llvm/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll
+++ b/llvm/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll
@@ -9,12 +9,12 @@
 
 define void @func_15() nounwind {
 entry:
-	%0 = load i16* @g_16, align 2		; <i16> [#uses=1]
+	%0 = load i16, i16* @g_16, align 2		; <i16> [#uses=1]
 	%1 = icmp sgt i16 %0, 0		; <i1> [#uses=1]
 	br i1 %1, label %bb2, label %bb.nph
 
 bb.nph:		; preds = %entry
-	%g_16.promoted = load i16* @g_16		; <i16> [#uses=1]
+	%g_16.promoted = load i16, i16* @g_16		; <i16> [#uses=1]
 	br label %bb
 
 bb:		; preds = %bb1, %bb.nph
diff --git a/llvm/test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll b/llvm/test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll
index a4358aa..d18bdaf 100644
--- a/llvm/test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll
+++ b/llvm/test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll
@@ -3,11 +3,11 @@
 
 define void @test() {
 entry:
-        %0 = load i16* undef, align 1
+        %0 = load i16, i16* undef, align 1
         %1 = lshr i16 %0, 8
         %2 = and i16 %1, 3
         %3 = zext i16 %2 to i32
-        %4 = load i8* undef, align 1
+        %4 = load i8, i8* undef, align 1
         %5 = lshr i8 %4, 4
         %6 = and i8 %5, 1
         %7 = zext i8 %6 to i32
diff --git a/llvm/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll b/llvm/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll
index 8c6c9b6..3ca552a 100644
--- a/llvm/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll
+++ b/llvm/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll
@@ -19,20 +19,20 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.body, %lbl_818
-  %0 = load i32* @g_814, align 4
+  %0 = load i32, i32* @g_814, align 4
   %cmp = icmp sle i32 %0, 0
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
   %idxprom = sext i32 %0 to i64
   %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* getelementptr inbounds ([1 x [0 x i32]]* @g_244, i32 0, i64 0), i32 0, i64 %idxprom
-  %1 = load i32* %arrayidx, align 1
+  %1 = load i32, i32* %arrayidx, align 1
   store i32 %1, i32* @func_21_l_773, align 4
   store i32 1, i32* @g_814, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %2 = load i32* @func_21_l_773, align 4
+  %2 = load i32, i32* @func_21_l_773, align 4
   %tobool = icmp ne i32 %2, 0
   br i1 %tobool, label %lbl_818, label %if.end
 
diff --git a/llvm/test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll b/llvm/test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll
index 7eeb308..0976ef9 100644
--- a/llvm/test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll
+++ b/llvm/test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll
@@ -7,7 +7,7 @@
 
 define i32 @test() {
 entry:
-	%0 = load i32** undef, align 8		; <i32*> [#uses=1]
+	%0 = load i32*, i32** undef, align 8		; <i32*> [#uses=1]
 	%1 = ptrtoint i32* %0 to i64		; <i64> [#uses=1]
 	%2 = sub i64 undef, %1		; <i64> [#uses=1]
 	%3 = lshr i64 %2, 3		; <i64> [#uses=1]
diff --git a/llvm/test/Analysis/ScalarEvolution/avoid-smax-0.ll b/llvm/test/Analysis/ScalarEvolution/avoid-smax-0.ll
index e921544..a282ee6 100644
--- a/llvm/test/Analysis/ScalarEvolution/avoid-smax-0.ll
+++ b/llvm/test/Analysis/ScalarEvolution/avoid-smax-0.ll
@@ -11,7 +11,7 @@
 	br i1 %0, label %bb, label %return
 
 bb:
-	load i32* %q, align 4
+	load i32, i32* %q, align 4
 	icmp eq i32 %1, 0
 	br i1 %2, label %return, label %bb3.preheader
 
@@ -21,7 +21,7 @@
 bb3:
 	%i.0 = phi i32 [ %7, %bb3 ], [ 0, %bb3.preheader ]
 	getelementptr i32, i32* %p, i32 %i.0
-	load i32* %3, align 4
+	load i32, i32* %3, align 4
 	add i32 %4, 1
 	getelementptr i32, i32* %p, i32 %i.0
 	store i32 %5, i32* %6, align 4
diff --git a/llvm/test/Analysis/ScalarEvolution/avoid-smax-1.ll b/llvm/test/Analysis/ScalarEvolution/avoid-smax-1.ll
index 685a106..e6c62ee 100644
--- a/llvm/test/Analysis/ScalarEvolution/avoid-smax-1.ll
+++ b/llvm/test/Analysis/ScalarEvolution/avoid-smax-1.ll
@@ -36,7 +36,7 @@
 	%8 = shl i32 %x.06, 1		; <i32> [#uses=1]
 	%9 = add i32 %6, %8		; <i32> [#uses=1]
 	%10 = getelementptr i8, i8* %r, i32 %9		; <i8*> [#uses=1]
-	%11 = load i8* %10, align 1		; <i8> [#uses=1]
+	%11 = load i8, i8* %10, align 1		; <i8> [#uses=1]
 	%12 = getelementptr i8, i8* %j, i32 %7		; <i8*> [#uses=1]
 	store i8 %11, i8* %12, align 1
 	%13 = add i32 %x.06, 1		; <i32> [#uses=2]
@@ -103,7 +103,7 @@
 	%29 = shl i32 %x.12, 2		; <i32> [#uses=1]
 	%30 = add i32 %29, %25		; <i32> [#uses=1]
 	%31 = getelementptr i8, i8* %r, i32 %30		; <i8*> [#uses=1]
-	%32 = load i8* %31, align 1		; <i8> [#uses=1]
+	%32 = load i8, i8* %31, align 1		; <i8> [#uses=1]
 	%.sum = add i32 %26, %x.12		; <i32> [#uses=1]
 	%33 = getelementptr i8, i8* %j, i32 %.sum		; <i8*> [#uses=1]
 	store i8 %32, i8* %33, align 1
@@ -111,7 +111,7 @@
 	%35 = or i32 %34, 2		; <i32> [#uses=1]
 	%36 = add i32 %35, %25		; <i32> [#uses=1]
 	%37 = getelementptr i8, i8* %r, i32 %36		; <i8*> [#uses=1]
-	%38 = load i8* %37, align 1		; <i8> [#uses=1]
+	%38 = load i8, i8* %37, align 1		; <i8> [#uses=1]
 	%.sum6 = add i32 %27, %x.12		; <i32> [#uses=1]
 	%39 = getelementptr i8, i8* %j, i32 %.sum6		; <i8*> [#uses=1]
 	store i8 %38, i8* %39, align 1
diff --git a/llvm/test/Analysis/ScalarEvolution/infer-prestart-no-wrap.ll b/llvm/test/Analysis/ScalarEvolution/infer-prestart-no-wrap.ll
index c9689f7..078ca03 100644
--- a/llvm/test/Analysis/ScalarEvolution/infer-prestart-no-wrap.ll
+++ b/llvm/test/Analysis/ScalarEvolution/infer-prestart-no-wrap.ll
@@ -53,7 +53,7 @@
 ; CHECK: %idx.sext = sext i32 %idx to i64
 ; CHECK-NEXT:  -->  {(2 + (sext i32 (4 * %start) to i64)),+,2}<nsw><%loop>
   %idx.inc = add nsw i32 %idx, 2
-  %condition = load i1* %c
+  %condition = load i1, i1* %c
   br i1 %condition, label %exit, label %loop
 
  exit:
@@ -73,7 +73,7 @@
 ; CHECK: %idx.sext = sext i8 %idx to i16
 ; CHECK-NEXT: -->  {(1 + (sext i8 %start to i16)),+,1}<nsw><%loop>
   %idx.inc = add nsw i8 %idx, 1
-  %condition = load volatile i1* %c
+  %condition = load volatile i1, i1* %c
   br i1 %condition, label %exit, label %loop
 
  exit:
@@ -93,7 +93,7 @@
 ; CHECK: %idx.zext = zext i8 %idx to i16
 ; CHECK-NEXT: -->  {(1 + (zext i8 %start to i16)),+,1}<nuw><%loop>
   %idx.inc = add nuw i8 %idx, 1
-  %condition = load volatile i1* %c
+  %condition = load volatile i1, i1* %c
   br i1 %condition, label %exit, label %loop
 
  exit:
diff --git a/llvm/test/Analysis/ScalarEvolution/load-with-range-metadata.ll b/llvm/test/Analysis/ScalarEvolution/load-with-range-metadata.ll
index 32c1074..f26c8d5 100644
--- a/llvm/test/Analysis/ScalarEvolution/load-with-range-metadata.ll
+++ b/llvm/test/Analysis/ScalarEvolution/load-with-range-metadata.ll
@@ -3,7 +3,7 @@
 define i32 @slt_trip_count_with_range(i32 *%ptr0, i32 *%ptr1) {
 ; CHECK-LABEL: slt_trip_count_with_range
  entry:
-  %limit = load i32* %ptr0, !range !0
+  %limit = load i32, i32* %ptr0, !range !0
   br label %loop
 
  loop:
@@ -20,7 +20,7 @@
 define i32 @ult_trip_count_with_range(i32 *%ptr0, i32 *%ptr1) {
 ; CHECK-LABEL: ult_trip_count_with_range
  entry:
-  %limit = load i32* %ptr0, !range !0
+  %limit = load i32, i32* %ptr0, !range !0
   br label %loop
 
  loop:
diff --git a/llvm/test/Analysis/ScalarEvolution/load.ll b/llvm/test/Analysis/ScalarEvolution/load.ll
index 8b460a80..f73e7f1 100644
--- a/llvm/test/Analysis/ScalarEvolution/load.ll
+++ b/llvm/test/Analysis/ScalarEvolution/load.ll
@@ -17,10 +17,10 @@
 ; CHECK: -->  %sum.04{{ *}}Exits: 2450
   %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds [50 x i32], [50 x i32]* @arr1, i32 0, i32 %i.03
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
 ; CHECK: -->  %0{{ *}}Exits: 50
   %arrayidx1 = getelementptr inbounds [50 x i32], [50 x i32]* @arr2, i32 0, i32 %i.03
-  %1 = load i32* %arrayidx1, align 4
+  %1 = load i32, i32* %arrayidx1, align 4
 ; CHECK: -->  %1{{ *}}Exits: 0
   %add = add i32 %0, %sum.04
   %add2 = add i32 %add, %1
@@ -52,10 +52,10 @@
   %n.01 = phi %struct.ListNode* [ bitcast ({ %struct.ListNode*, i32, [4 x i8] }* @node5 to %struct.ListNode*), %entry ], [ %1, %for.body ]
 ; CHECK: -->  %n.01{{ *}}Exits: @node1
   %i = getelementptr inbounds %struct.ListNode, %struct.ListNode* %n.01, i64 0, i32 1
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %add = add nsw i32 %0, %sum.02
   %next = getelementptr inbounds %struct.ListNode, %struct.ListNode* %n.01, i64 0, i32 0
-  %1 = load %struct.ListNode** %next, align 8
+  %1 = load %struct.ListNode*, %struct.ListNode** %next, align 8
 ; CHECK: -->  %1{{ *}}Exits: 0
   %cmp = icmp eq %struct.ListNode* %1, null
   br i1 %cmp, label %for.end, label %for.body
diff --git a/llvm/test/Analysis/ScalarEvolution/max-trip-count.ll b/llvm/test/Analysis/ScalarEvolution/max-trip-count.ll
index 4faedde..abebea7 100644
--- a/llvm/test/Analysis/ScalarEvolution/max-trip-count.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-trip-count.ll
@@ -174,7 +174,7 @@
 
 for.cond.i:                                       ; preds = %for.body.i
   store i32 %add.i.i, i32* @a, align 4
-  %ld = load volatile i32* @b
+  %ld = load volatile i32, i32* @b
   %cmp.i = icmp ne i32 %ld, 0
   br i1 %cmp.i, label %for.body.i, label %bar.exit
 
diff --git a/llvm/test/Analysis/ScalarEvolution/min-max-exprs.ll b/llvm/test/Analysis/ScalarEvolution/min-max-exprs.ll
index b9ede6f..892fc23 100644
--- a/llvm/test/Analysis/ScalarEvolution/min-max-exprs.ll
+++ b/llvm/test/Analysis/ScalarEvolution/min-max-exprs.ll
@@ -35,7 +35,7 @@
 ; CHECK:           select i1 %tmp4, i64 %tmp5, i64 %tmp6
 ; CHECK-NEXT:  --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<nw><%bb1> to i64))) smax (-1 + (-1 * (sext i32 %N to i64))))))
   %tmp11 = getelementptr inbounds i32, i32* %A, i64 %tmp9
-  %tmp12 = load i32* %tmp11, align 4
+  %tmp12 = load i32, i32* %tmp11, align 4
   %tmp13 = shl nsw i32 %tmp12, 1
   %tmp14 = icmp sge i32 3, %i.0
   %tmp17 = add nsw i64 %i.0.1, -3
diff --git a/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll b/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll
index 246f9ad..bef1070 100644
--- a/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll
+++ b/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll
@@ -28,10 +28,10 @@
 ; CHECK: -->  {%d,+,16}<nsw><%bb>
   %2 = getelementptr inbounds double, double* %d, i64 %1  ; <double*> [#uses=1]
 
-  %3 = load double* %2, align 8                   ; <double> [#uses=1]
+  %3 = load double, double* %2, align 8                   ; <double> [#uses=1]
   %4 = sext i32 %i.01 to i64                      ; <i64> [#uses=1]
   %5 = getelementptr inbounds double, double* %q, i64 %4  ; <double*> [#uses=1]
-  %6 = load double* %5, align 8                   ; <double> [#uses=1]
+  %6 = load double, double* %5, align 8                   ; <double> [#uses=1]
   %7 = or i32 %i.01, 1                            ; <i32> [#uses=1]
 
 ; CHECK: %8 = sext i32 %7 to i64
@@ -54,7 +54,7 @@
 ; CHECK: {(8 + %q),+,16}<nsw><%bb>
   %t9 = getelementptr inbounds double, double* %q, i64 %t8  ; <double*> [#uses=1]
 
-  %10 = load double* %9, align 8                  ; <double> [#uses=1]
+  %10 = load double, double* %9, align 8                  ; <double> [#uses=1]
   %11 = fadd double %6, %10                       ; <double> [#uses=1]
   %12 = fadd double %11, 3.200000e+00             ; <double> [#uses=1]
   %13 = fmul double %3, %12                       ; <double> [#uses=1]
diff --git a/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll b/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll
index 7b8de51..127bb19 100644
--- a/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll
+++ b/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll
@@ -26,10 +26,10 @@
 ; CHECK: -->  {%d,+,16}<nsw><%bb>
   %2 = getelementptr inbounds double, double* %d, i64 %1  ; <double*> [#uses=1]
 
-  %3 = load double* %2, align 8                   ; <double> [#uses=1]
+  %3 = load double, double* %2, align 8                   ; <double> [#uses=1]
   %4 = sext i32 %i.01 to i64                      ; <i64> [#uses=1]
   %5 = getelementptr inbounds double, double* %q, i64 %4  ; <double*> [#uses=1]
-  %6 = load double* %5, align 8                   ; <double> [#uses=1]
+  %6 = load double, double* %5, align 8                   ; <double> [#uses=1]
   %7 = or i32 %i.01, 1                            ; <i32> [#uses=1]
 
 ; CHECK: %8 = sext i32 %7 to i64
@@ -52,7 +52,7 @@
 ; CHECK: {(8 + %q),+,16}<nsw><%bb>
   %t9 = getelementptr inbounds double, double* %q, i64 %t8  ; <double*> [#uses=1]
 
-  %10 = load double* %9, align 8                  ; <double> [#uses=1]
+  %10 = load double, double* %9, align 8                  ; <double> [#uses=1]
   %11 = fadd double %6, %10                       ; <double> [#uses=1]
   %12 = fadd double %11, 3.200000e+00             ; <double> [#uses=1]
   %13 = fmul double %3, %12                       ; <double> [#uses=1]
diff --git a/llvm/test/Analysis/ScalarEvolution/nsw.ll b/llvm/test/Analysis/ScalarEvolution/nsw.ll
index 024b280..0a3c535 100644
--- a/llvm/test/Analysis/ScalarEvolution/nsw.ll
+++ b/llvm/test/Analysis/ScalarEvolution/nsw.ll
@@ -7,7 +7,7 @@
 ; CHECK: Classifying expressions for: @test1
 define void @test1(double* %p) nounwind {
 entry:
-	%tmp = load double* %p, align 8		; <double> [#uses=1]
+	%tmp = load double, double* %p, align 8		; <double> [#uses=1]
 	%tmp1 = fcmp ogt double %tmp, 2.000000e+00		; <i1> [#uses=1]
 	br i1 %tmp1, label %bb.nph, label %return
 
@@ -20,7 +20,7 @@
 ; CHECK-NEXT: -->  {0,+,1}<nuw><nsw><%bb>
 	%tmp2 = sext i32 %i.01 to i64		; <i64> [#uses=1]
 	%tmp3 = getelementptr double, double* %p, i64 %tmp2		; <double*> [#uses=1]
-	%tmp4 = load double* %tmp3, align 8		; <double> [#uses=1]
+	%tmp4 = load double, double* %tmp3, align 8		; <double> [#uses=1]
 	%tmp5 = fmul double %tmp4, 9.200000e+00		; <double> [#uses=1]
 	%tmp6 = sext i32 %i.01 to i64		; <i64> [#uses=1]
 	%tmp7 = getelementptr double, double* %p, i64 %tmp6		; <double*> [#uses=1]
@@ -39,7 +39,7 @@
 	%tmp9 = getelementptr double, double* %p, i64 %phitmp		; <double*> [#uses=1]
 ; CHECK: %tmp9
 ; CHECK-NEXT:  -->  {(8 + %p),+,8}<%bb>
-	%tmp10 = load double* %tmp9, align 8		; <double> [#uses=1]
+	%tmp10 = load double, double* %tmp9, align 8		; <double> [#uses=1]
 	%tmp11 = fcmp ogt double %tmp10, 2.000000e+00		; <i1> [#uses=1]
 	br i1 %tmp11, label %bb, label %bb1.return_crit_edge
 
diff --git a/llvm/test/Analysis/ScalarEvolution/pr22179.ll b/llvm/test/Analysis/ScalarEvolution/pr22179.ll
index d9fb510..5dc4192 100644
--- a/llvm/test/Analysis/ScalarEvolution/pr22179.ll
+++ b/llvm/test/Analysis/ScalarEvolution/pr22179.ll
@@ -14,7 +14,7 @@
 
 loop:
   %storemerge1 = phi i8 [ 0, %0 ], [ %inc, %loop ]
-  %m = load volatile i32* getelementptr inbounds (%struct.S* @b, i64 0, i32 0), align 4
+  %m = load volatile i32, i32* getelementptr inbounds (%struct.S* @b, i64 0, i32 0), align 4
   %inc = add nuw i8 %storemerge1, 1
 ; CHECK:   %inc = add nuw i8 %storemerge1, 1
 ; CHECK-NEXT: -->  {1,+,1}<nuw><%loop>
diff --git a/llvm/test/Analysis/ScalarEvolution/pr22674.ll b/llvm/test/Analysis/ScalarEvolution/pr22674.ll
index 6b7a143..1bc7fd3 100644
--- a/llvm/test/Analysis/ScalarEvolution/pr22674.ll
+++ b/llvm/test/Analysis/ScalarEvolution/pr22674.ll
@@ -45,9 +45,9 @@
 
 _ZNK4llvm12AttributeSet3endEj.exit:               ; preds = %for.end
   %second.i.i.i = getelementptr inbounds %"struct.std::pair.241.2040.3839.6152.6923.7694.8465.9493.10007.10264.18507", %"struct.std::pair.241.2040.3839.6152.6923.7694.8465.9493.10007.10264.18507"* undef, i32 %I.099.lcssa129, i32 1
-  %0 = load %"class.llvm::AttributeSetNode.230.2029.3828.6141.6912.7683.8454.9482.9996.10253.18506"** %second.i.i.i, align 4, !tbaa !2
+  %0 = load %"class.llvm::AttributeSetNode.230.2029.3828.6141.6912.7683.8454.9482.9996.10253.18506"*, %"class.llvm::AttributeSetNode.230.2029.3828.6141.6912.7683.8454.9482.9996.10253.18506"** %second.i.i.i, align 4, !tbaa !2
   %NumAttrs.i.i.i = getelementptr inbounds %"class.llvm::AttributeSetNode.230.2029.3828.6141.6912.7683.8454.9482.9996.10253.18506", %"class.llvm::AttributeSetNode.230.2029.3828.6141.6912.7683.8454.9482.9996.10253.18506"* %0, i32 0, i32 1
-  %1 = load i32* %NumAttrs.i.i.i, align 4, !tbaa !8
+  %1 = load i32, i32* %NumAttrs.i.i.i, align 4, !tbaa !8
   %add.ptr.i.i.i55 = getelementptr inbounds %"class.llvm::Attribute.222.2021.3820.6133.6904.7675.8446.9474.9988.10245.18509", %"class.llvm::Attribute.222.2021.3820.6133.6904.7675.8446.9474.9988.10245.18509"* undef, i32 %1
   br i1 undef, label %return, label %for.body11
 
@@ -58,7 +58,7 @@
 for.body11:                                       ; preds = %for.cond9, %_ZNK4llvm12AttributeSet3endEj.exit
   %I5.096 = phi %"class.llvm::Attribute.222.2021.3820.6133.6904.7675.8446.9474.9988.10245.18509"* [ %incdec.ptr, %for.cond9 ], [ undef, %_ZNK4llvm12AttributeSet3endEj.exit ]
   %2 = bitcast %"class.llvm::Attribute.222.2021.3820.6133.6904.7675.8446.9474.9988.10245.18509"* %I5.096 to i32*
-  %3 = load i32* %2, align 4, !tbaa !10
+  %3 = load i32, i32* %2, align 4, !tbaa !10
   %tobool.i59 = icmp eq i32 %3, 0
   br i1 %tobool.i59, label %cond.false21, label %_ZNK4llvm9Attribute15isEnumAttributeEv.exit
 
diff --git a/llvm/test/Analysis/ScalarEvolution/scev-aa.ll b/llvm/test/Analysis/ScalarEvolution/scev-aa.ll
index 9a3b9cd..e2123f4 100644
--- a/llvm/test/Analysis/ScalarEvolution/scev-aa.ll
+++ b/llvm/test/Analysis/ScalarEvolution/scev-aa.ll
@@ -22,8 +22,8 @@
   %pi = getelementptr double, double* %p, i64 %i
   %i.next = add i64 %i, 1
   %pi.next = getelementptr double, double* %p, i64 %i.next
-  %x = load double* %pi
-  %y = load double* %pi.next
+  %x = load double, double* %pi
+  %y = load double, double* %pi.next
   %z = fmul double %x, %y
   store double %z, double* %pi
   %exitcond = icmp eq i64 %i.next, %n
@@ -61,15 +61,15 @@
   %pi.j = getelementptr double, double* %p, i64 %e
   %f = add i64 %i.next, %j
   %pi.next.j = getelementptr double, double* %p, i64 %f
-  %x = load double* %pi.j
-  %y = load double* %pi.next.j
+  %x = load double, double* %pi.j
+  %y = load double, double* %pi.next.j
   %z = fmul double %x, %y
   store double %z, double* %pi.j
 
   %o = add i64 %j, 91
   %g = add i64 %i, %o
   %pi.j.next = getelementptr double, double* %p, i64 %g
-  %a = load double* %pi.j.next
+  %a = load double, double* %pi.j.next
   %b = fmul double %x, %a
   store double %b, double* %pi.j.next
 
@@ -118,15 +118,15 @@
   %pi.j = getelementptr double, double* %p, i64 %e
   %f = add i64 %i.next, %j
   %pi.next.j = getelementptr double, double* %p, i64 %f
-  %x = load double* %pi.j
-  %y = load double* %pi.next.j
+  %x = load double, double* %pi.j
+  %y = load double, double* %pi.next.j
   %z = fmul double %x, %y
   store double %z, double* %pi.j
 
   %o = add i64 %j, %n
   %g = add i64 %i, %o
   %pi.j.next = getelementptr double, double* %p, i64 %g
-  %a = load double* %pi.j.next
+  %a = load double, double* %pi.j.next
   %b = fmul double %x, %a
   store double %b, double* %pi.j.next
 
@@ -202,7 +202,7 @@
   %inc = add nsw i64 %i, 1                         ; <i64> [#uses=2]
   %arrayidx = getelementptr inbounds i64, i64* %p, i64 %inc
   store i64 0, i64* %arrayidx
-  %tmp6 = load i64* %p                            ; <i64> [#uses=1]
+  %tmp6 = load i64, i64* %p                            ; <i64> [#uses=1]
   %cmp = icmp slt i64 %inc, %tmp6                 ; <i1> [#uses=1]
   br i1 %cmp, label %for.body, label %for.end
 
diff --git a/llvm/test/Analysis/ScalarEvolution/scev-prestart-nowrap.ll b/llvm/test/Analysis/ScalarEvolution/scev-prestart-nowrap.ll
index 3ca32bd..77f3482 100644
--- a/llvm/test/Analysis/ScalarEvolution/scev-prestart-nowrap.ll
+++ b/llvm/test/Analysis/ScalarEvolution/scev-prestart-nowrap.ll
@@ -66,7 +66,7 @@
   br i1 %break.early, label %continue.1, label %early.exit
 
  continue.1:
-  %cond = load volatile i1* %unknown
+  %cond = load volatile i1, i1* %unknown
   %idx.inc = add nsw i32 %idx, 1
   br i1 %cond, label %loop, label %continue
 
diff --git a/llvm/test/Analysis/ScalarEvolution/sext-iv-0.ll b/llvm/test/Analysis/ScalarEvolution/sext-iv-0.ll
index f5d5455..47f0271 100644
--- a/llvm/test/Analysis/ScalarEvolution/sext-iv-0.ll
+++ b/llvm/test/Analysis/ScalarEvolution/sext-iv-0.ll
@@ -24,7 +24,7 @@
 ; CHECK: %2
 ; CHECK-NEXT: -->  {-128,+,1}<nsw><%bb1>	Exits: 127
 	%3 = getelementptr double, double* %x, i64 %2		; <double*> [#uses=1]
-	%4 = load double* %3, align 8		; <double> [#uses=1]
+	%4 = load double, double* %3, align 8		; <double> [#uses=1]
 	%5 = fmul double %4, 3.900000e+00		; <double> [#uses=1]
 	%6 = sext i8 %0 to i64		; <i64> [#uses=1]
 	%7 = getelementptr double, double* %x, i64 %6		; <double*> [#uses=1]
diff --git a/llvm/test/Analysis/ScalarEvolution/sext-iv-1.ll b/llvm/test/Analysis/ScalarEvolution/sext-iv-1.ll
index 07f055e..575b744a 100644
--- a/llvm/test/Analysis/ScalarEvolution/sext-iv-1.ll
+++ b/llvm/test/Analysis/ScalarEvolution/sext-iv-1.ll
@@ -24,7 +24,7 @@
 	%1 = trunc i64 %i.0.reg2mem.0 to i9		; <i8> [#uses=1]
 	%2 = sext i9 %1 to i64		; <i64> [#uses=1]
 	%3 = getelementptr double, double* %x, i64 %2		; <double*> [#uses=1]
-	%4 = load double* %3, align 8		; <double> [#uses=1]
+	%4 = load double, double* %3, align 8		; <double> [#uses=1]
 	%5 = fmul double %4, 3.900000e+00		; <double> [#uses=1]
 	%6 = sext i7 %0 to i64		; <i64> [#uses=1]
 	%7 = getelementptr double, double* %x, i64 %6		; <double*> [#uses=1]
@@ -47,7 +47,7 @@
 	%1 = trunc i64 %i.0.reg2mem.0 to i9		; <i8> [#uses=1]
 	%2 = sext i9 %1 to i64		; <i64> [#uses=1]
 	%3 = getelementptr double, double* %x, i64 %2		; <double*> [#uses=1]
-	%4 = load double* %3, align 8		; <double> [#uses=1]
+	%4 = load double, double* %3, align 8		; <double> [#uses=1]
 	%5 = fmul double %4, 3.900000e+00		; <double> [#uses=1]
 	%6 = sext i8 %0 to i64		; <i64> [#uses=1]
 	%7 = getelementptr double, double* %x, i64 %6		; <double*> [#uses=1]
@@ -70,7 +70,7 @@
 	%1 = trunc i64 %i.0.reg2mem.0 to i9		; <i8> [#uses=1]
 	%2 = sext i9 %1 to i64		; <i64> [#uses=1]
 	%3 = getelementptr double, double* %x, i64 %2		; <double*> [#uses=1]
-	%4 = load double* %3, align 8		; <double> [#uses=1]
+	%4 = load double, double* %3, align 8		; <double> [#uses=1]
 	%5 = fmul double %4, 3.900000e+00		; <double> [#uses=1]
 	%6 = sext i8 %0 to i64		; <i64> [#uses=1]
 	%7 = getelementptr double, double* %x, i64 %6		; <double*> [#uses=1]
@@ -93,7 +93,7 @@
 	%1 = trunc i64 %i.0.reg2mem.0 to i9		; <i8> [#uses=1]
 	%2 = sext i9 %1 to i64		; <i64> [#uses=1]
 	%3 = getelementptr double, double* %x, i64 %2		; <double*> [#uses=1]
-	%4 = load double* %3, align 8		; <double> [#uses=1]
+	%4 = load double, double* %3, align 8		; <double> [#uses=1]
 	%5 = fmul double %4, 3.900000e+00		; <double> [#uses=1]
 	%6 = sext i8 %0 to i64		; <i64> [#uses=1]
 	%7 = getelementptr double, double* %x, i64 %6		; <double*> [#uses=1]
diff --git a/llvm/test/Analysis/ScalarEvolution/sext-iv-2.ll b/llvm/test/Analysis/ScalarEvolution/sext-iv-2.ll
index e580cc1..6e07573 100644
--- a/llvm/test/Analysis/ScalarEvolution/sext-iv-2.ll
+++ b/llvm/test/Analysis/ScalarEvolution/sext-iv-2.ll
@@ -56,7 +56,7 @@
 	br label %bb5
 
 bb5:		; preds = %bb4.bb5_crit_edge, %entry
-	%tmp12 = load i32* getelementptr ([32 x [256 x i32]]* @table, i64 0, i64 9, i64 132), align 16		; <i32> [#uses=1]
+	%tmp12 = load i32, i32* getelementptr ([32 x [256 x i32]]* @table, i64 0, i64 9, i64 132), align 16		; <i32> [#uses=1]
 	%tmp13 = icmp eq i32 %tmp12, -1116		; <i1> [#uses=1]
 	br i1 %tmp13, label %bb7, label %bb6
 
diff --git a/llvm/test/Analysis/ScalarEvolution/sle.ll b/llvm/test/Analysis/ScalarEvolution/sle.ll
index c31f750..f24c480 100644
--- a/llvm/test/Analysis/ScalarEvolution/sle.ll
+++ b/llvm/test/Analysis/ScalarEvolution/sle.ll
@@ -15,7 +15,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] ; <i64> [#uses=2]
   %arrayidx = getelementptr double, double* %p, i64 %i    ; <double*> [#uses=2]
-  %t4 = load double* %arrayidx                    ; <double> [#uses=1]
+  %t4 = load double, double* %arrayidx                    ; <double> [#uses=1]
   %mul = fmul double %t4, 2.200000e+00            ; <double> [#uses=1]
   store double %mul, double* %arrayidx
   %i.next = add nsw i64 %i, 1                     ; <i64> [#uses=2]
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count11.ll b/llvm/test/Analysis/ScalarEvolution/trip-count11.ll
index 3faa951..b0a2c40 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count11.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count11.ll
@@ -21,7 +21,7 @@
 for.inc:                                          ; preds = %for.cond
   %idxprom = sext i32 %i.0 to i64
   %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* @foo.a, i64 0, i64 %idxprom
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %sum.0, %0
   %inc = add nsw i32 %i.0, 1
   br label %for.cond
@@ -44,7 +44,7 @@
 for.inc:                                          ; preds = %for.cond
   %idxprom = sext i32 %i.0 to i64
   %arrayidx = getelementptr inbounds [8 x i32], [8 x i32] addrspace(1)* @foo.a_as1, i64 0, i64 %idxprom
-  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
   %add = add nsw i32 %sum.0, %0
   %inc = add nsw i32 %i.0, 1
   br label %for.cond
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count12.ll b/llvm/test/Analysis/ScalarEvolution/trip-count12.ll
index 3fd16b2..d0086ee 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count12.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count12.ll
@@ -17,7 +17,7 @@
   %len.addr.04 = phi i32 [ %sub, %for.body ], [ %len, %for.body.preheader ]
   %res.03 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
   %incdec.ptr = getelementptr inbounds i16, i16* %p.addr.05, i32 1
-  %0 = load i16* %p.addr.05, align 2
+  %0 = load i16, i16* %p.addr.05, align 2
   %conv = zext i16 %0 to i32
   %add = add i32 %conv, %res.03
   %sub = add nsw i32 %len.addr.04, -2
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count4.ll b/llvm/test/Analysis/ScalarEvolution/trip-count4.ll
index 6c1ed89..966ffd2 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count4.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count4.ll
@@ -13,7 +13,7 @@
 	%s0 = shl i64 %indvar, 8		; <i64> [#uses=1]
 	%indvar.i8 = ashr i64 %s0, 8		; <i64> [#uses=1]
 	%t0 = getelementptr double, double* %d, i64 %indvar.i8		; <double*> [#uses=2]
-	%t1 = load double* %t0		; <double> [#uses=1]
+	%t1 = load double, double* %t0		; <double> [#uses=1]
 	%t2 = fmul double %t1, 1.000000e-01		; <double> [#uses=1]
 	store double %t2, double* %t0
 	%indvar.next = sub i64 %indvar, 1		; <i64> [#uses=2]
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count5.ll b/llvm/test/Analysis/ScalarEvolution/trip-count5.ll
index 564a75a..dc02fed 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count5.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count5.ll
@@ -9,7 +9,7 @@
 
 define float @t(float* %pTmp1, float* %peakWeight, float* %nrgReducePeakrate, i32 %bim) nounwind {
 entry:
-	%tmp3 = load float* %peakWeight, align 4		; <float> [#uses=2]
+	%tmp3 = load float, float* %peakWeight, align 4		; <float> [#uses=2]
 	%tmp2538 = icmp sgt i32 %bim, 0		; <i1> [#uses=1]
 	br i1 %tmp2538, label %bb.nph, label %bb4
 
@@ -22,12 +22,12 @@
 	%peakCount.034 = phi float [ %tmp19, %bb1 ], [ %tmp3, %bb.nph ]		; <float> [#uses=1]
 	%tmp6 = sext i32 %hiPart.035 to i64		; <i64> [#uses=1]
 	%tmp7 = getelementptr float, float* %pTmp1, i64 %tmp6		; <float*> [#uses=1]
-	%tmp8 = load float* %tmp7, align 4		; <float> [#uses=1]
+	%tmp8 = load float, float* %tmp7, align 4		; <float> [#uses=1]
 	%tmp10 = fadd float %tmp8, %distERBhi.036		; <float> [#uses=3]
 	%tmp12 = add i32 %hiPart.035, 1		; <i32> [#uses=3]
 	%tmp15 = sext i32 %tmp12 to i64		; <i64> [#uses=1]
 	%tmp16 = getelementptr float, float* %peakWeight, i64 %tmp15		; <float*> [#uses=1]
-	%tmp17 = load float* %tmp16, align 4		; <float> [#uses=1]
+	%tmp17 = load float, float* %tmp16, align 4		; <float> [#uses=1]
 	%tmp19 = fadd float %tmp17, %peakCount.034		; <float> [#uses=2]
 	br label %bb1
 
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count6.ll b/llvm/test/Analysis/ScalarEvolution/trip-count6.ll
index 9cba110..7980bbd 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count6.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count6.ll
@@ -13,7 +13,7 @@
   %mode.0 = phi i8 [ 0, %entry ], [ %indvar.next, %bb4 ]                ; <i8> [#uses=4]
   zext i8 %mode.0 to i32                ; <i32>:1 [#uses=1]
   getelementptr [4 x i32], [4 x i32]* @mode_table, i32 0, i32 %1           ; <i32*>:2 [#uses=1]
-  load i32* %2, align 4         ; <i32>:3 [#uses=1]
+  load i32, i32* %2, align 4         ; <i32>:3 [#uses=1]
   icmp eq i32 %3, %0            ; <i1>:4 [#uses=1]
   br i1 %4, label %bb1, label %bb2
 
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count7.ll b/llvm/test/Analysis/ScalarEvolution/trip-count7.ll
index a4eb72f..bbe76c49 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count7.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count7.ll
@@ -73,7 +73,7 @@
 	store i32 0, i32* %q, align 4
 	%tmp1 = sext i32 %tmp to i64		; <i64> [#uses=1]
 	%tmp2 = getelementptr [9 x i32], [9 x i32]* %a, i64 0, i64 %tmp1		; <i32*> [#uses=1]
-	%tmp3 = load i32* %tmp2, align 4		; <i32> [#uses=1]
+	%tmp3 = load i32, i32* %tmp2, align 4		; <i32> [#uses=1]
 	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
 	br i1 %tmp4, label %bb.i.bb7.i.backedge_crit_edge, label %bb1.i
 
@@ -81,7 +81,7 @@
 	%tmp5 = add i32 %j.0.i, 2		; <i32> [#uses=1]
 	%tmp6 = sext i32 %tmp5 to i64		; <i64> [#uses=1]
 	%tmp7 = getelementptr [17 x i32], [17 x i32]* %b, i64 0, i64 %tmp6		; <i32*> [#uses=1]
-	%tmp8 = load i32* %tmp7, align 4		; <i32> [#uses=1]
+	%tmp8 = load i32, i32* %tmp7, align 4		; <i32> [#uses=1]
 	%tmp9 = icmp eq i32 %tmp8, 0		; <i1> [#uses=1]
 	br i1 %tmp9, label %bb1.i.bb7.i.backedge_crit_edge, label %bb2.i
 
@@ -89,7 +89,7 @@
 	%tmp10 = sub i32 7, %j.0.i		; <i32> [#uses=1]
 	%tmp11 = sext i32 %tmp10 to i64		; <i64> [#uses=1]
 	%tmp12 = getelementptr [15 x i32], [15 x i32]* %c, i64 0, i64 %tmp11		; <i32*> [#uses=1]
-	%tmp13 = load i32* %tmp12, align 4		; <i32> [#uses=1]
+	%tmp13 = load i32, i32* %tmp12, align 4		; <i32> [#uses=1]
 	%tmp14 = icmp eq i32 %tmp13, 0		; <i1> [#uses=1]
 	br i1 %tmp14, label %bb2.i.bb7.i.backedge_crit_edge, label %bb3.i
 
@@ -108,7 +108,7 @@
 	%tmp23 = getelementptr [15 x i32], [15 x i32]* %c, i64 0, i64 %tmp22		; <i32*> [#uses=1]
 	store i32 0, i32* %tmp23, align 4
 	call void @Try(i32 2, i32* %q, i32* %b9, i32* %a10, i32* %c11, i32* %x1.sub) nounwind
-	%tmp24 = load i32* %q, align 4		; <i32> [#uses=1]
+	%tmp24 = load i32, i32* %q, align 4		; <i32> [#uses=1]
 	%tmp25 = icmp eq i32 %tmp24, 0		; <i1> [#uses=1]
 	br i1 %tmp25, label %bb5.i, label %bb3.i.bb7.i.backedge_crit_edge
 
@@ -131,7 +131,7 @@
 
 bb7.i:		; preds = %bb7.i.backedge, %newFuncRoot
 	%j.0.i = phi i32 [ 0, %newFuncRoot ], [ %tmp, %bb7.i.backedge ]		; <i32> [#uses=8]
-	%tmp34 = load i32* %q, align 4		; <i32> [#uses=1]
+	%tmp34 = load i32, i32* %q, align 4		; <i32> [#uses=1]
 	%tmp35 = icmp eq i32 %tmp34, 0		; <i1> [#uses=1]
 	%tmp36 = icmp ne i32 %j.0.i, 8		; <i1> [#uses=1]
 	%tmp37 = and i1 %tmp35, %tmp36		; <i1> [#uses=1]
diff --git a/llvm/test/Analysis/ScalarEvolution/zext-signed-addrec.ll b/llvm/test/Analysis/ScalarEvolution/zext-signed-addrec.ll
index 4369820..9201ffc 100644
--- a/llvm/test/Analysis/ScalarEvolution/zext-signed-addrec.ll
+++ b/llvm/test/Analysis/ScalarEvolution/zext-signed-addrec.ll
@@ -15,16 +15,16 @@
 ; CHECK-LABEL: foo
 define i32 @foo() {
 entry:
-  %.pr = load i32* @b, align 4
+  %.pr = load i32, i32* @b, align 4
   %cmp10 = icmp slt i32 %.pr, 1
   br i1 %cmp10, label %for.cond1.preheader.lr.ph, label %entry.for.end9_crit_edge
 
 entry.for.end9_crit_edge:                         ; preds = %entry
-  %.pre = load i32* @c, align 4
+  %.pre = load i32, i32* @c, align 4
   br label %for.end9
 
 for.cond1.preheader.lr.ph:                        ; preds = %entry
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %for.cond1.preheader.for.cond1.preheader.split_crit_edge, label %return.loopexit.split
 
diff --git a/llvm/test/Analysis/ScopedNoAliasAA/basic-domains.ll b/llvm/test/Analysis/ScopedNoAliasAA/basic-domains.ll
index 1cb69a0..c2b5bbd 100644
--- a/llvm/test/Analysis/ScopedNoAliasAA/basic-domains.ll
+++ b/llvm/test/Analysis/ScopedNoAliasAA/basic-domains.ll
@@ -5,15 +5,15 @@
 define void @foo1(float* nocapture %a, float* nocapture readonly %c) #0 {
 entry:
 ; CHECK-LABEL: Function: foo1
-  %0 = load float* %c, align 4, !alias.scope !9
+  %0 = load float, float* %c, align 4, !alias.scope !9
   %arrayidx.i = getelementptr inbounds float, float* %a, i64 5
   store float %0, float* %arrayidx.i, align 4, !noalias !6
 
-  %1 = load float* %c, align 4, !alias.scope !5
+  %1 = load float, float* %c, align 4, !alias.scope !5
   %arrayidx.i2 = getelementptr inbounds float, float* %a, i64 15
   store float %1, float* %arrayidx.i2, align 4, !noalias !6
 
-  %2 = load float* %c, align 4, !alias.scope !6
+  %2 = load float, float* %c, align 4, !alias.scope !6
   %arrayidx.i3 = getelementptr inbounds float, float* %a, i64 16
   store float %2, float* %arrayidx.i3, align 4, !noalias !5
 
@@ -42,15 +42,15 @@
 ; A list of scopes from both domains.
 !9 = !{!2, !4, !7}
 
-; CHECK: NoAlias:   %0 = load float* %c, align 4, !alias.scope !0 <->   store float %0, float* %arrayidx.i, align 4, !noalias !6
-; CHECK: NoAlias:   %0 = load float* %c, align 4, !alias.scope !0 <->   store float %1, float* %arrayidx.i2, align 4, !noalias !6
-; CHECK: MayAlias:   %0 = load float* %c, align 4, !alias.scope !0 <->   store float %2, float* %arrayidx.i3, align 4, !noalias !7
-; CHECK: NoAlias:   %1 = load float* %c, align 4, !alias.scope !7 <->   store float %0, float* %arrayidx.i, align 4, !noalias !6
-; CHECK: NoAlias:   %1 = load float* %c, align 4, !alias.scope !7 <->   store float %1, float* %arrayidx.i2, align 4, !noalias !6
-; CHECK: NoAlias:   %1 = load float* %c, align 4, !alias.scope !7 <->   store float %2, float* %arrayidx.i3, align 4, !noalias !7
-; CHECK: NoAlias:   %2 = load float* %c, align 4, !alias.scope !6 <->   store float %0, float* %arrayidx.i, align 4, !noalias !6
-; CHECK: NoAlias:   %2 = load float* %c, align 4, !alias.scope !6 <->   store float %1, float* %arrayidx.i2, align 4, !noalias !6
-; CHECK: MayAlias:   %2 = load float* %c, align 4, !alias.scope !6 <->   store float %2, float* %arrayidx.i3, align 4, !noalias !7
+; CHECK: NoAlias:   %0 = load float, float* %c, align 4, !alias.scope !0 <->   store float %0, float* %arrayidx.i, align 4, !noalias !6
+; CHECK: NoAlias:   %0 = load float, float* %c, align 4, !alias.scope !0 <->   store float %1, float* %arrayidx.i2, align 4, !noalias !6
+; CHECK: MayAlias:   %0 = load float, float* %c, align 4, !alias.scope !0 <->   store float %2, float* %arrayidx.i3, align 4, !noalias !7
+; CHECK: NoAlias:   %1 = load float, float* %c, align 4, !alias.scope !7 <->   store float %0, float* %arrayidx.i, align 4, !noalias !6
+; CHECK: NoAlias:   %1 = load float, float* %c, align 4, !alias.scope !7 <->   store float %1, float* %arrayidx.i2, align 4, !noalias !6
+; CHECK: NoAlias:   %1 = load float, float* %c, align 4, !alias.scope !7 <->   store float %2, float* %arrayidx.i3, align 4, !noalias !7
+; CHECK: NoAlias:   %2 = load float, float* %c, align 4, !alias.scope !6 <->   store float %0, float* %arrayidx.i, align 4, !noalias !6
+; CHECK: NoAlias:   %2 = load float, float* %c, align 4, !alias.scope !6 <->   store float %1, float* %arrayidx.i2, align 4, !noalias !6
+; CHECK: MayAlias:   %2 = load float, float* %c, align 4, !alias.scope !6 <->   store float %2, float* %arrayidx.i3, align 4, !noalias !7
 ; CHECK: NoAlias:   store float %1, float* %arrayidx.i2, align 4, !noalias !6 <->   store float %0, float* %arrayidx.i, align 4, !noalias !6
 ; CHECK: NoAlias:   store float %2, float* %arrayidx.i3, align 4, !noalias !7 <->   store float %0, float* %arrayidx.i, align 4, !noalias !6
 ; CHECK: NoAlias:   store float %2, float* %arrayidx.i3, align 4, !noalias !7 <->   store float %1, float* %arrayidx.i2, align 4, !noalias !6
diff --git a/llvm/test/Analysis/ScopedNoAliasAA/basic.ll b/llvm/test/Analysis/ScopedNoAliasAA/basic.ll
index cc26413..2625834 100644
--- a/llvm/test/Analysis/ScopedNoAliasAA/basic.ll
+++ b/llvm/test/Analysis/ScopedNoAliasAA/basic.ll
@@ -5,18 +5,18 @@
 define void @foo1(float* nocapture %a, float* nocapture readonly %c) #0 {
 entry:
 ; CHECK-LABEL: Function: foo1
-  %0 = load float* %c, align 4, !alias.scope !1
+  %0 = load float, float* %c, align 4, !alias.scope !1
   %arrayidx.i = getelementptr inbounds float, float* %a, i64 5
   store float %0, float* %arrayidx.i, align 4, !noalias !1
-  %1 = load float* %c, align 4
+  %1 = load float, float* %c, align 4
   %arrayidx = getelementptr inbounds float, float* %a, i64 7
   store float %1, float* %arrayidx, align 4
   ret void
 
-; CHECK: NoAlias:   %0 = load float* %c, align 4, !alias.scope !0 <->   store float %0, float* %arrayidx.i, align 4, !noalias !0
-; CHECK: MayAlias:   %0 = load float* %c, align 4, !alias.scope !0 <->   store float %1, float* %arrayidx, align 4
-; CHECK: MayAlias:   %1 = load float* %c, align 4 <->   store float %0, float* %arrayidx.i, align 4, !noalias !0
-; CHECK: MayAlias:   %1 = load float* %c, align 4 <->   store float %1, float* %arrayidx, align 4
+; CHECK: NoAlias:   %0 = load float, float* %c, align 4, !alias.scope !0 <->   store float %0, float* %arrayidx.i, align 4, !noalias !0
+; CHECK: MayAlias:   %0 = load float, float* %c, align 4, !alias.scope !0 <->   store float %1, float* %arrayidx, align 4
+; CHECK: MayAlias:   %1 = load float, float* %c, align 4 <->   store float %0, float* %arrayidx.i, align 4, !noalias !0
+; CHECK: MayAlias:   %1 = load float, float* %c, align 4 <->   store float %1, float* %arrayidx, align 4
 ; CHECK: NoAlias:   store float %1, float* %arrayidx, align 4 <->   store float %0, float* %arrayidx.i, align 4, !noalias !0
 }
 
diff --git a/llvm/test/Analysis/ScopedNoAliasAA/basic2.ll b/llvm/test/Analysis/ScopedNoAliasAA/basic2.ll
index ff99f66..a8a17e5 100644
--- a/llvm/test/Analysis/ScopedNoAliasAA/basic2.ll
+++ b/llvm/test/Analysis/ScopedNoAliasAA/basic2.ll
@@ -5,24 +5,24 @@
 define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
 entry:
 ; CHECK-LABEL: Function: foo2
-  %0 = load float* %c, align 4, !alias.scope !0
+  %0 = load float, float* %c, align 4, !alias.scope !0
   %arrayidx.i = getelementptr inbounds float, float* %a, i64 5
   store float %0, float* %arrayidx.i, align 4, !alias.scope !5, !noalias !4
   %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8
   store float %0, float* %arrayidx1.i, align 4, !alias.scope !0, !noalias !5
-  %1 = load float* %c, align 4
+  %1 = load float, float* %c, align 4
   %arrayidx = getelementptr inbounds float, float* %a, i64 7
   store float %1, float* %arrayidx, align 4
   ret void
 
-; CHECK: MayAlias:   %0 = load float* %c, align 4, !alias.scope !0 <->   store float %0, float* %arrayidx.i, align 4, !alias.scope !4, !noalia
+; CHECK: MayAlias:   %0 = load float, float* %c, align 4, !alias.scope !0 <->   store float %0, float* %arrayidx.i, align 4, !alias.scope !4, !noalia
 ; CHECK: s !5
-; CHECK: MayAlias:   %0 = load float* %c, align 4, !alias.scope !0 <->   store float %0, float* %arrayidx1.i, align 4, !alias.scope !0, !noali
+; CHECK: MayAlias:   %0 = load float, float* %c, align 4, !alias.scope !0 <->   store float %0, float* %arrayidx1.i, align 4, !alias.scope !0, !noali
 ; CHECK: as !4
-; CHECK: MayAlias:   %0 = load float* %c, align 4, !alias.scope !0 <->   store float %1, float* %arrayidx, align 4
-; CHECK: MayAlias:   %1 = load float* %c, align 4 <->   store float %0, float* %arrayidx.i, align 4, !alias.scope !4, !noalias !5
-; CHECK: MayAlias:   %1 = load float* %c, align 4 <->   store float %0, float* %arrayidx1.i, align 4, !alias.scope !0, !noalias !4
-; CHECK: MayAlias:   %1 = load float* %c, align 4 <->   store float %1, float* %arrayidx, align 4
+; CHECK: MayAlias:   %0 = load float, float* %c, align 4, !alias.scope !0 <->   store float %1, float* %arrayidx, align 4
+; CHECK: MayAlias:   %1 = load float, float* %c, align 4 <->   store float %0, float* %arrayidx.i, align 4, !alias.scope !4, !noalias !5
+; CHECK: MayAlias:   %1 = load float, float* %c, align 4 <->   store float %0, float* %arrayidx1.i, align 4, !alias.scope !0, !noalias !4
+; CHECK: MayAlias:   %1 = load float, float* %c, align 4 <->   store float %1, float* %arrayidx, align 4
 ; CHECK: NoAlias:   store float %0, float* %arrayidx1.i, align 4, !alias.scope !0, !noalias !4 <->   store float %0, float* %arrayidx.i, align
 ; CHECK: 4, !alias.scope !4, !noalias !5
 ; CHECK: NoAlias:   store float %1, float* %arrayidx, align 4 <->   store float %0, float* %arrayidx.i, align 4, !alias.scope !4, !noalias !5
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/PR17620.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/PR17620.ll
index 920d6f5..1504164 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/PR17620.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/PR17620.ll
@@ -16,11 +16,11 @@
 define %structA** @test(%classA* %this, i32** %p1) #0 align 2 {
 entry:
 ; CHECK-LABEL: @test
-; CHECK: load i32** %p1, align 8, !tbaa
-; CHECK: load i32** getelementptr (%classC* null, i32 0, i32 1, i32 0, i32 0), align 8, !tbaa
+; CHECK: load i32*, i32** %p1, align 8, !tbaa
+; CHECK: load i32*, i32** getelementptr (%classC* null, i32 0, i32 1, i32 0, i32 0), align 8, !tbaa
 ; CHECK: call void @callee
-  %0 = load i32** %p1, align 8, !tbaa !1
-  %1 = load i32** getelementptr (%classC* null, i32 0, i32 1, i32 0, i32 0), align 8, !tbaa !5
+  %0 = load i32*, i32** %p1, align 8, !tbaa !1
+  %1 = load i32*, i32** getelementptr (%classC* null, i32 0, i32 1, i32 0, i32 0), align 8, !tbaa !5
   call void @callee(i32* %0, i32* %1)
   unreachable
 }
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll
index 10da13a..93c34f9 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll
@@ -5,9 +5,9 @@
 ; CHECK: @test0_yes
 ; CHECK: add i8 %x, %x
 define i8 @test0_yes(i8* %a, i8* %b) nounwind {
-  %x = load i8* %a, !tbaa !1
+  %x = load i8, i8* %a, !tbaa !1
   store i8 0, i8* %b, !tbaa !2
-  %y = load i8* %a, !tbaa !1
+  %y = load i8, i8* %a, !tbaa !1
   %z = add i8 %x, %y
   ret i8 %z
 }
@@ -15,9 +15,9 @@
 ; CHECK: @test0_no
 ; CHECK: add i8 %x, %y
 define i8 @test0_no(i8* %a, i8* %b) nounwind {
-  %x = load i8* %a, !tbaa !3
+  %x = load i8, i8* %a, !tbaa !3
   store i8 0, i8* %b, !tbaa !4
-  %y = load i8* %a, !tbaa !3
+  %y = load i8, i8* %a, !tbaa !3
   %z = add i8 %x, %y
   ret i8 %z
 }
@@ -27,9 +27,9 @@
 ; CHECK: @test1_yes
 ; CHECK: add i8 %x, %x
 define i8 @test1_yes(i8* %a, i8* %b) nounwind {
-  %x = load i8* %a, !tbaa !5
+  %x = load i8, i8* %a, !tbaa !5
   store i8 0, i8* %b
-  %y = load i8* %a, !tbaa !5
+  %y = load i8, i8* %a, !tbaa !5
   %z = add i8 %x, %y
   ret i8 %z
 }
@@ -37,9 +37,9 @@
 ; CHECK: @test1_no
 ; CHECK: add i8 %x, %y
 define i8 @test1_no(i8* %a, i8* %b) nounwind {
-  %x = load i8* %a, !tbaa !6
+  %x = load i8, i8* %a, !tbaa !6
   store i8 0, i8* %b
-  %y = load i8* %a, !tbaa !6
+  %y = load i8, i8* %a, !tbaa !6
   %z = add i8 %x, %y
   ret i8 %z
 }
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll
index 31f775e..a7987f7 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll
@@ -6,8 +6,8 @@
 ; CHECK-NOT: alloca
 define internal i32 @test(i32* %X, i32* %Y, i32* %Q) {
   store i32 77, i32* %Q, !tbaa !2
-  %A = load i32* %X, !tbaa !1
-  %B = load i32* %Y, !tbaa !1
+  %A = load i32, i32* %X, !tbaa !1
+  %B = load i32, i32* %Y, !tbaa !1
   %C = add i32 %A, %B
   ret i32 %C
 }
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/dse.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/dse.ll
index 09f8feb..b6dc9b2 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/dse.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/dse.ll
@@ -4,47 +4,47 @@
 ; DSE should make use of TBAA.
 
 ; CHECK: @test0_yes
-; CHECK-NEXT: load i8* %b
+; CHECK-NEXT: load i8, i8* %b
 ; CHECK-NEXT: store i8 1, i8* %a
 ; CHECK-NEXT: ret i8 %y
 define i8 @test0_yes(i8* %a, i8* %b) nounwind {
   store i8 0, i8* %a, !tbaa !1
-  %y = load i8* %b, !tbaa !2
+  %y = load i8, i8* %b, !tbaa !2
   store i8 1, i8* %a, !tbaa !1
   ret i8 %y
 }
 
 ; CHECK: @test0_no
 ; CHECK-NEXT: store i8 0, i8* %a
-; CHECK-NEXT: load i8* %b
+; CHECK-NEXT: load i8, i8* %b
 ; CHECK-NEXT: store i8 1, i8* %a
 ; CHECK-NEXT: ret i8 %y
 define i8 @test0_no(i8* %a, i8* %b) nounwind {
   store i8 0, i8* %a, !tbaa !3
-  %y = load i8* %b, !tbaa !4
+  %y = load i8, i8* %b, !tbaa !4
   store i8 1, i8* %a, !tbaa !3
   ret i8 %y
 }
 
 ; CHECK: @test1_yes
-; CHECK-NEXT: load i8* %b
+; CHECK-NEXT: load i8, i8* %b
 ; CHECK-NEXT: store i8 1, i8* %a
 ; CHECK-NEXT: ret i8 %y
 define i8 @test1_yes(i8* %a, i8* %b) nounwind {
   store i8 0, i8* %a
-  %y = load i8* %b, !tbaa !5
+  %y = load i8, i8* %b, !tbaa !5
   store i8 1, i8* %a
   ret i8 %y
 }
 
 ; CHECK: @test1_no
 ; CHECK-NEXT: store i8 0, i8* %a
-; CHECK-NEXT: load i8* %b
+; CHECK-NEXT: load i8, i8* %b
 ; CHECK-NEXT: store i8 1, i8* %a
 ; CHECK-NEXT: ret i8 %y
 define i8 @test1_no(i8* %a, i8* %b) nounwind {
   store i8 0, i8* %a
-  %y = load i8* %b, !tbaa !6
+  %y = load i8, i8* %b, !tbaa !6
   store i8 1, i8* %a
   ret i8 %y
 }
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
index 293e96e..afc83c9 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/dynamic-indices.ll
@@ -13,7 +13,7 @@
 
 ; CHECK: for.end:
 ; CHECK:   %arrayidx31 = getelementptr inbounds %union.vector_t, %union.vector_t* %t, i64 0, i32 0, i64 1
-; CHECK:   %tmp32 = load i64* %arrayidx31, align 8, !tbaa [[TAG:!.*]]
+; CHECK:   %tmp32 = load i64, i64* %arrayidx31, align 8, !tbaa [[TAG:!.*]]
 
 define void @vrlh(%union.vector_t* %va, %union.vector_t* %vb, %union.vector_t* %vd) nounwind {
 entry:
@@ -26,21 +26,21 @@
   %idxprom = sext i32 %sub to i64
   %half = bitcast %union.vector_t* %vb to [8 x i16]*
   %arrayidx = getelementptr inbounds [8 x i16], [8 x i16]* %half, i64 0, i64 %idxprom
-  %tmp4 = load i16* %arrayidx, align 2, !tbaa !0
+  %tmp4 = load i16, i16* %arrayidx, align 2, !tbaa !0
   %conv = zext i16 %tmp4 to i32
   %and = and i32 %conv, 15
   %sub6 = sub nsw i32 7, %i.01
   %idxprom7 = sext i32 %sub6 to i64
   %half9 = bitcast %union.vector_t* %va to [8 x i16]*
   %arrayidx10 = getelementptr inbounds [8 x i16], [8 x i16]* %half9, i64 0, i64 %idxprom7
-  %tmp11 = load i16* %arrayidx10, align 2, !tbaa !0
+  %tmp11 = load i16, i16* %arrayidx10, align 2, !tbaa !0
   %conv12 = zext i16 %tmp11 to i32
   %shl = shl i32 %conv12, %and
   %sub15 = sub nsw i32 7, %i.01
   %idxprom16 = sext i32 %sub15 to i64
   %half18 = bitcast %union.vector_t* %va to [8 x i16]*
   %arrayidx19 = getelementptr inbounds [8 x i16], [8 x i16]* %half18, i64 0, i64 %idxprom16
-  %tmp20 = load i16* %arrayidx19, align 2, !tbaa !0
+  %tmp20 = load i16, i16* %arrayidx19, align 2, !tbaa !0
   %conv21 = zext i16 %tmp20 to i32
   %sub23 = sub nsw i32 16, %and
   %shr = lshr i32 %conv21, %sub23
@@ -57,11 +57,11 @@
 
 for.end:                                          ; preds = %for.body
   %arrayidx31 = getelementptr inbounds %union.vector_t, %union.vector_t* %t, i64 0, i32 0, i64 1
-  %tmp32 = load i64* %arrayidx31, align 8, !tbaa !3
+  %tmp32 = load i64, i64* %arrayidx31, align 8, !tbaa !3
   %arrayidx35 = getelementptr inbounds %union.vector_t, %union.vector_t* %vd, i64 0, i32 0, i64 1
   store i64 %tmp32, i64* %arrayidx35, align 8, !tbaa !3
   %arrayidx37 = getelementptr inbounds %union.vector_t, %union.vector_t* %t, i64 0, i32 0, i64 0
-  %tmp38 = load i64* %arrayidx37, align 8, !tbaa !3
+  %tmp38 = load i64, i64* %arrayidx37, align 8, !tbaa !3
   %arrayidx41 = getelementptr inbounds %union.vector_t, %union.vector_t* %vd, i64 0, i32 0, i64 0
   store i64 %tmp38, i64* %arrayidx41, align 8, !tbaa !3
   ret void
@@ -82,7 +82,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %i2.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
   %f = getelementptr inbounds %struct.X, %struct.X* %a, i64 %i2.01, i32 1
-  %tmp6 = load float* %f, align 4, !tbaa !5
+  %tmp6 = load float, float* %f, align 4, !tbaa !5
   %mul = fmul float %tmp6, 0x40019999A0000000
   store float %mul, float* %f, align 4, !tbaa !5
   %inc = add nsw i64 %i2.01, 1
@@ -91,7 +91,7 @@
 
 for.end:                                          ; preds = %for.body
   %i9 = getelementptr inbounds %struct.X, %struct.X* %a, i64 0, i32 0
-  %tmp10 = load i32* %i9, align 4, !tbaa !4
+  %tmp10 = load i32, i32* %i9, align 4, !tbaa !4
   ret i32 %tmp10
 }
 
@@ -110,7 +110,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
   %i5 = getelementptr inbounds %struct.X, %struct.X* %a, i64 %i.01, i32 0
-  %tmp6 = load i32* %i5, align 4, !tbaa !4
+  %tmp6 = load i32, i32* %i5, align 4, !tbaa !4
   %mul = mul nsw i32 %tmp6, 3
   store i32 %mul, i32* %i5, align 4, !tbaa !4
   %inc = add nsw i64 %i.01, 1
@@ -119,7 +119,7 @@
 
 for.end:                                          ; preds = %for.body
   %f9 = getelementptr inbounds %struct.X, %struct.X* %a, i64 0, i32 1
-  %tmp10 = load float* %f9, align 4, !tbaa !5
+  %tmp10 = load float, float* %f9, align 4, !tbaa !5
   ret float %tmp10
 }
 
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
index edea6d0..aaa43a4 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
@@ -17,7 +17,7 @@
   br i1 %c, label %if.else, label %if.then
 
 if.then:
-  %t = load i32* %p, !tbaa !1
+  %t = load i32, i32* %p, !tbaa !1
   store i32 %t, i32* %q
   ret void
 
@@ -32,11 +32,11 @@
 
 ; CHECK: @watch_out_for_type_change
 ; CHECK: if.then:
-; CHECK:   %t = load i32* %p
+; CHECK:   %t = load i32, i32* %p
 ; CHECK:   store i32 %t, i32* %q
 ; CHECK:   ret void
 ; CHECK: if.else:
-; CHECK:   %u = load i32* %p
+; CHECK:   %u = load i32, i32* %p
 ; CHECK:   store i32 %u, i32* %q
 
 define void @watch_out_for_type_change(i1 %c, i32* %p, i32* %p1, i32* %q) nounwind {
@@ -46,12 +46,12 @@
   br i1 %c, label %if.else, label %if.then
 
 if.then:
-  %t = load i32* %p, !tbaa !3
+  %t = load i32, i32* %p, !tbaa !3
   store i32 %t, i32* %q
   ret void
 
 if.else:
-  %u = load i32* %p, !tbaa !4
+  %u = load i32, i32* %p, !tbaa !4
   store i32 %u, i32* %q
   ret void
 }
@@ -64,7 +64,7 @@
 ; CHECK:   store i32 0, i32* %q
 ; CHECK:   ret void
 ; CHECK: if.else:
-; CHECK:   %u = load i32* %p
+; CHECK:   %u = load i32, i32* %p
 ; CHECK:   store i32 %u, i32* %q
 
 define void @watch_out_for_another_type_change(i1 %c, i32* %p, i32* %p1, i32* %q) nounwind {
@@ -74,12 +74,12 @@
   br i1 %c, label %if.else, label %if.then
 
 if.then:
-  %t = load i32* %p, !tbaa !4
+  %t = load i32, i32* %p, !tbaa !4
   store i32 %t, i32* %q
   ret void
 
 if.else:
-  %u = load i32* %p, !tbaa !3
+  %u = load i32, i32* %p, !tbaa !3
   store i32 %u, i32* %q
   ret void
 }
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/licm.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/licm.ll
index 150be83..fe07730 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/licm.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/licm.ll
@@ -5,7 +5,7 @@
 
 ; CHECK: @foo
 ; CHECK:      entry:
-; CHECK-NEXT:   %tmp3 = load double** @P, !tbaa !0
+; CHECK-NEXT:   %tmp3 = load double*, double** @P, !tbaa !0
 ; CHECK-NEXT:   br label %for.body
 
 @P = common global double* null
@@ -16,9 +16,9 @@
 
 for.body:                                         ; preds = %entry, %for.body
   %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
-  %tmp3 = load double** @P, !tbaa !1
+  %tmp3 = load double*, double** @P, !tbaa !1
   %scevgep = getelementptr double, double* %tmp3, i64 %i.07
-  %tmp4 = load double* %scevgep, !tbaa !2
+  %tmp4 = load double, double* %scevgep, !tbaa !2
   %mul = fmul double %tmp4, 2.300000e+00
   store double %mul, double* %scevgep, !tbaa !2
   %inc = add i64 %i.07, 1
@@ -49,9 +49,9 @@
   br label %loop
 
 loop:
-  %tmp51 = load i8** %p, !tbaa !4
+  %tmp51 = load i8*, i8** %p, !tbaa !4
   store i8* %tmp51, i8** %p
-  %tmp40 = load i8* %q, !tbaa !5
+  %tmp40 = load i8, i8* %q, !tbaa !5
   store i8 %tmp40, i8* %q
   br label %loop
 }
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll
index 6d775b4..aa91020 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/placement-tbaa.ll
@@ -33,20 +33,20 @@
   %call = call noalias i8* @_Znwm(i64 8)
   %0 = bitcast i8* %call to %struct.Foo*
   store %struct.Foo* %0, %struct.Foo** %f, align 8, !tbaa !4
-  %1 = load %struct.Foo** %f, align 8, !tbaa !4
+  %1 = load %struct.Foo*, %struct.Foo** %f, align 8, !tbaa !4
   %i = getelementptr inbounds %struct.Foo, %struct.Foo* %1, i32 0, i32 0
   store i64 1, i64* %i, align 8, !tbaa !6
   store i32 0, i32* %i1, align 4, !tbaa !0
   br label %for.cond
 
 for.cond:
-  %2 = load i32* %i1, align 4, !tbaa !0
-  %3 = load i32* %n.addr, align 4, !tbaa !0
+  %2 = load i32, i32* %i1, align 4, !tbaa !0
+  %3 = load i32, i32* %n.addr, align 4, !tbaa !0
   %cmp = icmp slt i32 %2, %3
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:
-  %4 = load %struct.Foo** %f, align 8, !tbaa !4
+  %4 = load %struct.Foo*, %struct.Foo** %f, align 8, !tbaa !4
   %5 = bitcast %struct.Foo* %4 to i8*
   %new.isnull = icmp eq i8* %5, null
   br i1 %new.isnull, label %new.cont, label %new.notnull
@@ -58,10 +58,10 @@
 new.cont:
   %7 = phi %struct.Bar* [ %6, %new.notnull ], [ null, %for.body ]
   store %struct.Bar* %7, %struct.Bar** %b, align 8, !tbaa !4
-  %8 = load %struct.Bar** %b, align 8, !tbaa !4
+  %8 = load %struct.Bar*, %struct.Bar** %b, align 8, !tbaa !4
   %p = getelementptr inbounds %struct.Bar, %struct.Bar* %8, i32 0, i32 0
   store i8* null, i8** %p, align 8, !tbaa !9
-  %9 = load %struct.Foo** %f, align 8, !tbaa !4
+  %9 = load %struct.Foo*, %struct.Foo** %f, align 8, !tbaa !4
   %10 = bitcast %struct.Foo* %9 to i8*
   %new.isnull2 = icmp eq i8* %10, null
   br i1 %new.isnull2, label %new.cont4, label %new.notnull3
@@ -73,23 +73,23 @@
 new.cont4:
   %12 = phi %struct.Foo* [ %11, %new.notnull3 ], [ null, %new.cont ]
   store %struct.Foo* %12, %struct.Foo** %f, align 8, !tbaa !4
-  %13 = load i32* %i1, align 4, !tbaa !0
+  %13 = load i32, i32* %i1, align 4, !tbaa !0
   %conv = sext i32 %13 to i64
-  %14 = load %struct.Foo** %f, align 8, !tbaa !4
+  %14 = load %struct.Foo*, %struct.Foo** %f, align 8, !tbaa !4
   %i5 = getelementptr inbounds %struct.Foo, %struct.Foo* %14, i32 0, i32 0
   store i64 %conv, i64* %i5, align 8, !tbaa !6
   br label %for.inc
 
 for.inc:
-  %15 = load i32* %i1, align 4, !tbaa !0
+  %15 = load i32, i32* %i1, align 4, !tbaa !0
   %inc = add nsw i32 %15, 1
   store i32 %inc, i32* %i1, align 4, !tbaa !0
   br label %for.cond
 
 for.end:
-  %16 = load %struct.Foo** %f, align 8, !tbaa !4
+  %16 = load %struct.Foo*, %struct.Foo** %f, align 8, !tbaa !4
   %i6 = getelementptr inbounds %struct.Foo, %struct.Foo* %16, i32 0, i32 0
-  %17 = load i64* %i6, align 8, !tbaa !6
+  %17 = load i64, i64* %i6, align 8, !tbaa !6
   ret i64 %17
 }
 
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/precedence.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/precedence.ll
index e50021b..b2931ca 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/precedence.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/precedence.ll
@@ -18,7 +18,7 @@
   store i32 0, i32* %x, !tbaa !0
   %0 = bitcast i32* %x to float*
   store float 0x4002666660000000, float* %0, !tbaa !3
-  %tmp3 = load i32* %x, !tbaa !0
+  %tmp3 = load i32, i32* %x, !tbaa !0
   ret i32 %tmp3
 }
 
@@ -35,7 +35,7 @@
   %0 = bitcast i64* %x to i8*
   %1 = getelementptr i8, i8* %0, i64 1
   store i8 1, i8* %1, !tbaa !5
-  %tmp3 = load i64* %x, !tbaa !4
+  %tmp3 = load i64, i64* %x, !tbaa !4
   ret i64 %tmp3
 }
 
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/sink.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/sink.ll
index 1a124b8..c95dc15 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/sink.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/sink.ll
@@ -1,11 +1,11 @@
 ; RUN: opt -tbaa -sink -S < %s | FileCheck %s
 
 ; CHECK: a:
-; CHECK:   %f = load float* %p, !tbaa [[TAGA:!.*]]
+; CHECK:   %f = load float, float* %p, !tbaa [[TAGA:!.*]]
 ; CHECK:   store float %f, float* %q
 
 define void @foo(float* %p, i1 %c, float* %q, float* %r) {
-  %f = load float* %p, !tbaa !0
+  %f = load float, float* %p, !tbaa !0
   store float 0.0, float* %r, !tbaa !1
   br i1 %c, label %a, label %b
 a:
diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll
index 107aff0..a2e4dc6 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/tbaa-path.ll
@@ -17,7 +17,7 @@
 ; OPT: define
 ; OPT: store i32 1
 ; OPT: store i32 4
-; OPT: %[[RET:.*]] = load i32*
+; OPT: %[[RET:.*]] = load i32, i32*
 ; OPT: ret i32 %[[RET]]
   %s.addr = alloca i32*, align 8
   %A.addr = alloca %struct.StructA*, align 8
@@ -25,13 +25,13 @@
   store i32* %s, i32** %s.addr, align 8, !tbaa !0
   store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
   store i64 %count, i64* %count.addr, align 8, !tbaa !4
-  %0 = load i32** %s.addr, align 8, !tbaa !0
+  %0 = load i32*, i32** %s.addr, align 8, !tbaa !0
   store i32 1, i32* %0, align 4, !tbaa !6
-  %1 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %1 = load %struct.StructA*, %struct.StructA** %A.addr, align 8, !tbaa !0
   %f32 = getelementptr inbounds %struct.StructA, %struct.StructA* %1, i32 0, i32 1
   store i32 4, i32* %f32, align 4, !tbaa !8
-  %2 = load i32** %s.addr, align 8, !tbaa !0
-  %3 = load i32* %2, align 4, !tbaa !6
+  %2 = load i32*, i32** %s.addr, align 8, !tbaa !0
+  %3 = load i32, i32* %2, align 4, !tbaa !6
   ret i32 %3
 }
 
@@ -51,13 +51,13 @@
   store i32* %s, i32** %s.addr, align 8, !tbaa !0
   store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
   store i64 %count, i64* %count.addr, align 8, !tbaa !4
-  %0 = load i32** %s.addr, align 8, !tbaa !0
+  %0 = load i32*, i32** %s.addr, align 8, !tbaa !0
   store i32 1, i32* %0, align 4, !tbaa !6
-  %1 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %1 = load %struct.StructA*, %struct.StructA** %A.addr, align 8, !tbaa !0
   %f16 = getelementptr inbounds %struct.StructA, %struct.StructA* %1, i32 0, i32 0
   store i16 4, i16* %f16, align 2, !tbaa !11
-  %2 = load i32** %s.addr, align 8, !tbaa !0
-  %3 = load i32* %2, align 4, !tbaa !6
+  %2 = load i32*, i32** %s.addr, align 8, !tbaa !0
+  %3 = load i32, i32* %2, align 4, !tbaa !6
   ret i32 %3
 }
 
@@ -69,7 +69,7 @@
 ; OPT: define
 ; OPT: store i32 1
 ; OPT: store i32 4
-; OPT: %[[RET:.*]] = load i32*
+; OPT: %[[RET:.*]] = load i32, i32*
 ; OPT: ret i32 %[[RET]]
   %A.addr = alloca %struct.StructA*, align 8
   %B.addr = alloca %struct.StructB*, align 8
@@ -77,16 +77,16 @@
   store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
   store %struct.StructB* %B, %struct.StructB** %B.addr, align 8, !tbaa !0
   store i64 %count, i64* %count.addr, align 8, !tbaa !4
-  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %0 = load %struct.StructA*, %struct.StructA** %A.addr, align 8, !tbaa !0
   %f32 = getelementptr inbounds %struct.StructA, %struct.StructA* %0, i32 0, i32 1
   store i32 1, i32* %f32, align 4, !tbaa !8
-  %1 = load %struct.StructB** %B.addr, align 8, !tbaa !0
+  %1 = load %struct.StructB*, %struct.StructB** %B.addr, align 8, !tbaa !0
   %a = getelementptr inbounds %struct.StructB, %struct.StructB* %1, i32 0, i32 1
   %f321 = getelementptr inbounds %struct.StructA, %struct.StructA* %a, i32 0, i32 1
   store i32 4, i32* %f321, align 4, !tbaa !12
-  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %2 = load %struct.StructA*, %struct.StructA** %A.addr, align 8, !tbaa !0
   %f322 = getelementptr inbounds %struct.StructA, %struct.StructA* %2, i32 0, i32 1
-  %3 = load i32* %f322, align 4, !tbaa !8
+  %3 = load i32, i32* %f322, align 4, !tbaa !8
   ret i32 %3
 }
 
@@ -106,16 +106,16 @@
   store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
   store %struct.StructB* %B, %struct.StructB** %B.addr, align 8, !tbaa !0
   store i64 %count, i64* %count.addr, align 8, !tbaa !4
-  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %0 = load %struct.StructA*, %struct.StructA** %A.addr, align 8, !tbaa !0
   %f32 = getelementptr inbounds %struct.StructA, %struct.StructA* %0, i32 0, i32 1
   store i32 1, i32* %f32, align 4, !tbaa !8
-  %1 = load %struct.StructB** %B.addr, align 8, !tbaa !0
+  %1 = load %struct.StructB*, %struct.StructB** %B.addr, align 8, !tbaa !0
   %a = getelementptr inbounds %struct.StructB, %struct.StructB* %1, i32 0, i32 1
   %f16 = getelementptr inbounds %struct.StructA, %struct.StructA* %a, i32 0, i32 0
   store i16 4, i16* %f16, align 2, !tbaa !14
-  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %2 = load %struct.StructA*, %struct.StructA** %A.addr, align 8, !tbaa !0
   %f321 = getelementptr inbounds %struct.StructA, %struct.StructA* %2, i32 0, i32 1
-  %3 = load i32* %f321, align 4, !tbaa !8
+  %3 = load i32, i32* %f321, align 4, !tbaa !8
   ret i32 %3
 }
 
@@ -135,15 +135,15 @@
   store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
   store %struct.StructB* %B, %struct.StructB** %B.addr, align 8, !tbaa !0
   store i64 %count, i64* %count.addr, align 8, !tbaa !4
-  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %0 = load %struct.StructA*, %struct.StructA** %A.addr, align 8, !tbaa !0
   %f32 = getelementptr inbounds %struct.StructA, %struct.StructA* %0, i32 0, i32 1
   store i32 1, i32* %f32, align 4, !tbaa !8
-  %1 = load %struct.StructB** %B.addr, align 8, !tbaa !0
+  %1 = load %struct.StructB*, %struct.StructB** %B.addr, align 8, !tbaa !0
   %f321 = getelementptr inbounds %struct.StructB, %struct.StructB* %1, i32 0, i32 2
   store i32 4, i32* %f321, align 4, !tbaa !15
-  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %2 = load %struct.StructA*, %struct.StructA** %A.addr, align 8, !tbaa !0
   %f322 = getelementptr inbounds %struct.StructA, %struct.StructA* %2, i32 0, i32 1
-  %3 = load i32* %f322, align 4, !tbaa !8
+  %3 = load i32, i32* %f322, align 4, !tbaa !8
   ret i32 %3
 }
 
@@ -163,16 +163,16 @@
   store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
   store %struct.StructB* %B, %struct.StructB** %B.addr, align 8, !tbaa !0
   store i64 %count, i64* %count.addr, align 8, !tbaa !4
-  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %0 = load %struct.StructA*, %struct.StructA** %A.addr, align 8, !tbaa !0
   %f32 = getelementptr inbounds %struct.StructA, %struct.StructA* %0, i32 0, i32 1
   store i32 1, i32* %f32, align 4, !tbaa !8
-  %1 = load %struct.StructB** %B.addr, align 8, !tbaa !0
+  %1 = load %struct.StructB*, %struct.StructB** %B.addr, align 8, !tbaa !0
   %a = getelementptr inbounds %struct.StructB, %struct.StructB* %1, i32 0, i32 1
   %f32_2 = getelementptr inbounds %struct.StructA, %struct.StructA* %a, i32 0, i32 3
   store i32 4, i32* %f32_2, align 4, !tbaa !16
-  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %2 = load %struct.StructA*, %struct.StructA** %A.addr, align 8, !tbaa !0
   %f321 = getelementptr inbounds %struct.StructA, %struct.StructA* %2, i32 0, i32 1
-  %3 = load i32* %f321, align 4, !tbaa !8
+  %3 = load i32, i32* %f321, align 4, !tbaa !8
   ret i32 %3
 }
 
@@ -192,15 +192,15 @@
   store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
   store %struct.StructS* %S, %struct.StructS** %S.addr, align 8, !tbaa !0
   store i64 %count, i64* %count.addr, align 8, !tbaa !4
-  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %0 = load %struct.StructA*, %struct.StructA** %A.addr, align 8, !tbaa !0
   %f32 = getelementptr inbounds %struct.StructA, %struct.StructA* %0, i32 0, i32 1
   store i32 1, i32* %f32, align 4, !tbaa !8
-  %1 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %1 = load %struct.StructS*, %struct.StructS** %S.addr, align 8, !tbaa !0
   %f321 = getelementptr inbounds %struct.StructS, %struct.StructS* %1, i32 0, i32 1
   store i32 4, i32* %f321, align 4, !tbaa !17
-  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %2 = load %struct.StructA*, %struct.StructA** %A.addr, align 8, !tbaa !0
   %f322 = getelementptr inbounds %struct.StructA, %struct.StructA* %2, i32 0, i32 1
-  %3 = load i32* %f322, align 4, !tbaa !8
+  %3 = load i32, i32* %f322, align 4, !tbaa !8
   ret i32 %3
 }
 
@@ -220,15 +220,15 @@
   store %struct.StructA* %A, %struct.StructA** %A.addr, align 8, !tbaa !0
   store %struct.StructS* %S, %struct.StructS** %S.addr, align 8, !tbaa !0
   store i64 %count, i64* %count.addr, align 8, !tbaa !4
-  %0 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %0 = load %struct.StructA*, %struct.StructA** %A.addr, align 8, !tbaa !0
   %f32 = getelementptr inbounds %struct.StructA, %struct.StructA* %0, i32 0, i32 1
   store i32 1, i32* %f32, align 4, !tbaa !8
-  %1 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %1 = load %struct.StructS*, %struct.StructS** %S.addr, align 8, !tbaa !0
   %f16 = getelementptr inbounds %struct.StructS, %struct.StructS* %1, i32 0, i32 0
   store i16 4, i16* %f16, align 2, !tbaa !19
-  %2 = load %struct.StructA** %A.addr, align 8, !tbaa !0
+  %2 = load %struct.StructA*, %struct.StructA** %A.addr, align 8, !tbaa !0
   %f321 = getelementptr inbounds %struct.StructA, %struct.StructA* %2, i32 0, i32 1
-  %3 = load i32* %f321, align 4, !tbaa !8
+  %3 = load i32, i32* %f321, align 4, !tbaa !8
   ret i32 %3
 }
 
@@ -248,15 +248,15 @@
   store %struct.StructS* %S, %struct.StructS** %S.addr, align 8, !tbaa !0
   store %struct.StructS2* %S2, %struct.StructS2** %S2.addr, align 8, !tbaa !0
   store i64 %count, i64* %count.addr, align 8, !tbaa !4
-  %0 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %0 = load %struct.StructS*, %struct.StructS** %S.addr, align 8, !tbaa !0
   %f32 = getelementptr inbounds %struct.StructS, %struct.StructS* %0, i32 0, i32 1
   store i32 1, i32* %f32, align 4, !tbaa !17
-  %1 = load %struct.StructS2** %S2.addr, align 8, !tbaa !0
+  %1 = load %struct.StructS2*, %struct.StructS2** %S2.addr, align 8, !tbaa !0
   %f321 = getelementptr inbounds %struct.StructS2, %struct.StructS2* %1, i32 0, i32 1
   store i32 4, i32* %f321, align 4, !tbaa !20
-  %2 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %2 = load %struct.StructS*, %struct.StructS** %S.addr, align 8, !tbaa !0
   %f322 = getelementptr inbounds %struct.StructS, %struct.StructS* %2, i32 0, i32 1
-  %3 = load i32* %f322, align 4, !tbaa !17
+  %3 = load i32, i32* %f322, align 4, !tbaa !17
   ret i32 %3
 }
 
@@ -276,15 +276,15 @@
   store %struct.StructS* %S, %struct.StructS** %S.addr, align 8, !tbaa !0
   store %struct.StructS2* %S2, %struct.StructS2** %S2.addr, align 8, !tbaa !0
   store i64 %count, i64* %count.addr, align 8, !tbaa !4
-  %0 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %0 = load %struct.StructS*, %struct.StructS** %S.addr, align 8, !tbaa !0
   %f32 = getelementptr inbounds %struct.StructS, %struct.StructS* %0, i32 0, i32 1
   store i32 1, i32* %f32, align 4, !tbaa !17
-  %1 = load %struct.StructS2** %S2.addr, align 8, !tbaa !0
+  %1 = load %struct.StructS2*, %struct.StructS2** %S2.addr, align 8, !tbaa !0
   %f16 = getelementptr inbounds %struct.StructS2, %struct.StructS2* %1, i32 0, i32 0
   store i16 4, i16* %f16, align 2, !tbaa !22
-  %2 = load %struct.StructS** %S.addr, align 8, !tbaa !0
+  %2 = load %struct.StructS*, %struct.StructS** %S.addr, align 8, !tbaa !0
   %f321 = getelementptr inbounds %struct.StructS, %struct.StructS* %2, i32 0, i32 1
-  %3 = load i32* %f321, align 4, !tbaa !17
+  %3 = load i32, i32* %f321, align 4, !tbaa !17
   ret i32 %3
 }
 
@@ -304,21 +304,21 @@
   store %struct.StructC* %C, %struct.StructC** %C.addr, align 8, !tbaa !0
   store %struct.StructD* %D, %struct.StructD** %D.addr, align 8, !tbaa !0
   store i64 %count, i64* %count.addr, align 8, !tbaa !4
-  %0 = load %struct.StructC** %C.addr, align 8, !tbaa !0
+  %0 = load %struct.StructC*, %struct.StructC** %C.addr, align 8, !tbaa !0
   %b = getelementptr inbounds %struct.StructC, %struct.StructC* %0, i32 0, i32 1
   %a = getelementptr inbounds %struct.StructB, %struct.StructB* %b, i32 0, i32 1
   %f32 = getelementptr inbounds %struct.StructA, %struct.StructA* %a, i32 0, i32 1
   store i32 1, i32* %f32, align 4, !tbaa !23
-  %1 = load %struct.StructD** %D.addr, align 8, !tbaa !0
+  %1 = load %struct.StructD*, %struct.StructD** %D.addr, align 8, !tbaa !0
   %b1 = getelementptr inbounds %struct.StructD, %struct.StructD* %1, i32 0, i32 1
   %a2 = getelementptr inbounds %struct.StructB, %struct.StructB* %b1, i32 0, i32 1
   %f323 = getelementptr inbounds %struct.StructA, %struct.StructA* %a2, i32 0, i32 1
   store i32 4, i32* %f323, align 4, !tbaa !25
-  %2 = load %struct.StructC** %C.addr, align 8, !tbaa !0
+  %2 = load %struct.StructC*, %struct.StructC** %C.addr, align 8, !tbaa !0
   %b4 = getelementptr inbounds %struct.StructC, %struct.StructC* %2, i32 0, i32 1
   %a5 = getelementptr inbounds %struct.StructB, %struct.StructB* %b4, i32 0, i32 1
   %f326 = getelementptr inbounds %struct.StructA, %struct.StructA* %a5, i32 0, i32 1
-  %3 = load i32* %f326, align 4, !tbaa !23
+  %3 = load i32, i32* %f326, align 4, !tbaa !23
   ret i32 %3
 }
 
@@ -330,7 +330,7 @@
 ; OPT: define
 ; OPT: store i32 1
 ; OPT: store i32 4
-; OPT: %[[RET:.*]] = load i32*
+; OPT: %[[RET:.*]] = load i32, i32*
 ; OPT: ret i32 %[[RET]]
   %C.addr = alloca %struct.StructC*, align 8
   %D.addr = alloca %struct.StructD*, align 8
@@ -340,24 +340,24 @@
   store %struct.StructC* %C, %struct.StructC** %C.addr, align 8, !tbaa !0
   store %struct.StructD* %D, %struct.StructD** %D.addr, align 8, !tbaa !0
   store i64 %count, i64* %count.addr, align 8, !tbaa !4
-  %0 = load %struct.StructC** %C.addr, align 8, !tbaa !0
+  %0 = load %struct.StructC*, %struct.StructC** %C.addr, align 8, !tbaa !0
   %b = getelementptr inbounds %struct.StructC, %struct.StructC* %0, i32 0, i32 1
   store %struct.StructB* %b, %struct.StructB** %b1, align 8, !tbaa !0
-  %1 = load %struct.StructD** %D.addr, align 8, !tbaa !0
+  %1 = load %struct.StructD*, %struct.StructD** %D.addr, align 8, !tbaa !0
   %b3 = getelementptr inbounds %struct.StructD, %struct.StructD* %1, i32 0, i32 1
   store %struct.StructB* %b3, %struct.StructB** %b2, align 8, !tbaa !0
-  %2 = load %struct.StructB** %b1, align 8, !tbaa !0
+  %2 = load %struct.StructB*, %struct.StructB** %b1, align 8, !tbaa !0
   %a = getelementptr inbounds %struct.StructB, %struct.StructB* %2, i32 0, i32 1
   %f32 = getelementptr inbounds %struct.StructA, %struct.StructA* %a, i32 0, i32 1
   store i32 1, i32* %f32, align 4, !tbaa !12
-  %3 = load %struct.StructB** %b2, align 8, !tbaa !0
+  %3 = load %struct.StructB*, %struct.StructB** %b2, align 8, !tbaa !0
   %a4 = getelementptr inbounds %struct.StructB, %struct.StructB* %3, i32 0, i32 1
   %f325 = getelementptr inbounds %struct.StructA, %struct.StructA* %a4, i32 0, i32 1
   store i32 4, i32* %f325, align 4, !tbaa !12
-  %4 = load %struct.StructB** %b1, align 8, !tbaa !0
+  %4 = load %struct.StructB*, %struct.StructB** %b1, align 8, !tbaa !0
   %a6 = getelementptr inbounds %struct.StructB, %struct.StructB* %4, i32 0, i32 1
   %f327 = getelementptr inbounds %struct.StructA, %struct.StructA* %a6, i32 0, i32 1
-  %5 = load i32* %f327, align 4, !tbaa !12
+  %5 = load i32, i32* %f327, align 4, !tbaa !12
   ret i32 %5
 }
 
diff --git a/llvm/test/Analysis/ValueTracking/memory-dereferenceable.ll b/llvm/test/Analysis/ValueTracking/memory-dereferenceable.ll
index 4ee21c5..51f9265 100644
--- a/llvm/test/Analysis/ValueTracking/memory-dereferenceable.ll
+++ b/llvm/test/Analysis/ValueTracking/memory-dereferenceable.ll
@@ -18,15 +18,15 @@
 ; CHECK-NOT: %nparam
 entry:
     %globalptr = getelementptr inbounds [6 x i8], [6 x i8]* @globalstr, i32 0, i32 0
-    %load1 = load i8* %globalptr
+    %load1 = load i8, i8* %globalptr
     %alloca = alloca i1
-    %load2 = load i1* %alloca
-    %load3 = load i32 addrspace(1)* %dparam
+    %load2 = load i1, i1* %alloca
+    %load3 = load i32, i32 addrspace(1)* %dparam
     %tok = tail call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
     %relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %tok, i32 4, i32 4)
-    %load4 = load i32 addrspace(1)* %relocate
+    %load4 = load i32, i32 addrspace(1)* %relocate
     %nparam = getelementptr i32, i32 addrspace(1)* %dparam, i32 5
-    %load5 = load i32 addrspace(1)* %nparam
+    %load5 = load i32, i32 addrspace(1)* %nparam
     ret void
 }
 
diff --git a/llvm/test/Assembler/2002-04-29-NameBinding.ll b/llvm/test/Assembler/2002-04-29-NameBinding.ll
index 960209b..c387c47 100644
--- a/llvm/test/Assembler/2002-04-29-NameBinding.ll
+++ b/llvm/test/Assembler/2002-04-29-NameBinding.ll
@@ -13,7 +13,7 @@
 
 define i32 @createtask() {
         %v1 = alloca i32                ;; Alloca should have one use! 
-        %reg112 = load i32* %v1         ;; This load should not use the global!
+        %reg112 = load i32, i32* %v1         ;; This load should not use the global!
         ret i32 %reg112
 }
 
diff --git a/llvm/test/Assembler/2002-08-19-BytecodeReader.ll b/llvm/test/Assembler/2002-08-19-BytecodeReader.ll
index 6ddb01e..1fd6bb6 100644
--- a/llvm/test/Assembler/2002-08-19-BytecodeReader.ll
+++ b/llvm/test/Assembler/2002-08-19-BytecodeReader.ll
@@ -11,8 +11,8 @@
 
 define void @Evaluate() {
 	%reg1321 = getelementptr %CHESS_POSITION, %CHESS_POSITION* @search, i64 0, i32 1		; <i32*> [#uses=1]
-	%reg114 = load i32* %reg1321		; <i32> [#uses=0]
+	%reg114 = load i32, i32* %reg1321		; <i32> [#uses=0]
 	%reg1801 = getelementptr %CHESS_POSITION, %CHESS_POSITION* @search, i64 0, i32 0		; <i32*> [#uses=1]
-	%reg182 = load i32* %reg1801		; <i32> [#uses=0]
+	%reg182 = load i32, i32* %reg1801		; <i32> [#uses=0]
 	ret void
 }
diff --git a/llvm/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll b/llvm/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll
index 911f0ff..05b4ee2 100644
--- a/llvm/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll
+++ b/llvm/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll
@@ -4,7 +4,7 @@
 @.str_1 = internal constant [6 x i8] c"_Bool\00"                ; <[6 x i8]*> [#uses=2]
 
 define i32 @test() {
-        %tmp.54 = load i8* getelementptr ([6 x i8]* @.str_1, i64 0, i64 1)            ; <i8> [#uses=1]
+        %tmp.54 = load i8, i8* getelementptr ([6 x i8]* @.str_1, i64 0, i64 1)            ; <i8> [#uses=1]
         %tmp.55 = icmp ne i8 %tmp.54, 66                ; <i1> [#uses=1]
         br i1 %tmp.55, label %then.7, label %endif.7
 
diff --git a/llvm/test/Assembler/2004-06-07-VerifierBug.ll b/llvm/test/Assembler/2004-06-07-VerifierBug.ll
index e01cee8..2fc0ae2 100644
--- a/llvm/test/Assembler/2004-06-07-VerifierBug.ll
+++ b/llvm/test/Assembler/2004-06-07-VerifierBug.ll
@@ -7,6 +7,6 @@
 
 loop:           ; preds = %loop
      %tmp.4.i9 = getelementptr i32, i32* null, i32 %tmp.5.i10             ; <i32*> [#uses=1]
-     %tmp.5.i10 = load i32* %tmp.4.i9                ; <i32> [#uses=1]
+     %tmp.5.i10 = load i32, i32* %tmp.4.i9                ; <i32> [#uses=1]
      br label %loop
 }
diff --git a/llvm/test/Assembler/2007-01-05-Cmp-ConstExpr.ll b/llvm/test/Assembler/2007-01-05-Cmp-ConstExpr.ll
index 573d008..5d1dd91 100644
--- a/llvm/test/Assembler/2007-01-05-Cmp-ConstExpr.ll
+++ b/llvm/test/Assembler/2007-01-05-Cmp-ConstExpr.ll
@@ -7,7 +7,7 @@
 define i32 @main(i32 %argc, i8** %argv) {
 entry:
         %tmp65 = getelementptr i8*, i8** %argv, i32 1                ; <i8**> [#uses=1]
-        %tmp66 = load i8** %tmp65               ; <i8*> [#uses=0]
+        %tmp66 = load i8*, i8** %tmp65               ; <i8*> [#uses=0]
         br i1 icmp ne (i32 sub (i32 ptrtoint (i8* getelementptr ([4 x i8]* @str, i32 0, i64 1) to i32), i32 ptrtoint ([4 x i8]* @str to i32)), i32 1), label %exit_1, label %exit_2
 
 exit_1:         ; preds = %entry
diff --git a/llvm/test/Assembler/2007-04-20-AlignedLoad.ll b/llvm/test/Assembler/2007-04-20-AlignedLoad.ll
index bcf65fd..1e8850a 100644
--- a/llvm/test/Assembler/2007-04-20-AlignedLoad.ll
+++ b/llvm/test/Assembler/2007-04-20-AlignedLoad.ll
@@ -3,6 +3,6 @@
 
 define i32 @test(i32* %arg) {
 entry:
-        %tmp2 = load i32* %arg, align 1024      ; <i32> [#uses=1]
+        %tmp2 = load i32, i32* %arg, align 1024      ; <i32> [#uses=1]
         ret i32 %tmp2
 }
diff --git a/llvm/test/Assembler/2007-12-11-AddressSpaces.ll b/llvm/test/Assembler/2007-12-11-AddressSpaces.ll
index f860f57..3d13f43 100644
--- a/llvm/test/Assembler/2007-12-11-AddressSpaces.ll
+++ b/llvm/test/Assembler/2007-12-11-AddressSpaces.ll
@@ -12,15 +12,15 @@
 
 define void @foo() {
 entry:
-	%tmp1 = load i32 addrspace(33)* addrspace(42)* getelementptr (%struct.mystruct addrspace(42)* @input, i32 0, i32 3), align 4		; <i32 addrspace(33)*> [#uses=1]
+	%tmp1 = load i32 addrspace(33)*, i32 addrspace(33)* addrspace(42)* getelementptr (%struct.mystruct addrspace(42)* @input, i32 0, i32 3), align 4		; <i32 addrspace(33)*> [#uses=1]
 	store i32 addrspace(33)* %tmp1, i32 addrspace(33)* addrspace(66)* getelementptr (%struct.mystruct addrspace(66)* @output, i32 0, i32 1), align 4
 	ret void
 }
 
 define i32 addrspace(11)* @bar(i32 addrspace(11)* addrspace(22)* addrspace(33)* %x) {
 entry:
-	%tmp1 = load i32 addrspace(11)* addrspace(22)* addrspace(33)* @y, align 4		; <i32 addrspace(11)* addrspace(22)*> [#uses=2]
+	%tmp1 = load i32 addrspace(11)* addrspace(22)*, i32 addrspace(11)* addrspace(22)* addrspace(33)* @y, align 4		; <i32 addrspace(11)* addrspace(22)*> [#uses=2]
 	store i32 addrspace(11)* addrspace(22)* %tmp1, i32 addrspace(11)* addrspace(22)* addrspace(33)* %x, align 4
-	%tmp5 = load i32 addrspace(11)* addrspace(22)* %tmp1, align 4		; <i32 addrspace(11)*> [#uses=1]
+	%tmp5 = load i32 addrspace(11)*, i32 addrspace(11)* addrspace(22)* %tmp1, align 4		; <i32 addrspace(11)*> [#uses=1]
 	ret i32 addrspace(11)* %tmp5
 }
diff --git a/llvm/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll b/llvm/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll
index 50ad32e..c8898fc 100644
--- a/llvm/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll
+++ b/llvm/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll
@@ -15,7 +15,7 @@
 ; CHECK: call void @llvm.dbg.value(metadata i64 72,
   call void @llvm.dbg.declare(metadata i64* %diff1, metadata !0, metadata !{!"0x102"})
   store i64 72, i64* %diff1, align 8
-  %v1 = load %struct.test** @TestArrayPtr, align 8 ; <%struct.test*> [#uses=1]
+  %v1 = load %struct.test*, %struct.test** @TestArrayPtr, align 8 ; <%struct.test*> [#uses=1]
   %v2 = ptrtoint %struct.test* %v1 to i64 ; <i64> [#uses=1]
   %v3 = sub i64 %v2, ptrtoint ([10 x %struct.test]* @TestArray to i64) ; <i64> [#uses=1]
   store i64 %v3, i64* %diff1, align 8
diff --git a/llvm/test/Assembler/align-inst-load.ll b/llvm/test/Assembler/align-inst-load.ll
index 3586be2..75fcd3e 100644
--- a/llvm/test/Assembler/align-inst-load.ll
+++ b/llvm/test/Assembler/align-inst-load.ll
@@ -1,6 +1,6 @@
 ; RUN: not llvm-as %s -o /dev/null 2>/dev/null
 
 define void @foo() {
-  load i1* %p, align 1073741824
+  load i1, i1* %p, align 1073741824
   ret void
 }
diff --git a/llvm/test/Assembler/align-inst.ll b/llvm/test/Assembler/align-inst.ll
index 1952fbc..028da39 100644
--- a/llvm/test/Assembler/align-inst.ll
+++ b/llvm/test/Assembler/align-inst.ll
@@ -5,7 +5,7 @@
 
 define void @foo() {
   %p = alloca i1, align 536870912
-  load i1* %p, align 536870912
+  load i1, i1* %p, align 536870912
   store i1 false, i1* %p, align 536870912
   ret void
 }
diff --git a/llvm/test/Assembler/atomic.ll b/llvm/test/Assembler/atomic.ll
index 0356f5f..148b95d 100644
--- a/llvm/test/Assembler/atomic.ll
+++ b/llvm/test/Assembler/atomic.ll
@@ -3,10 +3,10 @@
 ; Basic smoke test for atomic operations.
 
 define void @f(i32* %x) {
-  ; CHECK: load atomic i32* %x unordered, align 4
-  load atomic i32* %x unordered, align 4
-  ; CHECK: load atomic volatile i32* %x singlethread acquire, align 4
-  load atomic volatile i32* %x singlethread acquire, align 4
+  ; CHECK: load atomic i32, i32* %x unordered, align 4
+  load atomic i32, i32* %x unordered, align 4
+  ; CHECK: load atomic volatile i32, i32* %x singlethread acquire, align 4
+  load atomic volatile i32, i32* %x singlethread acquire, align 4
   ; CHECK: store atomic i32 3, i32* %x release, align 4
   store atomic i32 3, i32* %x release, align 4
   ; CHECK: store atomic volatile i32 3, i32* %x singlethread monotonic, align 4
diff --git a/llvm/test/Assembler/fast-math-flags.ll b/llvm/test/Assembler/fast-math-flags.ll
index 8e75bdf..f0d3ecc 100644
--- a/llvm/test/Assembler/fast-math-flags.ll
+++ b/llvm/test/Assembler/fast-math-flags.ll
@@ -9,12 +9,12 @@
 
 define float @none(float %x, float %y) {
 entry:
-; CHECK:  %vec = load  <3 x float>* @vec
-  %vec    = load  <3 x float>* @vec
-; CHECK:  %select = load i1* @select
-  %select = load i1* @select
-; CHECK:  %arr    = load [3 x float]* @arr
-  %arr    = load [3 x float]* @arr
+; CHECK:  %vec = load  <3 x float>,  <3 x float>* @vec
+  %vec    = load  <3 x float>,  <3 x float>* @vec
+; CHECK:  %select = load i1, i1* @select
+  %select = load i1, i1* @select
+; CHECK:  %arr    = load [3 x float], [3 x float]* @arr
+  %arr    = load [3 x float], [3 x float]* @arr
 
 ; CHECK:  %a = fadd  float %x, %y
   %a = fadd  float %x, %y
@@ -43,12 +43,12 @@
 ; CHECK: no_nan
 define float @no_nan(float %x, float %y) {
 entry:
-; CHECK:  %vec = load <3 x float>* @vec
-  %vec    = load  <3 x float>* @vec
-; CHECK:  %select = load i1* @select
-  %select = load i1* @select
-; CHECK:  %arr = load  [3 x float]* @arr
-  %arr    = load  [3 x float]* @arr
+; CHECK:  %vec = load <3 x float>, <3 x float>* @vec
+  %vec    = load  <3 x float>,  <3 x float>* @vec
+; CHECK:  %select = load i1, i1* @select
+  %select = load i1, i1* @select
+; CHECK:  %arr = load  [3 x float],  [3 x float]* @arr
+  %arr    = load  [3 x float],  [3 x float]* @arr
 
 ; CHECK:  %a = fadd nnan  float %x, %y
   %a = fadd nnan  float %x, %y
@@ -77,12 +77,12 @@
 ; CHECK: no_nan_inf
 define float @no_nan_inf(float %x, float %y) {
 entry:
-; CHECK:  %vec = load <3 x float>* @vec
-  %vec    = load <3 x float>* @vec
-; CHECK:  %select = load i1* @select
-  %select = load i1* @select
-; CHECK:  %arr = load [3 x float]* @arr
-  %arr    = load [3 x float]* @arr
+; CHECK:  %vec = load <3 x float>, <3 x float>* @vec
+  %vec    = load <3 x float>, <3 x float>* @vec
+; CHECK:  %select = load i1, i1* @select
+  %select = load i1, i1* @select
+; CHECK:  %arr = load [3 x float], [3 x float]* @arr
+  %arr    = load [3 x float], [3 x float]* @arr
 
 ; CHECK:  %a = fadd nnan ninf  float %x, %y
   %a = fadd ninf nnan  float %x, %y
@@ -111,12 +111,12 @@
 ; CHECK: mixed_flags
 define float @mixed_flags(float %x, float %y) {
 entry:
-; CHECK:  %vec = load <3 x float>* @vec
-  %vec    = load <3 x float>* @vec
-; CHECK:  %select = load i1* @select
-  %select = load i1* @select
-; CHECK:  %arr    = load [3 x float]* @arr
-  %arr    = load [3 x float]* @arr
+; CHECK:  %vec = load <3 x float>, <3 x float>* @vec
+  %vec    = load <3 x float>, <3 x float>* @vec
+; CHECK:  %select = load i1, i1* @select
+  %select = load i1, i1* @select
+; CHECK:  %arr    = load [3 x float], [3 x float]* @arr
+  %arr    = load [3 x float], [3 x float]* @arr
 
 ; CHECK:  %a = fadd nnan ninf float %x, %y
   %a = fadd ninf nnan float %x, %y
diff --git a/llvm/test/Assembler/half-constprop.ll b/llvm/test/Assembler/half-constprop.ll
index c5ae3bf..7ca876b 100644
--- a/llvm/test/Assembler/half-constprop.ll
+++ b/llvm/test/Assembler/half-constprop.ll
@@ -9,8 +9,8 @@
   %.compoundliteral = alloca float, align 4
   store half 0xH4200, half* %a, align 2
   store half 0xH4B9A, half* %b, align 2
-  %tmp = load half* %a, align 2
-  %tmp1 = load half* %b, align 2
+  %tmp = load half, half* %a, align 2
+  %tmp1 = load half, half* %b, align 2
   %add = fadd half %tmp, %tmp1
 ; CHECK: 0xH4C8D
   ret half %add
diff --git a/llvm/test/Assembler/half-conv.ll b/llvm/test/Assembler/half-conv.ll
index e6f73cf..6575501 100644
--- a/llvm/test/Assembler/half-conv.ll
+++ b/llvm/test/Assembler/half-conv.ll
@@ -7,7 +7,7 @@
   %a = alloca half, align 2
   %.compoundliteral = alloca float, align 4
   store half 0xH4C8D, half* %a, align 2
-  %tmp = load half* %a, align 2
+  %tmp = load half, half* %a, align 2
   %conv = fpext half %tmp to float
 ; CHECK: 0x4032340000000000
   ret float %conv
diff --git a/llvm/test/Assembler/insertextractvalue.ll b/llvm/test/Assembler/insertextractvalue.ll
index 692843e..71dbba3 100644
--- a/llvm/test/Assembler/insertextractvalue.ll
+++ b/llvm/test/Assembler/insertextractvalue.ll
@@ -8,7 +8,7 @@
 ; CHECK-NEXT: store
 ; CHECK-NEXT: ret
 define float @foo({{i32},{float, double}}* %p) nounwind {
-  %t = load {{i32},{float, double}}* %p
+  %t = load {{i32},{float, double}}, {{i32},{float, double}}* %p
   %s = extractvalue {{i32},{float, double}} %t, 1, 0
   %r = insertvalue {{i32},{float, double}} %t, double 2.0, 1, 1
   store {{i32},{float, double}} %r, {{i32},{float, double}}* %p
diff --git a/llvm/test/Assembler/invalid-load-mismatched-explicit-type.ll b/llvm/test/Assembler/invalid-load-mismatched-explicit-type.ll
new file mode 100644
index 0000000..b8422ed
--- /dev/null
+++ b/llvm/test/Assembler/invalid-load-mismatched-explicit-type.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+; CHECK: <stdin>:4:13: error: explicit pointee type doesn't match operand's pointee type
+define void @test(i32* %t) {
+  %x = load i16, i32* %t
+  ret void
+}
diff --git a/llvm/test/Assembler/invalid-load-missing-explicit-type.ll b/llvm/test/Assembler/invalid-load-missing-explicit-type.ll
new file mode 100644
index 0000000..455498e
--- /dev/null
+++ b/llvm/test/Assembler/invalid-load-missing-explicit-type.ll
@@ -0,0 +1,6 @@
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+; CHECK: <stdin>:4:18: error: expected comma after load's type
+define void @test(i32* %t) {
+  %x = load i32* %t
+  ret void
+}
diff --git a/llvm/test/Assembler/numbered-values.ll b/llvm/test/Assembler/numbered-values.ll
index 70b6377..0b14c68 100644
--- a/llvm/test/Assembler/numbered-values.ll
+++ b/llvm/test/Assembler/numbered-values.ll
@@ -9,7 +9,7 @@
 	%0 = alloca i32		; <i32*>:0 [#uses=2]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store i32 %X, i32* %X_addr
-	%1 = load i32* %X_addr, align 4		; <i32>:1 [#uses=1]
+	%1 = load i32, i32* %X_addr, align 4		; <i32>:1 [#uses=1]
 	mul i32 %1, 4		; <i32>:2 [#uses=1]
 	%3 = add i32 %2, 123		; <i32>:3 [#uses=1]
 	store i32 %3, i32* %0, align 4
diff --git a/llvm/test/Assembler/unnamed.ll b/llvm/test/Assembler/unnamed.ll
index 099a15a..7f79e5c 100644
--- a/llvm/test/Assembler/unnamed.ll
+++ b/llvm/test/Assembler/unnamed.ll
@@ -16,7 +16,7 @@
 @3 = global x86_fp80 0xK4001E000000000000000
 
 define float @foo(%0* %p) nounwind {
-  %t = load %0* %p                                ; <%0> [#uses=2]
+  %t = load %0, %0* %p                                ; <%0> [#uses=2]
   %s = extractvalue %0 %t, 1, 0                   ; <float> [#uses=1]
   %r = insertvalue %0 %t, double 2.000000e+00, 1, 1; <%0> [#uses=1]
   store %0 %r, %0* %p
diff --git a/llvm/test/Assembler/upgrade-loop-metadata.ll b/llvm/test/Assembler/upgrade-loop-metadata.ll
index 0852469..d88cb3e 100644
--- a/llvm/test/Assembler/upgrade-loop-metadata.ll
+++ b/llvm/test/Assembler/upgrade-loop-metadata.ll
@@ -14,7 +14,7 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 16
   br i1 %cmp, label %for.body, label %for.end, !llvm.loop !1
 
@@ -22,7 +22,7 @@
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %inc = add nsw i32 %1, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
diff --git a/llvm/test/Assembler/uselistorder.ll b/llvm/test/Assembler/uselistorder.ll
index be5ee70..873e160 100644
--- a/llvm/test/Assembler/uselistorder.ll
+++ b/llvm/test/Assembler/uselistorder.ll
@@ -48,7 +48,7 @@
 
 define i1 @loada() {
 entry:
-  %a = load i1* getelementptr ([4 x i1]* @a, i64 0, i64 2)
+  %a = load i1, i1* getelementptr ([4 x i1]* @a, i64 0, i64 2)
   ret i1 %a
 }
 
diff --git a/llvm/test/Bitcode/arm32_neon_vcnt_upgrade.ll b/llvm/test/Bitcode/arm32_neon_vcnt_upgrade.ll
index ed3981b..0032c4a 100644
--- a/llvm/test/Bitcode/arm32_neon_vcnt_upgrade.ll
+++ b/llvm/test/Bitcode/arm32_neon_vcnt_upgrade.ll
@@ -4,7 +4,7 @@
 
 define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
 ;CHECK: @vclz16
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %tmp2 = call <4 x i16> @llvm.arm.neon.vclz.v4i16(<4 x i16> %tmp1)
 ;CHECK: {{call.*@llvm.ctlz.v4i16\(<4 x i16>.*, i1 false}}
         ret <4 x i16> %tmp2
@@ -12,7 +12,7 @@
 
 define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
 ;CHECK: @vcnt8
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp2 = call <8 x i8> @llvm.arm.neon.vcnt.v8i8(<8 x i8> %tmp1)
 ;CHECK: call <8 x i8> @llvm.ctpop.v8i8(<8 x i8>
         ret <8 x i8> %tmp2
diff --git a/llvm/test/Bitcode/case-ranges-3.3.ll b/llvm/test/Bitcode/case-ranges-3.3.ll
index 020b37f4..eb55ef1 100644
--- a/llvm/test/Bitcode/case-ranges-3.3.ll
+++ b/llvm/test/Bitcode/case-ranges-3.3.ll
@@ -10,7 +10,7 @@
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   store i32 %x, i32* %2, align 4
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   switch i32 %3, label %9 [
 ; CHECK: switch i32 %3, label %9
     i32 -3, label %4
@@ -63,6 +63,6 @@
   br label %11
 
 ; <label>:11
-  %12 = load i32* %1
+  %12 = load i32, i32* %1
   ret i32 %12
 }
diff --git a/llvm/test/Bitcode/function-encoding-rel-operands.ll b/llvm/test/Bitcode/function-encoding-rel-operands.ll
index d7a7516..1307dd4 100644
--- a/llvm/test/Bitcode/function-encoding-rel-operands.ll
+++ b/llvm/test/Bitcode/function-encoding-rel-operands.ll
@@ -44,7 +44,7 @@
 define i1 @test_load(i32 %a, {i32, i32}* %ptr) nounwind {
 entry:
   %0 = getelementptr inbounds {i32, i32}, {i32, i32}* %ptr, i32 %a, i32 0
-  %1 = load i32* %0
+  %1 = load i32, i32* %0
   %2 = icmp eq i32 %1, %a
   ret i1 %2
 }
diff --git a/llvm/test/Bitcode/memInstructions.3.2.ll b/llvm/test/Bitcode/memInstructions.3.2.ll
index 356ecf7..f430086 100644
--- a/llvm/test/Bitcode/memInstructions.3.2.ll
+++ b/llvm/test/Bitcode/memInstructions.3.2.ll
@@ -27,53 +27,53 @@
   %ptr1 = alloca i8
   store i8 2, i8* %ptr1
 
-; CHECK: %res1 = load i8* %ptr1
-  %res1 = load i8* %ptr1
+; CHECK: %res1 = load i8, i8* %ptr1
+  %res1 = load i8, i8* %ptr1
 
-; CHECK-NEXT: %res2 = load volatile i8* %ptr1
-  %res2 = load volatile i8* %ptr1
+; CHECK-NEXT: %res2 = load volatile i8, i8* %ptr1
+  %res2 = load volatile i8, i8* %ptr1
 
-; CHECK-NEXT: %res3 = load i8* %ptr1, align 1
-  %res3 = load i8* %ptr1, align 1
+; CHECK-NEXT: %res3 = load i8, i8* %ptr1, align 1
+  %res3 = load i8, i8* %ptr1, align 1
 
-; CHECK-NEXT: %res4 = load volatile i8* %ptr1, align 1
-  %res4 = load volatile i8* %ptr1, align 1
+; CHECK-NEXT: %res4 = load volatile i8, i8* %ptr1, align 1
+  %res4 = load volatile i8, i8* %ptr1, align 1
 
-; CHECK-NEXT: %res5 = load i8* %ptr1, !nontemporal !0
-  %res5 = load i8* %ptr1, !nontemporal !0
+; CHECK-NEXT: %res5 = load i8, i8* %ptr1, !nontemporal !0
+  %res5 = load i8, i8* %ptr1, !nontemporal !0
 
-; CHECK-NEXT: %res6 = load volatile i8* %ptr1, !nontemporal !0
-  %res6 = load volatile i8* %ptr1, !nontemporal !0
+; CHECK-NEXT: %res6 = load volatile i8, i8* %ptr1, !nontemporal !0
+  %res6 = load volatile i8, i8* %ptr1, !nontemporal !0
 
-; CHECK-NEXT: %res7 = load i8* %ptr1, align 1, !nontemporal !0
-  %res7 = load i8* %ptr1, align 1, !nontemporal !0
+; CHECK-NEXT: %res7 = load i8, i8* %ptr1, align 1, !nontemporal !0
+  %res7 = load i8, i8* %ptr1, align 1, !nontemporal !0
 
-; CHECK-NEXT: %res8 = load volatile i8* %ptr1, align 1, !nontemporal !0
-  %res8 = load volatile i8* %ptr1, align 1, !nontemporal !0
+; CHECK-NEXT: %res8 = load volatile i8, i8* %ptr1, align 1, !nontemporal !0
+  %res8 = load volatile i8, i8* %ptr1, align 1, !nontemporal !0
 
-; CHECK-NEXT: %res9 = load i8* %ptr1, !invariant.load !1
-  %res9 = load i8* %ptr1, !invariant.load !1
+; CHECK-NEXT: %res9 = load i8, i8* %ptr1, !invariant.load !1
+  %res9 = load i8, i8* %ptr1, !invariant.load !1
 
-; CHECK-NEXT: %res10 = load volatile i8* %ptr1, !invariant.load !1
-  %res10 = load volatile i8* %ptr1, !invariant.load !1
+; CHECK-NEXT: %res10 = load volatile i8, i8* %ptr1, !invariant.load !1
+  %res10 = load volatile i8, i8* %ptr1, !invariant.load !1
 
-; CHECK-NEXT: %res11 = load i8* %ptr1, align 1, !invariant.load !1
-  %res11 = load i8* %ptr1, align 1, !invariant.load !1
+; CHECK-NEXT: %res11 = load i8, i8* %ptr1, align 1, !invariant.load !1
+  %res11 = load i8, i8* %ptr1, align 1, !invariant.load !1
 
-; CHECK-NEXT: %res12 = load volatile i8* %ptr1, align 1, !invariant.load !1
-  %res12 = load volatile i8* %ptr1, align 1, !invariant.load !1
+; CHECK-NEXT: %res12 = load volatile i8, i8* %ptr1, align 1, !invariant.load !1
+  %res12 = load volatile i8, i8* %ptr1, align 1, !invariant.load !1
 
-; CHECK-NEXT: %res13 = load i8* %ptr1, {{[(!nontemporal !0, !invariant.load !1) | (!invariant.load !1, !nontemporal !0)]}}
-  %res13 = load i8* %ptr1, !nontemporal !0, !invariant.load !1
+; CHECK-NEXT: %res13 = load i8, i8* %ptr1, {{[(!nontemporal !0, !invariant.load !1) | (!invariant.load !1, !nontemporal !0)]}}
+  %res13 = load i8, i8* %ptr1, !nontemporal !0, !invariant.load !1
 
-; CHECK-NEXT: %res14 = load volatile i8* %ptr1, {{[(!nontemporal !0, !invariant.load !1) | (!invariant.load !1, !nontemporal !0)]}}
-  %res14 = load volatile i8* %ptr1, !nontemporal !0, !invariant.load !1
+; CHECK-NEXT: %res14 = load volatile i8, i8* %ptr1, {{[(!nontemporal !0, !invariant.load !1) | (!invariant.load !1, !nontemporal !0)]}}
+  %res14 = load volatile i8, i8* %ptr1, !nontemporal !0, !invariant.load !1
 
-; CHECK-NEXT: %res15 = load i8* %ptr1, align 1, {{[(!nontemporal !0, !invariant.load !1) | (!invariant.load !1, !nontemporal !0)]}}
-  %res15 = load i8* %ptr1, align 1, !nontemporal !0, !invariant.load !1
+; CHECK-NEXT: %res15 = load i8, i8* %ptr1, align 1, {{[(!nontemporal !0, !invariant.load !1) | (!invariant.load !1, !nontemporal !0)]}}
+  %res15 = load i8, i8* %ptr1, align 1, !nontemporal !0, !invariant.load !1
 
-; CHECK-NEXT: %res16 = load volatile i8* %ptr1, align 1, {{[(!nontemporal !0, !invariant.load !1) | (!invariant.load !1, !nontemporal !0)]}}
-  %res16 = load volatile i8* %ptr1, align 1, !nontemporal !0, !invariant.load !1
+; CHECK-NEXT: %res16 = load volatile i8, i8* %ptr1, align 1, {{[(!nontemporal !0, !invariant.load !1) | (!invariant.load !1, !nontemporal !0)]}}
+  %res16 = load volatile i8, i8* %ptr1, align 1, !nontemporal !0, !invariant.load !1
 
   ret void
 }
@@ -83,53 +83,53 @@
   %ptr1 = alloca i8
   store i8 2, i8* %ptr1
 
-; CHECK: %res1 = load atomic i8* %ptr1 unordered, align 1
-  %res1 = load atomic i8* %ptr1 unordered, align 1
+; CHECK: %res1 = load atomic i8, i8* %ptr1 unordered, align 1
+  %res1 = load atomic i8, i8* %ptr1 unordered, align 1
 
-; CHECK-NEXT: %res2 = load atomic i8* %ptr1 monotonic, align 1
-  %res2 = load atomic i8* %ptr1 monotonic, align 1
+; CHECK-NEXT: %res2 = load atomic i8, i8* %ptr1 monotonic, align 1
+  %res2 = load atomic i8, i8* %ptr1 monotonic, align 1
 
-; CHECK-NEXT: %res3 = load atomic i8* %ptr1 acquire, align 1
-  %res3 = load atomic i8* %ptr1 acquire, align 1
+; CHECK-NEXT: %res3 = load atomic i8, i8* %ptr1 acquire, align 1
+  %res3 = load atomic i8, i8* %ptr1 acquire, align 1
 
-; CHECK-NEXT: %res4 = load atomic i8* %ptr1 seq_cst, align 1
-  %res4 = load atomic i8* %ptr1 seq_cst, align 1
+; CHECK-NEXT: %res4 = load atomic i8, i8* %ptr1 seq_cst, align 1
+  %res4 = load atomic i8, i8* %ptr1 seq_cst, align 1
 
-; CHECK-NEXT: %res5 = load atomic volatile i8* %ptr1 unordered, align 1
-  %res5 = load atomic volatile i8* %ptr1 unordered, align 1
+; CHECK-NEXT: %res5 = load atomic volatile i8, i8* %ptr1 unordered, align 1
+  %res5 = load atomic volatile i8, i8* %ptr1 unordered, align 1
 
-; CHECK-NEXT: %res6 = load atomic volatile i8* %ptr1 monotonic, align 1
-  %res6 = load atomic volatile i8* %ptr1 monotonic, align 1
+; CHECK-NEXT: %res6 = load atomic volatile i8, i8* %ptr1 monotonic, align 1
+  %res6 = load atomic volatile i8, i8* %ptr1 monotonic, align 1
 
-; CHECK-NEXT: %res7 = load atomic volatile i8* %ptr1 acquire, align 1
-  %res7 = load atomic volatile i8* %ptr1 acquire, align 1
+; CHECK-NEXT: %res7 = load atomic volatile i8, i8* %ptr1 acquire, align 1
+  %res7 = load atomic volatile i8, i8* %ptr1 acquire, align 1
 
-; CHECK-NEXT: %res8 = load atomic volatile i8* %ptr1 seq_cst, align 1
-  %res8 = load atomic volatile i8* %ptr1 seq_cst, align 1
+; CHECK-NEXT: %res8 = load atomic volatile i8, i8* %ptr1 seq_cst, align 1
+  %res8 = load atomic volatile i8, i8* %ptr1 seq_cst, align 1
 
-; CHECK-NEXT: %res9 = load atomic i8* %ptr1 singlethread unordered, align 1
-  %res9 = load atomic i8* %ptr1 singlethread unordered, align 1
+; CHECK-NEXT: %res9 = load atomic i8, i8* %ptr1 singlethread unordered, align 1
+  %res9 = load atomic i8, i8* %ptr1 singlethread unordered, align 1
 
-; CHECK-NEXT: %res10 = load atomic i8* %ptr1 singlethread monotonic, align 1
-  %res10 = load atomic i8* %ptr1 singlethread monotonic, align 1
+; CHECK-NEXT: %res10 = load atomic i8, i8* %ptr1 singlethread monotonic, align 1
+  %res10 = load atomic i8, i8* %ptr1 singlethread monotonic, align 1
 
-; CHECK-NEXT: %res11 = load atomic i8* %ptr1 singlethread acquire, align 1
-  %res11 = load atomic i8* %ptr1 singlethread acquire, align 1
+; CHECK-NEXT: %res11 = load atomic i8, i8* %ptr1 singlethread acquire, align 1
+  %res11 = load atomic i8, i8* %ptr1 singlethread acquire, align 1
 
-; CHECK-NEXT: %res12 = load atomic i8* %ptr1 singlethread seq_cst, align 1
-  %res12 = load atomic i8* %ptr1 singlethread seq_cst, align 1
+; CHECK-NEXT: %res12 = load atomic i8, i8* %ptr1 singlethread seq_cst, align 1
+  %res12 = load atomic i8, i8* %ptr1 singlethread seq_cst, align 1
 
-; CHECK-NEXT: %res13 = load atomic volatile i8* %ptr1 singlethread unordered, align 1
-  %res13 = load atomic volatile i8* %ptr1 singlethread unordered, align 1
+; CHECK-NEXT: %res13 = load atomic volatile i8, i8* %ptr1 singlethread unordered, align 1
+  %res13 = load atomic volatile i8, i8* %ptr1 singlethread unordered, align 1
 
-; CHECK-NEXT: %res14 = load atomic volatile i8* %ptr1 singlethread monotonic, align 1
-  %res14 = load atomic volatile i8* %ptr1 singlethread monotonic, align 1
+; CHECK-NEXT: %res14 = load atomic volatile i8, i8* %ptr1 singlethread monotonic, align 1
+  %res14 = load atomic volatile i8, i8* %ptr1 singlethread monotonic, align 1
 
-; CHECK-NEXT: %res15 = load atomic volatile i8* %ptr1 singlethread acquire, align 1
-  %res15 = load atomic volatile i8* %ptr1 singlethread acquire, align 1
+; CHECK-NEXT: %res15 = load atomic volatile i8, i8* %ptr1 singlethread acquire, align 1
+  %res15 = load atomic volatile i8, i8* %ptr1 singlethread acquire, align 1
 
-; CHECK-NEXT: %res16 = load atomic volatile i8* %ptr1 singlethread seq_cst, align 1
-  %res16 = load atomic volatile i8* %ptr1 singlethread seq_cst, align 1
+; CHECK-NEXT: %res16 = load atomic volatile i8, i8* %ptr1 singlethread seq_cst, align 1
+  %res16 = load atomic volatile i8, i8* %ptr1 singlethread seq_cst, align 1
 
   ret void
 }
diff --git a/llvm/test/Bitcode/metadata-2.ll b/llvm/test/Bitcode/metadata-2.ll
index 07371a3..a5367da 100644
--- a/llvm/test/Bitcode/metadata-2.ll
+++ b/llvm/test/Bitcode/metadata-2.ll
@@ -77,7 +77,7 @@
 
 define internal void @_D5tango4core8BitManip16__moduleinfoCtorZ() nounwind {
 moduleinfoCtorEntry:
-	%current = load %ModuleReference** @_Dmodule_ref		; <%ModuleReference*> [#uses=1]
+	%current = load %ModuleReference*, %ModuleReference** @_Dmodule_ref		; <%ModuleReference*> [#uses=1]
 	store %ModuleReference* %current, %ModuleReference** getelementptr (%ModuleReference* @_D5tango4core8BitManip11__moduleRefZ, i32 0, i32 0)
 	store %ModuleReference* @_D5tango4core8BitManip11__moduleRefZ, %ModuleReference** @_Dmodule_ref
 	ret void
diff --git a/llvm/test/Bitcode/upgrade-loop-metadata.ll b/llvm/test/Bitcode/upgrade-loop-metadata.ll
index be2a99a4..8dee907 100644
--- a/llvm/test/Bitcode/upgrade-loop-metadata.ll
+++ b/llvm/test/Bitcode/upgrade-loop-metadata.ll
@@ -10,7 +10,7 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 16
   br i1 %cmp, label %for.body, label %for.end, !llvm.loop !1
 
@@ -18,7 +18,7 @@
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %inc = add nsw i32 %1, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
diff --git a/llvm/test/Bitcode/use-list-order.ll b/llvm/test/Bitcode/use-list-order.ll
index 6617b9c5..f57b4a6 100644
--- a/llvm/test/Bitcode/use-list-order.ll
+++ b/llvm/test/Bitcode/use-list-order.ll
@@ -79,13 +79,13 @@
 
 define i1 @loadb() {
 entry:
-  %b = load i1* @b
+  %b = load i1, i1* @b
   ret i1 %b
 }
 
 define i1 @loada() {
 entry:
-  %a = load i1* getelementptr ([4 x i1]* @a, i64 0, i64 2)
+  %a = load i1, i1* getelementptr ([4 x i1]* @a, i64 0, i64 2)
   ret i1 %a
 }
 
@@ -115,7 +115,7 @@
 
 define i4 @globalAndFunctionFunctionUser() {
 entry:
-  %local = load i4* @globalAndFunction
+  %local = load i4, i4* @globalAndFunction
   ret i4 %local
 }
 
diff --git a/llvm/test/CodeGen/AArch64/128bit_load_store.ll b/llvm/test/CodeGen/AArch64/128bit_load_store.ll
index 20911e8..94fd386 100644
--- a/llvm/test/CodeGen/AArch64/128bit_load_store.ll
+++ b/llvm/test/CodeGen/AArch64/128bit_load_store.ll
@@ -12,7 +12,7 @@
 ; CHECK-LABEL: test_load_f128
 ; CHECK: ldr	 {{q[0-9]+}}, [{{x[0-9]+}}]
 entry:
-  %0 = load fp128* %ptr, align 16
+  %0 = load fp128, fp128* %ptr, align 16
   ret fp128 %0
 }
 
@@ -33,7 +33,7 @@
 
 entry:
   %0 = bitcast i128* %ptr to fp128*
-  %1 = load fp128* %0, align 16
+  %1 = load fp128, fp128* %0, align 16
   %2 = bitcast fp128 %1 to i128
   ret i128 %2
 }
@@ -44,7 +44,7 @@
 ; CHECK-NEXT: str	{{q[0-9]+}}, [{{x[0-9]+}}, #16]
 entry:
   %0 = bitcast i128* %ptr to fp128*
-  %1 = load fp128* %0, align 16
+  %1 = load fp128, fp128* %0, align 16
   %add.ptr = getelementptr inbounds i128, i128* %ptr, i64 1
   %2 = bitcast i128* %add.ptr to fp128*
   store fp128 %1, fp128* %2, align 16
diff --git a/llvm/test/CodeGen/AArch64/PBQP-chain.ll b/llvm/test/CodeGen/AArch64/PBQP-chain.ll
index 4729842..3e5fa74 100644
--- a/llvm/test/CodeGen/AArch64/PBQP-chain.ll
+++ b/llvm/test/CodeGen/AArch64/PBQP-chain.ll
@@ -22,79 +22,79 @@
 ; CHECK-ODD: fmadd {{d[0-9]*[13579]}}, {{d[0-9]*}}, {{d[0-9]*}}, {{d[0-9]*[13579]}}
 define void @fir(double* nocapture %rx, double* nocapture %ry, double* nocapture %c, double* nocapture %x, double* nocapture %y) {
 entry:
-  %0 = load double* %c, align 8
-  %1 = load double* %x, align 8
+  %0 = load double, double* %c, align 8
+  %1 = load double, double* %x, align 8
   %mul = fmul fast double %1, %0
-  %2 = load double* %y, align 8
+  %2 = load double, double* %y, align 8
   %mul7 = fmul fast double %2, %0
   %arrayidx.1 = getelementptr inbounds double, double* %c, i64 1
-  %3 = load double* %arrayidx.1, align 8
+  %3 = load double, double* %arrayidx.1, align 8
   %arrayidx2.1 = getelementptr inbounds double, double* %x, i64 1
-  %4 = load double* %arrayidx2.1, align 8
+  %4 = load double, double* %arrayidx2.1, align 8
   %mul.1 = fmul fast double %4, %3
   %add.1 = fadd fast double %mul.1, %mul
   %arrayidx6.1 = getelementptr inbounds double, double* %y, i64 1
-  %5 = load double* %arrayidx6.1, align 8
+  %5 = load double, double* %arrayidx6.1, align 8
   %mul7.1 = fmul fast double %5, %3
   %add8.1 = fadd fast double %mul7.1, %mul7
   %arrayidx.2 = getelementptr inbounds double, double* %c, i64 2
-  %6 = load double* %arrayidx.2, align 8
+  %6 = load double, double* %arrayidx.2, align 8
   %arrayidx2.2 = getelementptr inbounds double, double* %x, i64 2
-  %7 = load double* %arrayidx2.2, align 8
+  %7 = load double, double* %arrayidx2.2, align 8
   %mul.2 = fmul fast double %7, %6
   %add.2 = fadd fast double %mul.2, %add.1
   %arrayidx6.2 = getelementptr inbounds double, double* %y, i64 2
-  %8 = load double* %arrayidx6.2, align 8
+  %8 = load double, double* %arrayidx6.2, align 8
   %mul7.2 = fmul fast double %8, %6
   %add8.2 = fadd fast double %mul7.2, %add8.1
   %arrayidx.3 = getelementptr inbounds double, double* %c, i64 3
-  %9 = load double* %arrayidx.3, align 8
+  %9 = load double, double* %arrayidx.3, align 8
   %arrayidx2.3 = getelementptr inbounds double, double* %x, i64 3
-  %10 = load double* %arrayidx2.3, align 8
+  %10 = load double, double* %arrayidx2.3, align 8
   %mul.3 = fmul fast double %10, %9
   %add.3 = fadd fast double %mul.3, %add.2
   %arrayidx6.3 = getelementptr inbounds double, double* %y, i64 3
-  %11 = load double* %arrayidx6.3, align 8
+  %11 = load double, double* %arrayidx6.3, align 8
   %mul7.3 = fmul fast double %11, %9
   %add8.3 = fadd fast double %mul7.3, %add8.2
   %arrayidx.4 = getelementptr inbounds double, double* %c, i64 4
-  %12 = load double* %arrayidx.4, align 8
+  %12 = load double, double* %arrayidx.4, align 8
   %arrayidx2.4 = getelementptr inbounds double, double* %x, i64 4
-  %13 = load double* %arrayidx2.4, align 8
+  %13 = load double, double* %arrayidx2.4, align 8
   %mul.4 = fmul fast double %13, %12
   %add.4 = fadd fast double %mul.4, %add.3
   %arrayidx6.4 = getelementptr inbounds double, double* %y, i64 4
-  %14 = load double* %arrayidx6.4, align 8
+  %14 = load double, double* %arrayidx6.4, align 8
   %mul7.4 = fmul fast double %14, %12
   %add8.4 = fadd fast double %mul7.4, %add8.3
   %arrayidx.5 = getelementptr inbounds double, double* %c, i64 5
-  %15 = load double* %arrayidx.5, align 8
+  %15 = load double, double* %arrayidx.5, align 8
   %arrayidx2.5 = getelementptr inbounds double, double* %x, i64 5
-  %16 = load double* %arrayidx2.5, align 8
+  %16 = load double, double* %arrayidx2.5, align 8
   %mul.5 = fmul fast double %16, %15
   %add.5 = fadd fast double %mul.5, %add.4
   %arrayidx6.5 = getelementptr inbounds double, double* %y, i64 5
-  %17 = load double* %arrayidx6.5, align 8
+  %17 = load double, double* %arrayidx6.5, align 8
   %mul7.5 = fmul fast double %17, %15
   %add8.5 = fadd fast double %mul7.5, %add8.4
   %arrayidx.6 = getelementptr inbounds double, double* %c, i64 6
-  %18 = load double* %arrayidx.6, align 8
+  %18 = load double, double* %arrayidx.6, align 8
   %arrayidx2.6 = getelementptr inbounds double, double* %x, i64 6
-  %19 = load double* %arrayidx2.6, align 8
+  %19 = load double, double* %arrayidx2.6, align 8
   %mul.6 = fmul fast double %19, %18
   %add.6 = fadd fast double %mul.6, %add.5
   %arrayidx6.6 = getelementptr inbounds double, double* %y, i64 6
-  %20 = load double* %arrayidx6.6, align 8
+  %20 = load double, double* %arrayidx6.6, align 8
   %mul7.6 = fmul fast double %20, %18
   %add8.6 = fadd fast double %mul7.6, %add8.5
   %arrayidx.7 = getelementptr inbounds double, double* %c, i64 7
-  %21 = load double* %arrayidx.7, align 8
+  %21 = load double, double* %arrayidx.7, align 8
   %arrayidx2.7 = getelementptr inbounds double, double* %x, i64 7
-  %22 = load double* %arrayidx2.7, align 8
+  %22 = load double, double* %arrayidx2.7, align 8
   %mul.7 = fmul fast double %22, %21
   %add.7 = fadd fast double %mul.7, %add.6
   %arrayidx6.7 = getelementptr inbounds double, double* %y, i64 7
-  %23 = load double* %arrayidx6.7, align 8
+  %23 = load double, double* %arrayidx6.7, align 8
   %mul7.7 = fmul fast double %23, %21
   %add8.7 = fadd fast double %mul7.7, %add8.6
   store double %add.7, double* %rx, align 8
diff --git a/llvm/test/CodeGen/AArch64/PBQP-coalesce-benefit.ll b/llvm/test/CodeGen/AArch64/PBQP-coalesce-benefit.ll
index e5d2d1c..bd50b2d 100644
--- a/llvm/test/CodeGen/AArch64/PBQP-coalesce-benefit.ll
+++ b/llvm/test/CodeGen/AArch64/PBQP-coalesce-benefit.ll
@@ -3,11 +3,11 @@
 ; CHECK-LABEL: test:
 define i32 @test(i32 %acc, i32* nocapture readonly %c) {
 entry:
-  %0 = load i32* %c, align 4
+  %0 = load i32, i32* %c, align 4
 ; CHECK-NOT: mov	 w{{[0-9]*}}, w0
   %add = add nsw i32 %0, %acc
   %arrayidx1 = getelementptr inbounds i32, i32* %c, i64 1
-  %1 = load i32* %arrayidx1, align 4
+  %1 = load i32, i32* %arrayidx1, align 4
   %add2 = add nsw i32 %add, %1
   ret i32 %add2
 }
diff --git a/llvm/test/CodeGen/AArch64/PBQP-csr.ll b/llvm/test/CodeGen/AArch64/PBQP-csr.ll
index 644bc25..16d7f8c 100644
--- a/llvm/test/CodeGen/AArch64/PBQP-csr.ll
+++ b/llvm/test/CodeGen/AArch64/PBQP-csr.ll
@@ -23,15 +23,15 @@
   %na = getelementptr inbounds %rs, %rs* %r, i64 0, i32 0
   %0 = bitcast double* %x.i to i8*
   call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 72, i32 8, i1 false)
-  %1 = load i32* %na, align 4
+  %1 = load i32, i32* %na, align 4
   %cmp70 = icmp sgt i32 %1, 0
   br i1 %cmp70, label %for.body.lr.ph, label %for.end
 
 for.body.lr.ph:                                   ; preds = %entry
   %fn = getelementptr inbounds %rs, %rs* %r, i64 0, i32 4
-  %2 = load %v** %fn, align 8
+  %2 = load %v*, %v** %fn, align 8
   %fs = getelementptr inbounds %rs, %rs* %r, i64 0, i32 5
-  %3 = load %v** %fs, align 8
+  %3 = load %v*, %v** %fs, align 8
   %4 = sext i32 %1 to i64
   br label %for.body
 
@@ -46,27 +46,27 @@
   %x1.i = getelementptr inbounds %v, %v* %3, i64 %indvars.iv, i32 0
   %y.i56 = getelementptr inbounds %v, %v* %2, i64 %indvars.iv, i32 1
   %10 = bitcast double* %x.i54 to <2 x double>*
-  %11 = load <2 x double>* %10, align 8
+  %11 = load <2 x double>, <2 x double>* %10, align 8
   %y2.i = getelementptr inbounds %v, %v* %3, i64 %indvars.iv, i32 1
   %12 = bitcast double* %x1.i to <2 x double>*
-  %13 = load <2 x double>* %12, align 8
+  %13 = load <2 x double>, <2 x double>* %12, align 8
   %14 = fadd fast <2 x double> %13, %11
   %z.i57 = getelementptr inbounds %v, %v* %2, i64 %indvars.iv, i32 2
-  %15 = load double* %z.i57, align 8
+  %15 = load double, double* %z.i57, align 8
   %z4.i = getelementptr inbounds %v, %v* %3, i64 %indvars.iv, i32 2
-  %16 = load double* %z4.i, align 8
+  %16 = load double, double* %z4.i, align 8
   %add5.i = fadd fast double %16, %15
   %17 = fadd fast <2 x double> %6, %11
   %18 = bitcast double* %x.i to <2 x double>*
   store <2 x double> %17, <2 x double>* %18, align 8
-  %19 = load double* %x1.i, align 8
+  %19 = load double, double* %x1.i, align 8
   %20 = insertelement <2 x double> undef, double %15, i32 0
   %21 = insertelement <2 x double> %20, double %19, i32 1
   %22 = fadd fast <2 x double> %7, %21
   %23 = bitcast double* %z.i to <2 x double>*
   store <2 x double> %22, <2 x double>* %23, align 8
   %24 = bitcast double* %y2.i to <2 x double>*
-  %25 = load <2 x double>* %24, align 8
+  %25 = load <2 x double>, <2 x double>* %24, align 8
   %26 = fadd fast <2 x double> %8, %25
   %27 = bitcast double* %y.i62 to <2 x double>*
   store <2 x double> %26, <2 x double>* %27, align 8
diff --git a/llvm/test/CodeGen/AArch64/Redundantstore.ll b/llvm/test/CodeGen/AArch64/Redundantstore.ll
index 40be61d..b207268 100644
--- a/llvm/test/CodeGen/AArch64/Redundantstore.ll
+++ b/llvm/test/CodeGen/AArch64/Redundantstore.ll
@@ -8,7 +8,7 @@
 ; CHECK-NOT: stur
 define i8* @test(i32 %size) {
 entry:
-  %0 = load i8** @end_of_array, align 8
+  %0 = load i8*, i8** @end_of_array, align 8
   %conv = sext i32 %size to i64
   %and = and i64 %conv, -8
   %conv2 = trunc i64 %and to i32
diff --git a/llvm/test/CodeGen/AArch64/a57-csel.ll b/llvm/test/CodeGen/AArch64/a57-csel.ll
index 9d16d1a..f5496f7 100644
--- a/llvm/test/CodeGen/AArch64/a57-csel.ll
+++ b/llvm/test/CodeGen/AArch64/a57-csel.ll
@@ -3,7 +3,7 @@
 ; Check that the select is expanded into a branch sequence.
 define i64 @f(i64 %a, i64 %b, i64* %c, i64 %d, i64 %e) {
   ; CHECK: cbz
-  %x0 = load i64* %c
+  %x0 = load i64, i64* %c
   %x1 = icmp eq i64 %x0, 0
   %x2 = select i1 %x1, i64 %a, i64 %b
   %x3 = add i64 %x2, %d
diff --git a/llvm/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll b/llvm/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
index 4b3e6ba..b0b8333 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll
@@ -9,7 +9,7 @@
 
 for.body:                                         ; preds = %for.body, %entry
   %arrayidx5 = getelementptr inbounds i32, i32* null, i64 1, !dbg !43
-  %0 = load i32* null, align 4, !dbg !45, !tbaa !46
+  %0 = load i32, i32* null, align 4, !dbg !45, !tbaa !46
   %s1 = sub nsw i32 0, %0, !dbg !50
   %n1 = sext i32 %s1 to i64, !dbg !50
   %arrayidx21 = getelementptr inbounds i32, i32* null, i64 3, !dbg !51
diff --git a/llvm/test/CodeGen/AArch64/aarch64-2014-12-02-combine-soften.ll b/llvm/test/CodeGen/AArch64/aarch64-2014-12-02-combine-soften.ll
index 4553251..b2ee517 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-2014-12-02-combine-soften.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-2014-12-02-combine-soften.ll
@@ -9,7 +9,7 @@
 entry:
 ;CHECK-LABEL: foo:
 ;CHECK: __floatsisf
-  %0 = load i32* @x, align 4
+  %0 = load i32, i32* @x, align 4
   %conv = sitofp i32 %0 to float
   store float %conv, float* bitcast (i32* @t to float*), align 4
   ret void
diff --git a/llvm/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll b/llvm/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll
index dace22e..b0e9d4aa 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll
@@ -29,15 +29,15 @@
 
 define void @f1(double* nocapture readonly %p, double* nocapture %q) #0 {
 entry:
-  %0 = load double* %p, align 8
+  %0 = load double, double* %p, align 8
   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
-  %1 = load double* %arrayidx1, align 8
+  %1 = load double, double* %arrayidx1, align 8
   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
-  %2 = load double* %arrayidx2, align 8
+  %2 = load double, double* %arrayidx2, align 8
   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
-  %3 = load double* %arrayidx3, align 8
+  %3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
-  %4 = load double* %arrayidx4, align 8
+  %4 = load double, double* %arrayidx4, align 8
   %mul = fmul fast double %0, %1
   %add = fadd fast double %mul, %4
   %mul5 = fmul fast double %1, %2
@@ -48,11 +48,11 @@
   %add9 = fadd fast double %mul8, %sub
   store double %add9, double* %q, align 8
   %arrayidx11 = getelementptr inbounds double, double* %p, i64 5
-  %5 = load double* %arrayidx11, align 8
+  %5 = load double, double* %arrayidx11, align 8
   %arrayidx12 = getelementptr inbounds double, double* %p, i64 6
-  %6 = load double* %arrayidx12, align 8
+  %6 = load double, double* %arrayidx12, align 8
   %arrayidx13 = getelementptr inbounds double, double* %p, i64 7
-  %7 = load double* %arrayidx13, align 8
+  %7 = load double, double* %arrayidx13, align 8
   %mul15 = fmul fast double %6, %7
   %mul16 = fmul fast double %0, %5
   %add17 = fadd fast double %mul16, %mul15
@@ -81,21 +81,21 @@
 
 define void @f2(double* nocapture readonly %p, double* nocapture %q) #0 {
 entry:
-  %0 = load double* %p, align 8
+  %0 = load double, double* %p, align 8
   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
-  %1 = load double* %arrayidx1, align 8
+  %1 = load double, double* %arrayidx1, align 8
   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
-  %2 = load double* %arrayidx2, align 8
+  %2 = load double, double* %arrayidx2, align 8
   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
-  %3 = load double* %arrayidx3, align 8
+  %3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
-  %4 = load double* %arrayidx4, align 8
+  %4 = load double, double* %arrayidx4, align 8
   %arrayidx5 = getelementptr inbounds double, double* %p, i64 5
-  %5 = load double* %arrayidx5, align 8
+  %5 = load double, double* %arrayidx5, align 8
   %arrayidx6 = getelementptr inbounds double, double* %p, i64 6
-  %6 = load double* %arrayidx6, align 8
+  %6 = load double, double* %arrayidx6, align 8
   %arrayidx7 = getelementptr inbounds double, double* %p, i64 7
-  %7 = load double* %arrayidx7, align 8
+  %7 = load double, double* %arrayidx7, align 8
   %mul = fmul fast double %0, %1
   %add = fadd fast double %mul, %7
   %mul8 = fmul fast double %5, %6
@@ -127,15 +127,15 @@
 
 define void @f3(double* nocapture readonly %p, double* nocapture %q) #0 {
 entry:
-  %0 = load double* %p, align 8
+  %0 = load double, double* %p, align 8
   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
-  %1 = load double* %arrayidx1, align 8
+  %1 = load double, double* %arrayidx1, align 8
   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
-  %2 = load double* %arrayidx2, align 8
+  %2 = load double, double* %arrayidx2, align 8
   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
-  %3 = load double* %arrayidx3, align 8
+  %3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
-  %4 = load double* %arrayidx4, align 8
+  %4 = load double, double* %arrayidx4, align 8
   %mul = fmul fast double %0, %1
   %add = fadd fast double %mul, %4
   %mul5 = fmul fast double %1, %2
@@ -176,21 +176,21 @@
 
 define void @f4(float* nocapture readonly %p, float* nocapture %q) #0 {
 entry:
-  %0 = load float* %p, align 4
+  %0 = load float, float* %p, align 4
   %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
-  %1 = load float* %arrayidx1, align 4
+  %1 = load float, float* %arrayidx1, align 4
   %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
-  %2 = load float* %arrayidx2, align 4
+  %2 = load float, float* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
-  %3 = load float* %arrayidx3, align 4
+  %3 = load float, float* %arrayidx3, align 4
   %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
-  %4 = load float* %arrayidx4, align 4
+  %4 = load float, float* %arrayidx4, align 4
   %arrayidx5 = getelementptr inbounds float, float* %p, i64 5
-  %5 = load float* %arrayidx5, align 4
+  %5 = load float, float* %arrayidx5, align 4
   %arrayidx6 = getelementptr inbounds float, float* %p, i64 6
-  %6 = load float* %arrayidx6, align 4
+  %6 = load float, float* %arrayidx6, align 4
   %arrayidx7 = getelementptr inbounds float, float* %p, i64 7
-  %7 = load float* %arrayidx7, align 4
+  %7 = load float, float* %arrayidx7, align 4
   %mul = fmul fast float %0, %1
   %add = fadd fast float %mul, %7
   %mul8 = fmul fast float %5, %6
@@ -222,15 +222,15 @@
 
 define void @f5(float* nocapture readonly %p, float* nocapture %q) #0 {
 entry:
-  %0 = load float* %p, align 4
+  %0 = load float, float* %p, align 4
   %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
-  %1 = load float* %arrayidx1, align 4
+  %1 = load float, float* %arrayidx1, align 4
   %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
-  %2 = load float* %arrayidx2, align 4
+  %2 = load float, float* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
-  %3 = load float* %arrayidx3, align 4
+  %3 = load float, float* %arrayidx3, align 4
   %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
-  %4 = load float* %arrayidx4, align 4
+  %4 = load float, float* %arrayidx4, align 4
   %mul = fmul fast float %0, %1
   %add = fadd fast float %mul, %4
   %mul5 = fmul fast float %1, %2
@@ -264,15 +264,15 @@
 
 define void @f6(double* nocapture readonly %p, double* nocapture %q) #0 {
 entry:
-  %0 = load double* %p, align 8
+  %0 = load double, double* %p, align 8
   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
-  %1 = load double* %arrayidx1, align 8
+  %1 = load double, double* %arrayidx1, align 8
   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
-  %2 = load double* %arrayidx2, align 8
+  %2 = load double, double* %arrayidx2, align 8
   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
-  %3 = load double* %arrayidx3, align 8
+  %3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
-  %4 = load double* %arrayidx4, align 8
+  %4 = load double, double* %arrayidx4, align 8
   %mul = fmul fast double %0, %1
   %add = fadd fast double %mul, %4
   %mul5 = fmul fast double %1, %2
@@ -299,15 +299,15 @@
 
 define void @f7(double* nocapture readonly %p, double* nocapture %q) #0 {
 entry:
-  %0 = load double* %p, align 8
+  %0 = load double, double* %p, align 8
   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
-  %1 = load double* %arrayidx1, align 8
+  %1 = load double, double* %arrayidx1, align 8
   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
-  %2 = load double* %arrayidx2, align 8
+  %2 = load double, double* %arrayidx2, align 8
   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
-  %3 = load double* %arrayidx3, align 8
+  %3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
-  %4 = load double* %arrayidx4, align 8
+  %4 = load double, double* %arrayidx4, align 8
   %mul = fmul fast double %0, %1
   %add = fadd fast double %mul, %4
   %mul5 = fmul fast double %1, %2
diff --git a/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion-assertion.ll b/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion-assertion.ll
index 1ba54b2..0c6be21 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion-assertion.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion-assertion.ll
@@ -11,7 +11,7 @@
   br i1 %or.cond, label %if.then274, label %invoke.cont145
 
 if.then274:
-  %0 = load i32* null, align 4
+  %0 = load i32, i32* null, align 4
   br i1 undef, label %invoke.cont291, label %if.else313
 
 invoke.cont291:
diff --git a/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion.ll b/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion.ll
index 7ac83b8..07e0ba6 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-address-type-promotion.ll
@@ -15,11 +15,11 @@
   %add = add nsw i32 %i, 1
   %idxprom = sext i32 %add to i64
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add1 = add nsw i32 %i, 2
   %idxprom2 = sext i32 %add1 to i64
   %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 %idxprom2
-  %1 = load i32* %arrayidx3, align 4
+  %1 = load i32, i32* %arrayidx3, align 4
   %add4 = add nsw i32 %1, %0
   %idxprom5 = sext i32 %i to i64
   %arrayidx6 = getelementptr inbounds i32, i32* %a, i64 %idxprom5
diff --git a/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll b/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll
index 01642a4..fb41156 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll
@@ -8,7 +8,7 @@
   ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].4s, #0x1
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -20,7 +20,7 @@
   ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].4s, #0x1, lsl #8
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -32,7 +32,7 @@
   ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].4s, #0x1, lsl #16
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -44,7 +44,7 @@
   ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].4s, #0x1, lsl #24
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -56,7 +56,7 @@
   ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].8h, #0x1
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -68,7 +68,7 @@
   ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].8h, #0x1, lsl #8
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -80,7 +80,7 @@
   ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].4s, #0x1, msl #8
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 511, i16 0, i16 511, i16 0, i16 511, i16 0, i16 511, i16 0>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -92,7 +92,7 @@
   ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].4s, #0x1, msl #16
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 65535, i16 1, i16 65535, i16 1, i16 65535, i16 1, i16 65535, i16 1>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -104,7 +104,7 @@
   ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].16b, #0x1
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -116,7 +116,7 @@
   ; CHECK-NEXT:    movi	   v[[REG2:[0-9]+]].2d, #0x00ffff0000ffff
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 -1, i16 0, i16 -1, i16 0, i16 -1, i16 0, i16 -1, i16 0>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -128,7 +128,7 @@
   ; CHECK-NEXT:    fmov    v[[REG2:[0-9]+]].4s, #3.00000000
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 0, i16 16448, i16 0, i16 16448, i16 0, i16 16448, i16 0, i16 16448>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -140,7 +140,7 @@
   ; CHECK-NEXT:    fmov    v[[REG2:[0-9]+]].2d, #0.17968750
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 0, i16 0, i16 0, i16 16327, i16 0, i16 0, i16 0, i16 16327>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -152,7 +152,7 @@
   ; CHECK-NEXT:    mvni	   v[[REG2:[0-9]+]].4s, #0x1
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -164,7 +164,7 @@
   ; CHECK-NEXT:    mvni	   v[[REG2:[0-9]+]].4s, #0x1, lsl #8
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -176,7 +176,7 @@
   ; CHECK-NEXT:    mvni	   v[[REG2:[0-9]+]].4s, #0x1, lsl #16
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -188,7 +188,7 @@
   ; CHECK-NEXT:    mvni	   v[[REG2:[0-9]+]].4s, #0x1, lsl #24
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -200,7 +200,7 @@
   ; CHECK-NEXT:    mvni	   v[[REG2:[0-9]+]].8h, #0x1
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -212,7 +212,7 @@
   ; CHECK-NEXT:    mvni	   v[[REG2:[0-9]+]].8h, #0x1, lsl #8
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -224,7 +224,7 @@
   ; CHECK-NEXT:    mvni	   v[[REG2:[0-9]+]].4s, #0x1, msl #8
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 65024, i16 65535, i16 65024, i16 65535, i16 65024, i16 65535, i16 65024, i16 65535>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -236,7 +236,7 @@
   ; CHECK-NEXT:    mvni	   v[[REG2:[0-9]+]].4s, #0x1, msl #16
   ; CHECK-NEXT:    add	   v[[REG1]].8h, v[[REG1]].8h, v[[REG2]].8h
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = add <8 x i16> %in, <i16 0, i16 65534, i16 0, i16 65534, i16 0, i16 65534, i16 0, i16 65534>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -247,7 +247,7 @@
   ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
   ; CHECK-NEXT:    bic	   v[[REG2:[0-9]+]].4s, #0x1
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = and <8 x i16> %in, <i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -258,7 +258,7 @@
   ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
   ; CHECK-NEXT:    bic	   v[[REG2:[0-9]+]].4s, #0x1, lsl #8
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = and <8 x i16> %in, <i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -269,7 +269,7 @@
   ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
   ; CHECK-NEXT:    bic	   v[[REG2:[0-9]+]].4s, #0x1, lsl #16
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = and <8 x i16> %in, <i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534, i16 65535, i16 65534>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -280,7 +280,7 @@
   ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
   ; CHECK-NEXT:    bic	   v[[REG2:[0-9]+]].4s, #0x1, lsl #24
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = and <8 x i16> %in, <i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279, i16 65535, i16 65279>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -291,7 +291,7 @@
   ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
   ; CHECK-NEXT:    bic	   v[[REG2:[0-9]+]].8h, #0x1
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = and <8 x i16> %in, <i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534, i16 65534>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -302,7 +302,7 @@
   ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
   ; CHECK-NEXT:    bic	   v[[REG2:[0-9]+]].8h, #0x1, lsl #8
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = and <8 x i16> %in, <i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279, i16 65279>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -313,7 +313,7 @@
   ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
   ; CHECK-NEXT:    orr	   v[[REG2:[0-9]+]].4s, #0x1
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = or <8 x i16> %in, <i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -324,7 +324,7 @@
   ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
   ; CHECK-NEXT:    orr     v[[REG2:[0-9]+]].4s, #0x1, lsl #8
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = or <8 x i16> %in, <i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -335,7 +335,7 @@
   ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
   ; CHECK-NEXT:    orr	   v[[REG2:[0-9]+]].4s, #0x1, lsl #16
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = or <8 x i16> %in, <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -346,7 +346,7 @@
   ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
   ; CHECK-NEXT:    orr	   v[[REG2:[0-9]+]].4s, #0x1, lsl #24
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = or <8 x i16> %in, <i16 0, i16 256, i16 0, i16 256, i16 0, i16 256, i16 0, i16 256>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -357,7 +357,7 @@
   ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
   ; CHECK-NEXT:    orr	   v[[REG2:[0-9]+]].8h, #0x1
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = or <8 x i16> %in, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
@@ -368,7 +368,7 @@
   ; CHECK:         ld1     { v[[REG1:[0-9]+]].8h }, [x{{[0-9]+}}]
   ; CHECK-NEXT:    orr	   v[[REG2:[0-9]+]].8h, #0x1, lsl #8
   ; CHECK-NEXT:    umov	   w{{[0-9]+}}, v[[REG1]].h[0]
-  %in = load <8 x i16>* @vec_v8i16
+  %in = load <8 x i16>, <8 x i16>* @vec_v8i16
   %rv = or <8 x i16> %in, <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256>
   %el = extractelement <8 x i16> %rv, i32 0
   ret i16 %el
diff --git a/llvm/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll b/llvm/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll
index 64d91ee..2170e4b 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-fix-cortex-a53-835769.ll
@@ -22,7 +22,7 @@
 
 define i64 @f_load_madd_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
 entry:
-  %0 = load i64* %c, align 8
+  %0 = load i64, i64* %c, align 8
   %mul = mul nsw i64 %0, %b
   %add = add nsw i64 %mul, %a
   ret i64 %add
@@ -41,7 +41,7 @@
 
 define i32 @f_load_madd_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
 entry:
-  %0 = load i32* %c, align 4
+  %0 = load i32, i32* %c, align 4
   %mul = mul nsw i32 %0, %b
   %add = add nsw i32 %mul, %a
   ret i32 %add
@@ -56,7 +56,7 @@
 
 define i64 @f_load_msub_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
 entry:
-  %0 = load i64* %c, align 8
+  %0 = load i64, i64* %c, align 8
   %mul = mul nsw i64 %0, %b
   %sub = sub nsw i64 %a, %mul
   ret i64 %sub
@@ -72,7 +72,7 @@
 
 define i32 @f_load_msub_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
 entry:
-  %0 = load i32* %c, align 4
+  %0 = load i32, i32* %c, align 4
   %mul = mul nsw i32 %0, %b
   %sub = sub nsw i32 %a, %mul
   ret i32 %sub
@@ -87,7 +87,7 @@
 
 define i64 @f_load_mul_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
 entry:
-  %0 = load i64* %c, align 8
+  %0 = load i64, i64* %c, align 8
   %mul = mul nsw i64 %0, %b
   ret i64 %mul
 }
@@ -101,7 +101,7 @@
 
 define i32 @f_load_mul_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
 entry:
-  %0 = load i32* %c, align 4
+  %0 = load i32, i32* %c, align 4
   %mul = mul nsw i32 %0, %b
   ret i32 %mul
 }
@@ -115,7 +115,7 @@
 
 define i64 @f_load_mneg_64(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
 entry:
-  %0 = load i64* %c, align 8
+  %0 = load i64, i64* %c, align 8
   %mul = sub i64 0, %b
   %sub = mul i64 %0, %mul
   ret i64 %sub
@@ -133,7 +133,7 @@
 
 define i32 @f_load_mneg_32(i32 %a, i32 %b, i32* nocapture readonly %c) #0 {
 entry:
-  %0 = load i32* %c, align 4
+  %0 = load i32, i32* %c, align 4
   %mul = sub i32 0, %b
   %sub = mul i32 %0, %mul
   ret i32 %sub
@@ -154,7 +154,7 @@
   %conv1 = sext i32 %c to i64
   %mul = mul nsw i64 %conv1, %conv
   %add = add nsw i64 %mul, %a
-  %0 = load i32* %d, align 4
+  %0 = load i32, i32* %d, align 4
   %conv2 = sext i32 %0 to i64
   %add3 = add nsw i64 %add, %conv2
   ret i64 %add3
@@ -174,7 +174,7 @@
   %conv1 = sext i32 %c to i64
   %mul = mul nsw i64 %conv1, %conv
   %sub = sub i64 %a, %mul
-  %0 = load i32* %d, align 4
+  %0 = load i32, i32* %d, align 4
   %conv2 = sext i32 %0 to i64
   %add = add nsw i64 %sub, %conv2
   ret i64 %add
@@ -193,7 +193,7 @@
   %conv = sext i32 %b to i64
   %conv1 = sext i32 %c to i64
   %mul = mul nsw i64 %conv1, %conv
-  %0 = load i32* %d, align 4
+  %0 = load i32, i32* %d, align 4
   %conv2 = sext i32 %0 to i64
   %div = sdiv i64 %mul, %conv2
   ret i64 %div
@@ -212,7 +212,7 @@
   %conv1 = sext i32 %c to i64
   %mul = sub nsw i64 0, %conv
   %sub = mul i64 %conv1, %mul
-  %0 = load i32* %d, align 4
+  %0 = load i32, i32* %d, align 4
   %conv2 = sext i32 %0 to i64
   %div = sdiv i64 %sub, %conv2
   ret i64 %div
@@ -229,7 +229,7 @@
   %conv1 = zext i32 %c to i64
   %mul = mul i64 %conv1, %conv
   %add = add i64 %mul, %a
-  %0 = load i32* %d, align 4
+  %0 = load i32, i32* %d, align 4
   %conv2 = zext i32 %0 to i64
   %add3 = add i64 %add, %conv2
   ret i64 %add3
@@ -249,7 +249,7 @@
   %conv1 = zext i32 %c to i64
   %mul = mul i64 %conv1, %conv
   %sub = sub i64 %a, %mul
-  %0 = load i32* %d, align 4
+  %0 = load i32, i32* %d, align 4
   %conv2 = zext i32 %0 to i64
   %add = add i64 %sub, %conv2
   ret i64 %add
@@ -268,7 +268,7 @@
   %conv = zext i32 %b to i64
   %conv1 = zext i32 %c to i64
   %mul = mul i64 %conv1, %conv
-  %0 = load i32* %d, align 4
+  %0 = load i32, i32* %d, align 4
   %conv2 = zext i32 %0 to i64
   %div = udiv i64 %mul, %conv2
   ret i64 %div
@@ -287,7 +287,7 @@
   %conv1 = zext i32 %c to i64
   %mul = sub nsw i64 0, %conv
   %sub = mul i64 %conv1, %mul
-  %0 = load i32* %d, align 4
+  %0 = load i32, i32* %d, align 4
   %conv2 = zext i32 %0 to i64
   %div = udiv i64 %sub, %conv2
   ret i64 %div
@@ -300,7 +300,7 @@
 
 define i64 @f_store_madd_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
 entry:
-  %0 = load i64* %cp, align 8
+  %0 = load i64, i64* %cp, align 8
   store i64 %a, i64* %e, align 8
   %mul = mul nsw i64 %0, %b
   %add = add nsw i64 %mul, %a
@@ -317,7 +317,7 @@
 
 define i32 @f_store_madd_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
 entry:
-  %0 = load i32* %cp, align 4
+  %0 = load i32, i32* %cp, align 4
   store i32 %a, i32* %e, align 4
   %mul = mul nsw i32 %0, %b
   %add = add nsw i32 %mul, %a
@@ -333,7 +333,7 @@
 
 define i64 @f_store_msub_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
 entry:
-  %0 = load i64* %cp, align 8
+  %0 = load i64, i64* %cp, align 8
   store i64 %a, i64* %e, align 8
   %mul = mul nsw i64 %0, %b
   %sub = sub nsw i64 %a, %mul
@@ -350,7 +350,7 @@
 
 define i32 @f_store_msub_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
 entry:
-  %0 = load i32* %cp, align 4
+  %0 = load i32, i32* %cp, align 4
   store i32 %a, i32* %e, align 4
   %mul = mul nsw i32 %0, %b
   %sub = sub nsw i32 %a, %mul
@@ -366,7 +366,7 @@
 
 define i64 @f_store_mul_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
 entry:
-  %0 = load i64* %cp, align 8
+  %0 = load i64, i64* %cp, align 8
   store i64 %a, i64* %e, align 8
   %mul = mul nsw i64 %0, %b
   ret i64 %mul
@@ -381,7 +381,7 @@
 
 define i32 @f_store_mul_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
 entry:
-  %0 = load i32* %cp, align 4
+  %0 = load i32, i32* %cp, align 4
   store i32 %a, i32* %e, align 4
   %mul = mul nsw i32 %0, %b
   ret i32 %mul
@@ -396,7 +396,7 @@
 
 define i64 @f_prefetch_madd_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
 entry:
-  %0 = load i64* %cp, align 8
+  %0 = load i64, i64* %cp, align 8
   %1 = bitcast i64* %e to i8*
   tail call void @llvm.prefetch(i8* %1, i32 0, i32 0, i32 1)
   %mul = mul nsw i64 %0, %b
@@ -415,7 +415,7 @@
 
 define i32 @f_prefetch_madd_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
 entry:
-  %0 = load i32* %cp, align 4
+  %0 = load i32, i32* %cp, align 4
   %1 = bitcast i32* %e to i8*
   tail call void @llvm.prefetch(i8* %1, i32 1, i32 0, i32 1)
   %mul = mul nsw i32 %0, %b
@@ -431,7 +431,7 @@
 
 define i64 @f_prefetch_msub_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
 entry:
-  %0 = load i64* %cp, align 8
+  %0 = load i64, i64* %cp, align 8
   %1 = bitcast i64* %e to i8*
   tail call void @llvm.prefetch(i8* %1, i32 0, i32 1, i32 1)
   %mul = mul nsw i64 %0, %b
@@ -448,7 +448,7 @@
 
 define i32 @f_prefetch_msub_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
 entry:
-  %0 = load i32* %cp, align 4
+  %0 = load i32, i32* %cp, align 4
   %1 = bitcast i32* %e to i8*
   tail call void @llvm.prefetch(i8* %1, i32 1, i32 1, i32 1)
   %mul = mul nsw i32 %0, %b
@@ -464,7 +464,7 @@
 
 define i64 @f_prefetch_mul_64(i64 %a, i64 %b, i64* nocapture readonly %cp, i64* nocapture %e) #1 {
 entry:
-  %0 = load i64* %cp, align 8
+  %0 = load i64, i64* %cp, align 8
   %1 = bitcast i64* %e to i8*
   tail call void @llvm.prefetch(i8* %1, i32 0, i32 3, i32 1)
   %mul = mul nsw i64 %0, %b
@@ -479,7 +479,7 @@
 
 define i32 @f_prefetch_mul_32(i32 %a, i32 %b, i32* nocapture readonly %cp, i32* nocapture %e) #1 {
 entry:
-  %0 = load i32* %cp, align 4
+  %0 = load i32, i32* %cp, align 4
   %1 = bitcast i32* %e to i8*
   tail call void @llvm.prefetch(i8* %1, i32 1, i32 3, i32 1)
   %mul = mul nsw i32 %0, %b
@@ -494,7 +494,7 @@
 
 define i64 @fall_through(i64 %a, i64 %b, i64* nocapture readonly %c) #0 {
 entry:
-  %0 = load i64* %c, align 8
+  %0 = load i64, i64* %c, align 8
   br label %block1
 
 block1:
diff --git a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
index 98d2edf..ce6c8a0 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
@@ -15,13 +15,13 @@
 ; eliminate the common subexpression for the second use.
 define void @test_GEP_CSE([240 x %struct]* %string, i32* %adj, i32 %lib, i64 %idxprom) {
   %liberties = getelementptr [240 x %struct], [240 x %struct]* %string, i64 1, i64 %idxprom, i32 3
-  %1 = load i32* %liberties, align 4
+  %1 = load i32, i32* %liberties, align 4
   %cmp = icmp eq i32 %1, %lib
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
   %origin = getelementptr [240 x %struct], [240 x %struct]* %string, i64 1, i64 %idxprom, i32 2
-  %2 = load i32* %origin, align 4
+  %2 = load i32, i32* %origin, align 4
   store i32 %2, i32* %adj, align 4
   br label %if.end
 
@@ -66,9 +66,9 @@
 ; use.
 define void @test_GEP_across_BB(%class.my* %this, i64 %idx) {
   %1 = getelementptr %class.my, %class.my* %this, i64 0, i32 3, i64 %idx, i32 1
-  %2 = load i32* %1, align 4
+  %2 = load i32, i32* %1, align 4
   %3 = getelementptr %class.my, %class.my* %this, i64 0, i32 3, i64 %idx, i32 2
-  %4 = load i32* %3, align 4
+  %4 = load i32, i32* %3, align 4
   %5 = icmp eq i32 %2, %4
   br i1 %5, label %if.true, label %exit
 
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 92582d7..ec0e2de 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -3,8 +3,8 @@
 define <8 x i16> @smull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ; CHECK-LABEL: smull_v8i8_v8i16:
 ; CHECK: smull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-  %tmp1 = load <8 x i8>* %A
-  %tmp2 = load <8 x i8>* %B
+  %tmp1 = load <8 x i8>, <8 x i8>* %A
+  %tmp2 = load <8 x i8>, <8 x i8>* %B
   %tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
   %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
   %tmp5 = mul <8 x i16> %tmp3, %tmp4
@@ -14,8 +14,8 @@
 define <4 x i32> @smull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ; CHECK-LABEL: smull_v4i16_v4i32:
 ; CHECK: smull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
   %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
   %tmp5 = mul <4 x i32> %tmp3, %tmp4
@@ -25,8 +25,8 @@
 define <2 x i64> @smull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ; CHECK-LABEL: smull_v2i32_v2i64:
 ; CHECK:  smull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
   %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
   %tmp5 = mul <2 x i64> %tmp3, %tmp4
@@ -36,8 +36,8 @@
 define <8 x i16> @umull_v8i8_v8i16(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ; CHECK-LABEL: umull_v8i8_v8i16:
 ; CHECK: umull {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-  %tmp1 = load <8 x i8>* %A
-  %tmp2 = load <8 x i8>* %B
+  %tmp1 = load <8 x i8>, <8 x i8>* %A
+  %tmp2 = load <8 x i8>, <8 x i8>* %B
   %tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
   %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
   %tmp5 = mul <8 x i16> %tmp3, %tmp4
@@ -47,8 +47,8 @@
 define <4 x i32> @umull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ; CHECK-LABEL: umull_v4i16_v4i32:
 ; CHECK: umull {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
   %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
   %tmp5 = mul <4 x i32> %tmp3, %tmp4
@@ -58,8 +58,8 @@
 define <2 x i64> @umull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ; CHECK-LABEL: umull_v2i32_v2i64:
 ; CHECK:  umull {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
   %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
   %tmp5 = mul <2 x i64> %tmp3, %tmp4
@@ -69,9 +69,9 @@
 define <8 x i16> @smlal_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ; CHECK-LABEL: smlal_v8i8_v8i16:
 ; CHECK:  smlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-  %tmp1 = load <8 x i16>* %A
-  %tmp2 = load <8 x i8>* %B
-  %tmp3 = load <8 x i8>* %C
+  %tmp1 = load <8 x i16>, <8 x i16>* %A
+  %tmp2 = load <8 x i8>, <8 x i8>* %B
+  %tmp3 = load <8 x i8>, <8 x i8>* %C
   %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
   %tmp5 = sext <8 x i8> %tmp3 to <8 x i16>
   %tmp6 = mul <8 x i16> %tmp4, %tmp5
@@ -82,9 +82,9 @@
 define <4 x i32> @smlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ; CHECK-LABEL: smlal_v4i16_v4i32:
 ; CHECK: smlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-  %tmp1 = load <4 x i32>* %A
-  %tmp2 = load <4 x i16>* %B
-  %tmp3 = load <4 x i16>* %C
+  %tmp1 = load <4 x i32>, <4 x i32>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %tmp3 = load <4 x i16>, <4 x i16>* %C
   %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
   %tmp5 = sext <4 x i16> %tmp3 to <4 x i32>
   %tmp6 = mul <4 x i32> %tmp4, %tmp5
@@ -95,9 +95,9 @@
 define <2 x i64> @smlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ; CHECK-LABEL: smlal_v2i32_v2i64:
 ; CHECK: smlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %tmp1 = load <2 x i64>* %A
-  %tmp2 = load <2 x i32>* %B
-  %tmp3 = load <2 x i32>* %C
+  %tmp1 = load <2 x i64>, <2 x i64>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
+  %tmp3 = load <2 x i32>, <2 x i32>* %C
   %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
   %tmp5 = sext <2 x i32> %tmp3 to <2 x i64>
   %tmp6 = mul <2 x i64> %tmp4, %tmp5
@@ -108,9 +108,9 @@
 define <8 x i16> @umlal_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ; CHECK-LABEL: umlal_v8i8_v8i16:
 ; CHECK:  umlal {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-  %tmp1 = load <8 x i16>* %A
-  %tmp2 = load <8 x i8>* %B
-  %tmp3 = load <8 x i8>* %C
+  %tmp1 = load <8 x i16>, <8 x i16>* %A
+  %tmp2 = load <8 x i8>, <8 x i8>* %B
+  %tmp3 = load <8 x i8>, <8 x i8>* %C
   %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
   %tmp5 = zext <8 x i8> %tmp3 to <8 x i16>
   %tmp6 = mul <8 x i16> %tmp4, %tmp5
@@ -121,9 +121,9 @@
 define <4 x i32> @umlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ; CHECK-LABEL: umlal_v4i16_v4i32:
 ; CHECK: umlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-  %tmp1 = load <4 x i32>* %A
-  %tmp2 = load <4 x i16>* %B
-  %tmp3 = load <4 x i16>* %C
+  %tmp1 = load <4 x i32>, <4 x i32>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %tmp3 = load <4 x i16>, <4 x i16>* %C
   %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
   %tmp5 = zext <4 x i16> %tmp3 to <4 x i32>
   %tmp6 = mul <4 x i32> %tmp4, %tmp5
@@ -134,9 +134,9 @@
 define <2 x i64> @umlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ; CHECK-LABEL: umlal_v2i32_v2i64:
 ; CHECK: umlal {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %tmp1 = load <2 x i64>* %A
-  %tmp2 = load <2 x i32>* %B
-  %tmp3 = load <2 x i32>* %C
+  %tmp1 = load <2 x i64>, <2 x i64>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
+  %tmp3 = load <2 x i32>, <2 x i32>* %C
   %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
   %tmp5 = zext <2 x i32> %tmp3 to <2 x i64>
   %tmp6 = mul <2 x i64> %tmp4, %tmp5
@@ -147,9 +147,9 @@
 define <8 x i16> @smlsl_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ; CHECK-LABEL: smlsl_v8i8_v8i16:
 ; CHECK:  smlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-  %tmp1 = load <8 x i16>* %A
-  %tmp2 = load <8 x i8>* %B
-  %tmp3 = load <8 x i8>* %C
+  %tmp1 = load <8 x i16>, <8 x i16>* %A
+  %tmp2 = load <8 x i8>, <8 x i8>* %B
+  %tmp3 = load <8 x i8>, <8 x i8>* %C
   %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
   %tmp5 = sext <8 x i8> %tmp3 to <8 x i16>
   %tmp6 = mul <8 x i16> %tmp4, %tmp5
@@ -160,9 +160,9 @@
 define <4 x i32> @smlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ; CHECK-LABEL: smlsl_v4i16_v4i32:
 ; CHECK: smlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-  %tmp1 = load <4 x i32>* %A
-  %tmp2 = load <4 x i16>* %B
-  %tmp3 = load <4 x i16>* %C
+  %tmp1 = load <4 x i32>, <4 x i32>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %tmp3 = load <4 x i16>, <4 x i16>* %C
   %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
   %tmp5 = sext <4 x i16> %tmp3 to <4 x i32>
   %tmp6 = mul <4 x i32> %tmp4, %tmp5
@@ -173,9 +173,9 @@
 define <2 x i64> @smlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ; CHECK-LABEL: smlsl_v2i32_v2i64:
 ; CHECK: smlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %tmp1 = load <2 x i64>* %A
-  %tmp2 = load <2 x i32>* %B
-  %tmp3 = load <2 x i32>* %C
+  %tmp1 = load <2 x i64>, <2 x i64>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
+  %tmp3 = load <2 x i32>, <2 x i32>* %C
   %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
   %tmp5 = sext <2 x i32> %tmp3 to <2 x i64>
   %tmp6 = mul <2 x i64> %tmp4, %tmp5
@@ -186,9 +186,9 @@
 define <8 x i16> @umlsl_v8i8_v8i16(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ; CHECK-LABEL: umlsl_v8i8_v8i16:
 ; CHECK:  umlsl {{v[0-9]+}}.8h, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-  %tmp1 = load <8 x i16>* %A
-  %tmp2 = load <8 x i8>* %B
-  %tmp3 = load <8 x i8>* %C
+  %tmp1 = load <8 x i16>, <8 x i16>* %A
+  %tmp2 = load <8 x i8>, <8 x i8>* %B
+  %tmp3 = load <8 x i8>, <8 x i8>* %C
   %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
   %tmp5 = zext <8 x i8> %tmp3 to <8 x i16>
   %tmp6 = mul <8 x i16> %tmp4, %tmp5
@@ -199,9 +199,9 @@
 define <4 x i32> @umlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ; CHECK-LABEL: umlsl_v4i16_v4i32:
 ; CHECK: umlsl {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-  %tmp1 = load <4 x i32>* %A
-  %tmp2 = load <4 x i16>* %B
-  %tmp3 = load <4 x i16>* %C
+  %tmp1 = load <4 x i32>, <4 x i32>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %tmp3 = load <4 x i16>, <4 x i16>* %C
   %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
   %tmp5 = zext <4 x i16> %tmp3 to <4 x i32>
   %tmp6 = mul <4 x i32> %tmp4, %tmp5
@@ -212,9 +212,9 @@
 define <2 x i64> @umlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ; CHECK-LABEL: umlsl_v2i32_v2i64:
 ; CHECK: umlsl {{v[0-9]+}}.2d, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %tmp1 = load <2 x i64>* %A
-  %tmp2 = load <2 x i32>* %B
-  %tmp3 = load <2 x i32>* %C
+  %tmp1 = load <2 x i64>, <2 x i64>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
+  %tmp3 = load <2 x i32>, <2 x i32>* %C
   %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
   %tmp5 = zext <2 x i32> %tmp3 to <2 x i64>
   %tmp6 = mul <2 x i64> %tmp4, %tmp5
diff --git a/llvm/test/CodeGen/AArch64/addsub-shifted.ll b/llvm/test/CodeGen/AArch64/addsub-shifted.ll
index 1d963f41..7c7d654 100644
--- a/llvm/test/CodeGen/AArch64/addsub-shifted.ll
+++ b/llvm/test/CodeGen/AArch64/addsub-shifted.ll
@@ -6,63 +6,63 @@
 define void @test_lsl_arith(i32 %lhs32, i32 %rhs32, i64 %lhs64, i64 %rhs64) {
 ; CHECK-LABEL: test_lsl_arith:
 
-  %rhs1 = load volatile i32* @var32
+  %rhs1 = load volatile i32, i32* @var32
   %shift1 = shl i32 %rhs1, 18
   %val1 = add i32 %lhs32, %shift1
   store volatile i32 %val1, i32* @var32
 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #18
 
-  %rhs2 = load volatile i32* @var32
+  %rhs2 = load volatile i32, i32* @var32
   %shift2 = shl i32 %rhs2, 31
   %val2 = add i32 %shift2, %lhs32
   store volatile i32 %val2, i32* @var32
 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #31
 
-  %rhs3 = load volatile i32* @var32
+  %rhs3 = load volatile i32, i32* @var32
   %shift3 = shl i32 %rhs3, 5
   %val3 = sub i32 %lhs32, %shift3
   store volatile i32 %val3, i32* @var32
 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #5
 
 ; Subtraction is not commutative!
-  %rhs4 = load volatile i32* @var32
+  %rhs4 = load volatile i32, i32* @var32
   %shift4 = shl i32 %rhs4, 19
   %val4 = sub i32 %shift4, %lhs32
   store volatile i32 %val4, i32* @var32
 ; CHECK-NOT: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, lsl #19
 
-  %lhs4a = load volatile i32* @var32
+  %lhs4a = load volatile i32, i32* @var32
   %shift4a = shl i32 %lhs4a, 15
   %val4a = sub i32 0, %shift4a
   store volatile i32 %val4a, i32* @var32
 ; CHECK: neg {{w[0-9]+}}, {{w[0-9]+}}, lsl #15
 
-  %rhs5 = load volatile i64* @var64
+  %rhs5 = load volatile i64, i64* @var64
   %shift5 = shl i64 %rhs5, 18
   %val5 = add i64 %lhs64, %shift5
   store volatile i64 %val5, i64* @var64
 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #18
 
-  %rhs6 = load volatile i64* @var64
+  %rhs6 = load volatile i64, i64* @var64
   %shift6 = shl i64 %rhs6, 31
   %val6 = add i64 %shift6, %lhs64
   store volatile i64 %val6, i64* @var64
 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #31
 
-  %rhs7 = load volatile i64* @var64
+  %rhs7 = load volatile i64, i64* @var64
   %shift7 = shl i64 %rhs7, 5
   %val7 = sub i64 %lhs64, %shift7
   store volatile i64 %val7, i64* @var64
 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #5
 
 ; Subtraction is not commutative!
-  %rhs8 = load volatile i64* @var64
+  %rhs8 = load volatile i64, i64* @var64
   %shift8 = shl i64 %rhs8, 19
   %val8 = sub i64 %shift8, %lhs64
   store volatile i64 %val8, i64* @var64
 ; CHECK-NOT: sub {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, lsl #19
 
-  %lhs8a = load volatile i64* @var64
+  %lhs8a = load volatile i64, i64* @var64
   %shift8a = shl i64 %lhs8a, 60
   %val8a = sub i64 0, %shift8a
   store volatile i64 %val8a, i64* @var64
diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll
index b85fdbb..09b9f62 100644
--- a/llvm/test/CodeGen/AArch64/addsub.ll
+++ b/llvm/test/CodeGen/AArch64/addsub.ll
@@ -12,12 +12,12 @@
 ; CHECK-LABEL: add_small:
 
 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #4095
-  %val32 = load i32* @var_i32
+  %val32 = load i32, i32* @var_i32
   %newval32 = add i32 %val32, 4095
   store i32 %newval32, i32* @var_i32
 
 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, #52
-  %val64 = load i64* @var_i64
+  %val64 = load i64, i64* @var_i64
   %newval64 = add i64 %val64, 52
   store i64 %newval64, i64* @var_i64
 
@@ -29,12 +29,12 @@
 ; CHECK-LABEL: add_med:
 
 ; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, {{#3567, lsl #12|#14610432}}
-  %val32 = load i32* @var_i32
+  %val32 = load i32, i32* @var_i32
   %newval32 = add i32 %val32, 14610432 ; =0xdef000
   store i32 %newval32, i32* @var_i32
 
 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, {{#4095, lsl #12|#16773120}}
-  %val64 = load i64* @var_i64
+  %val64 = load i64, i64* @var_i64
   %newval64 = add i64 %val64, 16773120 ; =0xfff000
   store i64 %newval64, i64* @var_i64
 
@@ -46,12 +46,12 @@
 ; CHECK-LABEL: sub_small:
 
 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, #4095
-  %val32 = load i32* @var_i32
+  %val32 = load i32, i32* @var_i32
   %newval32 = sub i32 %val32, 4095
   store i32 %newval32, i32* @var_i32
 
 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, #52
-  %val64 = load i64* @var_i64
+  %val64 = load i64, i64* @var_i64
   %newval64 = sub i64 %val64, 52
   store i64 %newval64, i64* @var_i64
 
@@ -63,12 +63,12 @@
 ; CHECK-LABEL: sub_med:
 
 ; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{#3567, lsl #12|#14610432}}
-  %val32 = load i32* @var_i32
+  %val32 = load i32, i32* @var_i32
   %newval32 = sub i32 %val32, 14610432 ; =0xdef000
   store i32 %newval32, i32* @var_i32
 
 ; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{#4095, lsl #12|#16773120}}
-  %val64 = load i64* @var_i64
+  %val64 = load i64, i64* @var_i64
   %newval64 = sub i64 %val64, 16773120 ; =0xfff000
   store i64 %newval64, i64* @var_i64
 
@@ -77,7 +77,7 @@
 
 define void @testing() {
 ; CHECK-LABEL: testing:
-  %val = load i32* @var_i32
+  %val = load i32, i32* @var_i32
 
 ; CHECK: cmp {{w[0-9]+}}, #4095
 ; CHECK: b.ne [[RET:.?LBB[0-9]+_[0-9]+]]
diff --git a/llvm/test/CodeGen/AArch64/addsub_ext.ll b/llvm/test/CodeGen/AArch64/addsub_ext.ll
index ceea8a0..f0c7572 100644
--- a/llvm/test/CodeGen/AArch64/addsub_ext.ll
+++ b/llvm/test/CodeGen/AArch64/addsub_ext.ll
@@ -7,9 +7,9 @@
 
 define void @addsub_i8rhs() minsize {
 ; CHECK-LABEL: addsub_i8rhs:
-    %val8_tmp = load i8* @var8
-    %lhs32 = load i32* @var32
-    %lhs64 = load i64* @var64
+    %val8_tmp = load i8, i8* @var8
+    %lhs32 = load i32, i32* @var32
+    %lhs64 = load i64, i64* @var64
 
     ; Need this to prevent extension upon load and give a vanilla i8 operand.
     %val8 = add i8 %val8_tmp, 123
@@ -82,9 +82,9 @@
 
 define void @addsub_i16rhs() minsize {
 ; CHECK-LABEL: addsub_i16rhs:
-    %val16_tmp = load i16* @var16
-    %lhs32 = load i32* @var32
-    %lhs64 = load i64* @var64
+    %val16_tmp = load i16, i16* @var16
+    %lhs32 = load i32, i32* @var32
+    %lhs64 = load i64, i64* @var64
 
     ; Need this to prevent extension upon load and give a vanilla i16 operand.
     %val16 = add i16 %val16_tmp, 123
@@ -160,8 +160,8 @@
 ; in the face of "add/sub (shifted register)" so I don't intend to.
 define void @addsub_i32rhs() minsize {
 ; CHECK-LABEL: addsub_i32rhs:
-    %val32_tmp = load i32* @var32
-    %lhs64 = load i64* @var64
+    %val32_tmp = load i32, i32* @var32
+    %lhs64 = load i64, i64* @var64
 
     %val32 = add i32 %val32_tmp, 123
 
diff --git a/llvm/test/CodeGen/AArch64/alloca.ll b/llvm/test/CodeGen/AArch64/alloca.ll
index f93efbc..5b2278c 100644
--- a/llvm/test/CodeGen/AArch64/alloca.ll
+++ b/llvm/test/CodeGen/AArch64/alloca.ll
@@ -51,7 +51,7 @@
   call void @use_addr_loc(i8* %buf, i64* %loc)
 ; CHECK: bl use_addr
 
-  %val = load i64* %loc
+  %val = load i64, i64* %loc
 
 ; CHECK: ldur x0, [x29, #-[[LOC_FROM_FP]]]
 
diff --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
index 6bbec17..8291516 100644
--- a/llvm/test/CodeGen/AArch64/and-mask-removal.ll
+++ b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
@@ -9,13 +9,13 @@
 entry:
   %idxprom = sext i32 %pos to i64
   %arrayidx = getelementptr inbounds [400 x i8], [400 x i8]* @board, i64 0, i64 %idxprom
-  %tmp = load i8* %arrayidx, align 1
+  %tmp = load i8, i8* %arrayidx, align 1
   %.off = add i8 %tmp, -1
   %switch = icmp ult i8 %.off, 2
   br i1 %switch, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
-  %tmp1 = load i32* @next_string, align 4
+  %tmp1 = load i32, i32* @next_string, align 4
   %arrayidx8 = getelementptr inbounds [400 x i32], [400 x i32]* @string_number, i64 0, i64 %idxprom
   store i32 %tmp1, i32* %arrayidx8, align 4
   br label %if.end
diff --git a/llvm/test/CodeGen/AArch64/arm64-2011-04-21-CPSRBug.ll b/llvm/test/CodeGen/AArch64/arm64-2011-04-21-CPSRBug.ll
index 88232fc..e2c39e0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-2011-04-21-CPSRBug.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-2011-04-21-CPSRBug.ll
@@ -8,7 +8,7 @@
   %cmp = icmp eq i32* null, undef
   %frombool = zext i1 %cmp to i8
   store i8 %frombool, i8* undef, align 1
-  %tmp4 = load i8* undef, align 1
+  %tmp4 = load i8, i8* undef, align 1
   %tobool = trunc i8 %tmp4 to i1
   br i1 %tobool, label %land.lhs.true, label %if.end
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll b/llvm/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
index a83f164..b69cd24 100644
--- a/llvm/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll
@@ -19,7 +19,7 @@
   %0 = shl nsw i64 %indvars.iv, 12
   %add = add nsw i64 %0, 34628173824
   %1 = inttoptr i64 %add to i32*
-  %2 = load volatile i32* %1, align 4096
+  %2 = load volatile i32, i32* %1, align 4096
   store volatile i32 %2, i32* @test_data, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/llvm/test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll b/llvm/test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll
index d47dbb2..8d0b1b6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-2012-01-11-ComparisonDAGCrash.ll
@@ -13,7 +13,7 @@
   br i1 undef, label %return, label %if.end
 
 if.end:
-  %tmp.i = load i64* undef, align 8
+  %tmp.i = load i64, i64* undef, align 8
   %and.i.i.i = and i64 %tmp.i, -16
   br i1 %IsArrow, label %if.else_crit_edge, label %if.end32
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll b/llvm/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll
index 52e1734..ef8d6f3 100644
--- a/llvm/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-2012-05-22-LdStOptBug.ll
@@ -15,23 +15,23 @@
 entry:
 ; CHECK-LABEL: t:
 ; CHECK: ldp d{{[0-9]+}}, d{{[0-9]+}}
-  %ivar = load i64* @"OBJC_IVAR_$_UIScreen._bounds", align 8, !invariant.load !4
+  %ivar = load i64, i64* @"OBJC_IVAR_$_UIScreen._bounds", align 8, !invariant.load !4
   %0 = bitcast %0* %self to i8*
   %add.ptr = getelementptr inbounds i8, i8* %0, i64 %ivar
   %add.ptr10.0 = bitcast i8* %add.ptr to double*
-  %tmp11 = load double* %add.ptr10.0, align 8
+  %tmp11 = load double, double* %add.ptr10.0, align 8
   %add.ptr.sum = add i64 %ivar, 8
   %add.ptr10.1 = getelementptr inbounds i8, i8* %0, i64 %add.ptr.sum
   %1 = bitcast i8* %add.ptr10.1 to double*
-  %tmp12 = load double* %1, align 8
+  %tmp12 = load double, double* %1, align 8
   %add.ptr.sum17 = add i64 %ivar, 16
   %add.ptr4.1 = getelementptr inbounds i8, i8* %0, i64 %add.ptr.sum17
   %add.ptr4.1.0 = bitcast i8* %add.ptr4.1 to double*
-  %tmp = load double* %add.ptr4.1.0, align 8
+  %tmp = load double, double* %add.ptr4.1.0, align 8
   %add.ptr4.1.sum = add i64 %ivar, 24
   %add.ptr4.1.1 = getelementptr inbounds i8, i8* %0, i64 %add.ptr4.1.sum
   %2 = bitcast i8* %add.ptr4.1.1 to double*
-  %tmp5 = load double* %2, align 8
+  %tmp5 = load double, double* %2, align 8
   %insert14 = insertvalue %struct.CGPoint undef, double %tmp11, 0
   %insert16 = insertvalue %struct.CGPoint %insert14, double %tmp12, 1
   %insert = insertvalue %struct.CGRect undef, %struct.CGPoint %insert16, 0
diff --git a/llvm/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll b/llvm/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
index 4db1f59..04364b0 100644
--- a/llvm/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-2012-06-06-FPToUI.ll
@@ -13,12 +13,12 @@
 entry:
   %d.addr = alloca double, align 8
   store double %d, double* %d.addr, align 8
-  %0 = load double* %d.addr, align 8
-  %1 = load double* %d.addr, align 8
+  %0 = load double, double* %d.addr, align 8
+  %1 = load double, double* %d.addr, align 8
   %conv = fptoui double %1 to i64
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), double %0, i64 %conv)
-  %2 = load double* %d.addr, align 8
-  %3 = load double* %d.addr, align 8
+  %2 = load double, double* %d.addr, align 8
+  %3 = load double, double* %d.addr, align 8
   %conv1 = fptoui double %3 to i32
   %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str1, i32 0, i32 0), double %2, i32 %conv1)
   ret void
@@ -33,14 +33,14 @@
 entry:
   %f.addr = alloca float, align 4
   store float %f, float* %f.addr, align 4
-  %0 = load float* %f.addr, align 4
+  %0 = load float, float* %f.addr, align 4
   %conv = fpext float %0 to double
-  %1 = load float* %f.addr, align 4
+  %1 = load float, float* %f.addr, align 4
   %conv1 = fptoui float %1 to i64
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str2, i32 0, i32 0), double %conv, i64 %conv1)
-  %2 = load float* %f.addr, align 4
+  %2 = load float, float* %f.addr, align 4
   %conv2 = fpext float %2 to double
-  %3 = load float* %f.addr, align 4
+  %3 = load float, float* %f.addr, align 4
   %conv3 = fptoui float %3 to i32
   %call4 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str3, i32 0, i32 0), double %conv2, i32 %conv3)
   ret void
diff --git a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
index 15c0077..b6826e1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll
@@ -82,18 +82,18 @@
   store i32 10, i32* %a10, align 4
   store i32 11, i32* %a11, align 4
   store i32 12, i32* %a12, align 4
-  %1 = load i32* %a1, align 4
-  %2 = load i32* %a2, align 4
-  %3 = load i32* %a3, align 4
-  %4 = load i32* %a4, align 4
-  %5 = load i32* %a5, align 4
-  %6 = load i32* %a6, align 4
-  %7 = load i32* %a7, align 4
-  %8 = load i32* %a8, align 4
-  %9 = load i32* %a9, align 4
-  %10 = load i32* %a10, align 4
-  %11 = load i32* %a11, align 4
-  %12 = load i32* %a12, align 4
+  %1 = load i32, i32* %a1, align 4
+  %2 = load i32, i32* %a2, align 4
+  %3 = load i32, i32* %a3, align 4
+  %4 = load i32, i32* %a4, align 4
+  %5 = load i32, i32* %a5, align 4
+  %6 = load i32, i32* %a6, align 4
+  %7 = load i32, i32* %a7, align 4
+  %8 = load i32, i32* %a8, align 4
+  %9 = load i32, i32* %a9, align 4
+  %10 = load i32, i32* %a10, align 4
+  %11 = load i32, i32* %a11, align 4
+  %12 = load i32, i32* %a12, align 4
   call void (i32, i32, i32, i32, i32, i32, i32, i32, i32, ...)* @fn9(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12)
   ret i32 0
 }
@@ -131,8 +131,8 @@
   %y.addr = alloca <4 x i32>, align 16
   store i32 %x, i32* %x.addr, align 4
   store <4 x i32> %y, <4 x i32>* %y.addr, align 16
-  %0 = load i32* %x.addr, align 4
-  %1 = load <4 x i32>* %y.addr, align 16
+  %0 = load i32, i32* %x.addr, align 4
+  %1 = load <4 x i32>, <4 x i32>* %y.addr, align 16
   call void (i8*, ...)* @foo(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %0, <4 x i32> %1)
   ret void
 }
@@ -158,7 +158,7 @@
   call void @llvm.va_start(i8* %args1)
   %0 = va_arg i8** %args, i32
   store i32 %0, i32* %vc, align 4
-  %ap.cur = load i8** %args
+  %ap.cur = load i8*, i8** %args
   %1 = getelementptr i8, i8* %ap.cur, i32 15
   %2 = ptrtoint i8* %1 to i64
   %3 = and i64 %2, -16
@@ -183,9 +183,9 @@
   store i32 %x, i32* %x.addr, align 4
   %0 = bitcast %struct.s41* %s41 to i128*
   store i128 %s41.coerce, i128* %0, align 1
-  %1 = load i32* %x.addr, align 4
+  %1 = load i32, i32* %x.addr, align 4
   %2 = bitcast %struct.s41* %s41 to i128*
-  %3 = load i128* %2, align 1
+  %3 = load i128, i128* %2, align 1
   call void (i8*, ...)* @foo2(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %1, i128 %3)
   ret void
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-abi.ll b/llvm/test/CodeGen/AArch64/arm64-abi.ll
index 8a6b64d..36a6822 100644
--- a/llvm/test/CodeGen/AArch64/arm64-abi.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-abi.ll
@@ -79,7 +79,7 @@
 ; FAST: sub sp, sp
 ; FAST: mov x[[ADDR:[0-9]+]], sp
 ; FAST: str [[REG_1:q[0-9]+]], [x[[ADDR]], #16]
-  %0 = load <4 x i32>* %in, align 16
+  %0 = load <4 x i32>, <4 x i32>* %in, align 16
   %call = tail call double @args_vec_4i(double 3.000000e+00, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, <4 x i32> %0, double 3.000000e+00, <4 x i32> %0, i8 signext 3)
   ret double %call
 }
@@ -133,7 +133,7 @@
 ; FAST: sub sp, sp, #32
 ; FAST: mov x[[ADDR:[0-9]+]], sp
 ; FAST: str [[REG_1:d[0-9]+]], [x[[ADDR]], #8]
-  %0 = load <2 x i32>* %in, align 8
+  %0 = load <2 x i32>, <2 x i32>* %in, align 8
   %call = tail call double @args_vec_2i(double 3.000000e+00, <2 x i32> %0,
           <2 x i32> %0, <2 x i32> %0, <2 x i32> %0, <2 x i32> %0, <2 x i32> %0,
           <2 x i32> %0, float 3.000000e+00, <2 x i32> %0, i8 signext 3)
@@ -148,7 +148,7 @@
 ; CHECK: str [[REG_1:d[0-9]+]], [sp, #8]
 ; CHECK: str [[REG_2:w[0-9]+]], [sp]
 ; CHECK: orr w0, wzr, #0x3
-  %0 = load double* %in, align 8
+  %0 = load double, double* %in, align 8
   %call = tail call double @args_f64(double 3.000000e+00, double %0, double %0,
           double %0, double %0, double %0, double %0, double %0,
           float 3.000000e+00, double %0, i8 signext 3)
@@ -163,7 +163,7 @@
 ; CHECK: strb [[REG_3:w[0-9]+]], [sp, #16]
 ; CHECK: str [[REG_1:x[0-9]+]], [sp, #8]
 ; CHECK: str [[REG_2:w[0-9]+]], [sp]
-  %0 = load i64* %in, align 8
+  %0 = load i64, i64* %in, align 8
   %call = tail call i64 @args_i64(i64 3, i64 %0, i64 %0, i64 %0, i64 %0, i64 %0,
                          i64 %0, i64 %0, i32 3, i64 %0, i8 signext 3)
   ret i64 %call
@@ -177,7 +177,7 @@
 ; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8]
 ; CHECK: str [[REG_1:s[0-9]+]], [sp, #4]
 ; CHECK: strh [[REG_3:w[0-9]+]], [sp]
-  %0 = load float* %in, align 4
+  %0 = load float, float* %in, align 4
   %call = tail call i32 @args_f32(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
           i32 7, i32 8, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0,
           float 6.0, float 7.0, float 8.0, i16 signext 3, float %0,
@@ -194,7 +194,7 @@
 ; CHECK: strb [[REG_2:w[0-9]+]], [sp, #8]
 ; CHECK: str [[REG_1:w[0-9]+]], [sp, #4]
 ; CHECK: strh [[REG_3:w[0-9]+]], [sp]
-  %0 = load i32* %in, align 4
+  %0 = load i32, i32* %in, align 4
   %call = tail call i32 @args_i32(i32 3, i32 %0, i32 %0, i32 %0, i32 %0, i32 %0,
                          i32 %0, i32 %0, i16 signext 3, i32 %0, i8 signext 4)
   ret i32 %call
diff --git a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
index 6173e07..1c1b58b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll
@@ -59,8 +59,8 @@
 ; CHECK-LABEL: caller38
 ; CHECK: ldr x1,
 ; CHECK: ldr x2,
-  %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4
-  %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
+  %0 = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
+  %1 = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
   %call = tail call i32 @f38(i32 3, i64 %0, i64 %1) #5
   ret i32 %call
 }
@@ -76,8 +76,8 @@
 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
 ; CHECK: movz w[[C:[0-9]+]], #0x9
 ; CHECK: str w[[C]], [sp]
-  %0 = load i64* bitcast (%struct.s38* @g38 to i64*), align 4
-  %1 = load i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
+  %0 = load i64, i64* bitcast (%struct.s38* @g38 to i64*), align 4
+  %1 = load i64, i64* bitcast (%struct.s38* @g38_2 to i64*), align 4
   %call = tail call i32 @f38_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                                    i32 7, i32 8, i32 9, i64 %0, i64 %1) #5
   ret i32 %call
@@ -112,8 +112,8 @@
 ; CHECK-LABEL: caller39
 ; CHECK: ldp x1, x2,
 ; CHECK: ldp x3, x4,
-  %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16
-  %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
+  %0 = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
+  %1 = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
   %call = tail call i32 @f39(i32 3, i128 %0, i128 %1) #5
   ret i32 %call
 }
@@ -130,8 +130,8 @@
 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
 ; CHECK: movz w[[C:[0-9]+]], #0x9
 ; CHECK: str w[[C]], [sp]
-  %0 = load i128* bitcast (%struct.s39* @g39 to i128*), align 16
-  %1 = load i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
+  %0 = load i128, i128* bitcast (%struct.s39* @g39 to i128*), align 16
+  %1 = load i128, i128* bitcast (%struct.s39* @g39_2 to i128*), align 16
   %call = tail call i32 @f39_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                                    i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
   ret i32 %call
@@ -168,8 +168,8 @@
 ; CHECK-LABEL: caller40
 ; CHECK: ldp x1, x2,
 ; CHECK: ldp x3, x4,
-  %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
-  %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
+  %0 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
+  %1 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
   %call = tail call i32 @f40(i32 3, [2 x i64] %0, [2 x i64] %1) #5
   ret i32 %call
 }
@@ -186,8 +186,8 @@
 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #8]
 ; CHECK: movz w[[C:[0-9]+]], #0x9
 ; CHECK: str w[[C]], [sp]
-  %0 = load [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
-  %1 = load [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
+  %0 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40 to [2 x i64]*), align 4
+  %1 = load [2 x i64], [2 x i64]* bitcast (%struct.s40* @g40_2 to [2 x i64]*), align 4
   %call = tail call i32 @f40_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                          i32 7, i32 8, i32 9, [2 x i64] %0, [2 x i64] %1) #5
   ret i32 %call
@@ -222,8 +222,8 @@
 ; CHECK-LABEL: caller41
 ; CHECK: ldp x1, x2,
 ; CHECK: ldp x3, x4,
-  %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
-  %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
+  %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
+  %1 = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
   %call = tail call i32 @f41(i32 3, i128 %0, i128 %1) #5
   ret i32 %call
 }
@@ -240,8 +240,8 @@
 ; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]
 ; CHECK: movz w[[C:[0-9]+]], #0x9
 ; CHECK: str w[[C]], [sp]
-  %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
-  %1 = load i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
+  %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
+  %1 = load i128, i128* bitcast (%struct.s41* @g41_2 to i128*), align 16
   %call = tail call i32 @f41_stack(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6,
                             i32 7, i32 8, i32 9, i128 %0, i128 %1) #5
   ret i32 %call
@@ -261,14 +261,14 @@
 ; FAST: add w[[C:[0-9]+]], w[[A]], w0
 ; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
   %i1 = getelementptr inbounds %struct.s42, %struct.s42* %s1, i64 0, i32 0
-  %0 = load i32* %i1, align 4, !tbaa !0
+  %0 = load i32, i32* %i1, align 4, !tbaa !0
   %i2 = getelementptr inbounds %struct.s42, %struct.s42* %s2, i64 0, i32 0
-  %1 = load i32* %i2, align 4, !tbaa !0
+  %1 = load i32, i32* %i2, align 4, !tbaa !0
   %s = getelementptr inbounds %struct.s42, %struct.s42* %s1, i64 0, i32 1
-  %2 = load i16* %s, align 2, !tbaa !3
+  %2 = load i16, i16* %s, align 2, !tbaa !3
   %conv = sext i16 %2 to i32
   %s5 = getelementptr inbounds %struct.s42, %struct.s42* %s2, i64 0, i32 1
-  %3 = load i16* %s5, align 2, !tbaa !3
+  %3 = load i16, i16* %s5, align 2, !tbaa !3
   %conv6 = sext i16 %3 to i32
   %add = add i32 %0, %i
   %add3 = add i32 %add, %1
@@ -370,14 +370,14 @@
 ; FAST: add w[[C:[0-9]+]], w[[A]], w0
 ; FAST: add {{w[0-9]+}}, w[[C]], w[[B]]
   %i1 = getelementptr inbounds %struct.s43, %struct.s43* %s1, i64 0, i32 0
-  %0 = load i32* %i1, align 4, !tbaa !0
+  %0 = load i32, i32* %i1, align 4, !tbaa !0
   %i2 = getelementptr inbounds %struct.s43, %struct.s43* %s2, i64 0, i32 0
-  %1 = load i32* %i2, align 4, !tbaa !0
+  %1 = load i32, i32* %i2, align 4, !tbaa !0
   %s = getelementptr inbounds %struct.s43, %struct.s43* %s1, i64 0, i32 1
-  %2 = load i16* %s, align 2, !tbaa !3
+  %2 = load i16, i16* %s, align 2, !tbaa !3
   %conv = sext i16 %2 to i32
   %s5 = getelementptr inbounds %struct.s43, %struct.s43* %s2, i64 0, i32 1
-  %3 = load i16* %s5, align 2, !tbaa !3
+  %3 = load i16, i16* %s5, align 2, !tbaa !3
   %conv6 = sext i16 %3 to i32
   %add = add i32 %0, %i
   %add3 = add i32 %add, %1
@@ -493,7 +493,7 @@
 ; Load/Store opt is disabled with -O0, so the i128 is split.
 ; FAST: str {{x[0-9]+}}, [x[[ADDR]], #8]
 ; FAST: str {{x[0-9]+}}, [x[[ADDR]]]
-  %0 = load i128* bitcast (%struct.s41* @g41 to i128*), align 16
+  %0 = load i128, i128* bitcast (%struct.s41* @g41 to i128*), align 16
   %call = tail call i32 @callee_i128_split(i32 1, i32 2, i32 3, i32 4, i32 5,
                                            i32 6, i32 7, i128 %0, i32 8) #5
   ret i32 %call
@@ -514,7 +514,7 @@
 ; FAST: mov x[[R0:[0-9]+]], sp
 ; FAST: orr w[[R1:[0-9]+]], wzr, #0x8
 ; FAST: str w[[R1]], {{\[}}x[[R0]]{{\]}}
-  %0 = load i64* bitcast (%struct.s41* @g41 to i64*), align 16
+  %0 = load i64, i64* bitcast (%struct.s41* @g41 to i64*), align 16
   %call = tail call i32 @callee_i64(i32 1, i32 2, i32 3, i32 4, i32 5,
                                     i32 6, i32 7, i64 %0, i32 8) #5
   ret i32 %call
diff --git a/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll b/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
index 1767647..3197f5b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll
@@ -11,12 +11,12 @@
 ; _CHECK-NOT_: , sxtw]
 entry:
   %idxprom = sext i32 %i1 to i64
-  %0 = load i8** @block, align 8
+  %0 = load i8*, i8** @block, align 8
   %arrayidx = getelementptr inbounds i8, i8* %0, i64 %idxprom
-  %1 = load i8* %arrayidx, align 1
+  %1 = load i8, i8* %arrayidx, align 1
   %idxprom1 = sext i32 %i2 to i64
   %arrayidx2 = getelementptr inbounds i8, i8* %0, i64 %idxprom1
-  %2 = load i8* %arrayidx2, align 1
+  %2 = load i8, i8* %arrayidx2, align 1
   %cmp = icmp eq i8 %1, %2
   br i1 %cmp, label %if.end, label %if.then
 
@@ -30,10 +30,10 @@
   %inc9 = add nsw i32 %i2, 1
   %idxprom10 = sext i32 %inc to i64
   %arrayidx11 = getelementptr inbounds i8, i8* %0, i64 %idxprom10
-  %3 = load i8* %arrayidx11, align 1
+  %3 = load i8, i8* %arrayidx11, align 1
   %idxprom12 = sext i32 %inc9 to i64
   %arrayidx13 = getelementptr inbounds i8, i8* %0, i64 %idxprom12
-  %4 = load i8* %arrayidx13, align 1
+  %4 = load i8, i8* %arrayidx13, align 1
   %cmp16 = icmp eq i8 %3, %4
   br i1 %cmp16, label %if.end23, label %if.then18
 
@@ -47,10 +47,10 @@
   %inc25 = add nsw i32 %i2, 2
   %idxprom26 = sext i32 %inc24 to i64
   %arrayidx27 = getelementptr inbounds i8, i8* %0, i64 %idxprom26
-  %5 = load i8* %arrayidx27, align 1
+  %5 = load i8, i8* %arrayidx27, align 1
   %idxprom28 = sext i32 %inc25 to i64
   %arrayidx29 = getelementptr inbounds i8, i8* %0, i64 %idxprom28
-  %6 = load i8* %arrayidx29, align 1
+  %6 = load i8, i8* %arrayidx29, align 1
   %cmp32 = icmp eq i8 %5, %6
   br i1 %cmp32, label %return, label %if.then34
 
@@ -71,12 +71,12 @@
 ; CHECK: , sxtw]
 entry:
   %idxprom = sext i32 %i1 to i64
-  %0 = load i8** @block, align 8
+  %0 = load i8*, i8** @block, align 8
   %arrayidx = getelementptr inbounds i8, i8* %0, i64 %idxprom
-  %1 = load i8* %arrayidx, align 1
+  %1 = load i8, i8* %arrayidx, align 1
   %idxprom1 = sext i32 %i2 to i64
   %arrayidx2 = getelementptr inbounds i8, i8* %0, i64 %idxprom1
-  %2 = load i8* %arrayidx2, align 1
+  %2 = load i8, i8* %arrayidx2, align 1
   %cmp = icmp eq i8 %1, %2
   br i1 %cmp, label %if.end, label %if.then
 
@@ -90,10 +90,10 @@
   %inc9 = add nsw i32 %i2, 1
   %idxprom10 = sext i32 %inc to i64
   %arrayidx11 = getelementptr inbounds i8, i8* %0, i64 %idxprom10
-  %3 = load i8* %arrayidx11, align 1
+  %3 = load i8, i8* %arrayidx11, align 1
   %idxprom12 = sext i32 %inc9 to i64
   %arrayidx13 = getelementptr inbounds i8, i8* %0, i64 %idxprom12
-  %4 = load i8* %arrayidx13, align 1
+  %4 = load i8, i8* %arrayidx13, align 1
   %cmp16 = icmp eq i8 %3, %4
   br i1 %cmp16, label %if.end23, label %if.then18
 
@@ -107,10 +107,10 @@
   %inc25 = add nsw i32 %i2, 2
   %idxprom26 = sext i32 %inc24 to i64
   %arrayidx27 = getelementptr inbounds i8, i8* %0, i64 %idxprom26
-  %5 = load i8* %arrayidx27, align 1
+  %5 = load i8, i8* %arrayidx27, align 1
   %idxprom28 = sext i32 %inc25 to i64
   %arrayidx29 = getelementptr inbounds i8, i8* %0, i64 %idxprom28
-  %6 = load i8* %arrayidx29, align 1
+  %6 = load i8, i8* %arrayidx29, align 1
   %cmp32 = icmp eq i8 %5, %6
   br i1 %cmp32, label %return, label %if.then34
 
@@ -136,8 +136,8 @@
 if.then:                                          ; preds = %entry
   %idxprom = zext i8 %c to i64
   %arrayidx = getelementptr inbounds i32, i32* %array, i64 %idxprom
-  %0 = load volatile i32* %arrayidx, align 4
-  %1 = load volatile i32* %arrayidx, align 4
+  %0 = load volatile i32, i32* %arrayidx, align 4
+  %1 = load volatile i32, i32* %arrayidx, align 4
   %add3 = add nsw i32 %1, %0
   br label %if.end
 
@@ -160,8 +160,8 @@
 if.then:                                          ; preds = %entry
   %idxprom = zext i8 %c to i64
   %arrayidx = getelementptr inbounds i32, i32* %array, i64 %idxprom
-  %0 = load volatile i32* %arrayidx, align 4
-  %1 = load volatile i32* %arrayidx, align 4
+  %0 = load volatile i32, i32* %arrayidx, align 4
+  %1 = load volatile i32, i32* %arrayidx, align 4
   %add3 = add nsw i32 %1, %0
   br label %if.end
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll b/llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
index 2bee1d5..4703d25 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
@@ -28,12 +28,12 @@
 ; CHECK-NEXT: cmp [[LOADEDVAL3]], [[LOADEDVAL4]]
 entry:
   %idxprom = sext i32 %i1 to i64
-  %tmp = load i8** @block, align 8
+  %tmp = load i8*, i8** @block, align 8
   %arrayidx = getelementptr inbounds i8, i8* %tmp, i64 %idxprom
-  %tmp1 = load i8* %arrayidx, align 1
+  %tmp1 = load i8, i8* %arrayidx, align 1
   %idxprom1 = sext i32 %i2 to i64
   %arrayidx2 = getelementptr inbounds i8, i8* %tmp, i64 %idxprom1
-  %tmp2 = load i8* %arrayidx2, align 1
+  %tmp2 = load i8, i8* %arrayidx2, align 1
   %cmp = icmp eq i8 %tmp1, %tmp2
   br i1 %cmp, label %if.end, label %if.then
 
@@ -47,10 +47,10 @@
   %inc10 = add nsw i32 %i2, 1
   %idxprom11 = sext i32 %inc to i64
   %arrayidx12 = getelementptr inbounds i8, i8* %tmp, i64 %idxprom11
-  %tmp3 = load i8* %arrayidx12, align 1
+  %tmp3 = load i8, i8* %arrayidx12, align 1
   %idxprom13 = sext i32 %inc10 to i64
   %arrayidx14 = getelementptr inbounds i8, i8* %tmp, i64 %idxprom13
-  %tmp4 = load i8* %arrayidx14, align 1
+  %tmp4 = load i8, i8* %arrayidx14, align 1
   %cmp17 = icmp eq i8 %tmp3, %tmp4
   br i1 %cmp17, label %if.end25, label %if.then19
 
@@ -64,10 +64,10 @@
   %inc27 = add nsw i32 %i2, 2
   %idxprom28 = sext i32 %inc26 to i64
   %arrayidx29 = getelementptr inbounds i8, i8* %tmp, i64 %idxprom28
-  %tmp5 = load i8* %arrayidx29, align 1
+  %tmp5 = load i8, i8* %arrayidx29, align 1
   %idxprom30 = sext i32 %inc27 to i64
   %arrayidx31 = getelementptr inbounds i8, i8* %tmp, i64 %idxprom30
-  %tmp6 = load i8* %arrayidx31, align 1
+  %tmp6 = load i8, i8* %arrayidx31, align 1
   %cmp34 = icmp eq i8 %tmp5, %tmp6
   br i1 %cmp34, label %return, label %if.then36
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
index d897a79..c22d031 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
@@ -9,7 +9,7 @@
 ; CHECK: ret
 define void @t1() {
   %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 1
-  %tmp = load volatile i64* %incdec.ptr, align 8
+  %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
@@ -21,7 +21,7 @@
 ; CHECK: ret
 define void @t2() {
   %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 -33
-  %tmp = load volatile i64* %incdec.ptr, align 8
+  %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
@@ -31,7 +31,7 @@
 ; CHECK: ret
 define void @t3() {
   %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4095
-  %tmp = load volatile i64* %incdec.ptr, align 8
+  %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
@@ -42,7 +42,7 @@
 ; CHECK: ret
 define void @t4() {
   %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 4096
-  %tmp = load volatile i64* %incdec.ptr, align 8
+  %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
@@ -52,7 +52,7 @@
 ; CHECK: ret
 define void @t5(i64 %a) {
   %incdec.ptr = getelementptr inbounds i64, i64* @object, i64 %a
-  %tmp = load volatile i64* %incdec.ptr, align 8
+  %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
@@ -65,7 +65,7 @@
 define void @t6(i64 %a) {
   %tmp1 = getelementptr inbounds i64, i64* @object, i64 %a
   %incdec.ptr = getelementptr inbounds i64, i64* %tmp1, i64 4096
-  %tmp = load volatile i64* %incdec.ptr, align 8
+  %tmp = load volatile i64, i64* %incdec.ptr, align 8
   ret void
 }
 
@@ -76,7 +76,7 @@
 ; CHECK-NEXT: ldr xzr, [x0, x[[NUM]]]
   %1 = add i64 %a, 65535   ;0xffff
   %2 = inttoptr i64 %1 to i64*
-  %3 = load volatile i64* %2, align 8
+  %3 = load volatile i64, i64* %2, align 8
   ret void
 }
 
@@ -86,7 +86,7 @@
 ; CHECK-NEXT: ldr xzr, [x0, [[REG]]]
   %1 = sub i64 %a, 4662   ;-4662 is 0xffffffffffffedca
   %2 = inttoptr i64 %1 to i64*
-  %3 = load volatile i64* %2, align 8
+  %3 = load volatile i64, i64* %2, align 8
   ret void
 }
 
@@ -96,7 +96,7 @@
 ; CHECK-NEXT: ldr xzr, [x0, [[REG]]]
   %1 = add i64 -305463297, %a   ;-305463297 is 0xffffffffedcaffff
   %2 = inttoptr i64 %1 to i64*
-  %3 = load volatile i64* %2, align 8
+  %3 = load volatile i64, i64* %2, align 8
   ret void
 }
 
@@ -106,7 +106,7 @@
 ; CHECK-NEXT: ldr xzr, [x0, [[REG]]]
   %1 = add i64 %a, 81909218222800896   ;0x123000000000000
   %2 = inttoptr i64 %1 to i64*
-  %3 = load volatile i64* %2, align 8
+  %3 = load volatile i64, i64* %2, align 8
   ret void
 }
 
@@ -117,7 +117,7 @@
 ; CHECK-NEXT: ldr xzr, [x0, x[[NUM]]]
   %1 = add i64 %a, 19088743   ;0x1234567
   %2 = inttoptr i64 %1 to i64*
-  %3 = load volatile i64* %2, align 8
+  %3 = load volatile i64, i64* %2, align 8
   ret void
 }
 
@@ -128,7 +128,7 @@
 ; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]]
   %1 = add i64 %a, 4095   ;0xfff
   %2 = inttoptr i64 %1 to i64*
-  %3 = load volatile i64* %2, align 8
+  %3 = load volatile i64, i64* %2, align 8
   ret void
 }
 
@@ -138,7 +138,7 @@
 ; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]]
   %1 = add i64 %a, -4095   ;-0xfff
   %2 = inttoptr i64 %1 to i64*
-  %3 = load volatile i64* %2, align 8
+  %3 = load volatile i64, i64* %2, align 8
   ret void
 }
 
@@ -148,7 +148,7 @@
 ; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]]
   %1 = add i64 %a, 1191936   ;0x123000
   %2 = inttoptr i64 %1 to i64*
-  %3 = load volatile i64* %2, align 8
+  %3 = load volatile i64, i64* %2, align 8
   ret void
 }
 
@@ -158,7 +158,7 @@
 ; CHECK-NEXT: ldr xzr, {{\[}}[[REG]]]
   %1 = add i64 %a, -1191936   ;0xFFFFFFFFFFEDD000
   %2 = inttoptr i64 %1 to i64*
-  %3 = load volatile i64* %2, align 8
+  %3 = load volatile i64, i64* %2, align 8
   ret void
 }
 
@@ -167,7 +167,7 @@
 ; CHECK: ldr xzr, [x0, #28672]
   %1 = add i64 %a, 28672   ;0x7000
   %2 = inttoptr i64 %1 to i64*
-  %3 = load volatile i64* %2, align 8
+  %3 = load volatile i64, i64* %2, align 8
   ret void
 }
 
@@ -176,6 +176,6 @@
 ; CHECK: ldur xzr, [x0, #-256]
   %1 = add i64 %a, -256   ;-0x100
   %2 = inttoptr i64 %1 to i64*
-  %3 = load volatile i64* %2, align 8
+  %3 = load volatile i64, i64* %2, align 8
   ret void
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll b/llvm/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll
index f396bc9..bf2d2cf 100644
--- a/llvm/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-alloc-no-stack-realign.ll
@@ -13,9 +13,9 @@
 ; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], {{\[}}[[BASE:x[0-9]+]], #32]
 ; CHECK: stp [[Q1:q[0-9]+]], [[Q2:q[0-9]+]], {{\[}}[[BASE]]]
  %retval = alloca <16 x float>, align 16
- %0 = load <16 x float>* @T3_retval, align 16
+ %0 = load <16 x float>, <16 x float>* @T3_retval, align 16
  store <16 x float> %0, <16 x float>* %retval
- %1 = load <16 x float>* %retval
+ %1 = load <16 x float>, <16 x float>* %retval
  store <16 x float> %1, <16 x float>* %agg.result, align 16
  ret void
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll b/llvm/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
index 3750f31b..eb0cd35 100644
--- a/llvm/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll
@@ -13,17 +13,17 @@
   %arr2 = alloca [32 x i32], align 4
   %j = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
-  %tmp = load i32* %a.addr, align 4
+  %tmp = load i32, i32* %a.addr, align 4
   %tmp1 = zext i32 %tmp to i64
   %v = mul i64 4, %tmp1
   %vla = alloca i8, i64 %v, align 4
   %tmp2 = bitcast i8* %vla to i32*
-  %tmp3 = load i32* %a.addr, align 4
+  %tmp3 = load i32, i32* %a.addr, align 4
   store i32 %tmp3, i32* %i, align 4
-  %tmp4 = load i32* %a.addr, align 4
+  %tmp4 = load i32, i32* %a.addr, align 4
   store i32 %tmp4, i32* %j, align 4
-  %tmp5 = load i32* %j, align 4
+  %tmp5 = load i32, i32* %j, align 4
   store i32 %tmp5, i32* %retval
-  %x = load i32* %retval
+  %x = load i32, i32* %retval
   ret i32 %x
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll b/llvm/test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll
index 4194977..71e6480 100644
--- a/llvm/test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-andCmpBrToTBZ.ll
@@ -29,7 +29,7 @@
   br i1 %cmp.i.i.i.i, label %if.then3, label %if.end5
 
 if.then3:                                         ; preds = %_ZNK7WebCore4Node10hasTagNameERKNS_13QualifiedNameE.exit, %land.rhs.i
-  %tmp11 = load i8* %str14, align 8
+  %tmp11 = load i8, i8* %str14, align 8
   %tmp12 = and i8 %tmp11, 2
   %tmp13 = icmp ne i8 %tmp12, 0
   br label %return
@@ -55,7 +55,7 @@
   br i1 %isTextField, label %if.then9, label %if.end12
 
 if.then9:                                         ; preds = %if.then7
-  %tmp23 = load i8* %str5, align 8
+  %tmp23 = load i8, i8* %str5, align 8
   %tmp24 = and i8 %tmp23, 2
   %tmp25 = icmp ne i8 %tmp24, 0
   br label %return
diff --git a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
index 642d72a..a76cf74 100644
--- a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
@@ -169,7 +169,7 @@
 ; CHECK-NOT: dmb
 ; CHECK-LABEL: ldaxp
 ; CHECK-NOT: dmb
-   %r = load atomic i128* %p seq_cst, align 16
+   %r = load atomic i128, i128* %p seq_cst, align 16
    ret i128 %r
 }
 
@@ -178,7 +178,7 @@
 ; CHECK-NOT: dmb
 ; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0]
 ; CHECK-NOT: dmb
-   %r = load atomic i128* %p monotonic, align 16
+   %r = load atomic i128, i128* %p monotonic, align 16
    ret i128 %r
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/arm64-atomic.ll
index fc6e42f..81b555a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-atomic.ll
@@ -107,7 +107,7 @@
 }
 
 define i32 @atomic_load(i32* %p) {
-   %r = load atomic i32* %p seq_cst, align 4
+   %r = load atomic i32, i32* %p seq_cst, align 4
    ret i32 %r
    ; CHECK-LABEL: atomic_load:
    ; CHECK: ldar
@@ -116,21 +116,21 @@
 define i8 @atomic_load_relaxed_8(i8* %p, i32 %off32) {
 ; CHECK-LABEL: atomic_load_relaxed_8:
   %ptr_unsigned = getelementptr i8, i8* %p, i32 4095
-  %val_unsigned = load atomic i8* %ptr_unsigned monotonic, align 1
+  %val_unsigned = load atomic i8, i8* %ptr_unsigned monotonic, align 1
 ; CHECK: ldrb {{w[0-9]+}}, [x0, #4095]
 
   %ptr_regoff = getelementptr i8, i8* %p, i32 %off32
-  %val_regoff = load atomic i8* %ptr_regoff unordered, align 1
+  %val_regoff = load atomic i8, i8* %ptr_regoff unordered, align 1
   %tot1 = add i8 %val_unsigned, %val_regoff
 ; CHECK: ldrb {{w[0-9]+}}, [x0, w1, sxtw]
 
   %ptr_unscaled = getelementptr i8, i8* %p, i32 -256
-  %val_unscaled = load atomic i8* %ptr_unscaled monotonic, align 1
+  %val_unscaled = load atomic i8, i8* %ptr_unscaled monotonic, align 1
   %tot2 = add i8 %tot1, %val_unscaled
 ; CHECK: ldurb {{w[0-9]+}}, [x0, #-256]
 
   %ptr_random = getelementptr i8, i8* %p, i32 1191936 ; 0x123000 (i.e. ADD imm)
-  %val_random = load atomic i8* %ptr_random unordered, align 1
+  %val_random = load atomic i8, i8* %ptr_random unordered, align 1
   %tot3 = add i8 %tot2, %val_random
 ; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
 ; CHECK: ldrb {{w[0-9]+}}, [x[[ADDR]]]
@@ -141,21 +141,21 @@
 define i16 @atomic_load_relaxed_16(i16* %p, i32 %off32) {
 ; CHECK-LABEL: atomic_load_relaxed_16:
   %ptr_unsigned = getelementptr i16, i16* %p, i32 4095
-  %val_unsigned = load atomic i16* %ptr_unsigned monotonic, align 2
+  %val_unsigned = load atomic i16, i16* %ptr_unsigned monotonic, align 2
 ; CHECK: ldrh {{w[0-9]+}}, [x0, #8190]
 
   %ptr_regoff = getelementptr i16, i16* %p, i32 %off32
-  %val_regoff = load atomic i16* %ptr_regoff unordered, align 2
+  %val_regoff = load atomic i16, i16* %ptr_regoff unordered, align 2
   %tot1 = add i16 %val_unsigned, %val_regoff
 ; CHECK: ldrh {{w[0-9]+}}, [x0, w1, sxtw #1]
 
   %ptr_unscaled = getelementptr i16, i16* %p, i32 -128
-  %val_unscaled = load atomic i16* %ptr_unscaled monotonic, align 2
+  %val_unscaled = load atomic i16, i16* %ptr_unscaled monotonic, align 2
   %tot2 = add i16 %tot1, %val_unscaled
 ; CHECK: ldurh {{w[0-9]+}}, [x0, #-256]
 
   %ptr_random = getelementptr i16, i16* %p, i32 595968 ; 0x123000/2 (i.e. ADD imm)
-  %val_random = load atomic i16* %ptr_random unordered, align 2
+  %val_random = load atomic i16, i16* %ptr_random unordered, align 2
   %tot3 = add i16 %tot2, %val_random
 ; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
 ; CHECK: ldrh {{w[0-9]+}}, [x[[ADDR]]]
@@ -166,21 +166,21 @@
 define i32 @atomic_load_relaxed_32(i32* %p, i32 %off32) {
 ; CHECK-LABEL: atomic_load_relaxed_32:
   %ptr_unsigned = getelementptr i32, i32* %p, i32 4095
-  %val_unsigned = load atomic i32* %ptr_unsigned monotonic, align 4
+  %val_unsigned = load atomic i32, i32* %ptr_unsigned monotonic, align 4
 ; CHECK: ldr {{w[0-9]+}}, [x0, #16380]
 
   %ptr_regoff = getelementptr i32, i32* %p, i32 %off32
-  %val_regoff = load atomic i32* %ptr_regoff unordered, align 4
+  %val_regoff = load atomic i32, i32* %ptr_regoff unordered, align 4
   %tot1 = add i32 %val_unsigned, %val_regoff
 ; CHECK: ldr {{w[0-9]+}}, [x0, w1, sxtw #2]
 
   %ptr_unscaled = getelementptr i32, i32* %p, i32 -64
-  %val_unscaled = load atomic i32* %ptr_unscaled monotonic, align 4
+  %val_unscaled = load atomic i32, i32* %ptr_unscaled monotonic, align 4
   %tot2 = add i32 %tot1, %val_unscaled
 ; CHECK: ldur {{w[0-9]+}}, [x0, #-256]
 
   %ptr_random = getelementptr i32, i32* %p, i32 297984 ; 0x123000/4 (i.e. ADD imm)
-  %val_random = load atomic i32* %ptr_random unordered, align 4
+  %val_random = load atomic i32, i32* %ptr_random unordered, align 4
   %tot3 = add i32 %tot2, %val_random
 ; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
 ; CHECK: ldr {{w[0-9]+}}, [x[[ADDR]]]
@@ -191,21 +191,21 @@
 define i64 @atomic_load_relaxed_64(i64* %p, i32 %off32) {
 ; CHECK-LABEL: atomic_load_relaxed_64:
   %ptr_unsigned = getelementptr i64, i64* %p, i32 4095
-  %val_unsigned = load atomic i64* %ptr_unsigned monotonic, align 8
+  %val_unsigned = load atomic i64, i64* %ptr_unsigned monotonic, align 8
 ; CHECK: ldr {{x[0-9]+}}, [x0, #32760]
 
   %ptr_regoff = getelementptr i64, i64* %p, i32 %off32
-  %val_regoff = load atomic i64* %ptr_regoff unordered, align 8
+  %val_regoff = load atomic i64, i64* %ptr_regoff unordered, align 8
   %tot1 = add i64 %val_unsigned, %val_regoff
 ; CHECK: ldr {{x[0-9]+}}, [x0, w1, sxtw #3]
 
   %ptr_unscaled = getelementptr i64, i64* %p, i32 -32
-  %val_unscaled = load atomic i64* %ptr_unscaled monotonic, align 8
+  %val_unscaled = load atomic i64, i64* %ptr_unscaled monotonic, align 8
   %tot2 = add i64 %tot1, %val_unscaled
 ; CHECK: ldur {{x[0-9]+}}, [x0, #-256]
 
   %ptr_random = getelementptr i64, i64* %p, i32 148992 ; 0x123000/8 (i.e. ADD imm)
-  %val_random = load atomic i64* %ptr_random unordered, align 8
+  %val_random = load atomic i64, i64* %ptr_random unordered, align 8
   %tot3 = add i64 %tot2, %val_random
 ; CHECK: add x[[ADDR:[0-9]+]], x0, #291, lsl #12
 ; CHECK: ldr {{x[0-9]+}}, [x[[ADDR]]]
diff --git a/llvm/test/CodeGen/AArch64/arm64-basic-pic.ll b/llvm/test/CodeGen/AArch64/arm64-basic-pic.ll
index 9fdb1e9..e11274e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-basic-pic.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-basic-pic.ll
@@ -5,7 +5,7 @@
 define i32 @get_globalvar() {
 ; CHECK-LABEL: get_globalvar:
 
-  %val = load i32* @var
+  %val = load i32, i32* @var
 ; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
 ; CHECK: ldr x[[GOTLOC:[0-9]+]], [x[[GOTHI]], :got_lo12:var]
 ; CHECK: ldr w0, [x[[GOTLOC]]]
@@ -16,7 +16,7 @@
 define i32* @get_globalvaraddr() {
 ; CHECK-LABEL: get_globalvaraddr:
 
-  %val = load i32* @var
+  %val = load i32, i32* @var
 ; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
 ; CHECK: ldr x0, [x[[GOTHI]], :got_lo12:var]
 
@@ -28,7 +28,7 @@
 define i32 @get_hiddenvar() {
 ; CHECK-LABEL: get_hiddenvar:
 
-  %val = load i32* @hiddenvar
+  %val = load i32, i32* @hiddenvar
 ; CHECK: adrp x[[HI:[0-9]+]], hiddenvar
 ; CHECK: ldr w0, [x[[HI]], :lo12:hiddenvar]
 
@@ -38,7 +38,7 @@
 define i32* @get_hiddenvaraddr() {
 ; CHECK-LABEL: get_hiddenvaraddr:
 
-  %val = load i32* @hiddenvar
+  %val = load i32, i32* @hiddenvar
 ; CHECK: adrp [[HI:x[0-9]+]], hiddenvar
 ; CHECK: add x0, [[HI]], :lo12:hiddenvar
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-bcc.ll b/llvm/test/CodeGen/AArch64/arm64-bcc.ll
index 94e6b6b..66d2f52 100644
--- a/llvm/test/CodeGen/AArch64/arm64-bcc.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-bcc.ll
@@ -28,9 +28,9 @@
 entry:
   %.sroa.0 = alloca i72, align 16
   %.count.value = getelementptr inbounds %Sstruct, %Sstruct* %1, i64 0, i32 0, i32 0
-  %4 = load i64* %.count.value, align 8
+  %4 = load i64, i64* %.count.value, align 8
   %.repeatedValue.value = getelementptr inbounds %Sstruct, %Sstruct* %1, i64 0, i32 1, i32 0
-  %5 = load i32* %.repeatedValue.value, align 8
+  %5 = load i32, i32* %.repeatedValue.value, align 8
   %6 = icmp eq i64 %4, 0
   br label %7
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll b/llvm/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll
index d2985f4..876a691 100644
--- a/llvm/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-big-endian-bitconverts.ll
@@ -5,7 +5,7 @@
 define void @test_i64_f64(double* %p, i64* %q) {
 ; CHECK: ldr
 ; CHECK: str
-    %1 = load double* %p
+    %1 = load double, double* %p
     %2 = fadd double %1, %1
     %3 = bitcast double %2 to i64
     %4 = add i64 %3, %3
@@ -17,7 +17,7 @@
 define void @test_i64_v1i64(<1 x i64>* %p, i64* %q) {
 ; CHECK: ldr
 ; CHECK: str
-    %1 = load <1 x i64>* %p
+    %1 = load <1 x i64>, <1 x i64>* %p
     %2 = add <1 x i64> %1, %1
     %3 = bitcast <1 x i64> %2 to i64
     %4 = add i64 %3, %3
@@ -30,7 +30,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.2s }
 ; CHECK: rev64 v{{[0-9]+}}.2s
 ; CHECK: str
-    %1 = load <2 x float>* %p
+    %1 = load <2 x float>, <2 x float>* %p
     %2 = fadd <2 x float> %1, %1
     %3 = bitcast <2 x float> %2 to i64
     %4 = add i64 %3, %3
@@ -43,7 +43,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.2s }
 ; CHECK: rev64 v{{[0-9]+}}.2s
 ; CHECK: str
-    %1 = load <2 x i32>* %p
+    %1 = load <2 x i32>, <2 x i32>* %p
     %2 = add <2 x i32> %1, %1
     %3 = bitcast <2 x i32> %2 to i64
     %4 = add i64 %3, %3
@@ -56,7 +56,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.4h }
 ; CHECK: rev64 v{{[0-9]+}}.4h
 ; CHECK: str
-    %1 = load <4 x i16>* %p
+    %1 = load <4 x i16>, <4 x i16>* %p
     %2 = add <4 x i16> %1, %1
     %3 = bitcast <4 x i16> %2 to i64
     %4 = add i64 %3, %3
@@ -69,7 +69,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.8b }
 ; CHECK: rev64 v{{[0-9]+}}.8b
 ; CHECK: str
-    %1 = load <8 x i8>* %p
+    %1 = load <8 x i8>, <8 x i8>* %p
     %2 = add <8 x i8> %1, %1
     %3 = bitcast <8 x i8> %2 to i64
     %4 = add i64 %3, %3
@@ -81,7 +81,7 @@
 define void @test_f64_i64(i64* %p, double* %q) {
 ; CHECK: ldr
 ; CHECK: str
-    %1 = load i64* %p
+    %1 = load i64, i64* %p
     %2 = add i64 %1, %1
     %3 = bitcast i64 %2 to double
     %4 = fadd double %3, %3
@@ -93,7 +93,7 @@
 define void @test_f64_v1i64(<1 x i64>* %p, double* %q) {
 ; CHECK: ldr
 ; CHECK: str
-    %1 = load <1 x i64>* %p
+    %1 = load <1 x i64>, <1 x i64>* %p
     %2 = add <1 x i64> %1, %1
     %3 = bitcast <1 x i64> %2 to double
     %4 = fadd double %3, %3
@@ -106,7 +106,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.2s }
 ; CHECK: rev64 v{{[0-9]+}}.2s
 ; CHECK: str
-    %1 = load <2 x float>* %p
+    %1 = load <2 x float>, <2 x float>* %p
     %2 = fadd <2 x float> %1, %1
     %3 = bitcast <2 x float> %2 to double
     %4 = fadd double %3, %3
@@ -119,7 +119,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.2s }
 ; CHECK: rev64 v{{[0-9]+}}.2s
 ; CHECK: str
-    %1 = load <2 x i32>* %p
+    %1 = load <2 x i32>, <2 x i32>* %p
     %2 = add <2 x i32> %1, %1
     %3 = bitcast <2 x i32> %2 to double
     %4 = fadd double %3, %3
@@ -132,7 +132,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.4h }
 ; CHECK: rev64 v{{[0-9]+}}.4h
 ; CHECK: str
-    %1 = load <4 x i16>* %p
+    %1 = load <4 x i16>, <4 x i16>* %p
     %2 = add <4 x i16> %1, %1
     %3 = bitcast <4 x i16> %2 to double
     %4 = fadd double %3, %3
@@ -145,7 +145,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.8b }
 ; CHECK: rev64 v{{[0-9]+}}.8b
 ; CHECK: str
-    %1 = load <8 x i8>* %p
+    %1 = load <8 x i8>, <8 x i8>* %p
     %2 = add <8 x i8> %1, %1
     %3 = bitcast <8 x i8> %2 to double
     %4 = fadd double %3, %3
@@ -157,7 +157,7 @@
 define void @test_v1i64_i64(i64* %p, <1 x i64>* %q) {
 ; CHECK: ldr
 ; CHECK: str
-    %1 = load i64* %p
+    %1 = load i64, i64* %p
     %2 = add i64 %1, %1
     %3 = bitcast i64 %2 to <1 x i64>
     %4 = add <1 x i64> %3, %3
@@ -169,7 +169,7 @@
 define void @test_v1i64_f64(double* %p, <1 x i64>* %q) {
 ; CHECK: ldr
 ; CHECK: str
-    %1 = load double* %p
+    %1 = load double, double* %p
     %2 = fadd double %1, %1
     %3 = bitcast double %2 to <1 x i64>
     %4 = add <1 x i64> %3, %3
@@ -182,7 +182,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.2s }
 ; CHECK: rev64 v{{[0-9]+}}.2s
 ; CHECK: str
-    %1 = load <2 x float>* %p
+    %1 = load <2 x float>, <2 x float>* %p
     %2 = fadd <2 x float> %1, %1
     %3 = bitcast <2 x float> %2 to <1 x i64>
     %4 = add <1 x i64> %3, %3
@@ -195,7 +195,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.2s }
 ; CHECK: rev64 v{{[0-9]+}}.2s
 ; CHECK: str
-    %1 = load <2 x i32>* %p
+    %1 = load <2 x i32>, <2 x i32>* %p
     %2 = add <2 x i32> %1, %1
     %3 = bitcast <2 x i32> %2 to <1 x i64>
     %4 = add <1 x i64> %3, %3
@@ -208,7 +208,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.4h }
 ; CHECK: rev64 v{{[0-9]+}}.4h
 ; CHECK: str
-    %1 = load <4 x i16>* %p
+    %1 = load <4 x i16>, <4 x i16>* %p
     %2 = add <4 x i16> %1, %1
     %3 = bitcast <4 x i16> %2 to <1 x i64>
     %4 = add <1 x i64> %3, %3
@@ -221,7 +221,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.8b }
 ; CHECK: rev64 v{{[0-9]+}}.8b
 ; CHECK: str
-    %1 = load <8 x i8>* %p
+    %1 = load <8 x i8>, <8 x i8>* %p
     %2 = add <8 x i8> %1, %1
     %3 = bitcast <8 x i8> %2 to <1 x i64>
     %4 = add <1 x i64> %3, %3
@@ -234,7 +234,7 @@
 ; CHECK: ldr
 ; CHECK: rev64 v{{[0-9]+}}.2s
 ; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load i64* %p
+    %1 = load i64, i64* %p
     %2 = add i64 %1, %1
     %3 = bitcast i64 %2 to <2 x float>
     %4 = fadd <2 x float> %3, %3
@@ -247,7 +247,7 @@
 ; CHECK: ldr
 ; CHECK: rev64 v{{[0-9]+}}.2s
 ; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load double* %p
+    %1 = load double, double* %p
     %2 = fadd double %1, %1
     %3 = bitcast double %2 to <2 x float>
     %4 = fadd <2 x float> %3, %3
@@ -260,7 +260,7 @@
 ; CHECK: ldr
 ; CHECK: rev64 v{{[0-9]+}}.2s
 ; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load <1 x i64>* %p
+    %1 = load <1 x i64>, <1 x i64>* %p
     %2 = add <1 x i64> %1, %1
     %3 = bitcast <1 x i64> %2 to <2 x float>
     %4 = fadd <2 x float> %3, %3
@@ -272,7 +272,7 @@
 define void @test_v2f32_v2i32(<2 x i32>* %p, <2 x float>* %q) {
 ; CHECK: ld1 { v{{[0-9]+}}.2s }
 ; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load <2 x i32>* %p
+    %1 = load <2 x i32>, <2 x i32>* %p
     %2 = add <2 x i32> %1, %1
     %3 = bitcast <2 x i32> %2 to <2 x float>
     %4 = fadd <2 x float> %3, %3
@@ -285,7 +285,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.4h }
 ; CHECK: rev32 v{{[0-9]+}}.4h
 ; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load <4 x i16>* %p
+    %1 = load <4 x i16>, <4 x i16>* %p
     %2 = add <4 x i16> %1, %1
     %3 = bitcast <4 x i16> %2 to <2 x float>
     %4 = fadd <2 x float> %3, %3
@@ -298,7 +298,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.8b }
 ; CHECK: rev32 v{{[0-9]+}}.8b
 ; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load <8 x i8>* %p
+    %1 = load <8 x i8>, <8 x i8>* %p
     %2 = add <8 x i8> %1, %1
     %3 = bitcast <8 x i8> %2 to <2 x float>
     %4 = fadd <2 x float> %3, %3
@@ -311,7 +311,7 @@
 ; CHECK: ldr
 ; CHECK: rev64 v{{[0-9]+}}.2s
 ; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load i64* %p
+    %1 = load i64, i64* %p
     %2 = add i64 %1, %1
     %3 = bitcast i64 %2 to <2 x i32>
     %4 = add <2 x i32> %3, %3
@@ -324,7 +324,7 @@
 ; CHECK: ldr
 ; CHECK: rev64 v{{[0-9]+}}.2s
 ; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load double* %p
+    %1 = load double, double* %p
     %2 = fadd double %1, %1
     %3 = bitcast double %2 to <2 x i32>
     %4 = add <2 x i32> %3, %3
@@ -337,7 +337,7 @@
 ; CHECK: ldr
 ; CHECK: rev64 v{{[0-9]+}}.2s
 ; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load <1 x i64>* %p
+    %1 = load <1 x i64>, <1 x i64>* %p
     %2 = add <1 x i64> %1, %1
     %3 = bitcast <1 x i64> %2 to <2 x i32>
     %4 = add <2 x i32> %3, %3
@@ -349,7 +349,7 @@
 define void @test_v2i32_v2f32(<2 x float>* %p, <2 x i32>* %q) {
 ; CHECK: ld1 { v{{[0-9]+}}.2s }
 ; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load <2 x float>* %p
+    %1 = load <2 x float>, <2 x float>* %p
     %2 = fadd <2 x float> %1, %1
     %3 = bitcast <2 x float> %2 to <2 x i32>
     %4 = add <2 x i32> %3, %3
@@ -362,7 +362,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.4h }
 ; CHECK: rev32 v{{[0-9]+}}.4h
 ; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load <4 x i16>* %p
+    %1 = load <4 x i16>, <4 x i16>* %p
     %2 = add <4 x i16> %1, %1
     %3 = bitcast <4 x i16> %2 to <2 x i32>
     %4 = add <2 x i32> %3, %3
@@ -375,7 +375,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.8b }
 ; CHECK: rev32 v{{[0-9]+}}.8b
 ; CHECK: st1 { v{{[0-9]+}}.2s }
-    %1 = load <8 x i8>* %p
+    %1 = load <8 x i8>, <8 x i8>* %p
     %2 = add <8 x i8> %1, %1
     %3 = bitcast <8 x i8> %2 to <2 x i32>
     %4 = add <2 x i32> %3, %3
@@ -388,7 +388,7 @@
 ; CHECK: ldr
 ; CHECK: rev64 v{{[0-9]+}}.4h
 ; CHECK: st1 { v{{[0-9]+}}.4h }
-    %1 = load i64* %p
+    %1 = load i64, i64* %p
     %2 = add i64 %1, %1
     %3 = bitcast i64 %2 to <4 x i16>
     %4 = add <4 x i16> %3, %3
@@ -401,7 +401,7 @@
 ; CHECK: ldr
 ; CHECK: rev64 v{{[0-9]+}}.4h
 ; CHECK: st1 { v{{[0-9]+}}.4h }
-    %1 = load double* %p
+    %1 = load double, double* %p
     %2 = fadd double %1, %1
     %3 = bitcast double %2 to <4 x i16>
     %4 = add <4 x i16> %3, %3
@@ -414,7 +414,7 @@
 ; CHECK: ldr
 ; CHECK: rev64 v{{[0-9]+}}.4h
 ; CHECK: st1 { v{{[0-9]+}}.4h }
-    %1 = load <1 x i64>* %p
+    %1 = load <1 x i64>, <1 x i64>* %p
     %2 = add <1 x i64> %1, %1
     %3 = bitcast <1 x i64> %2 to <4 x i16>
     %4 = add <4 x i16> %3, %3
@@ -427,7 +427,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.2s }
 ; CHECK: rev32 v{{[0-9]+}}.4h
 ; CHECK: st1 { v{{[0-9]+}}.4h }
-    %1 = load <2 x float>* %p
+    %1 = load <2 x float>, <2 x float>* %p
     %2 = fadd <2 x float> %1, %1
     %3 = bitcast <2 x float> %2 to <4 x i16>
     %4 = add <4 x i16> %3, %3
@@ -440,7 +440,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.2s }
 ; CHECK: rev32 v{{[0-9]+}}.4h
 ; CHECK: st1 { v{{[0-9]+}}.4h }
-    %1 = load <2 x i32>* %p
+    %1 = load <2 x i32>, <2 x i32>* %p
     %2 = add <2 x i32> %1, %1
     %3 = bitcast <2 x i32> %2 to <4 x i16>
     %4 = add <4 x i16> %3, %3
@@ -453,7 +453,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.8b }
 ; CHECK: rev16 v{{[0-9]+}}.8b
 ; CHECK: st1 { v{{[0-9]+}}.4h }
-    %1 = load <8 x i8>* %p
+    %1 = load <8 x i8>, <8 x i8>* %p
     %2 = add <8 x i8> %1, %1
     %3 = bitcast <8 x i8> %2 to <4 x i16>
     %4 = add <4 x i16> %3, %3
@@ -466,7 +466,7 @@
 ; CHECK: ldr
 ; CHECK: rev64 v{{[0-9]+}}.8b
 ; CHECK: st1 { v{{[0-9]+}}.8b }
-    %1 = load i64* %p
+    %1 = load i64, i64* %p
     %2 = add i64 %1, %1
     %3 = bitcast i64 %2 to <8 x i8>
     %4 = add <8 x i8> %3, %3
@@ -479,7 +479,7 @@
 ; CHECK: ldr
 ; CHECK: rev64 v{{[0-9]+}}.8b
 ; CHECK: st1 { v{{[0-9]+}}.8b }
-    %1 = load double* %p
+    %1 = load double, double* %p
     %2 = fadd double %1, %1
     %3 = bitcast double %2 to <8 x i8>
     %4 = add <8 x i8> %3, %3
@@ -492,7 +492,7 @@
 ; CHECK: ldr
 ; CHECK: rev64 v{{[0-9]+}}.8b
 ; CHECK: st1 { v{{[0-9]+}}.8b }
-    %1 = load <1 x i64>* %p
+    %1 = load <1 x i64>, <1 x i64>* %p
     %2 = add <1 x i64> %1, %1
     %3 = bitcast <1 x i64> %2 to <8 x i8>
     %4 = add <8 x i8> %3, %3
@@ -505,7 +505,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.2s }
 ; CHECK: rev32 v{{[0-9]+}}.8b
 ; CHECK: st1 { v{{[0-9]+}}.8b }
-    %1 = load <2 x float>* %p
+    %1 = load <2 x float>, <2 x float>* %p
     %2 = fadd <2 x float> %1, %1
     %3 = bitcast <2 x float> %2 to <8 x i8>
     %4 = add <8 x i8> %3, %3
@@ -518,7 +518,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.2s }
 ; CHECK: rev32 v{{[0-9]+}}.8b
 ; CHECK: st1 { v{{[0-9]+}}.8b }
-    %1 = load <2 x i32>* %p
+    %1 = load <2 x i32>, <2 x i32>* %p
     %2 = add <2 x i32> %1, %1
     %3 = bitcast <2 x i32> %2 to <8 x i8>
     %4 = add <8 x i8> %3, %3
@@ -531,7 +531,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.4h }
 ; CHECK: rev16 v{{[0-9]+}}.8b
 ; CHECK: st1 { v{{[0-9]+}}.8b }
-    %1 = load <4 x i16>* %p
+    %1 = load <4 x i16>, <4 x i16>* %p
     %2 = add <4 x i16> %1, %1
     %3 = bitcast <4 x i16> %2 to <8 x i8>
     %4 = add <8 x i8> %3, %3
@@ -544,7 +544,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.2d }
 ; CHECK: ext
 ; CHECK: str
-    %1 = load <2 x double>* %p
+    %1 = load <2 x double>, <2 x double>* %p
     %2 = fadd <2 x double> %1, %1
     %3 = bitcast <2 x double> %2 to fp128
     %4 = fadd fp128 %3, %3
@@ -557,7 +557,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.2d }
 ; CHECK: ext
 ; CHECK: str
-    %1 = load <2 x i64>* %p
+    %1 = load <2 x i64>, <2 x i64>* %p
     %2 = add <2 x i64> %1, %1
     %3 = bitcast <2 x i64> %2 to fp128
     %4 = fadd fp128 %3, %3
@@ -572,7 +572,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
 ; CHECK: str q
-    %1 = load <4 x float>* %p
+    %1 = load <4 x float>, <4 x float>* %p
     %2 = fadd <4 x float> %1, %1
     %3 = bitcast <4 x float> %2 to fp128
     %4 = fadd fp128 %3, %3
@@ -586,7 +586,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
 ; CHECK: str
-    %1 = load <4 x i32>* %p
+    %1 = load <4 x i32>, <4 x i32>* %p
     %2 = add <4 x i32> %1, %1
     %3 = bitcast <4 x i32> %2 to fp128
     %4 = fadd fp128 %3, %3
@@ -600,7 +600,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.8h
 ; CHECK: ext
 ; CHECK: str
-    %1 = load <8 x i16>* %p
+    %1 = load <8 x i16>, <8 x i16>* %p
     %2 = add <8 x i16> %1, %1
     %3 = bitcast <8 x i16> %2 to fp128
     %4 = fadd fp128 %3, %3
@@ -613,7 +613,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.16b }
 ; CHECK: ext
 ; CHECK: str q
-    %1 = load <16 x i8>* %p
+    %1 = load <16 x i8>, <16 x i8>* %p
     %2 = add <16 x i8> %1, %1
     %3 = bitcast <16 x i8> %2 to fp128
     %4 = fadd fp128 %3, %3
@@ -626,7 +626,7 @@
 ; CHECK: ldr
 ; CHECK: ext
 ; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load fp128* %p
+    %1 = load fp128, fp128* %p
     %2 = fadd fp128 %1, %1
     %3 = bitcast fp128 %2 to <2 x double>
     %4 = fadd <2 x double> %3, %3
@@ -638,7 +638,7 @@
 define void @test_v2f64_v2i64(<2 x i64>* %p, <2 x double>* %q) {
 ; CHECK: ld1 { v{{[0-9]+}}.2d }
 ; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <2 x i64>* %p
+    %1 = load <2 x i64>, <2 x i64>* %p
     %2 = add <2 x i64> %1, %1
     %3 = bitcast <2 x i64> %2 to <2 x double>
     %4 = fadd <2 x double> %3, %3
@@ -652,7 +652,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <4 x float>* %p
+    %1 = load <4 x float>, <4 x float>* %p
     %2 = fadd <4 x float> %1, %1
     %3 = bitcast <4 x float> %2 to <2 x double>
     %4 = fadd <2 x double> %3, %3
@@ -665,7 +665,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.4s }
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <4 x i32>* %p
+    %1 = load <4 x i32>, <4 x i32>* %p
     %2 = add <4 x i32> %1, %1
     %3 = bitcast <4 x i32> %2 to <2 x double>
     %4 = fadd <2 x double> %3, %3
@@ -678,7 +678,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.8h }
 ; CHECK: rev64 v{{[0-9]+}}.8h
 ; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <8 x i16>* %p
+    %1 = load <8 x i16>, <8 x i16>* %p
     %2 = add <8 x i16> %1, %1
     %3 = bitcast <8 x i16> %2 to <2 x double>
     %4 = fadd <2 x double> %3, %3
@@ -691,7 +691,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.16b }
 ; CHECK: rev64 v{{[0-9]+}}.16b
 ; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <16 x i8>* %p
+    %1 = load <16 x i8>, <16 x i8>* %p
     %2 = add <16 x i8> %1, %1
     %3 = bitcast <16 x i8> %2 to <2 x double>
     %4 = fadd <2 x double> %3, %3
@@ -704,7 +704,7 @@
 ; CHECK: ldr
 ; CHECK: ext
 ; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load fp128* %p
+    %1 = load fp128, fp128* %p
     %2 = fadd fp128 %1, %1
     %3 = bitcast fp128 %2 to <2 x i64>
     %4 = add <2 x i64> %3, %3
@@ -716,7 +716,7 @@
 define void @test_v2i64_v2f64(<2 x double>* %p, <2 x i64>* %q) {
 ; CHECK: ld1 { v{{[0-9]+}}.2d }
 ; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <2 x double>* %p
+    %1 = load <2 x double>, <2 x double>* %p
     %2 = fadd <2 x double> %1, %1
     %3 = bitcast <2 x double> %2 to <2 x i64>
     %4 = add <2 x i64> %3, %3
@@ -730,7 +730,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <4 x float>* %p
+    %1 = load <4 x float>, <4 x float>* %p
     %2 = fadd <4 x float> %1, %1
     %3 = bitcast <4 x float> %2 to <2 x i64>
     %4 = add <2 x i64> %3, %3
@@ -743,7 +743,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.4s }
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <4 x i32>* %p
+    %1 = load <4 x i32>, <4 x i32>* %p
     %2 = add <4 x i32> %1, %1
     %3 = bitcast <4 x i32> %2 to <2 x i64>
     %4 = add <2 x i64> %3, %3
@@ -756,7 +756,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.8h }
 ; CHECK: rev64 v{{[0-9]+}}.8h
 ; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <8 x i16>* %p
+    %1 = load <8 x i16>, <8 x i16>* %p
     %2 = add <8 x i16> %1, %1
     %3 = bitcast <8 x i16> %2 to <2 x i64>
     %4 = add <2 x i64> %3, %3
@@ -769,7 +769,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.16b }
 ; CHECK: rev64 v{{[0-9]+}}.16b
 ; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <16 x i8>* %p
+    %1 = load <16 x i8>, <16 x i8>* %p
     %2 = add <16 x i8> %1, %1
     %3 = bitcast <16 x i8> %2 to <2 x i64>
     %4 = add <2 x i64> %3, %3
@@ -784,7 +784,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load fp128* %p
+    %1 = load fp128, fp128* %p
     %2 = fadd fp128 %1, %1
     %3 = bitcast fp128 %2 to <4 x float>
     %4 = fadd <4 x float> %3, %3
@@ -798,7 +798,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <2 x double>* %p
+    %1 = load <2 x double>, <2 x double>* %p
     %2 = fadd <2 x double> %1, %1
     %3 = bitcast <2 x double> %2 to <4 x float>
     %4 = fadd <4 x float> %3, %3
@@ -812,7 +812,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <2 x i64>* %p
+    %1 = load <2 x i64>, <2 x i64>* %p
     %2 = add <2 x i64> %1, %1
     %3 = bitcast <2 x i64> %2 to <4 x float>
     %4 = fadd <4 x float> %3, %3
@@ -825,7 +825,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.4s }
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <4 x i32>* %p
+    %1 = load <4 x i32>, <4 x i32>* %p
     %2 = add <4 x i32> %1, %1
     %3 = bitcast <4 x i32> %2 to <4 x float>
     %4 = fadd <4 x float> %3, %3
@@ -839,7 +839,7 @@
 ; CHECK: rev32 v{{[0-9]+}}.8h
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <8 x i16>* %p
+    %1 = load <8 x i16>, <8 x i16>* %p
     %2 = add <8 x i16> %1, %1
     %3 = bitcast <8 x i16> %2 to <4 x float>
     %4 = fadd <4 x float> %3, %3
@@ -853,7 +853,7 @@
 ; CHECK: rev32 v{{[0-9]+}}.16b
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: st1 { v{{[0-9]+}}.2d }
-    %1 = load <16 x i8>* %p
+    %1 = load <16 x i8>, <16 x i8>* %p
     %2 = add <16 x i8> %1, %1
     %3 = bitcast <16 x i8> %2 to <4 x float>
     %4 = fadd <4 x float> %3, %3
@@ -867,7 +867,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
 ; CHECK: st1 { v{{[0-9]+}}.4s }
-    %1 = load fp128* %p
+    %1 = load fp128, fp128* %p
     %2 = fadd fp128 %1, %1
     %3 = bitcast fp128 %2 to <4 x i32>
     %4 = add <4 x i32> %3, %3
@@ -880,7 +880,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.2d }
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: st1 { v{{[0-9]+}}.4s }
-    %1 = load <2 x double>* %p
+    %1 = load <2 x double>, <2 x double>* %p
     %2 = fadd <2 x double> %1, %1
     %3 = bitcast <2 x double> %2 to <4 x i32>
     %4 = add <4 x i32> %3, %3
@@ -893,7 +893,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.2d }
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: st1 { v{{[0-9]+}}.4s }
-    %1 = load <2 x i64>* %p
+    %1 = load <2 x i64>, <2 x i64>* %p
     %2 = add <2 x i64> %1, %1
     %3 = bitcast <2 x i64> %2 to <4 x i32>
     %4 = add <4 x i32> %3, %3
@@ -906,7 +906,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.2d }
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: st1 { v{{[0-9]+}}.4s }
-    %1 = load <4 x float>* %p
+    %1 = load <4 x float>, <4 x float>* %p
     %2 = fadd <4 x float> %1, %1
     %3 = bitcast <4 x float> %2 to <4 x i32>
     %4 = add <4 x i32> %3, %3
@@ -919,7 +919,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.8h }
 ; CHECK: rev32 v{{[0-9]+}}.8h
 ; CHECK: st1 { v{{[0-9]+}}.4s }
-    %1 = load <8 x i16>* %p
+    %1 = load <8 x i16>, <8 x i16>* %p
     %2 = add <8 x i16> %1, %1
     %3 = bitcast <8 x i16> %2 to <4 x i32>
     %4 = add <4 x i32> %3, %3
@@ -932,7 +932,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.16b }
 ; CHECK: rev32 v{{[0-9]+}}.16b
 ; CHECK: st1 { v{{[0-9]+}}.4s }
-    %1 = load <16 x i8>* %p
+    %1 = load <16 x i8>, <16 x i8>* %p
     %2 = add <16 x i8> %1, %1
     %3 = bitcast <16 x i8> %2 to <4 x i32>
     %4 = add <4 x i32> %3, %3
@@ -946,7 +946,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.8h
 ; CHECK: ext
 ; CHECK: st1 { v{{[0-9]+}}.8h }
-    %1 = load fp128* %p
+    %1 = load fp128, fp128* %p
     %2 = fadd fp128 %1, %1
     %3 = bitcast fp128 %2 to <8 x i16>
     %4 = add <8 x i16> %3, %3
@@ -959,7 +959,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.2d }
 ; CHECK: rev64 v{{[0-9]+}}.8h
 ; CHECK: st1 { v{{[0-9]+}}.8h }
-    %1 = load <2 x double>* %p
+    %1 = load <2 x double>, <2 x double>* %p
     %2 = fadd <2 x double> %1, %1
     %3 = bitcast <2 x double> %2 to <8 x i16>
     %4 = add <8 x i16> %3, %3
@@ -972,7 +972,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.2d }
 ; CHECK: rev64 v{{[0-9]+}}.8h
 ; CHECK: st1 { v{{[0-9]+}}.8h }
-    %1 = load <2 x i64>* %p
+    %1 = load <2 x i64>, <2 x i64>* %p
     %2 = add <2 x i64> %1, %1
     %3 = bitcast <2 x i64> %2 to <8 x i16>
     %4 = add <8 x i16> %3, %3
@@ -986,7 +986,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: rev32 v{{[0-9]+}}.8h
 ; CHECK: st1 { v{{[0-9]+}}.8h }
-    %1 = load <4 x float>* %p
+    %1 = load <4 x float>, <4 x float>* %p
     %2 = fadd <4 x float> %1, %1
     %3 = bitcast <4 x float> %2 to <8 x i16>
     %4 = add <8 x i16> %3, %3
@@ -999,7 +999,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.4s }
 ; CHECK: rev32 v{{[0-9]+}}.8h
 ; CHECK: st1 { v{{[0-9]+}}.8h }
-    %1 = load <4 x i32>* %p
+    %1 = load <4 x i32>, <4 x i32>* %p
     %2 = add <4 x i32> %1, %1
     %3 = bitcast <4 x i32> %2 to <8 x i16>
     %4 = add <8 x i16> %3, %3
@@ -1012,7 +1012,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.16b }
 ; CHECK: rev16 v{{[0-9]+}}.16b
 ; CHECK: st1 { v{{[0-9]+}}.8h }
-    %1 = load <16 x i8>* %p
+    %1 = load <16 x i8>, <16 x i8>* %p
     %2 = add <16 x i8> %1, %1
     %3 = bitcast <16 x i8> %2 to <8 x i16>
     %4 = add <8 x i16> %3, %3
@@ -1026,7 +1026,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.16b
 ; CHECK: ext
 ; CHECK: st1 { v{{[0-9]+}}.16b }
-    %1 = load fp128* %p
+    %1 = load fp128, fp128* %p
     %2 = fadd fp128 %1, %1
     %3 = bitcast fp128 %2 to <16 x i8>
     %4 = add <16 x i8> %3, %3
@@ -1039,7 +1039,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.2d }
 ; CHECK: rev64 v{{[0-9]+}}.16b
 ; CHECK: st1 { v{{[0-9]+}}.16b }
-    %1 = load <2 x double>* %p
+    %1 = load <2 x double>, <2 x double>* %p
     %2 = fadd <2 x double> %1, %1
     %3 = bitcast <2 x double> %2 to <16 x i8>
     %4 = add <16 x i8> %3, %3
@@ -1052,7 +1052,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.2d }
 ; CHECK: rev64 v{{[0-9]+}}.16b
 ; CHECK: st1 { v{{[0-9]+}}.16b }
-    %1 = load <2 x i64>* %p
+    %1 = load <2 x i64>, <2 x i64>* %p
     %2 = add <2 x i64> %1, %1
     %3 = bitcast <2 x i64> %2 to <16 x i8>
     %4 = add <16 x i8> %3, %3
@@ -1066,7 +1066,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: rev32 v{{[0-9]+}}.16b
 ; CHECK: st1 { v{{[0-9]+}}.16b }
-    %1 = load <4 x float>* %p
+    %1 = load <4 x float>, <4 x float>* %p
     %2 = fadd <4 x float> %1, %1
     %3 = bitcast <4 x float> %2 to <16 x i8>
     %4 = add <16 x i8> %3, %3
@@ -1079,7 +1079,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.4s }
 ; CHECK: rev32 v{{[0-9]+}}.16b
 ; CHECK: st1 { v{{[0-9]+}}.16b }
-    %1 = load <4 x i32>* %p
+    %1 = load <4 x i32>, <4 x i32>* %p
     %2 = add <4 x i32> %1, %1
     %3 = bitcast <4 x i32> %2 to <16 x i8>
     %4 = add <16 x i8> %3, %3
@@ -1092,7 +1092,7 @@
 ; CHECK: ld1 { v{{[0-9]+}}.8h }
 ; CHECK: rev16 v{{[0-9]+}}.16b
 ; CHECK: st1 { v{{[0-9]+}}.16b }
-    %1 = load <8 x i16>* %p
+    %1 = load <8 x i16>, <8 x i16>* %p
     %2 = add <8 x i16> %1, %1
     %3 = bitcast <8 x i16> %2 to <16 x i8>
     %4 = add <16 x i8> %3, %3
diff --git a/llvm/test/CodeGen/AArch64/arm64-big-endian-varargs.ll b/llvm/test/CodeGen/AArch64/arm64-big-endian-varargs.ll
index 296eb19..e5e1684 100644
--- a/llvm/test/CodeGen/AArch64/arm64-big-endian-varargs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-big-endian-varargs.ll
@@ -22,7 +22,7 @@
   %vl1 = bitcast %struct.__va_list* %vl to i8*
   call void @llvm.va_start(i8* %vl1)
   %vr_offs_p = getelementptr inbounds %struct.__va_list, %struct.__va_list* %vl, i64 0, i32 4
-  %vr_offs = load i32* %vr_offs_p, align 4
+  %vr_offs = load i32, i32* %vr_offs_p, align 4
   %0 = icmp sgt i32 %vr_offs, -1
   br i1 %0, label %vaarg.on_stack, label %vaarg.maybe_reg
 
@@ -34,7 +34,7 @@
 
 vaarg.in_reg:                                     ; preds = %vaarg.maybe_reg
   %reg_top_p = getelementptr inbounds %struct.__va_list, %struct.__va_list* %vl, i64 0, i32 2
-  %reg_top = load i8** %reg_top_p, align 8
+  %reg_top = load i8*, i8** %reg_top_p, align 8
   %1 = sext i32 %vr_offs to i64
   %2 = getelementptr i8, i8* %reg_top, i64 %1
   %3 = ptrtoint i8* %2 to i64
@@ -44,7 +44,7 @@
 
 vaarg.on_stack:                                   ; preds = %vaarg.maybe_reg, %entry
   %stack_p = getelementptr inbounds %struct.__va_list, %struct.__va_list* %vl, i64 0, i32 0
-  %stack = load i8** %stack_p, align 8
+  %stack = load i8*, i8** %stack_p, align 8
   %new_stack = getelementptr i8, i8* %stack, i64 8
   store i8* %new_stack, i8** %stack_p, align 8
   br label %vaarg.end
@@ -52,7 +52,7 @@
 vaarg.end:                                        ; preds = %vaarg.on_stack, %vaarg.in_reg
   %.sink = phi i8* [ %4, %vaarg.in_reg ], [ %stack, %vaarg.on_stack ]
   %5 = bitcast i8* %.sink to double*
-  %6 = load double* %5, align 8
+  %6 = load double, double* %5, align 8
   call void @llvm.va_end(i8* %vl1)
   ret double %6
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll b/llvm/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll
index d72d0a5..c280bef 100644
--- a/llvm/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-big-endian-vector-caller.ll
@@ -5,7 +5,7 @@
 declare i64 @test_i64_f64_helper(double %p)
 define void @test_i64_f64(double* %p, i64* %q) {
 ; CHECK-NOT: rev
-    %1 = load double* %p
+    %1 = load double, double* %p
     %2 = fadd double %1, %1
     %3 = call i64 @test_i64_f64_helper(double %2)
     %4 = add i64 %3, %3
@@ -17,7 +17,7 @@
 declare i64 @test_i64_v1i64_helper(<1 x i64> %p)
 define void @test_i64_v1i64(<1 x i64>* %p, i64* %q) {
 ; CHECK-NOT: rev
-    %1 = load <1 x i64>* %p
+    %1 = load <1 x i64>, <1 x i64>* %p
     %2 = add <1 x i64> %1, %1
     %3 = call i64 @test_i64_v1i64_helper(<1 x i64> %2)
     %4 = add i64 %3, %3
@@ -29,7 +29,7 @@
 declare i64 @test_i64_v2f32_helper(<2 x float> %p)
 define void @test_i64_v2f32(<2 x float>* %p, i64* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <2 x float>* %p
+    %1 = load <2 x float>, <2 x float>* %p
     %2 = fadd <2 x float> %1, %1
     %3 = call i64 @test_i64_v2f32_helper(<2 x float> %2)
     %4 = add i64 %3, %3
@@ -41,7 +41,7 @@
 declare i64 @test_i64_v2i32_helper(<2 x i32> %p)
 define void @test_i64_v2i32(<2 x i32>* %p, i64* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <2 x i32>* %p
+    %1 = load <2 x i32>, <2 x i32>* %p
     %2 = add <2 x i32> %1, %1
     %3 = call i64 @test_i64_v2i32_helper(<2 x i32> %2)
     %4 = add i64 %3, %3
@@ -53,7 +53,7 @@
 declare i64 @test_i64_v4i16_helper(<4 x i16> %p)
 define void @test_i64_v4i16(<4 x i16>* %p, i64* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = load <4 x i16>* %p
+    %1 = load <4 x i16>, <4 x i16>* %p
     %2 = add <4 x i16> %1, %1
     %3 = call i64 @test_i64_v4i16_helper(<4 x i16> %2)
     %4 = add i64 %3, %3
@@ -65,7 +65,7 @@
 declare i64 @test_i64_v8i8_helper(<8 x i8> %p)
 define void @test_i64_v8i8(<8 x i8>* %p, i64* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = load <8 x i8>* %p
+    %1 = load <8 x i8>, <8 x i8>* %p
     %2 = add <8 x i8> %1, %1
     %3 = call i64 @test_i64_v8i8_helper(<8 x i8> %2)
     %4 = add i64 %3, %3
@@ -77,7 +77,7 @@
 declare double @test_f64_i64_helper(i64 %p)
 define void @test_f64_i64(i64* %p, double* %q) {
 ; CHECK-NOT: rev
-    %1 = load i64* %p
+    %1 = load i64, i64* %p
     %2 = add i64 %1, %1
     %3 = call double @test_f64_i64_helper(i64 %2)
     %4 = fadd double %3, %3
@@ -89,7 +89,7 @@
 declare double @test_f64_v1i64_helper(<1 x i64> %p)
 define void @test_f64_v1i64(<1 x i64>* %p, double* %q) {
 ; CHECK-NOT: rev
-    %1 = load <1 x i64>* %p
+    %1 = load <1 x i64>, <1 x i64>* %p
     %2 = add <1 x i64> %1, %1
     %3 = call double @test_f64_v1i64_helper(<1 x i64> %2)
     %4 = fadd double %3, %3
@@ -101,7 +101,7 @@
 declare double @test_f64_v2f32_helper(<2 x float> %p)
 define void @test_f64_v2f32(<2 x float>* %p, double* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <2 x float>* %p
+    %1 = load <2 x float>, <2 x float>* %p
     %2 = fadd <2 x float> %1, %1
     %3 = call double @test_f64_v2f32_helper(<2 x float> %2)
     %4 = fadd double %3, %3
@@ -113,7 +113,7 @@
 declare double @test_f64_v2i32_helper(<2 x i32> %p)
 define void @test_f64_v2i32(<2 x i32>* %p, double* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <2 x i32>* %p
+    %1 = load <2 x i32>, <2 x i32>* %p
     %2 = add <2 x i32> %1, %1
     %3 = call double @test_f64_v2i32_helper(<2 x i32> %2)
     %4 = fadd double %3, %3
@@ -125,7 +125,7 @@
 declare double @test_f64_v4i16_helper(<4 x i16> %p)
 define void @test_f64_v4i16(<4 x i16>* %p, double* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = load <4 x i16>* %p
+    %1 = load <4 x i16>, <4 x i16>* %p
     %2 = add <4 x i16> %1, %1
     %3 = call double @test_f64_v4i16_helper(<4 x i16> %2)
     %4 = fadd double %3, %3
@@ -137,7 +137,7 @@
 declare double @test_f64_v8i8_helper(<8 x i8> %p)
 define void @test_f64_v8i8(<8 x i8>* %p, double* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = load <8 x i8>* %p
+    %1 = load <8 x i8>, <8 x i8>* %p
     %2 = add <8 x i8> %1, %1
     %3 = call double @test_f64_v8i8_helper(<8 x i8> %2)
     %4 = fadd double %3, %3
@@ -149,7 +149,7 @@
 declare <1 x i64> @test_v1i64_i64_helper(i64 %p)
 define void @test_v1i64_i64(i64* %p, <1 x i64>* %q) {
 ; CHECK-NOT: rev
-    %1 = load i64* %p
+    %1 = load i64, i64* %p
     %2 = add i64 %1, %1
     %3 = call <1 x i64> @test_v1i64_i64_helper(i64 %2)
     %4 = add <1 x i64> %3, %3
@@ -161,7 +161,7 @@
 declare <1 x i64> @test_v1i64_f64_helper(double %p)
 define void @test_v1i64_f64(double* %p, <1 x i64>* %q) {
 ; CHECK-NOT: rev
-    %1 = load double* %p
+    %1 = load double, double* %p
     %2 = fadd double %1, %1
     %3 = call <1 x i64> @test_v1i64_f64_helper(double %2)
     %4 = add <1 x i64> %3, %3
@@ -173,7 +173,7 @@
 declare <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %p)
 define void @test_v1i64_v2f32(<2 x float>* %p, <1 x i64>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <2 x float>* %p
+    %1 = load <2 x float>, <2 x float>* %p
     %2 = fadd <2 x float> %1, %1
     %3 = call <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %2)
     %4 = add <1 x i64> %3, %3
@@ -185,7 +185,7 @@
 declare <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %p)
 define void @test_v1i64_v2i32(<2 x i32>* %p, <1 x i64>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <2 x i32>* %p
+    %1 = load <2 x i32>, <2 x i32>* %p
     %2 = add <2 x i32> %1, %1
     %3 = call <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %2)
     %4 = add <1 x i64> %3, %3
@@ -197,7 +197,7 @@
 declare <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %p)
 define void @test_v1i64_v4i16(<4 x i16>* %p, <1 x i64>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = load <4 x i16>* %p
+    %1 = load <4 x i16>, <4 x i16>* %p
     %2 = add <4 x i16> %1, %1
     %3 = call <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %2)
     %4 = add <1 x i64> %3, %3
@@ -209,7 +209,7 @@
 declare <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %p)
 define void @test_v1i64_v8i8(<8 x i8>* %p, <1 x i64>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = load <8 x i8>* %p
+    %1 = load <8 x i8>, <8 x i8>* %p
     %2 = add <8 x i8> %1, %1
     %3 = call <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %2)
     %4 = add <1 x i64> %3, %3
@@ -221,7 +221,7 @@
 declare <2 x float> @test_v2f32_i64_helper(i64 %p)
 define void @test_v2f32_i64(i64* %p, <2 x float>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load i64* %p
+    %1 = load i64, i64* %p
     %2 = add i64 %1, %1
     %3 = call <2 x float> @test_v2f32_i64_helper(i64 %2)
     %4 = fadd <2 x float> %3, %3
@@ -233,7 +233,7 @@
 declare <2 x float> @test_v2f32_f64_helper(double %p)
 define void @test_v2f32_f64(double* %p, <2 x float>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load double* %p
+    %1 = load double, double* %p
     %2 = fadd double %1, %1
     %3 = call <2 x float> @test_v2f32_f64_helper(double %2)
     %4 = fadd <2 x float> %3, %3
@@ -245,7 +245,7 @@
 declare <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %p)
 define void @test_v2f32_v1i64(<1 x i64>* %p, <2 x float>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <1 x i64>* %p
+    %1 = load <1 x i64>, <1 x i64>* %p
     %2 = add <1 x i64> %1, %1
     %3 = call <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %2)
     %4 = fadd <2 x float> %3, %3
@@ -258,7 +258,7 @@
 define void @test_v2f32_v2i32(<2 x i32>* %p, <2 x float>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.2s
 ; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <2 x i32>* %p
+    %1 = load <2 x i32>, <2 x i32>* %p
     %2 = add <2 x i32> %1, %1
     %3 = call <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %2)
     %4 = fadd <2 x float> %3, %3
@@ -271,7 +271,7 @@
 define void @test_v2f32_v4i16(<4 x i16>* %p, <2 x float>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.4h
 ; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <4 x i16>* %p
+    %1 = load <4 x i16>, <4 x i16>* %p
     %2 = add <4 x i16> %1, %1
     %3 = call <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %2)
     %4 = fadd <2 x float> %3, %3
@@ -284,7 +284,7 @@
 define void @test_v2f32_v8i8(<8 x i8>* %p, <2 x float>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.8b
 ; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <8 x i8>* %p
+    %1 = load <8 x i8>, <8 x i8>* %p
     %2 = add <8 x i8> %1, %1
     %3 = call <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %2)
     %4 = fadd <2 x float> %3, %3
@@ -296,7 +296,7 @@
 declare <2 x i32> @test_v2i32_i64_helper(i64 %p)
 define void @test_v2i32_i64(i64* %p, <2 x i32>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load i64* %p
+    %1 = load i64, i64* %p
     %2 = add i64 %1, %1
     %3 = call <2 x i32> @test_v2i32_i64_helper(i64 %2)
     %4 = add <2 x i32> %3, %3
@@ -308,7 +308,7 @@
 declare <2 x i32> @test_v2i32_f64_helper(double %p)
 define void @test_v2i32_f64(double* %p, <2 x i32>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load double* %p
+    %1 = load double, double* %p
     %2 = fadd double %1, %1
     %3 = call <2 x i32> @test_v2i32_f64_helper(double %2)
     %4 = add <2 x i32> %3, %3
@@ -320,7 +320,7 @@
 declare <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %p)
 define void @test_v2i32_v1i64(<1 x i64>* %p, <2 x i32>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <1 x i64>* %p
+    %1 = load <1 x i64>, <1 x i64>* %p
     %2 = add <1 x i64> %1, %1
     %3 = call <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %2)
     %4 = add <2 x i32> %3, %3
@@ -333,7 +333,7 @@
 define void @test_v2i32_v2f32(<2 x float>* %p, <2 x i32>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.2s
 ; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <2 x float>* %p
+    %1 = load <2 x float>, <2 x float>* %p
     %2 = fadd <2 x float> %1, %1
     %3 = call <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %2)
     %4 = add <2 x i32> %3, %3
@@ -346,7 +346,7 @@
 define void @test_v2i32_v4i16(<4 x i16>* %p, <2 x i32>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.4h
 ; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <4 x i16>* %p
+    %1 = load <4 x i16>, <4 x i16>* %p
     %2 = add <4 x i16> %1, %1
     %3 = call <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %2)
     %4 = add <2 x i32> %3, %3
@@ -359,7 +359,7 @@
 define void @test_v2i32_v8i8(<8 x i8>* %p, <2 x i32>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.8b
 ; CHECK: rev64 v{{[0-9]+}}.2s
-    %1 = load <8 x i8>* %p
+    %1 = load <8 x i8>, <8 x i8>* %p
     %2 = add <8 x i8> %1, %1
     %3 = call <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %2)
     %4 = add <2 x i32> %3, %3
@@ -371,7 +371,7 @@
 declare <4 x i16> @test_v4i16_i64_helper(i64 %p)
 define void @test_v4i16_i64(i64* %p, <4 x i16>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = load i64* %p
+    %1 = load i64, i64* %p
     %2 = add i64 %1, %1
     %3 = call <4 x i16> @test_v4i16_i64_helper(i64 %2)
     %4 = add <4 x i16> %3, %3
@@ -383,7 +383,7 @@
 declare <4 x i16> @test_v4i16_f64_helper(double %p)
 define void @test_v4i16_f64(double* %p, <4 x i16>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = load double* %p
+    %1 = load double, double* %p
     %2 = fadd double %1, %1
     %3 = call <4 x i16> @test_v4i16_f64_helper(double %2)
     %4 = add <4 x i16> %3, %3
@@ -395,7 +395,7 @@
 declare <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %p)
 define void @test_v4i16_v1i64(<1 x i64>* %p, <4 x i16>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = load <1 x i64>* %p
+    %1 = load <1 x i64>, <1 x i64>* %p
     %2 = add <1 x i64> %1, %1
     %3 = call <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %2)
     %4 = add <4 x i16> %3, %3
@@ -408,7 +408,7 @@
 define void @test_v4i16_v2f32(<2 x float>* %p, <4 x i16>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.2s
 ; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = load <2 x float>* %p
+    %1 = load <2 x float>, <2 x float>* %p
     %2 = fadd <2 x float> %1, %1
     %3 = call <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %2)
     %4 = add <4 x i16> %3, %3
@@ -421,7 +421,7 @@
 define void @test_v4i16_v2i32(<2 x i32>* %p, <4 x i16>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.2s
 ; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = load <2 x i32>* %p
+    %1 = load <2 x i32>, <2 x i32>* %p
     %2 = add <2 x i32> %1, %1
     %3 = call <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %2)
     %4 = add <4 x i16> %3, %3
@@ -434,7 +434,7 @@
 define void @test_v4i16_v8i8(<8 x i8>* %p, <4 x i16>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.8b
 ; CHECK: rev64 v{{[0-9]+}}.4h
-    %1 = load <8 x i8>* %p
+    %1 = load <8 x i8>, <8 x i8>* %p
     %2 = add <8 x i8> %1, %1
     %3 = call <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %2)
     %4 = add <4 x i16> %3, %3
@@ -446,7 +446,7 @@
 declare <8 x i8> @test_v8i8_i64_helper(i64 %p)
 define void @test_v8i8_i64(i64* %p, <8 x i8>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = load i64* %p
+    %1 = load i64, i64* %p
     %2 = add i64 %1, %1
     %3 = call <8 x i8> @test_v8i8_i64_helper(i64 %2)
     %4 = add <8 x i8> %3, %3
@@ -458,7 +458,7 @@
 declare <8 x i8> @test_v8i8_f64_helper(double %p)
 define void @test_v8i8_f64(double* %p, <8 x i8>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = load double* %p
+    %1 = load double, double* %p
     %2 = fadd double %1, %1
     %3 = call <8 x i8> @test_v8i8_f64_helper(double %2)
     %4 = add <8 x i8> %3, %3
@@ -470,7 +470,7 @@
 declare <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %p)
 define void @test_v8i8_v1i64(<1 x i64>* %p, <8 x i8>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = load <1 x i64>* %p
+    %1 = load <1 x i64>, <1 x i64>* %p
     %2 = add <1 x i64> %1, %1
     %3 = call <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %2)
     %4 = add <8 x i8> %3, %3
@@ -483,7 +483,7 @@
 define void @test_v8i8_v2f32(<2 x float>* %p, <8 x i8>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.2s
 ; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = load <2 x float>* %p
+    %1 = load <2 x float>, <2 x float>* %p
     %2 = fadd <2 x float> %1, %1
     %3 = call <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %2)
     %4 = add <8 x i8> %3, %3
@@ -496,7 +496,7 @@
 define void @test_v8i8_v2i32(<2 x i32>* %p, <8 x i8>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.2s
 ; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = load <2 x i32>* %p
+    %1 = load <2 x i32>, <2 x i32>* %p
     %2 = add <2 x i32> %1, %1
     %3 = call <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %2)
     %4 = add <8 x i8> %3, %3
@@ -509,7 +509,7 @@
 define void @test_v8i8_v4i16(<4 x i16>* %p, <8 x i8>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.4h
 ; CHECK: rev64 v{{[0-9]+}}.8b
-    %1 = load <4 x i16>* %p
+    %1 = load <4 x i16>, <4 x i16>* %p
     %2 = add <4 x i16> %1, %1
     %3 = call <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %2)
     %4 = add <8 x i8> %3, %3
@@ -521,7 +521,7 @@
 declare fp128 @test_f128_v2f64_helper(<2 x double> %p)
 define void @test_f128_v2f64(<2 x double>* %p, fp128* %q) {
 ; CHECK: ext
-    %1 = load <2 x double>* %p
+    %1 = load <2 x double>, <2 x double>* %p
     %2 = fadd <2 x double> %1, %1
     %3 = call fp128 @test_f128_v2f64_helper(<2 x double> %2)
     %4 = fadd fp128 %3, %3
@@ -533,7 +533,7 @@
 declare fp128 @test_f128_v2i64_helper(<2 x i64> %p)
 define void @test_f128_v2i64(<2 x i64>* %p, fp128* %q) {
 ; CHECK: ext
-    %1 = load <2 x i64>* %p
+    %1 = load <2 x i64>, <2 x i64>* %p
     %2 = add <2 x i64> %1, %1
     %3 = call fp128 @test_f128_v2i64_helper(<2 x i64> %2)
     %4 = fadd fp128 %3, %3
@@ -546,7 +546,7 @@
 define void @test_f128_v4f32(<4 x float>* %p, fp128* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
-    %1 = load <4 x float>* %p
+    %1 = load <4 x float>, <4 x float>* %p
     %2 = fadd <4 x float> %1, %1
     %3 = call fp128 @test_f128_v4f32_helper(<4 x float> %2)
     %4 = fadd fp128 %3, %3
@@ -559,7 +559,7 @@
 define void @test_f128_v4i32(<4 x i32>* %p, fp128* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
-    %1 = load <4 x i32>* %p
+    %1 = load <4 x i32>, <4 x i32>* %p
     %2 = add <4 x i32> %1, %1
     %3 = call fp128 @test_f128_v4i32_helper(<4 x i32> %2)
     %4 = fadd fp128 %3, %3
@@ -572,7 +572,7 @@
 define void @test_f128_v8i16(<8 x i16>* %p, fp128* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.8h
 ; CHECK: ext
-    %1 = load <8 x i16>* %p
+    %1 = load <8 x i16>, <8 x i16>* %p
     %2 = add <8 x i16> %1, %1
     %3 = call fp128 @test_f128_v8i16_helper(<8 x i16> %2)
     %4 = fadd fp128 %3, %3
@@ -585,7 +585,7 @@
 define void @test_f128_v16i8(<16 x i8>* %p, fp128* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.16b
 ; CHECK: ext
-    %1 = load <16 x i8>* %p
+    %1 = load <16 x i8>, <16 x i8>* %p
     %2 = add <16 x i8> %1, %1
     %3 = call fp128 @test_f128_v16i8_helper(<16 x i8> %2)
     %4 = fadd fp128 %3, %3
@@ -597,7 +597,7 @@
 declare <2 x double> @test_v2f64_f128_helper(fp128 %p)
 define void @test_v2f64_f128(fp128* %p, <2 x double>* %q) {
 ; CHECK: ext
-    %1 = load fp128* %p
+    %1 = load fp128, fp128* %p
     %2 = fadd fp128 %1, %1
     %3 = call <2 x double> @test_v2f64_f128_helper(fp128 %2)
     %4 = fadd <2 x double> %3, %3
@@ -610,7 +610,7 @@
 define void @test_v2f64_v2i64(<2 x i64>* %p, <2 x double>* %q) {
 ; CHECK: ext
 ; CHECK: ext
-    %1 = load <2 x i64>* %p
+    %1 = load <2 x i64>, <2 x i64>* %p
     %2 = add <2 x i64> %1, %1
     %3 = call <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %2)
     %4 = fadd <2 x double> %3, %3
@@ -624,7 +624,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
 ; CHECK: ext
-    %1 = load <4 x float>* %p
+    %1 = load <4 x float>, <4 x float>* %p
     %2 = fadd <4 x float> %1, %1
     %3 = call <2 x double> @test_v2f64_v4f32_helper(<4 x float> %2)
     %4 = fadd <2 x double> %3, %3
@@ -638,7 +638,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
 ; CHECK: ext
-    %1 = load <4 x i32>* %p
+    %1 = load <4 x i32>, <4 x i32>* %p
     %2 = add <4 x i32> %1, %1
     %3 = call <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %2)
     %4 = fadd <2 x double> %3, %3
@@ -652,7 +652,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.8h
 ; CHECK: ext
 ; CHECK: ext
-    %1 = load <8 x i16>* %p
+    %1 = load <8 x i16>, <8 x i16>* %p
     %2 = add <8 x i16> %1, %1
     %3 = call <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %2)
     %4 = fadd <2 x double> %3, %3
@@ -666,7 +666,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.16b
 ; CHECK: ext
 ; CHECK: ext
-    %1 = load <16 x i8>* %p
+    %1 = load <16 x i8>, <16 x i8>* %p
     %2 = add <16 x i8> %1, %1
     %3 = call <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %2)
     %4 = fadd <2 x double> %3, %3
@@ -678,7 +678,7 @@
 declare <2 x i64> @test_v2i64_f128_helper(fp128 %p)
 define void @test_v2i64_f128(fp128* %p, <2 x i64>* %q) {
 ; CHECK: ext
-    %1 = load fp128* %p
+    %1 = load fp128, fp128* %p
     %2 = fadd fp128 %1, %1
     %3 = call <2 x i64> @test_v2i64_f128_helper(fp128 %2)
     %4 = add <2 x i64> %3, %3
@@ -691,7 +691,7 @@
 define void @test_v2i64_v2f64(<2 x double>* %p, <2 x i64>* %q) {
 ; CHECK: ext
 ; CHECK: ext
-    %1 = load <2 x double>* %p
+    %1 = load <2 x double>, <2 x double>* %p
     %2 = fadd <2 x double> %1, %1
     %3 = call <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %2)
     %4 = add <2 x i64> %3, %3
@@ -705,7 +705,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
 ; CHECK: ext
-    %1 = load <4 x float>* %p
+    %1 = load <4 x float>, <4 x float>* %p
     %2 = fadd <4 x float> %1, %1
     %3 = call <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %2)
     %4 = add <2 x i64> %3, %3
@@ -719,7 +719,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
 ; CHECK: ext
-    %1 = load <4 x i32>* %p
+    %1 = load <4 x i32>, <4 x i32>* %p
     %2 = add <4 x i32> %1, %1
     %3 = call <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %2)
     %4 = add <2 x i64> %3, %3
@@ -733,7 +733,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.8h
 ; CHECK: ext
 ; CHECK: ext
-    %1 = load <8 x i16>* %p
+    %1 = load <8 x i16>, <8 x i16>* %p
     %2 = add <8 x i16> %1, %1
     %3 = call <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %2)
     %4 = add <2 x i64> %3, %3
@@ -747,7 +747,7 @@
 ; CHECK: rev64 v{{[0-9]+}}.16b
 ; CHECK: ext
 ; CHECK: ext
-    %1 = load <16 x i8>* %p
+    %1 = load <16 x i8>, <16 x i8>* %p
     %2 = add <16 x i8> %1, %1
     %3 = call <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %2)
     %4 = add <2 x i64> %3, %3
@@ -760,7 +760,7 @@
 define void @test_v4f32_f128(fp128* %p, <4 x float>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
-    %1 = load fp128* %p
+    %1 = load fp128, fp128* %p
     %2 = fadd fp128 %1, %1
     %3 = call <4 x float> @test_v4f32_f128_helper(fp128 %2)
     %4 = fadd <4 x float> %3, %3
@@ -774,7 +774,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
-    %1 = load <2 x double>* %p
+    %1 = load <2 x double>, <2 x double>* %p
     %2 = fadd <2 x double> %1, %1
     %3 = call <4 x float> @test_v4f32_v2f64_helper(<2 x double> %2)
     %4 = fadd <4 x float> %3, %3
@@ -788,7 +788,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
-    %1 = load <2 x i64>* %p
+    %1 = load <2 x i64>, <2 x i64>* %p
     %2 = add <2 x i64> %1, %1
     %3 = call <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %2)
     %4 = fadd <4 x float> %3, %3
@@ -803,7 +803,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
-    %1 = load <4 x i32>* %p
+    %1 = load <4 x i32>, <4 x i32>* %p
     %2 = add <4 x i32> %1, %1
     %3 = call <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %2)
     %4 = fadd <4 x float> %3, %3
@@ -818,7 +818,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
-    %1 = load <8 x i16>* %p
+    %1 = load <8 x i16>, <8 x i16>* %p
     %2 = add <8 x i16> %1, %1
     %3 = call <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %2)
     %4 = fadd <4 x float> %3, %3
@@ -833,7 +833,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
-    %1 = load <16 x i8>* %p
+    %1 = load <16 x i8>, <16 x i8>* %p
     %2 = add <16 x i8> %1, %1
     %3 = call <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %2)
     %4 = fadd <4 x float> %3, %3
@@ -846,7 +846,7 @@
 define void @test_v4i32_f128(fp128* %p, <4 x i32>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
-    %1 = load fp128* %p
+    %1 = load fp128, fp128* %p
     %2 = fadd fp128 %1, %1
     %3 = call <4 x i32> @test_v4i32_f128_helper(fp128 %2)
     %4 = add <4 x i32> %3, %3
@@ -860,7 +860,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
-    %1 = load <2 x double>* %p
+    %1 = load <2 x double>, <2 x double>* %p
     %2 = fadd <2 x double> %1, %1
     %3 = call <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %2)
     %4 = add <4 x i32> %3, %3
@@ -874,7 +874,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
-    %1 = load <2 x i64>* %p
+    %1 = load <2 x i64>, <2 x i64>* %p
     %2 = add <2 x i64> %1, %1
     %3 = call <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %2)
     %4 = add <4 x i32> %3, %3
@@ -889,7 +889,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
-    %1 = load <4 x float>* %p
+    %1 = load <4 x float>, <4 x float>* %p
     %2 = fadd <4 x float> %1, %1
     %3 = call <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %2)
     %4 = add <4 x i32> %3, %3
@@ -904,7 +904,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
-    %1 = load <8 x i16>* %p
+    %1 = load <8 x i16>, <8 x i16>* %p
     %2 = add <8 x i16> %1, %1
     %3 = call <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %2)
     %4 = add <4 x i32> %3, %3
@@ -919,7 +919,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.4s
 ; CHECK: ext
-    %1 = load <16 x i8>* %p
+    %1 = load <16 x i8>, <16 x i8>* %p
     %2 = add <16 x i8> %1, %1
     %3 = call <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %2)
     %4 = add <4 x i32> %3, %3
@@ -932,7 +932,7 @@
 define void @test_v8i16_f128(fp128* %p, <8 x i16>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.8h
 ; CHECK: ext
-    %1 = load fp128* %p
+    %1 = load fp128, fp128* %p
     %2 = fadd fp128 %1, %1
     %3 = call <8 x i16> @test_v8i16_f128_helper(fp128 %2)
     %4 = add <8 x i16> %3, %3
@@ -946,7 +946,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.8h
 ; CHECK: ext
-    %1 = load <2 x double>* %p
+    %1 = load <2 x double>, <2 x double>* %p
     %2 = fadd <2 x double> %1, %1
     %3 = call <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %2)
     %4 = add <8 x i16> %3, %3
@@ -960,7 +960,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.8h
 ; CHECK: ext
-    %1 = load <2 x i64>* %p
+    %1 = load <2 x i64>, <2 x i64>* %p
     %2 = add <2 x i64> %1, %1
     %3 = call <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %2)
     %4 = add <8 x i16> %3, %3
@@ -975,7 +975,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.8h
 ; CHECK: ext
-    %1 = load <4 x float>* %p
+    %1 = load <4 x float>, <4 x float>* %p
     %2 = fadd <4 x float> %1, %1
     %3 = call <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %2)
     %4 = add <8 x i16> %3, %3
@@ -990,7 +990,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.8h
 ; CHECK: ext
-    %1 = load <4 x i32>* %p
+    %1 = load <4 x i32>, <4 x i32>* %p
     %2 = add <4 x i32> %1, %1
     %3 = call <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %2)
     %4 = add <8 x i16> %3, %3
@@ -1005,7 +1005,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.8h
 ; CHECK: ext
-    %1 = load <16 x i8>* %p
+    %1 = load <16 x i8>, <16 x i8>* %p
     %2 = add <16 x i8> %1, %1
     %3 = call <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %2)
     %4 = add <8 x i16> %3, %3
@@ -1018,7 +1018,7 @@
 define void @test_v16i8_f128(fp128* %p, <16 x i8>* %q) {
 ; CHECK: rev64 v{{[0-9]+}}.16b
 ; CHECK: ext
-    %1 = load fp128* %p
+    %1 = load fp128, fp128* %p
     %2 = fadd fp128 %1, %1
     %3 = call <16 x i8> @test_v16i8_f128_helper(fp128 %2)
     %4 = add <16 x i8> %3, %3
@@ -1032,7 +1032,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.16b
 ; CHECK: ext
-    %1 = load <2 x double>* %p
+    %1 = load <2 x double>, <2 x double>* %p
     %2 = fadd <2 x double> %1, %1
     %3 = call <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %2)
     %4 = add <16 x i8> %3, %3
@@ -1046,7 +1046,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.16b
 ; CHECK: ext
-    %1 = load <2 x i64>* %p
+    %1 = load <2 x i64>, <2 x i64>* %p
     %2 = add <2 x i64> %1, %1
     %3 = call <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %2)
     %4 = add <16 x i8> %3, %3
@@ -1061,7 +1061,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.16b
 ; CHECK: ext
-    %1 = load <4 x float>* %p
+    %1 = load <4 x float>, <4 x float>* %p
     %2 = fadd <4 x float> %1, %1
     %3 = call <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %2)
     %4 = add <16 x i8> %3, %3
@@ -1076,7 +1076,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.16b
 ; CHECK: ext
-    %1 = load <4 x i32>* %p
+    %1 = load <4 x i32>, <4 x i32>* %p
     %2 = add <4 x i32> %1, %1
     %3 = call <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %2)
     %4 = add <16 x i8> %3, %3
@@ -1091,7 +1091,7 @@
 ; CHECK: ext
 ; CHECK: rev64 v{{[0-9]+}}.16b
 ; CHECK: ext
-    %1 = load <8 x i16>* %p
+    %1 = load <8 x i16>, <8 x i16>* %p
     %2 = add <8 x i16> %1, %1
     %3 = call <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %2)
     %4 = add <16 x i8> %3, %3
diff --git a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
index d9d6b61..5dca929 100644
--- a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll
@@ -12,7 +12,7 @@
 ; CHECK: ret
 
   %tmp = bitcast %struct.X* %x to i32*
-  %tmp1 = load i32* %tmp, align 4
+  %tmp1 = load i32, i32* %tmp, align 4
   %b = getelementptr inbounds %struct.Y, %struct.Y* %y, i64 0, i32 1
   %bf.clear = lshr i32 %tmp1, 3
   %bf.clear.lobit = and i32 %bf.clear, 1
@@ -46,7 +46,7 @@
 ; CHECK: ret
 
   %tmp = bitcast %struct.Z* %x to i64*
-  %tmp1 = load i64* %tmp, align 4
+  %tmp1 = load i64, i64* %tmp, align 4
   %b = getelementptr inbounds %struct.A, %struct.A* %y, i64 0, i32 0
   %bf.clear = lshr i64 %tmp1, 3
   %bf.clear.lobit = and i64 %bf.clear, 1
@@ -77,7 +77,7 @@
 ; CHECK-NEXT: bfxil [[REG1]], x1, #16, #24
 ; CHECK-NEXT: str [[REG1]],
 ; CHECK-NEXT: ret
-  %0 = load i64* %y, align 8
+  %0 = load i64, i64* %y, align 8
   %and = and i64 %0, -16777216
   %shr = lshr i64 %x, 16
   %and1 = and i64 %shr, 16777215
@@ -93,7 +93,7 @@
 ; CHECK-NEXT: bfxil [[REG1]], w1, #16, #3
 ; CHECK-NEXT: str [[REG1]],
 ; CHECK-NEXT: ret
-  %0 = load i32* %y, align 8
+  %0 = load i32, i32* %y, align 8
   %and = and i32 %0, -8
   %shr = lshr i32 %x, 16
   %and1 = and i32 %shr, 7
@@ -112,7 +112,7 @@
 ; CHECK-NEXT: lsr [[REG2:w[0-9]+]], [[REG1]], #2
 ; CHECK-NEXT: str [[REG2]],
 ; CHECK-NEXT: ret
-  %0 = load i32* %y, align 8
+  %0 = load i32, i32* %y, align 8
   %and = and i32 %0, -8
   %shr = lshr i32 %x, 16
   %and1 = and i32 %shr, 7
@@ -133,7 +133,7 @@
 ; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2
 ; CHECK-NEXT: str [[REG2]],
 ; CHECK-NEXT: ret
-  %0 = load i32* %y, align 8
+  %0 = load i32, i32* %y, align 8
   %and = and i32 %0, -8
   %shr = lshr i32 %x, 16
   %and1 = and i32 %shr, 7
@@ -155,7 +155,7 @@
 ; CHECK-NEXT: lsr [[REG2:x[0-9]+]], [[REG1]], #2
 ; CHECK-NEXT: str [[REG2]],
 ; CHECK-NEXT: ret
-  %0 = load i64* %y, align 8
+  %0 = load i64, i64* %y, align 8
   %and = and i64 %0, -8
   %shr = lshr i64 %x, 16
   %and1 = and i64 %shr, 7
@@ -177,7 +177,7 @@
 ; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2
 ; CHECK-NEXT: str [[REG2]],
 ; CHECK-NEXT: ret
-  %0 = load i64* %y, align 8
+  %0 = load i64, i64* %y, align 8
   %and = and i64 %0, -8
   %shr = lshr i64 %x, 16
   %and1 = and i64 %shr, 7
@@ -198,7 +198,7 @@
 ; CHECK-NEXT: lsl [[REG2:w[0-9]+]], [[REG1]], #2
 ; CHECK-NEXT: str [[REG2]],
 ; CHECK-NEXT: ret
-  %0 = load i32* %y, align 8
+  %0 = load i32, i32* %y, align 8
   %and = and i32 %0, -8
   %and1 = and i32 %x, 7
   %or = or i32 %and, %and1
@@ -218,7 +218,7 @@
 ; CHECK-NEXT: lsl [[REG2:x[0-9]+]], [[REG1]], #2
 ; CHECK-NEXT: str [[REG2]],
 ; CHECK-NEXT: ret
-  %0 = load i64* %y, align 8
+  %0 = load i64, i64* %y, align 8
   %and = and i64 %0, -8
   %and1 = and i64 %x, 7
   %or = or i64 %and, %and1
@@ -247,7 +247,7 @@
 ; CHECK-NEXT: ubfx [[REG2:w[0-9]+]], [[REG1]], #2, #28
 ; CHECK-NEXT: str [[REG2]],
 ; CHECK-NEXT: ret
-  %0 = load i32* %y, align 8
+  %0 = load i32, i32* %y, align 8
   %and = and i32 %0, -8
   %shr = lshr i32 %x, 16
   %and1 = and i32 %shr, 7
@@ -270,7 +270,7 @@
 ; CHECK-NEXT: ubfx [[REG2:x[0-9]+]], [[REG1]], #2, #60
 ; CHECK-NEXT: str [[REG2]],
 ; CHECK-NEXT: ret
-  %0 = load i64* %y, align 8
+  %0 = load i64, i64* %y, align 8
   %and = and i64 %0, -8
   %shr = lshr i64 %x, 16
   %and1 = and i64 %shr, 7
@@ -296,7 +296,7 @@
 ; CHECK-NEXT: lsl [[REG3:w[0-9]+]], [[REG2]], #2
 ; CHECK-NEXT: str [[REG3]],
 ; CHECK-NEXT: ret
-  %0 = load i32* %y, align 8
+  %0 = load i32, i32* %y, align 8
   %and = and i32 %0, -256
   %shr = lshr i32 %x, 16
   %and1 = and i32 %shr, 255
@@ -326,7 +326,7 @@
 ; CHECK-NEXT: lsl [[REG3:x[0-9]+]], [[REG2]], #2
 ; CHECK-NEXT: str [[REG3]],
 ; CHECK-NEXT: ret
-  %0 = load i64* %y, align 8
+  %0 = load i64, i64* %y, align 8
   %and = and i64 %0, -256
   %shr = lshr i64 %x, 16
   %and1 = and i64 %shr, 255
@@ -357,7 +357,7 @@
 ; CHECK-NEXT: ubfx [[REG3:w[0-9]+]], [[REG2]], #2, #28
 ; CHECK-NEXT: str [[REG3]],
 ; CHECK-NEXT: ret
-  %0 = load i32* %y, align 8
+  %0 = load i32, i32* %y, align 8
   %and = and i32 %0, 1737056
   %shr = lshr i32 %x, 16
   %and1 = and i32 %shr, 7
@@ -386,7 +386,7 @@
 ; CHECK-NEXT: ubfx [[REG3:x[0-9]+]], [[REG2]], #2, #60
 ; CHECK-NEXT: str [[REG3]],
 ; CHECK-NEXT: ret
-  %0 = load i64* %y, align 8
+  %0 = load i64, i64* %y, align 8
   %and = and i64 %0, 1737056
   %shr = lshr i64 %x, 16
   %and1 = and i64 %shr, 7
@@ -422,7 +422,7 @@
 
 if.then:                                          ; preds = %entry
   %arrayidx3 = getelementptr inbounds [65536 x i8], [65536 x i8]* @first_ones, i64 0, i64 %x.sroa.5.0.extract.shift
-  %0 = load i8* %arrayidx3, align 1
+  %0 = load i8, i8* %arrayidx3, align 1
   %conv = zext i8 %0 to i32
   br label %return
 
@@ -444,7 +444,7 @@
 ; CHECK-NOT: ubfm
   %idxprom10 = and i64 %x.sroa.3.0.extract.shift, 65535
   %arrayidx11 = getelementptr inbounds [65536 x i8], [65536 x i8]* @first_ones, i64 0, i64 %idxprom10
-  %1 = load i8* %arrayidx11, align 1
+  %1 = load i8, i8* %arrayidx11, align 1
   %conv12 = zext i8 %1 to i32
   %add = add nsw i32 %conv12, 16
   br label %return
@@ -467,7 +467,7 @@
 ; CHECK-NOT: ubfm
   %idxprom20 = and i64 %x.sroa.1.0.extract.shift, 65535
   %arrayidx21 = getelementptr inbounds [65536 x i8], [65536 x i8]* @first_ones, i64 0, i64 %idxprom20
-  %2 = load i8* %arrayidx21, align 1
+  %2 = load i8, i8* %arrayidx21, align 1
   %conv22 = zext i8 %2 to i32
   %add23 = add nsw i32 %conv22, 32
   br label %return
@@ -510,7 +510,7 @@
   %shr = lshr i64 %x, 4
   %and = and i64 %shr, 15
   %arrayidx = getelementptr inbounds [8 x [64 x i64]], [8 x [64 x i64]]* @arr, i64 0, i64 0, i64 %and
-  %0 = load i64* %arrayidx, align 8
+  %0 = load i64, i64* %arrayidx, align 8
   ret i64 %0
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-blockaddress.ll b/llvm/test/CodeGen/AArch64/arm64-blockaddress.ll
index ac4f19e..5df8402 100644
--- a/llvm/test/CodeGen/AArch64/arm64-blockaddress.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-blockaddress.ll
@@ -25,6 +25,6 @@
   br label %mylabel
 
 mylabel:
-  %tmp = load volatile i64* %recover, align 8
+  %tmp = load volatile i64, i64* %recover, align 8
   ret i64 %tmp
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-call-tailcalls.ll b/llvm/test/CodeGen/AArch64/arm64-call-tailcalls.ll
index 487c1d9..71d9327 100644
--- a/llvm/test/CodeGen/AArch64/arm64-call-tailcalls.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-call-tailcalls.ll
@@ -9,7 +9,7 @@
 ; CHECK: ldr	x[[ADDR:[0-9]+]], [x[[GOTADDR]], _t@GOTPAGEOFF]
 ; CHECK: ldr	x[[DEST:[0-9]+]], [x[[ADDR]]]
 ; CHECK: br	x[[DEST]]
-  %tmp = load i32 ()** @t
+  %tmp = load i32 ()*, i32 ()** @t
   %tmp.upgrd.2 = tail call i32 %tmp()
   ret void
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-cast-opt.ll b/llvm/test/CodeGen/AArch64/arm64-cast-opt.ll
index c701db2..463add5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cast-opt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cast-opt.ll
@@ -11,12 +11,12 @@
 ; CHECK-NOT: and
 entry:
   %idxprom = sext i32 %i1 to i64
-  %0 = load i8** @block, align 8
+  %0 = load i8*, i8** @block, align 8
   %arrayidx = getelementptr inbounds i8, i8* %0, i64 %idxprom
-  %1 = load i8* %arrayidx, align 1
+  %1 = load i8, i8* %arrayidx, align 1
   %idxprom1 = sext i32 %i2 to i64
   %arrayidx2 = getelementptr inbounds i8, i8* %0, i64 %idxprom1
-  %2 = load i8* %arrayidx2, align 1
+  %2 = load i8, i8* %arrayidx2, align 1
   %cmp = icmp eq i8 %1, %2
   br i1 %cmp, label %return, label %if.then
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
index 730aabf..4e47ab6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll
@@ -21,7 +21,7 @@
 ; CHECK-NEXT b.cc
 define i32 @Maze1() nounwind ssp {
 entry:
-  %0 = load i64* @channelColumns, align 8, !tbaa !0
+  %0 = load i64, i64* @channelColumns, align 8, !tbaa !0
   %cmp90 = icmp eq i64 %0, 0
   br i1 %cmp90, label %for.end, label %for.body
 
@@ -29,51 +29,51 @@
   %1 = phi i64 [ %0, %entry ], [ %37, %for.inc ]
   %i.092 = phi i64 [ 1, %entry ], [ %inc53, %for.inc ]
   %numLeft.091 = phi i32 [ 0, %entry ], [ %numLeft.1, %for.inc ]
-  %2 = load i8** @mazeRoute, align 8, !tbaa !3
+  %2 = load i8*, i8** @mazeRoute, align 8, !tbaa !3
   %arrayidx = getelementptr inbounds i8, i8* %2, i64 %i.092
-  %3 = load i8* %arrayidx, align 1, !tbaa !1
+  %3 = load i8, i8* %arrayidx, align 1, !tbaa !1
   %tobool = icmp eq i8 %3, 0
   br i1 %tobool, label %for.inc, label %if.then
 
 if.then:                                          ; preds = %for.body
-  %4 = load i64** @TOP, align 8, !tbaa !3
+  %4 = load i64*, i64** @TOP, align 8, !tbaa !3
   %arrayidx1 = getelementptr inbounds i64, i64* %4, i64 %i.092
-  %5 = load i64* %arrayidx1, align 8, !tbaa !0
-  %6 = load i64** @netsAssign, align 8, !tbaa !3
+  %5 = load i64, i64* %arrayidx1, align 8, !tbaa !0
+  %6 = load i64*, i64** @netsAssign, align 8, !tbaa !3
   %arrayidx2 = getelementptr inbounds i64, i64* %6, i64 %5
-  %7 = load i64* %arrayidx2, align 8, !tbaa !0
-  %8 = load i64** @BOT, align 8, !tbaa !3
+  %7 = load i64, i64* %arrayidx2, align 8, !tbaa !0
+  %8 = load i64*, i64** @BOT, align 8, !tbaa !3
   %arrayidx3 = getelementptr inbounds i64, i64* %8, i64 %i.092
-  %9 = load i64* %arrayidx3, align 8, !tbaa !0
+  %9 = load i64, i64* %arrayidx3, align 8, !tbaa !0
   %arrayidx4 = getelementptr inbounds i64, i64* %6, i64 %9
-  %10 = load i64* %arrayidx4, align 8, !tbaa !0
+  %10 = load i64, i64* %arrayidx4, align 8, !tbaa !0
   %cmp5 = icmp ugt i64 %i.092, 1
   %cmp6 = icmp ugt i64 %10, 1
   %or.cond = and i1 %cmp5, %cmp6
   br i1 %or.cond, label %land.lhs.true7, label %if.else
 
 land.lhs.true7:                                   ; preds = %if.then
-  %11 = load i64* @channelTracks, align 8, !tbaa !0
+  %11 = load i64, i64* @channelTracks, align 8, !tbaa !0
   %add = add i64 %11, 1
   %call = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 %add, i64 %10, i64 0, i64 %7, i32 -1, i32 -1)
   %tobool8 = icmp eq i32 %call, 0
   br i1 %tobool8, label %land.lhs.true7.if.else_crit_edge, label %if.then9
 
 land.lhs.true7.if.else_crit_edge:                 ; preds = %land.lhs.true7
-  %.pre = load i64* @channelColumns, align 8, !tbaa !0
+  %.pre = load i64, i64* @channelColumns, align 8, !tbaa !0
   br label %if.else
 
 if.then9:                                         ; preds = %land.lhs.true7
-  %12 = load i8** @mazeRoute, align 8, !tbaa !3
+  %12 = load i8*, i8** @mazeRoute, align 8, !tbaa !3
   %arrayidx10 = getelementptr inbounds i8, i8* %12, i64 %i.092
   store i8 0, i8* %arrayidx10, align 1, !tbaa !1
-  %13 = load i64** @TOP, align 8, !tbaa !3
+  %13 = load i64*, i64** @TOP, align 8, !tbaa !3
   %arrayidx11 = getelementptr inbounds i64, i64* %13, i64 %i.092
-  %14 = load i64* %arrayidx11, align 8, !tbaa !0
+  %14 = load i64, i64* %arrayidx11, align 8, !tbaa !0
   tail call fastcc void @CleanNet(i64 %14)
-  %15 = load i64** @BOT, align 8, !tbaa !3
+  %15 = load i64*, i64** @BOT, align 8, !tbaa !3
   %arrayidx12 = getelementptr inbounds i64, i64* %15, i64 %i.092
-  %16 = load i64* %arrayidx12, align 8, !tbaa !0
+  %16 = load i64, i64* %arrayidx12, align 8, !tbaa !0
   tail call fastcc void @CleanNet(i64 %16)
   br label %for.inc
 
@@ -84,23 +84,23 @@
   br i1 %or.cond89, label %land.lhs.true16, label %if.else24
 
 land.lhs.true16:                                  ; preds = %if.else
-  %18 = load i64* @channelTracks, align 8, !tbaa !0
+  %18 = load i64, i64* @channelTracks, align 8, !tbaa !0
   %add17 = add i64 %18, 1
   %call18 = tail call fastcc i32 @Maze1Mech(i64 %i.092, i64 %add17, i64 %10, i64 0, i64 %7, i32 1, i32 -1)
   %tobool19 = icmp eq i32 %call18, 0
   br i1 %tobool19, label %if.else24, label %if.then20
 
 if.then20:                                        ; preds = %land.lhs.true16
-  %19 = load i8** @mazeRoute, align 8, !tbaa !3
+  %19 = load i8*, i8** @mazeRoute, align 8, !tbaa !3
   %arrayidx21 = getelementptr inbounds i8, i8* %19, i64 %i.092
   store i8 0, i8* %arrayidx21, align 1, !tbaa !1
-  %20 = load i64** @TOP, align 8, !tbaa !3
+  %20 = load i64*, i64** @TOP, align 8, !tbaa !3
   %arrayidx22 = getelementptr inbounds i64, i64* %20, i64 %i.092
-  %21 = load i64* %arrayidx22, align 8, !tbaa !0
+  %21 = load i64, i64* %arrayidx22, align 8, !tbaa !0
   tail call fastcc void @CleanNet(i64 %21)
-  %22 = load i64** @BOT, align 8, !tbaa !3
+  %22 = load i64*, i64** @BOT, align 8, !tbaa !3
   %arrayidx23 = getelementptr inbounds i64, i64* %22, i64 %i.092
-  %23 = load i64* %arrayidx23, align 8, !tbaa !0
+  %23 = load i64, i64* %arrayidx23, align 8, !tbaa !0
   tail call fastcc void @CleanNet(i64 %23)
   br label %for.inc
 
@@ -108,7 +108,7 @@
   br i1 %cmp5, label %land.lhs.true26, label %if.else36
 
 land.lhs.true26:                                  ; preds = %if.else24
-  %24 = load i64* @channelTracks, align 8, !tbaa !0
+  %24 = load i64, i64* @channelTracks, align 8, !tbaa !0
   %cmp27 = icmp ult i64 %7, %24
   br i1 %cmp27, label %land.lhs.true28, label %if.else36
 
@@ -119,26 +119,26 @@
   br i1 %tobool31, label %if.else36, label %if.then32
 
 if.then32:                                        ; preds = %land.lhs.true28
-  %25 = load i8** @mazeRoute, align 8, !tbaa !3
+  %25 = load i8*, i8** @mazeRoute, align 8, !tbaa !3
   %arrayidx33 = getelementptr inbounds i8, i8* %25, i64 %i.092
   store i8 0, i8* %arrayidx33, align 1, !tbaa !1
-  %26 = load i64** @TOP, align 8, !tbaa !3
+  %26 = load i64*, i64** @TOP, align 8, !tbaa !3
   %arrayidx34 = getelementptr inbounds i64, i64* %26, i64 %i.092
-  %27 = load i64* %arrayidx34, align 8, !tbaa !0
+  %27 = load i64, i64* %arrayidx34, align 8, !tbaa !0
   tail call fastcc void @CleanNet(i64 %27)
-  %28 = load i64** @BOT, align 8, !tbaa !3
+  %28 = load i64*, i64** @BOT, align 8, !tbaa !3
   %arrayidx35 = getelementptr inbounds i64, i64* %28, i64 %i.092
-  %29 = load i64* %arrayidx35, align 8, !tbaa !0
+  %29 = load i64, i64* %arrayidx35, align 8, !tbaa !0
   tail call fastcc void @CleanNet(i64 %29)
   br label %for.inc
 
 if.else36:                                        ; preds = %land.lhs.true28, %land.lhs.true26, %if.else24
-  %30 = load i64* @channelColumns, align 8, !tbaa !0
+  %30 = load i64, i64* @channelColumns, align 8, !tbaa !0
   %cmp37 = icmp ult i64 %i.092, %30
   br i1 %cmp37, label %land.lhs.true38, label %if.else48
 
 land.lhs.true38:                                  ; preds = %if.else36
-  %31 = load i64* @channelTracks, align 8, !tbaa !0
+  %31 = load i64, i64* @channelTracks, align 8, !tbaa !0
   %cmp39 = icmp ult i64 %7, %31
   br i1 %cmp39, label %land.lhs.true40, label %if.else48
 
@@ -149,16 +149,16 @@
   br i1 %tobool43, label %if.else48, label %if.then44
 
 if.then44:                                        ; preds = %land.lhs.true40
-  %32 = load i8** @mazeRoute, align 8, !tbaa !3
+  %32 = load i8*, i8** @mazeRoute, align 8, !tbaa !3
   %arrayidx45 = getelementptr inbounds i8, i8* %32, i64 %i.092
   store i8 0, i8* %arrayidx45, align 1, !tbaa !1
-  %33 = load i64** @TOP, align 8, !tbaa !3
+  %33 = load i64*, i64** @TOP, align 8, !tbaa !3
   %arrayidx46 = getelementptr inbounds i64, i64* %33, i64 %i.092
-  %34 = load i64* %arrayidx46, align 8, !tbaa !0
+  %34 = load i64, i64* %arrayidx46, align 8, !tbaa !0
   tail call fastcc void @CleanNet(i64 %34)
-  %35 = load i64** @BOT, align 8, !tbaa !3
+  %35 = load i64*, i64** @BOT, align 8, !tbaa !3
   %arrayidx47 = getelementptr inbounds i64, i64* %35, i64 %i.092
-  %36 = load i64* %arrayidx47, align 8, !tbaa !0
+  %36 = load i64, i64* %arrayidx47, align 8, !tbaa !0
   tail call fastcc void @CleanNet(i64 %36)
   br label %for.inc
 
@@ -169,7 +169,7 @@
 for.inc:                                          ; preds = %if.else48, %if.then44, %if.then32, %if.then20, %if.then9, %for.body
   %numLeft.1 = phi i32 [ %numLeft.091, %if.then9 ], [ %numLeft.091, %if.then20 ], [ %numLeft.091, %if.then32 ], [ %numLeft.091, %if.then44 ], [ %inc, %if.else48 ], [ %numLeft.091, %for.body ]
   %inc53 = add i64 %i.092, 1
-  %37 = load i64* @channelColumns, align 8, !tbaa !0
+  %37 = load i64, i64* @channelColumns, align 8, !tbaa !0
   %cmp = icmp ugt i64 %inc53, %37
   br i1 %cmp, label %for.end, label %for.body
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
index 4c40313..ff18f73 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -283,7 +283,7 @@
   %ref.tr.i.i = phi %str1* [ %0, %sw.bb.i.i ], [ undef, %entry ]
   %operands.i.i = getelementptr inbounds %str1, %str1* %ref.tr.i.i, i64 0, i32 0, i32 2
   %arrayidx.i.i = bitcast i32* %operands.i.i to %str1**
-  %0 = load %str1** %arrayidx.i.i, align 8
+  %0 = load %str1*, %str1** %arrayidx.i.i, align 8
   %code1.i.i.phi.trans.insert = getelementptr inbounds %str1, %str1* %0, i64 0, i32 0, i32 0, i64 16
   br label %sw.bb.i.i
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-code-model-large-abs.ll b/llvm/test/CodeGen/AArch64/arm64-code-model-large-abs.ll
index 264da2d..9f50fea 100644
--- a/llvm/test/CodeGen/AArch64/arm64-code-model-large-abs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-code-model-large-abs.ll
@@ -18,7 +18,7 @@
 
 define i8 @global_i8() {
 ; CHECK-LABEL: global_i8:
-  %val = load i8* @var8
+  %val = load i8, i8* @var8
   ret i8 %val
 ; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var8
 ; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var8
@@ -29,7 +29,7 @@
 
 define i16 @global_i16() {
 ; CHECK-LABEL: global_i16:
-  %val = load i16* @var16
+  %val = load i16, i16* @var16
   ret i16 %val
 ; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var16
 ; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var16
@@ -40,7 +40,7 @@
 
 define i32 @global_i32() {
 ; CHECK-LABEL: global_i32:
-  %val = load i32* @var32
+  %val = load i32, i32* @var32
   ret i32 %val
 ; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var32
 ; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var32
@@ -51,7 +51,7 @@
 
 define i64 @global_i64() {
 ; CHECK-LABEL: global_i64:
-  %val = load i64* @var64
+  %val = load i64, i64* @var64
   ret i64 %val
 ; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var64
 ; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var64
diff --git a/llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll b/llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll
index 2a590f9..e34ef39 100644
--- a/llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-collect-loh-garbage-crash.ll
@@ -22,13 +22,13 @@
 entry:
   br label %if.then83
 if.then83:                                        ; preds = %if.end81
-  %tmp = load %"class.H4ISP::H4ISPDevice"** @pH4ISPDevice, align 8
+  %tmp = load %"class.H4ISP::H4ISPDevice"*, %"class.H4ISP::H4ISPDevice"** @pH4ISPDevice, align 8
   %call84 = call i32 @_ZN5H4ISP11H4ISPDevice32ISP_SelectBestMIPIFrequencyIndexEjPj(%"class.H4ISP::H4ISPDevice"* %tmp) #19
   tail call void asm sideeffect "", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27}"()
-  %tmp2 = load %"class.H4ISP::H4ISPDevice"** @pH4ISPDevice, align 8
+  %tmp2 = load %"class.H4ISP::H4ISPDevice"*, %"class.H4ISP::H4ISPDevice"** @pH4ISPDevice, align 8
   tail call void asm sideeffect "", "~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x28}"()
   %pCameraManager.i268 = getelementptr inbounds %"class.H4ISP::H4ISPDevice", %"class.H4ISP::H4ISPDevice"* %tmp2, i64 0, i32 3
-  %tmp3 = load %"class.H4ISP::H4ISPCameraManager"** %pCameraManager.i268, align 8
+  %tmp3 = load %"class.H4ISP::H4ISPCameraManager"*, %"class.H4ISP::H4ISPCameraManager"** %pCameraManager.i268, align 8
   %tobool.i269 = icmp eq %"class.H4ISP::H4ISPCameraManager"* %tmp3, null
   br i1 %tobool.i269, label %if.then83, label %end
 end:
diff --git a/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll b/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll
index 6d73daa..c0aa63c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-collect-loh.ll
@@ -12,7 +12,7 @@
 ; Function Attrs: noinline nounwind ssp
 define void @foo(i32 %t) {
 entry:
-  %tmp = load i32* @a, align 4
+  %tmp = load i32, i32* @a, align 4
   %add = add nsw i32 %tmp, %t
   store i32 %add, i32* @a, align 4
   ret void
@@ -32,22 +32,22 @@
   br i1 %cmp, label %if.then, label %if.end4
 
 if.then:                                          ; preds = %entry
-  %tmp = load i32* @a, align 4
+  %tmp = load i32, i32* @a, align 4
   %add = add nsw i32 %tmp, %t
   %cmp1 = icmp sgt i32 %add, 12
   br i1 %cmp1, label %if.then2, label %if.end4
 
 if.then2:                                         ; preds = %if.then
   tail call void @foo(i32 %add)
-  %tmp1 = load i32* @a, align 4
+  %tmp1 = load i32, i32* @a, align 4
   br label %if.end4
 
 if.end4:                                          ; preds = %if.then2, %if.then, %entry
   %t.addr.0 = phi i32 [ %tmp1, %if.then2 ], [ %t, %if.then ], [ %t, %entry ]
-  %tmp2 = load i32* @b, align 4
+  %tmp2 = load i32, i32* @b, align 4
   %add5 = add nsw i32 %tmp2, %t.addr.0
   tail call void @foo(i32 %add5)
-  %tmp3 = load i32* @b, align 4
+  %tmp3 = load i32, i32* @b, align 4
   %add6 = add nsw i32 %tmp3, %t.addr.0
   ret i32 %add6
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-complex-copy-noneon.ll b/llvm/test/CodeGen/AArch64/arm64-complex-copy-noneon.ll
index bc66e1f..938bc62 100644
--- a/llvm/test/CodeGen/AArch64/arm64-complex-copy-noneon.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-complex-copy-noneon.ll
@@ -9,9 +9,9 @@
   %dst = alloca { double, double }, align 8
 
   %src.realp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 0
-  %src.real = load double* %src.realp
+  %src.real = load double, double* %src.realp
   %src.imagp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 1
-  %src.imag = load double* %src.imagp
+  %src.imag = load double, double* %src.imagp
 
   %dst.realp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 0
   %dst.imagp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 1
diff --git a/llvm/test/CodeGen/AArch64/arm64-const-addr.ll b/llvm/test/CodeGen/AArch64/arm64-const-addr.ll
index 834e9be..ffc1533 100644
--- a/llvm/test/CodeGen/AArch64/arm64-const-addr.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-const-addr.ll
@@ -11,12 +11,12 @@
 ; CHECK:        ldr w8, [x8, #12]
   %at = inttoptr i64 68141056 to %T*
   %o1 = getelementptr %T, %T* %at, i32 0, i32 1
-  %t1 = load i32* %o1
+  %t1 = load i32, i32* %o1
   %o2 = getelementptr %T, %T* %at, i32 0, i32 2
-  %t2 = load i32* %o2
+  %t2 = load i32, i32* %o2
   %a1 = add i32 %t1, %t2
   %o3 = getelementptr %T, %T* %at, i32 0, i32 3
-  %t3 = load i32* %o3
+  %t3 = load i32, i32* %o3
   %a2 = add i32 %a1, %t3
   ret i32 %a2
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll
index 7123e5e..c6b7d83 100644
--- a/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll
@@ -8,7 +8,7 @@
 ; CHECK-DAG: xtn  v[[LHS_NA:[0-9]+]].2s, v[[LHS]].2d
 ; CHECK-DAG: xtn  v[[RHS_NA:[0-9]+]].2s, v[[RHS]].2d
 ; CHECK:     uzp1  v0.4h, v[[RHS_NA]].4h, v[[LHS_NA]].4h
-  %tmp1 = load <4 x double>* %ptr
+  %tmp1 = load <4 x double>, <4 x double>* %ptr
   %tmp2 = fptosi <4 x double> %tmp1 to <4 x i16>
   ret <4 x i16> %tmp2
 }
@@ -26,7 +26,7 @@
 ; CHECK-DAG:  uzp1  v[[TMP1:[0-9]+]].4h, v[[CONV2]].4h, v[[CONV3]].4h
 ; CHECK-DAG:  uzp1  v[[TMP2:[0-9]+]].4h, v[[CONV0]].4h, v[[CONV1]].4h
 ; CHECK:      uzp1  v0.8b, v[[TMP2]].8b, v[[TMP1]].8b
-  %tmp1 = load <8 x double>* %ptr
+  %tmp1 = load <8 x double>, <8 x double>* %ptr
   %tmp2 = fptosi <8 x double> %tmp1 to <8 x i8>
   ret <8 x i8> %tmp2
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-cse.ll b/llvm/test/CodeGen/AArch64/arm64-cse.ll
index cefdec8..8d4bf5d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-cse.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-cse.ll
@@ -15,7 +15,7 @@
 ; CHECK: sub
 ; CHECK-NOT: sub
 ; CHECK: ret
- %0 = load i32* %offset, align 4
+ %0 = load i32, i32* %offset, align 4
  %cmp = icmp slt i32 %0, %size
  %s = sub nsw i32 %0, %size
  br i1 %cmp, label %return, label %if.end
@@ -43,7 +43,7 @@
 ; CHECK: b.lt
 ; CHECK-NOT: sub
 ; CHECK: ret
- %0 = load i32* %offset, align 4
+ %0 = load i32, i32* %offset, align 4
  %cmp = icmp slt i32 %0, 1
  br i1 %cmp, label %return, label %if.end
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll b/llvm/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll
index 424e1e7..37f3504 100644
--- a/llvm/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-dagcombiner-dead-indexed-load.ll
@@ -19,7 +19,7 @@
 entry:
   %r1 = getelementptr inbounds %"struct.SU", %"struct.SU"* %su, i64 1, i32 5
   %r2 = bitcast %"struct.BO"* %r1 to i48*
-  %r3 = load i48* %r2, align 8
+  %r3 = load i48, i48* %r2, align 8
   %r4 = and i48 %r3, -4294967296
   %r5 = or i48 0, %r4
   store i48 %r5, i48* %r2, align 8
diff --git a/llvm/test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll b/llvm/test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll
index 1109840..09483ea 100644
--- a/llvm/test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll
@@ -16,7 +16,7 @@
 entry:
   %arrayidx = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %out_start
   %0 = bitcast %class.Complex* %arrayidx to i64*
-  %1 = load i64* %0, align 4
+  %1 = load i64, i64* %0, align 4
   %t0.sroa.0.0.extract.trunc = trunc i64 %1 to i32
   %2 = bitcast i32 %t0.sroa.0.0.extract.trunc to float
   %t0.sroa.2.0.extract.shift = lshr i64 %1, 32
@@ -25,11 +25,11 @@
   %add = add i64 %out_start, 8
   %arrayidx2 = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %add
   %i.i = getelementptr inbounds %class.Complex, %class.Complex* %arrayidx2, i64 0, i32 0
-  %4 = load float* %i.i, align 4
+  %4 = load float, float* %i.i, align 4
   %add.i = fadd float %4, %2
   %retval.sroa.0.0.vec.insert.i = insertelement <2 x float> undef, float %add.i, i32 0
   %r.i = getelementptr inbounds %class.Complex, %class.Complex* %arrayidx2, i64 0, i32 1
-  %5 = load float* %r.i, align 4
+  %5 = load float, float* %r.i, align 4
   %add5.i = fadd float %5, %3
   %retval.sroa.0.4.vec.insert.i = insertelement <2 x float> %retval.sroa.0.0.vec.insert.i, float %add5.i, i32 1
   %ref.tmp.sroa.0.0.cast = bitcast %class.Complex* %arrayidx to <2 x float>*
@@ -48,7 +48,7 @@
 entry:
   %arrayidx = getelementptr inbounds %class.Complex_int, %class.Complex_int* %out, i64 %out_start
   %0 = bitcast %class.Complex_int* %arrayidx to i64*
-  %1 = load i64* %0, align 4
+  %1 = load i64, i64* %0, align 4
   %t0.sroa.0.0.extract.trunc = trunc i64 %1 to i32
   %2 = bitcast i32 %t0.sroa.0.0.extract.trunc to i32
   %t0.sroa.2.0.extract.shift = lshr i64 %1, 32
@@ -57,11 +57,11 @@
   %add = add i64 %out_start, 8
   %arrayidx2 = getelementptr inbounds %class.Complex_int, %class.Complex_int* %out, i64 %add
   %i.i = getelementptr inbounds %class.Complex_int, %class.Complex_int* %arrayidx2, i64 0, i32 0
-  %4 = load i32* %i.i, align 4
+  %4 = load i32, i32* %i.i, align 4
   %add.i = add i32 %4, %2
   %retval.sroa.0.0.vec.insert.i = insertelement <2 x i32> undef, i32 %add.i, i32 0
   %r.i = getelementptr inbounds %class.Complex_int, %class.Complex_int* %arrayidx2, i64 0, i32 1
-  %5 = load i32* %r.i, align 4
+  %5 = load i32, i32* %r.i, align 4
   %add5.i = add i32 %5, %3
   %retval.sroa.0.4.vec.insert.i = insertelement <2 x i32> %retval.sroa.0.0.vec.insert.i, i32 %add5.i, i32 1
   %ref.tmp.sroa.0.0.cast = bitcast %class.Complex_int* %arrayidx to <2 x i32>*
@@ -80,7 +80,7 @@
 entry:
   %arrayidx = getelementptr inbounds %class.Complex_long, %class.Complex_long* %out, i64 %out_start
   %0 = bitcast %class.Complex_long* %arrayidx to i128*
-  %1 = load i128* %0, align 4
+  %1 = load i128, i128* %0, align 4
   %t0.sroa.0.0.extract.trunc = trunc i128 %1 to i64
   %2 = bitcast i64 %t0.sroa.0.0.extract.trunc to i64
   %t0.sroa.2.0.extract.shift = lshr i128 %1, 64
@@ -89,11 +89,11 @@
   %add = add i64 %out_start, 8
   %arrayidx2 = getelementptr inbounds %class.Complex_long, %class.Complex_long* %out, i64 %add
   %i.i = getelementptr inbounds %class.Complex_long, %class.Complex_long* %arrayidx2, i32 0, i32 0
-  %4 = load i64* %i.i, align 4
+  %4 = load i64, i64* %i.i, align 4
   %add.i = add i64 %4, %2
   %retval.sroa.0.0.vec.insert.i = insertelement <2 x i64> undef, i64 %add.i, i32 0
   %r.i = getelementptr inbounds %class.Complex_long, %class.Complex_long* %arrayidx2, i32 0, i32 1
-  %5 = load i64* %r.i, align 4
+  %5 = load i64, i64* %r.i, align 4
   %add5.i = add i64 %5, %3
   %retval.sroa.0.4.vec.insert.i = insertelement <2 x i64> %retval.sroa.0.0.vec.insert.i, i64 %add5.i, i32 1
   %ref.tmp.sroa.0.0.cast = bitcast %class.Complex_long* %arrayidx to <2 x i64>*
diff --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll
index 0c56b46..849e227 100644
--- a/llvm/test/CodeGen/AArch64/arm64-dup.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll
@@ -165,7 +165,7 @@
 define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vduplane8:
 ;CHECK: dup.8b
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
 	ret <8 x i8> %tmp2
 }
@@ -173,7 +173,7 @@
 define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vduplane16:
 ;CHECK: dup.4h
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
 	ret <4 x i16> %tmp2
 }
@@ -181,7 +181,7 @@
 define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vduplane32:
 ;CHECK: dup.2s
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
 	ret <2 x i32> %tmp2
 }
@@ -189,7 +189,7 @@
 define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: vduplanefloat:
 ;CHECK: dup.2s
-	%tmp1 = load <2 x float>* %A
+	%tmp1 = load <2 x float>, <2 x float>* %A
 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
 	ret <2 x float> %tmp2
 }
@@ -197,7 +197,7 @@
 define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vduplaneQ8:
 ;CHECK: dup.16b
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
 	ret <16 x i8> %tmp2
 }
@@ -205,7 +205,7 @@
 define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vduplaneQ16:
 ;CHECK: dup.8h
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
 	ret <8 x i16> %tmp2
 }
@@ -213,7 +213,7 @@
 define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vduplaneQ32:
 ;CHECK: dup.4s
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
 	ret <4 x i32> %tmp2
 }
@@ -221,7 +221,7 @@
 define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: vduplaneQfloat:
 ;CHECK: dup.4s
-	%tmp1 = load <2 x float>* %A
+	%tmp1 = load <2 x float>, <2 x float>* %A
 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
 	ret <4 x float> %tmp2
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll b/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll
index 795ad70..8164f46 100644
--- a/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll
@@ -15,7 +15,7 @@
   %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.cond ]
   %p.addr.0 = phi i32* [ %p, %entry ], [ %incdec.ptr, %do.cond ]
   %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.0, i64 1
-  %0 = load i32* %p.addr.0, align 4
+  %0 = load i32, i32* %p.addr.0, align 4
   %cmp = icmp sgt i32 %0, %max.0
   br i1 %cmp, label %do.cond, label %if.else
 
@@ -400,7 +400,7 @@
   br label %for.body
 
 for.body:
-  %x0 = load i32* undef, align 4
+  %x0 = load i32, i32* undef, align 4
   br i1 undef, label %if.then.i146, label %is_sbox.exit155
 
 if.then.i146:
@@ -413,7 +413,7 @@
   %seg_offset.0.i151 = phi i32 [ %add9.i145, %if.then.i146 ], [ undef, %for.body ]
   %idxprom15.i152 = sext i32 %seg_offset.0.i151 to i64
   %arrayidx18.i154 = getelementptr inbounds i32, i32* null, i64 %idxprom15.i152
-  %x1 = load i32* %arrayidx18.i154, align 4
+  %x1 = load i32, i32* %arrayidx18.i154, align 4
   br i1 undef, label %for.body51, label %for.body
 
 for.body51:                                       ; preds = %is_sbox.exit155
diff --git a/llvm/test/CodeGen/AArch64/arm64-elf-globals.ll b/llvm/test/CodeGen/AArch64/arm64-elf-globals.ll
index 025aea1..b1d5524 100644
--- a/llvm/test/CodeGen/AArch64/arm64-elf-globals.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-elf-globals.ll
@@ -9,7 +9,7 @@
 @var64 = external global i64, align 8
 
 define i8 @test_i8(i8 %new) {
-  %val = load i8* @var8, align 1
+  %val = load i8, i8* @var8, align 1
   store i8 %new, i8* @var8
   ret i8 %val
 ; CHECK-LABEL: test_i8:
@@ -31,7 +31,7 @@
 }
 
 define i16 @test_i16(i16 %new) {
-  %val = load i16* @var16, align 2
+  %val = load i16, i16* @var16, align 2
   store i16 %new, i16* @var16
   ret i16 %val
 ; CHECK-LABEL: test_i16:
@@ -44,7 +44,7 @@
 }
 
 define i32 @test_i32(i32 %new) {
-  %val = load i32* @var32, align 4
+  %val = load i32, i32* @var32, align 4
   store i32 %new, i32* @var32
   ret i32 %val
 ; CHECK-LABEL: test_i32:
@@ -57,7 +57,7 @@
 }
 
 define i64 @test_i64(i64 %new) {
-  %val = load i64* @var64, align 8
+  %val = load i64, i64* @var64, align 8
   store i64 %new, i64* @var64
   ret i64 %val
 ; CHECK-LABEL: test_i64:
@@ -83,8 +83,8 @@
 @protectedvar = protected global i32 0, align 4
 
 define i32 @test_vis() {
-  %lhs = load i32* @hiddenvar, align 4
-  %rhs = load i32* @protectedvar, align 4
+  %lhs = load i32, i32* @hiddenvar, align 4
+  %rhs = load i32, i32* @protectedvar, align 4
   %ret = add i32 %lhs, %rhs
   ret i32 %ret
 ; CHECK-PIC: adrp {{x[0-9]+}}, hiddenvar
@@ -97,7 +97,7 @@
 
 define i32 @test_default_align() {
   %addr = getelementptr [2 x i32], [2 x i32]* @var_default, i32 0, i32 0
-  %val = load i32* %addr
+  %val = load i32, i32* %addr
   ret i32 %val
 ; CHECK-LABEL: test_default_align:
 ; CHECK: adrp x[[HIREG:[0-9]+]], var_default
@@ -106,7 +106,7 @@
 
 define i64 @test_default_unaligned() {
   %addr = bitcast [2 x i32]* @var_default to i64*
-  %val = load i64* %addr
+  %val = load i64, i64* %addr
   ret i64 %val
 ; CHECK-LABEL: test_default_unaligned:
 ; CHECK: adrp [[HIREG:x[0-9]+]], var_default
diff --git a/llvm/test/CodeGen/AArch64/arm64-ext.ll b/llvm/test/CodeGen/AArch64/arm64-ext.ll
index 67860de..8315ffc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ext.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ext.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: test_vextd:
 ;CHECK: {{ext.8b.*#3}}
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: test_vextRd:
 ;CHECK: {{ext.8b.*#5}}
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
 	ret <8 x i8> %tmp3
 }
@@ -21,8 +21,8 @@
 define <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: test_vextq:
 ;CHECK: {{ext.16b.*3}}
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
 	ret <16 x i8> %tmp3
 }
@@ -30,8 +30,8 @@
 define <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: test_vextRq:
 ;CHECK: {{ext.16b.*7}}
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
 	ret <16 x i8> %tmp3
 }
@@ -39,8 +39,8 @@
 define <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: test_vextd16:
 ;CHECK: {{ext.8b.*#6}}
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 	ret <4 x i16> %tmp3
 }
@@ -48,8 +48,8 @@
 define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: test_vextq32:
 ;CHECK: {{ext.16b.*12}}
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 	ret <4 x i32> %tmp3
 }
@@ -59,8 +59,8 @@
 define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: test_vextd_undef:
 ;CHECK: {{ext.8b.*}}
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10>
 	ret <8 x i8> %tmp3
 }
@@ -68,8 +68,8 @@
 define <8 x i8> @test_vextd_undef2(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: test_vextd_undef2:
 ;CHECK: {{ext.8b.*#6}}
-  %tmp1 = load <8 x i8>* %A
-  %tmp2 = load <8 x i8>* %B
+  %tmp1 = load <8 x i8>, <8 x i8>* %A
+  %tmp2 = load <8 x i8>, <8 x i8>* %B
   %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 5>
   ret <8 x i8> %tmp3
 }
@@ -77,8 +77,8 @@
 define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: test_vextRq_undef:
 ;CHECK: {{ext.16b.*#7}}
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 undef, i32 undef, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6>
 	ret <16 x i8> %tmp3
 }
@@ -86,7 +86,7 @@
 define <8 x i16> @test_vextRq_undef2(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: test_vextRq_undef2:
 ;CHECK: {{ext.16b.*#10}}
-  %tmp1 = load <8 x i16>* %A
+  %tmp1 = load <8 x i16>, <8 x i16>* %A
   %vext = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 2, i32 3, i32 4>
   ret <8 x i16> %vext;
 }
@@ -101,8 +101,8 @@
 ;CHECK-LABEL: test_interleaved:
 ;CHECK: ext.8b
 ;CHECK: zip1.4h
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 3, i32 8, i32 5, i32 9>
         ret <4 x i16> %tmp3
 }
@@ -111,8 +111,8 @@
 define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: test_undef:
 ;CHECK: zip1.4h
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 undef, i32 8, i32 5, i32 9>
         ret <4 x i16> %tmp3
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-extend.ll b/llvm/test/CodeGen/AArch64/arm64-extend.ll
index ce3f001..0ef68f8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-extend.ll
@@ -9,7 +9,7 @@
 ; CHECK:  ret
   %idxprom = sext i32 %i to i64
   %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @array, i64 0, i64 %idxprom
-  %tmp1 = load i32* %arrayidx, align 4
+  %tmp1 = load i32, i32* %arrayidx, align 4
   %conv = sext i32 %tmp1 to i64
   ret i64 %conv
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-extload-knownzero.ll b/llvm/test/CodeGen/AArch64/arm64-extload-knownzero.ll
index 14e5fd3..642af87 100644
--- a/llvm/test/CodeGen/AArch64/arm64-extload-knownzero.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-extload-knownzero.ll
@@ -9,7 +9,7 @@
 bb1:
 ; CHECK: %bb1
 ; CHECK: ldrh [[REG:w[0-9]+]]
-  %tmp2 = load i16* %ptr, align 2
+  %tmp2 = load i16, i16* %ptr, align 2
   br label %bb2
 bb2:
 ; CHECK: %bb2
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll
index 2e6f24f..3a14c7e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-addr-offset.ll
@@ -13,7 +13,7 @@
 ; CHECK: add x[[REG3:[0-9]+]], x[[REG1]], x[[REG2]]
 ; CHECK: ldr w0, [x[[REG3]]]
 ; CHECK: ret
-  %0 = load i32* getelementptr inbounds ([5001 x i32]* @sortlist, i32 0, i64 5000), align 4
+  %0 = load i32, i32* getelementptr inbounds ([5001 x i32]* @sortlist, i32 0, i64 5000), align 4
   ret i32 %0
 }
 
@@ -26,7 +26,7 @@
 ; CHECK: add x[[REG3:[0-9]+]], x[[REG1]], x[[REG2]]
 ; CHECK: ldr x0, [x[[REG3]]]
 ; CHECK: ret
-  %0 = load i64* getelementptr inbounds ([5001 x i64]* @sortlist2, i32 0, i64 5000), align 4
+  %0 = load i64, i64* getelementptr inbounds ([5001 x i64]* @sortlist2, i32 0, i64 5000), align 4
   ret i64 %0
 }
 
@@ -40,8 +40,8 @@
 ; CHECK: movz x[[REG:[0-9]+]], #0xb3a, lsl #32
 ; CHECK: movk x[[REG]], #0x73ce, lsl #16
 ; CHECK: movk x[[REG]], #0x2ff2
-  %0 = load i8** @pd2, align 8
+  %0 = load i8*, i8** @pd2, align 8
   %arrayidx = getelementptr inbounds i8, i8* %0, i64 12345678901234
-  %1 = load i8* %arrayidx, align 1
+  %1 = load i8, i8* %arrayidx, align 1
   ret i8 %1
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll
index 8c23c2a..0ef7b14 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll
@@ -3,7 +3,7 @@
 define void @branch1() nounwind uwtable ssp {
   %x = alloca i32, align 4
   store i32 0, i32* %x, align 4
-  %1 = load i32* %x, align 4
+  %1 = load i32, i32* %x, align 4
   %2 = icmp ne i32 %1, 0
   br i1 %2, label %3, label %4
 
@@ -23,7 +23,7 @@
   store i32 1, i32* %y, align 4
   store i32 1, i32* %x, align 4
   store i32 0, i32* %z, align 4
-  %2 = load i32* %x, align 4
+  %2 = load i32, i32* %x, align 4
   %3 = icmp ne i32 %2, 0
   br i1 %3, label %4, label %5
 
@@ -32,12 +32,12 @@
   br label %14
 
 ; <label>:5                                       ; preds = %0
-  %6 = load i32* %y, align 4
+  %6 = load i32, i32* %y, align 4
   %7 = icmp ne i32 %6, 0
   br i1 %7, label %8, label %13
 
 ; <label>:8                                       ; preds = %5
-  %9 = load i32* %z, align 4
+  %9 = load i32, i32* %z, align 4
   %10 = icmp ne i32 %9, 0
   br i1 %10, label %11, label %12
 
@@ -53,7 +53,7 @@
   br label %14
 
 ; <label>:14                                      ; preds = %4, %11, %12, %13
-  %15 = load i32* %1
+  %15 = load i32, i32* %1
   ret void
 }
 
@@ -93,7 +93,7 @@
   store i16 %b, i16* %b.addr, align 2
   store i32 %c, i32* %c.addr, align 4
   store i64 %d, i64* %d.addr, align 8
-  %0 = load i16* %b.addr, align 2
+  %0 = load i16, i16* %b.addr, align 2
 ; CHECK: and w0, w0, #0x1
 ; CHECK: cmp w0, #0
 ; CHECK: b.eq LBB4_2
@@ -105,7 +105,7 @@
   br label %if.end
 
 if.end:                                           ; preds = %if.then, %entry
-  %1 = load i32* %c.addr, align 4
+  %1 = load i32, i32* %c.addr, align 4
 ; CHECK: and w[[REG:[0-9]+]], w{{[0-9]+}}, #0x1
 ; CHECK: cmp w[[REG]], #0
 ; CHECK: b.eq LBB4_4
@@ -117,7 +117,7 @@
   br label %if.end4
 
 if.end4:                                          ; preds = %if.then3, %if.end
-  %2 = load i64* %d.addr, align 8
+  %2 = load i64, i64* %d.addr, align 8
 ; CHECK: cmp w{{[0-9]+}}, #0
 ; CHECK: b.eq LBB4_6
   %conv5 = trunc i64 %2 to i1
@@ -128,7 +128,7 @@
   br label %if.end8
 
 if.end8:                                          ; preds = %if.then7, %if.end4
-  %3 = load i8* %a.addr, align 1
+  %3 = load i8, i8* %a.addr, align 1
   ret i8 %3
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll
index 7a31665..d6957f9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll
@@ -23,7 +23,7 @@
 entry:
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
-  %tmp = load i32* %a.addr, align 4
+  %tmp = load i32, i32* %a.addr, align 4
   ret i32 %tmp
 }
 
@@ -35,7 +35,7 @@
 ; CHECK-NEXT:  bl _call1
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
-  %tmp = load i32* %a.addr, align 4
+  %tmp = load i32, i32* %a.addr, align 4
   %call = call i32 @call1(i32 %tmp)
   ret i32 %call
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
index d913f11..1b68865 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll
@@ -27,16 +27,16 @@
   store i16 %b, i16* %b.addr, align 2
   store i32 %c, i32* %c.addr, align 4
   store i64 %d, i64* %d.addr, align 8
-  %tmp = load i64* %d.addr, align 8
+  %tmp = load i64, i64* %d.addr, align 8
   %conv = trunc i64 %tmp to i32
   store i32 %conv, i32* %c.addr, align 4
-  %tmp1 = load i32* %c.addr, align 4
+  %tmp1 = load i32, i32* %c.addr, align 4
   %conv2 = trunc i32 %tmp1 to i16
   store i16 %conv2, i16* %b.addr, align 2
-  %tmp3 = load i16* %b.addr, align 2
+  %tmp3 = load i16, i16* %b.addr, align 2
   %conv4 = trunc i16 %tmp3 to i8
   store i8 %conv4, i8* %a.addr, align 1
-  %tmp5 = load i8* %a.addr, align 1
+  %tmp5 = load i8, i8* %a.addr, align 1
   %conv6 = zext i8 %tmp5 to i32
   ret i32 %conv6
 }
@@ -66,16 +66,16 @@
   store i16 %b, i16* %b.addr, align 2
   store i32 %c, i32* %c.addr, align 4
   store i64 %d, i64* %d.addr, align 8
-  %tmp = load i8* %a.addr, align 1
+  %tmp = load i8, i8* %a.addr, align 1
   %conv = zext i8 %tmp to i16
   store i16 %conv, i16* %b.addr, align 2
-  %tmp1 = load i16* %b.addr, align 2
+  %tmp1 = load i16, i16* %b.addr, align 2
   %conv2 = zext i16 %tmp1 to i32
   store i32 %conv2, i32* %c.addr, align 4
-  %tmp3 = load i32* %c.addr, align 4
+  %tmp3 = load i32, i32* %c.addr, align 4
   %conv4 = zext i32 %tmp3 to i64
   store i64 %conv4, i64* %d.addr, align 8
-  %tmp5 = load i64* %d.addr, align 8
+  %tmp5 = load i64, i64* %d.addr, align 8
   ret i64 %tmp5
 }
 
@@ -121,16 +121,16 @@
   store i16 %b, i16* %b.addr, align 2
   store i32 %c, i32* %c.addr, align 4
   store i64 %d, i64* %d.addr, align 8
-  %tmp = load i8* %a.addr, align 1
+  %tmp = load i8, i8* %a.addr, align 1
   %conv = sext i8 %tmp to i16
   store i16 %conv, i16* %b.addr, align 2
-  %tmp1 = load i16* %b.addr, align 2
+  %tmp1 = load i16, i16* %b.addr, align 2
   %conv2 = sext i16 %tmp1 to i32
   store i32 %conv2, i32* %c.addr, align 4
-  %tmp3 = load i32* %c.addr, align 4
+  %tmp3 = load i32, i32* %c.addr, align 4
   %conv4 = sext i32 %tmp3 to i64
   store i64 %conv4, i64* %d.addr, align 8
-  %tmp5 = load i64* %d.addr, align 8
+  %tmp5 = load i64, i64* %d.addr, align 8
   ret i64 %tmp5
 }
 
@@ -409,7 +409,7 @@
 ; CHECK: add  sp, sp, #16
   %a = alloca i8, align 1
   %b = alloca i64, align 8
-  %c = load i64* %b, align 8
+  %c = load i64, i64* %b, align 8
   %d = trunc i64 %c to i8
   store i8 %d, i8* %a, align 1
   ret void
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-gv.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-gv.ll
index 441967e..ab29824 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-gv.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-gv.ll
@@ -26,12 +26,12 @@
 ; CHECK: and  [[REG8:x[0-9]+]], [[REG7]], #0xffff
 ; CHECK: str  [[REG8]], {{\[}}[[REG1]]{{\]}}
 ; CHECK: ldr  {{x[0-9]+}}, {{\[}}[[REG1]]{{\]}}
-  %0 = load i64* @seed, align 8
+  %0 = load i64, i64* @seed, align 8
   %mul = mul nsw i64 %0, 1309
   %add = add nsw i64 %mul, 13849
   %and = and i64 %add, 65535
   store i64 %and, i64* @seed, align 8
-  %1 = load i64* @seed, align 8
+  %1 = load i64, i64* @seed, align 8
   %conv = trunc i64 %1 to i32
   ret i32 %conv
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll
index 9243094..cb54e45 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-indirectbr.ll
@@ -8,10 +8,10 @@
   %retval = alloca i32, align 4
   %target.addr = alloca i32, align 4
   store i32 %target, i32* %target.addr, align 4
-  %0 = load i32* %target.addr, align 4
+  %0 = load i32, i32* %target.addr, align 4
   %idxprom = zext i32 %0 to i64
   %arrayidx = getelementptr inbounds [2 x i8*], [2 x i8*]* @fn.table, i32 0, i64 %idxprom
-  %1 = load i8** %arrayidx, align 8
+  %1 = load i8*, i8** %arrayidx, align 8
   br label %indirectgoto
 
 ZERO:                                             ; preds = %indirectgoto
@@ -25,7 +25,7 @@
   br label %return
 
 return:                                           ; preds = %ONE, %ZERO
-  %2 = load i32* %retval
+  %2 = load i32, i32* %retval
   ret i32 %2
 
 indirectgoto:                                     ; preds = %entry
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel-ret.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel-ret.ll
index a3d5d87..1f6a60e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel-ret.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel-ret.ll
@@ -16,7 +16,7 @@
 ; CHECK: ret
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
-  %tmp = load i32* %a.addr, align 4
+  %tmp = load i32, i32* %a.addr, align 4
   ret i32 %tmp
 }
 
@@ -28,7 +28,7 @@
 ; CHECK: ret
   %a.addr = alloca i64, align 8
   store i64 %a, i64* %a.addr, align 8
-  %tmp = load i64* %a.addr, align 8
+  %tmp = load i64, i64* %a.addr, align 8
   ret i64 %tmp
 }
 
@@ -38,7 +38,7 @@
 ; CHECK: sxth	w0, w0
   %a.addr = alloca i16, align 1
   store i16 %a, i16* %a.addr, align 1
-  %0 = load i16* %a.addr, align 1
+  %0 = load i16, i16* %a.addr, align 1
   ret i16 %0
 }
 
@@ -48,7 +48,7 @@
 ; CHECK: sxtb	w0, w0
   %a.addr = alloca i8, align 1
   store i8 %a, i8* %a.addr, align 1
-  %0 = load i8* %a.addr, align 1
+  %0 = load i8, i8* %a.addr, align 1
   ret i8 %0
 }
 
@@ -58,6 +58,6 @@
 ; CHECK: and w0, w0, #0x1
   %a.addr = alloca i1, align 1
   store i1 %a, i1* %a.addr, align 1
-  %0 = load i1* %a.addr, align 1
+  %0 = load i1, i1* %a.addr, align 1
   ret i1 %0
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-fast-isel.ll b/llvm/test/CodeGen/AArch64/arm64-fast-isel.ll
index 8ce6091..a4d08f9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fast-isel.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fast-isel.ll
@@ -9,7 +9,7 @@
 ; CHECK: ret
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr
-  %tmp = load i32* %a.addr
+  %tmp = load i32, i32* %a.addr
   store i32 %tmp, i32* %a.addr
   ret void
 }
@@ -22,7 +22,7 @@
 ; CHECK: ret
   %a.addr = alloca i64, align 4
   store i64 %a, i64* %a.addr
-  %tmp = load i64* %a.addr
+  %tmp = load i64, i64* %a.addr
   store i64 %tmp, i64* %a.addr
   ret void
 }
@@ -39,7 +39,7 @@
 ; CHECK: ret
   %a.addr = alloca i1, align 1
   store i1 %a, i1* %a.addr, align 1
-  %0 = load i1* %a.addr, align 1
+  %0 = load i1, i1* %a.addr, align 1
   ret i1 %0
 }
 
@@ -49,7 +49,7 @@
 ; CHECK: ldur w0, [x0, #-4]
 ; CHECK: ret
   %0 = getelementptr i32, i32 *%ptr, i32 -1
-  %1 = load i32* %0, align 4
+  %1 = load i32, i32* %0, align 4
   ret i32 %1
 }
 
@@ -59,7 +59,7 @@
 ; CHECK: ldur w0, [x0, #-256]
 ; CHECK: ret
   %0 = getelementptr i32, i32 *%ptr, i32 -64
-  %1 = load i32* %0, align 4
+  %1 = load i32, i32* %0, align 4
   ret i32 %1
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-fastisel-gep-promote-before-add.ll b/llvm/test/CodeGen/AArch64/arm64-fastisel-gep-promote-before-add.ll
index c8df25d..8268bcf 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fastisel-gep-promote-before-add.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fastisel-gep-promote-before-add.ll
@@ -6,13 +6,13 @@
 entry:
   %ptr.addr = alloca i8*, align 8
   %add = add i8 64, 64 ; 0x40 + 0x40
-  %0 = load i8** %ptr.addr, align 8
+  %0 = load i8*, i8** %ptr.addr, align 8
 
   ; CHECK-LABEL: _gep_promotion:
   ; CHECK: ldrb {{[a-z][0-9]+}}, {{\[[a-z][0-9]+\]}}
   %arrayidx = getelementptr inbounds i8, i8* %0, i8 %add
 
-  %1 = load i8* %arrayidx, align 1
+  %1 = load i8, i8* %arrayidx, align 1
   ret i8 %1
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-fmuladd.ll b/llvm/test/CodeGen/AArch64/arm64-fmuladd.ll
index 6c5eeca..cfc8b5f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fmuladd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fmuladd.ll
@@ -4,9 +4,9 @@
 ;CHECK-LABEL: test_f32:
 ;CHECK: fmadd
 ;CHECK-NOT: fmadd
-  %tmp1 = load float* %A
-  %tmp2 = load float* %B
-  %tmp3 = load float* %C
+  %tmp1 = load float, float* %A
+  %tmp2 = load float, float* %B
+  %tmp3 = load float, float* %C
   %tmp4 = call float @llvm.fmuladd.f32(float %tmp1, float %tmp2, float %tmp3)
   ret float %tmp4
 }
@@ -15,9 +15,9 @@
 ;CHECK-LABEL: test_v2f32:
 ;CHECK: fmla.2s
 ;CHECK-NOT: fmla.2s
-  %tmp1 = load <2 x float>* %A
-  %tmp2 = load <2 x float>* %B
-  %tmp3 = load <2 x float>* %C
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
+  %tmp3 = load <2 x float>, <2 x float>* %C
   %tmp4 = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2, <2 x float> %tmp3)
   ret <2 x float> %tmp4
 }
@@ -26,9 +26,9 @@
 ;CHECK-LABEL: test_v4f32:
 ;CHECK: fmla.4s
 ;CHECK-NOT: fmla.4s
-  %tmp1 = load <4 x float>* %A
-  %tmp2 = load <4 x float>* %B
-  %tmp3 = load <4 x float>* %C
+  %tmp1 = load <4 x float>, <4 x float>* %A
+  %tmp2 = load <4 x float>, <4 x float>* %B
+  %tmp3 = load <4 x float>, <4 x float>* %C
   %tmp4 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %tmp1, <4 x float> %tmp2, <4 x float> %tmp3)
   ret <4 x float> %tmp4
 }
@@ -38,9 +38,9 @@
 ;CHECK: fmla.4s
 ;CHECK: fmla.4s
 ;CHECK-NOT: fmla.4s
-  %tmp1 = load <8 x float>* %A
-  %tmp2 = load <8 x float>* %B
-  %tmp3 = load <8 x float>* %C
+  %tmp1 = load <8 x float>, <8 x float>* %A
+  %tmp2 = load <8 x float>, <8 x float>* %B
+  %tmp3 = load <8 x float>, <8 x float>* %C
   %tmp4 = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> %tmp1, <8 x float> %tmp2, <8 x float> %tmp3)
   ret <8 x float> %tmp4
 }
@@ -49,9 +49,9 @@
 ;CHECK-LABEL: test_f64:
 ;CHECK: fmadd
 ;CHECK-NOT: fmadd
-  %tmp1 = load double* %A
-  %tmp2 = load double* %B
-  %tmp3 = load double* %C
+  %tmp1 = load double, double* %A
+  %tmp2 = load double, double* %B
+  %tmp3 = load double, double* %C
   %tmp4 = call double @llvm.fmuladd.f64(double %tmp1, double %tmp2, double %tmp3)
   ret double %tmp4
 }
@@ -60,9 +60,9 @@
 ;CHECK-LABEL: test_v2f64:
 ;CHECK: fmla.2d
 ;CHECK-NOT: fmla.2d
-  %tmp1 = load <2 x double>* %A
-  %tmp2 = load <2 x double>* %B
-  %tmp3 = load <2 x double>* %C
+  %tmp1 = load <2 x double>, <2 x double>* %A
+  %tmp2 = load <2 x double>, <2 x double>* %B
+  %tmp3 = load <2 x double>, <2 x double>* %C
   %tmp4 = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %tmp1, <2 x double> %tmp2, <2 x double> %tmp3)
   ret <2 x double> %tmp4
 }
@@ -72,9 +72,9 @@
 ;CHECK: fmla.2d
 ;CHECK: fmla.2d
 ;CHECK-NOT: fmla.2d
-  %tmp1 = load <4 x double>* %A
-  %tmp2 = load <4 x double>* %B
-  %tmp3 = load <4 x double>* %C
+  %tmp1 = load <4 x double>, <4 x double>* %A
+  %tmp2 = load <4 x double>, <4 x double>* %B
+  %tmp3 = load <4 x double>, <4 x double>* %C
   %tmp4 = call <4 x double> @llvm.fmuladd.v4f64(<4 x double> %tmp1, <4 x double> %tmp2, <4 x double> %tmp3)
   ret <4 x double> %tmp4
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-fold-address.ll b/llvm/test/CodeGen/AArch64/arm64-fold-address.ll
index 1c4260c..6d2ea17 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fold-address.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fold-address.ll
@@ -14,23 +14,23 @@
 ; CHECK: ldp d0, d1, [x[[REG]]]
 ; CHECK: ldp d2, d3, [x[[REG]], #16]
 ; CHECK: ret
-  %ivar = load i64* @"OBJC_IVAR_$_UIScreen._bounds", align 8, !invariant.load !4
+  %ivar = load i64, i64* @"OBJC_IVAR_$_UIScreen._bounds", align 8, !invariant.load !4
   %0 = bitcast %0* %self to i8*
   %add.ptr = getelementptr inbounds i8, i8* %0, i64 %ivar
   %add.ptr10.0 = bitcast i8* %add.ptr to double*
-  %tmp11 = load double* %add.ptr10.0, align 8
+  %tmp11 = load double, double* %add.ptr10.0, align 8
   %add.ptr.sum = add i64 %ivar, 8
   %add.ptr10.1 = getelementptr inbounds i8, i8* %0, i64 %add.ptr.sum
   %1 = bitcast i8* %add.ptr10.1 to double*
-  %tmp12 = load double* %1, align 8
+  %tmp12 = load double, double* %1, align 8
   %add.ptr.sum17 = add i64 %ivar, 16
   %add.ptr4.1 = getelementptr inbounds i8, i8* %0, i64 %add.ptr.sum17
   %add.ptr4.1.0 = bitcast i8* %add.ptr4.1 to double*
-  %tmp = load double* %add.ptr4.1.0, align 8
+  %tmp = load double, double* %add.ptr4.1.0, align 8
   %add.ptr4.1.sum = add i64 %ivar, 24
   %add.ptr4.1.1 = getelementptr inbounds i8, i8* %0, i64 %add.ptr4.1.sum
   %2 = bitcast i8* %add.ptr4.1.1 to double*
-  %tmp5 = load double* %2, align 8
+  %tmp5 = load double, double* %2, align 8
   %insert14 = insertvalue %struct.CGPoint undef, double %tmp11, 0
   %insert16 = insertvalue %struct.CGPoint %insert14, double %tmp12, 1
   %insert = insertvalue %struct.CGRect undef, %struct.CGPoint %insert16, 0
@@ -46,20 +46,20 @@
 ; CHECK: ldr d0, [x0, x{{[0-9]+}}]
 ; CHECK-NOT: add x0, x0, x1
 ; CHECK: ret
-  %ivar = load i64* @"OBJC_IVAR_$_UIScreen._bounds", align 8, !invariant.load !4
+  %ivar = load i64, i64* @"OBJC_IVAR_$_UIScreen._bounds", align 8, !invariant.load !4
   %0 = bitcast %0* %self to i8*
   %add.ptr = getelementptr inbounds i8, i8* %0, i64 %ivar
   %add.ptr10.0 = bitcast i8* %add.ptr to double*
-  %tmp11 = load double* %add.ptr10.0, align 8
+  %tmp11 = load double, double* %add.ptr10.0, align 8
   %add.ptr10.1 = getelementptr inbounds i8, i8* %0, i64 %ivar
   %1 = bitcast i8* %add.ptr10.1 to double*
-  %tmp12 = load double* %1, align 8
+  %tmp12 = load double, double* %1, align 8
   %add.ptr4.1 = getelementptr inbounds i8, i8* %0, i64 %ivar
   %add.ptr4.1.0 = bitcast i8* %add.ptr4.1 to double*
-  %tmp = load double* %add.ptr4.1.0, align 8
+  %tmp = load double, double* %add.ptr4.1.0, align 8
   %add.ptr4.1.1 = getelementptr inbounds i8, i8* %0, i64 %ivar
   %2 = bitcast i8* %add.ptr4.1.1 to double*
-  %tmp5 = load double* %2, align 8
+  %tmp5 = load double, double* %2, align 8
   %insert14 = insertvalue %struct.CGPoint undef, double %tmp11, 0
   %insert16 = insertvalue %struct.CGPoint %insert14, double %tmp12, 1
   %insert = insertvalue %struct.CGRect undef, %struct.CGPoint %insert16, 0
diff --git a/llvm/test/CodeGen/AArch64/arm64-fold-lsl.ll b/llvm/test/CodeGen/AArch64/arm64-fold-lsl.ll
index acfecaf..e1acd6f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fold-lsl.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fold-lsl.ll
@@ -14,7 +14,7 @@
   %conv82 = zext i32 %shr81 to i64
   %idxprom83 = and i64 %conv82, 255
   %arrayidx86 = getelementptr inbounds %struct.a, %struct.a* %ctx, i64 0, i64 %idxprom83
-  %result = load i16* %arrayidx86, align 2
+  %result = load i16, i16* %arrayidx86, align 2
   ret i16 %result
 }
 
@@ -26,7 +26,7 @@
   %conv82 = zext i32 %shr81 to i64
   %idxprom83 = and i64 %conv82, 255
   %arrayidx86 = getelementptr inbounds %struct.b, %struct.b* %ctx, i64 0, i64 %idxprom83
-  %result = load i32* %arrayidx86, align 4
+  %result = load i32, i32* %arrayidx86, align 4
   ret i32 %result
 }
 
@@ -38,7 +38,7 @@
   %conv82 = zext i32 %shr81 to i64
   %idxprom83 = and i64 %conv82, 255
   %arrayidx86 = getelementptr inbounds %struct.c, %struct.c* %ctx, i64 0, i64 %idxprom83
-  %result = load i64* %arrayidx86, align 8
+  %result = load i64, i64* %arrayidx86, align 8
   ret i64 %result
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp128-folding.ll b/llvm/test/CodeGen/AArch64/arm64-fp128-folding.ll
index 6a7d203..4024dc9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp128-folding.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp128-folding.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL: test_folding:
   %l = alloca i32
   store i32 42, i32* %l
-  %val = load i32* %l
+  %val = load i32, i32* %l
   %fpval = sitofp i32 %val to fp128
   ; If the value is loaded from a constant pool into an fp128, it's been folded
   ; successfully.
diff --git a/llvm/test/CodeGen/AArch64/arm64-fp128.ll b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
index b1d5010..aaef39f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll
@@ -6,8 +6,8 @@
 define fp128 @test_add() {
 ; CHECK-LABEL: test_add:
 
-  %lhs = load fp128* @lhs, align 16
-  %rhs = load fp128* @rhs, align 16
+  %lhs = load fp128, fp128* @lhs, align 16
+  %rhs = load fp128, fp128* @rhs, align 16
 ; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
 ; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
 
@@ -19,8 +19,8 @@
 define fp128 @test_sub() {
 ; CHECK-LABEL: test_sub:
 
-  %lhs = load fp128* @lhs, align 16
-  %rhs = load fp128* @rhs, align 16
+  %lhs = load fp128, fp128* @lhs, align 16
+  %rhs = load fp128, fp128* @rhs, align 16
 ; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
 ; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
 
@@ -32,8 +32,8 @@
 define fp128 @test_mul() {
 ; CHECK-LABEL: test_mul:
 
-  %lhs = load fp128* @lhs, align 16
-  %rhs = load fp128* @rhs, align 16
+  %lhs = load fp128, fp128* @lhs, align 16
+  %rhs = load fp128, fp128* @rhs, align 16
 ; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
 ; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
 
@@ -45,8 +45,8 @@
 define fp128 @test_div() {
 ; CHECK-LABEL: test_div:
 
-  %lhs = load fp128* @lhs, align 16
-  %rhs = load fp128* @rhs, align 16
+  %lhs = load fp128, fp128* @lhs, align 16
+  %rhs = load fp128, fp128* @rhs, align 16
 ; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
 ; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
 
@@ -60,7 +60,7 @@
 
 define void @test_fptosi() {
 ; CHECK-LABEL: test_fptosi:
-  %val = load fp128* @lhs, align 16
+  %val = load fp128, fp128* @lhs, align 16
 
   %val32 = fptosi fp128 %val to i32
   store i32 %val32, i32* @var32
@@ -75,7 +75,7 @@
 
 define void @test_fptoui() {
 ; CHECK-LABEL: test_fptoui:
-  %val = load fp128* @lhs, align 16
+  %val = load fp128, fp128* @lhs, align 16
 
   %val32 = fptoui fp128 %val to i32
   store i32 %val32, i32* @var32
@@ -91,12 +91,12 @@
 define void @test_sitofp() {
 ; CHECK-LABEL: test_sitofp:
 
-  %src32 = load i32* @var32
+  %src32 = load i32, i32* @var32
   %val32 = sitofp i32 %src32 to fp128
   store volatile fp128 %val32, fp128* @lhs
 ; CHECK: bl __floatsitf
 
-  %src64 = load i64* @var64
+  %src64 = load i64, i64* @var64
   %val64 = sitofp i64 %src64 to fp128
   store volatile fp128 %val64, fp128* @lhs
 ; CHECK: bl __floatditf
@@ -107,12 +107,12 @@
 define void @test_uitofp() {
 ; CHECK-LABEL: test_uitofp:
 
-  %src32 = load i32* @var32
+  %src32 = load i32, i32* @var32
   %val32 = uitofp i32 %src32 to fp128
   store volatile fp128 %val32, fp128* @lhs
 ; CHECK: bl __floatunsitf
 
-  %src64 = load i64* @var64
+  %src64 = load i64, i64* @var64
   %val64 = uitofp i64 %src64 to fp128
   store volatile fp128 %val64, fp128* @lhs
 ; CHECK: bl __floatunditf
@@ -123,8 +123,8 @@
 define i1 @test_setcc1() {
 ; CHECK-LABEL: test_setcc1:
 
-  %lhs = load fp128* @lhs, align 16
-  %rhs = load fp128* @rhs, align 16
+  %lhs = load fp128, fp128* @lhs, align 16
+  %rhs = load fp128, fp128* @rhs, align 16
 ; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
 ; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
 
@@ -142,8 +142,8 @@
 define i1 @test_setcc2() {
 ; CHECK-LABEL: test_setcc2:
 
-  %lhs = load fp128* @lhs, align 16
-  %rhs = load fp128* @rhs, align 16
+  %lhs = load fp128, fp128* @lhs, align 16
+  %rhs = load fp128, fp128* @rhs, align 16
 ; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
 ; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
 
@@ -164,8 +164,8 @@
 define i32 @test_br_cc() {
 ; CHECK-LABEL: test_br_cc:
 
-  %lhs = load fp128* @lhs, align 16
-  %rhs = load fp128* @rhs, align 16
+  %lhs = load fp128, fp128* @lhs, align 16
+  %rhs = load fp128, fp128* @rhs, align 16
 ; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs]
 ; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs]
 
@@ -218,7 +218,7 @@
 define void @test_round() {
 ; CHECK-LABEL: test_round:
 
-  %val = load fp128* @lhs, align 16
+  %val = load fp128, fp128* @lhs, align 16
 
   %float = fptrunc fp128 %val to float
   store float %float, float* @varfloat, align 4
@@ -236,15 +236,15 @@
 define void @test_extend() {
 ; CHECK-LABEL: test_extend:
 
-  %val = load fp128* @lhs, align 16
+  %val = load fp128, fp128* @lhs, align 16
 
-  %float = load float* @varfloat
+  %float = load float, float* @varfloat
   %fromfloat = fpext float %float to fp128
   store volatile fp128 %fromfloat, fp128* @lhs, align 16
 ; CHECK: bl __extendsftf2
 ; CHECK: str q0, [{{x[0-9]+}}, :lo12:lhs]
 
-  %double = load double* @vardouble
+  %double = load double, double* @vardouble
   %fromdouble = fpext double %double to fp128
   store volatile fp128 %fromdouble, fp128* @lhs, align 16
 ; CHECK: bl __extenddftf2
diff --git a/llvm/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll b/llvm/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll
index ba759e3..8d74ce7 100644
--- a/llvm/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-i16-subreg-extract.ll
@@ -3,7 +3,7 @@
 define i32 @foo(<4 x i16>* %__a) nounwind {
 ; CHECK-LABEL: foo:
 ; CHECK: umov.h w{{[0-9]+}}, v{{[0-9]+}}[0]
-  %tmp18 = load <4 x i16>* %__a, align 8
+  %tmp18 = load <4 x i16>, <4 x i16>* %__a, align 8
   %vget_lane = extractelement <4 x i16> %tmp18, i32 0
   %conv = zext i16 %vget_lane to i32
   %mul = mul nsw i32 3, %conv
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index 94bed8b..b52cddf 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -4,7 +4,7 @@
 ; CHECK-LABEL: store64:
 ; CHECK: str x{{[0-9+]}}, [x{{[0-9+]}}], #8
 ; CHECK: ret
-  %tmp = load i64** %out, align 8
+  %tmp = load i64*, i64** %out, align 8
   %incdec.ptr = getelementptr inbounds i64, i64* %tmp, i64 1
   store i64 %spacing, i64* %tmp, align 4
   store i64* %incdec.ptr, i64** %out, align 8
@@ -15,7 +15,7 @@
 ; CHECK-LABEL: store32:
 ; CHECK: str w{{[0-9+]}}, [x{{[0-9+]}}], #4
 ; CHECK: ret
-  %tmp = load i32** %out, align 8
+  %tmp = load i32*, i32** %out, align 8
   %incdec.ptr = getelementptr inbounds i32, i32* %tmp, i64 1
   store i32 %spacing, i32* %tmp, align 4
   store i32* %incdec.ptr, i32** %out, align 8
@@ -26,7 +26,7 @@
 ; CHECK-LABEL: store16:
 ; CHECK: strh w{{[0-9+]}}, [x{{[0-9+]}}], #2
 ; CHECK: ret
-  %tmp = load i16** %out, align 8
+  %tmp = load i16*, i16** %out, align 8
   %incdec.ptr = getelementptr inbounds i16, i16* %tmp, i64 1
   store i16 %spacing, i16* %tmp, align 4
   store i16* %incdec.ptr, i16** %out, align 8
@@ -37,7 +37,7 @@
 ; CHECK-LABEL: store8:
 ; CHECK: strb w{{[0-9+]}}, [x{{[0-9+]}}], #1
 ; CHECK: ret
-  %tmp = load i8** %out, align 8
+  %tmp = load i8*, i8** %out, align 8
   %incdec.ptr = getelementptr inbounds i8, i8* %tmp, i64 1
   store i8 %spacing, i8* %tmp, align 4
   store i8* %incdec.ptr, i8** %out, align 8
@@ -48,7 +48,7 @@
 ; CHECK-LABEL: truncst64to32:
 ; CHECK: str w{{[0-9+]}}, [x{{[0-9+]}}], #4
 ; CHECK: ret
-  %tmp = load i32** %out, align 8
+  %tmp = load i32*, i32** %out, align 8
   %incdec.ptr = getelementptr inbounds i32, i32* %tmp, i64 1
   %trunc = trunc i64 %spacing to i32
   store i32 %trunc, i32* %tmp, align 4
@@ -60,7 +60,7 @@
 ; CHECK-LABEL: truncst64to16:
 ; CHECK: strh w{{[0-9+]}}, [x{{[0-9+]}}], #2
 ; CHECK: ret
-  %tmp = load i16** %out, align 8
+  %tmp = load i16*, i16** %out, align 8
   %incdec.ptr = getelementptr inbounds i16, i16* %tmp, i64 1
   %trunc = trunc i64 %spacing to i16
   store i16 %trunc, i16* %tmp, align 4
@@ -72,7 +72,7 @@
 ; CHECK-LABEL: truncst64to8:
 ; CHECK: strb w{{[0-9+]}}, [x{{[0-9+]}}], #1
 ; CHECK: ret
-  %tmp = load i8** %out, align 8
+  %tmp = load i8*, i8** %out, align 8
   %incdec.ptr = getelementptr inbounds i8, i8* %tmp, i64 1
   %trunc = trunc i64 %spacing to i8
   store i8 %trunc, i8* %tmp, align 4
@@ -85,7 +85,7 @@
 ; CHECK-LABEL: storef32:
 ; CHECK: str s{{[0-9+]}}, [x{{[0-9+]}}], #4
 ; CHECK: ret
-  %tmp = load float** %out, align 8
+  %tmp = load float*, float** %out, align 8
   %incdec.ptr = getelementptr inbounds float, float* %tmp, i64 1
   store float %spacing, float* %tmp, align 4
   store float* %incdec.ptr, float** %out, align 8
@@ -96,7 +96,7 @@
 ; CHECK-LABEL: storef64:
 ; CHECK: str d{{[0-9+]}}, [x{{[0-9+]}}], #8
 ; CHECK: ret
-  %tmp = load double** %out, align 8
+  %tmp = load double*, double** %out, align 8
   %incdec.ptr = getelementptr inbounds double, double* %tmp, i64 1
   store double %spacing, double* %tmp, align 4
   store double* %incdec.ptr, double** %out, align 8
@@ -108,7 +108,7 @@
 ; CHECK: ldr     x0, [x0]
 ; CHECK-NEXT: str     d0, [x0, #32]!
 ; CHECK-NEXT: ret
-  %tmp = load double** %out, align 8
+  %tmp = load double*, double** %out, align 8
   %ptr = getelementptr inbounds double, double* %tmp, i64 4
   store double %spacing, double* %ptr, align 4
   ret double *%ptr
@@ -119,7 +119,7 @@
 ; CHECK: ldr     x0, [x0]
 ; CHECK-NEXT: str     s0, [x0, #12]!
 ; CHECK-NEXT: ret
-  %tmp = load float** %out, align 8
+  %tmp = load float*, float** %out, align 8
   %ptr = getelementptr inbounds float, float* %tmp, i64 3
   store float %spacing, float* %ptr, align 4
   ret float *%ptr
@@ -130,7 +130,7 @@
 ; CHECK: ldr     x0, [x0]
 ; CHECK-NEXT: str     x1, [x0, #16]!
 ; CHECK-NEXT: ret
-  %tmp = load i64** %out, align 8
+  %tmp = load i64*, i64** %out, align 8
   %ptr = getelementptr inbounds i64, i64* %tmp, i64 2
   store i64 %spacing, i64* %ptr, align 4
   ret i64 *%ptr
@@ -141,7 +141,7 @@
 ; CHECK: ldr     x0, [x0]
 ; CHECK-NEXT: str     w1, [x0, #8]!
 ; CHECK-NEXT: ret
-  %tmp = load i32** %out, align 8
+  %tmp = load i32*, i32** %out, align 8
   %ptr = getelementptr inbounds i32, i32* %tmp, i64 2
   store i32 %spacing, i32* %ptr, align 4
   ret i32 *%ptr
@@ -152,7 +152,7 @@
 ; CHECK: ldr     x0, [x0]
 ; CHECK-NEXT: strh    w1, [x0, #4]!
 ; CHECK-NEXT: ret
-  %tmp = load i16** %out, align 8
+  %tmp = load i16*, i16** %out, align 8
   %ptr = getelementptr inbounds i16, i16* %tmp, i64 2
   store i16 %spacing, i16* %ptr, align 4
   ret i16 *%ptr
@@ -163,7 +163,7 @@
 ; CHECK: ldr     x0, [x0]
 ; CHECK-NEXT: strb    w1, [x0, #2]!
 ; CHECK-NEXT: ret
-  %tmp = load i8** %out, align 8
+  %tmp = load i8*, i8** %out, align 8
   %ptr = getelementptr inbounds i8, i8* %tmp, i64 2
   store i8 %spacing, i8* %ptr, align 4
   ret i8 *%ptr
@@ -174,7 +174,7 @@
 ; CHECK: ldr     x0, [x0]
 ; CHECK-NEXT: str     w1, [x0, #8]!
 ; CHECK-NEXT: ret
-  %tmp = load i32** %out, align 8
+  %tmp = load i32*, i32** %out, align 8
   %ptr = getelementptr inbounds i32, i32* %tmp, i64 2
   %trunc = trunc i64 %spacing to i32
   store i32 %trunc, i32* %ptr, align 4
@@ -186,7 +186,7 @@
 ; CHECK: ldr     x0, [x0]
 ; CHECK-NEXT: strh    w1, [x0, #4]!
 ; CHECK-NEXT: ret
-  %tmp = load i16** %out, align 8
+  %tmp = load i16*, i16** %out, align 8
   %ptr = getelementptr inbounds i16, i16* %tmp, i64 2
   %trunc = trunc i64 %spacing to i16
   store i16 %trunc, i16* %ptr, align 4
@@ -198,7 +198,7 @@
 ; CHECK: ldr     x0, [x0]
 ; CHECK-NEXT: strb    w1, [x0, #2]!
 ; CHECK-NEXT: ret
-  %tmp = load i8** %out, align 8
+  %tmp = load i8*, i8** %out, align 8
   %ptr = getelementptr inbounds i8, i8* %tmp, i64 2
   %trunc = trunc i64 %spacing to i8
   store i8 %trunc, i8* %ptr, align 4
@@ -214,7 +214,7 @@
 ; CHECK: str     d0, [x1]
 ; CHECK: ret
   %ptr = getelementptr inbounds double, double* %src, i64 1
-  %tmp = load double* %ptr, align 4
+  %tmp = load double, double* %ptr, align 4
   store double %tmp, double* %out, align 4
   ret double* %ptr
 }
@@ -225,7 +225,7 @@
 ; CHECK: str     s0, [x1]
 ; CHECK: ret
   %ptr = getelementptr inbounds float, float* %src, i64 1
-  %tmp = load float* %ptr, align 4
+  %tmp = load float, float* %ptr, align 4
   store float %tmp, float* %out, align 4
   ret float* %ptr
 }
@@ -236,7 +236,7 @@
 ; CHECK: str     x[[REG]], [x1]
 ; CHECK: ret
   %ptr = getelementptr inbounds i64, i64* %src, i64 1
-  %tmp = load i64* %ptr, align 4
+  %tmp = load i64, i64* %ptr, align 4
   store i64 %tmp, i64* %out, align 4
   ret i64* %ptr
 }
@@ -246,7 +246,7 @@
 ; CHECK: str     w[[REG]], [x1]
 ; CHECK: ret
   %ptr = getelementptr inbounds i32, i32* %src, i64 1
-  %tmp = load i32* %ptr, align 4
+  %tmp = load i32, i32* %ptr, align 4
   store i32 %tmp, i32* %out, align 4
   ret i32* %ptr
 }
@@ -256,7 +256,7 @@
 ; CHECK: str     w[[REG]], [x1]
 ; CHECK: ret
   %ptr = getelementptr inbounds i16, i16* %src, i64 1
-  %tmp = load i16* %ptr, align 4
+  %tmp = load i16, i16* %ptr, align 4
   %ext = zext i16 %tmp to i32
   store i32 %ext, i32* %out, align 4
   ret i16* %ptr
@@ -267,7 +267,7 @@
 ; CHECK: str     x[[REG]], [x1]
 ; CHECK: ret
   %ptr = getelementptr inbounds i16, i16* %src, i64 1
-  %tmp = load i16* %ptr, align 4
+  %tmp = load i16, i16* %ptr, align 4
   %ext = zext i16 %tmp to i64
   store i64 %ext, i64* %out, align 4
   ret i16* %ptr
@@ -278,7 +278,7 @@
 ; CHECK: str     w[[REG]], [x1]
 ; CHECK: ret
   %ptr = getelementptr inbounds i8, i8* %src, i64 1
-  %tmp = load i8* %ptr, align 4
+  %tmp = load i8, i8* %ptr, align 4
   %ext = zext i8 %tmp to i32
   store i32 %ext, i32* %out, align 4
   ret i8* %ptr
@@ -289,7 +289,7 @@
 ; CHECK: str     x[[REG]], [x1]
 ; CHECK: ret
   %ptr = getelementptr inbounds i8, i8* %src, i64 1
-  %tmp = load i8* %ptr, align 4
+  %tmp = load i8, i8* %ptr, align 4
   %ext = zext i8 %tmp to i64
   store i64 %ext, i64* %out, align 4
   ret i8* %ptr
@@ -300,7 +300,7 @@
 ; CHECK: str     x[[REG]], [x1]
 ; CHECK: ret
   %ptr = getelementptr inbounds i32, i32* %src, i64 1
-  %tmp = load i32* %ptr, align 4
+  %tmp = load i32, i32* %ptr, align 4
   %ext = sext i32 %tmp to i64
   store i64 %ext, i64* %out, align 8
   ret i32* %ptr
@@ -311,7 +311,7 @@
 ; CHECK: str     w[[REG]], [x1]
 ; CHECK: ret
   %ptr = getelementptr inbounds i16, i16* %src, i64 1
-  %tmp = load i16* %ptr, align 4
+  %tmp = load i16, i16* %ptr, align 4
   %ext = sext i16 %tmp to i32
   store i32 %ext, i32* %out, align 4
   ret i16* %ptr
@@ -322,7 +322,7 @@
 ; CHECK: str     x[[REG]], [x1]
 ; CHECK: ret
   %ptr = getelementptr inbounds i16, i16* %src, i64 1
-  %tmp = load i16* %ptr, align 4
+  %tmp = load i16, i16* %ptr, align 4
   %ext = sext i16 %tmp to i64
   store i64 %ext, i64* %out, align 4
   ret i16* %ptr
@@ -333,7 +333,7 @@
 ; CHECK: str     w[[REG]], [x1]
 ; CHECK: ret
   %ptr = getelementptr inbounds i8, i8* %src, i64 1
-  %tmp = load i8* %ptr, align 4
+  %tmp = load i8, i8* %ptr, align 4
   %ext = sext i8 %tmp to i32
   store i32 %ext, i32* %out, align 4
   ret i8* %ptr
@@ -344,7 +344,7 @@
 ; CHECK: str     x[[REG]], [x1]
 ; CHECK: ret
   %ptr = getelementptr inbounds i8, i8* %src, i64 1
-  %tmp = load i8* %ptr, align 4
+  %tmp = load i8, i8* %ptr, align 4
   %ext = sext i8 %tmp to i64
   store i64 %ext, i64* %out, align 4
   ret i8* %ptr
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
index 08c66cc..14beb1a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst-2.ll
@@ -10,8 +10,8 @@
 define void @f(double* %P1) #0 {
 entry:
   %arrayidx4 = getelementptr inbounds double, double* %P1, i64 1
-  %0 = load double* %arrayidx4, align 8, !tbaa !1
-  %1 = load double* %P1, align 8, !tbaa !1
+  %0 = load double, double* %arrayidx4, align 8, !tbaa !1
+  %1 = load double, double* %P1, align 8, !tbaa !1
   %2 = insertelement <2 x double> undef, double %0, i32 0
   %3 = insertelement <2 x double> %2, double %1, i32 1
   %4 = fsub <2 x double> zeroinitializer, %3
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index 230a329..706871e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -6,7 +6,7 @@
 ; CHECK-LABEL: test_v8i8_pre_load:
 ; CHECK: ldr d0, [x0, #40]!
   %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
-  %val = load <8 x i8>* %newaddr, align 8
+  %val = load <8 x i8>, <8 x i8>* %newaddr, align 8
   store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
   ret <8 x i8> %val
 }
@@ -15,7 +15,7 @@
 ; CHECK-LABEL: test_v8i8_post_load:
 ; CHECK: ldr d0, [x0], #40
   %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
-  %val = load <8 x i8>* %addr, align 8
+  %val = load <8 x i8>, <8 x i8>* %addr, align 8
   store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
   ret <8 x i8> %val
 }
@@ -42,7 +42,7 @@
 ; CHECK-LABEL: test_v4i16_pre_load:
 ; CHECK: ldr d0, [x0, #40]!
   %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
-  %val = load <4 x i16>* %newaddr, align 8
+  %val = load <4 x i16>, <4 x i16>* %newaddr, align 8
   store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
   ret <4 x i16> %val
 }
@@ -51,7 +51,7 @@
 ; CHECK-LABEL: test_v4i16_post_load:
 ; CHECK: ldr d0, [x0], #40
   %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
-  %val = load <4 x i16>* %addr, align 8
+  %val = load <4 x i16>, <4 x i16>* %addr, align 8
   store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
   ret <4 x i16> %val
 }
@@ -78,7 +78,7 @@
 ; CHECK-LABEL: test_v2i32_pre_load:
 ; CHECK: ldr d0, [x0, #40]!
   %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
-  %val = load <2 x i32>* %newaddr, align 8
+  %val = load <2 x i32>, <2 x i32>* %newaddr, align 8
   store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
   ret <2 x i32> %val
 }
@@ -87,7 +87,7 @@
 ; CHECK-LABEL: test_v2i32_post_load:
 ; CHECK: ldr d0, [x0], #40
   %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
-  %val = load <2 x i32>* %addr, align 8
+  %val = load <2 x i32>, <2 x i32>* %addr, align 8
   store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
   ret <2 x i32> %val
 }
@@ -114,7 +114,7 @@
 ; CHECK-LABEL: test_v2f32_pre_load:
 ; CHECK: ldr d0, [x0, #40]!
   %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
-  %val = load <2 x float>* %newaddr, align 8
+  %val = load <2 x float>, <2 x float>* %newaddr, align 8
   store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
   ret <2 x float> %val
 }
@@ -123,7 +123,7 @@
 ; CHECK-LABEL: test_v2f32_post_load:
 ; CHECK: ldr d0, [x0], #40
   %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
-  %val = load <2 x float>* %addr, align 8
+  %val = load <2 x float>, <2 x float>* %addr, align 8
   store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
   ret <2 x float> %val
 }
@@ -150,7 +150,7 @@
 ; CHECK-LABEL: test_v1i64_pre_load:
 ; CHECK: ldr d0, [x0, #40]!
   %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
-  %val = load <1 x i64>* %newaddr, align 8
+  %val = load <1 x i64>, <1 x i64>* %newaddr, align 8
   store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
   ret <1 x i64> %val
 }
@@ -159,7 +159,7 @@
 ; CHECK-LABEL: test_v1i64_post_load:
 ; CHECK: ldr d0, [x0], #40
   %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
-  %val = load <1 x i64>* %addr, align 8
+  %val = load <1 x i64>, <1 x i64>* %addr, align 8
   store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
   ret <1 x i64> %val
 }
@@ -186,7 +186,7 @@
 ; CHECK-LABEL: test_v16i8_pre_load:
 ; CHECK: ldr q0, [x0, #80]!
   %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
-  %val = load <16 x i8>* %newaddr, align 8
+  %val = load <16 x i8>, <16 x i8>* %newaddr, align 8
   store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
   ret <16 x i8> %val
 }
@@ -195,7 +195,7 @@
 ; CHECK-LABEL: test_v16i8_post_load:
 ; CHECK: ldr q0, [x0], #80
   %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
-  %val = load <16 x i8>* %addr, align 8
+  %val = load <16 x i8>, <16 x i8>* %addr, align 8
   store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
   ret <16 x i8> %val
 }
@@ -222,7 +222,7 @@
 ; CHECK-LABEL: test_v8i16_pre_load:
 ; CHECK: ldr q0, [x0, #80]!
   %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
-  %val = load <8 x i16>* %newaddr, align 8
+  %val = load <8 x i16>, <8 x i16>* %newaddr, align 8
   store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
   ret <8 x i16> %val
 }
@@ -231,7 +231,7 @@
 ; CHECK-LABEL: test_v8i16_post_load:
 ; CHECK: ldr q0, [x0], #80
   %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
-  %val = load <8 x i16>* %addr, align 8
+  %val = load <8 x i16>, <8 x i16>* %addr, align 8
   store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
   ret <8 x i16> %val
 }
@@ -258,7 +258,7 @@
 ; CHECK-LABEL: test_v4i32_pre_load:
 ; CHECK: ldr q0, [x0, #80]!
   %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
-  %val = load <4 x i32>* %newaddr, align 8
+  %val = load <4 x i32>, <4 x i32>* %newaddr, align 8
   store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
   ret <4 x i32> %val
 }
@@ -267,7 +267,7 @@
 ; CHECK-LABEL: test_v4i32_post_load:
 ; CHECK: ldr q0, [x0], #80
   %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
-  %val = load <4 x i32>* %addr, align 8
+  %val = load <4 x i32>, <4 x i32>* %addr, align 8
   store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
   ret <4 x i32> %val
 }
@@ -295,7 +295,7 @@
 ; CHECK-LABEL: test_v4f32_pre_load:
 ; CHECK: ldr q0, [x0, #80]!
   %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
-  %val = load <4 x float>* %newaddr, align 8
+  %val = load <4 x float>, <4 x float>* %newaddr, align 8
   store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
   ret <4 x float> %val
 }
@@ -304,7 +304,7 @@
 ; CHECK-LABEL: test_v4f32_post_load:
 ; CHECK: ldr q0, [x0], #80
   %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
-  %val = load <4 x float>* %addr, align 8
+  %val = load <4 x float>, <4 x float>* %addr, align 8
   store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
   ret <4 x float> %val
 }
@@ -332,7 +332,7 @@
 ; CHECK-LABEL: test_v2i64_pre_load:
 ; CHECK: ldr q0, [x0, #80]!
   %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
-  %val = load <2 x i64>* %newaddr, align 8
+  %val = load <2 x i64>, <2 x i64>* %newaddr, align 8
   store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
   ret <2 x i64> %val
 }
@@ -341,7 +341,7 @@
 ; CHECK-LABEL: test_v2i64_post_load:
 ; CHECK: ldr q0, [x0], #80
   %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
-  %val = load <2 x i64>* %addr, align 8
+  %val = load <2 x i64>, <2 x i64>* %addr, align 8
   store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
   ret <2 x i64> %val
 }
@@ -369,7 +369,7 @@
 ; CHECK-LABEL: test_v2f64_pre_load:
 ; CHECK: ldr q0, [x0, #80]!
   %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
-  %val = load <2 x double>* %newaddr, align 8
+  %val = load <2 x double>, <2 x double>* %newaddr, align 8
   store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
   ret <2 x double> %val
 }
@@ -378,7 +378,7 @@
 ; CHECK-LABEL: test_v2f64_post_load:
 ; CHECK: ldr q0, [x0], #80
   %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
-  %val = load <2 x double>* %addr, align 8
+  %val = load <2 x double>, <2 x double>* %addr, align 8
   store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
   ret <2 x double> %val
 }
@@ -5692,7 +5692,7 @@
 define <16 x i8> @test_v16i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
 ; CHECK-LABEL: test_v16i8_post_imm_ld1r:
 ; CHECK: ld1r.16b { v0 }, [x0], #1
-  %tmp1 = load i8* %bar
+  %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
   %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
   %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
@@ -5717,7 +5717,7 @@
 define <16 x i8> @test_v16i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v16i8_post_reg_ld1r:
 ; CHECK: ld1r.16b { v0 }, [x0], x{{[0-9]+}}
-  %tmp1 = load i8* %bar
+  %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
   %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
   %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
@@ -5742,7 +5742,7 @@
 define <8 x i8> @test_v8i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
 ; CHECK-LABEL: test_v8i8_post_imm_ld1r:
 ; CHECK: ld1r.8b { v0 }, [x0], #1
-  %tmp1 = load i8* %bar
+  %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
   %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
   %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
@@ -5759,7 +5759,7 @@
 define <8 x i8> @test_v8i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v8i8_post_reg_ld1r:
 ; CHECK: ld1r.8b { v0 }, [x0], x{{[0-9]+}}
-  %tmp1 = load i8* %bar
+  %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
   %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
   %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
@@ -5776,7 +5776,7 @@
 define <8 x i16> @test_v8i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
 ; CHECK-LABEL: test_v8i16_post_imm_ld1r:
 ; CHECK: ld1r.8h { v0 }, [x0], #2
-  %tmp1 = load i16* %bar
+  %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
   %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
   %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
@@ -5793,7 +5793,7 @@
 define <8 x i16> @test_v8i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v8i16_post_reg_ld1r:
 ; CHECK: ld1r.8h { v0 }, [x0], x{{[0-9]+}}
-  %tmp1 = load i16* %bar
+  %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
   %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
   %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
@@ -5810,7 +5810,7 @@
 define <4 x i16> @test_v4i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
 ; CHECK-LABEL: test_v4i16_post_imm_ld1r:
 ; CHECK: ld1r.4h { v0 }, [x0], #2
-  %tmp1 = load i16* %bar
+  %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
   %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
   %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
@@ -5823,7 +5823,7 @@
 define <4 x i16> @test_v4i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v4i16_post_reg_ld1r:
 ; CHECK: ld1r.4h { v0 }, [x0], x{{[0-9]+}}
-  %tmp1 = load i16* %bar
+  %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
   %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
   %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
@@ -5836,7 +5836,7 @@
 define <4 x i32> @test_v4i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
 ; CHECK-LABEL: test_v4i32_post_imm_ld1r:
 ; CHECK: ld1r.4s { v0 }, [x0], #4
-  %tmp1 = load i32* %bar
+  %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
   %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
   %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
@@ -5849,7 +5849,7 @@
 define <4 x i32> @test_v4i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v4i32_post_reg_ld1r:
 ; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}}
-  %tmp1 = load i32* %bar
+  %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
   %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
   %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
@@ -5862,7 +5862,7 @@
 define <2 x i32> @test_v2i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
 ; CHECK-LABEL: test_v2i32_post_imm_ld1r:
 ; CHECK: ld1r.2s { v0 }, [x0], #4
-  %tmp1 = load i32* %bar
+  %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
   %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
   %tmp4 = getelementptr i32, i32* %bar, i64 1
@@ -5873,7 +5873,7 @@
 define <2 x i32> @test_v2i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v2i32_post_reg_ld1r:
 ; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}}
-  %tmp1 = load i32* %bar
+  %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
   %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
   %tmp4 = getelementptr i32, i32* %bar, i64 %inc
@@ -5884,7 +5884,7 @@
 define <2 x i64> @test_v2i64_post_imm_ld1r(i64* %bar, i64** %ptr) {
 ; CHECK-LABEL: test_v2i64_post_imm_ld1r:
 ; CHECK: ld1r.2d { v0 }, [x0], #8
-  %tmp1 = load i64* %bar
+  %tmp1 = load i64, i64* %bar
   %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
   %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
   %tmp4 = getelementptr i64, i64* %bar, i64 1
@@ -5895,7 +5895,7 @@
 define <2 x i64> @test_v2i64_post_reg_ld1r(i64* %bar, i64** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v2i64_post_reg_ld1r:
 ; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}}
-  %tmp1 = load i64* %bar
+  %tmp1 = load i64, i64* %bar
   %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
   %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
   %tmp4 = getelementptr i64, i64* %bar, i64 %inc
@@ -5906,7 +5906,7 @@
 define <4 x float> @test_v4f32_post_imm_ld1r(float* %bar, float** %ptr) {
 ; CHECK-LABEL: test_v4f32_post_imm_ld1r:
 ; CHECK: ld1r.4s { v0 }, [x0], #4
-  %tmp1 = load float* %bar
+  %tmp1 = load float, float* %bar
   %tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
   %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
   %tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2
@@ -5919,7 +5919,7 @@
 define <4 x float> @test_v4f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v4f32_post_reg_ld1r:
 ; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}}
-  %tmp1 = load float* %bar
+  %tmp1 = load float, float* %bar
   %tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
   %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
   %tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2
@@ -5932,7 +5932,7 @@
 define <2 x float> @test_v2f32_post_imm_ld1r(float* %bar, float** %ptr) {
 ; CHECK-LABEL: test_v2f32_post_imm_ld1r:
 ; CHECK: ld1r.2s { v0 }, [x0], #4
-  %tmp1 = load float* %bar
+  %tmp1 = load float, float* %bar
   %tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
   %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
   %tmp4 = getelementptr float, float* %bar, i64 1
@@ -5943,7 +5943,7 @@
 define <2 x float> @test_v2f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v2f32_post_reg_ld1r:
 ; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}}
-  %tmp1 = load float* %bar
+  %tmp1 = load float, float* %bar
   %tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
   %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
   %tmp4 = getelementptr float, float* %bar, i64 %inc
@@ -5954,7 +5954,7 @@
 define <2 x double> @test_v2f64_post_imm_ld1r(double* %bar, double** %ptr) {
 ; CHECK-LABEL: test_v2f64_post_imm_ld1r:
 ; CHECK: ld1r.2d { v0 }, [x0], #8
-  %tmp1 = load double* %bar
+  %tmp1 = load double, double* %bar
   %tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
   %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
   %tmp4 = getelementptr double, double* %bar, i64 1
@@ -5965,7 +5965,7 @@
 define <2 x double> @test_v2f64_post_reg_ld1r(double* %bar, double** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_v2f64_post_reg_ld1r:
 ; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}}
-  %tmp1 = load double* %bar
+  %tmp1 = load double, double* %bar
   %tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
   %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
   %tmp4 = getelementptr double, double* %bar, i64 %inc
@@ -5976,7 +5976,7 @@
 define <16 x i8> @test_v16i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <16 x i8> %A) {
 ; CHECK-LABEL: test_v16i8_post_imm_ld1lane:
 ; CHECK: ld1.b { v0 }[1], [x0], #1
-  %tmp1 = load i8* %bar
+  %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
   %tmp3 = getelementptr i8, i8* %bar, i64 1
   store i8* %tmp3, i8** %ptr
@@ -5986,7 +5986,7 @@
 define <16 x i8> @test_v16i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <16 x i8> %A) {
 ; CHECK-LABEL: test_v16i8_post_reg_ld1lane:
 ; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}}
-  %tmp1 = load i8* %bar
+  %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
   %tmp3 = getelementptr i8, i8* %bar, i64 %inc
   store i8* %tmp3, i8** %ptr
@@ -5996,7 +5996,7 @@
 define <8 x i8> @test_v8i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <8 x i8> %A) {
 ; CHECK-LABEL: test_v8i8_post_imm_ld1lane:
 ; CHECK: ld1.b { v0 }[1], [x0], #1
-  %tmp1 = load i8* %bar
+  %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
   %tmp3 = getelementptr i8, i8* %bar, i64 1
   store i8* %tmp3, i8** %ptr
@@ -6006,7 +6006,7 @@
 define <8 x i8> @test_v8i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <8 x i8> %A) {
 ; CHECK-LABEL: test_v8i8_post_reg_ld1lane:
 ; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}}
-  %tmp1 = load i8* %bar
+  %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
   %tmp3 = getelementptr i8, i8* %bar, i64 %inc
   store i8* %tmp3, i8** %ptr
@@ -6016,7 +6016,7 @@
 define <8 x i16> @test_v8i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <8 x i16> %A) {
 ; CHECK-LABEL: test_v8i16_post_imm_ld1lane:
 ; CHECK: ld1.h { v0 }[1], [x0], #2
-  %tmp1 = load i16* %bar
+  %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
   %tmp3 = getelementptr i16, i16* %bar, i64 1
   store i16* %tmp3, i16** %ptr
@@ -6026,7 +6026,7 @@
 define <8 x i16> @test_v8i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <8 x i16> %A) {
 ; CHECK-LABEL: test_v8i16_post_reg_ld1lane:
 ; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
-  %tmp1 = load i16* %bar
+  %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
   %tmp3 = getelementptr i16, i16* %bar, i64 %inc
   store i16* %tmp3, i16** %ptr
@@ -6036,7 +6036,7 @@
 define <4 x i16> @test_v4i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <4 x i16> %A) {
 ; CHECK-LABEL: test_v4i16_post_imm_ld1lane:
 ; CHECK: ld1.h { v0 }[1], [x0], #2
-  %tmp1 = load i16* %bar
+  %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
   %tmp3 = getelementptr i16, i16* %bar, i64 1
   store i16* %tmp3, i16** %ptr
@@ -6046,7 +6046,7 @@
 define <4 x i16> @test_v4i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A) {
 ; CHECK-LABEL: test_v4i16_post_reg_ld1lane:
 ; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
-  %tmp1 = load i16* %bar
+  %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
   %tmp3 = getelementptr i16, i16* %bar, i64 %inc
   store i16* %tmp3, i16** %ptr
@@ -6056,7 +6056,7 @@
 define <4 x i32> @test_v4i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <4 x i32> %A) {
 ; CHECK-LABEL: test_v4i32_post_imm_ld1lane:
 ; CHECK: ld1.s { v0 }[1], [x0], #4
-  %tmp1 = load i32* %bar
+  %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
   %tmp3 = getelementptr i32, i32* %bar, i64 1
   store i32* %tmp3, i32** %ptr
@@ -6066,7 +6066,7 @@
 define <4 x i32> @test_v4i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <4 x i32> %A) {
 ; CHECK-LABEL: test_v4i32_post_reg_ld1lane:
 ; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
-  %tmp1 = load i32* %bar
+  %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
   %tmp3 = getelementptr i32, i32* %bar, i64 %inc
   store i32* %tmp3, i32** %ptr
@@ -6076,7 +6076,7 @@
 define <2 x i32> @test_v2i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <2 x i32> %A) {
 ; CHECK-LABEL: test_v2i32_post_imm_ld1lane:
 ; CHECK: ld1.s { v0 }[1], [x0], #4
-  %tmp1 = load i32* %bar
+  %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
   %tmp3 = getelementptr i32, i32* %bar, i64 1
   store i32* %tmp3, i32** %ptr
@@ -6086,7 +6086,7 @@
 define <2 x i32> @test_v2i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <2 x i32> %A) {
 ; CHECK-LABEL: test_v2i32_post_reg_ld1lane:
 ; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
-  %tmp1 = load i32* %bar
+  %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
   %tmp3 = getelementptr i32, i32* %bar, i64 %inc
   store i32* %tmp3, i32** %ptr
@@ -6096,7 +6096,7 @@
 define <2 x i64> @test_v2i64_post_imm_ld1lane(i64* %bar, i64** %ptr, <2 x i64> %A) {
 ; CHECK-LABEL: test_v2i64_post_imm_ld1lane:
 ; CHECK: ld1.d { v0 }[1], [x0], #8
-  %tmp1 = load i64* %bar
+  %tmp1 = load i64, i64* %bar
   %tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
   %tmp3 = getelementptr i64, i64* %bar, i64 1
   store i64* %tmp3, i64** %ptr
@@ -6106,7 +6106,7 @@
 define <2 x i64> @test_v2i64_post_reg_ld1lane(i64* %bar, i64** %ptr, i64 %inc, <2 x i64> %A) {
 ; CHECK-LABEL: test_v2i64_post_reg_ld1lane:
 ; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
-  %tmp1 = load i64* %bar
+  %tmp1 = load i64, i64* %bar
   %tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
   %tmp3 = getelementptr i64, i64* %bar, i64 %inc
   store i64* %tmp3, i64** %ptr
@@ -6116,7 +6116,7 @@
 define <4 x float> @test_v4f32_post_imm_ld1lane(float* %bar, float** %ptr, <4 x float> %A) {
 ; CHECK-LABEL: test_v4f32_post_imm_ld1lane:
 ; CHECK: ld1.s { v0 }[1], [x0], #4
-  %tmp1 = load float* %bar
+  %tmp1 = load float, float* %bar
   %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
   %tmp3 = getelementptr float, float* %bar, i64 1
   store float* %tmp3, float** %ptr
@@ -6126,7 +6126,7 @@
 define <4 x float> @test_v4f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <4 x float> %A) {
 ; CHECK-LABEL: test_v4f32_post_reg_ld1lane:
 ; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
-  %tmp1 = load float* %bar
+  %tmp1 = load float, float* %bar
   %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
   %tmp3 = getelementptr float, float* %bar, i64 %inc
   store float* %tmp3, float** %ptr
@@ -6136,7 +6136,7 @@
 define <2 x float> @test_v2f32_post_imm_ld1lane(float* %bar, float** %ptr, <2 x float> %A) {
 ; CHECK-LABEL: test_v2f32_post_imm_ld1lane:
 ; CHECK: ld1.s { v0 }[1], [x0], #4
-  %tmp1 = load float* %bar
+  %tmp1 = load float, float* %bar
   %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
   %tmp3 = getelementptr float, float* %bar, i64 1
   store float* %tmp3, float** %ptr
@@ -6146,7 +6146,7 @@
 define <2 x float> @test_v2f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <2 x float> %A) {
 ; CHECK-LABEL: test_v2f32_post_reg_ld1lane:
 ; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
-  %tmp1 = load float* %bar
+  %tmp1 = load float, float* %bar
   %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
   %tmp3 = getelementptr float, float* %bar, i64 %inc
   store float* %tmp3, float** %ptr
@@ -6156,7 +6156,7 @@
 define <2 x double> @test_v2f64_post_imm_ld1lane(double* %bar, double** %ptr, <2 x double> %A) {
 ; CHECK-LABEL: test_v2f64_post_imm_ld1lane:
 ; CHECK: ld1.d { v0 }[1], [x0], #8
-  %tmp1 = load double* %bar
+  %tmp1 = load double, double* %bar
   %tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
   %tmp3 = getelementptr double, double* %bar, i64 1
   store double* %tmp3, double** %ptr
@@ -6166,7 +6166,7 @@
 define <2 x double> @test_v2f64_post_reg_ld1lane(double* %bar, double** %ptr, i64 %inc, <2 x double> %A) {
 ; CHECK-LABEL: test_v2f64_post_reg_ld1lane:
 ; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
-  %tmp1 = load double* %bar
+  %tmp1 = load double, double* %bar
   %tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
   %tmp3 = getelementptr double, double* %bar, i64 %inc
   store double* %tmp3, double** %ptr
diff --git a/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll b/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll
index 013b9a8..dfb2bc8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll
@@ -125,7 +125,7 @@
 entry:
   ; CHECK-LABEL: t9:
   %data = alloca <2 x double>, align 16
-  %0 = load <2 x double>* %data, align 16
+  %0 = load <2 x double>, <2 x double>* %data, align 16
   call void asm sideeffect "mov.2d v4, $0\0A", "w,~{v4}"(<2 x double> %0) nounwind
   ; CHECK: mov.2d v4, {{v[0-9]+}}
   ret void
@@ -137,7 +137,7 @@
   %data = alloca <2 x float>, align 8
   %a = alloca [2 x float], align 4
   %arraydecay = getelementptr inbounds [2 x float], [2 x float]* %a, i32 0, i32 0
-  %0 = load <2 x float>* %data, align 8
+  %0 = load <2 x float>, <2 x float>* %data, align 8
   call void asm sideeffect "ldr ${1:q}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
   ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}]
   call void asm sideeffect "ldr ${1:d}, [$0]\0A", "r,w"(float* %arraydecay, <2 x float> %0) nounwind
@@ -155,10 +155,10 @@
 entry:
   ; CHECK-LABEL: t11:
   %a = alloca i32, align 4
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   call void asm sideeffect "mov ${1:x}, ${0:x}\0A", "r,i"(i32 %0, i32 0) nounwind
   ; CHECK: mov xzr, {{x[0-9]+}}
-  %1 = load i32* %a, align 4
+  %1 = load i32, i32* %a, align 4
   call void asm sideeffect "mov ${1:w}, ${0:w}\0A", "r,i"(i32 %1, i32 0) nounwind
   ; CHECK: mov wzr, {{w[0-9]+}}
   ret void
@@ -168,7 +168,7 @@
 entry:
   ; CHECK-LABEL: t12:
   %data = alloca <4 x float>, align 16
-  %0 = load <4 x float>* %data, align 16
+  %0 = load <4 x float>, <4 x float>* %data, align 16
   call void asm sideeffect "mov.2d v4, $0\0A", "x,~{v4}"(<4 x float> %0) nounwind
 ; CHECK: mov.2d v4, {{v([0-9]|1[0-5])}}
   ret void
diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
index 72d808c..209065e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
@@ -449,7 +449,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1r.8b { v0 }, [x0]
 ; CHECK-NEXT: ret
-  %tmp1 = load i8* %bar
+  %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
   %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
   %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
@@ -466,7 +466,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1r.16b { v0 }, [x0]
 ; CHECK-NEXT: ret
-  %tmp1 = load i8* %bar
+  %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
   %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
   %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
@@ -491,7 +491,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1r.4h { v0 }, [x0]
 ; CHECK-NEXT: ret
-  %tmp1 = load i16* %bar
+  %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
   %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
   %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
@@ -504,7 +504,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1r.8h { v0 }, [x0]
 ; CHECK-NEXT: ret
-  %tmp1 = load i16* %bar
+  %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
   %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
   %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
@@ -521,7 +521,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1r.2s { v0 }, [x0]
 ; CHECK-NEXT: ret
-  %tmp1 = load i32* %bar
+  %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
   %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
   ret <2 x i32> %tmp3
@@ -532,7 +532,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1r.4s { v0 }, [x0]
 ; CHECK-NEXT: ret
-  %tmp1 = load i32* %bar
+  %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
   %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
   %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
@@ -545,7 +545,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1r.2d { v0 }, [x0]
 ; CHECK-NEXT: ret
-  %tmp1 = load i64* %bar
+  %tmp1 = load i64, i64* %bar
   %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
   %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
   ret <2 x i64> %tmp3
@@ -804,7 +804,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1.b { v0 }[0], [x0]
 ; CHECK-NEXT: ret
-  %tmp1 = load i8* %bar
+  %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0
   ret <16 x i8> %tmp2
 }
@@ -814,7 +814,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1.h { v0 }[0], [x0]
 ; CHECK-NEXT: ret
-  %tmp1 = load i16* %bar
+  %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <8 x i16> %V, i16 %tmp1, i32 0
   ret <8 x i16> %tmp2
 }
@@ -824,7 +824,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1.s { v0 }[0], [x0]
 ; CHECK-NEXT: ret
-  %tmp1 = load i32* %bar
+  %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <4 x i32> %V, i32 %tmp1, i32 0
   ret <4 x i32> %tmp2
 }
@@ -834,7 +834,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1.s { v0 }[0], [x0]
 ; CHECK-NEXT: ret
-  %tmp1 = load float* %bar
+  %tmp1 = load float, float* %bar
   %tmp2 = insertelement <4 x float> %V, float %tmp1, i32 0
   ret <4 x float> %tmp2
 }
@@ -844,7 +844,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1.d { v0 }[0], [x0]
 ; CHECK-NEXT: ret
-  %tmp1 = load i64* %bar
+  %tmp1 = load i64, i64* %bar
   %tmp2 = insertelement <2 x i64> %V, i64 %tmp1, i32 0
   ret <2 x i64> %tmp2
 }
@@ -854,7 +854,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1.d { v0 }[0], [x0]
 ; CHECK-NEXT: ret
-  %tmp1 = load double* %bar
+  %tmp1 = load double, double* %bar
   %tmp2 = insertelement <2 x double> %V, double %tmp1, i32 0
   ret <2 x double> %tmp2
 }
@@ -864,7 +864,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ldr [[REG:d[0-9]+]], [x0]
 ; CHECK-NEXT: ret
-  %tmp = load <1 x i64>* %p, align 8
+  %tmp = load <1 x i64>, <1 x i64>* %p, align 8
   ret <1 x i64> %tmp
 }
 
@@ -873,7 +873,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1.b { v0 }[0], [x0]
 ; CHECK-NEXT: ret
-  %tmp1 = load i8* %bar
+  %tmp1 = load i8, i8* %bar
   %tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0
   ret <8 x i8> %tmp2
 }
@@ -883,7 +883,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1.h { v0 }[0], [x0]
 ; CHECK-NEXT: ret
-  %tmp1 = load i16* %bar
+  %tmp1 = load i16, i16* %bar
   %tmp2 = insertelement <4 x i16> %V, i16 %tmp1, i32 0
   ret <4 x i16> %tmp2
 }
@@ -893,7 +893,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1.s { v0 }[0], [x0]
 ; CHECK-NEXT: ret
-  %tmp1 = load i32* %bar
+  %tmp1 = load i32, i32* %bar
   %tmp2 = insertelement <2 x i32> %V, i32 %tmp1, i32 0
   ret <2 x i32> %tmp2
 }
@@ -903,7 +903,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1.s { v0 }[0], [x0]
 ; CHECK-NEXT: ret
-  %tmp1 = load float* %bar
+  %tmp1 = load float, float* %bar
   %tmp2 = insertelement <2 x float> %V, float %tmp1, i32 0
   ret <2 x float> %tmp2
 }
@@ -919,12 +919,12 @@
 ; CHECK-NEXT: str d[[RESREGNUM]], [x2]
 ; CHECK-NEXT: ret
   %tmp = bitcast i8* %a to i32*
-  %tmp1 = load i32* %tmp, align 4
+  %tmp1 = load i32, i32* %tmp, align 4
   %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
   %lane = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
   %tmp3 = bitcast <2 x i32> %lane to <8 x i8>
   %tmp4 = bitcast i8* %b to i32*
-  %tmp5 = load i32* %tmp4, align 4
+  %tmp5 = load i32, i32* %tmp4, align 4
   %tmp6 = insertelement <2 x i32> undef, i32 %tmp5, i32 0
   %lane1 = shufflevector <2 x i32> %tmp6, <2 x i32> undef, <2 x i32> zeroinitializer
   %tmp7 = bitcast <2 x i32> %lane1 to <8 x i8>
@@ -946,7 +946,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1r.4s { v0 }, [x0]
 ; CHECK-NEXT: ret
-  %tmp = load float* %x, align 4
+  %tmp = load float, float* %x, align 4
   %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
   %tmp2 = insertelement <4 x float> %tmp1, float %tmp, i32 1
   %tmp3 = insertelement <4 x float> %tmp2, float %tmp, i32 2
@@ -960,7 +960,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1r.2s { v0 }, [x0]
 ; CHECK-NEXT: ret
-  %tmp = load float* %x, align 4
+  %tmp = load float, float* %x, align 4
   %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
   %tmp2 = insertelement <2 x float> %tmp1, float %tmp, i32 1
   ret <2 x float> %tmp2
@@ -972,7 +972,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1r.2d { v0 }, [x0]
 ; CHECK-NEXT: ret
-  %tmp = load double* %x, align 4
+  %tmp = load double, double* %x, align 4
   %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
   %tmp2 = insertelement <2 x double> %tmp1, double %tmp, i32 1
   ret <2 x double> %tmp2
@@ -984,7 +984,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ldr d0, [x0]
 ; CHECK-NEXT: ret
-  %tmp = load double* %x, align 4
+  %tmp = load double, double* %x, align 4
   %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
   ret <1 x double> %tmp1
 }
@@ -995,7 +995,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1r.4s { v0 }, [x0]
 ; CHECK-NEXT: ret
-  %tmp = load float* %x, align 4
+  %tmp = load float, float* %x, align 4
   %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
   %lane = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
   ret <4 x float> %lane
@@ -1007,7 +1007,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1r.2s { v0 }, [x0]
 ; CHECK-NEXT: ret
-  %tmp = load float* %x, align 4
+  %tmp = load float, float* %x, align 4
   %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
   %lane = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
   ret <2 x float> %lane
@@ -1019,7 +1019,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ld1r.2d { v0 }, [x0]
 ; CHECK-NEXT: ret
-  %tmp = load double* %x, align 4
+  %tmp = load double, double* %x, align 4
   %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
   %lane = shufflevector <2 x double> %tmp1, <2 x double> undef, <2 x i32> zeroinitializer
   ret <2 x double> %lane
@@ -1031,7 +1031,7 @@
 ; Make sure we are using the operands defined by the ABI
 ; CHECK: ldr d0, [x0]
 ; CHECK-NEXT: ret
-  %tmp = load double* %x, align 4
+  %tmp = load double, double* %x, align 4
   %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
   %lane = shufflevector <1 x double> %tmp1, <1 x double> undef, <1 x i32> zeroinitializer
   ret <1 x double> %lane
diff --git a/llvm/test/CodeGen/AArch64/arm64-ldp.ll b/llvm/test/CodeGen/AArch64/arm64-ldp.ll
index 6af1a4d..8642e0d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ldp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ldp.ll
@@ -5,9 +5,9 @@
 ; CHECK: ldp_int
 ; CHECK: ldp
 define i32 @ldp_int(i32* %p) nounwind {
-  %tmp = load i32* %p, align 4
+  %tmp = load i32, i32* %p, align 4
   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
-  %tmp1 = load i32* %add.ptr, align 4
+  %tmp1 = load i32, i32* %add.ptr, align 4
   %add = add nsw i32 %tmp1, %tmp
   ret i32 %add
 }
@@ -15,9 +15,9 @@
 ; CHECK: ldp_sext_int
 ; CHECK: ldpsw
 define i64 @ldp_sext_int(i32* %p) nounwind {
-  %tmp = load i32* %p, align 4
+  %tmp = load i32, i32* %p, align 4
   %add.ptr = getelementptr inbounds i32, i32* %p, i64 1
-  %tmp1 = load i32* %add.ptr, align 4
+  %tmp1 = load i32, i32* %add.ptr, align 4
   %sexttmp = sext i32 %tmp to i64
   %sexttmp1 = sext i32 %tmp1 to i64
   %add = add nsw i64 %sexttmp1, %sexttmp
@@ -27,9 +27,9 @@
 ; CHECK: ldp_long
 ; CHECK: ldp
 define i64 @ldp_long(i64* %p) nounwind {
-  %tmp = load i64* %p, align 8
+  %tmp = load i64, i64* %p, align 8
   %add.ptr = getelementptr inbounds i64, i64* %p, i64 1
-  %tmp1 = load i64* %add.ptr, align 8
+  %tmp1 = load i64, i64* %add.ptr, align 8
   %add = add nsw i64 %tmp1, %tmp
   ret i64 %add
 }
@@ -37,9 +37,9 @@
 ; CHECK: ldp_float
 ; CHECK: ldp
 define float @ldp_float(float* %p) nounwind {
-  %tmp = load float* %p, align 4
+  %tmp = load float, float* %p, align 4
   %add.ptr = getelementptr inbounds float, float* %p, i64 1
-  %tmp1 = load float* %add.ptr, align 4
+  %tmp1 = load float, float* %add.ptr, align 4
   %add = fadd float %tmp, %tmp1
   ret float %add
 }
@@ -47,9 +47,9 @@
 ; CHECK: ldp_double
 ; CHECK: ldp
 define double @ldp_double(double* %p) nounwind {
-  %tmp = load double* %p, align 8
+  %tmp = load double, double* %p, align 8
   %add.ptr = getelementptr inbounds double, double* %p, i64 1
-  %tmp1 = load double* %add.ptr, align 8
+  %tmp1 = load double, double* %add.ptr, align 8
   %add = fadd double %tmp, %tmp1
   ret double %add
 }
@@ -61,9 +61,9 @@
 ; LDUR_CHK-NEXT: add     w{{[0-9]+}}, [[DST2]], [[DST1]]
 ; LDUR_CHK-NEXT: ret
   %p1 = getelementptr inbounds i32, i32* %a, i32 -1
-  %tmp1 = load i32* %p1, align 2
+  %tmp1 = load i32, i32* %p1, align 2
   %p2 = getelementptr inbounds i32, i32* %a, i32 -2
-  %tmp2 = load i32* %p2, align 2
+  %tmp2 = load i32, i32* %p2, align 2
   %tmp3 = add i32 %tmp1, %tmp2
   ret i32 %tmp3
 }
@@ -74,9 +74,9 @@
 ; LDUR_CHK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
 ; LDUR_CHK-NEXT: ret
   %p1 = getelementptr inbounds i32, i32* %a, i32 -1
-  %tmp1 = load i32* %p1, align 2
+  %tmp1 = load i32, i32* %p1, align 2
   %p2 = getelementptr inbounds i32, i32* %a, i32 -2
-  %tmp2 = load i32* %p2, align 2
+  %tmp2 = load i32, i32* %p2, align 2
   %sexttmp1 = sext i32 %tmp1 to i64
   %sexttmp2 = sext i32 %tmp2 to i64
   %tmp3 = add i64 %sexttmp1, %sexttmp2
@@ -89,9 +89,9 @@
 ; LDUR_CHK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
 ; LDUR_CHK-NEXT: ret
   %p1 = getelementptr inbounds i64, i64* %a, i64 -1
-  %tmp1 = load i64* %p1, align 2
+  %tmp1 = load i64, i64* %p1, align 2
   %p2 = getelementptr inbounds i64, i64* %a, i64 -2
-  %tmp2 = load i64* %p2, align 2
+  %tmp2 = load i64, i64* %p2, align 2
   %tmp3 = add i64 %tmp1, %tmp2
   ret i64 %tmp3
 }
@@ -102,9 +102,9 @@
 ; LDUR_CHK-NEXT: add     s{{[0-9]+}}, [[DST2]], [[DST1]]
 ; LDUR_CHK-NEXT: ret
   %p1 = getelementptr inbounds float, float* %a, i64 -1
-  %tmp1 = load float* %p1, align 2
+  %tmp1 = load float, float* %p1, align 2
   %p2 = getelementptr inbounds float, float* %a, i64 -2
-  %tmp2 = load float* %p2, align 2
+  %tmp2 = load float, float* %p2, align 2
   %tmp3 = fadd float %tmp1, %tmp2
   ret float %tmp3
 }
@@ -115,9 +115,9 @@
 ; LDUR_CHK-NEXT: add     d{{[0-9]+}}, [[DST2]], [[DST1]]
 ; LDUR_CHK-NEXT: ret
   %p1 = getelementptr inbounds double, double* %a, i64 -1
-  %tmp1 = load double* %p1, align 2
+  %tmp1 = load double, double* %p1, align 2
   %p2 = getelementptr inbounds double, double* %a, i64 -2
-  %tmp2 = load double* %p2, align 2
+  %tmp2 = load double, double* %p2, align 2
   %tmp3 = fadd double %tmp1, %tmp2
   ret double %tmp3
 }
@@ -130,9 +130,9 @@
 ; LDUR_CHK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
 ; LDUR_CHK-NEXT: ret
   %p1 = getelementptr inbounds i64, i64* %a, i64 -31
-  %tmp1 = load i64* %p1, align 2
+  %tmp1 = load i64, i64* %p1, align 2
   %p2 = getelementptr inbounds i64, i64* %a, i64 -32
-  %tmp2 = load i64* %p2, align 2
+  %tmp2 = load i64, i64* %p2, align 2
   %tmp3 = add i64 %tmp1, %tmp2
   ret i64 %tmp3
 }
@@ -144,9 +144,9 @@
 ; LDUR_CHK-NEXT: add     x{{[0-9]+}}, [[DST2]], [[DST1]]
 ; LDUR_CHK-NEXT: ret
   %p1 = getelementptr inbounds i32, i32* %a, i64 -63
-  %tmp1 = load i32* %p1, align 2
+  %tmp1 = load i32, i32* %p1, align 2
   %p2 = getelementptr inbounds i32, i32* %a, i64 -64
-  %tmp2 = load i32* %p2, align 2
+  %tmp2 = load i32, i32* %p2, align 2
   %sexttmp1 = sext i32 %tmp1 to i64
   %sexttmp2 = sext i32 %tmp2 to i64
   %tmp3 = add i64 %sexttmp1, %sexttmp2
@@ -161,9 +161,9 @@
 ; LDUR_CHK: add
 ; LDUR_CHK-NEXT: ret
   %p1 = getelementptr inbounds i64, i64* %a, i64 -32
-  %tmp1 = load i64* %p1, align 2
+  %tmp1 = load i64, i64* %p1, align 2
   %p2 = getelementptr inbounds i64, i64* %a, i64 -33
-  %tmp2 = load i64* %p2, align 2
+  %tmp2 = load i64, i64* %p2, align 2
   %tmp3 = add i64 %tmp1, %tmp2
   ret i64 %tmp3
 }
@@ -176,9 +176,9 @@
 ; LDUR_CHK: add
 ; LDUR_CHK-NEXT: ret
   %p1 = getelementptr inbounds i32, i32* %a, i64 -64
-  %tmp1 = load i32* %p1, align 2
+  %tmp1 = load i32, i32* %p1, align 2
   %p2 = getelementptr inbounds i32, i32* %a, i64 -65
-  %tmp2 = load i32* %p2, align 2
+  %tmp2 = load i32, i32* %p2, align 2
   %sexttmp1 = sext i32 %tmp1 to i64
   %sexttmp2 = sext i32 %tmp2 to i64
   %tmp3 = add i64 %sexttmp1, %sexttmp2
@@ -196,13 +196,13 @@
   %bp1 = bitcast i64* %p1 to i8*
   %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
   %dp1 = bitcast i8* %bp1p1 to i64*
-  %tmp1 = load i64* %dp1, align 1
+  %tmp1 = load i64, i64* %dp1, align 1
 
   %p2 = getelementptr inbounds i64, i64* %a, i64 -17
   %bp2 = bitcast i64* %p2 to i8*
   %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
   %dp2 = bitcast i8* %bp2p1 to i64*
-  %tmp2 = load i64* %dp2, align 1
+  %tmp2 = load i64, i64* %dp2, align 1
 
   %tmp3 = add i64 %tmp1, %tmp2
   ret i64 %tmp3
@@ -219,13 +219,13 @@
   %bp1 = bitcast i32* %p1 to i8*
   %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1
   %dp1 = bitcast i8* %bp1p1 to i32*
-  %tmp1 = load i32* %dp1, align 1
+  %tmp1 = load i32, i32* %dp1, align 1
 
   %p2 = getelementptr inbounds i32, i32* %a, i64 -17
   %bp2 = bitcast i32* %p2 to i8*
   %bp2p1 = getelementptr inbounds i8, i8* %bp2, i64 1
   %dp2 = bitcast i8* %bp2p1 to i32*
-  %tmp2 = load i32* %dp2, align 1
+  %tmp2 = load i32, i32* %dp2, align 1
 
   %sexttmp1 = sext i32 %tmp1 to i64
   %sexttmp2 = sext i32 %tmp2 to i64
diff --git a/llvm/test/CodeGen/AArch64/arm64-ldur.ll b/llvm/test/CodeGen/AArch64/arm64-ldur.ll
index 9bf0ce2..c4bf397 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ldur.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ldur.ll
@@ -5,7 +5,7 @@
 ; CHECK: ldur x0, [x0, #-8]
 ; CHECK-NEXT: ret
   %tmp = getelementptr inbounds i64, i64* %p, i64 -1
-  %ret = load i64* %tmp, align 2
+  %ret = load i64, i64* %tmp, align 2
   ret i64 %ret
 }
 define i32 @_f1(i32* %p) {
@@ -13,7 +13,7 @@
 ; CHECK: ldur w0, [x0, #-4]
 ; CHECK-NEXT: ret
   %tmp = getelementptr inbounds i32, i32* %p, i64 -1
-  %ret = load i32* %tmp, align 2
+  %ret = load i32, i32* %tmp, align 2
   ret i32 %ret
 }
 define i16 @_f2(i16* %p) {
@@ -21,7 +21,7 @@
 ; CHECK: ldurh w0, [x0, #-2]
 ; CHECK-NEXT: ret
   %tmp = getelementptr inbounds i16, i16* %p, i64 -1
-  %ret = load i16* %tmp, align 2
+  %ret = load i16, i16* %tmp, align 2
   ret i16 %ret
 }
 define i8 @_f3(i8* %p) {
@@ -29,7 +29,7 @@
 ; CHECK: ldurb w0, [x0, #-1]
 ; CHECK-NEXT: ret
   %tmp = getelementptr inbounds i8, i8* %p, i64 -1
-  %ret = load i8* %tmp, align 2
+  %ret = load i8, i8* %tmp, align 2
   ret i8 %ret
 }
 
@@ -39,7 +39,7 @@
 ; CHECK-NEXT: ret
   %p = getelementptr inbounds i8, i8* %a, i64 -12
   %tmp1 = bitcast i8* %p to i32*
-  %tmp2 = load i32* %tmp1, align 4
+  %tmp2 = load i32, i32* %tmp1, align 4
   %ret = zext i32 %tmp2 to i64
 
   ret i64 %ret
@@ -50,7 +50,7 @@
 ; CHECK-NEXT: ret
   %p = getelementptr inbounds i8, i8* %a, i64 -12
   %tmp1 = bitcast i8* %p to i16*
-  %tmp2 = load i16* %tmp1, align 2
+  %tmp2 = load i16, i16* %tmp1, align 2
   %ret = zext i16 %tmp2 to i64
 
   ret i64 %ret
@@ -60,7 +60,7 @@
 ; CHECK: ldurb w0, [x0, #-12]
 ; CHECK-NEXT: ret
   %p = getelementptr inbounds i8, i8* %a, i64 -12
-  %tmp2 = load i8* %p, align 1
+  %tmp2 = load i8, i8* %p, align 1
   %ret = zext i8 %tmp2 to i64
 
   ret i64 %ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-misched-basic-A53.ll b/llvm/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
index 5761b52..6db2104 100644
--- a/llvm/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-misched-basic-A53.ll
@@ -34,44 +34,44 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %2 = load i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %2, 8
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %3 = load i32* %i, align 4
+  %3 = load i32, i32* %i, align 4
   %idxprom = sext i32 %3 to i64
   %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %x, i32 0, i64 %idxprom
-  %4 = load i32* %arrayidx, align 4
+  %4 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %4, 1
   store i32 %add, i32* %xx, align 4
-  %5 = load i32* %xx, align 4
+  %5 = load i32, i32* %xx, align 4
   %add1 = add nsw i32 %5, 12
   store i32 %add1, i32* %xx, align 4
-  %6 = load i32* %xx, align 4
+  %6 = load i32, i32* %xx, align 4
   %add2 = add nsw i32 %6, 23
   store i32 %add2, i32* %xx, align 4
-  %7 = load i32* %xx, align 4
+  %7 = load i32, i32* %xx, align 4
   %add3 = add nsw i32 %7, 34
   store i32 %add3, i32* %xx, align 4
-  %8 = load i32* %i, align 4
+  %8 = load i32, i32* %i, align 4
   %idxprom4 = sext i32 %8 to i64
   %arrayidx5 = getelementptr inbounds [8 x i32], [8 x i32]* %y, i32 0, i64 %idxprom4
-  %9 = load i32* %arrayidx5, align 4
-  %10 = load i32* %yy, align 4
+  %9 = load i32, i32* %arrayidx5, align 4
+  %10 = load i32, i32* %yy, align 4
   %mul = mul nsw i32 %10, %9
   store i32 %mul, i32* %yy, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %11 = load i32* %i, align 4
+  %11 = load i32, i32* %i, align 4
   %inc = add nsw i32 %11, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %12 = load i32* %xx, align 4
-  %13 = load i32* %yy, align 4
+  %12 = load i32, i32* %xx, align 4
+  %13 = load i32, i32* %yy, align 4
   %add6 = add nsw i32 %12, %13
   ret i32 %add6
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-misched-basic-A57.ll b/llvm/test/CodeGen/AArch64/arm64-misched-basic-A57.ll
index 482c445..ee8f152 100644
--- a/llvm/test/CodeGen/AArch64/arm64-misched-basic-A57.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-misched-basic-A57.ll
@@ -41,31 +41,31 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %2 = load i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %2, 8
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %3 = load i32* %yy, align 4
-  %4 = load i32* %i, align 4
+  %3 = load i32, i32* %yy, align 4
+  %4 = load i32, i32* %i, align 4
   %idxprom = sext i32 %4 to i64
   %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %x, i32 0, i64 %idxprom
-  %5 = load i32* %arrayidx, align 4
+  %5 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %5, 1
   store i32 %add, i32* %xx, align 4
-  %6 = load i32* %xx, align 4
+  %6 = load i32, i32* %xx, align 4
   %add1 = add nsw i32 %6, 12
   store i32 %add1, i32* %xx, align 4
-  %7 = load i32* %xx, align 4
+  %7 = load i32, i32* %xx, align 4
   %add2 = add nsw i32 %7, 23
   store i32 %add2, i32* %xx, align 4
-  %8 = load i32* %xx, align 4
+  %8 = load i32, i32* %xx, align 4
   %add3 = add nsw i32 %8, 34
   store i32 %add3, i32* %xx, align 4
-  %9 = load i32* %i, align 4
+  %9 = load i32, i32* %i, align 4
   %idxprom4 = sext i32 %9 to i64
   %arrayidx5 = getelementptr inbounds [8 x i32], [8 x i32]* %y, i32 0, i64 %idxprom4
-  %10 = load i32* %arrayidx5, align 4
+  %10 = load i32, i32* %arrayidx5, align 4
 
   %add4 = add nsw i32 %9, %add
   %add5 = add nsw i32 %10, %add1
@@ -92,14 +92,14 @@
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %11 = load i32* %i, align 4
+  %11 = load i32, i32* %i, align 4
   %inc = add nsw i32 %11, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %12 = load i32* %xx, align 4
-  %13 = load i32* %yy, align 4
+  %12 = load i32, i32* %xx, align 4
+  %13 = load i32, i32* %yy, align 4
   %add67 = add nsw i32 %12, %13
   ret i32 %add67
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll b/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
index cca6bfe..b63200e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
@@ -126,7 +126,7 @@
 ; CHECK-LABEL: test_vld1q_dup_s8:
 ; CHECK: ld1r {{{ ?v[0-9]+.16b ?}}}, [x0]
 entry:
-  %0 = load i8* %a, align 1
+  %0 = load i8, i8* %a, align 1
   %1 = insertelement <16 x i8> undef, i8 %0, i32 0
   %lane = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> zeroinitializer
   ret <16 x i8> %lane
@@ -136,7 +136,7 @@
 ; CHECK-LABEL: test_vld1q_dup_s16:
 ; CHECK: ld1r {{{ ?v[0-9]+.8h ?}}}, [x0]
 entry:
-  %0 = load i16* %a, align 2
+  %0 = load i16, i16* %a, align 2
   %1 = insertelement <8 x i16> undef, i16 %0, i32 0
   %lane = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> zeroinitializer
   ret <8 x i16> %lane
@@ -146,7 +146,7 @@
 ; CHECK-LABEL: test_vld1q_dup_s32:
 ; CHECK: ld1r {{{ ?v[0-9]+.4s ?}}}, [x0]
 entry:
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %1 = insertelement <4 x i32> undef, i32 %0, i32 0
   %lane = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> zeroinitializer
   ret <4 x i32> %lane
@@ -156,7 +156,7 @@
 ; CHECK-LABEL: test_vld1q_dup_s64:
 ; CHECK: ld1r {{{ ?v[0-9]+.2d ?}}}, [x0]
 entry:
-  %0 = load i64* %a, align 8
+  %0 = load i64, i64* %a, align 8
   %1 = insertelement <2 x i64> undef, i64 %0, i32 0
   %lane = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> zeroinitializer
   ret <2 x i64> %lane
@@ -166,7 +166,7 @@
 ; CHECK-LABEL: test_vld1q_dup_f32:
 ; CHECK: ld1r {{{ ?v[0-9]+.4s ?}}}, [x0]
 entry:
-  %0 = load float* %a, align 4
+  %0 = load float, float* %a, align 4
   %1 = insertelement <4 x float> undef, float %0, i32 0
   %lane = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
   ret <4 x float> %lane
@@ -176,7 +176,7 @@
 ; CHECK-LABEL: test_vld1q_dup_f64:
 ; CHECK: ld1r {{{ ?v[0-9]+.2d ?}}}, [x0]
 entry:
-  %0 = load double* %a, align 8
+  %0 = load double, double* %a, align 8
   %1 = insertelement <2 x double> undef, double %0, i32 0
   %lane = shufflevector <2 x double> %1, <2 x double> undef, <2 x i32> zeroinitializer
   ret <2 x double> %lane
@@ -186,7 +186,7 @@
 ; CHECK-LABEL: test_vld1_dup_s8:
 ; CHECK: ld1r {{{ ?v[0-9]+.8b ?}}}, [x0]
 entry:
-  %0 = load i8* %a, align 1
+  %0 = load i8, i8* %a, align 1
   %1 = insertelement <8 x i8> undef, i8 %0, i32 0
   %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
   ret <8 x i8> %lane
@@ -196,7 +196,7 @@
 ; CHECK-LABEL: test_vld1_dup_s16:
 ; CHECK: ld1r {{{ ?v[0-9]+.4h ?}}}, [x0]
 entry:
-  %0 = load i16* %a, align 2
+  %0 = load i16, i16* %a, align 2
   %1 = insertelement <4 x i16> undef, i16 %0, i32 0
   %lane = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> zeroinitializer
   ret <4 x i16> %lane
@@ -206,7 +206,7 @@
 ; CHECK-LABEL: test_vld1_dup_s32:
 ; CHECK: ld1r {{{ ?v[0-9]+.2s ?}}}, [x0]
 entry:
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %1 = insertelement <2 x i32> undef, i32 %0, i32 0
   %lane = shufflevector <2 x i32> %1, <2 x i32> undef, <2 x i32> zeroinitializer
   ret <2 x i32> %lane
@@ -216,7 +216,7 @@
 ; CHECK-LABEL: test_vld1_dup_s64:
 ; CHECK: ldr {{d[0-9]+}}, [x0]
 entry:
-  %0 = load i64* %a, align 8
+  %0 = load i64, i64* %a, align 8
   %1 = insertelement <1 x i64> undef, i64 %0, i32 0
   ret <1 x i64> %1
 }
@@ -225,7 +225,7 @@
 ; CHECK-LABEL: test_vld1_dup_f32:
 ; CHECK: ld1r {{{ ?v[0-9]+.2s ?}}}, [x0]
 entry:
-  %0 = load float* %a, align 4
+  %0 = load float, float* %a, align 4
   %1 = insertelement <2 x float> undef, float %0, i32 0
   %lane = shufflevector <2 x float> %1, <2 x float> undef, <2 x i32> zeroinitializer
   ret <2 x float> %lane
@@ -235,7 +235,7 @@
 ; CHECK-LABEL: test_vld1_dup_f64:
 ; CHECK: ldr {{d[0-9]+}}, [x0]
 entry:
-  %0 = load double* %a, align 8
+  %0 = load double, double* %a, align 8
   %1 = insertelement <1 x double> undef, double %0, i32 0
   ret <1 x double> %1
 }
@@ -247,7 +247,7 @@
 ; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}]
 ; CHECK-DAG: fmov {{d[0-9]+}}, {{x[0-9]+}}
 ; CHECK-DAG: str {{x[0-9]+}}, [{{x[0-9]+}}]
-  %1 = load i64* %a, align 8
+  %1 = load i64, i64* %a, align 8
   store i64 %1, i64* %b, align 8
   %vecinit.i = insertelement <1 x i64> undef, i64 %1, i32 0
   ret <1 x i64> %vecinit.i
@@ -259,7 +259,7 @@
 ; CHECK-LABEL: testDUP.v1f64:
 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}]
 ; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}]
-  %1 = load double* %a, align 8
+  %1 = load double, double* %a, align 8
   store double %1, double* %b, align 8
   %vecinit.i = insertelement <1 x double> undef, double %1, i32 0
   ret <1 x double> %vecinit.i
@@ -269,7 +269,7 @@
 ; CHECK-LABEL: test_vld1q_lane_s8:
 ; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
 entry:
-  %0 = load i8* %a, align 1
+  %0 = load i8, i8* %a, align 1
   %vld1_lane = insertelement <16 x i8> %b, i8 %0, i32 15
   ret <16 x i8> %vld1_lane
 }
@@ -278,7 +278,7 @@
 ; CHECK-LABEL: test_vld1q_lane_s16:
 ; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
 entry:
-  %0 = load i16* %a, align 2
+  %0 = load i16, i16* %a, align 2
   %vld1_lane = insertelement <8 x i16> %b, i16 %0, i32 7
   ret <8 x i16> %vld1_lane
 }
@@ -287,7 +287,7 @@
 ; CHECK-LABEL: test_vld1q_lane_s32:
 ; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
 entry:
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %vld1_lane = insertelement <4 x i32> %b, i32 %0, i32 3
   ret <4 x i32> %vld1_lane
 }
@@ -296,7 +296,7 @@
 ; CHECK-LABEL: test_vld1q_lane_s64:
 ; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
 entry:
-  %0 = load i64* %a, align 8
+  %0 = load i64, i64* %a, align 8
   %vld1_lane = insertelement <2 x i64> %b, i64 %0, i32 1
   ret <2 x i64> %vld1_lane
 }
@@ -305,7 +305,7 @@
 ; CHECK-LABEL: test_vld1q_lane_f32:
 ; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
 entry:
-  %0 = load float* %a, align 4
+  %0 = load float, float* %a, align 4
   %vld1_lane = insertelement <4 x float> %b, float %0, i32 3
   ret <4 x float> %vld1_lane
 }
@@ -314,7 +314,7 @@
 ; CHECK-LABEL: test_vld1q_lane_f64:
 ; CHECK: ld1 { {{v[0-9]+}}.d }[{{[0-9]+}}], [x0]
 entry:
-  %0 = load double* %a, align 8
+  %0 = load double, double* %a, align 8
   %vld1_lane = insertelement <2 x double> %b, double %0, i32 1
   ret <2 x double> %vld1_lane
 }
@@ -323,7 +323,7 @@
 ; CHECK-LABEL: test_vld1_lane_s8:
 ; CHECK: ld1 { {{v[0-9]+}}.b }[{{[0-9]+}}], [x0]
 entry:
-  %0 = load i8* %a, align 1
+  %0 = load i8, i8* %a, align 1
   %vld1_lane = insertelement <8 x i8> %b, i8 %0, i32 7
   ret <8 x i8> %vld1_lane
 }
@@ -332,7 +332,7 @@
 ; CHECK-LABEL: test_vld1_lane_s16:
 ; CHECK: ld1 { {{v[0-9]+}}.h }[{{[0-9]+}}], [x0]
 entry:
-  %0 = load i16* %a, align 2
+  %0 = load i16, i16* %a, align 2
   %vld1_lane = insertelement <4 x i16> %b, i16 %0, i32 3
   ret <4 x i16> %vld1_lane
 }
@@ -341,7 +341,7 @@
 ; CHECK-LABEL: test_vld1_lane_s32:
 ; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
 entry:
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %vld1_lane = insertelement <2 x i32> %b, i32 %0, i32 1
   ret <2 x i32> %vld1_lane
 }
@@ -350,7 +350,7 @@
 ; CHECK-LABEL: test_vld1_lane_s64:
 ; CHECK: ldr {{d[0-9]+}}, [x0]
 entry:
-  %0 = load i64* %a, align 8
+  %0 = load i64, i64* %a, align 8
   %vld1_lane = insertelement <1 x i64> undef, i64 %0, i32 0
   ret <1 x i64> %vld1_lane
 }
@@ -359,7 +359,7 @@
 ; CHECK-LABEL: test_vld1_lane_f32:
 ; CHECK: ld1 { {{v[0-9]+}}.s }[{{[0-9]+}}], [x0]
 entry:
-  %0 = load float* %a, align 4
+  %0 = load float, float* %a, align 4
   %vld1_lane = insertelement <2 x float> %b, float %0, i32 1
   ret <2 x float> %vld1_lane
 }
@@ -368,7 +368,7 @@
 ; CHECK-LABEL: test_vld1_lane_f64:
 ; CHECK: ldr {{d[0-9]+}}, [x0]
 entry:
-  %0 = load double* %a, align 8
+  %0 = load double, double* %a, align 8
   %vld1_lane = insertelement <1 x double> undef, double %0, i32 0
   ret <1 x double> %vld1_lane
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-patchpoint-scratch-regs.ll b/llvm/test/CodeGen/AArch64/arm64-patchpoint-scratch-regs.ll
index d39722b..5a740d8 100644
--- a/llvm/test/CodeGen/AArch64/arm64-patchpoint-scratch-regs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-patchpoint-scratch-regs.ll
@@ -7,7 +7,7 @@
 ; CHECK-NEXT: Ltmp
 ; CHECK-NEXT: nop
 define void @clobberScratch(i32* %p) {
-  %v = load i32* %p
+  %v = load i32, i32* %p
   tail call void asm sideeffect "nop", "~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"() nounwind
   tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 5, i32 20, i8* null, i32 0, i32* %p, i32 %v)
   store i32 %v, i32* %p
diff --git a/llvm/test/CodeGen/AArch64/arm64-patchpoint.ll b/llvm/test/CodeGen/AArch64/arm64-patchpoint.ll
index 8514341..cf06653 100644
--- a/llvm/test/CodeGen/AArch64/arm64-patchpoint.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-patchpoint.ll
@@ -50,13 +50,13 @@
 entry:
   %tmp80 = add i64 %tmp79, -16
   %tmp81 = inttoptr i64 %tmp80 to i64*
-  %tmp82 = load i64* %tmp81, align 8
+  %tmp82 = load i64, i64* %tmp81, align 8
   tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 8, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
   tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 15, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
-  %tmp83 = load i64* %tmp33, align 8
+  %tmp83 = load i64, i64* %tmp33, align 8
   %tmp84 = add i64 %tmp83, -24
   %tmp85 = inttoptr i64 %tmp84 to i64*
-  %tmp86 = load i64* %tmp85, align 8
+  %tmp86 = load i64, i64* %tmp85, align 8
   tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 17, i32 8, i64 %arg, i64 %tmp10, i64 %tmp86)
   tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 18, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
   ret i64 10
diff --git a/llvm/test/CodeGen/AArch64/arm64-pic-local-symbol.ll b/llvm/test/CodeGen/AArch64/arm64-pic-local-symbol.ll
index 627e741..c242f78 100644
--- a/llvm/test/CodeGen/AArch64/arm64-pic-local-symbol.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-pic-local-symbol.ll
@@ -7,7 +7,7 @@
 ; CHECK: get:
 ; CHECK: adrp x{{[0-9]+}}, a
 ; CHECK-NEXT: ldr w{{[0-9]+}}, [x{{[0-9]}}, :lo12:a]
-  %res = load i32* @a, align 4
+  %res = load i32, i32* @a, align 4
   ret i32 %res
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-platform-reg.ll b/llvm/test/CodeGen/AArch64/arm64-platform-reg.ll
index b0d3ee0..60672aa 100644
--- a/llvm/test/CodeGen/AArch64/arm64-platform-reg.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-platform-reg.ll
@@ -11,7 +11,7 @@
 @var = global [30 x i64] zeroinitializer
 
 define void @keep_live() {
-  %val = load volatile [30 x i64]* @var
+  %val = load volatile [30 x i64], [30 x i64]* @var
   store volatile [30 x i64] %val, [30 x i64]* @var
 
 ; CHECK: ldr x18
diff --git a/llvm/test/CodeGen/AArch64/arm64-prefetch.ll b/llvm/test/CodeGen/AArch64/arm64-prefetch.ll
index 465e418..bdeacb2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-prefetch.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-prefetch.ll
@@ -35,78 +35,78 @@
   ; CHECK: prfum pstl1keep
   call void @llvm.prefetch(i8* %tmp, i32 1, i32 3, i32 1)
 
-  %tmp1 = load i32* %j.addr, align 4, !tbaa !0
+  %tmp1 = load i32, i32* %j.addr, align 4, !tbaa !0
   %add = add nsw i32 %tmp1, %i
   %idxprom = sext i32 %add to i64
-  %tmp2 = load i32** @a, align 8, !tbaa !3
+  %tmp2 = load i32*, i32** @a, align 8, !tbaa !3
   %arrayidx = getelementptr inbounds i32, i32* %tmp2, i64 %idxprom
   %tmp3 = bitcast i32* %arrayidx to i8*
 
   ; CHECK: prfm pldl1strm
   call void @llvm.prefetch(i8* %tmp3, i32 0, i32 0, i32 1)
-  %tmp4 = load i32** @a, align 8, !tbaa !3
+  %tmp4 = load i32*, i32** @a, align 8, !tbaa !3
   %arrayidx3 = getelementptr inbounds i32, i32* %tmp4, i64 %idxprom
   %tmp5 = bitcast i32* %arrayidx3 to i8*
 
   ; CHECK: prfm pldl3keep
   call void @llvm.prefetch(i8* %tmp5, i32 0, i32 1, i32 1)
-  %tmp6 = load i32** @a, align 8, !tbaa !3
+  %tmp6 = load i32*, i32** @a, align 8, !tbaa !3
   %arrayidx6 = getelementptr inbounds i32, i32* %tmp6, i64 %idxprom
   %tmp7 = bitcast i32* %arrayidx6 to i8*
 
   ; CHECK: prfm pldl2keep
   call void @llvm.prefetch(i8* %tmp7, i32 0, i32 2, i32 1)
-  %tmp8 = load i32** @a, align 8, !tbaa !3
+  %tmp8 = load i32*, i32** @a, align 8, !tbaa !3
   %arrayidx9 = getelementptr inbounds i32, i32* %tmp8, i64 %idxprom
   %tmp9 = bitcast i32* %arrayidx9 to i8*
 
   ; CHECK: prfm pldl1keep
   call void @llvm.prefetch(i8* %tmp9, i32 0, i32 3, i32 1)
-  %tmp10 = load i32** @a, align 8, !tbaa !3
+  %tmp10 = load i32*, i32** @a, align 8, !tbaa !3
   %arrayidx12 = getelementptr inbounds i32, i32* %tmp10, i64 %idxprom
   %tmp11 = bitcast i32* %arrayidx12 to i8*
 
 
   ; CHECK: prfm plil1strm
   call void @llvm.prefetch(i8* %tmp11, i32 0, i32 0, i32 0)
-  %tmp12 = load i32** @a, align 8, !tbaa !3
+  %tmp12 = load i32*, i32** @a, align 8, !tbaa !3
   %arrayidx15 = getelementptr inbounds i32, i32* %tmp12, i64 %idxprom
   %tmp13 = bitcast i32* %arrayidx15 to i8*
 
   ; CHECK: prfm plil3keep
   call void @llvm.prefetch(i8* %tmp13, i32 0, i32 1, i32 0)
-  %tmp14 = load i32** @a, align 8, !tbaa !3
+  %tmp14 = load i32*, i32** @a, align 8, !tbaa !3
   %arrayidx18 = getelementptr inbounds i32, i32* %tmp14, i64 %idxprom
   %tmp15 = bitcast i32* %arrayidx18 to i8*
 
   ; CHECK: prfm plil2keep
   call void @llvm.prefetch(i8* %tmp15, i32 0, i32 2, i32 0)
-  %tmp16 = load i32** @a, align 8, !tbaa !3
+  %tmp16 = load i32*, i32** @a, align 8, !tbaa !3
   %arrayidx21 = getelementptr inbounds i32, i32* %tmp16, i64 %idxprom
   %tmp17 = bitcast i32* %arrayidx21 to i8*
 
   ; CHECK: prfm plil1keep
   call void @llvm.prefetch(i8* %tmp17, i32 0, i32 3, i32 0)
-  %tmp18 = load i32** @a, align 8, !tbaa !3
+  %tmp18 = load i32*, i32** @a, align 8, !tbaa !3
   %arrayidx24 = getelementptr inbounds i32, i32* %tmp18, i64 %idxprom
   %tmp19 = bitcast i32* %arrayidx24 to i8*
 
 
   ; CHECK: prfm pstl1strm
   call void @llvm.prefetch(i8* %tmp19, i32 1, i32 0, i32 1)
-  %tmp20 = load i32** @a, align 8, !tbaa !3
+  %tmp20 = load i32*, i32** @a, align 8, !tbaa !3
   %arrayidx27 = getelementptr inbounds i32, i32* %tmp20, i64 %idxprom
   %tmp21 = bitcast i32* %arrayidx27 to i8*
 
   ; CHECK: prfm pstl3keep
   call void @llvm.prefetch(i8* %tmp21, i32 1, i32 1, i32 1)
-  %tmp22 = load i32** @a, align 8, !tbaa !3
+  %tmp22 = load i32*, i32** @a, align 8, !tbaa !3
   %arrayidx30 = getelementptr inbounds i32, i32* %tmp22, i64 %idxprom
   %tmp23 = bitcast i32* %arrayidx30 to i8*
 
   ; CHECK: prfm pstl2keep
   call void @llvm.prefetch(i8* %tmp23, i32 1, i32 2, i32 1)
-  %tmp24 = load i32** @a, align 8, !tbaa !3
+  %tmp24 = load i32*, i32** @a, align 8, !tbaa !3
   %arrayidx33 = getelementptr inbounds i32, i32* %tmp24, i64 %idxprom
   %tmp25 = bitcast i32* %arrayidx33 to i8*
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-redzone.ll b/llvm/test/CodeGen/AArch64/arm64-redzone.ll
index 9b0c384..837249c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-redzone.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-redzone.ll
@@ -9,10 +9,10 @@
   %x = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
   store i32 %b, i32* %b.addr, align 4
-  %tmp = load i32* %a.addr, align 4
-  %tmp1 = load i32* %b.addr, align 4
+  %tmp = load i32, i32* %a.addr, align 4
+  %tmp1 = load i32, i32* %b.addr, align 4
   %add = add nsw i32 %tmp, %tmp1
   store i32 %add, i32* %x, align 4
-  %tmp2 = load i32* %x, align 4
+  %tmp2 = load i32, i32* %x, align 4
   ret i32 %tmp2
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-register-offset-addressing.ll b/llvm/test/CodeGen/AArch64/arm64-register-offset-addressing.ll
index e686e68..7078ffc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-register-offset-addressing.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-register-offset-addressing.ll
@@ -6,7 +6,7 @@
 ; CHECK: ldrb w0, [x0, [[REG]]]
 ; CHECK: ret
   %tmp1 = getelementptr inbounds i16, i16* %a, i64 %b
-  %tmp2 = load i16* %tmp1
+  %tmp2 = load i16, i16* %tmp1
   %tmp3 = trunc i16 %tmp2 to i8
   ret i8 %tmp3
 }
@@ -19,7 +19,7 @@
    %off32.sext.tmp = shl i64 %offset, 32
    %off32.sext = ashr i64 %off32.sext.tmp, 32
    %addr8_sxtw = getelementptr i8, i8* %base, i64 %off32.sext
-   %val8_sxtw = load volatile i8* %addr8_sxtw
+   %val8_sxtw = load volatile i8, i8* %addr8_sxtw
    %val32_signed = sext i8 %val8_sxtw to i32
    store volatile i32 %val32_signed, i32* @var_32bit
 ; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
@@ -28,7 +28,7 @@
   %offset_uxtw = and i64 %offset, 4294967295
   %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
   %addr_uxtw = inttoptr i64 %addrint1_uxtw to i8*
-  %val8_uxtw = load volatile i8* %addr_uxtw
+  %val8_uxtw = load volatile i8, i8* %addr_uxtw
   %newval8 = add i8 %val8_uxtw, 1
   store volatile i8 %newval8, i8* @var_8bit
 ; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
@@ -44,7 +44,7 @@
   %offset_uxtw = and i64 %offset, 4294967295
   %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
   %addr_uxtw = inttoptr i64 %addrint1_uxtw to i16*
-  %val8_uxtw = load volatile i16* %addr_uxtw
+  %val8_uxtw = load volatile i16, i16* %addr_uxtw
   %newval8 = add i16 %val8_uxtw, 1
   store volatile i16 %newval8, i16* @var_16bit
 ; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
@@ -54,7 +54,7 @@
   %offset_sxtw = ashr i64 %offset_sxtw.tmp, 32
   %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
   %addr_sxtw = inttoptr i64 %addrint_sxtw to i16*
-  %val16_sxtw = load volatile i16* %addr_sxtw
+  %val16_sxtw = load volatile i16, i16* %addr_sxtw
   %val64_signed = sext i16 %val16_sxtw to i64
   store volatile i64 %val64_signed, i64* @var_64bit
 ; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
@@ -65,7 +65,7 @@
   %offset2_uxtwN = shl i64 %offset_uxtwN, 1
   %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
   %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i16*
-  %val32 = load volatile i32* @var_32bit
+  %val32 = load volatile i32, i32* @var_32bit
   %val16_trunc32 = trunc i32 %val32 to i16
   store volatile i16 %val16_trunc32, i16* %addr_uxtwN
 ; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #1]
@@ -79,7 +79,7 @@
   %offset_uxtw = and i64 %offset, 4294967295
   %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
   %addr_uxtw = inttoptr i64 %addrint1_uxtw to i32*
-  %val32_uxtw = load volatile i32* %addr_uxtw
+  %val32_uxtw = load volatile i32, i32* %addr_uxtw
   %newval32 = add i32 %val32_uxtw, 1
   store volatile i32 %newval32, i32* @var_32bit
 ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
@@ -89,7 +89,7 @@
   %offset_sxtw = ashr i64 %offset_sxtw.tmp, 32
   %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
   %addr_sxtw = inttoptr i64 %addrint_sxtw to i32*
-  %val32_sxtw = load volatile i32* %addr_sxtw
+  %val32_sxtw = load volatile i32, i32* %addr_sxtw
   %val64_signed = sext i32 %val32_sxtw to i64
   store volatile i64 %val64_signed, i64* @var_64bit
 ; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
@@ -100,7 +100,7 @@
   %offset2_uxtwN = shl i64 %offset_uxtwN, 2
   %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
   %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i32*
-  %val32 = load volatile i32* @var_32bit
+  %val32 = load volatile i32, i32* @var_32bit
   store volatile i32 %val32, i32* %addr_uxtwN
 ; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #2]
    ret void
@@ -113,7 +113,7 @@
   %offset_uxtw = and i64 %offset, 4294967295
   %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
   %addr_uxtw = inttoptr i64 %addrint1_uxtw to i64*
-  %val64_uxtw = load volatile i64* %addr_uxtw
+  %val64_uxtw = load volatile i64, i64* %addr_uxtw
   %newval8 = add i64 %val64_uxtw, 1
   store volatile i64 %newval8, i64* @var_64bit
 ; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw]
@@ -123,7 +123,7 @@
   %offset_sxtw = ashr i64 %offset_sxtw.tmp, 32
   %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
   %addr_sxtw = inttoptr i64 %addrint_sxtw to i64*
-  %val64_sxtw = load volatile i64* %addr_sxtw
+  %val64_sxtw = load volatile i64, i64* %addr_sxtw
   store volatile i64 %val64_sxtw, i64* @var_64bit
 ; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, sxtw]
 
@@ -133,7 +133,7 @@
   %offset2_uxtwN = shl i64 %offset_uxtwN, 3
   %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
   %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i64*
-  %val64 = load volatile i64* @var_64bit
+  %val64 = load volatile i64, i64* @var_64bit
   store volatile i64 %val64, i64* %addr_uxtwN
 ; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{w[0-9]+}}, uxtw #3]
    ret void
diff --git a/llvm/test/CodeGen/AArch64/arm64-regress-interphase-shift.ll b/llvm/test/CodeGen/AArch64/arm64-regress-interphase-shift.ll
index fec8933..d376aaf 100644
--- a/llvm/test/CodeGen/AArch64/arm64-regress-interphase-shift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-regress-interphase-shift.ll
@@ -11,7 +11,7 @@
 define void @foo(i64* nocapture %d) {
 ; CHECK-LABEL: foo:
 ; CHECK: rorv
-  %tmp = load i64* undef, align 8
+  %tmp = load i64, i64* undef, align 8
   %sub397 = sub i64 0, %tmp
   %and398 = and i64 %sub397, 4294967295
   %shr404 = lshr i64 %and398, 0
diff --git a/llvm/test/CodeGen/AArch64/arm64-return-vector.ll b/llvm/test/CodeGen/AArch64/arm64-return-vector.ll
index 9457d8b..3262c91 100644
--- a/llvm/test/CodeGen/AArch64/arm64-return-vector.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-return-vector.ll
@@ -6,6 +6,6 @@
 ; CHECK: test
 ; CHECK: ldr q0, [x0]
 ; CHECK: ret
-  %tmp1 = load <2 x double>* %p, align 16
+  %tmp1 = load <2 x double>, <2 x double>* %p, align 16
   ret <2 x double> %tmp1
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index fbf4e45..74356d7 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -64,7 +64,7 @@
 define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: test_vrev64D8:
 ;CHECK: rev64.8b
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 	ret <8 x i8> %tmp2
 }
@@ -72,7 +72,7 @@
 define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: test_vrev64D16:
 ;CHECK: rev64.4h
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 	ret <4 x i16> %tmp2
 }
@@ -80,7 +80,7 @@
 define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: test_vrev64D32:
 ;CHECK: rev64.2s
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
 	ret <2 x i32> %tmp2
 }
@@ -88,7 +88,7 @@
 define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: test_vrev64Df:
 ;CHECK: rev64.2s
-	%tmp1 = load <2 x float>* %A
+	%tmp1 = load <2 x float>, <2 x float>* %A
 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
 	ret <2 x float> %tmp2
 }
@@ -96,7 +96,7 @@
 define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: test_vrev64Q8:
 ;CHECK: rev64.16b
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
 	ret <16 x i8> %tmp2
 }
@@ -104,7 +104,7 @@
 define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: test_vrev64Q16:
 ;CHECK: rev64.8h
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
 	ret <8 x i16> %tmp2
 }
@@ -112,7 +112,7 @@
 define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: test_vrev64Q32:
 ;CHECK: rev64.4s
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
 	ret <4 x i32> %tmp2
 }
@@ -120,7 +120,7 @@
 define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
 ;CHECK-LABEL: test_vrev64Qf:
 ;CHECK: rev64.4s
-	%tmp1 = load <4 x float>* %A
+	%tmp1 = load <4 x float>, <4 x float>* %A
 	%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
 	ret <4 x float> %tmp2
 }
@@ -128,7 +128,7 @@
 define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: test_vrev32D8:
 ;CHECK: rev32.8b
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
 	ret <8 x i8> %tmp2
 }
@@ -136,7 +136,7 @@
 define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: test_vrev32D16:
 ;CHECK: rev32.4h
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
 	ret <4 x i16> %tmp2
 }
@@ -144,7 +144,7 @@
 define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: test_vrev32Q8:
 ;CHECK: rev32.16b
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
 	ret <16 x i8> %tmp2
 }
@@ -152,7 +152,7 @@
 define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: test_vrev32Q16:
 ;CHECK: rev32.8h
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
 	ret <8 x i16> %tmp2
 }
@@ -160,7 +160,7 @@
 define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: test_vrev16D8:
 ;CHECK: rev16.8b
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
 	ret <8 x i8> %tmp2
 }
@@ -168,7 +168,7 @@
 define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: test_vrev16Q8:
 ;CHECK: rev16.16b
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
 	ret <16 x i8> %tmp2
 }
@@ -178,7 +178,7 @@
 define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: test_vrev64D8_undef:
 ;CHECK: rev64.8b
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
 	ret <8 x i8> %tmp2
 }
@@ -186,7 +186,7 @@
 define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: test_vrev32Q16_undef:
 ;CHECK: rev32.8h
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
 	ret <8 x i16> %tmp2
 }
@@ -199,7 +199,7 @@
 ; CHECK: st1.h
 entry:
   %0 = bitcast <4 x i16>* %source to <8 x i16>*
-  %tmp2 = load <8 x i16>* %0, align 4
+  %tmp2 = load <8 x i16>, <8 x i16>* %0, align 4
   %tmp3 = extractelement <8 x i16> %tmp2, i32 6
   %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0
   %tmp9 = extractelement <8 x i16> %tmp2, i32 5
@@ -215,7 +215,7 @@
 ; CHECK: rev64.4s
 entry:
   %0 = bitcast float* %source to <4 x float>*
-  %tmp2 = load <4 x float>* %0, align 4
+  %tmp2 = load <4 x float>, <4 x float>* %0, align 4
   %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0>
   %arrayidx8 = getelementptr inbounds <4 x float>, <4 x float>* %dest, i32 11
   store <4 x float> %tmp5, <4 x float>* %arrayidx8, align 4
diff --git a/llvm/test/CodeGen/AArch64/arm64-scaled_iv.ll b/llvm/test/CodeGen/AArch64/arm64-scaled_iv.ll
index a5ca632..24f04f4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-scaled_iv.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-scaled_iv.ll
@@ -18,12 +18,12 @@
   %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
   %tmp = add nsw i64 %indvars.iv, -1
   %arrayidx = getelementptr inbounds double, double* %b, i64 %tmp
-  %tmp1 = load double* %arrayidx, align 8
+  %tmp1 = load double, double* %arrayidx, align 8
 ; The induction variable should carry the scaling factor: 1 * 8 = 8.
 ; CHECK: [[IVNEXT]] = add nuw nsw i64 [[IV]], 8
   %indvars.iv.next = add i64 %indvars.iv, 1
   %arrayidx2 = getelementptr inbounds double, double* %c, i64 %indvars.iv.next
-  %tmp2 = load double* %arrayidx2, align 8
+  %tmp2 = load double, double* %arrayidx2, align 8
   %mul = fmul double %tmp1, %tmp2
   %arrayidx4 = getelementptr inbounds double, double* %a, i64 %indvars.iv
   store double %mul, double* %arrayidx4, align 8
diff --git a/llvm/test/CodeGen/AArch64/arm64-scvt.ll b/llvm/test/CodeGen/AArch64/arm64-scvt.ll
index d3b4ea4..fc64d7b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-scvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-scvt.ll
@@ -7,7 +7,7 @@
 ; CHECK-LABEL: t1:
 ; CHECK: ldr s0, [x0]
 ; CHECK: scvtf s0, s0
-  %tmp1 = load i32* %src, align 4
+  %tmp1 = load i32, i32* %src, align 4
   %tmp2 = sitofp i32 %tmp1 to float
   ret float %tmp2
 }
@@ -17,7 +17,7 @@
 ; CHECK-LABEL: t2:
 ; CHECK: ldr s0, [x0]
 ; CHECK: ucvtf s0, s0
-  %tmp1 = load i32* %src, align 4
+  %tmp1 = load i32, i32* %src, align 4
   %tmp2 = uitofp i32 %tmp1 to float
   ret float %tmp2
 }
@@ -27,7 +27,7 @@
 ; CHECK-LABEL: t3:
 ; CHECK: ldr d0, [x0]
 ; CHECK: scvtf d0, d0
-  %tmp1 = load i64* %src, align 4
+  %tmp1 = load i64, i64* %src, align 4
   %tmp2 = sitofp i64 %tmp1 to double
   ret double %tmp2
 }
@@ -37,7 +37,7 @@
 ; CHECK-LABEL: t4:
 ; CHECK: ldr d0, [x0]
 ; CHECK: ucvtf d0, d0
-  %tmp1 = load i64* %src, align 4
+  %tmp1 = load i64, i64* %src, align 4
   %tmp2 = uitofp i64 %tmp1 to double
   ret double %tmp2
 }
@@ -48,7 +48,7 @@
 ; CHECK-LABEL: t5:
 ; CHECK: ldr [[REG:w[0-9]+]], [x0]
 ; CHECK: scvtf d0, [[REG]]
-  %tmp1 = load i32* %src, align 4
+  %tmp1 = load i32, i32* %src, align 4
   %tmp2 = sitofp i32 %tmp1 to double
   ret double %tmp2
 }
@@ -76,7 +76,7 @@
 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i8, i8* %sp0, i64 1
-  %pix_sp0.0.copyload = load i8* %addr, align 1
+  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
   %val = uitofp i8 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -89,7 +89,7 @@
 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i16, i16* %sp0, i64 1
-  %pix_sp0.0.copyload = load i16* %addr, align 1
+  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
   %val = uitofp i16 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -102,7 +102,7 @@
 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i32, i32* %sp0, i64 1
-  %pix_sp0.0.copyload = load i32* %addr, align 1
+  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
   %val = uitofp i32 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -116,7 +116,7 @@
 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i64, i64* %sp0, i64 1
-  %pix_sp0.0.copyload = load i64* %addr, align 1
+  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
   %val = uitofp i64 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -130,7 +130,7 @@
 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i8, i8* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i8* %addr, align 1
+  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
   %val = uitofp i8 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -143,7 +143,7 @@
 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i16, i16* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i16* %addr, align 1
+  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
   %val = uitofp i16 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -156,7 +156,7 @@
 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i32, i32* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i32* %addr, align 1
+  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
   %val = uitofp i32 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -170,7 +170,7 @@
 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i64, i64* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i64* %addr, align 1
+  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
   %val = uitofp i64 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -185,7 +185,7 @@
 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i8, i8* %sp0, i64 1
-  %pix_sp0.0.copyload = load i8* %addr, align 1
+  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
   %val = uitofp i8 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -198,7 +198,7 @@
 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i16, i16* %sp0, i64 1
-  %pix_sp0.0.copyload = load i16* %addr, align 1
+  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
   %val = uitofp i16 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -211,7 +211,7 @@
 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i32, i32* %sp0, i64 1
-  %pix_sp0.0.copyload = load i32* %addr, align 1
+  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
   %val = uitofp i32 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -224,7 +224,7 @@
 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i64, i64* %sp0, i64 1
-  %pix_sp0.0.copyload = load i64* %addr, align 1
+  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
   %val = uitofp i64 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -238,7 +238,7 @@
 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i8, i8* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i8* %addr, align 1
+  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
   %val = uitofp i8 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -251,7 +251,7 @@
 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i16, i16* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i16* %addr, align 1
+  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
   %val = uitofp i16 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -264,7 +264,7 @@
 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i32, i32* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i32* %addr, align 1
+  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
   %val = uitofp i32 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -277,7 +277,7 @@
 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i64, i64* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i64* %addr, align 1
+  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
   %val = uitofp i64 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -293,7 +293,7 @@
   %bitcast = ptrtoint i8* %sp0 to i64
   %add = add i64 %bitcast, -1
   %addr = inttoptr i64 %add to i8*
-  %pix_sp0.0.copyload = load i8* %addr, align 1
+  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
   %val = uitofp i8 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -307,7 +307,7 @@
   %bitcast = ptrtoint i16* %sp0 to i64
   %add = add i64 %bitcast, 1
   %addr = inttoptr i64 %add to i16*
-  %pix_sp0.0.copyload = load i16* %addr, align 1
+  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
   %val = uitofp i16 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -321,7 +321,7 @@
   %bitcast = ptrtoint i32* %sp0 to i64
   %add = add i64 %bitcast, 1
   %addr = inttoptr i64 %add to i32*
-  %pix_sp0.0.copyload = load i32* %addr, align 1
+  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
   %val = uitofp i32 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -336,7 +336,7 @@
   %bitcast = ptrtoint i64* %sp0 to i64
   %add = add i64 %bitcast, 1
   %addr = inttoptr i64 %add to i64*
-  %pix_sp0.0.copyload = load i64* %addr, align 1
+  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
   %val = uitofp i64 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -353,7 +353,7 @@
   %bitcast = ptrtoint i8* %sp0 to i64
   %add = add i64 %bitcast, -1
   %addr = inttoptr i64 %add to i8*
-  %pix_sp0.0.copyload = load i8* %addr, align 1
+  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
   %val = uitofp i8 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -367,7 +367,7 @@
   %bitcast = ptrtoint i16* %sp0 to i64
   %add = add i64 %bitcast, 1
   %addr = inttoptr i64 %add to i16*
-  %pix_sp0.0.copyload = load i16* %addr, align 1
+  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
   %val = uitofp i16 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -381,7 +381,7 @@
   %bitcast = ptrtoint i32* %sp0 to i64
   %add = add i64 %bitcast, 1
   %addr = inttoptr i64 %add to i32*
-  %pix_sp0.0.copyload = load i32* %addr, align 1
+  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
   %val = uitofp i32 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -395,7 +395,7 @@
   %bitcast = ptrtoint i64* %sp0 to i64
   %add = add i64 %bitcast, 1
   %addr = inttoptr i64 %add to i64*
-  %pix_sp0.0.copyload = load i64* %addr, align 1
+  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
   %val = uitofp i64 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -416,7 +416,7 @@
 ; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i8, i8* %sp0, i64 1
-  %pix_sp0.0.copyload = load i8* %addr, align 1
+  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
   %val = sitofp i8 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -430,7 +430,7 @@
 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i16, i16* %sp0, i64 1
-  %pix_sp0.0.copyload = load i16* %addr, align 1
+  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
   %val = sitofp i16 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -443,7 +443,7 @@
 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i32, i32* %sp0, i64 1
-  %pix_sp0.0.copyload = load i32* %addr, align 1
+  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
   %val = sitofp i32 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -457,7 +457,7 @@
 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i64, i64* %sp0, i64 1
-  %pix_sp0.0.copyload = load i64* %addr, align 1
+  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
   %val = sitofp i64 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -477,7 +477,7 @@
 ; CHECK-A57-NEXT: fmul s0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i8, i8* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i8* %addr, align 1
+  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
   %val = sitofp i8 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -491,7 +491,7 @@
 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i16, i16* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i16* %addr, align 1
+  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
   %val = sitofp i16 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -504,7 +504,7 @@
 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i32, i32* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i32* %addr, align 1
+  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
   %val = sitofp i32 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -518,7 +518,7 @@
 ; CHECK-NEXT: fmul s0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i64, i64* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i64* %addr, align 1
+  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
   %val = sitofp i64 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -532,7 +532,7 @@
 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i8, i8* %sp0, i64 1
-  %pix_sp0.0.copyload = load i8* %addr, align 1
+  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
   %val = sitofp i8 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -551,7 +551,7 @@
 ; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i16, i16* %sp0, i64 1
-  %pix_sp0.0.copyload = load i16* %addr, align 1
+  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
   %val = sitofp i16 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -565,7 +565,7 @@
 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i32, i32* %sp0, i64 1
-  %pix_sp0.0.copyload = load i32* %addr, align 1
+  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
   %val = sitofp i32 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -578,7 +578,7 @@
 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i64, i64* %sp0, i64 1
-  %pix_sp0.0.copyload = load i64* %addr, align 1
+  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
   %val = sitofp i64 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -592,7 +592,7 @@
 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i8, i8* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i8* %addr, align 1
+  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
   %val = sitofp i8 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -611,7 +611,7 @@
 ; CHECK-A57-NEXT: fmul d0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i16, i16* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i16* %addr, align 1
+  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
   %val = sitofp i16 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -625,7 +625,7 @@
 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i32, i32* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i32* %addr, align 1
+  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
   %val = sitofp i32 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -638,7 +638,7 @@
 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i64, i64* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i64* %addr, align 1
+  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
   %val = sitofp i64 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -660,7 +660,7 @@
   %bitcast = ptrtoint i8* %sp0 to i64
   %add = add i64 %bitcast, -1
   %addr = inttoptr i64 %add to i8*
-  %pix_sp0.0.copyload = load i8* %addr, align 1
+  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
   %val = sitofp i8 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -675,7 +675,7 @@
   %bitcast = ptrtoint i16* %sp0 to i64
   %add = add i64 %bitcast, 1
   %addr = inttoptr i64 %add to i16*
-  %pix_sp0.0.copyload = load i16* %addr, align 1
+  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
   %val = sitofp i16 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -689,7 +689,7 @@
   %bitcast = ptrtoint i32* %sp0 to i64
   %add = add i64 %bitcast, 1
   %addr = inttoptr i64 %add to i32*
-  %pix_sp0.0.copyload = load i32* %addr, align 1
+  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
   %val = sitofp i32 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -704,7 +704,7 @@
   %bitcast = ptrtoint i64* %sp0 to i64
   %add = add i64 %bitcast, 1
   %addr = inttoptr i64 %add to i64*
-  %pix_sp0.0.copyload = load i64* %addr, align 1
+  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
   %val = sitofp i64 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -721,7 +721,7 @@
   %bitcast = ptrtoint i8* %sp0 to i64
   %add = add i64 %bitcast, -1
   %addr = inttoptr i64 %add to i8*
-  %pix_sp0.0.copyload = load i8* %addr, align 1
+  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
   %val = sitofp i8 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -741,7 +741,7 @@
   %bitcast = ptrtoint i16* %sp0 to i64
   %add = add i64 %bitcast, 1
   %addr = inttoptr i64 %add to i16*
-  %pix_sp0.0.copyload = load i16* %addr, align 1
+  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
   %val = sitofp i16 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -756,7 +756,7 @@
   %bitcast = ptrtoint i32* %sp0 to i64
   %add = add i64 %bitcast, 1
   %addr = inttoptr i64 %add to i32*
-  %pix_sp0.0.copyload = load i32* %addr, align 1
+  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
   %val = sitofp i32 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -770,7 +770,7 @@
   %bitcast = ptrtoint i64* %sp0 to i64
   %add = add i64 %bitcast, 1
   %addr = inttoptr i64 %add to i64*
-  %pix_sp0.0.copyload = load i64* %addr, align 1
+  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
   %val = sitofp i64 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
@@ -787,7 +787,7 @@
   %bitcast = ptrtoint i8* %sp0 to i64
   %add = add i64 %bitcast, -1
   %addr = inttoptr i64 %add to i8*
-  %pix_sp0.0.copyload = load i8* %addr, align 1
+  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
   %val = sitofp i8 %pix_sp0.0.copyload to float
   %vmull.i = fmul float %val, %val
   ret float %vmull.i
@@ -800,7 +800,7 @@
 ; CHECK-NEXT: fmul d0, [[REG]], [[REG]]
 entry:
   %addr = getelementptr i32, i32* %sp0, i64 1
-  %pix_sp0.0.copyload = load i32* %addr, align 1
+  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
   %val = sitofp i32 %pix_sp0.0.copyload to double
   %vmull.i = fmul double %val, %val
   ret double %vmull.i
diff --git a/llvm/test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll b/llvm/test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll
index 10b433b..2113165 100644
--- a/llvm/test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-sitofp-combine-chains.ll
@@ -14,7 +14,7 @@
 ; CHECK: ldr [[SREG:s[0-9]+]], [x[[VARBASE]],
 ; CHECK: str wzr, [x[[VARBASE]],
 
-  %val = load i32* @var, align 4
+  %val = load i32, i32* @var, align 4
   store i32 0, i32* @var, align 4
 
   %fltval = sitofp i32 %val to float
diff --git a/llvm/test/CodeGen/AArch64/arm64-spill-lr.ll b/llvm/test/CodeGen/AArch64/arm64-spill-lr.ll
index 8cc61ce..8810908 100644
--- a/llvm/test/CodeGen/AArch64/arm64-spill-lr.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-spill-lr.ll
@@ -13,29 +13,29 @@
   %idxprom = sext i32 %a to i64
   %arrayidx = getelementptr inbounds [128 x i32], [128 x i32]* %stack, i64 0, i64 %idxprom
   store i32 %b, i32* %arrayidx, align 4
-  %1 = load volatile i32* @bar, align 4
-  %2 = load volatile i32* @bar, align 4
-  %3 = load volatile i32* @bar, align 4
-  %4 = load volatile i32* @bar, align 4
-  %5 = load volatile i32* @bar, align 4
-  %6 = load volatile i32* @bar, align 4
-  %7 = load volatile i32* @bar, align 4
-  %8 = load volatile i32* @bar, align 4
-  %9 = load volatile i32* @bar, align 4
-  %10 = load volatile i32* @bar, align 4
-  %11 = load volatile i32* @bar, align 4
-  %12 = load volatile i32* @bar, align 4
-  %13 = load volatile i32* @bar, align 4
-  %14 = load volatile i32* @bar, align 4
-  %15 = load volatile i32* @bar, align 4
-  %16 = load volatile i32* @bar, align 4
-  %17 = load volatile i32* @bar, align 4
-  %18 = load volatile i32* @bar, align 4
-  %19 = load volatile i32* @bar, align 4
-  %20 = load volatile i32* @bar, align 4
+  %1 = load volatile i32, i32* @bar, align 4
+  %2 = load volatile i32, i32* @bar, align 4
+  %3 = load volatile i32, i32* @bar, align 4
+  %4 = load volatile i32, i32* @bar, align 4
+  %5 = load volatile i32, i32* @bar, align 4
+  %6 = load volatile i32, i32* @bar, align 4
+  %7 = load volatile i32, i32* @bar, align 4
+  %8 = load volatile i32, i32* @bar, align 4
+  %9 = load volatile i32, i32* @bar, align 4
+  %10 = load volatile i32, i32* @bar, align 4
+  %11 = load volatile i32, i32* @bar, align 4
+  %12 = load volatile i32, i32* @bar, align 4
+  %13 = load volatile i32, i32* @bar, align 4
+  %14 = load volatile i32, i32* @bar, align 4
+  %15 = load volatile i32, i32* @bar, align 4
+  %16 = load volatile i32, i32* @bar, align 4
+  %17 = load volatile i32, i32* @bar, align 4
+  %18 = load volatile i32, i32* @bar, align 4
+  %19 = load volatile i32, i32* @bar, align 4
+  %20 = load volatile i32, i32* @bar, align 4
   %idxprom1 = sext i32 %c to i64
   %arrayidx2 = getelementptr inbounds [128 x i32], [128 x i32]* %stack, i64 0, i64 %idxprom1
-  %21 = load i32* %arrayidx2, align 4
+  %21 = load i32, i32* %arrayidx2, align 4
   %factor = mul i32 %h, -2
   %factor67 = mul i32 %g, -2
   %factor68 = mul i32 %f, -2
diff --git a/llvm/test/CodeGen/AArch64/arm64-spill.ll b/llvm/test/CodeGen/AArch64/arm64-spill.ll
index 47cdc2b..9c0cf38 100644
--- a/llvm/test/CodeGen/AArch64/arm64-spill.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-spill.ll
@@ -8,7 +8,7 @@
 ; CHECK: st1.2d
 define void @fpr128(<4 x float>* %p) nounwind ssp {
 entry:
-  %x = load <4 x float>* %p, align 16
+  %x = load <4 x float>, <4 x float>* %p, align 16
   call void asm sideeffect "; inlineasm", "~{q0},~{q1},~{q2},~{q3},~{q4},~{q5},~{q6},~{q7},~{q8},~{q9},~{q10},~{q11},~{q12},~{q13},~{q14},~{q15},~{q16},~{q17},~{q18},~{q19},~{q20},~{q21},~{q22},~{q23},~{q24},~{q25},~{q26},~{q27},~{q28},~{q29},~{q30},~{q31},~{x0},~{x1},~{x2},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{fp},~{lr},~{sp},~{memory}"() nounwind
   store <4 x float> %x, <4 x float>* %p, align 16
   ret void
diff --git a/llvm/test/CodeGen/AArch64/arm64-stack-no-frame.ll b/llvm/test/CodeGen/AArch64/arm64-stack-no-frame.ll
index b5970c0..22a6707 100644
--- a/llvm/test/CodeGen/AArch64/arm64-stack-no-frame.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-stack-no-frame.ll
@@ -9,10 +9,10 @@
 ; CHECK: test_stack_no_frame
 ; CHECK: sub sp, sp, #[[STACKSIZE:[0-9]+]]
   %local = alloca [20 x i64]
-  %val = load volatile [20 x i64]* @global, align 8
+  %val = load volatile [20 x i64], [20 x i64]* @global, align 8
   store volatile [20 x i64] %val, [20 x i64]* %local, align 8
 
-  %val2 = load volatile [20 x i64]* %local, align 8
+  %val2 = load volatile [20 x i64], [20 x i64]* %local, align 8
   store volatile [20 x i64] %val2, [20 x i64]* @global, align 8
 
 ; CHECK: add sp, sp, #[[STACKSIZE]]
diff --git a/llvm/test/CodeGen/AArch64/arm64-strict-align.ll b/llvm/test/CodeGen/AArch64/arm64-strict-align.ll
index 5d13704..b707527 100644
--- a/llvm/test/CodeGen/AArch64/arm64-strict-align.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-strict-align.ll
@@ -10,7 +10,7 @@
 
 ; CHECK: ldr w0, [x0]
 ; CHECK: ret
-  %tmp = load i32* %p, align 2
+  %tmp = load i32, i32* %p, align 2
   ret i32 %tmp
 }
 
@@ -21,6 +21,6 @@
 
 ; CHECK: ldr x0, [x0]
 ; CHECK: ret
-  %tmp = load i64* %p, align 4
+  %tmp = load i64, i64* %p, align 4
   ret i64 %tmp
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-tls-darwin.ll b/llvm/test/CodeGen/AArch64/arm64-tls-darwin.ll
index 5e8ec33..fa4e833 100644
--- a/llvm/test/CodeGen/AArch64/arm64-tls-darwin.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-tls-darwin.ll
@@ -13,6 +13,6 @@
 ; CHECK: blr [[TLV_GET_ADDR]]
 ; CHECK: ldrb w0, [x0]
 
-  %val = load i8* @var, align 1
+  %val = load i8, i8* @var, align 1
   ret i8 %val
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll b/llvm/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll
index 3daae62..f94f88a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll
@@ -10,7 +10,7 @@
 define i32 @test_generaldynamic() {
 ; CHECK-LABEL: test_generaldynamic:
 
-  %val = load i32* @general_dynamic_var
+  %val = load i32, i32* @general_dynamic_var
   ret i32 %val
 
 ; CHECK: .tlsdesccall general_dynamic_var
diff --git a/llvm/test/CodeGen/AArch64/arm64-tls-dynamics.ll b/llvm/test/CodeGen/AArch64/arm64-tls-dynamics.ll
index 30ea63b..8a9dfd2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-tls-dynamics.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-tls-dynamics.ll
@@ -6,7 +6,7 @@
 define i32 @test_generaldynamic() {
 ; CHECK-LABEL: test_generaldynamic:
 
-  %val = load i32* @general_dynamic_var
+  %val = load i32, i32* @general_dynamic_var
   ret i32 %val
 
   ; FIXME: the adrp instructions are redundant (if harmless).
@@ -54,7 +54,7 @@
 define i32 @test_localdynamic() {
 ; CHECK-LABEL: test_localdynamic:
 
-  %val = load i32* @local_dynamic_var
+  %val = load i32, i32* @local_dynamic_var
   ret i32 %val
 
 ; CHECK: adrp x[[TLSDESC_HI:[0-9]+]], :tlsdesc:_TLS_MODULE_BASE_
@@ -116,8 +116,8 @@
 define i32 @test_localdynamic_deduplicate() {
 ; CHECK-LABEL: test_localdynamic_deduplicate:
 
-  %val = load i32* @local_dynamic_var
-  %val2 = load i32* @local_dynamic_var2
+  %val = load i32, i32* @local_dynamic_var
+  %val2 = load i32, i32* @local_dynamic_var2
 
   %sum = add i32 %val, %val2
   ret i32 %sum
diff --git a/llvm/test/CodeGen/AArch64/arm64-tls-execs.ll b/llvm/test/CodeGen/AArch64/arm64-tls-execs.ll
index f0130d8..38c30b4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-tls-execs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-tls-execs.ll
@@ -5,7 +5,7 @@
 
 define i32 @test_initial_exec() {
 ; CHECK-LABEL: test_initial_exec:
-  %val = load i32* @initial_exec_var
+  %val = load i32, i32* @initial_exec_var
 
 ; CHECK: adrp x[[GOTADDR:[0-9]+]], :gottprel:initial_exec_var
 ; CHECK: ldr x[[TP_OFFSET:[0-9]+]], [x[[GOTADDR]], :gottprel_lo12:initial_exec_var]
@@ -36,7 +36,7 @@
 
 define i32 @test_local_exec() {
 ; CHECK-LABEL: test_local_exec:
-  %val = load i32* @local_exec_var
+  %val = load i32, i32* @local_exec_var
 
 ; CHECK: movz [[TP_OFFSET:x[0-9]+]], #:tprel_g1:local_exec_var // encoding: [0bAAA{{[01]+}},A,0b101AAAAA,0x92]
 ; CHECK: movk [[TP_OFFSET]], #:tprel_g0_nc:local_exec_var
diff --git a/llvm/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
index 0f6f270..1b1681d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-triv-disjoint-mem-access.ll
@@ -8,11 +8,11 @@
 entry:
 ; CHECK: ldr {{w[0-9]+}}, [x[[REG:[0-9]+]], #4]
 ; CHECK: str {{w[0-9]+}}, [x[[REG]], #8]
-  %0 = load i32** @a, align 8, !tbaa !1
+  %0 = load i32*, i32** @a, align 8, !tbaa !1
   %arrayidx = getelementptr inbounds i32, i32* %0, i64 2
   store i32 %i, i32* %arrayidx, align 4, !tbaa !5
   %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 1
-  %1 = load i32* %arrayidx1, align 4, !tbaa !5
+  %1 = load i32, i32* %arrayidx1, align 4, !tbaa !5
   %add = add nsw i32 %k, %i
   store i32 %add, i32* @m, align 4, !tbaa !5
   ret i32 %1
diff --git a/llvm/test/CodeGen/AArch64/arm64-trn.ll b/llvm/test/CodeGen/AArch64/arm64-trn.ll
index 2db7a14..92ccf05 100644
--- a/llvm/test/CodeGen/AArch64/arm64-trn.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-trn.ll
@@ -5,8 +5,8 @@
 ;CHECK: trn1.8b
 ;CHECK: trn2.8b
 ;CHECK-NEXT: add.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
         %tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -18,8 +18,8 @@
 ;CHECK: trn1.4h
 ;CHECK: trn2.4h
 ;CHECK-NEXT: add.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 	%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
         %tmp5 = add <4 x i16> %tmp3, %tmp4
@@ -32,8 +32,8 @@
 ;CHECK: zip1.2s
 ;CHECK: zip2.2s
 ;CHECK-NEXT: add.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
 	%tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
         %tmp5 = add <2 x i32> %tmp3, %tmp4
@@ -45,8 +45,8 @@
 ;CHECK: zip1.2s
 ;CHECK: zip2.2s
 ;CHECK-NEXT: fadd.2s
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
 	%tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
         %tmp5 = fadd <2 x float> %tmp3, %tmp4
@@ -58,8 +58,8 @@
 ;CHECK: trn1.16b
 ;CHECK: trn2.16b
 ;CHECK-NEXT: add.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
 	%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
         %tmp5 = add <16 x i8> %tmp3, %tmp4
@@ -71,8 +71,8 @@
 ;CHECK: trn1.8h
 ;CHECK: trn2.8h
 ;CHECK-NEXT: add.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
         %tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -84,8 +84,8 @@
 ;CHECK: trn1.4s
 ;CHECK: trn2.4s
 ;CHECK-NEXT: add.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 	%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
         %tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -97,8 +97,8 @@
 ;CHECK: trn1.4s
 ;CHECK: trn2.4s
 ;CHECK-NEXT: fadd.4s
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 	%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
         %tmp5 = fadd <4 x float> %tmp3, %tmp4
@@ -112,8 +112,8 @@
 ;CHECK: trn1.8b
 ;CHECK: trn2.8b
 ;CHECK-NEXT: add.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
 	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
         %tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -125,8 +125,8 @@
 ;CHECK: trn1.8h
 ;CHECK: trn2.8h
 ;CHECK-NEXT: add.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
 	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
         %tmp5 = add <8 x i16> %tmp3, %tmp4
diff --git a/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll b/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
index 0e9f21b..7cde629 100644
--- a/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-trunc-store.ll
@@ -25,7 +25,7 @@
 ; CHECK-NEXT: str w1, {{\[}}[[GLOBALADDR]], w[[OFFSETREGNUM]], sxtw #2]
 ; CHECK-NEXT: ret
 bb:
-  %.pre37 = load i32** @zptr32, align 8
+  %.pre37 = load i32*, i32** @zptr32, align 8
   %dec = add nsw i32 %arg, -1
   %idxprom8 = sext i32 %dec to i64
   %arrayidx9 = getelementptr inbounds i32, i32* %.pre37, i64 %idxprom8
@@ -45,7 +45,7 @@
 ; CHECK-NEXT: strh w1, {{\[}}[[GLOBALADDR]], w[[OFFSETREGNUM]], sxtw #1]
 ; CHECK-NEXT: ret
 bb:
-  %.pre37 = load i16** @zptr16, align 8
+  %.pre37 = load i16*, i16** @zptr16, align 8
   %dec = add nsw i32 %arg, -1
   %idxprom8 = sext i32 %dec to i64
   %arrayidx9 = getelementptr inbounds i16, i16* %.pre37, i64 %idxprom8
@@ -65,7 +65,7 @@
 ; CHECK-NEXT: sturb w1, {{\[}}[[ADDR]], #-1]
 ; CHECK-NEXT: ret
 bb:
-  %.pre37 = load i8** @zptr8, align 8
+  %.pre37 = load i8*, i8** @zptr8, align 8
   %dec = add nsw i32 %arg, -1
   %idxprom8 = sext i32 %dec to i64
   %arrayidx9 = getelementptr inbounds i8, i8* %.pre37, i64 %idxprom8
diff --git a/llvm/test/CodeGen/AArch64/arm64-unaligned_ldst.ll b/llvm/test/CodeGen/AArch64/arm64-unaligned_ldst.ll
index 20b80c0..dab8b0f 100644
--- a/llvm/test/CodeGen/AArch64/arm64-unaligned_ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-unaligned_ldst.ll
@@ -9,7 +9,7 @@
 ; CHECK: str [[X0]], [x0]
   %tmp1 = bitcast i8* %b to i64*
   %tmp2 = bitcast i8* %a to i64*
-  %tmp3 = load i64* %tmp1, align 1
+  %tmp3 = load i64, i64* %tmp1, align 1
   store i64 %tmp3, i64* %tmp2, align 1
   ret void
 }
@@ -22,7 +22,7 @@
 ; CHECK: str [[W0]], [x0]
   %tmp1 = bitcast i8* %b to i32*
   %tmp2 = bitcast i8* %a to i32*
-  %tmp3 = load i32* %tmp1, align 1
+  %tmp3 = load i32, i32* %tmp1, align 1
   store i32 %tmp3, i32* %tmp2, align 1
   ret void
 }
@@ -35,7 +35,7 @@
 ; CHECK: strh [[W0]], [x0]
   %tmp1 = bitcast i8* %b to i16*
   %tmp2 = bitcast i8* %a to i16*
-  %tmp3 = load i16* %tmp1, align 1
+  %tmp3 = load i16, i16* %tmp1, align 1
   store i16 %tmp3, i16* %tmp2, align 1
   ret void
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-uzp.ll b/llvm/test/CodeGen/AArch64/arm64-uzp.ll
index cdd8d31..517ebae 100644
--- a/llvm/test/CodeGen/AArch64/arm64-uzp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-uzp.ll
@@ -5,8 +5,8 @@
 ;CHECK: uzp1.8b
 ;CHECK: uzp2.8b
 ;CHECK-NEXT: add.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
         %tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -18,8 +18,8 @@
 ;CHECK: uzp1.4h
 ;CHECK: uzp2.4h
 ;CHECK-NEXT: add.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 	%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
         %tmp5 = add <4 x i16> %tmp3, %tmp4
@@ -31,8 +31,8 @@
 ;CHECK: uzp1.16b
 ;CHECK: uzp2.16b
 ;CHECK-NEXT: add.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
 	%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
         %tmp5 = add <16 x i8> %tmp3, %tmp4
@@ -44,8 +44,8 @@
 ;CHECK: uzp1.8h
 ;CHECK: uzp2.8h
 ;CHECK-NEXT: add.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
         %tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -57,8 +57,8 @@
 ;CHECK: uzp1.4s
 ;CHECK: uzp2.4s
 ;CHECK-NEXT: add.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 	%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
         %tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -70,8 +70,8 @@
 ;CHECK: uzp1.4s
 ;CHECK: uzp2.4s
 ;CHECK-NEXT: fadd.4s
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 	%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
         %tmp5 = fadd <4 x float> %tmp3, %tmp4
@@ -85,8 +85,8 @@
 ;CHECK: uzp1.8b
 ;CHECK: uzp2.8b
 ;CHECK-NEXT: add.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14>
 	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
         %tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -98,8 +98,8 @@
 ;CHECK: uzp1.8h
 ;CHECK: uzp2.8h
 ;CHECK-NEXT: add.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14>
 	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
         %tmp5 = add <8 x i16> %tmp3, %tmp4
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index fae2b90..a52c4eb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -4,8 +4,8 @@
 define <8 x i16> @sabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: sabdl8h:
 ;CHECK: sabdl.8h
-        %tmp1 = load <8 x i8>* %A
-        %tmp2 = load <8 x i8>* %B
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
         ret <8 x i16> %tmp4
@@ -14,8 +14,8 @@
 define <4 x i32> @sabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: sabdl4s:
 ;CHECK: sabdl.4s
-        %tmp1 = load <4 x i16>* %A
-        %tmp2 = load <4 x i16>* %B
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
         ret <4 x i32> %tmp4
@@ -24,8 +24,8 @@
 define <2 x i64> @sabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: sabdl2d:
 ;CHECK: sabdl.2d
-        %tmp1 = load <2 x i32>* %A
-        %tmp2 = load <2 x i32>* %B
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
         ret <2 x i64> %tmp4
@@ -34,8 +34,8 @@
 define <8 x i16> @sabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: sabdl2_8h:
 ;CHECK: sabdl2.8h
-        %load1 = load <16 x i8>* %A
-        %load2 = load <16 x i8>* %B
+        %load1 = load <16 x i8>, <16 x i8>* %A
+        %load2 = load <16 x i8>, <16 x i8>* %B
         %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -46,8 +46,8 @@
 define <4 x i32> @sabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: sabdl2_4s:
 ;CHECK: sabdl2.4s
-        %load1 = load <8 x i16>* %A
-        %load2 = load <8 x i16>* %B
+        %load1 = load <8 x i16>, <8 x i16>* %A
+        %load2 = load <8 x i16>, <8 x i16>* %B
         %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
         %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -58,8 +58,8 @@
 define <2 x i64> @sabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: sabdl2_2d:
 ;CHECK: sabdl2.2d
-        %load1 = load <4 x i32>* %A
-        %load2 = load <4 x i32>* %B
+        %load1 = load <4 x i32>, <4 x i32>* %A
+        %load2 = load <4 x i32>, <4 x i32>* %B
         %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
         %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -70,8 +70,8 @@
 define <8 x i16> @uabdl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: uabdl8h:
 ;CHECK: uabdl.8h
-  %tmp1 = load <8 x i8>* %A
-  %tmp2 = load <8 x i8>* %B
+  %tmp1 = load <8 x i8>, <8 x i8>* %A
+  %tmp2 = load <8 x i8>, <8 x i8>* %B
   %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
   %tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
   ret <8 x i16> %tmp4
@@ -80,8 +80,8 @@
 define <4 x i32> @uabdl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: uabdl4s:
 ;CHECK: uabdl.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
   %tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
   ret <4 x i32> %tmp4
@@ -90,8 +90,8 @@
 define <2 x i64> @uabdl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: uabdl2d:
 ;CHECK: uabdl.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
   %tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
   ret <2 x i64> %tmp4
@@ -100,8 +100,8 @@
 define <8 x i16> @uabdl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: uabdl2_8h:
 ;CHECK: uabdl2.8h
-  %load1 = load <16 x i8>* %A
-  %load2 = load <16 x i8>* %B
+  %load1 = load <16 x i8>, <16 x i8>* %A
+  %load2 = load <16 x i8>, <16 x i8>* %B
   %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 
@@ -113,8 +113,8 @@
 define <4 x i32> @uabdl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: uabdl2_4s:
 ;CHECK: uabdl2.4s
-  %load1 = load <8 x i16>* %A
-  %load2 = load <8 x i16>* %B
+  %load1 = load <8 x i16>, <8 x i16>* %A
+  %load2 = load <8 x i16>, <8 x i16>* %B
   %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -125,8 +125,8 @@
 define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: uabdl2_2d:
 ;CHECK: uabdl2.2d
-  %load1 = load <4 x i32>* %A
-  %load2 = load <4 x i32>* %B
+  %load1 = load <4 x i32>, <4 x i32>* %A
+  %load2 = load <4 x i32>, <4 x i32>* %B
   %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -137,8 +137,8 @@
 define <2 x float> @fabd_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: fabd_2s:
 ;CHECK: fabd.2s
-        %tmp1 = load <2 x float>* %A
-        %tmp2 = load <2 x float>* %B
+        %tmp1 = load <2 x float>, <2 x float>* %A
+        %tmp2 = load <2 x float>, <2 x float>* %B
         %tmp3 = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
         ret <2 x float> %tmp3
 }
@@ -146,8 +146,8 @@
 define <4 x float> @fabd_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: fabd_4s:
 ;CHECK: fabd.4s
-        %tmp1 = load <4 x float>* %A
-        %tmp2 = load <4 x float>* %B
+        %tmp1 = load <4 x float>, <4 x float>* %A
+        %tmp2 = load <4 x float>, <4 x float>* %B
         %tmp3 = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
         ret <4 x float> %tmp3
 }
@@ -155,8 +155,8 @@
 define <2 x double> @fabd_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
 ;CHECK-LABEL: fabd_2d:
 ;CHECK: fabd.2d
-        %tmp1 = load <2 x double>* %A
-        %tmp2 = load <2 x double>* %B
+        %tmp1 = load <2 x double>, <2 x double>* %A
+        %tmp2 = load <2 x double>, <2 x double>* %B
         %tmp3 = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
         ret <2 x double> %tmp3
 }
@@ -168,8 +168,8 @@
 define <8 x i8> @sabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: sabd_8b:
 ;CHECK: sabd.8b
-        %tmp1 = load <8 x i8>* %A
-        %tmp2 = load <8 x i8>* %B
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         ret <8 x i8> %tmp3
 }
@@ -177,8 +177,8 @@
 define <16 x i8> @sabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: sabd_16b:
 ;CHECK: sabd.16b
-        %tmp1 = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
         ret <16 x i8> %tmp3
 }
@@ -186,8 +186,8 @@
 define <4 x i16> @sabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: sabd_4h:
 ;CHECK: sabd.4h
-        %tmp1 = load <4 x i16>* %A
-        %tmp2 = load <4 x i16>* %B
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         ret <4 x i16> %tmp3
 }
@@ -195,8 +195,8 @@
 define <8 x i16> @sabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: sabd_8h:
 ;CHECK: sabd.8h
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i16> %tmp3
 }
@@ -204,8 +204,8 @@
 define <2 x i32> @sabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: sabd_2s:
 ;CHECK: sabd.2s
-        %tmp1 = load <2 x i32>* %A
-        %tmp2 = load <2 x i32>* %B
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         ret <2 x i32> %tmp3
 }
@@ -213,8 +213,8 @@
 define <4 x i32> @sabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: sabd_4s:
 ;CHECK: sabd.4s
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i32> %tmp3
 }
@@ -229,8 +229,8 @@
 define <8 x i8> @uabd_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: uabd_8b:
 ;CHECK: uabd.8b
-        %tmp1 = load <8 x i8>* %A
-        %tmp2 = load <8 x i8>* %B
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         ret <8 x i8> %tmp3
 }
@@ -238,8 +238,8 @@
 define <16 x i8> @uabd_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: uabd_16b:
 ;CHECK: uabd.16b
-        %tmp1 = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
         ret <16 x i8> %tmp3
 }
@@ -247,8 +247,8 @@
 define <4 x i16> @uabd_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: uabd_4h:
 ;CHECK: uabd.4h
-        %tmp1 = load <4 x i16>* %A
-        %tmp2 = load <4 x i16>* %B
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         ret <4 x i16> %tmp3
 }
@@ -256,8 +256,8 @@
 define <8 x i16> @uabd_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: uabd_8h:
 ;CHECK: uabd.8h
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i16> %tmp3
 }
@@ -265,8 +265,8 @@
 define <2 x i32> @uabd_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: uabd_2s:
 ;CHECK: uabd.2s
-        %tmp1 = load <2 x i32>* %A
-        %tmp2 = load <2 x i32>* %B
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         ret <2 x i32> %tmp3
 }
@@ -274,8 +274,8 @@
 define <4 x i32> @uabd_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: uabd_4s:
 ;CHECK: uabd.4s
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i32> %tmp3
 }
@@ -290,7 +290,7 @@
 define <8 x i8> @sqabs_8b(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: sqabs_8b:
 ;CHECK: sqabs.8b
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> %tmp1)
         ret <8 x i8> %tmp3
 }
@@ -298,7 +298,7 @@
 define <16 x i8> @sqabs_16b(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: sqabs_16b:
 ;CHECK: sqabs.16b
-        %tmp1 = load <16 x i8>* %A
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqabs.v16i8(<16 x i8> %tmp1)
         ret <16 x i8> %tmp3
 }
@@ -306,7 +306,7 @@
 define <4 x i16> @sqabs_4h(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: sqabs_4h:
 ;CHECK: sqabs.4h
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> %tmp1)
         ret <4 x i16> %tmp3
 }
@@ -314,7 +314,7 @@
 define <8 x i16> @sqabs_8h(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: sqabs_8h:
 ;CHECK: sqabs.8h
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqabs.v8i16(<8 x i16> %tmp1)
         ret <8 x i16> %tmp3
 }
@@ -322,7 +322,7 @@
 define <2 x i32> @sqabs_2s(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: sqabs_2s:
 ;CHECK: sqabs.2s
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqabs.v2i32(<2 x i32> %tmp1)
         ret <2 x i32> %tmp3
 }
@@ -330,7 +330,7 @@
 define <4 x i32> @sqabs_4s(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: sqabs_4s:
 ;CHECK: sqabs.4s
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqabs.v4i32(<4 x i32> %tmp1)
         ret <4 x i32> %tmp3
 }
@@ -345,7 +345,7 @@
 define <8 x i8> @sqneg_8b(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: sqneg_8b:
 ;CHECK: sqneg.8b
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> %tmp1)
         ret <8 x i8> %tmp3
 }
@@ -353,7 +353,7 @@
 define <16 x i8> @sqneg_16b(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: sqneg_16b:
 ;CHECK: sqneg.16b
-        %tmp1 = load <16 x i8>* %A
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqneg.v16i8(<16 x i8> %tmp1)
         ret <16 x i8> %tmp3
 }
@@ -361,7 +361,7 @@
 define <4 x i16> @sqneg_4h(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: sqneg_4h:
 ;CHECK: sqneg.4h
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> %tmp1)
         ret <4 x i16> %tmp3
 }
@@ -369,7 +369,7 @@
 define <8 x i16> @sqneg_8h(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: sqneg_8h:
 ;CHECK: sqneg.8h
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqneg.v8i16(<8 x i16> %tmp1)
         ret <8 x i16> %tmp3
 }
@@ -377,7 +377,7 @@
 define <2 x i32> @sqneg_2s(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: sqneg_2s:
 ;CHECK: sqneg.2s
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqneg.v2i32(<2 x i32> %tmp1)
         ret <2 x i32> %tmp3
 }
@@ -385,7 +385,7 @@
 define <4 x i32> @sqneg_4s(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: sqneg_4s:
 ;CHECK: sqneg.4s
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqneg.v4i32(<4 x i32> %tmp1)
         ret <4 x i32> %tmp3
 }
@@ -400,7 +400,7 @@
 define <8 x i8> @abs_8b(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: abs_8b:
 ;CHECK: abs.8b
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.abs.v8i8(<8 x i8> %tmp1)
         ret <8 x i8> %tmp3
 }
@@ -408,7 +408,7 @@
 define <16 x i8> @abs_16b(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: abs_16b:
 ;CHECK: abs.16b
-        %tmp1 = load <16 x i8>* %A
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.abs.v16i8(<16 x i8> %tmp1)
         ret <16 x i8> %tmp3
 }
@@ -416,7 +416,7 @@
 define <4 x i16> @abs_4h(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: abs_4h:
 ;CHECK: abs.4h
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.abs.v4i16(<4 x i16> %tmp1)
         ret <4 x i16> %tmp3
 }
@@ -424,7 +424,7 @@
 define <8 x i16> @abs_8h(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: abs_8h:
 ;CHECK: abs.8h
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.abs.v8i16(<8 x i16> %tmp1)
         ret <8 x i16> %tmp3
 }
@@ -432,7 +432,7 @@
 define <2 x i32> @abs_2s(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: abs_2s:
 ;CHECK: abs.2s
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.abs.v2i32(<2 x i32> %tmp1)
         ret <2 x i32> %tmp3
 }
@@ -440,7 +440,7 @@
 define <4 x i32> @abs_4s(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: abs_4s:
 ;CHECK: abs.4s
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.abs.v4i32(<4 x i32> %tmp1)
         ret <4 x i32> %tmp3
 }
@@ -471,9 +471,9 @@
 define <8 x i16> @sabal8h(<8 x i8>* %A, <8 x i8>* %B,  <8 x i16>* %C) nounwind {
 ;CHECK-LABEL: sabal8h:
 ;CHECK: sabal.8h
-        %tmp1 = load <8 x i8>* %A
-        %tmp2 = load <8 x i8>* %B
-        %tmp3 = load <8 x i16>* %C
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
+        %tmp3 = load <8 x i16>, <8 x i16>* %C
         %tmp4 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
         %tmp5 = add <8 x i16> %tmp3, %tmp4.1
@@ -483,9 +483,9 @@
 define <4 x i32> @sabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: sabal4s:
 ;CHECK: sabal.4s
-        %tmp1 = load <4 x i16>* %A
-        %tmp2 = load <4 x i16>* %B
-        %tmp3 = load <4 x i32>* %C
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
+        %tmp3 = load <4 x i32>, <4 x i32>* %C
         %tmp4 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
         %tmp5 = add <4 x i32> %tmp3, %tmp4.1
@@ -495,9 +495,9 @@
 define <2 x i64> @sabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
 ;CHECK-LABEL: sabal2d:
 ;CHECK: sabal.2d
-        %tmp1 = load <2 x i32>* %A
-        %tmp2 = load <2 x i32>* %B
-        %tmp3 = load <2 x i64>* %C
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
+        %tmp3 = load <2 x i64>, <2 x i64>* %C
         %tmp4 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
         %tmp4.1.1 = zext <2 x i32> %tmp4 to <2 x i64>
@@ -508,9 +508,9 @@
 define <8 x i16> @sabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind {
 ;CHECK-LABEL: sabal2_8h:
 ;CHECK: sabal2.8h
-        %load1 = load <16 x i8>* %A
-        %load2 = load <16 x i8>* %B
-        %tmp3 = load <8 x i16>* %C
+        %load1 = load <16 x i8>, <16 x i8>* %A
+        %load2 = load <16 x i8>, <16 x i8>* %B
+        %tmp3 = load <8 x i16>, <8 x i16>* %C
         %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         %tmp4 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -522,9 +522,9 @@
 define <4 x i32> @sabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: sabal2_4s:
 ;CHECK: sabal2.4s
-        %load1 = load <8 x i16>* %A
-        %load2 = load <8 x i16>* %B
-        %tmp3 = load <4 x i32>* %C
+        %load1 = load <8 x i16>, <8 x i16>* %A
+        %load2 = load <8 x i16>, <8 x i16>* %B
+        %tmp3 = load <4 x i32>, <4 x i32>* %C
         %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
         %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
         %tmp4 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -536,9 +536,9 @@
 define <2 x i64> @sabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
 ;CHECK-LABEL: sabal2_2d:
 ;CHECK: sabal2.2d
-        %load1 = load <4 x i32>* %A
-        %load2 = load <4 x i32>* %B
-        %tmp3 = load <2 x i64>* %C
+        %load1 = load <4 x i32>, <4 x i32>* %A
+        %load2 = load <4 x i32>, <4 x i32>* %B
+        %tmp3 = load <2 x i64>, <2 x i64>* %C
         %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
         %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
         %tmp4 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -550,9 +550,9 @@
 define <8 x i16> @uabal8h(<8 x i8>* %A, <8 x i8>* %B,  <8 x i16>* %C) nounwind {
 ;CHECK-LABEL: uabal8h:
 ;CHECK: uabal.8h
-        %tmp1 = load <8 x i8>* %A
-        %tmp2 = load <8 x i8>* %B
-        %tmp3 = load <8 x i16>* %C
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
+        %tmp3 = load <8 x i16>, <8 x i16>* %C
         %tmp4 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         %tmp4.1 = zext <8 x i8> %tmp4 to <8 x i16>
         %tmp5 = add <8 x i16> %tmp3, %tmp4.1
@@ -562,9 +562,9 @@
 define <4 x i32> @uabal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: uabal4s:
 ;CHECK: uabal.4s
-        %tmp1 = load <4 x i16>* %A
-        %tmp2 = load <4 x i16>* %B
-        %tmp3 = load <4 x i32>* %C
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
+        %tmp3 = load <4 x i32>, <4 x i32>* %C
         %tmp4 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         %tmp4.1 = zext <4 x i16> %tmp4 to <4 x i32>
         %tmp5 = add <4 x i32> %tmp3, %tmp4.1
@@ -574,9 +574,9 @@
 define <2 x i64> @uabal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
 ;CHECK-LABEL: uabal2d:
 ;CHECK: uabal.2d
-        %tmp1 = load <2 x i32>* %A
-        %tmp2 = load <2 x i32>* %B
-        %tmp3 = load <2 x i64>* %C
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
+        %tmp3 = load <2 x i64>, <2 x i64>* %C
         %tmp4 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         %tmp4.1 = zext <2 x i32> %tmp4 to <2 x i64>
         %tmp5 = add <2 x i64> %tmp3, %tmp4.1
@@ -586,9 +586,9 @@
 define <8 x i16> @uabal2_8h(<16 x i8>* %A, <16 x i8>* %B, <8 x i16>* %C) nounwind {
 ;CHECK-LABEL: uabal2_8h:
 ;CHECK: uabal2.8h
-        %load1 = load <16 x i8>* %A
-        %load2 = load <16 x i8>* %B
-        %tmp3 = load <8 x i16>* %C
+        %load1 = load <16 x i8>, <16 x i8>* %A
+        %load2 = load <16 x i8>, <16 x i8>* %B
+        %tmp3 = load <8 x i16>, <8 x i16>* %C
         %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         %tmp2 = shufflevector <16 x i8> %load2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         %tmp4 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
@@ -600,9 +600,9 @@
 define <4 x i32> @uabal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: uabal2_4s:
 ;CHECK: uabal2.4s
-        %load1 = load <8 x i16>* %A
-        %load2 = load <8 x i16>* %B
-        %tmp3 = load <4 x i32>* %C
+        %load1 = load <8 x i16>, <8 x i16>* %A
+        %load2 = load <8 x i16>, <8 x i16>* %B
+        %tmp3 = load <4 x i32>, <4 x i32>* %C
         %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
         %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
         %tmp4 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -614,9 +614,9 @@
 define <2 x i64> @uabal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
 ;CHECK-LABEL: uabal2_2d:
 ;CHECK: uabal2.2d
-        %load1 = load <4 x i32>* %A
-        %load2 = load <4 x i32>* %B
-        %tmp3 = load <2 x i64>* %C
+        %load1 = load <4 x i32>, <4 x i32>* %A
+        %load2 = load <4 x i32>, <4 x i32>* %B
+        %tmp3 = load <2 x i64>, <2 x i64>* %C
         %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
         %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
         %tmp4 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -628,10 +628,10 @@
 define <8 x i8> @saba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ;CHECK-LABEL: saba_8b:
 ;CHECK: saba.8b
-        %tmp1 = load <8 x i8>* %A
-        %tmp2 = load <8 x i8>* %B
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
-        %tmp4 = load <8 x i8>* %C
+        %tmp4 = load <8 x i8>, <8 x i8>* %C
         %tmp5 = add <8 x i8> %tmp3, %tmp4
         ret <8 x i8> %tmp5
 }
@@ -639,10 +639,10 @@
 define <16 x i8> @saba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
 ;CHECK-LABEL: saba_16b:
 ;CHECK: saba.16b
-        %tmp1 = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
-        %tmp4 = load <16 x i8>* %C
+        %tmp4 = load <16 x i8>, <16 x i8>* %C
         %tmp5 = add <16 x i8> %tmp3, %tmp4
         ret <16 x i8> %tmp5
 }
@@ -650,10 +650,10 @@
 define <4 x i16> @saba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ;CHECK-LABEL: saba_4h:
 ;CHECK: saba.4h
-        %tmp1 = load <4 x i16>* %A
-        %tmp2 = load <4 x i16>* %B
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
-        %tmp4 = load <4 x i16>* %C
+        %tmp4 = load <4 x i16>, <4 x i16>* %C
         %tmp5 = add <4 x i16> %tmp3, %tmp4
         ret <4 x i16> %tmp5
 }
@@ -661,10 +661,10 @@
 define <8 x i16> @saba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
 ;CHECK-LABEL: saba_8h:
 ;CHECK: saba.8h
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
-        %tmp4 = load <8 x i16>* %C
+        %tmp4 = load <8 x i16>, <8 x i16>* %C
         %tmp5 = add <8 x i16> %tmp3, %tmp4
         ret <8 x i16> %tmp5
 }
@@ -672,10 +672,10 @@
 define <2 x i32> @saba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ;CHECK-LABEL: saba_2s:
 ;CHECK: saba.2s
-        %tmp1 = load <2 x i32>* %A
-        %tmp2 = load <2 x i32>* %B
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
-        %tmp4 = load <2 x i32>* %C
+        %tmp4 = load <2 x i32>, <2 x i32>* %C
         %tmp5 = add <2 x i32> %tmp3, %tmp4
         ret <2 x i32> %tmp5
 }
@@ -683,10 +683,10 @@
 define <4 x i32> @saba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: saba_4s:
 ;CHECK: saba.4s
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
-        %tmp4 = load <4 x i32>* %C
+        %tmp4 = load <4 x i32>, <4 x i32>* %C
         %tmp5 = add <4 x i32> %tmp3, %tmp4
         ret <4 x i32> %tmp5
 }
@@ -694,10 +694,10 @@
 define <8 x i8> @uaba_8b(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ;CHECK-LABEL: uaba_8b:
 ;CHECK: uaba.8b
-        %tmp1 = load <8 x i8>* %A
-        %tmp2 = load <8 x i8>* %B
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
-        %tmp4 = load <8 x i8>* %C
+        %tmp4 = load <8 x i8>, <8 x i8>* %C
         %tmp5 = add <8 x i8> %tmp3, %tmp4
         ret <8 x i8> %tmp5
 }
@@ -705,10 +705,10 @@
 define <16 x i8> @uaba_16b(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
 ;CHECK-LABEL: uaba_16b:
 ;CHECK: uaba.16b
-        %tmp1 = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
-        %tmp4 = load <16 x i8>* %C
+        %tmp4 = load <16 x i8>, <16 x i8>* %C
         %tmp5 = add <16 x i8> %tmp3, %tmp4
         ret <16 x i8> %tmp5
 }
@@ -716,10 +716,10 @@
 define <4 x i16> @uaba_4h(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ;CHECK-LABEL: uaba_4h:
 ;CHECK: uaba.4h
-        %tmp1 = load <4 x i16>* %A
-        %tmp2 = load <4 x i16>* %B
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
-        %tmp4 = load <4 x i16>* %C
+        %tmp4 = load <4 x i16>, <4 x i16>* %C
         %tmp5 = add <4 x i16> %tmp3, %tmp4
         ret <4 x i16> %tmp5
 }
@@ -727,10 +727,10 @@
 define <8 x i16> @uaba_8h(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
 ;CHECK-LABEL: uaba_8h:
 ;CHECK: uaba.8h
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
-        %tmp4 = load <8 x i16>* %C
+        %tmp4 = load <8 x i16>, <8 x i16>* %C
         %tmp5 = add <8 x i16> %tmp3, %tmp4
         ret <8 x i16> %tmp5
 }
@@ -738,10 +738,10 @@
 define <2 x i32> @uaba_2s(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ;CHECK-LABEL: uaba_2s:
 ;CHECK: uaba.2s
-        %tmp1 = load <2 x i32>* %A
-        %tmp2 = load <2 x i32>* %B
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
-        %tmp4 = load <2 x i32>* %C
+        %tmp4 = load <2 x i32>, <2 x i32>* %C
         %tmp5 = add <2 x i32> %tmp3, %tmp4
         ret <2 x i32> %tmp5
 }
@@ -749,10 +749,10 @@
 define <4 x i32> @uaba_4s(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: uaba_4s:
 ;CHECK: uaba.4s
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
-        %tmp4 = load <4 x i32>* %C
+        %tmp4 = load <4 x i32>, <4 x i32>* %C
         %tmp5 = add <4 x i32> %tmp3, %tmp4
         ret <4 x i32> %tmp5
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-vadd.ll b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
index 9ed8aa6..e3d8dd2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @addhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: addhn8b:
 ;CHECK: addhn.8b
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.addhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @addhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: addhn4h:
 ;CHECK: addhn.4h
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.addhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @addhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: addhn2s:
 ;CHECK: addhn.2s
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i64>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i64>, <2 x i64>* %B
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.addhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
         ret <2 x i32> %tmp3
 }
@@ -65,8 +65,8 @@
 define <8 x i8> @raddhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: raddhn8b:
 ;CHECK: raddhn.8b
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i8> %tmp3
 }
@@ -74,8 +74,8 @@
 define <4 x i16> @raddhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: raddhn4h:
 ;CHECK: raddhn.4h
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i16> %tmp3
 }
@@ -83,8 +83,8 @@
 define <2 x i32> @raddhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: raddhn2s:
 ;CHECK: raddhn.2s
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i64>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i64>, <2 x i64>* %B
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
         ret <2 x i32> %tmp3
 }
@@ -126,8 +126,8 @@
 define <8 x i16> @saddl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: saddl8h:
 ;CHECK: saddl.8h
-        %tmp1 = load <8 x i8>* %A
-        %tmp2 = load <8 x i8>* %B
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
   %tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
   %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
   %tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -137,8 +137,8 @@
 define <4 x i32> @saddl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: saddl4s:
 ;CHECK: saddl.4s
-        %tmp1 = load <4 x i16>* %A
-        %tmp2 = load <4 x i16>* %B
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
   %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
   %tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -148,8 +148,8 @@
 define <2 x i64> @saddl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: saddl2d:
 ;CHECK: saddl.2d
-        %tmp1 = load <2 x i32>* %A
-        %tmp2 = load <2 x i32>* %B
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
   %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
   %tmp5 = add <2 x i64> %tmp3, %tmp4
@@ -207,8 +207,8 @@
 define <8 x i16> @uaddl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: uaddl8h:
 ;CHECK: uaddl.8h
-  %tmp1 = load <8 x i8>* %A
-  %tmp2 = load <8 x i8>* %B
+  %tmp1 = load <8 x i8>, <8 x i8>* %A
+  %tmp2 = load <8 x i8>, <8 x i8>* %B
   %tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
   %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
   %tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -218,8 +218,8 @@
 define <4 x i32> @uaddl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: uaddl4s:
 ;CHECK: uaddl.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
   %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
   %tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -229,8 +229,8 @@
 define <2 x i64> @uaddl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: uaddl2d:
 ;CHECK: uaddl.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
   %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
   %tmp5 = add <2 x i64> %tmp3, %tmp4
@@ -289,8 +289,8 @@
 define <8 x i16> @uaddw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: uaddw8h:
 ;CHECK: uaddw.8h
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i8>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
   %tmp3 = zext <8 x i8> %tmp2 to <8 x i16>
   %tmp4 = add <8 x i16> %tmp1, %tmp3
         ret <8 x i16> %tmp4
@@ -299,8 +299,8 @@
 define <4 x i32> @uaddw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: uaddw4s:
 ;CHECK: uaddw.4s
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i16>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = zext <4 x i16> %tmp2 to <4 x i32>
   %tmp4 = add <4 x i32> %tmp1, %tmp3
         ret <4 x i32> %tmp4
@@ -309,8 +309,8 @@
 define <2 x i64> @uaddw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: uaddw2d:
 ;CHECK: uaddw.2d
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i32>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = zext <2 x i32> %tmp2 to <2 x i64>
   %tmp4 = add <2 x i64> %tmp1, %tmp3
         ret <2 x i64> %tmp4
@@ -319,9 +319,9 @@
 define <8 x i16> @uaddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: uaddw2_8h:
 ;CHECK: uaddw2.8h
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
 
-        %tmp2 = load <16 x i8>* %B
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         %ext2 = zext <8 x i8> %high2 to <8 x i16>
 
@@ -332,9 +332,9 @@
 define <4 x i32> @uaddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: uaddw2_4s:
 ;CHECK: uaddw2.4s
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
 
-        %tmp2 = load <8 x i16>* %B
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
         %ext2 = zext <4 x i16> %high2 to <4 x i32>
 
@@ -345,9 +345,9 @@
 define <2 x i64> @uaddw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: uaddw2_2d:
 ;CHECK: uaddw2.2d
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
 
-        %tmp2 = load <4 x i32>* %B
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
         %ext2 = zext <2 x i32> %high2 to <2 x i64>
 
@@ -358,8 +358,8 @@
 define <8 x i16> @saddw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: saddw8h:
 ;CHECK: saddw.8h
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i8>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
         %tmp3 = sext <8 x i8> %tmp2 to <8 x i16>
         %tmp4 = add <8 x i16> %tmp1, %tmp3
         ret <8 x i16> %tmp4
@@ -368,8 +368,8 @@
 define <4 x i32> @saddw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: saddw4s:
 ;CHECK: saddw.4s
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i16>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
         %tmp3 = sext <4 x i16> %tmp2 to <4 x i32>
         %tmp4 = add <4 x i32> %tmp1, %tmp3
         ret <4 x i32> %tmp4
@@ -378,8 +378,8 @@
 define <2 x i64> @saddw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: saddw2d:
 ;CHECK: saddw.2d
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i32>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
         %tmp3 = sext <2 x i32> %tmp2 to <2 x i64>
         %tmp4 = add <2 x i64> %tmp1, %tmp3
         ret <2 x i64> %tmp4
@@ -388,9 +388,9 @@
 define <8 x i16> @saddw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: saddw2_8h:
 ;CHECK: saddw2.8h
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
 
-        %tmp2 = load <16 x i8>* %B
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         %ext2 = sext <8 x i8> %high2 to <8 x i16>
 
@@ -401,9 +401,9 @@
 define <4 x i32> @saddw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: saddw2_4s:
 ;CHECK: saddw2.4s
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
 
-        %tmp2 = load <8 x i16>* %B
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
         %ext2 = sext <4 x i16> %high2 to <4 x i32>
 
@@ -414,9 +414,9 @@
 define <2 x i64> @saddw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: saddw2_2d:
 ;CHECK: saddw2.2d
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
 
-        %tmp2 = load <4 x i32>* %B
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
         %ext2 = sext <2 x i32> %high2 to <2 x i64>
 
@@ -427,7 +427,7 @@
 define <4 x i16> @saddlp4h(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: saddlp4h:
 ;CHECK: saddlp.4h
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %tmp1)
         ret <4 x i16> %tmp3
 }
@@ -435,7 +435,7 @@
 define <2 x i32> @saddlp2s(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: saddlp2s:
 ;CHECK: saddlp.2s
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %tmp1)
         ret <2 x i32> %tmp3
 }
@@ -443,7 +443,7 @@
 define <1 x i64> @saddlp1d(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: saddlp1d:
 ;CHECK: saddlp.1d
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %tmp3 = call <1 x i64> @llvm.aarch64.neon.saddlp.v1i64.v2i32(<2 x i32> %tmp1)
         ret <1 x i64> %tmp3
 }
@@ -451,7 +451,7 @@
 define <8 x i16> @saddlp8h(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: saddlp8h:
 ;CHECK: saddlp.8h
-        %tmp1 = load <16 x i8>* %A
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %tmp1)
         ret <8 x i16> %tmp3
 }
@@ -459,7 +459,7 @@
 define <4 x i32> @saddlp4s(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: saddlp4s:
 ;CHECK: saddlp.4s
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %tmp1)
         ret <4 x i32> %tmp3
 }
@@ -467,7 +467,7 @@
 define <2 x i64> @saddlp2d(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: saddlp2d:
 ;CHECK: saddlp.2d
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %tmp1)
         ret <2 x i64> %tmp3
 }
@@ -483,7 +483,7 @@
 define <4 x i16> @uaddlp4h(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: uaddlp4h:
 ;CHECK: uaddlp.4h
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %tmp1)
         ret <4 x i16> %tmp3
 }
@@ -491,7 +491,7 @@
 define <2 x i32> @uaddlp2s(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: uaddlp2s:
 ;CHECK: uaddlp.2s
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %tmp1)
         ret <2 x i32> %tmp3
 }
@@ -499,7 +499,7 @@
 define <1 x i64> @uaddlp1d(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: uaddlp1d:
 ;CHECK: uaddlp.1d
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %tmp3 = call <1 x i64> @llvm.aarch64.neon.uaddlp.v1i64.v2i32(<2 x i32> %tmp1)
         ret <1 x i64> %tmp3
 }
@@ -507,7 +507,7 @@
 define <8 x i16> @uaddlp8h(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: uaddlp8h:
 ;CHECK: uaddlp.8h
-        %tmp1 = load <16 x i8>* %A
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %tmp1)
         ret <8 x i16> %tmp3
 }
@@ -515,7 +515,7 @@
 define <4 x i32> @uaddlp4s(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: uaddlp4s:
 ;CHECK: uaddlp.4s
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %tmp1)
         ret <4 x i32> %tmp3
 }
@@ -523,7 +523,7 @@
 define <2 x i64> @uaddlp2d(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: uaddlp2d:
 ;CHECK: uaddlp.2d
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %tmp1)
         ret <2 x i64> %tmp3
 }
@@ -539,9 +539,9 @@
 define <4 x i16> @sadalp4h(<8 x i8>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: sadalp4h:
 ;CHECK: sadalp.4h
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.saddlp.v4i16.v8i8(<8 x i8> %tmp1)
-        %tmp4 = load <4 x i16>* %B
+        %tmp4 = load <4 x i16>, <4 x i16>* %B
         %tmp5 = add <4 x i16> %tmp3, %tmp4
         ret <4 x i16> %tmp5
 }
@@ -549,9 +549,9 @@
 define <2 x i32> @sadalp2s(<4 x i16>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: sadalp2s:
 ;CHECK: sadalp.2s
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.saddlp.v2i32.v4i16(<4 x i16> %tmp1)
-        %tmp4 = load <2 x i32>* %B
+        %tmp4 = load <2 x i32>, <2 x i32>* %B
         %tmp5 = add <2 x i32> %tmp3, %tmp4
         ret <2 x i32> %tmp5
 }
@@ -559,9 +559,9 @@
 define <8 x i16> @sadalp8h(<16 x i8>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: sadalp8h:
 ;CHECK: sadalp.8h
-        %tmp1 = load <16 x i8>* %A
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.saddlp.v8i16.v16i8(<16 x i8> %tmp1)
-        %tmp4 = load <8 x i16>* %B
+        %tmp4 = load <8 x i16>, <8 x i16>* %B
         %tmp5 = add <8 x i16> %tmp3, %tmp4
         ret <8 x i16> %tmp5
 }
@@ -569,9 +569,9 @@
 define <4 x i32> @sadalp4s(<8 x i16>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: sadalp4s:
 ;CHECK: sadalp.4s
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.saddlp.v4i32.v8i16(<8 x i16> %tmp1)
-        %tmp4 = load <4 x i32>* %B
+        %tmp4 = load <4 x i32>, <4 x i32>* %B
         %tmp5 = add <4 x i32> %tmp3, %tmp4
         ret <4 x i32> %tmp5
 }
@@ -579,9 +579,9 @@
 define <2 x i64> @sadalp2d(<4 x i32>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: sadalp2d:
 ;CHECK: sadalp.2d
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <2 x i64> @llvm.aarch64.neon.saddlp.v2i64.v4i32(<4 x i32> %tmp1)
-        %tmp4 = load <2 x i64>* %B
+        %tmp4 = load <2 x i64>, <2 x i64>* %B
         %tmp5 = add <2 x i64> %tmp3, %tmp4
         ret <2 x i64> %tmp5
 }
@@ -589,9 +589,9 @@
 define <4 x i16> @uadalp4h(<8 x i8>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: uadalp4h:
 ;CHECK: uadalp.4h
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.uaddlp.v4i16.v8i8(<8 x i8> %tmp1)
-        %tmp4 = load <4 x i16>* %B
+        %tmp4 = load <4 x i16>, <4 x i16>* %B
         %tmp5 = add <4 x i16> %tmp3, %tmp4
         ret <4 x i16> %tmp5
 }
@@ -599,9 +599,9 @@
 define <2 x i32> @uadalp2s(<4 x i16>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: uadalp2s:
 ;CHECK: uadalp.2s
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.uaddlp.v2i32.v4i16(<4 x i16> %tmp1)
-        %tmp4 = load <2 x i32>* %B
+        %tmp4 = load <2 x i32>, <2 x i32>* %B
         %tmp5 = add <2 x i32> %tmp3, %tmp4
         ret <2 x i32> %tmp5
 }
@@ -609,9 +609,9 @@
 define <8 x i16> @uadalp8h(<16 x i8>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: uadalp8h:
 ;CHECK: uadalp.8h
-        %tmp1 = load <16 x i8>* %A
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.uaddlp.v8i16.v16i8(<16 x i8> %tmp1)
-        %tmp4 = load <8 x i16>* %B
+        %tmp4 = load <8 x i16>, <8 x i16>* %B
         %tmp5 = add <8 x i16> %tmp3, %tmp4
         ret <8 x i16> %tmp5
 }
@@ -619,9 +619,9 @@
 define <4 x i32> @uadalp4s(<8 x i16>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: uadalp4s:
 ;CHECK: uadalp.4s
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.uaddlp.v4i32.v8i16(<8 x i16> %tmp1)
-        %tmp4 = load <4 x i32>* %B
+        %tmp4 = load <4 x i32>, <4 x i32>* %B
         %tmp5 = add <4 x i32> %tmp3, %tmp4
         ret <4 x i32> %tmp5
 }
@@ -629,9 +629,9 @@
 define <2 x i64> @uadalp2d(<4 x i32>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: uadalp2d:
 ;CHECK: uadalp.2d
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <2 x i64> @llvm.aarch64.neon.uaddlp.v2i64.v4i32(<4 x i32> %tmp1)
-        %tmp4 = load <2 x i64>* %B
+        %tmp4 = load <2 x i64>, <2 x i64>* %B
         %tmp5 = add <2 x i64> %tmp3, %tmp4
         ret <2 x i64> %tmp5
 }
@@ -639,8 +639,8 @@
 define <8 x i8> @addp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: addp_8b:
 ;CHECK: addp.8b
-        %tmp1 = load <8 x i8>* %A
-        %tmp2 = load <8 x i8>* %B
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         ret <8 x i8> %tmp3
 }
@@ -648,8 +648,8 @@
 define <16 x i8> @addp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: addp_16b:
 ;CHECK: addp.16b
-        %tmp1 = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
         ret <16 x i8> %tmp3
 }
@@ -657,8 +657,8 @@
 define <4 x i16> @addp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: addp_4h:
 ;CHECK: addp.4h
-        %tmp1 = load <4 x i16>* %A
-        %tmp2 = load <4 x i16>* %B
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         ret <4 x i16> %tmp3
 }
@@ -666,8 +666,8 @@
 define <8 x i16> @addp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: addp_8h:
 ;CHECK: addp.8h
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i16> %tmp3
 }
@@ -675,8 +675,8 @@
 define <2 x i32> @addp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: addp_2s:
 ;CHECK: addp.2s
-        %tmp1 = load <2 x i32>* %A
-        %tmp2 = load <2 x i32>* %B
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         ret <2 x i32> %tmp3
 }
@@ -684,8 +684,8 @@
 define <4 x i32> @addp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: addp_4s:
 ;CHECK: addp.4s
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i32> %tmp3
 }
@@ -693,8 +693,8 @@
 define <2 x i64> @addp_2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: addp_2d:
 ;CHECK: addp.2d
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i64>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i64>, <2 x i64>* %B
         %tmp3 = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
         ret <2 x i64> %tmp3
 }
@@ -710,8 +710,8 @@
 define <2 x float> @faddp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: faddp_2s:
 ;CHECK: faddp.2s
-        %tmp1 = load <2 x float>* %A
-        %tmp2 = load <2 x float>* %B
+        %tmp1 = load <2 x float>, <2 x float>* %A
+        %tmp2 = load <2 x float>, <2 x float>* %B
         %tmp3 = call <2 x float> @llvm.aarch64.neon.addp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
         ret <2 x float> %tmp3
 }
@@ -719,8 +719,8 @@
 define <4 x float> @faddp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: faddp_4s:
 ;CHECK: faddp.4s
-        %tmp1 = load <4 x float>* %A
-        %tmp2 = load <4 x float>* %B
+        %tmp1 = load <4 x float>, <4 x float>* %A
+        %tmp2 = load <4 x float>, <4 x float>* %B
         %tmp3 = call <4 x float> @llvm.aarch64.neon.addp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
         ret <4 x float> %tmp3
 }
@@ -728,8 +728,8 @@
 define <2 x double> @faddp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
 ;CHECK-LABEL: faddp_2d:
 ;CHECK: faddp.2d
-        %tmp1 = load <2 x double>* %A
-        %tmp2 = load <2 x double>* %B
+        %tmp1 = load <2 x double>, <2 x double>* %A
+        %tmp2 = load <2 x double>, <2 x double>* %B
         %tmp3 = call <2 x double> @llvm.aarch64.neon.addp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
         ret <2 x double> %tmp3
 }
@@ -805,8 +805,8 @@
 define <8 x i8> @addhn8b_natural(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: addhn8b_natural:
 ;CHECK: addhn.8b
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %sum = add <8 x i16> %tmp1, %tmp2
         %high_bits = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
         %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
@@ -816,8 +816,8 @@
 define <4 x i16> @addhn4h_natural(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: addhn4h_natural:
 ;CHECK: addhn.4h
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %sum = add <4 x i32> %tmp1, %tmp2
         %high_bits = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
         %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
@@ -827,8 +827,8 @@
 define <2 x i32> @addhn2s_natural(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: addhn2s_natural:
 ;CHECK: addhn.2s
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i64>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i64>, <2 x i64>* %B
         %sum = add <2 x i64> %tmp1, %tmp2
         %high_bits = lshr <2 x i64> %sum, <i64 32, i64 32>
         %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
@@ -838,8 +838,8 @@
 define <16 x i8> @addhn2_16b_natural(<8 x i8> %low, <8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: addhn2_16b_natural:
 ;CHECK: addhn2.16b
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %sum = add <8 x i16> %tmp1, %tmp2
         %high_bits = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
         %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
@@ -850,8 +850,8 @@
 define <8 x i16> @addhn2_8h_natural(<4 x i16> %low, <4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: addhn2_8h_natural:
 ;CHECK: addhn2.8h
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %sum = add <4 x i32> %tmp1, %tmp2
         %high_bits = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
         %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
@@ -862,8 +862,8 @@
 define <4 x i32> @addhn2_4s_natural(<2 x i32> %low, <2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: addhn2_4s_natural:
 ;CHECK: addhn2.4s
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i64>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i64>, <2 x i64>* %B
         %sum = add <2 x i64> %tmp1, %tmp2
         %high_bits = lshr <2 x i64> %sum, <i64 32, i64 32>
         %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
@@ -874,8 +874,8 @@
 define <8 x i8> @subhn8b_natural(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: subhn8b_natural:
 ;CHECK: subhn.8b
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %diff = sub <8 x i16> %tmp1, %tmp2
         %high_bits = lshr <8 x i16> %diff, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
         %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
@@ -885,8 +885,8 @@
 define <4 x i16> @subhn4h_natural(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: subhn4h_natural:
 ;CHECK: subhn.4h
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %diff = sub <4 x i32> %tmp1, %tmp2
         %high_bits = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
         %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
@@ -896,8 +896,8 @@
 define <2 x i32> @subhn2s_natural(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: subhn2s_natural:
 ;CHECK: subhn.2s
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i64>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i64>, <2 x i64>* %B
         %diff = sub <2 x i64> %tmp1, %tmp2
         %high_bits = lshr <2 x i64> %diff, <i64 32, i64 32>
         %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
@@ -907,8 +907,8 @@
 define <16 x i8> @subhn2_16b_natural(<8 x i8> %low, <8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: subhn2_16b_natural:
 ;CHECK: subhn2.16b
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %diff = sub <8 x i16> %tmp1, %tmp2
         %high_bits = lshr <8 x i16> %diff, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
         %narrowed = trunc <8 x i16> %high_bits to <8 x i8>
@@ -919,8 +919,8 @@
 define <8 x i16> @subhn2_8h_natural(<4 x i16> %low, <4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: subhn2_8h_natural:
 ;CHECK: subhn2.8h
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %diff = sub <4 x i32> %tmp1, %tmp2
         %high_bits = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
         %narrowed = trunc <4 x i32> %high_bits to <4 x i16>
@@ -931,8 +931,8 @@
 define <4 x i32> @subhn2_4s_natural(<2 x i32> %low, <2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: subhn2_4s_natural:
 ;CHECK: subhn2.4s
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i64>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i64>, <2 x i64>* %B
         %diff = sub <2 x i64> %tmp1, %tmp2
         %high_bits = lshr <2 x i64> %diff, <i64 32, i64 32>
         %narrowed = trunc <2 x i64> %high_bits to <2 x i32>
diff --git a/llvm/test/CodeGen/AArch64/arm64-vbitwise.ll b/llvm/test/CodeGen/AArch64/arm64-vbitwise.ll
index 93de95e..9cfcaaf 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vbitwise.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vbitwise.ll
@@ -3,7 +3,7 @@
 define <8 x i8> @rbit_8b(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: rbit_8b:
 ;CHECK: rbit.8b
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.rbit.v8i8(<8 x i8> %tmp1)
 	ret <8 x i8> %tmp3
 }
@@ -11,7 +11,7 @@
 define <16 x i8> @rbit_16b(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: rbit_16b:
 ;CHECK: rbit.16b
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.rbit.v16i8(<16 x i8> %tmp1)
 	ret <16 x i8> %tmp3
 }
@@ -22,7 +22,7 @@
 define <8 x i16> @sxtl8h(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: sxtl8h:
 ;CHECK: sshll.8h
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
   %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
   ret <8 x i16> %tmp2
 }
@@ -30,7 +30,7 @@
 define <8 x i16> @uxtl8h(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: uxtl8h:
 ;CHECK: ushll.8h
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
   %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
   ret <8 x i16> %tmp2
 }
@@ -38,7 +38,7 @@
 define <4 x i32> @sxtl4s(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: sxtl4s:
 ;CHECK: sshll.4s
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
   %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
   ret <4 x i32> %tmp2
 }
@@ -46,7 +46,7 @@
 define <4 x i32> @uxtl4s(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: uxtl4s:
 ;CHECK: ushll.4s
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
   %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
   ret <4 x i32> %tmp2
 }
@@ -54,7 +54,7 @@
 define <2 x i64> @sxtl2d(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: sxtl2d:
 ;CHECK: sshll.2d
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
   %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
   ret <2 x i64> %tmp2
 }
@@ -62,7 +62,7 @@
 define <2 x i64> @uxtl2d(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: uxtl2d:
 ;CHECK: ushll.2d
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
   %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
   ret <2 x i64> %tmp2
 }
@@ -76,7 +76,7 @@
 ; CHECK: movi.2d [[REG1:v[0-9]+]], #0x0000ff000000ff
 ; CHECK: and.16b v{{[0-9]+}}, v{{[0-9]+}}, [[REG1]]
   %0 = bitcast i8* %src to <16 x i8>*
-  %1 = load <16 x i8>* %0, align 16
+  %1 = load <16 x i8>, <16 x i8>* %0, align 16
   %and.i = and <16 x i8> %1, <i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 0, i8 0, i8 0>
   %2 = bitcast <16 x i8> %and.i to <8 x i16>
   %vshl_n = shl <8 x i16> %2, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcmp.ll b/llvm/test/CodeGen/AArch64/arm64-vcmp.ll
index 982ab09..1b33eb5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcmp.ll
@@ -16,8 +16,8 @@
 define <2 x i32> @facge_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: facge_2s:
 ;CHECK: facge.2s
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -25,8 +25,8 @@
 define <4 x i32> @facge_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: facge_4s:
 ;CHECK: facge.4s
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -34,8 +34,8 @@
 define <2 x i64> @facge_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
 ;CHECK-LABEL: facge_2d:
 ;CHECK: facge.2d
-	%tmp1 = load <2 x double>* %A
-	%tmp2 = load <2 x double>* %B
+	%tmp1 = load <2 x double>, <2 x double>* %A
+	%tmp2 = load <2 x double>, <2 x double>* %B
 	%tmp3 = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
 	ret <2 x i64> %tmp3
 }
@@ -47,8 +47,8 @@
 define <2 x i32> @facgt_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: facgt_2s:
 ;CHECK: facgt.2s
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -56,8 +56,8 @@
 define <4 x i32> @facgt_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: facgt_4s:
 ;CHECK: facgt.4s
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -65,8 +65,8 @@
 define <2 x i64> @facgt_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
 ;CHECK-LABEL: facgt_2d:
 ;CHECK: facgt.2d
-	%tmp1 = load <2 x double>* %A
-	%tmp2 = load <2 x double>* %B
+	%tmp1 = load <2 x double>, <2 x double>* %A
+	%tmp2 = load <2 x double>, <2 x double>* %B
 	%tmp3 = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
 	ret <2 x i64> %tmp3
 }
@@ -112,8 +112,8 @@
 define <8 x i8> @cmtst_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: cmtst_8b:
 ;CHECK: cmtst.8b
-  %tmp1 = load <8 x i8>* %A
-  %tmp2 = load <8 x i8>* %B
+  %tmp1 = load <8 x i8>, <8 x i8>* %A
+  %tmp2 = load <8 x i8>, <8 x i8>* %B
   %commonbits = and <8 x i8> %tmp1, %tmp2
   %mask = icmp ne <8 x i8> %commonbits, zeroinitializer
   %res = sext <8 x i1> %mask to <8 x i8>
@@ -123,8 +123,8 @@
 define <16 x i8> @cmtst_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: cmtst_16b:
 ;CHECK: cmtst.16b
-  %tmp1 = load <16 x i8>* %A
-  %tmp2 = load <16 x i8>* %B
+  %tmp1 = load <16 x i8>, <16 x i8>* %A
+  %tmp2 = load <16 x i8>, <16 x i8>* %B
   %commonbits = and <16 x i8> %tmp1, %tmp2
   %mask = icmp ne <16 x i8> %commonbits, zeroinitializer
   %res = sext <16 x i1> %mask to <16 x i8>
@@ -134,8 +134,8 @@
 define <4 x i16> @cmtst_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: cmtst_4h:
 ;CHECK: cmtst.4h
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
   %commonbits = and <4 x i16> %tmp1, %tmp2
   %mask = icmp ne <4 x i16> %commonbits, zeroinitializer
   %res = sext <4 x i1> %mask to <4 x i16>
@@ -145,8 +145,8 @@
 define <8 x i16> @cmtst_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: cmtst_8h:
 ;CHECK: cmtst.8h
-  %tmp1 = load <8 x i16>* %A
-  %tmp2 = load <8 x i16>* %B
+  %tmp1 = load <8 x i16>, <8 x i16>* %A
+  %tmp2 = load <8 x i16>, <8 x i16>* %B
   %commonbits = and <8 x i16> %tmp1, %tmp2
   %mask = icmp ne <8 x i16> %commonbits, zeroinitializer
   %res = sext <8 x i1> %mask to <8 x i16>
@@ -156,8 +156,8 @@
 define <2 x i32> @cmtst_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: cmtst_2s:
 ;CHECK: cmtst.2s
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
   %commonbits = and <2 x i32> %tmp1, %tmp2
   %mask = icmp ne <2 x i32> %commonbits, zeroinitializer
   %res = sext <2 x i1> %mask to <2 x i32>
@@ -167,8 +167,8 @@
 define <4 x i32> @cmtst_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: cmtst_4s:
 ;CHECK: cmtst.4s
-  %tmp1 = load <4 x i32>* %A
-  %tmp2 = load <4 x i32>* %B
+  %tmp1 = load <4 x i32>, <4 x i32>* %A
+  %tmp2 = load <4 x i32>, <4 x i32>* %B
   %commonbits = and <4 x i32> %tmp1, %tmp2
   %mask = icmp ne <4 x i32> %commonbits, zeroinitializer
   %res = sext <4 x i1> %mask to <4 x i32>
@@ -178,8 +178,8 @@
 define <2 x i64> @cmtst_2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: cmtst_2d:
 ;CHECK: cmtst.2d
-  %tmp1 = load <2 x i64>* %A
-  %tmp2 = load <2 x i64>* %B
+  %tmp1 = load <2 x i64>, <2 x i64>* %A
+  %tmp2 = load <2 x i64>, <2 x i64>* %B
   %commonbits = and <2 x i64> %tmp1, %tmp2
   %mask = icmp ne <2 x i64> %commonbits, zeroinitializer
   %res = sext <2 x i1> %mask to <2 x i64>
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcnt.ll b/llvm/test/CodeGen/AArch64/arm64-vcnt.ll
index 903501e..5cff10c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcnt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcnt.ll
@@ -3,7 +3,7 @@
 define <8 x i8> @cls_8b(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: cls_8b:
 ;CHECK: cls.8b
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.cls.v8i8(<8 x i8> %tmp1)
 	ret <8 x i8> %tmp3
 }
@@ -11,7 +11,7 @@
 define <16 x i8> @cls_16b(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: cls_16b:
 ;CHECK: cls.16b
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.cls.v16i8(<16 x i8> %tmp1)
 	ret <16 x i8> %tmp3
 }
@@ -19,7 +19,7 @@
 define <4 x i16> @cls_4h(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: cls_4h:
 ;CHECK: cls.4h
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.cls.v4i16(<4 x i16> %tmp1)
 	ret <4 x i16> %tmp3
 }
@@ -27,7 +27,7 @@
 define <8 x i16> @cls_8h(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: cls_8h:
 ;CHECK: cls.8h
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.cls.v8i16(<8 x i16> %tmp1)
 	ret <8 x i16> %tmp3
 }
@@ -35,7 +35,7 @@
 define <2 x i32> @cls_2s(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: cls_2s:
 ;CHECK: cls.2s
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.cls.v2i32(<2 x i32> %tmp1)
 	ret <2 x i32> %tmp3
 }
@@ -43,7 +43,7 @@
 define <4 x i32> @cls_4s(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: cls_4s:
 ;CHECK: cls.4s
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32> %tmp1)
 	ret <4 x i32> %tmp3
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
index 6570f0e..13d2d28 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll
@@ -675,7 +675,7 @@
 ;CHECK: fcvt
 ;CHECK: ret
 define void @autogen_SD19225(<8 x double>* %addr.f64, <8 x float>* %addr.f32) {
-  %A = load <8 x float>* %addr.f32
+  %A = load <8 x float>, <8 x float>* %addr.f32
   %Tr53 = fpext <8 x float> %A to <8 x double>
   store <8 x double> %Tr53, <8 x double>* %addr.f64
   ret void
diff --git a/llvm/test/CodeGen/AArch64/arm64-vector-imm.ll b/llvm/test/CodeGen/AArch64/arm64-vector-imm.ll
index 9fb088b..d3de88d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vector-imm.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vector-imm.ll
@@ -5,7 +5,7 @@
 ; CHECK-NOT: mov
 ; CHECK-NOT: mvn
 ; CHECK: orr
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp3 = or <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
 	ret <8 x i8> %tmp3
 }
@@ -15,7 +15,7 @@
 ; CHECK-NOT: mov
 ; CHECK-NOT: mvn
 ; CHECK: orr
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp3 = or <16 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
 	ret <16 x i8> %tmp3
 }
@@ -25,7 +25,7 @@
 ; CHECK-NOT: mov
 ; CHECK-NOT: mvn
 ; CHECK: bic
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp3 = and <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
 	ret <8 x i8> %tmp3
 }
@@ -35,7 +35,7 @@
 ; CHECK-NOT: mov
 ; CHECK-NOT: mvn
 ; CHECK: bic
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp3 = and <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
 	ret <16 x i8> %tmp3
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
index 1574db6..c58c199 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll
@@ -12,7 +12,7 @@
 ; CHECK-LABEL: t1:
 ; CHECK: ldr x[[REG:[0-9]+]], [x0]
 ; CHECK: str q0, [x[[REG]]]
-  %tmp1 = load %type1** %argtable, align 8
+  %tmp1 = load %type1*, %type1** %argtable, align 8
   %tmp2 = getelementptr inbounds %type1, %type1* %tmp1, i64 0, i32 0
   store <16 x i8> zeroinitializer, <16 x i8>* %tmp2, align 16
   ret void
@@ -23,7 +23,7 @@
 ; CHECK-LABEL: t2:
 ; CHECK: ldr x[[REG:[0-9]+]], [x0]
 ; CHECK: str d0, [x[[REG]]]
-  %tmp1 = load %type2** %argtable, align 8
+  %tmp1 = load %type2*, %type2** %argtable, align 8
   %tmp2 = getelementptr inbounds %type2, %type2* %tmp1, i64 0, i32 0
   store <8 x i8> zeroinitializer, <8 x i8>* %tmp2, align 8
   ret void
@@ -52,8 +52,8 @@
 ; CHECK: ldr [[BASE:x[0-9]+]],
 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
   %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 %offset
-  %tmp = load <2 x i64>* %arrayidx, align 16
-  %tmp1 = load <2 x i64>** @globalArray64x2, align 8
+  %tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
+  %tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
   %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 %offset
   store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
   ret void
@@ -66,8 +66,8 @@
 ; CHECK: ldr [[BASE:x[0-9]+]],
 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
   %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 3
-  %tmp = load <2 x i64>* %arrayidx, align 16
-  %tmp1 = load <2 x i64>** @globalArray64x2, align 8
+  %tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
+  %tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
   %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 5
   store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
   ret void
@@ -81,8 +81,8 @@
 ; CHECK: ldr [[BASE:x[0-9]+]],
 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
   %arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 %offset
-  %tmp = load <4 x i32>* %arrayidx, align 16
-  %tmp1 = load <4 x i32>** @globalArray32x4, align 8
+  %tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
+  %tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
   %arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 %offset
   store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
   ret void
@@ -95,8 +95,8 @@
 ; CHECK: ldr [[BASE:x[0-9]+]],
 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
   %arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 3
-  %tmp = load <4 x i32>* %arrayidx, align 16
-  %tmp1 = load <4 x i32>** @globalArray32x4, align 8
+  %tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
+  %tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
   %arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 5
   store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
   ret void
@@ -110,8 +110,8 @@
 ; CHECK: ldr [[BASE:x[0-9]+]],
 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
   %arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 %offset
-  %tmp = load <8 x i16>* %arrayidx, align 16
-  %tmp1 = load <8 x i16>** @globalArray16x8, align 8
+  %tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
+  %tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
   %arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 %offset
   store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
   ret void
@@ -124,8 +124,8 @@
 ; CHECK: ldr [[BASE:x[0-9]+]],
 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
   %arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 3
-  %tmp = load <8 x i16>* %arrayidx, align 16
-  %tmp1 = load <8 x i16>** @globalArray16x8, align 8
+  %tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
+  %tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
   %arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 5
   store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
   ret void
@@ -139,8 +139,8 @@
 ; CHECK: ldr [[BASE:x[0-9]+]],
 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
   %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 %offset
-  %tmp = load <16 x i8>* %arrayidx, align 16
-  %tmp1 = load <16 x i8>** @globalArray8x16, align 8
+  %tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
+  %tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
   %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 %offset
   store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
   ret void
@@ -153,8 +153,8 @@
 ; CHECK: ldr [[BASE:x[0-9]+]],
 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
   %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 3
-  %tmp = load <16 x i8>* %arrayidx, align 16
-  %tmp1 = load <16 x i8>** @globalArray8x16, align 8
+  %tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
+  %tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
   %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 5
   store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
   ret void
@@ -168,8 +168,8 @@
 ; CHECK: ldr [[BASE:x[0-9]+]],
 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
   %arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 %offset
-  %tmp = load <1 x i64>* %arrayidx, align 8
-  %tmp1 = load <1 x i64>** @globalArray64x1, align 8
+  %tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
+  %tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
   %arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 %offset
   store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
   ret void
@@ -182,8 +182,8 @@
 ; CHECK: ldr [[BASE:x[0-9]+]],
 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
   %arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 3
-  %tmp = load <1 x i64>* %arrayidx, align 8
-  %tmp1 = load <1 x i64>** @globalArray64x1, align 8
+  %tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
+  %tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
   %arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 5
   store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
   ret void
@@ -197,8 +197,8 @@
 ; CHECK: ldr [[BASE:x[0-9]+]],
 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
   %arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 %offset
-  %tmp = load <2 x i32>* %arrayidx, align 8
-  %tmp1 = load <2 x i32>** @globalArray32x2, align 8
+  %tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
+  %tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
   %arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 %offset
   store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
   ret void
@@ -211,8 +211,8 @@
 ; CHECK: ldr [[BASE:x[0-9]+]],
 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
   %arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 3
-  %tmp = load <2 x i32>* %arrayidx, align 8
-  %tmp1 = load <2 x i32>** @globalArray32x2, align 8
+  %tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
+  %tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
   %arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 5
   store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
   ret void
@@ -226,8 +226,8 @@
 ; CHECK: ldr [[BASE:x[0-9]+]],
 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
   %arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 %offset
-  %tmp = load <4 x i16>* %arrayidx, align 8
-  %tmp1 = load <4 x i16>** @globalArray16x4, align 8
+  %tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
+  %tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
   %arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 %offset
   store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
   ret void
@@ -240,8 +240,8 @@
 ; CHECK: ldr [[BASE:x[0-9]+]],
 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
   %arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 3
-  %tmp = load <4 x i16>* %arrayidx, align 8
-  %tmp1 = load <4 x i16>** @globalArray16x4, align 8
+  %tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
+  %tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
   %arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 5
   store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
   ret void
@@ -255,8 +255,8 @@
 ; CHECK: ldr [[BASE:x[0-9]+]],
 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
   %arrayidx = getelementptr inbounds <8 x i8>, <8 x i8>* %array, i64 %offset
-  %tmp = load <8 x i8>* %arrayidx, align 8
-  %tmp1 = load <8 x i8>** @globalArray8x8, align 8
+  %tmp = load <8 x i8>, <8 x i8>* %arrayidx, align 8
+  %tmp1 = load <8 x i8>*, <8 x i8>** @globalArray8x8, align 8
   %arrayidx1 = getelementptr inbounds <8 x i8>, <8 x i8>* %tmp1, i64 %offset
   store <8 x i8> %tmp, <8 x i8>* %arrayidx1, align 8
   ret void
@@ -270,7 +270,7 @@
 entry:
 ; CHECK-LABEL: fct0:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
+  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
   ret <1 x i64> %0
 }
 
@@ -278,7 +278,7 @@
 entry:
 ; CHECK-LABEL: fct1:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
+  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
   ret <2 x i32> %0
 }
 
@@ -286,7 +286,7 @@
 entry:
 ; CHECK-LABEL: fct2:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
+  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
   ret <4 x i16> %0
 }
 
@@ -294,7 +294,7 @@
 entry:
 ; CHECK-LABEL: fct3:
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
+  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
   ret <8 x i8> %0
 }
 
@@ -302,7 +302,7 @@
 entry:
 ; CHECK-LABEL: fct4:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
+  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
   ret <2 x i64> %0
 }
 
@@ -310,7 +310,7 @@
 entry:
 ; CHECK-LABEL: fct5:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
+  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
   ret <4 x i32> %0
 }
 
@@ -318,7 +318,7 @@
 entry:
 ; CHECK-LABEL: fct6:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
+  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
   ret <8 x i16> %0
 }
 
@@ -326,7 +326,7 @@
 entry:
 ; CHECK-LABEL: fct7:
 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
-  %0 = load <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
+  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
   ret <16 x i8> %0
 }
 
@@ -335,7 +335,7 @@
 ; CHECK-LABEL: fct8:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
+  %0 = load <1 x i64>, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <1 x i64>*), align 8
   store <1 x i64> %0, <1 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <1 x i64>*), align 8
   ret void
 }
@@ -345,7 +345,7 @@
 ; CHECK-LABEL: fct9:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
+  %0 = load <2 x i32>, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i32>*), align 8
   store <2 x i32> %0, <2 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <2 x i32>*), align 8
   ret void
 }
@@ -355,7 +355,7 @@
 ; CHECK-LABEL: fct10:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
+  %0 = load <4 x i16>, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i16>*), align 8
   store <4 x i16> %0, <4 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <4 x i16>*), align 8
   ret void
 }
@@ -365,7 +365,7 @@
 ; CHECK-LABEL: fct11:
 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
+  %0 = load <8 x i8>, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i8>*), align 8
   store <8 x i8> %0, <8 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <8 x i8>*), align 8
   ret void
 }
@@ -375,7 +375,7 @@
 ; CHECK-LABEL: fct12:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
+  %0 = load <2 x i64>, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <2 x i64>*), align 16
   store <2 x i64> %0, <2 x i64>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <2 x i64>*), align 16
   ret void
 }
@@ -385,7 +385,7 @@
 ; CHECK-LABEL: fct13:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
+  %0 = load <4 x i32>, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <4 x i32>*), align 16
   store <4 x i32> %0, <4 x i32>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <4 x i32>*), align 16
   ret void
 }
@@ -395,7 +395,7 @@
 ; CHECK-LABEL: fct14:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
+  %0 = load <8 x i16>, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <8 x i16>*), align 16
   store <8 x i16> %0, <8 x i16>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <8 x i16>*), align 16
   ret void
 }
@@ -405,7 +405,7 @@
 ; CHECK-LABEL: fct15:
 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
-  %0 = load <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
+  %0 = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 3) to <16 x i8>*), align 16
   store <16 x i8> %0, <16 x i8>* bitcast (i8* getelementptr inbounds ([63 x i8]* @str, i64 0, i64 4) to <16 x i8>*), align 16
   ret void
 }
@@ -420,7 +420,7 @@
 ; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
 entry:
   %addr = getelementptr i8, i8* %sp0, i64 1
-  %pix_sp0.0.copyload = load i8* %addr, align 1
+  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
   %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
   %vmull.i = mul <8 x i8> %vec, %vec
   ret <8 x i8> %vmull.i
@@ -432,7 +432,7 @@
 ; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
 entry:
   %addr = getelementptr i8, i8* %sp0, i64 1
-  %pix_sp0.0.copyload = load i8* %addr, align 1
+  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
   %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
   %vmull.i = mul <16 x i8> %vec, %vec
   ret <16 x i8> %vmull.i
@@ -444,7 +444,7 @@
 ; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
 entry:
   %addr = getelementptr i16, i16* %sp0, i64 1
-  %pix_sp0.0.copyload = load i16* %addr, align 1
+  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
   %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
   %vmull.i = mul <4 x i16> %vec, %vec
   ret <4 x i16> %vmull.i
@@ -456,7 +456,7 @@
 ; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
 entry:
   %addr = getelementptr i16, i16* %sp0, i64 1
-  %pix_sp0.0.copyload = load i16* %addr, align 1
+  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
   %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
   %vmull.i = mul <8 x i16> %vec, %vec
   ret <8 x i16> %vmull.i
@@ -468,7 +468,7 @@
 ; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
 entry:
   %addr = getelementptr i32, i32* %sp0, i64 1
-  %pix_sp0.0.copyload = load i32* %addr, align 1
+  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
   %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
   %vmull.i = mul <2 x i32> %vec, %vec
   ret <2 x i32> %vmull.i
@@ -480,7 +480,7 @@
 ; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
 entry:
   %addr = getelementptr i32, i32* %sp0, i64 1
-  %pix_sp0.0.copyload = load i32* %addr, align 1
+  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
   %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
   %vmull.i = mul <4 x i32> %vec, %vec
   ret <4 x i32> %vmull.i
@@ -491,7 +491,7 @@
 ; CHECK: ldr d0, [x0, #8]
 entry:
   %addr = getelementptr i64, i64* %sp0, i64 1
-  %pix_sp0.0.copyload = load i64* %addr, align 1
+  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
   %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
    ret <1 x i64> %vec
 }
@@ -501,7 +501,7 @@
 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
 entry:
   %addr = getelementptr i64, i64* %sp0, i64 1
-  %pix_sp0.0.copyload = load i64* %addr, align 1
+  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
   %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
   ret <2 x i64> %vec
 }
@@ -514,7 +514,7 @@
 ; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
 entry:
   %addr = getelementptr i8, i8* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i8* %addr, align 1
+  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
   %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
   %vmull.i = mul <8 x i8> %vec, %vec
   ret <8 x i8> %vmull.i
@@ -526,7 +526,7 @@
 ; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
 entry:
   %addr = getelementptr i8, i8* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i8* %addr, align 1
+  %pix_sp0.0.copyload = load i8, i8* %addr, align 1
   %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
   %vmull.i = mul <16 x i8> %vec, %vec
   ret <16 x i8> %vmull.i
@@ -538,7 +538,7 @@
 ; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
 entry:
   %addr = getelementptr i16, i16* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i16* %addr, align 1
+  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
   %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
   %vmull.i = mul <4 x i16> %vec, %vec
   ret <4 x i16> %vmull.i
@@ -550,7 +550,7 @@
 ; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
 entry:
   %addr = getelementptr i16, i16* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i16* %addr, align 1
+  %pix_sp0.0.copyload = load i16, i16* %addr, align 1
   %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
   %vmull.i = mul <8 x i16> %vec, %vec
   ret <8 x i16> %vmull.i
@@ -562,7 +562,7 @@
 ; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
 entry:
   %addr = getelementptr i32, i32* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i32* %addr, align 1
+  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
   %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
   %vmull.i = mul <2 x i32> %vec, %vec
   ret <2 x i32> %vmull.i
@@ -574,7 +574,7 @@
 ; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
 entry:
   %addr = getelementptr i32, i32* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i32* %addr, align 1
+  %pix_sp0.0.copyload = load i32, i32* %addr, align 1
   %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
   %vmull.i = mul <4 x i32> %vec, %vec
   ret <4 x i32> %vmull.i
@@ -585,7 +585,7 @@
 ; CHECK: ldr d0, [x0, x1, lsl #3]
 entry:
   %addr = getelementptr i64, i64* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i64* %addr, align 1
+  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
   %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
    ret <1 x i64> %vec
 }
@@ -595,7 +595,7 @@
 ; CHECK: ldr d0, [x0, x1, lsl #3]
 entry:
   %addr = getelementptr i64, i64* %sp0, i64 %offset
-  %pix_sp0.0.copyload = load i64* %addr, align 1
+  %pix_sp0.0.copyload = load i64, i64* %addr, align 1
   %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
   ret <2 x i64> %vec
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-vext.ll b/llvm/test/CodeGen/AArch64/arm64-vext.ll
index 2240dfd..fa57eeb 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vext.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vext.ll
@@ -6,12 +6,12 @@
   %xS8x8 = alloca <8 x i8>, align 8
   %__a = alloca <8 x i8>, align 8
   %__b = alloca <8 x i8>, align 8
-  %tmp = load <8 x i8>* %xS8x8, align 8
+  %tmp = load <8 x i8>, <8 x i8>* %xS8x8, align 8
   store <8 x i8> %tmp, <8 x i8>* %__a, align 8
-  %tmp1 = load <8 x i8>* %xS8x8, align 8
+  %tmp1 = load <8 x i8>, <8 x i8>* %xS8x8, align 8
   store <8 x i8> %tmp1, <8 x i8>* %__b, align 8
-  %tmp2 = load <8 x i8>* %__a, align 8
-  %tmp3 = load <8 x i8>* %__b, align 8
+  %tmp2 = load <8 x i8>, <8 x i8>* %__a, align 8
+  %tmp3 = load <8 x i8>, <8 x i8>* %__b, align 8
   %vext = shufflevector <8 x i8> %tmp2, <8 x i8> %tmp3, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
   store <8 x i8> %vext, <8 x i8>* %xS8x8, align 8
   ret void
@@ -23,12 +23,12 @@
   %xU8x8 = alloca <8 x i8>, align 8
   %__a = alloca <8 x i8>, align 8
   %__b = alloca <8 x i8>, align 8
-  %tmp = load <8 x i8>* %xU8x8, align 8
+  %tmp = load <8 x i8>, <8 x i8>* %xU8x8, align 8
   store <8 x i8> %tmp, <8 x i8>* %__a, align 8
-  %tmp1 = load <8 x i8>* %xU8x8, align 8
+  %tmp1 = load <8 x i8>, <8 x i8>* %xU8x8, align 8
   store <8 x i8> %tmp1, <8 x i8>* %__b, align 8
-  %tmp2 = load <8 x i8>* %__a, align 8
-  %tmp3 = load <8 x i8>* %__b, align 8
+  %tmp2 = load <8 x i8>, <8 x i8>* %__a, align 8
+  %tmp3 = load <8 x i8>, <8 x i8>* %__b, align 8
   %vext = shufflevector <8 x i8> %tmp2, <8 x i8> %tmp3, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
   store <8 x i8> %vext, <8 x i8>* %xU8x8, align 8
   ret void
@@ -40,12 +40,12 @@
   %xP8x8 = alloca <8 x i8>, align 8
   %__a = alloca <8 x i8>, align 8
   %__b = alloca <8 x i8>, align 8
-  %tmp = load <8 x i8>* %xP8x8, align 8
+  %tmp = load <8 x i8>, <8 x i8>* %xP8x8, align 8
   store <8 x i8> %tmp, <8 x i8>* %__a, align 8
-  %tmp1 = load <8 x i8>* %xP8x8, align 8
+  %tmp1 = load <8 x i8>, <8 x i8>* %xP8x8, align 8
   store <8 x i8> %tmp1, <8 x i8>* %__b, align 8
-  %tmp2 = load <8 x i8>* %__a, align 8
-  %tmp3 = load <8 x i8>* %__b, align 8
+  %tmp2 = load <8 x i8>, <8 x i8>* %__a, align 8
+  %tmp3 = load <8 x i8>, <8 x i8>* %__b, align 8
   %vext = shufflevector <8 x i8> %tmp2, <8 x i8> %tmp3, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
   store <8 x i8> %vext, <8 x i8>* %xP8x8, align 8
   ret void
@@ -57,13 +57,13 @@
   %xS16x4 = alloca <4 x i16>, align 8
   %__a = alloca <4 x i16>, align 8
   %__b = alloca <4 x i16>, align 8
-  %tmp = load <4 x i16>* %xS16x4, align 8
+  %tmp = load <4 x i16>, <4 x i16>* %xS16x4, align 8
   store <4 x i16> %tmp, <4 x i16>* %__a, align 8
-  %tmp1 = load <4 x i16>* %xS16x4, align 8
+  %tmp1 = load <4 x i16>, <4 x i16>* %xS16x4, align 8
   store <4 x i16> %tmp1, <4 x i16>* %__b, align 8
-  %tmp2 = load <4 x i16>* %__a, align 8
+  %tmp2 = load <4 x i16>, <4 x i16>* %__a, align 8
   %tmp3 = bitcast <4 x i16> %tmp2 to <8 x i8>
-  %tmp4 = load <4 x i16>* %__b, align 8
+  %tmp4 = load <4 x i16>, <4 x i16>* %__b, align 8
   %tmp5 = bitcast <4 x i16> %tmp4 to <8 x i8>
   %tmp6 = bitcast <8 x i8> %tmp3 to <4 x i16>
   %tmp7 = bitcast <8 x i8> %tmp5 to <4 x i16>
@@ -78,13 +78,13 @@
   %xU16x4 = alloca <4 x i16>, align 8
   %__a = alloca <4 x i16>, align 8
   %__b = alloca <4 x i16>, align 8
-  %tmp = load <4 x i16>* %xU16x4, align 8
+  %tmp = load <4 x i16>, <4 x i16>* %xU16x4, align 8
   store <4 x i16> %tmp, <4 x i16>* %__a, align 8
-  %tmp1 = load <4 x i16>* %xU16x4, align 8
+  %tmp1 = load <4 x i16>, <4 x i16>* %xU16x4, align 8
   store <4 x i16> %tmp1, <4 x i16>* %__b, align 8
-  %tmp2 = load <4 x i16>* %__a, align 8
+  %tmp2 = load <4 x i16>, <4 x i16>* %__a, align 8
   %tmp3 = bitcast <4 x i16> %tmp2 to <8 x i8>
-  %tmp4 = load <4 x i16>* %__b, align 8
+  %tmp4 = load <4 x i16>, <4 x i16>* %__b, align 8
   %tmp5 = bitcast <4 x i16> %tmp4 to <8 x i8>
   %tmp6 = bitcast <8 x i8> %tmp3 to <4 x i16>
   %tmp7 = bitcast <8 x i8> %tmp5 to <4 x i16>
@@ -99,13 +99,13 @@
   %xP16x4 = alloca <4 x i16>, align 8
   %__a = alloca <4 x i16>, align 8
   %__b = alloca <4 x i16>, align 8
-  %tmp = load <4 x i16>* %xP16x4, align 8
+  %tmp = load <4 x i16>, <4 x i16>* %xP16x4, align 8
   store <4 x i16> %tmp, <4 x i16>* %__a, align 8
-  %tmp1 = load <4 x i16>* %xP16x4, align 8
+  %tmp1 = load <4 x i16>, <4 x i16>* %xP16x4, align 8
   store <4 x i16> %tmp1, <4 x i16>* %__b, align 8
-  %tmp2 = load <4 x i16>* %__a, align 8
+  %tmp2 = load <4 x i16>, <4 x i16>* %__a, align 8
   %tmp3 = bitcast <4 x i16> %tmp2 to <8 x i8>
-  %tmp4 = load <4 x i16>* %__b, align 8
+  %tmp4 = load <4 x i16>, <4 x i16>* %__b, align 8
   %tmp5 = bitcast <4 x i16> %tmp4 to <8 x i8>
   %tmp6 = bitcast <8 x i8> %tmp3 to <4 x i16>
   %tmp7 = bitcast <8 x i8> %tmp5 to <4 x i16>
@@ -120,13 +120,13 @@
   %xS32x2 = alloca <2 x i32>, align 8
   %__a = alloca <2 x i32>, align 8
   %__b = alloca <2 x i32>, align 8
-  %tmp = load <2 x i32>* %xS32x2, align 8
+  %tmp = load <2 x i32>, <2 x i32>* %xS32x2, align 8
   store <2 x i32> %tmp, <2 x i32>* %__a, align 8
-  %tmp1 = load <2 x i32>* %xS32x2, align 8
+  %tmp1 = load <2 x i32>, <2 x i32>* %xS32x2, align 8
   store <2 x i32> %tmp1, <2 x i32>* %__b, align 8
-  %tmp2 = load <2 x i32>* %__a, align 8
+  %tmp2 = load <2 x i32>, <2 x i32>* %__a, align 8
   %tmp3 = bitcast <2 x i32> %tmp2 to <8 x i8>
-  %tmp4 = load <2 x i32>* %__b, align 8
+  %tmp4 = load <2 x i32>, <2 x i32>* %__b, align 8
   %tmp5 = bitcast <2 x i32> %tmp4 to <8 x i8>
   %tmp6 = bitcast <8 x i8> %tmp3 to <2 x i32>
   %tmp7 = bitcast <8 x i8> %tmp5 to <2 x i32>
@@ -141,13 +141,13 @@
   %xU32x2 = alloca <2 x i32>, align 8
   %__a = alloca <2 x i32>, align 8
   %__b = alloca <2 x i32>, align 8
-  %tmp = load <2 x i32>* %xU32x2, align 8
+  %tmp = load <2 x i32>, <2 x i32>* %xU32x2, align 8
   store <2 x i32> %tmp, <2 x i32>* %__a, align 8
-  %tmp1 = load <2 x i32>* %xU32x2, align 8
+  %tmp1 = load <2 x i32>, <2 x i32>* %xU32x2, align 8
   store <2 x i32> %tmp1, <2 x i32>* %__b, align 8
-  %tmp2 = load <2 x i32>* %__a, align 8
+  %tmp2 = load <2 x i32>, <2 x i32>* %__a, align 8
   %tmp3 = bitcast <2 x i32> %tmp2 to <8 x i8>
-  %tmp4 = load <2 x i32>* %__b, align 8
+  %tmp4 = load <2 x i32>, <2 x i32>* %__b, align 8
   %tmp5 = bitcast <2 x i32> %tmp4 to <8 x i8>
   %tmp6 = bitcast <8 x i8> %tmp3 to <2 x i32>
   %tmp7 = bitcast <8 x i8> %tmp5 to <2 x i32>
@@ -162,13 +162,13 @@
   %xF32x2 = alloca <2 x float>, align 8
   %__a = alloca <2 x float>, align 8
   %__b = alloca <2 x float>, align 8
-  %tmp = load <2 x float>* %xF32x2, align 8
+  %tmp = load <2 x float>, <2 x float>* %xF32x2, align 8
   store <2 x float> %tmp, <2 x float>* %__a, align 8
-  %tmp1 = load <2 x float>* %xF32x2, align 8
+  %tmp1 = load <2 x float>, <2 x float>* %xF32x2, align 8
   store <2 x float> %tmp1, <2 x float>* %__b, align 8
-  %tmp2 = load <2 x float>* %__a, align 8
+  %tmp2 = load <2 x float>, <2 x float>* %__a, align 8
   %tmp3 = bitcast <2 x float> %tmp2 to <8 x i8>
-  %tmp4 = load <2 x float>* %__b, align 8
+  %tmp4 = load <2 x float>, <2 x float>* %__b, align 8
   %tmp5 = bitcast <2 x float> %tmp4 to <8 x i8>
   %tmp6 = bitcast <8 x i8> %tmp3 to <2 x float>
   %tmp7 = bitcast <8 x i8> %tmp5 to <2 x float>
@@ -184,13 +184,13 @@
   %xS64x1 = alloca <1 x i64>, align 8
   %__a = alloca <1 x i64>, align 8
   %__b = alloca <1 x i64>, align 8
-  %tmp = load <1 x i64>* %xS64x1, align 8
+  %tmp = load <1 x i64>, <1 x i64>* %xS64x1, align 8
   store <1 x i64> %tmp, <1 x i64>* %__a, align 8
-  %tmp1 = load <1 x i64>* %xS64x1, align 8
+  %tmp1 = load <1 x i64>, <1 x i64>* %xS64x1, align 8
   store <1 x i64> %tmp1, <1 x i64>* %__b, align 8
-  %tmp2 = load <1 x i64>* %__a, align 8
+  %tmp2 = load <1 x i64>, <1 x i64>* %__a, align 8
   %tmp3 = bitcast <1 x i64> %tmp2 to <8 x i8>
-  %tmp4 = load <1 x i64>* %__b, align 8
+  %tmp4 = load <1 x i64>, <1 x i64>* %__b, align 8
   %tmp5 = bitcast <1 x i64> %tmp4 to <8 x i8>
   %tmp6 = bitcast <8 x i8> %tmp3 to <1 x i64>
   %tmp7 = bitcast <8 x i8> %tmp5 to <1 x i64>
@@ -206,13 +206,13 @@
   %xU64x1 = alloca <1 x i64>, align 8
   %__a = alloca <1 x i64>, align 8
   %__b = alloca <1 x i64>, align 8
-  %tmp = load <1 x i64>* %xU64x1, align 8
+  %tmp = load <1 x i64>, <1 x i64>* %xU64x1, align 8
   store <1 x i64> %tmp, <1 x i64>* %__a, align 8
-  %tmp1 = load <1 x i64>* %xU64x1, align 8
+  %tmp1 = load <1 x i64>, <1 x i64>* %xU64x1, align 8
   store <1 x i64> %tmp1, <1 x i64>* %__b, align 8
-  %tmp2 = load <1 x i64>* %__a, align 8
+  %tmp2 = load <1 x i64>, <1 x i64>* %__a, align 8
   %tmp3 = bitcast <1 x i64> %tmp2 to <8 x i8>
-  %tmp4 = load <1 x i64>* %__b, align 8
+  %tmp4 = load <1 x i64>, <1 x i64>* %__b, align 8
   %tmp5 = bitcast <1 x i64> %tmp4 to <8 x i8>
   %tmp6 = bitcast <8 x i8> %tmp3 to <1 x i64>
   %tmp7 = bitcast <8 x i8> %tmp5 to <1 x i64>
@@ -227,12 +227,12 @@
   %xS8x16 = alloca <16 x i8>, align 16
   %__a = alloca <16 x i8>, align 16
   %__b = alloca <16 x i8>, align 16
-  %tmp = load <16 x i8>* %xS8x16, align 16
+  %tmp = load <16 x i8>, <16 x i8>* %xS8x16, align 16
   store <16 x i8> %tmp, <16 x i8>* %__a, align 16
-  %tmp1 = load <16 x i8>* %xS8x16, align 16
+  %tmp1 = load <16 x i8>, <16 x i8>* %xS8x16, align 16
   store <16 x i8> %tmp1, <16 x i8>* %__b, align 16
-  %tmp2 = load <16 x i8>* %__a, align 16
-  %tmp3 = load <16 x i8>* %__b, align 16
+  %tmp2 = load <16 x i8>, <16 x i8>* %__a, align 16
+  %tmp3 = load <16 x i8>, <16 x i8>* %__b, align 16
   %vext = shufflevector <16 x i8> %tmp2, <16 x i8> %tmp3, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
   store <16 x i8> %vext, <16 x i8>* %xS8x16, align 16
   ret void
@@ -244,12 +244,12 @@
   %xU8x16 = alloca <16 x i8>, align 16
   %__a = alloca <16 x i8>, align 16
   %__b = alloca <16 x i8>, align 16
-  %tmp = load <16 x i8>* %xU8x16, align 16
+  %tmp = load <16 x i8>, <16 x i8>* %xU8x16, align 16
   store <16 x i8> %tmp, <16 x i8>* %__a, align 16
-  %tmp1 = load <16 x i8>* %xU8x16, align 16
+  %tmp1 = load <16 x i8>, <16 x i8>* %xU8x16, align 16
   store <16 x i8> %tmp1, <16 x i8>* %__b, align 16
-  %tmp2 = load <16 x i8>* %__a, align 16
-  %tmp3 = load <16 x i8>* %__b, align 16
+  %tmp2 = load <16 x i8>, <16 x i8>* %__a, align 16
+  %tmp3 = load <16 x i8>, <16 x i8>* %__b, align 16
   %vext = shufflevector <16 x i8> %tmp2, <16 x i8> %tmp3, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
   store <16 x i8> %vext, <16 x i8>* %xU8x16, align 16
   ret void
@@ -261,12 +261,12 @@
   %xP8x16 = alloca <16 x i8>, align 16
   %__a = alloca <16 x i8>, align 16
   %__b = alloca <16 x i8>, align 16
-  %tmp = load <16 x i8>* %xP8x16, align 16
+  %tmp = load <16 x i8>, <16 x i8>* %xP8x16, align 16
   store <16 x i8> %tmp, <16 x i8>* %__a, align 16
-  %tmp1 = load <16 x i8>* %xP8x16, align 16
+  %tmp1 = load <16 x i8>, <16 x i8>* %xP8x16, align 16
   store <16 x i8> %tmp1, <16 x i8>* %__b, align 16
-  %tmp2 = load <16 x i8>* %__a, align 16
-  %tmp3 = load <16 x i8>* %__b, align 16
+  %tmp2 = load <16 x i8>, <16 x i8>* %__a, align 16
+  %tmp3 = load <16 x i8>, <16 x i8>* %__b, align 16
   %vext = shufflevector <16 x i8> %tmp2, <16 x i8> %tmp3, <16 x i32> <i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21>
   store <16 x i8> %vext, <16 x i8>* %xP8x16, align 16
   ret void
@@ -278,13 +278,13 @@
   %xS16x8 = alloca <8 x i16>, align 16
   %__a = alloca <8 x i16>, align 16
   %__b = alloca <8 x i16>, align 16
-  %tmp = load <8 x i16>* %xS16x8, align 16
+  %tmp = load <8 x i16>, <8 x i16>* %xS16x8, align 16
   store <8 x i16> %tmp, <8 x i16>* %__a, align 16
-  %tmp1 = load <8 x i16>* %xS16x8, align 16
+  %tmp1 = load <8 x i16>, <8 x i16>* %xS16x8, align 16
   store <8 x i16> %tmp1, <8 x i16>* %__b, align 16
-  %tmp2 = load <8 x i16>* %__a, align 16
+  %tmp2 = load <8 x i16>, <8 x i16>* %__a, align 16
   %tmp3 = bitcast <8 x i16> %tmp2 to <16 x i8>
-  %tmp4 = load <8 x i16>* %__b, align 16
+  %tmp4 = load <8 x i16>, <8 x i16>* %__b, align 16
   %tmp5 = bitcast <8 x i16> %tmp4 to <16 x i8>
   %tmp6 = bitcast <16 x i8> %tmp3 to <8 x i16>
   %tmp7 = bitcast <16 x i8> %tmp5 to <8 x i16>
@@ -299,13 +299,13 @@
   %xU16x8 = alloca <8 x i16>, align 16
   %__a = alloca <8 x i16>, align 16
   %__b = alloca <8 x i16>, align 16
-  %tmp = load <8 x i16>* %xU16x8, align 16
+  %tmp = load <8 x i16>, <8 x i16>* %xU16x8, align 16
   store <8 x i16> %tmp, <8 x i16>* %__a, align 16
-  %tmp1 = load <8 x i16>* %xU16x8, align 16
+  %tmp1 = load <8 x i16>, <8 x i16>* %xU16x8, align 16
   store <8 x i16> %tmp1, <8 x i16>* %__b, align 16
-  %tmp2 = load <8 x i16>* %__a, align 16
+  %tmp2 = load <8 x i16>, <8 x i16>* %__a, align 16
   %tmp3 = bitcast <8 x i16> %tmp2 to <16 x i8>
-  %tmp4 = load <8 x i16>* %__b, align 16
+  %tmp4 = load <8 x i16>, <8 x i16>* %__b, align 16
   %tmp5 = bitcast <8 x i16> %tmp4 to <16 x i8>
   %tmp6 = bitcast <16 x i8> %tmp3 to <8 x i16>
   %tmp7 = bitcast <16 x i8> %tmp5 to <8 x i16>
@@ -320,13 +320,13 @@
   %xP16x8 = alloca <8 x i16>, align 16
   %__a = alloca <8 x i16>, align 16
   %__b = alloca <8 x i16>, align 16
-  %tmp = load <8 x i16>* %xP16x8, align 16
+  %tmp = load <8 x i16>, <8 x i16>* %xP16x8, align 16
   store <8 x i16> %tmp, <8 x i16>* %__a, align 16
-  %tmp1 = load <8 x i16>* %xP16x8, align 16
+  %tmp1 = load <8 x i16>, <8 x i16>* %xP16x8, align 16
   store <8 x i16> %tmp1, <8 x i16>* %__b, align 16
-  %tmp2 = load <8 x i16>* %__a, align 16
+  %tmp2 = load <8 x i16>, <8 x i16>* %__a, align 16
   %tmp3 = bitcast <8 x i16> %tmp2 to <16 x i8>
-  %tmp4 = load <8 x i16>* %__b, align 16
+  %tmp4 = load <8 x i16>, <8 x i16>* %__b, align 16
   %tmp5 = bitcast <8 x i16> %tmp4 to <16 x i8>
   %tmp6 = bitcast <16 x i8> %tmp3 to <8 x i16>
   %tmp7 = bitcast <16 x i8> %tmp5 to <8 x i16>
@@ -341,13 +341,13 @@
   %xS32x4 = alloca <4 x i32>, align 16
   %__a = alloca <4 x i32>, align 16
   %__b = alloca <4 x i32>, align 16
-  %tmp = load <4 x i32>* %xS32x4, align 16
+  %tmp = load <4 x i32>, <4 x i32>* %xS32x4, align 16
   store <4 x i32> %tmp, <4 x i32>* %__a, align 16
-  %tmp1 = load <4 x i32>* %xS32x4, align 16
+  %tmp1 = load <4 x i32>, <4 x i32>* %xS32x4, align 16
   store <4 x i32> %tmp1, <4 x i32>* %__b, align 16
-  %tmp2 = load <4 x i32>* %__a, align 16
+  %tmp2 = load <4 x i32>, <4 x i32>* %__a, align 16
   %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
-  %tmp4 = load <4 x i32>* %__b, align 16
+  %tmp4 = load <4 x i32>, <4 x i32>* %__b, align 16
   %tmp5 = bitcast <4 x i32> %tmp4 to <16 x i8>
   %tmp6 = bitcast <16 x i8> %tmp3 to <4 x i32>
   %tmp7 = bitcast <16 x i8> %tmp5 to <4 x i32>
@@ -362,13 +362,13 @@
   %xU32x4 = alloca <4 x i32>, align 16
   %__a = alloca <4 x i32>, align 16
   %__b = alloca <4 x i32>, align 16
-  %tmp = load <4 x i32>* %xU32x4, align 16
+  %tmp = load <4 x i32>, <4 x i32>* %xU32x4, align 16
   store <4 x i32> %tmp, <4 x i32>* %__a, align 16
-  %tmp1 = load <4 x i32>* %xU32x4, align 16
+  %tmp1 = load <4 x i32>, <4 x i32>* %xU32x4, align 16
   store <4 x i32> %tmp1, <4 x i32>* %__b, align 16
-  %tmp2 = load <4 x i32>* %__a, align 16
+  %tmp2 = load <4 x i32>, <4 x i32>* %__a, align 16
   %tmp3 = bitcast <4 x i32> %tmp2 to <16 x i8>
-  %tmp4 = load <4 x i32>* %__b, align 16
+  %tmp4 = load <4 x i32>, <4 x i32>* %__b, align 16
   %tmp5 = bitcast <4 x i32> %tmp4 to <16 x i8>
   %tmp6 = bitcast <16 x i8> %tmp3 to <4 x i32>
   %tmp7 = bitcast <16 x i8> %tmp5 to <4 x i32>
@@ -383,13 +383,13 @@
   %xF32x4 = alloca <4 x float>, align 16
   %__a = alloca <4 x float>, align 16
   %__b = alloca <4 x float>, align 16
-  %tmp = load <4 x float>* %xF32x4, align 16
+  %tmp = load <4 x float>, <4 x float>* %xF32x4, align 16
   store <4 x float> %tmp, <4 x float>* %__a, align 16
-  %tmp1 = load <4 x float>* %xF32x4, align 16
+  %tmp1 = load <4 x float>, <4 x float>* %xF32x4, align 16
   store <4 x float> %tmp1, <4 x float>* %__b, align 16
-  %tmp2 = load <4 x float>* %__a, align 16
+  %tmp2 = load <4 x float>, <4 x float>* %__a, align 16
   %tmp3 = bitcast <4 x float> %tmp2 to <16 x i8>
-  %tmp4 = load <4 x float>* %__b, align 16
+  %tmp4 = load <4 x float>, <4 x float>* %__b, align 16
   %tmp5 = bitcast <4 x float> %tmp4 to <16 x i8>
   %tmp6 = bitcast <16 x i8> %tmp3 to <4 x float>
   %tmp7 = bitcast <16 x i8> %tmp5 to <4 x float>
@@ -404,13 +404,13 @@
   %xS64x2 = alloca <2 x i64>, align 16
   %__a = alloca <2 x i64>, align 16
   %__b = alloca <2 x i64>, align 16
-  %tmp = load <2 x i64>* %xS64x2, align 16
+  %tmp = load <2 x i64>, <2 x i64>* %xS64x2, align 16
   store <2 x i64> %tmp, <2 x i64>* %__a, align 16
-  %tmp1 = load <2 x i64>* %xS64x2, align 16
+  %tmp1 = load <2 x i64>, <2 x i64>* %xS64x2, align 16
   store <2 x i64> %tmp1, <2 x i64>* %__b, align 16
-  %tmp2 = load <2 x i64>* %__a, align 16
+  %tmp2 = load <2 x i64>, <2 x i64>* %__a, align 16
   %tmp3 = bitcast <2 x i64> %tmp2 to <16 x i8>
-  %tmp4 = load <2 x i64>* %__b, align 16
+  %tmp4 = load <2 x i64>, <2 x i64>* %__b, align 16
   %tmp5 = bitcast <2 x i64> %tmp4 to <16 x i8>
   %tmp6 = bitcast <16 x i8> %tmp3 to <2 x i64>
   %tmp7 = bitcast <16 x i8> %tmp5 to <2 x i64>
@@ -425,13 +425,13 @@
   %xU64x2 = alloca <2 x i64>, align 16
   %__a = alloca <2 x i64>, align 16
   %__b = alloca <2 x i64>, align 16
-  %tmp = load <2 x i64>* %xU64x2, align 16
+  %tmp = load <2 x i64>, <2 x i64>* %xU64x2, align 16
   store <2 x i64> %tmp, <2 x i64>* %__a, align 16
-  %tmp1 = load <2 x i64>* %xU64x2, align 16
+  %tmp1 = load <2 x i64>, <2 x i64>* %xU64x2, align 16
   store <2 x i64> %tmp1, <2 x i64>* %__b, align 16
-  %tmp2 = load <2 x i64>* %__a, align 16
+  %tmp2 = load <2 x i64>, <2 x i64>* %__a, align 16
   %tmp3 = bitcast <2 x i64> %tmp2 to <16 x i8>
-  %tmp4 = load <2 x i64>* %__b, align 16
+  %tmp4 = load <2 x i64>, <2 x i64>* %__b, align 16
   %tmp5 = bitcast <2 x i64> %tmp4 to <16 x i8>
   %tmp6 = bitcast <16 x i8> %tmp3 to <2 x i64>
   %tmp7 = bitcast <16 x i8> %tmp5 to <2 x i64>
diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
index 6178bf9..2e82b2a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @shadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: shadd8b:
 ;CHECK: shadd.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <16 x i8> @shadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: shadd16b:
 ;CHECK: shadd.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -21,8 +21,8 @@
 define <4 x i16> @shadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: shadd4h:
 ;CHECK: shadd.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -30,8 +30,8 @@
 define <8 x i16> @shadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: shadd8h:
 ;CHECK: shadd.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -39,8 +39,8 @@
 define <2 x i32> @shadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: shadd2s:
 ;CHECK: shadd.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -48,8 +48,8 @@
 define <4 x i32> @shadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: shadd4s:
 ;CHECK: shadd.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -57,8 +57,8 @@
 define <8 x i8> @uhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: uhadd8b:
 ;CHECK: uhadd.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -66,8 +66,8 @@
 define <16 x i8> @uhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: uhadd16b:
 ;CHECK: uhadd.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -75,8 +75,8 @@
 define <4 x i16> @uhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: uhadd4h:
 ;CHECK: uhadd.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -84,8 +84,8 @@
 define <8 x i16> @uhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: uhadd8h:
 ;CHECK: uhadd.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -93,8 +93,8 @@
 define <2 x i32> @uhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: uhadd2s:
 ;CHECK: uhadd.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -102,8 +102,8 @@
 define <4 x i32> @uhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: uhadd4s:
 ;CHECK: uhadd.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -127,8 +127,8 @@
 define <8 x i8> @srhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: srhadd8b:
 ;CHECK: srhadd.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -136,8 +136,8 @@
 define <16 x i8> @srhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: srhadd16b:
 ;CHECK: srhadd.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -145,8 +145,8 @@
 define <4 x i16> @srhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: srhadd4h:
 ;CHECK: srhadd.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -154,8 +154,8 @@
 define <8 x i16> @srhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: srhadd8h:
 ;CHECK: srhadd.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -163,8 +163,8 @@
 define <2 x i32> @srhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: srhadd2s:
 ;CHECK: srhadd.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -172,8 +172,8 @@
 define <4 x i32> @srhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: srhadd4s:
 ;CHECK: srhadd.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -181,8 +181,8 @@
 define <8 x i8> @urhadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: urhadd8b:
 ;CHECK: urhadd.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -190,8 +190,8 @@
 define <16 x i8> @urhadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: urhadd16b:
 ;CHECK: urhadd.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -199,8 +199,8 @@
 define <4 x i16> @urhadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: urhadd4h:
 ;CHECK: urhadd.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -208,8 +208,8 @@
 define <8 x i16> @urhadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: urhadd8h:
 ;CHECK: urhadd.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -217,8 +217,8 @@
 define <2 x i32> @urhadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: urhadd2s:
 ;CHECK: urhadd.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -226,8 +226,8 @@
 define <4 x i32> @urhadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: urhadd4s:
 ;CHECK: urhadd.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-vhsub.ll b/llvm/test/CodeGen/AArch64/arm64-vhsub.ll
index 13bfda3..e50fd3d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vhsub.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vhsub.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @shsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: shsub8b:
 ;CHECK: shsub.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <16 x i8> @shsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: shsub16b:
 ;CHECK: shsub.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -21,8 +21,8 @@
 define <4 x i16> @shsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: shsub4h:
 ;CHECK: shsub.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -30,8 +30,8 @@
 define <8 x i16> @shsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: shsub8h:
 ;CHECK: shsub.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -39,8 +39,8 @@
 define <2 x i32> @shsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: shsub2s:
 ;CHECK: shsub.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -48,8 +48,8 @@
 define <4 x i32> @shsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: shsub4s:
 ;CHECK: shsub.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -57,8 +57,8 @@
 define <8 x i8> @uhsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: uhsub8b:
 ;CHECK: uhsub.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -66,8 +66,8 @@
 define <16 x i8> @uhsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: uhsub16b:
 ;CHECK: uhsub.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -75,8 +75,8 @@
 define <4 x i16> @uhsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: uhsub4h:
 ;CHECK: uhsub.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -84,8 +84,8 @@
 define <8 x i16> @uhsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: uhsub8h:
 ;CHECK: uhsub.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -93,8 +93,8 @@
 define <2 x i32> @uhsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: uhsub2s:
 ;CHECK: uhsub.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -102,8 +102,8 @@
 define <4 x i32> @uhsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: uhsub4s:
 ;CHECK: uhsub.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmax.ll b/llvm/test/CodeGen/AArch64/arm64-vmax.ll
index 3f2c134..7e36323 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmax.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmax.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @smax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: smax_8b:
 ;CHECK: smax.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <16 x i8> @smax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: smax_16b:
 ;CHECK: smax.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -21,8 +21,8 @@
 define <4 x i16> @smax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: smax_4h:
 ;CHECK: smax.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -30,8 +30,8 @@
 define <8 x i16> @smax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: smax_8h:
 ;CHECK: smax.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -39,8 +39,8 @@
 define <2 x i32> @smax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: smax_2s:
 ;CHECK: smax.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -48,8 +48,8 @@
 define <4 x i32> @smax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: smax_4s:
 ;CHECK: smax.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -64,8 +64,8 @@
 define <8 x i8> @umax_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: umax_8b:
 ;CHECK: umax.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -73,8 +73,8 @@
 define <16 x i8> @umax_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: umax_16b:
 ;CHECK: umax.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -82,8 +82,8 @@
 define <4 x i16> @umax_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: umax_4h:
 ;CHECK: umax.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -91,8 +91,8 @@
 define <8 x i16> @umax_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: umax_8h:
 ;CHECK: umax.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -100,8 +100,8 @@
 define <2 x i32> @umax_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: umax_2s:
 ;CHECK: umax.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -109,8 +109,8 @@
 define <4 x i32> @umax_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: umax_4s:
 ;CHECK: umax.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -125,8 +125,8 @@
 define <8 x i8> @smin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: smin_8b:
 ;CHECK: smin.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -134,8 +134,8 @@
 define <16 x i8> @smin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: smin_16b:
 ;CHECK: smin.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.smin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -143,8 +143,8 @@
 define <4 x i16> @smin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: smin_4h:
 ;CHECK: smin.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -152,8 +152,8 @@
 define <8 x i16> @smin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: smin_8h:
 ;CHECK: smin.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -161,8 +161,8 @@
 define <2 x i32> @smin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: smin_2s:
 ;CHECK: smin.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -170,8 +170,8 @@
 define <4 x i32> @smin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: smin_4s:
 ;CHECK: smin.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -186,8 +186,8 @@
 define <8 x i8> @umin_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: umin_8b:
 ;CHECK: umin.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -195,8 +195,8 @@
 define <16 x i8> @umin_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: umin_16b:
 ;CHECK: umin.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -204,8 +204,8 @@
 define <4 x i16> @umin_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: umin_4h:
 ;CHECK: umin.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -213,8 +213,8 @@
 define <8 x i16> @umin_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: umin_8h:
 ;CHECK: umin.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -222,8 +222,8 @@
 define <2 x i32> @umin_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: umin_2s:
 ;CHECK: umin.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -231,8 +231,8 @@
 define <4 x i32> @umin_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: umin_4s:
 ;CHECK: umin.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -249,8 +249,8 @@
 define <8 x i8> @smaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: smaxp_8b:
 ;CHECK: smaxp.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -258,8 +258,8 @@
 define <16 x i8> @smaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: smaxp_16b:
 ;CHECK: smaxp.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -267,8 +267,8 @@
 define <4 x i16> @smaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: smaxp_4h:
 ;CHECK: smaxp.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -276,8 +276,8 @@
 define <8 x i16> @smaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: smaxp_8h:
 ;CHECK: smaxp.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -285,8 +285,8 @@
 define <2 x i32> @smaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: smaxp_2s:
 ;CHECK: smaxp.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -294,8 +294,8 @@
 define <4 x i32> @smaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: smaxp_4s:
 ;CHECK: smaxp.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -310,8 +310,8 @@
 define <8 x i8> @umaxp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: umaxp_8b:
 ;CHECK: umaxp.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -319,8 +319,8 @@
 define <16 x i8> @umaxp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: umaxp_16b:
 ;CHECK: umaxp.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -328,8 +328,8 @@
 define <4 x i16> @umaxp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: umaxp_4h:
 ;CHECK: umaxp.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -337,8 +337,8 @@
 define <8 x i16> @umaxp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: umaxp_8h:
 ;CHECK: umaxp.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -346,8 +346,8 @@
 define <2 x i32> @umaxp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: umaxp_2s:
 ;CHECK: umaxp.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -355,8 +355,8 @@
 define <4 x i32> @umaxp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: umaxp_4s:
 ;CHECK: umaxp.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -373,8 +373,8 @@
 define <8 x i8> @sminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: sminp_8b:
 ;CHECK: sminp.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -382,8 +382,8 @@
 define <16 x i8> @sminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: sminp_16b:
 ;CHECK: sminp.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -391,8 +391,8 @@
 define <4 x i16> @sminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: sminp_4h:
 ;CHECK: sminp.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -400,8 +400,8 @@
 define <8 x i16> @sminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: sminp_8h:
 ;CHECK: sminp.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -409,8 +409,8 @@
 define <2 x i32> @sminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: sminp_2s:
 ;CHECK: sminp.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -418,8 +418,8 @@
 define <4 x i32> @sminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: sminp_4s:
 ;CHECK: sminp.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -434,8 +434,8 @@
 define <8 x i8> @uminp_8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: uminp_8b:
 ;CHECK: uminp.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -443,8 +443,8 @@
 define <16 x i8> @uminp_16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: uminp_16b:
 ;CHECK: uminp.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -452,8 +452,8 @@
 define <4 x i16> @uminp_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: uminp_4h:
 ;CHECK: uminp.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -461,8 +461,8 @@
 define <8 x i16> @uminp_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: uminp_8h:
 ;CHECK: uminp.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -470,8 +470,8 @@
 define <2 x i32> @uminp_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: uminp_2s:
 ;CHECK: uminp.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -479,8 +479,8 @@
 define <4 x i32> @uminp_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: uminp_4s:
 ;CHECK: uminp.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -495,8 +495,8 @@
 define <2 x float> @fmax_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: fmax_2s:
 ;CHECK: fmax.2s
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
@@ -504,8 +504,8 @@
 define <4 x float> @fmax_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: fmax_4s:
 ;CHECK: fmax.4s
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x float> %tmp3
 }
@@ -513,8 +513,8 @@
 define <2 x double> @fmax_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
 ;CHECK-LABEL: fmax_2d:
 ;CHECK: fmax.2d
-	%tmp1 = load <2 x double>* %A
-	%tmp2 = load <2 x double>* %B
+	%tmp1 = load <2 x double>, <2 x double>* %A
+	%tmp2 = load <2 x double>, <2 x double>* %B
 	%tmp3 = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
 	ret <2 x double> %tmp3
 }
@@ -526,8 +526,8 @@
 define <2 x float> @fmaxp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: fmaxp_2s:
 ;CHECK: fmaxp.2s
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
@@ -535,8 +535,8 @@
 define <4 x float> @fmaxp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: fmaxp_4s:
 ;CHECK: fmaxp.4s
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x float> %tmp3
 }
@@ -544,8 +544,8 @@
 define <2 x double> @fmaxp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
 ;CHECK-LABEL: fmaxp_2d:
 ;CHECK: fmaxp.2d
-	%tmp1 = load <2 x double>* %A
-	%tmp2 = load <2 x double>* %B
+	%tmp1 = load <2 x double>, <2 x double>* %A
+	%tmp2 = load <2 x double>, <2 x double>* %B
 	%tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
 	ret <2 x double> %tmp3
 }
@@ -557,8 +557,8 @@
 define <2 x float> @fmin_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: fmin_2s:
 ;CHECK: fmin.2s
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
@@ -566,8 +566,8 @@
 define <4 x float> @fmin_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: fmin_4s:
 ;CHECK: fmin.4s
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x float> %tmp3
 }
@@ -575,8 +575,8 @@
 define <2 x double> @fmin_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
 ;CHECK-LABEL: fmin_2d:
 ;CHECK: fmin.2d
-	%tmp1 = load <2 x double>* %A
-	%tmp2 = load <2 x double>* %B
+	%tmp1 = load <2 x double>, <2 x double>* %A
+	%tmp2 = load <2 x double>, <2 x double>* %B
 	%tmp3 = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
 	ret <2 x double> %tmp3
 }
@@ -588,8 +588,8 @@
 define <2 x float> @fminp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: fminp_2s:
 ;CHECK: fminp.2s
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
@@ -597,8 +597,8 @@
 define <4 x float> @fminp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: fminp_4s:
 ;CHECK: fminp.4s
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x float> %tmp3
 }
@@ -606,8 +606,8 @@
 define <2 x double> @fminp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
 ;CHECK-LABEL: fminp_2d:
 ;CHECK: fminp.2d
-	%tmp1 = load <2 x double>* %A
-	%tmp2 = load <2 x double>* %B
+	%tmp1 = load <2 x double>, <2 x double>* %A
+	%tmp2 = load <2 x double>, <2 x double>* %B
 	%tmp3 = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
 	ret <2 x double> %tmp3
 }
@@ -619,8 +619,8 @@
 define <2 x float> @fminnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: fminnmp_2s:
 ;CHECK: fminnmp.2s
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
@@ -628,8 +628,8 @@
 define <4 x float> @fminnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: fminnmp_4s:
 ;CHECK: fminnmp.4s
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x float> %tmp3
 }
@@ -637,8 +637,8 @@
 define <2 x double> @fminnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
 ;CHECK-LABEL: fminnmp_2d:
 ;CHECK: fminnmp.2d
-	%tmp1 = load <2 x double>* %A
-	%tmp2 = load <2 x double>* %B
+	%tmp1 = load <2 x double>, <2 x double>* %A
+	%tmp2 = load <2 x double>, <2 x double>* %B
 	%tmp3 = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
 	ret <2 x double> %tmp3
 }
@@ -650,8 +650,8 @@
 define <2 x float> @fmaxnmp_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: fmaxnmp_2s:
 ;CHECK: fmaxnmp.2s
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
@@ -659,8 +659,8 @@
 define <4 x float> @fmaxnmp_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: fmaxnmp_4s:
 ;CHECK: fmaxnmp.4s
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x float> %tmp3
 }
@@ -668,8 +668,8 @@
 define <2 x double> @fmaxnmp_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
 ;CHECK-LABEL: fmaxnmp_2d:
 ;CHECK: fmaxnmp.2d
-	%tmp1 = load <2 x double>* %A
-	%tmp2 = load <2 x double>* %B
+	%tmp1 = load <2 x double>, <2 x double>* %A
+	%tmp2 = load <2 x double>, <2 x double>* %B
 	%tmp3 = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
 	ret <2 x double> %tmp3
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index 6fa60fe..3df847e 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -4,8 +4,8 @@
 define <8 x i16> @smull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: smull8h:
 ;CHECK: smull.8h
-  %tmp1 = load <8 x i8>* %A
-  %tmp2 = load <8 x i8>* %B
+  %tmp1 = load <8 x i8>, <8 x i8>* %A
+  %tmp2 = load <8 x i8>, <8 x i8>* %B
   %tmp3 = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
   ret <8 x i16> %tmp3
 }
@@ -13,8 +13,8 @@
 define <4 x i32> @smull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: smull4s:
 ;CHECK: smull.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
   ret <4 x i32> %tmp3
 }
@@ -22,8 +22,8 @@
 define <2 x i64> @smull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: smull2d:
 ;CHECK: smull.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
   ret <2 x i64> %tmp3
 }
@@ -35,8 +35,8 @@
 define <8 x i16> @umull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: umull8h:
 ;CHECK: umull.8h
-  %tmp1 = load <8 x i8>* %A
-  %tmp2 = load <8 x i8>* %B
+  %tmp1 = load <8 x i8>, <8 x i8>* %A
+  %tmp2 = load <8 x i8>, <8 x i8>* %B
   %tmp3 = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
   ret <8 x i16> %tmp3
 }
@@ -44,8 +44,8 @@
 define <4 x i32> @umull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: umull4s:
 ;CHECK: umull.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
   ret <4 x i32> %tmp3
 }
@@ -53,8 +53,8 @@
 define <2 x i64> @umull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: umull2d:
 ;CHECK: umull.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
   ret <2 x i64> %tmp3
 }
@@ -66,8 +66,8 @@
 define <4 x i32> @sqdmull4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: sqdmull4s:
 ;CHECK: sqdmull.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
   ret <4 x i32> %tmp3
 }
@@ -75,8 +75,8 @@
 define <2 x i64> @sqdmull2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: sqdmull2d:
 ;CHECK: sqdmull.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
   ret <2 x i64> %tmp3
 }
@@ -84,8 +84,8 @@
 define <4 x i32> @sqdmull2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: sqdmull2_4s:
 ;CHECK: sqdmull2.4s
-  %load1 = load <8 x i16>* %A
-  %load2 = load <8 x i16>* %B
+  %load1 = load <8 x i16>, <8 x i16>* %A
+  %load2 = load <8 x i16>, <8 x i16>* %B
   %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -95,8 +95,8 @@
 define <2 x i64> @sqdmull2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: sqdmull2_2d:
 ;CHECK: sqdmull2.2d
-  %load1 = load <4 x i32>* %A
-  %load2 = load <4 x i32>* %B
+  %load1 = load <4 x i32>, <4 x i32>* %A
+  %load2 = load <4 x i32>, <4 x i32>* %B
   %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -110,8 +110,8 @@
 define <8 x i16> @pmull8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: pmull8h:
 ;CHECK: pmull.8h
-  %tmp1 = load <8 x i8>* %A
-  %tmp2 = load <8 x i8>* %B
+  %tmp1 = load <8 x i8>, <8 x i8>* %A
+  %tmp2 = load <8 x i8>, <8 x i8>* %B
   %tmp3 = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
   ret <8 x i16> %tmp3
 }
@@ -121,8 +121,8 @@
 define <4 x i16> @sqdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: sqdmulh_4h:
 ;CHECK: sqdmulh.4h
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
   ret <4 x i16> %tmp3
 }
@@ -130,8 +130,8 @@
 define <8 x i16> @sqdmulh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: sqdmulh_8h:
 ;CHECK: sqdmulh.8h
-  %tmp1 = load <8 x i16>* %A
-  %tmp2 = load <8 x i16>* %B
+  %tmp1 = load <8 x i16>, <8 x i16>* %A
+  %tmp2 = load <8 x i16>, <8 x i16>* %B
   %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
   ret <8 x i16> %tmp3
 }
@@ -139,8 +139,8 @@
 define <2 x i32> @sqdmulh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: sqdmulh_2s:
 ;CHECK: sqdmulh.2s
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
   ret <2 x i32> %tmp3
 }
@@ -148,8 +148,8 @@
 define <4 x i32> @sqdmulh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: sqdmulh_4s:
 ;CHECK: sqdmulh.4s
-  %tmp1 = load <4 x i32>* %A
-  %tmp2 = load <4 x i32>* %B
+  %tmp1 = load <4 x i32>, <4 x i32>* %A
+  %tmp2 = load <4 x i32>, <4 x i32>* %B
   %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
   ret <4 x i32> %tmp3
 }
@@ -157,8 +157,8 @@
 define i32 @sqdmulh_1s(i32* %A, i32* %B) nounwind {
 ;CHECK-LABEL: sqdmulh_1s:
 ;CHECK: sqdmulh s0, {{s[0-9]+}}, {{s[0-9]+}}
-  %tmp1 = load i32* %A
-  %tmp2 = load i32* %B
+  %tmp1 = load i32, i32* %A
+  %tmp2 = load i32, i32* %B
   %tmp3 = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %tmp1, i32 %tmp2)
   ret i32 %tmp3
 }
@@ -172,8 +172,8 @@
 define <4 x i16> @sqrdmulh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: sqrdmulh_4h:
 ;CHECK: sqrdmulh.4h
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
   ret <4 x i16> %tmp3
 }
@@ -181,8 +181,8 @@
 define <8 x i16> @sqrdmulh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: sqrdmulh_8h:
 ;CHECK: sqrdmulh.8h
-  %tmp1 = load <8 x i16>* %A
-  %tmp2 = load <8 x i16>* %B
+  %tmp1 = load <8 x i16>, <8 x i16>* %A
+  %tmp2 = load <8 x i16>, <8 x i16>* %B
   %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
   ret <8 x i16> %tmp3
 }
@@ -190,8 +190,8 @@
 define <2 x i32> @sqrdmulh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: sqrdmulh_2s:
 ;CHECK: sqrdmulh.2s
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
   ret <2 x i32> %tmp3
 }
@@ -199,8 +199,8 @@
 define <4 x i32> @sqrdmulh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: sqrdmulh_4s:
 ;CHECK: sqrdmulh.4s
-  %tmp1 = load <4 x i32>* %A
-  %tmp2 = load <4 x i32>* %B
+  %tmp1 = load <4 x i32>, <4 x i32>* %A
+  %tmp2 = load <4 x i32>, <4 x i32>* %B
   %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
   ret <4 x i32> %tmp3
 }
@@ -208,8 +208,8 @@
 define i32 @sqrdmulh_1s(i32* %A, i32* %B) nounwind {
 ;CHECK-LABEL: sqrdmulh_1s:
 ;CHECK: sqrdmulh s0, {{s[0-9]+}}, {{s[0-9]+}}
-  %tmp1 = load i32* %A
-  %tmp2 = load i32* %B
+  %tmp1 = load i32, i32* %A
+  %tmp2 = load i32, i32* %B
   %tmp3 = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %tmp1, i32 %tmp2)
   ret i32 %tmp3
 }
@@ -223,8 +223,8 @@
 define <2 x float> @fmulx_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: fmulx_2s:
 ;CHECK: fmulx.2s
-  %tmp1 = load <2 x float>* %A
-  %tmp2 = load <2 x float>* %B
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
   %tmp3 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
   ret <2 x float> %tmp3
 }
@@ -232,8 +232,8 @@
 define <4 x float> @fmulx_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: fmulx_4s:
 ;CHECK: fmulx.4s
-  %tmp1 = load <4 x float>* %A
-  %tmp2 = load <4 x float>* %B
+  %tmp1 = load <4 x float>, <4 x float>* %A
+  %tmp2 = load <4 x float>, <4 x float>* %B
   %tmp3 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
   ret <4 x float> %tmp3
 }
@@ -241,8 +241,8 @@
 define <2 x double> @fmulx_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
 ;CHECK-LABEL: fmulx_2d:
 ;CHECK: fmulx.2d
-  %tmp1 = load <2 x double>* %A
-  %tmp2 = load <2 x double>* %B
+  %tmp1 = load <2 x double>, <2 x double>* %A
+  %tmp2 = load <2 x double>, <2 x double>* %B
   %tmp3 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
   ret <2 x double> %tmp3
 }
@@ -254,9 +254,9 @@
 define <4 x i32> @smlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: smlal4s:
 ;CHECK: smlal.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
-  %tmp3 = load <4 x i32>* %C
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %tmp3 = load <4 x i32>, <4 x i32>* %C
   %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
   %tmp5 = add <4 x i32> %tmp3, %tmp4
   ret <4 x i32> %tmp5
@@ -265,9 +265,9 @@
 define <2 x i64> @smlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
 ;CHECK-LABEL: smlal2d:
 ;CHECK: smlal.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
-  %tmp3 = load <2 x i64>* %C
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
+  %tmp3 = load <2 x i64>, <2 x i64>* %C
   %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
   %tmp5 = add <2 x i64> %tmp3, %tmp4
   ret <2 x i64> %tmp5
@@ -276,9 +276,9 @@
 define <4 x i32> @smlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: smlsl4s:
 ;CHECK: smlsl.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
-  %tmp3 = load <4 x i32>* %C
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %tmp3 = load <4 x i32>, <4 x i32>* %C
   %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
   %tmp5 = sub <4 x i32> %tmp3, %tmp4
   ret <4 x i32> %tmp5
@@ -287,9 +287,9 @@
 define <2 x i64> @smlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
 ;CHECK-LABEL: smlsl2d:
 ;CHECK: smlsl.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
-  %tmp3 = load <2 x i64>* %C
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
+  %tmp3 = load <2 x i64>, <2 x i64>* %C
   %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
   %tmp5 = sub <2 x i64> %tmp3, %tmp4
   ret <2 x i64> %tmp5
@@ -303,9 +303,9 @@
 define <4 x i32> @sqdmlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: sqdmlal4s:
 ;CHECK: sqdmlal.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
-  %tmp3 = load <4 x i32>* %C
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %tmp3 = load <4 x i32>, <4 x i32>* %C
   %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
   %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
   ret <4 x i32> %tmp5
@@ -314,9 +314,9 @@
 define <2 x i64> @sqdmlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
 ;CHECK-LABEL: sqdmlal2d:
 ;CHECK: sqdmlal.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
-  %tmp3 = load <2 x i64>* %C
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
+  %tmp3 = load <2 x i64>, <2 x i64>* %C
   %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
   %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
   ret <2 x i64> %tmp5
@@ -325,9 +325,9 @@
 define <4 x i32> @sqdmlal2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: sqdmlal2_4s:
 ;CHECK: sqdmlal2.4s
-  %load1 = load <8 x i16>* %A
-  %load2 = load <8 x i16>* %B
-  %tmp3 = load <4 x i32>* %C
+  %load1 = load <8 x i16>, <8 x i16>* %A
+  %load2 = load <8 x i16>, <8 x i16>* %B
+  %tmp3 = load <4 x i32>, <4 x i32>* %C
   %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -338,9 +338,9 @@
 define <2 x i64> @sqdmlal2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
 ;CHECK-LABEL: sqdmlal2_2d:
 ;CHECK: sqdmlal2.2d
-  %load1 = load <4 x i32>* %A
-  %load2 = load <4 x i32>* %B
-  %tmp3 = load <2 x i64>* %C
+  %load1 = load <4 x i32>, <4 x i32>* %A
+  %load2 = load <4 x i32>, <4 x i32>* %B
+  %tmp3 = load <2 x i64>, <2 x i64>* %C
   %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -351,9 +351,9 @@
 define <4 x i32> @sqdmlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: sqdmlsl4s:
 ;CHECK: sqdmlsl.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
-  %tmp3 = load <4 x i32>* %C
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %tmp3 = load <4 x i32>, <4 x i32>* %C
   %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
   %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp4)
   ret <4 x i32> %tmp5
@@ -362,9 +362,9 @@
 define <2 x i64> @sqdmlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
 ;CHECK-LABEL: sqdmlsl2d:
 ;CHECK: sqdmlsl.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
-  %tmp3 = load <2 x i64>* %C
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
+  %tmp3 = load <2 x i64>, <2 x i64>* %C
   %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
   %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp4)
   ret <2 x i64> %tmp5
@@ -373,9 +373,9 @@
 define <4 x i32> @sqdmlsl2_4s(<8 x i16>* %A, <8 x i16>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: sqdmlsl2_4s:
 ;CHECK: sqdmlsl2.4s
-  %load1 = load <8 x i16>* %A
-  %load2 = load <8 x i16>* %B
-  %tmp3 = load <4 x i32>* %C
+  %load1 = load <8 x i16>, <8 x i16>* %A
+  %load2 = load <8 x i16>, <8 x i16>* %B
+  %tmp3 = load <4 x i32>, <4 x i32>* %C
   %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -386,9 +386,9 @@
 define <2 x i64> @sqdmlsl2_2d(<4 x i32>* %A, <4 x i32>* %B, <2 x i64>* %C) nounwind {
 ;CHECK-LABEL: sqdmlsl2_2d:
 ;CHECK: sqdmlsl2.2d
-  %load1 = load <4 x i32>* %A
-  %load2 = load <4 x i32>* %B
-  %tmp3 = load <2 x i64>* %C
+  %load1 = load <4 x i32>, <4 x i32>* %A
+  %load2 = load <4 x i32>, <4 x i32>* %B
+  %tmp3 = load <2 x i64>, <2 x i64>* %C
   %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -399,9 +399,9 @@
 define <4 x i32> @umlal4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: umlal4s:
 ;CHECK: umlal.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
-  %tmp3 = load <4 x i32>* %C
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %tmp3 = load <4 x i32>, <4 x i32>* %C
   %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
   %tmp5 = add <4 x i32> %tmp3, %tmp4
   ret <4 x i32> %tmp5
@@ -410,9 +410,9 @@
 define <2 x i64> @umlal2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
 ;CHECK-LABEL: umlal2d:
 ;CHECK: umlal.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
-  %tmp3 = load <2 x i64>* %C
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
+  %tmp3 = load <2 x i64>, <2 x i64>* %C
   %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
   %tmp5 = add <2 x i64> %tmp3, %tmp4
   ret <2 x i64> %tmp5
@@ -421,9 +421,9 @@
 define <4 x i32> @umlsl4s(<4 x i16>* %A, <4 x i16>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: umlsl4s:
 ;CHECK: umlsl.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
-  %tmp3 = load <4 x i32>* %C
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %tmp3 = load <4 x i32>, <4 x i32>* %C
   %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
   %tmp5 = sub <4 x i32> %tmp3, %tmp4
   ret <4 x i32> %tmp5
@@ -432,9 +432,9 @@
 define <2 x i64> @umlsl2d(<2 x i32>* %A, <2 x i32>* %B, <2 x i64>* %C) nounwind {
 ;CHECK-LABEL: umlsl2d:
 ;CHECK: umlsl.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
-  %tmp3 = load <2 x i64>* %C
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
+  %tmp3 = load <2 x i64>, <2 x i64>* %C
   %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
   %tmp5 = sub <2 x i64> %tmp3, %tmp4
   ret <2 x i64> %tmp5
@@ -443,9 +443,9 @@
 define <2 x float> @fmla_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
 ;CHECK-LABEL: fmla_2s:
 ;CHECK: fmla.2s
-  %tmp1 = load <2 x float>* %A
-  %tmp2 = load <2 x float>* %B
-  %tmp3 = load <2 x float>* %C
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
+  %tmp3 = load <2 x float>, <2 x float>* %C
   %tmp4 = call <2 x float> @llvm.fma.v2f32(<2 x float> %tmp1, <2 x float> %tmp2, <2 x float> %tmp3)
   ret <2 x float> %tmp4
 }
@@ -453,9 +453,9 @@
 define <4 x float> @fmla_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
 ;CHECK-LABEL: fmla_4s:
 ;CHECK: fmla.4s
-  %tmp1 = load <4 x float>* %A
-  %tmp2 = load <4 x float>* %B
-  %tmp3 = load <4 x float>* %C
+  %tmp1 = load <4 x float>, <4 x float>* %A
+  %tmp2 = load <4 x float>, <4 x float>* %B
+  %tmp3 = load <4 x float>, <4 x float>* %C
   %tmp4 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp1, <4 x float> %tmp2, <4 x float> %tmp3)
   ret <4 x float> %tmp4
 }
@@ -463,9 +463,9 @@
 define <2 x double> @fmla_2d(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
 ;CHECK-LABEL: fmla_2d:
 ;CHECK: fmla.2d
-  %tmp1 = load <2 x double>* %A
-  %tmp2 = load <2 x double>* %B
-  %tmp3 = load <2 x double>* %C
+  %tmp1 = load <2 x double>, <2 x double>* %A
+  %tmp2 = load <2 x double>, <2 x double>* %B
+  %tmp3 = load <2 x double>, <2 x double>* %C
   %tmp4 = call <2 x double> @llvm.fma.v2f64(<2 x double> %tmp1, <2 x double> %tmp2, <2 x double> %tmp3)
   ret <2 x double> %tmp4
 }
@@ -477,9 +477,9 @@
 define <2 x float> @fmls_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
 ;CHECK-LABEL: fmls_2s:
 ;CHECK: fmls.2s
-  %tmp1 = load <2 x float>* %A
-  %tmp2 = load <2 x float>* %B
-  %tmp3 = load <2 x float>* %C
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
+  %tmp3 = load <2 x float>, <2 x float>* %C
   %tmp4 = fsub <2 x float> <float -0.0, float -0.0>, %tmp2
   %tmp5 = call <2 x float> @llvm.fma.v2f32(<2 x float> %tmp1, <2 x float> %tmp4, <2 x float> %tmp3)
   ret <2 x float> %tmp5
@@ -488,9 +488,9 @@
 define <4 x float> @fmls_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
 ;CHECK-LABEL: fmls_4s:
 ;CHECK: fmls.4s
-  %tmp1 = load <4 x float>* %A
-  %tmp2 = load <4 x float>* %B
-  %tmp3 = load <4 x float>* %C
+  %tmp1 = load <4 x float>, <4 x float>* %A
+  %tmp2 = load <4 x float>, <4 x float>* %B
+  %tmp3 = load <4 x float>, <4 x float>* %C
   %tmp4 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %tmp2
   %tmp5 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp1, <4 x float> %tmp4, <4 x float> %tmp3)
   ret <4 x float> %tmp5
@@ -499,9 +499,9 @@
 define <2 x double> @fmls_2d(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
 ;CHECK-LABEL: fmls_2d:
 ;CHECK: fmls.2d
-  %tmp1 = load <2 x double>* %A
-  %tmp2 = load <2 x double>* %B
-  %tmp3 = load <2 x double>* %C
+  %tmp1 = load <2 x double>, <2 x double>* %A
+  %tmp2 = load <2 x double>, <2 x double>* %B
+  %tmp3 = load <2 x double>, <2 x double>* %C
   %tmp4 = fsub <2 x double> <double -0.0, double -0.0>, %tmp2
   %tmp5 = call <2 x double> @llvm.fma.v2f64(<2 x double> %tmp1, <2 x double> %tmp4, <2 x double> %tmp3)
   ret <2 x double> %tmp5
@@ -510,9 +510,9 @@
 define <2 x float> @fmls_commuted_neg_2s(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
 ;CHECK-LABEL: fmls_commuted_neg_2s:
 ;CHECK: fmls.2s
-  %tmp1 = load <2 x float>* %A
-  %tmp2 = load <2 x float>* %B
-  %tmp3 = load <2 x float>* %C
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
+  %tmp3 = load <2 x float>, <2 x float>* %C
   %tmp4 = fsub <2 x float> <float -0.0, float -0.0>, %tmp2
   %tmp5 = call <2 x float> @llvm.fma.v2f32(<2 x float> %tmp4, <2 x float> %tmp1, <2 x float> %tmp3)
   ret <2 x float> %tmp5
@@ -521,9 +521,9 @@
 define <4 x float> @fmls_commuted_neg_4s(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
 ;CHECK-LABEL: fmls_commuted_neg_4s:
 ;CHECK: fmls.4s
-  %tmp1 = load <4 x float>* %A
-  %tmp2 = load <4 x float>* %B
-  %tmp3 = load <4 x float>* %C
+  %tmp1 = load <4 x float>, <4 x float>* %A
+  %tmp2 = load <4 x float>, <4 x float>* %B
+  %tmp3 = load <4 x float>, <4 x float>* %C
   %tmp4 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %tmp2
   %tmp5 = call <4 x float> @llvm.fma.v4f32(<4 x float> %tmp4, <4 x float> %tmp1, <4 x float> %tmp3)
   ret <4 x float> %tmp5
@@ -532,9 +532,9 @@
 define <2 x double> @fmls_commuted_neg_2d(<2 x double>* %A, <2 x double>* %B, <2 x double>* %C) nounwind {
 ;CHECK-LABEL: fmls_commuted_neg_2d:
 ;CHECK: fmls.2d
-  %tmp1 = load <2 x double>* %A
-  %tmp2 = load <2 x double>* %B
-  %tmp3 = load <2 x double>* %C
+  %tmp1 = load <2 x double>, <2 x double>* %A
+  %tmp2 = load <2 x double>, <2 x double>* %B
+  %tmp3 = load <2 x double>, <2 x double>* %C
   %tmp4 = fsub <2 x double> <double -0.0, double -0.0>, %tmp2
   %tmp5 = call <2 x double> @llvm.fma.v2f64(<2 x double> %tmp4, <2 x double> %tmp1, <2 x double> %tmp3)
   ret <2 x double> %tmp5
@@ -609,8 +609,8 @@
 ;CHECK-LABEL: mul_4h:
 ;CHECK-NOT: dup
 ;CHECK: mul.4h
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp4 = mul <4 x i16> %tmp1, %tmp3
   ret <4 x i16> %tmp4
@@ -620,8 +620,8 @@
 ;CHECK-LABEL: mul_8h:
 ;CHECK-NOT: dup
 ;CHECK: mul.8h
-  %tmp1 = load <8 x i16>* %A
-  %tmp2 = load <8 x i16>* %B
+  %tmp1 = load <8 x i16>, <8 x i16>* %A
+  %tmp2 = load <8 x i16>, <8 x i16>* %B
   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   %tmp4 = mul <8 x i16> %tmp1, %tmp3
   ret <8 x i16> %tmp4
@@ -631,8 +631,8 @@
 ;CHECK-LABEL: mul_2s:
 ;CHECK-NOT: dup
 ;CHECK: mul.2s
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
   %tmp4 = mul <2 x i32> %tmp1, %tmp3
   ret <2 x i32> %tmp4
@@ -642,8 +642,8 @@
 ;CHECK-LABEL: mul_4s:
 ;CHECK-NOT: dup
 ;CHECK: mul.4s
-  %tmp1 = load <4 x i32>* %A
-  %tmp2 = load <4 x i32>* %B
+  %tmp1 = load <4 x i32>, <4 x i32>* %A
+  %tmp2 = load <4 x i32>, <4 x i32>* %B
   %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp4 = mul <4 x i32> %tmp1, %tmp3
   ret <4 x i32> %tmp4
@@ -661,8 +661,8 @@
 ;CHECK-LABEL: fmul_lane_2s:
 ;CHECK-NOT: dup
 ;CHECK: fmul.2s
-  %tmp1 = load <2 x float>* %A
-  %tmp2 = load <2 x float>* %B
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
   %tmp3 = shufflevector <2 x float> %tmp2, <2 x float> %tmp2, <2 x i32> <i32 1, i32 1>
   %tmp4 = fmul <2 x float> %tmp1, %tmp3
   ret <2 x float> %tmp4
@@ -672,8 +672,8 @@
 ;CHECK-LABEL: fmul_lane_4s:
 ;CHECK-NOT: dup
 ;CHECK: fmul.4s
-  %tmp1 = load <4 x float>* %A
-  %tmp2 = load <4 x float>* %B
+  %tmp1 = load <4 x float>, <4 x float>* %A
+  %tmp2 = load <4 x float>, <4 x float>* %B
   %tmp3 = shufflevector <4 x float> %tmp2, <4 x float> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp4 = fmul <4 x float> %tmp1, %tmp3
   ret <4 x float> %tmp4
@@ -683,8 +683,8 @@
 ;CHECK-LABEL: fmul_lane_2d:
 ;CHECK-NOT: dup
 ;CHECK: fmul.2d
-  %tmp1 = load <2 x double>* %A
-  %tmp2 = load <2 x double>* %B
+  %tmp1 = load <2 x double>, <2 x double>* %A
+  %tmp2 = load <2 x double>, <2 x double>* %B
   %tmp3 = shufflevector <2 x double> %tmp2, <2 x double> %tmp2, <2 x i32> <i32 1, i32 1>
   %tmp4 = fmul <2 x double> %tmp1, %tmp3
   ret <2 x double> %tmp4
@@ -714,8 +714,8 @@
 ;CHECK-LABEL: fmulx_lane_2s:
 ;CHECK-NOT: dup
 ;CHECK: fmulx.2s
-  %tmp1 = load <2 x float>* %A
-  %tmp2 = load <2 x float>* %B
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
   %tmp3 = shufflevector <2 x float> %tmp2, <2 x float> %tmp2, <2 x i32> <i32 1, i32 1>
   %tmp4 = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %tmp1, <2 x float> %tmp3)
   ret <2 x float> %tmp4
@@ -725,8 +725,8 @@
 ;CHECK-LABEL: fmulx_lane_4s:
 ;CHECK-NOT: dup
 ;CHECK: fmulx.4s
-  %tmp1 = load <4 x float>* %A
-  %tmp2 = load <4 x float>* %B
+  %tmp1 = load <4 x float>, <4 x float>* %A
+  %tmp2 = load <4 x float>, <4 x float>* %B
   %tmp3 = shufflevector <4 x float> %tmp2, <4 x float> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp4 = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %tmp1, <4 x float> %tmp3)
   ret <4 x float> %tmp4
@@ -736,8 +736,8 @@
 ;CHECK-LABEL: fmulx_lane_2d:
 ;CHECK-NOT: dup
 ;CHECK: fmulx.2d
-  %tmp1 = load <2 x double>* %A
-  %tmp2 = load <2 x double>* %B
+  %tmp1 = load <2 x double>, <2 x double>* %A
+  %tmp2 = load <2 x double>, <2 x double>* %B
   %tmp3 = shufflevector <2 x double> %tmp2, <2 x double> %tmp2, <2 x i32> <i32 1, i32 1>
   %tmp4 = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %tmp1, <2 x double> %tmp3)
   ret <2 x double> %tmp4
@@ -747,8 +747,8 @@
 ;CHECK-LABEL: sqdmulh_lane_4h:
 ;CHECK-NOT: dup
 ;CHECK: sqdmulh.4h
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp4 = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3)
   ret <4 x i16> %tmp4
@@ -758,8 +758,8 @@
 ;CHECK-LABEL: sqdmulh_lane_8h:
 ;CHECK-NOT: dup
 ;CHECK: sqdmulh.8h
-  %tmp1 = load <8 x i16>* %A
-  %tmp2 = load <8 x i16>* %B
+  %tmp1 = load <8 x i16>, <8 x i16>* %A
+  %tmp2 = load <8 x i16>, <8 x i16>* %B
   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   %tmp4 = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3)
   ret <8 x i16> %tmp4
@@ -769,8 +769,8 @@
 ;CHECK-LABEL: sqdmulh_lane_2s:
 ;CHECK-NOT: dup
 ;CHECK: sqdmulh.2s
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
   %tmp4 = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3)
   ret <2 x i32> %tmp4
@@ -780,8 +780,8 @@
 ;CHECK-LABEL: sqdmulh_lane_4s:
 ;CHECK-NOT: dup
 ;CHECK: sqdmulh.4s
-  %tmp1 = load <4 x i32>* %A
-  %tmp2 = load <4 x i32>* %B
+  %tmp1 = load <4 x i32>, <4 x i32>* %A
+  %tmp2 = load <4 x i32>, <4 x i32>* %B
   %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3)
   ret <4 x i32> %tmp4
@@ -800,8 +800,8 @@
 ;CHECK-LABEL: sqrdmulh_lane_4h:
 ;CHECK-NOT: dup
 ;CHECK: sqrdmulh.4h
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp4 = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp3)
   ret <4 x i16> %tmp4
@@ -811,8 +811,8 @@
 ;CHECK-LABEL: sqrdmulh_lane_8h:
 ;CHECK-NOT: dup
 ;CHECK: sqrdmulh.8h
-  %tmp1 = load <8 x i16>* %A
-  %tmp2 = load <8 x i16>* %B
+  %tmp1 = load <8 x i16>, <8 x i16>* %A
+  %tmp2 = load <8 x i16>, <8 x i16>* %B
   %tmp3 = shufflevector <8 x i16> %tmp2, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   %tmp4 = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp3)
   ret <8 x i16> %tmp4
@@ -822,8 +822,8 @@
 ;CHECK-LABEL: sqrdmulh_lane_2s:
 ;CHECK-NOT: dup
 ;CHECK: sqrdmulh.2s
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
   %tmp4 = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp3)
   ret <2 x i32> %tmp4
@@ -833,8 +833,8 @@
 ;CHECK-LABEL: sqrdmulh_lane_4s:
 ;CHECK-NOT: dup
 ;CHECK: sqrdmulh.4s
-  %tmp1 = load <4 x i32>* %A
-  %tmp2 = load <4 x i32>* %B
+  %tmp1 = load <4 x i32>, <4 x i32>* %A
+  %tmp2 = load <4 x i32>, <4 x i32>* %B
   %tmp3 = shufflevector <4 x i32> %tmp2, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp3)
   ret <4 x i32> %tmp4
@@ -853,8 +853,8 @@
 ;CHECK-LABEL: sqdmull_lane_4s:
 ;CHECK-NOT: dup
 ;CHECK: sqdmull.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
   ret <4 x i32> %tmp4
@@ -864,8 +864,8 @@
 ;CHECK-LABEL: sqdmull_lane_2d:
 ;CHECK-NOT: dup
 ;CHECK: sqdmull.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
   %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
   ret <2 x i64> %tmp4
@@ -875,8 +875,8 @@
 ;CHECK-LABEL: sqdmull2_lane_4s:
 ;CHECK-NOT: dup
 ;CHECK: sqdmull2.4s
-  %load1 = load <8 x i16>* %A
-  %load2 = load <8 x i16>* %B
+  %load1 = load <8 x i16>, <8 x i16>* %A
+  %load2 = load <8 x i16>, <8 x i16>* %B
   %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp4 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -887,8 +887,8 @@
 ;CHECK-LABEL: sqdmull2_lane_2d:
 ;CHECK-NOT: dup
 ;CHECK: sqdmull2.2d
-  %load1 = load <4 x i32>* %A
-  %load2 = load <4 x i32>* %B
+  %load1 = load <4 x i32>, <4 x i32>* %A
+  %load2 = load <4 x i32>, <4 x i32>* %B
   %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
   %tmp4 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -899,8 +899,8 @@
 ;CHECK-LABEL: umull_lane_4s:
 ;CHECK-NOT: dup
 ;CHECK: umull.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp4 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
   ret <4 x i32> %tmp4
@@ -910,8 +910,8 @@
 ;CHECK-LABEL: umull_lane_2d:
 ;CHECK-NOT: dup
 ;CHECK: umull.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
   %tmp4 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
   ret <2 x i64> %tmp4
@@ -921,8 +921,8 @@
 ;CHECK-LABEL: smull_lane_4s:
 ;CHECK-NOT: dup
 ;CHECK: smull.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp4 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp3)
   ret <4 x i32> %tmp4
@@ -932,8 +932,8 @@
 ;CHECK-LABEL: smull_lane_2d:
 ;CHECK-NOT: dup
 ;CHECK: smull.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
   %tmp4 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp3)
   ret <2 x i64> %tmp4
@@ -943,9 +943,9 @@
 ;CHECK-LABEL: smlal_lane_4s:
 ;CHECK-NOT: dup
 ;CHECK: smlal.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
-  %tmp3 = load <4 x i32>* %C
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %tmp3 = load <4 x i32>, <4 x i32>* %C
   %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
   %tmp6 = add <4 x i32> %tmp3, %tmp5
@@ -956,9 +956,9 @@
 ;CHECK-LABEL: smlal_lane_2d:
 ;CHECK-NOT: dup
 ;CHECK: smlal.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
-  %tmp3 = load <2 x i64>* %C
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
+  %tmp3 = load <2 x i64>, <2 x i64>* %C
   %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
   %tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
   %tmp6 = add <2 x i64> %tmp3, %tmp5
@@ -969,9 +969,9 @@
 ;CHECK-LABEL: sqdmlal_lane_4s:
 ;CHECK-NOT: dup
 ;CHECK: sqdmlal.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
-  %tmp3 = load <4 x i32>* %C
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %tmp3 = load <4 x i32>, <4 x i32>* %C
   %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
   %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
@@ -982,9 +982,9 @@
 ;CHECK-LABEL: sqdmlal_lane_2d:
 ;CHECK-NOT: dup
 ;CHECK: sqdmlal.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
-  %tmp3 = load <2 x i64>* %C
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
+  %tmp3 = load <2 x i64>, <2 x i64>* %C
   %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
   %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
   %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
@@ -995,9 +995,9 @@
 ;CHECK-LABEL: sqdmlal2_lane_4s:
 ;CHECK-NOT: dup
 ;CHECK: sqdmlal2.4s
-  %load1 = load <8 x i16>* %A
-  %load2 = load <8 x i16>* %B
-  %tmp3 = load <4 x i32>* %C
+  %load1 = load <8 x i16>, <8 x i16>* %A
+  %load2 = load <8 x i16>, <8 x i16>* %B
+  %tmp3 = load <4 x i32>, <4 x i32>* %C
   %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -1009,9 +1009,9 @@
 ;CHECK-LABEL: sqdmlal2_lane_2d:
 ;CHECK-NOT: dup
 ;CHECK: sqdmlal2.2d
-  %load1 = load <4 x i32>* %A
-  %load2 = load <4 x i32>* %B
-  %tmp3 = load <2 x i64>* %C
+  %load1 = load <4 x i32>, <4 x i32>* %A
+  %load2 = load <4 x i32>, <4 x i32>* %B
+  %tmp3 = load <2 x i64>, <2 x i64>* %C
   %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
   %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -1069,9 +1069,9 @@
 ;CHECK-LABEL: umlal_lane_4s:
 ;CHECK-NOT: dup
 ;CHECK: umlal.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
-  %tmp3 = load <4 x i32>* %C
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %tmp3 = load <4 x i32>, <4 x i32>* %C
   %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
   %tmp6 = add <4 x i32> %tmp3, %tmp5
@@ -1082,9 +1082,9 @@
 ;CHECK-LABEL: umlal_lane_2d:
 ;CHECK-NOT: dup
 ;CHECK: umlal.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
-  %tmp3 = load <2 x i64>* %C
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
+  %tmp3 = load <2 x i64>, <2 x i64>* %C
   %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
   %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
   %tmp6 = add <2 x i64> %tmp3, %tmp5
@@ -1096,9 +1096,9 @@
 ;CHECK-LABEL: smlsl_lane_4s:
 ;CHECK-NOT: dup
 ;CHECK: smlsl.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
-  %tmp3 = load <4 x i32>* %C
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %tmp3 = load <4 x i32>, <4 x i32>* %C
   %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp5 = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
   %tmp6 = sub <4 x i32> %tmp3, %tmp5
@@ -1109,9 +1109,9 @@
 ;CHECK-LABEL: smlsl_lane_2d:
 ;CHECK-NOT: dup
 ;CHECK: smlsl.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
-  %tmp3 = load <2 x i64>* %C
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
+  %tmp3 = load <2 x i64>, <2 x i64>* %C
   %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
   %tmp5 = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
   %tmp6 = sub <2 x i64> %tmp3, %tmp5
@@ -1122,9 +1122,9 @@
 ;CHECK-LABEL: sqdmlsl_lane_4s:
 ;CHECK-NOT: dup
 ;CHECK: sqdmlsl.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
-  %tmp3 = load <4 x i32>* %C
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %tmp3 = load <4 x i32>, <4 x i32>* %C
   %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
   %tmp6 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp3, <4 x i32> %tmp5)
@@ -1135,9 +1135,9 @@
 ;CHECK-LABEL: sqdmlsl_lane_2d:
 ;CHECK-NOT: dup
 ;CHECK: sqdmlsl.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
-  %tmp3 = load <2 x i64>* %C
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
+  %tmp3 = load <2 x i64>, <2 x i64>* %C
   %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
   %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
   %tmp6 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp3, <2 x i64> %tmp5)
@@ -1148,9 +1148,9 @@
 ;CHECK-LABEL: sqdmlsl2_lane_4s:
 ;CHECK-NOT: dup
 ;CHECK: sqdmlsl2.4s
-  %load1 = load <8 x i16>* %A
-  %load2 = load <8 x i16>* %B
-  %tmp3 = load <4 x i32>* %C
+  %load1 = load <8 x i16>, <8 x i16>* %A
+  %load2 = load <8 x i16>, <8 x i16>* %B
+  %tmp3 = load <4 x i32>, <4 x i32>* %C
   %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %tmp2 = shufflevector <8 x i16> %load2, <8 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp5 = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -1162,9 +1162,9 @@
 ;CHECK-LABEL: sqdmlsl2_lane_2d:
 ;CHECK-NOT: dup
 ;CHECK: sqdmlsl2.2d
-  %load1 = load <4 x i32>* %A
-  %load2 = load <4 x i32>* %B
-  %tmp3 = load <2 x i64>* %C
+  %load1 = load <4 x i32>, <4 x i32>* %A
+  %load2 = load <4 x i32>, <4 x i32>* %B
+  %tmp3 = load <2 x i64>, <2 x i64>* %C
   %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %tmp2 = shufflevector <4 x i32> %load2, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
   %tmp5 = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
@@ -1176,9 +1176,9 @@
 ;CHECK-LABEL: umlsl_lane_4s:
 ;CHECK-NOT: dup
 ;CHECK: umlsl.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
-  %tmp3 = load <4 x i32>* %C
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
+  %tmp3 = load <4 x i32>, <4 x i32>* %C
   %tmp4 = shufflevector <4 x i16> %tmp2, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   %tmp5 = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp4)
   %tmp6 = sub <4 x i32> %tmp3, %tmp5
@@ -1189,9 +1189,9 @@
 ;CHECK-LABEL: umlsl_lane_2d:
 ;CHECK-NOT: dup
 ;CHECK: umlsl.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
-  %tmp3 = load <2 x i64>* %C
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
+  %tmp3 = load <2 x i64>, <2 x i64>* %C
   %tmp4 = shufflevector <2 x i32> %tmp2, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 1>
   %tmp5 = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp4)
   %tmp6 = sub <2 x i64> %tmp3, %tmp5
diff --git a/llvm/test/CodeGen/AArch64/arm64-volatile.ll b/llvm/test/CodeGen/AArch64/arm64-volatile.ll
index 721d4fb..28facb6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-volatile.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-volatile.ll
@@ -5,9 +5,9 @@
 ; CHECK-NEXT: add
 ; CHECK-NEXT: ret
   %add.ptr = getelementptr inbounds i64, i64* %bar, i64 1
-  %tmp = load i64* %add.ptr, align 8
+  %tmp = load i64, i64* %add.ptr, align 8
   %add.ptr1 = getelementptr inbounds i64, i64* %bar, i64 2
-  %tmp1 = load i64* %add.ptr1, align 8
+  %tmp1 = load i64, i64* %add.ptr1, align 8
   %add = add nsw i64 %tmp1, %tmp
   ret i64 %add
 }
@@ -19,9 +19,9 @@
 ; CHECK-NEXT: add
 ; CHECK-NEXT: ret
   %add.ptr = getelementptr inbounds i64, i64* %bar, i64 1
-  %tmp = load volatile i64* %add.ptr, align 8
+  %tmp = load volatile i64, i64* %add.ptr, align 8
   %add.ptr1 = getelementptr inbounds i64, i64* %bar, i64 2
-  %tmp1 = load volatile i64* %add.ptr1, align 8
+  %tmp1 = load volatile i64, i64* %add.ptr1, align 8
   %add = add nsw i64 %tmp1, %tmp
   ret i64 %add
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-vqadd.ll b/llvm/test/CodeGen/AArch64/arm64-vqadd.ll
index 20f7e2c..9932899 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vqadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vqadd.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @sqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: sqadd8b:
 ;CHECK: sqadd.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @sqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: sqadd4h:
 ;CHECK: sqadd.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @sqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: sqadd2s:
 ;CHECK: sqadd.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -30,8 +30,8 @@
 define <8 x i8> @uqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: uqadd8b:
 ;CHECK: uqadd.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -39,8 +39,8 @@
 define <4 x i16> @uqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: uqadd4h:
 ;CHECK: uqadd.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -48,8 +48,8 @@
 define <2 x i32> @uqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: uqadd2s:
 ;CHECK: uqadd.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -57,8 +57,8 @@
 define <16 x i8> @sqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: sqadd16b:
 ;CHECK: sqadd.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -66,8 +66,8 @@
 define <8 x i16> @sqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: sqadd8h:
 ;CHECK: sqadd.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -75,8 +75,8 @@
 define <4 x i32> @sqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: sqadd4s:
 ;CHECK: sqadd.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -84,8 +84,8 @@
 define <2 x i64> @sqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: sqadd2d:
 ;CHECK: sqadd.2d
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
@@ -93,8 +93,8 @@
 define <16 x i8> @uqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: uqadd16b:
 ;CHECK: uqadd.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -102,8 +102,8 @@
 define <8 x i16> @uqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: uqadd8h:
 ;CHECK: uqadd.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -111,8 +111,8 @@
 define <4 x i32> @uqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: uqadd4s:
 ;CHECK: uqadd.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -120,8 +120,8 @@
 define <2 x i64> @uqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: uqadd2d:
 ;CHECK: uqadd.2d
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
@@ -149,8 +149,8 @@
 define <8 x i8> @usqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: usqadd8b:
 ;CHECK: usqadd.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -158,8 +158,8 @@
 define <4 x i16> @usqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: usqadd4h:
 ;CHECK: usqadd.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -167,8 +167,8 @@
 define <2 x i32> @usqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: usqadd2s:
 ;CHECK: usqadd.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -176,8 +176,8 @@
 define <16 x i8> @usqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: usqadd16b:
 ;CHECK: usqadd.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -185,8 +185,8 @@
 define <8 x i16> @usqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: usqadd8h:
 ;CHECK: usqadd.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -194,8 +194,8 @@
 define <4 x i32> @usqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: usqadd4s:
 ;CHECK: usqadd.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -203,8 +203,8 @@
 define <2 x i64> @usqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: usqadd2d:
 ;CHECK: usqadd.2d
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
@@ -238,8 +238,8 @@
 define <8 x i8> @suqadd8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: suqadd8b:
 ;CHECK: suqadd.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -247,8 +247,8 @@
 define <4 x i16> @suqadd4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: suqadd4h:
 ;CHECK: suqadd.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -256,8 +256,8 @@
 define <2 x i32> @suqadd2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: suqadd2s:
 ;CHECK: suqadd.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -265,8 +265,8 @@
 define <16 x i8> @suqadd16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: suqadd16b:
 ;CHECK: suqadd.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -274,8 +274,8 @@
 define <8 x i16> @suqadd8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: suqadd8h:
 ;CHECK: suqadd.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -283,8 +283,8 @@
 define <4 x i32> @suqadd4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: suqadd4s:
 ;CHECK: suqadd.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -292,8 +292,8 @@
 define <2 x i64> @suqadd2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: suqadd2d:
 ;CHECK: suqadd.2d
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-vqsub.ll b/llvm/test/CodeGen/AArch64/arm64-vqsub.ll
index dde3ac3..4fc588d 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vqsub.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vqsub.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @sqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: sqsub8b:
 ;CHECK: sqsub.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @sqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: sqsub4h:
 ;CHECK: sqsub.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @sqsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: sqsub2s:
 ;CHECK: sqsub.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -30,8 +30,8 @@
 define <8 x i8> @uqsub8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: uqsub8b:
 ;CHECK: uqsub.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -39,8 +39,8 @@
 define <4 x i16> @uqsub4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: uqsub4h:
 ;CHECK: uqsub.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -48,8 +48,8 @@
 define <2 x i32> @uqsub2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: uqsub2s:
 ;CHECK: uqsub.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -57,8 +57,8 @@
 define <16 x i8> @sqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: sqsub16b:
 ;CHECK: sqsub.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -66,8 +66,8 @@
 define <8 x i16> @sqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: sqsub8h:
 ;CHECK: sqsub.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -75,8 +75,8 @@
 define <4 x i32> @sqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: sqsub4s:
 ;CHECK: sqsub.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -84,8 +84,8 @@
 define <2 x i64> @sqsub2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: sqsub2d:
 ;CHECK: sqsub.2d
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
@@ -93,8 +93,8 @@
 define <16 x i8> @uqsub16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: uqsub16b:
 ;CHECK: uqsub.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -102,8 +102,8 @@
 define <8 x i16> @uqsub8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: uqsub8h:
 ;CHECK: uqsub.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -111,8 +111,8 @@
 define <4 x i32> @uqsub4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: uqsub4s:
 ;CHECK: uqsub.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -120,8 +120,8 @@
 define <2 x i64> @uqsub2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: uqsub2d:
 ;CHECK: uqsub.2d
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
index 65bd50c..d5a1248 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @sqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: sqshl8b:
 ;CHECK: sqshl.8b
-        %tmp1 = load <8 x i8>* %A
-        %tmp2 = load <8 x i8>* %B
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @sqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: sqshl4h:
 ;CHECK: sqshl.4h
-        %tmp1 = load <4 x i16>* %A
-        %tmp2 = load <4 x i16>* %B
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @sqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: sqshl2s:
 ;CHECK: sqshl.2s
-        %tmp1 = load <2 x i32>* %A
-        %tmp2 = load <2 x i32>* %B
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         ret <2 x i32> %tmp3
 }
@@ -30,8 +30,8 @@
 define <8 x i8> @uqshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: uqshl8b:
 ;CHECK: uqshl.8b
-        %tmp1 = load <8 x i8>* %A
-        %tmp2 = load <8 x i8>* %B
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         ret <8 x i8> %tmp3
 }
@@ -39,8 +39,8 @@
 define <4 x i16> @uqshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: uqshl4h:
 ;CHECK: uqshl.4h
-        %tmp1 = load <4 x i16>* %A
-        %tmp2 = load <4 x i16>* %B
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         ret <4 x i16> %tmp3
 }
@@ -48,8 +48,8 @@
 define <2 x i32> @uqshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: uqshl2s:
 ;CHECK: uqshl.2s
-        %tmp1 = load <2 x i32>* %A
-        %tmp2 = load <2 x i32>* %B
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         ret <2 x i32> %tmp3
 }
@@ -57,8 +57,8 @@
 define <16 x i8> @sqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: sqshl16b:
 ;CHECK: sqshl.16b
-        %tmp1 = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
         ret <16 x i8> %tmp3
 }
@@ -66,8 +66,8 @@
 define <8 x i16> @sqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: sqshl8h:
 ;CHECK: sqshl.8h
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i16> %tmp3
 }
@@ -75,8 +75,8 @@
 define <4 x i32> @sqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: sqshl4s:
 ;CHECK: sqshl.4s
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i32> %tmp3
 }
@@ -84,8 +84,8 @@
 define <2 x i64> @sqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: sqshl2d:
 ;CHECK: sqshl.2d
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i64>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i64>, <2 x i64>* %B
         %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
         ret <2 x i64> %tmp3
 }
@@ -93,8 +93,8 @@
 define <16 x i8> @uqshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: uqshl16b:
 ;CHECK: uqshl.16b
-        %tmp1 = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
         ret <16 x i8> %tmp3
 }
@@ -102,8 +102,8 @@
 define <8 x i16> @uqshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: uqshl8h:
 ;CHECK: uqshl.8h
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i16> %tmp3
 }
@@ -111,8 +111,8 @@
 define <4 x i32> @uqshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: uqshl4s:
 ;CHECK: uqshl.4s
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i32> %tmp3
 }
@@ -120,8 +120,8 @@
 define <2 x i64> @uqshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: uqshl2d:
 ;CHECK: uqshl.2d
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i64>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i64>, <2 x i64>* %B
         %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
         ret <2 x i64> %tmp3
 }
@@ -149,8 +149,8 @@
 define <8 x i8> @srshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: srshl8b:
 ;CHECK: srshl.8b
-        %tmp1 = load <8 x i8>* %A
-        %tmp2 = load <8 x i8>* %B
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         ret <8 x i8> %tmp3
 }
@@ -158,8 +158,8 @@
 define <4 x i16> @srshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: srshl4h:
 ;CHECK: srshl.4h
-        %tmp1 = load <4 x i16>* %A
-        %tmp2 = load <4 x i16>* %B
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         ret <4 x i16> %tmp3
 }
@@ -167,8 +167,8 @@
 define <2 x i32> @srshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: srshl2s:
 ;CHECK: srshl.2s
-        %tmp1 = load <2 x i32>* %A
-        %tmp2 = load <2 x i32>* %B
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         ret <2 x i32> %tmp3
 }
@@ -176,8 +176,8 @@
 define <8 x i8> @urshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: urshl8b:
 ;CHECK: urshl.8b
-        %tmp1 = load <8 x i8>* %A
-        %tmp2 = load <8 x i8>* %B
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         ret <8 x i8> %tmp3
 }
@@ -185,8 +185,8 @@
 define <4 x i16> @urshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: urshl4h:
 ;CHECK: urshl.4h
-        %tmp1 = load <4 x i16>* %A
-        %tmp2 = load <4 x i16>* %B
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         ret <4 x i16> %tmp3
 }
@@ -194,8 +194,8 @@
 define <2 x i32> @urshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: urshl2s:
 ;CHECK: urshl.2s
-        %tmp1 = load <2 x i32>* %A
-        %tmp2 = load <2 x i32>* %B
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         ret <2 x i32> %tmp3
 }
@@ -203,8 +203,8 @@
 define <16 x i8> @srshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: srshl16b:
 ;CHECK: srshl.16b
-        %tmp1 = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
         ret <16 x i8> %tmp3
 }
@@ -212,8 +212,8 @@
 define <8 x i16> @srshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: srshl8h:
 ;CHECK: srshl.8h
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i16> %tmp3
 }
@@ -221,8 +221,8 @@
 define <4 x i32> @srshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: srshl4s:
 ;CHECK: srshl.4s
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i32> %tmp3
 }
@@ -230,8 +230,8 @@
 define <2 x i64> @srshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: srshl2d:
 ;CHECK: srshl.2d
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i64>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i64>, <2 x i64>* %B
         %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
         ret <2 x i64> %tmp3
 }
@@ -239,8 +239,8 @@
 define <16 x i8> @urshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: urshl16b:
 ;CHECK: urshl.16b
-        %tmp1 = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
         ret <16 x i8> %tmp3
 }
@@ -248,8 +248,8 @@
 define <8 x i16> @urshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: urshl8h:
 ;CHECK: urshl.8h
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i16> %tmp3
 }
@@ -257,8 +257,8 @@
 define <4 x i32> @urshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: urshl4s:
 ;CHECK: urshl.4s
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i32> %tmp3
 }
@@ -266,8 +266,8 @@
 define <2 x i64> @urshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: urshl2d:
 ;CHECK: urshl.2d
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i64>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i64>, <2 x i64>* %B
         %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
         ret <2 x i64> %tmp3
 }
@@ -295,8 +295,8 @@
 define <8 x i8> @sqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: sqrshl8b:
 ;CHECK: sqrshl.8b
-        %tmp1 = load <8 x i8>* %A
-        %tmp2 = load <8 x i8>* %B
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         ret <8 x i8> %tmp3
 }
@@ -304,8 +304,8 @@
 define <4 x i16> @sqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: sqrshl4h:
 ;CHECK: sqrshl.4h
-        %tmp1 = load <4 x i16>* %A
-        %tmp2 = load <4 x i16>* %B
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         ret <4 x i16> %tmp3
 }
@@ -313,8 +313,8 @@
 define <2 x i32> @sqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: sqrshl2s:
 ;CHECK: sqrshl.2s
-        %tmp1 = load <2 x i32>* %A
-        %tmp2 = load <2 x i32>* %B
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         ret <2 x i32> %tmp3
 }
@@ -322,8 +322,8 @@
 define <8 x i8> @uqrshl8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: uqrshl8b:
 ;CHECK: uqrshl.8b
-        %tmp1 = load <8 x i8>* %A
-        %tmp2 = load <8 x i8>* %B
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         ret <8 x i8> %tmp3
 }
@@ -331,8 +331,8 @@
 define <4 x i16> @uqrshl4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: uqrshl4h:
 ;CHECK: uqrshl.4h
-        %tmp1 = load <4 x i16>* %A
-        %tmp2 = load <4 x i16>* %B
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         ret <4 x i16> %tmp3
 }
@@ -340,8 +340,8 @@
 define <2 x i32> @uqrshl2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: uqrshl2s:
 ;CHECK: uqrshl.2s
-        %tmp1 = load <2 x i32>* %A
-        %tmp2 = load <2 x i32>* %B
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         ret <2 x i32> %tmp3
 }
@@ -349,8 +349,8 @@
 define <16 x i8> @sqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: sqrshl16b:
 ;CHECK: sqrshl.16b
-        %tmp1 = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
         ret <16 x i8> %tmp3
 }
@@ -358,8 +358,8 @@
 define <8 x i16> @sqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: sqrshl8h:
 ;CHECK: sqrshl.8h
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i16> %tmp3
 }
@@ -367,8 +367,8 @@
 define <4 x i32> @sqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: sqrshl4s:
 ;CHECK: sqrshl.4s
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i32> %tmp3
 }
@@ -376,8 +376,8 @@
 define <2 x i64> @sqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: sqrshl2d:
 ;CHECK: sqrshl.2d
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i64>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i64>, <2 x i64>* %B
         %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
         ret <2 x i64> %tmp3
 }
@@ -385,8 +385,8 @@
 define <16 x i8> @uqrshl16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: uqrshl16b:
 ;CHECK: uqrshl.16b
-        %tmp1 = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
         ret <16 x i8> %tmp3
 }
@@ -394,8 +394,8 @@
 define <8 x i16> @uqrshl8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: uqrshl8h:
 ;CHECK: uqrshl.8h
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i16> %tmp3
 }
@@ -403,8 +403,8 @@
 define <4 x i32> @uqrshl4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: uqrshl4s:
 ;CHECK: uqrshl.4s
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i32> %tmp3
 }
@@ -412,8 +412,8 @@
 define <2 x i64> @uqrshl2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: uqrshl2d:
 ;CHECK: uqrshl.2d
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i64>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i64>, <2 x i64>* %B
         %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
         ret <2 x i64> %tmp3
 }
@@ -441,7 +441,7 @@
 define <8 x i8> @urshr8b(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: urshr8b:
 ;CHECK: urshr.8b
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
         ret <8 x i8> %tmp3
 }
@@ -449,7 +449,7 @@
 define <4 x i16> @urshr4h(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: urshr4h:
 ;CHECK: urshr.4h
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
         ret <4 x i16> %tmp3
 }
@@ -457,7 +457,7 @@
 define <2 x i32> @urshr2s(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: urshr2s:
 ;CHECK: urshr.2s
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
         ret <2 x i32> %tmp3
 }
@@ -465,7 +465,7 @@
 define <16 x i8> @urshr16b(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: urshr16b:
 ;CHECK: urshr.16b
-        %tmp1 = load <16 x i8>* %A
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
         ret <16 x i8> %tmp3
 }
@@ -473,7 +473,7 @@
 define <8 x i16> @urshr8h(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: urshr8h:
 ;CHECK: urshr.8h
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
         ret <8 x i16> %tmp3
 }
@@ -481,7 +481,7 @@
 define <4 x i32> @urshr4s(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: urshr4s:
 ;CHECK: urshr.4s
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
         ret <4 x i32> %tmp3
 }
@@ -489,7 +489,7 @@
 define <2 x i64> @urshr2d(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: urshr2d:
 ;CHECK: urshr.2d
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
         ret <2 x i64> %tmp3
 }
@@ -497,7 +497,7 @@
 define <8 x i8> @srshr8b(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: srshr8b:
 ;CHECK: srshr.8b
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
         ret <8 x i8> %tmp3
 }
@@ -505,7 +505,7 @@
 define <4 x i16> @srshr4h(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: srshr4h:
 ;CHECK: srshr.4h
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
         ret <4 x i16> %tmp3
 }
@@ -513,7 +513,7 @@
 define <2 x i32> @srshr2s(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: srshr2s:
 ;CHECK: srshr.2s
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
         ret <2 x i32> %tmp3
 }
@@ -521,7 +521,7 @@
 define <16 x i8> @srshr16b(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: srshr16b:
 ;CHECK: srshr.16b
-        %tmp1 = load <16 x i8>* %A
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
         ret <16 x i8> %tmp3
 }
@@ -529,7 +529,7 @@
 define <8 x i16> @srshr8h(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: srshr8h:
 ;CHECK: srshr.8h
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
         ret <8 x i16> %tmp3
 }
@@ -537,7 +537,7 @@
 define <4 x i32> @srshr4s(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: srshr4s:
 ;CHECK: srshr.4s
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
         ret <4 x i32> %tmp3
 }
@@ -545,7 +545,7 @@
 define <2 x i64> @srshr2d(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: srshr2d:
 ;CHECK: srshr.2d
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
         ret <2 x i64> %tmp3
 }
@@ -553,7 +553,7 @@
 define <8 x i8> @sqshlu8b(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: sqshlu8b:
 ;CHECK: sqshlu.8b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
         ret <8 x i8> %tmp3
 }
@@ -561,7 +561,7 @@
 define <4 x i16> @sqshlu4h(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: sqshlu4h:
 ;CHECK: sqshlu.4h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
         ret <4 x i16> %tmp3
 }
@@ -569,7 +569,7 @@
 define <2 x i32> @sqshlu2s(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: sqshlu2s:
 ;CHECK: sqshlu.2s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
         ret <2 x i32> %tmp3
 }
@@ -577,7 +577,7 @@
 define <16 x i8> @sqshlu16b(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: sqshlu16b:
 ;CHECK: sqshlu.16b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <16 x i8>* %A
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
         ret <16 x i8> %tmp3
 }
@@ -585,7 +585,7 @@
 define <8 x i16> @sqshlu8h(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: sqshlu8h:
 ;CHECK: sqshlu.8h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
         ret <8 x i16> %tmp3
 }
@@ -593,7 +593,7 @@
 define <4 x i32> @sqshlu4s(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: sqshlu4s:
 ;CHECK: sqshlu.4s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
         ret <4 x i32> %tmp3
 }
@@ -601,7 +601,7 @@
 define <2 x i64> @sqshlu2d(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: sqshlu2d:
 ;CHECK: sqshlu.2d v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
         ret <2 x i64> %tmp3
 }
@@ -619,7 +619,7 @@
 define <8 x i8> @rshrn8b(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: rshrn8b:
 ;CHECK: rshrn.8b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
         ret <8 x i8> %tmp3
 }
@@ -627,7 +627,7 @@
 define <4 x i16> @rshrn4h(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: rshrn4h:
 ;CHECK: rshrn.4h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
         ret <4 x i16> %tmp3
 }
@@ -635,7 +635,7 @@
 define <2 x i32> @rshrn2s(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: rshrn2s:
 ;CHECK: rshrn.2s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
         ret <2 x i32> %tmp3
 }
@@ -643,8 +643,8 @@
 define <16 x i8> @rshrn16b(<8 x i8> *%ret, <8 x i16>* %A) nounwind {
 ;CHECK-LABEL: rshrn16b:
 ;CHECK: rshrn2.16b v0, {{v[0-9]+}}, #1
-        %out = load <8 x i8>* %ret
-        %tmp1 = load <8 x i16>* %A
+        %out = load <8 x i8>, <8 x i8>* %ret
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
         %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         ret <16 x i8> %tmp4
@@ -653,8 +653,8 @@
 define <8 x i16> @rshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
 ;CHECK-LABEL: rshrn8h:
 ;CHECK: rshrn2.8h v0, {{v[0-9]+}}, #1
-        %out = load <4 x i16>* %ret
-        %tmp1 = load <4 x i32>* %A
+        %out = load <4 x i16>, <4 x i16>* %ret
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
         %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
         ret <8 x i16> %tmp4
@@ -663,8 +663,8 @@
 define <4 x i32> @rshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
 ;CHECK-LABEL: rshrn4s:
 ;CHECK: rshrn2.4s v0, {{v[0-9]+}}, #1
-        %out = load <2 x i32>* %ret
-        %tmp1 = load <2 x i64>* %A
+        %out = load <2 x i32>, <2 x i32>* %ret
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
         %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
         ret <4 x i32> %tmp4
@@ -677,7 +677,7 @@
 define <8 x i8> @shrn8b(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: shrn8b:
 ;CHECK: shrn.8b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
         %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
         ret <8 x i8> %tmp3
@@ -686,7 +686,7 @@
 define <4 x i16> @shrn4h(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: shrn4h:
 ;CHECK: shrn.4h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
         %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
         ret <4 x i16> %tmp3
@@ -695,7 +695,7 @@
 define <2 x i32> @shrn2s(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: shrn2s:
 ;CHECK: shrn.2s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
         %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
         ret <2 x i32> %tmp3
@@ -704,8 +704,8 @@
 define <16 x i8> @shrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
 ;CHECK-LABEL: shrn16b:
 ;CHECK: shrn2.16b v0, {{v[0-9]+}}, #1
-        %out = load <8 x i8>* %ret
-        %tmp1 = load <8 x i16>* %A
+        %out = load <8 x i8>, <8 x i8>* %ret
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
         %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
         %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -715,8 +715,8 @@
 define <8 x i16> @shrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
 ;CHECK-LABEL: shrn8h:
 ;CHECK: shrn2.8h v0, {{v[0-9]+}}, #1
-        %out = load <4 x i16>* %ret
-        %tmp1 = load <4 x i32>* %A
+        %out = load <4 x i16>, <4 x i16>* %ret
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
         %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
         %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -726,8 +726,8 @@
 define <4 x i32> @shrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
 ;CHECK-LABEL: shrn4s:
 ;CHECK: shrn2.4s v0, {{v[0-9]+}}, #1
-        %out = load <2 x i32>* %ret
-        %tmp1 = load <2 x i64>* %A
+        %out = load <2 x i32>, <2 x i32>* %ret
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
         %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
         %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -748,7 +748,7 @@
 define <8 x i8> @sqshrn8b(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: sqshrn8b:
 ;CHECK: sqshrn.8b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
         ret <8 x i8> %tmp3
 }
@@ -756,7 +756,7 @@
 define <4 x i16> @sqshrn4h(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: sqshrn4h:
 ;CHECK: sqshrn.4h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
         ret <4 x i16> %tmp3
 }
@@ -764,7 +764,7 @@
 define <2 x i32> @sqshrn2s(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: sqshrn2s:
 ;CHECK: sqshrn.2s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
         ret <2 x i32> %tmp3
 }
@@ -773,8 +773,8 @@
 define <16 x i8> @sqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
 ;CHECK-LABEL: sqshrn16b:
 ;CHECK: sqshrn2.16b v0, {{v[0-9]+}}, #1
-        %out = load <8 x i8>* %ret
-        %tmp1 = load <8 x i16>* %A
+        %out = load <8 x i8>, <8 x i8>* %ret
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1)
         %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         ret <16 x i8> %tmp4
@@ -783,8 +783,8 @@
 define <8 x i16> @sqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
 ;CHECK-LABEL: sqshrn8h:
 ;CHECK: sqshrn2.8h v0, {{v[0-9]+}}, #1
-        %out = load <4 x i16>* %ret
-        %tmp1 = load <4 x i32>* %A
+        %out = load <4 x i16>, <4 x i16>* %ret
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1)
         %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
         ret <8 x i16> %tmp4
@@ -793,8 +793,8 @@
 define <4 x i32> @sqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
 ;CHECK-LABEL: sqshrn4s:
 ;CHECK: sqshrn2.4s v0, {{v[0-9]+}}, #1
-        %out = load <2 x i32>* %ret
-        %tmp1 = load <2 x i64>* %A
+        %out = load <2 x i32>, <2 x i32>* %ret
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1)
         %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
         ret <4 x i32> %tmp4
@@ -815,7 +815,7 @@
 define <8 x i8> @sqshrun8b(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: sqshrun8b:
 ;CHECK: sqshrun.8b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
         ret <8 x i8> %tmp3
 }
@@ -823,7 +823,7 @@
 define <4 x i16> @sqshrun4h(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: sqshrun4h:
 ;CHECK: sqshrun.4h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
         ret <4 x i16> %tmp3
 }
@@ -831,7 +831,7 @@
 define <2 x i32> @sqshrun2s(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: sqshrun2s:
 ;CHECK: sqshrun.2s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
         ret <2 x i32> %tmp3
 }
@@ -839,8 +839,8 @@
 define <16 x i8> @sqshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
 ;CHECK-LABEL: sqshrun16b:
 ;CHECK: sqshrun2.16b v0, {{v[0-9]+}}, #1
-        %out = load <8 x i8>* %ret
-        %tmp1 = load <8 x i16>* %A
+        %out = load <8 x i8>, <8 x i8>* %ret
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1)
         %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         ret <16 x i8> %tmp4
@@ -849,8 +849,8 @@
 define <8 x i16> @sqshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
 ;CHECK-LABEL: sqshrun8h:
 ;CHECK: sqshrun2.8h v0, {{v[0-9]+}}, #1
-        %out = load <4 x i16>* %ret
-        %tmp1 = load <4 x i32>* %A
+        %out = load <4 x i16>, <4 x i16>* %ret
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1)
         %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
         ret <8 x i16> %tmp4
@@ -859,8 +859,8 @@
 define <4 x i32> @sqshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
 ;CHECK-LABEL: sqshrun4s:
 ;CHECK: sqshrun2.4s v0, {{v[0-9]+}}, #1
-        %out = load <2 x i32>* %ret
-        %tmp1 = load <2 x i64>* %A
+        %out = load <2 x i32>, <2 x i32>* %ret
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1)
         %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
         ret <4 x i32> %tmp4
@@ -881,7 +881,7 @@
 define <8 x i8> @sqrshrn8b(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: sqrshrn8b:
 ;CHECK: sqrshrn.8b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
         ret <8 x i8> %tmp3
 }
@@ -889,7 +889,7 @@
 define <4 x i16> @sqrshrn4h(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: sqrshrn4h:
 ;CHECK: sqrshrn.4h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
         ret <4 x i16> %tmp3
 }
@@ -897,7 +897,7 @@
 define <2 x i32> @sqrshrn2s(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: sqrshrn2s:
 ;CHECK: sqrshrn.2s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
         ret <2 x i32> %tmp3
 }
@@ -905,8 +905,8 @@
 define <16 x i8> @sqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
 ;CHECK-LABEL: sqrshrn16b:
 ;CHECK: sqrshrn2.16b v0, {{v[0-9]+}}, #1
-        %out = load <8 x i8>* %ret
-        %tmp1 = load <8 x i16>* %A
+        %out = load <8 x i8>, <8 x i8>* %ret
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
         %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         ret <16 x i8> %tmp4
@@ -915,8 +915,8 @@
 define <8 x i16> @sqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
 ;CHECK-LABEL: sqrshrn8h:
 ;CHECK: sqrshrn2.8h v0, {{v[0-9]+}}, #1
-        %out = load <4 x i16>* %ret
-        %tmp1 = load <4 x i32>* %A
+        %out = load <4 x i16>, <4 x i16>* %ret
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
         %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
         ret <8 x i16> %tmp4
@@ -925,8 +925,8 @@
 define <4 x i32> @sqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
 ;CHECK-LABEL: sqrshrn4s:
 ;CHECK: sqrshrn2.4s v0, {{v[0-9]+}}, #1
-        %out = load <2 x i32>* %ret
-        %tmp1 = load <2 x i64>* %A
+        %out = load <2 x i32>, <2 x i32>* %ret
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
         %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
         ret <4 x i32> %tmp4
@@ -947,7 +947,7 @@
 define <8 x i8> @sqrshrun8b(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: sqrshrun8b:
 ;CHECK: sqrshrun.8b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
         ret <8 x i8> %tmp3
 }
@@ -955,7 +955,7 @@
 define <4 x i16> @sqrshrun4h(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: sqrshrun4h:
 ;CHECK: sqrshrun.4h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
         ret <4 x i16> %tmp3
 }
@@ -963,7 +963,7 @@
 define <2 x i32> @sqrshrun2s(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: sqrshrun2s:
 ;CHECK: sqrshrun.2s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
         ret <2 x i32> %tmp3
 }
@@ -971,8 +971,8 @@
 define <16 x i8> @sqrshrun16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
 ;CHECK-LABEL: sqrshrun16b:
 ;CHECK: sqrshrun2.16b v0, {{v[0-9]+}}, #1
-        %out = load <8 x i8>* %ret
-        %tmp1 = load <8 x i16>* %A
+        %out = load <8 x i8>, <8 x i8>* %ret
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1)
         %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         ret <16 x i8> %tmp4
@@ -981,8 +981,8 @@
 define <8 x i16> @sqrshrun8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
 ;CHECK-LABEL: sqrshrun8h:
 ;CHECK: sqrshrun2.8h v0, {{v[0-9]+}}, #1
-        %out = load <4 x i16>* %ret
-        %tmp1 = load <4 x i32>* %A
+        %out = load <4 x i16>, <4 x i16>* %ret
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1)
         %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
         ret <8 x i16> %tmp4
@@ -991,8 +991,8 @@
 define <4 x i32> @sqrshrun4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
 ;CHECK-LABEL: sqrshrun4s:
 ;CHECK: sqrshrun2.4s v0, {{v[0-9]+}}, #1
-        %out = load <2 x i32>* %ret
-        %tmp1 = load <2 x i64>* %A
+        %out = load <2 x i32>, <2 x i32>* %ret
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1)
         %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
         ret <4 x i32> %tmp4
@@ -1013,7 +1013,7 @@
 define <8 x i8> @uqrshrn8b(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: uqrshrn8b:
 ;CHECK: uqrshrn.8b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
         ret <8 x i8> %tmp3
 }
@@ -1021,7 +1021,7 @@
 define <4 x i16> @uqrshrn4h(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: uqrshrn4h:
 ;CHECK: uqrshrn.4h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
         ret <4 x i16> %tmp3
 }
@@ -1029,7 +1029,7 @@
 define <2 x i32> @uqrshrn2s(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: uqrshrn2s:
 ;CHECK: uqrshrn.2s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
         ret <2 x i32> %tmp3
 }
@@ -1037,8 +1037,8 @@
 define <16 x i8> @uqrshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
 ;CHECK-LABEL: uqrshrn16b:
 ;CHECK: uqrshrn2.16b v0, {{v[0-9]+}}, #1
-        %out = load <8 x i8>* %ret
-        %tmp1 = load <8 x i16>* %A
+        %out = load <8 x i8>, <8 x i8>* %ret
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1)
         %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         ret <16 x i8> %tmp4
@@ -1047,8 +1047,8 @@
 define <8 x i16> @uqrshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
 ;CHECK-LABEL: uqrshrn8h:
 ;CHECK: uqrshrn2.8h v0, {{v[0-9]+}}, #1
-        %out = load <4 x i16>* %ret
-        %tmp1 = load <4 x i32>* %A
+        %out = load <4 x i16>, <4 x i16>* %ret
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1)
         %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
         ret <8 x i16> %tmp4
@@ -1057,8 +1057,8 @@
 define <4 x i32> @uqrshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
 ;CHECK-LABEL: uqrshrn4s:
 ;CHECK: uqrshrn2.4s v0, {{v[0-9]+}}, #1
-        %out = load <2 x i32>* %ret
-        %tmp1 = load <2 x i64>* %A
+        %out = load <2 x i32>, <2 x i32>* %ret
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1)
         %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
         ret <4 x i32> %tmp4
@@ -1079,7 +1079,7 @@
 define <8 x i8> @uqshrn8b(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: uqshrn8b:
 ;CHECK: uqshrn.8b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
         ret <8 x i8> %tmp3
 }
@@ -1087,7 +1087,7 @@
 define <4 x i16> @uqshrn4h(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: uqshrn4h:
 ;CHECK: uqshrn.4h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
         ret <4 x i16> %tmp3
 }
@@ -1095,7 +1095,7 @@
 define <2 x i32> @uqshrn2s(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: uqshrn2s:
 ;CHECK: uqshrn.2s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
         ret <2 x i32> %tmp3
 }
@@ -1103,8 +1103,8 @@
 define <16 x i8> @uqshrn16b(<8 x i8>* %ret, <8 x i16>* %A) nounwind {
 ;CHECK-LABEL: uqshrn16b:
 ;CHECK: uqshrn2.16b v0, {{v[0-9]+}}, #1
-        %out = load <8 x i8>* %ret
-        %tmp1 = load <8 x i16>* %A
+        %out = load <8 x i8>, <8 x i8>* %ret
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1)
         %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         ret <16 x i8> %tmp4
@@ -1113,8 +1113,8 @@
 define <8 x i16> @uqshrn8h(<4 x i16>* %ret, <4 x i32>* %A) nounwind {
 ;CHECK-LABEL: uqshrn8h:
 ;CHECK: uqshrn2.8h v0, {{v[0-9]+}}, #1
-  %out = load <4 x i16>* %ret
-  %tmp1 = load <4 x i32>* %A
+  %out = load <4 x i16>, <4 x i16>* %ret
+  %tmp1 = load <4 x i32>, <4 x i32>* %A
   %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1)
   %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i16> %tmp4
@@ -1123,8 +1123,8 @@
 define <4 x i32> @uqshrn4s(<2 x i32>* %ret, <2 x i64>* %A) nounwind {
 ;CHECK-LABEL: uqshrn4s:
 ;CHECK: uqshrn2.4s v0, {{v[0-9]+}}, #1
-  %out = load <2 x i32>* %ret
-  %tmp1 = load <2 x i64>* %A
+  %out = load <2 x i32>, <2 x i32>* %ret
+  %tmp1 = load <2 x i64>, <2 x i64>* %A
   %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1)
   %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x i32> %tmp4
@@ -1138,7 +1138,7 @@
 define <8 x i16> @ushll8h(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: ushll8h:
 ;CHECK: ushll.8h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
         %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
         ret <8 x i16> %tmp3
@@ -1147,7 +1147,7 @@
 define <4 x i32> @ushll4s(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: ushll4s:
 ;CHECK: ushll.4s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
         %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
         ret <4 x i32> %tmp3
@@ -1156,7 +1156,7 @@
 define <2 x i64> @ushll2d(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: ushll2d:
 ;CHECK: ushll.2d v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
         %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
         ret <2 x i64> %tmp3
@@ -1165,7 +1165,7 @@
 define <8 x i16> @ushll2_8h(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: ushll2_8h:
 ;CHECK: ushll2.8h v0, {{v[0-9]+}}, #1
-        %load1 = load <16 x i8>* %A
+        %load1 = load <16 x i8>, <16 x i8>* %A
         %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         %tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
         %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1175,7 +1175,7 @@
 define <4 x i32> @ushll2_4s(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: ushll2_4s:
 ;CHECK: ushll2.4s v0, {{v[0-9]+}}, #1
-        %load1 = load <8 x i16>* %A
+        %load1 = load <8 x i16>, <8 x i16>* %A
         %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
         %tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
         %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
@@ -1185,7 +1185,7 @@
 define <2 x i64> @ushll2_2d(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: ushll2_2d:
 ;CHECK: ushll2.2d v0, {{v[0-9]+}}, #1
-        %load1 = load <4 x i32>* %A
+        %load1 = load <4 x i32>, <4 x i32>* %A
         %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
         %tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
         %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
@@ -1195,7 +1195,7 @@
 define <8 x i16> @sshll8h(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: sshll8h:
 ;CHECK: sshll.8h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
         %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
         ret <8 x i16> %tmp3
@@ -1204,7 +1204,7 @@
 define <4 x i32> @sshll4s(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: sshll4s:
 ;CHECK: sshll.4s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
         %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
         ret <4 x i32> %tmp3
@@ -1213,7 +1213,7 @@
 define <2 x i64> @sshll2d(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: sshll2d:
 ;CHECK: sshll.2d v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
         %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
         ret <2 x i64> %tmp3
@@ -1222,7 +1222,7 @@
 define <8 x i16> @sshll2_8h(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: sshll2_8h:
 ;CHECK: sshll2.8h v0, {{v[0-9]+}}, #1
-        %load1 = load <16 x i8>* %A
+        %load1 = load <16 x i8>, <16 x i8>* %A
         %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
         %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1232,7 +1232,7 @@
 define <4 x i32> @sshll2_4s(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: sshll2_4s:
 ;CHECK: sshll2.4s v0, {{v[0-9]+}}, #1
-        %load1 = load <8 x i16>* %A
+        %load1 = load <8 x i16>, <8 x i16>* %A
         %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
         %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
         %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
@@ -1242,7 +1242,7 @@
 define <2 x i64> @sshll2_2d(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: sshll2_2d:
 ;CHECK: sshll2.2d v0, {{v[0-9]+}}, #1
-        %load1 = load <4 x i32>* %A
+        %load1 = load <4 x i32>, <4 x i32>* %A
         %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
         %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
         %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
@@ -1252,7 +1252,7 @@
 define <8 x i8> @sqshli8b(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: sqshli8b:
 ;CHECK: sqshl.8b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
         ret <8 x i8> %tmp3
 }
@@ -1260,7 +1260,7 @@
 define <4 x i16> @sqshli4h(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: sqshli4h:
 ;CHECK: sqshl.4h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
         ret <4 x i16> %tmp3
 }
@@ -1268,7 +1268,7 @@
 define <2 x i32> @sqshli2s(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: sqshli2s:
 ;CHECK: sqshl.2s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
         ret <2 x i32> %tmp3
 }
@@ -1276,7 +1276,7 @@
 define <16 x i8> @sqshli16b(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: sqshli16b:
 ;CHECK: sqshl.16b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <16 x i8>* %A
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
         ret <16 x i8> %tmp3
 }
@@ -1284,7 +1284,7 @@
 define <8 x i16> @sqshli8h(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: sqshli8h:
 ;CHECK: sqshl.8h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
         ret <8 x i16> %tmp3
 }
@@ -1292,7 +1292,7 @@
 define <4 x i32> @sqshli4s(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: sqshli4s:
 ;CHECK: sqshl.4s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
         ret <4 x i32> %tmp3
 }
@@ -1300,7 +1300,7 @@
 define <2 x i64> @sqshli2d(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: sqshli2d:
 ;CHECK: sqshl.2d v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
         ret <2 x i64> %tmp3
 }
@@ -1308,7 +1308,7 @@
 define <8 x i8> @uqshli8b(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: uqshli8b:
 ;CHECK: uqshl.8b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
         ret <8 x i8> %tmp3
 }
@@ -1317,7 +1317,7 @@
 ;CHECK-LABEL: uqshli8b_1:
 ;CHECK: movi.8b [[REG:v[0-9]+]], #0x8
 ;CHECK: uqshl.8b v0, v0, [[REG]]
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
         ret <8 x i8> %tmp3
 }
@@ -1325,7 +1325,7 @@
 define <4 x i16> @uqshli4h(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: uqshli4h:
 ;CHECK: uqshl.4h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
         ret <4 x i16> %tmp3
 }
@@ -1333,7 +1333,7 @@
 define <2 x i32> @uqshli2s(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: uqshli2s:
 ;CHECK: uqshl.2s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
         ret <2 x i32> %tmp3
 }
@@ -1341,7 +1341,7 @@
 define <16 x i8> @uqshli16b(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: uqshli16b:
 ;CHECK: uqshl.16b
-        %tmp1 = load <16 x i8>* %A
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
         ret <16 x i8> %tmp3
 }
@@ -1349,7 +1349,7 @@
 define <8 x i16> @uqshli8h(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: uqshli8h:
 ;CHECK: uqshl.8h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
         ret <8 x i16> %tmp3
 }
@@ -1357,7 +1357,7 @@
 define <4 x i32> @uqshli4s(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: uqshli4s:
 ;CHECK: uqshl.4s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
         ret <4 x i32> %tmp3
 }
@@ -1365,7 +1365,7 @@
 define <2 x i64> @uqshli2d(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: uqshli2d:
 ;CHECK: uqshl.2d v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
         ret <2 x i64> %tmp3
 }
@@ -1373,9 +1373,9 @@
 define <8 x i8> @ursra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: ursra8b:
 ;CHECK: ursra.8b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
-        %tmp4 = load <8 x i8>* %B
+        %tmp4 = load <8 x i8>, <8 x i8>* %B
         %tmp5 = add <8 x i8> %tmp3, %tmp4
         ret <8 x i8> %tmp5
 }
@@ -1383,9 +1383,9 @@
 define <4 x i16> @ursra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: ursra4h:
 ;CHECK: ursra.4h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
-        %tmp4 = load <4 x i16>* %B
+        %tmp4 = load <4 x i16>, <4 x i16>* %B
         %tmp5 = add <4 x i16> %tmp3, %tmp4
         ret <4 x i16> %tmp5
 }
@@ -1393,9 +1393,9 @@
 define <2 x i32> @ursra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: ursra2s:
 ;CHECK: ursra.2s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
-        %tmp4 = load <2 x i32>* %B
+        %tmp4 = load <2 x i32>, <2 x i32>* %B
         %tmp5 = add <2 x i32> %tmp3, %tmp4
         ret <2 x i32> %tmp5
 }
@@ -1403,9 +1403,9 @@
 define <16 x i8> @ursra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: ursra16b:
 ;CHECK: ursra.16b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <16 x i8>* %A
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
-        %tmp4 = load <16 x i8>* %B
+        %tmp4 = load <16 x i8>, <16 x i8>* %B
         %tmp5 = add <16 x i8> %tmp3, %tmp4
          ret <16 x i8> %tmp5
 }
@@ -1413,9 +1413,9 @@
 define <8 x i16> @ursra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: ursra8h:
 ;CHECK: ursra.8h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
-        %tmp4 = load <8 x i16>* %B
+        %tmp4 = load <8 x i16>, <8 x i16>* %B
         %tmp5 = add <8 x i16> %tmp3, %tmp4
          ret <8 x i16> %tmp5
 }
@@ -1423,9 +1423,9 @@
 define <4 x i32> @ursra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: ursra4s:
 ;CHECK: ursra.4s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
-        %tmp4 = load <4 x i32>* %B
+        %tmp4 = load <4 x i32>, <4 x i32>* %B
         %tmp5 = add <4 x i32> %tmp3, %tmp4
          ret <4 x i32> %tmp5
 }
@@ -1433,9 +1433,9 @@
 define <2 x i64> @ursra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: ursra2d:
 ;CHECK: ursra.2d v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
-        %tmp4 = load <2 x i64>* %B
+        %tmp4 = load <2 x i64>, <2 x i64>* %B
         %tmp5 = add <2 x i64> %tmp3, %tmp4
          ret <2 x i64> %tmp5
 }
@@ -1443,9 +1443,9 @@
 define <8 x i8> @srsra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: srsra8b:
 ;CHECK: srsra.8b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
-        %tmp4 = load <8 x i8>* %B
+        %tmp4 = load <8 x i8>, <8 x i8>* %B
         %tmp5 = add <8 x i8> %tmp3, %tmp4
         ret <8 x i8> %tmp5
 }
@@ -1453,9 +1453,9 @@
 define <4 x i16> @srsra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: srsra4h:
 ;CHECK: srsra.4h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
-        %tmp4 = load <4 x i16>* %B
+        %tmp4 = load <4 x i16>, <4 x i16>* %B
         %tmp5 = add <4 x i16> %tmp3, %tmp4
         ret <4 x i16> %tmp5
 }
@@ -1463,9 +1463,9 @@
 define <2 x i32> @srsra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: srsra2s:
 ;CHECK: srsra.2s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
-        %tmp4 = load <2 x i32>* %B
+        %tmp4 = load <2 x i32>, <2 x i32>* %B
         %tmp5 = add <2 x i32> %tmp3, %tmp4
         ret <2 x i32> %tmp5
 }
@@ -1473,9 +1473,9 @@
 define <16 x i8> @srsra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: srsra16b:
 ;CHECK: srsra.16b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <16 x i8>* %A
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
-        %tmp4 = load <16 x i8>* %B
+        %tmp4 = load <16 x i8>, <16 x i8>* %B
         %tmp5 = add <16 x i8> %tmp3, %tmp4
          ret <16 x i8> %tmp5
 }
@@ -1483,9 +1483,9 @@
 define <8 x i16> @srsra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: srsra8h:
 ;CHECK: srsra.8h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
-        %tmp4 = load <8 x i16>* %B
+        %tmp4 = load <8 x i16>, <8 x i16>* %B
         %tmp5 = add <8 x i16> %tmp3, %tmp4
          ret <8 x i16> %tmp5
 }
@@ -1493,9 +1493,9 @@
 define <4 x i32> @srsra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: srsra4s:
 ;CHECK: srsra.4s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
-        %tmp4 = load <4 x i32>* %B
+        %tmp4 = load <4 x i32>, <4 x i32>* %B
         %tmp5 = add <4 x i32> %tmp3, %tmp4
          ret <4 x i32> %tmp5
 }
@@ -1503,9 +1503,9 @@
 define <2 x i64> @srsra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: srsra2d:
 ;CHECK: srsra.2d v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
-        %tmp4 = load <2 x i64>* %B
+        %tmp4 = load <2 x i64>, <2 x i64>* %B
         %tmp5 = add <2 x i64> %tmp3, %tmp4
          ret <2 x i64> %tmp5
 }
@@ -1513,9 +1513,9 @@
 define <8 x i8> @usra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: usra8b:
 ;CHECK: usra.8b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-        %tmp4 = load <8 x i8>* %B
+        %tmp4 = load <8 x i8>, <8 x i8>* %B
         %tmp5 = add <8 x i8> %tmp3, %tmp4
         ret <8 x i8> %tmp5
 }
@@ -1523,9 +1523,9 @@
 define <4 x i16> @usra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: usra4h:
 ;CHECK: usra.4h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
-        %tmp4 = load <4 x i16>* %B
+        %tmp4 = load <4 x i16>, <4 x i16>* %B
         %tmp5 = add <4 x i16> %tmp3, %tmp4
         ret <4 x i16> %tmp5
 }
@@ -1533,9 +1533,9 @@
 define <2 x i32> @usra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: usra2s:
 ;CHECK: usra.2s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
-        %tmp4 = load <2 x i32>* %B
+        %tmp4 = load <2 x i32>, <2 x i32>* %B
         %tmp5 = add <2 x i32> %tmp3, %tmp4
         ret <2 x i32> %tmp5
 }
@@ -1543,9 +1543,9 @@
 define <16 x i8> @usra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: usra16b:
 ;CHECK: usra.16b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <16 x i8>* %A
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
         %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-        %tmp4 = load <16 x i8>* %B
+        %tmp4 = load <16 x i8>, <16 x i8>* %B
         %tmp5 = add <16 x i8> %tmp3, %tmp4
          ret <16 x i8> %tmp5
 }
@@ -1553,9 +1553,9 @@
 define <8 x i16> @usra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: usra8h:
 ;CHECK: usra.8h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
-        %tmp4 = load <8 x i16>* %B
+        %tmp4 = load <8 x i16>, <8 x i16>* %B
         %tmp5 = add <8 x i16> %tmp3, %tmp4
          ret <8 x i16> %tmp5
 }
@@ -1563,9 +1563,9 @@
 define <4 x i32> @usra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: usra4s:
 ;CHECK: usra.4s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
-        %tmp4 = load <4 x i32>* %B
+        %tmp4 = load <4 x i32>, <4 x i32>* %B
         %tmp5 = add <4 x i32> %tmp3, %tmp4
          ret <4 x i32> %tmp5
 }
@@ -1573,9 +1573,9 @@
 define <2 x i64> @usra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: usra2d:
 ;CHECK: usra.2d v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
-        %tmp4 = load <2 x i64>* %B
+        %tmp4 = load <2 x i64>, <2 x i64>* %B
         %tmp5 = add <2 x i64> %tmp3, %tmp4
          ret <2 x i64> %tmp5
 }
@@ -1583,9 +1583,9 @@
 define <8 x i8> @ssra8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: ssra8b:
 ;CHECK: ssra.8b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %tmp3 = ashr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-        %tmp4 = load <8 x i8>* %B
+        %tmp4 = load <8 x i8>, <8 x i8>* %B
         %tmp5 = add <8 x i8> %tmp3, %tmp4
         ret <8 x i8> %tmp5
 }
@@ -1593,9 +1593,9 @@
 define <4 x i16> @ssra4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: ssra4h:
 ;CHECK: ssra.4h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %tmp3 = ashr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
-        %tmp4 = load <4 x i16>* %B
+        %tmp4 = load <4 x i16>, <4 x i16>* %B
         %tmp5 = add <4 x i16> %tmp3, %tmp4
         ret <4 x i16> %tmp5
 }
@@ -1603,9 +1603,9 @@
 define <2 x i32> @ssra2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: ssra2s:
 ;CHECK: ssra.2s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %tmp3 = ashr <2 x i32> %tmp1, <i32 1, i32 1>
-        %tmp4 = load <2 x i32>* %B
+        %tmp4 = load <2 x i32>, <2 x i32>* %B
         %tmp5 = add <2 x i32> %tmp3, %tmp4
         ret <2 x i32> %tmp5
 }
@@ -1613,9 +1613,9 @@
 define <16 x i8> @ssra16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: ssra16b:
 ;CHECK: ssra.16b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <16 x i8>* %A
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
         %tmp3 = ashr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-        %tmp4 = load <16 x i8>* %B
+        %tmp4 = load <16 x i8>, <16 x i8>* %B
         %tmp5 = add <16 x i8> %tmp3, %tmp4
          ret <16 x i8> %tmp5
 }
@@ -1623,9 +1623,9 @@
 define <8 x i16> @ssra8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: ssra8h:
 ;CHECK: ssra.8h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp3 = ashr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
-        %tmp4 = load <8 x i16>* %B
+        %tmp4 = load <8 x i16>, <8 x i16>* %B
         %tmp5 = add <8 x i16> %tmp3, %tmp4
          ret <8 x i16> %tmp5
 }
@@ -1633,9 +1633,9 @@
 define <4 x i32> @ssra4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: ssra4s:
 ;CHECK: ssra.4s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp3 = ashr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
-        %tmp4 = load <4 x i32>* %B
+        %tmp4 = load <4 x i32>, <4 x i32>* %B
         %tmp5 = add <4 x i32> %tmp3, %tmp4
          ret <4 x i32> %tmp5
 }
@@ -1643,9 +1643,9 @@
 define <2 x i64> @ssra2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: ssra2d:
 ;CHECK: ssra.2d v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp3 = ashr <2 x i64> %tmp1, <i64 1, i64 1>
-        %tmp4 = load <2 x i64>* %B
+        %tmp4 = load <2 x i64>, <2 x i64>* %B
         %tmp5 = add <2 x i64> %tmp3, %tmp4
          ret <2 x i64> %tmp5
 }
@@ -1655,8 +1655,8 @@
 ;CHECK: shr.8b v0, {{v[0-9]+}}, #1
 ;CHECK-NEXT: orr.8b
 ;CHECK-NEXT: ret
-        %tmp1 = load <8 x i8>* %A
-        %tmp4 = load <8 x i8>* %B
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp4 = load <8 x i8>, <8 x i8>* %B
         %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
         %tmp5 = or <8 x i8> %tmp3, %tmp4
         ret <8 x i8> %tmp5
@@ -1667,8 +1667,8 @@
 ;CHECK: shr.4h v0, {{v[0-9]+}}, #1
 ;CHECK-NEXT: orr.8b
 ;CHECK-NEXT: ret
-        %tmp1 = load <4 x i16>* %A
-        %tmp4 = load <4 x i16>* %B
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp4 = load <4 x i16>, <4 x i16>* %B
         %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
         %tmp5 = or <4 x i16> %tmp3, %tmp4
         ret <4 x i16> %tmp5
@@ -1679,8 +1679,8 @@
 ;CHECK: shr.2s v0, {{v[0-9]+}}, #1
 ;CHECK-NEXT: orr.8b
 ;CHECK-NEXT: ret
-        %tmp1 = load <2 x i32>* %A
-        %tmp4 = load <2 x i32>* %B
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp4 = load <2 x i32>, <2 x i32>* %B
         %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
         %tmp5 = or <2 x i32> %tmp3, %tmp4
         ret <2 x i32> %tmp5
@@ -1691,8 +1691,8 @@
 ;CHECK: shr.16b v0, {{v[0-9]+}}, #1
 ;CHECK-NEXT: orr.16b
 ;CHECK-NEXT: ret
-        %tmp1 = load <16 x i8>* %A
-        %tmp4 = load <16 x i8>* %B
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
+        %tmp4 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
         %tmp5 = or <16 x i8> %tmp3, %tmp4
          ret <16 x i8> %tmp5
@@ -1703,8 +1703,8 @@
 ;CHECK: shr.8h v0, {{v[0-9]+}}, #1
 ;CHECK-NEXT: orr.16b
 ;CHECK-NEXT: ret
-        %tmp1 = load <8 x i16>* %A
-        %tmp4 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp4 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
         %tmp5 = or <8 x i16> %tmp3, %tmp4
          ret <8 x i16> %tmp5
@@ -1715,8 +1715,8 @@
 ;CHECK: shr.4s v0, {{v[0-9]+}}, #1
 ;CHECK-NEXT: orr.16b
 ;CHECK-NEXT: ret
-        %tmp1 = load <4 x i32>* %A
-        %tmp4 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp4 = load <4 x i32>, <4 x i32>* %B
         %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
         %tmp5 = or <4 x i32> %tmp3, %tmp4
          ret <4 x i32> %tmp5
@@ -1727,8 +1727,8 @@
 ;CHECK: shr.2d v0, {{v[0-9]+}}, #1
 ;CHECK-NEXT: orr.16b
 ;CHECK-NEXT: ret
-        %tmp1 = load <2 x i64>* %A
-        %tmp4 = load <2 x i64>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp4 = load <2 x i64>, <2 x i64>* %B
         %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
         %tmp5 = or <2 x i64> %tmp3, %tmp4
          ret <2 x i64> %tmp5
@@ -1739,8 +1739,8 @@
 ;CHECK: shl.8b v0, {{v[0-9]+}}, #1
 ;CHECK-NEXT: orr.8b
 ;CHECK-NEXT: ret
-        %tmp1 = load <8 x i8>* %A
-        %tmp4 = load <8 x i8>* %B
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp4 = load <8 x i8>, <8 x i8>* %B
         %tmp3 = shl <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
         %tmp5 = or <8 x i8> %tmp3, %tmp4
         ret <8 x i8> %tmp5
@@ -1751,8 +1751,8 @@
 ;CHECK: shl.4h v0, {{v[0-9]+}}, #1
 ;CHECK-NEXT: orr.8b
 ;CHECK-NEXT: ret
-        %tmp1 = load <4 x i16>* %A
-        %tmp4 = load <4 x i16>* %B
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp4 = load <4 x i16>, <4 x i16>* %B
         %tmp3 = shl <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
         %tmp5 = or <4 x i16> %tmp3, %tmp4
         ret <4 x i16> %tmp5
@@ -1763,8 +1763,8 @@
 ;CHECK: shl.2s v0, {{v[0-9]+}}, #1
 ;CHECK-NEXT: orr.8b
 ;CHECK-NEXT: ret
-        %tmp1 = load <2 x i32>* %A
-        %tmp4 = load <2 x i32>* %B
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp4 = load <2 x i32>, <2 x i32>* %B
         %tmp3 = shl <2 x i32> %tmp1, <i32 1, i32 1>
         %tmp5 = or <2 x i32> %tmp3, %tmp4
         ret <2 x i32> %tmp5
@@ -1775,8 +1775,8 @@
 ;CHECK: shl.16b v0, {{v[0-9]+}}, #1
 ;CHECK-NEXT: orr.16b
 ;CHECK-NEXT: ret
-        %tmp1 = load <16 x i8>* %A
-        %tmp4 = load <16 x i8>* %B
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
+        %tmp4 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = shl <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
         %tmp5 = or <16 x i8> %tmp3, %tmp4
          ret <16 x i8> %tmp5
@@ -1787,8 +1787,8 @@
 ;CHECK: shl.8h v0, {{v[0-9]+}}, #1
 ;CHECK-NEXT: orr.16b
 ;CHECK-NEXT: ret
-        %tmp1 = load <8 x i16>* %A
-        %tmp4 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp4 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = shl <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
         %tmp5 = or <8 x i16> %tmp3, %tmp4
          ret <8 x i16> %tmp5
@@ -1799,8 +1799,8 @@
 ;CHECK: shl.4s v0, {{v[0-9]+}}, #1
 ;CHECK-NEXT: orr.16b
 ;CHECK-NEXT: ret
-        %tmp1 = load <4 x i32>* %A
-        %tmp4 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp4 = load <4 x i32>, <4 x i32>* %B
         %tmp3 = shl <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
         %tmp5 = or <4 x i32> %tmp3, %tmp4
          ret <4 x i32> %tmp5
@@ -1811,8 +1811,8 @@
 ;CHECK: shl.2d v0, {{v[0-9]+}}, #1
 ;CHECK-NEXT: orr.16b
 ;CHECK-NEXT: ret
-        %tmp1 = load <2 x i64>* %A
-        %tmp4 = load <2 x i64>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp4 = load <2 x i64>, <2 x i64>* %B
         %tmp3 = shl <2 x i64> %tmp1, <i64 1, i64 1>
         %tmp5 = or <2 x i64> %tmp3, %tmp4
          ret <2 x i64> %tmp5
@@ -1838,8 +1838,8 @@
 define <8 x i8> @sli8b(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: sli8b:
 ;CHECK: sli.8b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i8>* %A
-        %tmp2 = load <8 x i8>* %B
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1)
         ret <8 x i8> %tmp3
 }
@@ -1847,8 +1847,8 @@
 define <4 x i16> @sli4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: sli4h:
 ;CHECK: sli.4h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i16>* %A
-        %tmp2 = load <4 x i16>* %B
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1)
         ret <4 x i16> %tmp3
 }
@@ -1856,8 +1856,8 @@
 define <2 x i32> @sli2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: sli2s:
 ;CHECK: sli.2s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i32>* %A
-        %tmp2 = load <2 x i32>* %B
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1)
         ret <2 x i32> %tmp3
 }
@@ -1865,8 +1865,8 @@
 define <1 x i64> @sli1d(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: sli1d:
 ;CHECK: sli d0, {{d[0-9]+}}, #1
-        %tmp1 = load <1 x i64>* %A
-        %tmp2 = load <1 x i64>* %B
+        %tmp1 = load <1 x i64>, <1 x i64>* %A
+        %tmp2 = load <1 x i64>, <1 x i64>* %B
         %tmp3 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1)
         ret <1 x i64> %tmp3
 }
@@ -1874,8 +1874,8 @@
 define <16 x i8> @sli16b(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: sli16b:
 ;CHECK: sli.16b v0, {{v[0-9]+}}, #1
-        %tmp1 = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1)
         ret <16 x i8> %tmp3
 }
@@ -1883,8 +1883,8 @@
 define <8 x i16> @sli8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: sli8h:
 ;CHECK: sli.8h v0, {{v[0-9]+}}, #1
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1)
         ret <8 x i16> %tmp3
 }
@@ -1892,8 +1892,8 @@
 define <4 x i32> @sli4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: sli4s:
 ;CHECK: sli.4s v0, {{v[0-9]+}}, #1
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %tmp3 = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1)
         ret <4 x i32> %tmp3
 }
@@ -1901,8 +1901,8 @@
 define <2 x i64> @sli2d(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: sli2d:
 ;CHECK: sli.2d v0, {{v[0-9]+}}, #1
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i64>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i64>, <2 x i64>* %B
         %tmp3 = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1)
         ret <2 x i64> %tmp3
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshr.ll b/llvm/test/CodeGen/AArch64/arm64-vshr.ll
index 21eb579..8d263f2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshr.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshr.ll
@@ -10,8 +10,8 @@
   %b.addr = alloca <8 x i16>, align 16
   store <8 x i16> %a, <8 x i16>* %a.addr, align 16
   store <8 x i16> %b, <8 x i16>* %b.addr, align 16
-  %0 = load <8 x i16>* %a.addr, align 16
-  %1 = load <8 x i16>* %b.addr, align 16
+  %0 = load <8 x i16>, <8 x i16>* %a.addr, align 16
+  %1 = load <8 x i16>, <8 x i16>* %b.addr, align 16
   %shr = ashr <8 x i16> %0, %1
   ret <8 x i16> %shr
 }
@@ -25,8 +25,8 @@
   %b.addr = alloca <4 x i32>, align 32
   store <4 x i32> %a, <4 x i32>* %a.addr, align 32
   store <4 x i32> %b, <4 x i32>* %b.addr, align 32
-  %0 = load <4 x i32>* %a.addr, align 32
-  %1 = load <4 x i32>* %b.addr, align 32
+  %0 = load <4 x i32>, <4 x i32>* %a.addr, align 32
+  %1 = load <4 x i32>, <4 x i32>* %b.addr, align 32
   %shr = ashr <4 x i32> %0, %1
   ret <4 x i32> %shr
 }
@@ -40,8 +40,8 @@
   %b.addr = alloca <8 x i16>, align 16
   store <8 x i16> %a, <8 x i16>* %a.addr, align 16
   store <8 x i16> %b, <8 x i16>* %b.addr, align 16
-  %0 = load <8 x i16>* %a.addr, align 16
-  %1 = load <8 x i16>* %b.addr, align 16
+  %0 = load <8 x i16>, <8 x i16>* %a.addr, align 16
+  %1 = load <8 x i16>, <8 x i16>* %b.addr, align 16
   %shr = lshr <8 x i16> %0, %1
   ret <8 x i16> %shr
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-vsqrt.ll b/llvm/test/CodeGen/AArch64/arm64-vsqrt.ll
index 02b7c7e..20aebd9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vsqrt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vsqrt.ll
@@ -3,8 +3,8 @@
 define <2 x float> @frecps_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: frecps_2s:
 ;CHECK: frecps.2s
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x float> @frecps_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: frecps_4s:
 ;CHECK: frecps.4s
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x float> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x double> @frecps_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
 ;CHECK-LABEL: frecps_2d:
 ;CHECK: frecps.2d
-	%tmp1 = load <2 x double>* %A
-	%tmp2 = load <2 x double>* %B
+	%tmp1 = load <2 x double>, <2 x double>* %A
+	%tmp2 = load <2 x double>, <2 x double>* %B
 	%tmp3 = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
 	ret <2 x double> %tmp3
 }
@@ -35,8 +35,8 @@
 define <2 x float> @frsqrts_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: frsqrts_2s:
 ;CHECK: frsqrts.2s
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
@@ -44,8 +44,8 @@
 define <4 x float> @frsqrts_4s(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: frsqrts_4s:
 ;CHECK: frsqrts.4s
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x float> %tmp3
 }
@@ -53,8 +53,8 @@
 define <2 x double> @frsqrts_2d(<2 x double>* %A, <2 x double>* %B) nounwind {
 ;CHECK-LABEL: frsqrts_2d:
 ;CHECK: frsqrts.2d
-	%tmp1 = load <2 x double>* %A
-	%tmp2 = load <2 x double>* %B
+	%tmp1 = load <2 x double>, <2 x double>* %A
+	%tmp2 = load <2 x double>, <2 x double>* %B
 	%tmp3 = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %tmp1, <2 x double> %tmp2)
 	ret <2 x double> %tmp3
 }
@@ -66,7 +66,7 @@
 define <2 x float> @frecpe_2s(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: frecpe_2s:
 ;CHECK: frecpe.2s
-	%tmp1 = load <2 x float>* %A
+	%tmp1 = load <2 x float>, <2 x float>* %A
 	%tmp3 = call <2 x float> @llvm.aarch64.neon.frecpe.v2f32(<2 x float> %tmp1)
 	ret <2 x float> %tmp3
 }
@@ -74,7 +74,7 @@
 define <4 x float> @frecpe_4s(<4 x float>* %A) nounwind {
 ;CHECK-LABEL: frecpe_4s:
 ;CHECK: frecpe.4s
-	%tmp1 = load <4 x float>* %A
+	%tmp1 = load <4 x float>, <4 x float>* %A
 	%tmp3 = call <4 x float> @llvm.aarch64.neon.frecpe.v4f32(<4 x float> %tmp1)
 	ret <4 x float> %tmp3
 }
@@ -82,7 +82,7 @@
 define <2 x double> @frecpe_2d(<2 x double>* %A) nounwind {
 ;CHECK-LABEL: frecpe_2d:
 ;CHECK: frecpe.2d
-	%tmp1 = load <2 x double>* %A
+	%tmp1 = load <2 x double>, <2 x double>* %A
 	%tmp3 = call <2 x double> @llvm.aarch64.neon.frecpe.v2f64(<2 x double> %tmp1)
 	ret <2 x double> %tmp3
 }
@@ -90,7 +90,7 @@
 define float @frecpe_s(float* %A) nounwind {
 ;CHECK-LABEL: frecpe_s:
 ;CHECK: frecpe s0, {{s[0-9]+}}
-  %tmp1 = load float* %A
+  %tmp1 = load float, float* %A
   %tmp3 = call float @llvm.aarch64.neon.frecpe.f32(float %tmp1)
   ret float %tmp3
 }
@@ -98,7 +98,7 @@
 define double @frecpe_d(double* %A) nounwind {
 ;CHECK-LABEL: frecpe_d:
 ;CHECK: frecpe d0, {{d[0-9]+}}
-  %tmp1 = load double* %A
+  %tmp1 = load double, double* %A
   %tmp3 = call double @llvm.aarch64.neon.frecpe.f64(double %tmp1)
   ret double %tmp3
 }
@@ -112,7 +112,7 @@
 define float @frecpx_s(float* %A) nounwind {
 ;CHECK-LABEL: frecpx_s:
 ;CHECK: frecpx s0, {{s[0-9]+}}
-  %tmp1 = load float* %A
+  %tmp1 = load float, float* %A
   %tmp3 = call float @llvm.aarch64.neon.frecpx.f32(float %tmp1)
   ret float %tmp3
 }
@@ -120,7 +120,7 @@
 define double @frecpx_d(double* %A) nounwind {
 ;CHECK-LABEL: frecpx_d:
 ;CHECK: frecpx d0, {{d[0-9]+}}
-  %tmp1 = load double* %A
+  %tmp1 = load double, double* %A
   %tmp3 = call double @llvm.aarch64.neon.frecpx.f64(double %tmp1)
   ret double %tmp3
 }
@@ -131,7 +131,7 @@
 define <2 x float> @frsqrte_2s(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: frsqrte_2s:
 ;CHECK: frsqrte.2s
-	%tmp1 = load <2 x float>* %A
+	%tmp1 = load <2 x float>, <2 x float>* %A
 	%tmp3 = call <2 x float> @llvm.aarch64.neon.frsqrte.v2f32(<2 x float> %tmp1)
 	ret <2 x float> %tmp3
 }
@@ -139,7 +139,7 @@
 define <4 x float> @frsqrte_4s(<4 x float>* %A) nounwind {
 ;CHECK-LABEL: frsqrte_4s:
 ;CHECK: frsqrte.4s
-	%tmp1 = load <4 x float>* %A
+	%tmp1 = load <4 x float>, <4 x float>* %A
 	%tmp3 = call <4 x float> @llvm.aarch64.neon.frsqrte.v4f32(<4 x float> %tmp1)
 	ret <4 x float> %tmp3
 }
@@ -147,7 +147,7 @@
 define <2 x double> @frsqrte_2d(<2 x double>* %A) nounwind {
 ;CHECK-LABEL: frsqrte_2d:
 ;CHECK: frsqrte.2d
-	%tmp1 = load <2 x double>* %A
+	%tmp1 = load <2 x double>, <2 x double>* %A
 	%tmp3 = call <2 x double> @llvm.aarch64.neon.frsqrte.v2f64(<2 x double> %tmp1)
 	ret <2 x double> %tmp3
 }
@@ -155,7 +155,7 @@
 define float @frsqrte_s(float* %A) nounwind {
 ;CHECK-LABEL: frsqrte_s:
 ;CHECK: frsqrte s0, {{s[0-9]+}}
-  %tmp1 = load float* %A
+  %tmp1 = load float, float* %A
   %tmp3 = call float @llvm.aarch64.neon.frsqrte.f32(float %tmp1)
   ret float %tmp3
 }
@@ -163,7 +163,7 @@
 define double @frsqrte_d(double* %A) nounwind {
 ;CHECK-LABEL: frsqrte_d:
 ;CHECK: frsqrte d0, {{d[0-9]+}}
-  %tmp1 = load double* %A
+  %tmp1 = load double, double* %A
   %tmp3 = call double @llvm.aarch64.neon.frsqrte.f64(double %tmp1)
   ret double %tmp3
 }
@@ -177,7 +177,7 @@
 define <2 x i32> @urecpe_2s(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: urecpe_2s:
 ;CHECK: urecpe.2s
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.urecpe.v2i32(<2 x i32> %tmp1)
 	ret <2 x i32> %tmp3
 }
@@ -185,7 +185,7 @@
 define <4 x i32> @urecpe_4s(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: urecpe_4s:
 ;CHECK: urecpe.4s
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.urecpe.v4i32(<4 x i32> %tmp1)
 	ret <4 x i32> %tmp3
 }
@@ -196,7 +196,7 @@
 define <2 x i32> @ursqrte_2s(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: ursqrte_2s:
 ;CHECK: ursqrte.2s
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp3 = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %tmp1)
 	ret <2 x i32> %tmp3
 }
@@ -204,7 +204,7 @@
 define <4 x i32> @ursqrte_4s(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: ursqrte_4s:
 ;CHECK: ursqrte.4s
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp3 = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %tmp1)
 	ret <4 x i32> %tmp3
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-vsra.ll b/llvm/test/CodeGen/AArch64/arm64-vsra.ll
index 5e9cef3..d480dfe 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vsra.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vsra.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsras8:
 ;CHECK: ssra.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = ashr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
         %tmp4 = add <8 x i8> %tmp1, %tmp3
 	ret <8 x i8> %tmp4
@@ -13,8 +13,8 @@
 define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vsras16:
 ;CHECK: ssra.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = ashr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 >
         %tmp4 = add <4 x i16> %tmp1, %tmp3
 	ret <4 x i16> %tmp4
@@ -23,8 +23,8 @@
 define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vsras32:
 ;CHECK: ssra.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = ashr <2 x i32> %tmp2, < i32 31, i32 31 >
         %tmp4 = add <2 x i32> %tmp1, %tmp3
 	ret <2 x i32> %tmp4
@@ -33,8 +33,8 @@
 define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsraQs8:
 ;CHECK: ssra.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = ashr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
         %tmp4 = add <16 x i8> %tmp1, %tmp3
 	ret <16 x i8> %tmp4
@@ -43,8 +43,8 @@
 define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vsraQs16:
 ;CHECK: ssra.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = ashr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
         %tmp4 = add <8 x i16> %tmp1, %tmp3
 	ret <8 x i16> %tmp4
@@ -53,8 +53,8 @@
 define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vsraQs32:
 ;CHECK: ssra.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = ashr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 >
         %tmp4 = add <4 x i32> %tmp1, %tmp3
 	ret <4 x i32> %tmp4
@@ -63,8 +63,8 @@
 define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vsraQs64:
 ;CHECK: ssra.2d
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = ashr <2 x i64> %tmp2, < i64 63, i64 63 >
         %tmp4 = add <2 x i64> %tmp1, %tmp3
 	ret <2 x i64> %tmp4
@@ -73,8 +73,8 @@
 define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsrau8:
 ;CHECK: usra.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = lshr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
         %tmp4 = add <8 x i8> %tmp1, %tmp3
 	ret <8 x i8> %tmp4
@@ -83,8 +83,8 @@
 define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vsrau16:
 ;CHECK: usra.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = lshr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 >
         %tmp4 = add <4 x i16> %tmp1, %tmp3
 	ret <4 x i16> %tmp4
@@ -93,8 +93,8 @@
 define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vsrau32:
 ;CHECK: usra.2s
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = lshr <2 x i32> %tmp2, < i32 31, i32 31 >
         %tmp4 = add <2 x i32> %tmp1, %tmp3
 	ret <2 x i32> %tmp4
@@ -104,8 +104,8 @@
 define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsraQu8:
 ;CHECK: usra.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = lshr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
         %tmp4 = add <16 x i8> %tmp1, %tmp3
 	ret <16 x i8> %tmp4
@@ -114,8 +114,8 @@
 define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vsraQu16:
 ;CHECK: usra.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = lshr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
         %tmp4 = add <8 x i16> %tmp1, %tmp3
 	ret <8 x i16> %tmp4
@@ -124,8 +124,8 @@
 define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vsraQu32:
 ;CHECK: usra.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = lshr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 >
         %tmp4 = add <4 x i32> %tmp1, %tmp3
 	ret <4 x i32> %tmp4
@@ -134,8 +134,8 @@
 define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vsraQu64:
 ;CHECK: usra.2d
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = lshr <2 x i64> %tmp2, < i64 63, i64 63 >
         %tmp4 = add <2 x i64> %tmp1, %tmp3
 	ret <2 x i64> %tmp4
diff --git a/llvm/test/CodeGen/AArch64/arm64-vsub.ll b/llvm/test/CodeGen/AArch64/arm64-vsub.ll
index c2c8755..6b44b56 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vsub.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vsub.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @subhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: subhn8b:
 ;CHECK: subhn.8b
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.subhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @subhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: subhn4h:
 ;CHECK: subhn.4h
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.subhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @subhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: subhn2s:
 ;CHECK: subhn.2s
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i64>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i64>, <2 x i64>* %B
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.subhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
         ret <2 x i32> %tmp3
 }
@@ -64,8 +64,8 @@
 define <8 x i8> @rsubhn8b(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: rsubhn8b:
 ;CHECK: rsubhn.8b
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
         ret <8 x i8> %tmp3
 }
@@ -73,8 +73,8 @@
 define <4 x i16> @rsubhn4h(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: rsubhn4h:
 ;CHECK: rsubhn.4h
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i32>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %tmp3 = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
         ret <4 x i16> %tmp3
 }
@@ -82,8 +82,8 @@
 define <2 x i32> @rsubhn2s(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: rsubhn2s:
 ;CHECK: rsubhn.2s
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i64>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i64>, <2 x i64>* %B
         %tmp3 = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
         ret <2 x i32> %tmp3
 }
@@ -125,8 +125,8 @@
 define <8 x i16> @ssubl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: ssubl8h:
 ;CHECK: ssubl.8h
-        %tmp1 = load <8 x i8>* %A
-        %tmp2 = load <8 x i8>* %B
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
   %tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
   %tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
   %tmp5 = sub <8 x i16> %tmp3, %tmp4
@@ -136,8 +136,8 @@
 define <4 x i32> @ssubl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: ssubl4s:
 ;CHECK: ssubl.4s
-        %tmp1 = load <4 x i16>* %A
-        %tmp2 = load <4 x i16>* %B
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
   %tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
   %tmp5 = sub <4 x i32> %tmp3, %tmp4
@@ -147,8 +147,8 @@
 define <2 x i64> @ssubl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: ssubl2d:
 ;CHECK: ssubl.2d
-        %tmp1 = load <2 x i32>* %A
-        %tmp2 = load <2 x i32>* %B
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
   %tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
   %tmp5 = sub <2 x i64> %tmp3, %tmp4
@@ -158,11 +158,11 @@
 define <8 x i16> @ssubl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: ssubl2_8h:
 ;CHECK: ssubl2.8h
-        %tmp1 = load <16 x i8>* %A
+        %tmp1 = load <16 x i8>, <16 x i8>* %A
         %high1 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         %ext1 = sext <8 x i8> %high1 to <8 x i16>
 
-        %tmp2 = load <16 x i8>* %B
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         %ext2 = sext <8 x i8> %high2 to <8 x i16>
 
@@ -173,11 +173,11 @@
 define <4 x i32> @ssubl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: ssubl2_4s:
 ;CHECK: ssubl2.4s
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %high1 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
         %ext1 = sext <4 x i16> %high1 to <4 x i32>
 
-        %tmp2 = load <8 x i16>* %B
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
         %ext2 = sext <4 x i16> %high2 to <4 x i32>
 
@@ -188,11 +188,11 @@
 define <2 x i64> @ssubl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: ssubl2_2d:
 ;CHECK: ssubl2.2d
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %high1 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
         %ext1 = sext <2 x i32> %high1 to <2 x i64>
 
-        %tmp2 = load <4 x i32>* %B
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
         %ext2 = sext <2 x i32> %high2 to <2 x i64>
 
@@ -203,8 +203,8 @@
 define <8 x i16> @usubl8h(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: usubl8h:
 ;CHECK: usubl.8h
-  %tmp1 = load <8 x i8>* %A
-  %tmp2 = load <8 x i8>* %B
+  %tmp1 = load <8 x i8>, <8 x i8>* %A
+  %tmp2 = load <8 x i8>, <8 x i8>* %B
   %tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
   %tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
   %tmp5 = sub <8 x i16> %tmp3, %tmp4
@@ -214,8 +214,8 @@
 define <4 x i32> @usubl4s(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: usubl4s:
 ;CHECK: usubl.4s
-  %tmp1 = load <4 x i16>* %A
-  %tmp2 = load <4 x i16>* %B
+  %tmp1 = load <4 x i16>, <4 x i16>* %A
+  %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
   %tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
   %tmp5 = sub <4 x i32> %tmp3, %tmp4
@@ -225,8 +225,8 @@
 define <2 x i64> @usubl2d(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: usubl2d:
 ;CHECK: usubl.2d
-  %tmp1 = load <2 x i32>* %A
-  %tmp2 = load <2 x i32>* %B
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
+  %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
   %tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
   %tmp5 = sub <2 x i64> %tmp3, %tmp4
@@ -236,11 +236,11 @@
 define <8 x i16> @usubl2_8h(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: usubl2_8h:
 ;CHECK: usubl2.8h
-  %tmp1 = load <16 x i8>* %A
+  %tmp1 = load <16 x i8>, <16 x i8>* %A
   %high1 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %ext1 = zext <8 x i8> %high1 to <8 x i16>
 
-  %tmp2 = load <16 x i8>* %B
+  %tmp2 = load <16 x i8>, <16 x i8>* %B
   %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %ext2 = zext <8 x i8> %high2 to <8 x i16>
 
@@ -251,11 +251,11 @@
 define <4 x i32> @usubl2_4s(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: usubl2_4s:
 ;CHECK: usubl2.4s
-  %tmp1 = load <8 x i16>* %A
+  %tmp1 = load <8 x i16>, <8 x i16>* %A
   %high1 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %ext1 = zext <4 x i16> %high1 to <4 x i32>
 
-  %tmp2 = load <8 x i16>* %B
+  %tmp2 = load <8 x i16>, <8 x i16>* %B
   %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   %ext2 = zext <4 x i16> %high2 to <4 x i32>
 
@@ -266,11 +266,11 @@
 define <2 x i64> @usubl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: usubl2_2d:
 ;CHECK: usubl2.2d
-  %tmp1 = load <4 x i32>* %A
+  %tmp1 = load <4 x i32>, <4 x i32>* %A
   %high1 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %ext1 = zext <2 x i32> %high1 to <2 x i64>
 
-  %tmp2 = load <4 x i32>* %B
+  %tmp2 = load <4 x i32>, <4 x i32>* %B
   %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
   %ext2 = zext <2 x i32> %high2 to <2 x i64>
 
@@ -281,8 +281,8 @@
 define <8 x i16> @ssubw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: ssubw8h:
 ;CHECK: ssubw.8h
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i8>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
   %tmp3 = sext <8 x i8> %tmp2 to <8 x i16>
   %tmp4 = sub <8 x i16> %tmp1, %tmp3
         ret <8 x i16> %tmp4
@@ -291,8 +291,8 @@
 define <4 x i32> @ssubw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: ssubw4s:
 ;CHECK: ssubw.4s
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i16>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = sext <4 x i16> %tmp2 to <4 x i32>
   %tmp4 = sub <4 x i32> %tmp1, %tmp3
         ret <4 x i32> %tmp4
@@ -301,8 +301,8 @@
 define <2 x i64> @ssubw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: ssubw2d:
 ;CHECK: ssubw.2d
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i32>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = sext <2 x i32> %tmp2 to <2 x i64>
   %tmp4 = sub <2 x i64> %tmp1, %tmp3
         ret <2 x i64> %tmp4
@@ -311,9 +311,9 @@
 define <8 x i16> @ssubw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: ssubw2_8h:
 ;CHECK: ssubw2.8h
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
 
-        %tmp2 = load <16 x i8>* %B
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         %ext2 = sext <8 x i8> %high2 to <8 x i16>
 
@@ -324,9 +324,9 @@
 define <4 x i32> @ssubw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: ssubw2_4s:
 ;CHECK: ssubw2.4s
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
 
-        %tmp2 = load <8 x i16>* %B
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
         %ext2 = sext <4 x i16> %high2 to <4 x i32>
 
@@ -337,9 +337,9 @@
 define <2 x i64> @ssubw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: ssubw2_2d:
 ;CHECK: ssubw2.2d
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
 
-        %tmp2 = load <4 x i32>* %B
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
         %ext2 = sext <2 x i32> %high2 to <2 x i64>
 
@@ -350,8 +350,8 @@
 define <8 x i16> @usubw8h(<8 x i16>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: usubw8h:
 ;CHECK: usubw.8h
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i8>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i8>, <8 x i8>* %B
   %tmp3 = zext <8 x i8> %tmp2 to <8 x i16>
   %tmp4 = sub <8 x i16> %tmp1, %tmp3
         ret <8 x i16> %tmp4
@@ -360,8 +360,8 @@
 define <4 x i32> @usubw4s(<4 x i32>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: usubw4s:
 ;CHECK: usubw.4s
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i16>* %B
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
   %tmp3 = zext <4 x i16> %tmp2 to <4 x i32>
   %tmp4 = sub <4 x i32> %tmp1, %tmp3
         ret <4 x i32> %tmp4
@@ -370,8 +370,8 @@
 define <2 x i64> @usubw2d(<2 x i64>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: usubw2d:
 ;CHECK: usubw.2d
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i32>* %B
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
   %tmp3 = zext <2 x i32> %tmp2 to <2 x i64>
   %tmp4 = sub <2 x i64> %tmp1, %tmp3
         ret <2 x i64> %tmp4
@@ -380,9 +380,9 @@
 define <8 x i16> @usubw2_8h(<8 x i16>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: usubw2_8h:
 ;CHECK: usubw2.8h
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
 
-        %tmp2 = load <16 x i8>* %B
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %high2 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         %ext2 = zext <8 x i8> %high2 to <8 x i16>
 
@@ -393,9 +393,9 @@
 define <4 x i32> @usubw2_4s(<4 x i32>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: usubw2_4s:
 ;CHECK: usubw2.4s
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
 
-        %tmp2 = load <8 x i16>* %B
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %high2 = shufflevector <8 x i16> %tmp2, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
         %ext2 = zext <4 x i16> %high2 to <4 x i32>
 
@@ -406,9 +406,9 @@
 define <2 x i64> @usubw2_2d(<2 x i64>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: usubw2_2d:
 ;CHECK: usubw2.2d
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
 
-        %tmp2 = load <4 x i32>* %B
+        %tmp2 = load <4 x i32>, <4 x i32>* %B
         %high2 = shufflevector <4 x i32> %tmp2, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
         %ext2 = zext <2 x i32> %high2 to <2 x i64>
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-weak-reference.ll b/llvm/test/CodeGen/AArch64/arm64-weak-reference.ll
index b2135e0..e8074de 100644
--- a/llvm/test/CodeGen/AArch64/arm64-weak-reference.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-weak-reference.ll
@@ -5,6 +5,6 @@
 define i32 @fn() nounwind ssp {
 ; CHECK-LABEL: fn:
 ; CHECK: .weak_reference
-  %val = load i32* @x, align 4
+  %val = load i32, i32* @x, align 4
   ret i32 %val
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-zextload-unscaled.ll b/llvm/test/CodeGen/AArch64/arm64-zextload-unscaled.ll
index 8ea2149..321cf10 100644
--- a/llvm/test/CodeGen/AArch64/arm64-zextload-unscaled.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-zextload-unscaled.ll
@@ -7,7 +7,7 @@
 ; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-7]
 
   %addr = getelementptr i1, i1* %base, i32 -7
-  %val = load i1* %addr, align 1
+  %val = load i1, i1* %addr, align 1
 
   %extended = zext i1 %val to i32
   store i32 %extended, i32* @var32, align 4
@@ -19,7 +19,7 @@
 ; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-7]
 
   %addr = getelementptr i8, i8* %base, i32 -7
-  %val = load i8* %addr, align 1
+  %val = load i8, i8* %addr, align 1
 
   %extended = zext i8 %val to i32
   store i32 %extended, i32* @var32, align 4
@@ -31,7 +31,7 @@
 ; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #-14]
 
   %addr = getelementptr i16, i16* %base, i32 -7
-  %val = load i16* %addr, align 2
+  %val = load i16, i16* %addr, align 2
 
   %extended = zext i16 %val to i32
   store i32 %extended, i32* @var32, align 4
diff --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll
index 304b280..ddce002 100644
--- a/llvm/test/CodeGen/AArch64/arm64-zip.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll
@@ -5,8 +5,8 @@
 ;CHECK: zip1.8b
 ;CHECK: zip2.8b
 ;CHECK-NEXT: add.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
         %tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -18,8 +18,8 @@
 ;CHECK: zip1.4h
 ;CHECK: zip2.4h
 ;CHECK-NEXT: add.4h
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 	%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
         %tmp5 = add <4 x i16> %tmp3, %tmp4
@@ -31,8 +31,8 @@
 ;CHECK: zip1.16b
 ;CHECK: zip2.16b
 ;CHECK-NEXT: add.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
 	%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
         %tmp5 = add <16 x i8> %tmp3, %tmp4
@@ -44,8 +44,8 @@
 ;CHECK: zip1.8h
 ;CHECK: zip2.8h
 ;CHECK-NEXT: add.8h
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
         %tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -57,8 +57,8 @@
 ;CHECK: zip1.4s
 ;CHECK: zip2.4s
 ;CHECK-NEXT: add.4s
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 	%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
         %tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -70,8 +70,8 @@
 ;CHECK: zip1.4s
 ;CHECK: zip2.4s
 ;CHECK-NEXT: fadd.4s
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 	%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
         %tmp5 = fadd <4 x float> %tmp3, %tmp4
@@ -85,8 +85,8 @@
 ;CHECK: zip1.8b
 ;CHECK: zip2.8b
 ;CHECK-NEXT: add.8b
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
 	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
         %tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -98,8 +98,8 @@
 ;CHECK: zip1.16b
 ;CHECK: zip2.16b
 ;CHECK-NEXT: add.16b
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
 	%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
         %tmp5 = add <16 x i8> %tmp3, %tmp4
diff --git a/llvm/test/CodeGen/AArch64/assertion-rc-mismatch.ll b/llvm/test/CodeGen/AArch64/assertion-rc-mismatch.ll
index 287a426..c216c44 100644
--- a/llvm/test/CodeGen/AArch64/assertion-rc-mismatch.ll
+++ b/llvm/test/CodeGen/AArch64/assertion-rc-mismatch.ll
@@ -12,7 +12,7 @@
 else:
   %tmp3 = call i8* @llvm.returnaddress(i32 0)
   %ptr = getelementptr inbounds i8, i8* %tmp3, i64 -16
-  %ld = load i8* %ptr, align 4
+  %ld = load i8, i8* %ptr, align 4
   %tmp2 = inttoptr i8 %ld to i8*
   br label %end
 end:
diff --git a/llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll b/llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
index da095a0..44e7771 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops-not-barriers.ll
@@ -4,7 +4,7 @@
 ; CHECK-LABEL: foo:
   br i1 %cond, label %atomic_ver, label %simple_ver
 simple_ver:
-  %oldval = load i32* %var
+  %oldval = load i32, i32* %var
   %newval = add nsw i32 %oldval, -1
   store i32 %newval, i32* %var
   br label %somewhere
diff --git a/llvm/test/CodeGen/AArch64/atomic-ops.ll b/llvm/test/CodeGen/AArch64/atomic-ops.ll
index ef209e9c..cb90cae 100644
--- a/llvm/test/CodeGen/AArch64/atomic-ops.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops.ll
@@ -972,7 +972,7 @@
 
 define i8 @test_atomic_load_monotonic_i8() nounwind {
 ; CHECK-LABEL: test_atomic_load_monotonic_i8:
-  %val = load atomic i8* @var8 monotonic, align 1
+  %val = load atomic i8, i8* @var8 monotonic, align 1
 ; CHECK-NOT: dmb
 ; CHECK: adrp x[[HIADDR:[0-9]+]], var8
 ; CHECK: ldrb w0, [x[[HIADDR]], {{#?}}:lo12:var8]
@@ -986,7 +986,7 @@
   %addr_int = add i64 %base, %off
   %addr = inttoptr i64 %addr_int to i8*
 
-  %val = load atomic i8* %addr monotonic, align 1
+  %val = load atomic i8, i8* %addr monotonic, align 1
 ; CHECK-NOT: dmb
 ; CHECK: ldrb w0, [x0, x1]
 ; CHECK-NOT: dmb
@@ -996,7 +996,7 @@
 
 define i8 @test_atomic_load_acquire_i8() nounwind {
 ; CHECK-LABEL: test_atomic_load_acquire_i8:
-  %val = load atomic i8* @var8 acquire, align 1
+  %val = load atomic i8, i8* @var8 acquire, align 1
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
 ; CHECK-NOT: dmb
@@ -1009,7 +1009,7 @@
 
 define i8 @test_atomic_load_seq_cst_i8() nounwind {
 ; CHECK-LABEL: test_atomic_load_seq_cst_i8:
-  %val = load atomic i8* @var8 seq_cst, align 1
+  %val = load atomic i8, i8* @var8 seq_cst, align 1
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[HIADDR:x[0-9]+]], var8
 ; CHECK-NOT: dmb
@@ -1022,7 +1022,7 @@
 
 define i16 @test_atomic_load_monotonic_i16() nounwind {
 ; CHECK-LABEL: test_atomic_load_monotonic_i16:
-  %val = load atomic i16* @var16 monotonic, align 2
+  %val = load atomic i16, i16* @var16 monotonic, align 2
 ; CHECK-NOT: dmb
 ; CHECK: adrp x[[HIADDR:[0-9]+]], var16
 ; CHECK-NOT: dmb
@@ -1037,7 +1037,7 @@
   %addr_int = add i64 %base, %off
   %addr = inttoptr i64 %addr_int to i32*
 
-  %val = load atomic i32* %addr monotonic, align 4
+  %val = load atomic i32, i32* %addr monotonic, align 4
 ; CHECK-NOT: dmb
 ; CHECK: ldr w0, [x0, x1]
 ; CHECK-NOT: dmb
@@ -1047,7 +1047,7 @@
 
 define i64 @test_atomic_load_seq_cst_i64() nounwind {
 ; CHECK-LABEL: test_atomic_load_seq_cst_i64:
-  %val = load atomic i64* @var64 seq_cst, align 8
+  %val = load atomic i64, i64* @var64 seq_cst, align 8
 ; CHECK-NOT: dmb
 ; CHECK: adrp [[HIADDR:x[0-9]+]], var64
 ; CHECK-NOT: dmb
diff --git a/llvm/test/CodeGen/AArch64/basic-pic.ll b/llvm/test/CodeGen/AArch64/basic-pic.ll
index 62d41bcea..8765a6d 100644
--- a/llvm/test/CodeGen/AArch64/basic-pic.ll
+++ b/llvm/test/CodeGen/AArch64/basic-pic.ll
@@ -5,7 +5,7 @@
 define i32 @get_globalvar() {
 ; CHECK-LABEL: get_globalvar:
 
-  %val = load i32* @var
+  %val = load i32, i32* @var
 ; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
 ; CHECK: ldr x[[GOTLOC:[0-9]+]], [x[[GOTHI]], {{#?}}:got_lo12:var]
 ; CHECK: ldr w0, [x[[GOTLOC]]]
@@ -16,7 +16,7 @@
 define i32* @get_globalvaraddr() {
 ; CHECK-LABEL: get_globalvaraddr:
 
-  %val = load i32* @var
+  %val = load i32, i32* @var
 ; CHECK: adrp x[[GOTHI:[0-9]+]], :got:var
 ; CHECK: ldr x0, [x[[GOTHI]], {{#?}}:got_lo12:var]
 
@@ -28,7 +28,7 @@
 define i32 @get_hiddenvar() {
 ; CHECK-LABEL: get_hiddenvar:
 
-  %val = load i32* @hiddenvar
+  %val = load i32, i32* @hiddenvar
 ; CHECK: adrp x[[HI:[0-9]+]], hiddenvar
 ; CHECK: ldr w0, [x[[HI]], {{#?}}:lo12:hiddenvar]
 
@@ -38,7 +38,7 @@
 define i32* @get_hiddenvaraddr() {
 ; CHECK-LABEL: get_hiddenvaraddr:
 
-  %val = load i32* @hiddenvar
+  %val = load i32, i32* @hiddenvar
 ; CHECK: adrp [[HI:x[0-9]+]], hiddenvar
 ; CHECK: add x0, [[HI]], {{#?}}:lo12:hiddenvar
 
diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert-0.ll b/llvm/test/CodeGen/AArch64/bitfield-insert-0.ll
index da0ed8a..21f3895 100644
--- a/llvm/test/CodeGen/AArch64/bitfield-insert-0.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert-0.ll
@@ -6,10 +6,10 @@
 define void @test_bfi0(i32* %existing, i32* %new) {
 ; CHECK: bfxil {{w[0-9]+}}, {{w[0-9]+}}, #0, #18
 
-  %oldval = load volatile i32* %existing
+  %oldval = load volatile i32, i32* %existing
   %oldval_keep = and i32 %oldval, 4294705152 ; 0xfffc_0000
 
-  %newval = load volatile i32* %new
+  %newval = load volatile i32, i32* %new
   %newval_masked = and i32 %newval, 262143 ; = 0x0003_ffff
 
   %combined = or i32 %newval_masked, %oldval_keep
diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
index 2369a55..9b731fa 100644
--- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -28,10 +28,10 @@
 
 ; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #26, #5
 
-  %oldval = load volatile i32* %existing
+  %oldval = load volatile i32, i32* %existing
   %oldval_keep = and i32 %oldval, 2214592511 ; =0x83ffffff
 
-  %newval = load volatile i32* %new
+  %newval = load volatile i32, i32* %new
   %newval_shifted = shl i32 %newval, 26
   %newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000
 
@@ -47,10 +47,10 @@
 ; CHECK-NOT: and
 ; CHECK: ret
 
-  %oldval = load volatile i64* %existing
+  %oldval = load volatile i64, i64* %existing
   %oldval_keep = and i64 %oldval, 18446742974265032703 ; = 0xffffff0003ffffffL
 
-  %newval = load volatile i64* %new
+  %newval = load volatile i64, i64* %new
   %newval_shifted = shl i64 %newval, 26
   %newval_masked = and i64 %newval_shifted, 1099444518912 ; = 0xfffc000000
 
@@ -68,10 +68,10 @@
 
 ; CHECK: ret
 
-  %oldval = load volatile i64* %existing
+  %oldval = load volatile i64, i64* %existing
   %oldval_keep = and i64 %oldval, 4294901760 ; = 0xffff0000
 
-  %newval = load volatile i64* %new
+  %newval = load volatile i64, i64* %new
   %newval_masked = and i64 %newval, 65535 ; = 0xffff
 
   %combined = or i64 %oldval_keep, %newval_masked
@@ -86,10 +86,10 @@
 ; CHECK: and
 ; CHECK: bfi [[INSERT:w[0-9]+]], {{w[0-9]+}}, #3, #4
 
-  %oldval = load volatile i32* %existing
+  %oldval = load volatile i32, i32* %existing
   %oldval_keep = and i32 %oldval, 135 ; = 0x87
 
-  %newval = load volatile i32* %new
+  %newval = load volatile i32, i32* %new
   %newval_shifted = shl i32 %newval, 3
   %newval_masked = and i32 %newval_shifted, 120 ; = 0x78
 
@@ -104,10 +104,10 @@
 ; CHECK: and
 ; CHECK: bfi [[INSERT:x[0-9]+]], {{x[0-9]+}}, #40, #8
 
-  %oldval = load volatile i64* %existing
+  %oldval = load volatile i64, i64* %existing
   %oldval_keep = and i64 %oldval, 1095216660480 ; = 0xff_0000_0000
 
-  %newval = load volatile i64* %new
+  %newval = load volatile i64, i64* %new
   %newval_shifted = shl i64 %newval, 40
   %newval_masked = and i64 %newval_shifted, 280375465082880 ; = 0xff00_0000_0000
 
@@ -124,10 +124,10 @@
 ; CHECK: and
 ; CHECK: bfi {{w[0-9]+}}, {{w[0-9]+}}, #3, #4
 
-  %oldval = load volatile i32* %existing
+  %oldval = load volatile i32, i32* %existing
   %oldval_keep = and i32 %oldval, 647 ; = 0x287
 
-  %newval = load volatile i32* %new
+  %newval = load volatile i32, i32* %new
   %newval_shifted = shl i32 %newval, 3
   %newval_masked = and i32 %newval_shifted, 120 ; = 0x278
 
@@ -144,10 +144,10 @@
 ; CHECK-NOT: bfm
 ; CHECK: ret
 
-  %oldval = load volatile i32* %existing
+  %oldval = load volatile i32, i32* %existing
   %oldval_keep = and i32 %oldval, 135 ; = 0x87
 
-  %newval = load volatile i32* %new
+  %newval = load volatile i32, i32* %new
   %newval_shifted = shl i32 %newval, 3
   %newval_masked = and i32 %newval_shifted, 632 ; = 0x278
 
@@ -164,10 +164,10 @@
 ; CHECK-NOT: bfm
 ; CHECK: ret
 
-  %oldval = load volatile i64* %existing
+  %oldval = load volatile i64, i64* %existing
   %oldval_keep = and i64 %oldval, 135 ; = 0x87
 
-  %newval = load volatile i64* %new
+  %newval = load volatile i64, i64* %new
   %newval_shifted = shl i64 %newval, 3
   %newval_masked = and i64 %newval_shifted, 664 ; = 0x278
 
@@ -182,10 +182,10 @@
 define void @test_32bit_with_shr(i32* %existing, i32* %new) {
 ; CHECK-LABEL: test_32bit_with_shr:
 
-  %oldval = load volatile i32* %existing
+  %oldval = load volatile i32, i32* %existing
   %oldval_keep = and i32 %oldval, 2214592511 ; =0x83ffffff
 
-  %newval = load i32* %new
+  %newval = load i32, i32* %new
   %newval_shifted = shl i32 %newval, 12
   %newval_masked = and i32 %newval_shifted, 2080374784 ; = 0x7c000000
 
diff --git a/llvm/test/CodeGen/AArch64/bitfield.ll b/llvm/test/CodeGen/AArch64/bitfield.ll
index 0e12653..78399c8 100644
--- a/llvm/test/CodeGen/AArch64/bitfield.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield.ll
@@ -180,7 +180,7 @@
 ; CHECK-LABEL: test_ubfx32:
 ; CHECK: ubfx {{w[0-9]+}}, {{w[0-9]+}}, #23, #3
 
-   %fields = load i32* %addr
+   %fields = load i32, i32* %addr
    %shifted = lshr i32 %fields, 23
    %masked = and i32 %shifted, 7
    ret i32 %masked
@@ -189,7 +189,7 @@
 define i64 @test_ubfx64(i64* %addr) {
 ; CHECK-LABEL: test_ubfx64:
 ; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #25, #10
-   %fields = load i64* %addr
+   %fields = load i64, i64* %addr
    %shifted = lshr i64 %fields, 25
    %masked = and i64 %shifted, 1023
    ret i64 %masked
@@ -199,7 +199,7 @@
 ; CHECK-LABEL: test_sbfx32:
 ; CHECK: sbfx {{w[0-9]+}}, {{w[0-9]+}}, #6, #3
 
-   %fields = load i32* %addr
+   %fields = load i32, i32* %addr
    %shifted = shl i32 %fields, 23
    %extended = ashr i32 %shifted, 29
    ret i32 %extended
@@ -209,7 +209,7 @@
 ; CHECK-LABEL: test_sbfx64:
 ; CHECK: sbfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #63
 
-   %fields = load i64* %addr
+   %fields = load i64, i64* %addr
    %shifted = shl i64 %fields, 1
    %extended = ashr i64 %shifted, 1
    ret i64 %extended
diff --git a/llvm/test/CodeGen/AArch64/blockaddress.ll b/llvm/test/CodeGen/AArch64/blockaddress.ll
index 3a5dbdc..e93c69f 100644
--- a/llvm/test/CodeGen/AArch64/blockaddress.ll
+++ b/llvm/test/CodeGen/AArch64/blockaddress.ll
@@ -6,7 +6,7 @@
 define void @test_blockaddress() {
 ; CHECK-LABEL: test_blockaddress:
   store volatile i8* blockaddress(@test_blockaddress, %block), i8** @addr
-  %val = load volatile i8** @addr
+  %val = load volatile i8*, i8** @addr
   indirectbr i8* %val, [label %block]
 ; CHECK: adrp [[DEST_HI:x[0-9]+]], [[DEST_LBL:.Ltmp[0-9]+]]
 ; CHECK: add [[DEST:x[0-9]+]], [[DEST_HI]], {{#?}}:lo12:[[DEST_LBL]]
diff --git a/llvm/test/CodeGen/AArch64/bool-loads.ll b/llvm/test/CodeGen/AArch64/bool-loads.ll
index 881aeaa..b0ee1b4 100644
--- a/llvm/test/CodeGen/AArch64/bool-loads.ll
+++ b/llvm/test/CodeGen/AArch64/bool-loads.ll
@@ -5,7 +5,7 @@
 define i32 @test_sextloadi32() {
 ; CHECK-LABEL: test_sextloadi32
 
-  %val = load i1* @var
+  %val = load i1, i1* @var
   %ret = sext i1 %val to i32
 ; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var]
 ; CHECK: {{sbfx x[0-9]+, x[0-9]+, #0, #1|sbfx w[0-9]+, w[0-9]+, #0, #1}}
@@ -17,7 +17,7 @@
 define i64 @test_sextloadi64() {
 ; CHECK-LABEL: test_sextloadi64
 
-  %val = load i1* @var
+  %val = load i1, i1* @var
   %ret = sext i1 %val to i64
 ; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var]
 ; CHECK: {{sbfx x[0-9]+, x[0-9]+, #0, #1}}
@@ -32,7 +32,7 @@
 ; It's not actually necessary that "ret" is next, but as far as LLVM
 ; is concerned only 0 or 1 should be loadable so no extension is
 ; necessary.
-  %val = load i1* @var
+  %val = load i1, i1* @var
   %ret = zext i1 %val to i32
 ; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var]
 
@@ -46,7 +46,7 @@
 ; It's not actually necessary that "ret" is next, but as far as LLVM
 ; is concerned only 0 or 1 should be loadable so no extension is
 ; necessary.
-  %val = load i1* @var
+  %val = load i1, i1* @var
   %ret = zext i1 %val to i64
 ; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var]
 
diff --git a/llvm/test/CodeGen/AArch64/breg.ll b/llvm/test/CodeGen/AArch64/breg.ll
index 9524044..42061a8 100644
--- a/llvm/test/CodeGen/AArch64/breg.ll
+++ b/llvm/test/CodeGen/AArch64/breg.ll
@@ -4,7 +4,7 @@
 
 define void @foo() {
 ; CHECK-LABEL: foo:
-  %lab = load i8** @stored_label
+  %lab = load i8*, i8** @stored_label
   indirectbr i8* %lab, [label  %otherlab, label %retlab]
 ; CHECK: adrp {{x[0-9]+}}, stored_label
 ; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:stored_label]
diff --git a/llvm/test/CodeGen/AArch64/callee-save.ll b/llvm/test/CodeGen/AArch64/callee-save.ll
index 046e6ce..1234039 100644
--- a/llvm/test/CodeGen/AArch64/callee-save.ll
+++ b/llvm/test/CodeGen/AArch64/callee-save.ll
@@ -12,38 +12,38 @@
 
   ; Create lots of live variables to exhaust the supply of
   ; caller-saved registers
-  %val1 = load volatile float* @var
-  %val2 = load volatile float* @var
-  %val3 = load volatile float* @var
-  %val4 = load volatile float* @var
-  %val5 = load volatile float* @var
-  %val6 = load volatile float* @var
-  %val7 = load volatile float* @var
-  %val8 = load volatile float* @var
-  %val9 = load volatile float* @var
-  %val10 = load volatile float* @var
-  %val11 = load volatile float* @var
-  %val12 = load volatile float* @var
-  %val13 = load volatile float* @var
-  %val14 = load volatile float* @var
-  %val15 = load volatile float* @var
-  %val16 = load volatile float* @var
-  %val17 = load volatile float* @var
-  %val18 = load volatile float* @var
-  %val19 = load volatile float* @var
-  %val20 = load volatile float* @var
-  %val21 = load volatile float* @var
-  %val22 = load volatile float* @var
-  %val23 = load volatile float* @var
-  %val24 = load volatile float* @var
-  %val25 = load volatile float* @var
-  %val26 = load volatile float* @var
-  %val27 = load volatile float* @var
-  %val28 = load volatile float* @var
-  %val29 = load volatile float* @var
-  %val30 = load volatile float* @var
-  %val31 = load volatile float* @var
-  %val32 = load volatile float* @var
+  %val1 = load volatile float, float* @var
+  %val2 = load volatile float, float* @var
+  %val3 = load volatile float, float* @var
+  %val4 = load volatile float, float* @var
+  %val5 = load volatile float, float* @var
+  %val6 = load volatile float, float* @var
+  %val7 = load volatile float, float* @var
+  %val8 = load volatile float, float* @var
+  %val9 = load volatile float, float* @var
+  %val10 = load volatile float, float* @var
+  %val11 = load volatile float, float* @var
+  %val12 = load volatile float, float* @var
+  %val13 = load volatile float, float* @var
+  %val14 = load volatile float, float* @var
+  %val15 = load volatile float, float* @var
+  %val16 = load volatile float, float* @var
+  %val17 = load volatile float, float* @var
+  %val18 = load volatile float, float* @var
+  %val19 = load volatile float, float* @var
+  %val20 = load volatile float, float* @var
+  %val21 = load volatile float, float* @var
+  %val22 = load volatile float, float* @var
+  %val23 = load volatile float, float* @var
+  %val24 = load volatile float, float* @var
+  %val25 = load volatile float, float* @var
+  %val26 = load volatile float, float* @var
+  %val27 = load volatile float, float* @var
+  %val28 = load volatile float, float* @var
+  %val29 = load volatile float, float* @var
+  %val30 = load volatile float, float* @var
+  %val31 = load volatile float, float* @var
+  %val32 = load volatile float, float* @var
 
   store volatile float %val1, float* @var
   store volatile float %val2, float* @var
diff --git a/llvm/test/CodeGen/AArch64/cmpwithshort.ll b/llvm/test/CodeGen/AArch64/cmpwithshort.ll
index 4a017d4..6590997 100644
--- a/llvm/test/CodeGen/AArch64/cmpwithshort.ll
+++ b/llvm/test/CodeGen/AArch64/cmpwithshort.ll
@@ -6,7 +6,7 @@
 ; CHECK-NEXT: cmn
 entry:
   %addr = getelementptr inbounds i16, i16* %ptr1, i16 0
-  %val = load i16* %addr, align 2
+  %val = load i16, i16* %addr, align 2
   %cmp = icmp eq i16 %val, -1
   br i1 %cmp, label %if, label %if.then
 if:
@@ -21,7 +21,7 @@
 ; CHECK-NEXT: cmn
 entry:
   %addr = getelementptr inbounds i16, i16* %ptr1, i16 0
-  %val = load i16* %addr, align 2
+  %val = load i16, i16* %addr, align 2
   %cmp = icmp sge i16 %val, -1
   br i1 %cmp, label %if, label %if.then
 if:
@@ -36,7 +36,7 @@
 ; CHECK-NEXT: cmn
 entry:
   %addr = getelementptr inbounds i16, i16* %ptr1, i16 0
-  %val = load i16* %addr, align 2
+  %val = load i16, i16* %addr, align 2
   %cmp = icmp uge i16 %val, -1
   br i1 %cmp, label %if, label %if.then
 if:
diff --git a/llvm/test/CodeGen/AArch64/code-model-large-abs.ll b/llvm/test/CodeGen/AArch64/code-model-large-abs.ll
index ca92500..1680815 100644
--- a/llvm/test/CodeGen/AArch64/code-model-large-abs.ll
+++ b/llvm/test/CodeGen/AArch64/code-model-large-abs.ll
@@ -18,7 +18,7 @@
 
 define i8 @global_i8() {
 ; CHECK-LABEL: global_i8:
-  %val = load i8* @var8
+  %val = load i8, i8* @var8
   ret i8 %val
 ; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var8
 ; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var8
@@ -29,7 +29,7 @@
 
 define i16 @global_i16() {
 ; CHECK-LABEL: global_i16:
-  %val = load i16* @var16
+  %val = load i16, i16* @var16
   ret i16 %val
 ; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var16
 ; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var16
@@ -40,7 +40,7 @@
 
 define i32 @global_i32() {
 ; CHECK-LABEL: global_i32:
-  %val = load i32* @var32
+  %val = load i32, i32* @var32
   ret i32 %val
 ; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var32
 ; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var32
@@ -51,7 +51,7 @@
 
 define i64 @global_i64() {
 ; CHECK-LABEL: global_i64:
-  %val = load i64* @var64
+  %val = load i64, i64* @var64
   ret i64 %val
 ; CHECK: movz x[[ADDR_REG:[0-9]+]], #:abs_g3:var64
 ; CHECK: movk x[[ADDR_REG]], #:abs_g2_nc:var64
diff --git a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
index ccdd207..c78faba 100644
--- a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
+++ b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
@@ -15,13 +15,13 @@
 ; CHECK-NOT: cmp
 ; CHECK: b.lt
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp = icmp sgt i32 %0, 10
   br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
 
 land.lhs.true:                                    ; preds = %entry
-  %1 = load i32* @b, align 4
-  %2 = load i32* @c, align 4
+  %1 = load i32, i32* @b, align 4
+  %2 = load i32, i32* @c, align 4
   %cmp1 = icmp eq i32 %1, %2
   br i1 %cmp1, label %return, label %land.lhs.true3
 
@@ -30,8 +30,8 @@
   br i1 %cmp2, label %land.lhs.true3, label %if.end
 
 land.lhs.true3:                                   ; preds = %lor.lhs.false, %land.lhs.true
-  %3 = load i32* @b, align 4
-  %4 = load i32* @d, align 4
+  %3 = load i32, i32* @b, align 4
+  %4 = load i32, i32* @d, align 4
   %cmp4 = icmp eq i32 %3, %4
   br i1 %cmp4, label %return, label %if.end
 
@@ -52,13 +52,13 @@
 ; CHECK-NOT: cmp
 ; CHECK: b.ge
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp = icmp sgt i32 %0, 5
   br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
 
 land.lhs.true:                                    ; preds = %entry
-  %1 = load i32* @b, align 4
-  %2 = load i32* @c, align 4
+  %1 = load i32, i32* @b, align 4
+  %2 = load i32, i32* @c, align 4
   %cmp1 = icmp eq i32 %1, %2
   br i1 %cmp1, label %return, label %if.end
 
@@ -67,8 +67,8 @@
   br i1 %cmp2, label %land.lhs.true3, label %if.end
 
 land.lhs.true3:                                   ; preds = %lor.lhs.false
-  %3 = load i32* @b, align 4
-  %4 = load i32* @d, align 4
+  %3 = load i32, i32* @b, align 4
+  %4 = load i32, i32* @d, align 4
   %cmp4 = icmp eq i32 %3, %4
   br i1 %cmp4, label %return, label %if.end
 
@@ -89,13 +89,13 @@
 ; CHECK-NOT: cmp
 ; CHECK: b.gt
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp = icmp slt i32 %0, 5
   br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
 
 land.lhs.true:                                    ; preds = %entry
-  %1 = load i32* @b, align 4
-  %2 = load i32* @c, align 4
+  %1 = load i32, i32* @b, align 4
+  %2 = load i32, i32* @c, align 4
   %cmp1 = icmp eq i32 %1, %2
   br i1 %cmp1, label %return, label %land.lhs.true3
 
@@ -104,8 +104,8 @@
   br i1 %cmp2, label %land.lhs.true3, label %if.end
 
 land.lhs.true3:                                   ; preds = %lor.lhs.false, %land.lhs.true
-  %3 = load i32* @b, align 4
-  %4 = load i32* @d, align 4
+  %3 = load i32, i32* @b, align 4
+  %4 = load i32, i32* @d, align 4
   %cmp4 = icmp eq i32 %3, %4
   br i1 %cmp4, label %return, label %if.end
 
@@ -126,13 +126,13 @@
 ; CHECK-NOT: cmp
 ; CHECK: b.le
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp = icmp slt i32 %0, 5
   br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
 
 land.lhs.true:                                    ; preds = %entry
-  %1 = load i32* @b, align 4
-  %2 = load i32* @c, align 4
+  %1 = load i32, i32* @b, align 4
+  %2 = load i32, i32* @c, align 4
   %cmp1 = icmp eq i32 %1, %2
   br i1 %cmp1, label %return, label %if.end
 
@@ -141,8 +141,8 @@
   br i1 %cmp2, label %land.lhs.true3, label %if.end
 
 land.lhs.true3:                                   ; preds = %lor.lhs.false
-  %3 = load i32* @b, align 4
-  %4 = load i32* @d, align 4
+  %3 = load i32, i32* @b, align 4
+  %4 = load i32, i32* @d, align 4
   %cmp4 = icmp eq i32 %3, %4
   br i1 %cmp4, label %return, label %if.end
 
@@ -163,13 +163,13 @@
 ; CHECK-NOT: cmn
 ; CHECK: b.ge
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp = icmp sgt i32 %0, -5
   br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
 
 land.lhs.true:                                    ; preds = %entry
-  %1 = load i32* @b, align 4
-  %2 = load i32* @c, align 4
+  %1 = load i32, i32* @b, align 4
+  %2 = load i32, i32* @c, align 4
   %cmp1 = icmp eq i32 %1, %2
   br i1 %cmp1, label %return, label %if.end
 
@@ -178,8 +178,8 @@
   br i1 %cmp2, label %land.lhs.true3, label %if.end
 
 land.lhs.true3:                                   ; preds = %lor.lhs.false
-  %3 = load i32* @b, align 4
-  %4 = load i32* @d, align 4
+  %3 = load i32, i32* @b, align 4
+  %4 = load i32, i32* @d, align 4
   %cmp4 = icmp eq i32 %3, %4
   br i1 %cmp4, label %return, label %if.end
 
@@ -200,13 +200,13 @@
 ; CHECK-NOT: cmn
 ; CHECK: b.le
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp = icmp slt i32 %0, -5
   br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
 
 land.lhs.true:                                    ; preds = %entry
-  %1 = load i32* @b, align 4
-  %2 = load i32* @c, align 4
+  %1 = load i32, i32* @b, align 4
+  %2 = load i32, i32* @c, align 4
   %cmp1 = icmp eq i32 %1, %2
   br i1 %cmp1, label %return, label %if.end
 
@@ -215,8 +215,8 @@
   br i1 %cmp2, label %land.lhs.true3, label %if.end
 
 land.lhs.true3:                                   ; preds = %lor.lhs.false
-  %3 = load i32* @b, align 4
-  %4 = load i32* @d, align 4
+  %3 = load i32, i32* @b, align 4
+  %4 = load i32, i32* @d, align 4
   %cmp4 = icmp eq i32 %3, %4
   br i1 %cmp4, label %return, label %if.end
 
@@ -238,17 +238,17 @@
 define void @combine_non_adjacent_cmp_br(%struct.Struct* nocapture readonly %hdCall) #0 {
 entry:
   %size = getelementptr inbounds %struct.Struct, %struct.Struct* %hdCall, i64 0, i32 0
-  %0 = load i64* %size, align 8
+  %0 = load i64, i64* %size, align 8
   br label %land.rhs
 
 land.rhs:
   %rp.06 = phi i64 [ %0, %entry ], [ %sub, %while.body ]
-  %1 = load i64* inttoptr (i64 24 to i64*), align 8
+  %1 = load i64, i64* inttoptr (i64 24 to i64*), align 8
   %cmp2 = icmp sgt i64 %1, 0
   br i1 %cmp2, label %while.body, label %while.end
 
 while.body:
-  %2 = load %struct.Struct** @glob, align 8
+  %2 = load %struct.Struct*, %struct.Struct** @glob, align 8
   %call = tail call %struct.Struct* @Update(%struct.Struct* %2) #2
   %sub = add nsw i64 %rp.06, -2
   %cmp = icmp slt i64 %0, %rp.06
@@ -268,7 +268,7 @@
 ; CHECK: cmp
 ; CHECK: b.gt
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp4 = icmp slt i32 %0, -1
   br i1 %cmp4, label %while.body.preheader, label %while.end
 
@@ -283,7 +283,7 @@
   br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge
 
 while.cond.while.end_crit_edge:                   ; preds = %while.body
-  %.pre = load i32* @a, align 4
+  %.pre = load i32, i32* @a, align 4
   br label %while.end
 
 while.end:                                        ; preds = %while.cond.while.end_crit_edge, %entry
@@ -292,8 +292,8 @@
   br i1 %cmp1, label %land.lhs.true, label %if.end
 
 land.lhs.true:                                    ; preds = %while.end
-  %2 = load i32* @b, align 4
-  %3 = load i32* @d, align 4
+  %2 = load i32, i32* @b, align 4
+  %3 = load i32, i32* @d, align 4
   %cmp2 = icmp eq i32 %2, %3
   br i1 %cmp2, label %return, label %if.end
 
@@ -312,7 +312,7 @@
 ; CHECK: cmn
 ; CHECK: b.lt
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp4 = icmp slt i32 %0, 1
   br i1 %cmp4, label %while.body.preheader, label %while.end
 
@@ -330,13 +330,13 @@
   br label %while.end
 
 while.end:                                        ; preds = %while.end.loopexit, %entry
-  %1 = load i32* @c, align 4
+  %1 = load i32, i32* @c, align 4
   %cmp1 = icmp sgt i32 %1, -3
   br i1 %cmp1, label %land.lhs.true, label %if.end
 
 land.lhs.true:                                    ; preds = %while.end
-  %2 = load i32* @b, align 4
-  %3 = load i32* @d, align 4
+  %2 = load i32, i32* @b, align 4
+  %3 = load i32, i32* @d, align 4
   %cmp2 = icmp eq i32 %2, %3
   br i1 %cmp2, label %return, label %if.end
 
@@ -375,7 +375,7 @@
 
 land.lhs.true:                                    ; preds = %entry
   %arrayidx = getelementptr inbounds i8*, i8** %argv, i64 1
-  %0 = load i8** %arrayidx, align 8
+  %0 = load i8*, i8** %arrayidx, align 8
   %cmp1 = icmp eq i8* %0, null
   br i1 %cmp1, label %if.end, label %return
 
diff --git a/llvm/test/CodeGen/AArch64/compare-branch.ll b/llvm/test/CodeGen/AArch64/compare-branch.ll
index a1a87cf..4e0f69d 100644
--- a/llvm/test/CodeGen/AArch64/compare-branch.ll
+++ b/llvm/test/CodeGen/AArch64/compare-branch.ll
@@ -6,25 +6,25 @@
 define void @foo() {
 ; CHECK-LABEL: foo:
 
-  %val1 = load volatile i32* @var32
+  %val1 = load volatile i32, i32* @var32
   %tst1 = icmp eq i32 %val1, 0
   br i1 %tst1, label %end, label %test2
 ; CHECK: cbz {{w[0-9]+}}, .LBB
 
 test2:
-  %val2 = load volatile i32* @var32
+  %val2 = load volatile i32, i32* @var32
   %tst2 = icmp ne i32 %val2, 0
   br i1 %tst2, label %end, label %test3
 ; CHECK: cbnz {{w[0-9]+}}, .LBB
 
 test3:
-  %val3 = load volatile i64* @var64
+  %val3 = load volatile i64, i64* @var64
   %tst3 = icmp eq i64 %val3, 0
   br i1 %tst3, label %end, label %test4
 ; CHECK: cbz {{x[0-9]+}}, .LBB
 
 test4:
-  %val4 = load volatile i64* @var64
+  %val4 = load volatile i64, i64* @var64
   %tst4 = icmp ne i64 %val4, 0
   br i1 %tst4, label %end, label %test5
 ; CHECK: cbnz {{x[0-9]+}}, .LBB
diff --git a/llvm/test/CodeGen/AArch64/complex-copy-noneon.ll b/llvm/test/CodeGen/AArch64/complex-copy-noneon.ll
index d5bd6b6e..b7c7043 100644
--- a/llvm/test/CodeGen/AArch64/complex-copy-noneon.ll
+++ b/llvm/test/CodeGen/AArch64/complex-copy-noneon.ll
@@ -9,9 +9,9 @@
   %dst = alloca { double, double }, align 8
 
   %src.realp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 0
-  %src.real = load double* %src.realp
+  %src.real = load double, double* %src.realp
   %src.imagp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 1
-  %src.imag = load double* %src.imagp
+  %src.imag = load double, double* %src.imagp
 
   %dst.realp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 0
   %dst.imagp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 1
diff --git a/llvm/test/CodeGen/AArch64/complex-int-to-fp.ll b/llvm/test/CodeGen/AArch64/complex-int-to-fp.ll
index 5c943f9..1102553 100644
--- a/llvm/test/CodeGen/AArch64/complex-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/complex-int-to-fp.ll
@@ -4,7 +4,7 @@
 ; CHECK: scvtf
 ; CHECK: ret
 define void @autogen_SD19655(<2 x i64>* %addr, <2 x float>* %addrfloat) {
-  %T = load <2 x i64>* %addr
+  %T = load <2 x i64>, <2 x i64>* %addr
   %F = sitofp <2 x i64> %T to <2 x float>
   store <2 x float> %F, <2 x float>* %addrfloat
   ret void
diff --git a/llvm/test/CodeGen/AArch64/dag-combine-invaraints.ll b/llvm/test/CodeGen/AArch64/dag-combine-invaraints.ll
index 115fc64..11c6385 100644
--- a/llvm/test/CodeGen/AArch64/dag-combine-invaraints.ll
+++ b/llvm/test/CodeGen/AArch64/dag-combine-invaraints.ll
@@ -12,14 +12,14 @@
   store i32 0, i32* %tmp
   store i32 15, i32* %i32T, align 4
   store i32 5, i32* %i32F, align 4
-  %tmp6 = load i32* %tmp, align 4
+  %tmp6 = load i32, i32* %tmp, align 4
   %tmp7 = icmp ne i32 %tmp6, 0
   %tmp8 = xor i1 %tmp7, true
-  %tmp9 = load i32* %i32T, align 4
-  %tmp10 = load i32* %i32F, align 4
+  %tmp9 = load i32, i32* %i32T, align 4
+  %tmp10 = load i32, i32* %i32F, align 4
   %DHSelect = select i1 %tmp8, i32 %tmp9, i32 %tmp10
   store i32 %DHSelect, i32* %i32X, align 4
-  %tmp15 = load i32* %i32X, align 4
+  %tmp15 = load i32, i32* %i32X, align 4
   %tmp17 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str2, i32 0, i32 0), i32 %tmp15)
   ret i32 0
 
diff --git a/llvm/test/CodeGen/AArch64/dp-3source.ll b/llvm/test/CodeGen/AArch64/dp-3source.ll
index bd96ec7..3982fea 100644
--- a/llvm/test/CodeGen/AArch64/dp-3source.ll
+++ b/llvm/test/CodeGen/AArch64/dp-3source.ll
@@ -168,8 +168,8 @@
 
 define void @test_mneg(){
 ; CHECK-LABEL: test_mneg:
-  %1 = load i32* @a, align 4
-  %2 = load i32* @b, align 4
+  %1 = load i32, i32* @a, align 4
+  %2 = load i32, i32* @b, align 4
   %3 = sub i32 0, %1
   %4 = mul i32 %2, %3
   store i32 %4, i32* @c, align 4
diff --git a/llvm/test/CodeGen/AArch64/dp1.ll b/llvm/test/CodeGen/AArch64/dp1.ll
index 662b415..4247afa 100644
--- a/llvm/test/CodeGen/AArch64/dp1.ll
+++ b/llvm/test/CodeGen/AArch64/dp1.ll
@@ -5,7 +5,7 @@
 
 define void @rev_i32() {
 ; CHECK-LABEL: rev_i32:
-    %val0_tmp = load i32* @var32
+    %val0_tmp = load i32, i32* @var32
     %val1_tmp = call i32 @llvm.bswap.i32(i32 %val0_tmp)
 ; CHECK: rev	{{w[0-9]+}}, {{w[0-9]+}}
     store volatile i32 %val1_tmp, i32* @var32
@@ -14,7 +14,7 @@
 
 define void @rev_i64() {
 ; CHECK-LABEL: rev_i64:
-    %val0_tmp = load i64* @var64
+    %val0_tmp = load i64, i64* @var64
     %val1_tmp = call i64 @llvm.bswap.i64(i64 %val0_tmp)
 ; CHECK: rev	{{x[0-9]+}}, {{x[0-9]+}}
     store volatile i64 %val1_tmp, i64* @var64
@@ -23,7 +23,7 @@
 
 define void @rev32_i64() {
 ; CHECK-LABEL: rev32_i64:
-    %val0_tmp = load i64* @var64
+    %val0_tmp = load i64, i64* @var64
     %val1_tmp = shl i64 %val0_tmp, 32
     %val5_tmp = sub i64 64, 32
     %val2_tmp = lshr i64 %val0_tmp, %val5_tmp
@@ -36,7 +36,7 @@
 
 define void @rev16_i32() {
 ; CHECK-LABEL: rev16_i32:
-    %val0_tmp = load i32* @var32
+    %val0_tmp = load i32, i32* @var32
     %val1_tmp = shl i32 %val0_tmp, 16
     %val2_tmp = lshr i32 %val0_tmp, 16
     %val3_tmp = or i32 %val1_tmp, %val2_tmp
@@ -48,7 +48,7 @@
 
 define void @clz_zerodef_i32() {
 ; CHECK-LABEL: clz_zerodef_i32:
-    %val0_tmp = load i32* @var32
+    %val0_tmp = load i32, i32* @var32
     %val4_tmp = call i32 @llvm.ctlz.i32(i32 %val0_tmp, i1 0)
 ; CHECK: clz	{{w[0-9]+}}, {{w[0-9]+}}
     store volatile i32 %val4_tmp, i32* @var32
@@ -57,7 +57,7 @@
 
 define void @clz_zerodef_i64() {
 ; CHECK-LABEL: clz_zerodef_i64:
-    %val0_tmp = load i64* @var64
+    %val0_tmp = load i64, i64* @var64
     %val4_tmp = call i64 @llvm.ctlz.i64(i64 %val0_tmp, i1 0)
 ; CHECK: clz	{{x[0-9]+}}, {{x[0-9]+}}
     store volatile i64 %val4_tmp, i64* @var64
@@ -66,7 +66,7 @@
 
 define void @clz_zeroundef_i32() {
 ; CHECK-LABEL: clz_zeroundef_i32:
-    %val0_tmp = load i32* @var32
+    %val0_tmp = load i32, i32* @var32
     %val4_tmp = call i32 @llvm.ctlz.i32(i32 %val0_tmp, i1 1)
 ; CHECK: clz	{{w[0-9]+}}, {{w[0-9]+}}
     store volatile i32 %val4_tmp, i32* @var32
@@ -75,7 +75,7 @@
 
 define void @clz_zeroundef_i64() {
 ; CHECK-LABEL: clz_zeroundef_i64:
-    %val0_tmp = load i64* @var64
+    %val0_tmp = load i64, i64* @var64
     %val4_tmp = call i64 @llvm.ctlz.i64(i64 %val0_tmp, i1 1)
 ; CHECK: clz	{{x[0-9]+}}, {{x[0-9]+}}
     store volatile i64 %val4_tmp, i64* @var64
@@ -84,7 +84,7 @@
 
 define void @cttz_zerodef_i32() {
 ; CHECK-LABEL: cttz_zerodef_i32:
-    %val0_tmp = load i32* @var32
+    %val0_tmp = load i32, i32* @var32
     %val4_tmp = call i32 @llvm.cttz.i32(i32 %val0_tmp, i1 0)
 ; CHECK: rbit   [[REVERSED:w[0-9]+]], {{w[0-9]+}}
 ; CHECK: clz	{{w[0-9]+}}, [[REVERSED]]
@@ -94,7 +94,7 @@
 
 define void @cttz_zerodef_i64() {
 ; CHECK-LABEL: cttz_zerodef_i64:
-    %val0_tmp = load i64* @var64
+    %val0_tmp = load i64, i64* @var64
     %val4_tmp = call i64 @llvm.cttz.i64(i64 %val0_tmp, i1 0)
 ; CHECK: rbit   [[REVERSED:x[0-9]+]], {{x[0-9]+}}
 ; CHECK: clz	{{x[0-9]+}}, [[REVERSED]]
@@ -104,7 +104,7 @@
 
 define void @cttz_zeroundef_i32() {
 ; CHECK-LABEL: cttz_zeroundef_i32:
-    %val0_tmp = load i32* @var32
+    %val0_tmp = load i32, i32* @var32
     %val4_tmp = call i32 @llvm.cttz.i32(i32 %val0_tmp, i1 1)
 ; CHECK: rbit   [[REVERSED:w[0-9]+]], {{w[0-9]+}}
 ; CHECK: clz	{{w[0-9]+}}, [[REVERSED]]
@@ -114,7 +114,7 @@
 
 define void @cttz_zeroundef_i64() {
 ; CHECK-LABEL: cttz_zeroundef_i64:
-    %val0_tmp = load i64* @var64
+    %val0_tmp = load i64, i64* @var64
     %val4_tmp = call i64 @llvm.cttz.i64(i64 %val0_tmp, i1 1)
 ; CHECK: rbit   [[REVERSED:x[0-9]+]], {{x[0-9]+}}
 ; CHECK: clz	{{x[0-9]+}}, [[REVERSED]]
@@ -126,7 +126,7 @@
 ; ISelLowering.
 define void @ctpop_i32() {
 ; CHECK-LABEL: ctpop_i32:
-    %val0_tmp = load i32* @var32
+    %val0_tmp = load i32, i32* @var32
     %val4_tmp = call i32 @llvm.ctpop.i32(i32 %val0_tmp)
     store volatile i32 %val4_tmp, i32* @var32
     ret void
@@ -134,7 +134,7 @@
 
 define void @ctpop_i64() {
 ; CHECK-LABEL: ctpop_i64:
-    %val0_tmp = load i64* @var64
+    %val0_tmp = load i64, i64* @var64
     %val4_tmp = call i64 @llvm.ctpop.i64(i64 %val0_tmp)
     store volatile i64 %val4_tmp, i64* @var64
     ret void
diff --git a/llvm/test/CodeGen/AArch64/dp2.ll b/llvm/test/CodeGen/AArch64/dp2.ll
index 71b3169..0cd2d7e 100644
--- a/llvm/test/CodeGen/AArch64/dp2.ll
+++ b/llvm/test/CodeGen/AArch64/dp2.ll
@@ -7,8 +7,8 @@
 
 define void @rorv_i64() {
 ; CHECK-LABEL: rorv_i64:
-    %val0_tmp = load i64* @var64_0
-    %val1_tmp = load i64* @var64_1
+    %val0_tmp = load i64, i64* @var64_0
+    %val1_tmp = load i64, i64* @var64_1
     %val2_tmp = sub i64 64, %val1_tmp
     %val3_tmp = shl i64 %val0_tmp, %val2_tmp
     %val4_tmp = lshr i64 %val0_tmp, %val1_tmp
@@ -20,8 +20,8 @@
 
 define void @asrv_i64() {
 ; CHECK-LABEL: asrv_i64:
-    %val0_tmp = load i64* @var64_0
-    %val1_tmp = load i64* @var64_1
+    %val0_tmp = load i64, i64* @var64_0
+    %val1_tmp = load i64, i64* @var64_1
     %val4_tmp = ashr i64 %val0_tmp, %val1_tmp
 ; CHECK: {{asr|asrv}} {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
     store volatile i64 %val4_tmp, i64* @var64_1
@@ -30,8 +30,8 @@
 
 define void @lsrv_i64() {
 ; CHECK-LABEL: lsrv_i64:
-    %val0_tmp = load i64* @var64_0
-    %val1_tmp = load i64* @var64_1
+    %val0_tmp = load i64, i64* @var64_0
+    %val1_tmp = load i64, i64* @var64_1
     %val4_tmp = lshr i64 %val0_tmp, %val1_tmp
 ; CHECK: {{lsr|lsrv}} {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
     store volatile i64 %val4_tmp, i64* @var64_0
@@ -40,8 +40,8 @@
 
 define void @lslv_i64() {
 ; CHECK-LABEL: lslv_i64:
-    %val0_tmp = load i64* @var64_0
-    %val1_tmp = load i64* @var64_1
+    %val0_tmp = load i64, i64* @var64_0
+    %val1_tmp = load i64, i64* @var64_1
     %val4_tmp = shl i64 %val0_tmp, %val1_tmp
 ; CHECK: {{lsl|lslv}} {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
     store volatile i64 %val4_tmp, i64* @var64_1
@@ -50,8 +50,8 @@
 
 define void @udiv_i64() {
 ; CHECK-LABEL: udiv_i64:
-    %val0_tmp = load i64* @var64_0
-    %val1_tmp = load i64* @var64_1
+    %val0_tmp = load i64, i64* @var64_0
+    %val1_tmp = load i64, i64* @var64_1
     %val4_tmp = udiv i64 %val0_tmp, %val1_tmp
 ; CHECK: udiv	{{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
     store volatile i64 %val4_tmp, i64* @var64_0
@@ -60,8 +60,8 @@
 
 define void @sdiv_i64() {
 ; CHECK-LABEL: sdiv_i64:
-    %val0_tmp = load i64* @var64_0
-    %val1_tmp = load i64* @var64_1
+    %val0_tmp = load i64, i64* @var64_0
+    %val1_tmp = load i64, i64* @var64_1
     %val4_tmp = sdiv i64 %val0_tmp, %val1_tmp
 ; CHECK: sdiv	{{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}
     store volatile i64 %val4_tmp, i64* @var64_1
@@ -71,8 +71,8 @@
 
 define void @lsrv_i32() {
 ; CHECK-LABEL: lsrv_i32:
-    %val0_tmp = load i32* @var32_0
-    %val1_tmp = load i32* @var32_1
+    %val0_tmp = load i32, i32* @var32_0
+    %val1_tmp = load i32, i32* @var32_1
     %val2_tmp = add i32 1, %val1_tmp
     %val4_tmp = lshr i32 %val0_tmp, %val2_tmp
 ; CHECK: {{lsr|lsrv}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
@@ -82,8 +82,8 @@
 
 define void @lslv_i32() {
 ; CHECK-LABEL: lslv_i32:
-    %val0_tmp = load i32* @var32_0
-    %val1_tmp = load i32* @var32_1
+    %val0_tmp = load i32, i32* @var32_0
+    %val1_tmp = load i32, i32* @var32_1
     %val2_tmp = add i32 1, %val1_tmp
     %val4_tmp = shl i32 %val0_tmp, %val2_tmp
 ; CHECK: {{lsl|lslv}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
@@ -93,8 +93,8 @@
 
 define void @rorv_i32() {
 ; CHECK-LABEL: rorv_i32:
-    %val0_tmp = load i32* @var32_0
-    %val6_tmp = load i32* @var32_1
+    %val0_tmp = load i32, i32* @var32_0
+    %val6_tmp = load i32, i32* @var32_1
     %val1_tmp = add i32 1, %val6_tmp
     %val2_tmp = sub i32 32, %val1_tmp
     %val3_tmp = shl i32 %val0_tmp, %val2_tmp
@@ -107,8 +107,8 @@
 
 define void @asrv_i32() {
 ; CHECK-LABEL: asrv_i32:
-    %val0_tmp = load i32* @var32_0
-    %val1_tmp = load i32* @var32_1
+    %val0_tmp = load i32, i32* @var32_0
+    %val1_tmp = load i32, i32* @var32_1
     %val2_tmp = add i32 1, %val1_tmp
     %val4_tmp = ashr i32 %val0_tmp, %val2_tmp
 ; CHECK: {{asr|asrv}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
@@ -118,8 +118,8 @@
 
 define void @sdiv_i32() {
 ; CHECK-LABEL: sdiv_i32:
-    %val0_tmp = load i32* @var32_0
-    %val1_tmp = load i32* @var32_1
+    %val0_tmp = load i32, i32* @var32_0
+    %val1_tmp = load i32, i32* @var32_1
     %val4_tmp = sdiv i32 %val0_tmp, %val1_tmp
 ; CHECK: sdiv	{{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
     store volatile i32 %val4_tmp, i32* @var32_1
@@ -128,8 +128,8 @@
 
 define void @udiv_i32() {
 ; CHECK-LABEL: udiv_i32:
-    %val0_tmp = load i32* @var32_0
-    %val1_tmp = load i32* @var32_1
+    %val0_tmp = load i32, i32* @var32_0
+    %val1_tmp = load i32, i32* @var32_1
     %val4_tmp = udiv i32 %val0_tmp, %val1_tmp
 ; CHECK: udiv	{{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
     store volatile i32 %val4_tmp, i32* @var32_0
@@ -141,7 +141,7 @@
 define i32 @test_lsl32() {
 ; CHECK-LABEL: test_lsl32:
 
-  %val = load i32* @var32_0
+  %val = load i32, i32* @var32_0
   %ret = shl i32 1, %val
 ; CHECK: {{lsl|lslv}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
 
@@ -151,7 +151,7 @@
 define i32 @test_lsr32() {
 ; CHECK-LABEL: test_lsr32:
 
-  %val = load i32* @var32_0
+  %val = load i32, i32* @var32_0
   %ret = lshr i32 1, %val
 ; CHECK: {{lsr|lsrv}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
 
@@ -161,7 +161,7 @@
 define i32 @test_asr32(i32 %in) {
 ; CHECK-LABEL: test_asr32:
 
-  %val = load i32* @var32_0
+  %val = load i32, i32* @var32_0
   %ret = ashr i32 %in, %val
 ; CHECK: {{asr|asrv}} {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}
 
diff --git a/llvm/test/CodeGen/AArch64/eliminate-trunc.ll b/llvm/test/CodeGen/AArch64/eliminate-trunc.ll
index d8872a4..bc4ac7d 100644
--- a/llvm/test/CodeGen/AArch64/eliminate-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/eliminate-trunc.ll
@@ -15,10 +15,10 @@
 for.body4.us:
   %indvars.iv = phi i64 [ 0, %for.body4.lr.ph.us ], [ %indvars.iv.next, %for.body4.us ]
   %arrayidx6.us = getelementptr inbounds [8 x i8], [8 x i8]* %a, i64 %indvars.iv26, i64 %indvars.iv
-  %0 = load i8* %arrayidx6.us, align 1
+  %0 = load i8, i8* %arrayidx6.us, align 1
   %idxprom7.us = zext i8 %0 to i64
   %arrayidx8.us = getelementptr inbounds i8, i8* %box, i64 %idxprom7.us
-  %1 = load i8* %arrayidx8.us, align 1
+  %1 = load i8, i8* %arrayidx8.us, align 1
   store i8 %1, i8* %arrayidx6.us, align 1
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %2 = trunc i64 %indvars.iv.next to i32
diff --git a/llvm/test/CodeGen/AArch64/f16-convert.ll b/llvm/test/CodeGen/AArch64/f16-convert.ll
index 6b897f4..8caa1f5 100644
--- a/llvm/test/CodeGen/AArch64/f16-convert.ll
+++ b/llvm/test/CodeGen/AArch64/f16-convert.ll
@@ -6,7 +6,7 @@
 ; CHECK-NEXT: fcvt s0, [[HREG]]
 ; CHECK-NEXT: ret
 
-  %tmp = load i16* %a, align 2
+  %tmp = load i16, i16* %a, align 2
   %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
   ret float %tmp1
 }
@@ -17,7 +17,7 @@
 ; CHECK-NEXT: fcvt d0, [[HREG]]
 ; CHECK-NEXT: ret
 
-  %tmp = load i16* %a, align 2
+  %tmp = load i16, i16* %a, align 2
   %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
   ret double %conv
 }
@@ -30,7 +30,7 @@
 
   %idxprom = sext i32 %i to i64
   %arrayidx = getelementptr inbounds i16, i16* %a, i64 %idxprom
-  %tmp = load i16* %arrayidx, align 2
+  %tmp = load i16, i16* %arrayidx, align 2
   %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
   ret float %tmp1
 }
@@ -43,7 +43,7 @@
 
   %idxprom = sext i32 %i to i64
   %arrayidx = getelementptr inbounds i16, i16* %a, i64 %idxprom
-  %tmp = load i16* %arrayidx, align 2
+  %tmp = load i16, i16* %arrayidx, align 2
   %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
   ret double %conv
 }
@@ -55,7 +55,7 @@
 ; CHECK-NEXT: ret
 
   %arrayidx = getelementptr inbounds i16, i16* %a, i64 %i
-  %tmp = load i16* %arrayidx, align 2
+  %tmp = load i16, i16* %arrayidx, align 2
   %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
   ret float %tmp1
 }
@@ -67,7 +67,7 @@
 ; CHECK-NEXT: ret
 
   %arrayidx = getelementptr inbounds i16, i16* %a, i64 %i
-  %tmp = load i16* %arrayidx, align 2
+  %tmp = load i16, i16* %arrayidx, align 2
   %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
   ret double %conv
 }
@@ -79,7 +79,7 @@
 ; CHECK-NEXT: ret
 
   %arrayidx = getelementptr inbounds i16, i16* %a, i64 10
-  %tmp = load i16* %arrayidx, align 2
+  %tmp = load i16, i16* %arrayidx, align 2
   %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
   ret float %tmp1
 }
@@ -91,7 +91,7 @@
 ; CHECK-NEXT: ret
 
   %arrayidx = getelementptr inbounds i16, i16* %a, i64 10
-  %tmp = load i16* %arrayidx, align 2
+  %tmp = load i16, i16* %arrayidx, align 2
   %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
   ret double %conv
 }
@@ -103,7 +103,7 @@
 ; CHECK-NEXT: ret
 
   %arrayidx = getelementptr inbounds i16, i16* %a, i64 -10
-  %tmp = load i16* %arrayidx, align 2
+  %tmp = load i16, i16* %arrayidx, align 2
   %tmp1 = tail call float @llvm.convert.from.fp16.f32(i16 %tmp)
   ret float %tmp1
 }
@@ -115,7 +115,7 @@
 ; CHECK-NEXT: ret
 
   %arrayidx = getelementptr inbounds i16, i16* %a, i64 -10
-  %tmp = load i16* %arrayidx, align 2
+  %tmp = load i16, i16* %arrayidx, align 2
   %conv = tail call double @llvm.convert.from.fp16.f64(i16 %tmp)
   ret double %conv
 }
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll b/llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
index 0cbee54..6ab6a66 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
@@ -5,49 +5,49 @@
 define zeroext i1 @load_breg_i1(i1* %a) {
 ; CHECK-LABEL: load_breg_i1
 ; CHECK:       ldrb {{w[0-9]+}}, [x0]
-  %1 = load i1* %a
+  %1 = load i1, i1* %a
   ret i1 %1
 }
 
 define zeroext i8 @load_breg_i8(i8* %a) {
 ; CHECK-LABEL: load_breg_i8
 ; CHECK:       ldrb {{w[0-9]+}}, [x0]
-  %1 = load i8* %a
+  %1 = load i8, i8* %a
   ret i8 %1
 }
 
 define zeroext i16 @load_breg_i16(i16* %a) {
 ; CHECK-LABEL: load_breg_i16
 ; CHECK:       ldrh {{w[0-9]+}}, [x0]
-  %1 = load i16* %a
+  %1 = load i16, i16* %a
   ret i16 %1
 }
 
 define i32 @load_breg_i32(i32* %a) {
 ; CHECK-LABEL: load_breg_i32
 ; CHECK:       ldr {{w[0-9]+}}, [x0]
-  %1 = load i32* %a
+  %1 = load i32, i32* %a
   ret i32 %1
 }
 
 define i64 @load_breg_i64(i64* %a) {
 ; CHECK-LABEL: load_breg_i64
 ; CHECK:       ldr {{x[0-9]+}}, [x0]
-  %1 = load i64* %a
+  %1 = load i64, i64* %a
   ret i64 %1
 }
 
 define float @load_breg_f32(float* %a) {
 ; CHECK-LABEL: load_breg_f32
 ; CHECK:       ldr {{s[0-9]+}}, [x0]
-  %1 = load float* %a
+  %1 = load float, float* %a
   ret float %1
 }
 
 define double @load_breg_f64(double* %a) {
 ; CHECK-LABEL: load_breg_f64
 ; CHECK:       ldr {{d[0-9]+}}, [x0]
-  %1 = load double* %a
+  %1 = load double, double* %a
   ret double %1
 }
 
@@ -113,7 +113,7 @@
 ; CHECK:       orr {{w|x}}[[REG:[0-9]+]], {{wzr|xzr}}, #0x80
 ; CHECK:       ldr {{w[0-9]+}}, {{\[}}x[[REG]]{{\]}}
   %1 = inttoptr i64 128 to i32*
-  %2 = load i32* %1
+  %2 = load i32, i32* %1
   ret i32 %2
 }
 
@@ -124,7 +124,7 @@
 ; CHECK:       ldur {{w[0-9]+}}, [x0, #-256]
   %1 = add i64 %a, -256
   %2 = inttoptr i64 %1 to i32*
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   ret i32 %3
 }
 
@@ -135,7 +135,7 @@
 ; CHECK-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
   %1 = add i64 %a, -257
   %2 = inttoptr i64 %1 to i32*
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   ret i32 %3
 }
 
@@ -145,7 +145,7 @@
 ; CHECK:       ldur {{w[0-9]+}}, [x0, #255]
   %1 = add i64 %a, 255
   %2 = inttoptr i64 %1 to i32*
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   ret i32 %3
 }
 
@@ -156,7 +156,7 @@
 ; CHECK-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
   %1 = add i64 %a, 257
   %2 = inttoptr i64 %1 to i32*
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   ret i32 %3
 }
 
@@ -166,7 +166,7 @@
 ; CHECK:       ldr {{w[0-9]+}}, [x0, #16380]
   %1 = add i64 %a, 16380
   %2 = inttoptr i64 %1 to i32*
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   ret i32 %3
 }
 
@@ -180,7 +180,7 @@
 ; FAST-NEXT:  ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
   %1 = add i64 %a, 16384
   %2 = inttoptr i64 %1 to i32*
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   ret i32 %3
 }
 
@@ -255,7 +255,7 @@
 ; CHECK:       ldr {{x[0-9]+}}, [x0, #48]
   %1 = add i64 %a, 48
   %2 = inttoptr i64 %1 to i64*
-  %3 = load i64* %2
+  %3 = load i64, i64* %2
   ret i64 %3
 }
 
@@ -265,7 +265,7 @@
 ; CHECK:       ldr {{x[0-9]+}}, [x0, #48]
   %1 = add i64 48, %a
   %2 = inttoptr i64 %1 to i64*
-  %3 = load i64* %2
+  %3 = load i64, i64* %2
   ret i64 %3
 }
 
@@ -275,7 +275,7 @@
 ; CHECK:       ldr {{x[0-9]+}}, [x0, x1]
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i64*
-  %3 = load i64* %2
+  %3 = load i64, i64* %2
   ret i64 %3
 }
 
@@ -285,7 +285,7 @@
 ; CHECK:       ldr {{x[0-9]+}}, [x1, x0]
   %1 = add i64 %b, %a
   %2 = inttoptr i64 %1 to i64*
-  %3 = load i64* %2
+  %3 = load i64, i64* %2
   ret i64 %3
 }
 
@@ -297,7 +297,7 @@
   %1 = add i64 %a, %b
   %2 = add i64 %1, 48
   %3 = inttoptr i64 %2 to i64*
-  %4 = load i64* %3
+  %4 = load i64, i64* %3
   ret i64 %4
 }
 
@@ -312,7 +312,7 @@
   %1 = add i64 %a, %b
   %2 = add i64 %1, 61440
   %3 = inttoptr i64 %2 to i64*
-  %4 = load i64* %3
+  %4 = load i64, i64* %3
   ret i64 %4
 }
 
@@ -323,7 +323,7 @@
 ; CHECK:       ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
   %1 = shl i64 %a, 2
   %2 = inttoptr i64 %1 to i32*
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   ret i32 %3
 }
 
@@ -333,7 +333,7 @@
 ; CHECK:       ldr {{w[0-9]+}}, {{\[}}[[REG]]{{\]}}
   %1 = mul i64 %a, 4
   %2 = inttoptr i64 %1 to i32*
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   ret i32 %3
 }
 
@@ -344,7 +344,7 @@
   %1 = shl i64 %a, 2
   %2 = add i64 %1, %b
   %3 = inttoptr i64 %2 to i32*
-  %4 = load i32* %3
+  %4 = load i32, i32* %3
   ret i32 %4
 }
 
@@ -354,7 +354,7 @@
   %1 = shl i64 %a, 2
   %2 = add i64 %b, %1
   %3 = inttoptr i64 %2 to i32*
-  %4 = load i32* %3
+  %4 = load i32, i32* %3
   ret i32 %4
 }
 
@@ -369,7 +369,7 @@
   %2 = shl i64 %b, 2
   %3 = add i64 %1, %2
   %4 = inttoptr i64 %3 to i32*
-  %5 = load i32* %4
+  %5 = load i32, i32* %4
   ret i32 %5
 }
 
@@ -384,7 +384,7 @@
   %2 = shl i64 %b, 2
   %3 = add i64 %2, %1
   %4 = inttoptr i64 %3 to i32*
-  %5 = load i32* %4
+  %5 = load i32, i32* %4
   ret i32 %5
 }
 
@@ -399,7 +399,7 @@
   %2 = shl i64 %b, 3
   %3 = add i64 %1, %2
   %4 = inttoptr i64 %3 to i32*
-  %5 = load i32* %4
+  %5 = load i32, i32* %4
   ret i32 %5
 }
 
@@ -409,7 +409,7 @@
   %1 = mul i64 %a, 4
   %2 = add i64 %1, %b
   %3 = inttoptr i64 %2 to i32*
-  %4 = load i32* %3
+  %4 = load i32, i32* %3
   ret i32 %4
 }
 
@@ -419,7 +419,7 @@
   %1 = and i64 %a, 4294967295
   %2 = add i64 %1, %b
   %3 = inttoptr i64 %2 to i8*
-  %4 = load i8* %3
+  %4 = load i8, i8* %3
   ret i8 %4
 }
 
@@ -430,7 +430,7 @@
   %2 = shl i64 %1, 1
   %3 = add i64 %2, %b
   %4 = inttoptr i64 %3 to i16*
-  %5 = load i16* %4
+  %5 = load i16, i16* %4
   ret i16 %5
 }
 
@@ -441,7 +441,7 @@
   %2 = shl i64 %1, 2
   %3 = add i64 %2, %b
   %4 = inttoptr i64 %3 to i32*
-  %5 = load i32* %4
+  %5 = load i32, i32* %4
   ret i32 %5
 }
 
@@ -452,7 +452,7 @@
   %2 = shl i64 %1, 3
   %3 = add i64 %2, %b
   %4 = inttoptr i64 %3 to i64*
-  %5 = load i64* %4
+  %5 = load i64, i64* %4
   ret i64 %5
 }
 
@@ -464,7 +464,7 @@
   %1 = and i64 %a, %c
   %2 = add i64 %1, %b
   %3 = inttoptr i64 %2 to i64*
-  %4 = load i64* %3
+  %4 = load i64, i64* %3
   ret i64 %4
 }
 
@@ -476,7 +476,7 @@
   %2 = shl i64 %1, 3
   %3 = add i64 %2, %b
   %4 = inttoptr i64 %3 to i64*
-  %5 = load i64* %4
+  %5 = load i64, i64* %4
   ret i64 %5
 }
 
@@ -488,7 +488,7 @@
   %2 = shl i64 %1, 2
   %3 = add i64 %2, %b
   %4 = inttoptr i64 %3 to i32*
-  %5 = load i32* %4
+  %5 = load i32, i32* %4
   ret i32 %5
 }
 
@@ -499,7 +499,7 @@
   %2 = shl i64 %1, 2
   %3 = add i64 %b, %2
   %4 = inttoptr i64 %3 to i32*
-  %5 = load i32* %4
+  %5 = load i32, i32* %4
   ret i32 %5
 }
 
@@ -510,7 +510,7 @@
   %2 = mul i64 %1, 4
   %3 = add i64 %2, %b
   %4 = inttoptr i64 %3 to i32*
-  %5 = load i32* %4
+  %5 = load i32, i32* %4
   ret i32 %5
 }
 
@@ -521,7 +521,7 @@
   %2 = shl i64 %1, 2
   %3 = add i64 %2, %b
   %4 = inttoptr i64 %3 to i32*
-  %5 = load i32* %4
+  %5 = load i32, i32* %4
   ret i32 %5
 }
 
@@ -532,7 +532,7 @@
   %2 = shl i64 %1, 2
   %3 = add i64 %b, %2
   %4 = inttoptr i64 %3 to i32*
-  %5 = load i32* %4
+  %5 = load i32, i32* %4
   ret i32 %5
 }
 
@@ -546,7 +546,7 @@
   %3 = shl i64 %2, 2
   %4 = add i64 %b, %3
   %5 = inttoptr i64 %4 to i32*
-  %6 = load i32* %5
+  %6 = load i32, i32* %5
   ret i32 %6
 }
 
@@ -558,7 +558,7 @@
   %2 = mul i64 %1, 4
   %3 = add i64 %2, %b
   %4 = inttoptr i64 %3 to i32*
-  %5 = load i32* %4
+  %5 = load i32, i32* %4
   ret i32 %5
 }
 
@@ -571,7 +571,7 @@
   %2 = shl i64 %1, 3
   %3 = add i64 %2, 8
   %4 = inttoptr i64 %3 to i64*
-  %5 = load i64* %4
+  %5 = load i64, i64* %4
   ret i64 %5
 }
 
@@ -585,7 +585,7 @@
   %3 = add i64 %b, %2
   %4 = add i64 %3, 8
   %5 = inttoptr i64 %4 to i64*
-  %6 = load i64* %5
+  %6 = load i64, i64* %5
   ret i64 %6
 }
 
@@ -594,7 +594,7 @@
   %1 = sub i64 %a, 8
   %2 = add i64 %1, 96
   %3 = inttoptr i64 %2 to i64*
-  %4 = load i64* %3
+  %4 = load i64, i64* %3
   %5 = add i64 %2, %4
   ret i64 %5
 }
@@ -621,7 +621,7 @@
   %3 = mul i64 %i, 4
   %4 = add i64 %2, %3
   %5 = inttoptr i64 %4 to i32*
-  %6 = load i32* %5, align 4
+  %6 = load i32, i32* %5, align 4
   ret i32 %6
 }
 
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-int-ext.ll b/llvm/test/CodeGen/AArch64/fast-isel-int-ext.ll
index aec991a..4b2cab5 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-int-ext.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-int-ext.ll
@@ -13,7 +13,7 @@
   %2 = shl i64 %1, 3
   %3 = add i64 %b, %2
   %4 = inttoptr i64 %3 to i64*
-  %5 = load i64* %4
+  %5 = load i64, i64* %4
   ret i64 %5
 }
 
@@ -24,7 +24,7 @@
   %2 = shl i64 %1, 3
   %3 = add i64 %b, %2
   %4 = inttoptr i64 %3 to i64*
-  %5 = load i64* %4
+  %5 = load i64, i64* %4
   ret i64 %5
 }
 
@@ -35,7 +35,7 @@
   %2 = shl i64 %1, 3
   %3 = add i64 %b, %2
   %4 = inttoptr i64 %3 to i64*
-  %5 = load i64* %4
+  %5 = load i64, i64* %4
   ret i64 %5
 }
 
@@ -46,7 +46,7 @@
   %2 = shl i64 %1, 3
   %3 = add i64 %b, %2
   %4 = inttoptr i64 %3 to i64*
-  %5 = load i64* %4
+  %5 = load i64, i64* %4
   ret i64 %5
 }
 
@@ -57,7 +57,7 @@
   %2 = shl i64 %1, 3
   %3 = add i64 %b, %2
   %4 = inttoptr i64 %3 to i64*
-  %5 = load i64* %4
+  %5 = load i64, i64* %4
   ret i64 %5
 }
 
@@ -68,7 +68,7 @@
   %2 = shl i64 %1, 3
   %3 = add i64 %b, %2
   %4 = inttoptr i64 %3 to i64*
-  %5 = load i64* %4
+  %5 = load i64, i64* %4
   ret i64 %5
 }
 
@@ -82,7 +82,7 @@
   %2 = mul i64 %1, 8
   %3 = add i64 %b, %2
   %4 = inttoptr i64 %3 to i64*
-  %5 = load i64* %4
+  %5 = load i64, i64* %4
   ret i64 %5
 }
 
@@ -93,7 +93,7 @@
   %2 = mul i64 %1, 8
   %3 = add i64 %b, %2
   %4 = inttoptr i64 %3 to i64*
-  %5 = load i64* %4
+  %5 = load i64, i64* %4
   ret i64 %5
 }
 
@@ -104,7 +104,7 @@
   %2 = mul i64 %1, 8
   %3 = add i64 %b, %2
   %4 = inttoptr i64 %3 to i64*
-  %5 = load i64* %4
+  %5 = load i64, i64* %4
   ret i64 %5
 }
 
@@ -115,7 +115,7 @@
   %2 = mul i64 %1, 8
   %3 = add i64 %b, %2
   %4 = inttoptr i64 %3 to i64*
-  %5 = load i64* %4
+  %5 = load i64, i64* %4
   ret i64 %5
 }
 
@@ -126,7 +126,7 @@
   %2 = mul i64 %1, 8
   %3 = add i64 %b, %2
   %4 = inttoptr i64 %3 to i64*
-  %5 = load i64* %4
+  %5 = load i64, i64* %4
   ret i64 %5
 }
 
@@ -137,7 +137,7 @@
   %2 = mul i64 %1, 8
   %3 = add i64 %b, %2
   %4 = inttoptr i64 %3 to i64*
-  %5 = load i64* %4
+  %5 = load i64, i64* %4
   ret i64 %5
 }
 
@@ -153,7 +153,7 @@
 ; CHECK-NOT:   uxtb
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i8*
-  %3 = load i8* %2
+  %3 = load i8, i8* %2
   %4 = zext i8 %3 to i32
   ret i32 %4
 }
@@ -164,7 +164,7 @@
 ; CHECK-NOT:   uxth
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i16*
-  %3 = load i16* %2
+  %3 = load i16, i16* %2
   %4 = zext i16 %3 to i32
   ret i32 %4
 }
@@ -175,7 +175,7 @@
 ; CHECK-NOT:   uxtb
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i8*
-  %3 = load i8* %2
+  %3 = load i8, i8* %2
   %4 = zext i8 %3 to i64
   ret i64 %4
 }
@@ -186,7 +186,7 @@
 ; CHECK-NOT:   uxth
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i16*
-  %3 = load i16* %2
+  %3 = load i16, i16* %2
   %4 = zext i16 %3 to i64
   ret i64 %4
 }
@@ -197,7 +197,7 @@
 ; CHECK-NOT:   uxtw
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i32*
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   %4 = zext i32 %3 to i64
   ret i64 %4
 }
@@ -208,7 +208,7 @@
 ; CHECK-NOT:   sxtb
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i8*
-  %3 = load i8* %2
+  %3 = load i8, i8* %2
   %4 = sext i8 %3 to i32
   ret i32 %4
 }
@@ -219,7 +219,7 @@
 ; CHECK-NOT:   sxth
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i16*
-  %3 = load i16* %2
+  %3 = load i16, i16* %2
   %4 = sext i16 %3 to i32
   ret i32 %4
 }
@@ -230,7 +230,7 @@
 ; CHECK-NOT:   sxtb
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i8*
-  %3 = load i8* %2
+  %3 = load i8, i8* %2
   %4 = sext i8 %3 to i64
   ret i64 %4
 }
@@ -241,7 +241,7 @@
 ; CHECK-NOT:   sxth
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i16*
-  %3 = load i16* %2
+  %3 = load i16, i16* %2
   %4 = sext i16 %3 to i64
   ret i64 %4
 }
@@ -252,7 +252,7 @@
 ; CHECK-NOT:   sxtw
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i32*
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   %4 = sext i32 %3 to i64
   ret i64 %4
 }
@@ -264,7 +264,7 @@
 ; CHECK-NOT:   uxtb
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i8*
-  %3 = load i8* %2
+  %3 = load i8, i8* %2
   %4 = zext i8 %3 to i32
   ret i32 %4
 }
@@ -275,7 +275,7 @@
 ; CHECK-NOT:   uxth
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i16*
-  %3 = load i16* %2
+  %3 = load i16, i16* %2
   %4 = zext i16 %3 to i32
   ret i32 %4
 }
@@ -286,7 +286,7 @@
 ; CHECK-NOT:   uxtb
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i8*
-  %3 = load i8* %2
+  %3 = load i8, i8* %2
   %4 = zext i8 %3 to i64
   ret i64 %4
 }
@@ -297,7 +297,7 @@
 ; CHECK-NOT:   uxth
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i16*
-  %3 = load i16* %2
+  %3 = load i16, i16* %2
   %4 = zext i16 %3 to i64
   ret i64 %4
 }
@@ -308,7 +308,7 @@
 ; CHECK-NOT:   uxtw
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i32*
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   %4 = zext i32 %3 to i64
   ret i64 %4
 }
@@ -319,7 +319,7 @@
 ; CHECK-NOT:   sxtb
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i8*
-  %3 = load i8* %2
+  %3 = load i8, i8* %2
   %4 = sext i8 %3 to i32
   ret i32 %4
 }
@@ -330,7 +330,7 @@
 ; CHECK-NOT:   sxth
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i16*
-  %3 = load i16* %2
+  %3 = load i16, i16* %2
   %4 = sext i16 %3 to i32
   ret i32 %4
 }
@@ -341,7 +341,7 @@
 ; CHECK-NOT:   sxtb
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i8*
-  %3 = load i8* %2
+  %3 = load i8, i8* %2
   %4 = sext i8 %3 to i64
   ret i64 %4
 }
@@ -352,7 +352,7 @@
 ; CHECK-NOT:   sxth
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i16*
-  %3 = load i16* %2
+  %3 = load i16, i16* %2
   %4 = sext i16 %3 to i64
   ret i64 %4
 }
@@ -363,7 +363,7 @@
 ; CHECK-NOT:   sxtw
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i32*
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   %4 = sext i32 %3 to i64
   ret i64 %4
 }
@@ -376,7 +376,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i8*
-  %4 = load i8* %3
+  %4 = load i8, i8* %3
   %5 = zext i8 %4 to i32
   ret i32 %5
 }
@@ -388,7 +388,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i16*
-  %4 = load i16* %3
+  %4 = load i16, i16* %3
   %5 = zext i16 %4 to i32
   ret i32 %5
 }
@@ -400,7 +400,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i8*
-  %4 = load i8* %3
+  %4 = load i8, i8* %3
   %5 = zext i8 %4 to i64
   ret i64 %5
 }
@@ -412,7 +412,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i16*
-  %4 = load i16* %3
+  %4 = load i16, i16* %3
   %5 = zext i16 %4 to i64
   ret i64 %5
 }
@@ -424,7 +424,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i32*
-  %4 = load i32* %3
+  %4 = load i32, i32* %3
   %5 = zext i32 %4 to i64
   ret i64 %5
 }
@@ -436,7 +436,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i8*
-  %4 = load i8* %3
+  %4 = load i8, i8* %3
   %5 = sext i8 %4 to i32
   ret i32 %5
 }
@@ -448,7 +448,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i16*
-  %4 = load i16* %3
+  %4 = load i16, i16* %3
   %5 = sext i16 %4 to i32
   ret i32 %5
 }
@@ -460,7 +460,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i8*
-  %4 = load i8* %3
+  %4 = load i8, i8* %3
   %5 = sext i8 %4 to i64
   ret i64 %5
 }
@@ -472,7 +472,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i16*
-  %4 = load i16* %3
+  %4 = load i16, i16* %3
   %5 = sext i16 %4 to i64
   ret i64 %5
 }
@@ -484,7 +484,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i32*
-  %4 = load i32* %3
+  %4 = load i32, i32* %3
   %5 = sext i32 %4 to i64
   ret i64 %5
 }
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-int-ext2.ll b/llvm/test/CodeGen/AArch64/fast-isel-int-ext2.ll
index 493f3bb..93741d6 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-int-ext2.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-int-ext2.ll
@@ -11,7 +11,7 @@
 ; CHECK-NOT:   uxtb
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i8*
-  %3 = load i8* %2
+  %3 = load i8, i8* %2
   br label %bb2
 
 bb2:
@@ -25,7 +25,7 @@
 ; CHECK-NOT:   uxth
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i16*
-  %3 = load i16* %2
+  %3 = load i16, i16* %2
   br label %bb2
 
 bb2:
@@ -39,7 +39,7 @@
 ; CHECK-NOT:   uxtb
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i8*
-  %3 = load i8* %2
+  %3 = load i8, i8* %2
   br label %bb2
 
 bb2:
@@ -53,7 +53,7 @@
 ; CHECK-NOT:   uxth
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i16*
-  %3 = load i16* %2
+  %3 = load i16, i16* %2
   br label %bb2
 
 bb2:
@@ -67,7 +67,7 @@
 ; CHECK-NOT:   uxtw
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i32*
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   br label %bb2
 
 bb2:
@@ -81,7 +81,7 @@
 ; CHECK-NOT:   sxtb
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i8*
-  %3 = load i8* %2
+  %3 = load i8, i8* %2
   br label %bb2
 
 bb2:
@@ -95,7 +95,7 @@
 ; CHECK-NOT:   sxth
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i16*
-  %3 = load i16* %2
+  %3 = load i16, i16* %2
   br label %bb2
 
 bb2:
@@ -109,7 +109,7 @@
 ; CHECK-NOT:   sxtb
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i8*
-  %3 = load i8* %2
+  %3 = load i8, i8* %2
   br label %bb2
 
 bb2:
@@ -123,7 +123,7 @@
 ; CHECK-NOT:   sxth
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i16*
-  %3 = load i16* %2
+  %3 = load i16, i16* %2
   br label %bb2
 
 bb2:
@@ -137,7 +137,7 @@
 ; CHECK-NOT:   sxtw
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i32*
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   br label %bb2
 
 bb2:
@@ -152,7 +152,7 @@
 ; CHECK-NOT:   uxtb
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i8*
-  %3 = load i8* %2
+  %3 = load i8, i8* %2
   br label %bb2
 
 bb2:
@@ -166,7 +166,7 @@
 ; CHECK-NOT:   uxth
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i16*
-  %3 = load i16* %2
+  %3 = load i16, i16* %2
   br label %bb2
 
 bb2:
@@ -180,7 +180,7 @@
 ; CHECK-NOT:   uxtb
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i8*
-  %3 = load i8* %2
+  %3 = load i8, i8* %2
   br label %bb2
 
 bb2:
@@ -194,7 +194,7 @@
 ; CHECK-NOT:   uxth
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i16*
-  %3 = load i16* %2
+  %3 = load i16, i16* %2
   br label %bb2
 
 bb2:
@@ -208,7 +208,7 @@
 ; CHECK-NOT:   uxtw
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i32*
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   br label %bb2
 
 bb2:
@@ -222,7 +222,7 @@
 ; CHECK-NOT:   sxtb
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i8*
-  %3 = load i8* %2
+  %3 = load i8, i8* %2
   br label %bb2
 
 bb2:
@@ -236,7 +236,7 @@
 ; CHECK-NOT:   sxth
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i16*
-  %3 = load i16* %2
+  %3 = load i16, i16* %2
   br label %bb2
 
 bb2:
@@ -250,7 +250,7 @@
 ; CHECK-NOT:   sxtb
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i8*
-  %3 = load i8* %2
+  %3 = load i8, i8* %2
   br label %bb2
 
 bb2:
@@ -264,7 +264,7 @@
 ; CHECK-NOT:   sxth
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i16*
-  %3 = load i16* %2
+  %3 = load i16, i16* %2
   br label %bb2
 
 bb2:
@@ -278,7 +278,7 @@
 ; CHECK-NOT:   sxtw
   %1 = add i64 %a, %b
   %2 = inttoptr i64 %1 to i32*
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   br label %bb2
 
 bb2:
@@ -294,7 +294,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i8*
-  %4 = load i8* %3
+  %4 = load i8, i8* %3
   br label %bb2
 
 bb2:
@@ -309,7 +309,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i16*
-  %4 = load i16* %3
+  %4 = load i16, i16* %3
   br label %bb2
 
 bb2:
@@ -324,7 +324,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i8*
-  %4 = load i8* %3
+  %4 = load i8, i8* %3
   br label %bb2
 
 bb2:
@@ -339,7 +339,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i16*
-  %4 = load i16* %3
+  %4 = load i16, i16* %3
   br label %bb2
 
 bb2:
@@ -354,7 +354,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i32*
-  %4 = load i32* %3
+  %4 = load i32, i32* %3
   br label %bb2
 
 bb2:
@@ -369,7 +369,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i8*
-  %4 = load i8* %3
+  %4 = load i8, i8* %3
   br label %bb2
 
 bb2:
@@ -384,7 +384,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i16*
-  %4 = load i16* %3
+  %4 = load i16, i16* %3
   br label %bb2
 
 bb2:
@@ -399,7 +399,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i8*
-  %4 = load i8* %3
+  %4 = load i8, i8* %3
   br label %bb2
 
 bb2:
@@ -414,7 +414,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i16*
-  %4 = load i16* %3
+  %4 = load i16, i16* %3
   br label %bb2
 
 bb2:
@@ -429,7 +429,7 @@
   %1 = sext i32 %b to i64
   %2 = add i64 %a, %1
   %3 = inttoptr i64 %2 to i32*
-  %4 = load i32* %3
+  %4 = load i32, i32* %3
   br label %bb2
 
 bb2:
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-int-ext3.ll b/llvm/test/CodeGen/AArch64/fast-isel-int-ext3.ll
index 5d55a6b..83740c8 100644
--- a/llvm/test/CodeGen/AArch64/fast-isel-int-ext3.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-int-ext3.ll
@@ -11,7 +11,7 @@
 ; CHECK:       uxtb w0, [[REG]]
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i8 addrspace(256)*
-  %3 = load i8 addrspace(256)* %2
+  %3 = load i8, i8 addrspace(256)* %2
   %4 = zext i8 %3 to i32
   ret i32 %4
 }
@@ -22,7 +22,7 @@
 ; CHECK:       uxth w0, [[REG]]
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i16 addrspace(256)*
-  %3 = load i16 addrspace(256)* %2
+  %3 = load i16, i16 addrspace(256)* %2
   %4 = zext i16 %3 to i32
   ret i32 %4
 }
@@ -33,7 +33,7 @@
 ; CHECK:       ubfx x0, x[[REG]], #0, #8
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i8 addrspace(256)*
-  %3 = load i8 addrspace(256)* %2
+  %3 = load i8, i8 addrspace(256)* %2
   %4 = zext i8 %3 to i64
   ret i64 %4
 }
@@ -44,7 +44,7 @@
 ; CHECK:       ubfx x0, x[[REG]], #0, #16
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i16 addrspace(256)*
-  %3 = load i16 addrspace(256)* %2
+  %3 = load i16, i16 addrspace(256)* %2
   %4 = zext i16 %3 to i64
   ret i64 %4
 }
@@ -55,7 +55,7 @@
 ; CHECK:       ubfx x0, x[[REG]], #0, #32
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i32 addrspace(256)*
-  %3 = load i32 addrspace(256)* %2
+  %3 = load i32, i32 addrspace(256)* %2
   %4 = zext i32 %3 to i64
   ret i64 %4
 }
@@ -66,7 +66,7 @@
 ; CHECK:       sxtb w0, [[REG]]
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i8 addrspace(256)*
-  %3 = load i8 addrspace(256)* %2
+  %3 = load i8, i8 addrspace(256)* %2
   %4 = sext i8 %3 to i32
   ret i32 %4
 }
@@ -77,7 +77,7 @@
 ; CHECK:       sxth w0, [[REG]]
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i16 addrspace(256)*
-  %3 = load i16 addrspace(256)* %2
+  %3 = load i16, i16 addrspace(256)* %2
   %4 = sext i16 %3 to i32
   ret i32 %4
 }
@@ -88,7 +88,7 @@
 ; CHECK:       sxtb x0, [[REG]]
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i8 addrspace(256)*
-  %3 = load i8 addrspace(256)* %2
+  %3 = load i8, i8 addrspace(256)* %2
   %4 = sext i8 %3 to i64
   ret i64 %4
 }
@@ -99,7 +99,7 @@
 ; CHECK:       sxth x0, [[REG]]
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i16 addrspace(256)*
-  %3 = load i16 addrspace(256)* %2
+  %3 = load i16, i16 addrspace(256)* %2
   %4 = sext i16 %3 to i64
   ret i64 %4
 }
@@ -110,7 +110,7 @@
 ; CHECK:       sxtw x0, [[REG]]
   %1 = sub i64 %a, 8
   %2 = inttoptr i64 %1 to i32 addrspace(256)*
-  %3 = load i32 addrspace(256)* %2
+  %3 = load i32, i32 addrspace(256)* %2
   %4 = sext i32 %3 to i64
   ret i64 %4
 }
diff --git a/llvm/test/CodeGen/AArch64/floatdp_1source.ll b/llvm/test/CodeGen/AArch64/floatdp_1source.ll
index 8c02787..ec7d32d 100644
--- a/llvm/test/CodeGen/AArch64/floatdp_1source.ll
+++ b/llvm/test/CodeGen/AArch64/floatdp_1source.ll
@@ -27,7 +27,7 @@
 
 define void @simple_float() {
 ; CHECK-LABEL: simple_float:
-  %val1 = load volatile float* @varfloat
+  %val1 = load volatile float, float* @varfloat
 
   %valabs = call float @fabsf(float %val1)
   store volatile float %valabs, float* @varfloat
@@ -66,7 +66,7 @@
 
 define void @simple_double() {
 ; CHECK-LABEL: simple_double:
-  %val1 = load volatile double* @vardouble
+  %val1 = load volatile double, double* @vardouble
 
   %valabs = call double @fabs(double %val1)
   store volatile double %valabs, double* @vardouble
@@ -106,9 +106,9 @@
 define void @converts() {
 ; CHECK-LABEL: converts:
 
-  %val16 = load volatile half* @varhalf
-  %val32 = load volatile float* @varfloat
-  %val64 = load volatile double* @vardouble
+  %val16 = load volatile half, half* @varhalf
+  %val32 = load volatile float, float* @varfloat
+  %val64 = load volatile double, double* @vardouble
 
   %val16to32 = fpext half %val16 to float
   store volatile float %val16to32, float* @varfloat
diff --git a/llvm/test/CodeGen/AArch64/floatdp_2source.ll b/llvm/test/CodeGen/AArch64/floatdp_2source.ll
index 2622717..30e2856 100644
--- a/llvm/test/CodeGen/AArch64/floatdp_2source.ll
+++ b/llvm/test/CodeGen/AArch64/floatdp_2source.ll
@@ -5,7 +5,7 @@
 
 define void @testfloat() {
 ; CHECK-LABEL: testfloat:
-  %val1 = load float* @varfloat
+  %val1 = load float, float* @varfloat
 
   %val2 = fadd float %val1, %val1
 ; CHECK: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
@@ -33,7 +33,7 @@
 
 define void @testdouble() {
 ; CHECK-LABEL: testdouble:
-  %val1 = load double* @vardouble
+  %val1 = load double, double* @vardouble
 
   %val2 = fadd double %val1, %val1
 ; CHECK: fadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
diff --git a/llvm/test/CodeGen/AArch64/fp128-folding.ll b/llvm/test/CodeGen/AArch64/fp128-folding.ll
index 892b19c..5027e83 100644
--- a/llvm/test/CodeGen/AArch64/fp128-folding.ll
+++ b/llvm/test/CodeGen/AArch64/fp128-folding.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL: test_folding:
   %l = alloca i32
   store i32 42, i32* %l
-  %val = load i32* %l
+  %val = load i32, i32* %l
   %fpval = sitofp i32 %val to fp128
   ; If the value is loaded from a constant pool into an fp128, it's been folded
   ; successfully.
diff --git a/llvm/test/CodeGen/AArch64/fp16-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-instructions.ll
index 7a44cd1..ba96694 100644
--- a/llvm/test/CodeGen/AArch64/fp16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-instructions.ll
@@ -52,7 +52,7 @@
 entry:
 ; CHECK-LABEL: load_h:
 ; CHECK: ldr h0, [x0]
-  %0 = load half* %a, align 4
+  %0 = load half, half* %a, align 4
   ret half %0
 }
 
diff --git a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
index 8e89681..f2945a1 100644
--- a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll
@@ -52,7 +52,7 @@
 entry:
 ; CHECK-LABEL: load_h:
 ; CHECK: ldr d0, [x0]
-  %0 = load <4 x half>* %a, align 4
+  %0 = load <4 x half>, <4 x half>* %a, align 4
   ret <4 x half> %0
 }
 
diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
index b75f160..e51c0c5 100644
--- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -164,7 +164,7 @@
 entry:
 ; CHECK-LABEL: load_h:
 ; CHECK: ldr q0, [x0]
-  %0 = load <8 x half>* %a, align 4
+  %0 = load <8 x half>, <8 x half>* %a, align 4
   ret <8 x half> %0
 }
 
diff --git a/llvm/test/CodeGen/AArch64/fp16-vector-load-store.ll b/llvm/test/CodeGen/AArch64/fp16-vector-load-store.ll
index edbbffe..b71b39f 100644
--- a/llvm/test/CodeGen/AArch64/fp16-vector-load-store.ll
+++ b/llvm/test/CodeGen/AArch64/fp16-vector-load-store.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL: load_64:
 ; CHECK: ldr d0, [x0]
 entry:
-  %0 = load <4 x half>* %a, align 8
+  %0 = load <4 x half>, <4 x half>* %a, align 8
   ret <4 x half> %0
 }
 
@@ -14,7 +14,7 @@
 ; CHECK-LABEL: load_128:
 ; CHECK: ldr q0, [x0]
 entry:
-  %0 = load <8 x half>* %a, align 16
+  %0 = load <8 x half>, <8 x half>* %a, align 16
   ret <8 x half> %0
 }
 
@@ -23,7 +23,7 @@
 ; CHECK-LABEL: load_dup_64:
 ; CHECK: ld1r { v0.4h }, [x0]
 entry:
-  %0 = load half* %a, align 2
+  %0 = load half, half* %a, align 2
   %1 = insertelement <4 x half> undef, half %0, i32 0
   %2 = shufflevector <4 x half> %1, <4 x half> undef, <4 x i32> zeroinitializer
   ret <4 x half> %2
@@ -34,7 +34,7 @@
 ; CHECK-LABEL: load_dup_128:
 ; CHECK: ld1r { v0.8h }, [x0]
 entry:
-  %0 = load half* %a, align 2
+  %0 = load half, half* %a, align 2
   %1 = insertelement <8 x half> undef, half %0, i32 0
   %2 = shufflevector <8 x half> %1, <8 x half> undef, <8 x i32> zeroinitializer
   ret <8 x half> %2
@@ -45,7 +45,7 @@
 ; CHECK-LABEL: load_lane_64:
 ; CHECK: ld1 { v0.h }[2], [x0]
 entry:
-  %0 = load half* %a, align 2
+  %0 = load half, half* %a, align 2
   %1 = insertelement <4 x half> %b, half %0, i32 2
   ret <4 x half> %1
 }
@@ -55,7 +55,7 @@
 ; CHECK-LABEL: load_lane_128:
 ; CHECK: ld1 { v0.h }[5], [x0]
 entry:
-  %0 = load half* %a, align 2
+  %0 = load half, half* %a, align 2
   %1 = insertelement <8 x half> %b, half %0, i32 5
   ret <8 x half> %1
 }
diff --git a/llvm/test/CodeGen/AArch64/fpimm.ll b/llvm/test/CodeGen/AArch64/fpimm.ll
index ffc3018..6acb111 100644
--- a/llvm/test/CodeGen/AArch64/fpimm.ll
+++ b/llvm/test/CodeGen/AArch64/fpimm.ll
@@ -8,7 +8,7 @@
 define void @check_float() {
 ; CHECK-LABEL: check_float:
 
-  %val = load float* @varf32
+  %val = load float, float* @varf32
   %newval1 = fadd float %val, 8.5
   store volatile float %newval1, float* @varf32
 ; CHECK-DAG: fmov [[EIGHT5:s[0-9]+]], #8.5
@@ -24,7 +24,7 @@
 define void @check_double() {
 ; CHECK-LABEL: check_double:
 
-  %val = load double* @varf64
+  %val = load double, double* @varf64
   %newval1 = fadd double %val, 8.5
   store volatile double %newval1, double* @varf64
 ; CHECK-DAG: fmov {{d[0-9]+}}, #8.5
diff --git a/llvm/test/CodeGen/AArch64/free-zext.ll b/llvm/test/CodeGen/AArch64/free-zext.ll
index d69105e..cff11f8 100644
--- a/llvm/test/CodeGen/AArch64/free-zext.ll
+++ b/llvm/test/CodeGen/AArch64/free-zext.ll
@@ -5,9 +5,9 @@
 ; CHECK-DAG: ldrb w[[A:[0-9]+]], [x0]
 ; CHECK: ldrh w[[B:[0-9]+]], [x1]
 ; CHECK: add x0, x[[B]], x[[A]]
-  %1 = load i8* %a, align 1
+  %1 = load i8, i8* %a, align 1
   %conv = zext i8 %1 to i64
-  %2 = load i16* %b, align 2
+  %2 = load i16, i16* %b, align 2
   %conv1 = zext i16 %2 to i64
   %add = add nsw i64 %conv1, %conv
   ret i64 %add
diff --git a/llvm/test/CodeGen/AArch64/func-argpassing.ll b/llvm/test/CodeGen/AArch64/func-argpassing.ll
index f6b5cf0..9100ae3 100644
--- a/llvm/test/CodeGen/AArch64/func-argpassing.ll
+++ b/llvm/test/CodeGen/AArch64/func-argpassing.ll
@@ -37,13 +37,13 @@
     %addr0 = getelementptr %myStruct, %myStruct* %structval, i64 0, i32 2
     %addr1 = getelementptr %myStruct, %myStruct* %structval, i64 0, i32 0
 
-    %val0 = load volatile i32* %addr0
+    %val0 = load volatile i32, i32* %addr0
     ; Some weird move means x0 is used for one access
 ; CHECK: ldr [[REG32:w[0-9]+]], [{{x[0-9]+|sp}}, #12]
     store volatile i32 %val0, i32* @var32
 ; CHECK: str [[REG32]], [{{x[0-9]+}}, {{#?}}:lo12:var32]
 
-    %val1 = load volatile i64* %addr1
+    %val1 = load volatile i64, i64* %addr1
 ; CHECK: ldr [[REG64:x[0-9]+]], [{{x[0-9]+|sp}}]
     store volatile i64 %val1, i64* @var64
 ; CHECK: str [[REG64]], [{{x[0-9]+}}, {{#?}}:lo12:var64]
@@ -58,13 +58,13 @@
     %addr0 = getelementptr %myStruct, %myStruct* %structval, i64 0, i32 2
     %addr1 = getelementptr %myStruct, %myStruct* %structval, i64 0, i32 0
 
-    %val0 = load volatile i32* %addr0
+    %val0 = load volatile i32, i32* %addr0
     ; Some weird move means x0 is used for one access
 ; CHECK: ldr [[REG32:w[0-9]+]], [sp, #28]
     store i32 %val0, i32* @var32
 ; CHECK: str [[REG32]], [{{x[0-9]+}}, {{#?}}:lo12:var32]
 
-    %val1 = load volatile i64* %addr1
+    %val1 = load volatile i64, i64* %addr1
 ; CHECK: ldr [[REG64:x[0-9]+]], [sp, #16]
     store i64 %val1, i64* @var64
 ; CHECK: str [[REG64]], [{{x[0-9]+}}, {{#?}}:lo12:var64]
@@ -74,7 +74,7 @@
 
 define i32 @return_int() {
 ; CHECK-LABEL: return_int:
-    %val = load i32* @var32
+    %val = load i32, i32* @var32
     ret i32 %val
 ; CHECK: ldr w0, [{{x[0-9]+}}, {{#?}}:lo12:var32]
     ; Make sure epilogue follows
@@ -94,7 +94,7 @@
 define [2 x i64] @return_struct() {
 ; CHECK-LABEL: return_struct:
     %addr = bitcast %myStruct* @varstruct to [2 x i64]*
-    %val = load [2 x i64]* %addr
+    %val = load [2 x i64], [2 x i64]* %addr
     ret [2 x i64] %val
 ; CHECK: add x[[VARSTRUCT:[0-9]+]], {{x[0-9]+}}, :lo12:varstruct
 ; CHECK: ldp x0, x1, [x[[VARSTRUCT]]]
@@ -130,7 +130,7 @@
                           double %notstacked) {
 ; CHECK-LABEL: struct_on_stack:
     %addr = getelementptr %myStruct, %myStruct* %struct, i64 0, i32 0
-    %val64 = load volatile i64* %addr
+    %val64 = load volatile i64, i64* %addr
     store volatile i64 %val64, i64* @var64
     ; Currently nothing on local stack, so struct should be at sp
 ; CHECK: ldr [[VAL64:x[0-9]+]], [sp]
@@ -141,7 +141,7 @@
 ; CHECK: str d0, [{{x[0-9]+}}, {{#?}}:lo12:vardouble
 ; CHECK-NOFP-NOT: str d0,
 
-    %retval = load volatile i32* %stacked
+    %retval = load volatile i32, i32* %stacked
     ret i32 %retval
 ; CHECK-LE: ldr w0, [sp, #16]
 }
diff --git a/llvm/test/CodeGen/AArch64/func-calls.ll b/llvm/test/CodeGen/AArch64/func-calls.ll
index 16157f8..22a3315 100644
--- a/llvm/test/CodeGen/AArch64/func-calls.ll
+++ b/llvm/test/CodeGen/AArch64/func-calls.ll
@@ -21,15 +21,15 @@
 
 define void @simple_args() {
 ; CHECK-LABEL: simple_args:
-  %char1 = load i8* @var8
-  %char2 = load i8* @var8_2
+  %char1 = load i8, i8* @var8
+  %char2 = load i8, i8* @var8_2
   call void @take_i8s(i8 %char1, i8 %char2)
 ; CHECK-DAG: ldrb w0, [{{x[0-9]+}}, {{#?}}:lo12:var8]
 ; CHECK-DAG: ldrb w1, [{{x[0-9]+}}, {{#?}}:lo12:var8_2]
 ; CHECK: bl take_i8s
 
-  %float1 = load float* @varfloat
-  %float2 = load float* @varfloat_2
+  %float1 = load float, float* @varfloat
+  %float2 = load float, float* @varfloat_2
   call void @take_floats(float %float1, float %float2)
 ; CHECK-DAG: ldr s1, [{{x[0-9]+}}, {{#?}}:lo12:varfloat_2]
 ; CHECK-DAG: ldr s0, [{{x[0-9]+}}, {{#?}}:lo12:varfloat]
@@ -124,7 +124,7 @@
 
 define void @check_i128_align() {
 ; CHECK-LABEL: check_i128_align:
-  %val = load i128* @var128
+  %val = load i128, i128* @var128
   call void @check_i128_stackalign(i32 0, i32 1, i32 2, i32 3,
                                    i32 4, i32 5, i32 6, i32 7,
                                    i32 42, i128 %val)
@@ -152,7 +152,7 @@
 
 define void @check_indirect_call() {
 ; CHECK-LABEL: check_indirect_call:
-  %func = load void()** @fptr
+  %func = load void()*, void()** @fptr
   call void %func()
 ; CHECK: ldr [[FPTR:x[0-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:fptr]
 ; CHECK: blr [[FPTR]]
diff --git a/llvm/test/CodeGen/AArch64/funcptr_cast.ll b/llvm/test/CodeGen/AArch64/funcptr_cast.ll
index a00b7bc..506485e 100644
--- a/llvm/test/CodeGen/AArch64/funcptr_cast.ll
+++ b/llvm/test/CodeGen/AArch64/funcptr_cast.ll
@@ -6,7 +6,7 @@
 ; CHECK: add {{x[0-9]+}}, {{x[0-9]+}}, :lo12:foo
 ; CHECK: ldrb w0, [{{x[0-9]+}}]
 entry:
-  %0 = load i8* bitcast (void (...)* @foo to i8*), align 1
+  %0 = load i8, i8* bitcast (void (...)* @foo to i8*), align 1
   ret i8 %0
 }
 
diff --git a/llvm/test/CodeGen/AArch64/ghc-cc.ll b/llvm/test/CodeGen/AArch64/ghc-cc.ll
index 505bd5f..01ed785 100644
--- a/llvm/test/CodeGen/AArch64/ghc-cc.ll
+++ b/llvm/test/CodeGen/AArch64/ghc-cc.ll
@@ -51,7 +51,7 @@
   ; CHECK-NEXT:  bl      bar_i64
   ; CHECK-NEXT:  ret
 
-  %0 = load i64* @base
+  %0 = load i64, i64* @base
   tail call ghccc void @bar_i64( i64 %0 ) nounwind
   ret void
 }
@@ -64,7 +64,7 @@
   ; CHECK-NEXT:  bl      bar_float
   ; CHECK-NEXT:  ret
 
-  %0 = load float* @f1
+  %0 = load float, float* @f1
   tail call ghccc void @bar_float( float %0 ) nounwind
   ret void
 }
@@ -77,7 +77,7 @@
   ; CHECK-NEXT:  bl      bar_double
   ; CHECK-NEXT:  ret
 
-  %0 = load double* @d1
+  %0 = load double, double* @d1
   tail call ghccc void @bar_double( double %0 ) nounwind
   ret void
 }
diff --git a/llvm/test/CodeGen/AArch64/global-alignment.ll b/llvm/test/CodeGen/AArch64/global-alignment.ll
index 451b9d6..657778e 100644
--- a/llvm/test/CodeGen/AArch64/global-alignment.ll
+++ b/llvm/test/CodeGen/AArch64/global-alignment.ll
@@ -11,7 +11,7 @@
 
   ; Since @var32 is only guaranteed to be aligned to 32-bits, it's invalid to
   ; emit an "LDR x0, [x0, #:lo12:var32] instruction to implement this load.
-  %val = load i64* %addr
+  %val = load i64, i64* %addr
 ; CHECK: adrp [[HIBITS:x[0-9]+]], var32
 ; CHECK: add x[[ADDR:[0-9]+]], [[HIBITS]], {{#?}}:lo12:var32
 ; CHECK: ldr x0, [x[[ADDR]]]
@@ -25,7 +25,7 @@
 
   ; However, var64 *is* properly aligned and emitting an adrp/add/ldr would be
   ; inefficient.
-  %val = load i64* %addr
+  %val = load i64, i64* %addr
 ; CHECK: adrp x[[HIBITS:[0-9]+]], var64
 ; CHECK-NOT: add x[[HIBITS]]
 ; CHECK: ldr x0, [x[[HIBITS]], {{#?}}:lo12:var64]
@@ -39,7 +39,7 @@
 
   ; Since @var32 is only guaranteed to be aligned to 32-bits, it's invalid to
   ; emit an "LDR x0, [x0, #:lo12:var32] instruction to implement this load.
-  %val = load i64* %addr
+  %val = load i64, i64* %addr
 ; CHECK: adrp x[[HIBITS:[0-9]+]], var32_align64
 ; CHECK-NOT: add x[[HIBITS]]
 ; CHECK: ldr x0, [x[[HIBITS]], {{#?}}:lo12:var32_align64]
@@ -52,7 +52,7 @@
   %addr = bitcast [3 x i32]* @alias to i64*
 
   ; Test that we can find the alignment for aliases.
-  %val = load i64* %addr
+  %val = load i64, i64* %addr
 ; CHECK: adrp x[[HIBITS:[0-9]+]], alias
 ; CHECK-NOT: add x[[HIBITS]]
 ; CHECK: ldr x0, [x[[HIBITS]], {{#?}}:lo12:alias]
@@ -68,7 +68,7 @@
   ; @yet_another_var has a preferred alignment of 8, but that's not enough if
   ; we're going to be linking against other things. Its ABI alignment is only 4
   ; so we can't fold the load.
-  %val = load i64* bitcast({i32, i32}* @yet_another_var to i64*)
+  %val = load i64, i64* bitcast({i32, i32}* @yet_another_var to i64*)
 ; CHECK: adrp [[HIBITS:x[0-9]+]], yet_another_var
 ; CHECK: add x[[ADDR:[0-9]+]], [[HIBITS]], {{#?}}:lo12:yet_another_var
 ; CHECK: ldr x0, [x[[ADDR]]]
diff --git a/llvm/test/CodeGen/AArch64/global-merge-4.ll b/llvm/test/CodeGen/AArch64/global-merge-4.ll
index a525ccd..6c1177f 100644
--- a/llvm/test/CodeGen/AArch64/global-merge-4.ll
+++ b/llvm/test/CodeGen/AArch64/global-merge-4.ll
@@ -36,24 +36,24 @@
 
 ; Function Attrs: nounwind ssp
 define internal void @calculate() #0 {
-  %1 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4
-  %2 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4
+  %1 = load i32, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 0), align 4
+  %2 = load i32, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 0), align 4
   %3 = mul nsw i32 %2, %1
   store i32 %3, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 0), align 4
-  %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4
-  %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4
+  %4 = load i32, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 1), align 4
+  %5 = load i32, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 1), align 4
   %6 = mul nsw i32 %5, %4
   store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 1), align 4
-  %7 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4
-  %8 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4
+  %7 = load i32, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 2), align 4
+  %8 = load i32, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 2), align 4
   %9 = mul nsw i32 %8, %7
   store i32 %9, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 2), align 4
-  %10 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4
-  %11 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4
+  %10 = load i32, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 3), align 4
+  %11 = load i32, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 3), align 4
   %12 = mul nsw i32 %11, %10
   store i32 %12, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 3), align 4
-  %13 = load i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4
-  %14 = load i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4
+  %13 = load i32, i32* getelementptr inbounds ([5 x i32]* @bar, i64 0, i64 4), align 4
+  %14 = load i32, i32* getelementptr inbounds ([5 x i32]* @baz, i64 0, i64 4), align 4
   %15 = mul nsw i32 %14, %13
   store i32 %15, i32* getelementptr inbounds ([5 x i32]* @foo, i64 0, i64 4), align 4
   ret void
diff --git a/llvm/test/CodeGen/AArch64/half.ll b/llvm/test/CodeGen/AArch64/half.ll
index a46094b..d4cbbc9 100644
--- a/llvm/test/CodeGen/AArch64/half.ll
+++ b/llvm/test/CodeGen/AArch64/half.ll
@@ -4,7 +4,7 @@
 ; CHECK-LABEL: test_load_store:
 ; CHECK: ldr [[TMP:h[0-9]+]], [x0]
 ; CHECK: str [[TMP]], [x1]
-  %val = load half* %in
+  %val = load half, half* %in
   store half %val, half* %out
   ret void
 }
@@ -12,7 +12,7 @@
 define i16 @test_bitcast_from_half(half* %addr) {
 ; CHECK-LABEL: test_bitcast_from_half:
 ; CHECK: ldrh w0, [x0]
-  %val = load half* %addr
+  %val = load half, half* %addr
   %val_int = bitcast half %val to i16
   ret i16 %val_int
 }
@@ -50,7 +50,7 @@
 ; CHECK-LABEL: test_extend32:
 ; CHECK: fcvt {{s[0-9]+}}, {{h[0-9]+}}
 
-  %val16 = load half* %addr
+  %val16 = load half, half* %addr
   %val32 = fpext half %val16 to float
   ret float %val32
 }
@@ -59,7 +59,7 @@
 ; CHECK-LABEL: test_extend64:
 ; CHECK: fcvt {{d[0-9]+}}, {{h[0-9]+}}
 
-  %val16 = load half* %addr
+  %val16 = load half, half* %addr
   %val32 = fpext half %val16 to double
   ret double %val32
 }
diff --git a/llvm/test/CodeGen/AArch64/i1-contents.ll b/llvm/test/CodeGen/AArch64/i1-contents.ll
index 7f133fc..a3830e4 100644
--- a/llvm/test/CodeGen/AArch64/i1-contents.ll
+++ b/llvm/test/CodeGen/AArch64/i1-contents.ll
@@ -32,7 +32,7 @@
 ; CHECK-LABEL: produce_i1_ret:
 ; CHECK: ldr [[VAR32:w[0-9]+]], [{{x[0-9]+}}, :lo12:var]
 ; CHECK: and w0, [[VAR32]], #{{0x1|0xff}}
-  %val = load %big* @var
+  %val = load %big, %big* @var
   %val1 = trunc %big %val to i1
   ret i1 %val1
 }
@@ -42,7 +42,7 @@
 ; CHECK: ldr [[VAR32:w[0-9]+]], [{{x[0-9]+}}, :lo12:var]
 ; CHECK: and w0, [[VAR32]], #{{0x1|0xff}}
 ; CHECK: bl consume_i1_arg
-  %val = load %big* @var
+  %val = load %big, %big* @var
   %val1 = trunc %big %val to i1
   call void @consume_i1_arg(i1 %val1)
   ret void
diff --git a/llvm/test/CodeGen/AArch64/ldst-opt.ll b/llvm/test/CodeGen/AArch64/ldst-opt.ll
index d8bca89..b2c11c7 100644
--- a/llvm/test/CodeGen/AArch64/ldst-opt.ll
+++ b/llvm/test/CodeGen/AArch64/ldst-opt.ll
@@ -31,7 +31,7 @@
 ; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #32]!
 entry:
   %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0
-  %add = load i32* %a, align 4
+  %add = load i32, i32* %a, align 4
   br label %bar
 bar:
   %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1
@@ -59,7 +59,7 @@
 ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #32]!
 entry:
   %a = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1, i32 0
-  %add = load i64* %a, align 4
+  %add = load i64, i64* %a, align 4
   br label %bar
 bar:
   %c = getelementptr inbounds %struct.doubleword, %struct.doubleword* %ptr, i64 0, i32 1
@@ -87,7 +87,7 @@
 ; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32]!
 entry:
   %a = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1, i32 0
-  %add = load fp128* %a, align 4
+  %add = load fp128, fp128* %a, align 4
   br label %bar
 bar:
   %c = getelementptr inbounds %struct.quadword, %struct.quadword* %ptr, i64 0, i32 1
@@ -115,7 +115,7 @@
 ; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #32]!
 entry:
   %a = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1, i32 0
-  %add = load float* %a, align 4
+  %add = load float, float* %a, align 4
   br label %bar
 bar:
   %c = getelementptr inbounds %struct.float, %struct.float* %ptr, i64 0, i32 1
@@ -143,7 +143,7 @@
 ; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #32]!
 entry:
   %a = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1, i32 0
-  %add = load double* %a, align 4
+  %add = load double, double* %a, align 4
   br label %bar
 bar:
   %c = getelementptr inbounds %struct.double, %struct.double* %ptr, i64 0, i32 1
@@ -186,7 +186,7 @@
 ; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #4]!
   br i1 %cond, label %if.then, label %if.end
 if.then:
-  %load1 = load %pre.struct.i32** %this
+  %load1 = load %pre.struct.i32*, %pre.struct.i32** %this
   %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 1
   br label %return
 if.end:
@@ -194,7 +194,7 @@
   br label %return
 return:
   %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ]
-  %ret = load i32* %retptr
+  %ret = load i32, i32* %retptr
   ret i32 %ret
 }
 
@@ -204,7 +204,7 @@
 ; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #8]!
   br i1 %cond, label %if.then, label %if.end
 if.then:
-  %load1 = load %pre.struct.i64** %this
+  %load1 = load %pre.struct.i64*, %pre.struct.i64** %this
   %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 1
   br label %return
 if.end:
@@ -212,7 +212,7 @@
   br label %return
 return:
   %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ]
-  %ret = load i64* %retptr
+  %ret = load i64, i64* %retptr
   ret i64 %ret
 }
 
@@ -222,7 +222,7 @@
 ; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #16]!
   br i1 %cond, label %if.then, label %if.end
 if.then:
-  %load1 = load %pre.struct.i128** %this
+  %load1 = load %pre.struct.i128*, %pre.struct.i128** %this
   %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 1
   br label %return
 if.end:
@@ -230,7 +230,7 @@
   br label %return
 return:
   %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ]
-  %ret = load <2 x i64>* %retptr
+  %ret = load <2 x i64>, <2 x i64>* %retptr
   ret <2 x i64> %ret
 }
 
@@ -240,7 +240,7 @@
 ; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #4]!
   br i1 %cond, label %if.then, label %if.end
 if.then:
-  %load1 = load %pre.struct.float** %this
+  %load1 = load %pre.struct.float*, %pre.struct.float** %this
   %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 1
   br label %return
 if.end:
@@ -248,7 +248,7 @@
   br label %return
 return:
   %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ]
-  %ret = load float* %retptr
+  %ret = load float, float* %retptr
   ret float %ret
 }
 
@@ -258,7 +258,7 @@
 ; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #8]!
   br i1 %cond, label %if.then, label %if.end
 if.then:
-  %load1 = load %pre.struct.double** %this
+  %load1 = load %pre.struct.double*, %pre.struct.double** %this
   %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 1
   br label %return
 if.end:
@@ -266,7 +266,7 @@
   br label %return
 return:
   %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ]
-  %ret = load double* %retptr
+  %ret = load double, double* %retptr
   ret double %ret
 }
 
@@ -287,7 +287,7 @@
 ; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #4]!
   br i1 %cond, label %if.then, label %if.end
 if.then:
-  %load1 = load %pre.struct.i32** %this
+  %load1 = load %pre.struct.i32*, %pre.struct.i32** %this
   %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 1
   br label %return
 if.end:
@@ -306,7 +306,7 @@
 ; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #8]!
   br i1 %cond, label %if.then, label %if.end
 if.then:
-  %load1 = load %pre.struct.i64** %this
+  %load1 = load %pre.struct.i64*, %pre.struct.i64** %this
   %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 1
   br label %return
 if.end:
@@ -325,7 +325,7 @@
 ; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #16]!
   br i1 %cond, label %if.then, label %if.end
 if.then:
-  %load1 = load %pre.struct.i128** %this
+  %load1 = load %pre.struct.i128*, %pre.struct.i128** %this
   %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 1
   br label %return
 if.end:
@@ -344,7 +344,7 @@
 ; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #4]!
   br i1 %cond, label %if.then, label %if.end
 if.then:
-  %load1 = load %pre.struct.float** %this
+  %load1 = load %pre.struct.float*, %pre.struct.float** %this
   %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 1
   br label %return
 if.end:
@@ -363,7 +363,7 @@
 ; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #8]!
   br i1 %cond, label %if.then, label %if.end
 if.then:
-  %load1 = load %pre.struct.double** %this
+  %load1 = load %pre.struct.double*, %pre.struct.double** %this
   %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 1
   br label %return
 if.end:
@@ -396,9 +396,9 @@
   %iv2 = phi i32* [ %gep3, %body ], [ %gep1, %entry ]
   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
   %gep2 = getelementptr i32, i32* %iv2, i64 -1
-  %load = load i32* %gep2
+  %load = load i32, i32* %gep2
   call void @use-word(i32 %load)
-  %load2 = load i32* %iv2
+  %load2 = load i32, i32* %iv2
   call void @use-word(i32 %load2)
   %iv.next = add i64 %iv, -4
   %gep3 = getelementptr i32, i32* %iv2, i64 4
@@ -420,9 +420,9 @@
   %iv2 = phi i64* [ %gep3, %body ], [ %gep1, %entry ]
   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
   %gep2 = getelementptr i64, i64* %iv2, i64 -1
-  %load = load i64* %gep2
+  %load = load i64, i64* %gep2
   call void @use-doubleword(i64 %load)
-  %load2 = load i64* %iv2
+  %load2 = load i64, i64* %iv2
   call void @use-doubleword(i64 %load2)
   %iv.next = add i64 %iv, -4
   %gep3 = getelementptr i64, i64* %iv2, i64 4
@@ -444,9 +444,9 @@
   %iv2 = phi <2 x i64>* [ %gep3, %body ], [ %gep1, %entry ]
   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
   %gep2 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 -1
-  %load = load <2 x i64>* %gep2
+  %load = load <2 x i64>, <2 x i64>* %gep2
   call void @use-quadword(<2 x i64> %load)
-  %load2 = load <2 x i64>* %iv2
+  %load2 = load <2 x i64>, <2 x i64>* %iv2
   call void @use-quadword(<2 x i64> %load2)
   %iv.next = add i64 %iv, -4
   %gep3 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 4
@@ -468,9 +468,9 @@
   %iv2 = phi float* [ %gep3, %body ], [ %gep1, %entry ]
   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
   %gep2 = getelementptr float, float* %iv2, i64 -1
-  %load = load float* %gep2
+  %load = load float, float* %gep2
   call void @use-float(float %load)
-  %load2 = load float* %iv2
+  %load2 = load float, float* %iv2
   call void @use-float(float %load2)
   %iv.next = add i64 %iv, -4
   %gep3 = getelementptr float, float* %iv2, i64 4
@@ -492,9 +492,9 @@
   %iv2 = phi double* [ %gep3, %body ], [ %gep1, %entry ]
   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
   %gep2 = getelementptr double, double* %iv2, i64 -1
-  %load = load double* %gep2
+  %load = load double, double* %gep2
   call void @use-double(double %load)
-  %load2 = load double* %iv2
+  %load2 = load double, double* %iv2
   call void @use-double(double %load2)
   %iv.next = add i64 %iv, -4
   %gep3 = getelementptr double, double* %iv2, i64 4
@@ -526,7 +526,7 @@
   %iv2 = phi i32* [ %gep3, %body ], [ %gep1, %entry ]
   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
   %gep2 = getelementptr i32, i32* %iv2, i64 -1
-  %load = load i32* %gep2
+  %load = load i32, i32* %gep2
   call void @use-word(i32 %load)
   store i32 %val, i32* %iv2
   %iv.next = add i64 %iv, -4
@@ -549,7 +549,7 @@
   %iv2 = phi i64* [ %gep3, %body ], [ %gep1, %entry ]
   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
   %gep2 = getelementptr i64, i64* %iv2, i64 -1
-  %load = load i64* %gep2
+  %load = load i64, i64* %gep2
   call void @use-doubleword(i64 %load)
   store i64 %val, i64* %iv2
   %iv.next = add i64 %iv, -4
@@ -572,7 +572,7 @@
   %iv2 = phi <2 x i64>* [ %gep3, %body ], [ %gep1, %entry ]
   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
   %gep2 = getelementptr <2 x i64>, <2 x i64>* %iv2, i64 -1
-  %load = load <2 x i64>* %gep2
+  %load = load <2 x i64>, <2 x i64>* %gep2
   call void @use-quadword(<2 x i64> %load)
   store <2 x i64> %val, <2 x i64>* %iv2
   %iv.next = add i64 %iv, -4
@@ -595,7 +595,7 @@
   %iv2 = phi float* [ %gep3, %body ], [ %gep1, %entry ]
   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
   %gep2 = getelementptr float, float* %iv2, i64 -1
-  %load = load float* %gep2
+  %load = load float, float* %gep2
   call void @use-float(float %load)
   store float %val, float* %iv2
   %iv.next = add i64 %iv, -4
@@ -618,7 +618,7 @@
   %iv2 = phi double* [ %gep3, %body ], [ %gep1, %entry ]
   %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ]
   %gep2 = getelementptr double, double* %iv2, i64 -1
-  %load = load double* %gep2
+  %load = load double, double* %gep2
   call void @use-double(double %load)
   store double %val, double* %iv2
   %iv.next = add i64 %iv, -4
@@ -656,10 +656,10 @@
   %phi2 = phi i32* [ %gep3, %for.body ], [ %a, %0 ]
   %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
   %gep1 = getelementptr i32, i32* %phi1, i64 -1
-  %load1 = load i32* %gep1
+  %load1 = load i32, i32* %gep1
   %gep2 = getelementptr i32, i32* %phi2, i64 -1
   store i32 %load1, i32* %gep2
-  %load2 = load i32* %phi1
+  %load2 = load i32, i32* %phi1
   store i32 %load2, i32* %phi2
   %dec.i = add nsw i64 %i, -1
   %gep3 = getelementptr i32, i32* %phi2, i64 -2
@@ -680,10 +680,10 @@
   %phi2 = phi i64* [ %gep3, %for.body ], [ %a, %0 ]
   %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
   %gep1 = getelementptr i64, i64* %phi1, i64 -1
-  %load1 = load i64* %gep1
+  %load1 = load i64, i64* %gep1
   %gep2 = getelementptr i64, i64* %phi2, i64 -1
   store i64 %load1, i64* %gep2
-  %load2 = load i64* %phi1
+  %load2 = load i64, i64* %phi1
   store i64 %load2, i64* %phi2
   %dec.i = add nsw i64 %i, -1
   %gep3 = getelementptr i64, i64* %phi2, i64 -2
@@ -704,10 +704,10 @@
   %phi2 = phi <2 x i64>* [ %gep3, %for.body ], [ %a, %0 ]
   %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
   %gep1 = getelementptr <2 x i64>, <2 x i64>* %phi1, i64 -1
-  %load1 = load <2 x i64>* %gep1
+  %load1 = load <2 x i64>, <2 x i64>* %gep1
   %gep2 = getelementptr <2 x i64>, <2 x i64>* %phi2, i64 -1
   store <2 x i64> %load1, <2 x i64>* %gep2
-  %load2 = load <2 x i64>* %phi1
+  %load2 = load <2 x i64>, <2 x i64>* %phi1
   store <2 x i64> %load2, <2 x i64>* %phi2
   %dec.i = add nsw i64 %i, -1
   %gep3 = getelementptr <2 x i64>, <2 x i64>* %phi2, i64 -2
@@ -728,10 +728,10 @@
   %phi2 = phi float* [ %gep3, %for.body ], [ %a, %0 ]
   %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
   %gep1 = getelementptr float, float* %phi1, i64 -1
-  %load1 = load float* %gep1
+  %load1 = load float, float* %gep1
   %gep2 = getelementptr float, float* %phi2, i64 -1
   store float %load1, float* %gep2
-  %load2 = load float* %phi1
+  %load2 = load float, float* %phi1
   store float %load2, float* %phi2
   %dec.i = add nsw i64 %i, -1
   %gep3 = getelementptr float, float* %phi2, i64 -2
@@ -752,10 +752,10 @@
   %phi2 = phi double* [ %gep3, %for.body ], [ %a, %0 ]
   %i = phi i64 [ %dec.i, %for.body], [ %count, %0 ]
   %gep1 = getelementptr double, double* %phi1, i64 -1
-  %load1 = load double* %gep1
+  %load1 = load double, double* %gep1
   %gep2 = getelementptr double, double* %phi2, i64 -1
   store double %load1, double* %gep2
-  %load2 = load double* %phi1
+  %load2 = load double, double* %phi1
   store double %load2, double* %phi2
   %dec.i = add nsw i64 %i, -1
   %gep3 = getelementptr double, double* %phi2, i64 -2
diff --git a/llvm/test/CodeGen/AArch64/ldst-regoffset.ll b/llvm/test/CodeGen/AArch64/ldst-regoffset.ll
index 0f2b52f..85d6db1 100644
--- a/llvm/test/CodeGen/AArch64/ldst-regoffset.ll
+++ b/llvm/test/CodeGen/AArch64/ldst-regoffset.ll
@@ -13,13 +13,13 @@
 ; CHECK-LABEL: ldst_8bit:
 
    %addr8_sxtw = getelementptr i8, i8* %base, i32 %off32
-   %val8_sxtw = load volatile i8* %addr8_sxtw
+   %val8_sxtw = load volatile i8, i8* %addr8_sxtw
    %val32_signed = sext i8 %val8_sxtw to i32
    store volatile i32 %val32_signed, i32* @var_32bit
 ; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, {{[wx][0-9]+}}, sxtw]
 
   %addr_lsl = getelementptr i8, i8* %base, i64 %off64
-  %val8_lsl = load volatile i8* %addr_lsl
+  %val8_lsl = load volatile i8, i8* %addr_lsl
   %val32_unsigned = zext i8 %val8_lsl to i32
   store volatile i32 %val32_unsigned, i32* @var_32bit
 ; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
@@ -28,7 +28,7 @@
   %offset_uxtw = zext i32 %off32 to i64
   %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
   %addr_uxtw = inttoptr i64 %addrint1_uxtw to i8*
-  %val8_uxtw = load volatile i8* %addr_uxtw
+  %val8_uxtw = load volatile i8, i8* %addr_uxtw
   %newval8 = add i8 %val8_uxtw, 1
   store volatile i8 %newval8, i8* @var_8bit
 ; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
@@ -41,13 +41,13 @@
 ; CHECK-LABEL: ldst_16bit:
 
    %addr8_sxtwN = getelementptr i16, i16* %base, i32 %off32
-   %val8_sxtwN = load volatile i16* %addr8_sxtwN
+   %val8_sxtwN = load volatile i16, i16* %addr8_sxtwN
    %val32_signed = sext i16 %val8_sxtwN to i32
    store volatile i32 %val32_signed, i32* @var_32bit
 ; CHECK: ldrsh {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #1]
 
   %addr_lslN = getelementptr i16, i16* %base, i64 %off64
-  %val8_lslN = load volatile i16* %addr_lslN
+  %val8_lslN = load volatile i16, i16* %addr_lslN
   %val32_unsigned = zext i16 %val8_lslN to i32
   store volatile i32 %val32_unsigned, i32* @var_32bit
 ; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #1]
@@ -56,7 +56,7 @@
   %offset_uxtw = zext i32 %off32 to i64
   %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
   %addr_uxtw = inttoptr i64 %addrint1_uxtw to i16*
-  %val8_uxtw = load volatile i16* %addr_uxtw
+  %val8_uxtw = load volatile i16, i16* %addr_uxtw
   %newval8 = add i16 %val8_uxtw, 1
   store volatile i16 %newval8, i16* @var_16bit
 ; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
@@ -65,7 +65,7 @@
   %offset_sxtw = sext i32 %off32 to i64
   %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
   %addr_sxtw = inttoptr i64 %addrint_sxtw to i16*
-  %val16_sxtw = load volatile i16* %addr_sxtw
+  %val16_sxtw = load volatile i16, i16* %addr_sxtw
   %val64_signed = sext i16 %val16_sxtw to i64
   store volatile i64 %val64_signed, i64* @var_64bit
 ; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, {{[wx][0-9]+}}, sxtw]
@@ -74,7 +74,7 @@
   %base_lsl = ptrtoint i16* %base to i64
   %addrint_lsl = add i64 %base_lsl, %off64
   %addr_lsl = inttoptr i64 %addrint_lsl to i16*
-  %val16_lsl = load volatile i16* %addr_lsl
+  %val16_lsl = load volatile i16, i16* %addr_lsl
   %val64_unsigned = zext i16 %val16_lsl to i64
   store volatile i64 %val64_unsigned, i64* @var_64bit
 ; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
@@ -84,7 +84,7 @@
   %offset2_uxtwN = shl i64 %offset_uxtwN, 1
   %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
   %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i16*
-  %val32 = load volatile i32* @var_32bit
+  %val32 = load volatile i32, i32* @var_32bit
   %val16_trunc32 = trunc i32 %val32 to i16
   store volatile i16 %val16_trunc32, i16* %addr_uxtwN
 ; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw #1]
@@ -95,12 +95,12 @@
 ; CHECK-LABEL: ldst_32bit:
 
    %addr_sxtwN = getelementptr i32, i32* %base, i32 %off32
-   %val_sxtwN = load volatile i32* %addr_sxtwN
+   %val_sxtwN = load volatile i32, i32* %addr_sxtwN
    store volatile i32 %val_sxtwN, i32* @var_32bit
 ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #2]
 
   %addr_lslN = getelementptr i32, i32* %base, i64 %off64
-  %val_lslN = load volatile i32* %addr_lslN
+  %val_lslN = load volatile i32, i32* %addr_lslN
   store volatile i32 %val_lslN, i32* @var_32bit
 ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #2]
 
@@ -108,7 +108,7 @@
   %offset_uxtw = zext i32 %off32 to i64
   %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
   %addr_uxtw = inttoptr i64 %addrint1_uxtw to i32*
-  %val_uxtw = load volatile i32* %addr_uxtw
+  %val_uxtw = load volatile i32, i32* %addr_uxtw
   %newval8 = add i32 %val_uxtw, 1
   store volatile i32 %newval8, i32* @var_32bit
 ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
@@ -118,7 +118,7 @@
   %offset_sxtw = sext i32 %off32 to i64
   %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
   %addr_sxtw = inttoptr i64 %addrint_sxtw to i32*
-  %val16_sxtw = load volatile i32* %addr_sxtw
+  %val16_sxtw = load volatile i32, i32* %addr_sxtw
   %val64_signed = sext i32 %val16_sxtw to i64
   store volatile i64 %val64_signed, i64* @var_64bit
 ; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw]
@@ -127,7 +127,7 @@
   %base_lsl = ptrtoint i32* %base to i64
   %addrint_lsl = add i64 %base_lsl, %off64
   %addr_lsl = inttoptr i64 %addrint_lsl to i32*
-  %val16_lsl = load volatile i32* %addr_lsl
+  %val16_lsl = load volatile i32, i32* %addr_lsl
   %val64_unsigned = zext i32 %val16_lsl to i64
   store volatile i64 %val64_unsigned, i64* @var_64bit
 ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
@@ -137,7 +137,7 @@
   %offset2_uxtwN = shl i64 %offset_uxtwN, 2
   %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
   %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i32*
-  %val32 = load volatile i32* @var_32bit
+  %val32 = load volatile i32, i32* @var_32bit
   store volatile i32 %val32, i32* %addr_uxtwN
 ; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw #2]
    ret void
@@ -147,12 +147,12 @@
 ; CHECK-LABEL: ldst_64bit:
 
    %addr_sxtwN = getelementptr i64, i64* %base, i32 %off32
-   %val_sxtwN = load volatile i64* %addr_sxtwN
+   %val_sxtwN = load volatile i64, i64* %addr_sxtwN
    store volatile i64 %val_sxtwN, i64* @var_64bit
 ; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #3]
 
   %addr_lslN = getelementptr i64, i64* %base, i64 %off64
-  %val_lslN = load volatile i64* %addr_lslN
+  %val_lslN = load volatile i64, i64* %addr_lslN
   store volatile i64 %val_lslN, i64* @var_64bit
 ; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #3]
 
@@ -160,7 +160,7 @@
   %offset_uxtw = zext i32 %off32 to i64
   %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
   %addr_uxtw = inttoptr i64 %addrint1_uxtw to i64*
-  %val8_uxtw = load volatile i64* %addr_uxtw
+  %val8_uxtw = load volatile i64, i64* %addr_uxtw
   %newval8 = add i64 %val8_uxtw, 1
   store volatile i64 %newval8, i64* @var_64bit
 ; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
@@ -169,14 +169,14 @@
   %offset_sxtw = sext i32 %off32 to i64
   %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
   %addr_sxtw = inttoptr i64 %addrint_sxtw to i64*
-  %val64_sxtw = load volatile i64* %addr_sxtw
+  %val64_sxtw = load volatile i64, i64* %addr_sxtw
   store volatile i64 %val64_sxtw, i64* @var_64bit
 ; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw]
 
   %base_lsl = ptrtoint i64* %base to i64
   %addrint_lsl = add i64 %base_lsl, %off64
   %addr_lsl = inttoptr i64 %addrint_lsl to i64*
-  %val64_lsl = load volatile i64* %addr_lsl
+  %val64_lsl = load volatile i64, i64* %addr_lsl
   store volatile i64 %val64_lsl, i64* @var_64bit
 ; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
 
@@ -185,7 +185,7 @@
   %offset2_uxtwN = shl i64 %offset_uxtwN, 3
   %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
   %addr_uxtwN = inttoptr i64 %addrint_uxtwN to i64*
-  %val64 = load volatile i64* @var_64bit
+  %val64 = load volatile i64, i64* @var_64bit
   store volatile i64 %val64, i64* %addr_uxtwN
 ; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw #3]
    ret void
@@ -195,13 +195,13 @@
 ; CHECK-LABEL: ldst_float:
 
    %addr_sxtwN = getelementptr float, float* %base, i32 %off32
-   %val_sxtwN = load volatile float* %addr_sxtwN
+   %val_sxtwN = load volatile float, float* %addr_sxtwN
    store volatile float %val_sxtwN, float* @var_float
 ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #2]
 ; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
 
   %addr_lslN = getelementptr float, float* %base, i64 %off64
-  %val_lslN = load volatile float* %addr_lslN
+  %val_lslN = load volatile float, float* %addr_lslN
   store volatile float %val_lslN, float* @var_float
 ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #2]
 ; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
@@ -210,7 +210,7 @@
   %offset_uxtw = zext i32 %off32 to i64
   %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
   %addr_uxtw = inttoptr i64 %addrint1_uxtw to float*
-  %val_uxtw = load volatile float* %addr_uxtw
+  %val_uxtw = load volatile float, float* %addr_uxtw
   store volatile float %val_uxtw, float* @var_float
 ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
 ; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
@@ -219,7 +219,7 @@
   %offset_sxtw = sext i32 %off32 to i64
   %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
   %addr_sxtw = inttoptr i64 %addrint_sxtw to float*
-  %val64_sxtw = load volatile float* %addr_sxtw
+  %val64_sxtw = load volatile float, float* %addr_sxtw
   store volatile float %val64_sxtw, float* @var_float
 ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw]
 ; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
@@ -227,7 +227,7 @@
   %base_lsl = ptrtoint float* %base to i64
   %addrint_lsl = add i64 %base_lsl, %off64
   %addr_lsl = inttoptr i64 %addrint_lsl to float*
-  %val64_lsl = load volatile float* %addr_lsl
+  %val64_lsl = load volatile float, float* %addr_lsl
   store volatile float %val64_lsl, float* @var_float
 ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
 ; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
@@ -237,7 +237,7 @@
   %offset2_uxtwN = shl i64 %offset_uxtwN, 2
   %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
   %addr_uxtwN = inttoptr i64 %addrint_uxtwN to float*
-  %val64 = load volatile float* @var_float
+  %val64 = load volatile float, float* @var_float
   store volatile float %val64, float* %addr_uxtwN
 ; CHECK: str {{s[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw #2]
 ; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
@@ -248,13 +248,13 @@
 ; CHECK-LABEL: ldst_double:
 
    %addr_sxtwN = getelementptr double, double* %base, i32 %off32
-   %val_sxtwN = load volatile double* %addr_sxtwN
+   %val_sxtwN = load volatile double, double* %addr_sxtwN
    store volatile double %val_sxtwN, double* @var_double
 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #3]
 ; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
 
   %addr_lslN = getelementptr double, double* %base, i64 %off64
-  %val_lslN = load volatile double* %addr_lslN
+  %val_lslN = load volatile double, double* %addr_lslN
   store volatile double %val_lslN, double* @var_double
 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #3]
 ; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
@@ -263,7 +263,7 @@
   %offset_uxtw = zext i32 %off32 to i64
   %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
   %addr_uxtw = inttoptr i64 %addrint1_uxtw to double*
-  %val_uxtw = load volatile double* %addr_uxtw
+  %val_uxtw = load volatile double, double* %addr_uxtw
   store volatile double %val_uxtw, double* @var_double
 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
 ; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
@@ -272,7 +272,7 @@
   %offset_sxtw = sext i32 %off32 to i64
   %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
   %addr_sxtw = inttoptr i64 %addrint_sxtw to double*
-  %val64_sxtw = load volatile double* %addr_sxtw
+  %val64_sxtw = load volatile double, double* %addr_sxtw
   store volatile double %val64_sxtw, double* @var_double
 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw]
 ; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
@@ -280,7 +280,7 @@
   %base_lsl = ptrtoint double* %base to i64
   %addrint_lsl = add i64 %base_lsl, %off64
   %addr_lsl = inttoptr i64 %addrint_lsl to double*
-  %val64_lsl = load volatile double* %addr_lsl
+  %val64_lsl = load volatile double, double* %addr_lsl
   store volatile double %val64_lsl, double* @var_double
 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
 ; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
@@ -290,7 +290,7 @@
   %offset2_uxtwN = shl i64 %offset_uxtwN, 3
   %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
   %addr_uxtwN = inttoptr i64 %addrint_uxtwN to double*
-  %val64 = load volatile double* @var_double
+  %val64 = load volatile double, double* @var_double
   store volatile double %val64, double* %addr_uxtwN
 ; CHECK: str {{d[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw #3]
 ; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
@@ -302,13 +302,13 @@
 ; CHECK-LABEL: ldst_128bit:
 
    %addr_sxtwN = getelementptr fp128, fp128* %base, i32 %off32
-   %val_sxtwN = load volatile fp128* %addr_sxtwN
+   %val_sxtwN = load volatile fp128, fp128* %addr_sxtwN
    store volatile fp128 %val_sxtwN, fp128* %base
 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
 ; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
 
   %addr_lslN = getelementptr fp128, fp128* %base, i64 %off64
-  %val_lslN = load volatile fp128* %addr_lslN
+  %val_lslN = load volatile fp128, fp128* %addr_lslN
   store volatile fp128 %val_lslN, fp128* %base
 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}, lsl #4]
 ; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
@@ -317,7 +317,7 @@
   %offset_uxtw = zext i32 %off32 to i64
   %addrint1_uxtw = add i64 %addrint_uxtw, %offset_uxtw
   %addr_uxtw = inttoptr i64 %addrint1_uxtw to fp128*
-  %val_uxtw = load volatile fp128* %addr_uxtw
+  %val_uxtw = load volatile fp128, fp128* %addr_uxtw
   store volatile fp128 %val_uxtw, fp128* %base
 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw]
 ; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
@@ -326,7 +326,7 @@
   %offset_sxtw = sext i32 %off32 to i64
   %addrint_sxtw = add i64 %base_sxtw, %offset_sxtw
   %addr_sxtw = inttoptr i64 %addrint_sxtw to fp128*
-  %val64_sxtw = load volatile fp128* %addr_sxtw
+  %val64_sxtw = load volatile fp128, fp128* %addr_sxtw
   store volatile fp128 %val64_sxtw, fp128* %base
 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw]
 ; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
@@ -334,7 +334,7 @@
   %base_lsl = ptrtoint fp128* %base to i64
   %addrint_lsl = add i64 %base_lsl, %off64
   %addr_lsl = inttoptr i64 %addrint_lsl to fp128*
-  %val64_lsl = load volatile fp128* %addr_lsl
+  %val64_lsl = load volatile fp128, fp128* %addr_lsl
   store volatile fp128 %val64_lsl, fp128* %base
 ; CHECK: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{x[0-9]+}}]
 ; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
@@ -344,7 +344,7 @@
   %offset2_uxtwN = shl i64 %offset_uxtwN, 4
   %addrint_uxtwN = add i64 %base_uxtwN, %offset2_uxtwN
   %addr_uxtwN = inttoptr i64 %addrint_uxtwN to fp128*
-  %val64 = load volatile fp128* %base
+  %val64 = load volatile fp128, fp128* %base
   store volatile fp128 %val64, fp128* %addr_uxtwN
 ; CHECK: str {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, uxtw #4]
 ; CHECK-NOFP-NOT: ldr {{q[0-9]+}}, [{{x[0-9]+}}, {{[xw][0-9]+}}, sxtw #4]
diff --git a/llvm/test/CodeGen/AArch64/ldst-unscaledimm.ll b/llvm/test/CodeGen/AArch64/ldst-unscaledimm.ll
index af4d7d7..a362597 100644
--- a/llvm/test/CodeGen/AArch64/ldst-unscaledimm.ll
+++ b/llvm/test/CodeGen/AArch64/ldst-unscaledimm.ll
@@ -16,32 +16,32 @@
 
 ; No architectural support for loads to 16-bit or 8-bit since we
 ; promote i8 during lowering.
-  %addr_8bit = load i8** @varptr
+  %addr_8bit = load i8*, i8** @varptr
 
 ; match a sign-extending load 8-bit -> 32-bit
    %addr_sext32 = getelementptr i8, i8* %addr_8bit, i64 -256
-   %val8_sext32 = load volatile i8* %addr_sext32
+   %val8_sext32 = load volatile i8, i8* %addr_sext32
    %val32_signed = sext i8 %val8_sext32 to i32
    store volatile i32 %val32_signed, i32* @var_32bit
 ; CHECK: ldursb {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
 
 ; match a zero-extending load volatile 8-bit -> 32-bit
   %addr_zext32 = getelementptr i8, i8* %addr_8bit, i64 -12
-  %val8_zext32 = load volatile i8* %addr_zext32
+  %val8_zext32 = load volatile i8, i8* %addr_zext32
   %val32_unsigned = zext i8 %val8_zext32 to i32
   store volatile i32 %val32_unsigned, i32* @var_32bit
 ; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-12]
 
 ; match an any-extending load volatile 8-bit -> 32-bit
   %addr_anyext = getelementptr i8, i8* %addr_8bit, i64 -1
-  %val8_anyext = load volatile i8* %addr_anyext
+  %val8_anyext = load volatile i8, i8* %addr_anyext
   %newval8 = add i8 %val8_anyext, 1
   store volatile i8 %newval8, i8* @var_8bit
 ; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
 
 ; match a sign-extending load volatile 8-bit -> 64-bit
   %addr_sext64 = getelementptr i8, i8* %addr_8bit, i64 -5
-  %val8_sext64 = load volatile i8* %addr_sext64
+  %val8_sext64 = load volatile i8, i8* %addr_sext64
   %val64_signed = sext i8 %val8_sext64 to i64
   store volatile i64 %val64_signed, i64* @var_64bit
 ; CHECK: ldursb {{x[0-9]+}}, [{{x[0-9]+}}, #-5]
@@ -50,21 +50,21 @@
 ; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits
 ; of x0 so it's identical to a volatile load to 32-bits.
   %addr_zext64 = getelementptr i8, i8* %addr_8bit, i64 -9
-  %val8_zext64 = load volatile i8* %addr_zext64
+  %val8_zext64 = load volatile i8, i8* %addr_zext64
   %val64_unsigned = zext i8 %val8_zext64 to i64
   store volatile i64 %val64_unsigned, i64* @var_64bit
 ; CHECK: ldurb {{w[0-9]+}}, [{{x[0-9]+}}, #-9]
 
 ; truncating store volatile 32-bits to 8-bits
   %addr_trunc32 = getelementptr i8, i8* %addr_8bit, i64 -256
-  %val32 = load volatile i32* @var_32bit
+  %val32 = load volatile i32, i32* @var_32bit
   %val8_trunc32 = trunc i32 %val32 to i8
   store volatile i8 %val8_trunc32, i8* %addr_trunc32
 ; CHECK: sturb {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
 
 ; truncating store volatile 64-bits to 8-bits
   %addr_trunc64 = getelementptr i8, i8* %addr_8bit, i64 -1
-  %val64 = load volatile i64* @var_64bit
+  %val64 = load volatile i64, i64* @var_64bit
   %val8_trunc64 = trunc i64 %val64 to i8
   store volatile i8 %val8_trunc64, i8* %addr_trunc64
 ; CHECK: sturb {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
@@ -77,12 +77,12 @@
 
 ; No architectural support for loads to 16-bit or 8-bit since we
 ; promote i16 during lowering.
-  %addr_8bit = load i8** @varptr
+  %addr_8bit = load i8*, i8** @varptr
 
 ; match a sign-extending load 16-bit -> 32-bit
    %addr8_sext32 = getelementptr i8, i8* %addr_8bit, i64 -256
    %addr_sext32 = bitcast i8* %addr8_sext32 to i16*
-   %val16_sext32 = load volatile i16* %addr_sext32
+   %val16_sext32 = load volatile i16, i16* %addr_sext32
    %val32_signed = sext i16 %val16_sext32 to i32
    store volatile i32 %val32_signed, i32* @var_32bit
 ; CHECK: ldursh {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
@@ -90,7 +90,7 @@
 ; match a zero-extending load volatile 16-bit -> 32-bit. With offset that would be unaligned.
   %addr8_zext32 = getelementptr i8, i8* %addr_8bit, i64 15
   %addr_zext32 = bitcast i8* %addr8_zext32 to i16*
-  %val16_zext32 = load volatile i16* %addr_zext32
+  %val16_zext32 = load volatile i16, i16* %addr_zext32
   %val32_unsigned = zext i16 %val16_zext32 to i32
   store volatile i32 %val32_unsigned, i32* @var_32bit
 ; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #15]
@@ -98,7 +98,7 @@
 ; match an any-extending load volatile 16-bit -> 32-bit
   %addr8_anyext = getelementptr i8, i8* %addr_8bit, i64 -1
   %addr_anyext = bitcast i8* %addr8_anyext to i16*
-  %val16_anyext = load volatile i16* %addr_anyext
+  %val16_anyext = load volatile i16, i16* %addr_anyext
   %newval16 = add i16 %val16_anyext, 1
   store volatile i16 %newval16, i16* @var_16bit
 ; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
@@ -106,7 +106,7 @@
 ; match a sign-extending load volatile 16-bit -> 64-bit
   %addr8_sext64 = getelementptr i8, i8* %addr_8bit, i64 -5
   %addr_sext64 = bitcast i8* %addr8_sext64 to i16*
-  %val16_sext64 = load volatile i16* %addr_sext64
+  %val16_sext64 = load volatile i16, i16* %addr_sext64
   %val64_signed = sext i16 %val16_sext64 to i64
   store volatile i64 %val64_signed, i64* @var_64bit
 ; CHECK: ldursh {{x[0-9]+}}, [{{x[0-9]+}}, #-5]
@@ -116,7 +116,7 @@
 ; of x0 so it's identical to a volatile load to 32-bits.
   %addr8_zext64 = getelementptr i8, i8* %addr_8bit, i64 9
   %addr_zext64 = bitcast i8* %addr8_zext64 to i16*
-  %val16_zext64 = load volatile i16* %addr_zext64
+  %val16_zext64 = load volatile i16, i16* %addr_zext64
   %val64_unsigned = zext i16 %val16_zext64 to i64
   store volatile i64 %val64_unsigned, i64* @var_64bit
 ; CHECK: ldurh {{w[0-9]+}}, [{{x[0-9]+}}, #9]
@@ -124,7 +124,7 @@
 ; truncating store volatile 32-bits to 16-bits
   %addr8_trunc32 = getelementptr i8, i8* %addr_8bit, i64 -256
   %addr_trunc32 = bitcast i8* %addr8_trunc32 to i16*
-  %val32 = load volatile i32* @var_32bit
+  %val32 = load volatile i32, i32* @var_32bit
   %val16_trunc32 = trunc i32 %val32 to i16
   store volatile i16 %val16_trunc32, i16* %addr_trunc32
 ; CHECK: sturh {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
@@ -132,7 +132,7 @@
 ; truncating store volatile 64-bits to 16-bits
   %addr8_trunc64 = getelementptr i8, i8* %addr_8bit, i64 -1
   %addr_trunc64 = bitcast i8* %addr8_trunc64 to i16*
-  %val64 = load volatile i64* @var_64bit
+  %val64 = load volatile i64, i64* @var_64bit
   %val16_trunc64 = trunc i64 %val64 to i16
   store volatile i16 %val16_trunc64, i16* %addr_trunc64
 ; CHECK: sturh {{w[0-9]+}}, [{{x[0-9]+}}, #-1]
@@ -143,12 +143,12 @@
 define void @ldst_32bit() {
 ; CHECK-LABEL: ldst_32bit:
 
-  %addr_8bit = load i8** @varptr
+  %addr_8bit = load i8*, i8** @varptr
 
 ; Straight 32-bit load/store
   %addr32_8_noext = getelementptr i8, i8* %addr_8bit, i64 1
   %addr32_noext = bitcast i8* %addr32_8_noext to i32*
-  %val32_noext = load volatile i32* %addr32_noext
+  %val32_noext = load volatile i32, i32* %addr32_noext
   store volatile i32 %val32_noext, i32* %addr32_noext
 ; CHECK: ldur {{w[0-9]+}}, [{{x[0-9]+}}, #1]
 ; CHECK: stur {{w[0-9]+}}, [{{x[0-9]+}}, #1]
@@ -156,7 +156,7 @@
 ; Zero-extension to 64-bits
   %addr32_8_zext = getelementptr i8, i8* %addr_8bit, i64 -256
   %addr32_zext = bitcast i8* %addr32_8_zext to i32*
-  %val32_zext = load volatile i32* %addr32_zext
+  %val32_zext = load volatile i32, i32* %addr32_zext
   %val64_unsigned = zext i32 %val32_zext to i64
   store volatile i64 %val64_unsigned, i64* @var_64bit
 ; CHECK: ldur {{w[0-9]+}}, [{{x[0-9]+}}, #-256]
@@ -165,7 +165,7 @@
 ; Sign-extension to 64-bits
   %addr32_8_sext = getelementptr i8, i8* %addr_8bit, i64 -12
   %addr32_sext = bitcast i8* %addr32_8_sext to i32*
-  %val32_sext = load volatile i32* %addr32_sext
+  %val32_sext = load volatile i32, i32* %addr32_sext
   %val64_signed = sext i32 %val32_sext to i64
   store volatile i64 %val64_signed, i64* @var_64bit
 ; CHECK: ldursw {{x[0-9]+}}, [{{x[0-9]+}}, #-12]
@@ -177,7 +177,7 @@
   %addr32_8_trunc = getelementptr i8, i8* %addr_8bit, i64 -20
   %addr32_trunc = bitcast i8* %addr32_8_trunc to i32*
 
-  %val64_trunc = load volatile i64* %addr64_trunc
+  %val64_trunc = load volatile i64, i64* %addr64_trunc
   %val32_trunc = trunc i64 %val64_trunc to i32
   store volatile i32 %val32_trunc, i32* %addr32_trunc
 ; CHECK: ldur {{x[0-9]+}}, [{{x[0-9]+}}, #255]
@@ -189,11 +189,11 @@
 define void @ldst_float() {
 ; CHECK-LABEL: ldst_float:
 
-  %addr_8bit = load i8** @varptr
+  %addr_8bit = load i8*, i8** @varptr
   %addrfp_8 = getelementptr i8, i8* %addr_8bit, i64 -5
   %addrfp = bitcast i8* %addrfp_8 to float*
 
-  %valfp = load volatile float* %addrfp
+  %valfp = load volatile float, float* %addrfp
 ; CHECK: ldur {{s[0-9]+}}, [{{x[0-9]+}}, #-5]
 ; CHECK-NOFP-NOT: ldur {{s[0-9]+}},
 
@@ -207,11 +207,11 @@
 define void @ldst_double() {
 ; CHECK-LABEL: ldst_double:
 
-  %addr_8bit = load i8** @varptr
+  %addr_8bit = load i8*, i8** @varptr
   %addrfp_8 = getelementptr i8, i8* %addr_8bit, i64 4
   %addrfp = bitcast i8* %addrfp_8 to double*
 
-  %valfp = load volatile double* %addrfp
+  %valfp = load volatile double, double* %addrfp
 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #4]
 ; CHECK-NOFP-NOT: ldur {{d[0-9]+}},
 
diff --git a/llvm/test/CodeGen/AArch64/ldst-unsignedimm.ll b/llvm/test/CodeGen/AArch64/ldst-unsignedimm.ll
index 92e366a..9777d3e 100644
--- a/llvm/test/CodeGen/AArch64/ldst-unsignedimm.ll
+++ b/llvm/test/CodeGen/AArch64/ldst-unsignedimm.ll
@@ -16,26 +16,26 @@
 ; promote i8 during lowering.
 
 ; match a sign-extending load 8-bit -> 32-bit
-   %val8_sext32 = load volatile i8* @var_8bit
+   %val8_sext32 = load volatile i8, i8* @var_8bit
    %val32_signed = sext i8 %val8_sext32 to i32
    store volatile i32 %val32_signed, i32* @var_32bit
 ; CHECK: adrp {{x[0-9]+}}, var_8bit
 ; CHECK: ldrsb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
 
 ; match a zero-extending load volatile 8-bit -> 32-bit
-  %val8_zext32 = load volatile i8* @var_8bit
+  %val8_zext32 = load volatile i8, i8* @var_8bit
   %val32_unsigned = zext i8 %val8_zext32 to i32
   store volatile i32 %val32_unsigned, i32* @var_32bit
 ; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
 
 ; match an any-extending load volatile 8-bit -> 32-bit
-  %val8_anyext = load volatile i8* @var_8bit
+  %val8_anyext = load volatile i8, i8* @var_8bit
   %newval8 = add i8 %val8_anyext, 1
   store volatile i8 %newval8, i8* @var_8bit
 ; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
 
 ; match a sign-extending load volatile 8-bit -> 64-bit
-  %val8_sext64 = load volatile i8* @var_8bit
+  %val8_sext64 = load volatile i8, i8* @var_8bit
   %val64_signed = sext i8 %val8_sext64 to i64
   store volatile i64 %val64_signed, i64* @var_64bit
 ; CHECK: ldrsb {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
@@ -43,19 +43,19 @@
 ; match a zero-extending load volatile 8-bit -> 64-bit.
 ; This uses the fact that ldrb w0, [x0] will zero out the high 32-bits
 ; of x0 so it's identical to a volatile load to 32-bits.
-  %val8_zext64 = load volatile i8* @var_8bit
+  %val8_zext64 = load volatile i8, i8* @var_8bit
   %val64_unsigned = zext i8 %val8_zext64 to i64
   store volatile i64 %val64_unsigned, i64* @var_64bit
 ; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
 
 ; truncating store volatile 32-bits to 8-bits
-  %val32 = load volatile i32* @var_32bit
+  %val32 = load volatile i32, i32* @var_32bit
   %val8_trunc32 = trunc i32 %val32 to i8
   store volatile i8 %val8_trunc32, i8* @var_8bit
 ; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
 
 ; truncating store volatile 64-bits to 8-bits
-  %val64 = load volatile i64* @var_64bit
+  %val64 = load volatile i64, i64* @var_64bit
   %val8_trunc64 = trunc i64 %val64 to i8
   store volatile i8 %val8_trunc64, i8* @var_8bit
 ; CHECK: strb {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_8bit]
@@ -70,26 +70,26 @@
 ; lowering.
 
 ; match a sign-extending load volatile 16-bit -> 32-bit
-  %val16_sext32 = load volatile i16* @var_16bit
+  %val16_sext32 = load volatile i16, i16* @var_16bit
   %val32_signed = sext i16 %val16_sext32 to i32
   store volatile i32 %val32_signed, i32* @var_32bit
 ; CHECK: adrp {{x[0-9]+}}, var_16bit
 ; CHECK: ldrsh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
 
 ; match a zero-extending load volatile 16-bit -> 32-bit
-  %val16_zext32 = load volatile i16* @var_16bit
+  %val16_zext32 = load volatile i16, i16* @var_16bit
   %val32_unsigned = zext i16 %val16_zext32 to i32
   store volatile i32 %val32_unsigned, i32* @var_32bit
 ; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
 
 ; match an any-extending load volatile 16-bit -> 32-bit
-  %val16_anyext = load volatile i16* @var_16bit
+  %val16_anyext = load volatile i16, i16* @var_16bit
   %newval16 = add i16 %val16_anyext, 1
   store volatile i16 %newval16, i16* @var_16bit
 ; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
 
 ; match a sign-extending load volatile 16-bit -> 64-bit
-  %val16_sext64 = load volatile i16* @var_16bit
+  %val16_sext64 = load volatile i16, i16* @var_16bit
   %val64_signed = sext i16 %val16_sext64 to i64
   store volatile i64 %val64_signed, i64* @var_64bit
 ; CHECK: ldrsh {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
@@ -97,19 +97,19 @@
 ; match a zero-extending load volatile 16-bit -> 64-bit.
 ; This uses the fact that ldrh w0, [x0] will zero out the high 32-bits
 ; of x0 so it's identical to a volatile load to 32-bits.
-  %val16_zext64 = load volatile i16* @var_16bit
+  %val16_zext64 = load volatile i16, i16* @var_16bit
   %val64_unsigned = zext i16 %val16_zext64 to i64
   store volatile i64 %val64_unsigned, i64* @var_64bit
 ; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
 
 ; truncating store volatile 32-bits to 16-bits
-  %val32 = load volatile i32* @var_32bit
+  %val32 = load volatile i32, i32* @var_32bit
   %val16_trunc32 = trunc i32 %val32 to i16
   store volatile i16 %val16_trunc32, i16* @var_16bit
 ; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
 
 ; truncating store volatile 64-bits to 16-bits
-  %val64 = load volatile i64* @var_64bit
+  %val64 = load volatile i64, i64* @var_64bit
   %val16_trunc64 = trunc i64 %val64 to i16
   store volatile i16 %val16_trunc64, i16* @var_16bit
 ; CHECK: strh {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_16bit]
@@ -121,28 +121,28 @@
 ; CHECK-LABEL: ldst_32bit:
 
 ; Straight 32-bit load/store
-  %val32_noext = load volatile i32* @var_32bit
+  %val32_noext = load volatile i32, i32* @var_32bit
   store volatile i32 %val32_noext, i32* @var_32bit
 ; CHECK: adrp {{x[0-9]+}}, var_32bit
 ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_32bit]
 ; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_32bit]
 
 ; Zero-extension to 64-bits
-  %val32_zext = load volatile i32* @var_32bit
+  %val32_zext = load volatile i32, i32* @var_32bit
   %val64_unsigned = zext i32 %val32_zext to i64
   store volatile i64 %val64_unsigned, i64* @var_64bit
 ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_32bit]
 ; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_64bit]
 
 ; Sign-extension to 64-bits
-  %val32_sext = load volatile i32* @var_32bit
+  %val32_sext = load volatile i32, i32* @var_32bit
   %val64_signed = sext i32 %val32_sext to i64
   store volatile i64 %val64_signed, i64* @var_64bit
 ; CHECK: ldrsw {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_32bit]
 ; CHECK: str {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_64bit]
 
 ; Truncation from 64-bits
-  %val64_trunc = load volatile i64* @var_64bit
+  %val64_trunc = load volatile i64, i64* @var_64bit
   %val32_trunc = trunc i64 %val64_trunc to i32
   store volatile i32 %val32_trunc, i32* @var_32bit
 ; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_64bit]
@@ -163,62 +163,62 @@
 
 define void @ldst_complex_offsets() {
 ; CHECK: ldst_complex_offsets
-  %arr8_addr = load volatile i8** @arr8
+  %arr8_addr = load volatile i8*, i8** @arr8
 ; CHECK: adrp {{x[0-9]+}}, arr8
 ; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:arr8]
 
   %arr8_sub1_addr = getelementptr i8, i8* %arr8_addr, i64 1
-  %arr8_sub1 = load volatile i8* %arr8_sub1_addr
+  %arr8_sub1 = load volatile i8, i8* %arr8_sub1_addr
   store volatile i8 %arr8_sub1, i8* @var_8bit
 ; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #1]
 
   %arr8_sub4095_addr = getelementptr i8, i8* %arr8_addr, i64 4095
-  %arr8_sub4095 = load volatile i8* %arr8_sub4095_addr
+  %arr8_sub4095 = load volatile i8, i8* %arr8_sub4095_addr
   store volatile i8 %arr8_sub4095, i8* @var_8bit
 ; CHECK: ldrb {{w[0-9]+}}, [{{x[0-9]+}}, #4095]
 
 
-  %arr16_addr = load volatile i16** @arr16
+  %arr16_addr = load volatile i16*, i16** @arr16
 ; CHECK: adrp {{x[0-9]+}}, arr16
 ; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:arr16]
 
   %arr16_sub1_addr = getelementptr i16, i16* %arr16_addr, i64 1
-  %arr16_sub1 = load volatile i16* %arr16_sub1_addr
+  %arr16_sub1 = load volatile i16, i16* %arr16_sub1_addr
   store volatile i16 %arr16_sub1, i16* @var_16bit
 ; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #2]
 
   %arr16_sub4095_addr = getelementptr i16, i16* %arr16_addr, i64 4095
-  %arr16_sub4095 = load volatile i16* %arr16_sub4095_addr
+  %arr16_sub4095 = load volatile i16, i16* %arr16_sub4095_addr
   store volatile i16 %arr16_sub4095, i16* @var_16bit
 ; CHECK: ldrh {{w[0-9]+}}, [{{x[0-9]+}}, #8190]
 
 
-  %arr32_addr = load volatile i32** @arr32
+  %arr32_addr = load volatile i32*, i32** @arr32
 ; CHECK: adrp {{x[0-9]+}}, arr32
 ; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:arr32]
 
   %arr32_sub1_addr = getelementptr i32, i32* %arr32_addr, i64 1
-  %arr32_sub1 = load volatile i32* %arr32_sub1_addr
+  %arr32_sub1 = load volatile i32, i32* %arr32_sub1_addr
   store volatile i32 %arr32_sub1, i32* @var_32bit
 ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #4]
 
   %arr32_sub4095_addr = getelementptr i32, i32* %arr32_addr, i64 4095
-  %arr32_sub4095 = load volatile i32* %arr32_sub4095_addr
+  %arr32_sub4095 = load volatile i32, i32* %arr32_sub4095_addr
   store volatile i32 %arr32_sub4095, i32* @var_32bit
 ; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #16380]
 
 
-  %arr64_addr = load volatile i64** @arr64
+  %arr64_addr = load volatile i64*, i64** @arr64
 ; CHECK: adrp {{x[0-9]+}}, arr64
 ; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:arr64]
 
   %arr64_sub1_addr = getelementptr i64, i64* %arr64_addr, i64 1
-  %arr64_sub1 = load volatile i64* %arr64_sub1_addr
+  %arr64_sub1 = load volatile i64, i64* %arr64_sub1_addr
   store volatile i64 %arr64_sub1, i64* @var_64bit
 ; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #8]
 
   %arr64_sub4095_addr = getelementptr i64, i64* %arr64_addr, i64 4095
-  %arr64_sub4095 = load volatile i64* %arr64_sub4095_addr
+  %arr64_sub4095 = load volatile i64, i64* %arr64_sub4095_addr
   store volatile i64 %arr64_sub4095, i64* @var_64bit
 ; CHECK: ldr {{x[0-9]+}}, [{{x[0-9]+}}, #32760]
 
@@ -228,7 +228,7 @@
 define void @ldst_float() {
 ; CHECK-LABEL: ldst_float:
 
-   %valfp = load volatile float* @var_float
+   %valfp = load volatile float, float* @var_float
 ; CHECK: adrp {{x[0-9]+}}, var_float
 ; CHECK: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_float]
 ; CHECK-NOFP-NOT: ldr {{s[0-9]+}},
@@ -243,7 +243,7 @@
 define void @ldst_double() {
 ; CHECK-LABEL: ldst_double:
 
-   %valfp = load volatile double* @var_double
+   %valfp = load volatile double, double* @var_double
 ; CHECK: adrp {{x[0-9]+}}, var_double
 ; CHECK: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:var_double]
 ; CHECK-NOFP-NOT: ldr {{d[0-9]+}},
diff --git a/llvm/test/CodeGen/AArch64/literal_pools_float.ll b/llvm/test/CodeGen/AArch64/literal_pools_float.ll
index e53b8b6..f5d6a17 100644
--- a/llvm/test/CodeGen/AArch64/literal_pools_float.ll
+++ b/llvm/test/CodeGen/AArch64/literal_pools_float.ll
@@ -9,7 +9,7 @@
 define void @floating_lits() {
 ; CHECK-LABEL: floating_lits:
 
-  %floatval = load float* @varfloat
+  %floatval = load float, float* @varfloat
   %newfloat = fadd float %floatval, 128.0
 ; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI[0-9]+_[0-9]+]]
 ; CHECK: ldr [[LIT128:s[0-9]+]], [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]]
@@ -26,7 +26,7 @@
 
   store float %newfloat, float* @varfloat
 
-  %doubleval = load double* @vardouble
+  %doubleval = load double, double* @vardouble
   %newdouble = fadd double %doubleval, 129.0
 ; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI[0-9]+_[0-9]+]]
 ; CHECK: ldr [[LIT129:d[0-9]+]], [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]]
diff --git a/llvm/test/CodeGen/AArch64/local_vars.ll b/llvm/test/CodeGen/AArch64/local_vars.ll
index 2f5b9f2..332d660 100644
--- a/llvm/test/CodeGen/AArch64/local_vars.ll
+++ b/llvm/test/CodeGen/AArch64/local_vars.ll
@@ -49,7 +49,7 @@
 ; CHECK-LABEL: stack_local:
 ; CHECK: sub sp, sp, #16
 
-  %val = load i64* @var
+  %val = load i64, i64* @var
   store i64 %val, i64* %local_var
 ; CHECK-DAG: str {{x[0-9]+}}, [sp, #{{[0-9]+}}]
 
diff --git a/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll b/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll
index b249d72..6b3246d1d 100644
--- a/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll
+++ b/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll
@@ -8,8 +8,8 @@
 
 define void @logical_32bit() minsize {
 ; CHECK-LABEL: logical_32bit:
-  %val1 = load i32* @var1_32
-  %val2 = load i32* @var2_32
+  %val1 = load i32, i32* @var1_32
+  %val2 = load i32, i32* @var2_32
 
   ; First check basic and/bic/or/orn/eor/eon patterns with no shift
   %neg_val2 = xor i32 -1, %val2
@@ -98,8 +98,8 @@
 
 define void @logical_64bit() minsize {
 ; CHECK-LABEL: logical_64bit:
-  %val1 = load i64* @var1_64
-  %val2 = load i64* @var2_64
+  %val1 = load i64, i64* @var1_64
+  %val2 = load i64, i64* @var2_64
 
   ; First check basic and/bic/or/orn/eor/eon patterns with no shift
   %neg_val2 = xor i64 -1, %val2
@@ -191,8 +191,8 @@
 
 define void @flag_setting() {
 ; CHECK-LABEL: flag_setting:
-  %val1 = load i64* @var1_64
-  %val2 = load i64* @var2_64
+  %val1 = load i64, i64* @var1_64
+  %val2 = load i64, i64* @var2_64
 
 ; CHECK: tst {{x[0-9]+}}, {{x[0-9]+}}
 ; CHECK: b.gt .L
diff --git a/llvm/test/CodeGen/AArch64/machine_cse.ll b/llvm/test/CodeGen/AArch64/machine_cse.ll
index bc9ab10..032199e 100644
--- a/llvm/test/CodeGen/AArch64/machine_cse.ll
+++ b/llvm/test/CodeGen/AArch64/machine_cse.ll
@@ -14,11 +14,11 @@
 ; CHECK: b.le
 
 entry:
-  %a = load i32* @a, align 4
-  %b = load i32* @b, align 4
-  %c = load i32* @c, align 4
-  %d = load i32* @d, align 4
-  %e = load i32* @e, align 4
+  %a = load i32, i32* @a, align 4
+  %b = load i32, i32* @b, align 4
+  %c = load i32, i32* @c, align 4
+  %d = load i32, i32* @d, align 4
+  %e = load i32, i32* @e, align 4
 
   %cmp = icmp slt i32 %a, %e
   br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
diff --git a/llvm/test/CodeGen/AArch64/neon-fpround_f128.ll b/llvm/test/CodeGen/AArch64/neon-fpround_f128.ll
index a93f3f2..265664e 100644
--- a/llvm/test/CodeGen/AArch64/neon-fpround_f128.ll
+++ b/llvm/test/CodeGen/AArch64/neon-fpround_f128.ll
@@ -3,7 +3,7 @@
 define <1 x double> @test_fpround_v1f128(<1 x fp128>* %a) {
 ; CHECK-LABEL: test_fpround_v1f128:
 ; CHECK: bl __trunctfdf2
-  %b = load <1 x fp128>* %a
+  %b = load <1 x fp128>, <1 x fp128>* %a
   %c = fptrunc <1 x fp128> %b to <1 x double>
   ret <1 x double> %c
 }
@@ -12,7 +12,7 @@
 ; CHECK-LABEL: test_fpround_v2f128:
 ; CHECK: bl __trunctfdf2
 ; CHECK: bl __trunctfdf2
-  %b = load <2 x fp128>* %a
+  %b = load <2 x fp128>, <2 x fp128>* %a
   %c = fptrunc <2 x fp128> %b to <2 x double>
   ret <2 x double> %c
 }
diff --git a/llvm/test/CodeGen/AArch64/neon-truncStore-extLoad.ll b/llvm/test/CodeGen/AArch64/neon-truncStore-extLoad.ll
index 1df3719..0d5ebb3 100644
--- a/llvm/test/CodeGen/AArch64/neon-truncStore-extLoad.ll
+++ b/llvm/test/CodeGen/AArch64/neon-truncStore-extLoad.ll
@@ -34,7 +34,7 @@
 define <4 x i32> @loadSExt.v4i8(<4 x i8>* %ref) {
 ; CHECK-LABEL: loadSExt.v4i8:
 ; CHECK: ldrsb
-  %a = load <4 x i8>* %ref
+  %a = load <4 x i8>, <4 x i8>* %ref
   %conv = sext <4 x i8> %a to <4 x i32>
   ret <4 x i32> %conv
 }
@@ -42,7 +42,7 @@
 define <4 x i32> @loadZExt.v4i8(<4 x i8>* %ref) {
 ; CHECK-LABEL: loadZExt.v4i8:
 ; CHECK: ldrb
-  %a = load <4 x i8>* %ref
+  %a = load <4 x i8>, <4 x i8>* %ref
   %conv = zext <4 x i8> %a to <4 x i32>
   ret <4 x i32> %conv
 }
@@ -50,7 +50,7 @@
 define i32 @loadExt.i32(<4 x i8>* %ref) {
 ; CHECK-LABEL: loadExt.i32:
 ; CHECK: ldrb
-  %a = load <4 x i8>* %ref
+  %a = load <4 x i8>, <4 x i8>* %ref
   %vecext = extractelement <4 x i8> %a, i32 0
   %conv = zext i8 %vecext to i32
   ret i32 %conv
diff --git a/llvm/test/CodeGen/AArch64/nzcv-save.ll b/llvm/test/CodeGen/AArch64/nzcv-save.ll
index 32baff3..f8f42ec 100644
--- a/llvm/test/CodeGen/AArch64/nzcv-save.ll
+++ b/llvm/test/CodeGen/AArch64/nzcv-save.ll
@@ -7,8 +7,8 @@
 ; must be saved for later.
 define void @f(i256* nocapture %a, i256* nocapture %b, i256* nocapture %cc, i256* nocapture %dd) nounwind uwtable noinline ssp {
 entry:
-  %c = load i256* %cc
-  %d = load i256* %dd
+  %c = load i256, i256* %cc
+  %d = load i256, i256* %dd
   %add = add nsw i256 %c, %d
   store i256 %add, i256* %a, align 8
   %or = or i256 %c, 1606938044258990275541962092341162602522202993782792835301376
diff --git a/llvm/test/CodeGen/AArch64/paired-load.ll b/llvm/test/CodeGen/AArch64/paired-load.ll
index 72b5afc..e6d2d4f 100644
--- a/llvm/test/CodeGen/AArch64/paired-load.ll
+++ b/llvm/test/CodeGen/AArch64/paired-load.ll
@@ -8,8 +8,8 @@
 define void @f(i64* %p, i64* %q) {
   %addr2 = getelementptr i64, i64* %q, i32 1
   %addr = getelementptr i64, i64* %p, i32 1
-  %x = load i64* %p
-  %y = load i64* %addr
+  %x = load i64, i64* %p
+  %y = load i64, i64* %addr
   store i64 %x, i64* %q
   store i64 %y, i64* %addr2
   ret void
diff --git a/llvm/test/CodeGen/AArch64/pic-eh-stubs.ll b/llvm/test/CodeGen/AArch64/pic-eh-stubs.ll
index 93ee0e6..f761a87 100644
--- a/llvm/test/CodeGen/AArch64/pic-eh-stubs.ll
+++ b/llvm/test/CodeGen/AArch64/pic-eh-stubs.ll
@@ -38,7 +38,7 @@
   %3 = extractvalue { i8*, i32 } %0, 0
   %4 = tail call i8* @__cxa_begin_catch(i8* %3) nounwind
   %5 = bitcast i8* %4 to i32*
-  %exn.scalar = load i32* %5, align 4
+  %exn.scalar = load i32, i32* %5, align 4
   tail call void @__cxa_end_catch() nounwind
   br label %return
 
diff --git a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll
index 2f165e8..94eeba7 100644
--- a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll
+++ b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll
@@ -35,31 +35,31 @@
 define fastcc i32 @prune_match(%struct.Connector_struct* nocapture readonly %a, %struct.Connector_struct* nocapture readonly %b) #9 {
 entry:
   %label56 = bitcast %struct.Connector_struct* %a to i16*
-  %0 = load i16* %label56, align 2
+  %0 = load i16, i16* %label56, align 2
   %label157 = bitcast %struct.Connector_struct* %b to i16*
-  %1 = load i16* %label157, align 2
+  %1 = load i16, i16* %label157, align 2
   %cmp = icmp eq i16 %0, %1
   br i1 %cmp, label %if.end, label %return, !prof !988
 if.end:
   %priority = getelementptr inbounds %struct.Connector_struct, %struct.Connector_struct* %a, i64 0, i32 2
-  %2 = load i8* %priority, align 1
+  %2 = load i8, i8* %priority, align 1
   %priority5 = getelementptr inbounds %struct.Connector_struct, %struct.Connector_struct* %b, i64 0, i32 2
-  %3 = load i8* %priority5, align 1
+  %3 = load i8, i8* %priority5, align 1
   %string = getelementptr inbounds %struct.Connector_struct, %struct.Connector_struct* %a, i64 0, i32 5
-  %4 = load i8** %string, align 8
+  %4 = load i8*, i8** %string, align 8
   %string7 = getelementptr inbounds %struct.Connector_struct, %struct.Connector_struct* %b, i64 0, i32 5
-  %5 = load i8** %string7, align 8
+  %5 = load i8*, i8** %string7, align 8
   br label %while.cond
 while.cond:
   %lsr.iv27 = phi i64 [ %lsr.iv.next28, %if.end17 ], [ 0, %if.end ]
   %scevgep55 = getelementptr i8, i8* %4, i64 %lsr.iv27
-  %6 = load i8* %scevgep55, align 1
+  %6 = load i8, i8* %scevgep55, align 1
   %idxprom.i.i = sext i8 %6 to i64
   %isascii.i.i224 = icmp sgt i8 %6, -1
   br i1 %isascii.i.i224, label %cond.true.i.i, label %cond.false.i.i, !prof !181
 cond.true.i.i:
   %arrayidx.i.i = getelementptr inbounds %struct._RuneLocale, %struct._RuneLocale* @_DefaultRuneLocale, i64 0, i32 5, i64 %idxprom.i.i
-  %7 = load i32* %arrayidx.i.i, align 4
+  %7 = load i32, i32* %arrayidx.i.i, align 4
   %and.i.i = and i32 %7, 32768
   br label %isupper.exit
 cond.false.i.i:
@@ -75,13 +75,13 @@
   %sunkaddr = ptrtoint i8* %5 to i64
   %sunkaddr58 = add i64 %sunkaddr, %lsr.iv27
   %sunkaddr59 = inttoptr i64 %sunkaddr58 to i8*
-  %9 = load i8* %sunkaddr59, align 1
+  %9 = load i8, i8* %sunkaddr59, align 1
   %idxprom.i.i214 = sext i8 %9 to i64
   %isascii.i.i213225 = icmp sgt i8 %9, -1
   br i1 %isascii.i.i213225, label %cond.true.i.i217, label %cond.false.i.i219, !prof !181
 cond.true.i.i217:
   %arrayidx.i.i215 = getelementptr inbounds %struct._RuneLocale, %struct._RuneLocale* @_DefaultRuneLocale, i64 0, i32 5, i64 %idxprom.i.i214
-  %10 = load i32* %arrayidx.i.i215, align 4
+  %10 = load i32, i32* %arrayidx.i.i215, align 4
   %and.i.i216 = and i32 %10, 32768
   br label %isupper.exit223
 cond.false.i.i219:
@@ -97,11 +97,11 @@
   %sunkaddr60 = ptrtoint i8* %4 to i64
   %sunkaddr61 = add i64 %sunkaddr60, %lsr.iv27
   %sunkaddr62 = inttoptr i64 %sunkaddr61 to i8*
-  %12 = load i8* %sunkaddr62, align 1
+  %12 = load i8, i8* %sunkaddr62, align 1
   %sunkaddr63 = ptrtoint i8* %5 to i64
   %sunkaddr64 = add i64 %sunkaddr63, %lsr.iv27
   %sunkaddr65 = inttoptr i64 %sunkaddr64 to i8*
-  %13 = load i8* %sunkaddr65, align 1
+  %13 = load i8, i8* %sunkaddr65, align 1
   %cmp14 = icmp eq i8 %12, %13
   br i1 %cmp14, label %if.end17, label %return, !prof !991
 if.end17:
@@ -115,13 +115,13 @@
   %sunkaddr66 = ptrtoint %struct.Connector_struct* %a to i64
   %sunkaddr67 = add i64 %sunkaddr66, 16
   %sunkaddr68 = inttoptr i64 %sunkaddr67 to i8**
-  %16 = load i8** %sunkaddr68, align 8
-  %17 = load i8* %16, align 1
+  %16 = load i8*, i8** %sunkaddr68, align 8
+  %17 = load i8, i8* %16, align 1
   %cmp26 = icmp eq i8 %17, 83
   %sunkaddr69 = ptrtoint i8* %4 to i64
   %sunkaddr70 = add i64 %sunkaddr69, %lsr.iv27
   %sunkaddr71 = inttoptr i64 %sunkaddr70 to i8*
-  %18 = load i8* %sunkaddr71, align 1
+  %18 = load i8, i8* %sunkaddr71, align 1
   br i1 %cmp26, label %land.lhs.true28, label %while.cond59.preheader, !prof !993
 land.lhs.true28:
   switch i8 %18, label %land.rhs.preheader [
@@ -132,7 +132,7 @@
   %sunkaddr72 = ptrtoint i8* %5 to i64
   %sunkaddr73 = add i64 %sunkaddr72, %lsr.iv27
   %sunkaddr74 = inttoptr i64 %sunkaddr73 to i8*
-  %19 = load i8* %sunkaddr74, align 1
+  %19 = load i8, i8* %sunkaddr74, align 1
   switch i8 %19, label %land.rhs.preheader [
     i8 112, label %land.lhs.true43
   ], !prof !995
@@ -157,7 +157,7 @@
   %sunkaddr76 = add i64 %sunkaddr75, %lsr.iv27
   %sunkaddr77 = add i64 %sunkaddr76, -1
   %sunkaddr78 = inttoptr i64 %sunkaddr77 to i8*
-  %24 = load i8* %sunkaddr78, align 1
+  %24 = load i8, i8* %sunkaddr78, align 1
   %cmp55 = icmp eq i8 %24, 73
   %cmp61233 = icmp eq i8 %18, 0
   %or.cond265 = or i1 %cmp55, %cmp61233
@@ -173,7 +173,7 @@
   %lsr.iv = phi i64 [ 0, %land.rhs.preheader ], [ %lsr.iv.next, %if.then83 ]
   %25 = phi i8 [ %27, %if.then83 ], [ %18, %land.rhs.preheader ]
   %scevgep34 = getelementptr i8, i8* %scevgep33, i64 %lsr.iv
-  %26 = load i8* %scevgep34, align 1
+  %26 = load i8, i8* %scevgep34, align 1
   %cmp64 = icmp eq i8 %26, 0
   br i1 %cmp64, label %return, label %while.body66, !prof !1000
 while.body66:
@@ -189,7 +189,7 @@
 if.then83:
   %scevgep44 = getelementptr i8, i8* %scevgep43, i64 %lsr.iv
   %scevgep45 = getelementptr i8, i8* %scevgep44, i64 1
-  %27 = load i8* %scevgep45, align 1
+  %27 = load i8, i8* %scevgep45, align 1
   %cmp61 = icmp eq i8 %27, 0
   %lsr.iv.next = add i64 %lsr.iv, 1
   br i1 %cmp61, label %return, label %land.rhs, !prof !999
@@ -202,7 +202,7 @@
   %sunkaddr79 = ptrtoint i8* %4 to i64
   %sunkaddr80 = add i64 %sunkaddr79, %lsr.iv27
   %sunkaddr81 = inttoptr i64 %sunkaddr80 to i8*
-  %28 = load i8* %sunkaddr81, align 1
+  %28 = load i8, i8* %sunkaddr81, align 1
   %cmp97238 = icmp eq i8 %28, 0
   br i1 %cmp97238, label %return, label %land.rhs99.preheader, !prof !1004
 land.rhs99.preheader:
@@ -213,7 +213,7 @@
   %lsr.iv17 = phi i64 [ 0, %land.rhs99.preheader ], [ %lsr.iv.next18, %if.then117 ]
   %29 = phi i8 [ %31, %if.then117 ], [ %28, %land.rhs99.preheader ]
   %scevgep32 = getelementptr i8, i8* %scevgep31, i64 %lsr.iv17
-  %30 = load i8* %scevgep32, align 1
+  %30 = load i8, i8* %scevgep32, align 1
   %cmp101 = icmp eq i8 %30, 0
   br i1 %cmp101, label %return, label %while.body104, !prof !1005
 while.body104:
@@ -226,7 +226,7 @@
 if.then117:
   %scevgep41 = getelementptr i8, i8* %scevgep40, i64 %lsr.iv17
   %scevgep42 = getelementptr i8, i8* %scevgep41, i64 1
-  %31 = load i8* %scevgep42, align 1
+  %31 = load i8, i8* %scevgep42, align 1
   %cmp97 = icmp eq i8 %31, 0
   %lsr.iv.next18 = add i64 %lsr.iv17, 1
   br i1 %cmp97, label %return, label %land.rhs99, !prof !1004
@@ -239,7 +239,7 @@
   %sunkaddr82 = ptrtoint i8* %4 to i64
   %sunkaddr83 = add i64 %sunkaddr82, %lsr.iv27
   %sunkaddr84 = inttoptr i64 %sunkaddr83 to i8*
-  %32 = load i8* %sunkaddr84, align 1
+  %32 = load i8, i8* %sunkaddr84, align 1
   %cmp132244 = icmp eq i8 %32, 0
   br i1 %cmp132244, label %return, label %land.rhs134.preheader, !prof !1008
 land.rhs134.preheader:
@@ -250,7 +250,7 @@
   %lsr.iv22 = phi i64 [ 0, %land.rhs134.preheader ], [ %lsr.iv.next23, %if.then152 ]
   %33 = phi i8 [ %35, %if.then152 ], [ %32, %land.rhs134.preheader ]
   %scevgep30 = getelementptr i8, i8* %scevgep29, i64 %lsr.iv22
-  %34 = load i8* %scevgep30, align 1
+  %34 = load i8, i8* %scevgep30, align 1
   %cmp136 = icmp eq i8 %34, 0
   br i1 %cmp136, label %return, label %while.body139, !prof !1009
 while.body139:
@@ -263,7 +263,7 @@
 if.then152:
   %scevgep38 = getelementptr i8, i8* %scevgep37, i64 %lsr.iv22
   %scevgep39 = getelementptr i8, i8* %scevgep38, i64 1
-  %35 = load i8* %scevgep39, align 1
+  %35 = load i8, i8* %scevgep39, align 1
   %cmp132 = icmp eq i8 %35, 0
   %lsr.iv.next23 = add i64 %lsr.iv22, 1
   br i1 %cmp132, label %return, label %land.rhs134, !prof !1008
diff --git a/llvm/test/CodeGen/AArch64/regress-tail-livereg.ll b/llvm/test/CodeGen/AArch64/regress-tail-livereg.ll
index 03c3f33..965aa0d 100644
--- a/llvm/test/CodeGen/AArch64/regress-tail-livereg.ll
+++ b/llvm/test/CodeGen/AArch64/regress-tail-livereg.ll
@@ -5,7 +5,7 @@
 
 define void @foo() {
 ; CHECK-LABEL: foo:
-       %func = load void()** @var
+       %func = load void()*, void()** @var
 
        ; Calling a function encourages @foo to use a callee-saved register,
        ; which makes it a natural choice for the tail call itself. But we don't
diff --git a/llvm/test/CodeGen/AArch64/regress-tblgen-chains.ll b/llvm/test/CodeGen/AArch64/regress-tblgen-chains.ll
index 477d996..0d301bb 100644
--- a/llvm/test/CodeGen/AArch64/regress-tblgen-chains.ll
+++ b/llvm/test/CodeGen/AArch64/regress-tblgen-chains.ll
@@ -19,7 +19,7 @@
   call void @bar(i8* %locvar)
 ; CHECK: bl {{_?bar}}
 
-  %inc.1 = load i8* %locvar
+  %inc.1 = load i8, i8* %locvar
   %inc.2 = zext i8 %inc.1 to i64
   %inc.3 = add i64 %inc.2, 1
   %inc.4 = trunc i64 %inc.3 to i8
@@ -30,7 +30,7 @@
 ; CHECK: sturb {{w[0-9]+}}, [x29, [[LOCADDR]]]
 ; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR]]]
 
-  %ret.1 = load i8* %locvar
+  %ret.1 = load i8, i8* %locvar
   %ret.2 = zext i8 %ret.1 to i64
   ret i64 %ret.2
 ; CHECK: ret
diff --git a/llvm/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll b/llvm/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
index c3167e4..66a5ed6 100644
--- a/llvm/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
+++ b/llvm/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
@@ -7,15 +7,15 @@
 ; CHECK-LABEL: test_w29_reserved:
 ; CHECK: add x29, sp, #{{[0-9]+}}
 
-  %val1 = load volatile i32* @var
-  %val2 = load volatile i32* @var
-  %val3 = load volatile i32* @var
-  %val4 = load volatile i32* @var
-  %val5 = load volatile i32* @var
-  %val6 = load volatile i32* @var
-  %val7 = load volatile i32* @var
-  %val8 = load volatile i32* @var
-  %val9 = load volatile i32* @var
+  %val1 = load volatile i32, i32* @var
+  %val2 = load volatile i32, i32* @var
+  %val3 = load volatile i32, i32* @var
+  %val4 = load volatile i32, i32* @var
+  %val5 = load volatile i32, i32* @var
+  %val6 = load volatile i32, i32* @var
+  %val7 = load volatile i32, i32* @var
+  %val8 = load volatile i32, i32* @var
+  %val9 = load volatile i32, i32* @var
 
 ; CHECK-NOT: ldr w29,
 
diff --git a/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll b/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll
index 36dc118..5184d74 100644
--- a/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll
+++ b/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll
@@ -15,8 +15,8 @@
 ; CHECK-NOT: cmp
 ; CHECK: b.ne
 entry:
-  %0 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
-  %1 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
+  %0 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
+  %1 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
   %cmp = icmp sgt i16 %0, %1
   br i1 %cmp, label %if.then, label %if.else
 
@@ -43,8 +43,8 @@
 ; CHECK-NOT: cmp
 ; CHECK: b.ge
 entry:
-  %0 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
-  %1 = load i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
+  %0 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 1), align 2
+  %1 = load i16, i16* getelementptr inbounds (%struct.s_signed_i16* @cost_s_i8_i16, i64 0, i32 2), align 2
   %cmp = icmp sgt i16 %0, %1
   br i1 %cmp, label %if.then, label %if.else
 
@@ -71,8 +71,8 @@
 ; CHECK-NOT: cmp
 ; CHECK: b.ne
 entry:
-  %0 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
-  %1 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
+  %0 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
+  %1 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
   %cmp = icmp ugt i16 %0, %1
   br i1 %cmp, label %if.then, label %if.else
 
@@ -99,8 +99,8 @@
 ; CHECK-NOT: cmp
 ; CHECK: b.hs
 entry:
-  %0 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
-  %1 = load i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
+  %0 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 1), align 2
+  %1 = load i16, i16* getelementptr inbounds (%struct.s_unsigned_i16* @cost_u_i16, i64 0, i32 2), align 2
   %cmp = icmp ugt i16 %0, %1
   br i1 %cmp, label %if.then, label %if.else
 
@@ -136,8 +136,8 @@
 ; CHECK-NOT: cmp
 ; CHECK: b.ne
 entry:
-  %0 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
-  %1 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
+  %0 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
+  %1 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
   %cmp = icmp sgt i8 %0, %1
   br i1 %cmp, label %if.then, label %if.else
 
@@ -164,8 +164,8 @@
 ; CHECK-NOT: cmp
 ; CHECK: b.ge
 entry:
-  %0 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
-  %1 = load i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
+  %0 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 1), align 2
+  %1 = load i8, i8* getelementptr inbounds (%struct.s_signed_i8* @cost_s, i64 0, i32 2), align 2
   %cmp = icmp sgt i8 %0, %1
   br i1 %cmp, label %if.then, label %if.else
 
@@ -192,8 +192,8 @@
 ; CHECK-NOT: cmp
 ; CHECK: b.ne
 entry:
-  %0 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
-  %1 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
+  %0 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
+  %1 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
   %cmp = icmp ugt i8 %0, %1
   br i1 %cmp, label %if.then, label %if.else
 
@@ -220,8 +220,8 @@
 ; CHECK-NOT: cmp
 ; CHECK: b.hs
 entry:
-  %0 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
-  %1 = load i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
+  %0 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 1), align 2
+  %1 = load i8, i8* getelementptr inbounds (%struct.s_unsigned_i8* @cost_u_i8, i64 0, i32 2), align 2
   %cmp = icmp ugt i8 %0, %1
   br i1 %cmp, label %if.then, label %if.else
 
diff --git a/llvm/test/CodeGen/AArch64/sibling-call.ll b/llvm/test/CodeGen/AArch64/sibling-call.ll
index 34e3bb4..34d45d8 100644
--- a/llvm/test/CodeGen/AArch64/sibling-call.ll
+++ b/llvm/test/CodeGen/AArch64/sibling-call.ll
@@ -88,7 +88,7 @@
 ; CHECK-LABEL: indirect_tail:
 ; CHECK-NOT: sub sp, sp
 
-  %fptr = load void(i32)** @func
+  %fptr = load void(i32)*, void(i32)** @func
   tail call void %fptr(i32 42)
   ret void
 ; CHECK: ldr [[FPTR:x[1-9]+]], [{{x[0-9]+}}, {{#?}}:lo12:func]
diff --git a/llvm/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll b/llvm/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
index bedbf5f..eb4937e 100644
--- a/llvm/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
@@ -15,7 +15,7 @@
 define i32 @test_stack_guard_remat2() {
 entry:
   %StackGuardSlot = alloca i8*
-  %StackGuard = load i8** bitcast (i64** @__stack_chk_guard to i8**)
+  %StackGuard = load i8*, i8** bitcast (i64** @__stack_chk_guard to i8**)
   call void @llvm.stackprotector(i8* %StackGuard, i8** %StackGuardSlot)
   %container = alloca [32 x i8], align 1
   call void @llvm.stackprotectorcheck(i8** bitcast (i64** @__stack_chk_guard to i8**))
diff --git a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll
index c77043c..8863f70 100644
--- a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll
+++ b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll
@@ -211,7 +211,7 @@
 ; CHECK-NOT: cmp
 ; CHECK: tbz [[CMP]], #63
 
-  %val = load i64* %ptr
+  %val = load i64, i64* %ptr
   %tst = icmp slt i64 %val, 0
   br i1 %tst, label %if.then, label %if.end
 
diff --git a/llvm/test/CodeGen/AArch64/tst-br.ll b/llvm/test/CodeGen/AArch64/tst-br.ll
index 5dc7b5d..345c4d9 100644
--- a/llvm/test/CodeGen/AArch64/tst-br.ll
+++ b/llvm/test/CodeGen/AArch64/tst-br.ll
@@ -9,8 +9,8 @@
 define i32 @test_tbz() {
 ; CHECK-LABEL: test_tbz:
 
-  %val = load i32* @var32
-  %val64 = load i64* @var64
+  %val = load i32, i32* @var32
+  %val64 = load i64, i64* @var64
 
   %tbit0 = and i32 %val, 32768
   %tst0 = icmp ne i32 %tbit0, 0
diff --git a/llvm/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll b/llvm/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
index ab9c4d6..b719f9f 100644
--- a/llvm/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
+++ b/llvm/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll
@@ -11,7 +11,7 @@
         ret void
 
 cond_true11:            ; preds = %entry
-        %tmp.i32 = load %struct.layer_data** @ld                ; <%struct.layer_data*> [#uses=2]
+        %tmp.i32 = load %struct.layer_data*, %struct.layer_data** @ld                ; <%struct.layer_data*> [#uses=2]
         %tmp3.i35 = getelementptr %struct.layer_data, %struct.layer_data* %tmp.i32, i32 0, i32 1, i32 2048; <i8*> [#uses=2]
         %tmp.i36 = getelementptr %struct.layer_data, %struct.layer_data* %tmp.i32, i32 0, i32 2          ; <i8**> [#uses=1]
         store i8* %tmp3.i35, i8** %tmp.i36
diff --git a/llvm/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll b/llvm/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
index c2b0ad4..8b94b7b 100644
--- a/llvm/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
+++ b/llvm/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll
@@ -15,15 +15,15 @@
 	br label %cond_next489
 
 cond_next489:		; preds = %cond_false, %bb471
-	%j.7.in = load i8* null		; <i8> [#uses=1]
-	%i.8.in = load i8* null		; <i8> [#uses=1]
+	%j.7.in = load i8, i8* null		; <i8> [#uses=1]
+	%i.8.in = load i8, i8* null		; <i8> [#uses=1]
 	%i.8 = zext i8 %i.8.in to i32		; <i32> [#uses=4]
 	%j.7 = zext i8 %j.7.in to i32		; <i32> [#uses=4]
 	%tmp495 = getelementptr [4 x [4 x i32]], [4 x [4 x i32]]* %predicted_block, i32 0, i32 %i.8, i32 %j.7		; <i32*> [#uses=2]
-	%tmp496 = load i32* %tmp495		; <i32> [#uses=2]
-	%tmp502 = load i32* null		; <i32> [#uses=1]
+	%tmp496 = load i32, i32* %tmp495		; <i32> [#uses=2]
+	%tmp502 = load i32, i32* null		; <i32> [#uses=1]
 	%tmp542 = getelementptr [6 x [4 x [4 x i32]]], [6 x [4 x [4 x i32]]]* @quant_coef, i32 0, i32 0, i32 %i.8, i32 %j.7		; <i32*> [#uses=1]
-	%tmp543 = load i32* %tmp542		; <i32> [#uses=1]
+	%tmp543 = load i32, i32* %tmp542		; <i32> [#uses=1]
 	%tmp548 = ashr i32 0, 0		; <i32> [#uses=3]
 	%tmp561 = sub i32 0, %tmp496		; <i32> [#uses=3]
 	%abscond563 = icmp sgt i32 %tmp561, -1		; <i1> [#uses=1]
@@ -36,9 +36,9 @@
 
 cond_next589:		; preds = %cond_next489
 	%tmp605 = getelementptr [6 x [4 x [4 x i32]]], [6 x [4 x [4 x i32]]]* @dequant_coef, i32 0, i32 0, i32 %i.8, i32 %j.7		; <i32*> [#uses=1]
-	%tmp606 = load i32* %tmp605		; <i32> [#uses=1]
-	%tmp612 = load i32* null		; <i32> [#uses=1]
-	%tmp629 = load i32* null		; <i32> [#uses=1]
+	%tmp606 = load i32, i32* %tmp605		; <i32> [#uses=1]
+	%tmp612 = load i32, i32* null		; <i32> [#uses=1]
+	%tmp629 = load i32, i32* null		; <i32> [#uses=1]
 	%tmp629a = sitofp i32 %tmp629 to double		; <double> [#uses=1]
 	%tmp631 = fmul double %tmp629a, 0.000000e+00		; <double> [#uses=1]
 	%tmp632 = fadd double 0.000000e+00, %tmp631		; <double> [#uses=1]
@@ -85,9 +85,9 @@
 
 cond_true740:		; preds = %bb737
 	%tmp761 = call fastcc i32 @sign( i32 %tmp576, i32 0 )		; <i32> [#uses=1]
-	%tmp780 = load i32* null		; <i32> [#uses=1]
+	%tmp780 = load i32, i32* null		; <i32> [#uses=1]
 	%tmp785 = getelementptr [4 x [4 x i32]], [4 x [4 x i32]]* @A, i32 0, i32 %i.8, i32 %j.7		; <i32*> [#uses=1]
-	%tmp786 = load i32* %tmp785		; <i32> [#uses=1]
+	%tmp786 = load i32, i32* %tmp785		; <i32> [#uses=1]
 	%tmp781 = mul i32 %tmp780, %tmp761		; <i32> [#uses=1]
 	%tmp787 = mul i32 %tmp781, %tmp786		; <i32> [#uses=1]
 	%tmp789 = shl i32 %tmp787, 0		; <i32> [#uses=1]
@@ -96,7 +96,7 @@
 
 cond_next791:		; preds = %cond_true740, %bb737
 	%ilev.1 = phi i32 [ %tmp790, %cond_true740 ], [ 0, %bb737 ]		; <i32> [#uses=1]
-	%tmp796 = load i32* %tmp495		; <i32> [#uses=1]
+	%tmp796 = load i32, i32* %tmp495		; <i32> [#uses=1]
 	%tmp798 = add i32 %tmp796, %ilev.1		; <i32> [#uses=1]
 	%tmp812 = mul i32 0, %tmp502		; <i32> [#uses=0]
 	%tmp818 = call fastcc i32 @sign( i32 0, i32 %tmp798 )		; <i32> [#uses=0]
diff --git a/llvm/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll b/llvm/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll
index 80eda54..472a345 100644
--- a/llvm/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll
+++ b/llvm/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll
@@ -5,7 +5,7 @@
 
 bb:		; preds = %bb, %0
 	%p_addr.0 = getelementptr i8, i8* %p, i32 0		; <i8*> [#uses=1]
-	%tmp2 = load i8* %p_addr.0		; <i8> [#uses=2]
+	%tmp2 = load i8, i8* %p_addr.0		; <i8> [#uses=2]
 	%tmp4.rec = add i32 0, 1		; <i32> [#uses=1]
 	%tmp4 = getelementptr i8, i8* %p, i32 %tmp4.rec		; <i8*> [#uses=1]
 	%tmp56 = zext i8 %tmp2 to i32		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/2007-03-13-InstrSched.ll b/llvm/test/CodeGen/ARM/2007-03-13-InstrSched.ll
index b8c8e70..9c0143b 100644
--- a/llvm/test/CodeGen/ARM/2007-03-13-InstrSched.ll
+++ b/llvm/test/CodeGen/ARM/2007-03-13-InstrSched.ll
@@ -23,25 +23,25 @@
 	%d1.1 = phi i32 [ %tmp54, %bb26 ], [ 8192, %newFuncRoot ]		; <i32> [#uses=2]
 	%d2.1 = phi i32 [ %tmp64, %bb26 ], [ 8192, %newFuncRoot ]		; <i32> [#uses=2]
 	%d3.1 = phi i32 [ %tmp69, %bb26 ], [ 8192, %newFuncRoot ]		; <i32> [#uses=2]
-	%fm.1 = load i32* %fm.1.in		; <i32> [#uses=4]
+	%fm.1 = load i32, i32* %fm.1.in		; <i32> [#uses=4]
 	icmp eq i32 %fp.1.rec, %tmp8		; <i1>:0 [#uses=1]
 	br i1 %0, label %bb78.exitStub, label %bb26
 
 bb26:		; preds = %bb74
 	%tmp28 = getelementptr i32*, i32** %tmp1, i32 %fp.1.rec		; <i32**> [#uses=1]
-	%tmp30 = load i32** %tmp28		; <i32*> [#uses=4]
+	%tmp30 = load i32*, i32** %tmp28		; <i32*> [#uses=4]
 	%tmp33 = getelementptr i32, i32* %tmp30, i32 %i.0196.0.ph		; <i32*> [#uses=1]
-	%tmp34 = load i32* %tmp33		; <i32> [#uses=1]
+	%tmp34 = load i32, i32* %tmp33		; <i32> [#uses=1]
 	%tmp38 = getelementptr i32, i32* %tmp30, i32 %tmp36224		; <i32*> [#uses=1]
-	%tmp39 = load i32* %tmp38		; <i32> [#uses=1]
+	%tmp39 = load i32, i32* %tmp38		; <i32> [#uses=1]
 	%tmp42 = mul i32 %tmp34, %fm.1		; <i32> [#uses=1]
 	%tmp44 = add i32 %tmp42, %d0.1		; <i32> [#uses=1]
 	%tmp48 = getelementptr i32, i32* %tmp30, i32 %tmp46223		; <i32*> [#uses=1]
-	%tmp49 = load i32* %tmp48		; <i32> [#uses=1]
+	%tmp49 = load i32, i32* %tmp48		; <i32> [#uses=1]
 	%tmp52 = mul i32 %tmp39, %fm.1		; <i32> [#uses=1]
 	%tmp54 = add i32 %tmp52, %d1.1		; <i32> [#uses=1]
 	%tmp58 = getelementptr i32, i32* %tmp30, i32 %tmp56222		; <i32*> [#uses=1]
-	%tmp59 = load i32* %tmp58		; <i32> [#uses=1]
+	%tmp59 = load i32, i32* %tmp58		; <i32> [#uses=1]
 	%tmp62 = mul i32 %tmp49, %fm.1		; <i32> [#uses=1]
 	%tmp64 = add i32 %tmp62, %d2.1		; <i32> [#uses=1]
 	%tmp67 = mul i32 %tmp59, %fm.1		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll b/llvm/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
index 9d8c526..0162d7f 100644
--- a/llvm/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
+++ b/llvm/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
@@ -71,7 +71,7 @@
 	ret void
 
 bb866:		; preds = %cond_true851
-	%tmp874 = load i32* %tmp2122		; <i32> [#uses=1]
+	%tmp874 = load i32, i32* %tmp2122		; <i32> [#uses=1]
 	%tmp876877 = trunc i32 %tmp874 to i8		; <i8> [#uses=1]
 	icmp eq i8 %tmp876877, 1		; <i1>:0 [#uses=1]
 	br i1 %0, label %cond_next881, label %cond_true878
@@ -82,7 +82,7 @@
 cond_next881:		; preds = %bb866
 	%tmp884885 = inttoptr i64 %tmp10959 to %struct.tree_identifier*		; <%struct.tree_identifier*> [#uses=1]
 	%tmp887 = getelementptr %struct.tree_identifier, %struct.tree_identifier* %tmp884885, i32 0, i32 1, i32 0		; <i8**> [#uses=1]
-	%tmp888 = load i8** %tmp887		; <i8*> [#uses=1]
+	%tmp888 = load i8*, i8** %tmp887		; <i8*> [#uses=1]
 	tail call void (i32, ...)* @error( i32 undef, i8* %tmp888 )
 	ret void
 
diff --git a/llvm/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll b/llvm/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll
index 1962859..2a0ef77 100644
--- a/llvm/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll
+++ b/llvm/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll
@@ -13,7 +13,7 @@
 	ret void
 
 cond_next416:		; preds = %entry
-	%tmp1085 = load %struct.rtx_def** %ad_addr		; <%struct.rtx_def*> [#uses=1]
+	%tmp1085 = load %struct.rtx_def*, %struct.rtx_def** %ad_addr		; <%struct.rtx_def*> [#uses=1]
 	br i1 false, label %bb1084, label %cond_true418
 
 cond_true418:		; preds = %cond_next416
@@ -25,7 +25,7 @@
 cond_true1092:		; preds = %bb1084
 	%tmp1094 = getelementptr %struct.rtx_def, %struct.rtx_def* %tmp1085, i32 0, i32 3		; <%struct.u*> [#uses=1]
 	%tmp10981099 = bitcast %struct.u* %tmp1094 to %struct.rtx_def**		; <%struct.rtx_def**> [#uses=2]
-	%tmp1101 = load %struct.rtx_def** %tmp10981099		; <%struct.rtx_def*> [#uses=1]
+	%tmp1101 = load %struct.rtx_def*, %struct.rtx_def** %tmp10981099		; <%struct.rtx_def*> [#uses=1]
 	store %struct.rtx_def* %tmp1101, %struct.rtx_def** %ad_addr
 	br label %cond_next1102
 
diff --git a/llvm/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll b/llvm/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll
index 49958be..7b74e6c 100644
--- a/llvm/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll
+++ b/llvm/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll
@@ -28,7 +28,7 @@
 	br i1 false, label %bb.preheader, label %return
 
 bb.preheader:		; preds = %entry
-	%tbl.014.us = load i32* null		; <i32> [#uses=1]
+	%tbl.014.us = load i32, i32* null		; <i32> [#uses=1]
 	br i1 false, label %cond_next.us, label %bb
 
 cond_next51.us:		; preds = %cond_next.us, %cond_true33.us.cond_true46.us_crit_edge
@@ -41,7 +41,7 @@
 
 cond_next.us:		; preds = %bb.preheader
 	%tmp37.us = getelementptr %struct.X_Y, %struct.X_Y* %cinfo, i32 0, i32 17, i32 %tbl.014.us		; <%struct.H_TBL**> [#uses=3]
-	%tmp4524.us = load %struct.H_TBL** %tmp37.us		; <%struct.H_TBL*> [#uses=1]
+	%tmp4524.us = load %struct.H_TBL*, %struct.H_TBL** %tmp37.us		; <%struct.H_TBL*> [#uses=1]
 	icmp eq %struct.H_TBL* %tmp4524.us, null		; <i1>:0 [#uses=1]
 	br i1 %0, label %cond_true33.us.cond_true46.us_crit_edge, label %cond_next51.us
 
diff --git a/llvm/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll b/llvm/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
index 6bb8299..d34c078 100644
--- a/llvm/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
+++ b/llvm/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
@@ -9,7 +9,7 @@
 define internal void @_ZN1B1iEv(%struct.B* %this) {
 entry:
 	%tmp1 = getelementptr %struct.B, %struct.B* %this, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp2 = load i32* %tmp1		; <i32> [#uses=1]
+	%tmp2 = load i32, i32* %tmp1		; <i32> [#uses=1]
 	%tmp4 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8]* @str, i32 0, i32 0), i32 %tmp2 )		; <i32> [#uses=0]
 	ret void
 }
@@ -19,7 +19,7 @@
 define internal void @_ZN1B1jEv(%struct.B* %this) {
 entry:
 	%tmp1 = getelementptr %struct.B, %struct.B* %this, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp2 = load i32* %tmp1		; <i32> [#uses=1]
+	%tmp2 = load i32, i32* %tmp1		; <i32> [#uses=1]
 	%tmp4 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([7 x i8]* @str1, i32 0, i32 0), i32 %tmp2 )		; <i32> [#uses=0]
 	ret void
 }
@@ -37,11 +37,11 @@
 	%b2.i = bitcast %struct.B* %b.i to i8*		; <i8*> [#uses=1]
 	%ctg23.i = getelementptr i8, i8* %b2.i, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1)		; <i8*> [#uses=1]
 	%tmp121314.i = bitcast i8* %ctg23.i to i32 (...)***		; <i32 (...)***> [#uses=1]
-	%tmp15.i = load i32 (...)*** %tmp121314.i		; <i32 (...)**> [#uses=1]
+	%tmp15.i = load i32 (...)**, i32 (...)*** %tmp121314.i		; <i32 (...)**> [#uses=1]
 	%tmp151.i = bitcast i32 (...)** %tmp15.i to i8*		; <i8*> [#uses=1]
 	%ctg2.i = getelementptr i8, i8* %tmp151.i, i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32)		; <i8*> [#uses=1]
 	%tmp2021.i = bitcast i8* %ctg2.i to i32 (...)**		; <i32 (...)**> [#uses=1]
-	%tmp22.i = load i32 (...)** %tmp2021.i		; <i32 (...)*> [#uses=1]
+	%tmp22.i = load i32 (...)*, i32 (...)** %tmp2021.i		; <i32 (...)*> [#uses=1]
 	%tmp2223.i = bitcast i32 (...)* %tmp22.i to void (%struct.B*)*		; <void (%struct.B*)*> [#uses=1]
 	br label %_Z3fooiM1BFvvE.exit
 
@@ -59,11 +59,11 @@
 	%b2.i35 = bitcast %struct.B* %b.i29 to i8*		; <i8*> [#uses=1]
 	%ctg23.i36 = getelementptr i8, i8* %b2.i35, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32) to i64), i64 32) to i32), i32 1)		; <i8*> [#uses=1]
 	%tmp121314.i37 = bitcast i8* %ctg23.i36 to i32 (...)***		; <i32 (...)***> [#uses=1]
-	%tmp15.i38 = load i32 (...)*** %tmp121314.i37		; <i32 (...)**> [#uses=1]
+	%tmp15.i38 = load i32 (...)**, i32 (...)*** %tmp121314.i37		; <i32 (...)**> [#uses=1]
 	%tmp151.i41 = bitcast i32 (...)** %tmp15.i38 to i8*		; <i8*> [#uses=1]
 	%ctg2.i42 = getelementptr i8, i8* %tmp151.i41, i32 ptrtoint (void (%struct.B*)* @_ZN1B1jEv to i32)		; <i8*> [#uses=1]
 	%tmp2021.i43 = bitcast i8* %ctg2.i42 to i32 (...)**		; <i32 (...)**> [#uses=1]
-	%tmp22.i44 = load i32 (...)** %tmp2021.i43		; <i32 (...)*> [#uses=1]
+	%tmp22.i44 = load i32 (...)*, i32 (...)** %tmp2021.i43		; <i32 (...)*> [#uses=1]
 	%tmp2223.i45 = bitcast i32 (...)* %tmp22.i44 to void (%struct.B*)*		; <void (%struct.B*)*> [#uses=1]
 	br label %_Z3fooiM1BFvvE.exit56
 
@@ -81,11 +81,11 @@
 	%b2.i7 = bitcast %struct.B* %b.i1 to i8*		; <i8*> [#uses=1]
 	%ctg23.i8 = getelementptr i8, i8* %b2.i7, i32 ashr (i32 trunc (i64 lshr (i64 zext (i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32) to i64), i64 32) to i32), i32 1)		; <i8*> [#uses=1]
 	%tmp121314.i9 = bitcast i8* %ctg23.i8 to i32 (...)***		; <i32 (...)***> [#uses=1]
-	%tmp15.i10 = load i32 (...)*** %tmp121314.i9		; <i32 (...)**> [#uses=1]
+	%tmp15.i10 = load i32 (...)**, i32 (...)*** %tmp121314.i9		; <i32 (...)**> [#uses=1]
 	%tmp151.i13 = bitcast i32 (...)** %tmp15.i10 to i8*		; <i8*> [#uses=1]
 	%ctg2.i14 = getelementptr i8, i8* %tmp151.i13, i32 ptrtoint (void (%struct.B*)* @_ZN1B1iEv to i32)		; <i8*> [#uses=1]
 	%tmp2021.i15 = bitcast i8* %ctg2.i14 to i32 (...)**		; <i32 (...)**> [#uses=1]
-	%tmp22.i16 = load i32 (...)** %tmp2021.i15		; <i32 (...)*> [#uses=1]
+	%tmp22.i16 = load i32 (...)*, i32 (...)** %tmp2021.i15		; <i32 (...)*> [#uses=1]
 	%tmp2223.i17 = bitcast i32 (...)* %tmp22.i16 to void (%struct.B*)*		; <void (%struct.B*)*> [#uses=1]
 	br label %_Z3fooiM1BFvvE.exit28
 
diff --git a/llvm/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll b/llvm/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll
index a89e937..7973f22 100644
--- a/llvm/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll
+++ b/llvm/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll
@@ -9,15 +9,15 @@
 
 define fastcc void @EvaluateDevelopment() {
 entry:
-        %tmp7 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 7)         ; <i64> [#uses=1]
-        %tmp50 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 0)                ; <i64> [#uses=1]
-        %tmp52 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 1)                ; <i64> [#uses=1]
+        %tmp7 = load i64, i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 7)         ; <i64> [#uses=1]
+        %tmp50 = load i64, i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 0)                ; <i64> [#uses=1]
+        %tmp52 = load i64, i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 1)                ; <i64> [#uses=1]
         %tmp53 = or i64 %tmp52, %tmp50          ; <i64> [#uses=1]
-        %tmp57.b = load i1* @rank_mask.1.b              ; <i1> [#uses=1]
+        %tmp57.b = load i1, i1* @rank_mask.1.b              ; <i1> [#uses=1]
         %tmp57 = select i1 %tmp57.b, i64 71776119061217280, i64 0               ; <i64> [#uses=1]
         %tmp58 = and i64 %tmp57, %tmp7          ; <i64> [#uses=1]
         %tmp59 = lshr i64 %tmp58, 8             ; <i64> [#uses=1]
-        %tmp63 = load i64* getelementptr ([8 x i64]* @file_mask, i32 0, i32 4)          ; <i64> [#uses=1]
+        %tmp63 = load i64, i64* getelementptr ([8 x i64]* @file_mask, i32 0, i32 4)          ; <i64> [#uses=1]
         %tmp64 = or i64 %tmp63, 0               ; <i64> [#uses=1]
         %tmp65 = and i64 %tmp59, %tmp53         ; <i64> [#uses=1]
         %tmp66 = and i64 %tmp65, %tmp64         ; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll b/llvm/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
index 7fd0bd5..f59d081 100644
--- a/llvm/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
+++ b/llvm/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
@@ -14,7 +14,7 @@
 
 define fastcc void @Draw7(i32 %Option, i32* %Status) {
 entry:
-	%tmp115.b = load i1* @FirstTime.4637.b		; <i1> [#uses=1]
+	%tmp115.b = load i1, i1* @FirstTime.4637.b		; <i1> [#uses=1]
 	br i1 %tmp115.b, label %cond_next239, label %cond_next.i
 
 cond_next.i:		; preds = %entry
@@ -88,11 +88,11 @@
 	br i1 %tmp1148, label %cond_next1275, label %cond_true1272
 
 cond_true1272:		; preds = %cond_next1267
-	%tmp1273 = load %struct.TestObj** null		; <%struct.TestObj*> [#uses=2]
+	%tmp1273 = load %struct.TestObj*, %struct.TestObj** null		; <%struct.TestObj*> [#uses=2]
 	%tmp2930.i = ptrtoint %struct.TestObj* %tmp1273 to i32		; <i32> [#uses=1]
 	%tmp42.i348 = sub i32 0, %tmp2930.i		; <i32> [#uses=1]
 	%tmp45.i = getelementptr %struct.TestObj, %struct.TestObj* %tmp1273, i32 0, i32 0		; <i8**> [#uses=2]
-	%tmp48.i = load i8** %tmp45.i		; <i8*> [#uses=1]
+	%tmp48.i = load i8*, i8** %tmp45.i		; <i8*> [#uses=1]
 	%tmp50.i350 = call i32 (i8*, i8*, ...)* @sprintf( i8* getelementptr ([256 x i8]* @Msg, i32 0, i32 0), i8* getelementptr ([48 x i8]* @.str53615, i32 0, i32 0), i8* null, i8** %tmp45.i, i8* %tmp48.i )		; <i32> [#uses=0]
 	br i1 false, label %cond_true.i632.i, label %Ut_TraceMsg.exit648.i
 
diff --git a/llvm/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll b/llvm/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
index 90a3b37..5895a32 100644
--- a/llvm/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
+++ b/llvm/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
@@ -17,7 +17,7 @@
 	%retval = alloca i32, align 4		; <i32*> [#uses=1]
 	store i32 %i, i32* %i_addr
 	store i32 %q, i32* %q_addr
-	%tmp = load i32* %i_addr		; <i32> [#uses=1]
+	%tmp = load i32, i32* %i_addr		; <i32> [#uses=1]
 	%tmp1 = icmp ne i32 %tmp, 0		; <i1> [#uses=1]
 	%tmp12 = zext i1 %tmp1 to i8		; <i8> [#uses=1]
 	%toBool = icmp ne i8 %tmp12, 0		; <i1> [#uses=1]
@@ -34,7 +34,7 @@
 	br label %cond_next
 
 cond_next:		; preds = %cond_false, %cond_true
-	%tmp7 = load i32* %q_addr		; <i32> [#uses=1]
+	%tmp7 = load i32, i32* %q_addr		; <i32> [#uses=1]
 	%tmp8 = icmp ne i32 %tmp7, 0		; <i1> [#uses=1]
 	%tmp89 = zext i1 %tmp8 to i8		; <i8> [#uses=1]
 	%toBool10 = icmp ne i8 %tmp89, 0		; <i1> [#uses=1]
@@ -55,7 +55,7 @@
 	br label %return
 
 return:		; preds = %cond_next18
-	%retval20 = load i32* %retval		; <i32> [#uses=1]
+	%retval20 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval20
 }
 
diff --git a/llvm/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll b/llvm/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
index 37e41ec..abb6a33f 100644
--- a/llvm/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
+++ b/llvm/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
@@ -19,7 +19,7 @@
 	%retval = alloca i32, align 4		; <i32*> [#uses=1]
 	store i32 %i, i32* %i_addr
 	store i32 %q, i32* %q_addr
-	%tmp = load i32* %i_addr		; <i32> [#uses=1]
+	%tmp = load i32, i32* %i_addr		; <i32> [#uses=1]
 	%tmp1 = icmp ne i32 %tmp, 0		; <i1> [#uses=1]
 	%tmp12 = zext i1 %tmp1 to i8		; <i8> [#uses=1]
 	%toBool = icmp ne i8 %tmp12, 0		; <i1> [#uses=1]
@@ -28,7 +28,7 @@
 cond_true:		; preds = %entry
 	%tmp3 = call i32 (...)* @bar( )		; <i32> [#uses=0]
 	%tmp4 = call i32 (...)* @baz( i32 5, i32 6 )		; <i32> [#uses=0]
-	%tmp7 = load i32* %q_addr		; <i32> [#uses=1]
+	%tmp7 = load i32, i32* %q_addr		; <i32> [#uses=1]
 	%tmp8 = icmp ne i32 %tmp7, 0		; <i1> [#uses=1]
 	%tmp89 = zext i1 %tmp8 to i8		; <i8> [#uses=1]
 	%toBool10 = icmp ne i8 %tmp89, 0		; <i1> [#uses=1]
@@ -37,7 +37,7 @@
 cond_false:		; preds = %entry
 	%tmp5 = call i32 (...)* @foo( )		; <i32> [#uses=0]
 	%tmp6 = call i32 (...)* @baz( i32 5, i32 6 )		; <i32> [#uses=0]
-	%tmp27 = load i32* %q_addr		; <i32> [#uses=1]
+	%tmp27 = load i32, i32* %q_addr		; <i32> [#uses=1]
 	%tmp28 = icmp ne i32 %tmp27, 0		; <i1> [#uses=1]
 	%tmp289 = zext i1 %tmp28 to i8		; <i8> [#uses=1]
 	%toBool210 = icmp ne i8 %tmp289, 0		; <i1> [#uses=1]
@@ -58,7 +58,7 @@
 	br label %return
 
 return:		; preds = %cond_next18
-	%retval20 = load i32* %retval		; <i32> [#uses=1]
+	%retval20 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval20
 }
 
diff --git a/llvm/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll b/llvm/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
index 30ae723..1edaefb 100644
--- a/llvm/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
+++ b/llvm/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
@@ -29,7 +29,7 @@
 	%retval = alloca i32, align 4		; <i32*> [#uses=1]
 	store i32 %i, i32* %i_addr
 	store i32 %q, i32* %q_addr
-	%tmp = load i32* %i_addr		; <i32> [#uses=1]
+	%tmp = load i32, i32* %i_addr		; <i32> [#uses=1]
 	%tmp1 = icmp ne i32 %tmp, 0		; <i1> [#uses=1]
 	%tmp12 = zext i1 %tmp1 to i8		; <i8> [#uses=1]
 	%toBool = icmp ne i8 %tmp12, 0		; <i1> [#uses=1]
@@ -38,7 +38,7 @@
 cond_true:		; preds = %entry
 	%tmp3 = call i32 (...)* @bar( )		; <i32> [#uses=0]
 	%tmp4 = call i32 (...)* @baz( i32 5, i32 6 )		; <i32> [#uses=0]
-	%tmp7 = load i32* %q_addr		; <i32> [#uses=1]
+	%tmp7 = load i32, i32* %q_addr		; <i32> [#uses=1]
 	%tmp8 = icmp ne i32 %tmp7, 0		; <i1> [#uses=1]
 	%tmp89 = zext i1 %tmp8 to i8		; <i8> [#uses=1]
 	%toBool10 = icmp ne i8 %tmp89, 0		; <i1> [#uses=1]
@@ -47,7 +47,7 @@
 cond_false:		; preds = %entry
 	%tmp5 = call i32 (...)* @foo( )		; <i32> [#uses=0]
 	%tmp6 = call i32 (...)* @baz( i32 5, i32 6 )		; <i32> [#uses=0]
-	%tmp27 = load i32* %q_addr		; <i32> [#uses=1]
+	%tmp27 = load i32, i32* %q_addr		; <i32> [#uses=1]
 	%tmp28 = icmp ne i32 %tmp27, 0		; <i1> [#uses=1]
 	%tmp289 = zext i1 %tmp28 to i8		; <i8> [#uses=1]
 	%toBool210 = icmp ne i8 %tmp289, 0		; <i1> [#uses=1]
@@ -68,7 +68,7 @@
 	br label %return
 
 return:		; preds = %cond_next18
-	%retval20 = load i32* %retval		; <i32> [#uses=1]
+	%retval20 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval20
 }
 
diff --git a/llvm/test/CodeGen/ARM/2007-08-15-ReuseBug.ll b/llvm/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
index fd13676..7d6396c 100644
--- a/llvm/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
+++ b/llvm/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
@@ -45,7 +45,7 @@
 
 cond_true110.i:		; preds = %bb102.i
 	%tmp116.i = getelementptr i8*, i8** %argv_addr.2321.0.i, i32 2		; <i8**> [#uses=1]
-	%tmp117.i = load i8** %tmp116.i		; <i8*> [#uses=1]
+	%tmp117.i = load i8*, i8** %tmp116.i		; <i8*> [#uses=1]
 	%tmp126425.i = call %struct.FILE* @fopen( i8* %tmp117.i, i8* getelementptr ([2 x i8]* @.str44, i32 0, i32 0) )		; <%struct.FILE*> [#uses=0]
 	ret i32 0
 
@@ -60,7 +60,7 @@
 	%tmp3.i.i.i.i105.i = call i8* @calloc( i32 15, i32 1 )		; <i8*> [#uses=1]
 	%tmp1.i108.i = getelementptr [100 x i8*], [100 x i8*]* @_C_cmds, i32 0, i32 0		; <i8**> [#uses=1]
 	store i8* %tmp3.i.i.i.i105.i, i8** %tmp1.i108.i, align 4
-	%tmp.i91.i = load i32* @_C_nextcmd, align 4		; <i32> [#uses=1]
+	%tmp.i91.i = load i32, i32* @_C_nextcmd, align 4		; <i32> [#uses=1]
 	store i32 0, i32* @_C_nextcmd, align 4
 	%tmp3.i.i.i.i95.i = call i8* @calloc( i32 15, i32 1 )		; <i8*> [#uses=1]
 	%tmp1.i98.i = getelementptr [100 x i8*], [100 x i8*]* @_C_cmds, i32 0, i32 %tmp.i91.i		; <i8**> [#uses=1]
@@ -78,7 +78,7 @@
 	%argv_addr.2321.0.i = phi i8** [ %argv, %entry ], [ %tmp214.i, %bb192.i ], [ %tmp214.i, %C_addcmd.exit120.i ], [ %tmp214.i, %bb30.i ], [ %tmp214.i, %bb21.i ], [ %tmp214.i, %cond_next212.i ], [ %tmp214.i, %cond_next212.i ], [ %tmp214.i, %cond_next212.i ], [ %tmp214.i, %cond_next212.i ]		; <i8**> [#uses=2]
 	%argc_addr.2358.0.i = phi i32 [ %argc, %entry ], [ %tmp205399.i, %bb30.i ], [ 0, %bb21.i ], [ 0, %C_addcmd.exit120.i ], [ 0, %bb192.i ], [ 0, %cond_next212.i ], [ 0, %cond_next212.i ], [ 0, %cond_next212.i ], [ 0, %cond_next212.i ]		; <i32> [#uses=1]
 	%tmp214.i = getelementptr i8*, i8** %argv_addr.2321.0.i, i32 1		; <i8**> [#uses=9]
-	%tmp215.i = load i8** %tmp214.i		; <i8*> [#uses=1]
+	%tmp215.i = load i8*, i8** %tmp214.i		; <i8*> [#uses=1]
 	%tmp1314.i = sext i8 0 to i32		; <i32> [#uses=1]
 	switch i32 %tmp1314.i, label %bb192.i [
 		 i32 76, label %C_addcmd.exit120.i
diff --git a/llvm/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll b/llvm/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
index 3754db0..9894105 100644
--- a/llvm/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
+++ b/llvm/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
@@ -10,7 +10,7 @@
 define i32 @vorbis_staticbook_pack(%struct.static_codebook* %c, %struct.oggpack_buffer* %opb) {
 entry:
 	%opb_addr = alloca %struct.oggpack_buffer*		; <%struct.oggpack_buffer**> [#uses=1]
-	%tmp1 = load %struct.oggpack_buffer** %opb_addr, align 4		; <%struct.oggpack_buffer*> [#uses=1]
+	%tmp1 = load %struct.oggpack_buffer*, %struct.oggpack_buffer** %opb_addr, align 4		; <%struct.oggpack_buffer*> [#uses=1]
 	call void @oggpack_write( %struct.oggpack_buffer* %tmp1, i32 5653314, i32 24 ) nounwind 
 	call void @oggpack_write( %struct.oggpack_buffer* null, i32 0, i32 24 ) nounwind 
 	unreachable
diff --git a/llvm/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll b/llvm/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
index dabe620..b0a50a4 100644
--- a/llvm/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
+++ b/llvm/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
@@ -4,7 +4,7 @@
 entry:
 	br label %bb1
 bb1:		; preds = %entry
-	%tmp3.i.i = load i8* null, align 1		; <i8> [#uses=1]
+	%tmp3.i.i = load i8, i8* null, align 1		; <i8> [#uses=1]
 	%tmp4.i.i = icmp slt i8 %tmp3.i.i, 0		; <i1> [#uses=1]
 	br i1 %tmp4.i.i, label %bb2, label %bb3
 bb2:		; preds = %bb1
diff --git a/llvm/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll b/llvm/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
index 94c562b..24e664c 100644
--- a/llvm/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
+++ b/llvm/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
@@ -11,7 +11,7 @@
 bb88.i:		; preds = %bb74.i
 	br i1 false, label %mandel.exit, label %bb74.i
 mandel.exit:		; preds = %bb88.i
-	%tmp2 = load volatile double* getelementptr ({ double, double }* @accum, i32 0, i32 0), align 8		; <double> [#uses=1]
+	%tmp2 = load volatile double, double* getelementptr ({ double, double }* @accum, i32 0, i32 0), align 8		; <double> [#uses=1]
 	%tmp23 = fptosi double %tmp2 to i32		; <i32> [#uses=1]
 	%tmp5 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %tmp23 )		; <i32> [#uses=0]
 	ret i32 0
diff --git a/llvm/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll b/llvm/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
index 5c7e250..428aa11 100644
--- a/llvm/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
+++ b/llvm/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
@@ -57,7 +57,7 @@
 	br i1 false, label %cond_true380.i.i, label %cond_next602.i.i
 cond_true380.i.i:		; preds = %cond_false373.i.i
 	%tmp394.i418.i = add i32 %cell.0.i.i, 1		; <i32> [#uses=1]
-	%tmp397.i420.i = load %struct.cellbox** null, align 4		; <%struct.cellbox*> [#uses=1]
+	%tmp397.i420.i = load %struct.cellbox*, %struct.cellbox** null, align 4		; <%struct.cellbox*> [#uses=1]
 	br label %bb398.i.i
 bb398.i.i:		; preds = %bb398.i.i, %cond_true380.i.i
 	br i1 false, label %bb414.i.i, label %bb398.i.i
@@ -74,7 +74,7 @@
 bb620.i.i:		; preds = %bb620.i.i, %bb609.i.i
 	%indvar166.i465.i = phi i32 [ %indvar.next167.i.i, %bb620.i.i ], [ 0, %bb609.i.i ]		; <i32> [#uses=1]
 	%tmp640.i.i = call i32 (%struct.FILE*, i8*, ...)* @fscanf( %struct.FILE* %tmp61, i8* getelementptr ([5 x i8]* @.str584, i32 0, i32 0), [1024 x i8]* null )		; <i32> [#uses=0]
-	%tmp648.i.i = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp648.i.i = load i32, i32* null, align 4		; <i32> [#uses=1]
 	%tmp650.i468.i = icmp sgt i32 0, %tmp648.i.i		; <i1> [#uses=1]
 	%tmp624.i469.i = call i32 (%struct.FILE*, i8*, ...)* @fscanf( %struct.FILE* %tmp61, i8* getelementptr ([5 x i8]* @.str584, i32 0, i32 0), [1024 x i8]* null )		; <i32> [#uses=0]
 	%indvar.next167.i.i = add i32 %indvar166.i465.i, 1		; <i32> [#uses=1]
@@ -126,7 +126,7 @@
 bb.i53:		; preds = %cond_true163
 	ret void
 bb34.i:		; preds = %cond_true163
-	%tmp37.i55 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp37.i55 = load i32, i32* null, align 4		; <i32> [#uses=1]
 	br i1 false, label %bb65.preheader.i, label %bb78.i
 bb65.preheader.i:		; preds = %bb34.i
 	br label %bb65.outer.us.i
@@ -149,7 +149,7 @@
 	%indvar90.i = phi i32 [ %indvar.next91.i, %cond_next215.i ], [ 0, %bb151.i ]		; <i32> [#uses=2]
 	%sfb.3.reg2mem.0.i = add i32 %indvar90.i, %tmp37.i55		; <i32> [#uses=4]
 	%tmp161.i = getelementptr [4 x [21 x double]], [4 x [21 x double]]* null, i32 0, i32 %tmp15747.i, i32 %sfb.3.reg2mem.0.i		; <double*> [#uses=1]
-	%tmp162.i74 = load double* %tmp161.i, align 4		; <double> [#uses=0]
+	%tmp162.i74 = load double, double* %tmp161.i, align 4		; <double> [#uses=0]
 	br i1 false, label %cond_true167.i, label %cond_next215.i
 cond_true167.i:		; preds = %bb155.i
 	%tmp173.i = getelementptr %struct.III_scalefac_t, %struct.III_scalefac_t* null, i32 0, i32 1, i32 %sfb.3.reg2mem.0.i, i32 %i.154.i		; <i32*> [#uses=1]
@@ -157,7 +157,7 @@
 	%tmp182.1.i = getelementptr [14 x i32], [14 x i32]* @scalefac_band.1, i32 0, i32 %sfb.3.reg2mem.0.i		; <i32*> [#uses=0]
 	%tmp185.i78 = add i32 %sfb.3.reg2mem.0.i, 1		; <i32> [#uses=1]
 	%tmp187.1.i = getelementptr [14 x i32], [14 x i32]* @scalefac_band.1, i32 0, i32 %tmp185.i78		; <i32*> [#uses=1]
-	%tmp188.i = load i32* %tmp187.1.i, align 4		; <i32> [#uses=1]
+	%tmp188.i = load i32, i32* %tmp187.1.i, align 4		; <i32> [#uses=1]
 	%tmp21153.i = icmp slt i32 0, %tmp188.i		; <i1> [#uses=1]
 	br i1 %tmp21153.i, label %bb190.preheader.i, label %cond_next215.i
 bb190.preheader.i:		; preds = %cond_true167.i
@@ -224,7 +224,7 @@
 	%over.1 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ]		; <i32> [#uses=1]
 	%best_over.0 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ]		; <i32> [#uses=1]
 	%notdone.0 = phi i32 [ 0, %bb ], [ 0, %cond_next104 ]		; <i32> [#uses=1]
-	%tmp147 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp147 = load i32, i32* null, align 4		; <i32> [#uses=1]
 	%tmp148 = icmp eq i32 %tmp147, 0		; <i1> [#uses=1]
 	%tmp153 = icmp eq i32 %over.1, 0		; <i1> [#uses=1]
 	%bothcond = and i1 %tmp148, %tmp153		; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll b/llvm/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
index d74fea8..c9c78e1 100644
--- a/llvm/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
+++ b/llvm/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
@@ -21,8 +21,8 @@
 	br i1 false, label %init_orig_buffers.exit, label %cond_true.i29
 
 cond_true.i29:		; preds = %entry
-	%tmp17.i = load i32* getelementptr (%struct.ImageParameters* @images, i32 0, i32 20), align 8		; <i32> [#uses=1]
-	%tmp20.i27 = load i32* getelementptr (%struct.ImageParameters* @images, i32 0, i32 16), align 8		; <i32> [#uses=1]
+	%tmp17.i = load i32, i32* getelementptr (%struct.ImageParameters* @images, i32 0, i32 20), align 8		; <i32> [#uses=1]
+	%tmp20.i27 = load i32, i32* getelementptr (%struct.ImageParameters* @images, i32 0, i32 16), align 8		; <i32> [#uses=1]
 	%tmp8.i.i = select i1 false, i32 1, i32 0		; <i32> [#uses=1]
 	br label %bb.i8.us.i
 
diff --git a/llvm/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll b/llvm/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
index e86bc1b..cf98d7f 100644
--- a/llvm/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
+++ b/llvm/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
@@ -4,6 +4,6 @@
 define void @main({ i32 }*) {
 entry:
 	%sret1 = alloca { i32 }		; <{ i32 }*> [#uses=1]
-	load { i32 }* %sret1		; <{ i32 }>:1 [#uses=0]
+	load { i32 }, { i32 }* %sret1		; <{ i32 }>:1 [#uses=0]
 	ret void
 }
diff --git a/llvm/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll b/llvm/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
index adb0112..520e800 100644
--- a/llvm/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
+++ b/llvm/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
@@ -8,6 +8,6 @@
 
 define i32 @__gcov_close() nounwind {
 entry:
-	load i32* getelementptr (%struct.__gcov_var* @__gcov_var, i32 0, i32 5), align 4		; <i32>:0 [#uses=1]
+	load i32, i32* getelementptr (%struct.__gcov_var* @__gcov_var, i32 0, i32 5), align 4		; <i32>:0 [#uses=1]
 	ret i32 %0
 }
diff --git a/llvm/test/CodeGen/ARM/2009-02-16-SpillerBug.ll b/llvm/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
index 4c0c59c..c581222 100644
--- a/llvm/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
+++ b/llvm/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
@@ -87,7 +87,7 @@
 bb396:		; preds = %bb394, %bb131, %bb122, %bb122, %bb122, %bb122, %RESUME
 	%stop_link.3 = phi %struct.rec* [ null, %RESUME ], [ %stop_link.3, %bb394 ], [ %stop_link.3, %bb122 ], [ %stop_link.3, %bb122 ], [ %stop_link.3, %bb122 ], [ %stop_link.3, %bb122 ], [ %link.1, %bb131 ]		; <%struct.rec*> [#uses=7]
 	%headers_seen.1 = phi i32 [ 0, %RESUME ], [ %headers_seen.1, %bb394 ], [ 1, %bb122 ], [ 1, %bb122 ], [ 1, %bb122 ], [ 1, %bb122 ], [ %headers_seen.1, %bb131 ]		; <i32> [#uses=2]
-	%link.1 = load %struct.rec** null		; <%struct.rec*> [#uses=2]
+	%link.1 = load %struct.rec*, %struct.rec** null		; <%struct.rec*> [#uses=2]
 	%1 = icmp eq %struct.rec* %link.1, %hd		; <i1> [#uses=1]
 	br i1 %1, label %bb398, label %bb122
 
diff --git a/llvm/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll b/llvm/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
index a48f003..a14589f 100644
--- a/llvm/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
+++ b/llvm/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
@@ -15,6 +15,6 @@
 	br label %return
 
 return:		; preds = %entry
-	%2 = load i32* %retval		; <i32> [#uses=1]
+	%2 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %2
 }
diff --git a/llvm/test/CodeGen/ARM/2009-02-27-SpillerBug.ll b/llvm/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
index bc5e602..d9ec4d2 100644
--- a/llvm/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
+++ b/llvm/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
@@ -28,12 +28,12 @@
 	br i1 %phitmp, label %bb55, label %bb52
 
 bb55:		; preds = %bb53
-	%4 = load double* @a, align 4		; <double> [#uses=10]
+	%4 = load double, double* @a, align 4		; <double> [#uses=10]
 	%5 = fadd double %4, 0.000000e+00		; <double> [#uses=16]
 	%6 = fcmp ogt double %k.4, 0.000000e+00		; <i1> [#uses=1]
 	%.pn404 = fmul double %4, %4		; <double> [#uses=4]
 	%.pn402 = fmul double %5, %5		; <double> [#uses=5]
-	%.pn165.in = load double* @N		; <double> [#uses=5]
+	%.pn165.in = load double, double* @N		; <double> [#uses=5]
 	%.pn198 = fmul double 0.000000e+00, %5		; <double> [#uses=1]
 	%.pn185 = fsub double -0.000000e+00, 0.000000e+00		; <double> [#uses=1]
 	%.pn147 = fsub double -0.000000e+00, 0.000000e+00		; <double> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/2009-03-07-SpillerBug.ll b/llvm/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
index 377bbd2..5674003 100644
--- a/llvm/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
+++ b/llvm/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
@@ -42,7 +42,7 @@
   %17 = fdiv double %16, %0
   %18 = fadd double 0.000000e+00, %17
   %19 = call double @acos(double %18) nounwind readonly
-  %20 = load double* null, align 4
+  %20 = load double, double* null, align 4
   %21 = fmul double %20, 0x401921FB54442D18
   %22 = call double @sin(double %19) nounwind readonly
   %23 = fmul double %22, 0.000000e+00
diff --git a/llvm/test/CodeGen/ARM/2009-04-06-AsmModifier.ll b/llvm/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
index e90c5b3..bc7dbd4 100644
--- a/llvm/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
+++ b/llvm/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
@@ -8,14 +8,14 @@
 	store i32 0, i32* %retval
 	%res = alloca i32		; <i32*> [#uses=0]
 	%fh = alloca i32		; <i32*> [#uses=1]
-	%1 = load i32* %fh		; <i32> [#uses=1]
-	%2 = load i32* %ptr		; <i32> [#uses=1]
+	%1 = load i32, i32* %fh		; <i32> [#uses=1]
+	%2 = load i32, i32* %ptr		; <i32> [#uses=1]
 	%3 = call i32 asm "mov r0, $2; mov r1, $3; swi ${1:a}; mov $0, r0", "=r,i,r,r,~{r0},~{r1}"(i32 107, i32 %1, i32 %2) nounwind		; <i32> [#uses=1]
         store i32 %3, i32* %retval
 	br label %return
 
 return:		; preds = %entry
-	%4 = load i32* %retval		; <i32> [#uses=1]
+	%4 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %4
 }
 
diff --git a/llvm/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll b/llvm/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
index f166e7e..edeae9b 100644
--- a/llvm/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
+++ b/llvm/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
@@ -8,11 +8,11 @@
 	%b = alloca { double, double }		; <{ double, double }*> [#uses=1]
 	store { i32, { double, double }* } %d_arg, { i32, { double, double }* }* %d
 	store i32 %x_arg, i32* %x
-	%tmp = load i32* %x		; <i32> [#uses=1]
+	%tmp = load i32, i32* %x		; <i32> [#uses=1]
 	%tmp1 = getelementptr { i32, { double, double }* }, { i32, { double, double }* }* %d, i32 0, i32 1		; <{ double, double }**> [#uses=1]
-	%.ptr = load { double, double }** %tmp1		; <{ double, double }*> [#uses=1]
+	%.ptr = load { double, double }*, { double, double }** %tmp1		; <{ double, double }*> [#uses=1]
 	%tmp2 = getelementptr { double, double }, { double, double }* %.ptr, i32 %tmp		; <{ double, double }*> [#uses=1]
-	%tmp3 = load { double, double }* %tmp2		; <{ double, double }> [#uses=1]
+	%tmp3 = load { double, double }, { double, double }* %tmp2		; <{ double, double }> [#uses=1]
 	store { double, double } %tmp3, { double, double }* %b
 	ret void
 }
diff --git a/llvm/test/CodeGen/ARM/2009-04-08-FloatUndef.ll b/llvm/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
index 9e32e05..949e107 100644
--- a/llvm/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
+++ b/llvm/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
@@ -2,7 +2,7 @@
 
 define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>* %CONST) {
 entry:
-	%input2 = load <4 x float>* null, align 16		; <<4 x float>> [#uses=2]
+	%input2 = load <4 x float>, <4 x float>* null, align 16		; <<4 x float>> [#uses=2]
 	%shuffle7 = shufflevector <4 x float> %input2, <4 x float> <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>		; <<4 x float>> [#uses=1]
 	%mul1 = fmul <4 x float> %shuffle7, zeroinitializer		; <<4 x float>> [#uses=1]
 	%add2 = fadd <4 x float> %mul1, %input2		; <<4 x float>> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll b/llvm/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
index 5b17463..f2532d7 100644
--- a/llvm/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
+++ b/llvm/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
@@ -4,7 +4,7 @@
 define void @foo(...) nounwind {
 entry:
 	%rr = alloca i32		; <i32*> [#uses=2]
-	%0 = load i32* %rr		; <i32> [#uses=1]
+	%0 = load i32, i32* %rr		; <i32> [#uses=1]
 	%1 = call i32 asm "nop", "=r,0"(i32 %0) nounwind		; <i32> [#uses=1]
 	store i32 %1, i32* %rr
 	br label %return
diff --git a/llvm/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll b/llvm/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
index 2bc7df0..06456cc 100644
--- a/llvm/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
+++ b/llvm/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
@@ -20,7 +20,7 @@
 bb3:		; preds = %bb1, %bb
 	%iftmp.0.0 = phi i32 [ 0, %bb1 ], [ -1, %bb ]		; <i32> [#uses=1]
 	%1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([7 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 %iftmp.0.0) nounwind		; <i32> [#uses=0]
-	%2 = load %struct.List** null, align 4		; <%struct.List*> [#uses=2]
+	%2 = load %struct.List*, %struct.List** null, align 4		; <%struct.List*> [#uses=2]
 	%phitmp = icmp eq %struct.List* %2, null		; <i1> [#uses=1]
 	br i1 %phitmp, label %bb5, label %bb
 
diff --git a/llvm/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll b/llvm/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
index 98e0023..17beb3c 100644
--- a/llvm/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
+++ b/llvm/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
@@ -136,7 +136,7 @@
 	br label %bb141
 
 bb139:		; preds = %bb141
-	%scevgep441442881 = load i16* undef		; <i16> [#uses=1]
+	%scevgep441442881 = load i16, i16* undef		; <i16> [#uses=1]
 	%1 = icmp ugt i16 %scevgep441442881, %0		; <i1> [#uses=1]
 	br i1 %1, label %bb141, label %bb142
 
diff --git a/llvm/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll b/llvm/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
index 380d43a..4ab54c2 100644
--- a/llvm/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
+++ b/llvm/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
@@ -5,9 +5,9 @@
 
 define void @simplify_unary_real(i8* nocapture %p) nounwind {
 entry:
-	%tmp121 = load i64* null, align 4		; <i64> [#uses=1]
+	%tmp121 = load i64, i64* null, align 4		; <i64> [#uses=1]
 	%0 = getelementptr %struct.rtx_def, %struct.rtx_def* null, i32 0, i32 3, i32 3, i32 0		; <i64*> [#uses=1]
-	%tmp122 = load i64* %0, align 4		; <i64> [#uses=1]
+	%tmp122 = load i64, i64* %0, align 4		; <i64> [#uses=1]
 	%1 = zext i64 undef to i192		; <i192> [#uses=2]
 	%2 = zext i64 %tmp121 to i192		; <i192> [#uses=1]
 	%3 = shl i192 %2, 64		; <i192> [#uses=2]
diff --git a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
index 2c89265..243726c 100644
--- a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
+++ b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
@@ -35,10 +35,10 @@
 	unreachable
 
 bb11:		; preds = %bb9
-	%0 = load i32* undef, align 4		; <i32> [#uses=2]
+	%0 = load i32, i32* undef, align 4		; <i32> [#uses=2]
 	%1 = add i32 %0, 1		; <i32> [#uses=2]
 	store i32 %1, i32* undef, align 4
-	%2 = load i32* undef, align 4		; <i32> [#uses=1]
+	%2 = load i32, i32* undef, align 4		; <i32> [#uses=1]
 	store i32 %2, i32* @nn, align 4
 	store i32 0, i32* @al_len, align 4
 	store i32 0, i32* @no_mat, align 4
@@ -48,9 +48,9 @@
 	%4 = sitofp i32 undef to double		; <double> [#uses=1]
 	%5 = fdiv double %4, 1.000000e+01		; <double> [#uses=1]
 	%6 = tail call  i32 (i8*, ...)* @printf(i8* getelementptr ([29 x i8]* @"\01LC12", i32 0, i32 0), double %5) nounwind		; <i32> [#uses=0]
-	%7 = load i32* @al_len, align 4		; <i32> [#uses=1]
-	%8 = load i32* @no_mat, align 4		; <i32> [#uses=1]
-	%9 = load i32* @no_mis, align 4		; <i32> [#uses=1]
+	%7 = load i32, i32* @al_len, align 4		; <i32> [#uses=1]
+	%8 = load i32, i32* @no_mat, align 4		; <i32> [#uses=1]
+	%9 = load i32, i32* @no_mis, align 4		; <i32> [#uses=1]
 	%10 = sub i32 %7, %8		; <i32> [#uses=1]
 	%11 = sub i32 %10, %9		; <i32> [#uses=1]
 	%12 = tail call  i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC16", i32 0, i32 0), i32 %11) nounwind		; <i32> [#uses=0]
diff --git a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
index 599f291..17051df9 100644
--- a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
+++ b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
@@ -33,17 +33,17 @@
 	unreachable
 
 bb11:		; preds = %bb9
-	%0 = load i32* undef, align 4		; <i32> [#uses=3]
+	%0 = load i32, i32* undef, align 4		; <i32> [#uses=3]
 	%1 = add i32 %0, 1		; <i32> [#uses=2]
 	store i32 %1, i32* undef, align 4
-	%2 = load i32* undef, align 4		; <i32> [#uses=2]
+	%2 = load i32, i32* undef, align 4		; <i32> [#uses=2]
 	%3 = sub i32 %2, %0		; <i32> [#uses=1]
 	store i32 0, i32* @no_mat, align 4
 	store i32 0, i32* @no_mis, align 4
 	%4 = getelementptr i8, i8* %B, i32 %0		; <i8*> [#uses=1]
 	tail call  void @diff(i8* undef, i8* %4, i32 undef, i32 %3, i32 undef, i32 undef) nounwind
 	%5 = tail call  i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC11", i32 0, i32 0), i32 %tmp13) nounwind		; <i32> [#uses=0]
-	%6 = load i32* @no_mis, align 4		; <i32> [#uses=1]
+	%6 = load i32, i32* @no_mis, align 4		; <i32> [#uses=1]
 	%7 = tail call  i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC15", i32 0, i32 0), i32 %6) nounwind		; <i32> [#uses=0]
 	%8 = tail call  i32 (i8*, ...)* @printf(i8* getelementptr ([47 x i8]* @"\01LC17", i32 0, i32 0), i32 undef, i32 %1, i32 undef, i32 %2) nounwind		; <i32> [#uses=0]
 	br i1 undef, label %bb15, label %bb12
diff --git a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
index fb6ca10..cf7325d 100644
--- a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
+++ b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
@@ -28,7 +28,7 @@
 	br i1 undef, label %bb15, label %bb12
 
 bb12:		; preds = %bb11
-	%0 = load i32** @JJ, align 4		; <i32*> [#uses=1]
+	%0 = load i32*, i32** @JJ, align 4		; <i32*> [#uses=1]
 	br label %bb228.i
 
 bb74.i:		; preds = %bb228.i
@@ -85,9 +85,9 @@
 	%fi.5.i = phi i32 [ undef, %bb167.i ], [ %ci.910.i, %bb158.i ], [ undef, %bb160.i ], [ %ci.910.i, %bb161.i ], [ undef, %bb163.i ]		; <i32> [#uses=1]
 	%fj.4.i = phi i32 [ undef, %bb167.i ], [ undef, %bb158.i ], [ %fj.515.i, %bb160.i ], [ undef, %bb161.i ], [ %fj.515.i, %bb163.i ]		; <i32> [#uses=2]
 	%scevgep88.i = getelementptr i32, i32* null, i32 %i.121.i		; <i32*> [#uses=3]
-	%4 = load i32* %scevgep88.i, align 4		; <i32> [#uses=2]
+	%4 = load i32, i32* %scevgep88.i, align 4		; <i32> [#uses=2]
 	%scevgep89.i = getelementptr i32, i32* %0, i32 %i.121.i		; <i32*> [#uses=3]
-	%5 = load i32* %scevgep89.i, align 4		; <i32> [#uses=1]
+	%5 = load i32, i32* %scevgep89.i, align 4		; <i32> [#uses=1]
 	%ci.10.i = select i1 undef, i32 %pi.316.i, i32 %i.121.i		; <i32> [#uses=0]
 	%cj.9.i = select i1 undef, i32 %pj.317.i, i32 undef		; <i32> [#uses=0]
 	%6 = icmp slt i32 undef, 0		; <i1> [#uses=3]
@@ -95,8 +95,8 @@
 	%cj.11.i100 = select i1 %6, i32 %fj.4.i, i32 %5		; <i32> [#uses=1]
 	%c.14.i = select i1 %6, i32 0, i32 undef		; <i32> [#uses=2]
 	store i32 %c.14.i, i32* undef, align 4
-	%7 = load i32* %scevgep88.i, align 4		; <i32> [#uses=1]
-	%8 = load i32* %scevgep89.i, align 4		; <i32> [#uses=1]
+	%7 = load i32, i32* %scevgep88.i, align 4		; <i32> [#uses=1]
+	%8 = load i32, i32* %scevgep89.i, align 4		; <i32> [#uses=1]
 	store i32 %ci.12.i, i32* %scevgep88.i, align 4
 	store i32 %cj.11.i100, i32* %scevgep89.i, align 4
 	store i32 %4, i32* undef, align 4
diff --git a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
index 0485ab0..203608a 100644
--- a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
+++ b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
@@ -22,7 +22,7 @@
 	br i1 undef, label %bb8, label %bb6
 
 bb8:		; preds = %bb6, %bb5
-	%0 = load i8** @name1, align 4		; <i8*> [#uses=0]
+	%0 = load i8*, i8** @name1, align 4		; <i8*> [#uses=0]
 	br label %bb15
 
 bb9:		; preds = %bb15
@@ -39,9 +39,9 @@
 	br i1 undef, label %bb15, label %bb12
 
 bb12:		; preds = %bb11
-	%3 = load i32** @II, align 4		; <i32*> [#uses=1]
-	%4 = load i32* @r, align 4		; <i32> [#uses=1]
-	%5 = load i32* @qr, align 4		; <i32> [#uses=1]
+	%3 = load i32*, i32** @II, align 4		; <i32*> [#uses=1]
+	%4 = load i32, i32* @r, align 4		; <i32> [#uses=1]
+	%5 = load i32, i32* @qr, align 4		; <i32> [#uses=1]
 	br label %bb228.i
 
 bb74.i:		; preds = %bb228.i
@@ -100,7 +100,7 @@
 	%ci.12.i = select i1 undef, i32 %fi.5.i, i32 undef		; <i32> [#uses=1]
 	%cj.11.i100 = select i1 undef, i32 %fj.4.i, i32 undef		; <i32> [#uses=1]
 	%c.14.i = select i1 undef, i32 %f.5.i, i32 undef		; <i32> [#uses=1]
-	%10 = load i32* %scevgep88.i, align 4		; <i32> [#uses=1]
+	%10 = load i32, i32* %scevgep88.i, align 4		; <i32> [#uses=1]
 	br i1 undef, label %bb211.i, label %bb218.i
 
 bb211.i:		; preds = %bb168.i
diff --git a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
index 16f5d1d..b3c91ed 100644
--- a/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
+++ b/llvm/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
@@ -28,7 +28,7 @@
 	br i1 undef, label %bb15, label %bb12
 
 bb12:		; preds = %bb11
-	%0 = load i32** @XX, align 4		; <i32*> [#uses=0]
+	%0 = load i32*, i32** @XX, align 4		; <i32*> [#uses=0]
 	br label %bb228.i
 
 bb74.i:		; preds = %bb228.i
diff --git a/llvm/test/CodeGen/ARM/2009-07-01-CommuteBug.ll b/llvm/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
index ae826fe..55039dd 100644
--- a/llvm/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
+++ b/llvm/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
@@ -30,9 +30,9 @@
 	br i1 undef, label %bb15, label %bb12
 
 bb12:		; preds = %bb11
-	%0 = load i32** @II, align 4		; <i32*> [#uses=1]
-	%1 = load i32** @JJ, align 4		; <i32*> [#uses=1]
-	%2 = load i32* @qr, align 4		; <i32> [#uses=1]
+	%0 = load i32*, i32** @II, align 4		; <i32*> [#uses=1]
+	%1 = load i32*, i32** @JJ, align 4		; <i32*> [#uses=1]
+	%2 = load i32, i32* @qr, align 4		; <i32> [#uses=1]
 	br label %bb228.i
 
 bb74.i:		; preds = %bb228.i
@@ -97,8 +97,8 @@
 	%ci.12.i = select i1 undef, i32 %fi.5.i, i32 undef		; <i32> [#uses=2]
 	%cj.11.i100 = select i1 undef, i32 %fj.4.i, i32 undef		; <i32> [#uses=2]
 	%c.14.i = select i1 undef, i32 %f.5.i, i32 undef		; <i32> [#uses=1]
-	%6 = load i32* %scevgep88.i, align 4		; <i32> [#uses=1]
-	%7 = load i32* %scevgep89.i, align 4		; <i32> [#uses=1]
+	%6 = load i32, i32* %scevgep88.i, align 4		; <i32> [#uses=1]
+	%7 = load i32, i32* %scevgep89.i, align 4		; <i32> [#uses=1]
 	store i32 %ci.12.i, i32* %scevgep88.i, align 4
 	store i32 %cj.11.i100, i32* %scevgep89.i, align 4
 	br i1 undef, label %bb211.i, label %bb218.i
diff --git a/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll b/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
index 0c6378a..2cb2678 100644
--- a/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
+++ b/llvm/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
@@ -17,42 +17,42 @@
 
 bb:		; preds = %entry
 	%1 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 2		; <%struct.VERTEX**> [#uses=1]
-	%2 = load %struct.VERTEX** %1, align 4		; <%struct.VERTEX*> [#uses=2]
+	%2 = load %struct.VERTEX*, %struct.VERTEX** %1, align 4		; <%struct.VERTEX*> [#uses=2]
 	%3 = icmp eq %struct.VERTEX* %2, null		; <i1> [#uses=1]
 	br i1 %3, label %bb7, label %bb1.i
 
 bb1.i:		; preds = %bb1.i, %bb
 	%tree_addr.0.i = phi %struct.VERTEX* [ %5, %bb1.i ], [ %tree, %bb ]		; <%struct.VERTEX*> [#uses=3]
 	%4 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree_addr.0.i, i32 0, i32 1		; <%struct.VERTEX**> [#uses=1]
-	%5 = load %struct.VERTEX** %4, align 4		; <%struct.VERTEX*> [#uses=2]
+	%5 = load %struct.VERTEX*, %struct.VERTEX** %4, align 4		; <%struct.VERTEX*> [#uses=2]
 	%6 = icmp eq %struct.VERTEX* %5, null		; <i1> [#uses=1]
 	br i1 %6, label %get_low.exit, label %bb1.i
 
 get_low.exit:		; preds = %bb1.i
 	call  void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delright, %struct.VERTEX* %2, %struct.VERTEX* %extra) nounwind
 	%7 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 1		; <%struct.VERTEX**> [#uses=1]
-	%8 = load %struct.VERTEX** %7, align 4		; <%struct.VERTEX*> [#uses=1]
+	%8 = load %struct.VERTEX*, %struct.VERTEX** %7, align 4		; <%struct.VERTEX*> [#uses=1]
 	call  void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delleft, %struct.VERTEX* %8, %struct.VERTEX* %tree) nounwind
 	%9 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delleft, i32 0, i32 0		; <%struct.edge_rec**> [#uses=1]
-	%10 = load %struct.edge_rec** %9, align 8		; <%struct.edge_rec*> [#uses=2]
+	%10 = load %struct.edge_rec*, %struct.edge_rec** %9, align 8		; <%struct.edge_rec*> [#uses=2]
 	%11 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delleft, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%12 = load %struct.edge_rec** %11, align 4		; <%struct.edge_rec*> [#uses=1]
+	%12 = load %struct.edge_rec*, %struct.edge_rec** %11, align 4		; <%struct.edge_rec*> [#uses=1]
 	%13 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delright, i32 0, i32 0		; <%struct.edge_rec**> [#uses=1]
-	%14 = load %struct.edge_rec** %13, align 8		; <%struct.edge_rec*> [#uses=1]
+	%14 = load %struct.edge_rec*, %struct.edge_rec** %13, align 8		; <%struct.edge_rec*> [#uses=1]
 	%15 = getelementptr %struct.EDGE_PAIR, %struct.EDGE_PAIR* %delright, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%16 = load %struct.edge_rec** %15, align 4		; <%struct.edge_rec*> [#uses=2]
+	%16 = load %struct.edge_rec*, %struct.edge_rec** %15, align 4		; <%struct.edge_rec*> [#uses=2]
 	br label %bb.i
 
 bb.i:		; preds = %bb4.i, %get_low.exit
 	%rdi_addr.0.i = phi %struct.edge_rec* [ %14, %get_low.exit ], [ %72, %bb4.i ]		; <%struct.edge_rec*> [#uses=2]
 	%ldi_addr.1.i = phi %struct.edge_rec* [ %12, %get_low.exit ], [ %ldi_addr.0.i, %bb4.i ]		; <%struct.edge_rec*> [#uses=3]
 	%17 = getelementptr %struct.edge_rec, %struct.edge_rec* %rdi_addr.0.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%18 = load %struct.VERTEX** %17, align 4		; <%struct.VERTEX*> [#uses=3]
+	%18 = load %struct.VERTEX*, %struct.VERTEX** %17, align 4		; <%struct.VERTEX*> [#uses=3]
 	%19 = ptrtoint %struct.edge_rec* %ldi_addr.1.i to i32		; <i32> [#uses=1]
 	%20 = getelementptr %struct.VERTEX, %struct.VERTEX* %18, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%21 = load double* %20, align 4		; <double> [#uses=3]
+	%21 = load double, double* %20, align 4		; <double> [#uses=3]
 	%22 = getelementptr %struct.VERTEX, %struct.VERTEX* %18, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%23 = load double* %22, align 4		; <double> [#uses=3]
+	%23 = load double, double* %22, align 4		; <double> [#uses=3]
 	br label %bb2.i
 
 bb1.i1:		; preds = %bb2.i
@@ -63,7 +63,7 @@
 	%28 = or i32 %26, %27		; <i32> [#uses=1]
 	%29 = inttoptr i32 %28 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%30 = getelementptr %struct.edge_rec, %struct.edge_rec* %29, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%31 = load %struct.edge_rec** %30, align 4		; <%struct.edge_rec*> [#uses=1]
+	%31 = load %struct.edge_rec*, %struct.edge_rec** %30, align 4		; <%struct.edge_rec*> [#uses=1]
 	%32 = ptrtoint %struct.edge_rec* %31 to i32		; <i32> [#uses=2]
 	%33 = add i32 %32, 16		; <i32> [#uses=1]
 	%34 = and i32 %33, 63		; <i32> [#uses=1]
@@ -80,16 +80,16 @@
 	%.pn6.i = inttoptr i32 %.pn6.in.i to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%t1.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %ldi_addr.1.pn.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
 	%t2.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn6.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%t1.0.i = load %struct.VERTEX** %t1.0.in.i		; <%struct.VERTEX*> [#uses=2]
-	%t2.0.i = load %struct.VERTEX** %t2.0.in.i		; <%struct.VERTEX*> [#uses=2]
+	%t1.0.i = load %struct.VERTEX*, %struct.VERTEX** %t1.0.in.i		; <%struct.VERTEX*> [#uses=2]
+	%t2.0.i = load %struct.VERTEX*, %struct.VERTEX** %t2.0.in.i		; <%struct.VERTEX*> [#uses=2]
 	%38 = getelementptr %struct.VERTEX, %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%39 = load double* %38, align 4		; <double> [#uses=3]
+	%39 = load double, double* %38, align 4		; <double> [#uses=3]
 	%40 = getelementptr %struct.VERTEX, %struct.VERTEX* %t1.0.i, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%41 = load double* %40, align 4		; <double> [#uses=3]
+	%41 = load double, double* %40, align 4		; <double> [#uses=3]
 	%42 = getelementptr %struct.VERTEX, %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%43 = load double* %42, align 4		; <double> [#uses=1]
+	%43 = load double, double* %42, align 4		; <double> [#uses=1]
 	%44 = getelementptr %struct.VERTEX, %struct.VERTEX* %t2.0.i, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%45 = load double* %44, align 4		; <double> [#uses=1]
+	%45 = load double, double* %44, align 4		; <double> [#uses=1]
 	%46 = fsub double %39, %21		; <double> [#uses=1]
 	%47 = fsub double %45, %23		; <double> [#uses=1]
 	%48 = fmul double %46, %47		; <double> [#uses=1]
@@ -105,11 +105,11 @@
 	%55 = xor i32 %54, 32		; <i32> [#uses=3]
 	%56 = inttoptr i32 %55 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
 	%57 = getelementptr %struct.edge_rec, %struct.edge_rec* %56, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%58 = load %struct.VERTEX** %57, align 4		; <%struct.VERTEX*> [#uses=2]
+	%58 = load %struct.VERTEX*, %struct.VERTEX** %57, align 4		; <%struct.VERTEX*> [#uses=2]
 	%59 = getelementptr %struct.VERTEX, %struct.VERTEX* %58, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%60 = load double* %59, align 4		; <double> [#uses=1]
+	%60 = load double, double* %59, align 4		; <double> [#uses=1]
 	%61 = getelementptr %struct.VERTEX, %struct.VERTEX* %58, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%62 = load double* %61, align 4		; <double> [#uses=1]
+	%62 = load double, double* %61, align 4		; <double> [#uses=1]
 	%63 = fsub double %60, %39		; <double> [#uses=1]
 	%64 = fsub double %23, %41		; <double> [#uses=1]
 	%65 = fmul double %63, %64		; <double> [#uses=1]
@@ -122,7 +122,7 @@
 
 bb4.i:		; preds = %bb3.i
 	%71 = getelementptr %struct.edge_rec, %struct.edge_rec* %56, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%72 = load %struct.edge_rec** %71, align 4		; <%struct.edge_rec*> [#uses=1]
+	%72 = load %struct.edge_rec*, %struct.edge_rec** %71, align 4		; <%struct.edge_rec*> [#uses=1]
 	br label %bb.i
 
 bb5.i:		; preds = %bb3.i
@@ -132,7 +132,7 @@
 	%76 = or i32 %74, %75		; <i32> [#uses=1]
 	%77 = inttoptr i32 %76 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%78 = getelementptr %struct.edge_rec, %struct.edge_rec* %77, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%79 = load %struct.edge_rec** %78, align 4		; <%struct.edge_rec*> [#uses=1]
+	%79 = load %struct.edge_rec*, %struct.edge_rec** %78, align 4		; <%struct.edge_rec*> [#uses=1]
 	%80 = ptrtoint %struct.edge_rec* %79 to i32		; <i32> [#uses=2]
 	%81 = add i32 %80, 16		; <i32> [#uses=1]
 	%82 = and i32 %81, 63		; <i32> [#uses=1]
@@ -140,7 +140,7 @@
 	%84 = or i32 %82, %83		; <i32> [#uses=1]
 	%85 = inttoptr i32 %84 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%86 = getelementptr %struct.edge_rec, %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%87 = load %struct.VERTEX** %86, align 4		; <%struct.VERTEX*> [#uses=1]
+	%87 = load %struct.VERTEX*, %struct.VERTEX** %86, align 4		; <%struct.VERTEX*> [#uses=1]
 	%88 = call  %struct.edge_rec* @alloc_edge() nounwind		; <%struct.edge_rec*> [#uses=6]
 	%89 = getelementptr %struct.edge_rec, %struct.edge_rec* %88, i32 0, i32 1		; <%struct.edge_rec**> [#uses=4]
 	store %struct.edge_rec* %88, %struct.edge_rec** %89, align 4
@@ -161,7 +161,7 @@
 	store %struct.VERTEX* %87, %struct.VERTEX** %100, align 4
 	%101 = getelementptr %struct.edge_rec, %struct.edge_rec* %95, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
 	store %struct.edge_rec* %93, %struct.edge_rec** %101, align 4
-	%102 = load %struct.edge_rec** %89, align 4		; <%struct.edge_rec*> [#uses=1]
+	%102 = load %struct.edge_rec*, %struct.edge_rec** %89, align 4		; <%struct.edge_rec*> [#uses=1]
 	%103 = ptrtoint %struct.edge_rec* %102 to i32		; <i32> [#uses=2]
 	%104 = add i32 %103, 16		; <i32> [#uses=1]
 	%105 = and i32 %104, 63		; <i32> [#uses=1]
@@ -169,7 +169,7 @@
 	%107 = or i32 %105, %106		; <i32> [#uses=1]
 	%108 = inttoptr i32 %107 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%109 = getelementptr %struct.edge_rec, %struct.edge_rec* %85, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%110 = load %struct.edge_rec** %109, align 4		; <%struct.edge_rec*> [#uses=1]
+	%110 = load %struct.edge_rec*, %struct.edge_rec** %109, align 4		; <%struct.edge_rec*> [#uses=1]
 	%111 = ptrtoint %struct.edge_rec* %110 to i32		; <i32> [#uses=2]
 	%112 = add i32 %111, 16		; <i32> [#uses=1]
 	%113 = and i32 %112, 63		; <i32> [#uses=1]
@@ -177,19 +177,19 @@
 	%115 = or i32 %113, %114		; <i32> [#uses=1]
 	%116 = inttoptr i32 %115 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%117 = getelementptr %struct.edge_rec, %struct.edge_rec* %116, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%118 = load %struct.edge_rec** %117, align 4		; <%struct.edge_rec*> [#uses=1]
+	%118 = load %struct.edge_rec*, %struct.edge_rec** %117, align 4		; <%struct.edge_rec*> [#uses=1]
 	%119 = getelementptr %struct.edge_rec, %struct.edge_rec* %108, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%120 = load %struct.edge_rec** %119, align 4		; <%struct.edge_rec*> [#uses=1]
+	%120 = load %struct.edge_rec*, %struct.edge_rec** %119, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %118, %struct.edge_rec** %119, align 4
 	store %struct.edge_rec* %120, %struct.edge_rec** %117, align 4
-	%121 = load %struct.edge_rec** %89, align 4		; <%struct.edge_rec*> [#uses=1]
-	%122 = load %struct.edge_rec** %109, align 4		; <%struct.edge_rec*> [#uses=1]
+	%121 = load %struct.edge_rec*, %struct.edge_rec** %89, align 4		; <%struct.edge_rec*> [#uses=1]
+	%122 = load %struct.edge_rec*, %struct.edge_rec** %109, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %121, %struct.edge_rec** %109, align 4
 	store %struct.edge_rec* %122, %struct.edge_rec** %89, align 4
 	%123 = xor i32 %91, 32		; <i32> [#uses=1]
 	%124 = inttoptr i32 %123 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=3]
 	%125 = getelementptr %struct.edge_rec, %struct.edge_rec* %124, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%126 = load %struct.edge_rec** %125, align 4		; <%struct.edge_rec*> [#uses=1]
+	%126 = load %struct.edge_rec*, %struct.edge_rec** %125, align 4		; <%struct.edge_rec*> [#uses=1]
 	%127 = ptrtoint %struct.edge_rec* %126 to i32		; <i32> [#uses=2]
 	%128 = add i32 %127, 16		; <i32> [#uses=1]
 	%129 = and i32 %128, 63		; <i32> [#uses=1]
@@ -197,7 +197,7 @@
 	%131 = or i32 %129, %130		; <i32> [#uses=1]
 	%132 = inttoptr i32 %131 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%133 = getelementptr %struct.edge_rec, %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%134 = load %struct.edge_rec** %133, align 4		; <%struct.edge_rec*> [#uses=1]
+	%134 = load %struct.edge_rec*, %struct.edge_rec** %133, align 4		; <%struct.edge_rec*> [#uses=1]
 	%135 = ptrtoint %struct.edge_rec* %134 to i32		; <i32> [#uses=2]
 	%136 = add i32 %135, 16		; <i32> [#uses=1]
 	%137 = and i32 %136, 63		; <i32> [#uses=1]
@@ -205,13 +205,13 @@
 	%139 = or i32 %137, %138		; <i32> [#uses=1]
 	%140 = inttoptr i32 %139 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%141 = getelementptr %struct.edge_rec, %struct.edge_rec* %140, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%142 = load %struct.edge_rec** %141, align 4		; <%struct.edge_rec*> [#uses=1]
+	%142 = load %struct.edge_rec*, %struct.edge_rec** %141, align 4		; <%struct.edge_rec*> [#uses=1]
 	%143 = getelementptr %struct.edge_rec, %struct.edge_rec* %132, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%144 = load %struct.edge_rec** %143, align 4		; <%struct.edge_rec*> [#uses=1]
+	%144 = load %struct.edge_rec*, %struct.edge_rec** %143, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %142, %struct.edge_rec** %143, align 4
 	store %struct.edge_rec* %144, %struct.edge_rec** %141, align 4
-	%145 = load %struct.edge_rec** %125, align 4		; <%struct.edge_rec*> [#uses=1]
-	%146 = load %struct.edge_rec** %133, align 4		; <%struct.edge_rec*> [#uses=2]
+	%145 = load %struct.edge_rec*, %struct.edge_rec** %125, align 4		; <%struct.edge_rec*> [#uses=1]
+	%146 = load %struct.edge_rec*, %struct.edge_rec** %133, align 4		; <%struct.edge_rec*> [#uses=2]
 	store %struct.edge_rec* %145, %struct.edge_rec** %133, align 4
 	store %struct.edge_rec* %146, %struct.edge_rec** %125, align 4
 	%147 = and i32 %92, 63		; <i32> [#uses=1]
@@ -219,22 +219,22 @@
 	%149 = or i32 %147, %148		; <i32> [#uses=1]
 	%150 = inttoptr i32 %149 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%151 = getelementptr %struct.edge_rec, %struct.edge_rec* %150, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%152 = load %struct.edge_rec** %151, align 4		; <%struct.edge_rec*> [#uses=1]
+	%152 = load %struct.edge_rec*, %struct.edge_rec** %151, align 4		; <%struct.edge_rec*> [#uses=1]
 	%153 = ptrtoint %struct.edge_rec* %152 to i32		; <i32> [#uses=2]
 	%154 = add i32 %153, 16		; <i32> [#uses=1]
 	%155 = and i32 %154, 63		; <i32> [#uses=1]
 	%156 = and i32 %153, -64		; <i32> [#uses=1]
 	%157 = or i32 %155, %156		; <i32> [#uses=1]
 	%158 = inttoptr i32 %157 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
-	%159 = load %struct.VERTEX** %90, align 4		; <%struct.VERTEX*> [#uses=1]
+	%159 = load %struct.VERTEX*, %struct.VERTEX** %90, align 4		; <%struct.VERTEX*> [#uses=1]
 	%160 = getelementptr %struct.edge_rec, %struct.edge_rec* %124, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%161 = load %struct.VERTEX** %160, align 4		; <%struct.VERTEX*> [#uses=1]
+	%161 = load %struct.VERTEX*, %struct.VERTEX** %160, align 4		; <%struct.VERTEX*> [#uses=1]
 	%162 = getelementptr %struct.edge_rec, %struct.edge_rec* %16, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%163 = load %struct.VERTEX** %162, align 4		; <%struct.VERTEX*> [#uses=1]
+	%163 = load %struct.VERTEX*, %struct.VERTEX** %162, align 4		; <%struct.VERTEX*> [#uses=1]
 	%164 = icmp eq %struct.VERTEX* %163, %159		; <i1> [#uses=1]
 	%rdo_addr.0.i = select i1 %164, %struct.edge_rec* %88, %struct.edge_rec* %16		; <%struct.edge_rec*> [#uses=3]
 	%165 = getelementptr %struct.edge_rec, %struct.edge_rec* %10, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%166 = load %struct.VERTEX** %165, align 4		; <%struct.VERTEX*> [#uses=1]
+	%166 = load %struct.VERTEX*, %struct.VERTEX** %165, align 4		; <%struct.VERTEX*> [#uses=1]
 	%167 = icmp eq %struct.VERTEX* %166, %161		; <i1> [#uses=1]
 	%ldo_addr.0.ph.i = select i1 %167, %struct.edge_rec* %124, %struct.edge_rec* %10		; <%struct.edge_rec*> [#uses=3]
 	br label %bb9.i
@@ -244,31 +244,31 @@
 	%rcand.2.i = phi %struct.edge_rec* [ %158, %bb5.i ], [ %666, %bb24.i ], [ %rcand.1.i, %bb25.i ]		; <%struct.edge_rec*> [#uses=5]
 	%basel.0.i = phi %struct.edge_rec* [ %88, %bb5.i ], [ %595, %bb24.i ], [ %716, %bb25.i ]		; <%struct.edge_rec*> [#uses=2]
 	%168 = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.2.i, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%169 = load %struct.edge_rec** %168, align 4		; <%struct.edge_rec*> [#uses=3]
+	%169 = load %struct.edge_rec*, %struct.edge_rec** %168, align 4		; <%struct.edge_rec*> [#uses=3]
 	%170 = getelementptr %struct.edge_rec, %struct.edge_rec* %basel.0.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=3]
-	%171 = load %struct.VERTEX** %170, align 4		; <%struct.VERTEX*> [#uses=4]
+	%171 = load %struct.VERTEX*, %struct.VERTEX** %170, align 4		; <%struct.VERTEX*> [#uses=4]
 	%172 = ptrtoint %struct.edge_rec* %basel.0.i to i32		; <i32> [#uses=3]
 	%173 = xor i32 %172, 32		; <i32> [#uses=1]
 	%174 = inttoptr i32 %173 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
 	%175 = getelementptr %struct.edge_rec, %struct.edge_rec* %174, i32 0, i32 0		; <%struct.VERTEX**> [#uses=3]
-	%176 = load %struct.VERTEX** %175, align 4		; <%struct.VERTEX*> [#uses=3]
+	%176 = load %struct.VERTEX*, %struct.VERTEX** %175, align 4		; <%struct.VERTEX*> [#uses=3]
 	%177 = ptrtoint %struct.edge_rec* %169 to i32		; <i32> [#uses=1]
 	%178 = xor i32 %177, 32		; <i32> [#uses=1]
 	%179 = inttoptr i32 %178 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%180 = getelementptr %struct.edge_rec, %struct.edge_rec* %179, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%181 = load %struct.VERTEX** %180, align 4		; <%struct.VERTEX*> [#uses=2]
+	%181 = load %struct.VERTEX*, %struct.VERTEX** %180, align 4		; <%struct.VERTEX*> [#uses=2]
 	%182 = getelementptr %struct.VERTEX, %struct.VERTEX* %171, i32 0, i32 0, i32 0		; <double*> [#uses=2]
-	%183 = load double* %182, align 4		; <double> [#uses=2]
+	%183 = load double, double* %182, align 4		; <double> [#uses=2]
 	%184 = getelementptr %struct.VERTEX, %struct.VERTEX* %171, i32 0, i32 0, i32 1		; <double*> [#uses=2]
-	%185 = load double* %184, align 4		; <double> [#uses=2]
+	%185 = load double, double* %184, align 4		; <double> [#uses=2]
 	%186 = getelementptr %struct.VERTEX, %struct.VERTEX* %181, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%187 = load double* %186, align 4		; <double> [#uses=1]
+	%187 = load double, double* %186, align 4		; <double> [#uses=1]
 	%188 = getelementptr %struct.VERTEX, %struct.VERTEX* %181, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%189 = load double* %188, align 4		; <double> [#uses=1]
+	%189 = load double, double* %188, align 4		; <double> [#uses=1]
 	%190 = getelementptr %struct.VERTEX, %struct.VERTEX* %176, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%191 = load double* %190, align 4		; <double> [#uses=2]
+	%191 = load double, double* %190, align 4		; <double> [#uses=2]
 	%192 = getelementptr %struct.VERTEX, %struct.VERTEX* %176, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%193 = load double* %192, align 4		; <double> [#uses=2]
+	%193 = load double, double* %192, align 4		; <double> [#uses=2]
 	%194 = fsub double %183, %191		; <double> [#uses=1]
 	%195 = fsub double %189, %193		; <double> [#uses=1]
 	%196 = fmul double %194, %195		; <double> [#uses=1]
@@ -281,7 +281,7 @@
 
 bb10.i:		; preds = %bb9.i
 	%202 = getelementptr %struct.VERTEX, %struct.VERTEX* %171, i32 0, i32 0, i32 2		; <double*> [#uses=1]
-	%avail_edge.promoted25 = load %struct.edge_rec** @avail_edge		; <%struct.edge_rec*> [#uses=1]
+	%avail_edge.promoted25 = load %struct.edge_rec*, %struct.edge_rec** @avail_edge		; <%struct.edge_rec*> [#uses=1]
 	br label %bb12.i
 
 bb11.i:		; preds = %bb12.i
@@ -292,7 +292,7 @@
 	%207 = or i32 %205, %206		; <i32> [#uses=1]
 	%208 = inttoptr i32 %207 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%209 = getelementptr %struct.edge_rec, %struct.edge_rec* %208, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%210 = load %struct.edge_rec** %209, align 4		; <%struct.edge_rec*> [#uses=1]
+	%210 = load %struct.edge_rec*, %struct.edge_rec** %209, align 4		; <%struct.edge_rec*> [#uses=1]
 	%211 = ptrtoint %struct.edge_rec* %210 to i32		; <i32> [#uses=2]
 	%212 = add i32 %211, 16		; <i32> [#uses=1]
 	%213 = and i32 %212, 63		; <i32> [#uses=1]
@@ -300,7 +300,7 @@
 	%215 = or i32 %213, %214		; <i32> [#uses=1]
 	%216 = inttoptr i32 %215 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%217 = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.0.i, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%218 = load %struct.edge_rec** %217, align 4		; <%struct.edge_rec*> [#uses=1]
+	%218 = load %struct.edge_rec*, %struct.edge_rec** %217, align 4		; <%struct.edge_rec*> [#uses=1]
 	%219 = ptrtoint %struct.edge_rec* %218 to i32		; <i32> [#uses=2]
 	%220 = add i32 %219, 16		; <i32> [#uses=1]
 	%221 = and i32 %220, 63		; <i32> [#uses=1]
@@ -308,7 +308,7 @@
 	%223 = or i32 %221, %222		; <i32> [#uses=1]
 	%224 = inttoptr i32 %223 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%225 = getelementptr %struct.edge_rec, %struct.edge_rec* %216, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%226 = load %struct.edge_rec** %225, align 4		; <%struct.edge_rec*> [#uses=1]
+	%226 = load %struct.edge_rec*, %struct.edge_rec** %225, align 4		; <%struct.edge_rec*> [#uses=1]
 	%227 = ptrtoint %struct.edge_rec* %226 to i32		; <i32> [#uses=2]
 	%228 = add i32 %227, 16		; <i32> [#uses=1]
 	%229 = and i32 %228, 63		; <i32> [#uses=1]
@@ -316,13 +316,13 @@
 	%231 = or i32 %229, %230		; <i32> [#uses=1]
 	%232 = inttoptr i32 %231 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%233 = getelementptr %struct.edge_rec, %struct.edge_rec* %232, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%234 = load %struct.edge_rec** %233, align 4		; <%struct.edge_rec*> [#uses=1]
+	%234 = load %struct.edge_rec*, %struct.edge_rec** %233, align 4		; <%struct.edge_rec*> [#uses=1]
 	%235 = getelementptr %struct.edge_rec, %struct.edge_rec* %224, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%236 = load %struct.edge_rec** %235, align 4		; <%struct.edge_rec*> [#uses=1]
+	%236 = load %struct.edge_rec*, %struct.edge_rec** %235, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %234, %struct.edge_rec** %235, align 4
 	store %struct.edge_rec* %236, %struct.edge_rec** %233, align 4
-	%237 = load %struct.edge_rec** %217, align 4		; <%struct.edge_rec*> [#uses=1]
-	%238 = load %struct.edge_rec** %225, align 4		; <%struct.edge_rec*> [#uses=1]
+	%237 = load %struct.edge_rec*, %struct.edge_rec** %217, align 4		; <%struct.edge_rec*> [#uses=1]
+	%238 = load %struct.edge_rec*, %struct.edge_rec** %225, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %237, %struct.edge_rec** %225, align 4
 	store %struct.edge_rec* %238, %struct.edge_rec** %217, align 4
 	%239 = xor i32 %203, 32		; <i32> [#uses=2]
@@ -331,7 +331,7 @@
 	%242 = or i32 %241, %206		; <i32> [#uses=1]
 	%243 = inttoptr i32 %242 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%244 = getelementptr %struct.edge_rec, %struct.edge_rec* %243, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%245 = load %struct.edge_rec** %244, align 4		; <%struct.edge_rec*> [#uses=1]
+	%245 = load %struct.edge_rec*, %struct.edge_rec** %244, align 4		; <%struct.edge_rec*> [#uses=1]
 	%246 = ptrtoint %struct.edge_rec* %245 to i32		; <i32> [#uses=2]
 	%247 = add i32 %246, 16		; <i32> [#uses=1]
 	%248 = and i32 %247, 63		; <i32> [#uses=1]
@@ -340,7 +340,7 @@
 	%251 = inttoptr i32 %250 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%252 = inttoptr i32 %239 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%253 = getelementptr %struct.edge_rec, %struct.edge_rec* %252, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%254 = load %struct.edge_rec** %253, align 4		; <%struct.edge_rec*> [#uses=1]
+	%254 = load %struct.edge_rec*, %struct.edge_rec** %253, align 4		; <%struct.edge_rec*> [#uses=1]
 	%255 = ptrtoint %struct.edge_rec* %254 to i32		; <i32> [#uses=2]
 	%256 = add i32 %255, 16		; <i32> [#uses=1]
 	%257 = and i32 %256, 63		; <i32> [#uses=1]
@@ -348,7 +348,7 @@
 	%259 = or i32 %257, %258		; <i32> [#uses=1]
 	%260 = inttoptr i32 %259 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%261 = getelementptr %struct.edge_rec, %struct.edge_rec* %251, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%262 = load %struct.edge_rec** %261, align 4		; <%struct.edge_rec*> [#uses=1]
+	%262 = load %struct.edge_rec*, %struct.edge_rec** %261, align 4		; <%struct.edge_rec*> [#uses=1]
 	%263 = ptrtoint %struct.edge_rec* %262 to i32		; <i32> [#uses=2]
 	%264 = add i32 %263, 16		; <i32> [#uses=1]
 	%265 = and i32 %264, 63		; <i32> [#uses=1]
@@ -356,22 +356,22 @@
 	%267 = or i32 %265, %266		; <i32> [#uses=1]
 	%268 = inttoptr i32 %267 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%269 = getelementptr %struct.edge_rec, %struct.edge_rec* %268, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%270 = load %struct.edge_rec** %269, align 4		; <%struct.edge_rec*> [#uses=1]
+	%270 = load %struct.edge_rec*, %struct.edge_rec** %269, align 4		; <%struct.edge_rec*> [#uses=1]
 	%271 = getelementptr %struct.edge_rec, %struct.edge_rec* %260, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%272 = load %struct.edge_rec** %271, align 4		; <%struct.edge_rec*> [#uses=1]
+	%272 = load %struct.edge_rec*, %struct.edge_rec** %271, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %270, %struct.edge_rec** %271, align 4
 	store %struct.edge_rec* %272, %struct.edge_rec** %269, align 4
-	%273 = load %struct.edge_rec** %253, align 4		; <%struct.edge_rec*> [#uses=1]
-	%274 = load %struct.edge_rec** %261, align 4		; <%struct.edge_rec*> [#uses=1]
+	%273 = load %struct.edge_rec*, %struct.edge_rec** %253, align 4		; <%struct.edge_rec*> [#uses=1]
+	%274 = load %struct.edge_rec*, %struct.edge_rec** %261, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %273, %struct.edge_rec** %261, align 4
 	store %struct.edge_rec* %274, %struct.edge_rec** %253, align 4
 	%275 = inttoptr i32 %206 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
 	%276 = getelementptr %struct.edge_rec, %struct.edge_rec* %275, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
 	store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** %276, align 4
 	%277 = getelementptr %struct.edge_rec, %struct.edge_rec* %t.0.i, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%278 = load %struct.edge_rec** %277, align 4		; <%struct.edge_rec*> [#uses=2]
-	%.pre.i = load double* %182, align 4		; <double> [#uses=1]
-	%.pre22.i = load double* %184, align 4		; <double> [#uses=1]
+	%278 = load %struct.edge_rec*, %struct.edge_rec** %277, align 4		; <%struct.edge_rec*> [#uses=2]
+	%.pre.i = load double, double* %182, align 4		; <double> [#uses=1]
+	%.pre22.i = load double, double* %184, align 4		; <double> [#uses=1]
 	br label %bb12.i
 
 bb12.i:		; preds = %bb11.i, %bb10.i
@@ -392,34 +392,34 @@
 	%v1.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn5.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
 	%v2.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn4.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
 	%v3.0.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.2.pn.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%v1.0.i = load %struct.VERTEX** %v1.0.in.i		; <%struct.VERTEX*> [#uses=3]
-	%v2.0.i = load %struct.VERTEX** %v2.0.in.i		; <%struct.VERTEX*> [#uses=3]
-	%v3.0.i = load %struct.VERTEX** %v3.0.in.i		; <%struct.VERTEX*> [#uses=3]
-	%281 = load double* %202, align 4		; <double> [#uses=3]
+	%v1.0.i = load %struct.VERTEX*, %struct.VERTEX** %v1.0.in.i		; <%struct.VERTEX*> [#uses=3]
+	%v2.0.i = load %struct.VERTEX*, %struct.VERTEX** %v2.0.in.i		; <%struct.VERTEX*> [#uses=3]
+	%v3.0.i = load %struct.VERTEX*, %struct.VERTEX** %v3.0.in.i		; <%struct.VERTEX*> [#uses=3]
+	%281 = load double, double* %202, align 4		; <double> [#uses=3]
 	%282 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%283 = load double* %282, align 4		; <double> [#uses=1]
+	%283 = load double, double* %282, align 4		; <double> [#uses=1]
 	%284 = fsub double %283, %280		; <double> [#uses=2]
 	%285 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%286 = load double* %285, align 4		; <double> [#uses=1]
+	%286 = load double, double* %285, align 4		; <double> [#uses=1]
 	%287 = fsub double %286, %279		; <double> [#uses=2]
 	%288 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.0.i, i32 0, i32 0, i32 2		; <double*> [#uses=1]
-	%289 = load double* %288, align 4		; <double> [#uses=1]
+	%289 = load double, double* %288, align 4		; <double> [#uses=1]
 	%290 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%291 = load double* %290, align 4		; <double> [#uses=1]
+	%291 = load double, double* %290, align 4		; <double> [#uses=1]
 	%292 = fsub double %291, %280		; <double> [#uses=2]
 	%293 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%294 = load double* %293, align 4		; <double> [#uses=1]
+	%294 = load double, double* %293, align 4		; <double> [#uses=1]
 	%295 = fsub double %294, %279		; <double> [#uses=2]
 	%296 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.0.i, i32 0, i32 0, i32 2		; <double*> [#uses=1]
-	%297 = load double* %296, align 4		; <double> [#uses=1]
+	%297 = load double, double* %296, align 4		; <double> [#uses=1]
 	%298 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%299 = load double* %298, align 4		; <double> [#uses=1]
+	%299 = load double, double* %298, align 4		; <double> [#uses=1]
 	%300 = fsub double %299, %280		; <double> [#uses=2]
 	%301 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%302 = load double* %301, align 4		; <double> [#uses=1]
+	%302 = load double, double* %301, align 4		; <double> [#uses=1]
 	%303 = fsub double %302, %279		; <double> [#uses=2]
 	%304 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.0.i, i32 0, i32 0, i32 2		; <double*> [#uses=1]
-	%305 = load double* %304, align 4		; <double> [#uses=1]
+	%305 = load double, double* %304, align 4		; <double> [#uses=1]
 	%306 = fsub double %289, %281		; <double> [#uses=1]
 	%307 = fmul double %292, %303		; <double> [#uses=1]
 	%308 = fmul double %295, %300		; <double> [#uses=1]
@@ -442,8 +442,8 @@
 
 bb13.loopexit.i:		; preds = %bb12.i
 	store %struct.edge_rec* %avail_edge.tmp.026, %struct.edge_rec** @avail_edge
-	%.pre23.i = load %struct.VERTEX** %170, align 4		; <%struct.VERTEX*> [#uses=1]
-	%.pre24.i = load %struct.VERTEX** %175, align 4		; <%struct.VERTEX*> [#uses=1]
+	%.pre23.i = load %struct.VERTEX*, %struct.VERTEX** %170, align 4		; <%struct.VERTEX*> [#uses=1]
+	%.pre24.i = load %struct.VERTEX*, %struct.VERTEX** %175, align 4		; <%struct.VERTEX*> [#uses=1]
 	br label %bb13.i
 
 bb13.i:		; preds = %bb13.loopexit.i, %bb9.i
@@ -457,7 +457,7 @@
 	%330 = or i32 %328, %329		; <i32> [#uses=1]
 	%331 = inttoptr i32 %330 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%332 = getelementptr %struct.edge_rec, %struct.edge_rec* %331, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%333 = load %struct.edge_rec** %332, align 4		; <%struct.edge_rec*> [#uses=1]
+	%333 = load %struct.edge_rec*, %struct.edge_rec** %332, align 4		; <%struct.edge_rec*> [#uses=1]
 	%334 = ptrtoint %struct.edge_rec* %333 to i32		; <i32> [#uses=2]
 	%335 = add i32 %334, 16		; <i32> [#uses=1]
 	%336 = and i32 %335, 63		; <i32> [#uses=1]
@@ -466,19 +466,19 @@
 	%339 = xor i32 %338, 32		; <i32> [#uses=1]
 	%340 = inttoptr i32 %339 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%341 = getelementptr %struct.edge_rec, %struct.edge_rec* %340, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%342 = load %struct.VERTEX** %341, align 4		; <%struct.VERTEX*> [#uses=2]
+	%342 = load %struct.VERTEX*, %struct.VERTEX** %341, align 4		; <%struct.VERTEX*> [#uses=2]
 	%343 = getelementptr %struct.VERTEX, %struct.VERTEX* %325, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%344 = load double* %343, align 4		; <double> [#uses=1]
+	%344 = load double, double* %343, align 4		; <double> [#uses=1]
 	%345 = getelementptr %struct.VERTEX, %struct.VERTEX* %325, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%346 = load double* %345, align 4		; <double> [#uses=1]
+	%346 = load double, double* %345, align 4		; <double> [#uses=1]
 	%347 = getelementptr %struct.VERTEX, %struct.VERTEX* %342, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%348 = load double* %347, align 4		; <double> [#uses=1]
+	%348 = load double, double* %347, align 4		; <double> [#uses=1]
 	%349 = getelementptr %struct.VERTEX, %struct.VERTEX* %342, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%350 = load double* %349, align 4		; <double> [#uses=1]
+	%350 = load double, double* %349, align 4		; <double> [#uses=1]
 	%351 = getelementptr %struct.VERTEX, %struct.VERTEX* %324, i32 0, i32 0, i32 0		; <double*> [#uses=2]
-	%352 = load double* %351, align 4		; <double> [#uses=3]
+	%352 = load double, double* %351, align 4		; <double> [#uses=3]
 	%353 = getelementptr %struct.VERTEX, %struct.VERTEX* %324, i32 0, i32 0, i32 1		; <double*> [#uses=2]
-	%354 = load double* %353, align 4		; <double> [#uses=3]
+	%354 = load double, double* %353, align 4		; <double> [#uses=3]
 	%355 = fsub double %344, %352		; <double> [#uses=1]
 	%356 = fsub double %350, %354		; <double> [#uses=1]
 	%357 = fmul double %355, %356		; <double> [#uses=1]
@@ -491,7 +491,7 @@
 
 bb14.i:		; preds = %bb13.i
 	%363 = getelementptr %struct.VERTEX, %struct.VERTEX* %324, i32 0, i32 0, i32 2		; <double*> [#uses=1]
-	%avail_edge.promoted = load %struct.edge_rec** @avail_edge		; <%struct.edge_rec*> [#uses=1]
+	%avail_edge.promoted = load %struct.edge_rec*, %struct.edge_rec** @avail_edge		; <%struct.edge_rec*> [#uses=1]
 	br label %bb16.i
 
 bb15.i:		; preds = %bb16.i
@@ -502,7 +502,7 @@
 	%368 = or i32 %366, %367		; <i32> [#uses=1]
 	%369 = inttoptr i32 %368 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%370 = getelementptr %struct.edge_rec, %struct.edge_rec* %369, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%371 = load %struct.edge_rec** %370, align 4		; <%struct.edge_rec*> [#uses=1]
+	%371 = load %struct.edge_rec*, %struct.edge_rec** %370, align 4		; <%struct.edge_rec*> [#uses=1]
 	%372 = ptrtoint %struct.edge_rec* %371 to i32		; <i32> [#uses=2]
 	%373 = add i32 %372, 16		; <i32> [#uses=1]
 	%374 = and i32 %373, 63		; <i32> [#uses=1]
@@ -510,7 +510,7 @@
 	%376 = or i32 %374, %375		; <i32> [#uses=1]
 	%377 = inttoptr i32 %376 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%378 = getelementptr %struct.edge_rec, %struct.edge_rec* %rcand.0.i, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%379 = load %struct.edge_rec** %378, align 4		; <%struct.edge_rec*> [#uses=1]
+	%379 = load %struct.edge_rec*, %struct.edge_rec** %378, align 4		; <%struct.edge_rec*> [#uses=1]
 	%380 = ptrtoint %struct.edge_rec* %379 to i32		; <i32> [#uses=2]
 	%381 = add i32 %380, 16		; <i32> [#uses=1]
 	%382 = and i32 %381, 63		; <i32> [#uses=1]
@@ -518,7 +518,7 @@
 	%384 = or i32 %382, %383		; <i32> [#uses=1]
 	%385 = inttoptr i32 %384 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%386 = getelementptr %struct.edge_rec, %struct.edge_rec* %377, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%387 = load %struct.edge_rec** %386, align 4		; <%struct.edge_rec*> [#uses=1]
+	%387 = load %struct.edge_rec*, %struct.edge_rec** %386, align 4		; <%struct.edge_rec*> [#uses=1]
 	%388 = ptrtoint %struct.edge_rec* %387 to i32		; <i32> [#uses=2]
 	%389 = add i32 %388, 16		; <i32> [#uses=1]
 	%390 = and i32 %389, 63		; <i32> [#uses=1]
@@ -526,13 +526,13 @@
 	%392 = or i32 %390, %391		; <i32> [#uses=1]
 	%393 = inttoptr i32 %392 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%394 = getelementptr %struct.edge_rec, %struct.edge_rec* %393, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%395 = load %struct.edge_rec** %394, align 4		; <%struct.edge_rec*> [#uses=1]
+	%395 = load %struct.edge_rec*, %struct.edge_rec** %394, align 4		; <%struct.edge_rec*> [#uses=1]
 	%396 = getelementptr %struct.edge_rec, %struct.edge_rec* %385, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%397 = load %struct.edge_rec** %396, align 4		; <%struct.edge_rec*> [#uses=1]
+	%397 = load %struct.edge_rec*, %struct.edge_rec** %396, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %395, %struct.edge_rec** %396, align 4
 	store %struct.edge_rec* %397, %struct.edge_rec** %394, align 4
-	%398 = load %struct.edge_rec** %378, align 4		; <%struct.edge_rec*> [#uses=1]
-	%399 = load %struct.edge_rec** %386, align 4		; <%struct.edge_rec*> [#uses=1]
+	%398 = load %struct.edge_rec*, %struct.edge_rec** %378, align 4		; <%struct.edge_rec*> [#uses=1]
+	%399 = load %struct.edge_rec*, %struct.edge_rec** %386, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %398, %struct.edge_rec** %386, align 4
 	store %struct.edge_rec* %399, %struct.edge_rec** %378, align 4
 	%400 = xor i32 %364, 32		; <i32> [#uses=2]
@@ -541,7 +541,7 @@
 	%403 = or i32 %402, %367		; <i32> [#uses=1]
 	%404 = inttoptr i32 %403 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%405 = getelementptr %struct.edge_rec, %struct.edge_rec* %404, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%406 = load %struct.edge_rec** %405, align 4		; <%struct.edge_rec*> [#uses=1]
+	%406 = load %struct.edge_rec*, %struct.edge_rec** %405, align 4		; <%struct.edge_rec*> [#uses=1]
 	%407 = ptrtoint %struct.edge_rec* %406 to i32		; <i32> [#uses=2]
 	%408 = add i32 %407, 16		; <i32> [#uses=1]
 	%409 = and i32 %408, 63		; <i32> [#uses=1]
@@ -550,7 +550,7 @@
 	%412 = inttoptr i32 %411 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%413 = inttoptr i32 %400 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%414 = getelementptr %struct.edge_rec, %struct.edge_rec* %413, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%415 = load %struct.edge_rec** %414, align 4		; <%struct.edge_rec*> [#uses=1]
+	%415 = load %struct.edge_rec*, %struct.edge_rec** %414, align 4		; <%struct.edge_rec*> [#uses=1]
 	%416 = ptrtoint %struct.edge_rec* %415 to i32		; <i32> [#uses=2]
 	%417 = add i32 %416, 16		; <i32> [#uses=1]
 	%418 = and i32 %417, 63		; <i32> [#uses=1]
@@ -558,7 +558,7 @@
 	%420 = or i32 %418, %419		; <i32> [#uses=1]
 	%421 = inttoptr i32 %420 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%422 = getelementptr %struct.edge_rec, %struct.edge_rec* %412, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%423 = load %struct.edge_rec** %422, align 4		; <%struct.edge_rec*> [#uses=1]
+	%423 = load %struct.edge_rec*, %struct.edge_rec** %422, align 4		; <%struct.edge_rec*> [#uses=1]
 	%424 = ptrtoint %struct.edge_rec* %423 to i32		; <i32> [#uses=2]
 	%425 = add i32 %424, 16		; <i32> [#uses=1]
 	%426 = and i32 %425, 63		; <i32> [#uses=1]
@@ -566,13 +566,13 @@
 	%428 = or i32 %426, %427		; <i32> [#uses=1]
 	%429 = inttoptr i32 %428 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%430 = getelementptr %struct.edge_rec, %struct.edge_rec* %429, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%431 = load %struct.edge_rec** %430, align 4		; <%struct.edge_rec*> [#uses=1]
+	%431 = load %struct.edge_rec*, %struct.edge_rec** %430, align 4		; <%struct.edge_rec*> [#uses=1]
 	%432 = getelementptr %struct.edge_rec, %struct.edge_rec* %421, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%433 = load %struct.edge_rec** %432, align 4		; <%struct.edge_rec*> [#uses=1]
+	%433 = load %struct.edge_rec*, %struct.edge_rec** %432, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %431, %struct.edge_rec** %432, align 4
 	store %struct.edge_rec* %433, %struct.edge_rec** %430, align 4
-	%434 = load %struct.edge_rec** %414, align 4		; <%struct.edge_rec*> [#uses=1]
-	%435 = load %struct.edge_rec** %422, align 4		; <%struct.edge_rec*> [#uses=1]
+	%434 = load %struct.edge_rec*, %struct.edge_rec** %414, align 4		; <%struct.edge_rec*> [#uses=1]
+	%435 = load %struct.edge_rec*, %struct.edge_rec** %422, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %434, %struct.edge_rec** %422, align 4
 	store %struct.edge_rec* %435, %struct.edge_rec** %414, align 4
 	%436 = inttoptr i32 %367 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
@@ -584,14 +584,14 @@
 	%441 = or i32 %439, %440		; <i32> [#uses=1]
 	%442 = inttoptr i32 %441 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%443 = getelementptr %struct.edge_rec, %struct.edge_rec* %442, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%444 = load %struct.edge_rec** %443, align 4		; <%struct.edge_rec*> [#uses=1]
+	%444 = load %struct.edge_rec*, %struct.edge_rec** %443, align 4		; <%struct.edge_rec*> [#uses=1]
 	%445 = ptrtoint %struct.edge_rec* %444 to i32		; <i32> [#uses=2]
 	%446 = add i32 %445, 16		; <i32> [#uses=1]
 	%447 = and i32 %446, 63		; <i32> [#uses=1]
 	%448 = and i32 %445, -64		; <i32> [#uses=1]
 	%449 = or i32 %447, %448		; <i32> [#uses=2]
-	%.pre25.i = load double* %351, align 4		; <double> [#uses=1]
-	%.pre26.i = load double* %353, align 4		; <double> [#uses=1]
+	%.pre25.i = load double, double* %351, align 4		; <double> [#uses=1]
+	%.pre26.i = load double, double* %353, align 4		; <double> [#uses=1]
 	br label %bb16.i
 
 bb16.i:		; preds = %bb15.i, %bb14.i
@@ -612,34 +612,34 @@
 	%v1.1.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn3.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
 	%v2.1.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %.pn.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
 	%v3.1.in.i = getelementptr %struct.edge_rec, %struct.edge_rec* %rcand.2.pn.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%v1.1.i = load %struct.VERTEX** %v1.1.in.i		; <%struct.VERTEX*> [#uses=3]
-	%v2.1.i = load %struct.VERTEX** %v2.1.in.i		; <%struct.VERTEX*> [#uses=3]
-	%v3.1.i = load %struct.VERTEX** %v3.1.in.i		; <%struct.VERTEX*> [#uses=3]
-	%452 = load double* %363, align 4		; <double> [#uses=3]
+	%v1.1.i = load %struct.VERTEX*, %struct.VERTEX** %v1.1.in.i		; <%struct.VERTEX*> [#uses=3]
+	%v2.1.i = load %struct.VERTEX*, %struct.VERTEX** %v2.1.in.i		; <%struct.VERTEX*> [#uses=3]
+	%v3.1.i = load %struct.VERTEX*, %struct.VERTEX** %v3.1.in.i		; <%struct.VERTEX*> [#uses=3]
+	%452 = load double, double* %363, align 4		; <double> [#uses=3]
 	%453 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%454 = load double* %453, align 4		; <double> [#uses=1]
+	%454 = load double, double* %453, align 4		; <double> [#uses=1]
 	%455 = fsub double %454, %451		; <double> [#uses=2]
 	%456 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%457 = load double* %456, align 4		; <double> [#uses=1]
+	%457 = load double, double* %456, align 4		; <double> [#uses=1]
 	%458 = fsub double %457, %450		; <double> [#uses=2]
 	%459 = getelementptr %struct.VERTEX, %struct.VERTEX* %v1.1.i, i32 0, i32 0, i32 2		; <double*> [#uses=1]
-	%460 = load double* %459, align 4		; <double> [#uses=1]
+	%460 = load double, double* %459, align 4		; <double> [#uses=1]
 	%461 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%462 = load double* %461, align 4		; <double> [#uses=1]
+	%462 = load double, double* %461, align 4		; <double> [#uses=1]
 	%463 = fsub double %462, %451		; <double> [#uses=2]
 	%464 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%465 = load double* %464, align 4		; <double> [#uses=1]
+	%465 = load double, double* %464, align 4		; <double> [#uses=1]
 	%466 = fsub double %465, %450		; <double> [#uses=2]
 	%467 = getelementptr %struct.VERTEX, %struct.VERTEX* %v2.1.i, i32 0, i32 0, i32 2		; <double*> [#uses=1]
-	%468 = load double* %467, align 4		; <double> [#uses=1]
+	%468 = load double, double* %467, align 4		; <double> [#uses=1]
 	%469 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%470 = load double* %469, align 4		; <double> [#uses=1]
+	%470 = load double, double* %469, align 4		; <double> [#uses=1]
 	%471 = fsub double %470, %451		; <double> [#uses=2]
 	%472 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%473 = load double* %472, align 4		; <double> [#uses=1]
+	%473 = load double, double* %472, align 4		; <double> [#uses=1]
 	%474 = fsub double %473, %450		; <double> [#uses=2]
 	%475 = getelementptr %struct.VERTEX, %struct.VERTEX* %v3.1.i, i32 0, i32 0, i32 2		; <double*> [#uses=1]
-	%476 = load double* %475, align 4		; <double> [#uses=1]
+	%476 = load double, double* %475, align 4		; <double> [#uses=1]
 	%477 = fsub double %460, %452		; <double> [#uses=1]
 	%478 = fmul double %463, %474		; <double> [#uses=1]
 	%479 = fmul double %466, %471		; <double> [#uses=1]
@@ -662,8 +662,8 @@
 
 bb17.loopexit.i:		; preds = %bb16.i
 	store %struct.edge_rec* %avail_edge.tmp.0, %struct.edge_rec** @avail_edge
-	%.pre27.i = load %struct.VERTEX** %170, align 4		; <%struct.VERTEX*> [#uses=1]
-	%.pre28.i = load %struct.VERTEX** %175, align 4		; <%struct.VERTEX*> [#uses=1]
+	%.pre27.i = load %struct.VERTEX*, %struct.VERTEX** %170, align 4		; <%struct.VERTEX*> [#uses=1]
+	%.pre28.i = load %struct.VERTEX*, %struct.VERTEX** %175, align 4		; <%struct.VERTEX*> [#uses=1]
 	br label %bb17.i
 
 bb17.i:		; preds = %bb17.loopexit.i, %bb13.i
@@ -674,19 +674,19 @@
 	%498 = xor i32 %497, 32		; <i32> [#uses=1]
 	%499 = inttoptr i32 %498 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
 	%500 = getelementptr %struct.edge_rec, %struct.edge_rec* %499, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%501 = load %struct.VERTEX** %500, align 4		; <%struct.VERTEX*> [#uses=4]
+	%501 = load %struct.VERTEX*, %struct.VERTEX** %500, align 4		; <%struct.VERTEX*> [#uses=4]
 	%502 = getelementptr %struct.VERTEX, %struct.VERTEX* %496, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%503 = load double* %502, align 4		; <double> [#uses=1]
+	%503 = load double, double* %502, align 4		; <double> [#uses=1]
 	%504 = getelementptr %struct.VERTEX, %struct.VERTEX* %496, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%505 = load double* %504, align 4		; <double> [#uses=1]
+	%505 = load double, double* %504, align 4		; <double> [#uses=1]
 	%506 = getelementptr %struct.VERTEX, %struct.VERTEX* %501, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%507 = load double* %506, align 4		; <double> [#uses=2]
+	%507 = load double, double* %506, align 4		; <double> [#uses=2]
 	%508 = getelementptr %struct.VERTEX, %struct.VERTEX* %501, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%509 = load double* %508, align 4		; <double> [#uses=2]
+	%509 = load double, double* %508, align 4		; <double> [#uses=2]
 	%510 = getelementptr %struct.VERTEX, %struct.VERTEX* %495, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%511 = load double* %510, align 4		; <double> [#uses=3]
+	%511 = load double, double* %510, align 4		; <double> [#uses=3]
 	%512 = getelementptr %struct.VERTEX, %struct.VERTEX* %495, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%513 = load double* %512, align 4		; <double> [#uses=3]
+	%513 = load double, double* %512, align 4		; <double> [#uses=3]
 	%514 = fsub double %503, %511		; <double> [#uses=2]
 	%515 = fsub double %509, %513		; <double> [#uses=1]
 	%516 = fmul double %514, %515		; <double> [#uses=1]
@@ -699,11 +699,11 @@
 	%523 = xor i32 %522, 32		; <i32> [#uses=1]
 	%524 = inttoptr i32 %523 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%525 = getelementptr %struct.edge_rec, %struct.edge_rec* %524, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%526 = load %struct.VERTEX** %525, align 4		; <%struct.VERTEX*> [#uses=4]
+	%526 = load %struct.VERTEX*, %struct.VERTEX** %525, align 4		; <%struct.VERTEX*> [#uses=4]
 	%527 = getelementptr %struct.VERTEX, %struct.VERTEX* %526, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%528 = load double* %527, align 4		; <double> [#uses=4]
+	%528 = load double, double* %527, align 4		; <double> [#uses=4]
 	%529 = getelementptr %struct.VERTEX, %struct.VERTEX* %526, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%530 = load double* %529, align 4		; <double> [#uses=4]
+	%530 = load double, double* %529, align 4		; <double> [#uses=4]
 	%531 = fsub double %530, %513		; <double> [#uses=1]
 	%532 = fmul double %514, %531		; <double> [#uses=1]
 	%533 = fsub double %528, %511		; <double> [#uses=1]
@@ -715,9 +715,9 @@
 
 bb21.i:		; preds = %bb17.i
 	%538 = getelementptr %struct.edge_rec, %struct.edge_rec* %lcand.1.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%539 = load %struct.VERTEX** %538, align 4		; <%struct.VERTEX*> [#uses=3]
+	%539 = load %struct.VERTEX*, %struct.VERTEX** %538, align 4		; <%struct.VERTEX*> [#uses=3]
 	%540 = getelementptr %struct.edge_rec, %struct.edge_rec* %rcand.1.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%541 = load %struct.VERTEX** %540, align 4		; <%struct.VERTEX*> [#uses=3]
+	%541 = load %struct.VERTEX*, %struct.VERTEX** %540, align 4		; <%struct.VERTEX*> [#uses=3]
 	br i1 %521, label %bb22.i, label %bb24.i
 
 bb22.i:		; preds = %bb21.i
@@ -725,27 +725,27 @@
 
 bb23.i:		; preds = %bb22.i
 	%542 = getelementptr %struct.VERTEX, %struct.VERTEX* %526, i32 0, i32 0, i32 2		; <double*> [#uses=1]
-	%543 = load double* %542, align 4		; <double> [#uses=3]
+	%543 = load double, double* %542, align 4		; <double> [#uses=3]
 	%544 = fsub double %507, %528		; <double> [#uses=2]
 	%545 = fsub double %509, %530		; <double> [#uses=2]
 	%546 = getelementptr %struct.VERTEX, %struct.VERTEX* %501, i32 0, i32 0, i32 2		; <double*> [#uses=1]
-	%547 = load double* %546, align 4		; <double> [#uses=1]
+	%547 = load double, double* %546, align 4		; <double> [#uses=1]
 	%548 = getelementptr %struct.VERTEX, %struct.VERTEX* %539, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%549 = load double* %548, align 4		; <double> [#uses=1]
+	%549 = load double, double* %548, align 4		; <double> [#uses=1]
 	%550 = fsub double %549, %528		; <double> [#uses=2]
 	%551 = getelementptr %struct.VERTEX, %struct.VERTEX* %539, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%552 = load double* %551, align 4		; <double> [#uses=1]
+	%552 = load double, double* %551, align 4		; <double> [#uses=1]
 	%553 = fsub double %552, %530		; <double> [#uses=2]
 	%554 = getelementptr %struct.VERTEX, %struct.VERTEX* %539, i32 0, i32 0, i32 2		; <double*> [#uses=1]
-	%555 = load double* %554, align 4		; <double> [#uses=1]
+	%555 = load double, double* %554, align 4		; <double> [#uses=1]
 	%556 = getelementptr %struct.VERTEX, %struct.VERTEX* %541, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%557 = load double* %556, align 4		; <double> [#uses=1]
+	%557 = load double, double* %556, align 4		; <double> [#uses=1]
 	%558 = fsub double %557, %528		; <double> [#uses=2]
 	%559 = getelementptr %struct.VERTEX, %struct.VERTEX* %541, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%560 = load double* %559, align 4		; <double> [#uses=1]
+	%560 = load double, double* %559, align 4		; <double> [#uses=1]
 	%561 = fsub double %560, %530		; <double> [#uses=2]
 	%562 = getelementptr %struct.VERTEX, %struct.VERTEX* %541, i32 0, i32 0, i32 2		; <double*> [#uses=1]
-	%563 = load double* %562, align 4		; <double> [#uses=1]
+	%563 = load double, double* %562, align 4		; <double> [#uses=1]
 	%564 = fsub double %547, %543		; <double> [#uses=1]
 	%565 = fmul double %550, %561		; <double> [#uses=1]
 	%566 = fmul double %553, %558		; <double> [#uses=1]
@@ -773,7 +773,7 @@
 	%585 = or i32 %583, %584		; <i32> [#uses=1]
 	%586 = inttoptr i32 %585 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%587 = getelementptr %struct.edge_rec, %struct.edge_rec* %586, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%588 = load %struct.edge_rec** %587, align 4		; <%struct.edge_rec*> [#uses=1]
+	%588 = load %struct.edge_rec*, %struct.edge_rec** %587, align 4		; <%struct.edge_rec*> [#uses=1]
 	%589 = ptrtoint %struct.edge_rec* %588 to i32		; <i32> [#uses=2]
 	%590 = add i32 %589, 16		; <i32> [#uses=1]
 	%591 = and i32 %590, 63		; <i32> [#uses=1]
@@ -800,7 +800,7 @@
 	store %struct.VERTEX* %495, %struct.VERTEX** %607, align 4
 	%608 = getelementptr %struct.edge_rec, %struct.edge_rec* %602, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
 	store %struct.edge_rec* %600, %struct.edge_rec** %608, align 4
-	%609 = load %struct.edge_rec** %596, align 4		; <%struct.edge_rec*> [#uses=1]
+	%609 = load %struct.edge_rec*, %struct.edge_rec** %596, align 4		; <%struct.edge_rec*> [#uses=1]
 	%610 = ptrtoint %struct.edge_rec* %609 to i32		; <i32> [#uses=2]
 	%611 = add i32 %610, 16		; <i32> [#uses=1]
 	%612 = and i32 %611, 63		; <i32> [#uses=1]
@@ -808,7 +808,7 @@
 	%614 = or i32 %612, %613		; <i32> [#uses=1]
 	%615 = inttoptr i32 %614 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%616 = getelementptr %struct.edge_rec, %struct.edge_rec* %594, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%617 = load %struct.edge_rec** %616, align 4		; <%struct.edge_rec*> [#uses=1]
+	%617 = load %struct.edge_rec*, %struct.edge_rec** %616, align 4		; <%struct.edge_rec*> [#uses=1]
 	%618 = ptrtoint %struct.edge_rec* %617 to i32		; <i32> [#uses=2]
 	%619 = add i32 %618, 16		; <i32> [#uses=1]
 	%620 = and i32 %619, 63		; <i32> [#uses=1]
@@ -816,19 +816,19 @@
 	%622 = or i32 %620, %621		; <i32> [#uses=1]
 	%623 = inttoptr i32 %622 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%624 = getelementptr %struct.edge_rec, %struct.edge_rec* %623, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%625 = load %struct.edge_rec** %624, align 4		; <%struct.edge_rec*> [#uses=1]
+	%625 = load %struct.edge_rec*, %struct.edge_rec** %624, align 4		; <%struct.edge_rec*> [#uses=1]
 	%626 = getelementptr %struct.edge_rec, %struct.edge_rec* %615, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%627 = load %struct.edge_rec** %626, align 4		; <%struct.edge_rec*> [#uses=1]
+	%627 = load %struct.edge_rec*, %struct.edge_rec** %626, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %625, %struct.edge_rec** %626, align 4
 	store %struct.edge_rec* %627, %struct.edge_rec** %624, align 4
-	%628 = load %struct.edge_rec** %596, align 4		; <%struct.edge_rec*> [#uses=1]
-	%629 = load %struct.edge_rec** %616, align 4		; <%struct.edge_rec*> [#uses=1]
+	%628 = load %struct.edge_rec*, %struct.edge_rec** %596, align 4		; <%struct.edge_rec*> [#uses=1]
+	%629 = load %struct.edge_rec*, %struct.edge_rec** %616, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %628, %struct.edge_rec** %616, align 4
 	store %struct.edge_rec* %629, %struct.edge_rec** %596, align 4
 	%630 = xor i32 %598, 32		; <i32> [#uses=2]
 	%631 = inttoptr i32 %630 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%632 = getelementptr %struct.edge_rec, %struct.edge_rec* %631, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%633 = load %struct.edge_rec** %632, align 4		; <%struct.edge_rec*> [#uses=1]
+	%633 = load %struct.edge_rec*, %struct.edge_rec** %632, align 4		; <%struct.edge_rec*> [#uses=1]
 	%634 = ptrtoint %struct.edge_rec* %633 to i32		; <i32> [#uses=2]
 	%635 = add i32 %634, 16		; <i32> [#uses=1]
 	%636 = and i32 %635, 63		; <i32> [#uses=1]
@@ -836,7 +836,7 @@
 	%638 = or i32 %636, %637		; <i32> [#uses=1]
 	%639 = inttoptr i32 %638 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%640 = getelementptr %struct.edge_rec, %struct.edge_rec* %174, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%641 = load %struct.edge_rec** %640, align 4		; <%struct.edge_rec*> [#uses=1]
+	%641 = load %struct.edge_rec*, %struct.edge_rec** %640, align 4		; <%struct.edge_rec*> [#uses=1]
 	%642 = ptrtoint %struct.edge_rec* %641 to i32		; <i32> [#uses=2]
 	%643 = add i32 %642, 16		; <i32> [#uses=1]
 	%644 = and i32 %643, 63		; <i32> [#uses=1]
@@ -844,13 +844,13 @@
 	%646 = or i32 %644, %645		; <i32> [#uses=1]
 	%647 = inttoptr i32 %646 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%648 = getelementptr %struct.edge_rec, %struct.edge_rec* %647, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%649 = load %struct.edge_rec** %648, align 4		; <%struct.edge_rec*> [#uses=1]
+	%649 = load %struct.edge_rec*, %struct.edge_rec** %648, align 4		; <%struct.edge_rec*> [#uses=1]
 	%650 = getelementptr %struct.edge_rec, %struct.edge_rec* %639, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%651 = load %struct.edge_rec** %650, align 4		; <%struct.edge_rec*> [#uses=1]
+	%651 = load %struct.edge_rec*, %struct.edge_rec** %650, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %649, %struct.edge_rec** %650, align 4
 	store %struct.edge_rec* %651, %struct.edge_rec** %648, align 4
-	%652 = load %struct.edge_rec** %632, align 4		; <%struct.edge_rec*> [#uses=1]
-	%653 = load %struct.edge_rec** %640, align 4		; <%struct.edge_rec*> [#uses=1]
+	%652 = load %struct.edge_rec*, %struct.edge_rec** %632, align 4		; <%struct.edge_rec*> [#uses=1]
+	%653 = load %struct.edge_rec*, %struct.edge_rec** %640, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %652, %struct.edge_rec** %640, align 4
 	store %struct.edge_rec* %653, %struct.edge_rec** %632, align 4
 	%654 = add i32 %630, 48		; <i32> [#uses=1]
@@ -859,7 +859,7 @@
 	%657 = or i32 %655, %656		; <i32> [#uses=1]
 	%658 = inttoptr i32 %657 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%659 = getelementptr %struct.edge_rec, %struct.edge_rec* %658, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%660 = load %struct.edge_rec** %659, align 4		; <%struct.edge_rec*> [#uses=1]
+	%660 = load %struct.edge_rec*, %struct.edge_rec** %659, align 4		; <%struct.edge_rec*> [#uses=1]
 	%661 = ptrtoint %struct.edge_rec* %660 to i32		; <i32> [#uses=2]
 	%662 = add i32 %661, 16		; <i32> [#uses=1]
 	%663 = and i32 %662, 63		; <i32> [#uses=1]
@@ -875,7 +875,7 @@
 	%670 = or i32 %668, %669		; <i32> [#uses=1]
 	%671 = inttoptr i32 %670 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%672 = getelementptr %struct.edge_rec, %struct.edge_rec* %671, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%673 = load %struct.edge_rec** %672, align 4		; <%struct.edge_rec*> [#uses=1]
+	%673 = load %struct.edge_rec*, %struct.edge_rec** %672, align 4		; <%struct.edge_rec*> [#uses=1]
 	%674 = ptrtoint %struct.edge_rec* %673 to i32		; <i32> [#uses=2]
 	%675 = add i32 %674, 16		; <i32> [#uses=1]
 	%676 = and i32 %675, 63		; <i32> [#uses=1]
@@ -902,7 +902,7 @@
 	store %struct.VERTEX* %496, %struct.VERTEX** %692, align 4
 	%693 = getelementptr %struct.edge_rec, %struct.edge_rec* %687, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
 	store %struct.edge_rec* %685, %struct.edge_rec** %693, align 4
-	%694 = load %struct.edge_rec** %681, align 4		; <%struct.edge_rec*> [#uses=1]
+	%694 = load %struct.edge_rec*, %struct.edge_rec** %681, align 4		; <%struct.edge_rec*> [#uses=1]
 	%695 = ptrtoint %struct.edge_rec* %694 to i32		; <i32> [#uses=2]
 	%696 = add i32 %695, 16		; <i32> [#uses=1]
 	%697 = and i32 %696, 63		; <i32> [#uses=1]
@@ -910,7 +910,7 @@
 	%699 = or i32 %697, %698		; <i32> [#uses=1]
 	%700 = inttoptr i32 %699 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%701 = getelementptr %struct.edge_rec, %struct.edge_rec* %499, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%702 = load %struct.edge_rec** %701, align 4		; <%struct.edge_rec*> [#uses=1]
+	%702 = load %struct.edge_rec*, %struct.edge_rec** %701, align 4		; <%struct.edge_rec*> [#uses=1]
 	%703 = ptrtoint %struct.edge_rec* %702 to i32		; <i32> [#uses=2]
 	%704 = add i32 %703, 16		; <i32> [#uses=1]
 	%705 = and i32 %704, 63		; <i32> [#uses=1]
@@ -918,19 +918,19 @@
 	%707 = or i32 %705, %706		; <i32> [#uses=1]
 	%708 = inttoptr i32 %707 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%709 = getelementptr %struct.edge_rec, %struct.edge_rec* %708, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%710 = load %struct.edge_rec** %709, align 4		; <%struct.edge_rec*> [#uses=1]
+	%710 = load %struct.edge_rec*, %struct.edge_rec** %709, align 4		; <%struct.edge_rec*> [#uses=1]
 	%711 = getelementptr %struct.edge_rec, %struct.edge_rec* %700, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%712 = load %struct.edge_rec** %711, align 4		; <%struct.edge_rec*> [#uses=1]
+	%712 = load %struct.edge_rec*, %struct.edge_rec** %711, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %710, %struct.edge_rec** %711, align 4
 	store %struct.edge_rec* %712, %struct.edge_rec** %709, align 4
-	%713 = load %struct.edge_rec** %681, align 4		; <%struct.edge_rec*> [#uses=1]
-	%714 = load %struct.edge_rec** %701, align 4		; <%struct.edge_rec*> [#uses=1]
+	%713 = load %struct.edge_rec*, %struct.edge_rec** %681, align 4		; <%struct.edge_rec*> [#uses=1]
+	%714 = load %struct.edge_rec*, %struct.edge_rec** %701, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %713, %struct.edge_rec** %701, align 4
 	store %struct.edge_rec* %714, %struct.edge_rec** %681, align 4
 	%715 = xor i32 %683, 32		; <i32> [#uses=1]
 	%716 = inttoptr i32 %715 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
 	%717 = getelementptr %struct.edge_rec, %struct.edge_rec* %716, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%718 = load %struct.edge_rec** %717, align 4		; <%struct.edge_rec*> [#uses=1]
+	%718 = load %struct.edge_rec*, %struct.edge_rec** %717, align 4		; <%struct.edge_rec*> [#uses=1]
 	%719 = ptrtoint %struct.edge_rec* %718 to i32		; <i32> [#uses=2]
 	%720 = add i32 %719, 16		; <i32> [#uses=1]
 	%721 = and i32 %720, 63		; <i32> [#uses=1]
@@ -938,7 +938,7 @@
 	%723 = or i32 %721, %722		; <i32> [#uses=1]
 	%724 = inttoptr i32 %723 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%725 = getelementptr %struct.edge_rec, %struct.edge_rec* %679, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%726 = load %struct.edge_rec** %725, align 4		; <%struct.edge_rec*> [#uses=1]
+	%726 = load %struct.edge_rec*, %struct.edge_rec** %725, align 4		; <%struct.edge_rec*> [#uses=1]
 	%727 = ptrtoint %struct.edge_rec* %726 to i32		; <i32> [#uses=2]
 	%728 = add i32 %727, 16		; <i32> [#uses=1]
 	%729 = and i32 %728, 63		; <i32> [#uses=1]
@@ -946,21 +946,21 @@
 	%731 = or i32 %729, %730		; <i32> [#uses=1]
 	%732 = inttoptr i32 %731 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%733 = getelementptr %struct.edge_rec, %struct.edge_rec* %732, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%734 = load %struct.edge_rec** %733, align 4		; <%struct.edge_rec*> [#uses=1]
+	%734 = load %struct.edge_rec*, %struct.edge_rec** %733, align 4		; <%struct.edge_rec*> [#uses=1]
 	%735 = getelementptr %struct.edge_rec, %struct.edge_rec* %724, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%736 = load %struct.edge_rec** %735, align 4		; <%struct.edge_rec*> [#uses=1]
+	%736 = load %struct.edge_rec*, %struct.edge_rec** %735, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %734, %struct.edge_rec** %735, align 4
 	store %struct.edge_rec* %736, %struct.edge_rec** %733, align 4
-	%737 = load %struct.edge_rec** %717, align 4		; <%struct.edge_rec*> [#uses=1]
-	%738 = load %struct.edge_rec** %725, align 4		; <%struct.edge_rec*> [#uses=1]
+	%737 = load %struct.edge_rec*, %struct.edge_rec** %717, align 4		; <%struct.edge_rec*> [#uses=1]
+	%738 = load %struct.edge_rec*, %struct.edge_rec** %725, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %737, %struct.edge_rec** %725, align 4
 	store %struct.edge_rec* %738, %struct.edge_rec** %717, align 4
-	%739 = load %struct.edge_rec** %681, align 4		; <%struct.edge_rec*> [#uses=1]
+	%739 = load %struct.edge_rec*, %struct.edge_rec** %681, align 4		; <%struct.edge_rec*> [#uses=1]
 	br label %bb9.i
 
 do_merge.exit:		; preds = %bb17.i
 	%740 = getelementptr %struct.edge_rec, %struct.edge_rec* %ldo_addr.0.ph.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%741 = load %struct.VERTEX** %740, align 4		; <%struct.VERTEX*> [#uses=1]
+	%741 = load %struct.VERTEX*, %struct.VERTEX** %740, align 4		; <%struct.VERTEX*> [#uses=1]
 	%742 = icmp eq %struct.VERTEX* %741, %tree_addr.0.i		; <i1> [#uses=1]
 	br i1 %742, label %bb5.loopexit, label %bb2
 
@@ -970,28 +970,28 @@
 	%744 = xor i32 %743, 32		; <i32> [#uses=1]
 	%745 = inttoptr i32 %744 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%746 = getelementptr %struct.edge_rec, %struct.edge_rec* %745, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%747 = load %struct.edge_rec** %746, align 4		; <%struct.edge_rec*> [#uses=3]
+	%747 = load %struct.edge_rec*, %struct.edge_rec** %746, align 4		; <%struct.edge_rec*> [#uses=3]
 	%748 = getelementptr %struct.edge_rec, %struct.edge_rec* %747, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%749 = load %struct.VERTEX** %748, align 4		; <%struct.VERTEX*> [#uses=1]
+	%749 = load %struct.VERTEX*, %struct.VERTEX** %748, align 4		; <%struct.VERTEX*> [#uses=1]
 	%750 = icmp eq %struct.VERTEX* %749, %tree_addr.0.i		; <i1> [#uses=1]
 	br i1 %750, label %bb5.loopexit, label %bb2
 
 bb4:		; preds = %bb5.loopexit, %bb4
 	%rdo.05 = phi %struct.edge_rec* [ %755, %bb4 ], [ %rdo_addr.0.i, %bb5.loopexit ]		; <%struct.edge_rec*> [#uses=1]
 	%751 = getelementptr %struct.edge_rec, %struct.edge_rec* %rdo.05, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%752 = load %struct.edge_rec** %751, align 4		; <%struct.edge_rec*> [#uses=1]
+	%752 = load %struct.edge_rec*, %struct.edge_rec** %751, align 4		; <%struct.edge_rec*> [#uses=1]
 	%753 = ptrtoint %struct.edge_rec* %752 to i32		; <i32> [#uses=1]
 	%754 = xor i32 %753, 32		; <i32> [#uses=1]
 	%755 = inttoptr i32 %754 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=3]
 	%756 = getelementptr %struct.edge_rec, %struct.edge_rec* %755, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%757 = load %struct.VERTEX** %756, align 4		; <%struct.VERTEX*> [#uses=1]
+	%757 = load %struct.VERTEX*, %struct.VERTEX** %756, align 4		; <%struct.VERTEX*> [#uses=1]
 	%758 = icmp eq %struct.VERTEX* %757, %extra		; <i1> [#uses=1]
 	br i1 %758, label %bb6, label %bb4
 
 bb5.loopexit:		; preds = %bb2, %do_merge.exit
 	%ldo.0.lcssa = phi %struct.edge_rec* [ %ldo_addr.0.ph.i, %do_merge.exit ], [ %747, %bb2 ]		; <%struct.edge_rec*> [#uses=1]
 	%759 = getelementptr %struct.edge_rec, %struct.edge_rec* %rdo_addr.0.i, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%760 = load %struct.VERTEX** %759, align 4		; <%struct.VERTEX*> [#uses=1]
+	%760 = load %struct.VERTEX*, %struct.VERTEX** %759, align 4		; <%struct.VERTEX*> [#uses=1]
 	%761 = icmp eq %struct.VERTEX* %760, %extra		; <i1> [#uses=1]
 	br i1 %761, label %bb6, label %bb4
 
@@ -1003,7 +1003,7 @@
 
 bb7:		; preds = %bb
 	%762 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 1		; <%struct.VERTEX**> [#uses=1]
-	%763 = load %struct.VERTEX** %762, align 4		; <%struct.VERTEX*> [#uses=4]
+	%763 = load %struct.VERTEX*, %struct.VERTEX** %762, align 4		; <%struct.VERTEX*> [#uses=4]
 	%764 = icmp eq %struct.VERTEX* %763, null		; <i1> [#uses=1]
 	%765 = call  %struct.edge_rec* @alloc_edge() nounwind		; <%struct.edge_rec*> [#uses=5]
 	%766 = getelementptr %struct.edge_rec, %struct.edge_rec* %765, i32 0, i32 1		; <%struct.edge_rec**> [#uses=4]
@@ -1076,14 +1076,14 @@
 	%806 = xor i32 %781, 32		; <i32> [#uses=1]
 	%807 = inttoptr i32 %806 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%808 = getelementptr %struct.edge_rec, %struct.edge_rec* %807, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%809 = load %struct.edge_rec** %808, align 4		; <%struct.edge_rec*> [#uses=1]
+	%809 = load %struct.edge_rec*, %struct.edge_rec** %808, align 4		; <%struct.edge_rec*> [#uses=1]
 	%810 = ptrtoint %struct.edge_rec* %809 to i32		; <i32> [#uses=2]
 	%811 = add i32 %810, 16		; <i32> [#uses=1]
 	%812 = and i32 %811, 63		; <i32> [#uses=1]
 	%813 = and i32 %810, -64		; <i32> [#uses=1]
 	%814 = or i32 %812, %813		; <i32> [#uses=1]
 	%815 = inttoptr i32 %814 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
-	%816 = load %struct.edge_rec** %793, align 4		; <%struct.edge_rec*> [#uses=1]
+	%816 = load %struct.edge_rec*, %struct.edge_rec** %793, align 4		; <%struct.edge_rec*> [#uses=1]
 	%817 = ptrtoint %struct.edge_rec* %816 to i32		; <i32> [#uses=2]
 	%818 = add i32 %817, 16		; <i32> [#uses=1]
 	%819 = and i32 %818, 63		; <i32> [#uses=1]
@@ -1091,32 +1091,32 @@
 	%821 = or i32 %819, %820		; <i32> [#uses=1]
 	%822 = inttoptr i32 %821 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%823 = getelementptr %struct.edge_rec, %struct.edge_rec* %822, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%824 = load %struct.edge_rec** %823, align 4		; <%struct.edge_rec*> [#uses=1]
+	%824 = load %struct.edge_rec*, %struct.edge_rec** %823, align 4		; <%struct.edge_rec*> [#uses=1]
 	%825 = getelementptr %struct.edge_rec, %struct.edge_rec* %815, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%826 = load %struct.edge_rec** %825, align 4		; <%struct.edge_rec*> [#uses=1]
+	%826 = load %struct.edge_rec*, %struct.edge_rec** %825, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %824, %struct.edge_rec** %825, align 4
 	store %struct.edge_rec* %826, %struct.edge_rec** %823, align 4
-	%827 = load %struct.edge_rec** %808, align 4		; <%struct.edge_rec*> [#uses=1]
-	%828 = load %struct.edge_rec** %793, align 4		; <%struct.edge_rec*> [#uses=1]
+	%827 = load %struct.edge_rec*, %struct.edge_rec** %808, align 4		; <%struct.edge_rec*> [#uses=1]
+	%828 = load %struct.edge_rec*, %struct.edge_rec** %793, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %827, %struct.edge_rec** %793, align 4
 	store %struct.edge_rec* %828, %struct.edge_rec** %808, align 4
 	%829 = xor i32 %795, 32		; <i32> [#uses=3]
 	%830 = inttoptr i32 %829 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%831 = getelementptr %struct.edge_rec, %struct.edge_rec* %830, i32 0, i32 0		; <%struct.VERTEX**> [#uses=1]
-	%832 = load %struct.VERTEX** %831, align 4		; <%struct.VERTEX*> [#uses=1]
+	%832 = load %struct.VERTEX*, %struct.VERTEX** %831, align 4		; <%struct.VERTEX*> [#uses=1]
 	%833 = and i32 %798, 63		; <i32> [#uses=1]
 	%834 = and i32 %795, -64		; <i32> [#uses=1]
 	%835 = or i32 %833, %834		; <i32> [#uses=1]
 	%836 = inttoptr i32 %835 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%837 = getelementptr %struct.edge_rec, %struct.edge_rec* %836, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%838 = load %struct.edge_rec** %837, align 4		; <%struct.edge_rec*> [#uses=1]
+	%838 = load %struct.edge_rec*, %struct.edge_rec** %837, align 4		; <%struct.edge_rec*> [#uses=1]
 	%839 = ptrtoint %struct.edge_rec* %838 to i32		; <i32> [#uses=2]
 	%840 = add i32 %839, 16		; <i32> [#uses=1]
 	%841 = and i32 %840, 63		; <i32> [#uses=1]
 	%842 = and i32 %839, -64		; <i32> [#uses=1]
 	%843 = or i32 %841, %842		; <i32> [#uses=1]
 	%844 = inttoptr i32 %843 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
-	%845 = load %struct.VERTEX** %767, align 4		; <%struct.VERTEX*> [#uses=1]
+	%845 = load %struct.VERTEX*, %struct.VERTEX** %767, align 4		; <%struct.VERTEX*> [#uses=1]
 	%846 = call  %struct.edge_rec* @alloc_edge() nounwind		; <%struct.edge_rec*> [#uses=4]
 	%847 = getelementptr %struct.edge_rec, %struct.edge_rec* %846, i32 0, i32 1		; <%struct.edge_rec**> [#uses=7]
 	store %struct.edge_rec* %846, %struct.edge_rec** %847, align 4
@@ -1137,7 +1137,7 @@
 	store %struct.VERTEX* %845, %struct.VERTEX** %858, align 4
 	%859 = getelementptr %struct.edge_rec, %struct.edge_rec* %853, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
 	store %struct.edge_rec* %851, %struct.edge_rec** %859, align 4
-	%860 = load %struct.edge_rec** %847, align 4		; <%struct.edge_rec*> [#uses=1]
+	%860 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4		; <%struct.edge_rec*> [#uses=1]
 	%861 = ptrtoint %struct.edge_rec* %860 to i32		; <i32> [#uses=2]
 	%862 = add i32 %861, 16		; <i32> [#uses=1]
 	%863 = and i32 %862, 63		; <i32> [#uses=1]
@@ -1145,7 +1145,7 @@
 	%865 = or i32 %863, %864		; <i32> [#uses=1]
 	%866 = inttoptr i32 %865 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%867 = getelementptr %struct.edge_rec, %struct.edge_rec* %844, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%868 = load %struct.edge_rec** %867, align 4		; <%struct.edge_rec*> [#uses=1]
+	%868 = load %struct.edge_rec*, %struct.edge_rec** %867, align 4		; <%struct.edge_rec*> [#uses=1]
 	%869 = ptrtoint %struct.edge_rec* %868 to i32		; <i32> [#uses=2]
 	%870 = add i32 %869, 16		; <i32> [#uses=1]
 	%871 = and i32 %870, 63		; <i32> [#uses=1]
@@ -1153,26 +1153,26 @@
 	%873 = or i32 %871, %872		; <i32> [#uses=1]
 	%874 = inttoptr i32 %873 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%875 = getelementptr %struct.edge_rec, %struct.edge_rec* %874, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%876 = load %struct.edge_rec** %875, align 4		; <%struct.edge_rec*> [#uses=1]
+	%876 = load %struct.edge_rec*, %struct.edge_rec** %875, align 4		; <%struct.edge_rec*> [#uses=1]
 	%877 = getelementptr %struct.edge_rec, %struct.edge_rec* %866, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%878 = load %struct.edge_rec** %877, align 4		; <%struct.edge_rec*> [#uses=1]
+	%878 = load %struct.edge_rec*, %struct.edge_rec** %877, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %876, %struct.edge_rec** %877, align 4
 	store %struct.edge_rec* %878, %struct.edge_rec** %875, align 4
-	%879 = load %struct.edge_rec** %847, align 4		; <%struct.edge_rec*> [#uses=1]
-	%880 = load %struct.edge_rec** %867, align 4		; <%struct.edge_rec*> [#uses=1]
+	%879 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4		; <%struct.edge_rec*> [#uses=1]
+	%880 = load %struct.edge_rec*, %struct.edge_rec** %867, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %879, %struct.edge_rec** %867, align 4
 	store %struct.edge_rec* %880, %struct.edge_rec** %847, align 4
 	%881 = xor i32 %849, 32		; <i32> [#uses=3]
 	%882 = inttoptr i32 %881 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%883 = getelementptr %struct.edge_rec, %struct.edge_rec* %882, i32 0, i32 1		; <%struct.edge_rec**> [#uses=6]
-	%884 = load %struct.edge_rec** %883, align 4		; <%struct.edge_rec*> [#uses=1]
+	%884 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4		; <%struct.edge_rec*> [#uses=1]
 	%885 = ptrtoint %struct.edge_rec* %884 to i32		; <i32> [#uses=2]
 	%886 = add i32 %885, 16		; <i32> [#uses=1]
 	%887 = and i32 %886, 63		; <i32> [#uses=1]
 	%888 = and i32 %885, -64		; <i32> [#uses=1]
 	%889 = or i32 %887, %888		; <i32> [#uses=1]
 	%890 = inttoptr i32 %889 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
-	%891 = load %struct.edge_rec** %766, align 4		; <%struct.edge_rec*> [#uses=1]
+	%891 = load %struct.edge_rec*, %struct.edge_rec** %766, align 4		; <%struct.edge_rec*> [#uses=1]
 	%892 = ptrtoint %struct.edge_rec* %891 to i32		; <i32> [#uses=2]
 	%893 = add i32 %892, 16		; <i32> [#uses=1]
 	%894 = and i32 %893, 63		; <i32> [#uses=1]
@@ -1180,27 +1180,27 @@
 	%896 = or i32 %894, %895		; <i32> [#uses=1]
 	%897 = inttoptr i32 %896 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%898 = getelementptr %struct.edge_rec, %struct.edge_rec* %897, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%899 = load %struct.edge_rec** %898, align 4		; <%struct.edge_rec*> [#uses=1]
+	%899 = load %struct.edge_rec*, %struct.edge_rec** %898, align 4		; <%struct.edge_rec*> [#uses=1]
 	%900 = getelementptr %struct.edge_rec, %struct.edge_rec* %890, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%901 = load %struct.edge_rec** %900, align 4		; <%struct.edge_rec*> [#uses=1]
+	%901 = load %struct.edge_rec*, %struct.edge_rec** %900, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %899, %struct.edge_rec** %900, align 4
 	store %struct.edge_rec* %901, %struct.edge_rec** %898, align 4
-	%902 = load %struct.edge_rec** %883, align 4		; <%struct.edge_rec*> [#uses=1]
-	%903 = load %struct.edge_rec** %766, align 4		; <%struct.edge_rec*> [#uses=1]
+	%902 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4		; <%struct.edge_rec*> [#uses=1]
+	%903 = load %struct.edge_rec*, %struct.edge_rec** %766, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %902, %struct.edge_rec** %766, align 4
 	store %struct.edge_rec* %903, %struct.edge_rec** %883, align 4
 	%904 = getelementptr %struct.VERTEX, %struct.VERTEX* %763, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%905 = load double* %904, align 4		; <double> [#uses=2]
+	%905 = load double, double* %904, align 4		; <double> [#uses=2]
 	%906 = getelementptr %struct.VERTEX, %struct.VERTEX* %763, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%907 = load double* %906, align 4		; <double> [#uses=2]
+	%907 = load double, double* %906, align 4		; <double> [#uses=2]
 	%908 = getelementptr %struct.VERTEX, %struct.VERTEX* %extra, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%909 = load double* %908, align 4		; <double> [#uses=3]
+	%909 = load double, double* %908, align 4		; <double> [#uses=3]
 	%910 = getelementptr %struct.VERTEX, %struct.VERTEX* %extra, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%911 = load double* %910, align 4		; <double> [#uses=3]
+	%911 = load double, double* %910, align 4		; <double> [#uses=3]
 	%912 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%913 = load double* %912, align 4		; <double> [#uses=3]
+	%913 = load double, double* %912, align 4		; <double> [#uses=3]
 	%914 = getelementptr %struct.VERTEX, %struct.VERTEX* %tree, i32 0, i32 0, i32 1		; <double*> [#uses=1]
-	%915 = load double* %914, align 4		; <double> [#uses=3]
+	%915 = load double, double* %914, align 4		; <double> [#uses=3]
 	%916 = fsub double %905, %913		; <double> [#uses=1]
 	%917 = fsub double %911, %915		; <double> [#uses=1]
 	%918 = fmul double %916, %917		; <double> [#uses=1]
@@ -1228,14 +1228,14 @@
 	%934 = or i32 %932, %933		; <i32> [#uses=1]
 	%935 = inttoptr i32 %934 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%936 = getelementptr %struct.edge_rec, %struct.edge_rec* %935, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%937 = load %struct.edge_rec** %936, align 4		; <%struct.edge_rec*> [#uses=1]
+	%937 = load %struct.edge_rec*, %struct.edge_rec** %936, align 4		; <%struct.edge_rec*> [#uses=1]
 	%938 = ptrtoint %struct.edge_rec* %937 to i32		; <i32> [#uses=2]
 	%939 = add i32 %938, 16		; <i32> [#uses=1]
 	%940 = and i32 %939, 63		; <i32> [#uses=1]
 	%941 = and i32 %938, -64		; <i32> [#uses=1]
 	%942 = or i32 %940, %941		; <i32> [#uses=1]
 	%943 = inttoptr i32 %942 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
-	%944 = load %struct.edge_rec** %847, align 4		; <%struct.edge_rec*> [#uses=1]
+	%944 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4		; <%struct.edge_rec*> [#uses=1]
 	%945 = ptrtoint %struct.edge_rec* %944 to i32		; <i32> [#uses=2]
 	%946 = add i32 %945, 16		; <i32> [#uses=1]
 	%947 = and i32 %946, 63		; <i32> [#uses=1]
@@ -1243,7 +1243,7 @@
 	%949 = or i32 %947, %948		; <i32> [#uses=1]
 	%950 = inttoptr i32 %949 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%951 = getelementptr %struct.edge_rec, %struct.edge_rec* %943, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%952 = load %struct.edge_rec** %951, align 4		; <%struct.edge_rec*> [#uses=1]
+	%952 = load %struct.edge_rec*, %struct.edge_rec** %951, align 4		; <%struct.edge_rec*> [#uses=1]
 	%953 = ptrtoint %struct.edge_rec* %952 to i32		; <i32> [#uses=2]
 	%954 = add i32 %953, 16		; <i32> [#uses=1]
 	%955 = and i32 %954, 63		; <i32> [#uses=1]
@@ -1251,13 +1251,13 @@
 	%957 = or i32 %955, %956		; <i32> [#uses=1]
 	%958 = inttoptr i32 %957 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%959 = getelementptr %struct.edge_rec, %struct.edge_rec* %958, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%960 = load %struct.edge_rec** %959, align 4		; <%struct.edge_rec*> [#uses=1]
+	%960 = load %struct.edge_rec*, %struct.edge_rec** %959, align 4		; <%struct.edge_rec*> [#uses=1]
 	%961 = getelementptr %struct.edge_rec, %struct.edge_rec* %950, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%962 = load %struct.edge_rec** %961, align 4		; <%struct.edge_rec*> [#uses=1]
+	%962 = load %struct.edge_rec*, %struct.edge_rec** %961, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %960, %struct.edge_rec** %961, align 4
 	store %struct.edge_rec* %962, %struct.edge_rec** %959, align 4
-	%963 = load %struct.edge_rec** %847, align 4		; <%struct.edge_rec*> [#uses=1]
-	%964 = load %struct.edge_rec** %951, align 4		; <%struct.edge_rec*> [#uses=1]
+	%963 = load %struct.edge_rec*, %struct.edge_rec** %847, align 4		; <%struct.edge_rec*> [#uses=1]
+	%964 = load %struct.edge_rec*, %struct.edge_rec** %951, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %963, %struct.edge_rec** %951, align 4
 	store %struct.edge_rec* %964, %struct.edge_rec** %847, align 4
 	%965 = add i32 %881, 16		; <i32> [#uses=1]
@@ -1265,14 +1265,14 @@
 	%967 = or i32 %966, %933		; <i32> [#uses=1]
 	%968 = inttoptr i32 %967 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%969 = getelementptr %struct.edge_rec, %struct.edge_rec* %968, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
-	%970 = load %struct.edge_rec** %969, align 4		; <%struct.edge_rec*> [#uses=1]
+	%970 = load %struct.edge_rec*, %struct.edge_rec** %969, align 4		; <%struct.edge_rec*> [#uses=1]
 	%971 = ptrtoint %struct.edge_rec* %970 to i32		; <i32> [#uses=2]
 	%972 = add i32 %971, 16		; <i32> [#uses=1]
 	%973 = and i32 %972, 63		; <i32> [#uses=1]
 	%974 = and i32 %971, -64		; <i32> [#uses=1]
 	%975 = or i32 %973, %974		; <i32> [#uses=1]
 	%976 = inttoptr i32 %975 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
-	%977 = load %struct.edge_rec** %883, align 4		; <%struct.edge_rec*> [#uses=1]
+	%977 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4		; <%struct.edge_rec*> [#uses=1]
 	%978 = ptrtoint %struct.edge_rec* %977 to i32		; <i32> [#uses=2]
 	%979 = add i32 %978, 16		; <i32> [#uses=1]
 	%980 = and i32 %979, 63		; <i32> [#uses=1]
@@ -1280,7 +1280,7 @@
 	%982 = or i32 %980, %981		; <i32> [#uses=1]
 	%983 = inttoptr i32 %982 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%984 = getelementptr %struct.edge_rec, %struct.edge_rec* %976, i32 0, i32 1		; <%struct.edge_rec**> [#uses=3]
-	%985 = load %struct.edge_rec** %984, align 4		; <%struct.edge_rec*> [#uses=1]
+	%985 = load %struct.edge_rec*, %struct.edge_rec** %984, align 4		; <%struct.edge_rec*> [#uses=1]
 	%986 = ptrtoint %struct.edge_rec* %985 to i32		; <i32> [#uses=2]
 	%987 = add i32 %986, 16		; <i32> [#uses=1]
 	%988 = and i32 %987, 63		; <i32> [#uses=1]
@@ -1288,17 +1288,17 @@
 	%990 = or i32 %988, %989		; <i32> [#uses=1]
 	%991 = inttoptr i32 %990 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=1]
 	%992 = getelementptr %struct.edge_rec, %struct.edge_rec* %991, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%993 = load %struct.edge_rec** %992, align 4		; <%struct.edge_rec*> [#uses=1]
+	%993 = load %struct.edge_rec*, %struct.edge_rec** %992, align 4		; <%struct.edge_rec*> [#uses=1]
 	%994 = getelementptr %struct.edge_rec, %struct.edge_rec* %983, i32 0, i32 1		; <%struct.edge_rec**> [#uses=2]
-	%995 = load %struct.edge_rec** %994, align 4		; <%struct.edge_rec*> [#uses=1]
+	%995 = load %struct.edge_rec*, %struct.edge_rec** %994, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %993, %struct.edge_rec** %994, align 4
 	store %struct.edge_rec* %995, %struct.edge_rec** %992, align 4
-	%996 = load %struct.edge_rec** %883, align 4		; <%struct.edge_rec*> [#uses=1]
-	%997 = load %struct.edge_rec** %984, align 4		; <%struct.edge_rec*> [#uses=1]
+	%996 = load %struct.edge_rec*, %struct.edge_rec** %883, align 4		; <%struct.edge_rec*> [#uses=1]
+	%997 = load %struct.edge_rec*, %struct.edge_rec** %984, align 4		; <%struct.edge_rec*> [#uses=1]
 	store %struct.edge_rec* %996, %struct.edge_rec** %984, align 4
 	store %struct.edge_rec* %997, %struct.edge_rec** %883, align 4
 	%998 = inttoptr i32 %933 to %struct.edge_rec*		; <%struct.edge_rec*> [#uses=2]
-	%999 = load %struct.edge_rec** @avail_edge, align 4		; <%struct.edge_rec*> [#uses=1]
+	%999 = load %struct.edge_rec*, %struct.edge_rec** @avail_edge, align 4		; <%struct.edge_rec*> [#uses=1]
 	%1000 = getelementptr %struct.edge_rec, %struct.edge_rec* %998, i32 0, i32 1		; <%struct.edge_rec**> [#uses=1]
 	store %struct.edge_rec* %999, %struct.edge_rec** %1000, align 4
 	store %struct.edge_rec* %998, %struct.edge_rec** @avail_edge, align 4
diff --git a/llvm/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll b/llvm/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll
index d477ba9..d746b10 100644
--- a/llvm/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll
+++ b/llvm/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll
@@ -83,7 +83,7 @@
 	br i1 undef, label %cli_calloc.exit70.thread, label %cli_calloc.exit70
 
 bb52:		; preds = %cli_calloc.exit
-	%0 = load i16* undef, align 4		; <i16> [#uses=1]
+	%0 = load i16, i16* undef, align 4		; <i16> [#uses=1]
 	%1 = icmp eq i16 %0, 0		; <i1> [#uses=1]
 	%iftmp.20.0 = select i1 %1, i8* %hexsig, i8* null		; <i8*> [#uses=1]
 	%2 = tail call  i32 @strlen(i8* %iftmp.20.0) nounwind readonly		; <i32> [#uses=0]
diff --git a/llvm/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll b/llvm/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
index 198efa7..156fd88 100644
--- a/llvm/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
+++ b/llvm/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
@@ -67,7 +67,7 @@
 bb22:		; preds = %bb18, %bb17
 	%0 = getelementptr i8, i8* null, i32 10		; <i8*> [#uses=1]
 	%1 = bitcast i8* %0 to i16*		; <i16*> [#uses=1]
-	%2 = load i16* %1, align 2		; <i16> [#uses=1]
+	%2 = load i16, i16* %1, align 2		; <i16> [#uses=1]
 	%3 = add i16 %2, 1		; <i16> [#uses=1]
 	%4 = zext i16 %3 to i32		; <i32> [#uses=1]
 	%5 = mul i32 %4, 3		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll b/llvm/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
index 5003fbd..01591c8 100644
--- a/llvm/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
+++ b/llvm/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
@@ -12,7 +12,7 @@
 	br i1 undef, label %bb28, label %bb
 
 bb28:		; preds = %bb
-	%0 = load double* @a, align 4		; <double> [#uses=2]
+	%0 = load double, double* @a, align 4		; <double> [#uses=2]
 	%1 = fadd double %0, undef		; <double> [#uses=2]
 	br i1 undef, label %bb59, label %bb60
 
diff --git a/llvm/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll b/llvm/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
index a656c49..e277b4c 100644
--- a/llvm/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
+++ b/llvm/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
@@ -13,17 +13,17 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store <4 x i32> %v, <4 x i32>* %v_addr
 	store i32 %f, i32* %f_addr
-	%1 = load <4 x i32>* %v_addr, align 16		; <<4 x i32>> [#uses=1]
-	%2 = load i32* %f_addr, align 4		; <i32> [#uses=1]
+	%1 = load <4 x i32>, <4 x i32>* %v_addr, align 16		; <<4 x i32>> [#uses=1]
+	%2 = load i32, i32* %f_addr, align 4		; <i32> [#uses=1]
 	%3 = insertelement <4 x i32> undef, i32 %2, i32 0		; <<4 x i32>> [#uses=1]
 	%4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> zeroinitializer		; <<4 x i32>> [#uses=1]
 	%5 = mul <4 x i32> %1, %4		; <<4 x i32>> [#uses=1]
 	store <4 x i32> %5, <4 x i32>* %0, align 16
-	%6 = load <4 x i32>* %0, align 16		; <<4 x i32>> [#uses=1]
+	%6 = load <4 x i32>, <4 x i32>* %0, align 16		; <<4 x i32>> [#uses=1]
 	store <4 x i32> %6, <4 x i32>* %retval, align 16
 	br label %return
 
 return:		; preds = %entry
-	%retval1 = load <4 x i32>* %retval		; <<4 x i32>> [#uses=1]
+	%retval1 = load <4 x i32>, <4 x i32>* %retval		; <<4 x i32>> [#uses=1]
 	ret <4 x i32> %retval1
 }
diff --git a/llvm/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll b/llvm/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll
index 574a06c..a5e9692 100644
--- a/llvm/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll
+++ b/llvm/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll
@@ -29,7 +29,7 @@
 bb8:                                              ; preds = %bb7, %entry
   %2 = phi i32 [ 0, %entry ], [ %1, %bb7 ]        ; <i32> [#uses=3]
   %scevgep22 = getelementptr %struct.iovec, %struct.iovec* %iov, i32 %2, i32 0; <i8**> [#uses=0]
-  %3 = load i32* %nr_segs, align 4                ; <i32> [#uses=1]
+  %3 = load i32, i32* %nr_segs, align 4                ; <i32> [#uses=1]
   %4 = icmp ult i32 %2, %3                        ; <i1> [#uses=1]
   br i1 %4, label %bb, label %bb9
 
diff --git a/llvm/test/CodeGen/ARM/2009-08-21-PostRAKill.ll b/llvm/test/CodeGen/ARM/2009-08-21-PostRAKill.ll
index 5bb9b1e..0d258e6 100644
--- a/llvm/test/CodeGen/ARM/2009-08-21-PostRAKill.ll
+++ b/llvm/test/CodeGen/ARM/2009-08-21-PostRAKill.ll
@@ -9,18 +9,18 @@
 
 define %struct.tree* @tsp(%struct.tree* %t, i32 %nproc) nounwind {
 entry:
-  %t.idx51.val.i = load double* null              ; <double> [#uses=1]
+  %t.idx51.val.i = load double, double* null              ; <double> [#uses=1]
   br i1 undef, label %bb4.i, label %bb.i
 
 bb.i:                                             ; preds = %entry
   unreachable
 
 bb4.i:                                            ; preds = %entry
-  %0 = load %struct.tree** @g, align 4         ; <%struct.tree*> [#uses=2]
+  %0 = load %struct.tree*, %struct.tree** @g, align 4         ; <%struct.tree*> [#uses=2]
   %.idx45.i = getelementptr %struct.tree, %struct.tree* %0, i32 0, i32 1 ; <double*> [#uses=1]
-  %.idx45.val.i = load double* %.idx45.i          ; <double> [#uses=1]
+  %.idx45.val.i = load double, double* %.idx45.i          ; <double> [#uses=1]
   %.idx46.i = getelementptr %struct.tree, %struct.tree* %0, i32 0, i32 2 ; <double*> [#uses=1]
-  %.idx46.val.i = load double* %.idx46.i          ; <double> [#uses=1]
+  %.idx46.val.i = load double, double* %.idx46.i          ; <double> [#uses=1]
   %1 = fsub double 0.000000e+00, %.idx45.val.i    ; <double> [#uses=2]
   %2 = fmul double %1, %1                         ; <double> [#uses=1]
   %3 = fsub double %t.idx51.val.i, %.idx46.val.i  ; <double> [#uses=2]
diff --git a/llvm/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll b/llvm/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
index 3373ba4..d6babb6 100644
--- a/llvm/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
+++ b/llvm/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
@@ -22,14 +22,14 @@
   br label %return
 
 bb:                                               ; preds = %ppad
-  %eh_select = load i32* %eh_selector
+  %eh_select = load i32, i32* %eh_selector
   store i32 %eh_select, i32* %save_filt.1, align 4
-  %eh_value = load i8** %eh_exception
+  %eh_value = load i8*, i8** %eh_exception
   store i8* %eh_value, i8** %save_eptr.0, align 4
   call void @_ZN1AD1Ev(%struct.A* %a) nounwind
-  %0 = load i8** %save_eptr.0, align 4
+  %0 = load i8*, i8** %save_eptr.0, align 4
   store i8* %0, i8** %eh_exception, align 4
-  %1 = load i32* %save_filt.1, align 4
+  %1 = load i32, i32* %save_filt.1, align 4
   store i32 %1, i32* %eh_selector, align 4
   br label %Unwind
 
@@ -49,7 +49,7 @@
   br label %bb
 
 Unwind:                                           ; preds = %bb
-  %eh_ptr3 = load i8** %eh_exception
+  %eh_ptr3 = load i8*, i8** %eh_exception
   call void @_Unwind_SjLj_Resume(i8* %eh_ptr3)
   unreachable
 }
@@ -61,7 +61,7 @@
   store %struct.A* %this, %struct.A** %this_addr
   %0 = call i8* @_Znwm(i32 4)
   %1 = bitcast i8* %0 to i32*
-  %2 = load %struct.A** %this_addr, align 4
+  %2 = load %struct.A*, %struct.A** %this_addr, align 4
   %3 = getelementptr inbounds %struct.A, %struct.A* %2, i32 0, i32 0
   store i32* %1, i32** %3, align 4
   br label %return
@@ -77,9 +77,9 @@
   %this_addr = alloca %struct.A*
   %"alloca point" = bitcast i32 0 to i32
   store %struct.A* %this, %struct.A** %this_addr
-  %0 = load %struct.A** %this_addr, align 4
+  %0 = load %struct.A*, %struct.A** %this_addr, align 4
   %1 = getelementptr inbounds %struct.A, %struct.A* %0, i32 0, i32 0
-  %2 = load i32** %1, align 4
+  %2 = load i32*, i32** %1, align 4
   %3 = bitcast i32* %2 to i8*
   call void @_ZdlPv(i8* %3) nounwind
   br label %bb
diff --git a/llvm/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll b/llvm/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
index b078ec0..a6d128d 100644
--- a/llvm/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
+++ b/llvm/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
@@ -4,7 +4,7 @@
 define <4 x i16> @v2regbug(<4 x i16>* %B) nounwind {
 ;CHECK-LABEL: v2regbug:
 ;CHECK: vzip.16
-	%tmp1 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %B
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32><i32 0, i32 0, i32 1, i32 1>
 	ret <4 x i16> %tmp2
 }
diff --git a/llvm/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll b/llvm/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
index 426bd17..4437d37 100644
--- a/llvm/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
+++ b/llvm/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
@@ -2,8 +2,8 @@
 ; pr4939
 
 define void @test(double* %x, double* %y) nounwind {
-  %1 = load double* %x
-  %2 = load double* %y
+  %1 = load double, double* %x
+  %2 = load double, double* %y
   %3 = fsub double -0.000000e+00, %1
   %4 = fcmp ugt double %2, %3
   br i1 %4, label %bb1, label %bb2
diff --git a/llvm/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll b/llvm/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll
index 5d3722c..de927a8 100644
--- a/llvm/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll
+++ b/llvm/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll
@@ -13,10 +13,10 @@
 
 define arm_aapcs_vfpcc i8 @foo(%struct.fr* nocapture %this, %struct.obb* %box) nounwind {
 entry:
-  %val.i.i = load <4 x float>* undef              ; <<4 x float>> [#uses=1]
-  %val2.i.i = load <4 x float>* null              ; <<4 x float>> [#uses=1]
+  %val.i.i = load <4 x float>, <4 x float>* undef              ; <<4 x float>> [#uses=1]
+  %val2.i.i = load <4 x float>, <4 x float>* null              ; <<4 x float>> [#uses=1]
   %elt3.i.i = getelementptr inbounds %struct.obb, %struct.obb* %box, i32 0, i32 0, i32 2, i32 0 ; <<4 x float>*> [#uses=1]
-  %val4.i.i = load <4 x float>* %elt3.i.i         ; <<4 x float>> [#uses=1]
+  %val4.i.i = load <4 x float>, <4 x float>* %elt3.i.i         ; <<4 x float>> [#uses=1]
   %0 = shufflevector <2 x float> undef, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
   %1 = fadd <4 x float> undef, zeroinitializer    ; <<4 x float>> [#uses=1]
   br label %bb33
diff --git a/llvm/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll b/llvm/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll
index dd9a6fd..b8a1479 100644
--- a/llvm/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll
+++ b/llvm/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll
@@ -16,13 +16,13 @@
   %tmp3738 = inttoptr i32 %tmp37 to float*
   %tmp39 = add i32 %1, 24
   %tmp3940 = inttoptr i32 %tmp39 to float*
-  %2 = load float* %lsr.iv2641, align 4
-  %3 = load float* %tmp2930, align 4
-  %4 = load float* %tmp3132, align 4
-  %5 = load float* %tmp3334, align 4
-  %6 = load float* %tmp3536, align 4
-  %7 = load float* %tmp3738, align 4
-  %8 = load float* %tmp3940, align 4
+  %2 = load float, float* %lsr.iv2641, align 4
+  %3 = load float, float* %tmp2930, align 4
+  %4 = load float, float* %tmp3132, align 4
+  %5 = load float, float* %tmp3334, align 4
+  %6 = load float, float* %tmp3536, align 4
+  %7 = load float, float* %tmp3738, align 4
+  %8 = load float, float* %tmp3940, align 4
   %9 = insertelement <4 x float> undef, float %6, i32 0
   %10 = shufflevector <4 x float> %9, <4 x float> undef, <4 x i32> zeroinitializer
   %11 = insertelement <4 x float> %10, float %7, i32 1
diff --git a/llvm/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll b/llvm/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll
index 2ff479b..4bbd047 100644
--- a/llvm/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll
+++ b/llvm/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll
@@ -9,7 +9,7 @@
 entry:
   %0 = call arm_aapcs_vfpcc  %struct.4* @sss1(%struct.4* undef, float 0.000000e+00) nounwind ; <%struct.4*> [#uses=0]
   %1 = call arm_aapcs_vfpcc  %struct.4* @qqq1(%struct.4* null, float 5.000000e-01) nounwind ; <%struct.4*> [#uses=0]
-  %val92 = load <4 x float>* null                 ; <<4 x float>> [#uses=1]
+  %val92 = load <4 x float>, <4 x float>* null                 ; <<4 x float>> [#uses=1]
   %2 = call arm_aapcs_vfpcc  %struct.4* @zzz2(%struct.4* undef, <4 x float> %val92) nounwind ; <%struct.4*> [#uses=0]
   ret %struct.1* %this
 }
diff --git a/llvm/test/CodeGen/ARM/2009-09-24-spill-align.ll b/llvm/test/CodeGen/ARM/2009-09-24-spill-align.ll
index 224bd01..4502542 100644
--- a/llvm/test/CodeGen/ARM/2009-09-24-spill-align.ll
+++ b/llvm/test/CodeGen/ARM/2009-09-24-spill-align.ll
@@ -7,7 +7,7 @@
   %out_poly16_t = alloca i16                      ; <i16*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
 ; CHECK: vldr
-  %0 = load <4 x i16>* %arg0_poly16x4_t, align 8  ; <<4 x i16>> [#uses=1]
+  %0 = load <4 x i16>, <4 x i16>* %arg0_poly16x4_t, align 8  ; <<4 x i16>> [#uses=1]
   %1 = extractelement <4 x i16> %0, i32 1         ; <i16> [#uses=1]
   store i16 %1, i16* %out_poly16_t, align 2
   br label %return
diff --git a/llvm/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll b/llvm/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll
index 465368b..641036f 100644
--- a/llvm/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll
+++ b/llvm/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll
@@ -6,8 +6,8 @@
   br i1 undef, label %return, label %bb
 
 bb:                                               ; preds = %bb, %entry
-  %0 = load float* undef, align 4                 ; <float> [#uses=1]
-  %1 = load float* null, align 4                  ; <float> [#uses=1]
+  %0 = load float, float* undef, align 4                 ; <float> [#uses=1]
+  %1 = load float, float* null, align 4                  ; <float> [#uses=1]
   %2 = insertelement <4 x float> undef, float undef, i32 1 ; <<4 x float>> [#uses=1]
   %3 = insertelement <4 x float> %2, float %1, i32 2 ; <<4 x float>> [#uses=2]
   %4 = insertelement <4 x float> undef, float %0, i32 2 ; <<4 x float>> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/2009-11-02-NegativeLane.ll b/llvm/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
index 2597b41..154cd65 100644
--- a/llvm/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
+++ b/llvm/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
@@ -8,7 +8,7 @@
 
 bb:                                               ; preds = %bb, %entry
 ; CHECK: vld1.16 {d16[], d17[]}
-  %0 = load i16* undef, align 2
+  %0 = load i16, i16* undef, align 2
   %1 = insertelement <8 x i16> undef, i16 %0, i32 2
   %2 = insertelement <8 x i16> %1, i16 undef, i32 3
   %3 = mul <8 x i16> %2, %2
diff --git a/llvm/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll b/llvm/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
index 38eb0ea..9632c77 100644
--- a/llvm/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
+++ b/llvm/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
@@ -6,7 +6,7 @@
 
 define arm_aapcs_vfpcc void @foo() {
 entry:
-  %0 = load float* null, align 4                  ; <float> [#uses=2]
+  %0 = load float, float* null, align 4                  ; <float> [#uses=2]
   %1 = fmul float %0, undef                       ; <float> [#uses=2]
   %2 = fmul float 0.000000e+00, %1                ; <float> [#uses=2]
   %3 = fmul float %0, %1                          ; <float> [#uses=1]
@@ -18,7 +18,7 @@
   %7 = fsub float %2, undef                       ; <float> [#uses=1]
   %8 = fsub float 0.000000e+00, undef             ; <float> [#uses=3]
   %9 = fadd float %2, undef                       ; <float> [#uses=3]
-  %10 = load float* undef, align 8                ; <float> [#uses=3]
+  %10 = load float, float* undef, align 8                ; <float> [#uses=3]
   %11 = fmul float %8, %10                        ; <float> [#uses=1]
   %12 = fadd float undef, %11                     ; <float> [#uses=2]
   %13 = fmul float undef, undef                   ; <float> [#uses=1]
@@ -30,10 +30,10 @@
   %19 = fadd float %18, 0.000000e+00              ; <float> [#uses=1]
   %20 = fmul float undef, %10                     ; <float> [#uses=1]
   %21 = fadd float %19, %20                       ; <float> [#uses=1]
-  %22 = load float* undef, align 8                ; <float> [#uses=1]
+  %22 = load float, float* undef, align 8                ; <float> [#uses=1]
   %23 = fmul float %5, %22                        ; <float> [#uses=1]
   %24 = fadd float %23, undef                     ; <float> [#uses=1]
-  %25 = load float* undef, align 8                ; <float> [#uses=2]
+  %25 = load float, float* undef, align 8                ; <float> [#uses=2]
   %26 = fmul float %8, %25                        ; <float> [#uses=1]
   %27 = fadd float %24, %26                       ; <float> [#uses=1]
   %28 = fmul float %9, %25                        ; <float> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll b/llvm/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll
index 19824b8..07e910b 100644
--- a/llvm/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll
+++ b/llvm/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll
@@ -14,10 +14,10 @@
 
 bb:                                               ; preds = %entry
   %0 = getelementptr inbounds %bar, %bar* null, i32 0, i32 0, i32 0, i32 2 ; <float*> [#uses=2]
-  %1 = load float* undef, align 4                 ; <float> [#uses=1]
+  %1 = load float, float* undef, align 4                 ; <float> [#uses=1]
   %2 = fsub float 0.000000e+00, undef             ; <float> [#uses=2]
   %3 = fmul float 0.000000e+00, undef             ; <float> [#uses=1]
-  %4 = load float* %0, align 4                    ; <float> [#uses=3]
+  %4 = load float, float* %0, align 4                    ; <float> [#uses=3]
   %5 = fmul float %4, %2                          ; <float> [#uses=1]
   %6 = fsub float %3, %5                          ; <float> [#uses=1]
   %7 = fmul float %4, undef                       ; <float> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll b/llvm/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll
index ebe261b..9eddcf7 100644
--- a/llvm/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll
+++ b/llvm/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll
@@ -22,19 +22,19 @@
   %0 = getelementptr inbounds %quuz, %quuz* %a, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=0]
   %1 = fsub float 0.000000e+00, undef             ; <float> [#uses=1]
   %2 = getelementptr inbounds %quuz, %quuz* %b, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2]
-  %3 = load float* %2, align 4                    ; <float> [#uses=1]
+  %3 = load float, float* %2, align 4                    ; <float> [#uses=1]
   %4 = getelementptr inbounds %quuz, %quuz* %a, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=1]
   %5 = fsub float %3, undef                       ; <float> [#uses=2]
   %6 = getelementptr inbounds %quuz, %quuz* %b, i32 0, i32 1, i32 0, i32 2 ; <float*> [#uses=2]
-  %7 = load float* %6, align 4                    ; <float> [#uses=1]
+  %7 = load float, float* %6, align 4                    ; <float> [#uses=1]
   %8 = fsub float %7, undef                       ; <float> [#uses=1]
   %9 = getelementptr inbounds %quuz, %quuz* %c, i32 0, i32 1, i32 0, i32 0 ; <float*> [#uses=2]
-  %10 = load float* %9, align 4                   ; <float> [#uses=1]
+  %10 = load float, float* %9, align 4                   ; <float> [#uses=1]
   %11 = fsub float %10, undef                     ; <float> [#uses=2]
   %12 = getelementptr inbounds %quuz, %quuz* %c, i32 0, i32 1, i32 0, i32 1 ; <float*> [#uses=2]
-  %13 = load float* %12, align 4                  ; <float> [#uses=1]
+  %13 = load float, float* %12, align 4                  ; <float> [#uses=1]
   %14 = fsub float %13, undef                     ; <float> [#uses=1]
-  %15 = load float* undef, align 4                ; <float> [#uses=1]
+  %15 = load float, float* undef, align 4                ; <float> [#uses=1]
   %16 = fsub float %15, undef                     ; <float> [#uses=1]
   %17 = fmul float %5, %16                        ; <float> [#uses=1]
   %18 = fsub float %17, 0.000000e+00              ; <float> [#uses=5]
@@ -52,11 +52,11 @@
   %27 = fadd float %26, undef                     ; <float> [#uses=1]
   %28 = fadd float %27, undef                     ; <float> [#uses=1]
   %29 = call arm_aapcs_vfpcc  float @sqrtf(float %28) readnone ; <float> [#uses=1]
-  %30 = load float* null, align 4                 ; <float> [#uses=2]
-  %31 = load float* %4, align 4                   ; <float> [#uses=2]
-  %32 = load float* %2, align 4                   ; <float> [#uses=2]
-  %33 = load float* null, align 4                 ; <float> [#uses=3]
-  %34 = load float* %6, align 4                   ; <float> [#uses=2]
+  %30 = load float, float* null, align 4                 ; <float> [#uses=2]
+  %31 = load float, float* %4, align 4                   ; <float> [#uses=2]
+  %32 = load float, float* %2, align 4                   ; <float> [#uses=2]
+  %33 = load float, float* null, align 4                 ; <float> [#uses=3]
+  %34 = load float, float* %6, align 4                   ; <float> [#uses=2]
   %35 = fsub float %33, %34                       ; <float> [#uses=2]
   %36 = fmul float %20, %35                       ; <float> [#uses=1]
   %37 = fsub float %36, undef                     ; <float> [#uses=1]
@@ -71,12 +71,12 @@
   %46 = fadd float %44, %45                       ; <float> [#uses=1]
   %47 = fmul float %33, %43                       ; <float> [#uses=1]
   %48 = fadd float %46, %47                       ; <float> [#uses=2]
-  %49 = load float* %9, align 4                   ; <float> [#uses=2]
+  %49 = load float, float* %9, align 4                   ; <float> [#uses=2]
   %50 = fsub float %30, %49                       ; <float> [#uses=1]
-  %51 = load float* %12, align 4                  ; <float> [#uses=3]
+  %51 = load float, float* %12, align 4                  ; <float> [#uses=3]
   %52 = fsub float %32, %51                       ; <float> [#uses=2]
-  %53 = load float* undef, align 4                ; <float> [#uses=2]
-  %54 = load float* %24, align 4                  ; <float> [#uses=2]
+  %53 = load float, float* undef, align 4                ; <float> [#uses=2]
+  %54 = load float, float* %24, align 4                  ; <float> [#uses=2]
   %55 = fmul float %54, undef                     ; <float> [#uses=1]
   %56 = fmul float undef, %52                     ; <float> [#uses=1]
   %57 = fsub float %55, %56                       ; <float> [#uses=1]
@@ -93,7 +93,7 @@
   %68 = fsub float %51, %31                       ; <float> [#uses=1]
   %69 = fsub float %53, %33                       ; <float> [#uses=1]
   %70 = fmul float undef, %67                     ; <float> [#uses=1]
-  %71 = load float* undef, align 4                ; <float> [#uses=2]
+  %71 = load float, float* undef, align 4                ; <float> [#uses=2]
   %72 = fmul float %71, %69                       ; <float> [#uses=1]
   %73 = fsub float %70, %72                       ; <float> [#uses=1]
   %74 = fmul float %71, %68                       ; <float> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll b/llvm/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll
index 0aff718..8a14804 100644
--- a/llvm/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll
+++ b/llvm/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll
@@ -11,7 +11,7 @@
 
 define arm_aapcs_vfpcc %bar* @aaa(%foo* nocapture %this, %quuz* %a, %quuz* %b, %quuz* %c, i8 zeroext %forced) {
 entry:
-  %0 = load %bar** undef, align 4                 ; <%bar*> [#uses=2]
+  %0 = load %bar*, %bar** undef, align 4                 ; <%bar*> [#uses=2]
   br i1 false, label %bb85, label %bb
 
 bb:                                               ; preds = %entry
@@ -43,10 +43,10 @@
   %18 = fadd float %17, undef                     ; <float> [#uses=1]
   %19 = call arm_aapcs_vfpcc  float @sqrtf(float %18) readnone ; <float> [#uses=2]
   %20 = fcmp ogt float %19, 0x3F1A36E2E0000000    ; <i1> [#uses=1]
-  %21 = load float* %1, align 4                   ; <float> [#uses=2]
-  %22 = load float* %3, align 4                   ; <float> [#uses=2]
-  %23 = load float* undef, align 4                ; <float> [#uses=2]
-  %24 = load float* %4, align 4                   ; <float> [#uses=2]
+  %21 = load float, float* %1, align 4                   ; <float> [#uses=2]
+  %22 = load float, float* %3, align 4                   ; <float> [#uses=2]
+  %23 = load float, float* undef, align 4                ; <float> [#uses=2]
+  %24 = load float, float* %4, align 4                   ; <float> [#uses=2]
   %25 = fsub float %23, %24                       ; <float> [#uses=2]
   %26 = fmul float 0.000000e+00, %25              ; <float> [#uses=1]
   %27 = fsub float %26, undef                     ; <float> [#uses=1]
@@ -59,11 +59,11 @@
   %34 = fadd float %32, %33                       ; <float> [#uses=1]
   %35 = fmul float %23, %31                       ; <float> [#uses=1]
   %36 = fadd float %34, %35                       ; <float> [#uses=1]
-  %37 = load float* %6, align 4                   ; <float> [#uses=2]
-  %38 = load float* %7, align 4                   ; <float> [#uses=2]
+  %37 = load float, float* %6, align 4                   ; <float> [#uses=2]
+  %38 = load float, float* %7, align 4                   ; <float> [#uses=2]
   %39 = fsub float %22, %38                       ; <float> [#uses=2]
-  %40 = load float* undef, align 4                ; <float> [#uses=1]
-  %41 = load float* null, align 4                 ; <float> [#uses=2]
+  %40 = load float, float* undef, align 4                ; <float> [#uses=1]
+  %41 = load float, float* null, align 4                 ; <float> [#uses=2]
   %42 = fmul float %41, undef                     ; <float> [#uses=1]
   %43 = fmul float undef, %39                     ; <float> [#uses=1]
   %44 = fsub float %42, %43                       ; <float> [#uses=1]
@@ -80,7 +80,7 @@
   %55 = fmul float undef, undef                   ; <float> [#uses=1]
   %56 = fsub float %54, %55                       ; <float> [#uses=1]
   %57 = fmul float undef, %53                     ; <float> [#uses=1]
-  %58 = load float* undef, align 4                ; <float> [#uses=2]
+  %58 = load float, float* undef, align 4                ; <float> [#uses=2]
   %59 = fmul float %58, undef                     ; <float> [#uses=1]
   %60 = fsub float %57, %59                       ; <float> [#uses=1]
   %61 = fmul float %58, undef                     ; <float> [#uses=1]
@@ -100,7 +100,7 @@
   br i1 %72, label %bb4.i97, label %ccc.exit98
 
 bb4.i97:                                          ; preds = %bb3.i
-  %73 = load %bar** undef, align 4                ; <%bar*> [#uses=0]
+  %73 = load %bar*, %bar** undef, align 4                ; <%bar*> [#uses=0]
   br label %ccc.exit98
 
 ccc.exit98:                                       ; preds = %bb4.i97, %bb3.i
diff --git a/llvm/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll b/llvm/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll
index 5de609b..d21b488 100644
--- a/llvm/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll
+++ b/llvm/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=arm-unknown-linux-gnueabi
 
 define void @"java.lang.String::getChars"([84 x i8]* %method, i32 %base_pc, [788 x i8]* %thread) {
-  %1 = load i32* undef                            ; <i32> [#uses=1]
+  %1 = load i32, i32* undef                            ; <i32> [#uses=1]
   %2 = sub i32 %1, 48                             ; <i32> [#uses=1]
   br i1 undef, label %stack_overflow, label %no_overflow
 
@@ -10,13 +10,13 @@
 
 no_overflow:                                      ; preds = %0
   %frame = inttoptr i32 %2 to [17 x i32]*         ; <[17 x i32]*> [#uses=4]
-  %3 = load i32* undef                            ; <i32> [#uses=1]
-  %4 = load i32* null                             ; <i32> [#uses=1]
+  %3 = load i32, i32* undef                            ; <i32> [#uses=1]
+  %4 = load i32, i32* null                             ; <i32> [#uses=1]
   %5 = getelementptr inbounds [17 x i32], [17 x i32]* %frame, i32 0, i32 13 ; <i32*> [#uses=1]
   %6 = bitcast i32* %5 to [8 x i8]**              ; <[8 x i8]**> [#uses=1]
-  %7 = load [8 x i8]** %6                         ; <[8 x i8]*> [#uses=1]
+  %7 = load [8 x i8]*, [8 x i8]** %6                         ; <[8 x i8]*> [#uses=1]
   %8 = getelementptr inbounds [17 x i32], [17 x i32]* %frame, i32 0, i32 12 ; <i32*> [#uses=1]
-  %9 = load i32* %8                               ; <i32> [#uses=1]
+  %9 = load i32, i32* %8                               ; <i32> [#uses=1]
   br i1 undef, label %bci_13, label %bci_4
 
 bci_13:                                           ; preds = %no_overflow
@@ -27,13 +27,13 @@
 
 bci_46:                                           ; preds = %bci_30
   %10 = sub i32 %4, %3                            ; <i32> [#uses=1]
-  %11 = load [8 x i8]** null                      ; <[8 x i8]*> [#uses=1]
+  %11 = load [8 x i8]*, [8 x i8]** null                      ; <[8 x i8]*> [#uses=1]
   %callee = bitcast [8 x i8]* %11 to [84 x i8]*   ; <[84 x i8]*> [#uses=1]
   %12 = bitcast i8* undef to i32*                 ; <i32*> [#uses=1]
-  %base_pc7 = load i32* %12                       ; <i32> [#uses=2]
+  %base_pc7 = load i32, i32* %12                       ; <i32> [#uses=2]
   %13 = add i32 %base_pc7, 0                      ; <i32> [#uses=1]
   %14 = inttoptr i32 %13 to void ([84 x i8]*, i32, [788 x i8]*)** ; <void ([84 x i8]*, i32, [788 x i8]*)**> [#uses=1]
-  %entry_point = load void ([84 x i8]*, i32, [788 x i8]*)** %14 ; <void ([84 x i8]*, i32, [788 x i8]*)*> [#uses=1]
+  %entry_point = load void ([84 x i8]*, i32, [788 x i8]*)*, void ([84 x i8]*, i32, [788 x i8]*)** %14 ; <void ([84 x i8]*, i32, [788 x i8]*)*> [#uses=1]
   %15 = getelementptr inbounds [17 x i32], [17 x i32]* %frame, i32 0, i32 1 ; <i32*> [#uses=1]
   %16 = ptrtoint i32* %15 to i32                  ; <i32> [#uses=1]
   %stack_pointer_addr9 = bitcast i8* undef to i32* ; <i32*> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll b/llvm/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll
index 1bc58f4..a1923ec 100644
--- a/llvm/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll
+++ b/llvm/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll
@@ -9,10 +9,10 @@
 
 no_overflow:                                      ; preds = %0
   %frame = inttoptr i32 %1 to [17 x i32]*         ; <[17 x i32]*> [#uses=4]
-  %2 = load i32* null                             ; <i32> [#uses=2]
+  %2 = load i32, i32* null                             ; <i32> [#uses=2]
   %3 = getelementptr inbounds [17 x i32], [17 x i32]* %frame, i32 0, i32 14 ; <i32*> [#uses=1]
-  %4 = load i32* %3                               ; <i32> [#uses=2]
-  %5 = load [8 x i8]** undef                      ; <[8 x i8]*> [#uses=2]
+  %4 = load i32, i32* %3                               ; <i32> [#uses=2]
+  %5 = load [8 x i8]*, [8 x i8]** undef                      ; <[8 x i8]*> [#uses=2]
   br i1 undef, label %bci_13, label %bci_4
 
 bci_13:                                           ; preds = %no_overflow
diff --git a/llvm/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll b/llvm/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll
index 52244aae..d6febe6 100644
--- a/llvm/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll
+++ b/llvm/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll
@@ -82,9 +82,9 @@
   ret void
 
 bb1567:                                           ; preds = %cond_true1254
-  %tmp1591 = load i64* getelementptr inbounds (%struct.CHESS_POSITION* @search, i32 0, i32 4) ; <i64> [#uses=1]
+  %tmp1591 = load i64, i64* getelementptr inbounds (%struct.CHESS_POSITION* @search, i32 0, i32 4) ; <i64> [#uses=1]
   %tmp1572 = tail call fastcc i32 @FirstOne()     ; <i32> [#uses=1]
-  %tmp1594 = load i32* undef                      ; <i32> [#uses=1]
+  %tmp1594 = load i32, i32* undef                      ; <i32> [#uses=1]
   %tmp1594.upgrd.5 = trunc i32 %tmp1594 to i8     ; <i8> [#uses=1]
   %shift.upgrd.6 = zext i8 %tmp1594.upgrd.5 to i64 ; <i64> [#uses=1]
   %tmp1595 = lshr i64 %tmp1591, %shift.upgrd.6    ; <i64> [#uses=1]
@@ -92,7 +92,7 @@
   %tmp1596 = and i32 %tmp1595.upgrd.7, 255        ; <i32> [#uses=1]
   %gep.upgrd.8 = zext i32 %tmp1596 to i64         ; <i64> [#uses=1]
   %tmp1598 = getelementptr [64 x [256 x i32]], [64 x [256 x i32]]* @bishop_mobility_rr45, i32 0, i32 %tmp1572, i64 %gep.upgrd.8 ; <i32*> [#uses=1]
-  %tmp1599 = load i32* %tmp1598                   ; <i32> [#uses=1]
+  %tmp1599 = load i32, i32* %tmp1598                   ; <i32> [#uses=1]
   %tmp1602 = sub i32 0, %tmp1599                  ; <i32> [#uses=1]
   br i1 undef, label %cond_next1637, label %cond_true1607
 
diff --git a/llvm/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll b/llvm/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll
index 9461643..16c4953 100644
--- a/llvm/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll
+++ b/llvm/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll
@@ -16,7 +16,7 @@
   %vla10 = alloca i8, i32 undef, align 1          ; <i8*> [#uses=1]
   %vla14 = alloca i8, i32 undef, align 1          ; <i8*> [#uses=1]
   %vla18 = alloca i8, i32 undef, align 1          ; <i8*> [#uses=1]
-  %tmp21 = load i32* undef                        ; <i32> [#uses=1]
+  %tmp21 = load i32, i32* undef                        ; <i32> [#uses=1]
   %0 = mul i32 1, %tmp21                          ; <i32> [#uses=1]
   %vla22 = alloca i8, i32 %0, align 1             ; <i8*> [#uses=1]
   call  void (...)* @zz(i8* getelementptr inbounds ([1 x i8]* @.str, i32 0, i32 0), i32 2, i32 1)
diff --git a/llvm/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/llvm/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
index caa00c1..24469cc 100644
--- a/llvm/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
+++ b/llvm/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
@@ -21,7 +21,7 @@
   br i1 %tst, label %bb.nph96, label %bb3
 
 bb3:                                              ; preds = %entry
-  %1 = load i64* %0, align 4                      ; <i64> [#uses=0]
+  %1 = load i64, i64* %0, align 4                      ; <i64> [#uses=0]
   ret i8 42
 
 bb.nph96:                                         ; preds = %entry
diff --git a/llvm/test/CodeGen/ARM/2010-05-19-Shuffles.ll b/llvm/test/CodeGen/ARM/2010-05-19-Shuffles.ll
index 587c0af..94d0f4a 100644
--- a/llvm/test/CodeGen/ARM/2010-05-19-Shuffles.ll
+++ b/llvm/test/CodeGen/ARM/2010-05-19-Shuffles.ll
@@ -14,7 +14,7 @@
 }
 
 define void @f3(<4 x i64>* %xp) nounwind {
-  %x = load <4 x i64>* %xp
+  %x = load <4 x i64>, <4 x i64>* %xp
   %y = shufflevector <4 x i64> %x, <4 x i64> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 1>
   store <4 x i64> %y, <4 x i64>* %xp
   ret void
diff --git a/llvm/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/llvm/test/CodeGen/ARM/2010-05-21-BuildVector.ll
index 6ec49be..6a6ccf3 100644
--- a/llvm/test/CodeGen/ARM/2010-05-21-BuildVector.ll
+++ b/llvm/test/CodeGen/ARM/2010-05-21-BuildVector.ll
@@ -3,7 +3,7 @@
 
 define void @test(float* %fltp, i32 %packedValue, float* %table) nounwind {
 entry:
-  %0 = load float* %fltp
+  %0 = load float, float* %fltp
   %1 = insertelement <4 x float> undef, float %0, i32 0
   %2 = shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> zeroinitializer
   %3 = shl i32 %packedValue, 16
@@ -11,28 +11,28 @@
   %.sum = add i32 %4, 4
   %5 = getelementptr inbounds float, float* %table, i32 %.sum
 ;CHECK: vldr s
-  %6 = load float* %5, align 4
+  %6 = load float, float* %5, align 4
   %tmp11 = insertelement <4 x float> undef, float %6, i32 0
   %7 = shl i32 %packedValue, 18
   %8 = ashr i32 %7, 30
   %.sum12 = add i32 %8, 4
   %9 = getelementptr inbounds float, float* %table, i32 %.sum12
 ;CHECK: vldr s
-  %10 = load float* %9, align 4
+  %10 = load float, float* %9, align 4
   %tmp9 = insertelement <4 x float> %tmp11, float %10, i32 1
   %11 = shl i32 %packedValue, 20
   %12 = ashr i32 %11, 30
   %.sum13 = add i32 %12, 4
   %13 = getelementptr inbounds float, float* %table, i32 %.sum13
 ;CHECK: vldr s
-  %14 = load float* %13, align 4
+  %14 = load float, float* %13, align 4
   %tmp7 = insertelement <4 x float> %tmp9, float %14, i32 2
   %15 = shl i32 %packedValue, 22
   %16 = ashr i32 %15, 30
   %.sum14 = add i32 %16, 4
   %17 = getelementptr inbounds float, float* %table, i32 %.sum14
 ;CHECK: vldr s
-  %18 = load float* %17, align 4
+  %18 = load float, float* %17, align 4
   %tmp5 = insertelement <4 x float> %tmp7, float %18, i32 3
   %19 = fmul <4 x float> %tmp5, %2
   %20 = bitcast float* %fltp to i8*
diff --git a/llvm/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll b/llvm/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
index f7ceb6e..f86c3ba 100644
--- a/llvm/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
+++ b/llvm/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
@@ -6,7 +6,7 @@
 define void @foo(%struct.__int8x8x2_t* nocapture %a, i8* %b) nounwind {
 entry:
  %0 = bitcast %struct.__int8x8x2_t* %a to i128*  ; <i128*> [#uses=1]
- %srcval = load i128* %0, align 8                ; <i128> [#uses=2]
+ %srcval = load i128, i128* %0, align 8                ; <i128> [#uses=2]
  %tmp6 = trunc i128 %srcval to i64               ; <i64> [#uses=1]
  %tmp8 = lshr i128 %srcval, 64                   ; <i128> [#uses=1]
  %tmp9 = trunc i128 %tmp8 to i64                 ; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll b/llvm/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll
index ac442aa..a1ab27e 100644
--- a/llvm/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll
+++ b/llvm/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll
@@ -18,10 +18,10 @@
   br i1 %1, label %bb, label %return
 
 bb:                                               ; preds = %bb445, %entry
-  %2 = load %struct.cellbox** undef, align 4      ; <%struct.cellbox*> [#uses=2]
+  %2 = load %struct.cellbox*, %struct.cellbox** undef, align 4      ; <%struct.cellbox*> [#uses=2]
   %3 = getelementptr inbounds %struct.cellbox, %struct.cellbox* %2, i32 0, i32 3 ; <i32*> [#uses=1]
   store i32 undef, i32* %3, align 4
-  %4 = load i32* undef, align 4                   ; <i32> [#uses=3]
+  %4 = load i32, i32* undef, align 4                   ; <i32> [#uses=3]
   %5 = icmp eq i32 undef, 1                       ; <i1> [#uses=1]
   br i1 %5, label %bb10, label %bb445
 
@@ -29,12 +29,12 @@
   br i1 undef, label %bb11, label %bb445
 
 bb11:                                             ; preds = %bb10
-  %6 = load %struct.tilebox** undef, align 4      ; <%struct.tilebox*> [#uses=3]
-  %7 = load %struct.termbox** null, align 4       ; <%struct.termbox*> [#uses=1]
+  %6 = load %struct.tilebox*, %struct.tilebox** undef, align 4      ; <%struct.tilebox*> [#uses=3]
+  %7 = load %struct.termbox*, %struct.termbox** null, align 4       ; <%struct.termbox*> [#uses=1]
   %8 = getelementptr inbounds %struct.tilebox, %struct.tilebox* %6, i32 0, i32 13 ; <i32*> [#uses=1]
-  %9 = load i32* %8, align 4                      ; <i32> [#uses=3]
+  %9 = load i32, i32* %8, align 4                      ; <i32> [#uses=3]
   %10 = getelementptr inbounds %struct.tilebox, %struct.tilebox* %6, i32 0, i32 15 ; <i32*> [#uses=1]
-  %11 = load i32* %10, align 4                    ; <i32> [#uses=1]
+  %11 = load i32, i32* %10, align 4                    ; <i32> [#uses=1]
   br i1 false, label %bb12, label %bb13
 
 bb12:                                             ; preds = %bb11
@@ -77,7 +77,7 @@
 
 bb36:                                             ; preds = %bb43.loopexit, %bb36
   %termptr.0478 = phi %struct.termbox* [ %42, %bb36 ], [ %7, %bb43.loopexit ] ; <%struct.termbox*> [#uses=1]
-  %30 = load i32* undef, align 4                  ; <i32> [#uses=1]
+  %30 = load i32, i32* undef, align 4                  ; <i32> [#uses=1]
   %31 = sub nsw i32 %30, %9                       ; <i32> [#uses=1]
   %32 = sitofp i32 %31 to double                  ; <double> [#uses=1]
   %33 = fdiv double %32, 0.000000e+00             ; <double> [#uses=1]
@@ -93,7 +93,7 @@
   %40 = add i32 %iftmp.47.0, 0                    ; <i32> [#uses=1]
   store i32 %40, i32* undef, align 4
   %41 = getelementptr inbounds %struct.termbox, %struct.termbox* %termptr.0478, i32 0, i32 0 ; <%struct.termbox**> [#uses=1]
-  %42 = load %struct.termbox** %41, align 4       ; <%struct.termbox*> [#uses=2]
+  %42 = load %struct.termbox*, %struct.termbox** %41, align 4       ; <%struct.termbox*> [#uses=2]
   %43 = icmp eq %struct.termbox* %42, null        ; <i1> [#uses=1]
   br i1 %43, label %bb52.loopexit, label %bb36
 
@@ -128,7 +128,7 @@
 
 bb249:                                            ; preds = %bb248
   %46 = getelementptr inbounds %struct.cellbox, %struct.cellbox* %2, i32 0, i32 21, i32 undef ; <%struct.tilebox**> [#uses=1]
-  %47 = load %struct.tilebox** %46, align 4       ; <%struct.tilebox*> [#uses=1]
+  %47 = load %struct.tilebox*, %struct.tilebox** %46, align 4       ; <%struct.tilebox*> [#uses=1]
   %48 = getelementptr inbounds %struct.tilebox, %struct.tilebox* %47, i32 0, i32 11 ; <i32*> [#uses=1]
   store i32 undef, i32* %48, align 4
   unreachable
diff --git a/llvm/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll b/llvm/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll
index c33b16e..4ca8ef8d 100644
--- a/llvm/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll
+++ b/llvm/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll
@@ -35,7 +35,7 @@
   %.loc = alloca i32                              ; <i32*> [#uses=2]
   %tmp.i = getelementptr inbounds %"class.llvm::StringInit", %"class.llvm::StringInit"* %this, i32 0, i32 0, i32 4 ; <i8*> [#uses=1]
   %0 = bitcast i8* %tmp.i to %"struct.llvm::Init"** ; <%"struct.llvm::Init"**> [#uses=1]
-  %tmp2.i = load %"struct.llvm::Init"** %0        ; <%"struct.llvm::Init"*> [#uses=2]
+  %tmp2.i = load %"struct.llvm::Init"*, %"struct.llvm::Init"** %0        ; <%"struct.llvm::Init"*> [#uses=2]
   %1 = icmp eq %"struct.llvm::Init"* %tmp2.i, null ; <i1> [#uses=1]
   br i1 %1, label %entry.return_crit_edge, label %tmpbb
 
@@ -53,16 +53,16 @@
 
 if.then:                                          ; preds = %tmpbb
   %tmp2.i.i.i.i = getelementptr inbounds %"class.llvm::StringInit", %"class.llvm::StringInit"* %this, i32 0, i32 1, i32 0, i32 0 ; <i8**> [#uses=1]
-  %tmp3.i.i.i.i = load i8** %tmp2.i.i.i.i         ; <i8*> [#uses=2]
+  %tmp3.i.i.i.i = load i8*, i8** %tmp2.i.i.i.i         ; <i8*> [#uses=2]
   %arrayidx.i.i.i.i = getelementptr inbounds i8, i8* %tmp3.i.i.i.i, i32 -12 ; <i8*> [#uses=1]
   %tmp.i.i.i = bitcast i8* %arrayidx.i.i.i.i to i32* ; <i32*> [#uses=1]
-  %tmp2.i.i.i = load i32* %tmp.i.i.i              ; <i32> [#uses=1]
+  %tmp2.i.i.i = load i32, i32* %tmp.i.i.i              ; <i32> [#uses=1]
   %tmp.i5 = getelementptr inbounds %"class.llvm::Record", %"class.llvm::Record"* %R, i32 0, i32 4 ; <%"class.std::vector"*> [#uses=1]
   %tmp2.i.i = getelementptr inbounds %"class.llvm::Record", %"class.llvm::Record"* %R, i32 0, i32 4, i32 0, i32 4 ; <i8*> [#uses=1]
   %4 = bitcast i8* %tmp2.i.i to %"class.llvm::RecordVal"** ; <%"class.llvm::RecordVal"**> [#uses=1]
-  %tmp3.i.i6 = load %"class.llvm::RecordVal"** %4 ; <%"class.llvm::RecordVal"*> [#uses=1]
+  %tmp3.i.i6 = load %"class.llvm::RecordVal"*, %"class.llvm::RecordVal"** %4 ; <%"class.llvm::RecordVal"*> [#uses=1]
   %tmp5.i.i = bitcast %"class.std::vector"* %tmp.i5 to %"class.llvm::RecordVal"** ; <%"class.llvm::RecordVal"**> [#uses=1]
-  %tmp6.i.i = load %"class.llvm::RecordVal"** %tmp5.i.i ; <%"class.llvm::RecordVal"*> [#uses=5]
+  %tmp6.i.i = load %"class.llvm::RecordVal"*, %"class.llvm::RecordVal"** %tmp5.i.i ; <%"class.llvm::RecordVal"*> [#uses=5]
   %sub.ptr.lhs.cast.i.i = ptrtoint %"class.llvm::RecordVal"* %tmp3.i.i6 to i32 ; <i32> [#uses=1]
   %sub.ptr.rhs.cast.i.i = ptrtoint %"class.llvm::RecordVal"* %tmp6.i.i to i32 ; <i32> [#uses=1]
   %sub.ptr.sub.i.i = sub i32 %sub.ptr.lhs.cast.i.i, %sub.ptr.rhs.cast.i.i ; <i32> [#uses=1]
@@ -71,7 +71,7 @@
 
 codeRepl:                                         ; preds = %if.then
   %targetBlock = call i1 @_ZNK4llvm7VarInit12getFieldInitERNS_6RecordEPKNS_9RecordValERKSs_for.cond.i(i32 %sub.ptr.div.i.i, %"class.llvm::RecordVal"* %tmp6.i.i, i32 %tmp2.i.i.i, i8* %tmp3.i.i.i.i, i32* %.loc) ; <i1> [#uses=1]
-  %.reload = load i32* %.loc                      ; <i32> [#uses=3]
+  %.reload = load i32, i32* %.loc                      ; <i32> [#uses=3]
   br i1 %targetBlock, label %for.cond.i.return_crit_edge, label %_ZN4llvm6Record8getValueENS_9StringRefE.exit
 
 for.cond.i.return_crit_edge:                      ; preds = %codeRepl
@@ -101,7 +101,7 @@
 
 lor.lhs.false:                                    ; preds = %land.lhs.true
   %tmp.i3 = getelementptr inbounds %"class.llvm::RecordVal", %"class.llvm::RecordVal"* %tmp6.i.i, i32 %.reload, i32 3 ; <%"struct.llvm::Init"**> [#uses=1]
-  %tmp2.i4 = load %"struct.llvm::Init"** %tmp.i3  ; <%"struct.llvm::Init"*> [#uses=2]
+  %tmp2.i4 = load %"struct.llvm::Init"*, %"struct.llvm::Init"** %tmp.i3  ; <%"struct.llvm::Init"*> [#uses=2]
   %5 = icmp eq %"struct.llvm::Init"* %tmp2.i4, null ; <i1> [#uses=1]
   br i1 %5, label %lor.lhs.false.if.end_crit_edge, label %tmpbb1
 
@@ -122,7 +122,7 @@
 
 if.end:                                           ; preds = %.if.end_crit_edge, %lor.lhs.false.if.end_crit_edge, %if.then6.if.end_crit_edge
   %tmp.i1 = getelementptr inbounds %"class.llvm::RecordVal", %"class.llvm::RecordVal"* %tmp6.i.i, i32 %.reload, i32 3 ; <%"struct.llvm::Init"**> [#uses=1]
-  %tmp2.i2 = load %"struct.llvm::Init"** %tmp.i1  ; <%"struct.llvm::Init"*> [#uses=3]
+  %tmp2.i2 = load %"struct.llvm::Init"*, %"struct.llvm::Init"** %tmp.i1  ; <%"struct.llvm::Init"*> [#uses=3]
   %8 = bitcast %"class.llvm::StringInit"* %this to %"struct.llvm::Init"* ; <%"struct.llvm::Init"*> [#uses=1]
   %cmp19 = icmp eq %"struct.llvm::Init"* %tmp2.i2, %8 ; <i1> [#uses=1]
   br i1 %cmp19, label %cond.false, label %cond.end
@@ -133,9 +133,9 @@
 
 cond.end:                                         ; preds = %if.end
   %9 = bitcast %"struct.llvm::Init"* %tmp2.i2 to %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*** ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)***> [#uses=1]
-  %10 = load %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*** %9 ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)**> [#uses=1]
+  %10 = load %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)**, %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*** %9 ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)**> [#uses=1]
   %vfn = getelementptr inbounds %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*, %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)** %10, i32 8 ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)**> [#uses=1]
-  %11 = load %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)** %vfn ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*> [#uses=1]
+  %11 = load %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*, %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)** %vfn ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*> [#uses=1]
   %call25 = tail call %"struct.llvm::Init"* %11(%"struct.llvm::Init"* %tmp2.i2, %"class.llvm::Record"* %R, %"class.llvm::RecordVal"* %RV, %"class.std::basic_string"* %FieldName) ; <%"struct.llvm::Init"*> [#uses=1]
   ret %"struct.llvm::Init"* %call25
 
diff --git a/llvm/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll b/llvm/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
index 96bc9c4..b341a83 100644
--- a/llvm/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
+++ b/llvm/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
@@ -9,7 +9,7 @@
 entry:
   tail call void @llvm.dbg.value(metadata i8* %buf, i64 0, metadata !0, metadata !{!"0x102"}), !dbg !15
   tail call void @llvm.dbg.value(metadata i32 %nbytes, i64 0, metadata !8, metadata !{!"0x102"}), !dbg !16
-  %tmp = load i32* @length, !dbg !17              ; <i32> [#uses=3]
+  %tmp = load i32, i32* @length, !dbg !17              ; <i32> [#uses=3]
   %cmp = icmp eq i32 %tmp, -1, !dbg !17           ; <i1> [#uses=1]
   %cmp.not = xor i1 %cmp, true                    ; <i1> [#uses=1]
   %cmp3 = icmp ult i32 %tmp, %nbytes, !dbg !17    ; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll b/llvm/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll
index fcabc900..c7ef46c 100644
--- a/llvm/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll
+++ b/llvm/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll
@@ -7,7 +7,7 @@
 entry:
   %0 = shufflevector <2 x i64> undef, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 2> ; <<2 x i64>> [#uses=1]
   store <2 x i64> %0, <2 x i64>* undef, align 16
-  %val4723 = load <8 x i16>* undef                ; <<8 x i16>> [#uses=1]
+  %val4723 = load <8 x i16>, <8 x i16>* undef                ; <<8 x i16>> [#uses=1]
   call void @PrintShortX(i8* getelementptr inbounds ([21 x i8]* @.str271, i32 0, i32 0), <8 x i16> %val4723, i32 0) nounwind
   ret i32 undef
 }
diff --git a/llvm/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll b/llvm/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
index fcc9381..d1259d5 100644
--- a/llvm/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
+++ b/llvm/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
@@ -30,9 +30,9 @@
 define internal void @_ZN1AD1Ev(%struct.A* nocapture %this) nounwind ssp align 2 {
 entry:
   %tmp.i = getelementptr inbounds %struct.A, %struct.A* %this, i32 0, i32 0 ; <i32*> [#uses=1]
-  %tmp2.i = load i32* %tmp.i                      ; <i32> [#uses=1]
+  %tmp2.i = load i32, i32* %tmp.i                      ; <i32> [#uses=1]
   %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str4, i32 0, i32 0), i32 %tmp2.i) nounwind ; <i32> [#uses=0]
-  %tmp3.i = load i32* @d                          ; <i32> [#uses=1]
+  %tmp3.i = load i32, i32* @d                          ; <i32> [#uses=1]
   %inc.i = add nsw i32 %tmp3.i, 1                 ; <i32> [#uses=1]
   store i32 %inc.i, i32* @d
   ret void
@@ -59,13 +59,13 @@
   %1 = tail call i8* @__cxa_begin_catch(i8* %exn) nounwind ; <i8*> [#uses=0]
   %puts = tail call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @str1, i32 0, i32 0)) ; <i32> [#uses=0]
   %call.i.i3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str4, i32 0, i32 0), i32 2) nounwind ; <i32> [#uses=0]
-  %tmp3.i.i = load i32* @d                        ; <i32> [#uses=1]
+  %tmp3.i.i = load i32, i32* @d                        ; <i32> [#uses=1]
   %inc.i.i4 = add nsw i32 %tmp3.i.i, 1            ; <i32> [#uses=1]
   store i32 %inc.i.i4, i32* @d
   tail call void @__cxa_end_catch()
-  %tmp13 = load i32* @d                           ; <i32> [#uses=1]
+  %tmp13 = load i32, i32* @d                           ; <i32> [#uses=1]
   %call14 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([18 x i8]* @.str2, i32 0, i32 0), i32 2, i32 %tmp13) ; <i32> [#uses=0]
-  %tmp16 = load i32* @d                           ; <i32> [#uses=1]
+  %tmp16 = load i32, i32* @d                           ; <i32> [#uses=1]
   %cmp = icmp ne i32 %tmp16, 2                    ; <i1> [#uses=1]
   %conv = zext i1 %cmp to i32                     ; <i32> [#uses=1]
   ret i32 %conv
diff --git a/llvm/test/CodeGen/ARM/2010-08-04-EHCrash.ll b/llvm/test/CodeGen/ARM/2010-08-04-EHCrash.ll
index 4b47085..13214c5 100644
--- a/llvm/test/CodeGen/ARM/2010-08-04-EHCrash.ll
+++ b/llvm/test/CodeGen/ARM/2010-08-04-EHCrash.ll
@@ -12,17 +12,17 @@
   br label %return
 
 bb:                                               ; No predecessors!
-  %eh_select = load i32* %eh_selector             ; <i32> [#uses=1]
+  %eh_select = load i32, i32* %eh_selector             ; <i32> [#uses=1]
   store i32 %eh_select, i32* %save_filt.936, align 4
-  %eh_value = load i8** %eh_exception             ; <i8*> [#uses=1]
+  %eh_value = load i8*, i8** %eh_exception             ; <i8*> [#uses=1]
   store i8* %eh_value, i8** %save_eptr.935, align 4
   invoke arm_apcscc  void @func3()
           to label %invcont unwind label %lpad
 
 invcont:                                          ; preds = %bb
-  %tmp6 = load i8** %save_eptr.935, align 4          ; <i8*> [#uses=1]
+  %tmp6 = load i8*, i8** %save_eptr.935, align 4          ; <i8*> [#uses=1]
   store i8* %tmp6, i8** %eh_exception, align 4
-  %tmp7 = load i32* %save_filt.936, align 4          ; <i32> [#uses=1]
+  %tmp7 = load i32, i32* %save_filt.936, align 4          ; <i32> [#uses=1]
   store i32 %tmp7, i32* %eh_selector, align 4
   br label %Unwind
 
@@ -38,7 +38,7 @@
               cleanup
   %exn = extractvalue { i8*, i32 } %eh_ptr, 0
   store i8* %exn, i8** %eh_exception
-  %eh_ptr13 = load i8** %eh_exception             ; <i8*> [#uses=1]
+  %eh_ptr13 = load i8*, i8** %eh_exception             ; <i8*> [#uses=1]
   %eh_select14 = extractvalue { i8*, i32 } %eh_ptr, 1
   store i32 %eh_select14, i32* %eh_selector
   br label %ppad
@@ -47,7 +47,7 @@
   br label %bb12
 
 Unwind:
-  %eh_ptr15 = load i8** %eh_exception
+  %eh_ptr15 = load i8*, i8** %eh_exception
   call arm_apcscc  void @_Unwind_SjLj_Resume(i8* %eh_ptr15)
   unreachable
 }
diff --git a/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll
index 7f01cb7..5d30995 100644
--- a/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll
+++ b/llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll
@@ -13,13 +13,13 @@
 
 bb:                                               ; preds = %entry
   %1 = getelementptr inbounds %struct.SVal, %struct.SVal* %location, i32 0, i32 1, !dbg !29 ; <i32*> [#uses=1]
-  %2 = load i32* %1, align 8, !dbg !29            ; <i32> [#uses=1]
+  %2 = load i32, i32* %1, align 8, !dbg !29            ; <i32> [#uses=1]
   %3 = add i32 %2, %i, !dbg !29                   ; <i32> [#uses=1]
   br label %bb2, !dbg !29
 
 bb1:                                              ; preds = %entry
   %4 = getelementptr inbounds %struct.SVal, %struct.SVal* %location, i32 0, i32 1, !dbg !30 ; <i32*> [#uses=1]
-  %5 = load i32* %4, align 8, !dbg !30            ; <i32> [#uses=1]
+  %5 = load i32, i32* %4, align 8, !dbg !30            ; <i32> [#uses=1]
   %6 = sub i32 %5, 1, !dbg !30                    ; <i32> [#uses=1]
   br label %bb2, !dbg !30
 
@@ -58,11 +58,11 @@
   store i32 1, i32* %1, align 8, !dbg !42
   %2 = getelementptr inbounds %struct.SVal, %struct.SVal* %0, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1]
   %3 = getelementptr inbounds %struct.SVal, %struct.SVal* %v, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1]
-  %4 = load i8** %3, align 8, !dbg !43            ; <i8*> [#uses=1]
+  %4 = load i8*, i8** %3, align 8, !dbg !43            ; <i8*> [#uses=1]
   store i8* %4, i8** %2, align 8, !dbg !43
   %5 = getelementptr inbounds %struct.SVal, %struct.SVal* %0, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1]
   %6 = getelementptr inbounds %struct.SVal, %struct.SVal* %v, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1]
-  %7 = load i32* %6, align 8, !dbg !43            ; <i32> [#uses=1]
+  %7 = load i32, i32* %6, align 8, !dbg !43            ; <i32> [#uses=1]
   store i32 %7, i32* %5, align 8, !dbg !43
   %8 = call i32 @_Z3fooi4SVal(i32 2, %struct.SVal* noalias %0) nounwind, !dbg !43 ; <i32> [#uses=0]
   call void @llvm.dbg.value(metadata i32 %8, i64 0, metadata !44, metadata !{!"0x102"}), !dbg !43
diff --git a/llvm/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll b/llvm/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
index 5295a0a..09428ce 100644
--- a/llvm/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
+++ b/llvm/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
@@ -39,29 +39,29 @@
   %scevgep97.i = getelementptr i32, i32* %in, i32 undef
   %tmp198410 = or i32 undef, 1
   %scevgep.i48 = getelementptr i32, i32* %in, i32 undef
-  %0 = load i32* %scevgep.i48, align 4
+  %0 = load i32, i32* %scevgep.i48, align 4
   %1 = add nsw i32 %0, 0
   store i32 %1, i32* undef, align 4
   %asmtmp.i.i33.i.i.i = tail call %0 asm "smull\09$0, $1, $2, $3", "=&r,=&r,%r,r,~{cc}"(i32 undef, i32 1518500250) nounwind
   %asmresult1.i.i34.i.i.i = extractvalue %0 %asmtmp.i.i33.i.i.i, 1
   %2 = shl i32 %asmresult1.i.i34.i.i.i, 1
-  %3 = load i32* null, align 4
-  %4 = load i32* undef, align 4
+  %3 = load i32, i32* null, align 4
+  %4 = load i32, i32* undef, align 4
   %5 = sub nsw i32 %3, %4
-  %6 = load i32* undef, align 4
-  %7 = load i32* null, align 4
+  %6 = load i32, i32* undef, align 4
+  %7 = load i32, i32* null, align 4
   %8 = sub nsw i32 %6, %7
-  %9 = load i32* %scevgep97.i, align 4
-  %10 = load i32* undef, align 4
+  %9 = load i32, i32* %scevgep97.i, align 4
+  %10 = load i32, i32* undef, align 4
   %11 = sub nsw i32 %9, %10
-  %12 = load i32* null, align 4
-  %13 = load i32* %scevgep101.i, align 4
+  %12 = load i32, i32* null, align 4
+  %13 = load i32, i32* %scevgep101.i, align 4
   %14 = sub nsw i32 %12, %13
-  %15 = load i32* %scevgep.i48, align 4
-  %16 = load i32* null, align 4
+  %15 = load i32, i32* %scevgep.i48, align 4
+  %16 = load i32, i32* null, align 4
   %17 = add nsw i32 %16, %15
   %18 = sub nsw i32 %15, %16
-  %19 = load i32* undef, align 4
+  %19 = load i32, i32* undef, align 4
   %20 = add nsw i32 %19, %2
   %21 = sub nsw i32 %19, %2
   %22 = add nsw i32 %14, %5
diff --git a/llvm/test/CodeGen/ARM/2010-12-08-tpsoft.ll b/llvm/test/CodeGen/ARM/2010-12-08-tpsoft.ll
index 1351a26..9dfe4da 100644
--- a/llvm/test/CodeGen/ARM/2010-12-08-tpsoft.ll
+++ b/llvm/test/CodeGen/ARM/2010-12-08-tpsoft.ll
@@ -12,7 +12,7 @@
 
 define arm_aapcs_vfpcc i32 @main() nounwind {
 entry:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   switch i32 %0, label %bb2 [
     i32 12, label %bb
     i32 13, label %bb1
diff --git a/llvm/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll b/llvm/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
index c58a79a..8db166a 100644
--- a/llvm/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
+++ b/llvm/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
@@ -31,9 +31,9 @@
 ; OBJ-NEXT:     Section: .bss
 
 define i32 @main(i32 %argc) nounwind {
-  %1 = load i32* @sum, align 4
+  %1 = load i32, i32* @sum, align 4
   %2 = getelementptr [80 x i8], [80 x i8]* @array00, i32 0, i32 %argc
-  %3 = load i8* %2
+  %3 = load i8, i8* %2
   %4 = zext i8 %3 to i32
   %5 = add i32 %1, %4
   ret i32 %5
diff --git a/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
index 67dda67..5dc07e4 100644
--- a/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
+++ b/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
@@ -31,7 +31,7 @@
 define zeroext i8 @get1(i8 zeroext %a) nounwind optsize {
 entry:
   tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !30
-  %0 = load i8* @x1, align 4, !dbg !30
+  %0 = load i8, i8* @x1, align 4, !dbg !30
   tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !30
   store i8 %a, i8* @x1, align 4, !dbg !30
   ret i8 %0, !dbg !31
@@ -42,7 +42,7 @@
 define zeroext i8 @get2(i8 zeroext %a) nounwind optsize {
 entry:
   tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !18, metadata !{!"0x102"}), !dbg !32
-  %0 = load i8* @x2, align 4, !dbg !32
+  %0 = load i8, i8* @x2, align 4, !dbg !32
   tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !19, metadata !{!"0x102"}), !dbg !32
   store i8 %a, i8* @x2, align 4, !dbg !32
   ret i8 %0, !dbg !33
@@ -51,7 +51,7 @@
 define zeroext i8 @get3(i8 zeroext %a) nounwind optsize {
 entry:
   tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !21, metadata !{!"0x102"}), !dbg !34
-  %0 = load i8* @x3, align 4, !dbg !34
+  %0 = load i8, i8* @x3, align 4, !dbg !34
   tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !22, metadata !{!"0x102"}), !dbg !34
   store i8 %a, i8* @x3, align 4, !dbg !34
   ret i8 %0, !dbg !35
@@ -60,7 +60,7 @@
 define zeroext i8 @get4(i8 zeroext %a) nounwind optsize {
 entry:
   tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !24, metadata !{!"0x102"}), !dbg !36
-  %0 = load i8* @x4, align 4, !dbg !36
+  %0 = load i8, i8* @x4, align 4, !dbg !36
   tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !25, metadata !{!"0x102"}), !dbg !36
   store i8 %a, i8* @x4, align 4, !dbg !36
   ret i8 %0, !dbg !37
@@ -69,7 +69,7 @@
 define zeroext i8 @get5(i8 zeroext %a) nounwind optsize {
 entry:
   tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !27, metadata !{!"0x102"}), !dbg !38
-  %0 = load i8* @x5, align 4, !dbg !38
+  %0 = load i8, i8* @x5, align 4, !dbg !38
   tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !28, metadata !{!"0x102"}), !dbg !38
   store i8 %a, i8* @x5, align 4, !dbg !38
   ret i8 %0, !dbg !39
diff --git a/llvm/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll b/llvm/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll
index b65c41f..e96641b 100644
--- a/llvm/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll
+++ b/llvm/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll
@@ -39,10 +39,10 @@
   unreachable
 
 bb135:                                            ; preds = %meshBB396
-  %uriHash.1.phi.load = load i32* undef
-  %.load120 = load i8*** %.SV116
-  %.phi24 = load i8* null
-  %.phi26 = load i8** null
+  %uriHash.1.phi.load = load i32, i32* undef
+  %.load120 = load i8**, i8*** %.SV116
+  %.phi24 = load i8, i8* null
+  %.phi26 = load i8*, i8** null
   store i8 %.phi24, i8* %.phi26, align 1
   %0 = getelementptr inbounds i8, i8* %.phi26, i32 1
   store i8* %0, i8** %.load120, align 4
@@ -52,7 +52,7 @@
   %1 = mul i32 %uriHash.1.phi.load, 1000003
   %2 = xor i32 0, %1
   store i32 %2, i32* null
-  %3 = load i8* null, align 1
+  %3 = load i8, i8* null, align 1
   %4 = icmp eq i8 %3, 0
   store i8* %0, i8** undef
   br i1 %4, label %meshBB472, label %bb131
diff --git a/llvm/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll b/llvm/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll
index 521c9a2..c447a1f 100644
--- a/llvm/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll
+++ b/llvm/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll
@@ -20,7 +20,7 @@
   %1 = getelementptr inbounds %struct.ui, %struct.ui* %0, i32 0, i32 0
   store %struct.mo* undef, %struct.mo** %1, align 4
   %2 = getelementptr inbounds %struct.ui, %struct.ui* %0, i32 0, i32 5
-  %3 = load i64* %2, align 4
+  %3 = load i64, i64* %2, align 4
   %4 = call i32 @mo_create_nnm(%struct.mo* undef, i64 %3, i32** undef) nounwind
   br i1 undef, label %bb3, label %bb2
 
diff --git a/llvm/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/llvm/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
index 07180d8..92bdd19 100644
--- a/llvm/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
+++ b/llvm/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll
@@ -21,8 +21,8 @@
   %x = getelementptr %struct.Outer, %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 0
   %y = getelementptr %struct.Outer, %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 1
   %inc = add i32 %i.022, 1
-  %tmp8 = load i32* %x, align 4
-  %tmp11 = load i32* %y, align 4
+  %tmp8 = load i32, i32* %x, align 4
+  %tmp11 = load i32, i32* %y, align 4
   %mul = mul nsw i32 %tmp11, %tmp8
   %tobool.i14 = icmp eq i32 %mul, 0
   br i1 %tobool.i14, label %_Z14printIsNotZeroi.exit17, label %if.then.i16
@@ -35,7 +35,7 @@
 
 _Z14printIsNotZeroi.exit17.for.body_crit_edge:    ; preds = %_Z14printIsNotZeroi.exit17
   %b.phi.trans.insert = getelementptr %struct.Outer, %struct.Outer* @oStruct, i32 0, i32 1, i32 %inc, i32 3
-  %tmp3.pre = load i8* %b.phi.trans.insert, align 1
+  %tmp3.pre = load i8, i8* %b.phi.trans.insert, align 1
   %phitmp27 = icmp eq i8 %val8, 0
   br label %for.body
 
diff --git a/llvm/test/CodeGen/ARM/2011-04-07-schediv.ll b/llvm/test/CodeGen/ARM/2011-04-07-schediv.ll
index 305d4cd..7f60315 100644
--- a/llvm/test/CodeGen/ARM/2011-04-07-schediv.ll
+++ b/llvm/test/CodeGen/ARM/2011-04-07-schediv.ll
@@ -21,7 +21,7 @@
   %uglygep = getelementptr i8, i8* %src_copy_start6, i32 %tmp
   %src_copy_start_addr.04 = bitcast i8* %uglygep to float*
   %dst_copy_start_addr.03 = getelementptr float, float* %dst_copy_start, i32 %j.05
-  %1 = load float* %src_copy_start_addr.04, align 4
+  %1 = load float, float* %src_copy_start_addr.04, align 4
   store float %1, float* %dst_copy_start_addr.03, align 4
   %2 = add i32 %j.05, 1
   %exitcond = icmp eq i32 %2, %src_width
diff --git a/llvm/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll b/llvm/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll
index f497fc2..54fc9b0 100644
--- a/llvm/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll
+++ b/llvm/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll
@@ -19,7 +19,7 @@
 ; CHECK: mov{{.*}} r{{[0-9]+}}, #{{[01]}}
 ; CHECK-NOT: mov r{{[0-9]+}}, #{{[01]}}
   %arrayidx = getelementptr i32, i32* %A, i32 %0
-  %tmp4 = load i32* %arrayidx, align 4
+  %tmp4 = load i32, i32* %arrayidx, align 4
   %cmp6 = icmp eq i32 %tmp4, %value
   br i1 %cmp6, label %return, label %for.inc
 
diff --git a/llvm/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll b/llvm/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll
index e9a6793..8ad654f 100644
--- a/llvm/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll
+++ b/llvm/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll
@@ -7,7 +7,7 @@
 define void @_Z8TestCasev() nounwind ssp {
 entry:
   %a = alloca float, align 4
-  %tmp = load float* %a, align 4
+  %tmp = load float, float* %a, align 4
   call void asm sideeffect "", "w,~{s0},~{s16}"(float %tmp) nounwind, !srcloc !0
   ret void
 }
diff --git a/llvm/test/CodeGen/ARM/2011-04-26-SchedTweak.ll b/llvm/test/CodeGen/ARM/2011-04-26-SchedTweak.ll
index 057c199..132b78e 100644
--- a/llvm/test/CodeGen/ARM/2011-04-26-SchedTweak.ll
+++ b/llvm/test/CodeGen/ARM/2011-04-26-SchedTweak.ll
@@ -22,7 +22,7 @@
   %block_count = alloca i32, align 4
   %index_cache = alloca i32, align 4
   store i32 0, i32* %index_cache, align 4
-  %tmp = load i32* @G, align 4
+  %tmp = load i32, i32* @G, align 4
   %tmp1 = call i32 @bar(i32 0, i32 0, i32 %tmp) nounwind
   switch i32 %tmp1, label %bb8 [
     i32 0, label %bb
@@ -31,7 +31,7 @@
   ]
 
 bb:
-  %tmp2 = load i32* @G, align 4
+  %tmp2 = load i32, i32* @G, align 4
   %tmp4 = icmp eq i32 %tmp2, 0
   br i1 %tmp4, label %bb1, label %bb8
 
@@ -41,8 +41,8 @@
 ; CHECK: blx _Get
 ; CHECK: umull
 ; CHECK: blx _foo
-  %tmp5 = load i32* %block_size, align 4
-  %tmp6 = load i32* %block_count, align 4
+  %tmp5 = load i32, i32* %block_size, align 4
+  %tmp6 = load i32, i32* %block_count, align 4
   %tmp7 = call %struct.FF* @Get() nounwind
   store %struct.FF* %tmp7, %struct.FF** @FuncPtr, align 4
   %tmp10 = zext i32 %tmp6 to i64
diff --git a/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
index 3edc946..cc84347 100644
--- a/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
+++ b/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll
@@ -30,7 +30,7 @@
 
 define i32 @get1(i32 %a) nounwind optsize ssp {
   tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !30
-  %1 = load i32* @x1, align 4, !dbg !31
+  %1 = load i32, i32* @x1, align 4, !dbg !31
   tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !31
   store i32 %a, i32* @x1, align 4, !dbg !31
   ret i32 %1, !dbg !31
@@ -38,7 +38,7 @@
 
 define i32 @get2(i32 %a) nounwind optsize ssp {
   tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !32
-  %1 = load i32* @x2, align 4, !dbg !33
+  %1 = load i32, i32* @x2, align 4, !dbg !33
   tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !33
   store i32 %a, i32* @x2, align 4, !dbg !33
   ret i32 %1, !dbg !33
@@ -46,7 +46,7 @@
 
 define i32 @get3(i32 %a) nounwind optsize ssp {
   tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !34
-  %1 = load i32* @x3, align 4, !dbg !35
+  %1 = load i32, i32* @x3, align 4, !dbg !35
   tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !17, metadata !{!"0x102"}), !dbg !35
   store i32 %a, i32* @x3, align 4, !dbg !35
   ret i32 %1, !dbg !35
@@ -54,7 +54,7 @@
 
 define i32 @get4(i32 %a) nounwind optsize ssp {
   tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !19, metadata !{!"0x102"}), !dbg !36
-  %1 = load i32* @x4, align 4, !dbg !37
+  %1 = load i32, i32* @x4, align 4, !dbg !37
   tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !20, metadata !{!"0x102"}), !dbg !37
   store i32 %a, i32* @x4, align 4, !dbg !37
   ret i32 %1, !dbg !37
@@ -62,7 +62,7 @@
 
 define i32 @get5(i32 %a) nounwind optsize ssp {
   tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !27, metadata !{!"0x102"}), !dbg !38
-  %1 = load i32* @x5, align 4, !dbg !39
+  %1 = load i32, i32* @x5, align 4, !dbg !39
   tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !28, metadata !{!"0x102"}), !dbg !39
   store i32 %a, i32* @x5, align 4, !dbg !39
   ret i32 %1, !dbg !39
diff --git a/llvm/test/CodeGen/ARM/2011-08-29-SchedCycle.ll b/llvm/test/CodeGen/ARM/2011-08-29-SchedCycle.ll
index be188ef..fb84544 100644
--- a/llvm/test/CodeGen/ARM/2011-08-29-SchedCycle.ll
+++ b/llvm/test/CodeGen/ARM/2011-08-29-SchedCycle.ll
@@ -32,7 +32,7 @@
 
 define void @t() nounwind {
 entry:
-  %tmp = load i64* undef, align 4
+  %tmp = load i64, i64* undef, align 4
   %tmp5 = udiv i64 %tmp, 30
   %tmp13 = and i64 %tmp5, 64739244643450880
   %tmp16 = sub i64 0, %tmp13
diff --git a/llvm/test/CodeGen/ARM/2011-08-29-ldr_pre_imm.ll b/llvm/test/CodeGen/ARM/2011-08-29-ldr_pre_imm.ll
index 4cea77b..d9b38b5 100644
--- a/llvm/test/CodeGen/ARM/2011-08-29-ldr_pre_imm.ll
+++ b/llvm/test/CodeGen/ARM/2011-08-29-ldr_pre_imm.ll
@@ -18,7 +18,7 @@
   %1 = shl nsw i32 %k_addr.0.i, 1
   %.sum8.i = add i32 %1, -1
   %2 = getelementptr inbounds [256 x i32], [256 x i32]* %heap, i32 0, i32 %.sum8.i
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   br i1 false, label %bb5.i, label %bb4.i
 
 bb4.i:                                            ; preds = %bb.i
diff --git a/llvm/test/CodeGen/ARM/2011-09-09-OddVectorDivision.ll b/llvm/test/CodeGen/ARM/2011-09-09-OddVectorDivision.ll
index 8fe9102..2561af7 100644
--- a/llvm/test/CodeGen/ARM/2011-09-09-OddVectorDivision.ll
+++ b/llvm/test/CodeGen/ARM/2011-09-09-OddVectorDivision.ll
@@ -11,12 +11,12 @@
 @z2 = common global <4 x i16> zeroinitializer
 
 define void @f() {
-  %1 = load <3 x i16>* @x1
-  %2 = load <3 x i16>* @y1
+  %1 = load <3 x i16>, <3 x i16>* @x1
+  %2 = load <3 x i16>, <3 x i16>* @y1
   %3 = sdiv <3 x i16> %1, %2
   store <3 x i16> %3, <3 x i16>* @z1
-  %4 = load <4 x i16>* @x2
-  %5 = load <4 x i16>* @y2
+  %4 = load <4 x i16>, <4 x i16>* @x2
+  %5 = load <4 x i16>, <4 x i16>* @y2
   %6 = sdiv <4 x i16> %4, %5
   store <4 x i16> %6, <4 x i16>* @z2
   ret void
diff --git a/llvm/test/CodeGen/ARM/2011-09-28-CMovCombineBug.ll b/llvm/test/CodeGen/ARM/2011-09-28-CMovCombineBug.ll
index c6f4a93..53e3bed 100644
--- a/llvm/test/CodeGen/ARM/2011-09-28-CMovCombineBug.ll
+++ b/llvm/test/CodeGen/ARM/2011-09-28-CMovCombineBug.ll
@@ -4,7 +4,7 @@
 ; ARM target specific dag combine created a cycle in DAG.
 
 define void @t() nounwind ssp {
-  %1 = load i64* undef, align 4
+  %1 = load i64, i64* undef, align 4
   %2 = shl i32 5, 0
   %3 = zext i32 %2 to i64
   %4 = and i64 %1, %3
diff --git a/llvm/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll b/llvm/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll
index 297a0ae..9f2fa63 100644
--- a/llvm/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll
+++ b/llvm/test/CodeGen/ARM/2011-10-26-ExpandUnalignedLoadCrash.ll
@@ -9,11 +9,11 @@
   %2 = mul i32 %0, 6
   %3 = getelementptr i8, i8* %1, i32 %2
   %4 = bitcast i8* %3 to <3 x i16>*
-  %5 = load <3 x i16>* %4, align 1
+  %5 = load <3 x i16>, <3 x i16>* %4, align 1
   %6 = bitcast i16* %sourceA to i8*
   %7 = getelementptr i8, i8* %6, i32 %2
   %8 = bitcast i8* %7 to <3 x i16>*
-  %9 = load <3 x i16>* %8, align 1
+  %9 = load <3 x i16>, <3 x i16>* %8, align 1
   %10 = or <3 x i16> %9, %5
   store <3 x i16> %10, <3 x i16>* %4, align 1
   ret void
diff --git a/llvm/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll b/llvm/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
index a707a92..e705971 100644
--- a/llvm/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
+++ b/llvm/test/CodeGen/ARM/2011-11-07-PromoteVectorLoadStore.ll
@@ -7,8 +7,8 @@
 
 define void @test_neon_vector_add_2xi8() nounwind {
 ; CHECK-LABEL: test_neon_vector_add_2xi8:
-  %1 = load <2 x i8>* @i8_src1
-  %2 = load <2 x i8>* @i8_src2
+  %1 = load <2 x i8>, <2 x i8>* @i8_src1
+  %2 = load <2 x i8>, <2 x i8>* @i8_src2
   %3 = add <2 x i8> %1, %2
   store <2 x i8> %3, <2 x i8>* @i8_res
   ret void
@@ -16,8 +16,8 @@
 
 define void @test_neon_ld_st_volatile_with_ashr_2xi8() {
 ; CHECK-LABEL: test_neon_ld_st_volatile_with_ashr_2xi8:
-  %1 = load volatile <2 x i8>* @i8_src1
-  %2 = load volatile <2 x i8>* @i8_src2
+  %1 = load volatile <2 x i8>, <2 x i8>* @i8_src1
+  %2 = load volatile <2 x i8>, <2 x i8>* @i8_src2
   %3 = ashr <2 x i8> %1, %2
   store volatile <2 x i8> %3, <2 x i8>* @i8_res
   ret void
diff --git a/llvm/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll b/llvm/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
index c1554d8..6dc9d4b 100644
--- a/llvm/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
+++ b/llvm/test/CodeGen/ARM/2011-11-09-BitcastVectorDouble.ll
@@ -8,7 +8,7 @@
 
 define void @test_neon_call_return_v2i16() {
 ; CHECK-LABEL: test_neon_call_return_v2i16:
-  %1 = load <2 x i16>* @src1_v2i16
+  %1 = load <2 x i16>, <2 x i16>* @src1_v2i16
   %2 = call <2 x i16> @foo_v2i16(<2 x i16> %1) nounwind
   store <2 x i16> %2, <2 x i16>* @res_v2i16
   ret void
diff --git a/llvm/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll b/llvm/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll
index c50461a..1da93bd 100644
--- a/llvm/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll
+++ b/llvm/test/CodeGen/ARM/2011-11-09-IllegalVectorFPIntConvert.ll
@@ -4,7 +4,7 @@
 ; CHECK: test1
 ; CHECK: vcvt.s32.f64
 ; CHECK: vcvt.s32.f64
-  %tmp1 = load <2 x double>* %A
+  %tmp1 = load <2 x double>, <2 x double>* %A
 	%tmp2 = fptosi <2 x double> %tmp1 to <2 x i32>
 	ret <2 x i32> %tmp2
 }
@@ -13,7 +13,7 @@
 ; CHECK: test2
 ; CHECK: vcvt.u32.f64
 ; CHECK: vcvt.u32.f64
-  %tmp1 = load <2 x double>* %A
+  %tmp1 = load <2 x double>, <2 x double>* %A
 	%tmp2 = fptoui <2 x double> %tmp1 to <2 x i32>
 	ret <2 x i32> %tmp2
 }
@@ -22,7 +22,7 @@
 ; CHECK: test3
 ; CHECK: vcvt.f64.s32
 ; CHECK: vcvt.f64.s32
-  %tmp1 = load <2 x i32>* %A
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = sitofp <2 x i32> %tmp1 to <2 x double>
 	ret <2 x double> %tmp2
 }
@@ -31,7 +31,7 @@
 ; CHECK: test4
 ; CHECK: vcvt.f64.u32
 ; CHECK: vcvt.f64.u32
-  %tmp1 = load <2 x i32>* %A
+  %tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = uitofp <2 x i32> %tmp1 to <2 x double>
 	ret <2 x double> %tmp2
 }
diff --git a/llvm/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll b/llvm/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll
index eadcbab9..7f40571 100644
--- a/llvm/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll
+++ b/llvm/test/CodeGen/ARM/2011-11-14-EarlyClobber.ll
@@ -31,25 +31,25 @@
   store double 0.000000e+00, double* null, align 4
   %call = tail call double @cos(double %angle) nounwind readnone
   %call1 = tail call double @sin(double %angle) nounwind readnone
-  %0 = load double* %V1, align 4
+  %0 = load double, double* %V1, align 4
   %arrayidx2 = getelementptr inbounds double, double* %V1, i32 1
-  %1 = load double* %arrayidx2, align 4
+  %1 = load double, double* %arrayidx2, align 4
   %mul = fmul double %0, %1
   %sub = fsub double 1.000000e+00, %call
   %mul3 = fmul double %mul, %sub
-  %2 = load double* undef, align 4
+  %2 = load double, double* undef, align 4
   %mul5 = fmul double %2, %call1
   %add = fadd double %mul3, %mul5
   store double %add, double* %arrayidx5.1.i, align 4
-  %3 = load double* %V1, align 4
+  %3 = load double, double* %V1, align 4
   %mul11 = fmul double %3, undef
   %mul13 = fmul double %mul11, %sub
-  %4 = load double* %arrayidx2, align 4
+  %4 = load double, double* %arrayidx2, align 4
   %mul15 = fmul double %4, %call1
   %sub16 = fsub double %mul13, %mul15
   store double %sub16, double* %arrayidx5.2.i, align 4
-  %5 = load double* %V1, align 4
-  %6 = load double* %arrayidx2, align 4
+  %5 = load double, double* %V1, align 4
+  %6 = load double, double* %arrayidx2, align 4
   %mul22 = fmul double %5, %6
   %mul24 = fmul double %mul22, %sub
   %sub27 = fsub double %mul24, undef
diff --git a/llvm/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll b/llvm/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll
index bc496b9..c2cafaa 100644
--- a/llvm/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll
+++ b/llvm/test/CodeGen/ARM/2011-11-28-DAGCombineBug.ll
@@ -15,14 +15,14 @@
 ; CHECK: ldr.w
 ; CHECK-NOT: ldm
 entry:
-  %tmp13 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 0), align 1
-  %tmp15 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 1), align 1
-  %tmp17 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 2), align 1
-  %tmp19 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 3), align 1
-  %tmp = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 0), align 1
-  %tmp3 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 1), align 1
-  %tmp4 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 2), align 1
-  %tmp5 = load i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 3), align 1
+  %tmp13 = load i32, i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 0), align 1
+  %tmp15 = load i32, i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 1), align 1
+  %tmp17 = load i32, i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 2), align 1
+  %tmp19 = load i32, i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 1, i32 0, i32 3), align 1
+  %tmp = load i32, i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 0), align 1
+  %tmp3 = load i32, i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 1), align 1
+  %tmp4 = load i32, i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 2), align 1
+  %tmp5 = load i32, i32* getelementptr inbounds (%struct.InformationBlock* @infoBlock, i32 0, i32 2, i32 0, i32 3), align 1
   %insert21 = insertvalue [4 x i32] undef, i32 %tmp13, 0
   %insert23 = insertvalue [4 x i32] %insert21, i32 %tmp15, 1
   %insert25 = insertvalue [4 x i32] %insert23, i32 %tmp17, 2
diff --git a/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
index 86b58c8..404aca1 100644
--- a/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
+++ b/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll
@@ -16,7 +16,7 @@
 ; CHECK:      vst1.64  {{.*}}
 
 L.entry:
-  %0 = load <4 x float>* @A, align 16
+  %0 = load <4 x float>, <4 x float>* @A, align 16
   %1 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %0)
   store <4 x float> %1, <4 x float>* %X, align 16
   ret void
@@ -48,7 +48,7 @@
 ; CHECK:      vst1.64
 
 L.entry:
-  %0 = load <4 x float>* @A, align 16
+  %0 = load <4 x float>, <4 x float>* @A, align 16
   %1 = call <4 x float> @llvm.cos.v4f32(<4 x float> %0)
   store <4 x float> %1, <4 x float>* %X, align 16
   ret void
@@ -79,7 +79,7 @@
 ; CHECK:      vst1.64
 
 L.entry:
-  %0 = load <4 x float>* @A, align 16
+  %0 = load <4 x float>, <4 x float>* @A, align 16
   %1 = call <4 x float> @llvm.exp.v4f32(<4 x float> %0)
   store <4 x float> %1, <4 x float>* %X, align 16
   ret void
@@ -110,7 +110,7 @@
 ; CHECK:      vst1.64
 
 L.entry:
-  %0 = load <4 x float>* @A, align 16
+  %0 = load <4 x float>, <4 x float>* @A, align 16
   %1 = call <4 x float> @llvm.exp2.v4f32(<4 x float> %0)
   store <4 x float> %1, <4 x float>* %X, align 16
   ret void
@@ -141,7 +141,7 @@
 ; CHECK:      vst1.64
 
 L.entry:
-  %0 = load <4 x float>* @A, align 16
+  %0 = load <4 x float>, <4 x float>* @A, align 16
   %1 = call <4 x float> @llvm.log10.v4f32(<4 x float> %0)
   store <4 x float> %1, <4 x float>* %X, align 16
   ret void
@@ -172,7 +172,7 @@
 ; CHECK:      vst1.64
 
 L.entry:
-  %0 = load <4 x float>* @A, align 16
+  %0 = load <4 x float>, <4 x float>* @A, align 16
   %1 = call <4 x float> @llvm.log.v4f32(<4 x float> %0)
   store <4 x float> %1, <4 x float>* %X, align 16
   ret void
@@ -203,7 +203,7 @@
 ; CHECK:      vst1.64
 
 L.entry:
-  %0 = load <4 x float>* @A, align 16
+  %0 = load <4 x float>, <4 x float>* @A, align 16
   %1 = call <4 x float> @llvm.log2.v4f32(<4 x float> %0)
   store <4 x float> %1, <4 x float>* %X, align 16
   ret void
@@ -236,7 +236,7 @@
 
 L.entry:
 
-  %0 = load <4 x float>* @A, align 16
+  %0 = load <4 x float>, <4 x float>* @A, align 16
   %1 = call <4 x float> @llvm.pow.v4f32(<4 x float> %0, <4 x float> <float 2., float 2., float 2., float 2.>)
 
   store <4 x float> %1, <4 x float>* %X, align 16
@@ -259,7 +259,7 @@
 
 L.entry:
 
-  %0 = load <4 x float>* @A, align 16
+  %0 = load <4 x float>, <4 x float>* @A, align 16
   %1 = call <4 x float> @llvm.powi.v4f32(<4 x float> %0, i32 2)
 
   store <4 x float> %1, <4 x float>* %X, align 16
@@ -292,7 +292,7 @@
 ; CHECK:      vst1.64
 
 L.entry:
-  %0 = load <4 x float>* @A, align 16
+  %0 = load <4 x float>, <4 x float>* @A, align 16
   %1 = call <4 x float> @llvm.sin.v4f32(<4 x float> %0)
   store <4 x float> %1, <4 x float>* %X, align 16
   ret void
@@ -323,7 +323,7 @@
 ; CHECK:      vst1.64
 
 L.entry:
-  %0 = load <4 x float>* @A, align 16
+  %0 = load <4 x float>, <4 x float>* @A, align 16
   %1 = call <4 x float> @llvm.floor.v4f32(<4 x float> %0)
   store <4 x float> %1, <4 x float>* %X, align 16
   ret void
diff --git a/llvm/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll b/llvm/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll
index 0c90f4c..0d32440 100644
--- a/llvm/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll
+++ b/llvm/test/CodeGen/ARM/2011-11-30-MergeAlignment.ll
@@ -8,9 +8,9 @@
 @x2 = internal global i64 12
 
 define i64 @f() {
-  %ax = load i32* @x1
+  %ax = load i32, i32* @x1
   %a = zext i32 %ax to i64
-  %b = load i64* @x2
+  %b = load i64, i64* @x2
   %c = add i64 %a, %b
   ret i64 %c
 }
diff --git a/llvm/test/CodeGen/ARM/2011-12-14-machine-sink.ll b/llvm/test/CodeGen/ARM/2011-12-14-machine-sink.ll
index 9334bf3..47b2260 100644
--- a/llvm/test/CodeGen/ARM/2011-12-14-machine-sink.ll
+++ b/llvm/test/CodeGen/ARM/2011-12-14-machine-sink.ll
@@ -15,13 +15,13 @@
 
 for.body:                                         ; preds = %for.cond
   %v.5 = select i1 undef, i32 undef, i32 0
-  %0 = load i8* undef, align 1
+  %0 = load i8, i8* undef, align 1
   %conv88 = zext i8 %0 to i32
   %sub89 = sub nsw i32 0, %conv88
   %v.8 = select i1 undef, i32 undef, i32 %sub89
-  %1 = load i8* null, align 1
+  %1 = load i8, i8* null, align 1
   %conv108 = zext i8 %1 to i32
-  %2 = load i8* undef, align 1
+  %2 = load i8, i8* undef, align 1
   %conv110 = zext i8 %2 to i32
   %sub111 = sub nsw i32 %conv108, %conv110
   %cmp112 = icmp slt i32 %sub111, 0
diff --git a/llvm/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll b/llvm/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
index ddb76326..40d1f62 100644
--- a/llvm/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
+++ b/llvm/test/CodeGen/ARM/2011-12-19-sjlj-clobber.ll
@@ -22,13 +22,13 @@
   store i32 %b, i32* %tmp1, align 4
   store i8* %d, i8** %tmp2, align 4
   store i1 false, i1* %tmp3
-  %tmp7 = load i8** %c
+  %tmp7 = load i8*, i8** %c
   %tmp10 = invoke %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %0*)*)(i8* %tmp7, i8* %d, %0* null)
           to label %bb11 unwind label %bb15
 
 bb11:                                             ; preds = %bb
   store %0* %tmp10, %0** %myException, align 4
-  %tmp12 = load %0** %myException, align 4
+  %tmp12 = load %0*, %0** %myException, align 4
   %tmp13 = bitcast %0* %tmp12 to i8*
   invoke void @objc_exception_throw(i8* %tmp13) noreturn
           to label %bb14 unwind label %bb15
diff --git a/llvm/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll b/llvm/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
index 0f1c452..3f827f8 100644
--- a/llvm/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
+++ b/llvm/test/CodeGen/ARM/2012-01-23-PostRA-LICM.ll
@@ -18,7 +18,7 @@
   br i1 %tmp, label %bb4, label %bb67
 
 bb4:                                              ; preds = %bb3
-  %tmp5 = load <4 x i32>* undef, align 16
+  %tmp5 = load <4 x i32>, <4 x i32>* undef, align 16
   %tmp6 = and <4 x i32> %tmp5, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
   %tmp7 = or <4 x i32> %tmp6, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
   %tmp8 = bitcast <4 x i32> %tmp7 to <4 x float>
@@ -41,9 +41,9 @@
   %tmp24 = trunc i128 %tmp23 to i64
   %tmp25 = insertvalue [2 x i64] undef, i64 %tmp24, 0
   %tmp26 = insertvalue [2 x i64] %tmp25, i64 0, 1
-  %tmp27 = load float* undef, align 4
+  %tmp27 = load float, float* undef, align 4
   %tmp28 = insertelement <4 x float> undef, float %tmp27, i32 3
-  %tmp29 = load <4 x i32>* undef, align 16
+  %tmp29 = load <4 x i32>, <4 x i32>* undef, align 16
   %tmp30 = and <4 x i32> %tmp29, <i32 8388607, i32 8388607, i32 8388607, i32 8388607>
   %tmp31 = or <4 x i32> %tmp30, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
   %tmp32 = bitcast <4 x i32> %tmp31 to <4 x float>
@@ -52,10 +52,10 @@
   %tmp35 = fmul <4 x float> %tmp34, undef
   %tmp36 = fmul <4 x float> %tmp35, undef
   %tmp37 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
-  %tmp38 = load float* undef, align 4
+  %tmp38 = load float, float* undef, align 4
   %tmp39 = insertelement <2 x float> undef, float %tmp38, i32 0
   %tmp40 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
-  %tmp41 = load float* undef, align 4
+  %tmp41 = load float, float* undef, align 4
   %tmp42 = insertelement <4 x float> undef, float %tmp41, i32 3
   %tmp43 = shufflevector <2 x float> %tmp39, <2 x float> undef, <4 x i32> zeroinitializer
   %tmp44 = fmul <4 x float> %tmp33, %tmp43
@@ -64,10 +64,10 @@
   %tmp47 = fmul <4 x float> %tmp46, %tmp36
   %tmp48 = fadd <4 x float> undef, %tmp47
   %tmp49 = call arm_aapcs_vfpcc  i8* undef(i8* undef) nounwind
-  %tmp50 = load float* undef, align 4
+  %tmp50 = load float, float* undef, align 4
   %tmp51 = insertelement <4 x float> undef, float %tmp50, i32 3
   %tmp52 = call arm_aapcs_vfpcc float* null(i8* undef) nounwind
-  %tmp54 = load float* %tmp52, align 4
+  %tmp54 = load float, float* %tmp52, align 4
   %tmp55 = insertelement <4 x float> undef, float %tmp54, i32 3
   %tmp56 = fsub <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %tmp22
   %tmp57 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp56, <4 x float> %tmp55) nounwind
diff --git a/llvm/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/llvm/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
index 61623ec..b70b7f6 100644
--- a/llvm/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
+++ b/llvm/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll
@@ -7,7 +7,7 @@
 ; This test case is exercising REG_SEQUENCE, and chains of REG_SEQUENCE.
 define arm_aapcs_vfpcc void @foo(i8* nocapture %arg, i8* %arg1) nounwind align 2 {
 bb:
-  %tmp = load <2 x float>* undef, align 8
+  %tmp = load <2 x float>, <2 x float>* undef, align 8
   %tmp2 = extractelement <2 x float> %tmp, i32 0
   %tmp3 = insertelement <4 x float> undef, float %tmp2, i32 0
   %tmp4 = insertelement <4 x float> %tmp3, float 0.000000e+00, i32 1
diff --git a/llvm/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll b/llvm/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
index a9e2ebb..2484f0d 100644
--- a/llvm/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
+++ b/llvm/test/CodeGen/ARM/2012-01-26-CopyPropKills.ll
@@ -56,9 +56,9 @@
   %tmp39 = shufflevector <2 x i64> %tmp38, <2 x i64> undef, <1 x i32> zeroinitializer
   %tmp40 = bitcast <1 x i64> %tmp39 to <2 x float>
   %tmp41 = shufflevector <2 x float> %tmp40, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-  %tmp42 = load <4 x float>* null, align 16
+  %tmp42 = load <4 x float>, <4 x float>* null, align 16
   %tmp43 = fmul <4 x float> %tmp42, %tmp41
-  %tmp44 = load <4 x float>* undef, align 16
+  %tmp44 = load <4 x float>, <4 x float>* undef, align 16
   %tmp45 = fadd <4 x float> undef, %tmp43
   %tmp46 = fadd <4 x float> undef, %tmp45
   %tmp47 = bitcast <4 x float> %tmp36 to <2 x i64>
@@ -76,7 +76,7 @@
   %tmp59 = fmul <4 x float> undef, %tmp58
   %tmp60 = fadd <4 x float> %tmp59, undef
   %tmp61 = fadd <4 x float> %tmp60, zeroinitializer
-  %tmp62 = load void (i8*, i8*)** undef, align 4
+  %tmp62 = load void (i8*, i8*)*, void (i8*, i8*)** undef, align 4
   call arm_aapcs_vfpcc  void %tmp62(i8* sret undef, i8* undef) nounwind
   %tmp63 = bitcast <4 x float> %tmp46 to i128
   %tmp64 = bitcast <4 x float> %tmp54 to i128
@@ -96,7 +96,7 @@
   call arm_aapcs_vfpcc  void @bar(i8* sret null, [8 x i64] %tmp77) nounwind
   %tmp78 = call arm_aapcs_vfpcc  i8* null(i8* null) nounwind
   %tmp79 = bitcast i8* %tmp78 to i512*
-  %tmp80 = load i512* %tmp79, align 16
+  %tmp80 = load i512, i512* %tmp79, align 16
   %tmp81 = lshr i512 %tmp80, 128
   %tmp82 = trunc i512 %tmp80 to i128
   %tmp83 = trunc i512 %tmp81 to i128
diff --git a/llvm/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll b/llvm/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll
index 6c7aaad7..2495b30 100644
--- a/llvm/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll
+++ b/llvm/test/CodeGen/ARM/2012-02-01-CoalescerBug.ll
@@ -10,7 +10,7 @@
 @foo = external global %0, align 16
 
 define arm_aapcs_vfpcc void @bar(float, i1 zeroext, i1 zeroext) nounwind {
-  %4 = load <4 x float>* getelementptr inbounds (%0* @foo, i32 0, i32 0), align 16
+  %4 = load <4 x float>, <4 x float>* getelementptr inbounds (%0* @foo, i32 0, i32 0), align 16
   %5 = extractelement <4 x float> %4, i32 0
   %6 = extractelement <4 x float> %4, i32 1
   %7 = extractelement <4 x float> %4, i32 2
diff --git a/llvm/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll b/llvm/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll
index 6206cd7..6fb760c 100644
--- a/llvm/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll
+++ b/llvm/test/CodeGen/ARM/2012-03-13-DAGCombineBug.ll
@@ -7,7 +7,7 @@
 define void @test_hi_short3(<3 x i16> * nocapture %srcA, <2 x i16> * nocapture %dst) nounwind {
 entry:
 ; CHECK: vst1.32
-  %0 = load <3 x i16> * %srcA, align 8
+  %0 = load <3 x i16> , <3 x i16> * %srcA, align 8
   %1 = shufflevector <3 x i16> %0, <3 x i16> undef, <2 x i32> <i32 2, i32 undef>
   store <2 x i16> %1, <2 x i16> * %dst, align 4
   ret void
diff --git a/llvm/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll b/llvm/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
index a288015..6f92613 100644
--- a/llvm/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
+++ b/llvm/test/CodeGen/ARM/2012-06-12-SchedMemLatency.ll
@@ -27,13 +27,13 @@
 define i32 @f1(i32* nocapture %p1, i32* nocapture %p2) nounwind {
 entry:
   store volatile i32 65540, i32* %p1, align 4
-  %0 = load volatile i32* %p2, align 4
+  %0 = load volatile i32, i32* %p2, align 4
   ret i32 %0
 }
 
 define i32 @f2(i32* nocapture %p1, i32* nocapture %p2) nounwind {
 entry:
   store i32 65540, i32* %p1, align 4
-  %0 = load i32* %p2, align 4
+  %0 = load i32, i32* %p2, align 4
   ret i32 %0
 }
diff --git a/llvm/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll b/llvm/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll
index 70e3079..6e0b828 100644
--- a/llvm/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll
+++ b/llvm/test/CodeGen/ARM/2012-08-04-DtripleSpillReload.ll
@@ -129,7 +129,7 @@
   %45 = fmul <4 x float> undef, undef
   %46 = fmul <4 x float> %45, %43
   %47 = fmul <4 x float> undef, %44
-  %48 = load <4 x float>* undef, align 8
+  %48 = load <4 x float>, <4 x float>* undef, align 8
   %49 = bitcast <4 x float> %48 to <2 x i64>
   %50 = shufflevector <2 x i64> %49, <2 x i64> undef, <1 x i32> <i32 1>
   %51 = bitcast <1 x i64> %50 to <2 x float>
diff --git a/llvm/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll b/llvm/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll
index bdcd1b6..576dff4 100644
--- a/llvm/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll
+++ b/llvm/test/CodeGen/ARM/2012-08-08-legalize-unaligned.ll
@@ -6,7 +6,7 @@
 
 define void @test_hi_char8() noinline {
 entry:
-  %0 = load <4 x i8>* undef, align 1
+  %0 = load <4 x i8>, <4 x i8>* undef, align 1
   store <4 x i8> %0, <4 x i8>* null, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/ARM/2012-08-09-neon-extload.ll b/llvm/test/CodeGen/ARM/2012-08-09-neon-extload.ll
index a710825..285a431 100644
--- a/llvm/test/CodeGen/ARM/2012-08-09-neon-extload.ll
+++ b/llvm/test/CodeGen/ARM/2012-08-09-neon-extload.ll
@@ -14,7 +14,7 @@
 define void @test_v2i8tov2i32() {
 ; CHECK-LABEL: test_v2i8tov2i32:
 
-  %i8val = load <2 x i8>* @var_v2i8
+  %i8val = load <2 x i8>, <2 x i8>* @var_v2i8
 
   %i32val = sext <2 x i8> %i8val to <2 x i32>
   store <2 x i32> %i32val, <2 x i32>* @var_v2i32
@@ -28,7 +28,7 @@
 define void @test_v2i8tov2i64() {
 ; CHECK-LABEL: test_v2i8tov2i64:
 
-  %i8val = load <2 x i8>* @var_v2i8
+  %i8val = load <2 x i8>, <2 x i8>* @var_v2i8
 
   %i64val = sext <2 x i8> %i8val to <2 x i64>
   store <2 x i64> %i64val, <2 x i64>* @var_v2i64
@@ -46,7 +46,7 @@
 define void @test_v4i8tov4i16() {
 ; CHECK-LABEL: test_v4i8tov4i16:
 
-  %i8val = load <4 x i8>* @var_v4i8
+  %i8val = load <4 x i8>, <4 x i8>* @var_v4i8
 
   %i16val = sext <4 x i8> %i8val to <4 x i16>
   store <4 x i16> %i16val, <4 x i16>* @var_v4i16
@@ -61,7 +61,7 @@
 define void @test_v4i8tov4i32() {
 ; CHECK-LABEL: test_v4i8tov4i32:
 
-  %i8val = load <4 x i8>* @var_v4i8
+  %i8val = load <4 x i8>, <4 x i8>* @var_v4i8
 
   %i16val = sext <4 x i8> %i8val to <4 x i32>
   store <4 x i32> %i16val, <4 x i32>* @var_v4i32
@@ -75,7 +75,7 @@
 define void @test_v2i16tov2i32() {
 ; CHECK-LABEL: test_v2i16tov2i32:
 
-  %i16val = load <2 x i16>* @var_v2i16
+  %i16val = load <2 x i16>, <2 x i16>* @var_v2i16
 
   %i32val = sext <2 x i16> %i16val to <2 x i32>
   store <2 x i32> %i32val, <2 x i32>* @var_v2i32
@@ -90,7 +90,7 @@
 define void @test_v2i16tov2i64() {
 ; CHECK-LABEL: test_v2i16tov2i64:
 
-  %i16val = load <2 x i16>* @var_v2i16
+  %i16val = load <2 x i16>, <2 x i16>* @var_v2i16
 
   %i64val = sext <2 x i16> %i16val to <2 x i64>
   store <2 x i64> %i64val, <2 x i64>* @var_v2i64
diff --git a/llvm/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll b/llvm/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll
index e8d4fb2..3a851d6 100644
--- a/llvm/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll
+++ b/llvm/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll
@@ -15,7 +15,7 @@
 define void @sextload_v4i8_c(<4 x i8>* %v) nounwind {
 ;CHECK-LABEL: sextload_v4i8_c:
 entry:
-  %0 = load <4 x i8>* %v, align 8
+  %0 = load <4 x i8>, <4 x i8>* %v, align 8
   %v0  = sext <4 x i8> %0 to <4 x i32>
 ;CHECK: vmull
   %v1 = mul <4 x i32>  %v0, <i32 3, i32 3, i32 3, i32 3>
@@ -28,7 +28,7 @@
 define void @sextload_v2i8_c(<2 x i8>* %v) nounwind {
 ;CHECK-LABEL: sextload_v2i8_c:
 entry:
-  %0   = load <2 x i8>* %v, align 8
+  %0   = load <2 x i8>, <2 x i8>* %v, align 8
   %v0  = sext <2 x i8>  %0 to <2 x i64>
 ;CHECK: vmull
   %v1  = mul <2 x i64>  %v0, <i64 3, i64 3>
@@ -41,7 +41,7 @@
 define void @sextload_v2i16_c(<2 x i16>* %v) nounwind {
 ;CHECK-LABEL: sextload_v2i16_c:
 entry:
-  %0   = load <2 x i16>* %v, align 8
+  %0   = load <2 x i16>, <2 x i16>* %v, align 8
   %v0  = sext <2 x i16>  %0 to <2 x i64>
 ;CHECK: vmull
   %v1  = mul <2 x i64>  %v0, <i64 3, i64 3>
@@ -56,10 +56,10 @@
 define void @sextload_v4i8_v(<4 x i8>* %v, <4 x i8>* %p) nounwind {
 ;CHECK-LABEL: sextload_v4i8_v:
 entry:
-  %0 = load <4 x i8>* %v, align 8
+  %0 = load <4 x i8>, <4 x i8>* %v, align 8
   %v0  = sext <4 x i8> %0 to <4 x i32>
 
-  %1  = load <4 x i8>* %p, align 8
+  %1  = load <4 x i8>, <4 x i8>* %p, align 8
   %v2 = sext <4 x i8> %1 to <4 x i32>
 ;CHECK: vmull
   %v1 = mul <4 x i32>  %v0, %v2
@@ -72,10 +72,10 @@
 define void @sextload_v2i8_v(<2 x i8>* %v, <2 x i8>* %p) nounwind {
 ;CHECK-LABEL: sextload_v2i8_v:
 entry:
-  %0 = load <2 x i8>* %v, align 8
+  %0 = load <2 x i8>, <2 x i8>* %v, align 8
   %v0  = sext <2 x i8> %0 to <2 x i64>
 
-  %1  = load <2 x i8>* %p, align 8
+  %1  = load <2 x i8>, <2 x i8>* %p, align 8
   %v2 = sext <2 x i8> %1 to <2 x i64>
 ;CHECK: vmull
   %v1 = mul <2 x i64>  %v0, %v2
@@ -88,10 +88,10 @@
 define void @sextload_v2i16_v(<2 x i16>* %v, <2 x i16>* %p) nounwind {
 ;CHECK-LABEL: sextload_v2i16_v:
 entry:
-  %0 = load <2 x i16>* %v, align 8
+  %0 = load <2 x i16>, <2 x i16>* %v, align 8
   %v0  = sext <2 x i16> %0 to <2 x i64>
 
-  %1  = load <2 x i16>* %p, align 8
+  %1  = load <2 x i16>, <2 x i16>* %p, align 8
   %v2 = sext <2 x i16> %1 to <2 x i64>
 ;CHECK: vmull
   %v1 = mul <2 x i64>  %v0, %v2
@@ -106,10 +106,10 @@
 define void @sextload_v4i8_vs(<4 x i8>* %v, <4 x i16>* %p) nounwind {
 ;CHECK-LABEL: sextload_v4i8_vs:
 entry:
-  %0 = load <4 x i8>* %v, align 8
+  %0 = load <4 x i8>, <4 x i8>* %v, align 8
   %v0  = sext <4 x i8> %0 to <4 x i32>
 
-  %1  = load <4 x i16>* %p, align 8
+  %1  = load <4 x i16>, <4 x i16>* %p, align 8
   %v2 = sext <4 x i16> %1 to <4 x i32>
 ;CHECK: vmull
   %v1 = mul <4 x i32>  %v0, %v2
@@ -122,10 +122,10 @@
 define void @sextload_v2i8_vs(<2 x i8>* %v, <2 x i16>* %p) nounwind {
 ;CHECK-LABEL: sextload_v2i8_vs:
 entry:
-  %0 = load <2 x i8>* %v, align 8
+  %0 = load <2 x i8>, <2 x i8>* %v, align 8
   %v0  = sext <2 x i8> %0 to <2 x i64>
 
-  %1  = load <2 x i16>* %p, align 8
+  %1  = load <2 x i16>, <2 x i16>* %p, align 8
   %v2 = sext <2 x i16> %1 to <2 x i64>
 ;CHECK: vmull
   %v1 = mul <2 x i64>  %v0, %v2
@@ -138,10 +138,10 @@
 define void @sextload_v2i16_vs(<2 x i16>* %v, <2 x i32>* %p) nounwind {
 ;CHECK-LABEL: sextload_v2i16_vs:
 entry:
-  %0 = load <2 x i16>* %v, align 8
+  %0 = load <2 x i16>, <2 x i16>* %v, align 8
   %v0  = sext <2 x i16> %0 to <2 x i64>
 
-  %1  = load <2 x i32>* %p, align 8
+  %1  = load <2 x i32>, <2 x i32>* %p, align 8
   %v2 = sext <2 x i32> %1 to <2 x i64>
 ;CHECK: vmull
   %v1 = mul <2 x i64>  %v0, %v2
diff --git a/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll b/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
index bfbd078..b5bdc1b 100644
--- a/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
+++ b/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll
@@ -45,7 +45,7 @@
 define void @test_byval_8_bytes_alignment_fixed_arg(i32 %n1, %struct_t* byval %val) nounwind {
 entry:
   %a = getelementptr inbounds %struct_t, %struct_t* %val, i32 0, i32 0
-  %0 = load double* %a
+  %0 = load double, double* %a
   call void (double)* @f(double %0)
   ret void
 }
diff --git a/llvm/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll b/llvm/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll
index 694025a..203ba4d 100644
--- a/llvm/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll
+++ b/llvm/test/CodeGen/ARM/2012-10-04-FixedFrame-vs-byval.ll
@@ -13,7 +13,7 @@
 define void @test_byval_usage_scheduling(i32 %n1, i32 %n2, %struct_t* byval %val) nounwind {
 entry:
   %a = getelementptr inbounds %struct_t, %struct_t* %val, i32 0, i32 0
-  %0 = load double* %a
+  %0 = load double, double* %a
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), double %0)
   ret void
 }
diff --git a/llvm/test/CodeGen/ARM/2013-01-21-PR14992.ll b/llvm/test/CodeGen/ARM/2013-01-21-PR14992.ll
index f6cd742..4c1f2a7 100644
--- a/llvm/test/CodeGen/ARM/2013-01-21-PR14992.ll
+++ b/llvm/test/CodeGen/ARM/2013-01-21-PR14992.ll
@@ -6,11 +6,11 @@
 ;CHECK-LABEL: foo:
 define i32 @foo(i32* %a) nounwind optsize {
 entry:
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %a, i32 1
-  %1 = load i32* %arrayidx1, align 4
+  %1 = load i32, i32* %arrayidx1, align 4
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 2
-  %2 = load i32* %arrayidx2, align 4
+  %2 = load i32, i32* %arrayidx2, align 4
   %add.ptr = getelementptr inbounds i32, i32* %a, i32 3
 ;Make sure we do not have a duplicated register in the front of the reg list
 ;EXPECTED:  ldm [[BASE:r[0-9]+]]!, {[[REG:r[0-9]+]], {{r[0-9]+}},
diff --git a/llvm/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll b/llvm/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll
index 9378e4c..6c8b0ff 100644
--- a/llvm/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll
+++ b/llvm/test/CodeGen/ARM/2013-04-18-load-overlap-PR14824.ll
@@ -9,9 +9,9 @@
 entry:
 
 ; Load %source
-  %s0 = load <8 x i64> * %source, align 64
+  %s0 = load <8 x i64> , <8 x i64> * %source, align 64
   %arrayidx64 = getelementptr inbounds <8 x i64>, <8 x i64> * %source, i32 6
-  %s120 = load <8 x i64> * %arrayidx64, align 64
+  %s120 = load <8 x i64> , <8 x i64> * %arrayidx64, align 64
   %s122 = bitcast <8 x i64> %s120 to i512
   %data.i.i677.48.extract.shift = lshr i512 %s122, 384
   %data.i.i677.48.extract.trunc = trunc i512 %data.i.i677.48.extract.shift to i64
@@ -33,9 +33,9 @@
   %s130 = insertelement <8 x i64> %s129, i64 %data.i.i677.56.extract.trunc, i32 7
 
 ; Load %secondSource
-  %s1 = load <8 x i64> * %secondSource, align 64
+  %s1 = load <8 x i64> , <8 x i64> * %secondSource, align 64
   %arrayidx67 = getelementptr inbounds <8 x i64>, <8 x i64> * %secondSource, i32 6
-  %s121 = load <8 x i64> * %arrayidx67, align 64
+  %s121 = load <8 x i64> , <8 x i64> * %arrayidx67, align 64
   %s131 = bitcast <8 x i64> %s121 to i512
   %data.i1.i676.48.extract.shift = lshr i512 %s131, 384
   %data.i1.i676.48.extract.trunc = trunc i512 %data.i1.i676.48.extract.shift to i64
@@ -64,7 +64,7 @@
   %arrayidx72 = getelementptr inbounds <8 x i64>, <8 x i64> * %dest, i32 6
   store <8 x i64> %vecinit49.i.i702, <8 x i64> * %arrayidx72, align 64
   %arrayidx78 = getelementptr inbounds <8 x i64>, <8 x i64> * %secondSource, i32 7
-  %s141 = load <8 x i64> * %arrayidx78, align 64
+  %s141 = load <8 x i64> , <8 x i64> * %arrayidx78, align 64
   %s151 = bitcast <8 x i64> %s141 to i512
   %data.i1.i649.32.extract.shift = lshr i512 %s151, 256
   %data.i1.i649.32.extract.trunc = trunc i512 %data.i1.i649.32.extract.shift to i64
diff --git a/llvm/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll b/llvm/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll
index 63605ed..aabbfae 100644
--- a/llvm/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll
+++ b/llvm/test/CodeGen/ARM/2013-05-07-ByteLoadSameAddress.ll
@@ -8,45 +8,45 @@
         %sub19 = sub i32 %add, %Width
         %sub20 = add i32 %sub19, -1
         %arrayidx21 = getelementptr inbounds i8, i8* %call1, i32 %sub20
-        %0 = load i8* %arrayidx21, align 1
+        %0 = load i8, i8* %arrayidx21, align 1
         %conv22 = zext i8 %0 to i32
         %arrayidx25 = getelementptr inbounds i8, i8* %call1, i32 %sub19
-        %1 = load i8* %arrayidx25, align 1
+        %1 = load i8, i8* %arrayidx25, align 1
         %conv26 = zext i8 %1 to i32
         %mul23189 = add i32 %conv26, %conv22
         %add30 = add i32 %sub19, 1
         %arrayidx31 = getelementptr inbounds i8, i8* %call1, i32 %add30
-        %2 = load i8* %arrayidx31, align 1
+        %2 = load i8, i8* %arrayidx31, align 1
         %conv32 = zext i8 %2 to i32
 ; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1]
 ; CHECK-NEXT: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #1]
         %add28190 = add i32 %mul23189, %conv32
         %sub35 = add i32 %add, -1
         %arrayidx36 = getelementptr inbounds i8, i8* %call1, i32 %sub35
-        %3 = load i8* %arrayidx36, align 1
+        %3 = load i8, i8* %arrayidx36, align 1
         %conv37 = zext i8 %3 to i32
         %add34191 = add i32 %add28190, %conv37
         %arrayidx40 = getelementptr inbounds i8, i8* %call1, i32 %add
-        %4 = load i8* %arrayidx40, align 1
+        %4 = load i8, i8* %arrayidx40, align 1
         %conv41 = zext i8 %4 to i32
         %mul42 = mul nsw i32 %conv41, 255
         %add44 = add i32 %add, 1
         %arrayidx45 = getelementptr inbounds i8, i8* %call1, i32 %add44
-        %5 = load i8* %arrayidx45, align 1
+        %5 = load i8, i8* %arrayidx45, align 1
         %conv46 = zext i8 %5 to i32
 ; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1]
 ; CHECK-NEXT: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #1]
         %add49 = add i32 %add, %Width
         %sub50 = add i32 %add49, -1
         %arrayidx51 = getelementptr inbounds i8, i8* %call1, i32 %sub50
-        %6 = load i8* %arrayidx51, align 1
+        %6 = load i8, i8* %arrayidx51, align 1
         %conv52 = zext i8 %6 to i32
         %arrayidx56 = getelementptr inbounds i8, i8* %call1, i32 %add49
-        %7 = load i8* %arrayidx56, align 1
+        %7 = load i8, i8* %arrayidx56, align 1
         %conv57 = zext i8 %7 to i32
         %add61 = add i32 %add49, 1
         %arrayidx62 = getelementptr inbounds i8, i8* %call1, i32 %add61
-        %8 = load i8* %arrayidx62, align 1
+        %8 = load i8, i8* %arrayidx62, align 1
         %conv63 = zext i8 %8 to i32
 ; CHECK: ldrb r{{[0-9]*}}, [r{{[0-9]*}}, #-1]
 ; CHECK-NEXT: ldrb{{[.w]*}} r{{[0-9]*}}, [r{{[0-9]*}}, #1]
diff --git a/llvm/test/CodeGen/ARM/2013-05-31-char-shift-crash.ll b/llvm/test/CodeGen/ARM/2013-05-31-char-shift-crash.ll
index 0130f7a..6172712 100644
--- a/llvm/test/CodeGen/ARM/2013-05-31-char-shift-crash.ll
+++ b/llvm/test/CodeGen/ARM/2013-05-31-char-shift-crash.ll
@@ -10,7 +10,7 @@
 entry:
   %a.addr = alloca i8, align 1
   store i8 %a, i8* %a.addr, align 1
-  %0 = load i8* %a.addr, align 1
+  %0 = load i8, i8* %a.addr, align 1
   %conv = sext i8 %0 to i32
   %shr = ashr i32 %conv, 56
   %conv1 = trunc i32 %shr to i8
diff --git a/llvm/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll b/llvm/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll
index 05a4ef0..979df30 100644
--- a/llvm/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll
+++ b/llvm/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll
@@ -17,7 +17,7 @@
 ; CHECK: vorr q9, q9, q10
 ; CHECK: vst1.32 {d18, d19}, [r0]
 vector.body:
-  %wide.load = load <4 x i32>* undef, align 4
+  %wide.load = load <4 x i32>, <4 x i32>* undef, align 4
   %0 = and <4 x i32> %wide.load, <i32 -16711936, i32 -16711936, i32 -16711936, i32 -16711936>
   %1 = sub <4 x i32> %wide.load, zeroinitializer
   %2 = and <4 x i32> %1, <i32 16711680, i32 16711680, i32 16711680, i32 16711680>
diff --git a/llvm/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll b/llvm/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll
index 6c0fbd0..dc7f308 100644
--- a/llvm/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll
+++ b/llvm/test/CodeGen/ARM/2014-01-09-pseudo_expand_implicit_reg.ll
@@ -38,13 +38,13 @@
 define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind {
 ; CHECK: vtbx4:
 ; CHECK: VTBX4 {{.*}}, pred:14, pred:%noreg, %Q{{[0-9]+}}_Q{{[0-9]+}}<imp-use>
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load %struct.__neon_int8x8x4_t* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load %struct.__neon_int8x8x4_t, %struct.__neon_int8x8x4_t* %B
         %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
         %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
         %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
-	%tmp7 = load <8 x i8>* %C
+	%tmp7 = load <8 x i8>, <8 x i8>* %C
 	%tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7)
   call void @bar2(%struct.__neon_int8x8x4_t %tmp2, <8 x i8> %tmp8)
 	ret <8 x i8> %tmp8
diff --git a/llvm/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll b/llvm/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll
index 202138c..2efd91f 100644
--- a/llvm/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll
+++ b/llvm/test/CodeGen/ARM/2015-01-21-thumbv4t-ldstr-opt.ll
@@ -12,10 +12,10 @@
   %4 = getelementptr inbounds i32, i32* %p, i32 4
 
 ; CHECK-NEXT: ldm [[NEWBASE]],
-  %5 = load i32* %1, align 4
-  %6 = load i32* %2, align 4
-  %7 = load i32* %3, align 4
-  %8 = load i32* %4, align 4
+  %5 = load i32, i32* %1, align 4
+  %6 = load i32, i32* %2, align 4
+  %7 = load i32, i32* %3, align 4
+  %8 = load i32, i32* %4, align 4
 
   %9 = add nsw i32 %5, %6
   %10 = add nsw i32 %9, %7
@@ -36,10 +36,10 @@
   %4 = getelementptr inbounds i32, i32* %p, i32 5
 
 ; CHECK-NEXT: ldm [[NEWBASE]],
-  %5 = load i32* %1, align 4
-  %6 = load i32* %2, align 4
-  %7 = load i32* %3, align 4
-  %8 = load i32* %4, align 4
+  %5 = load i32, i32* %1, align 4
+  %6 = load i32, i32* %2, align 4
+  %7 = load i32, i32* %3, align 4
+  %8 = load i32, i32* %4, align 4
 
   %9 = add nsw i32 %5, %6
   %10 = add nsw i32 %9, %7
diff --git a/llvm/test/CodeGen/ARM/MergeConsecutiveStores.ll b/llvm/test/CodeGen/ARM/MergeConsecutiveStores.ll
index db4346e..3f7d625 100644
--- a/llvm/test/CodeGen/ARM/MergeConsecutiveStores.ll
+++ b/llvm/test/CodeGen/ARM/MergeConsecutiveStores.ll
@@ -13,12 +13,12 @@
   %.08 = phi i8* [ %b, %0 ], [ %10, %1 ]
   %.0 = phi i32* [ %a, %0 ], [ %2, %1 ]
   %2 = getelementptr inbounds i32, i32* %.0, i32 1
-  %3 = load i32* %.0, align 1
+  %3 = load i32, i32* %.0, align 1
   %4 = getelementptr inbounds i8, i8* %c, i32 %3
-  %5 = load i8* %4, align 1
+  %5 = load i8, i8* %4, align 1
   %6 = add i32 %3, 1
   %7 = getelementptr inbounds i8, i8* %c, i32 %6
-  %8 = load i8* %7, align 1
+  %8 = load i8, i8* %7, align 1
   store i8 %5, i8* %.08, align 1
   %9 = getelementptr inbounds i8, i8* %.08, i32 1
   store i8 %8, i8* %9, align 1
@@ -45,13 +45,13 @@
   %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
   %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
   %2 = getelementptr inbounds i8, i8* %.0, i32 1
-  %3 = load i8* %.0, align 1
+  %3 = load i8, i8* %.0, align 1
   %4 = sext i8 %3 to i32
   %5 = getelementptr inbounds i8, i8* %c, i32 %4
-  %6 = load i8* %5, align 1
+  %6 = load i8, i8* %5, align 1
   %7 = add i32 %4, 1
   %8 = getelementptr inbounds i8, i8* %c, i32 %7
-  %9 = load i8* %8, align 1
+  %9 = load i8, i8* %8, align 1
   store i8 %6, i8* %.08, align 1
   %10 = getelementptr inbounds i8, i8* %.08, i32 1
   store i8 %9, i8* %10, align 1
@@ -77,14 +77,14 @@
   %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
   %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
   %2 = getelementptr inbounds i8, i8* %.0, i32 1
-  %3 = load i8* %.0, align 1
+  %3 = load i8, i8* %.0, align 1
   %4 = sext i8 %3 to i32
   %5 = getelementptr inbounds i8, i8* %c, i32 %4
-  %6 = load i8* %5, align 1
+  %6 = load i8, i8* %5, align 1
   %7 = add i8 %3, 1
   %wrap.4 = sext i8 %7 to i32
   %8 = getelementptr inbounds i8, i8* %c, i32 %wrap.4
-  %9 = load i8* %8, align 1
+  %9 = load i8, i8* %8, align 1
   store i8 %6, i8* %.08, align 1
   %10 = getelementptr inbounds i8, i8* %.08, i32 1
   store i8 %9, i8* %10, align 1
diff --git a/llvm/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll b/llvm/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll
index a29aaad..a314259 100644
--- a/llvm/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll
+++ b/llvm/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll
@@ -10,10 +10,10 @@
   %i.addr = alloca i32, align 4
   %buffer = alloca [4096 x i8], align 1
   store i32 %i, i32* %i.addr, align 4
-  %0 = load i32* %i.addr, align 4
+  %0 = load i32, i32* %i.addr, align 4
   %rem = urem i32 %0, 4096
   %arrayidx = getelementptr inbounds [4096 x i8], [4096 x i8]* %buffer, i32 0, i32 %rem
-  %1 = load volatile i8* %arrayidx, align 1
+  %1 = load volatile i8, i8* %arrayidx, align 1
   ret i8 %1
 }
 
diff --git a/llvm/test/CodeGen/ARM/Windows/dllimport.ll b/llvm/test/CodeGen/ARM/Windows/dllimport.ll
index bc737bd..6786be3 100644
--- a/llvm/test/CodeGen/ARM/Windows/dllimport.ll
+++ b/llvm/test/CodeGen/ARM/Windows/dllimport.ll
@@ -8,7 +8,7 @@
 declare arm_aapcs_vfpcc i32 @internal()
 
 define arm_aapcs_vfpcc i32 @get_var() {
-  %1 = load i32* @var, align 4
+  %1 = load i32, i32* @var, align 4
   ret i32 %1
 }
 
@@ -20,7 +20,7 @@
 ; CHECK: bx lr
 
 define arm_aapcs_vfpcc i32 @get_ext() {
-  %1 = load i32* @ext, align 4
+  %1 = load i32, i32* @ext, align 4
   ret i32 %1
 }
 
diff --git a/llvm/test/CodeGen/ARM/Windows/frame-register.ll b/llvm/test/CodeGen/ARM/Windows/frame-register.ll
index 31167d7..7ecfc1a 100644
--- a/llvm/test/CodeGen/ARM/Windows/frame-register.ll
+++ b/llvm/test/CodeGen/ARM/Windows/frame-register.ll
@@ -8,12 +8,12 @@
   %i.addr = alloca i32, align 4
   %j = alloca i32, align 4
   store i32 %i, i32* %i.addr, align 4
-  %0 = load i32* %i.addr, align 4
+  %0 = load i32, i32* %i.addr, align 4
   %add = add nsw i32 %0, 1
   store i32 %add, i32* %j, align 4
-  %1 = load i32* %j, align 4
+  %1 = load i32, i32* %j, align 4
   call void @callee(i32 %1)
-  %2 = load i32* %j, align 4
+  %2 = load i32, i32* %j, align 4
   %add1 = add nsw i32 %2, 1
   ret i32 %add1
 }
diff --git a/llvm/test/CodeGen/ARM/Windows/movw-movt-relocations.ll b/llvm/test/CodeGen/ARM/Windows/movw-movt-relocations.ll
index 3ae6428..c21aee0 100644
--- a/llvm/test/CodeGen/ARM/Windows/movw-movt-relocations.ll
+++ b/llvm/test/CodeGen/ARM/Windows/movw-movt-relocations.ll
@@ -10,8 +10,8 @@
 ; Function Attrs: nounwind optsize readonly
 define i32 @relocation(i32 %j, i32 %k) {
 entry:
-  %0 = load i32* @i, align 4
-  %1 = load i32* @j, align 4
+  %0 = load i32, i32* @i, align 4
+  %1 = load i32, i32* @j, align 4
   %add = add nsw i32 %1, %0
   ret i32 %add
 }
diff --git a/llvm/test/CodeGen/ARM/Windows/pic.ll b/llvm/test/CodeGen/ARM/Windows/pic.ll
index 28d371f..9ef7c35 100644
--- a/llvm/test/CodeGen/ARM/Windows/pic.ll
+++ b/llvm/test/CodeGen/ARM/Windows/pic.ll
@@ -5,7 +5,7 @@
 
 define arm_aapcs_vfpcc i8 @return_external() {
 entry:
-  %0 = load i8* @external, align 1
+  %0 = load i8, i8* @external, align 1
   ret i8 %0
 }
 
diff --git a/llvm/test/CodeGen/ARM/Windows/stack-probe-non-default.ll b/llvm/test/CodeGen/ARM/Windows/stack-probe-non-default.ll
index 814c8b7..d66e93a 100644
--- a/llvm/test/CodeGen/ARM/Windows/stack-probe-non-default.ll
+++ b/llvm/test/CodeGen/ARM/Windows/stack-probe-non-default.ll
@@ -12,7 +12,7 @@
   %0 = getelementptr inbounds [4096 x i8], [4096 x i8]* %buffer, i32 0, i32 0
   call arm_aapcs_vfpcc void @initialise(i8* %0)
   %arrayidx = getelementptr inbounds [4096 x i8], [4096 x i8]* %buffer, i32 0, i32 %offset
-  %1 = load i8* %arrayidx, align 1
+  %1 = load i8, i8* %arrayidx, align 1
   ret i8 %1
 }
 
diff --git a/llvm/test/CodeGen/ARM/Windows/vla.ll b/llvm/test/CodeGen/ARM/Windows/vla.ll
index 13180cd..1c0632e 100644
--- a/llvm/test/CodeGen/ARM/Windows/vla.ll
+++ b/llvm/test/CodeGen/ARM/Windows/vla.ll
@@ -9,7 +9,7 @@
 entry:
   %vla = alloca i8, i32 %sz, align 1
   %arrayidx = getelementptr inbounds i8, i8* %vla, i32 %idx
-  %0 = load volatile i8* %arrayidx, align 1
+  %0 = load volatile i8, i8* %arrayidx, align 1
   ret i8 %0
 }
 
diff --git a/llvm/test/CodeGen/ARM/a15-partial-update.ll b/llvm/test/CodeGen/ARM/a15-partial-update.ll
index 71b95ee..576eb7a 100644
--- a/llvm/test/CodeGen/ARM/a15-partial-update.ll
+++ b/llvm/test/CodeGen/ARM/a15-partial-update.ll
@@ -10,7 +10,7 @@
 ; generated.
 
 ; CHECK-NOT: vmov.{{.*}} d{{[0-9]+}},
-  %tmp2 = load float* %A, align 4
+  %tmp2 = load float, float* %A, align 4
   %tmp3 = insertelement <2 x float> %B, float %tmp2, i32 1
   ret <2 x float> %tmp3
 }
@@ -29,7 +29,7 @@
   %newcount = add i32 %oldcount, 1
   %p1 = getelementptr <4 x i8>, <4 x i8> *%in, i32 %newcount
   %p2 = getelementptr <4 x i8>, <4 x i8> *%out, i32 %newcount
-  %tmp1 = load <4 x i8> *%p1, align 4
+  %tmp1 = load <4 x i8> , <4 x i8> *%p1, align 4
   store <4 x i8> %tmp1, <4 x i8> *%p2
   %cmp = icmp eq i32 %newcount, %n
   br i1 %cmp, label %loop, label %ret
diff --git a/llvm/test/CodeGen/ARM/addrmode.ll b/llvm/test/CodeGen/ARM/addrmode.ll
index 8fd1da7..52bb9a2 100644
--- a/llvm/test/CodeGen/ARM/addrmode.ll
+++ b/llvm/test/CodeGen/ARM/addrmode.ll
@@ -4,14 +4,14 @@
 define i32 @t1(i32 %a) {
 	%b = mul i32 %a, 9
         %c = inttoptr i32 %b to i32*
-        %d = load i32* %c
+        %d = load i32, i32* %c
 	ret i32 %d
 }
 
 define i32 @t2(i32 %a) {
 	%b = mul i32 %a, -7
         %c = inttoptr i32 %b to i32*
-        %d = load i32* %c
+        %d = load i32, i32* %c
 	ret i32 %d
 }
 
diff --git a/llvm/test/CodeGen/ARM/aliases.ll b/llvm/test/CodeGen/ARM/aliases.ll
index 5a737ad..c24d0d2 100644
--- a/llvm/test/CodeGen/ARM/aliases.ll
+++ b/llvm/test/CodeGen/ARM/aliases.ll
@@ -33,9 +33,9 @@
 
 define i32 @test() {
 entry:
-   %tmp = load i32* @foo1
-   %tmp1 = load i32* @foo2
-   %tmp0 = load i32* @bar_i
+   %tmp = load i32, i32* @foo1
+   %tmp1 = load i32, i32* @foo2
+   %tmp0 = load i32, i32* @bar_i
    %tmp2 = call i32 @foo_f()
    %tmp3 = add i32 %tmp, %tmp2
    %tmp4 = call %FunTy* @bar_f()
diff --git a/llvm/test/CodeGen/ARM/alloc-no-stack-realign.ll b/llvm/test/CodeGen/ARM/alloc-no-stack-realign.ll
index 5ad8719..600fb6a 100644
--- a/llvm/test/CodeGen/ARM/alloc-no-stack-realign.ll
+++ b/llvm/test/CodeGen/ARM/alloc-no-stack-realign.ll
@@ -31,9 +31,9 @@
 ; NO-REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]!
 ; NO-REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]
  %retval = alloca <16 x float>, align 16
- %0 = load <16 x float>* @T3_retval, align 16
+ %0 = load <16 x float>, <16 x float>* @T3_retval, align 16
  store <16 x float> %0, <16 x float>* %retval
- %1 = load <16 x float>* %retval
+ %1 = load <16 x float>, <16 x float>* %retval
  store <16 x float> %1, <16 x float>* %agg.result, align 16
  ret void
 }
@@ -66,9 +66,9 @@
 ; REALIGN: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]!
 ; REALIGN: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R0]]:128]
  %retval = alloca <16 x float>, align 16
- %0 = load <16 x float>* @T3_retval, align 16
+ %0 = load <16 x float>, <16 x float>* @T3_retval, align 16
  store <16 x float> %0, <16 x float>* %retval
- %1 = load <16 x float>* %retval
+ %1 = load <16 x float>, <16 x float>* %retval
  store <16 x float> %1, <16 x float>* %agg.result, align 16
  ret void
 }
diff --git a/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll b/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll
index 3b5161e..7d7fae9 100644
--- a/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll
+++ b/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -22,7 +22,7 @@
   %acc.tr = phi i32 [ %or, %sw.bb ], [ %acc, %entry ]
   %lsr.iv24 = bitcast %struct.Foo* %lsr.iv2 to i8**
   %scevgep5 = getelementptr i8*, i8** %lsr.iv24, i32 -1
-  %tmp2 = load i8** %scevgep5
+  %tmp2 = load i8*, i8** %scevgep5
   %0 = ptrtoint i8* %tmp2 to i32
 
 ; ARM:      ands {{r[0-9]+}}, {{r[0-9]+}}, #3
@@ -90,7 +90,7 @@
 define internal zeroext i8 @bar(%struct.S* %x, %struct.S* nocapture %y) nounwind readonly {
 entry:
   %0 = getelementptr inbounds %struct.S, %struct.S* %x, i32 0, i32 1, i32 0
-  %1 = load i8* %0, align 1
+  %1 = load i8, i8* %0, align 1
   %2 = zext i8 %1 to i32
 ; ARM: ands
 ; THUMB: ands
@@ -104,7 +104,7 @@
 bb:                                               ; preds = %entry
 ; V8-NEXT: %bb
   %5 = getelementptr inbounds %struct.S, %struct.S* %y, i32 0, i32 1, i32 0
-  %6 = load i8* %5, align 1
+  %6 = load i8, i8* %5, align 1
   %7 = zext i8 %6 to i32
 ; ARM: andsne
 ; THUMB: ands
diff --git a/llvm/test/CodeGen/ARM/arm-modifier.ll b/llvm/test/CodeGen/ARM/arm-modifier.ll
index 580f7e7..67d468e 100644
--- a/llvm/test/CodeGen/ARM/arm-modifier.ll
+++ b/llvm/test/CodeGen/ARM/arm-modifier.ll
@@ -6,8 +6,8 @@
   %scale2.addr = alloca float, align 4
   store float %scale, float* %scale.addr, align 4
   store float %scale2, float* %scale2.addr, align 4
-  %tmp = load float* %scale.addr, align 4
-  %tmp1 = load float* %scale2.addr, align 4
+  %tmp = load float, float* %scale.addr, align 4
+  %tmp1 = load float, float* %scale2.addr, align 4
   call void asm sideeffect "vmul.f32    q0, q0, ${0:y} \0A\09vmul.f32    q1, q1, ${0:y} \0A\09vmul.f32    q1, q0, ${1:y} \0A\09", "w,w,~{q0},~{q1}"(float %tmp, float %tmp1) nounwind
   ret i32 0
 }
@@ -49,8 +49,8 @@
 ; CHECK: stm {{lr|r[0-9]+}}, {[[REG1:(r[0-9]+)]], r{{[0-9]+}}}
 ; CHECK: adds {{lr|r[0-9]+}}, [[REG1]]
 ; CHECK: ldm {{lr|r[0-9]+}}, {r{{[0-9]+}}, r{{[0-9]+}}}
-%tmp = load i64* @f3_var, align 4
-%tmp1 = load i64* @f3_var2, align 4
+%tmp = load i64, i64* @f3_var, align 4
+%tmp1 = load i64, i64* @f3_var2, align 4
 %0 = call i64 asm sideeffect "stm ${0:m}, ${1:M}\0A\09adds $3, $1\0A\09", "=*m,=r,1,r"(i64** @f3_ptr, i64 %tmp, i64 %tmp1) nounwind
 store i64 %0, i64* @f3_var, align 4
 %1 = call i64 asm sideeffect "ldm ${1:m}, ${0:M}\0A\09", "=r,*m"(i64** @f3_ptr) nounwind
diff --git a/llvm/test/CodeGen/ARM/atomic-64bit.ll b/llvm/test/CodeGen/ARM/atomic-64bit.ll
index 0c0769f..7510d6c 100644
--- a/llvm/test/CodeGen/ARM/atomic-64bit.ll
+++ b/llvm/test/CodeGen/ARM/atomic-64bit.ll
@@ -214,7 +214,7 @@
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
 ; CHECK-THUMB: dmb {{ish$}}
 
-  %r = load atomic i64* %ptr seq_cst, align 8
+  %r = load atomic i64, i64* %ptr seq_cst, align 8
   ret i64 %r
 }
 
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index af13dfc..5db8178 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -44,7 +44,7 @@
 ; THUMBM-LABEL: test2
 ; THUMBM: ldr
 ; THUMBM: dmb sy
-  %val = load atomic i32* %ptr seq_cst, align 4
+  %val = load atomic i32, i32* %ptr seq_cst, align 4
   ret i32 %val
 }
 
@@ -76,7 +76,7 @@
 ; ARMV6-NOT: mcr
 ; THUMBM-LABEL: test3
 ; THUMBM-NOT: dmb sy
-  %val = load atomic i8* %ptr1 unordered, align 1
+  %val = load atomic i8, i8* %ptr1 unordered, align 1
   store atomic i8 %val, i8* %ptr2 unordered, align 1
   ret void
 }
@@ -87,7 +87,7 @@
 ; THUMBONE: ___sync_lock_test_and_set_1
 ; ARMV6-LABEL: test4
 ; THUMBM-LABEL: test4
-  %val = load atomic i8* %ptr1 seq_cst, align 1
+  %val = load atomic i8, i8* %ptr1 seq_cst, align 1
   store atomic i8 %val, i8* %ptr2 seq_cst, align 1
   ret void
 }
@@ -95,7 +95,7 @@
 define i64 @test_old_load_64bit(i64* %p) {
 ; ARMV4-LABEL: test_old_load_64bit
 ; ARMV4: ___sync_val_compare_and_swap_8
-  %1 = load atomic i64* %p seq_cst, align 8
+  %1 = load atomic i64, i64* %p seq_cst, align 8
   ret i64 %1
 }
 
diff --git a/llvm/test/CodeGen/ARM/atomic-op.ll b/llvm/test/CodeGen/ARM/atomic-op.ll
index 1ac8648..db32bff 100644
--- a/llvm/test/CodeGen/ARM/atomic-op.ll
+++ b/llvm/test/CodeGen/ARM/atomic-op.ll
@@ -25,7 +25,7 @@
 	store i32 3855, i32* %ort
 	store i32 3855, i32* %xort
 	store i32 4, i32* %temp
-	%tmp = load i32* %temp
+	%tmp = load i32, i32* %temp
   ; CHECK: ldrex
   ; CHECK: add
   ; CHECK: strex
@@ -308,8 +308,8 @@
 
 define i32 @load_load_add_acquire(i32* %mem1, i32* %mem2) nounwind {
 ; CHECK-LABEL: load_load_add_acquire
-  %val1 = load atomic i32* %mem1 acquire, align 4
-  %val2 = load atomic i32* %mem2 acquire, align 4
+  %val1 = load atomic i32, i32* %mem1 acquire, align 4
+  %val2 = load atomic i32, i32* %mem2 acquire, align 4
   %tmp = add i32 %val1, %val2
 
 ; CHECK: ldr {{r[0-9]}}, [r0]
@@ -353,7 +353,7 @@
 
 define void @load_fence_store_monotonic(i32* %mem1, i32* %mem2) {
 ; CHECK-LABEL: load_fence_store_monotonic
-  %val = load atomic i32* %mem1 monotonic, align 4
+  %val = load atomic i32, i32* %mem1 monotonic, align 4
   fence seq_cst
   store atomic i32 %val, i32* %mem2 monotonic, align 4
 
diff --git a/llvm/test/CodeGen/ARM/atomic-ops-v8.ll b/llvm/test/CodeGen/ARM/atomic-ops-v8.ll
index 6ba1352..db5007b 100644
--- a/llvm/test/CodeGen/ARM/atomic-ops-v8.ll
+++ b/llvm/test/CodeGen/ARM/atomic-ops-v8.ll
@@ -1166,7 +1166,7 @@
 
 define i8 @test_atomic_load_monotonic_i8() nounwind {
 ; CHECK-LABEL: test_atomic_load_monotonic_i8:
-  %val = load atomic i8* @var8 monotonic, align 1
+  %val = load atomic i8, i8* @var8 monotonic, align 1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
@@ -1183,7 +1183,7 @@
   %addr_int = add i64 %base, %off
   %addr = inttoptr i64 %addr_int to i8*
 
-  %val = load atomic i8* %addr monotonic, align 1
+  %val = load atomic i8, i8* %addr monotonic, align 1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK-LE: ldrb r0, [r0, r2]
@@ -1196,7 +1196,7 @@
 
 define i8 @test_atomic_load_acquire_i8() nounwind {
 ; CHECK-LABEL: test_atomic_load_acquire_i8:
-  %val = load atomic i8* @var8 acquire, align 1
+  %val = load atomic i8, i8* @var8 acquire, align 1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
@@ -1213,7 +1213,7 @@
 
 define i8 @test_atomic_load_seq_cst_i8() nounwind {
 ; CHECK-LABEL: test_atomic_load_seq_cst_i8:
-  %val = load atomic i8* @var8 seq_cst, align 1
+  %val = load atomic i8, i8* @var8 seq_cst, align 1
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var8
@@ -1230,7 +1230,7 @@
 
 define i16 @test_atomic_load_monotonic_i16() nounwind {
 ; CHECK-LABEL: test_atomic_load_monotonic_i16:
-  %val = load atomic i16* @var16 monotonic, align 2
+  %val = load atomic i16, i16* @var16 monotonic, align 2
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var16
@@ -1251,7 +1251,7 @@
   %addr_int = add i64 %base, %off
   %addr = inttoptr i64 %addr_int to i32*
 
-  %val = load atomic i32* %addr monotonic, align 4
+  %val = load atomic i32, i32* %addr monotonic, align 4
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK-LE: ldr r0, [r0, r2]
@@ -1264,7 +1264,7 @@
 
 define i64 @test_atomic_load_seq_cst_i64() nounwind {
 ; CHECK-LABEL: test_atomic_load_seq_cst_i64:
-  %val = load atomic i64* @var64 seq_cst, align 8
+  %val = load atomic i64, i64* @var64 seq_cst, align 8
 ; CHECK-NOT: dmb
 ; CHECK-NOT: mcr
 ; CHECK: movw r[[ADDR:[0-9]+]], :lower16:var64
@@ -1399,7 +1399,7 @@
 ; CHECK-LABEL: not.barriers:
   br i1 %cond, label %atomic_ver, label %simple_ver
 simple_ver:
-  %oldval = load i32* %var
+  %oldval = load i32, i32* %var
   %newval = add nsw i32 %oldval, -1
   store i32 %newval, i32* %var
   br label %somewhere
diff --git a/llvm/test/CodeGen/ARM/available_externally.ll b/llvm/test/CodeGen/ARM/available_externally.ll
index d925b5c..0550747 100644
--- a/llvm/test/CodeGen/ARM/available_externally.ll
+++ b/llvm/test/CodeGen/ARM/available_externally.ll
@@ -5,7 +5,7 @@
 @B = external hidden constant i32
 
 define i32 @t1() {
-  %tmp = load i32* @A
+  %tmp = load i32, i32* @A
   store i32 %tmp, i32* @B
   ret i32 %tmp
 }
diff --git a/llvm/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/llvm/test/CodeGen/ARM/avoid-cpsr-rmw.ll
index 7bdcb79..c3de07e 100644
--- a/llvm/test/CodeGen/ARM/avoid-cpsr-rmw.ll
+++ b/llvm/test/CodeGen/ARM/avoid-cpsr-rmw.ll
@@ -30,13 +30,13 @@
 ; CHECK-NOT: muls
   %ptr1.addr.09 = phi i32* [ %add.ptr, %while.body ], [ %ptr1, %entry ]
   %ptr2.addr.08 = phi i32* [ %incdec.ptr, %while.body ], [ %ptr2, %entry ]
-  %0 = load i32* %ptr1.addr.09, align 4
+  %0 = load i32, i32* %ptr1.addr.09, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 1
-  %1 = load i32* %arrayidx1, align 4
+  %1 = load i32, i32* %arrayidx1, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 2
-  %2 = load i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx3, align 4
   %arrayidx4 = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 3
-  %3 = load i32* %arrayidx4, align 4
+  %3 = load i32, i32* %arrayidx4, align 4
   %add.ptr = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 4
   %mul = mul i32 %1, %0
   %mul5 = mul i32 %mul, %2
@@ -64,13 +64,13 @@
 ; CHECK: muls
   %ptr1.addr.09 = phi i32* [ %add.ptr, %while.body ], [ %ptr1, %entry ]
   %ptr2.addr.08 = phi i32* [ %incdec.ptr, %while.body ], [ %ptr2, %entry ]
-  %0 = load i32* %ptr1.addr.09, align 4
+  %0 = load i32, i32* %ptr1.addr.09, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 1
-  %1 = load i32* %arrayidx1, align 4
+  %1 = load i32, i32* %arrayidx1, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 2
-  %2 = load i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx3, align 4
   %arrayidx4 = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 3
-  %3 = load i32* %arrayidx4, align 4
+  %3 = load i32, i32* %arrayidx4, align 4
   %add.ptr = getelementptr inbounds i32, i32* %ptr1.addr.09, i32 4
   %mul = mul i32 %1, %0
   %mul5 = mul i32 %mul, %2
@@ -92,7 +92,7 @@
 ; CHECK: vmrs APSR_nzcv, fpscr
 ; CHECK: if.then
 ; CHECK-NOT: movs
-  %0 = load double* %q, align 4
+  %0 = load double, double* %q, align 4
   %cmp = fcmp olt double %0, 1.000000e+01
   %incdec.ptr1 = getelementptr inbounds i32, i32* %p, i32 1
   br i1 %cmp, label %if.then, label %if.else
diff --git a/llvm/test/CodeGen/ARM/bfi.ll b/llvm/test/CodeGen/ARM/bfi.ll
index bce09da..0661960 100644
--- a/llvm/test/CodeGen/ARM/bfi.ll
+++ b/llvm/test/CodeGen/ARM/bfi.ll
@@ -9,7 +9,7 @@
 ; CHECK: f1
 ; CHECK: mov r2, #10
 ; CHECK: bfi r1, r2, #22, #4
-  %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1]
+  %0 = load i32, i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1]
   %1 = and i32 %0, -62914561                      ; <i32> [#uses=1]
   %2 = or i32 %1, 41943040                        ; <i32> [#uses=1]
   store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4
diff --git a/llvm/test/CodeGen/ARM/bfx.ll b/llvm/test/CodeGen/ARM/bfx.ll
index b2161e6..edb0c1a 100644
--- a/llvm/test/CodeGen/ARM/bfx.ll
+++ b/llvm/test/CodeGen/ARM/bfx.ll
@@ -42,12 +42,12 @@
   %shr2 = and i32 %and1, 255
   %shr4 = lshr i32 %x, 24
   %arrayidx = getelementptr inbounds i32, i32* %ctx, i32 %shr4
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %arrayidx5 = getelementptr inbounds i32, i32* %ctx, i32 %shr2
-  %1 = load i32* %arrayidx5, align 4
+  %1 = load i32, i32* %arrayidx5, align 4
   %add = add i32 %1, %0
   %arrayidx6 = getelementptr inbounds i32, i32* %ctx, i32 %shr
-  %2 = load i32* %arrayidx6, align 4
+  %2 = load i32, i32* %arrayidx6, align 4
   %add7 = add i32 %add, %2
   ret i32 %add7
 }
diff --git a/llvm/test/CodeGen/ARM/big-endian-neon-bitconv.ll b/llvm/test/CodeGen/ARM/big-endian-neon-bitconv.ll
index 427d2e7..b5a840a 100644
--- a/llvm/test/CodeGen/ARM/big-endian-neon-bitconv.ll
+++ b/llvm/test/CodeGen/ARM/big-endian-neon-bitconv.ll
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: conv_i64_to_v8i8:
 ; CHECK: vrev64.8
   %v = bitcast i64 %val to <8 x i8>
-  %w = load <8 x i8>* @v8i8
+  %w = load <8 x i8>, <8 x i8>* @v8i8
   %a = add <8 x i8> %v, %w
   store <8 x i8> %a, <8 x i8>* %store
   ret void
@@ -28,8 +28,8 @@
 define void @conv_v8i8_to_i64( <8 x i8>* %load, <8 x i8>* %store ) {
 ; CHECK-LABEL: conv_v8i8_to_i64:
 ; CHECK: vrev64.8
-  %v = load <8 x i8>* %load
-  %w = load <8 x i8>* @v8i8
+  %v = load <8 x i8>, <8 x i8>* %load
+  %w = load <8 x i8>, <8 x i8>* @v8i8
   %a = add <8 x i8> %v, %w
   %f = bitcast <8 x i8> %a to i64
   call void @conv_i64_to_v8i8( i64 %f, <8 x i8>* %store )
@@ -40,7 +40,7 @@
 ; CHECK-LABEL: conv_i64_to_v4i16:
 ; CHECK: vrev64.16
   %v = bitcast i64 %val to <4 x i16>
-  %w = load <4 x i16>* @v4i16
+  %w = load <4 x i16>, <4 x i16>* @v4i16
   %a = add <4 x i16> %v, %w
   store <4 x i16> %a, <4 x i16>* %store
   ret void
@@ -49,8 +49,8 @@
 define void @conv_v4i16_to_i64( <4 x i16>* %load, <4 x i16>* %store ) {
 ; CHECK-LABEL: conv_v4i16_to_i64:
 ; CHECK: vrev64.16
-  %v = load <4 x i16>* %load
-  %w = load <4 x i16>* @v4i16
+  %v = load <4 x i16>, <4 x i16>* %load
+  %w = load <4 x i16>, <4 x i16>* @v4i16
   %a = add <4 x i16> %v, %w
   %f = bitcast <4 x i16> %a to i64
   call void @conv_i64_to_v4i16( i64 %f, <4 x i16>* %store )
@@ -61,7 +61,7 @@
 ; CHECK-LABEL: conv_i64_to_v2i32:
 ; CHECK: vrev64.32
   %v = bitcast i64 %val to <2 x i32>
-  %w = load <2 x i32>* @v2i32
+  %w = load <2 x i32>, <2 x i32>* @v2i32
   %a = add <2 x i32> %v, %w
   store <2 x i32> %a, <2 x i32>* %store
   ret void
@@ -70,8 +70,8 @@
 define void @conv_v2i32_to_i64( <2 x i32>* %load, <2 x i32>* %store ) {
 ; CHECK-LABEL: conv_v2i32_to_i64:
 ; CHECK: vrev64.32
-  %v = load <2 x i32>* %load
-  %w = load <2 x i32>* @v2i32
+  %v = load <2 x i32>, <2 x i32>* %load
+  %w = load <2 x i32>, <2 x i32>* @v2i32
   %a = add <2 x i32> %v, %w
   %f = bitcast <2 x i32> %a to i64
   call void @conv_i64_to_v2i32( i64 %f, <2 x i32>* %store )
@@ -82,7 +82,7 @@
 ; CHECK-LABEL: conv_i64_to_v2f32:
 ; CHECK: vrev64.32
   %v = bitcast i64 %val to <2 x float>
-  %w = load <2 x float>* @v2f32
+  %w = load <2 x float>, <2 x float>* @v2f32
   %a = fadd <2 x float> %v, %w
   store <2 x float> %a, <2 x float>* %store
   ret void
@@ -91,8 +91,8 @@
 define void @conv_v2f32_to_i64( <2 x float>* %load, <2 x float>* %store ) {
 ; CHECK-LABEL: conv_v2f32_to_i64:
 ; CHECK: vrev64.32
-  %v = load <2 x float>* %load
-  %w = load <2 x float>* @v2f32
+  %v = load <2 x float>, <2 x float>* %load
+  %w = load <2 x float>, <2 x float>* @v2f32
   %a = fadd <2 x float> %v, %w
   %f = bitcast <2 x float> %a to i64
   call void @conv_i64_to_v2f32( i64 %f, <2 x float>* %store )
@@ -103,7 +103,7 @@
 ; CHECK-LABEL: conv_f64_to_v8i8:
 ; CHECK: vrev64.8
   %v = bitcast double %val to <8 x i8>
-  %w = load <8 x i8>* @v8i8
+  %w = load <8 x i8>, <8 x i8>* @v8i8
   %a = add <8 x i8> %v, %w
   store <8 x i8> %a, <8 x i8>* %store
   ret void
@@ -112,8 +112,8 @@
 define void @conv_v8i8_to_f64( <8 x i8>* %load, <8 x i8>* %store ) {
 ; CHECK-LABEL: conv_v8i8_to_f64:
 ; CHECK: vrev64.8
-  %v = load <8 x i8>* %load
-  %w = load <8 x i8>* @v8i8
+  %v = load <8 x i8>, <8 x i8>* %load
+  %w = load <8 x i8>, <8 x i8>* @v8i8
   %a = add <8 x i8> %v, %w
   %f = bitcast <8 x i8> %a to double
   call void @conv_f64_to_v8i8( double %f, <8 x i8>* %store )
@@ -124,7 +124,7 @@
 ; CHECK-LABEL: conv_f64_to_v4i16:
 ; CHECK: vrev64.16
   %v = bitcast double %val to <4 x i16>
-  %w = load <4 x i16>* @v4i16
+  %w = load <4 x i16>, <4 x i16>* @v4i16
   %a = add <4 x i16> %v, %w
   store <4 x i16> %a, <4 x i16>* %store
   ret void
@@ -133,8 +133,8 @@
 define void @conv_v4i16_to_f64( <4 x i16>* %load, <4 x i16>* %store ) {
 ; CHECK-LABEL: conv_v4i16_to_f64:
 ; CHECK: vrev64.16
-  %v = load <4 x i16>* %load
-  %w = load <4 x i16>* @v4i16
+  %v = load <4 x i16>, <4 x i16>* %load
+  %w = load <4 x i16>, <4 x i16>* @v4i16
   %a = add <4 x i16> %v, %w
   %f = bitcast <4 x i16> %a to double
   call void @conv_f64_to_v4i16( double %f, <4 x i16>* %store )
@@ -145,7 +145,7 @@
 ; CHECK-LABEL: conv_f64_to_v2i32:
 ; CHECK: vrev64.32
   %v = bitcast double %val to <2 x i32>
-  %w = load <2 x i32>* @v2i32
+  %w = load <2 x i32>, <2 x i32>* @v2i32
   %a = add <2 x i32> %v, %w
   store <2 x i32> %a, <2 x i32>* %store
   ret void
@@ -154,8 +154,8 @@
 define void @conv_v2i32_to_f64( <2 x i32>* %load, <2 x i32>* %store ) {
 ; CHECK-LABEL: conv_v2i32_to_f64:
 ; CHECK: vrev64.32
-  %v = load <2 x i32>* %load
-  %w = load <2 x i32>* @v2i32
+  %v = load <2 x i32>, <2 x i32>* %load
+  %w = load <2 x i32>, <2 x i32>* @v2i32
   %a = add <2 x i32> %v, %w
   %f = bitcast <2 x i32> %a to double
   call void @conv_f64_to_v2i32( double %f, <2 x i32>* %store )
@@ -166,7 +166,7 @@
 ; CHECK-LABEL: conv_f64_to_v2f32:
 ; CHECK: vrev64.32
   %v = bitcast double %val to <2 x float>
-  %w = load <2 x float>* @v2f32
+  %w = load <2 x float>, <2 x float>* @v2f32
   %a = fadd <2 x float> %v, %w
   store <2 x float> %a, <2 x float>* %store
   ret void
@@ -175,8 +175,8 @@
 define void @conv_v2f32_to_f64( <2 x float>* %load, <2 x float>* %store ) {
 ; CHECK-LABEL: conv_v2f32_to_f64:
 ; CHECK: vrev64.32
-  %v = load <2 x float>* %load
-  %w = load <2 x float>* @v2f32
+  %v = load <2 x float>, <2 x float>* %load
+  %w = load <2 x float>, <2 x float>* @v2f32
   %a = fadd <2 x float> %v, %w
   %f = bitcast <2 x float> %a to double
   call void @conv_f64_to_v2f32( double %f, <2 x float>* %store )
@@ -190,7 +190,7 @@
 ; CHECK-LABEL: conv_i128_to_v16i8:
 ; CHECK: vrev32.8
   %v = bitcast i128 %val to <16 x i8>
-  %w = load  <16 x i8>* @v16i8
+  %w = load <16 x i8>, <16 x i8>* @v16i8
   %a = add <16 x i8> %v, %w
   store <16 x i8> %a, <16 x i8>* %store
   ret void
@@ -199,8 +199,8 @@
 define void @conv_v16i8_to_i128( <16 x i8>* %load, <16 x i8>* %store ) {
 ; CHECK-LABEL: conv_v16i8_to_i128:
 ; CHECK: vrev32.8
-  %v = load <16 x i8>* %load
-  %w = load <16 x i8>* @v16i8
+  %v = load <16 x i8>, <16 x i8>* %load
+  %w = load <16 x i8>, <16 x i8>* @v16i8
   %a = add <16 x i8> %v, %w
   %f = bitcast <16 x i8> %a to i128
   call void @conv_i128_to_v16i8( i128 %f, <16 x i8>* %store )
@@ -211,7 +211,7 @@
 ; CHECK-LABEL: conv_i128_to_v8i16:
 ; CHECK: vrev32.16
   %v = bitcast i128 %val to <8 x i16>
-  %w = load  <8 x i16>* @v8i16
+  %w = load <8 x i16>, <8 x i16>* @v8i16
   %a = add <8 x i16> %v, %w
   store <8 x i16> %a, <8 x i16>* %store
   ret void
@@ -220,8 +220,8 @@
 define void @conv_v8i16_to_i128( <8 x i16>* %load, <8 x i16>* %store ) {
 ; CHECK-LABEL: conv_v8i16_to_i128:
 ; CHECK: vrev32.16
-  %v = load <8 x i16>* %load
-  %w = load <8 x i16>* @v8i16
+  %v = load <8 x i16>, <8 x i16>* %load
+  %w = load <8 x i16>, <8 x i16>* @v8i16
   %a = add <8 x i16> %v, %w
   %f = bitcast <8 x i16> %a to i128
   call void @conv_i128_to_v8i16( i128 %f, <8 x i16>* %store )
@@ -232,7 +232,7 @@
 ; CHECK-LABEL: conv_i128_to_v4i32:
 ; CHECK: vrev64.32
   %v = bitcast i128 %val to <4 x i32>
-  %w = load <4 x i32>* @v4i32
+  %w = load <4 x i32>, <4 x i32>* @v4i32
   %a = add <4 x i32> %v, %w
   store <4 x i32> %a, <4 x i32>* %store
   ret void
@@ -241,8 +241,8 @@
 define void @conv_v4i32_to_i128( <4 x i32>* %load, <4 x i32>* %store ) {
 ; CHECK-LABEL: conv_v4i32_to_i128:
 ; CHECK: vrev64.32
-  %v = load <4 x i32>* %load
-  %w = load <4 x i32>* @v4i32
+  %v = load <4 x i32>, <4 x i32>* %load
+  %w = load <4 x i32>, <4 x i32>* @v4i32
   %a = add <4 x i32> %v, %w
   %f = bitcast <4 x i32> %a to i128
   call void @conv_i128_to_v4i32( i128 %f, <4 x i32>* %store )
@@ -253,7 +253,7 @@
 ; CHECK-LABEL: conv_i128_to_v4f32:
 ; CHECK: vrev64.32
   %v = bitcast i128 %val to <4 x float>
-  %w = load <4 x float>* @v4f32
+  %w = load <4 x float>, <4 x float>* @v4f32
   %a = fadd <4 x float> %v, %w
   store <4 x float> %a, <4 x float>* %store
   ret void
@@ -262,8 +262,8 @@
 define void @conv_v4f32_to_i128( <4 x float>* %load, <4 x float>* %store ) {
 ; CHECK-LABEL: conv_v4f32_to_i128:
 ; CHECK: vrev64.32
-  %v = load <4 x float>* %load
-  %w = load <4 x float>* @v4f32
+  %v = load <4 x float>, <4 x float>* %load
+  %w = load <4 x float>, <4 x float>* @v4f32
   %a = fadd <4 x float> %v, %w
   %f = bitcast <4 x float> %a to i128
   call void @conv_i128_to_v4f32( i128 %f, <4 x float>* %store )
@@ -274,7 +274,7 @@
 ; CHECK-LABEL: conv_f128_to_v2f64:
 ; CHECK: vrev64.32
   %v = bitcast fp128 %val to <2 x double>
-  %w = load <2 x double>* @v2f64
+  %w = load <2 x double>, <2 x double>* @v2f64
   %a = fadd <2 x double> %v, %w
   store <2 x double> %a, <2 x double>* %store
   ret void
@@ -283,8 +283,8 @@
 define void @conv_v2f64_to_f128( <2 x double>* %load, <2 x double>* %store ) {
 ; CHECK-LABEL: conv_v2f64_to_f128:
 ; CHECK: vrev64.32
-  %v = load <2 x double>* %load
-  %w = load <2 x double>* @v2f64
+  %v = load <2 x double>, <2 x double>* %load
+  %w = load <2 x double>, <2 x double>* @v2f64
   %a = fadd <2 x double> %v, %w
   %f = bitcast <2 x double> %a to fp128
   call void @conv_f128_to_v2f64( fp128 %f, <2 x double>* %store )
@@ -295,7 +295,7 @@
 ; CHECK-LABEL: conv_f128_to_v16i8:
 ; CHECK: vrev32.8
   %v = bitcast fp128 %val to <16 x i8>
-  %w = load  <16 x i8>* @v16i8
+  %w = load <16 x i8>, <16 x i8>* @v16i8
   %a = add <16 x i8> %v, %w
   store <16 x i8> %a, <16 x i8>* %store
   ret void
@@ -304,8 +304,8 @@
 define void @conv_v16i8_to_f128( <16 x i8>* %load, <16 x i8>* %store ) {
 ; CHECK-LABEL: conv_v16i8_to_f128:
 ; CHECK: vrev32.8
-  %v = load <16 x i8>* %load
-  %w = load <16 x i8>* @v16i8
+  %v = load <16 x i8>, <16 x i8>* %load
+  %w = load <16 x i8>, <16 x i8>* @v16i8
   %a = add <16 x i8> %v, %w
   %f = bitcast <16 x i8> %a to fp128
   call void @conv_f128_to_v16i8( fp128 %f, <16 x i8>* %store )
@@ -316,7 +316,7 @@
 ; CHECK-LABEL: conv_f128_to_v8i16:
 ; CHECK: vrev32.16
   %v = bitcast fp128 %val to <8 x i16>
-  %w = load  <8 x i16>* @v8i16
+  %w = load <8 x i16>, <8 x i16>* @v8i16
   %a = add <8 x i16> %v, %w
   store <8 x i16> %a, <8 x i16>* %store
   ret void
@@ -325,8 +325,8 @@
 define void @conv_v8i16_to_f128( <8 x i16>* %load, <8 x i16>* %store ) {
 ; CHECK-LABEL: conv_v8i16_to_f128:
 ; CHECK: vrev32.16
-  %v = load <8 x i16>* %load
-  %w = load <8 x i16>* @v8i16
+  %v = load <8 x i16>, <8 x i16>* %load
+  %w = load <8 x i16>, <8 x i16>* @v8i16
   %a = add <8 x i16> %v, %w
   %f = bitcast <8 x i16> %a to fp128
   call void @conv_f128_to_v8i16( fp128 %f, <8 x i16>* %store )
@@ -337,7 +337,7 @@
 ; CHECK-LABEL: conv_f128_to_v4f32:
 ; CHECK: vrev64.32
   %v = bitcast fp128 %val to <4 x float>
-  %w = load <4 x float>* @v4f32
+  %w = load <4 x float>, <4 x float>* @v4f32
   %a = fadd <4 x float> %v, %w
   store <4 x float> %a, <4 x float>* %store
   ret void
@@ -346,8 +346,8 @@
 define void @conv_v4f32_to_f128( <4 x float>* %load, <4 x float>* %store ) {
 ; CHECK-LABEL: conv_v4f32_to_f128:
 ; CHECK: vrev64.32
-  %v = load <4 x float>* %load
-  %w = load <4 x float>* @v4f32
+  %v = load <4 x float>, <4 x float>* %load
+  %w = load <4 x float>, <4 x float>* @v4f32
   %a = fadd <4 x float> %v, %w
   %f = bitcast <4 x float> %a to fp128
   call void @conv_f128_to_v4f32( fp128 %f, <4 x float>* %store )
diff --git a/llvm/test/CodeGen/ARM/big-endian-neon-extend.ll b/llvm/test/CodeGen/ARM/big-endian-neon-extend.ll
index 1498356..1e35305 100644
--- a/llvm/test/CodeGen/ARM/big-endian-neon-extend.ll
+++ b/llvm/test/CodeGen/ARM/big-endian-neon-extend.ll
@@ -14,7 +14,7 @@
 ; CHECK-NEXT: vrev64.32 [[QREG]], [[QREG]]
 ; CHECK-NEXT: vst1.64   {[[REG]], {{d[0-9]+}}}, [r1]
 ; CHECK-NEXT: bx        lr
-  %1 = load <2 x i8>* %loadaddr
+  %1 = load <2 x i8>, <2 x i8>* %loadaddr
   %2 = zext <2 x i8> %1 to <2 x i64>
   store <2 x i64> %2, <2 x i64>* %storeaddr
   ret void
@@ -33,7 +33,7 @@
 ; CHECK-NEXT: vrev64.32 [[QREG]], [[QREG]]
 ; CHECK-NEXT: vst1.64   {[[REG]], {{d[0-9]+}}}, [r1]
 ; CHECK-NEXT: bx        lr
-  %1 = load <2 x i16>* %loadaddr
+  %1 = load <2 x i16>, <2 x i16>* %loadaddr
   %2 = zext <2 x i16> %1 to <2 x i64>
   store <2 x i64> %2, <2 x i64>* %storeaddr
   ret void
@@ -49,7 +49,7 @@
 ; CHECK-NEXT: vrev64.32 [[REG]], [[REG]]
 ; CHECK-NEXT: vstr      [[REG]], [r1]
 ; CHECK-NEXT: bx        lr
-  %1 = load <2 x i8>* %loadaddr
+  %1 = load <2 x i8>, <2 x i8>* %loadaddr
   %2 = zext <2 x i8> %1 to <2 x i32>
   store <2 x i32> %2, <2 x i32>* %storeaddr
   ret void
@@ -63,7 +63,7 @@
 ; CHECK-NEXT: vrev64.32 [[REG]], [[REG]]
 ; CHECK-NEXT: vstr      [[REG]], [r1]
 ; CHECK-NEXT: bx        lr
-  %1 = load <2 x i16>* %loadaddr
+  %1 = load <2 x i16>, <2 x i16>* %loadaddr
   %2 = zext <2 x i16> %1 to <2 x i32>
   store <2 x i32> %2, <2 x i32>* %storeaddr
   ret void
@@ -80,7 +80,7 @@
 ; CHECK-NEXT: vrev32.16 [[REG]], {{d[0-9]+}}
 ; CHECK-NEXT: vst1.32   {[[REG]][0]}, [r1:32]
 ; CHECK-NEXT: bx        lr
-  %1 = load <2 x i8>* %loadaddr
+  %1 = load <2 x i8>, <2 x i8>* %loadaddr
   %2 = zext <2 x i8> %1 to <2 x i16>
   store <2 x i16> %2, <2 x i16>* %storeaddr
   ret void
@@ -95,7 +95,7 @@
 ; CHECK-NEXT: vrev64.32 [[QREG]], [[QREG]]
 ; CHECK-NEXT: vst1.64   {[[REG]], {{d[0-9]+}}}, [r1]
 ; CHECK-NEXT: bx        lr
-  %1 = load <4 x i8>* %loadaddr
+  %1 = load <4 x i8>, <4 x i8>* %loadaddr
   %2 = zext <4 x i8> %1 to <4 x i32>
   store <4 x i32> %2, <4 x i32>* %storeaddr
   ret void
@@ -109,7 +109,7 @@
 ; CHECK-NEXT: vrev64.16 [[REG]], [[REG]]
 ; CHECK-NEXT: vstr      [[REG]], [r1]
 ; CHECK-NEXT: bx        lr
-  %1 = load <4 x i8>* %loadaddr
+  %1 = load <4 x i8>, <4 x i8>* %loadaddr
   %2 = zext <4 x i8> %1 to <4 x i16>
   store <4 x i16> %2, <4 x i16>* %storeaddr
   ret void
diff --git a/llvm/test/CodeGen/ARM/big-endian-neon-trunc-store.ll b/llvm/test/CodeGen/ARM/big-endian-neon-trunc-store.ll
index 65147ad..cbfc46e 100644
--- a/llvm/test/CodeGen/ARM/big-endian-neon-trunc-store.ll
+++ b/llvm/test/CodeGen/ARM/big-endian-neon-trunc-store.ll
@@ -6,7 +6,7 @@
 ; CHECK:       vrev32.16  [[REG]], [[REG]]
 ; CHECK:       vuzp.16    [[REG]], [[REG2:d[0-9]+]]
 ; CHECK:       vrev32.16  [[REG]], [[REG2]]
-  %1 = load <2 x i64>* %loadaddr
+  %1 = load <2 x i64>, <2 x i64>* %loadaddr
   %2 = trunc <2 x i64> %1 to <2 x i16>
   store <2 x i16> %2, <2 x i16>* %storeaddr
   ret void
@@ -18,7 +18,7 @@
 ; CHECK:       vrev16.8  [[REG]], [[REG]]
 ; CHECK:       vuzp.8    [[REG]], [[REG2:d[0-9]+]]
 ; CHECK:       vrev32.8  [[REG]], [[REG2]]
-  %1 = load <4 x i32>* %loadaddr
+  %1 = load <4 x i32>, <4 x i32>* %loadaddr
   %2 = trunc <4 x i32> %1 to <4 x i8>
   store <4 x i8> %2, <4 x i8>* %storeaddr
   ret void
diff --git a/llvm/test/CodeGen/ARM/big-endian-ret-f64.ll b/llvm/test/CodeGen/ARM/big-endian-ret-f64.ll
index 614bfc0..f83e086 100644
--- a/llvm/test/CodeGen/ARM/big-endian-ret-f64.ll
+++ b/llvm/test/CodeGen/ARM/big-endian-ret-f64.ll
@@ -6,7 +6,7 @@
 ; CHECK: ldr r0, [sp]
 ; CHECK: ldr r1, [sp, #4]
   %r = alloca double, align 8
-  %1 = load double* %r, align 8
+  %1 = load double, double* %r, align 8
   ret double %1
 }
 
diff --git a/llvm/test/CodeGen/ARM/big-endian-vector-caller.ll b/llvm/test/CodeGen/ARM/big-endian-vector-caller.ll
index d01b0a7..54bda66 100644
--- a/llvm/test/CodeGen/ARM/big-endian-vector-caller.ll
+++ b/llvm/test/CodeGen/ARM/big-endian-vector-caller.ll
@@ -7,7 +7,7 @@
 ; SOFT: vadd.f64 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vadd.f64 d0
-    %1 = load double* %p
+    %1 = load double, double* %p
     %2 = fadd double %1, %1
     %3 = call i64 @test_i64_f64_helper(double %2)
     %4 = add i64 %3, %3
@@ -23,7 +23,7 @@
 ; SOFT: vadd.i64 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vadd.i64 d0
-    %1 = load <1 x i64>* %p
+    %1 = load <1 x i64>, <1 x i64>* %p
     %2 = add <1 x i64> %1, %1
     %3 = call i64 @test_i64_v1i64_helper(<1 x i64> %2)
     %4 = add i64 %3, %3
@@ -39,7 +39,7 @@
 ; SOFT: vrev64.32 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vrev64.32 d0
-    %1 = load <2 x float>* %p
+    %1 = load <2 x float>, <2 x float>* %p
     %2 = fadd <2 x float> %1, %1
     %3 = call i64 @test_i64_v2f32_helper(<2 x float> %2)
     %4 = add i64 %3, %3
@@ -55,7 +55,7 @@
 ; SOFT: vrev64.32 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vrev64.32 d0
-    %1 = load <2 x i32>* %p
+    %1 = load <2 x i32>, <2 x i32>* %p
     %2 = add <2 x i32> %1, %1
     %3 = call i64 @test_i64_v2i32_helper(<2 x i32> %2)
     %4 = add i64 %3, %3
@@ -71,7 +71,7 @@
 ; SOFT: vrev64.16 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vrev64.16 d0
-    %1 = load <4 x i16>* %p
+    %1 = load <4 x i16>, <4 x i16>* %p
     %2 = add <4 x i16> %1, %1
     %3 = call i64 @test_i64_v4i16_helper(<4 x i16> %2)
     %4 = add i64 %3, %3
@@ -87,7 +87,7 @@
 ; SOFT: vrev64.8 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vrev64.8 d0
-    %1 = load <8 x i8>* %p
+    %1 = load <8 x i8>, <8 x i8>* %p
     %2 = add <8 x i8> %1, %1
     %3 = call i64 @test_i64_v8i8_helper(<8 x i8> %2)
     %4 = add i64 %3, %3
@@ -102,7 +102,7 @@
 define void @test_f64_i64(i64* %p, double* %q) {
 ; CHECK: adds r1
 ; CHECK: adc r0
-    %1 = load i64* %p
+    %1 = load i64, i64* %p
     %2 = add i64 %1, %1
     %3 = call double @test_f64_i64_helper(i64 %2)
     %4 = fadd double %3, %3
@@ -119,7 +119,7 @@
 ; SOFT: vadd.i64 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vadd.i64 d0
-    %1 = load <1 x i64>* %p
+    %1 = load <1 x i64>, <1 x i64>* %p
     %2 = add <1 x i64> %1, %1
     %3 = call double @test_f64_v1i64_helper(<1 x i64> %2)
     %4 = fadd double %3, %3
@@ -136,7 +136,7 @@
 ; SOFT: vrev64.32 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vrev64.32 d0
-    %1 = load <2 x float>* %p
+    %1 = load <2 x float>, <2 x float>* %p
     %2 = fadd <2 x float> %1, %1
     %3 = call double @test_f64_v2f32_helper(<2 x float> %2)
     %4 = fadd double %3, %3
@@ -153,7 +153,7 @@
 ; SOFT: vrev64.32 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vrev64.32 d0
-    %1 = load <2 x i32>* %p
+    %1 = load <2 x i32>, <2 x i32>* %p
     %2 = add <2 x i32> %1, %1
     %3 = call double @test_f64_v2i32_helper(<2 x i32> %2)
     %4 = fadd double %3, %3
@@ -170,7 +170,7 @@
 ; SOFT: vrev64.16 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vrev64.16 d0
-    %1 = load <4 x i16>* %p
+    %1 = load <4 x i16>, <4 x i16>* %p
     %2 = add <4 x i16> %1, %1
     %3 = call double @test_f64_v4i16_helper(<4 x i16> %2)
     %4 = fadd double %3, %3
@@ -187,7 +187,7 @@
 ; SOFT: vrev64.8 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vrev64.8 d0
-    %1 = load <8 x i8>* %p
+    %1 = load <8 x i8>, <8 x i8>* %p
     %2 = add <8 x i8> %1, %1
     %3 = call double @test_f64_v8i8_helper(<8 x i8> %2)
     %4 = fadd double %3, %3
@@ -203,7 +203,7 @@
 define void @test_v1i64_i64(i64* %p, <1 x i64>* %q) {
 ; CHECK: adds r1
 ; CHECK: adc r0
-    %1 = load i64* %p
+    %1 = load i64, i64* %p
     %2 = add i64 %1, %1
     %3 = call <1 x i64> @test_v1i64_i64_helper(i64 %2)
     %4 = add <1 x i64> %3, %3
@@ -220,7 +220,7 @@
 ; SOFT: vadd.f64 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vadd.f64 d0
-    %1 = load double* %p
+    %1 = load double, double* %p
     %2 = fadd double %1, %1
     %3 = call <1 x i64> @test_v1i64_f64_helper(double %2)
     %4 = add <1 x i64> %3, %3
@@ -237,7 +237,7 @@
 ; HARD: vrev64.32 d0
 ; SOFT: vadd.f32 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
-    %1 = load <2 x float>* %p
+    %1 = load <2 x float>, <2 x float>* %p
     %2 = fadd <2 x float> %1, %1
     %3 = call <1 x i64> @test_v1i64_v2f32_helper(<2 x float> %2)
     %4 = add <1 x i64> %3, %3
@@ -255,7 +255,7 @@
 ; SOFT: vadd.i32 [[REG:d[0-9]+]]
 ; SOFT: vrev64.32 [[REG]]
 ; SOFT: vmov r1, r0, [[REG]]
-    %1 = load <2 x i32>* %p
+    %1 = load <2 x i32>, <2 x i32>* %p
     %2 = add <2 x i32> %1, %1
     %3 = call <1 x i64> @test_v1i64_v2i32_helper(<2 x i32> %2)
     %4 = add <1 x i64> %3, %3
@@ -272,7 +272,7 @@
 ; SOFT: vrev64.16 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vrev64.16 d0
-    %1 = load <4 x i16>* %p
+    %1 = load <4 x i16>, <4 x i16>* %p
     %2 = add <4 x i16> %1, %1
     %3 = call <1 x i64> @test_v1i64_v4i16_helper(<4 x i16> %2)
     %4 = add <1 x i64> %3, %3
@@ -289,7 +289,7 @@
 ; SOFT: vrev64.8 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vrev64.8 d0
-    %1 = load <8 x i8>* %p
+    %1 = load <8 x i8>, <8 x i8>* %p
     %2 = add <8 x i8> %1, %1
     %3 = call <1 x i64> @test_v1i64_v8i8_helper(<8 x i8> %2)
     %4 = add <1 x i64> %3, %3
@@ -305,7 +305,7 @@
 define void @test_v2f32_i64(i64* %p, <2 x float>* %q) {
 ; CHECK: adds r1
 ; CHECK: adc r0
-    %1 = load i64* %p
+    %1 = load i64, i64* %p
     %2 = add i64 %1, %1
     %3 = call <2 x float> @test_v2f32_i64_helper(i64 %2)
     %4 = fadd <2 x float> %3, %3
@@ -322,7 +322,7 @@
 ; SOFT: vadd.f64 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vadd.f64 d0
-    %1 = load double* %p
+    %1 = load double, double* %p
     %2 = fadd double %1, %1
     %3 = call <2 x float> @test_v2f32_f64_helper(double %2)
     %4 = fadd <2 x float> %3, %3
@@ -339,7 +339,7 @@
 ; SOFT: vadd.i64 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vadd.i64 d0
-    %1 = load <1 x i64>* %p
+    %1 = load <1 x i64>, <1 x i64>* %p
     %2 = add <1 x i64> %1, %1
     %3 = call <2 x float> @test_v2f32_v1i64_helper(<1 x i64> %2)
     %4 = fadd <2 x float> %3, %3
@@ -357,7 +357,7 @@
 ; SOFT: vadd.i32 [[REG:d[0-9]+]]
 ; SOFT: vrev64.32 [[REG]]
 ; SOFT: vmov r1, r0, [[REG]]
-    %1 = load <2 x i32>* %p
+    %1 = load <2 x i32>, <2 x i32>* %p
     %2 = add <2 x i32> %1, %1
     %3 = call <2 x float> @test_v2f32_v2i32_helper(<2 x i32> %2)
     %4 = fadd <2 x float> %3, %3
@@ -374,7 +374,7 @@
 ; SOFT: vrev64.16 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vrev64.16 d0
-    %1 = load <4 x i16>* %p
+    %1 = load <4 x i16>, <4 x i16>* %p
     %2 = add <4 x i16> %1, %1
     %3 = call <2 x float> @test_v2f32_v4i16_helper(<4 x i16> %2)
     %4 = fadd <2 x float> %3, %3
@@ -391,7 +391,7 @@
 ; SOFT: vrev64.8 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vrev64.8 d0
-    %1 = load <8 x i8>* %p
+    %1 = load <8 x i8>, <8 x i8>* %p
     %2 = add <8 x i8> %1, %1
     %3 = call <2 x float> @test_v2f32_v8i8_helper(<8 x i8> %2)
     %4 = fadd <2 x float> %3, %3
@@ -407,7 +407,7 @@
 define void @test_v2i32_i64(i64* %p, <2 x i32>* %q) {
 ; CHECK: adds r1
 ; CHECK: adc r0
-    %1 = load i64* %p
+    %1 = load i64, i64* %p
     %2 = add i64 %1, %1
     %3 = call <2 x i32> @test_v2i32_i64_helper(i64 %2)
     %4 = add <2 x i32> %3, %3
@@ -424,7 +424,7 @@
 ; SOFT: vadd.f64 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vadd.f64 d0
-    %1 = load double* %p
+    %1 = load double, double* %p
     %2 = fadd double %1, %1
     %3 = call <2 x i32> @test_v2i32_f64_helper(double %2)
     %4 = add <2 x i32> %3, %3
@@ -441,7 +441,7 @@
 ; SOFT: vadd.i64 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vadd.i64 d0
-    %1 = load <1 x i64>* %p
+    %1 = load <1 x i64>, <1 x i64>* %p
     %2 = add <1 x i64> %1, %1
     %3 = call <2 x i32> @test_v2i32_v1i64_helper(<1 x i64> %2)
     %4 = add <2 x i32> %3, %3
@@ -460,7 +460,7 @@
 ; SOFT: vadd.f32 [[REG:d[0-9]+]]
 ; SOFT: vrev64.32 [[REG]]
 ; SOFT: vmov r1, r0, [[REG]]
-    %1 = load <2 x float>* %p
+    %1 = load <2 x float>, <2 x float>* %p
     %2 = fadd <2 x float> %1, %1
     %3 = call <2 x i32> @test_v2i32_v2f32_helper(<2 x float> %2)
     %4 = add <2 x i32> %3, %3
@@ -477,7 +477,7 @@
 ; SOFT: vrev64.16 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vrev64.16 d0
-    %1 = load <4 x i16>* %p
+    %1 = load <4 x i16>, <4 x i16>* %p
     %2 = add <4 x i16> %1, %1
     %3 = call <2 x i32> @test_v2i32_v4i16_helper(<4 x i16> %2)
     %4 = add <2 x i32> %3, %3
@@ -494,7 +494,7 @@
 ; SOFT: vrev64.8 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vrev64.8 d0
-    %1 = load <8 x i8>* %p
+    %1 = load <8 x i8>, <8 x i8>* %p
     %2 = add <8 x i8> %1, %1
     %3 = call <2 x i32> @test_v2i32_v8i8_helper(<8 x i8> %2)
     %4 = add <2 x i32> %3, %3
@@ -510,7 +510,7 @@
 define void @test_v4i16_i64(i64* %p, <4 x i16>* %q) {
 ; CHECK: adds r1
 ; CHECK: adc r0
-    %1 = load i64* %p
+    %1 = load i64, i64* %p
     %2 = add i64 %1, %1
     %3 = call <4 x i16> @test_v4i16_i64_helper(i64 %2)
     %4 = add <4 x i16> %3, %3
@@ -527,7 +527,7 @@
 ; SOFT: vadd.f64 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vadd.f64 d0
-    %1 = load double* %p
+    %1 = load double, double* %p
     %2 = fadd double %1, %1
     %3 = call <4 x i16> @test_v4i16_f64_helper(double %2)
     %4 = add <4 x i16> %3, %3
@@ -544,7 +544,7 @@
 ; SOFT: vadd.i64 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vadd.i64 d0
-    %1 = load <1 x i64>* %p
+    %1 = load <1 x i64>, <1 x i64>* %p
     %2 = add <1 x i64> %1, %1
     %3 = call <4 x i16> @test_v4i16_v1i64_helper(<1 x i64> %2)
     %4 = add <4 x i16> %3, %3
@@ -563,7 +563,7 @@
 ; SOFT: vadd.f32 [[REG:d[0-9]+]]
 ; SOFT: vrev64.32 [[REG]]
 ; SOFT: vmov r1, r0, [[REG]]
-    %1 = load <2 x float>* %p
+    %1 = load <2 x float>, <2 x float>* %p
     %2 = fadd <2 x float> %1, %1
     %3 = call <4 x i16> @test_v4i16_v2f32_helper(<2 x float> %2)
     %4 = add <4 x i16> %3, %3
@@ -582,7 +582,7 @@
 ; SOFT: vadd.i32 [[REG:d[0-9]+]]
 ; SOFT: vrev64.32 [[REG]]
 ; SOFT: vmov r1, r0, [[REG]]
-    %1 = load <2 x i32>* %p
+    %1 = load <2 x i32>, <2 x i32>* %p
     %2 = add <2 x i32> %1, %1
     %3 = call <4 x i16> @test_v4i16_v2i32_helper(<2 x i32> %2)
     %4 = add <4 x i16> %3, %3
@@ -599,7 +599,7 @@
 ; SOFT: vrev64.8 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vrev64.8 d0
-    %1 = load <8 x i8>* %p
+    %1 = load <8 x i8>, <8 x i8>* %p
     %2 = add <8 x i8> %1, %1
     %3 = call <4 x i16> @test_v4i16_v8i8_helper(<8 x i8> %2)
     %4 = add <4 x i16> %3, %3
@@ -615,7 +615,7 @@
 define void @test_v8i8_i64(i64* %p, <8 x i8>* %q) {
 ; CHECK: adds r1
 ; CHECK: adc r0
-    %1 = load i64* %p
+    %1 = load i64, i64* %p
     %2 = add i64 %1, %1
     %3 = call <8 x i8> @test_v8i8_i64_helper(i64 %2)
     %4 = add <8 x i8> %3, %3
@@ -632,7 +632,7 @@
 ; SOFT: vadd.f64 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vadd.f64 d0
-    %1 = load double* %p
+    %1 = load double, double* %p
     %2 = fadd double %1, %1
     %3 = call <8 x i8> @test_v8i8_f64_helper(double %2)
     %4 = add <8 x i8> %3, %3
@@ -649,7 +649,7 @@
 ; SOFT: vadd.i64 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vadd.i64 d0
-    %1 = load <1 x i64>* %p
+    %1 = load <1 x i64>, <1 x i64>* %p
     %2 = add <1 x i64> %1, %1
     %3 = call <8 x i8> @test_v8i8_v1i64_helper(<1 x i64> %2)
     %4 = add <8 x i8> %3, %3
@@ -666,7 +666,7 @@
 ; SOFT: vrev64.32 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vrev64.32 d0
-    %1 = load <2 x float>* %p
+    %1 = load <2 x float>, <2 x float>* %p
     %2 = fadd <2 x float> %1, %1
     %3 = call <8 x i8> @test_v8i8_v2f32_helper(<2 x float> %2)
     %4 = add <8 x i8> %3, %3
@@ -683,7 +683,7 @@
 ; SOFT: vrev64.32 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vrev64.32 d0
-    %1 = load <2 x i32>* %p
+    %1 = load <2 x i32>, <2 x i32>* %p
     %2 = add <2 x i32> %1, %1
     %3 = call <8 x i8> @test_v8i8_v2i32_helper(<2 x i32> %2)
     %4 = add <8 x i8> %3, %3
@@ -700,7 +700,7 @@
 ; SOFT: vrev64.16 [[REG:d[0-9]+]]
 ; SOFT: vmov r1, r0, [[REG]]
 ; HARD: vrev64.16 d0
-    %1 = load <4 x i16>* %p
+    %1 = load <4 x i16>, <4 x i16>* %p
     %2 = add <4 x i16> %1, %1
     %3 = call <8 x i8> @test_v8i8_v4i16_helper(<4 x i16> %2)
     %4 = add <8 x i8> %3, %3
@@ -720,7 +720,7 @@
 ; SOFT: vmov r3, r2, [[REG2]]
 ; HARD: vadd.f64 d1
 ; HARD: vadd.f64 d0
-    %1 = load <2 x double>* %p
+    %1 = load <2 x double>, <2 x double>* %p
     %2 = fadd <2 x double> %1, %1
     %3 = call fp128 @test_f128_v2f64_helper(<2 x double> %2)
     %4 = fadd fp128 %3, %3
@@ -735,7 +735,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vadd.i64 q0
-    %1 = load <2 x i64>* %p
+    %1 = load <2 x i64>, <2 x i64>* %p
     %2 = add <2 x i64> %1, %1
     %3 = call fp128 @test_f128_v2i64_helper(<2 x i64> %2)
     %4 = fadd fp128 %3, %3
@@ -750,7 +750,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.32 q0
-    %1 = load <4 x float>* %p
+    %1 = load <4 x float>, <4 x float>* %p
     %2 = fadd <4 x float> %1, %1
     %3 = call fp128 @test_f128_v4f32_helper(<4 x float> %2)
     %4 = fadd fp128 %3, %3
@@ -765,7 +765,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.32 q0
-    %1 = load <4 x i32>* %p
+    %1 = load <4 x i32>, <4 x i32>* %p
     %2 = add <4 x i32> %1, %1
     %3 = call fp128 @test_f128_v4i32_helper(<4 x i32> %2)
     %4 = fadd fp128 %3, %3
@@ -780,7 +780,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.16 q0
-    %1 = load <8 x i16>* %p
+    %1 = load <8 x i16>, <8 x i16>* %p
     %2 = add <8 x i16> %1, %1
     %3 = call fp128 @test_f128_v8i16_helper(<8 x i16> %2)
     %4 = fadd fp128 %3, %3
@@ -795,7 +795,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.8 q0
-    %1 = load <16 x i8>* %p
+    %1 = load <16 x i8>, <16 x i8>* %p
     %2 = add <16 x i8> %1, %1
     %3 = call fp128 @test_f128_v16i8_helper(<16 x i8> %2)
     %4 = fadd fp128 %3, %3
@@ -807,7 +807,7 @@
 ; CHECK-LABEL: test_v2f64_f128:
 declare <2 x double> @test_v2f64_f128_helper(fp128 %p)
 define void @test_v2f64_f128(fp128* %p, <2 x double>* %q) {
-    %1 = load fp128* %p
+    %1 = load fp128, fp128* %p
     %2 = fadd fp128 %1, %1
     %3 = call <2 x double> @test_v2f64_f128_helper(fp128 %2)
     %4 = fadd <2 x double> %3, %3
@@ -824,7 +824,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vadd.i64 q0
-    %1 = load <2 x i64>* %p
+    %1 = load <2 x i64>, <2 x i64>* %p
     %2 = add <2 x i64> %1, %1
     %3 = call <2 x double> @test_v2f64_v2i64_helper(<2 x i64> %2)
     %4 = fadd <2 x double> %3, %3
@@ -840,7 +840,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.32 q0
-    %1 = load <4 x float>* %p
+    %1 = load <4 x float>, <4 x float>* %p
     %2 = fadd <4 x float> %1, %1
     %3 = call <2 x double> @test_v2f64_v4f32_helper(<4 x float> %2)
     %4 = fadd <2 x double> %3, %3
@@ -856,7 +856,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.32 q0
-    %1 = load <4 x i32>* %p
+    %1 = load <4 x i32>, <4 x i32>* %p
     %2 = add <4 x i32> %1, %1
     %3 = call <2 x double> @test_v2f64_v4i32_helper(<4 x i32> %2)
     %4 = fadd <2 x double> %3, %3
@@ -872,7 +872,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.16 q0
-    %1 = load <8 x i16>* %p
+    %1 = load <8 x i16>, <8 x i16>* %p
     %2 = add <8 x i16> %1, %1
     %3 = call <2 x double> @test_v2f64_v8i16_helper(<8 x i16> %2)
     %4 = fadd <2 x double> %3, %3
@@ -888,7 +888,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.8 q0
-    %1 = load <16 x i8>* %p
+    %1 = load <16 x i8>, <16 x i8>* %p
     %2 = add <16 x i8> %1, %1
     %3 = call <2 x double> @test_v2f64_v16i8_helper(<16 x i8> %2)
     %4 = fadd <2 x double> %3, %3
@@ -901,7 +901,7 @@
 ; CHECK-LABEL: test_v2i64_f128:
 declare <2 x i64> @test_v2i64_f128_helper(fp128 %p)
 define void @test_v2i64_f128(fp128* %p, <2 x i64>* %q) {
-    %1 = load fp128* %p
+    %1 = load fp128, fp128* %p
     %2 = fadd fp128 %1, %1
     %3 = call <2 x i64> @test_v2i64_f128_helper(fp128 %2)
     %4 = add <2 x i64> %3, %3
@@ -918,7 +918,7 @@
 ; SOFT: vmov r3, r2, [[REG2]]
 ; HARD: vadd.f64 d1
 ; HARD: vadd.f64 d0
-    %1 = load <2 x double>* %p
+    %1 = load <2 x double>, <2 x double>* %p
     %2 = fadd <2 x double> %1, %1
     %3 = call <2 x i64> @test_v2i64_v2f64_helper(<2 x double> %2)
     %4 = add <2 x i64> %3, %3
@@ -934,7 +934,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.32 q0
-    %1 = load <4 x float>* %p
+    %1 = load <4 x float>, <4 x float>* %p
     %2 = fadd <4 x float> %1, %1
     %3 = call <2 x i64> @test_v2i64_v4f32_helper(<4 x float> %2)
     %4 = add <2 x i64> %3, %3
@@ -950,7 +950,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.32 q0
-    %1 = load <4 x i32>* %p
+    %1 = load <4 x i32>, <4 x i32>* %p
     %2 = add <4 x i32> %1, %1
     %3 = call <2 x i64> @test_v2i64_v4i32_helper(<4 x i32> %2)
     %4 = add <2 x i64> %3, %3
@@ -966,7 +966,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.16 q0
-    %1 = load <8 x i16>* %p
+    %1 = load <8 x i16>, <8 x i16>* %p
     %2 = add <8 x i16> %1, %1
     %3 = call <2 x i64> @test_v2i64_v8i16_helper(<8 x i16> %2)
     %4 = add <2 x i64> %3, %3
@@ -982,7 +982,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.8 q0
-    %1 = load <16 x i8>* %p
+    %1 = load <16 x i8>, <16 x i8>* %p
     %2 = add <16 x i8> %1, %1
     %3 = call <2 x i64> @test_v2i64_v16i8_helper(<16 x i8> %2)
     %4 = add <2 x i64> %3, %3
@@ -995,7 +995,7 @@
 ; CHECK-LABEL: test_v4f32_f128:
 declare <4 x float> @test_v4f32_f128_helper(fp128 %p)
 define void @test_v4f32_f128(fp128* %p, <4 x float>* %q) {
-    %1 = load fp128* %p
+    %1 = load fp128, fp128* %p
     %2 = fadd fp128 %1, %1
     %3 = call <4 x float> @test_v4f32_f128_helper(fp128 %2)
     %4 = fadd <4 x float> %3, %3
@@ -1012,7 +1012,7 @@
 ; SOFT: vmov r3, r2
 ; HARD: vadd.f64  d1
 ; HARD: vadd.f64  d0
-    %1 = load <2 x double>* %p
+    %1 = load <2 x double>, <2 x double>* %p
     %2 = fadd <2 x double> %1, %1
     %3 = call <4 x float> @test_v4f32_v2f64_helper(<2 x double> %2)
     %4 = fadd <4 x float> %3, %3
@@ -1028,7 +1028,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vadd.i64 q0
-    %1 = load <2 x i64>* %p
+    %1 = load <2 x i64>, <2 x i64>* %p
     %2 = add <2 x i64> %1, %1
     %3 = call <4 x float> @test_v4f32_v2i64_helper(<2 x i64> %2)
     %4 = fadd <4 x float> %3, %3
@@ -1044,7 +1044,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.32 q0
-    %1 = load <4 x i32>* %p
+    %1 = load <4 x i32>, <4 x i32>* %p
     %2 = add <4 x i32> %1, %1
     %3 = call <4 x float> @test_v4f32_v4i32_helper(<4 x i32> %2)
     %4 = fadd <4 x float> %3, %3
@@ -1060,7 +1060,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.16 q0
-    %1 = load <8 x i16>* %p
+    %1 = load <8 x i16>, <8 x i16>* %p
     %2 = add <8 x i16> %1, %1
     %3 = call <4 x float> @test_v4f32_v8i16_helper(<8 x i16> %2)
     %4 = fadd <4 x float> %3, %3
@@ -1076,7 +1076,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.8 q0
-    %1 = load <16 x i8>* %p
+    %1 = load <16 x i8>, <16 x i8>* %p
     %2 = add <16 x i8> %1, %1
     %3 = call <4 x float> @test_v4f32_v16i8_helper(<16 x i8> %2)
     %4 = fadd <4 x float> %3, %3
@@ -1089,7 +1089,7 @@
 ; CHECK-LABEL: test_v4i32_f128:
 declare <4 x i32> @test_v4i32_f128_helper(fp128 %p)
 define void @test_v4i32_f128(fp128* %p, <4 x i32>* %q) {
-    %1 = load fp128* %p
+    %1 = load fp128, fp128* %p
     %2 = fadd fp128 %1, %1
     %3 = call <4 x i32> @test_v4i32_f128_helper(fp128 %2)
     %4 = add <4 x i32> %3, %3
@@ -1106,7 +1106,7 @@
 ; SOFT: vmov r3, r2
 ; HARD: vadd.f64 d1
 ; HARD: vadd.f64 d0
-    %1 = load <2 x double>* %p
+    %1 = load <2 x double>, <2 x double>* %p
     %2 = fadd <2 x double> %1, %1
     %3 = call <4 x i32> @test_v4i32_v2f64_helper(<2 x double> %2)
     %4 = add <4 x i32> %3, %3
@@ -1122,7 +1122,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vadd.i64 q0
-    %1 = load <2 x i64>* %p
+    %1 = load <2 x i64>, <2 x i64>* %p
     %2 = add <2 x i64> %1, %1
     %3 = call <4 x i32> @test_v4i32_v2i64_helper(<2 x i64> %2)
     %4 = add <4 x i32> %3, %3
@@ -1138,7 +1138,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.32 q0
-    %1 = load <4 x float>* %p
+    %1 = load <4 x float>, <4 x float>* %p
     %2 = fadd <4 x float> %1, %1
     %3 = call <4 x i32> @test_v4i32_v4f32_helper(<4 x float> %2)
     %4 = add <4 x i32> %3, %3
@@ -1154,7 +1154,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.16 q0
-    %1 = load <8 x i16>* %p
+    %1 = load <8 x i16>, <8 x i16>* %p
     %2 = add <8 x i16> %1, %1
     %3 = call <4 x i32> @test_v4i32_v8i16_helper(<8 x i16> %2)
     %4 = add <4 x i32> %3, %3
@@ -1170,7 +1170,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.8 q0
-    %1 = load <16 x i8>* %p
+    %1 = load <16 x i8>, <16 x i8>* %p
     %2 = add <16 x i8> %1, %1
     %3 = call <4 x i32> @test_v4i32_v16i8_helper(<16 x i8> %2)
     %4 = add <4 x i32> %3, %3
@@ -1183,7 +1183,7 @@
 ; CHECK-LABEL: test_v8i16_f128:
 declare <8 x i16> @test_v8i16_f128_helper(fp128 %p)
 define void @test_v8i16_f128(fp128* %p, <8 x i16>* %q) {
-    %1 = load fp128* %p
+    %1 = load fp128, fp128* %p
     %2 = fadd fp128 %1, %1
     %3 = call <8 x i16> @test_v8i16_f128_helper(fp128 %2)
     %4 = add <8 x i16> %3, %3
@@ -1200,7 +1200,7 @@
 ; SOFT: vmov r3, r2
 ; HARD: vadd.f64 d1
 ; HARD: vadd.f64 d0
-    %1 = load <2 x double>* %p
+    %1 = load <2 x double>, <2 x double>* %p
     %2 = fadd <2 x double> %1, %1
     %3 = call <8 x i16> @test_v8i16_v2f64_helper(<2 x double> %2)
     %4 = add <8 x i16> %3, %3
@@ -1216,7 +1216,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vadd.i64 q0
-    %1 = load <2 x i64>* %p
+    %1 = load <2 x i64>, <2 x i64>* %p
     %2 = add <2 x i64> %1, %1
     %3 = call <8 x i16> @test_v8i16_v2i64_helper(<2 x i64> %2)
     %4 = add <8 x i16> %3, %3
@@ -1232,7 +1232,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.32 q0
-    %1 = load <4 x float>* %p
+    %1 = load <4 x float>, <4 x float>* %p
     %2 = fadd <4 x float> %1, %1
     %3 = call <8 x i16> @test_v8i16_v4f32_helper(<4 x float> %2)
     %4 = add <8 x i16> %3, %3
@@ -1248,7 +1248,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.32 q0
-    %1 = load <4 x i32>* %p
+    %1 = load <4 x i32>, <4 x i32>* %p
     %2 = add <4 x i32> %1, %1
     %3 = call <8 x i16> @test_v8i16_v4i32_helper(<4 x i32> %2)
     %4 = add <8 x i16> %3, %3
@@ -1264,7 +1264,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.8 q0
-    %1 = load <16 x i8>* %p
+    %1 = load <16 x i8>, <16 x i8>* %p
     %2 = add <16 x i8> %1, %1
     %3 = call <8 x i16> @test_v8i16_v16i8_helper(<16 x i8> %2)
     %4 = add <8 x i16> %3, %3
@@ -1277,7 +1277,7 @@
 ; CHECK-LABEL: test_v16i8_f128:
 declare <16 x i8> @test_v16i8_f128_helper(fp128 %p)
 define void @test_v16i8_f128(fp128* %p, <16 x i8>* %q) {
-    %1 = load fp128* %p
+    %1 = load fp128, fp128* %p
     %2 = fadd fp128 %1, %1
     %3 = call <16 x i8> @test_v16i8_f128_helper(fp128 %2)
     %4 = add <16 x i8> %3, %3
@@ -1294,7 +1294,7 @@
 ; SOFT: vmov r3, r2
 ; HARD: vadd.f64 d1
 ; HARD: vadd.f64 d0
-    %1 = load <2 x double>* %p
+    %1 = load <2 x double>, <2 x double>* %p
     %2 = fadd <2 x double> %1, %1
     %3 = call <16 x i8> @test_v16i8_v2f64_helper(<2 x double> %2)
     %4 = add <16 x i8> %3, %3
@@ -1310,7 +1310,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vadd.i64 q0
-    %1 = load <2 x i64>* %p
+    %1 = load <2 x i64>, <2 x i64>* %p
     %2 = add <2 x i64> %1, %1
     %3 = call <16 x i8> @test_v16i8_v2i64_helper(<2 x i64> %2)
     %4 = add <16 x i8> %3, %3
@@ -1326,7 +1326,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.32 q0
-    %1 = load <4 x float>* %p
+    %1 = load <4 x float>, <4 x float>* %p
     %2 = fadd <4 x float> %1, %1
     %3 = call <16 x i8> @test_v16i8_v4f32_helper(<4 x float> %2)
     %4 = add <16 x i8> %3, %3
@@ -1342,7 +1342,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.32 q0
-    %1 = load <4 x i32>* %p
+    %1 = load <4 x i32>, <4 x i32>* %p
     %2 = add <4 x i32> %1, %1
     %3 = call <16 x i8> @test_v16i8_v4i32_helper(<4 x i32> %2)
     %4 = add <16 x i8> %3, %3
@@ -1358,7 +1358,7 @@
 ; SOFT: vmov r1, r0
 ; SOFT: vmov r3, r2
 ; HARD: vrev64.16 q0
-    %1 = load <8 x i16>* %p
+    %1 = load <8 x i16>, <8 x i16>* %p
     %2 = add <8 x i16> %1, %1
     %3 = call <16 x i8> @test_v16i8_v8i16_helper(<8 x i16> %2)
     %4 = add <16 x i8> %3, %3
diff --git a/llvm/test/CodeGen/ARM/bswap16.ll b/llvm/test/CodeGen/ARM/bswap16.ll
index 70c62d2..dc0e468 100644
--- a/llvm/test/CodeGen/ARM/bswap16.ll
+++ b/llvm/test/CodeGen/ARM/bswap16.ll
@@ -4,7 +4,7 @@
 
 define void @test1(i16* nocapture %data) {
 entry:
-  %0 = load i16* %data, align 2
+  %0 = load i16, i16* %data, align 2
   %1 = tail call i16 @llvm.bswap.i16(i16 %0)
   store i16 %1, i16* %data, align 2
   ret void
@@ -30,7 +30,7 @@
 
 define i16 @test3(i16* nocapture %data) {
 entry:
-  %0 = load i16* %data, align 2
+  %0 = load i16, i16* %data, align 2
   %1 = tail call i16 @llvm.bswap.i16(i16 %0)
   ret i16 %1
 
diff --git a/llvm/test/CodeGen/ARM/call-tc.ll b/llvm/test/CodeGen/ARM/call-tc.ll
index a35fd74..b2b6aae 100644
--- a/llvm/test/CodeGen/ARM/call-tc.ll
+++ b/llvm/test/CodeGen/ARM/call-tc.ll
@@ -24,7 +24,7 @@
 ; CHECKT2D: ldr
 ; CHECKT2D-NEXT: ldr
 ; CHECKT2D-NEXT: bx r0
-        %tmp = load i32 ()** @t         ; <i32 ()*> [#uses=1]
+        %tmp = load i32 ()*, i32 ()** @t         ; <i32 ()*> [#uses=1]
         %tmp.upgrd.2 = tail call i32 %tmp( )            ; <i32> [#uses=0]
         ret void
 }
@@ -153,7 +153,7 @@
 ; CHECKT2D: b.w ___divsi3
   %lock = alloca %class.MutexLock, align 1
   %1 = call %class.MutexLock* @_ZN9MutexLockC1Ev(%class.MutexLock* %lock)
-  %2 = load i32* @x, align 4
+  %2 = load i32, i32* @x, align 4
   %3 = sdiv i32 1000, %2
   %4 = call %class.MutexLock* @_ZN9MutexLockD1Ev(%class.MutexLock* %lock)
   ret i32 %3
@@ -170,7 +170,7 @@
 ; CHECKT2D-LABEL: libcall_tc_test2:
 ; CHECKT2D: blx _floorf
 ; CHECKT2D: b.w _truncf
-  %1 = load float* %a, align 4
+  %1 = load float, float* %a, align 4
   %call = tail call float @floorf(float %1)
   store float %call, float* %a, align 4
   %call1 = tail call float @truncf(float %b)
diff --git a/llvm/test/CodeGen/ARM/call.ll b/llvm/test/CodeGen/ARM/call.ll
index 97827bc6..87252a9 100644
--- a/llvm/test/CodeGen/ARM/call.ll
+++ b/llvm/test/CodeGen/ARM/call.ll
@@ -20,7 +20,7 @@
 define void @g.upgrd.1() {
 ; CHECKV4: mov lr, pc
 ; CHECKV5: blx
-        %tmp = load i32 ()** @t         ; <i32 ()*> [#uses=1]
+        %tmp = load i32 ()*, i32 ()** @t         ; <i32 ()*> [#uses=1]
         %tmp.upgrd.2 = call i32 %tmp( )            ; <i32> [#uses=0]
         ret void
 }
@@ -30,10 +30,10 @@
 ; CHECKV4: bx r{{.*}}
 BB0:
   %5 = inttoptr i32 %0 to i32*                    ; <i32*> [#uses=1]
-  %t35 = load volatile i32* %5                    ; <i32> [#uses=1]
+  %t35 = load volatile i32, i32* %5                    ; <i32> [#uses=1]
   %6 = inttoptr i32 %t35 to i32**                 ; <i32**> [#uses=1]
   %7 = getelementptr i32*, i32** %6, i32 86             ; <i32**> [#uses=1]
-  %8 = load i32** %7                              ; <i32*> [#uses=1]
+  %8 = load i32*, i32** %7                              ; <i32*> [#uses=1]
   %9 = bitcast i32* %8 to i32* (i32, i32*, i32, i32*, i32*, i32*)* ; <i32* (i32, i32*, i32, i32*, i32*, i32*)*> [#uses=1]
   %10 = call i32* %9(i32 %0, i32* null, i32 %1, i32* %2, i32* %3, i32* %4) ; <i32*> [#uses=1]
   ret i32* %10
diff --git a/llvm/test/CodeGen/ARM/call_nolink.ll b/llvm/test/CodeGen/ARM/call_nolink.ll
index 93be566..0cd5bcd 100644
--- a/llvm/test/CodeGen/ARM/call_nolink.ll
+++ b/llvm/test/CodeGen/ARM/call_nolink.ll
@@ -23,31 +23,31 @@
 bb115.i.i:		; preds = %bb115.i.i.bb115.i.i_crit_edge, %newFuncRoot
 	%i_addr.3210.0.i.i = phi i32 [ %tmp166.i.i, %bb115.i.i.bb115.i.i_crit_edge ], [ 0, %newFuncRoot ]		; <i32> [#uses=7]
 	%tmp124.i.i = getelementptr [2 x { i32, [3 x i32] }], [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 1, i32 0		; <i32*> [#uses=1]
-	%tmp125.i.i = load i32* %tmp124.i.i		; <i32> [#uses=1]
+	%tmp125.i.i = load i32, i32* %tmp124.i.i		; <i32> [#uses=1]
 	%tmp126.i.i = getelementptr [14 x i32], [14 x i32]* @r, i32 0, i32 %tmp125.i.i		; <i32*> [#uses=1]
-	%tmp127.i.i = load i32* %tmp126.i.i		; <i32> [#uses=1]
+	%tmp127.i.i = load i32, i32* %tmp126.i.i		; <i32> [#uses=1]
 	%tmp131.i.i = getelementptr [2 x { i32, [3 x i32] }], [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 1, i32 1		; <i32*> [#uses=1]
-	%tmp132.i.i = load i32* %tmp131.i.i		; <i32> [#uses=1]
+	%tmp132.i.i = load i32, i32* %tmp131.i.i		; <i32> [#uses=1]
 	%tmp133.i.i = getelementptr [14 x i32], [14 x i32]* @r, i32 0, i32 %tmp132.i.i		; <i32*> [#uses=1]
-	%tmp134.i.i = load i32* %tmp133.i.i		; <i32> [#uses=1]
+	%tmp134.i.i = load i32, i32* %tmp133.i.i		; <i32> [#uses=1]
 	%tmp138.i.i = getelementptr [2 x { i32, [3 x i32] }], [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 1, i32 2		; <i32*> [#uses=1]
-	%tmp139.i.i = load i32* %tmp138.i.i		; <i32> [#uses=1]
+	%tmp139.i.i = load i32, i32* %tmp138.i.i		; <i32> [#uses=1]
 	%tmp140.i.i = getelementptr [14 x i32], [14 x i32]* @r, i32 0, i32 %tmp139.i.i		; <i32*> [#uses=1]
-	%tmp141.i.i = load i32* %tmp140.i.i		; <i32> [#uses=1]
+	%tmp141.i.i = load i32, i32* %tmp140.i.i		; <i32> [#uses=1]
 	%tmp143.i.i = add i32 %i_addr.3210.0.i.i, 12		; <i32> [#uses=1]
 	%tmp146.i.i = getelementptr [2 x { i32, [3 x i32] }], [2 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %i_addr.3210.0.i.i, i32 0		; <i32*> [#uses=1]
-	%tmp147.i.i = load i32* %tmp146.i.i		; <i32> [#uses=1]
+	%tmp147.i.i = load i32, i32* %tmp146.i.i		; <i32> [#uses=1]
 	%tmp149.i.i = getelementptr [13 x %struct.anon], [13 x %struct.anon]* @isa, i32 0, i32 %tmp147.i.i, i32 0		; <i32 (i32, i32, i32)**> [#uses=1]
-	%tmp150.i.i = load i32 (i32, i32, i32)** %tmp149.i.i		; <i32 (i32, i32, i32)*> [#uses=1]
+	%tmp150.i.i = load i32 (i32, i32, i32)*, i32 (i32, i32, i32)** %tmp149.i.i		; <i32 (i32, i32, i32)*> [#uses=1]
 	%tmp154.i.i = tail call i32 %tmp150.i.i( i32 %tmp127.i.i, i32 %tmp134.i.i, i32 %tmp141.i.i )		; <i32> [#uses=1]
 	%tmp155.i.i = getelementptr [14 x i32], [14 x i32]* @r, i32 0, i32 %tmp143.i.i		; <i32*> [#uses=1]
 	store i32 %tmp154.i.i, i32* %tmp155.i.i
 	%tmp159.i.i = getelementptr [2 x i32], [2 x i32]* @counter, i32 0, i32 %i_addr.3210.0.i.i		; <i32*> [#uses=2]
-	%tmp160.i.i = load i32* %tmp159.i.i		; <i32> [#uses=1]
+	%tmp160.i.i = load i32, i32* %tmp159.i.i		; <i32> [#uses=1]
 	%tmp161.i.i = add i32 %tmp160.i.i, 1		; <i32> [#uses=1]
 	store i32 %tmp161.i.i, i32* %tmp159.i.i
 	%tmp166.i.i = add i32 %i_addr.3210.0.i.i, 1		; <i32> [#uses=2]
-	%tmp168.i.i = load i32* @numi		; <i32> [#uses=1]
+	%tmp168.i.i = load i32, i32* @numi		; <i32> [#uses=1]
 	icmp slt i32 %tmp166.i.i, %tmp168.i.i		; <i1>:0 [#uses=1]
 	br i1 %0, label %bb115.i.i.bb115.i.i_crit_edge, label %bb115.i.i.bb170.i.i_crit_edge.exitStub
 }
diff --git a/llvm/test/CodeGen/ARM/coalesce-dbgvalue.ll b/llvm/test/CodeGen/ARM/coalesce-dbgvalue.ll
index 4e5fb5e..4f2b66d 100644
--- a/llvm/test/CodeGen/ARM/coalesce-dbgvalue.ll
+++ b/llvm/test/CodeGen/ARM/coalesce-dbgvalue.ll
@@ -28,7 +28,7 @@
 for.body2:                                        ; preds = %for.cond1
   store i32 %storemerge11, i32* @b, align 4, !dbg !26
   tail call void @llvm.dbg.value(metadata i32* null, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !28
-  %0 = load i64* @a, align 8, !dbg !29
+  %0 = load i64, i64* @a, align 8, !dbg !29
   %xor = xor i64 %0, %e.1.ph, !dbg !29
   %conv3 = trunc i64 %xor to i32, !dbg !29
   tail call void @llvm.dbg.value(metadata i32 %conv3, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !29
@@ -44,7 +44,7 @@
   %1 = phi i1 [ false, %for.body2 ], [ %tobool5, %land.rhs ]
   %land.ext = zext i1 %1 to i32
   %call6 = tail call i32 bitcast (i32 (...)* @fn2 to i32 (i32, i32*)*)(i32 %land.ext, i32* null) #3
-  %2 = load i32* @b, align 4, !dbg !26
+  %2 = load i32, i32* @b, align 4, !dbg !26
   %inc8 = add nsw i32 %2, 1, !dbg !26
   %phitmp = and i64 %xor, 4294967295, !dbg !26
   br label %for.cond1.outer, !dbg !26
@@ -52,7 +52,7 @@
 for.cond1.outer:                                  ; preds = %land.end, %for.cond1.preheader
   %storemerge11.ph = phi i32 [ %inc8, %land.end ], [ 0, %for.cond1.preheader ]
   %e.1.ph = phi i64 [ %phitmp, %land.end ], [ 0, %for.cond1.preheader ]
-  %3 = load i32* @d, align 4, !dbg !31
+  %3 = load i32, i32* @d, align 4, !dbg !31
   %tobool10 = icmp eq i32 %3, 0, !dbg !31
   br label %for.cond1
 
diff --git a/llvm/test/CodeGen/ARM/coalesce-subregs.ll b/llvm/test/CodeGen/ARM/coalesce-subregs.ll
index 5cc3eca..72fefea 100644
--- a/llvm/test/CodeGen/ARM/coalesce-subregs.ll
+++ b/llvm/test/CodeGen/ARM/coalesce-subregs.ll
@@ -86,22 +86,22 @@
 define void @f3(float* %p, float* %q) nounwind ssp {
 entry:
   %arrayidx = getelementptr inbounds float, float* %p, i32 3
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %vecins = insertelement <2 x float> undef, float %0, i32 1
   %tobool = icmp eq float* %q, null
   br i1 %tobool, label %if.else, label %if.then
 
 if.then:                                          ; preds = %entry
-  %1 = load float* %q, align 4
+  %1 = load float, float* %q, align 4
   %arrayidx2 = getelementptr inbounds float, float* %q, i32 1
-  %2 = load float* %arrayidx2, align 4
+  %2 = load float, float* %arrayidx2, align 4
   %add = fadd float %1, %2
   %vecins3 = insertelement <2 x float> %vecins, float %add, i32 0
   br label %if.end
 
 if.else:                                          ; preds = %entry
   %arrayidx4 = getelementptr inbounds float, float* %p, i32 2
-  %3 = load float* %arrayidx4, align 4
+  %3 = load float, float* %arrayidx4, align 4
   %vecins5 = insertelement <2 x float> %vecins, float %3, i32 0
   br label %if.end
 
@@ -129,9 +129,9 @@
   br i1 %tobool, label %if.end, label %if.then
 
 if.then:                                          ; preds = %entry
-  %1 = load float* %q, align 4
+  %1 = load float, float* %q, align 4
   %arrayidx1 = getelementptr inbounds float, float* %q, i32 1
-  %2 = load float* %arrayidx1, align 4
+  %2 = load float, float* %arrayidx1, align 4
   %add = fadd float %1, %2
   %vecins = insertelement <2 x float> %vld1, float %add, i32 1
   br label %if.end
@@ -165,12 +165,12 @@
 
 if.then:                                          ; preds = %entry
   %arrayidx = getelementptr inbounds float, float* %q, i32 1
-  %1 = load float* %arrayidx, align 4
+  %1 = load float, float* %arrayidx, align 4
   %add4 = fadd float %vecext, %1
-  %2 = load float* %q, align 4
+  %2 = load float, float* %q, align 4
   %add6 = fadd float %vecext1, %2
   %arrayidx7 = getelementptr inbounds float, float* %q, i32 2
-  %3 = load float* %arrayidx7, align 4
+  %3 = load float, float* %arrayidx7, align 4
   %add8 = fadd float %vecext2, %3
   br label %if.end
 
@@ -231,7 +231,7 @@
   br i1 undef, label %bb10, label %bb12
 
 bb10:                                             ; preds = %bb3
-  %tmp11 = load <4 x float>* undef, align 8
+  %tmp11 = load <4 x float>, <4 x float>* undef, align 8
   br label %bb12
 
 bb12:                                             ; preds = %bb10, %bb3
@@ -333,7 +333,7 @@
   br i1 undef, label %for.body29, label %for.end
 
 for.body29:                                       ; preds = %for.body29, %for.body
-  %0 = load <2 x double>* null, align 1
+  %0 = load <2 x double>, <2 x double>* null, align 1
   %splat40 = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> zeroinitializer
   %mul41 = fmul <2 x double> undef, %splat40
   %add42 = fadd <2 x double> undef, %mul41
diff --git a/llvm/test/CodeGen/ARM/code-placement.ll b/llvm/test/CodeGen/ARM/code-placement.ll
index 9f27eee..bf5cf52 100644
--- a/llvm/test/CodeGen/ARM/code-placement.ll
+++ b/llvm/test/CodeGen/ARM/code-placement.ll
@@ -19,7 +19,7 @@
   %list_addr.05 = phi %struct.list_head* [ %2, %bb ], [ %list, %entry ]
   %next.04 = phi %struct.list_head* [ %list_addr.05, %bb ], [ null, %entry ]
   %1 = getelementptr inbounds %struct.list_head, %struct.list_head* %list_addr.05, i32 0, i32 0
-  %2 = load %struct.list_head** %1, align 4
+  %2 = load %struct.list_head*, %struct.list_head** %1, align 4
   store %struct.list_head* %next.04, %struct.list_head** %1, align 4
   %3 = icmp eq %struct.list_head* %2, null
   br i1 %3, label %bb2, label %bb
@@ -46,7 +46,7 @@
   %sum.08 = phi i32 [ %2, %bb1 ], [ %sum.110, %bb2.preheader ] ; <i32> [#uses=1]
   %tmp17 = sub i32 %i.07, %indvar                 ; <i32> [#uses=1]
   %scevgep = getelementptr i32, i32* %src, i32 %tmp17  ; <i32*> [#uses=1]
-  %1 = load i32* %scevgep, align 4                ; <i32> [#uses=1]
+  %1 = load i32, i32* %scevgep, align 4                ; <i32> [#uses=1]
   %2 = add nsw i32 %1, %sum.08                    ; <i32> [#uses=2]
   %indvar.next = add i32 %indvar, 1               ; <i32> [#uses=2]
   %exitcond = icmp eq i32 %indvar.next, %size     ; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/commute-movcc.ll b/llvm/test/CodeGen/ARM/commute-movcc.ll
index 6002576..2978d31 100644
--- a/llvm/test/CodeGen/ARM/commute-movcc.ll
+++ b/llvm/test/CodeGen/ARM/commute-movcc.ll
@@ -32,7 +32,7 @@
   %BestCost.011 = phi i32 [ -1, %entry ], [ %BestCost.1, %if.end8 ]
   %BestIdx.010 = phi i32 [ 0, %entry ], [ %BestIdx.1, %if.end8 ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.012
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %mul = mul i32 %0, %0
   %sub = add nsw i32 %i.012, -5
   %cmp2 = icmp eq i32 %sub, %Pref
diff --git a/llvm/test/CodeGen/ARM/compare-call.ll b/llvm/test/CodeGen/ARM/compare-call.ll
index 61034b3..d4bd92b 100644
--- a/llvm/test/CodeGen/ARM/compare-call.ll
+++ b/llvm/test/CodeGen/ARM/compare-call.ll
@@ -2,9 +2,9 @@
 
 define void @test3(float* %glob, i32 %X) {
 entry:
-        %tmp = load float* %glob                ; <float> [#uses=1]
+        %tmp = load float, float* %glob                ; <float> [#uses=1]
         %tmp2 = getelementptr float, float* %glob, i32 2               ; <float*> [#uses=1]
-        %tmp3 = load float* %tmp2               ; <float> [#uses=1]
+        %tmp3 = load float, float* %tmp2               ; <float> [#uses=1]
         %tmp.upgrd.1 = fcmp ogt float %tmp, %tmp3               ; <i1> [#uses=1]
         br i1 %tmp.upgrd.1, label %cond_true, label %UnifiedReturnBlock
 
diff --git a/llvm/test/CodeGen/ARM/copy-paired-reg.ll b/llvm/test/CodeGen/ARM/copy-paired-reg.ll
index 17a4461..453fac4 100644
--- a/llvm/test/CodeGen/ARM/copy-paired-reg.ll
+++ b/llvm/test/CodeGen/ARM/copy-paired-reg.ll
@@ -11,7 +11,7 @@
   store atomic i64 0, i64* %c seq_cst, align 8
   store atomic i64 0, i64* %d seq_cst, align 8
 
-  %e = load atomic i64* %d seq_cst, align 8
+  %e = load atomic i64, i64* %d seq_cst, align 8
 
   ret void
 }
diff --git a/llvm/test/CodeGen/ARM/crash-greedy-v6.ll b/llvm/test/CodeGen/ARM/crash-greedy-v6.ll
index 96b6bb6..287c081 100644
--- a/llvm/test/CodeGen/ARM/crash-greedy-v6.ll
+++ b/llvm/test/CodeGen/ARM/crash-greedy-v6.ll
@@ -38,7 +38,7 @@
   %arrayidx22 = getelementptr i8, i8* %green, i32 %i.031
   %arrayidx25 = getelementptr i8, i8* %blue, i32 %i.031
   %arrayidx28 = getelementptr i8, i8* %alpha, i32 %i.031
-  %tmp12 = load float* %arrayidx11, align 4
+  %tmp12 = load float, float* %arrayidx11, align 4
   tail call fastcc void @sample_3d_nearest(i8* %tObj, i8* undef, float undef, float %tmp12, float undef, i8* %arrayidx19, i8* %arrayidx22, i8* %arrayidx25, i8* %arrayidx28)
   %0 = add i32 %i.031, 1
   %exitcond = icmp eq i32 %0, %n
diff --git a/llvm/test/CodeGen/ARM/crash.ll b/llvm/test/CodeGen/ARM/crash.ll
index 8e36701..3b01d81 100644
--- a/llvm/test/CodeGen/ARM/crash.ll
+++ b/llvm/test/CodeGen/ARM/crash.ll
@@ -5,7 +5,7 @@
 
 define void @func() nounwind {
 entry:
-  %tmp = load i32* undef, align 4
+  %tmp = load i32, i32* undef, align 4
   br label %bb1
 
 bb1:
diff --git a/llvm/test/CodeGen/ARM/cse-ldrlit.ll b/llvm/test/CodeGen/ARM/cse-ldrlit.ll
index 3f5d4c2..e76e47e 100644
--- a/llvm/test/CodeGen/ARM/cse-ldrlit.ll
+++ b/llvm/test/CodeGen/ARM/cse-ldrlit.ll
@@ -9,7 +9,7 @@
 declare void @bar(i32*)
 
 define void @foo() {
-  %flag = load i32* getelementptr inbounds([16 x i32]* @var, i32 0, i32 1)
+  %flag = load i32, i32* getelementptr inbounds([16 x i32]* @var, i32 0, i32 1)
   %tst = icmp eq i32 %flag, 0
   br i1 %tst, label %true, label %false
 true:
diff --git a/llvm/test/CodeGen/ARM/cse-libcalls.ll b/llvm/test/CodeGen/ARM/cse-libcalls.ll
index 4f5b759..1255ec5 100644
--- a/llvm/test/CodeGen/ARM/cse-libcalls.ll
+++ b/llvm/test/CodeGen/ARM/cse-libcalls.ll
@@ -10,7 +10,7 @@
 
 define double @u_f_nonbon(double %lambda) nounwind {
 entry:
-	%tmp19.i.i = load double* null, align 4		; <double> [#uses=2]
+	%tmp19.i.i = load double, double* null, align 4		; <double> [#uses=2]
 	%tmp6.i = fcmp olt double %tmp19.i.i, 1.000000e+00		; <i1> [#uses=1]
 	%dielectric.0.i = select i1 %tmp6.i, double 1.000000e+00, double %tmp19.i.i		; <double> [#uses=1]
 	%tmp10.i4 = fdiv double 0x4074C2D71F36262D, %dielectric.0.i		; <double> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll b/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
index 8950abd..98a2ce9 100644
--- a/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
+++ b/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll
@@ -5,7 +5,7 @@
   ; CHECK: vldr
   ; CHECK: vmovl.u16
   ; CHECK-NOT: vand
-  %1 = load <4 x i16>* %in
+  %1 = load <4 x i16>, <4 x i16>* %in
   ; CHECK: vcvt.f32.u32
   %2 = uitofp <4 x i16> %1 to <4 x float>
   %3 = extractelement <4 x float> %2, i32 0
@@ -21,7 +21,7 @@
 
 define float @g(<4 x i16>* nocapture %in) {
   ; CHECK: vldr
-  %1 = load <4 x i16>* %in
+  %1 = load <4 x i16>, <4 x i16>* %in
   ; CHECK-NOT: uxth
   %2 = extractelement <4 x i16> %1, i32 0
   ; CHECK: vcvt.f32.u32
diff --git a/llvm/test/CodeGen/ARM/debug-frame-large-stack.ll b/llvm/test/CodeGen/ARM/debug-frame-large-stack.ll
index 1addf63..1f814e7 100644
--- a/llvm/test/CodeGen/ARM/debug-frame-large-stack.ll
+++ b/llvm/test/CodeGen/ARM/debug-frame-large-stack.ll
@@ -48,7 +48,7 @@
 	%tmp = alloca i32, align 4
 	%a = alloca [805306369 x i8], align 16
 	store i32 0, i32* %tmp
-	%tmp1 = load i32* %tmp
+	%tmp1 = load i32, i32* %tmp
         ret i32 %tmp1
 }
 
diff --git a/llvm/test/CodeGen/ARM/debug-frame-vararg.ll b/llvm/test/CodeGen/ARM/debug-frame-vararg.ll
index 063e321..934e125 100644
--- a/llvm/test/CodeGen/ARM/debug-frame-vararg.ll
+++ b/llvm/test/CodeGen/ARM/debug-frame-vararg.ll
@@ -118,11 +118,11 @@
 
 for.body:                                         ; preds = %entry, %for.body
   %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-  %ap.cur = load i8** %vl, align 4
+  %ap.cur = load i8*, i8** %vl, align 4
   %ap.next = getelementptr i8, i8* %ap.cur, i32 4
   store i8* %ap.next, i8** %vl, align 4
   %0 = bitcast i8* %ap.cur to i32*
-  %1 = load i32* %0, align 4
+  %1 = load i32, i32* %0, align 4
   %call = call i32 @foo(i32 %1) #1
   %inc = add nsw i32 %i.05, 1
   %exitcond = icmp eq i32 %inc, %count
diff --git a/llvm/test/CodeGen/ARM/debug-info-blocks.ll b/llvm/test/CodeGen/ARM/debug-info-blocks.ll
index 8e8431b..fcdf43b 100644
--- a/llvm/test/CodeGen/ARM/debug-info-blocks.ll
+++ b/llvm/test/CodeGen/ARM/debug-info-blocks.ll
@@ -47,21 +47,21 @@
   call void @llvm.dbg.declare(metadata %2* %6, metadata !136, metadata !163), !dbg !137
   call void @llvm.dbg.declare(metadata %2* %6, metadata !138, metadata !164), !dbg !137
   call void @llvm.dbg.declare(metadata %2* %6, metadata !139, metadata !165), !dbg !140
-  %8 = load %0** %1, align 4, !dbg !141
-  %9 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_13", !dbg !141
+  %8 = load %0*, %0** %1, align 4, !dbg !141
+  %9 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_13", !dbg !141
   %10 = bitcast %0* %8 to i8*, !dbg !141
   %11 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %10, i8* %9), !dbg !141
   %12 = bitcast i8* %11 to %0*, !dbg !141
   %13 = getelementptr inbounds %2, %2* %6, i32 0, i32 5, !dbg !141
-  %14 = load i8** %13, !dbg !141
+  %14 = load i8*, i8** %13, !dbg !141
   %15 = bitcast i8* %14 to %struct.__block_byref_mydata*, !dbg !141
   %16 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %15, i32 0, i32 1, !dbg !141
-  %17 = load %struct.__block_byref_mydata** %16, !dbg !141
+  %17 = load %struct.__block_byref_mydata*, %struct.__block_byref_mydata** %16, !dbg !141
   %18 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %17, i32 0, i32 6, !dbg !141
   store %0* %12, %0** %18, align 4, !dbg !141
   %19 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !143
-  %20 = load %3** %19, align 4, !dbg !143
-  %21 = load i32* @"OBJC_IVAR_$_MyWork._data", !dbg !143
+  %20 = load %3*, %3** %19, align 4, !dbg !143
+  %21 = load i32, i32* @"OBJC_IVAR_$_MyWork._data", !dbg !143
   %22 = bitcast %3* %20 to i8*, !dbg !143
   %23 = getelementptr inbounds i8, i8* %22, i32 %21, !dbg !143
   %24 = bitcast i8* %23 to %struct.CR*, !dbg !143
@@ -69,8 +69,8 @@
   %26 = bitcast %struct.CR* %data to i8*, !dbg !143
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %25, i8* %26, i32 16, i32 4, i1 false), !dbg !143
   %27 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !144
-  %28 = load %3** %27, align 4, !dbg !144
-  %29 = load i32* @"OBJC_IVAR_$_MyWork._bounds", !dbg !144
+  %28 = load %3*, %3** %27, align 4, !dbg !144
+  %29 = load i32, i32* @"OBJC_IVAR_$_MyWork._bounds", !dbg !144
   %30 = bitcast %3* %28 to i8*, !dbg !144
   %31 = getelementptr inbounds i8, i8* %30, i32 %29, !dbg !144
   %32 = bitcast i8* %31 to %struct.CR*, !dbg !144
@@ -78,15 +78,15 @@
   %34 = bitcast %struct.CR* %bounds to i8*, !dbg !144
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %33, i8* %34, i32 16, i32 4, i1 false), !dbg !144
   %35 = getelementptr inbounds %2, %2* %6, i32 0, i32 6, !dbg !145
-  %36 = load %3** %35, align 4, !dbg !145
+  %36 = load %3*, %3** %35, align 4, !dbg !145
   %37 = getelementptr inbounds %2, %2* %6, i32 0, i32 5, !dbg !145
-  %38 = load i8** %37, !dbg !145
+  %38 = load i8*, i8** %37, !dbg !145
   %39 = bitcast i8* %38 to %struct.__block_byref_mydata*, !dbg !145
   %40 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %39, i32 0, i32 1, !dbg !145
-  %41 = load %struct.__block_byref_mydata** %40, !dbg !145
+  %41 = load %struct.__block_byref_mydata*, %struct.__block_byref_mydata** %40, !dbg !145
   %42 = getelementptr inbounds %struct.__block_byref_mydata, %struct.__block_byref_mydata* %41, i32 0, i32 6, !dbg !145
-  %43 = load %0** %42, align 4, !dbg !145
-  %44 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_222", !dbg !145
+  %43 = load %0*, %0** %42, align 4, !dbg !145
+  %44 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_222", !dbg !145
   %45 = bitcast %3* %36 to i8*, !dbg !145
   call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %0*)*)(i8* %45, i8* %44, %0* %43), !dbg !145
   ret void, !dbg !146
diff --git a/llvm/test/CodeGen/ARM/divmod.ll b/llvm/test/CodeGen/ARM/divmod.ll
index fa290c4..9336d0c 100644
--- a/llvm/test/CodeGen/ARM/divmod.ll
+++ b/llvm/test/CodeGen/ARM/divmod.ll
@@ -47,7 +47,7 @@
 entry:
 ; A8-LABEL: do_indent:
 ; SWIFT-LABEL: do_indent:
-  %0 = load i32* @flags, align 4
+  %0 = load i32, i32* @flags, align 4
   %1 = and i32 %0, 67108864
   %2 = icmp eq i32 %1, 0
   br i1 %2, label %bb1, label %bb
@@ -57,7 +57,7 @@
 ; SWIFT: sdiv
 ; SWIFT: mls
 ; SWIFT-NOT: bl __divmodsi4
-  %3 = load i32* @tabsize, align 4
+  %3 = load i32, i32* @tabsize, align 4
   %4 = srem i32 %cols, %3
   %5 = sdiv i32 %cols, %3
   %6 = tail call i32 @llvm.objectsize.i32.p0i8(i8* null, i1 false)
diff --git a/llvm/test/CodeGen/ARM/dwarf-eh.ll b/llvm/test/CodeGen/ARM/dwarf-eh.ll
index 4bbfe8b..228d8b9 100644
--- a/llvm/test/CodeGen/ARM/dwarf-eh.ll
+++ b/llvm/test/CodeGen/ARM/dwarf-eh.ll
@@ -34,12 +34,12 @@
   store i32 %7, i32* %2
   br label %8
 
-  %9 = load i32* %2
+  %9 = load i32, i32* %2
   %10 = call i32 @llvm.eh.typeid.for(i8* bitcast ({ i8*, i8* }* @_ZTI9exception to i8*)) nounwind
   %11 = icmp eq i32 %9, %10
   br i1 %11, label %12, label %17
 
-  %13 = load i8** %1
+  %13 = load i8*, i8** %1
   %14 = call i8* @__cxa_begin_catch(i8* %13) #3
   %15 = bitcast i8* %14 to %struct.exception*
   store %struct.exception* %15, %struct.exception** %e
@@ -48,8 +48,8 @@
 
   ret void
 
-  %18 = load i8** %1
-  %19 = load i32* %2
+  %18 = load i8*, i8** %1
+  %19 = load i32, i32* %2
   %20 = insertvalue { i8*, i32 } undef, i8* %18, 0
   %21 = insertvalue { i8*, i32 } %20, i32 %19, 1
   resume { i8*, i32 } %21
diff --git a/llvm/test/CodeGen/ARM/dyn-stackalloc.ll b/llvm/test/CodeGen/ARM/dyn-stackalloc.ll
index 487b131..1b64a01 100644
--- a/llvm/test/CodeGen/ARM/dyn-stackalloc.ll
+++ b/llvm/test/CodeGen/ARM/dyn-stackalloc.ll
@@ -19,7 +19,7 @@
 ; CHECK-NOT: sub r{{[0-9]+}}, sp, [[REG1]]
 ; CHECK: sub sp, sp, [[REG1]]
 
-  %tmp6 = load i32* null
+  %tmp6 = load i32, i32* null
   %tmp8 = alloca float, i32 %tmp6
   store i32 1, i32* null
   br i1 false, label %bb123.preheader, label %return
@@ -29,7 +29,7 @@
 
 bb43:                                             ; preds = %bb123.preheader
   call fastcc void @f1(float* %tmp8, float* null, i32 0)
-  %tmp70 = load i32* null
+  %tmp70 = load i32, i32* null
   %tmp85 = getelementptr float, float* %tmp8, i32 0
   call fastcc void @f2(float* null, float* null, float* %tmp85, i32 %tmp70)
   ret void
diff --git a/llvm/test/CodeGen/ARM/emit-big-cst.ll b/llvm/test/CodeGen/ARM/emit-big-cst.ll
index 01d789c..7453e8c 100644
--- a/llvm/test/CodeGen/ARM/emit-big-cst.ll
+++ b/llvm/test/CodeGen/ARM/emit-big-cst.ll
@@ -11,7 +11,7 @@
 
 define void @accessBig(i64* %storage) {
   %addr = bitcast i64* %storage to i82*
-  %bigLoadedCst = load volatile i82* @bigCst
+  %bigLoadedCst = load volatile i82, i82* @bigCst
   %tmp = add i82 %bigLoadedCst, 1
   store i82 %tmp, i82* %addr
   ret void
diff --git a/llvm/test/CodeGen/ARM/extload-knownzero.ll b/llvm/test/CodeGen/ARM/extload-knownzero.ll
index f55b951..da340f7 100644
--- a/llvm/test/CodeGen/ARM/extload-knownzero.ll
+++ b/llvm/test/CodeGen/ARM/extload-knownzero.ll
@@ -8,7 +8,7 @@
   br i1 %tmp1, label %bb1, label %bb2
 bb1:
 ; CHECK: ldrh
-  %tmp2 = load i16* %ptr, align 2
+  %tmp2 = load i16, i16* %ptr, align 2
   br label %bb2
 bb2:
 ; CHECK-NOT: uxth
diff --git a/llvm/test/CodeGen/ARM/extloadi1.ll b/llvm/test/CodeGen/ARM/extloadi1.ll
index 2504c6c..a67859d 100644
--- a/llvm/test/CodeGen/ARM/extloadi1.ll
+++ b/llvm/test/CodeGen/ARM/extloadi1.ll
@@ -4,7 +4,7 @@
 
 define void @__mf_sigusr1_respond() {
 entry:
-        %tmp8.b = load i1* @handler_installed.6144.b            ; <i1> [#uses=1]
+        %tmp8.b = load i1, i1* @handler_installed.6144.b            ; <i1> [#uses=1]
         br i1 false, label %cond_true7, label %cond_next
 
 cond_next:              ; preds = %entry
diff --git a/llvm/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll b/llvm/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
index 010b77f..a52cd83 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll
@@ -17,7 +17,7 @@
   store i32* getelementptr inbounds ([2 x [2 x [2 x [2 x [2 x i32]]]]]* @arr, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1), i32** %addr, align 4
 ; ARM: add r0, r0, #124
 ; THUMB: adds r0, #124
-  %0 = load i32** %addr, align 4
+  %0 = load i32*, i32** %addr, align 4
   ret i32* %0
 }
 
@@ -30,7 +30,7 @@
 ; ARM: movw [[R:r[0-9]+]], #1148
 ; ARM: add r0, r{{[0-9]+}}, [[R]]
 ; THUMB: addw r0, r0, #1148
-  %0 = load i32** %addr, align 4
+  %0 = load i32*, i32** %addr, align 4
   ret i32* %0
 }
 
@@ -42,7 +42,7 @@
   store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1), i32** %addr, align 4
 ; ARM: add r0, r0, #140
 ; THUMB: adds r0, #140
-  %0 = load i32** %addr, align 4
+  %0 = load i32*, i32** %addr, align 4
   ret i32* %0
 }
 
@@ -61,6 +61,6 @@
 ; ARM-NOT: add r{{[0-9]}}, r{{[0-9]}}, #4
 ; ARM: movw r{{[0-9]}}, #1284
 ; THUMB: addw r{{[0-9]}}, r{{[0-9]}}, #1284
-  %0 = load i32** %addr, align 4
+  %0 = load i32*, i32** %addr, align 4
   ret i32* %0
 }
diff --git a/llvm/test/CodeGen/ARM/fast-isel-align.ll b/llvm/test/CodeGen/ARM/fast-isel-align.ll
index 4268542..3d98dcc 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-align.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-align.ll
@@ -34,7 +34,7 @@
 ; THUMB: str r1, [r0]
 
   %add = fadd float %x, %y
-  %0 = load %struct.anon** @a, align 4
+  %0 = load %struct.anon*, %struct.anon** @a, align 4
   %x1 = getelementptr inbounds %struct.anon, %struct.anon* %0, i32 0, i32 0
   store float %add, float* %x1, align 1
   ret void
@@ -66,9 +66,9 @@
 ; THUMB: @unaligned_f32_load
   %0 = alloca %class.TAlignTest*, align 4
   store %class.TAlignTest* %this, %class.TAlignTest** %0, align 4
-  %1 = load %class.TAlignTest** %0
+  %1 = load %class.TAlignTest*, %class.TAlignTest** %0
   %2 = getelementptr inbounds %class.TAlignTest, %class.TAlignTest* %1, i32 0, i32 1
-  %3 = load float* %2, align 1
+  %3 = load float, float* %2, align 1
   %4 = fcmp une float %3, 0.000000e+00
 ; ARM: ldr r[[R:[0-9]+]], [r0, #2]
 ; ARM: vmov s0, r[[R]]
@@ -103,7 +103,7 @@
 ; THUMB-STRICT-ALIGN: ldrb
 ; THUMB-STRICT-ALIGN: ldrb
 
-  %0 = load i16* %x, align 1
+  %0 = load i16, i16* %x, align 1
   ret i16 %0
 }
 
@@ -139,6 +139,6 @@
 ; THUMB-STRICT-ALIGN: ldrb
 ; THUMB-STRICT-ALIGN: ldrb
 
-  %0 = load i32* %x, align 1
+  %0 = load i32, i32* %x, align 1
   ret i32 %0
 }
diff --git a/llvm/test/CodeGen/ARM/fast-isel-call.ll b/llvm/test/CodeGen/ARM/fast-isel-call.ll
index 0a6c865..bd170f3 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-call.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-call.ll
@@ -157,7 +157,7 @@
 ; THUMB: blx     r1
   %fptr = alloca i32 (i32)*, align 8
   store i32 (i32)* @bar0, i32 (i32)** %fptr, align 8
-  %1 = load i32 (i32)** %fptr, align 8
+  %1 = load i32 (i32)*, i32 (i32)** %fptr, align 8
   %call = call i32 %1(i32 0)
   ret void
 }
diff --git a/llvm/test/CodeGen/ARM/fast-isel-fold.ll b/llvm/test/CodeGen/ARM/fast-isel-fold.ll
index 145cffc..37e93c0 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-fold.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-fold.ll
@@ -14,7 +14,7 @@
 ; THUMB: ldrb
 ; THUMB-NOT: uxtb
 ; THUMB-NOT: and{{.*}}, #255
-  %1 = load i8* @a, align 1
+  %1 = load i8, i8* @a, align 1
   call void @foo1(i8 zeroext %1)
   ret void
 }
@@ -26,7 +26,7 @@
 ; THUMB: t2
 ; THUMB: ldrh
 ; THUMB-NOT: uxth
-  %1 = load i16* @b, align 2
+  %1 = load i16, i16* @b, align 2
   call void @foo2(i16 zeroext %1)
   ret void
 }
@@ -43,7 +43,7 @@
 ; THUMB: ldrb
 ; THUMB-NOT: uxtb
 ; THUMB-NOT: and{{.*}}, #255
-  %1 = load i8* @a, align 1
+  %1 = load i8, i8* @a, align 1
   %2 = zext i8 %1 to i32
   ret i32 %2
 }
@@ -55,7 +55,7 @@
 ; THUMB: t4
 ; THUMB: ldrh
 ; THUMB-NOT: uxth
-  %1 = load i16* @b, align 2
+  %1 = load i16, i16* @b, align 2
   %2 = zext i16 %1 to i32
   ret i32 %2
 }
@@ -67,7 +67,7 @@
 ; THUMB: t5
 ; THUMB: ldrsh
 ; THUMB-NOT: sxth
-  %1 = load i16* @b, align 2
+  %1 = load i16, i16* @b, align 2
   %2 = sext i16 %1 to i32
   ret i32 %2
 }
@@ -79,7 +79,7 @@
 ; THUMB: t6
 ; THUMB: ldrsb
 ; THUMB-NOT: sxtb
-  %1 = load i8* @a, align 2
+  %1 = load i8, i8* @a, align 2
   %2 = sext i8 %1 to i32
   ret i32 %2
 }
diff --git a/llvm/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll b/llvm/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll
index fcc685d..cce914b 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll
@@ -4,7 +4,7 @@
 entry:
 ; ARM: t1
   %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 1
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
 ; ARM: ldr r{{[0-9]}}, [r0, #4]
   ret i32 %0
 }
@@ -13,7 +13,7 @@
 entry:
 ; ARM: t2
   %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 63
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
 ; ARM: ldr.w r{{[0-9]}}, [r0, #252]
   ret i32 %0
 }
@@ -22,7 +22,7 @@
 entry:
 ; ARM: t3
   %add.ptr = getelementptr inbounds i16, i16* %ptr, i16 1
-  %0 = load i16* %add.ptr, align 4
+  %0 = load i16, i16* %add.ptr, align 4
 ; ARM: ldrh r{{[0-9]}}, [r0, #2]
   ret i16 %0
 }
@@ -31,7 +31,7 @@
 entry:
 ; ARM: t4
   %add.ptr = getelementptr inbounds i16, i16* %ptr, i16 63
-  %0 = load i16* %add.ptr, align 4
+  %0 = load i16, i16* %add.ptr, align 4
 ; ARM: ldrh.w r{{[0-9]}}, [r0, #126]
   ret i16 %0
 }
@@ -40,7 +40,7 @@
 entry:
 ; ARM: t5
   %add.ptr = getelementptr inbounds i8, i8* %ptr, i8 1
-  %0 = load i8* %add.ptr, align 4
+  %0 = load i8, i8* %add.ptr, align 4
 ; ARM: ldrb r{{[0-9]}}, [r0, #1]
   ret i8 %0
 }
@@ -49,7 +49,7 @@
 entry:
 ; ARM: t6
   %add.ptr = getelementptr inbounds i8, i8* %ptr, i8 63
-  %0 = load i8* %add.ptr, align 4
+  %0 = load i8, i8* %add.ptr, align 4
 ; ARM: ldrb.w r{{[0-9]}}, [r0, #63]
   ret i8 %0
 }
diff --git a/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll b/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
index e433ee7..f24100b 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll
@@ -4,7 +4,7 @@
 entry:
 ; THUMB: t1
   %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -1
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
 ; THUMB: ldr r{{[0-9]}}, [r0, #-4]
   ret i32 %0
 }
@@ -13,7 +13,7 @@
 entry:
 ; THUMB: t2
   %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -63
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
 ; THUMB: ldr r{{[0-9]}}, [r0, #-252]
   ret i32 %0
 }
@@ -22,7 +22,7 @@
 entry:
 ; THUMB: t3
   %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -64
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
 ; THUMB: ldr r{{[0-9]}}, [r0]
   ret i32 %0
 }
@@ -31,7 +31,7 @@
 entry:
 ; THUMB: t4
   %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -1
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
 ; THUMB: ldrh r{{[0-9]}}, [r0, #-2]
   ret i16 %0
 }
@@ -40,7 +40,7 @@
 entry:
 ; THUMB: t5
   %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -127
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
 ; THUMB: ldrh r{{[0-9]}}, [r0, #-254]
   ret i16 %0
 }
@@ -49,7 +49,7 @@
 entry:
 ; THUMB: t6
   %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -128
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
 ; THUMB: ldrh r{{[0-9]}}, [r0]
   ret i16 %0
 }
@@ -58,7 +58,7 @@
 entry:
 ; THUMB: t7
   %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -1
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
 ; THUMB: ldrb r{{[0-9]}}, [r0, #-1]
   ret i8 %0
 }
@@ -67,7 +67,7 @@
 entry:
 ; THUMB: t8
   %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -255
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
 ; THUMB: ldrb r{{[0-9]}}, [r0, #-255]
   ret i8 %0
 }
@@ -76,7 +76,7 @@
 entry:
 ; THUMB: t9
   %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -256
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
 ; THUMB: ldrb r{{[0-9]}}, [r0]
   ret i8 %0
 }
diff --git a/llvm/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll b/llvm/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
index 572233e..ca512970 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll
@@ -6,7 +6,7 @@
 entry:
 ; ARM: t1
   %add.ptr = getelementptr inbounds i16, i16* %a, i64 -8
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
 ; ARM: ldrh r0, [r0, #-16]
   ret i16 %0
 }
@@ -15,7 +15,7 @@
 entry:
 ; ARM: t2
   %add.ptr = getelementptr inbounds i16, i16* %a, i64 -16
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
 ; ARM: ldrh r0, [r0, #-32]
   ret i16 %0
 }
@@ -24,7 +24,7 @@
 entry:
 ; ARM: t3
   %add.ptr = getelementptr inbounds i16, i16* %a, i64 -127
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
 ; ARM: ldrh r0, [r0, #-254]
   ret i16 %0
 }
@@ -33,7 +33,7 @@
 entry:
 ; ARM: t4
   %add.ptr = getelementptr inbounds i16, i16* %a, i64 -128
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
 ; ARM: mvn r{{[1-9]}}, #255
 ; ARM: add r0, r0, r{{[1-9]}}
 ; ARM: ldrh r0, [r0]
@@ -44,7 +44,7 @@
 entry:
 ; ARM: t5
   %add.ptr = getelementptr inbounds i16, i16* %a, i64 8
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
 ; ARM: ldrh r0, [r0, #16]
   ret i16 %0
 }
@@ -53,7 +53,7 @@
 entry:
 ; ARM: t6
   %add.ptr = getelementptr inbounds i16, i16* %a, i64 16
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
 ; ARM: ldrh r0, [r0, #32]
   ret i16 %0
 }
@@ -62,7 +62,7 @@
 entry:
 ; ARM: t7
   %add.ptr = getelementptr inbounds i16, i16* %a, i64 127
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
 ; ARM: ldrh r0, [r0, #254]
   ret i16 %0
 }
@@ -71,7 +71,7 @@
 entry:
 ; ARM: t8
   %add.ptr = getelementptr inbounds i16, i16* %a, i64 128
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
 ; ARM: add r0, r0, #256
 ; ARM: ldrh r0, [r0]
   ret i16 %0
@@ -124,7 +124,7 @@
 entry:
 ; ARM: t13
   %add.ptr = getelementptr inbounds i8, i8* %a, i64 -8
-  %0 = load i8* %add.ptr, align 2
+  %0 = load i8, i8* %add.ptr, align 2
 ; ARM: ldrsb r0, [r0, #-8]
   ret i8 %0
 }
@@ -133,7 +133,7 @@
 entry:
 ; ARM: t14
   %add.ptr = getelementptr inbounds i8, i8* %a, i64 -255
-  %0 = load i8* %add.ptr, align 2
+  %0 = load i8, i8* %add.ptr, align 2
 ; ARM: ldrsb r0, [r0, #-255]
   ret i8 %0
 }
@@ -142,7 +142,7 @@
 entry:
 ; ARM: t15
   %add.ptr = getelementptr inbounds i8, i8* %a, i64 -256
-  %0 = load i8* %add.ptr, align 2
+  %0 = load i8, i8* %add.ptr, align 2
 ; ARM: mvn r{{[1-9]}}, #255
 ; ARM: add r0, r0, r{{[1-9]}}
 ; ARM: ldrsb r0, [r0]
diff --git a/llvm/test/CodeGen/ARM/fast-isel-load-store-verify.ll b/llvm/test/CodeGen/ARM/fast-isel-load-store-verify.ll
index 770b9b3..acf10c8 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-load-store-verify.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-load-store-verify.ll
@@ -17,7 +17,7 @@
 ; ALL: @t1
 ; ALL: ldrb
 ; ALL: add
-  %1 = load i8* @a, align 1
+  %1 = load i8, i8* @a, align 1
   %2 = add nsw i8 %1, 1
   ret i8 %2
 }
@@ -26,7 +26,7 @@
 ; ALL: @t2
 ; ALL: ldrh
 ; ALL: add
-  %1 = load i16* @b, align 2
+  %1 = load i16, i16* @b, align 2
   %2 = add nsw i16 %1, 1
   ret i16 %2
 }
@@ -35,7 +35,7 @@
 ; ALL: @t3
 ; ALL: ldr
 ; ALL: add
-  %1 = load i32* @c, align 4
+  %1 = load i32, i32* @c, align 4
   %2 = add nsw i32 %1, 1
   ret i32 %2
 }
diff --git a/llvm/test/CodeGen/ARM/fast-isel-pic.ll b/llvm/test/CodeGen/ARM/fast-isel-pic.ll
index fdbdf03..70e15da 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-pic.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-pic.ll
@@ -29,7 +29,7 @@
 ; ARMv7-ELF-NEXT: add r[[reg2]], pc
 ; ARMv7-ELF: ldr r[[reg3:[0-9]+]],
 ; ARMv7-ELF: ldr r[[reg2]], [r[[reg3]], r[[reg2]]]
-  %tmp = load i32* @g
+  %tmp = load i32, i32* @g
   ret i32 %tmp
 }
 
@@ -60,6 +60,6 @@
 ; ARMv7-ELF-NEXT: add r[[reg5]], pc
 ; ARMv7-ELF: ldr r[[reg6:[0-9]+]],
 ; ARMv7-ELF: ldr r[[reg5]], [r[[reg6]], r[[reg5]]]
-  %tmp = load i32* @i
+  %tmp = load i32, i32* @i
   ret i32 %tmp
 }
diff --git a/llvm/test/CodeGen/ARM/fast-isel-pred.ll b/llvm/test/CodeGen/ARM/fast-isel-pred.ll
index bf1593beef..ae8b67d 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-pred.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-pred.ll
@@ -7,9 +7,9 @@
   %X = alloca <4 x i32>, align 16
   %Y = alloca <4 x float>, align 16
   store i32 0, i32* %retval
-  %tmp = load <4 x i32>* %X, align 16
+  %tmp = load <4 x i32>, <4 x i32>* %X, align 16
   call void @__aa(<4 x i32> %tmp, i8* null, i32 3, <4 x float>* %Y)
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
@@ -24,15 +24,15 @@
   store i8* %p, i8** %p.addr, align 4
   store i32 %offset, i32* %offset.addr, align 4
   store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
-  %tmp = load <4 x i32>* %v.addr, align 16
+  %tmp = load <4 x i32>, <4 x i32>* %v.addr, align 16
   store <4 x i32> %tmp, <4 x i32>* %__a.addr.i, align 16
-  %tmp.i = load <4 x i32>* %__a.addr.i, align 16
+  %tmp.i = load <4 x i32>, <4 x i32>* %__a.addr.i, align 16
   %0 = bitcast <4 x i32> %tmp.i to <16 x i8>
   %1 = bitcast <16 x i8> %0 to <4 x i32>
   %vcvt.i = sitofp <4 x i32> %1 to <4 x float>
-  %tmp1 = load i8** %p.addr, align 4
-  %tmp2 = load i32* %offset.addr, align 4
-  %tmp3 = load <4 x float>** %constants.addr, align 4
+  %tmp1 = load i8*, i8** %p.addr, align 4
+  %tmp2 = load i32, i32* %offset.addr, align 4
+  %tmp3 = load <4 x float>*, <4 x float>** %constants.addr, align 4
   call void @__bb(<4 x float> %vcvt.i, i8* %tmp1, i32 %tmp2, <4 x float>* %tmp3)
   ret void
 }
@@ -48,9 +48,9 @@
   store i8* %p, i8** %p.addr, align 4
   store i32 %offset, i32* %offset.addr, align 4
   store <4 x float>* %constants, <4 x float>** %constants.addr, align 4
-  %tmp = load i64* %data, align 4
-  %tmp1 = load i8** %p.addr, align 4
-  %tmp2 = load i32* %offset.addr, align 4
+  %tmp = load i64, i64* %data, align 4
+  %tmp1 = load i8*, i8** %p.addr, align 4
+  %tmp2 = load i32, i32* %offset.addr, align 4
   %add.ptr = getelementptr i8, i8* %tmp1, i32 %tmp2
   %0 = bitcast i8* %add.ptr to i64*
   %arrayidx = getelementptr inbounds i64, i64* %0, i32 0
diff --git a/llvm/test/CodeGen/ARM/fast-isel-redefinition.ll b/llvm/test/CodeGen/ARM/fast-isel-redefinition.ll
index 7e8ed9a..a1c8657 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-redefinition.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-redefinition.ll
@@ -6,6 +6,6 @@
 
 define i32 @f(i32* %x) nounwind ssp {
   %y = getelementptr inbounds i32, i32* %x, i32 5000
-  %tmp103 = load i32* %y, align 4
+  %tmp103 = load i32, i32* %y, align 4
   ret i32 %tmp103
 }
diff --git a/llvm/test/CodeGen/ARM/fast-isel-static.ll b/llvm/test/CodeGen/ARM/fast-isel-static.ll
index 3a11d69..c3980cb 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-static.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-static.ll
@@ -9,12 +9,12 @@
   %addend.addr = alloca float*, align 4
   store float* %sum, float** %sum.addr, align 4
   store float* %addend, float** %addend.addr, align 4
-  %tmp = load float** %sum.addr, align 4
-  %tmp1 = load float* %tmp
-  %tmp2 = load float** %addend.addr, align 4
-  %tmp3 = load float* %tmp2
+  %tmp = load float*, float** %sum.addr, align 4
+  %tmp1 = load float, float* %tmp
+  %tmp2 = load float*, float** %addend.addr, align 4
+  %tmp3 = load float, float* %tmp2
   %add = fadd float %tmp1, %tmp3
-  %tmp4 = load float** %sum.addr, align 4
+  %tmp4 = load float*, float** %sum.addr, align 4
   store float %add, float* %tmp4
   ret void
 }
diff --git a/llvm/test/CodeGen/ARM/fast-isel-vararg.ll b/llvm/test/CodeGen/ARM/fast-isel-vararg.ll
index e8c4001..aa37e7d 100644
--- a/llvm/test/CodeGen/ARM/fast-isel-vararg.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel-vararg.ll
@@ -10,11 +10,11 @@
   %m = alloca i32, align 4
   %n = alloca i32, align 4
   %tmp = alloca i32, align 4
-  %0 = load i32* %i, align 4
-  %1 = load i32* %j, align 4
-  %2 = load i32* %k, align 4
-  %3 = load i32* %m, align 4
-  %4 = load i32* %n, align 4
+  %0 = load i32, i32* %i, align 4
+  %1 = load i32, i32* %j, align 4
+  %2 = load i32, i32* %k, align 4
+  %3 = load i32, i32* %m, align 4
+  %4 = load i32, i32* %n, align 4
 ; ARM: VarArg
 ; ARM: mov [[FP:r[0-9]+]], sp
 ; ARM: sub sp, sp, #32
@@ -39,7 +39,7 @@
 ; THUMB: bl {{_?}}CallVariadic
   %call = call i32 (i32, ...)* @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4)
   store i32 %call, i32* %tmp, align 4
-  %5 = load i32* %tmp, align 4
+  %5 = load i32, i32* %tmp, align 4
   ret i32 %5
 }
 
diff --git a/llvm/test/CodeGen/ARM/fast-isel.ll b/llvm/test/CodeGen/ARM/fast-isel.ll
index c8d9e3b..4946022 100644
--- a/llvm/test/CodeGen/ARM/fast-isel.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel.ll
@@ -9,8 +9,8 @@
   %b.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr
   store i32 %b, i32* %b.addr
-  %tmp = load i32* %a.addr
-  %tmp1 = load i32* %b.addr
+  %tmp = load i32, i32* %a.addr
+  %tmp1 = load i32, i32* %b.addr
   %add = add nsw i32 %tmp, %tmp1
   ret i32 %add
 }
@@ -110,9 +110,9 @@
 ; ARM: sxth
 
 bb3:
-  %c1 = load i8* %ptr3
-  %c2 = load i16* %ptr2
-  %c3 = load i32* %ptr1
+  %c1 = load i8, i8* %ptr3
+  %c2 = load i16, i16* %ptr2
+  %c3 = load i32, i32* %ptr1
   %c4 = zext i8 %c1 to i32
   %c5 = sext i16 %c2 to i32
   %c6 = add i32 %c4, %c5
@@ -138,7 +138,7 @@
 @test4g = external global i32
 
 define void @test4() {
-  %a = load i32* @test4g
+  %a = load i32, i32* @test4g
   %b = add i32 %a, 1
   store i32 %b, i32* @test4g
   ret void
diff --git a/llvm/test/CodeGen/ARM/fastisel-gep-promote-before-add.ll b/llvm/test/CodeGen/ARM/fastisel-gep-promote-before-add.ll
index dbe431c..232ab50 100644
--- a/llvm/test/CodeGen/ARM/fastisel-gep-promote-before-add.ll
+++ b/llvm/test/CodeGen/ARM/fastisel-gep-promote-before-add.ll
@@ -6,13 +6,13 @@
 entry:
   %ptr.addr = alloca i8*, align 8
   %add = add i8 64, 64 ; 0x40 + 0x40
-  %0 = load i8** %ptr.addr, align 8
+  %0 = load i8*, i8** %ptr.addr, align 8
 
   ; CHECK-LABEL: _gep_promotion:
   ; CHECK: ldrb {{r[0-9]+}}, {{\[r[0-9]+\]}}
   %arrayidx = getelementptr inbounds i8, i8* %0, i8 %add
 
-  %1 = load i8* %arrayidx, align 1
+  %1 = load i8, i8* %arrayidx, align 1
   ret i8 %1
 }
 
diff --git a/llvm/test/CodeGen/ARM/flag-crash.ll b/llvm/test/CodeGen/ARM/flag-crash.ll
index 9952f56..66eb8a5 100644
--- a/llvm/test/CodeGen/ARM/flag-crash.ll
+++ b/llvm/test/CodeGen/ARM/flag-crash.ll
@@ -6,12 +6,12 @@
 define fastcc void @func(%struct.gs_matrix* nocapture %pm1) nounwind {
 entry:
   %0 = getelementptr inbounds %struct.gs_matrix, %struct.gs_matrix* %pm1, i32 0, i32 6
-  %1 = load float* %0, align 4
+  %1 = load float, float* %0, align 4
   %2 = getelementptr inbounds %struct.gs_matrix, %struct.gs_matrix* %pm1, i32 0, i32 8
-  %3 = load float* %2, align 4
+  %3 = load float, float* %2, align 4
   %4 = getelementptr inbounds %struct.gs_matrix, %struct.gs_matrix* %pm1, i32 0, i32 2
   %5 = bitcast float* %4 to i32*
-  %6 = load i32* %5, align 4
+  %6 = load i32, i32* %5, align 4
   %7 = or i32 0, %6
   %.mask = and i32 %7, 2147483647
   %8 = icmp eq i32 %.mask, 0
diff --git a/llvm/test/CodeGen/ARM/fnegs.ll b/llvm/test/CodeGen/ARM/fnegs.ll
index 65fe9e3..3a4767e 100644
--- a/llvm/test/CodeGen/ARM/fnegs.ll
+++ b/llvm/test/CodeGen/ARM/fnegs.ll
@@ -21,7 +21,7 @@
 
 define float @test1(float* %a) {
 entry:
-	%0 = load float* %a, align 4		; <float> [#uses=2]
+	%0 = load float, float* %a, align 4		; <float> [#uses=2]
 	%1 = fsub float -0.000000e+00, %0		; <float> [#uses=2]
 	%2 = fpext float %1 to double		; <double> [#uses=1]
 	%3 = fcmp olt double %2, 1.234000e+00		; <i1> [#uses=1]
@@ -48,7 +48,7 @@
 
 define float @test2(float* %a) {
 entry:
-	%0 = load float* %a, align 4		; <float> [#uses=2]
+	%0 = load float, float* %a, align 4		; <float> [#uses=2]
 	%1 = fmul float -1.000000e+00, %0		; <float> [#uses=2]
 	%2 = fpext float %1 to double		; <double> [#uses=1]
 	%3 = fcmp olt double %2, 1.234000e+00		; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/fold-stack-adjust.ll b/llvm/test/CodeGen/ARM/fold-stack-adjust.ll
index c5ff82e..aff79a1 100644
--- a/llvm/test/CodeGen/ARM/fold-stack-adjust.ll
+++ b/llvm/test/CodeGen/ARM/fold-stack-adjust.ll
@@ -82,7 +82,7 @@
 
   %var = alloca i8, i32 16
 
-  %tmp = load %bigVec* @var
+  %tmp = load %bigVec, %bigVec* @var
   call void @bar(i8* %var)
   store %bigVec %tmp, %bigVec* @var
 
@@ -119,7 +119,7 @@
 
   %var = alloca i8, i32 64
 
-  %tmp = load %bigVec* @var
+  %tmp = load %bigVec, %bigVec* @var
   call void @bar(i8* %var)
   store %bigVec %tmp, %bigVec* @var
 
@@ -152,7 +152,7 @@
 
   ; We want a long-lived floating register so that a callee-saved dN is used and
   ; there's both a vpop and a pop.
-  %live_val = load double* @dbl
+  %live_val = load double, double* @dbl
   br i1 %tst, label %true, label %end
 true:
   call void @bar(i8* %var)
diff --git a/llvm/test/CodeGen/ARM/fp.ll b/llvm/test/CodeGen/ARM/fp.ll
index 7e1f000..cc47e3b 100644
--- a/llvm/test/CodeGen/ARM/fp.ll
+++ b/llvm/test/CodeGen/ARM/fp.ll
@@ -45,7 +45,7 @@
 ;CHECK: vldr
 ;CHECK-NEXT: vmov
 entry:
-        %tmp = load double* %v          ; <double> [#uses=1]
+        %tmp = load double, double* %v          ; <double> [#uses=1]
         ret double %tmp
 }
 
diff --git a/llvm/test/CodeGen/ARM/fp16.ll b/llvm/test/CodeGen/ARM/fp16.ll
index 5a926ac..25fbf90 100644
--- a/llvm/test/CodeGen/ARM/fp16.ll
+++ b/llvm/test/CodeGen/ARM/fp16.ll
@@ -16,8 +16,8 @@
 ; CHECK-ARMV8-LABEL: foo:
 ; CHECK-SOFTFLOAT-LABEL: foo:
 entry:
-  %0 = load i16* @x, align 2
-  %1 = load i16* @y, align 2
+  %0 = load i16, i16* @x, align 2
+  %1 = load i16, i16* @y, align 2
   %2 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
 ; CHECK: __gnu_h2f_ieee
 ; CHECK-FP16: vcvtb.f32.f16
diff --git a/llvm/test/CodeGen/ARM/fpcmp-opt.ll b/llvm/test/CodeGen/ARM/fpcmp-opt.ll
index eab5988..45bb6d2 100644
--- a/llvm/test/CodeGen/ARM/fpcmp-opt.ll
+++ b/llvm/test/CodeGen/ARM/fpcmp-opt.ll
@@ -13,8 +13,8 @@
 ; CHECK: vcmpe.f32 [[S1]], [[S0]]
 ; CHECK: vmrs APSR_nzcv, fpscr
 ; CHECK: beq
-  %0 = load float* %a
-  %1 = load float* %b
+  %0 = load float, float* %a
+  %1 = load float, float* %b
   %2 = fcmp une float %0, %1
   br i1 %2, label %bb1, label %bb2
 
@@ -41,7 +41,7 @@
 ; CHECK-NOT: vcmpe.f32
 ; CHECK-NOT: vmrs
 ; CHECK: bne
-  %0 = load double* %a
+  %0 = load double, double* %a
   %1 = fcmp oeq double %0, 0.000000e+00
   br i1 %1, label %bb1, label %bb2
 
@@ -64,7 +64,7 @@
 ; CHECK-NOT: vcmpe.f32
 ; CHECK-NOT: vmrs
 ; CHECK: bne
-  %0 = load float* %a
+  %0 = load float, float* %a
   %1 = fcmp oeq float %0, 0.000000e+00
   br i1 %1, label %bb1, label %bb2
 
diff --git a/llvm/test/CodeGen/ARM/fpmem.ll b/llvm/test/CodeGen/ARM/fpmem.ll
index 99a5930..23fbea9 100644
--- a/llvm/test/CodeGen/ARM/fpmem.ll
+++ b/llvm/test/CodeGen/ARM/fpmem.ll
@@ -9,7 +9,7 @@
 define float @f2(float* %v, float %u) {
 ; CHECK-LABEL: f2:
 ; CHECK: vldr{{.*}}[
-        %tmp = load float* %v           ; <float> [#uses=1]
+        %tmp = load float, float* %v           ; <float> [#uses=1]
         %tmp1 = fadd float %tmp, %u              ; <float> [#uses=1]
         ret float %tmp1
 }
@@ -18,7 +18,7 @@
 ; CHECK-LABEL: f2offset:
 ; CHECK: vldr{{.*}}, #4]
         %addr = getelementptr float, float* %v, i32 1
-        %tmp = load float* %addr
+        %tmp = load float, float* %addr
         %tmp1 = fadd float %tmp, %u
         ret float %tmp1
 }
@@ -27,7 +27,7 @@
 ; CHECK-LABEL: f2noffset:
 ; CHECK: vldr{{.*}}, #-4]
         %addr = getelementptr float, float* %v, i32 -1
-        %tmp = load float* %addr
+        %tmp = load float, float* %addr
         %tmp1 = fadd float %tmp, %u
         ret float %tmp1
 }
diff --git a/llvm/test/CodeGen/ARM/fptoint.ll b/llvm/test/CodeGen/ARM/fptoint.ll
index f50d0b9..6cbb30b 100644
--- a/llvm/test/CodeGen/ARM/fptoint.ll
+++ b/llvm/test/CodeGen/ARM/fptoint.ll
@@ -4,13 +4,13 @@
 @u = weak global i32 0		; <i32*> [#uses=2]
 
 define i32 @foo1(float *%x) {
-        %tmp1 = load float* %x
+        %tmp1 = load float, float* %x
 	%tmp2 = bitcast float %tmp1 to i32
 	ret i32 %tmp2
 }
 
 define i64 @foo2(double *%x) {
-        %tmp1 = load double* %x
+        %tmp1 = load double, double* %x
 	%tmp2 = bitcast double %tmp1 to i64
 	ret i64 %tmp2
 }
diff --git a/llvm/test/CodeGen/ARM/frame-register.ll b/llvm/test/CodeGen/ARM/frame-register.ll
index b04e376..0cc5005 100644
--- a/llvm/test/CodeGen/ARM/frame-register.ll
+++ b/llvm/test/CodeGen/ARM/frame-register.ll
@@ -17,12 +17,12 @@
   %i.addr = alloca i32, align 4
   %j = alloca i32, align 4
   store i32 %i, i32* %i.addr, align 4
-  %0 = load i32* %i.addr, align 4
+  %0 = load i32, i32* %i.addr, align 4
   %add = add nsw i32 %0, 1
   store i32 %add, i32* %j, align 4
-  %1 = load i32* %j, align 4
+  %1 = load i32, i32* %j, align 4
   call void @callee(i32 %1)
-  %2 = load i32* %j, align 4
+  %2 = load i32, i32* %j, align 4
   %add1 = add nsw i32 %2, 1
   ret i32 %add1
 }
diff --git a/llvm/test/CodeGen/ARM/fusedMAC.ll b/llvm/test/CodeGen/ARM/fusedMAC.ll
index e29f291..6f6cdc1 100644
--- a/llvm/test/CodeGen/ARM/fusedMAC.ll
+++ b/llvm/test/CodeGen/ARM/fusedMAC.ll
@@ -144,7 +144,7 @@
 define float @test_fnms_f32(float %a, float %b, float* %c) nounwind readnone ssp {
 ; CHECK: test_fnms_f32
 ; CHECK: vfnms.f32
-  %tmp1 = load float* %c, align 4
+  %tmp1 = load float, float* %c, align 4
   %tmp2 = fsub float -0.0, %tmp1
   %tmp3 = tail call float @llvm.fma.f32(float %a, float %b, float %tmp2) nounwind readnone
   ret float %tmp3 
diff --git a/llvm/test/CodeGen/ARM/ghc-tcreturn-lowered.ll b/llvm/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
index 623b422..9731b3d 100644
--- a/llvm/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
+++ b/llvm/test/CodeGen/ARM/ghc-tcreturn-lowered.ll
@@ -15,7 +15,7 @@
 define ghccc void @test_indirect_tail() {
 ; CHECK-LABEL: test_indirect_tail:
 ; CHECK: bx {{r[0-9]+}}
-  %func = load void()** @ind_func
+  %func = load void()*, void()** @ind_func
   tail call ghccc void()* %func()
   ret void
 }
diff --git a/llvm/test/CodeGen/ARM/global-merge-1.ll b/llvm/test/CodeGen/ARM/global-merge-1.ll
index e5d4def..20b4ba5 100644
--- a/llvm/test/CodeGen/ARM/global-merge-1.ll
+++ b/llvm/test/CodeGen/ARM/global-merge-1.ll
@@ -55,12 +55,12 @@
 
 ; Function Attrs: nounwind ssp
 define internal void @calculate() #0 {
-  %1 = load <4 x i32>* bitcast ([5 x i32]* @bar to <4 x i32>*), align 4
-  %2 = load <4 x i32>* bitcast ([5 x i32]* @baz to <4 x i32>*), align 4
+  %1 = load <4 x i32>, <4 x i32>* bitcast ([5 x i32]* @bar to <4 x i32>*), align 4
+  %2 = load <4 x i32>, <4 x i32>* bitcast ([5 x i32]* @baz to <4 x i32>*), align 4
   %3 = mul <4 x i32> %2, %1
   store <4 x i32> %3, <4 x i32>* bitcast ([5 x i32]* @foo to <4 x i32>*), align 4
-  %4 = load i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1
-  %5 = load i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1
+  %4 = load i32, i32* getelementptr inbounds ([5 x i32]* @bar, i32 0, i32 4), align 4, !tbaa !1
+  %5 = load i32, i32* getelementptr inbounds ([5 x i32]* @baz, i32 0, i32 4), align 4, !tbaa !1
   %6 = mul nsw i32 %5, %4
   store i32 %6, i32* getelementptr inbounds ([5 x i32]* @foo, i32 0, i32 4), align 4, !tbaa !1
   ret void
diff --git a/llvm/test/CodeGen/ARM/globals.ll b/llvm/test/CodeGen/ARM/globals.ll
index 2c599bf..bab96da 100644
--- a/llvm/test/CodeGen/ARM/globals.ll
+++ b/llvm/test/CodeGen/ARM/globals.ll
@@ -6,7 +6,7 @@
 @G = external global i32
 
 define i32 @test1() {
-	%tmp = load i32* @G
+	%tmp = load i32, i32* @G
 	ret i32 %tmp
 }
 
diff --git a/llvm/test/CodeGen/ARM/gv-stubs-crash.ll b/llvm/test/CodeGen/ARM/gv-stubs-crash.ll
index c4c4180..6e82afe 100644
--- a/llvm/test/CodeGen/ARM/gv-stubs-crash.ll
+++ b/llvm/test/CodeGen/ARM/gv-stubs-crash.ll
@@ -4,7 +4,7 @@
 @Exn = external hidden unnamed_addr constant { i8*, i8* }
 
 define hidden void @func(i32* %this, i32* %e) optsize align 2 {
-  %e.ld = load i32* %e, align 4
+  %e.ld = load i32, i32* %e, align 4
   %inv = invoke zeroext i1 @func2(i32* %this, i32 %e.ld) optsize
           to label %ret unwind label %lpad
 
diff --git a/llvm/test/CodeGen/ARM/half.ll b/llvm/test/CodeGen/ARM/half.ll
index 10cebb3..777aff2 100644
--- a/llvm/test/CodeGen/ARM/half.ll
+++ b/llvm/test/CodeGen/ARM/half.ll
@@ -6,7 +6,7 @@
 ; CHECK-LABEL: test_load_store:
 ; CHECK: ldrh [[TMP:r[0-9]+]], [r0]
 ; CHECK: strh [[TMP]], [r1]
-  %val = load half* %in
+  %val = load half, half* %in
   store half %val, half* %out
   ret void
 }
@@ -14,7 +14,7 @@
 define i16 @test_bitcast_from_half(half* %addr) {
 ; CHECK-LABEL: test_bitcast_from_half:
 ; CHECK: ldrh r0, [r0]
-  %val = load half* %addr
+  %val = load half, half* %addr
   %val_int = bitcast half %val to i16
   ret i16 %val_int
 }
@@ -33,7 +33,7 @@
 ; CHECK-OLD: b.w ___gnu_h2f_ieee
 ; CHECK-F16: vcvtb.f32.f16
 ; CHECK-V8: vcvtb.f32.f16
-  %val16 = load half* %addr
+  %val16 = load half, half* %addr
   %val32 = fpext half %val16 to float
   ret float %val32
 }
@@ -46,7 +46,7 @@
 ; CHECK-F16: vcvtb.f32.f16
 ; CHECK-F16: vcvt.f64.f32
 ; CHECK-V8: vcvtb.f64.f16
-  %val16 = load half* %addr
+  %val16 = load half, half* %addr
   %val32 = fpext half %val16 to double
   ret double %val32
 }
diff --git a/llvm/test/CodeGen/ARM/hidden-vis-2.ll b/llvm/test/CodeGen/ARM/hidden-vis-2.ll
index 18d38d4..a104f35 100644
--- a/llvm/test/CodeGen/ARM/hidden-vis-2.ll
+++ b/llvm/test/CodeGen/ARM/hidden-vis-2.ll
@@ -7,6 +7,6 @@
 ; CHECK-LABEL: t:
 ; CHECK: ldr
 ; CHECK-NEXT: ldr
-	%0 = load i32* @x, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* @x, align 4		; <i32> [#uses=1]
 	ret i32 %0
 }
diff --git a/llvm/test/CodeGen/ARM/hidden-vis-3.ll b/llvm/test/CodeGen/ARM/hidden-vis-3.ll
index 3bc3312..0cf2f77 100644
--- a/llvm/test/CodeGen/ARM/hidden-vis-3.ll
+++ b/llvm/test/CodeGen/ARM/hidden-vis-3.ll
@@ -10,8 +10,8 @@
 ; CHECK: LCPI0_1:
 ; CHECK-NEXT: .long _y
 
-	%0 = load i32* @x, align 4		; <i32> [#uses=1]
-	%1 = load i32* @y, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* @x, align 4		; <i32> [#uses=1]
+	%1 = load i32, i32* @y, align 4		; <i32> [#uses=1]
 	%2 = add i32 %1, %0		; <i32> [#uses=1]
 	ret i32 %2
 }
diff --git a/llvm/test/CodeGen/ARM/ifconv-kills.ll b/llvm/test/CodeGen/ARM/ifconv-kills.ll
index c9f6789..3a458e4 100644
--- a/llvm/test/CodeGen/ARM/ifconv-kills.ll
+++ b/llvm/test/CodeGen/ARM/ifconv-kills.ll
@@ -10,7 +10,7 @@
 ; present something which can be easily if-converted
 if.then:
   ; %R0 should be killed here
-  %valt = load i32* %ptr, align 4
+  %valt = load i32, i32* %ptr, align 4
   br label %return
 
 if.else:
@@ -18,7 +18,7 @@
   ; has to be removed because if.then will follow after this and still
   ; read it.
   %addr = getelementptr inbounds i32, i32* %ptr, i32 4
-  %vale = load i32* %addr, align 4
+  %vale = load i32, i32* %addr, align 4
   br label %return
 
 return:
diff --git a/llvm/test/CodeGen/ARM/ifconv-regmask.ll b/llvm/test/CodeGen/ARM/ifconv-regmask.ll
index d45f65f..11ad6f2 100644
--- a/llvm/test/CodeGen/ARM/ifconv-regmask.ll
+++ b/llvm/test/CodeGen/ARM/ifconv-regmask.ll
@@ -7,7 +7,7 @@
 ; Function Attrs: nounwind ssp
 define i32 @sfu() {
 entry:
-  %bf.load = load i32* getelementptr inbounds (%union.opcode* @opcode, i32 0, i32 0), align 4
+  %bf.load = load i32, i32* getelementptr inbounds (%union.opcode* @opcode, i32 0, i32 0), align 4
   %bf.lshr = lshr i32 %bf.load, 26
   %bf.clear = and i32 %bf.lshr, 7
   switch i32 %bf.clear, label %return [
diff --git a/llvm/test/CodeGen/ARM/ifcvt-branch-weight.ll b/llvm/test/CodeGen/ARM/ifcvt-branch-weight.ll
index 9cc8738..41d78e5 100644
--- a/llvm/test/CodeGen/ARM/ifcvt-branch-weight.ll
+++ b/llvm/test/CodeGen/ARM/ifcvt-branch-weight.ll
@@ -4,7 +4,7 @@
 define internal zeroext i8 @bar(%struct.S* %x, %struct.S* nocapture %y) nounwind readonly {
 entry:
   %0 = getelementptr inbounds %struct.S, %struct.S* %x, i32 0, i32 1, i32 0
-  %1 = load i8* %0, align 1
+  %1 = load i8, i8* %0, align 1
   %2 = zext i8 %1 to i32
   %3 = and i32 %2, 112
   %4 = icmp eq i32 %3, 0
@@ -12,7 +12,7 @@
 
 bb:
   %5 = getelementptr inbounds %struct.S, %struct.S* %y, i32 0, i32 1, i32 0
-  %6 = load i8* %5, align 1
+  %6 = load i8, i8* %5, align 1
   %7 = zext i8 %6 to i32
   %8 = and i32 %7, 112
   %9 = icmp eq i32 %8, 0
diff --git a/llvm/test/CodeGen/ARM/ifcvt11.ll b/llvm/test/CodeGen/ARM/ifcvt11.ll
index a02dff0..eae41e2 100644
--- a/llvm/test/CodeGen/ARM/ifcvt11.ll
+++ b/llvm/test/CodeGen/ARM/ifcvt11.ll
@@ -23,8 +23,8 @@
   %n.08 = phi i32 [ 0, %bb.nph ], [ %10, %bb4 ]
   %scevgep10 = getelementptr inbounds %struct.xyz_t, %struct.xyz_t* %p, i32 %n.08, i32 0
   %scevgep11 = getelementptr %struct.xyz_t, %struct.xyz_t* %p, i32 %n.08, i32 1
-  %3 = load double* %scevgep10, align 4
-  %4 = load double* %scevgep11, align 4
+  %3 = load double, double* %scevgep10, align 4
+  %4 = load double, double* %scevgep11, align 4
   %5 = fcmp uge double %3, %4
   br i1 %5, label %bb3, label %bb1
 
@@ -35,7 +35,7 @@
 ; CHECK: vcmpe.f64
 ; CHECK: vmrs APSR_nzcv, fpscr
   %scevgep12 = getelementptr %struct.xyz_t, %struct.xyz_t* %p, i32 %n.08, i32 2
-  %6 = load double* %scevgep12, align 4
+  %6 = load double, double* %scevgep12, align 4
   %7 = fcmp uge double %3, %6
   br i1 %7, label %bb3, label %bb2
 
diff --git a/llvm/test/CodeGen/ARM/ifcvt5.ll b/llvm/test/CodeGen/ARM/ifcvt5.ll
index 31e3e00..3aa2139 100644
--- a/llvm/test/CodeGen/ARM/ifcvt5.ll
+++ b/llvm/test/CodeGen/ARM/ifcvt5.ll
@@ -6,7 +6,7 @@
 
 define void @foo(i32 %a) {
 entry:
-	%tmp = load i32** @x		; <i32*> [#uses=1]
+	%tmp = load i32*, i32** @x		; <i32*> [#uses=1]
 	store i32 %a, i32* %tmp
 	ret void
 }
diff --git a/llvm/test/CodeGen/ARM/ifcvt7.ll b/llvm/test/CodeGen/ARM/ifcvt7.ll
index 476ed4d..e0d2b7c 100644
--- a/llvm/test/CodeGen/ARM/ifcvt7.ll
+++ b/llvm/test/CodeGen/ARM/ifcvt7.ll
@@ -11,9 +11,9 @@
 	br label %tailrecurse
 
 tailrecurse:		; preds = %bb, %entry
-	%tmp6 = load %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=1]
-	%tmp9 = load %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=2]
-	%tmp12 = load %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=1]
+	%tmp6 = load %struct.quad_struct*, %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=1]
+	%tmp9 = load %struct.quad_struct*, %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=2]
+	%tmp12 = load %struct.quad_struct*, %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=1]
 	%tmp14 = icmp eq %struct.quad_struct* null, null		; <i1> [#uses=1]
 	%tmp17 = icmp eq %struct.quad_struct* %tmp6, null		; <i1> [#uses=1]
 	%tmp23 = icmp eq %struct.quad_struct* %tmp9, null		; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/illegal-vector-bitcast.ll b/llvm/test/CodeGen/ARM/illegal-vector-bitcast.ll
index 7208fff..766b3d7 100644
--- a/llvm/test/CodeGen/ARM/illegal-vector-bitcast.ll
+++ b/llvm/test/CodeGen/ARM/illegal-vector-bitcast.ll
@@ -3,10 +3,10 @@
 
 define void @foo(<8 x float>* %f, <8 x float>* %g, <4 x i64>* %y)
 {
-  %h = load <8 x float>* %f
+  %h = load <8 x float>, <8 x float>* %f
   %i = fmul <8 x float> %h, <float 0x3FF19999A0000000, float 0x400A666660000000, float 0x40119999A0000000, float 0x40159999A0000000, float 0.5, float 0x3FE3333340000000, float 0x3FE6666660000000, float 0x3FE99999A0000000>
   %m = bitcast <8 x float> %i to <4 x i64>
-  %z = load <4 x i64>* %y
+  %z = load <4 x i64>, <4 x i64>* %y
   %n = mul <4 x i64> %z, %m
   %p = bitcast <4 x i64> %n to <8 x float>
   store <8 x float> %p, <8 x float>* %g
diff --git a/llvm/test/CodeGen/ARM/indirectbr-2.ll b/llvm/test/CodeGen/ARM/indirectbr-2.ll
index 3a5d2d8..318880a 100644
--- a/llvm/test/CodeGen/ARM/indirectbr-2.ll
+++ b/llvm/test/CodeGen/ARM/indirectbr-2.ll
@@ -15,7 +15,7 @@
 
 define i32 @func() nounwind ssp {
   %1 = alloca i32, align 4
-  %2 = load i32* @foo, align 4
+  %2 = load i32, i32* @foo, align 4
   %3 = icmp eq i32 %2, 34879
   br label %4
 
@@ -24,7 +24,7 @@
   %6 = mul i32 %5, 287
   %7 = add i32 %6, 2
   %8 = getelementptr [2 x i32], [2 x i32]* @DWJumpTable2808, i32 0, i32 %5
-  %9 = load i32* %8
+  %9 = load i32, i32* %8
   %10 = add i32 %9, ptrtoint (i8* blockaddress(@func, %4) to i32)
   %11 = inttoptr i32 %10 to i8*
   %12 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([45 x i8]* @0, i32 0, i32 0))
@@ -33,7 +33,7 @@
 ; <label>:13                                      ; preds = %4
   %tmp14 = phi i32 [ %7, %4 ]
   store i32 23958, i32* @foo, align 4
-  %tmp15 = load i32* %1, align 4
+  %tmp15 = load i32, i32* %1, align 4
   %tmp16 = icmp eq i32 %tmp15, 0
   %tmp17 = zext i1 %tmp16 to i32
   %tmp21 = add i32 %tmp17, %tmp14
diff --git a/llvm/test/CodeGen/ARM/indirectbr.ll b/llvm/test/CodeGen/ARM/indirectbr.ll
index 2c63b6f..d15ef14 100644
--- a/llvm/test/CodeGen/ARM/indirectbr.ll
+++ b/llvm/test/CodeGen/ARM/indirectbr.ll
@@ -16,7 +16,7 @@
 ; THUMB: [[NEXTADDR_PCBASE:LPC0_[0-9]]]:
 ; THUMB: add r[[NEXTADDR_REG]], pc
 
-  %0 = load i8** @nextaddr, align 4               ; <i8*> [#uses=2]
+  %0 = load i8*, i8** @nextaddr, align 4               ; <i8*> [#uses=2]
   %1 = icmp eq i8* %0, null                       ; <i1> [#uses=1]
 ; indirect branch gets duplicated here
 ; ARM: bx
@@ -32,7 +32,7 @@
 
 bb3:                                              ; preds = %entry
   %2 = getelementptr inbounds [5 x i8*], [5 x i8*]* @C.0.2070, i32 0, i32 %i ; <i8**> [#uses=1]
-  %gotovar.4.0.pre = load i8** %2, align 4        ; <i8*> [#uses=1]
+  %gotovar.4.0.pre = load i8*, i8** %2, align 4        ; <i8*> [#uses=1]
   br label %bb2
 
 L5:                                               ; preds = %bb2
diff --git a/llvm/test/CodeGen/ARM/inline-diagnostics.ll b/llvm/test/CodeGen/ARM/inline-diagnostics.ll
index dbb779e..3f5b73c 100644
--- a/llvm/test/CodeGen/ARM/inline-diagnostics.ll
+++ b/llvm/test/CodeGen/ARM/inline-diagnostics.ll
@@ -9,7 +9,7 @@
   %c3 = alloca %struct.float4, align 4
   call void asm sideeffect "vmul.f32 ${2:q}, ${0:q}, ${1:q}", "=*r,=*r,*w"(%struct.float4* %c1, %struct.float4* %c2, %struct.float4* %c3) #1, !srcloc !1
   %x = getelementptr inbounds %struct.float4, %struct.float4* %c3, i32 0, i32 0
-  %1 = load float* %x, align 4
+  %1 = load float, float* %x, align 4
   ret float %1
 }
 
diff --git a/llvm/test/CodeGen/ARM/interrupt-attr.ll b/llvm/test/CodeGen/ARM/interrupt-attr.ll
index c6da09d..95ada08 100644
--- a/llvm/test/CodeGen/ARM/interrupt-attr.ll
+++ b/llvm/test/CodeGen/ARM/interrupt-attr.ll
@@ -65,7 +65,7 @@
 
 ; CHECK-A-THUMB-LABEL: fiq_fn:
 ; CHECK-M-LABEL: fiq_fn:
-  %val = load volatile [16 x i32]* @bigvar
+  %val = load volatile [16 x i32], [16 x i32]* @bigvar
   store volatile [16 x i32] %val, [16 x i32]* @bigvar
   ret void
 }
@@ -81,7 +81,7 @@
 ; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
 ; CHECK-A: subs pc, lr, #0
 
-  %val = load volatile [16 x i32]* @bigvar
+  %val = load volatile [16 x i32], [16 x i32]* @bigvar
   store volatile [16 x i32] %val, [16 x i32]* @bigvar
   ret void
 }
@@ -126,8 +126,8 @@
 ; CHECK-A-NOT: vstr
 ; CHECK-A-NOT: vstm
 ; CHECK-A: vadd.f64 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
-  %lhs = load volatile double* @var
-  %rhs = load volatile double* @var
+  %lhs = load volatile double, double* @var
+  %rhs = load volatile double, double* @var
   %sum = fadd double %lhs, %rhs
   store double %sum, double* @var
   ret void
diff --git a/llvm/test/CodeGen/ARM/intrinsics-crypto.ll b/llvm/test/CodeGen/ARM/intrinsics-crypto.ll
index 96413d3..6e5efd8 100644
--- a/llvm/test/CodeGen/ARM/intrinsics-crypto.ll
+++ b/llvm/test/CodeGen/ARM/intrinsics-crypto.ll
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -mtriple=armv8 -mattr=+crypto | FileCheck %s
 
 define arm_aapcs_vfpcc <16 x i8> @test_aesde(<16 x i8>* %a, <16 x i8> *%b) {
-  %tmp = load <16 x i8>* %a
-  %tmp2 = load <16 x i8>* %b
+  %tmp = load <16 x i8>, <16 x i8>* %a
+  %tmp2 = load <16 x i8>, <16 x i8>* %b
   %tmp3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %tmp, <16 x i8> %tmp2)
   ; CHECK: aesd.8 q{{[0-9]+}}, q{{[0-9]+}}
   %tmp4 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %tmp3, <16 x i8> %tmp2)
@@ -15,9 +15,9 @@
 }
 
 define arm_aapcs_vfpcc <4 x i32> @test_sha(<4 x i32> *%a, <4 x i32> *%b, <4 x i32> *%c) {
-  %tmp = load <4 x i32>* %a
-  %tmp2 = load <4 x i32>* %b
-  %tmp3 = load <4 x i32>* %c
+  %tmp = load <4 x i32>, <4 x i32>* %a
+  %tmp2 = load <4 x i32>, <4 x i32>* %b
+  %tmp3 = load <4 x i32>, <4 x i32>* %c
   %scalar = extractelement <4 x i32> %tmp, i32 0
   %resscalar = call i32 @llvm.arm.neon.sha1h(i32 %scalar)
   %res1 = insertelement <4 x i32> undef, i32 %resscalar, i32 0
diff --git a/llvm/test/CodeGen/ARM/invoke-donothing-assert.ll b/llvm/test/CodeGen/ARM/invoke-donothing-assert.ll
index 0b607f7..aab3556 100644
--- a/llvm/test/CodeGen/ARM/invoke-donothing-assert.ll
+++ b/llvm/test/CodeGen/ARM/invoke-donothing-assert.ll
@@ -46,7 +46,7 @@
   br label %cleanup
 
 cleanup:
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %inc294 = add nsw i32 %0, 4
   store i32 %inc294, i32* %a, align 4
   br i1 false, label %_ZN3lol5ArrayIivvvvvvvED1Ev.exit, label %delete.notnull.i.i.i1409
diff --git a/llvm/test/CodeGen/ARM/isel-v8i32-crash.ll b/llvm/test/CodeGen/ARM/isel-v8i32-crash.ll
index 0116fe8..f1745bd 100644
--- a/llvm/test/CodeGen/ARM/isel-v8i32-crash.ll
+++ b/llvm/test/CodeGen/ARM/isel-v8i32-crash.ll
@@ -15,7 +15,7 @@
 define void @func(i16* nocapture %pb, float* nocapture readonly %pf) #0 {
 entry:
   %0 = bitcast float* %pf to <8 x float>*
-  %1 = load <8 x float>* %0, align 4
+  %1 = load <8 x float>, <8 x float>* %0, align 4
   %2 = fmul <8 x float> %1, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
   %3 = fptosi <8 x float> %2 to <8 x i16>
   %4 = bitcast i16* %pb to <8 x i16>*
diff --git a/llvm/test/CodeGen/ARM/krait-cpu-div-attribute.ll b/llvm/test/CodeGen/ARM/krait-cpu-div-attribute.ll
index df4bd575..222664a 100644
--- a/llvm/test/CodeGen/ARM/krait-cpu-div-attribute.ll
+++ b/llvm/test/CodeGen/ARM/krait-cpu-div-attribute.ll
@@ -28,8 +28,8 @@
   store i32 0, i32* %retval
   store volatile i32 100, i32* %b, align 4
   store volatile i32 32, i32* %c, align 4
-  %0 = load volatile i32* %b, align 4
-  %1 = load volatile i32* %c, align 4
+  %0 = load volatile i32, i32* %b, align 4
+  %1 = load volatile i32, i32* %c, align 4
   %div = sdiv i32 %0, %1
   store volatile i32 %div, i32* %a, align 4
   ret i32 0
diff --git a/llvm/test/CodeGen/ARM/large-stack.ll b/llvm/test/CodeGen/ARM/large-stack.ll
index 1a9a1fa..e2d4de3 100644
--- a/llvm/test/CodeGen/ARM/large-stack.ll
+++ b/llvm/test/CodeGen/ARM/large-stack.ll
@@ -15,6 +15,6 @@
 	%tmp = alloca i32, align 4
 	%a = alloca [805306369 x i8], align 16
 	store i32 0, i32* %tmp
-	%tmp1 = load i32* %tmp
+	%tmp1 = load i32, i32* %tmp
         ret i32 %tmp1
 }
diff --git a/llvm/test/CodeGen/ARM/ldm.ll b/llvm/test/CodeGen/ARM/ldm.ll
index 3977da6..b35631f 100644
--- a/llvm/test/CodeGen/ARM/ldm.ll
+++ b/llvm/test/CodeGen/ARM/ldm.ll
@@ -8,8 +8,8 @@
 ; CHECK: pop
 ; V4T-LABEL: t1:
 ; V4T: pop
-        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0)            ; <i32> [#uses=1]
-        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)           ; <i32> [#uses=1]
+        %tmp = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 0)            ; <i32> [#uses=1]
+        %tmp3 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)           ; <i32> [#uses=1]
         %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 )                ; <i32> [#uses=1]
         ret i32 %tmp4
 }
@@ -19,9 +19,9 @@
 ; CHECK: pop
 ; V4T-LABEL: t2:
 ; V4T: pop
-        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)            ; <i32> [#uses=1]
-        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
-        %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4)           ; <i32> [#uses=1]
+        %tmp = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)            ; <i32> [#uses=1]
+        %tmp3 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
+        %tmp5 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 4)           ; <i32> [#uses=1]
         %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 )             ; <i32> [#uses=1]
         ret i32 %tmp6
 }
@@ -34,9 +34,9 @@
 ; V4T: ldmib
 ; V4T: pop
 ; V4T-NEXT: bx lr
-        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)            ; <i32> [#uses=1]
-        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)           ; <i32> [#uses=1]
-        %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
+        %tmp = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)            ; <i32> [#uses=1]
+        %tmp3 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)           ; <i32> [#uses=1]
+        %tmp5 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
         %tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 )             ; <i32> [#uses=1]
         ret i32 %tmp6
 }
diff --git a/llvm/test/CodeGen/ARM/ldr.ll b/llvm/test/CodeGen/ARM/ldr.ll
index c740e1a..bd4de5de 100644
--- a/llvm/test/CodeGen/ARM/ldr.ll
+++ b/llvm/test/CodeGen/ARM/ldr.ll
@@ -4,7 +4,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: ldr r0
 entry:
-        %tmp = load i32* %v
+        %tmp = load i32, i32* %v
         ret i32 %tmp
 }
 
@@ -13,7 +13,7 @@
 ; CHECK: ldr r0
 entry:
         %tmp2 = getelementptr i32, i32* %v, i32 1023
-        %tmp = load i32* %tmp2
+        %tmp = load i32, i32* %tmp2
         ret i32 %tmp
 }
 
@@ -23,7 +23,7 @@
 ; CHECK: ldr r0
 entry:
         %tmp2 = getelementptr i32, i32* %v, i32 1024
-        %tmp = load i32* %tmp2
+        %tmp = load i32, i32* %tmp2
         ret i32 %tmp
 }
 
@@ -34,7 +34,7 @@
 entry:
         %tmp1 = sub i32 %base, 128
         %tmp2 = inttoptr i32 %tmp1 to i32*
-        %tmp3 = load i32* %tmp2
+        %tmp3 = load i32, i32* %tmp2
         ret i32 %tmp3
 }
 
@@ -44,7 +44,7 @@
 entry:
         %tmp1 = add i32 %base, %offset
         %tmp2 = inttoptr i32 %tmp1 to i32*
-        %tmp3 = load i32* %tmp2
+        %tmp3 = load i32, i32* %tmp2
         ret i32 %tmp3
 }
 
@@ -55,7 +55,7 @@
         %tmp1 = shl i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i32*
-        %tmp4 = load i32* %tmp3
+        %tmp4 = load i32, i32* %tmp3
         ret i32 %tmp4
 }
 
@@ -66,6 +66,6 @@
         %tmp1 = lshr i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i32*
-        %tmp4 = load i32* %tmp3
+        %tmp4 = load i32, i32* %tmp3
         ret i32 %tmp4
 }
diff --git a/llvm/test/CodeGen/ARM/ldr_ext.ll b/llvm/test/CodeGen/ARM/ldr_ext.ll
index 31aaba5..15efb50 100644
--- a/llvm/test/CodeGen/ARM/ldr_ext.ll
+++ b/llvm/test/CodeGen/ARM/ldr_ext.ll
@@ -2,28 +2,28 @@
 
 define i32 @test1(i8* %t1) nounwind {
 ; CHECK: ldrb
-    %tmp.u = load i8* %t1
+    %tmp.u = load i8, i8* %t1
     %tmp1.s = zext i8 %tmp.u to i32
     ret i32 %tmp1.s
 }
 
 define i32 @test2(i16* %t1) nounwind {
 ; CHECK: ldrh
-    %tmp.u = load i16* %t1
+    %tmp.u = load i16, i16* %t1
     %tmp1.s = zext i16 %tmp.u to i32
     ret i32 %tmp1.s
 }
 
 define i32 @test3(i8* %t0) nounwind {
 ; CHECK: ldrsb
-    %tmp.s = load i8* %t0
+    %tmp.s = load i8, i8* %t0
     %tmp1.s = sext i8 %tmp.s to i32
     ret i32 %tmp1.s
 }
 
 define i32 @test4(i16* %t0) nounwind {
 ; CHECK: ldrsh
-    %tmp.s = load i16* %t0
+    %tmp.s = load i16, i16* %t0
     %tmp1.s = sext i16 %tmp.s to i32
     ret i32 %tmp1.s
 }
@@ -31,7 +31,7 @@
 define i32 @test5() nounwind {
 ; CHECK: mov r0, #0
 ; CHECK: ldrsh
-    %tmp.s = load i16* null
+    %tmp.s = load i16, i16* null
     %tmp1.s = sext i16 %tmp.s to i32
     ret i32 %tmp1.s
 }
diff --git a/llvm/test/CodeGen/ARM/ldr_frame.ll b/llvm/test/CodeGen/ARM/ldr_frame.ll
index 113badd..01b18bc 100644
--- a/llvm/test/CodeGen/ARM/ldr_frame.ll
+++ b/llvm/test/CodeGen/ARM/ldr_frame.ll
@@ -3,14 +3,14 @@
 define i32 @f1() {
 	%buf = alloca [32 x i32], align 4
 	%tmp = getelementptr [32 x i32], [32 x i32]* %buf, i32 0, i32 0
-	%tmp1 = load i32* %tmp
+	%tmp1 = load i32, i32* %tmp
 	ret i32 %tmp1
 }
 
 define i32 @f2() {
 	%buf = alloca [32 x i8], align 4
 	%tmp = getelementptr [32 x i8], [32 x i8]* %buf, i32 0, i32 0
-	%tmp1 = load i8* %tmp
+	%tmp1 = load i8, i8* %tmp
         %tmp2 = zext i8 %tmp1 to i32
 	ret i32 %tmp2
 }
@@ -18,14 +18,14 @@
 define i32 @f3() {
 	%buf = alloca [32 x i32], align 4
 	%tmp = getelementptr [32 x i32], [32 x i32]* %buf, i32 0, i32 32
-	%tmp1 = load i32* %tmp
+	%tmp1 = load i32, i32* %tmp
 	ret i32 %tmp1
 }
 
 define i32 @f4() {
 	%buf = alloca [32 x i8], align 4
 	%tmp = getelementptr [32 x i8], [32 x i8]* %buf, i32 0, i32 2
-	%tmp1 = load i8* %tmp
+	%tmp1 = load i8, i8* %tmp
         %tmp2 = zext i8 %tmp1 to i32
 	ret i32 %tmp2
 }
diff --git a/llvm/test/CodeGen/ARM/ldr_post.ll b/llvm/test/CodeGen/ARM/ldr_post.ll
index 2558b16..139c6f4 100644
--- a/llvm/test/CodeGen/ARM/ldr_post.ll
+++ b/llvm/test/CodeGen/ARM/ldr_post.ll
@@ -7,7 +7,7 @@
 define i32 @test1(i32 %a, i32 %b, i32 %c) {
         %tmp1 = mul i32 %a, %b          ; <i32> [#uses=2]
         %tmp2 = inttoptr i32 %tmp1 to i32*              ; <i32*> [#uses=1]
-        %tmp3 = load i32* %tmp2         ; <i32> [#uses=1]
+        %tmp3 = load i32, i32* %tmp2         ; <i32> [#uses=1]
         %tmp4 = sub i32 %tmp1, %c               ; <i32> [#uses=1]
         %tmp5 = mul i32 %tmp4, %tmp3            ; <i32> [#uses=1]
         ret i32 %tmp5
@@ -19,7 +19,7 @@
 define i32 @test2(i32 %a, i32 %b) {
         %tmp1 = mul i32 %a, %b          ; <i32> [#uses=2]
         %tmp2 = inttoptr i32 %tmp1 to i32*              ; <i32*> [#uses=1]
-        %tmp3 = load i32* %tmp2         ; <i32> [#uses=1]
+        %tmp3 = load i32, i32* %tmp2         ; <i32> [#uses=1]
         %tmp4 = sub i32 %tmp1, 16               ; <i32> [#uses=1]
         %tmp5 = mul i32 %tmp4, %tmp3            ; <i32> [#uses=1]
         ret i32 %tmp5
diff --git a/llvm/test/CodeGen/ARM/ldr_pre.ll b/llvm/test/CodeGen/ARM/ldr_pre.ll
index 4246c09..c6c76e2 100644
--- a/llvm/test/CodeGen/ARM/ldr_pre.ll
+++ b/llvm/test/CodeGen/ARM/ldr_pre.ll
@@ -6,7 +6,7 @@
 ; CHECK-NOT: ldr
 define i32* @test1(i32* %X, i32* %dest) {
         %Y = getelementptr i32, i32* %X, i32 4               ; <i32*> [#uses=2]
-        %A = load i32* %Y               ; <i32> [#uses=1]
+        %A = load i32, i32* %Y               ; <i32> [#uses=1]
         store i32 %A, i32* %dest
         ret i32* %Y
 }
@@ -17,7 +17,7 @@
 define i32 @test2(i32 %a, i32 %b, i32 %c) {
         %tmp1 = sub i32 %a, %b          ; <i32> [#uses=2]
         %tmp2 = inttoptr i32 %tmp1 to i32*              ; <i32*> [#uses=1]
-        %tmp3 = load i32* %tmp2         ; <i32> [#uses=1]
+        %tmp3 = load i32, i32* %tmp2         ; <i32> [#uses=1]
         %tmp4 = sub i32 %tmp1, %c               ; <i32> [#uses=1]
         %tmp5 = add i32 %tmp4, %tmp3            ; <i32> [#uses=1]
         ret i32 %tmp5
diff --git a/llvm/test/CodeGen/ARM/ldrd-memoper.ll b/llvm/test/CodeGen/ARM/ldrd-memoper.ll
index f1a1121..744fbd5 100644
--- a/llvm/test/CodeGen/ARM/ldrd-memoper.ll
+++ b/llvm/test/CodeGen/ARM/ldrd-memoper.ll
@@ -8,8 +8,8 @@
 ; CHECK: Formed {{.*}} t2LDRD{{.*}} mem:LD4[%0] LD4[%0+4]
 define i64 @t(i64 %a) nounwind readonly {
 entry:
-	%0 = load i64** @b, align 4
-	%1 = load i64* %0, align 4
+	%0 = load i64*, i64** @b, align 4
+	%1 = load i64, i64* %0, align 4
 	%2 = mul i64 %1, %a
 	ret i64 %2
 }
diff --git a/llvm/test/CodeGen/ARM/ldrd.ll b/llvm/test/CodeGen/ARM/ldrd.ll
index 7d8ca61..6fed0ae 100644
--- a/llvm/test/CodeGen/ARM/ldrd.ll
+++ b/llvm/test/CodeGen/ARM/ldrd.ll
@@ -19,8 +19,8 @@
 ; M3-LABEL: t:
 ; M3-NOT: ldrd
 
-	%0 = load i64** @b, align 4
-	%1 = load i64* %0, align 4
+	%0 = load i64*, i64** @b, align 4
+	%1 = load i64, i64* %0, align 4
 	%2 = mul i64 %1, %a
 	ret i64 %2
 }
@@ -57,8 +57,8 @@
   %scevgep4 = getelementptr i32, i32* %b, i32 %i.03    ; <i32*> [#uses=1]
   %tmp = add i32 %i.03, 1                         ; <i32> [#uses=3]
   %scevgep5 = getelementptr i32, i32* %a, i32 %tmp     ; <i32*> [#uses=1]
-  %2 = load i32* %scevgep, align 4                ; <i32> [#uses=1]
-  %3 = load i32* %scevgep5, align 4               ; <i32> [#uses=1]
+  %2 = load i32, i32* %scevgep, align 4                ; <i32> [#uses=1]
+  %3 = load i32, i32* %scevgep5, align 4               ; <i32> [#uses=1]
   %4 = add nsw i32 %3, %2                         ; <i32> [#uses=1]
   store i32 %4, i32* %scevgep4, align 4
   %exitcond = icmp eq i32 %tmp, %0                ; <i1> [#uses=1]
@@ -84,8 +84,8 @@
 ; A8-NEXT: str [[FIELD1]], {{\[}}[[BASE]]{{\]}}
   %orig_blocks = alloca [256 x i16], align 2
  %0 = bitcast [256 x i16]* %orig_blocks to i8*
  call void @llvm.lifetime.start(i64 512, i8* %0) nounwind
-  %tmp1 = load i32* getelementptr inbounds (%struct.Test* @TestVar, i32 0, i32 1), align 4
-  %tmp2 = load i32* getelementptr inbounds (%struct.Test* @TestVar, i32 0, i32 2), align 4
+  %tmp1 = load i32, i32* getelementptr inbounds (%struct.Test* @TestVar, i32 0, i32 1), align 4
+  %tmp2 = load i32, i32* getelementptr inbounds (%struct.Test* @TestVar, i32 0, i32 2), align 4
   %add = add nsw i32 %tmp2, %tmp1
   store i32 %add, i32* getelementptr inbounds (%struct.Test* @TestVar, i32 0, i32 0), align 4
   call void @llvm.lifetime.end(i64 512, i8* %0) nounwind
diff --git a/llvm/test/CodeGen/ARM/ldst-f32-2-i32.ll b/llvm/test/CodeGen/ARM/ldst-f32-2-i32.ll
index 82c0fdb..d00f44e 100644
--- a/llvm/test/CodeGen/ARM/ldst-f32-2-i32.ll
+++ b/llvm/test/CodeGen/ARM/ldst-f32-2-i32.ll
@@ -17,7 +17,7 @@
   %uglygep = getelementptr i8, i8* %src6, i32 %tmp
   %src_addr.04 = bitcast i8* %uglygep to float*
   %dst_addr.03 = getelementptr float, float* %dst, i32 %j.05
-  %1 = load float* %src_addr.04, align 4
+  %1 = load float, float* %src_addr.04, align 4
   store float %1, float* %dst_addr.03, align 4
   %2 = add i32 %j.05, 1
   %exitcond = icmp eq i32 %2, %width
diff --git a/llvm/test/CodeGen/ARM/ldstrex-m.ll b/llvm/test/CodeGen/ARM/ldstrex-m.ll
index b50699f..3d83a9d 100644
--- a/llvm/test/CodeGen/ARM/ldstrex-m.ll
+++ b/llvm/test/CodeGen/ARM/ldstrex-m.ll
@@ -4,7 +4,7 @@
 ; CHECK-NOT: ldrexd
 define i64 @f0(i64* %p) nounwind readonly {
 entry:
-  %0 = load atomic i64* %p seq_cst, align 8
+  %0 = load atomic i64, i64* %p seq_cst, align 8
   ret i64 %0
 }
 
@@ -29,7 +29,7 @@
 ; CHECK: ldr
 define i32 @f3(i32* %p) nounwind readonly {
 entry:
-  %0 = load atomic i32* %p seq_cst, align 4
+  %0 = load atomic i32, i32* %p seq_cst, align 4
   ret i32 %0
 }
 
@@ -37,7 +37,7 @@
 ; CHECK: ldrb
 define i8 @f4(i8* %p) nounwind readonly {
 entry:
-  %0 = load atomic i8* %p seq_cst, align 4
+  %0 = load atomic i8, i8* %p seq_cst, align 4
   ret i8 %0
 }
 
diff --git a/llvm/test/CodeGen/ARM/ldstrex.ll b/llvm/test/CodeGen/ARM/ldstrex.ll
index edcf116..59349f7 100644
--- a/llvm/test/CodeGen/ARM/ldstrex.ll
+++ b/llvm/test/CodeGen/ARM/ldstrex.ll
@@ -106,14 +106,14 @@
 
 define void @excl_addrmode() {
 ; CHECK-T2ADDRMODE-LABEL: excl_addrmode:
-  %base1020 = load i32** @base
+  %base1020 = load i32*, i32** @base
   %offset1020 = getelementptr i32, i32* %base1020, i32 255
   call i32 @llvm.arm.ldrex.p0i32(i32* %offset1020)
   call i32 @llvm.arm.strex.p0i32(i32 0, i32* %offset1020)
 ; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [{{r[0-9]+}}, #1020]
 ; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [{{r[0-9]+}}, #1020]
 
-  %base1024 = load i32** @base
+  %base1024 = load i32*, i32** @base
   %offset1024 = getelementptr i32, i32* %base1024, i32 256
   call i32 @llvm.arm.ldrex.p0i32(i32* %offset1024)
   call i32 @llvm.arm.strex.p0i32(i32 0, i32* %offset1024)
@@ -121,7 +121,7 @@
 ; CHECK-T2ADDRMODE: ldrex {{r[0-9]+}}, [r[[ADDR]]]
 ; CHECK-T2ADDRMODE: strex {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]]
 
-  %base1 = load i32** @base
+  %base1 = load i32*, i32** @base
   %addr8 = bitcast i32* %base1 to i8*
   %offset1_8 = getelementptr i8, i8* %addr8, i32 1
   %offset1 = bitcast i8* %offset1_8 to i32*
diff --git a/llvm/test/CodeGen/ARM/load-global.ll b/llvm/test/CodeGen/ARM/load-global.ll
index 00ca2e8..34748bc 100644
--- a/llvm/test/CodeGen/ARM/load-global.ll
+++ b/llvm/test/CodeGen/ARM/load-global.ll
@@ -45,6 +45,6 @@
 ; LINUX: ldr r0, [r1, r0]
 ; LINUX: ldr r0, [r0]
 ; LINUX: .long G(GOT)
-	%tmp = load i32* @G
+	%tmp = load i32, i32* @G
 	ret i32 %tmp
 }
diff --git a/llvm/test/CodeGen/ARM/load.ll b/llvm/test/CodeGen/ARM/load.ll
index ca16adc..3b2d637 100644
--- a/llvm/test/CodeGen/ARM/load.ll
+++ b/llvm/test/CodeGen/ARM/load.ll
@@ -2,28 +2,28 @@
 
 define i32 @f1(i8* %p) {
 entry:
-        %tmp = load i8* %p              ; <i8> [#uses=1]
+        %tmp = load i8, i8* %p              ; <i8> [#uses=1]
         %tmp1 = sext i8 %tmp to i32              ; <i32> [#uses=1]
         ret i32 %tmp1
 }
 
 define i32 @f2(i8* %p) {
 entry:
-        %tmp = load i8* %p              ; <i8> [#uses=1]
+        %tmp = load i8, i8* %p              ; <i8> [#uses=1]
         %tmp2 = zext i8 %tmp to i32              ; <i32> [#uses=1]
         ret i32 %tmp2
 }
 
 define i32 @f3(i16* %p) {
 entry:
-        %tmp = load i16* %p             ; <i16> [#uses=1]
+        %tmp = load i16, i16* %p             ; <i16> [#uses=1]
         %tmp3 = sext i16 %tmp to i32             ; <i32> [#uses=1]
         ret i32 %tmp3
 }
 
 define i32 @f4(i16* %p) {
 entry:
-        %tmp = load i16* %p             ; <i16> [#uses=1]
+        %tmp = load i16, i16* %p             ; <i16> [#uses=1]
         %tmp4 = zext i16 %tmp to i32             ; <i32> [#uses=1]
         ret i32 %tmp4
 }
diff --git a/llvm/test/CodeGen/ARM/load_i1_select.ll b/llvm/test/CodeGen/ARM/load_i1_select.ll
index 7a208ea..48cd4ea 100644
--- a/llvm/test/CodeGen/ARM/load_i1_select.ll
+++ b/llvm/test/CodeGen/ARM/load_i1_select.ll
@@ -11,7 +11,7 @@
 ; CHECK: tst.w r[[R0]], #1
 define void @foo(i8* %call, double* %p) nounwind {
 entry:
-  %tmp2 = load i8* %call
+  %tmp2 = load i8, i8* %call
   %tmp3 = trunc i8 %tmp2 to i1
   %cond = select i1 %tmp3, double 2.000000e+00, double 1.000000e+00
   store double %cond, double* %p
diff --git a/llvm/test/CodeGen/ARM/long.ll b/llvm/test/CodeGen/ARM/long.ll
index d0bff4a..1807813 100644
--- a/llvm/test/CodeGen/ARM/long.ll
+++ b/llvm/test/CodeGen/ARM/long.ll
@@ -85,6 +85,6 @@
 ; CHECK-LABEL: f10:
 entry:
         %a = alloca i64, align 8                ; <i64*> [#uses=1]
-        %retval = load i64* %a          ; <i64> [#uses=1]
+        %retval = load i64, i64* %a          ; <i64> [#uses=1]
         ret i64 %retval
 }
diff --git a/llvm/test/CodeGen/ARM/lsr-code-insertion.ll b/llvm/test/CodeGen/ARM/lsr-code-insertion.ll
index a5b3333..aa2b2d2 100644
--- a/llvm/test/CodeGen/ARM/lsr-code-insertion.ll
+++ b/llvm/test/CodeGen/ARM/lsr-code-insertion.ll
@@ -23,16 +23,16 @@
 	%indvar = phi i32 [ 0, %entry ], [ %k.069.0, %cond_next59 ]		; <i32> [#uses=6]
 	%k.069.0 = add i32 %indvar, 1		; <i32> [#uses=3]
 	%tmp3 = getelementptr i32, i32* %mpp, i32 %indvar		; <i32*> [#uses=1]
-	%tmp4 = load i32* %tmp3		; <i32> [#uses=1]
+	%tmp4 = load i32, i32* %tmp3		; <i32> [#uses=1]
 	%tmp8 = getelementptr i32, i32* %tpmm, i32 %indvar		; <i32*> [#uses=1]
-	%tmp9 = load i32* %tmp8		; <i32> [#uses=1]
+	%tmp9 = load i32, i32* %tmp8		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp4		; <i32> [#uses=2]
 	%tmp13 = getelementptr i32, i32* %mc, i32 %k.069.0		; <i32*> [#uses=5]
 	store i32 %tmp10, i32* %tmp13
 	%tmp17 = getelementptr i32, i32* %ip, i32 %indvar		; <i32*> [#uses=1]
-	%tmp18 = load i32* %tmp17		; <i32> [#uses=1]
+	%tmp18 = load i32, i32* %tmp17		; <i32> [#uses=1]
 	%tmp22 = getelementptr i32, i32* %tpim, i32 %indvar		; <i32*> [#uses=1]
-	%tmp23 = load i32* %tmp22		; <i32> [#uses=1]
+	%tmp23 = load i32, i32* %tmp22		; <i32> [#uses=1]
 	%tmp24 = add i32 %tmp23, %tmp18		; <i32> [#uses=2]
 	%tmp30 = icmp sgt i32 %tmp24, %tmp10		; <i1> [#uses=1]
 	br i1 %tmp30, label %cond_true, label %cond_next
@@ -42,9 +42,9 @@
 	br label %cond_next
 
 cond_next:		; preds = %cond_true, %bb
-	%tmp39 = load i32* %tmp13		; <i32> [#uses=1]
+	%tmp39 = load i32, i32* %tmp13		; <i32> [#uses=1]
 	%tmp42 = getelementptr i32, i32* %ms, i32 %k.069.0		; <i32*> [#uses=1]
-	%tmp43 = load i32* %tmp42		; <i32> [#uses=1]
+	%tmp43 = load i32, i32* %tmp42		; <i32> [#uses=1]
 	%tmp44 = add i32 %tmp43, %tmp39		; <i32> [#uses=2]
 	store i32 %tmp44, i32* %tmp13
 	%tmp52 = icmp slt i32 %tmp44, -987654321		; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/lsr-icmp-imm.ll b/llvm/test/CodeGen/ARM/lsr-icmp-imm.ll
index ddadadc..ef98727 100644
--- a/llvm/test/CodeGen/ARM/lsr-icmp-imm.ll
+++ b/llvm/test/CodeGen/ARM/lsr-icmp-imm.ll
@@ -19,7 +19,7 @@
   %i.addr.05 = phi i32 [ %sub, %for.body ], [ %i, %entry ]
   %b.04 = phi i32 [ %.b.0, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.addr.05
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, %b.04
   %.b.0 = select i1 %cmp1, i32 %0, i32 %b.04
   %i.addr.0.bi.0 = select i1 %cmp1, i32 %i.addr.05, i32 %bi.06
diff --git a/llvm/test/CodeGen/ARM/lsr-unfolded-offset.ll b/llvm/test/CodeGen/ARM/lsr-unfolded-offset.ll
index 410233e..17292cf 100644
--- a/llvm/test/CodeGen/ARM/lsr-unfolded-offset.ll
+++ b/llvm/test/CodeGen/ARM/lsr-unfolded-offset.ll
@@ -25,8 +25,8 @@
   %0 = phi i32 [ %inc71, %for.inc69 ], [ 0, %entry ]
   %offset = getelementptr %struct.partition_entry, %struct.partition_entry* %part, i32 %0, i32 2
   %len = getelementptr %struct.partition_entry, %struct.partition_entry* %part, i32 %0, i32 3
-  %tmp5 = load i64* %offset, align 4
-  %tmp15 = load i64* %len, align 4
+  %tmp5 = load i64, i64* %offset, align 4
+  %tmp15 = load i64, i64* %len, align 4
   %add = add nsw i64 %tmp15, %tmp5
   br label %inner.loop
 
@@ -39,8 +39,8 @@
 if.end:                                           ; preds = %inner.loop
   %len39 = getelementptr %struct.partition_entry, %struct.partition_entry* %part, i32 %1, i32 3
   %offset28 = getelementptr %struct.partition_entry, %struct.partition_entry* %part, i32 %1, i32 2
-  %tmp29 = load i64* %offset28, align 4
-  %tmp40 = load i64* %len39, align 4
+  %tmp29 = load i64, i64* %offset28, align 4
+  %tmp40 = load i64, i64* %len39, align 4
   %add41 = add nsw i64 %tmp40, %tmp29
   %cmp44 = icmp sge i64 %tmp29, %tmp5
   %cmp47 = icmp slt i64 %tmp29, %add
diff --git a/llvm/test/CodeGen/ARM/machine-cse-cmp.ll b/llvm/test/CodeGen/ARM/machine-cse-cmp.ll
index 895e74a..9d7ea62 100644
--- a/llvm/test/CodeGen/ARM/machine-cse-cmp.ll
+++ b/llvm/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -30,7 +30,7 @@
 ; CHECK: poplt
 ; CHECK-NOT: cmp
 ; CHECK: movle
-  %0 = load i32* @foo, align 4
+  %0 = load i32, i32* @foo, align 4
   %cmp28 = icmp sgt i32 %0, 0
   br i1 %cmp28, label %for.body.lr.ph, label %for.cond1.preheader
 
@@ -53,7 +53,7 @@
 ; CHECK-NOT: sub
 ; CHECK: cmp
 ; CHECK: blt
-%0 = load i32* %offset, align 4
+%0 = load i32, i32* %offset, align 4
 %cmp = icmp slt i32 %0, %size
 %s = sub nsw i32 %0, %size
 %size2 = sub nsw i32 %size, 0
diff --git a/llvm/test/CodeGen/ARM/machine-licm.ll b/llvm/test/CodeGen/ARM/machine-licm.ll
index 921426e..a1eec78 100644
--- a/llvm/test/CodeGen/ARM/machine-licm.ll
+++ b/llvm/test/CodeGen/ARM/machine-licm.ll
@@ -39,14 +39,14 @@
 ; THUMB: LCPI0_0:
 ; THUMB-NOT: LCPI0_1:
 ; THUMB: .section
-  %.pre = load i32* @GV, align 4                  ; <i32> [#uses=1]
+  %.pre = load i32, i32* @GV, align 4                  ; <i32> [#uses=1]
   br label %bb
 
 bb:                                               ; preds = %bb, %bb.nph
   %1 = phi i32 [ %.pre, %bb.nph ], [ %3, %bb ]    ; <i32> [#uses=1]
   %i.03 = phi i32 [ 0, %bb.nph ], [ %4, %bb ]     ; <i32> [#uses=2]
   %scevgep = getelementptr i32, i32* %vals, i32 %i.03  ; <i32*> [#uses=1]
-  %2 = load i32* %scevgep, align 4                ; <i32> [#uses=1]
+  %2 = load i32, i32* %scevgep, align 4                ; <i32> [#uses=1]
   %3 = add nsw i32 %1, %2                         ; <i32> [#uses=2]
   store i32 %3, i32* @GV, align 4
   %4 = add i32 %i.03, 1                           ; <i32> [#uses=2]
diff --git a/llvm/test/CodeGen/ARM/minsize-litpools.ll b/llvm/test/CodeGen/ARM/minsize-litpools.ll
index d5cd2a9..d5452ed 100644
--- a/llvm/test/CodeGen/ARM/minsize-litpools.ll
+++ b/llvm/test/CodeGen/ARM/minsize-litpools.ll
@@ -12,7 +12,7 @@
 ; CHECK: ldr r[[GLOBDEST:[0-9]+]], {{.?LCPI0_0}}
 ; CHECK: ldr r0, [r[[GLOBDEST]]]
 
-  %val = load i32* @var
+  %val = load i32, i32* @var
   ret i32 %val
 }
 
@@ -21,6 +21,6 @@
 ; CHECK: movw [[GLOBDEST:r[0-9]+]], :lower16:var
 ; CHECK: movt [[GLOBDEST]], :upper16:var
 
-  %val = load i32* @var
+  %val = load i32, i32* @var
   ret i32 %val
 }
diff --git a/llvm/test/CodeGen/ARM/misched-copy-arm.ll b/llvm/test/CodeGen/ARM/misched-copy-arm.ll
index 4f5249c..330252a 100644
--- a/llvm/test/CodeGen/ARM/misched-copy-arm.ll
+++ b/llvm/test/CodeGen/ARM/misched-copy-arm.ll
@@ -19,7 +19,7 @@
   %s.05 = phi i32 [ %mul, %for.body ], [ 0, %entry ]
   %indvars.iv.next = add i32 %indvars.iv, %s
   %arrayidx = getelementptr inbounds i32, i32* %d, i32 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %mul = mul nsw i32 %0, %s.05
   %exitcond = icmp eq i32 %indvars.iv.next, %a
   br i1 %exitcond, label %for.end, label %for.body
@@ -65,7 +65,7 @@
   %dst.1 = phi %struct.rtx_def* [ undef, %if.then24 ], [ %dst.0, %while.cond ], [ %dst.0, %while.cond ]
   %arrayidx30 = getelementptr inbounds %struct.rtx_def, %struct.rtx_def* %dst.1, i32 0, i32 1, i32 0
   %rtx31 = bitcast %union.rtunion_def* %arrayidx30 to %struct.rtx_def**
-  %0 = load %struct.rtx_def** %rtx31, align 4
+  %0 = load %struct.rtx_def*, %struct.rtx_def** %rtx31, align 4
   br label %while.cond
 
 if.then46:                                        ; preds = %while.cond
diff --git a/llvm/test/CodeGen/ARM/mult-alt-generic-arm.ll b/llvm/test/CodeGen/ARM/mult-alt-generic-arm.ll
index 05e9b0f..2c0a612 100644
--- a/llvm/test/CodeGen/ARM/mult-alt-generic-arm.ll
+++ b/llvm/test/CodeGen/ARM/mult-alt-generic-arm.ll
@@ -33,10 +33,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -48,10 +48,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -63,7 +63,7 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
   ret void
@@ -120,10 +120,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind
@@ -137,10 +137,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind
@@ -166,7 +166,7 @@
 
 define arm_aapcscc void @multi_m() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind
   ret void
 }
@@ -191,10 +191,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -206,10 +206,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -221,7 +221,7 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
   ret void
@@ -278,10 +278,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind
@@ -295,10 +295,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind
diff --git a/llvm/test/CodeGen/ARM/negative-offset.ll b/llvm/test/CodeGen/ARM/negative-offset.ll
index a5fdb1e..bafc964 100644
--- a/llvm/test/CodeGen/ARM/negative-offset.ll
+++ b/llvm/test/CodeGen/ARM/negative-offset.ll
@@ -8,9 +8,9 @@
 ;CHECK: ldr r{{.*}}, [r0, #-16]
 ;CHECK: ldr r{{.*}}, [r0, #-8]
   %arrayidx = getelementptr inbounds i32, i32* %p, i32 -4
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %p, i32 -2
-  %1 = load i32* %arrayidx1, align 4
+  %1 = load i32, i32* %arrayidx1, align 4
   %add = add nsw i32 %1, %0
   ret i32 %add
 }
diff --git a/llvm/test/CodeGen/ARM/neon_cmp.ll b/llvm/test/CodeGen/ARM/neon_cmp.ll
index e1662c4..fcf4486 100644
--- a/llvm/test/CodeGen/ARM/neon_cmp.ll
+++ b/llvm/test/CodeGen/ARM/neon_cmp.ll
@@ -4,8 +4,8 @@
 ; radar://13191881
 ; CHECK: vfcmp
 define void @vfcmp(<2 x double>* %a, <2 x double>* %b) {
-  %wide.load = load <2 x double>* %a, align 4
-  %wide.load2 = load <2 x double>* %b, align 4
+  %wide.load = load <2 x double>, <2 x double>* %a, align 4
+  %wide.load2 = load <2 x double>, <2 x double>* %b, align 4
 ; CHECK-NOT: vdup.32
 ; CHECK-NOT: vmovn.i64
   %v1 = fcmp olt <2 x double> %wide.load, %wide.load2
diff --git a/llvm/test/CodeGen/ARM/neon_div.ll b/llvm/test/CodeGen/ARM/neon_div.ll
index 4f1607e..e185c2a 100644
--- a/llvm/test/CodeGen/ARM/neon_div.ll
+++ b/llvm/test/CodeGen/ARM/neon_div.ll
@@ -7,8 +7,8 @@
 ;CHECK: vrecpe.f32
 ;CHECK: vmovn.i32
 ;CHECK: vmovn.i16
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = sdiv <8 x i8> %tmp1, %tmp2
 	ret <8 x i8> %tmp3
 }
@@ -21,8 +21,8 @@
 ;CHECK: vrecps.f32
 ;CHECK: vmovn.i32
 ;CHECK: vqmovun.s16
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = udiv <8 x i8> %tmp1, %tmp2
 	ret <8 x i8> %tmp3
 }
@@ -31,8 +31,8 @@
 ;CHECK: vrecpe.f32
 ;CHECK: vrecps.f32
 ;CHECK: vmovn.i32
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = sdiv <4 x i16> %tmp1, %tmp2
 	ret <4 x i16> %tmp3
 }
@@ -42,8 +42,8 @@
 ;CHECK: vrecps.f32
 ;CHECK: vrecps.f32
 ;CHECK: vmovn.i32
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = udiv <4 x i16> %tmp1, %tmp2
 	ret <4 x i16> %tmp3
 }
diff --git a/llvm/test/CodeGen/ARM/neon_fpconv.ll b/llvm/test/CodeGen/ARM/neon_fpconv.ll
index 8e37ce7..61ac409 100644
--- a/llvm/test/CodeGen/ARM/neon_fpconv.ll
+++ b/llvm/test/CodeGen/ARM/neon_fpconv.ll
@@ -19,7 +19,7 @@
 ; CHECK: vsitofp_double
 define void @vsitofp_double(<2 x i32>* %loadaddr,
                             <2 x double>* %storeaddr) {
-  %v0 = load <2 x i32>* %loadaddr
+  %v0 = load <2 x i32>, <2 x i32>* %loadaddr
 ; CHECK:      vldr
 ; CHECK-NEXT:	vcvt.f64.s32
 ; CHECK-NEXT:	vcvt.f64.s32
@@ -31,7 +31,7 @@
 ; CHECK: vuitofp_double
 define void @vuitofp_double(<2 x i32>* %loadaddr,
                             <2 x double>* %storeaddr) {
-  %v0 = load <2 x i32>* %loadaddr
+  %v0 = load <2 x i32>, <2 x i32>* %loadaddr
 ; CHECK:      vldr
 ; CHECK-NEXT:	vcvt.f64.u32
 ; CHECK-NEXT:	vcvt.f64.u32
diff --git a/llvm/test/CodeGen/ARM/neon_ld1.ll b/llvm/test/CodeGen/ARM/neon_ld1.ll
index 9fd3fc5..f4d6019 100644
--- a/llvm/test/CodeGen/ARM/neon_ld1.ll
+++ b/llvm/test/CodeGen/ARM/neon_ld1.ll
@@ -7,8 +7,8 @@
 ; CHECK: vstr d
 define void @t1(<2 x i32>* %r, <4 x i16>* %a, <4 x i16>* %b) nounwind {
 entry:
-	%0 = load <4 x i16>* %a, align 8		; <<4 x i16>> [#uses=1]
-	%1 = load <4 x i16>* %b, align 8		; <<4 x i16>> [#uses=1]
+	%0 = load <4 x i16>, <4 x i16>* %a, align 8		; <<4 x i16>> [#uses=1]
+	%1 = load <4 x i16>, <4 x i16>* %b, align 8		; <<4 x i16>> [#uses=1]
 	%2 = add <4 x i16> %0, %1		; <<4 x i16>> [#uses=1]
 	%3 = bitcast <4 x i16> %2 to <2 x i32>		; <<2 x i32>> [#uses=1]
 	store <2 x i32> %3, <2 x i32>* %r, align 8
@@ -22,8 +22,8 @@
 ; CHECK: vmov r0, r1, d
 define <2 x i32> @t2(<4 x i16>* %a, <4 x i16>* %b) nounwind readonly {
 entry:
-	%0 = load <4 x i16>* %a, align 8		; <<4 x i16>> [#uses=1]
-	%1 = load <4 x i16>* %b, align 8		; <<4 x i16>> [#uses=1]
+	%0 = load <4 x i16>, <4 x i16>* %a, align 8		; <<4 x i16>> [#uses=1]
+	%1 = load <4 x i16>, <4 x i16>* %b, align 8		; <<4 x i16>> [#uses=1]
 	%2 = sub <4 x i16> %0, %1		; <<4 x i16>> [#uses=1]
 	%3 = bitcast <4 x i16> %2 to <2 x i32>		; <<2 x i32>> [#uses=1]
 	ret <2 x i32> %3
diff --git a/llvm/test/CodeGen/ARM/neon_ld2.ll b/llvm/test/CodeGen/ARM/neon_ld2.ll
index 571a16a..5bd6ae6 100644
--- a/llvm/test/CodeGen/ARM/neon_ld2.ll
+++ b/llvm/test/CodeGen/ARM/neon_ld2.ll
@@ -13,8 +13,8 @@
 ; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+:128\]}}
 define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
 entry:
-	%0 = load <2 x i64>* %a, align 16		; <<2 x i64>> [#uses=1]
-	%1 = load <2 x i64>* %b, align 16		; <<2 x i64>> [#uses=1]
+	%0 = load <2 x i64>, <2 x i64>* %a, align 16		; <<2 x i64>> [#uses=1]
+	%1 = load <2 x i64>, <2 x i64>* %b, align 16		; <<2 x i64>> [#uses=1]
 	%2 = add <2 x i64> %0, %1		; <<2 x i64>> [#uses=1]
 	%3 = bitcast <2 x i64> %2 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	store <4 x i32> %3, <4 x i32>* %r, align 16
@@ -35,8 +35,8 @@
 ; SWIFT: vmov r2, r3, d
 define <4 x i32> @t2(<2 x i64>* %a, <2 x i64>* %b) nounwind readonly {
 entry:
-	%0 = load <2 x i64>* %a, align 16		; <<2 x i64>> [#uses=1]
-	%1 = load <2 x i64>* %b, align 16		; <<2 x i64>> [#uses=1]
+	%0 = load <2 x i64>, <2 x i64>* %a, align 16		; <<2 x i64>> [#uses=1]
+	%1 = load <2 x i64>, <2 x i64>* %b, align 16		; <<2 x i64>> [#uses=1]
 	%2 = sub <2 x i64> %0, %1		; <<2 x i64>> [#uses=1]
 	%3 = bitcast <2 x i64> %2 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	ret <4 x i32> %3
@@ -50,8 +50,8 @@
 ; SWIFT: vst1.64 {{.d[0-9]+, d[0-9]+}, \[r[0-9]+}}
 define void @t3(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
 entry:
-	%0 = load <2 x i64>* %a, align 8
-	%1 = load <2 x i64>* %b, align 8
+	%0 = load <2 x i64>, <2 x i64>* %a, align 8
+	%1 = load <2 x i64>, <2 x i64>* %b, align 8
 	%2 = add <2 x i64> %0, %1
 	%3 = bitcast <2 x i64> %2 to <4 x i32>
 	store <4 x i32> %3, <4 x i32>* %r, align 8
diff --git a/llvm/test/CodeGen/ARM/neon_spill.ll b/llvm/test/CodeGen/ARM/neon_spill.ll
index d286d16..04f08f438 100644
--- a/llvm/test/CodeGen/ARM/neon_spill.ll
+++ b/llvm/test/CodeGen/ARM/neon_spill.ll
@@ -24,7 +24,7 @@
 define arm_aapcs_vfpcc void @foo(%3* nocapture) nounwind align 2 {
   call void @llvm.arm.neon.vst4.v4i32(i8* undef, <4 x i32> <i32 0, i32 1065353216, i32 1073741824, i32 1077936128>, <4 x i32> <i32 1082130432, i32 1084227584, i32 1086324736, i32 1088421888>, <4 x i32> <i32 1090519040, i32 1091567616, i32 1092616192, i32 1093664768>, <4 x i32> <i32 1094713344, i32 1095761920, i32 1096810496, i32 1097859072>, i32 16) nounwind
   %2 = call arm_aapcs_vfpcc  %0** @func2() nounwind
-  %3 = load %0** %2, align 4
+  %3 = load %0*, %0** %2, align 4
   store float 0.000000e+00, float* undef, align 4
   %4 = call arm_aapcs_vfpcc  %2* @func3(%2* undef, %2* undef, i32 2956) nounwind
   call arm_aapcs_vfpcc  void @func1(%0* %3, float* undef, float* undef, %2* undef)
diff --git a/llvm/test/CodeGen/ARM/no-fpu.ll b/llvm/test/CodeGen/ARM/no-fpu.ll
index fff4bcc..c5d1f19 100644
--- a/llvm/test/CodeGen/ARM/no-fpu.ll
+++ b/llvm/test/CodeGen/ARM/no-fpu.ll
@@ -6,7 +6,7 @@
 ; Check no NEON instructions are selected when feature is disabled.
 define void @neonop(i64* nocapture readonly %a, i64* nocapture %b) #0 {
   %1 = bitcast i64* %a to <2 x i64>*
-  %wide.load = load <2 x i64>* %1, align 8
+  %wide.load = load <2 x i64>, <2 x i64>* %1, align 8
   ; NONEON-NOVFP-NOT: vld1.64
   ; NONEON-NOT: vld1.64
   %add = add <2 x i64> %wide.load, %wide.load
diff --git a/llvm/test/CodeGen/ARM/no-tail-call.ll b/llvm/test/CodeGen/ARM/no-tail-call.ll
index b65edc2..5a5d43c 100644
--- a/llvm/test/CodeGen/ARM/no-tail-call.ll
+++ b/llvm/test/CodeGen/ARM/no-tail-call.ll
@@ -38,7 +38,7 @@
   %5 = call float @llvm.ceil.f32(float 5.000000e+00)
   %.native3 = getelementptr inbounds %foo, %foo* %1, i32 0, i32 0
   %.native3.value = getelementptr inbounds %Sf, %Sf* %.native3, i32 0, i32 0
-  %6 = load float* %.native3.value, align 4
+  %6 = load float, float* %.native3.value, align 4
   %7 = call float @llvm.ceil.f32(float %6)
   %8 = insertvalue { float, float, float } { float 0.000000e+00, float undef, float undef }, float %5, 1
   %9 = insertvalue { float, float, float } %8, float %7, 2
@@ -76,7 +76,7 @@
   %5 = call float @llvm.ceil.f32(float 5.000000e+00)
   %.native3 = getelementptr inbounds %foo, %foo* %1, i32 0, i32 0
   %.native3.value = getelementptr inbounds %Sf, %Sf* %.native3, i32 0, i32 0
-  %6 = load float* %.native3.value, align 4
+  %6 = load float, float* %.native3.value, align 4
   %7 = call float @llvm.ceil.f32(float %6)
   %8 = insertvalue { float, float } { float 0.000000e+00, float undef }, float %7, 1
   ret { float, float } %8
diff --git a/llvm/test/CodeGen/ARM/none-macho.ll b/llvm/test/CodeGen/ARM/none-macho.ll
index 2a7878f..23555b3 100644
--- a/llvm/test/CodeGen/ARM/none-macho.ll
+++ b/llvm/test/CodeGen/ARM/none-macho.ll
@@ -11,7 +11,7 @@
 
 define i32 @test_litpool() minsize {
 ; CHECK-LABEL: test_litpool:
-  %val = load i32* @var
+  %val = load i32, i32* @var
   ret i32 %val
 
   ; Lit-pool entries need to produce a "$non_lazy_ptr" version of the symbol.
@@ -21,7 +21,7 @@
 
 define i32 @test_movw_movt() {
 ; CHECK-LABEL: test_movw_movt:
-  %val = load i32* @var
+  %val = load i32, i32* @var
   ret i32 %val
 
   ; movw/movt should also address their symbols MachO-style
@@ -56,7 +56,7 @@
 %big_arr = type [8 x i32]
 define void @test_two_areas(%big_arr* %addr) {
 ; CHECK-LABEL: test_two_areas:
-  %val = load %big_arr* %addr
+  %val = load %big_arr, %big_arr* %addr
   call void @test_trap()
   store %big_arr %val, %big_arr* %addr
 
diff --git a/llvm/test/CodeGen/ARM/nop_concat_vectors.ll b/llvm/test/CodeGen/ARM/nop_concat_vectors.ll
index c810900..fa0e892 100644
--- a/llvm/test/CodeGen/ARM/nop_concat_vectors.ll
+++ b/llvm/test/CodeGen/ARM/nop_concat_vectors.ll
@@ -5,7 +5,7 @@
 ;CHECK-NOT: vst1.32
 ;CHECK: bx
 define void @foo(<16 x i8>* %J) {
-  %A = load <16 x i8>* %J
+  %A = load <16 x i8>, <16 x i8>* %J
   %T1 = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %T2 = shufflevector <8 x i8>  %T1, <8 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   store <16 x i8> %T2, <16 x i8>* %J
diff --git a/llvm/test/CodeGen/ARM/optselect-regclass.ll b/llvm/test/CodeGen/ARM/optselect-regclass.ll
index 0acb2f2..5935a4f 100644
--- a/llvm/test/CodeGen/ARM/optselect-regclass.ll
+++ b/llvm/test/CodeGen/ARM/optselect-regclass.ll
@@ -9,7 +9,7 @@
 ; Function Attrs: nounwind ssp
 define void @xfr() {
 entry:
-  %bf.load4 = load i32* getelementptr inbounds (%union.opcode.0.2.5.8.15.28* @opcode, i32 0, i32 0), align 4
+  %bf.load4 = load i32, i32* getelementptr inbounds (%union.opcode.0.2.5.8.15.28* @opcode, i32 0, i32 0), align 4
   %bf.clear10 = and i32 %bf.load4, 65535
   %and11 = and i32 %bf.load4, 32768
   %tobool12 = icmp ne i32 %and11, 0
diff --git a/llvm/test/CodeGen/ARM/phi.ll b/llvm/test/CodeGen/ARM/phi.ll
index 3ac0f52..ff85052 100644
--- a/llvm/test/CodeGen/ARM/phi.ll
+++ b/llvm/test/CodeGen/ARM/phi.ll
@@ -19,7 +19,7 @@
 end:
 ; CHECK: ldr	r0, [r1, #4]
   %gep = phi i32* [%lbranch, %lblock], [%rbranch, %rblock]
-  %r = load i32* %gep
+  %r = load i32, i32* %gep
 ; CHECK-NEXT: bx	lr
   ret i32 %r
 }
diff --git a/llvm/test/CodeGen/ARM/popcnt.ll b/llvm/test/CodeGen/ARM/popcnt.ll
index 7ace640..74f9064 100644
--- a/llvm/test/CodeGen/ARM/popcnt.ll
+++ b/llvm/test/CodeGen/ARM/popcnt.ll
@@ -4,7 +4,7 @@
 define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vcnt8:
 ;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1)
 	ret <8 x i8> %tmp2
 }
@@ -12,7 +12,7 @@
 define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vcntQ8:
 ;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1)
 	ret <16 x i8> %tmp2
 }
@@ -24,7 +24,7 @@
 ; CHECK: vadd.i8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
 ; CHECK: vuzp.8 {{d[0-9]+}}, {{d[0-9]+}}
 ; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %tmp1)
 	ret <4 x i16> %tmp2
 }
@@ -36,7 +36,7 @@
 ; CHECK: vadd.i8 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
 ; CHECK: vuzp.8 {{q[0-9]+}}, {{q[0-9]+}}
 ; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %tmp1)
 	ret <8 x i16> %tmp2
 }
@@ -51,7 +51,7 @@
 ; CHECK: vrev32.16 {{d[0-9]+}}, {{d[0-9]+}}
 ; CHECK: vuzp.16 {{d[0-9]+}}, {{d[0-9]+}}
 ; CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %tmp1)
 	ret <2 x i32> %tmp2
 }
@@ -66,7 +66,7 @@
 ; CHECK: vrev32.16 {{q[0-9]+}}, {{q[0-9]+}}
 ; CHECK: vuzp.16 {{q[0-9]+}}, {{q[0-9]+}}
 ; CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %tmp1)
 	ret <4 x i32> %tmp2
 }
@@ -81,7 +81,7 @@
 define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vclz8:
 ;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}}
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0)
 	ret <8 x i8> %tmp2
 }
@@ -89,7 +89,7 @@
 define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vclz16:
 ;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}}
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0)
 	ret <4 x i16> %tmp2
 }
@@ -97,7 +97,7 @@
 define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vclz32:
 ;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}}
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0)
 	ret <2 x i32> %tmp2
 }
@@ -105,7 +105,7 @@
 define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vclzQ8:
 ;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}}
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0)
 	ret <16 x i8> %tmp2
 }
@@ -113,7 +113,7 @@
 define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vclzQ16:
 ;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}}
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0)
 	ret <8 x i16> %tmp2
 }
@@ -121,7 +121,7 @@
 define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vclzQ32:
 ;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}}
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0)
 	ret <4 x i32> %tmp2
 }
@@ -137,7 +137,7 @@
 define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vclss8:
 ;CHECK: vcls.s8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
 	ret <8 x i8> %tmp2
 }
@@ -145,7 +145,7 @@
 define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vclss16:
 ;CHECK: vcls.s16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
 	ret <4 x i16> %tmp2
 }
@@ -153,7 +153,7 @@
 define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vclss32:
 ;CHECK: vcls.s32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
 	ret <2 x i32> %tmp2
 }
@@ -161,7 +161,7 @@
 define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vclsQs8:
 ;CHECK: vcls.s8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
 	ret <16 x i8> %tmp2
 }
@@ -169,7 +169,7 @@
 define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vclsQs16:
 ;CHECK: vcls.s16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
 	ret <8 x i16> %tmp2
 }
@@ -177,7 +177,7 @@
 define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vclsQs32:
 ;CHECK: vcls.s32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
 	ret <4 x i32> %tmp2
 }
diff --git a/llvm/test/CodeGen/ARM/pr13249.ll b/llvm/test/CodeGen/ARM/pr13249.ll
index 54430e6..cede600 100644
--- a/llvm/test/CodeGen/ARM/pr13249.ll
+++ b/llvm/test/CodeGen/ARM/pr13249.ll
@@ -6,7 +6,7 @@
 
 bb3:                                              ; preds = %bb3, %bb
   %tmp = phi i8* [ %tmp5, %bb3 ], [ %arg, %bb ]
-  %tmp4 = load i8* %tmp, align 1
+  %tmp4 = load i8, i8* %tmp, align 1
   %tmp5 = getelementptr inbounds i8, i8* %tmp, i32 1
   br i1 undef, label %bb3, label %bb7
 
@@ -18,7 +18,7 @@
   br i1 %tmp10, label %bb13, label %bb15
 
 bb13:                                             ; preds = %bb7
-  %tmp14 = load i8* %tmp12, align 1
+  %tmp14 = load i8, i8* %tmp12, align 1
   br label %bb7
 
 bb15:                                             ; preds = %bb7
diff --git a/llvm/test/CodeGen/ARM/pr18364-movw.ll b/llvm/test/CodeGen/ARM/pr18364-movw.ll
index fdcf154..b783522 100644
--- a/llvm/test/CodeGen/ARM/pr18364-movw.ll
+++ b/llvm/test/CodeGen/ARM/pr18364-movw.ll
@@ -14,8 +14,8 @@
   %z = alloca i64, align 8
   store i64 1, i64* %y, align 8
   store i64 11579764786944, i64* %z, align 8
-  %0 = load i64* %y, align 8
-  %1 = load i64* %z, align 8
+  %0 = load i64, i64* %y, align 8
+  %1 = load i64, i64* %z, align 8
   %sub = sub i64 %0, %1
   ret i64 %sub
 }
diff --git a/llvm/test/CodeGen/ARM/pr3502.ll b/llvm/test/CodeGen/ARM/pr3502.ll
index 606d969..4ec982e 100644
--- a/llvm/test/CodeGen/ARM/pr3502.ll
+++ b/llvm/test/CodeGen/ARM/pr3502.ll
@@ -11,7 +11,7 @@
 entry:
 	tail call void asm sideeffect "mcr p15, 0, $0, c7, c10, 4 \0A\09", "r,~{memory}"(i32 0) nounwind
 	tail call void asm sideeffect "mcr p15,0,$0,c7,c14,0", "r,~{memory}"(i32 0) nounwind
-	%0 = load %struct.SHARED_AREA** null, align 4		; <%struct.SHARED_AREA*> [#uses=1]
+	%0 = load %struct.SHARED_AREA*, %struct.SHARED_AREA** null, align 4		; <%struct.SHARED_AREA*> [#uses=1]
 	%1 = ptrtoint %struct.SHARED_AREA* %0 to i32		; <i32> [#uses=1]
 	%2 = lshr i32 %1, 20		; <i32> [#uses=1]
 	%3 = tail call i32 @SetCurrEntry(i32 %2, i32 0) nounwind		; <i32> [#uses=0]
diff --git a/llvm/test/CodeGen/ARM/private.ll b/llvm/test/CodeGen/ARM/private.ll
index e48c292..fab8f37 100644
--- a/llvm/test/CodeGen/ARM/private.ll
+++ b/llvm/test/CodeGen/ARM/private.ll
@@ -15,7 +15,7 @@
 
 define i32 @bar() {
         call void @foo()
-	%1 = load i32* @baz, align 4
+	%1 = load i32, i32* @baz, align 4
         ret i32 %1
 }
 
diff --git a/llvm/test/CodeGen/ARM/reg_sequence.ll b/llvm/test/CodeGen/ARM/reg_sequence.ll
index a9fdd7e..507ee48 100644
--- a/llvm/test/CodeGen/ARM/reg_sequence.ll
+++ b/llvm/test/CodeGen/ARM/reg_sequence.ll
@@ -20,9 +20,9 @@
 ; CHECK-NOT:    vmov d
 ; CHECK-NEXT:   vst1.16
   %0 = getelementptr inbounds %struct.int32x4_t, %struct.int32x4_t* %vT0ptr, i32 0, i32 0 ; <<4 x i32>*> [#uses=1]
-  %1 = load <4 x i32>* %0, align 16               ; <<4 x i32>> [#uses=1]
+  %1 = load <4 x i32>, <4 x i32>* %0, align 16               ; <<4 x i32>> [#uses=1]
   %2 = getelementptr inbounds %struct.int32x4_t, %struct.int32x4_t* %vT1ptr, i32 0, i32 0 ; <<4 x i32>*> [#uses=1]
-  %3 = load <4 x i32>* %2, align 16               ; <<4 x i32>> [#uses=1]
+  %3 = load <4 x i32>, <4 x i32>* %2, align 16               ; <<4 x i32>> [#uses=1]
   %4 = bitcast i16* %i_ptr to i8*                 ; <i8*> [#uses=1]
   %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4, i32 1) ; <<8 x i16>> [#uses=1]
   %6 = bitcast <8 x i16> %5 to <2 x double>       ; <<2 x double>> [#uses=2]
@@ -56,9 +56,9 @@
 ; CHECK:        vst1.16
 ; CHECK:        vst1.16
   %0 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %vT0ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %1 = load <8 x i16>* %0, align 16               ; <<8 x i16>> [#uses=1]
+  %1 = load <8 x i16>, <8 x i16>* %0, align 16               ; <<8 x i16>> [#uses=1]
   %2 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %vT1ptr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
-  %3 = load <8 x i16>* %2, align 16               ; <<8 x i16>> [#uses=1]
+  %3 = load <8 x i16>, <8 x i16>* %2, align 16               ; <<8 x i16>> [#uses=1]
   %4 = bitcast i16* %i_ptr to i8*                 ; <i8*> [#uses=1]
   %5 = tail call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %4, i32 1) ; <<8 x i16>> [#uses=1]
   %6 = getelementptr inbounds i16, i16* %i_ptr, i32 8  ; <i16*> [#uses=1]
@@ -146,7 +146,7 @@
 ; CHECK-NOT:    vmov
 ; CHECK:        vadd.i16
   %tmp0 = bitcast i16* %A to i8*                  ; <i8*> [#uses=1]
-  %tmp1 = load <8 x i16>* %B                      ; <<8 x i16>> [#uses=2]
+  %tmp1 = load <8 x i16>, <8 x i16>* %B                      ; <<8 x i16>> [#uses=2]
   %tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 1) ; <%struct.__neon_int16x8x2_t> [#uses=2]
   %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 ; <<8 x i16>> [#uses=1]
   %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 ; <<8 x i16>> [#uses=1]
@@ -159,7 +159,7 @@
 ; CHECK:        vldr
 ; CHECK:        vorr d[[D0:[0-9]+]], d[[D1:[0-9]+]]
 ; CHECK-NEXT:   vld2.8 {d[[D1]][1], d[[D0]][1]}
-  %tmp1 = load <8 x i8>* %B                       ; <<8 x i8>> [#uses=2]
+  %tmp1 = load <8 x i8>, <8 x i8>* %B                       ; <<8 x i8>> [#uses=2]
   %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2]
   %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1]
   %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 ; <<8 x i8>> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/saxpy10-a9.ll b/llvm/test/CodeGen/ARM/saxpy10-a9.ll
index 26c6954f..531b02b 100644
--- a/llvm/test/CodeGen/ARM/saxpy10-a9.ll
+++ b/llvm/test/CodeGen/ARM/saxpy10-a9.ll
@@ -63,72 +63,72 @@
 ; This accumulates a sum rather than storing each result.
 define float @saxpy10(float* nocapture readonly %data1, float* nocapture readonly %data2, float %a) {
 entry:
-  %0 = load float* %data1, align 4
+  %0 = load float, float* %data1, align 4
   %mul = fmul float %0, %a
-  %1 = load float* %data2, align 4
+  %1 = load float, float* %data2, align 4
   %add = fadd float %mul, %1
   %add2 = fadd float %add, 0.000000e+00
   %arrayidx.1 = getelementptr inbounds float, float* %data1, i32 1
-  %2 = load float* %arrayidx.1, align 4
+  %2 = load float, float* %arrayidx.1, align 4
   %mul.1 = fmul float %2, %a
   %arrayidx1.1 = getelementptr inbounds float, float* %data2, i32 1
-  %3 = load float* %arrayidx1.1, align 4
+  %3 = load float, float* %arrayidx1.1, align 4
   %add.1 = fadd float %mul.1, %3
   %add2.1 = fadd float %add2, %add.1
   %arrayidx.2 = getelementptr inbounds float, float* %data1, i32 2
-  %4 = load float* %arrayidx.2, align 4
+  %4 = load float, float* %arrayidx.2, align 4
   %mul.2 = fmul float %4, %a
   %arrayidx1.2 = getelementptr inbounds float, float* %data2, i32 2
-  %5 = load float* %arrayidx1.2, align 4
+  %5 = load float, float* %arrayidx1.2, align 4
   %add.2 = fadd float %mul.2, %5
   %add2.2 = fadd float %add2.1, %add.2
   %arrayidx.3 = getelementptr inbounds float, float* %data1, i32 3
-  %6 = load float* %arrayidx.3, align 4
+  %6 = load float, float* %arrayidx.3, align 4
   %mul.3 = fmul float %6, %a
   %arrayidx1.3 = getelementptr inbounds float, float* %data2, i32 3
-  %7 = load float* %arrayidx1.3, align 4
+  %7 = load float, float* %arrayidx1.3, align 4
   %add.3 = fadd float %mul.3, %7
   %add2.3 = fadd float %add2.2, %add.3
   %arrayidx.4 = getelementptr inbounds float, float* %data1, i32 4
-  %8 = load float* %arrayidx.4, align 4
+  %8 = load float, float* %arrayidx.4, align 4
   %mul.4 = fmul float %8, %a
   %arrayidx1.4 = getelementptr inbounds float, float* %data2, i32 4
-  %9 = load float* %arrayidx1.4, align 4
+  %9 = load float, float* %arrayidx1.4, align 4
   %add.4 = fadd float %mul.4, %9
   %add2.4 = fadd float %add2.3, %add.4
   %arrayidx.5 = getelementptr inbounds float, float* %data1, i32 5
-  %10 = load float* %arrayidx.5, align 4
+  %10 = load float, float* %arrayidx.5, align 4
   %mul.5 = fmul float %10, %a
   %arrayidx1.5 = getelementptr inbounds float, float* %data2, i32 5
-  %11 = load float* %arrayidx1.5, align 4
+  %11 = load float, float* %arrayidx1.5, align 4
   %add.5 = fadd float %mul.5, %11
   %add2.5 = fadd float %add2.4, %add.5
   %arrayidx.6 = getelementptr inbounds float, float* %data1, i32 6
-  %12 = load float* %arrayidx.6, align 4
+  %12 = load float, float* %arrayidx.6, align 4
   %mul.6 = fmul float %12, %a
   %arrayidx1.6 = getelementptr inbounds float, float* %data2, i32 6
-  %13 = load float* %arrayidx1.6, align 4
+  %13 = load float, float* %arrayidx1.6, align 4
   %add.6 = fadd float %mul.6, %13
   %add2.6 = fadd float %add2.5, %add.6
   %arrayidx.7 = getelementptr inbounds float, float* %data1, i32 7
-  %14 = load float* %arrayidx.7, align 4
+  %14 = load float, float* %arrayidx.7, align 4
   %mul.7 = fmul float %14, %a
   %arrayidx1.7 = getelementptr inbounds float, float* %data2, i32 7
-  %15 = load float* %arrayidx1.7, align 4
+  %15 = load float, float* %arrayidx1.7, align 4
   %add.7 = fadd float %mul.7, %15
   %add2.7 = fadd float %add2.6, %add.7
   %arrayidx.8 = getelementptr inbounds float, float* %data1, i32 8
-  %16 = load float* %arrayidx.8, align 4
+  %16 = load float, float* %arrayidx.8, align 4
   %mul.8 = fmul float %16, %a
   %arrayidx1.8 = getelementptr inbounds float, float* %data2, i32 8
-  %17 = load float* %arrayidx1.8, align 4
+  %17 = load float, float* %arrayidx1.8, align 4
   %add.8 = fadd float %mul.8, %17
   %add2.8 = fadd float %add2.7, %add.8
   %arrayidx.9 = getelementptr inbounds float, float* %data1, i32 9
-  %18 = load float* %arrayidx.9, align 4
+  %18 = load float, float* %arrayidx.9, align 4
   %mul.9 = fmul float %18, %a
   %arrayidx1.9 = getelementptr inbounds float, float* %data2, i32 9
-  %19 = load float* %arrayidx1.9, align 4
+  %19 = load float, float* %arrayidx1.9, align 4
   %add.9 = fadd float %mul.9, %19
   %add2.9 = fadd float %add2.8, %add.9
   ret float %add2.9
diff --git a/llvm/test/CodeGen/ARM/segmented-stacks.ll b/llvm/test/CodeGen/ARM/segmented-stacks.ll
index 9873bf3..cbb124d 100644
--- a/llvm/test/CodeGen/ARM/segmented-stacks.ll
+++ b/llvm/test/CodeGen/ARM/segmented-stacks.ll
@@ -55,7 +55,7 @@
 }
 
 define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
-       %addend = load i32 * %closure
+       %addend = load i32 , i32 * %closure
        %result = add i32 %other, %addend
        %mem = alloca i32, i32 10
        call void @dummy_use (i32* %mem, i32 10)
diff --git a/llvm/test/CodeGen/ARM/select_xform.ll b/llvm/test/CodeGen/ARM/select_xform.ll
index 326eb51..460ca8f 100644
--- a/llvm/test/CodeGen/ARM/select_xform.ll
+++ b/llvm/test/CodeGen/ARM/select_xform.ll
@@ -183,7 +183,7 @@
 ; Handle frame index operands.
 define void @pr13628() nounwind uwtable align 2 {
   %x3 = alloca i8, i32 256, align 8
-  %x4 = load i8* undef, align 1
+  %x4 = load i8, i8* undef, align 1
   %x5 = icmp ne i8 %x4, 0
   %x6 = select i1 %x5, i8* %x3, i8* null
   call void @bar(i8* %x6) nounwind
diff --git a/llvm/test/CodeGen/ARM/shifter_operand.ll b/llvm/test/CodeGen/ARM/shifter_operand.ll
index fd09d82..3999168 100644
--- a/llvm/test/CodeGen/ARM/shifter_operand.ll
+++ b/llvm/test/CodeGen/ARM/shifter_operand.ll
@@ -43,8 +43,8 @@
         %tmp3 = inttoptr i32 %tmp2 to i32*
         %tmp4 = add i32 %base2, %tmp1
         %tmp5 = inttoptr i32 %tmp4 to i32*
-        %tmp6 = load i32* %tmp3
-        %tmp7 = load i32* %tmp5
+        %tmp6 = load i32, i32* %tmp3
+        %tmp7 = load i32, i32* %tmp5
         %tmp8 = add i32 %tmp7, %tmp6
         ret i32 %tmp8
 }
@@ -68,7 +68,7 @@
   %1 = bitcast i8* %0 to i32*
   %2 = sext i16 %addr to i32
   %3 = getelementptr inbounds i32, i32* %1, i32 %2
-  %4 = load i32* %3, align 4
+  %4 = load i32, i32* %3, align 4
   %5 = add nsw i32 %4, 1
   store i32 %5, i32* %3, align 4
   ret void
diff --git a/llvm/test/CodeGen/ARM/smul.ll b/llvm/test/CodeGen/ARM/smul.ll
index b7ddd10..13873f5 100644
--- a/llvm/test/CodeGen/ARM/smul.ll
+++ b/llvm/test/CodeGen/ARM/smul.ll
@@ -7,7 +7,7 @@
 define i32 @f1(i32 %y) {
 ; CHECK: f1
 ; CHECK: smulbt
-        %tmp = load i16* @x             ; <i16> [#uses=1]
+        %tmp = load i16, i16* @x             ; <i16> [#uses=1]
         %tmp1 = add i16 %tmp, 2         ; <i16> [#uses=1]
         %tmp2 = sext i16 %tmp1 to i32           ; <i32> [#uses=1]
         %tmp3 = ashr i32 %y, 16         ; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/space-directive.ll b/llvm/test/CodeGen/ARM/space-directive.ll
index 55be199..24f0d0a 100644
--- a/llvm/test/CodeGen/ARM/space-directive.ll
+++ b/llvm/test/CodeGen/ARM/space-directive.ll
@@ -11,7 +11,7 @@
 ; CHECK: [[PAST_CP]]:
 ; CHECK: .zero 10000
   %addr = inttoptr i32 12345678 to i32*
-  %val = load i32* %addr
+  %val = load i32, i32* %addr
   call i32 @llvm.arm.space(i32 10000, i32 undef)
   ret i32 %val
 }
diff --git a/llvm/test/CodeGen/ARM/spill-q.ll b/llvm/test/CodeGen/ARM/spill-q.ll
index 425fc12..1a102e3 100644
--- a/llvm/test/CodeGen/ARM/spill-q.ll
+++ b/llvm/test/CodeGen/ARM/spill-q.ll
@@ -43,7 +43,7 @@
   store float 0.000000e+00, float* undef, align 4
   %ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
   store float 0.000000e+00, float* undef, align 4
-  %val173 = load <4 x float>* undef               ; <<4 x float>> [#uses=1]
+  %val173 = load <4 x float>, <4 x float>* undef               ; <<4 x float>> [#uses=1]
   br label %bb4
 
 bb4:                                              ; preds = %bb193, %entry
diff --git a/llvm/test/CodeGen/ARM/ssp-data-layout.ll b/llvm/test/CodeGen/ARM/ssp-data-layout.ll
index 1dc0a93..516cc2b 100644
--- a/llvm/test/CodeGen/ARM/ssp-data-layout.ll
+++ b/llvm/test/CodeGen/ARM/ssp-data-layout.ll
@@ -149,18 +149,18 @@
   %arraydecay22 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i32 0
   %arraydecay23 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i32 0
   %arraydecay24 = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i32 0
-  %0 = load i32* %x, align 4
-  %1 = load i32* %y, align 4
-  %2 = load i32* %z, align 4
+  %0 = load i32, i32* %x, align 4
+  %1 = load i32, i32* %y, align 4
+  %2 = load i32, i32* %z, align 4
   %coerce.dive = getelementptr %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0
   %3 = bitcast [8 x i8]* %coerce.dive to i64*
-  %4 = load i64* %3, align 1
+  %4 = load i64, i64* %3, align 1
   %coerce.dive25 = getelementptr %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0
   %5 = bitcast [2 x i8]* %coerce.dive25 to i16*
-  %6 = load i16* %5, align 1
+  %6 = load i16, i16* %5, align 1
   %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
   %7 = bitcast [2 x i16]* %coerce.dive26 to i32*
-  %8 = load i32* %7, align 1
+  %8 = load i32, i32* %7, align 1
   call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
   ret void
 }
@@ -296,18 +296,18 @@
   %arraydecay22 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i32 0
   %arraydecay23 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i32 0
   %arraydecay24 = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i32 0
-  %0 = load i32* %x, align 4
-  %1 = load i32* %y, align 4
-  %2 = load i32* %z, align 4
+  %0 = load i32, i32* %x, align 4
+  %1 = load i32, i32* %y, align 4
+  %2 = load i32, i32* %z, align 4
   %coerce.dive = getelementptr %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0
   %3 = bitcast [8 x i8]* %coerce.dive to i64*
-  %4 = load i64* %3, align 1
+  %4 = load i64, i64* %3, align 1
   %coerce.dive25 = getelementptr %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0
   %5 = bitcast [2 x i8]* %coerce.dive25 to i16*
-  %6 = load i16* %5, align 1
+  %6 = load i16, i16* %5, align 1
   %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
   %7 = bitcast [2 x i16]* %coerce.dive26 to i32*
-  %8 = load i32* %7, align 1
+  %8 = load i32, i32* %7, align 1
   call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
   ret void
 }
@@ -431,18 +431,18 @@
   %arraydecay22 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i32 0
   %arraydecay23 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i32 0
   %arraydecay24 = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i32 0
-  %0 = load i32* %x, align 4
-  %1 = load i32* %y, align 4
-  %2 = load i32* %z, align 4
+  %0 = load i32, i32* %x, align 4
+  %1 = load i32, i32* %y, align 4
+  %2 = load i32, i32* %z, align 4
   %coerce.dive = getelementptr %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0
   %3 = bitcast [8 x i8]* %coerce.dive to i64*
-  %4 = load i64* %3, align 1
+  %4 = load i64, i64* %3, align 1
   %coerce.dive25 = getelementptr %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0
   %5 = bitcast [2 x i8]* %coerce.dive25 to i16*
-  %6 = load i16* %5, align 1
+  %6 = load i16, i16* %5, align 1
   %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
   %7 = bitcast [2 x i16]* %coerce.dive26 to i32*
-  %8 = load i32* %7, align 1
+  %8 = load i32, i32* %7, align 1
   call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
   ret void
 }
@@ -475,13 +475,13 @@
   %0 = bitcast %struct.struct_large_char2* %b to %struct.struct_large_char*
   %coerce.dive = getelementptr %struct.struct_large_char, %struct.struct_large_char* %0, i32 0, i32 0
   %1 = bitcast [8 x i8]* %coerce.dive to i64*
-  %2 = load i64* %1, align 1
+  %2 = load i64, i64* %1, align 1
   %coerce.dive4 = getelementptr %struct.struct_small_char, %struct.struct_small_char* %a, i32 0, i32 0
   %3 = bitcast [2 x i8]* %coerce.dive4 to i16*
-  %4 = load i16* %3, align 1
+  %4 = load i16, i16* %3, align 1
   %coerce.dive5 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d2, i32 0, i32 0
   %5 = bitcast [2 x i16]* %coerce.dive5 to i32*
-  %6 = load i32* %5, align 1
+  %6 = load i32, i32* %5, align 1
   call void @takes_all(i64 %2, i16 %4, %struct.struct_large_nonchar* byval align 8 %d1, i32 %6, i8* null, i8* null, i32* null, i16* null, i32* null, i32 0, i32 0, i32 0)
   ret void
 }
diff --git a/llvm/test/CodeGen/ARM/stack-alignment.ll b/llvm/test/CodeGen/ARM/stack-alignment.ll
index 2decd2a..ac14a59 100644
--- a/llvm/test/CodeGen/ARM/stack-alignment.ll
+++ b/llvm/test/CodeGen/ARM/stack-alignment.ll
@@ -40,65 +40,65 @@
 ; CHECK-THUMB2:      bfc	r4, #0, #12
 ; CHECK-THUMB2-NEXT: mov	sp, r4
   %a = alloca i8, align 4096
-  %0 = load double* %d, align 4
+  %0 = load double, double* %d, align 4
   %arrayidx1 = getelementptr inbounds double, double* %d, i32 1
-  %1 = load double* %arrayidx1, align 4
+  %1 = load double, double* %arrayidx1, align 4
   %arrayidx2 = getelementptr inbounds double, double* %d, i32 2
-  %2 = load double* %arrayidx2, align 4
+  %2 = load double, double* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds double, double* %d, i32 3
-  %3 = load double* %arrayidx3, align 4
+  %3 = load double, double* %arrayidx3, align 4
   %arrayidx4 = getelementptr inbounds double, double* %d, i32 4
-  %4 = load double* %arrayidx4, align 4
+  %4 = load double, double* %arrayidx4, align 4
   %arrayidx5 = getelementptr inbounds double, double* %d, i32 5
-  %5 = load double* %arrayidx5, align 4
+  %5 = load double, double* %arrayidx5, align 4
   %arrayidx6 = getelementptr inbounds double, double* %d, i32 6
-  %6 = load double* %arrayidx6, align 4
+  %6 = load double, double* %arrayidx6, align 4
   %arrayidx7 = getelementptr inbounds double, double* %d, i32 7
-  %7 = load double* %arrayidx7, align 4
+  %7 = load double, double* %arrayidx7, align 4
   %arrayidx8 = getelementptr inbounds double, double* %d, i32 8
-  %8 = load double* %arrayidx8, align 4
+  %8 = load double, double* %arrayidx8, align 4
   %arrayidx9 = getelementptr inbounds double, double* %d, i32 9
-  %9 = load double* %arrayidx9, align 4
+  %9 = load double, double* %arrayidx9, align 4
   %arrayidx10 = getelementptr inbounds double, double* %d, i32 10
-  %10 = load double* %arrayidx10, align 4
+  %10 = load double, double* %arrayidx10, align 4
   %arrayidx11 = getelementptr inbounds double, double* %d, i32 11
-  %11 = load double* %arrayidx11, align 4
+  %11 = load double, double* %arrayidx11, align 4
   %arrayidx12 = getelementptr inbounds double, double* %d, i32 12
-  %12 = load double* %arrayidx12, align 4
+  %12 = load double, double* %arrayidx12, align 4
   %arrayidx13 = getelementptr inbounds double, double* %d, i32 13
-  %13 = load double* %arrayidx13, align 4
+  %13 = load double, double* %arrayidx13, align 4
   %arrayidx14 = getelementptr inbounds double, double* %d, i32 14
-  %14 = load double* %arrayidx14, align 4
+  %14 = load double, double* %arrayidx14, align 4
   %arrayidx15 = getelementptr inbounds double, double* %d, i32 15
-  %15 = load double* %arrayidx15, align 4
+  %15 = load double, double* %arrayidx15, align 4
   %arrayidx16 = getelementptr inbounds double, double* %d, i32 16
-  %16 = load double* %arrayidx16, align 4
+  %16 = load double, double* %arrayidx16, align 4
   %arrayidx17 = getelementptr inbounds double, double* %d, i32 17
-  %17 = load double* %arrayidx17, align 4
+  %17 = load double, double* %arrayidx17, align 4
   %arrayidx18 = getelementptr inbounds double, double* %d, i32 18
-  %18 = load double* %arrayidx18, align 4
+  %18 = load double, double* %arrayidx18, align 4
   %arrayidx19 = getelementptr inbounds double, double* %d, i32 19
-  %19 = load double* %arrayidx19, align 4
+  %19 = load double, double* %arrayidx19, align 4
   %arrayidx20 = getelementptr inbounds double, double* %d, i32 20
-  %20 = load double* %arrayidx20, align 4
+  %20 = load double, double* %arrayidx20, align 4
   %arrayidx21 = getelementptr inbounds double, double* %d, i32 21
-  %21 = load double* %arrayidx21, align 4
+  %21 = load double, double* %arrayidx21, align 4
   %arrayidx22 = getelementptr inbounds double, double* %d, i32 22
-  %22 = load double* %arrayidx22, align 4
+  %22 = load double, double* %arrayidx22, align 4
   %arrayidx23 = getelementptr inbounds double, double* %d, i32 23
-  %23 = load double* %arrayidx23, align 4
+  %23 = load double, double* %arrayidx23, align 4
   %arrayidx24 = getelementptr inbounds double, double* %d, i32 24
-  %24 = load double* %arrayidx24, align 4
+  %24 = load double, double* %arrayidx24, align 4
   %arrayidx25 = getelementptr inbounds double, double* %d, i32 25
-  %25 = load double* %arrayidx25, align 4
+  %25 = load double, double* %arrayidx25, align 4
   %arrayidx26 = getelementptr inbounds double, double* %d, i32 26
-  %26 = load double* %arrayidx26, align 4
+  %26 = load double, double* %arrayidx26, align 4
   %arrayidx27 = getelementptr inbounds double, double* %d, i32 27
-  %27 = load double* %arrayidx27, align 4
+  %27 = load double, double* %arrayidx27, align 4
   %arrayidx28 = getelementptr inbounds double, double* %d, i32 28
-  %28 = load double* %arrayidx28, align 4
+  %28 = load double, double* %arrayidx28, align 4
   %arrayidx29 = getelementptr inbounds double, double* %d, i32 29
-  %29 = load double* %arrayidx29, align 4
+  %29 = load double, double* %arrayidx29, align 4
   %div = fdiv double %29, %28
   %div30 = fdiv double %div, %27
   %div31 = fdiv double %div30, %26
diff --git a/llvm/test/CodeGen/ARM/str_post.ll b/llvm/test/CodeGen/ARM/str_post.ll
index a4f8640..0933e15 100644
--- a/llvm/test/CodeGen/ARM/str_post.ll
+++ b/llvm/test/CodeGen/ARM/str_post.ll
@@ -3,7 +3,7 @@
 define i16 @test1(i32* %X, i16* %A) {
 ; CHECK-LABEL: test1:
 ; CHECK: strh {{.*}}[{{.*}}], #-4
-        %Y = load i32* %X               ; <i32> [#uses=1]
+        %Y = load i32, i32* %X               ; <i32> [#uses=1]
         %tmp1 = trunc i32 %Y to i16             ; <i16> [#uses=1]
         store i16 %tmp1, i16* %A
         %tmp2 = ptrtoint i16* %A to i16         ; <i16> [#uses=1]
@@ -14,7 +14,7 @@
 define i32 @test2(i32* %X, i32* %A) {
 ; CHECK-LABEL: test2:
 ; CHECK: str {{.*}}[{{.*}}],
-        %Y = load i32* %X               ; <i32> [#uses=1]
+        %Y = load i32, i32* %X               ; <i32> [#uses=1]
         store i32 %Y, i32* %A
         %tmp1 = ptrtoint i32* %A to i32         ; <i32> [#uses=1]
         %tmp2 = sub i32 %tmp1, 4                ; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/ARM/str_pre-2.ll b/llvm/test/CodeGen/ARM/str_pre-2.ll
index 5ce2bce..4b8b4c6 100644
--- a/llvm/test/CodeGen/ARM/str_pre-2.ll
+++ b/llvm/test/CodeGen/ARM/str_pre-2.ll
@@ -7,8 +7,8 @@
 ; CHECK: push {r4, r5, lr}
 ; CHECK: pop {r4, r5, pc}
         call void asm sideeffect "", "~{r4},~{r5}"() nounwind
-	%0 = load i64** @b, align 4
-	%1 = load i64* %0, align 4
+	%0 = load i64*, i64** @b, align 4
+	%1 = load i64, i64* %0, align 4
 	%2 = mul i64 %1, %a
 	ret i64 %2
 }
diff --git a/llvm/test/CodeGen/ARM/str_pre.ll b/llvm/test/CodeGen/ARM/str_pre.ll
index dc8cee9..848261f 100644
--- a/llvm/test/CodeGen/ARM/str_pre.ll
+++ b/llvm/test/CodeGen/ARM/str_pre.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple=arm-eabi %s -o -  | FileCheck %s
 
 define void @test1(i32* %X, i32* %A, i32** %dest) {
-        %B = load i32* %A               ; <i32> [#uses=1]
+        %B = load i32, i32* %A               ; <i32> [#uses=1]
         %Y = getelementptr i32, i32* %X, i32 4               ; <i32*> [#uses=2]
         store i32 %B, i32* %Y
         store i32* %Y, i32** %dest
@@ -9,7 +9,7 @@
 }
 
 define i16* @test2(i16* %X, i32* %A) {
-        %B = load i32* %A               ; <i32> [#uses=1]
+        %B = load i32, i32* %A               ; <i32> [#uses=1]
         %Y = getelementptr i16, i16* %X, i32 4               ; <i16*> [#uses=2]
         %tmp = trunc i32 %B to i16              ; <i16> [#uses=1]
         store i16 %tmp, i16* %Y
diff --git a/llvm/test/CodeGen/ARM/struct-byval-frame-index.ll b/llvm/test/CodeGen/ARM/struct-byval-frame-index.ll
index 80b157e..bca797d 100644
--- a/llvm/test/CodeGen/ARM/struct-byval-frame-index.ll
+++ b/llvm/test/CodeGen/ARM/struct-byval-frame-index.ll
@@ -72,10 +72,10 @@
 ; Function Attrs: nounwind
 define void @set_stored_macroblock_parameters() #1 {
 entry:
-  %0 = load %structB** @img, align 4
-  %1 = load i32* undef, align 4
+  %0 = load %structB*, %structB** @img, align 4
+  %1 = load i32, i32* undef, align 4
   %mb_data = getelementptr inbounds %structB, %structB* %0, i32 0, i32 61
-  %2 = load %structK** %mb_data, align 4
+  %2 = load %structK*, %structK** %mb_data, align 4
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
@@ -134,7 +134,7 @@
 
 if.end236:                                        ; preds = %if.end230
   %cmp242 = icmp ne i16 undef, 8
-  %4 = load i32* @luma_transform_size_8x8_flag, align 4
+  %4 = load i32, i32* @luma_transform_size_8x8_flag, align 4
   %tobool245 = icmp ne i32 %4, 0
   %or.cond812 = or i1 %cmp242, %tobool245
   br i1 %or.cond812, label %if.end249, label %land.lhs.true246
@@ -150,11 +150,11 @@
   br label %if.end249
 
 if.end249:                                        ; preds = %if.then248, %land.lhs.true246, %if.end236
-  %5 = load i32* @luma_transform_size_8x8_flag, align 4
-  %6 = load %structA** @rdopt, align 4
+  %5 = load i32, i32* @luma_transform_size_8x8_flag, align 4
+  %6 = load %structA*, %structA** @rdopt, align 4
   %luma_transform_size_8x8_flag264 = getelementptr inbounds %structA, %structA* %6, i32 0, i32 21
   store i32 %5, i32* %luma_transform_size_8x8_flag264, align 4
-  %7 = load i32* undef, align 4
+  %7 = load i32, i32* undef, align 4
   %add281 = add nsw i32 %7, 0
   br label %for.body285
 
@@ -162,36 +162,36 @@
   %8 = phi %structB* [ undef, %if.end249 ], [ %.pre1155, %for.inc503 ]
   %i.21103 = phi i32 [ 0, %if.end249 ], [ %inc504, %for.inc503 ]
   %block_x286 = getelementptr inbounds %structB, %structB* %8, i32 0, i32 37
-  %9 = load i32* %block_x286, align 4
+  %9 = load i32, i32* %block_x286, align 4
   %add287 = add nsw i32 %9, %i.21103
   %shr289 = ashr i32 %i.21103, 1
   %add290 = add nsw i32 %shr289, 0
   %arrayidx292 = getelementptr inbounds %structK, %structK* %2, i32 %1, i32 15, i32 %add290
-  %10 = load %structM** @enc_picture, align 4
+  %10 = load %structM*, %structM** @enc_picture, align 4
   %ref_idx = getelementptr inbounds %structM, %structM* %10, i32 0, i32 35
-  %11 = load i8**** %ref_idx, align 4
-  %12 = load i8*** %11, align 4
+  %11 = load i8***, i8**** %ref_idx, align 4
+  %12 = load i8**, i8*** %11, align 4
   %arrayidx313 = getelementptr inbounds i8*, i8** %12, i32 %add281
-  %13 = load i8** %arrayidx313, align 4
+  %13 = load i8*, i8** %arrayidx313, align 4
   %arrayidx314 = getelementptr inbounds i8, i8* %13, i32 %add287
   store i8 -1, i8* %arrayidx314, align 1
-  %14 = load %structB** @img, align 4
+  %14 = load %structB*, %structB** @img, align 4
   %MbaffFrameFlag327 = getelementptr inbounds %structB, %structB* %14, i32 0, i32 100
-  %15 = load i32* %MbaffFrameFlag327, align 4
+  %15 = load i32, i32* %MbaffFrameFlag327, align 4
   %tobool328 = icmp eq i32 %15, 0
   br i1 %tobool328, label %if.end454, label %if.then329
 
 if.then329:                                       ; preds = %for.body285
-  %16 = load %structA** @rdopt, align 4
+  %16 = load %structA*, %structA** @rdopt, align 4
   br label %if.end454
 
 if.end454:                                        ; preds = %if.then329, %for.body285
-  %17 = load i32* %arrayidx292, align 4
+  %17 = load i32, i32* %arrayidx292, align 4
   %cmp457 = icmp eq i32 %17, 0
   br i1 %cmp457, label %if.then475, label %lor.lhs.false459
 
 lor.lhs.false459:                                 ; preds = %if.end454
-  %18 = load i32* %mb_type, align 4
+  %18 = load i32, i32* %mb_type, align 4
   switch i32 %18, label %for.inc503 [
     i32 9, label %if.then475
     i32 10, label %if.then475
@@ -205,7 +205,7 @@
 
 for.inc503:                                       ; preds = %if.then475, %lor.lhs.false459
   %inc504 = add nsw i32 %i.21103, 1
-  %.pre1155 = load %structB** @img, align 4
+  %.pre1155 = load %structB*, %structB** @img, align 4
   br label %for.body285
 }
 
diff --git a/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll b/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll
index f7328dc..3203888 100644
--- a/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll
+++ b/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll
@@ -169,7 +169,7 @@
 ; CHECK: sub
 ; CHECK: cmp
 ; CHECK: bge
-  %load = load i32* @t, align 4
+  %load = load i32, i32* @t, align 4
   %sub = sub i32 %load, 17
   %cmp = icmp slt i32 %sub, 0
   br i1 %cmp, label %if.then, label %if.else
@@ -191,7 +191,7 @@
 ; CHECK: sub
 ; CHECK: cmp
 ; CHECK: bhs
-  %load = load i32* @t, align 4
+  %load = load i32, i32* @t, align 4
   %sub = sub i32 %load, 17
   %cmp = icmp ult i32 %sub, 0
   br i1 %cmp, label %if.then, label %if.else
diff --git a/llvm/test/CodeGen/ARM/swift-atomics.ll b/llvm/test/CodeGen/ARM/swift-atomics.ll
index 8b100f1..ca7e7fb 100644
--- a/llvm/test/CodeGen/ARM/swift-atomics.ll
+++ b/llvm/test/CodeGen/ARM/swift-atomics.ll
@@ -33,7 +33,7 @@
 ; CHECK-STRICT-ATOMIC: dmb {{ish$}}
 
   store atomic i32 %v, i32* %p seq_cst, align 4
-  %val = load atomic i32* %p seq_cst, align 4
+  %val = load atomic i32, i32* %p seq_cst, align 4
   ret i32 %val
 }
 
@@ -46,6 +46,6 @@
 
 ; CHECK-STRICT-ATOMIC-LABEL: test_acq:
 ; CHECK-STRICT-ATOMIC: dmb {{ish$}}
-  %val = load atomic i32* %addr acquire, align 4
+  %val = load atomic i32, i32* %addr acquire, align 4
   ret i32 %val
 }
diff --git a/llvm/test/CodeGen/ARM/swift-vldm.ll b/llvm/test/CodeGen/ARM/swift-vldm.ll
index ca23792..9e50727 100644
--- a/llvm/test/CodeGen/ARM/swift-vldm.ll
+++ b/llvm/test/CodeGen/ARM/swift-vldm.ll
@@ -15,11 +15,11 @@
   %addr1 = getelementptr double, double * %x, i32 1
   %addr2 = getelementptr double, double * %x, i32 2
   %addr3 = getelementptr double, double * %x, i32 3
-  %d0 = load double * %y
-  %d1 = load double * %x
-  %d2 = load double * %addr1
-  %d3 = load double * %addr2
-  %d4 = load double * %addr3
+  %d0 = load double, double * %y
+  %d1 = load double, double * %x
+  %d2 = load double, double * %addr1
+  %d3 = load double, double * %addr2
+  %d4 = load double, double * %addr3
   ; We are trying to force x[0-3] in registers d1 to d4 so that we can test we
   ; don't form a "vldmia rX, {d1, d2, d3, d4}".
   ; We are relying on the calling convention and that register allocation
diff --git a/llvm/test/CodeGen/ARM/tail-dup.ll b/llvm/test/CodeGen/ARM/tail-dup.ll
index 49ab114..407bdf7 100644
--- a/llvm/test/CodeGen/ARM/tail-dup.ll
+++ b/llvm/test/CodeGen/ARM/tail-dup.ll
@@ -11,19 +11,19 @@
 
 define i32 @fn(i32* nocapture %opcodes) nounwind readonly ssp {
 entry:
-  %0 = load i32* %opcodes, align 4
+  %0 = load i32, i32* %opcodes, align 4
   %arrayidx = getelementptr inbounds [3 x i8*], [3 x i8*]* @fn.codetable, i32 0, i32 %0
   br label %indirectgoto
 
 INCREMENT:                                        ; preds = %indirectgoto
   %inc = add nsw i32 %result.0, 1
-  %1 = load i32* %opcodes.addr.0, align 4
+  %1 = load i32, i32* %opcodes.addr.0, align 4
   %arrayidx2 = getelementptr inbounds [3 x i8*], [3 x i8*]* @fn.codetable, i32 0, i32 %1
   br label %indirectgoto
 
 DECREMENT:                                        ; preds = %indirectgoto
   %dec = add nsw i32 %result.0, -1
-  %2 = load i32* %opcodes.addr.0, align 4
+  %2 = load i32, i32* %opcodes.addr.0, align 4
   %arrayidx4 = getelementptr inbounds [3 x i8*], [3 x i8*]* @fn.codetable, i32 0, i32 %2
   br label %indirectgoto
 
@@ -32,7 +32,7 @@
   %opcodes.pn = phi i32* [ %opcodes, %entry ], [ %opcodes.addr.0, %DECREMENT ], [ %opcodes.addr.0, %INCREMENT ]
   %indirect.goto.dest.in = phi i8** [ %arrayidx, %entry ], [ %arrayidx4, %DECREMENT ], [ %arrayidx2, %INCREMENT ]
   %opcodes.addr.0 = getelementptr inbounds i32, i32* %opcodes.pn, i32 1
-  %indirect.goto.dest = load i8** %indirect.goto.dest.in, align 4
+  %indirect.goto.dest = load i8*, i8** %indirect.goto.dest.in, align 4
   indirectbr i8* %indirect.goto.dest, [label %RETURN, label %INCREMENT, label %DECREMENT]
 
 RETURN:                                           ; preds = %indirectgoto
diff --git a/llvm/test/CodeGen/ARM/test-sharedidx.ll b/llvm/test/CodeGen/ARM/test-sharedidx.ll
index 4bdf3b0..377996c 100644
--- a/llvm/test/CodeGen/ARM/test-sharedidx.ll
+++ b/llvm/test/CodeGen/ARM/test-sharedidx.ll
@@ -24,10 +24,10 @@
 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
   %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.09
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv6 = zext i8 %0 to i32
   %arrayidx1 = getelementptr inbounds i8, i8* %b, i32 %i.09
-  %1 = load i8* %arrayidx1, align 1
+  %1 = load i8, i8* %arrayidx1, align 1
   %conv27 = zext i8 %1 to i32
   %add = add nsw i32 %conv27, %conv6
   %conv3 = trunc i32 %add to i8
@@ -45,10 +45,10 @@
 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
   %arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %add5
-  %2 = load i8* %arrayidx.1, align 1
+  %2 = load i8, i8* %arrayidx.1, align 1
   %conv6.1 = zext i8 %2 to i32
   %arrayidx1.1 = getelementptr inbounds i8, i8* %b, i32 %add5
-  %3 = load i8* %arrayidx1.1, align 1
+  %3 = load i8, i8* %arrayidx1.1, align 1
   %conv27.1 = zext i8 %3 to i32
   %add.1 = add nsw i32 %conv27.1, %conv6.1
   %conv3.1 = trunc i32 %add.1 to i8
@@ -63,10 +63,10 @@
 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
   %arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %add5.1
-  %4 = load i8* %arrayidx.2, align 1
+  %4 = load i8, i8* %arrayidx.2, align 1
   %conv6.2 = zext i8 %4 to i32
   %arrayidx1.2 = getelementptr inbounds i8, i8* %b, i32 %add5.1
-  %5 = load i8* %arrayidx1.2, align 1
+  %5 = load i8, i8* %arrayidx1.2, align 1
   %conv27.2 = zext i8 %5 to i32
   %add.2 = add nsw i32 %conv27.2, %conv6.2
   %conv3.2 = trunc i32 %add.2 to i8
@@ -81,10 +81,10 @@
 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
 ; CHECK: ldrb {{r[0-9]|lr}}, [{{r[0-9]|lr}}, {{r[0-9]|lr}}]!
   %arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %add5.2
-  %6 = load i8* %arrayidx.3, align 1
+  %6 = load i8, i8* %arrayidx.3, align 1
   %conv6.3 = zext i8 %6 to i32
   %arrayidx1.3 = getelementptr inbounds i8, i8* %b, i32 %add5.2
-  %7 = load i8* %arrayidx1.3, align 1
+  %7 = load i8, i8* %arrayidx1.3, align 1
   %conv27.3 = zext i8 %7 to i32
   %add.3 = add nsw i32 %conv27.3, %conv6.3
   %conv3.3 = trunc i32 %add.3 to i8
diff --git a/llvm/test/CodeGen/ARM/thumb1-varalloc.ll b/llvm/test/CodeGen/ARM/thumb1-varalloc.ll
index 892fe1a..0637be0 100644
--- a/llvm/test/CodeGen/ARM/thumb1-varalloc.ll
+++ b/llvm/test/CodeGen/ARM/thumb1-varalloc.ll
@@ -12,7 +12,7 @@
 ; CHECK-LABEL: foo:
 
 	%size = alloca i32, align 4
-	%0 = load i8** @__bar, align 4
+	%0 = load i8*, i8** @__bar, align 4
 	%1 = icmp eq i8* %0, null
 	br i1 %1, label %bb1, label %bb3
 ; CHECK: bne
diff --git a/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll b/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll
index c831260..9c62fae 100644
--- a/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll
+++ b/llvm/test/CodeGen/ARM/thumb1_return_sequence.ll
@@ -13,10 +13,10 @@
 
   %b = alloca <6 x i32>, align 16
   %a = alloca <4 x i32>, align 16
-  %stuff = load <6 x i32>* %p, align 16
+  %stuff = load <6 x i32>, <6 x i32>* %p, align 16
   store <6 x i32> %stuff, <6 x i32>* %b, align 16
   store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* %a, align 16
-  %0 = load <4 x i32>* %a, align 16
+  %0 = load <4 x i32>, <4 x i32>* %a, align 16
   ret <4 x i32> %0
 
 ; Epilogue
@@ -46,7 +46,7 @@
   %a = alloca <4 x i32>, align 16
   store <4 x i32> <i32 42, i32 42, i32 42, i32 42>, <4 x i32>* %b, align 16
   store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32>* %a, align 16
-  %0 = load <4 x i32>* %a, align 16
+  %0 = load <4 x i32>, <4 x i32>* %a, align 16
   call void @llvm.va_start(i8* null)
   ret <4 x i32> %0
 
@@ -78,7 +78,7 @@
 ; CHECK-V4T:    push    {[[SAVED:(r[4567](, )?)+]], lr}
 ; CHECK-V5T:    push    {[[SAVED:(r[4567](, )?)+]], lr}
 
-  %0 = load <6 x i32>* %p, align 16
+  %0 = load <6 x i32>, <6 x i32>* %p, align 16
   %1 = extractelement <6 x i32> %0, i32 0
   %2 = extractelement <6 x i32> %0, i32 1
   %3 = extractelement <6 x i32> %0, i32 2
@@ -121,24 +121,24 @@
   store i32 2, i32* %b, align 4
   store i32 3, i32* %c, align 4
   store i32 4, i32* %d, align 4
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* %a, align 4
-  %1 = load i32* %b, align 4
+  %1 = load i32, i32* %b, align 4
   %inc1 = add nsw i32 %1, 1
   store i32 %inc1, i32* %b, align 4
-  %2 = load i32* %c, align 4
+  %2 = load i32, i32* %c, align 4
   %inc2 = add nsw i32 %2, 1
   store i32 %inc2, i32* %c, align 4
-  %3 = load i32* %d, align 4
+  %3 = load i32, i32* %d, align 4
   %inc3 = add nsw i32 %3, 1
   store i32 %inc3, i32* %d, align 4
-  %4 = load i32* %a, align 4
-  %5 = load i32* %b, align 4
+  %4 = load i32, i32* %a, align 4
+  %5 = load i32, i32* %b, align 4
   %add = add nsw i32 %4, %5
-  %6 = load i32* %c, align 4
+  %6 = load i32, i32* %c, align 4
   %add4 = add nsw i32 %add, %6
-  %7 = load i32* %d, align 4
+  %7 = load i32, i32* %d, align 4
   %add5 = add nsw i32 %add4, %7
   %add6 = add nsw i32 %add5, %i
   call void @llvm.va_start(i8* null)
diff --git a/llvm/test/CodeGen/ARM/thumb_indirect_calls.ll b/llvm/test/CodeGen/ARM/thumb_indirect_calls.ll
index 16a55a8..9f1950c 100644
--- a/llvm/test/CodeGen/ARM/thumb_indirect_calls.ll
+++ b/llvm/test/CodeGen/ARM/thumb_indirect_calls.ll
@@ -6,7 +6,7 @@
 ; CHECK-LABEL foo:
 define void @foo(i32 %x) {
 entry:
-  %0 = load void (i32)** @f, align 4
+  %0 = load void (i32)*, void (i32)** @f, align 4
   tail call void %0(i32 %x)
   ret void
 
diff --git a/llvm/test/CodeGen/ARM/tls1.ll b/llvm/test/CodeGen/ARM/tls1.ll
index b03f76b..d492522 100644
--- a/llvm/test/CodeGen/ARM/tls1.ll
+++ b/llvm/test/CodeGen/ARM/tls1.ll
@@ -12,7 +12,7 @@
 
 define i32 @f() {
 entry:
-	%tmp1 = load i32* @i		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* @i		; <i32> [#uses=1]
 	ret i32 %tmp1
 }
 
diff --git a/llvm/test/CodeGen/ARM/tls2.ll b/llvm/test/CodeGen/ARM/tls2.ll
index 24b4794..d522da8 100644
--- a/llvm/test/CodeGen/ARM/tls2.ll
+++ b/llvm/test/CodeGen/ARM/tls2.ll
@@ -12,7 +12,7 @@
 ; CHECK-PIC-LABEL: f:
 ; CHECK-PIC: __tls_get_addr
 entry:
-	%tmp1 = load i32* @i		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* @i		; <i32> [#uses=1]
 	ret i32 %tmp1
 }
 
diff --git a/llvm/test/CodeGen/ARM/tls3.ll b/llvm/test/CodeGen/ARM/tls3.ll
index e0e944f..1617a52 100644
--- a/llvm/test/CodeGen/ARM/tls3.ll
+++ b/llvm/test/CodeGen/ARM/tls3.ll
@@ -6,6 +6,6 @@
 
 define i32 @main() {
 entry:
-	%tmp2 = load i32* getelementptr (%struct.anon* @teste, i32 0, i32 0), align 8		; <i32> [#uses=1]
+	%tmp2 = load i32, i32* getelementptr (%struct.anon* @teste, i32 0, i32 0), align 8		; <i32> [#uses=1]
 	ret i32 %tmp2
 }
diff --git a/llvm/test/CodeGen/ARM/trunc_ldr.ll b/llvm/test/CodeGen/ARM/trunc_ldr.ll
index 94cef8b..ca7ad9a 100644
--- a/llvm/test/CodeGen/ARM/trunc_ldr.ll
+++ b/llvm/test/CodeGen/ARM/trunc_ldr.ll
@@ -6,7 +6,7 @@
 define i8 @f1(%struct.A* %d) {
 	%tmp2 = getelementptr %struct.A, %struct.A* %d, i32 0, i32 4
 	%tmp23 = bitcast i16* %tmp2 to i32*
-	%tmp4 = load i32* %tmp23
+	%tmp4 = load i32, i32* %tmp23
 	%tmp512 = lshr i32 %tmp4, 24
 	%tmp56 = trunc i32 %tmp512 to i8
 	ret i8 %tmp56
@@ -15,7 +15,7 @@
 define i32 @f2(%struct.A* %d) {
 	%tmp2 = getelementptr %struct.A, %struct.A* %d, i32 0, i32 4
 	%tmp23 = bitcast i16* %tmp2 to i32*
-	%tmp4 = load i32* %tmp23
+	%tmp4 = load i32, i32* %tmp23
 	%tmp512 = lshr i32 %tmp4, 24
 	%tmp56 = trunc i32 %tmp512 to i8
         %tmp57 = sext i8 %tmp56 to i32
diff --git a/llvm/test/CodeGen/ARM/truncstore-dag-combine.ll b/llvm/test/CodeGen/ARM/truncstore-dag-combine.ll
index 360e3e1..11fa022 100644
--- a/llvm/test/CodeGen/ARM/truncstore-dag-combine.ll
+++ b/llvm/test/CodeGen/ARM/truncstore-dag-combine.ll
@@ -3,7 +3,7 @@
 define void @bar(i8* %P, i16* %Q) {
 entry:
 	%P1 = bitcast i8* %P to i16*		; <i16*> [#uses=1]
-	%tmp = load i16* %Q, align 1		; <i16> [#uses=1]
+	%tmp = load i16, i16* %Q, align 1		; <i16> [#uses=1]
 	store i16 %tmp, i16* %P1, align 1
 	ret void
 }
@@ -11,7 +11,7 @@
 define void @foo(i8* %P, i32* %Q) {
 entry:
 	%P1 = bitcast i8* %P to i32*		; <i32*> [#uses=1]
-	%tmp = load i32* %Q, align 1		; <i32> [#uses=1]
+	%tmp = load i32, i32* %Q, align 1		; <i32> [#uses=1]
 	store i32 %tmp, i32* %P1, align 1
 	ret void
 }
diff --git a/llvm/test/CodeGen/ARM/twoaddrinstr.ll b/llvm/test/CodeGen/ARM/twoaddrinstr.ll
index 01df3b4..97a4933 100644
--- a/llvm/test/CodeGen/ARM/twoaddrinstr.ll
+++ b/llvm/test/CodeGen/ARM/twoaddrinstr.ll
@@ -12,7 +12,7 @@
 ; CHECK-NEXT:   vst1.32
 
 entry:
-  %0 = load <4 x float>* undef, align 4
+  %0 = load <4 x float>, <4 x float>* undef, align 4
   store <4 x float> zeroinitializer, <4 x float>* undef, align 4
   store <4 x float> %0, <4 x float>* undef, align 4
   %1 = insertelement <4 x float> %0, float 1.000000e+00, i32 3
diff --git a/llvm/test/CodeGen/ARM/uint64tof64.ll b/llvm/test/CodeGen/ARM/uint64tof64.ll
index 32eb225..d779955 100644
--- a/llvm/test/CodeGen/ARM/uint64tof64.ll
+++ b/llvm/test/CodeGen/ARM/uint64tof64.ll
@@ -7,7 +7,7 @@
 
 define fastcc void @t() {
 entry:
-	%0 = load i64* null, align 4		; <i64> [#uses=1]
+	%0 = load i64, i64* null, align 4		; <i64> [#uses=1]
 	%1 = uitofp i64 %0 to double		; <double> [#uses=1]
 	%2 = fdiv double 0.000000e+00, %1		; <double> [#uses=1]
 	%3 = call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* null, i8* getelementptr ([54 x i8]* @"\01LC10", i32 0, i32 0), i64 0, double %2)		; <i32> [#uses=0]
diff --git a/llvm/test/CodeGen/ARM/umulo-32.ll b/llvm/test/CodeGen/ARM/umulo-32.ll
index 19875ce..1c83573 100644
--- a/llvm/test/CodeGen/ARM/umulo-32.ll
+++ b/llvm/test/CodeGen/ARM/umulo-32.ll
@@ -28,7 +28,7 @@
 store i32 %argc, i32* %2, align 4
 store i8** %argv, i8*** %3, align 4
 store i32 10, i32* %m_degree, align 4
-%4 = load i32* %m_degree, align 4
+%4 = load i32, i32* %m_degree, align 4
 %5 = call %umul.ty @llvm.umul.with.overflow.i32(i32 %4, i32 8)
 %6 = extractvalue %umul.ty %5, 1
 %7 = extractvalue %umul.ty %5, 0
diff --git a/llvm/test/CodeGen/ARM/unaligned_load_store.ll b/llvm/test/CodeGen/ARM/unaligned_load_store.ll
index 72163ae..0be3917 100644
--- a/llvm/test/CodeGen/ARM/unaligned_load_store.ll
+++ b/llvm/test/CodeGen/ARM/unaligned_load_store.ll
@@ -28,7 +28,7 @@
 
   %__src1.i = bitcast i8* %b to i32*              ; <i32*> [#uses=1]
   %__dest2.i = bitcast i8* %a to i32*             ; <i32*> [#uses=1]
-  %tmp.i = load i32* %__src1.i, align 1           ; <i32> [#uses=1]
+  %tmp.i = load i32, i32* %__src1.i, align 1           ; <i32> [#uses=1]
   store i32 %tmp.i, i32* %__dest2.i, align 1
   ret void
 }
@@ -44,7 +44,7 @@
 ; UNALIGNED-LABEL: hword:
 ; UNALIGNED: vld1.16
 ; UNALIGNED: vst1.16
-  %tmp = load double* %a, align 2
+  %tmp = load double, double* %a, align 2
   store double %tmp, double* %b, align 2
   ret void
 }
@@ -60,7 +60,7 @@
 ; UNALIGNED-LABEL: byte:
 ; UNALIGNED: vld1.8
 ; UNALIGNED: vst1.8
-  %tmp = load double* %a, align 1
+  %tmp = load double, double* %a, align 1
   store double %tmp, double* %b, align 1
   ret void
 }
@@ -76,7 +76,7 @@
 ; UNALIGNED: ldr
 ; UNALIGNED-NOT: strb
 ; UNALIGNED: str
-  %tmp = load i32* %a, align 1
+  %tmp = load i32, i32* %a, align 1
   store i32 %tmp, i32* %b, align 1
   ret void
 }
diff --git a/llvm/test/CodeGen/ARM/unaligned_load_store_vector.ll b/llvm/test/CodeGen/ARM/unaligned_load_store_vector.ll
index 6f98945..abb523c 100644
--- a/llvm/test/CodeGen/ARM/unaligned_load_store_vector.ll
+++ b/llvm/test/CodeGen/ARM/unaligned_load_store_vector.ll
@@ -11,7 +11,7 @@
   %vi  = bitcast i8* %pi to <8 x i8>*
   %vo  = bitcast i8* %po to <8 x i8>*
 ;CHECK: vld1.8
-  %v1 = load  <8 x i8>* %vi, align 1
+  %v1 = load  <8 x i8>,  <8 x i8>* %vi, align 1
 ;CHECK: vst1.8
   store <8 x i8> %v1, <8 x i8>* %vo, align 1
   ret void
@@ -29,7 +29,7 @@
   %vi  = bitcast i8* %pi to <4 x i16>*
   %vo  = bitcast i8* %po to <4 x i16>*
 ;CHECK: vld1.8
-  %v1 = load  <4 x i16>* %vi, align 1
+  %v1 = load  <4 x i16>,  <4 x i16>* %vi, align 1
 ;CHECK: vst1.8
   store <4 x i16> %v1, <4 x i16>* %vo, align 1
   ret void
@@ -47,7 +47,7 @@
   %vi  = bitcast i8* %pi to <2 x i32>*
   %vo  = bitcast i8* %po to <2 x i32>*
 ;CHECK: vld1.8
-  %v1 = load  <2 x i32>* %vi, align 1
+  %v1 = load  <2 x i32>,  <2 x i32>* %vi, align 1
 ;CHECK: vst1.8
   store <2 x i32> %v1, <2 x i32>* %vo, align 1
   ret void
@@ -65,7 +65,7 @@
   %vi  = bitcast i8* %pi to <2 x float>*
   %vo  = bitcast i8* %po to <2 x float>*
 ;CHECK: vld1.8
-  %v1 = load  <2 x float>* %vi, align 1
+  %v1 = load  <2 x float>,  <2 x float>* %vi, align 1
 ;CHECK: vst1.8
   store <2 x float> %v1, <2 x float>* %vo, align 1
   ret void
@@ -83,7 +83,7 @@
   %vi  = bitcast i8* %pi to <16 x i8>*
   %vo  = bitcast i8* %po to <16 x i8>*
 ;CHECK: vld1.8
-  %v1 = load  <16 x i8>* %vi, align 1
+  %v1 = load  <16 x i8>,  <16 x i8>* %vi, align 1
 ;CHECK: vst1.8
   store <16 x i8> %v1, <16 x i8>* %vo, align 1
   ret void
@@ -101,7 +101,7 @@
   %vi  = bitcast i8* %pi to <8 x i16>*
   %vo  = bitcast i8* %po to <8 x i16>*
 ;CHECK: vld1.8
-  %v1 = load  <8 x i16>* %vi, align 1
+  %v1 = load  <8 x i16>,  <8 x i16>* %vi, align 1
 ;CHECK: vst1.8
   store <8 x i16> %v1, <8 x i16>* %vo, align 1
   ret void
@@ -119,7 +119,7 @@
   %vi  = bitcast i8* %pi to <4 x i32>*
   %vo  = bitcast i8* %po to <4 x i32>*
 ;CHECK: vld1.8
-  %v1 = load  <4 x i32>* %vi, align 1
+  %v1 = load  <4 x i32>,  <4 x i32>* %vi, align 1
 ;CHECK: vst1.8
   store <4 x i32> %v1, <4 x i32>* %vo, align 1
   ret void
@@ -137,7 +137,7 @@
   %vi  = bitcast i8* %pi to <2 x i64>*
   %vo  = bitcast i8* %po to <2 x i64>*
 ;CHECK: vld1.8
-  %v1 = load  <2 x i64>* %vi, align 1
+  %v1 = load  <2 x i64>,  <2 x i64>* %vi, align 1
 ;CHECK: vst1.8
   store <2 x i64> %v1, <2 x i64>* %vo, align 1
   ret void
@@ -155,7 +155,7 @@
   %vi  = bitcast i8* %pi to <4 x float>*
   %vo  = bitcast i8* %po to <4 x float>*
 ;CHECK: vld1.8
-  %v1 = load  <4 x float>* %vi, align 1
+  %v1 = load  <4 x float>,  <4 x float>* %vi, align 1
 ;CHECK: vst1.8
   store <4 x float> %v1, <4 x float>* %vo, align 1
   ret void
@@ -173,7 +173,7 @@
   %vi  = bitcast i8* %pi to <8 x i8>*
   %vo  = bitcast i8* %po to <8 x i8>*
 ;CHECK: vld1.16
-  %v1 = load  <8 x i8>* %vi, align 2
+  %v1 = load  <8 x i8>,  <8 x i8>* %vi, align 2
 ;CHECK: vst1.16
   store <8 x i8> %v1, <8 x i8>* %vo, align 2
   ret void
@@ -191,7 +191,7 @@
   %vi  = bitcast i8* %pi to <4 x i16>*
   %vo  = bitcast i8* %po to <4 x i16>*
 ;CHECK: vld1.16
-  %v1 = load  <4 x i16>* %vi, align 2
+  %v1 = load  <4 x i16>,  <4 x i16>* %vi, align 2
 ;CHECK: vst1.16
   store <4 x i16> %v1, <4 x i16>* %vo, align 2
   ret void
@@ -209,7 +209,7 @@
   %vi  = bitcast i8* %pi to <2 x i32>*
   %vo  = bitcast i8* %po to <2 x i32>*
 ;CHECK: vld1.16
-  %v1 = load  <2 x i32>* %vi, align 2
+  %v1 = load  <2 x i32>,  <2 x i32>* %vi, align 2
 ;CHECK: vst1.16
   store <2 x i32> %v1, <2 x i32>* %vo, align 2
   ret void
@@ -227,7 +227,7 @@
   %vi  = bitcast i8* %pi to <2 x float>*
   %vo  = bitcast i8* %po to <2 x float>*
 ;CHECK: vld1.16
-  %v1 = load  <2 x float>* %vi, align 2
+  %v1 = load  <2 x float>,  <2 x float>* %vi, align 2
 ;CHECK: vst1.16
   store <2 x float> %v1, <2 x float>* %vo, align 2
   ret void
@@ -245,7 +245,7 @@
   %vi  = bitcast i8* %pi to <16 x i8>*
   %vo  = bitcast i8* %po to <16 x i8>*
 ;CHECK: vld1.16
-  %v1 = load  <16 x i8>* %vi, align 2
+  %v1 = load  <16 x i8>,  <16 x i8>* %vi, align 2
 ;CHECK: vst1.16
   store <16 x i8> %v1, <16 x i8>* %vo, align 2
   ret void
@@ -263,7 +263,7 @@
   %vi  = bitcast i8* %pi to <8 x i16>*
   %vo  = bitcast i8* %po to <8 x i16>*
 ;CHECK: vld1.16
-  %v1 = load  <8 x i16>* %vi, align 2
+  %v1 = load  <8 x i16>,  <8 x i16>* %vi, align 2
 ;CHECK: vst1.16
   store <8 x i16> %v1, <8 x i16>* %vo, align 2
   ret void
@@ -281,7 +281,7 @@
   %vi  = bitcast i8* %pi to <4 x i32>*
   %vo  = bitcast i8* %po to <4 x i32>*
 ;CHECK: vld1.16
-  %v1 = load  <4 x i32>* %vi, align 2
+  %v1 = load  <4 x i32>,  <4 x i32>* %vi, align 2
 ;CHECK: vst1.16
   store <4 x i32> %v1, <4 x i32>* %vo, align 2
   ret void
@@ -299,7 +299,7 @@
   %vi  = bitcast i8* %pi to <2 x i64>*
   %vo  = bitcast i8* %po to <2 x i64>*
 ;CHECK: vld1.16
-  %v1 = load  <2 x i64>* %vi, align 2
+  %v1 = load  <2 x i64>,  <2 x i64>* %vi, align 2
 ;CHECK: vst1.16
   store <2 x i64> %v1, <2 x i64>* %vo, align 2
   ret void
@@ -317,7 +317,7 @@
   %vi  = bitcast i8* %pi to <4 x float>*
   %vo  = bitcast i8* %po to <4 x float>*
 ;CHECK: vld1.16
-  %v1 = load  <4 x float>* %vi, align 2
+  %v1 = load  <4 x float>,  <4 x float>* %vi, align 2
 ;CHECK: vst1.16
   store <4 x float> %v1, <4 x float>* %vo, align 2
   ret void
@@ -335,7 +335,7 @@
   %vi  = bitcast i8* %pi to <8 x i8>*
   %vo  = bitcast i8* %po to <8 x i8>*
 ;CHECK: vldr
-  %v1 = load  <8 x i8>* %vi, align 4
+  %v1 = load  <8 x i8>,  <8 x i8>* %vi, align 4
 ;CHECK: vstr
   store <8 x i8> %v1, <8 x i8>* %vo, align 4
   ret void
@@ -353,7 +353,7 @@
   %vi  = bitcast i8* %pi to <4 x i16>*
   %vo  = bitcast i8* %po to <4 x i16>*
 ;CHECK: vldr
-  %v1 = load  <4 x i16>* %vi, align 4
+  %v1 = load  <4 x i16>,  <4 x i16>* %vi, align 4
 ;CHECK: vstr
   store <4 x i16> %v1, <4 x i16>* %vo, align 4
   ret void
@@ -371,7 +371,7 @@
   %vi  = bitcast i8* %pi to <2 x i32>*
   %vo  = bitcast i8* %po to <2 x i32>*
 ;CHECK: vldr
-  %v1 = load  <2 x i32>* %vi, align 4
+  %v1 = load  <2 x i32>,  <2 x i32>* %vi, align 4
 ;CHECK: vstr
   store <2 x i32> %v1, <2 x i32>* %vo, align 4
   ret void
@@ -389,7 +389,7 @@
   %vi  = bitcast i8* %pi to <2 x float>*
   %vo  = bitcast i8* %po to <2 x float>*
 ;CHECK: vldr
-  %v1 = load  <2 x float>* %vi, align 4
+  %v1 = load  <2 x float>,  <2 x float>* %vi, align 4
 ;CHECK: vstr
   store <2 x float> %v1, <2 x float>* %vo, align 4
   ret void
@@ -407,7 +407,7 @@
   %vi  = bitcast i8* %pi to <16 x i8>*
   %vo  = bitcast i8* %po to <16 x i8>*
 ;CHECK: vld1.32
-  %v1 = load  <16 x i8>* %vi, align 4
+  %v1 = load  <16 x i8>,  <16 x i8>* %vi, align 4
 ;CHECK: vst1.32
   store <16 x i8> %v1, <16 x i8>* %vo, align 4
   ret void
@@ -425,7 +425,7 @@
   %vi  = bitcast i8* %pi to <8 x i16>*
   %vo  = bitcast i8* %po to <8 x i16>*
 ;CHECK: vld1.32
-  %v1 = load  <8 x i16>* %vi, align 4
+  %v1 = load  <8 x i16>,  <8 x i16>* %vi, align 4
 ;CHECK: vst1.32
   store <8 x i16> %v1, <8 x i16>* %vo, align 4
   ret void
@@ -443,7 +443,7 @@
   %vi  = bitcast i8* %pi to <4 x i32>*
   %vo  = bitcast i8* %po to <4 x i32>*
 ;CHECK: vld1.32
-  %v1 = load  <4 x i32>* %vi, align 4
+  %v1 = load  <4 x i32>,  <4 x i32>* %vi, align 4
 ;CHECK: vst1.32
   store <4 x i32> %v1, <4 x i32>* %vo, align 4
   ret void
@@ -461,7 +461,7 @@
   %vi  = bitcast i8* %pi to <2 x i64>*
   %vo  = bitcast i8* %po to <2 x i64>*
 ;CHECK: vld1.32
-  %v1 = load  <2 x i64>* %vi, align 4
+  %v1 = load  <2 x i64>,  <2 x i64>* %vi, align 4
 ;CHECK: vst1.32
   store <2 x i64> %v1, <2 x i64>* %vo, align 4
   ret void
@@ -479,7 +479,7 @@
   %vi  = bitcast i8* %pi to <4 x float>*
   %vo  = bitcast i8* %po to <4 x float>*
 ;CHECK: vld1.32
-  %v1 = load  <4 x float>* %vi, align 4
+  %v1 = load  <4 x float>,  <4 x float>* %vi, align 4
 ;CHECK: vst1.32
   store <4 x float> %v1, <4 x float>* %vo, align 4
   ret void
diff --git a/llvm/test/CodeGen/ARM/undef-sext.ll b/llvm/test/CodeGen/ARM/undef-sext.ll
index 7774fcd..bb06bcb 100644
--- a/llvm/test/CodeGen/ARM/undef-sext.ll
+++ b/llvm/test/CodeGen/ARM/undef-sext.ll
@@ -9,6 +9,6 @@
 ; CHECK: bx lr
   %0 = sext i16 undef to i32
   %1 = getelementptr inbounds i32, i32* %a, i32 %0
-  %2 = load i32* %1, align 4
+  %2 = load i32, i32* %1, align 4
   ret i32 %2
 }
diff --git a/llvm/test/CodeGen/ARM/vaba.ll b/llvm/test/CodeGen/ARM/vaba.ll
index 6478b18..4323f31 100644
--- a/llvm/test/CodeGen/ARM/vaba.ll
+++ b/llvm/test/CodeGen/ARM/vaba.ll
@@ -3,9 +3,9 @@
 define <8 x i8> @vabas8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ;CHECK-LABEL: vabas8:
 ;CHECK: vaba.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
-	%tmp3 = load <8 x i8>* %C
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
+	%tmp3 = load <8 x i8>, <8 x i8>* %C
 	%tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
 	%tmp5 = add <8 x i8> %tmp1, %tmp4
 	ret <8 x i8> %tmp5
@@ -14,9 +14,9 @@
 define <4 x i16> @vabas16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ;CHECK-LABEL: vabas16:
 ;CHECK: vaba.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
-	%tmp3 = load <4 x i16>* %C
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
+	%tmp3 = load <4 x i16>, <4 x i16>* %C
 	%tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
 	%tmp5 = add <4 x i16> %tmp1, %tmp4
 	ret <4 x i16> %tmp5
@@ -25,9 +25,9 @@
 define <2 x i32> @vabas32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ;CHECK-LABEL: vabas32:
 ;CHECK: vaba.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
-	%tmp3 = load <2 x i32>* %C
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
+	%tmp3 = load <2 x i32>, <2 x i32>* %C
 	%tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
 	%tmp5 = add <2 x i32> %tmp1, %tmp4
 	ret <2 x i32> %tmp5
@@ -36,9 +36,9 @@
 define <8 x i8> @vabau8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ;CHECK-LABEL: vabau8:
 ;CHECK: vaba.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
-	%tmp3 = load <8 x i8>* %C
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
+	%tmp3 = load <8 x i8>, <8 x i8>* %C
 	%tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
 	%tmp5 = add <8 x i8> %tmp1, %tmp4
 	ret <8 x i8> %tmp5
@@ -47,9 +47,9 @@
 define <4 x i16> @vabau16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ;CHECK-LABEL: vabau16:
 ;CHECK: vaba.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
-	%tmp3 = load <4 x i16>* %C
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
+	%tmp3 = load <4 x i16>, <4 x i16>* %C
 	%tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
 	%tmp5 = add <4 x i16> %tmp1, %tmp4
 	ret <4 x i16> %tmp5
@@ -58,9 +58,9 @@
 define <2 x i32> @vabau32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ;CHECK-LABEL: vabau32:
 ;CHECK: vaba.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
-	%tmp3 = load <2 x i32>* %C
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
+	%tmp3 = load <2 x i32>, <2 x i32>* %C
 	%tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
 	%tmp5 = add <2 x i32> %tmp1, %tmp4
 	ret <2 x i32> %tmp5
@@ -69,9 +69,9 @@
 define <16 x i8> @vabaQs8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
 ;CHECK-LABEL: vabaQs8:
 ;CHECK: vaba.s8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
-	%tmp3 = load <16 x i8>* %C
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
+	%tmp3 = load <16 x i8>, <16 x i8>* %C
 	%tmp4 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3)
 	%tmp5 = add <16 x i8> %tmp1, %tmp4
 	ret <16 x i8> %tmp5
@@ -80,9 +80,9 @@
 define <8 x i16> @vabaQs16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
 ;CHECK-LABEL: vabaQs16:
 ;CHECK: vaba.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
-	%tmp3 = load <8 x i16>* %C
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
+	%tmp3 = load <8 x i16>, <8 x i16>* %C
 	%tmp4 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3)
 	%tmp5 = add <8 x i16> %tmp1, %tmp4
 	ret <8 x i16> %tmp5
@@ -91,9 +91,9 @@
 define <4 x i32> @vabaQs32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: vabaQs32:
 ;CHECK: vaba.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
-	%tmp3 = load <4 x i32>* %C
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
+	%tmp3 = load <4 x i32>, <4 x i32>* %C
 	%tmp4 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3)
 	%tmp5 = add <4 x i32> %tmp1, %tmp4
 	ret <4 x i32> %tmp5
@@ -102,9 +102,9 @@
 define <16 x i8> @vabaQu8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
 ;CHECK-LABEL: vabaQu8:
 ;CHECK: vaba.u8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
-	%tmp3 = load <16 x i8>* %C
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
+	%tmp3 = load <16 x i8>, <16 x i8>* %C
 	%tmp4 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3)
 	%tmp5 = add <16 x i8> %tmp1, %tmp4
 	ret <16 x i8> %tmp5
@@ -113,9 +113,9 @@
 define <8 x i16> @vabaQu16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
 ;CHECK-LABEL: vabaQu16:
 ;CHECK: vaba.u16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
-	%tmp3 = load <8 x i16>* %C
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
+	%tmp3 = load <8 x i16>, <8 x i16>* %C
 	%tmp4 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3)
 	%tmp5 = add <8 x i16> %tmp1, %tmp4
 	ret <8 x i16> %tmp5
@@ -124,9 +124,9 @@
 define <4 x i32> @vabaQu32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: vabaQu32:
 ;CHECK: vaba.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
-	%tmp3 = load <4 x i32>* %C
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
+	%tmp3 = load <4 x i32>, <4 x i32>* %C
 	%tmp4 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3)
 	%tmp5 = add <4 x i32> %tmp1, %tmp4
 	ret <4 x i32> %tmp5
@@ -151,9 +151,9 @@
 define <8 x i16> @vabals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ;CHECK-LABEL: vabals8:
 ;CHECK: vabal.s8
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i8>* %B
-	%tmp3 = load <8 x i8>* %C
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
+	%tmp3 = load <8 x i8>, <8 x i8>* %C
 	%tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
 	%tmp5 = zext <8 x i8> %tmp4 to <8 x i16>
 	%tmp6 = add <8 x i16> %tmp1, %tmp5
@@ -163,9 +163,9 @@
 define <4 x i32> @vabals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ;CHECK-LABEL: vabals16:
 ;CHECK: vabal.s16
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i16>* %B
-	%tmp3 = load <4 x i16>* %C
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
+	%tmp3 = load <4 x i16>, <4 x i16>* %C
 	%tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
 	%tmp5 = zext <4 x i16> %tmp4 to <4 x i32>
 	%tmp6 = add <4 x i32> %tmp1, %tmp5
@@ -175,9 +175,9 @@
 define <2 x i64> @vabals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ;CHECK-LABEL: vabals32:
 ;CHECK: vabal.s32
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i32>* %B
-	%tmp3 = load <2 x i32>* %C
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
+	%tmp3 = load <2 x i32>, <2 x i32>* %C
 	%tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
 	%tmp5 = zext <2 x i32> %tmp4 to <2 x i64>
 	%tmp6 = add <2 x i64> %tmp1, %tmp5
@@ -187,9 +187,9 @@
 define <8 x i16> @vabalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ;CHECK-LABEL: vabalu8:
 ;CHECK: vabal.u8
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i8>* %B
-	%tmp3 = load <8 x i8>* %C
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
+	%tmp3 = load <8 x i8>, <8 x i8>* %C
 	%tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
 	%tmp5 = zext <8 x i8> %tmp4 to <8 x i16>
 	%tmp6 = add <8 x i16> %tmp1, %tmp5
@@ -199,9 +199,9 @@
 define <4 x i32> @vabalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ;CHECK-LABEL: vabalu16:
 ;CHECK: vabal.u16
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i16>* %B
-	%tmp3 = load <4 x i16>* %C
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
+	%tmp3 = load <4 x i16>, <4 x i16>* %C
 	%tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
 	%tmp5 = zext <4 x i16> %tmp4 to <4 x i32>
 	%tmp6 = add <4 x i32> %tmp1, %tmp5
@@ -211,9 +211,9 @@
 define <2 x i64> @vabalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ;CHECK-LABEL: vabalu32:
 ;CHECK: vabal.u32
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i32>* %B
-	%tmp3 = load <2 x i32>* %C
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
+	%tmp3 = load <2 x i32>, <2 x i32>* %C
 	%tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
 	%tmp5 = zext <2 x i32> %tmp4 to <2 x i64>
 	%tmp6 = add <2 x i64> %tmp1, %tmp5
diff --git a/llvm/test/CodeGen/ARM/vabd.ll b/llvm/test/CodeGen/ARM/vabd.ll
index 9ba8be2..548b8a3 100644
--- a/llvm/test/CodeGen/ARM/vabd.ll
+++ b/llvm/test/CodeGen/ARM/vabd.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @vabds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vabds8:
 ;CHECK: vabd.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @vabds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vabds16:
 ;CHECK: vabd.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @vabds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vabds32:
 ;CHECK: vabd.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -30,8 +30,8 @@
 define <8 x i8> @vabdu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vabdu8:
 ;CHECK: vabd.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -39,8 +39,8 @@
 define <4 x i16> @vabdu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vabdu16:
 ;CHECK: vabd.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -48,8 +48,8 @@
 define <2 x i32> @vabdu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vabdu32:
 ;CHECK: vabd.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -57,8 +57,8 @@
 define <2 x float> @vabdf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vabdf32:
 ;CHECK: vabd.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
@@ -66,8 +66,8 @@
 define <16 x i8> @vabdQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vabdQs8:
 ;CHECK: vabd.s8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -75,8 +75,8 @@
 define <8 x i16> @vabdQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vabdQs16:
 ;CHECK: vabd.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -84,8 +84,8 @@
 define <4 x i32> @vabdQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vabdQs32:
 ;CHECK: vabd.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -93,8 +93,8 @@
 define <16 x i8> @vabdQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vabdQu8:
 ;CHECK: vabd.u8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -102,8 +102,8 @@
 define <8 x i16> @vabdQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vabdQu16:
 ;CHECK: vabd.u16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -111,8 +111,8 @@
 define <4 x i32> @vabdQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vabdQu32:
 ;CHECK: vabd.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -120,8 +120,8 @@
 define <4 x float> @vabdQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: vabdQf32:
 ;CHECK: vabd.f32
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x float> %tmp3
 }
@@ -149,8 +149,8 @@
 define <8 x i16> @vabdls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vabdls8:
 ;CHECK: vabdl.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	%tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
 	ret <8 x i16> %tmp4
@@ -159,8 +159,8 @@
 define <4 x i32> @vabdls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vabdls16:
 ;CHECK: vabdl.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	%tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -169,8 +169,8 @@
 define <2 x i64> @vabdls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vabdls32:
 ;CHECK: vabdl.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	%tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
 	ret <2 x i64> %tmp4
@@ -179,8 +179,8 @@
 define <8 x i16> @vabdlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vabdlu8:
 ;CHECK: vabdl.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	%tmp4 = zext <8 x i8> %tmp3 to <8 x i16>
 	ret <8 x i16> %tmp4
@@ -189,8 +189,8 @@
 define <4 x i32> @vabdlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vabdlu16:
 ;CHECK: vabdl.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	%tmp4 = zext <4 x i16> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -199,8 +199,8 @@
 define <2 x i64> @vabdlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vabdlu32:
 ;CHECK: vabdl.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	%tmp4 = zext <2 x i32> %tmp3 to <2 x i64>
 	ret <2 x i64> %tmp4
diff --git a/llvm/test/CodeGen/ARM/vabs.ll b/llvm/test/CodeGen/ARM/vabs.ll
index 3a1aec8..38c6d6c 100644
--- a/llvm/test/CodeGen/ARM/vabs.ll
+++ b/llvm/test/CodeGen/ARM/vabs.ll
@@ -3,7 +3,7 @@
 define <8 x i8> @vabss8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vabss8:
 ;CHECK: vabs.s8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %tmp1)
 	ret <8 x i8> %tmp2
 }
@@ -11,7 +11,7 @@
 define <4 x i16> @vabss16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vabss16:
 ;CHECK: vabs.s16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %tmp1)
 	ret <4 x i16> %tmp2
 }
@@ -19,7 +19,7 @@
 define <2 x i32> @vabss32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vabss32:
 ;CHECK: vabs.s32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %tmp1)
 	ret <2 x i32> %tmp2
 }
@@ -27,7 +27,7 @@
 define <2 x float> @vabsf32(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: vabsf32:
 ;CHECK: vabs.f32
-	%tmp1 = load <2 x float>* %A
+	%tmp1 = load <2 x float>, <2 x float>* %A
 	%tmp2 = call <2 x float> @llvm.fabs.v2f32(<2 x float> %tmp1)
 	ret <2 x float> %tmp2
 }
@@ -35,7 +35,7 @@
 define <16 x i8> @vabsQs8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vabsQs8:
 ;CHECK: vabs.s8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %tmp1)
 	ret <16 x i8> %tmp2
 }
@@ -43,7 +43,7 @@
 define <8 x i16> @vabsQs16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vabsQs16:
 ;CHECK: vabs.s16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %tmp1)
 	ret <8 x i16> %tmp2
 }
@@ -51,7 +51,7 @@
 define <4 x i32> @vabsQs32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vabsQs32:
 ;CHECK: vabs.s32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %tmp1)
 	ret <4 x i32> %tmp2
 }
@@ -59,7 +59,7 @@
 define <4 x float> @vabsQf32(<4 x float>* %A) nounwind {
 ;CHECK-LABEL: vabsQf32:
 ;CHECK: vabs.f32
-	%tmp1 = load <4 x float>* %A
+	%tmp1 = load <4 x float>, <4 x float>* %A
 	%tmp2 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %tmp1)
 	ret <4 x float> %tmp2
 }
@@ -77,7 +77,7 @@
 define <8 x i8> @vqabss8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vqabss8:
 ;CHECK: vqabs.s8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %tmp1)
 	ret <8 x i8> %tmp2
 }
@@ -85,7 +85,7 @@
 define <4 x i16> @vqabss16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqabss16:
 ;CHECK: vqabs.s16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %tmp1)
 	ret <4 x i16> %tmp2
 }
@@ -93,7 +93,7 @@
 define <2 x i32> @vqabss32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vqabss32:
 ;CHECK: vqabs.s32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %tmp1)
 	ret <2 x i32> %tmp2
 }
@@ -101,7 +101,7 @@
 define <16 x i8> @vqabsQs8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vqabsQs8:
 ;CHECK: vqabs.s8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %tmp1)
 	ret <16 x i8> %tmp2
 }
@@ -109,7 +109,7 @@
 define <8 x i16> @vqabsQs16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqabsQs16:
 ;CHECK: vqabs.s16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %tmp1)
 	ret <8 x i16> %tmp2
 }
@@ -117,7 +117,7 @@
 define <4 x i32> @vqabsQs32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vqabsQs32:
 ;CHECK: vqabs.s32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %tmp1)
 	ret <4 x i32> %tmp2
 }
diff --git a/llvm/test/CodeGen/ARM/vadd.ll b/llvm/test/CodeGen/ARM/vadd.ll
index 86b0d02..dd35dd1 100644
--- a/llvm/test/CodeGen/ARM/vadd.ll
+++ b/llvm/test/CodeGen/ARM/vadd.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @vaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vaddi8:
 ;CHECK: vadd.i8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = add <8 x i8> %tmp1, %tmp2
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @vaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vaddi16:
 ;CHECK: vadd.i16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = add <4 x i16> %tmp1, %tmp2
 	ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @vaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vaddi32:
 ;CHECK: vadd.i32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = add <2 x i32> %tmp1, %tmp2
 	ret <2 x i32> %tmp3
 }
@@ -30,8 +30,8 @@
 define <1 x i64> @vaddi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vaddi64:
 ;CHECK: vadd.i64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = add <1 x i64> %tmp1, %tmp2
 	ret <1 x i64> %tmp3
 }
@@ -39,8 +39,8 @@
 define <2 x float> @vaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vaddf32:
 ;CHECK: vadd.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = fadd <2 x float> %tmp1, %tmp2
 	ret <2 x float> %tmp3
 }
@@ -48,8 +48,8 @@
 define <16 x i8> @vaddQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vaddQi8:
 ;CHECK: vadd.i8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = add <16 x i8> %tmp1, %tmp2
 	ret <16 x i8> %tmp3
 }
@@ -57,8 +57,8 @@
 define <8 x i16> @vaddQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vaddQi16:
 ;CHECK: vadd.i16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = add <8 x i16> %tmp1, %tmp2
 	ret <8 x i16> %tmp3
 }
@@ -66,8 +66,8 @@
 define <4 x i32> @vaddQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vaddQi32:
 ;CHECK: vadd.i32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = add <4 x i32> %tmp1, %tmp2
 	ret <4 x i32> %tmp3
 }
@@ -75,8 +75,8 @@
 define <2 x i64> @vaddQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vaddQi64:
 ;CHECK: vadd.i64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = add <2 x i64> %tmp1, %tmp2
 	ret <2 x i64> %tmp3
 }
@@ -84,8 +84,8 @@
 define <4 x float> @vaddQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: vaddQf32:
 ;CHECK: vadd.f32
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = fadd <4 x float> %tmp1, %tmp2
 	ret <4 x float> %tmp3
 }
@@ -93,8 +93,8 @@
 define <8 x i8> @vraddhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vraddhni16:
 ;CHECK: vraddhn.i16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -102,8 +102,8 @@
 define <4 x i16> @vraddhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vraddhni32:
 ;CHECK: vraddhn.i32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -111,8 +111,8 @@
 define <2 x i32> @vraddhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vraddhni64:
 ;CHECK: vraddhn.i64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -151,8 +151,8 @@
 define <8 x i16> @vaddls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vaddls8:
 ;CHECK: vaddl.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
 	%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
 	%tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -162,8 +162,8 @@
 define <4 x i32> @vaddls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vaddls16:
 ;CHECK: vaddl.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
 	%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
 	%tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -173,8 +173,8 @@
 define <2 x i64> @vaddls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vaddls32:
 ;CHECK: vaddl.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
 	%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
 	%tmp5 = add <2 x i64> %tmp3, %tmp4
@@ -184,8 +184,8 @@
 define <8 x i16> @vaddlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vaddlu8:
 ;CHECK: vaddl.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
 	%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
 	%tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -195,8 +195,8 @@
 define <4 x i32> @vaddlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vaddlu16:
 ;CHECK: vaddl.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
 	%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
 	%tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -206,8 +206,8 @@
 define <2 x i64> @vaddlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vaddlu32:
 ;CHECK: vaddl.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
 	%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
 	%tmp5 = add <2 x i64> %tmp3, %tmp4
@@ -217,8 +217,8 @@
 define <8 x i16> @vaddws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vaddws8:
 ;CHECK: vaddw.s8
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = sext <8 x i8> %tmp2 to <8 x i16>
 	%tmp4 = add <8 x i16> %tmp1, %tmp3
 	ret <8 x i16> %tmp4
@@ -227,8 +227,8 @@
 define <4 x i32> @vaddws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vaddws16:
 ;CHECK: vaddw.s16
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = sext <4 x i16> %tmp2 to <4 x i32>
 	%tmp4 = add <4 x i32> %tmp1, %tmp3
 	ret <4 x i32> %tmp4
@@ -237,8 +237,8 @@
 define <2 x i64> @vaddws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vaddws32:
 ;CHECK: vaddw.s32
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = sext <2 x i32> %tmp2 to <2 x i64>
 	%tmp4 = add <2 x i64> %tmp1, %tmp3
 	ret <2 x i64> %tmp4
@@ -247,8 +247,8 @@
 define <8 x i16> @vaddwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vaddwu8:
 ;CHECK: vaddw.u8
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = zext <8 x i8> %tmp2 to <8 x i16>
 	%tmp4 = add <8 x i16> %tmp1, %tmp3
 	ret <8 x i16> %tmp4
@@ -257,8 +257,8 @@
 define <4 x i32> @vaddwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vaddwu16:
 ;CHECK: vaddw.u16
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = zext <4 x i16> %tmp2 to <4 x i32>
 	%tmp4 = add <4 x i32> %tmp1, %tmp3
 	ret <4 x i32> %tmp4
@@ -267,8 +267,8 @@
 define <2 x i64> @vaddwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vaddwu32:
 ;CHECK: vaddw.u32
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = zext <2 x i32> %tmp2 to <2 x i64>
 	%tmp4 = add <2 x i64> %tmp1, %tmp3
 	ret <2 x i64> %tmp4
diff --git a/llvm/test/CodeGen/ARM/vargs_align.ll b/llvm/test/CodeGen/ARM/vargs_align.ll
index 3abb57e..d19abd5 100644
--- a/llvm/test/CodeGen/ARM/vargs_align.ll
+++ b/llvm/test/CodeGen/ARM/vargs_align.ll
@@ -8,13 +8,13 @@
 	%tmp = alloca i32, align 4		; <i32*> [#uses=2]
 	store i32 %a, i32* %a_addr
 	store i32 0, i32* %tmp
-	%tmp1 = load i32* %tmp		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* %tmp		; <i32> [#uses=1]
 	store i32 %tmp1, i32* %retval
 	call void @llvm.va_start(i8* null)
 	br label %return
 
 return:		; preds = %entry
-	%retval2 = load i32* %retval		; <i32> [#uses=1]
+	%retval2 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval2
 ; EABI: add sp, sp, #12
 ; EABI: add sp, sp, #16
diff --git a/llvm/test/CodeGen/ARM/vbits.ll b/llvm/test/CodeGen/ARM/vbits.ll
index dfeaacf..db9bc6c 100644
--- a/llvm/test/CodeGen/ARM/vbits.ll
+++ b/llvm/test/CodeGen/ARM/vbits.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @v_andi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: v_andi8:
 ;CHECK: vand
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = and <8 x i8> %tmp1, %tmp2
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @v_andi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: v_andi16:
 ;CHECK: vand
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = and <4 x i16> %tmp1, %tmp2
 	ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @v_andi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: v_andi32:
 ;CHECK: vand
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = and <2 x i32> %tmp1, %tmp2
 	ret <2 x i32> %tmp3
 }
@@ -30,8 +30,8 @@
 define <1 x i64> @v_andi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: v_andi64:
 ;CHECK: vand
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = and <1 x i64> %tmp1, %tmp2
 	ret <1 x i64> %tmp3
 }
@@ -39,8 +39,8 @@
 define <16 x i8> @v_andQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: v_andQi8:
 ;CHECK: vand
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = and <16 x i8> %tmp1, %tmp2
 	ret <16 x i8> %tmp3
 }
@@ -48,8 +48,8 @@
 define <8 x i16> @v_andQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: v_andQi16:
 ;CHECK: vand
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = and <8 x i16> %tmp1, %tmp2
 	ret <8 x i16> %tmp3
 }
@@ -57,8 +57,8 @@
 define <4 x i32> @v_andQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: v_andQi32:
 ;CHECK: vand
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = and <4 x i32> %tmp1, %tmp2
 	ret <4 x i32> %tmp3
 }
@@ -66,8 +66,8 @@
 define <2 x i64> @v_andQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: v_andQi64:
 ;CHECK: vand
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = and <2 x i64> %tmp1, %tmp2
 	ret <2 x i64> %tmp3
 }
@@ -75,8 +75,8 @@
 define <8 x i8> @v_bici8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: v_bici8:
 ;CHECK: vbic
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
 	%tmp4 = and <8 x i8> %tmp1, %tmp3
 	ret <8 x i8> %tmp4
@@ -85,8 +85,8 @@
 define <4 x i16> @v_bici16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: v_bici16:
 ;CHECK: vbic
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
 	%tmp4 = and <4 x i16> %tmp1, %tmp3
 	ret <4 x i16> %tmp4
@@ -95,8 +95,8 @@
 define <2 x i32> @v_bici32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: v_bici32:
 ;CHECK: vbic
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
 	%tmp4 = and <2 x i32> %tmp1, %tmp3
 	ret <2 x i32> %tmp4
@@ -105,8 +105,8 @@
 define <1 x i64> @v_bici64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: v_bici64:
 ;CHECK: vbic
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
 	%tmp4 = and <1 x i64> %tmp1, %tmp3
 	ret <1 x i64> %tmp4
@@ -115,8 +115,8 @@
 define <16 x i8> @v_bicQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: v_bicQi8:
 ;CHECK: vbic
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
 	%tmp4 = and <16 x i8> %tmp1, %tmp3
 	ret <16 x i8> %tmp4
@@ -125,8 +125,8 @@
 define <8 x i16> @v_bicQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: v_bicQi16:
 ;CHECK: vbic
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
 	%tmp4 = and <8 x i16> %tmp1, %tmp3
 	ret <8 x i16> %tmp4
@@ -135,8 +135,8 @@
 define <4 x i32> @v_bicQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: v_bicQi32:
 ;CHECK: vbic
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
 	%tmp4 = and <4 x i32> %tmp1, %tmp3
 	ret <4 x i32> %tmp4
@@ -145,8 +145,8 @@
 define <2 x i64> @v_bicQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: v_bicQi64:
 ;CHECK: vbic
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
 	%tmp4 = and <2 x i64> %tmp1, %tmp3
 	ret <2 x i64> %tmp4
@@ -155,8 +155,8 @@
 define <8 x i8> @v_eori8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: v_eori8:
 ;CHECK: veor
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = xor <8 x i8> %tmp1, %tmp2
 	ret <8 x i8> %tmp3
 }
@@ -164,8 +164,8 @@
 define <4 x i16> @v_eori16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: v_eori16:
 ;CHECK: veor
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = xor <4 x i16> %tmp1, %tmp2
 	ret <4 x i16> %tmp3
 }
@@ -173,8 +173,8 @@
 define <2 x i32> @v_eori32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: v_eori32:
 ;CHECK: veor
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = xor <2 x i32> %tmp1, %tmp2
 	ret <2 x i32> %tmp3
 }
@@ -182,8 +182,8 @@
 define <1 x i64> @v_eori64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: v_eori64:
 ;CHECK: veor
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = xor <1 x i64> %tmp1, %tmp2
 	ret <1 x i64> %tmp3
 }
@@ -191,8 +191,8 @@
 define <16 x i8> @v_eorQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: v_eorQi8:
 ;CHECK: veor
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = xor <16 x i8> %tmp1, %tmp2
 	ret <16 x i8> %tmp3
 }
@@ -200,8 +200,8 @@
 define <8 x i16> @v_eorQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: v_eorQi16:
 ;CHECK: veor
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = xor <8 x i16> %tmp1, %tmp2
 	ret <8 x i16> %tmp3
 }
@@ -209,8 +209,8 @@
 define <4 x i32> @v_eorQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: v_eorQi32:
 ;CHECK: veor
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = xor <4 x i32> %tmp1, %tmp2
 	ret <4 x i32> %tmp3
 }
@@ -218,8 +218,8 @@
 define <2 x i64> @v_eorQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: v_eorQi64:
 ;CHECK: veor
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = xor <2 x i64> %tmp1, %tmp2
 	ret <2 x i64> %tmp3
 }
@@ -227,7 +227,7 @@
 define <8 x i8> @v_mvni8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: v_mvni8:
 ;CHECK: vmvn
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
 	ret <8 x i8> %tmp2
 }
@@ -235,7 +235,7 @@
 define <4 x i16> @v_mvni16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: v_mvni16:
 ;CHECK: vmvn
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 >
 	ret <4 x i16> %tmp2
 }
@@ -243,7 +243,7 @@
 define <2 x i32> @v_mvni32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: v_mvni32:
 ;CHECK: vmvn
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 >
 	ret <2 x i32> %tmp2
 }
@@ -251,7 +251,7 @@
 define <1 x i64> @v_mvni64(<1 x i64>* %A) nounwind {
 ;CHECK-LABEL: v_mvni64:
 ;CHECK: vmvn
-	%tmp1 = load <1 x i64>* %A
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = xor <1 x i64> %tmp1, < i64 -1 >
 	ret <1 x i64> %tmp2
 }
@@ -259,7 +259,7 @@
 define <16 x i8> @v_mvnQi8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: v_mvnQi8:
 ;CHECK: vmvn
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
 	ret <16 x i8> %tmp2
 }
@@ -267,7 +267,7 @@
 define <8 x i16> @v_mvnQi16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: v_mvnQi16:
 ;CHECK: vmvn
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
 	ret <8 x i16> %tmp2
 }
@@ -275,7 +275,7 @@
 define <4 x i32> @v_mvnQi32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: v_mvnQi32:
 ;CHECK: vmvn
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 >
 	ret <4 x i32> %tmp2
 }
@@ -283,7 +283,7 @@
 define <2 x i64> @v_mvnQi64(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: v_mvnQi64:
 ;CHECK: vmvn
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 >
 	ret <2 x i64> %tmp2
 }
@@ -291,8 +291,8 @@
 define <8 x i8> @v_orri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: v_orri8:
 ;CHECK: vorr
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = or <8 x i8> %tmp1, %tmp2
 	ret <8 x i8> %tmp3
 }
@@ -300,8 +300,8 @@
 define <4 x i16> @v_orri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: v_orri16:
 ;CHECK: vorr
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = or <4 x i16> %tmp1, %tmp2
 	ret <4 x i16> %tmp3
 }
@@ -309,8 +309,8 @@
 define <2 x i32> @v_orri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: v_orri32:
 ;CHECK: vorr
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = or <2 x i32> %tmp1, %tmp2
 	ret <2 x i32> %tmp3
 }
@@ -318,8 +318,8 @@
 define <1 x i64> @v_orri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: v_orri64:
 ;CHECK: vorr
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = or <1 x i64> %tmp1, %tmp2
 	ret <1 x i64> %tmp3
 }
@@ -327,8 +327,8 @@
 define <16 x i8> @v_orrQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: v_orrQi8:
 ;CHECK: vorr
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = or <16 x i8> %tmp1, %tmp2
 	ret <16 x i8> %tmp3
 }
@@ -336,8 +336,8 @@
 define <8 x i16> @v_orrQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: v_orrQi16:
 ;CHECK: vorr
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = or <8 x i16> %tmp1, %tmp2
 	ret <8 x i16> %tmp3
 }
@@ -345,8 +345,8 @@
 define <4 x i32> @v_orrQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: v_orrQi32:
 ;CHECK: vorr
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = or <4 x i32> %tmp1, %tmp2
 	ret <4 x i32> %tmp3
 }
@@ -354,8 +354,8 @@
 define <2 x i64> @v_orrQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: v_orrQi64:
 ;CHECK: vorr
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = or <2 x i64> %tmp1, %tmp2
 	ret <2 x i64> %tmp3
 }
@@ -363,8 +363,8 @@
 define <8 x i8> @v_orni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: v_orni8:
 ;CHECK: vorn
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = xor <8 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
 	%tmp4 = or <8 x i8> %tmp1, %tmp3
 	ret <8 x i8> %tmp4
@@ -373,8 +373,8 @@
 define <4 x i16> @v_orni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: v_orni16:
 ;CHECK: vorn
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = xor <4 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1 >
 	%tmp4 = or <4 x i16> %tmp1, %tmp3
 	ret <4 x i16> %tmp4
@@ -383,8 +383,8 @@
 define <2 x i32> @v_orni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: v_orni32:
 ;CHECK: vorn
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = xor <2 x i32> %tmp2, < i32 -1, i32 -1 >
 	%tmp4 = or <2 x i32> %tmp1, %tmp3
 	ret <2 x i32> %tmp4
@@ -393,8 +393,8 @@
 define <1 x i64> @v_orni64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: v_orni64:
 ;CHECK: vorn
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = xor <1 x i64> %tmp2, < i64 -1 >
 	%tmp4 = or <1 x i64> %tmp1, %tmp3
 	ret <1 x i64> %tmp4
@@ -403,8 +403,8 @@
 define <16 x i8> @v_ornQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: v_ornQi8:
 ;CHECK: vorn
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = xor <16 x i8> %tmp2, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
 	%tmp4 = or <16 x i8> %tmp1, %tmp3
 	ret <16 x i8> %tmp4
@@ -413,8 +413,8 @@
 define <8 x i16> @v_ornQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: v_ornQi16:
 ;CHECK: vorn
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = xor <8 x i16> %tmp2, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
 	%tmp4 = or <8 x i16> %tmp1, %tmp3
 	ret <8 x i16> %tmp4
@@ -423,8 +423,8 @@
 define <4 x i32> @v_ornQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: v_ornQi32:
 ;CHECK: vorn
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = xor <4 x i32> %tmp2, < i32 -1, i32 -1, i32 -1, i32 -1 >
 	%tmp4 = or <4 x i32> %tmp1, %tmp3
 	ret <4 x i32> %tmp4
@@ -433,8 +433,8 @@
 define <2 x i64> @v_ornQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: v_ornQi64:
 ;CHECK: vorn
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = xor <2 x i64> %tmp2, < i64 -1, i64 -1 >
 	%tmp4 = or <2 x i64> %tmp1, %tmp3
 	ret <2 x i64> %tmp4
@@ -443,8 +443,8 @@
 define <8 x i8> @vtsti8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vtsti8:
 ;CHECK: vtst.8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = and <8 x i8> %tmp1, %tmp2
 	%tmp4 = icmp ne <8 x i8> %tmp3, zeroinitializer
         %tmp5 = sext <8 x i1> %tmp4 to <8 x i8>
@@ -454,8 +454,8 @@
 define <4 x i16> @vtsti16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vtsti16:
 ;CHECK: vtst.16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = and <4 x i16> %tmp1, %tmp2
 	%tmp4 = icmp ne <4 x i16> %tmp3, zeroinitializer
         %tmp5 = sext <4 x i1> %tmp4 to <4 x i16>
@@ -465,8 +465,8 @@
 define <2 x i32> @vtsti32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vtsti32:
 ;CHECK: vtst.32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = and <2 x i32> %tmp1, %tmp2
 	%tmp4 = icmp ne <2 x i32> %tmp3, zeroinitializer
         %tmp5 = sext <2 x i1> %tmp4 to <2 x i32>
@@ -476,8 +476,8 @@
 define <16 x i8> @vtstQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vtstQi8:
 ;CHECK: vtst.8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = and <16 x i8> %tmp1, %tmp2
 	%tmp4 = icmp ne <16 x i8> %tmp3, zeroinitializer
         %tmp5 = sext <16 x i1> %tmp4 to <16 x i8>
@@ -487,8 +487,8 @@
 define <8 x i16> @vtstQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vtstQi16:
 ;CHECK: vtst.16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = and <8 x i16> %tmp1, %tmp2
 	%tmp4 = icmp ne <8 x i16> %tmp3, zeroinitializer
         %tmp5 = sext <8 x i1> %tmp4 to <8 x i16>
@@ -498,8 +498,8 @@
 define <4 x i32> @vtstQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vtstQi32:
 ;CHECK: vtst.32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = and <4 x i32> %tmp1, %tmp2
 	%tmp4 = icmp ne <4 x i32> %tmp3, zeroinitializer
         %tmp5 = sext <4 x i1> %tmp4 to <4 x i32>
@@ -511,7 +511,7 @@
 ; CHECK-NOT: vmov
 ; CHECK-NOT: vmvn
 ; CHECK: vorr
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp3 = or <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
 	ret <8 x i8> %tmp3
 }
@@ -521,7 +521,7 @@
 ; CHECK-NOT: vmov
 ; CHECK-NOT: vmvn
 ; CHECK: vorr
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp3 = or <16 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 1>
 	ret <16 x i8> %tmp3
 }
@@ -531,7 +531,7 @@
 ; CHECK-NOT: vmov
 ; CHECK-NOT: vmvn
 ; CHECK: vbic
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp3 = and <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
 	ret <8 x i8> %tmp3
 }
@@ -541,7 +541,7 @@
 ; CHECK-NOT: vmov
 ; CHECK-NOT: vmvn
 ; CHECK: vbic
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp3 = and <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 -1, i8 -1, i8 0 >
 	ret <16 x i8> %tmp3
 }
diff --git a/llvm/test/CodeGen/ARM/vbsl-constant.ll b/llvm/test/CodeGen/ARM/vbsl-constant.ll
index 5e033fe..6bcbbc8 100644
--- a/llvm/test/CodeGen/ARM/vbsl-constant.ll
+++ b/llvm/test/CodeGen/ARM/vbsl-constant.ll
@@ -5,9 +5,9 @@
 ;CHECK: vldr
 ;CHECK: vldr
 ;CHECK: vbsl
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
-	%tmp3 = load <8 x i8>* %C
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
+	%tmp3 = load <8 x i8>, <8 x i8>* %C
 	%tmp4 = and <8 x i8> %tmp1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 	%tmp6 = and <8 x i8> %tmp3, <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4>
 	%tmp7 = or <8 x i8> %tmp4, %tmp6
@@ -19,9 +19,9 @@
 ;CHECK: vldr
 ;CHECK: vldr
 ;CHECK: vbsl
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
-	%tmp3 = load <4 x i16>* %C
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
+	%tmp3 = load <4 x i16>, <4 x i16>* %C
 	%tmp4 = and <4 x i16> %tmp1, <i16 3, i16 3, i16 3, i16 3>
 	%tmp6 = and <4 x i16> %tmp3, <i16 -4, i16 -4, i16 -4, i16 -4>
 	%tmp7 = or <4 x i16> %tmp4, %tmp6
@@ -33,9 +33,9 @@
 ;CHECK: vldr
 ;CHECK: vldr
 ;CHECK: vbsl
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
-	%tmp3 = load <2 x i32>* %C
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
+	%tmp3 = load <2 x i32>, <2 x i32>* %C
 	%tmp4 = and <2 x i32> %tmp1, <i32 3, i32 3>
 	%tmp6 = and <2 x i32> %tmp3, <i32 -4, i32 -4>
 	%tmp7 = or <2 x i32> %tmp4, %tmp6
@@ -48,9 +48,9 @@
 ;CHECK: vldr
 ;CHECK: vldr
 ;CHECK: vbsl
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
-	%tmp3 = load <1 x i64>* %C
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
+	%tmp3 = load <1 x i64>, <1 x i64>* %C
 	%tmp4 = and <1 x i64> %tmp1, <i64 3>
 	%tmp6 = and <1 x i64> %tmp3, <i64 -4>
 	%tmp7 = or <1 x i64> %tmp4, %tmp6
@@ -62,9 +62,9 @@
 ;CHECK: vld1.32
 ;CHECK: vld1.32
 ;CHECK: vbsl
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
-	%tmp3 = load <16 x i8>* %C
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
+	%tmp3 = load <16 x i8>, <16 x i8>* %C
 	%tmp4 = and <16 x i8> %tmp1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
 	%tmp6 = and <16 x i8> %tmp3, <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4>
 	%tmp7 = or <16 x i8> %tmp4, %tmp6
@@ -76,9 +76,9 @@
 ;CHECK: vld1.32
 ;CHECK: vld1.32
 ;CHECK: vbsl
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
-	%tmp3 = load <8 x i16>* %C
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
+	%tmp3 = load <8 x i16>, <8 x i16>* %C
 	%tmp4 = and <8 x i16> %tmp1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
 	%tmp6 = and <8 x i16> %tmp3, <i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4>
 	%tmp7 = or <8 x i16> %tmp4, %tmp6
@@ -90,9 +90,9 @@
 ;CHECK: vld1.32
 ;CHECK: vld1.32
 ;CHECK: vbsl
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
-	%tmp3 = load <4 x i32>* %C
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
+	%tmp3 = load <4 x i32>, <4 x i32>* %C
 	%tmp4 = and <4 x i32> %tmp1, <i32 3, i32 3, i32 3, i32 3>
 	%tmp6 = and <4 x i32> %tmp3, <i32 -4, i32 -4, i32 -4, i32 -4>
 	%tmp7 = or <4 x i32> %tmp4, %tmp6
@@ -105,9 +105,9 @@
 ;CHECK: vld1.32
 ;CHECK: vld1.64
 ;CHECK: vbsl
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
-	%tmp3 = load <2 x i64>* %C
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
+	%tmp3 = load <2 x i64>, <2 x i64>* %C
 	%tmp4 = and <2 x i64> %tmp1, <i64 3, i64 3>
 	%tmp6 = and <2 x i64> %tmp3, <i64 -4, i64 -4>
 	%tmp7 = or <2 x i64> %tmp4, %tmp6
diff --git a/llvm/test/CodeGen/ARM/vbsl.ll b/llvm/test/CodeGen/ARM/vbsl.ll
index ddc37cc..6812dd9 100644
--- a/llvm/test/CodeGen/ARM/vbsl.ll
+++ b/llvm/test/CodeGen/ARM/vbsl.ll
@@ -5,9 +5,9 @@
 define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ;CHECK-LABEL: v_bsli8:
 ;CHECK: vbsl
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
-	%tmp3 = load <8 x i8>* %C
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
+	%tmp3 = load <8 x i8>, <8 x i8>* %C
 	%tmp4 = and <8 x i8> %tmp1, %tmp2
 	%tmp5 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
 	%tmp6 = and <8 x i8> %tmp5, %tmp3
@@ -18,9 +18,9 @@
 define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ;CHECK-LABEL: v_bsli16:
 ;CHECK: vbsl
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
-	%tmp3 = load <4 x i16>* %C
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
+	%tmp3 = load <4 x i16>, <4 x i16>* %C
 	%tmp4 = and <4 x i16> %tmp1, %tmp2
 	%tmp5 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 >
 	%tmp6 = and <4 x i16> %tmp5, %tmp3
@@ -31,9 +31,9 @@
 define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ;CHECK-LABEL: v_bsli32:
 ;CHECK: vbsl
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
-	%tmp3 = load <2 x i32>* %C
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
+	%tmp3 = load <2 x i32>, <2 x i32>* %C
 	%tmp4 = and <2 x i32> %tmp1, %tmp2
 	%tmp5 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 >
 	%tmp6 = and <2 x i32> %tmp5, %tmp3
@@ -44,9 +44,9 @@
 define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind {
 ;CHECK-LABEL: v_bsli64:
 ;CHECK: vbsl
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
-	%tmp3 = load <1 x i64>* %C
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
+	%tmp3 = load <1 x i64>, <1 x i64>* %C
 	%tmp4 = and <1 x i64> %tmp1, %tmp2
 	%tmp5 = xor <1 x i64> %tmp1, < i64 -1 >
 	%tmp6 = and <1 x i64> %tmp5, %tmp3
@@ -57,9 +57,9 @@
 define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
 ;CHECK-LABEL: v_bslQi8:
 ;CHECK: vbsl
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
-	%tmp3 = load <16 x i8>* %C
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
+	%tmp3 = load <16 x i8>, <16 x i8>* %C
 	%tmp4 = and <16 x i8> %tmp1, %tmp2
 	%tmp5 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
 	%tmp6 = and <16 x i8> %tmp5, %tmp3
@@ -70,9 +70,9 @@
 define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
 ;CHECK-LABEL: v_bslQi16:
 ;CHECK: vbsl
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
-	%tmp3 = load <8 x i16>* %C
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
+	%tmp3 = load <8 x i16>, <8 x i16>* %C
 	%tmp4 = and <8 x i16> %tmp1, %tmp2
 	%tmp5 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
 	%tmp6 = and <8 x i16> %tmp5, %tmp3
@@ -83,9 +83,9 @@
 define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: v_bslQi32:
 ;CHECK: vbsl
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
-	%tmp3 = load <4 x i32>* %C
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
+	%tmp3 = load <4 x i32>, <4 x i32>* %C
 	%tmp4 = and <4 x i32> %tmp1, %tmp2
 	%tmp5 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 >
 	%tmp6 = and <4 x i32> %tmp5, %tmp3
@@ -96,9 +96,9 @@
 define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind {
 ;CHECK-LABEL: v_bslQi64:
 ;CHECK: vbsl
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
-	%tmp3 = load <2 x i64>* %C
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
+	%tmp3 = load <2 x i64>, <2 x i64>* %C
 	%tmp4 = and <2 x i64> %tmp1, %tmp2
 	%tmp5 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 >
 	%tmp6 = and <2 x i64> %tmp5, %tmp3
diff --git a/llvm/test/CodeGen/ARM/vceq.ll b/llvm/test/CodeGen/ARM/vceq.ll
index e3202e4..3772401 100644
--- a/llvm/test/CodeGen/ARM/vceq.ll
+++ b/llvm/test/CodeGen/ARM/vceq.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @vceqi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vceqi8:
 ;CHECK: vceq.i8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = icmp eq <8 x i8> %tmp1, %tmp2
         %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
 	ret <8 x i8> %tmp4
@@ -13,8 +13,8 @@
 define <4 x i16> @vceqi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vceqi16:
 ;CHECK: vceq.i16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = icmp eq <4 x i16> %tmp1, %tmp2
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
 	ret <4 x i16> %tmp4
@@ -23,8 +23,8 @@
 define <2 x i32> @vceqi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vceqi32:
 ;CHECK: vceq.i32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = icmp eq <2 x i32> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -33,8 +33,8 @@
 define <2 x i32> @vceqf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vceqf32:
 ;CHECK: vceq.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = fcmp oeq <2 x float> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -43,8 +43,8 @@
 define <16 x i8> @vceqQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vceqQi8:
 ;CHECK: vceq.i8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = icmp eq <16 x i8> %tmp1, %tmp2
         %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
 	ret <16 x i8> %tmp4
@@ -53,8 +53,8 @@
 define <8 x i16> @vceqQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vceqQi16:
 ;CHECK: vceq.i16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = icmp eq <8 x i16> %tmp1, %tmp2
         %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
 	ret <8 x i16> %tmp4
@@ -63,8 +63,8 @@
 define <4 x i32> @vceqQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vceqQi32:
 ;CHECK: vceq.i32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = icmp eq <4 x i32> %tmp1, %tmp2
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -73,8 +73,8 @@
 define <4 x i32> @vceqQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: vceqQf32:
 ;CHECK: vceq.f32
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = fcmp oeq <4 x float> %tmp1, %tmp2
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -85,7 +85,7 @@
 ;CHECK-NOT: vmov
 ;CHECK-NOT: vmvn
 ;CHECK: vceq.i8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp3 = icmp eq <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
         %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
 	ret <8 x i8> %tmp4
diff --git a/llvm/test/CodeGen/ARM/vcge.ll b/llvm/test/CodeGen/ARM/vcge.ll
index 3739f5ee..2cd33cf 100644
--- a/llvm/test/CodeGen/ARM/vcge.ll
+++ b/llvm/test/CodeGen/ARM/vcge.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @vcges8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vcges8:
 ;CHECK: vcge.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = icmp sge <8 x i8> %tmp1, %tmp2
         %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
 	ret <8 x i8> %tmp4
@@ -13,8 +13,8 @@
 define <4 x i16> @vcges16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vcges16:
 ;CHECK: vcge.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = icmp sge <4 x i16> %tmp1, %tmp2
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
 	ret <4 x i16> %tmp4
@@ -23,8 +23,8 @@
 define <2 x i32> @vcges32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vcges32:
 ;CHECK: vcge.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = icmp sge <2 x i32> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -33,8 +33,8 @@
 define <8 x i8> @vcgeu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vcgeu8:
 ;CHECK: vcge.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = icmp uge <8 x i8> %tmp1, %tmp2
         %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
 	ret <8 x i8> %tmp4
@@ -43,8 +43,8 @@
 define <4 x i16> @vcgeu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vcgeu16:
 ;CHECK: vcge.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = icmp uge <4 x i16> %tmp1, %tmp2
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
 	ret <4 x i16> %tmp4
@@ -53,8 +53,8 @@
 define <2 x i32> @vcgeu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vcgeu32:
 ;CHECK: vcge.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = icmp uge <2 x i32> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -63,8 +63,8 @@
 define <2 x i32> @vcgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vcgef32:
 ;CHECK: vcge.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = fcmp oge <2 x float> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -73,8 +73,8 @@
 define <16 x i8> @vcgeQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vcgeQs8:
 ;CHECK: vcge.s8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = icmp sge <16 x i8> %tmp1, %tmp2
         %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
 	ret <16 x i8> %tmp4
@@ -83,8 +83,8 @@
 define <8 x i16> @vcgeQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vcgeQs16:
 ;CHECK: vcge.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = icmp sge <8 x i16> %tmp1, %tmp2
         %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
 	ret <8 x i16> %tmp4
@@ -93,8 +93,8 @@
 define <4 x i32> @vcgeQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vcgeQs32:
 ;CHECK: vcge.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = icmp sge <4 x i32> %tmp1, %tmp2
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -103,8 +103,8 @@
 define <16 x i8> @vcgeQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vcgeQu8:
 ;CHECK: vcge.u8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = icmp uge <16 x i8> %tmp1, %tmp2
         %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
 	ret <16 x i8> %tmp4
@@ -113,8 +113,8 @@
 define <8 x i16> @vcgeQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vcgeQu16:
 ;CHECK: vcge.u16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = icmp uge <8 x i16> %tmp1, %tmp2
         %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
 	ret <8 x i16> %tmp4
@@ -123,8 +123,8 @@
 define <4 x i32> @vcgeQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vcgeQu32:
 ;CHECK: vcge.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = icmp uge <4 x i32> %tmp1, %tmp2
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -133,8 +133,8 @@
 define <4 x i32> @vcgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: vcgeQf32:
 ;CHECK: vcge.f32
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = fcmp oge <4 x float> %tmp1, %tmp2
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -143,8 +143,8 @@
 define <2 x i32> @vacgef32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vacgef32:
 ;CHECK: vacge.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -152,8 +152,8 @@
 define <4 x i32> @vacgeQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: vacgeQf32:
 ;CHECK: vacge.f32
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -166,7 +166,7 @@
 ;CHECK-NOT: vmov
 ;CHECK-NOT: vmvn
 ;CHECK: vcge.s8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp3 = icmp sge <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
         %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
 	ret <8 x i8> %tmp4
@@ -177,7 +177,7 @@
 ;CHECK-NOT: vmov
 ;CHECK-NOT: vmvn
 ;CHECK: vcle.s8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp3 = icmp sle <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
         %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
 	ret <8 x i8> %tmp4
diff --git a/llvm/test/CodeGen/ARM/vcgt.ll b/llvm/test/CodeGen/ARM/vcgt.ll
index 2f736f6..c39c939 100644
--- a/llvm/test/CodeGen/ARM/vcgt.ll
+++ b/llvm/test/CodeGen/ARM/vcgt.ll
@@ -4,8 +4,8 @@
 define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vcgts8:
 ;CHECK: vcgt.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = icmp sgt <8 x i8> %tmp1, %tmp2
         %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
 	ret <8 x i8> %tmp4
@@ -14,8 +14,8 @@
 define <4 x i16> @vcgts16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vcgts16:
 ;CHECK: vcgt.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = icmp sgt <4 x i16> %tmp1, %tmp2
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
 	ret <4 x i16> %tmp4
@@ -24,8 +24,8 @@
 define <2 x i32> @vcgts32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vcgts32:
 ;CHECK: vcgt.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = icmp sgt <2 x i32> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -34,8 +34,8 @@
 define <8 x i8> @vcgtu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vcgtu8:
 ;CHECK: vcgt.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = icmp ugt <8 x i8> %tmp1, %tmp2
         %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
 	ret <8 x i8> %tmp4
@@ -44,8 +44,8 @@
 define <4 x i16> @vcgtu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vcgtu16:
 ;CHECK: vcgt.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = icmp ugt <4 x i16> %tmp1, %tmp2
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
 	ret <4 x i16> %tmp4
@@ -54,8 +54,8 @@
 define <2 x i32> @vcgtu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vcgtu32:
 ;CHECK: vcgt.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = icmp ugt <2 x i32> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -64,8 +64,8 @@
 define <2 x i32> @vcgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vcgtf32:
 ;CHECK: vcgt.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = fcmp ogt <2 x float> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -74,8 +74,8 @@
 define <16 x i8> @vcgtQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vcgtQs8:
 ;CHECK: vcgt.s8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = icmp sgt <16 x i8> %tmp1, %tmp2
         %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
 	ret <16 x i8> %tmp4
@@ -84,8 +84,8 @@
 define <8 x i16> @vcgtQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vcgtQs16:
 ;CHECK: vcgt.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = icmp sgt <8 x i16> %tmp1, %tmp2
         %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
 	ret <8 x i16> %tmp4
@@ -94,8 +94,8 @@
 define <4 x i32> @vcgtQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vcgtQs32:
 ;CHECK: vcgt.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = icmp sgt <4 x i32> %tmp1, %tmp2
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -104,8 +104,8 @@
 define <16 x i8> @vcgtQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vcgtQu8:
 ;CHECK: vcgt.u8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = icmp ugt <16 x i8> %tmp1, %tmp2
         %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
 	ret <16 x i8> %tmp4
@@ -114,8 +114,8 @@
 define <8 x i16> @vcgtQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vcgtQu16:
 ;CHECK: vcgt.u16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = icmp ugt <8 x i16> %tmp1, %tmp2
         %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
 	ret <8 x i16> %tmp4
@@ -124,8 +124,8 @@
 define <4 x i32> @vcgtQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vcgtQu32:
 ;CHECK: vcgt.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = icmp ugt <4 x i32> %tmp1, %tmp2
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -134,8 +134,8 @@
 define <4 x i32> @vcgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: vcgtQf32:
 ;CHECK: vcgt.f32
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -144,8 +144,8 @@
 define <2 x i32> @vacgtf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vacgtf32:
 ;CHECK: vacgt.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -153,8 +153,8 @@
 define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: vacgtQf32:
 ;CHECK: vacgt.f32
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -165,8 +165,8 @@
 ;CHECK: vmov.i32 [[Q0:q[0-9]+]], #0x1
 ;CHECK: vcgt.f32 [[Q1:q[0-9]+]]
 ;CHECK: vand [[Q2:q[0-9]+]], [[Q1]], [[Q0]]
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2
         %tmp4 = zext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -180,7 +180,7 @@
 ;CHECK-NOT: vmov
 ;CHECK-NOT: vmvn
 ;CHECK: vcgt.s8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp3 = icmp sgt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
         %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
 	ret <8 x i8> %tmp4
@@ -191,7 +191,7 @@
 ;CHECK-NOT: vmov
 ;CHECK-NOT: vmvn
 ;CHECK: vclt.s8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp3 = icmp slt <8 x i8> %tmp1, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
         %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
 	ret <8 x i8> %tmp4
diff --git a/llvm/test/CodeGen/ARM/vcnt.ll b/llvm/test/CodeGen/ARM/vcnt.ll
index 390559b..de251c5 100644
--- a/llvm/test/CodeGen/ARM/vcnt.ll
+++ b/llvm/test/CodeGen/ARM/vcnt.ll
@@ -4,7 +4,7 @@
 define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vcnt8:
 ;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1)
 	ret <8 x i8> %tmp2
 }
@@ -12,7 +12,7 @@
 define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vcntQ8:
 ;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1)
 	ret <16 x i8> %tmp2
 }
@@ -23,7 +23,7 @@
 define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vclz8:
 ;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}}
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0)
 	ret <8 x i8> %tmp2
 }
@@ -31,7 +31,7 @@
 define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vclz16:
 ;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}}
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0)
 	ret <4 x i16> %tmp2
 }
@@ -39,7 +39,7 @@
 define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vclz32:
 ;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}}
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0)
 	ret <2 x i32> %tmp2
 }
@@ -47,7 +47,7 @@
 define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vclzQ8:
 ;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}}
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0)
 	ret <16 x i8> %tmp2
 }
@@ -55,7 +55,7 @@
 define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vclzQ16:
 ;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}}
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0)
 	ret <8 x i16> %tmp2
 }
@@ -63,7 +63,7 @@
 define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vclzQ32:
 ;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}}
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0)
 	ret <4 x i32> %tmp2
 }
@@ -79,7 +79,7 @@
 define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vclss8:
 ;CHECK: vcls.s8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
 	ret <8 x i8> %tmp2
 }
@@ -87,7 +87,7 @@
 define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vclss16:
 ;CHECK: vcls.s16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
 	ret <4 x i16> %tmp2
 }
@@ -95,7 +95,7 @@
 define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vclss32:
 ;CHECK: vcls.s32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
 	ret <2 x i32> %tmp2
 }
@@ -103,7 +103,7 @@
 define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vclsQs8:
 ;CHECK: vcls.s8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
 	ret <16 x i8> %tmp2
 }
@@ -111,7 +111,7 @@
 define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vclsQs16:
 ;CHECK: vcls.s16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
 	ret <8 x i16> %tmp2
 }
@@ -119,7 +119,7 @@
 define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vclsQs32:
 ;CHECK: vcls.s32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
 	ret <4 x i32> %tmp2
 }
diff --git a/llvm/test/CodeGen/ARM/vcombine.ll b/llvm/test/CodeGen/ARM/vcombine.ll
index 33aa71d..9491c15 100644
--- a/llvm/test/CodeGen/ARM/vcombine.ll
+++ b/llvm/test/CodeGen/ARM/vcombine.ll
@@ -7,8 +7,8 @@
 ; CHECK-LE: vmov r2, r3, d17
 ; CHECK-BE: vmov r1, r0, d16
 ; CHECK-BE: vmov r3, r2, d17
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 	ret <16 x i8> %tmp3
 }
@@ -19,8 +19,8 @@
 ; CHECK-LE: vmov r2, r3, d17
 ; CHECK-BE: vmov r1, r0, d16
 ; CHECK-BE: vmov r3, r2, d17
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 	ret <8 x i16> %tmp3
 }
@@ -31,8 +31,8 @@
 ; CHECK-LE: vmov r2, r3, d17
 ; CHECK-BE: vmov r1, r0, d16
 ; CHECK-BE: vmov r3, r2, d17
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 	ret <4 x i32> %tmp3
 }
@@ -43,8 +43,8 @@
 ; CHECK-LE: vmov r2, r3, d17
 ; CHECK-BE: vmov r1, r0, d16
 ; CHECK-BE: vmov r3, r2, d17
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 	ret <4 x float> %tmp3
 }
@@ -55,8 +55,8 @@
 ; CHECK-LE: vmov r2, r3, d17
 ; CHECK-BE: vmov r1, r0, d16
 ; CHECK-BE: vmov r3, r2, d17
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1>
 	ret <2 x i64> %tmp3
 }
@@ -69,7 +69,7 @@
 ; CHECK-NOT: vst
 ; CHECK-LE: vmov r0, r1, d16
 ; CHECK-BE: vmov r1, r0, d16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
         ret <4 x i16> %tmp2
 }
@@ -79,7 +79,7 @@
 ; CHECK-NOT: vst
 ; CHECK-LE: vmov r0, r1, d17
 ; CHECK-BE: vmov r1, r0, d16
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
         %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
         ret <8 x i8> %tmp2
 }
diff --git a/llvm/test/CodeGen/ARM/vcvt-cost.ll b/llvm/test/CodeGen/ARM/vcvt-cost.ll
index 5e56a5b..c80450a 100644
--- a/llvm/test/CodeGen/ARM/vcvt-cost.ll
+++ b/llvm/test/CodeGen/ARM/vcvt-cost.ll
@@ -9,7 +9,7 @@
 ; CHECK: vmovl.s8
 ; CHECK: vmovl.s16
 ; CHECK: vmovl.s16
-  %v0 = load %T0_5* %loadaddr
+  %v0 = load %T0_5, %T0_5* %loadaddr
 ; COST: func_cvt5
 ; COST: cost of 3 {{.*}} sext
   %r = sext %T0_5 %v0 to %T1_5
@@ -25,7 +25,7 @@
 ; CHECK: vmovl.u8
 ; CHECK: vmovl.u16
 ; CHECK: vmovl.u16
-  %v0 = load %TA0_5* %loadaddr
+  %v0 = load %TA0_5, %TA0_5* %loadaddr
 ; COST: func_cvt1
 ; COST: cost of 3 {{.*}} zext
   %r = zext %TA0_5 %v0 to %TA1_5
@@ -40,7 +40,7 @@
 ; CHECK: vmovn.i32
 ; CHECK: vmovn.i32
 ; CHECK: vmovn.i16
-  %v0 = load %T0_51* %loadaddr
+  %v0 = load %T0_51, %T0_51* %loadaddr
 ; COST: func_cvt51
 ; COST: cost of 3 {{.*}} trunc
   %r = trunc %T0_51 %v0 to %T1_51
@@ -56,7 +56,7 @@
 ; CHECK: vmovl.s16
 ; CHECK: vmovl.s16
 ; CHECK: vmovl.s16
-  %v0 = load %TT0_5* %loadaddr
+  %v0 = load %TT0_5, %TT0_5* %loadaddr
 ; COST: func_cvt52
 ; COST: cost of 6 {{.*}} sext
   %r = sext %TT0_5 %v0 to %TT1_5
@@ -73,7 +73,7 @@
 ; CHECK: vmovl.u16
 ; CHECK: vmovl.u16
 ; CHECK: vmovl.u16
-  %v0 = load %TTA0_5* %loadaddr
+  %v0 = load %TTA0_5, %TTA0_5* %loadaddr
 ; COST: func_cvt12
 ; COST: cost of 6 {{.*}} zext
   %r = zext %TTA0_5 %v0 to %TTA1_5
@@ -91,7 +91,7 @@
 ; CHECK: vmovn.i32
 ; CHECK: vmovn.i16
 ; CHECK: vmovn.i16
-  %v0 = load %TT0_51* %loadaddr
+  %v0 = load %TT0_51, %TT0_51* %loadaddr
 ; COST: func_cvt512
 ; COST: cost of 6 {{.*}} trunc
   %r = trunc %TT0_51 %v0 to %TT1_51
@@ -103,7 +103,7 @@
 define void @sext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
 ; CHECK: vmovl.s32
 ; CHECK: vmovl.s32
-  %v0 = load <4 x i16>* %loadaddr
+  %v0 = load <4 x i16>, <4 x i16>* %loadaddr
 ; COST: sext_v4i16_v4i64
 ; COST: cost of 3 {{.*}} sext
   %r = sext <4 x i16> %v0 to <4 x i64>
@@ -115,7 +115,7 @@
 define void @zext_v4i16_v4i64(<4 x i16>* %loadaddr, <4 x i64>* %storeaddr) {
 ; CHECK: vmovl.u32
 ; CHECK: vmovl.u32
-  %v0 = load <4 x i16>* %loadaddr
+  %v0 = load <4 x i16>, <4 x i16>* %loadaddr
 ; COST: zext_v4i16_v4i64
 ; COST: cost of 3 {{.*}} zext
   %r = zext <4 x i16> %v0 to <4 x i64>
@@ -129,7 +129,7 @@
 ; CHECK: vmovl.s32
 ; CHECK: vmovl.s32
 ; CHECK: vmovl.s32
-  %v0 = load <8 x i16>* %loadaddr
+  %v0 = load <8 x i16>, <8 x i16>* %loadaddr
 ; COST: sext_v8i16_v8i64
 ; COST: cost of 6 {{.*}} sext
   %r = sext <8 x i16> %v0 to <8 x i64>
@@ -143,7 +143,7 @@
 ; CHECK: vmovl.u32
 ; CHECK: vmovl.u32
 ; CHECK: vmovl.u32
-  %v0 = load <8 x i16>* %loadaddr
+  %v0 = load <8 x i16>, <8 x i16>* %loadaddr
 ; COST: zext_v8i16_v8i64
 ; COST: cost of 6 {{.*}} zext
   %r = zext <8 x i16> %v0 to <8 x i64>
diff --git a/llvm/test/CodeGen/ARM/vcvt-v8.ll b/llvm/test/CodeGen/ARM/vcvt-v8.ll
index c449009e..9d5972f 100644
--- a/llvm/test/CodeGen/ARM/vcvt-v8.ll
+++ b/llvm/test/CodeGen/ARM/vcvt-v8.ll
@@ -2,7 +2,7 @@
 define <4 x i32> @vcvtasq(<4 x float>* %A) {
 ; CHECK: vcvtasq
 ; CHECK: vcvta.s32.f32 q{{[0-9]+}}, q{{[0-9]+}}
-  %tmp1 = load <4 x float>* %A
+  %tmp1 = load <4 x float>, <4 x float>* %A
   %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtas.v4i32.v4f32(<4 x float> %tmp1)
   ret <4 x i32> %tmp2
 }
@@ -10,7 +10,7 @@
 define <2 x i32> @vcvtasd(<2 x float>* %A) {
 ; CHECK: vcvtasd
 ; CHECK: vcvta.s32.f32 d{{[0-9]+}}, d{{[0-9]+}}
-  %tmp1 = load <2 x float>* %A
+  %tmp1 = load <2 x float>, <2 x float>* %A
   %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtas.v2i32.v2f32(<2 x float> %tmp1)
   ret <2 x i32> %tmp2
 }
@@ -18,7 +18,7 @@
 define <4 x i32> @vcvtnsq(<4 x float>* %A) {
 ; CHECK: vcvtnsq
 ; CHECK: vcvtn.s32.f32 q{{[0-9]+}}, q{{[0-9]+}}
-  %tmp1 = load <4 x float>* %A
+  %tmp1 = load <4 x float>, <4 x float>* %A
   %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtns.v4i32.v4f32(<4 x float> %tmp1)
   ret <4 x i32> %tmp2
 }
@@ -26,7 +26,7 @@
 define <2 x i32> @vcvtnsd(<2 x float>* %A) {
 ; CHECK: vcvtnsd
 ; CHECK: vcvtn.s32.f32 d{{[0-9]+}}, d{{[0-9]+}}
-  %tmp1 = load <2 x float>* %A
+  %tmp1 = load <2 x float>, <2 x float>* %A
   %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtns.v2i32.v2f32(<2 x float> %tmp1)
   ret <2 x i32> %tmp2
 }
@@ -34,7 +34,7 @@
 define <4 x i32> @vcvtpsq(<4 x float>* %A) {
 ; CHECK: vcvtpsq
 ; CHECK: vcvtp.s32.f32 q{{[0-9]+}}, q{{[0-9]+}}
-  %tmp1 = load <4 x float>* %A
+  %tmp1 = load <4 x float>, <4 x float>* %A
   %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtps.v4i32.v4f32(<4 x float> %tmp1)
   ret <4 x i32> %tmp2
 }
@@ -42,7 +42,7 @@
 define <2 x i32> @vcvtpsd(<2 x float>* %A) {
 ; CHECK: vcvtpsd
 ; CHECK: vcvtp.s32.f32 d{{[0-9]+}}, d{{[0-9]+}}
-  %tmp1 = load <2 x float>* %A
+  %tmp1 = load <2 x float>, <2 x float>* %A
   %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtps.v2i32.v2f32(<2 x float> %tmp1)
   ret <2 x i32> %tmp2
 }
@@ -50,7 +50,7 @@
 define <4 x i32> @vcvtmsq(<4 x float>* %A) {
 ; CHECK: vcvtmsq
 ; CHECK: vcvtm.s32.f32 q{{[0-9]+}}, q{{[0-9]+}}
-  %tmp1 = load <4 x float>* %A
+  %tmp1 = load <4 x float>, <4 x float>* %A
   %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtms.v4i32.v4f32(<4 x float> %tmp1)
   ret <4 x i32> %tmp2
 }
@@ -58,7 +58,7 @@
 define <2 x i32> @vcvtmsd(<2 x float>* %A) {
 ; CHECK: vcvtmsd
 ; CHECK: vcvtm.s32.f32 d{{[0-9]+}}, d{{[0-9]+}}
-  %tmp1 = load <2 x float>* %A
+  %tmp1 = load <2 x float>, <2 x float>* %A
   %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtms.v2i32.v2f32(<2 x float> %tmp1)
   ret <2 x i32> %tmp2
 }
@@ -66,7 +66,7 @@
 define <4 x i32> @vcvtauq(<4 x float>* %A) {
 ; CHECK: vcvtauq
 ; CHECK: vcvta.u32.f32 q{{[0-9]+}}, q{{[0-9]+}}
-  %tmp1 = load <4 x float>* %A
+  %tmp1 = load <4 x float>, <4 x float>* %A
   %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtau.v4i32.v4f32(<4 x float> %tmp1)
   ret <4 x i32> %tmp2
 }
@@ -74,7 +74,7 @@
 define <2 x i32> @vcvtaud(<2 x float>* %A) {
 ; CHECK: vcvtaud
 ; CHECK: vcvta.u32.f32 d{{[0-9]+}}, d{{[0-9]+}}
-  %tmp1 = load <2 x float>* %A
+  %tmp1 = load <2 x float>, <2 x float>* %A
   %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtau.v2i32.v2f32(<2 x float> %tmp1)
   ret <2 x i32> %tmp2
 }
@@ -82,7 +82,7 @@
 define <4 x i32> @vcvtnuq(<4 x float>* %A) {
 ; CHECK: vcvtnuq
 ; CHECK: vcvtn.u32.f32 q{{[0-9]+}}, q{{[0-9]+}}
-  %tmp1 = load <4 x float>* %A
+  %tmp1 = load <4 x float>, <4 x float>* %A
   %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtnu.v4i32.v4f32(<4 x float> %tmp1)
   ret <4 x i32> %tmp2
 }
@@ -90,7 +90,7 @@
 define <2 x i32> @vcvtnud(<2 x float>* %A) {
 ; CHECK: vcvtnud
 ; CHECK: vcvtn.u32.f32 d{{[0-9]+}}, d{{[0-9]+}}
-  %tmp1 = load <2 x float>* %A
+  %tmp1 = load <2 x float>, <2 x float>* %A
   %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtnu.v2i32.v2f32(<2 x float> %tmp1)
   ret <2 x i32> %tmp2
 }
@@ -98,7 +98,7 @@
 define <4 x i32> @vcvtpuq(<4 x float>* %A) {
 ; CHECK: vcvtpuq
 ; CHECK: vcvtp.u32.f32 q{{[0-9]+}}, q{{[0-9]+}}
-  %tmp1 = load <4 x float>* %A
+  %tmp1 = load <4 x float>, <4 x float>* %A
   %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtpu.v4i32.v4f32(<4 x float> %tmp1)
   ret <4 x i32> %tmp2
 }
@@ -106,7 +106,7 @@
 define <2 x i32> @vcvtpud(<2 x float>* %A) {
 ; CHECK: vcvtpud
 ; CHECK: vcvtp.u32.f32 d{{[0-9]+}}, d{{[0-9]+}}
-  %tmp1 = load <2 x float>* %A
+  %tmp1 = load <2 x float>, <2 x float>* %A
   %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtpu.v2i32.v2f32(<2 x float> %tmp1)
   ret <2 x i32> %tmp2
 }
@@ -114,7 +114,7 @@
 define <4 x i32> @vcvtmuq(<4 x float>* %A) {
 ; CHECK: vcvtmuq
 ; CHECK: vcvtm.u32.f32 q{{[0-9]+}}, q{{[0-9]+}}
-  %tmp1 = load <4 x float>* %A
+  %tmp1 = load <4 x float>, <4 x float>* %A
   %tmp2 = call <4 x i32> @llvm.arm.neon.vcvtmu.v4i32.v4f32(<4 x float> %tmp1)
   ret <4 x i32> %tmp2
 }
@@ -122,7 +122,7 @@
 define <2 x i32> @vcvtmud(<2 x float>* %A) {
 ; CHECK: vcvtmud
 ; CHECK: vcvtm.u32.f32 d{{[0-9]+}}, d{{[0-9]+}}
-  %tmp1 = load <2 x float>* %A
+  %tmp1 = load <2 x float>, <2 x float>* %A
   %tmp2 = call <2 x i32> @llvm.arm.neon.vcvtmu.v2i32.v2f32(<2 x float> %tmp1)
   ret <2 x i32> %tmp2
 }
diff --git a/llvm/test/CodeGen/ARM/vcvt.ll b/llvm/test/CodeGen/ARM/vcvt.ll
index af4e6a3..0b7ffb8 100644
--- a/llvm/test/CodeGen/ARM/vcvt.ll
+++ b/llvm/test/CodeGen/ARM/vcvt.ll
@@ -3,7 +3,7 @@
 define <2 x i32> @vcvt_f32tos32(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: vcvt_f32tos32:
 ;CHECK: vcvt.s32.f32
-	%tmp1 = load <2 x float>* %A
+	%tmp1 = load <2 x float>, <2 x float>* %A
 	%tmp2 = fptosi <2 x float> %tmp1 to <2 x i32>
 	ret <2 x i32> %tmp2
 }
@@ -11,7 +11,7 @@
 define <2 x i32> @vcvt_f32tou32(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: vcvt_f32tou32:
 ;CHECK: vcvt.u32.f32
-	%tmp1 = load <2 x float>* %A
+	%tmp1 = load <2 x float>, <2 x float>* %A
 	%tmp2 = fptoui <2 x float> %tmp1 to <2 x i32>
 	ret <2 x i32> %tmp2
 }
@@ -19,7 +19,7 @@
 define <2 x float> @vcvt_s32tof32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vcvt_s32tof32:
 ;CHECK: vcvt.f32.s32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = sitofp <2 x i32> %tmp1 to <2 x float>
 	ret <2 x float> %tmp2
 }
@@ -27,7 +27,7 @@
 define <2 x float> @vcvt_u32tof32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vcvt_u32tof32:
 ;CHECK: vcvt.f32.u32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = uitofp <2 x i32> %tmp1 to <2 x float>
 	ret <2 x float> %tmp2
 }
@@ -35,7 +35,7 @@
 define <4 x i32> @vcvtQ_f32tos32(<4 x float>* %A) nounwind {
 ;CHECK-LABEL: vcvtQ_f32tos32:
 ;CHECK: vcvt.s32.f32
-	%tmp1 = load <4 x float>* %A
+	%tmp1 = load <4 x float>, <4 x float>* %A
 	%tmp2 = fptosi <4 x float> %tmp1 to <4 x i32>
 	ret <4 x i32> %tmp2
 }
@@ -43,7 +43,7 @@
 define <4 x i32> @vcvtQ_f32tou32(<4 x float>* %A) nounwind {
 ;CHECK-LABEL: vcvtQ_f32tou32:
 ;CHECK: vcvt.u32.f32
-	%tmp1 = load <4 x float>* %A
+	%tmp1 = load <4 x float>, <4 x float>* %A
 	%tmp2 = fptoui <4 x float> %tmp1 to <4 x i32>
 	ret <4 x i32> %tmp2
 }
@@ -51,7 +51,7 @@
 define <4 x float> @vcvtQ_s32tof32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vcvtQ_s32tof32:
 ;CHECK: vcvt.f32.s32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = sitofp <4 x i32> %tmp1 to <4 x float>
 	ret <4 x float> %tmp2
 }
@@ -59,7 +59,7 @@
 define <4 x float> @vcvtQ_u32tof32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vcvtQ_u32tof32:
 ;CHECK: vcvt.f32.u32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = uitofp <4 x i32> %tmp1 to <4 x float>
 	ret <4 x float> %tmp2
 }
@@ -67,7 +67,7 @@
 define <2 x i32> @vcvt_n_f32tos32(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: vcvt_n_f32tos32:
 ;CHECK: vcvt.s32.f32
-	%tmp1 = load <2 x float>* %A
+	%tmp1 = load <2 x float>, <2 x float>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %tmp1, i32 1)
 	ret <2 x i32> %tmp2
 }
@@ -75,7 +75,7 @@
 define <2 x i32> @vcvt_n_f32tou32(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: vcvt_n_f32tou32:
 ;CHECK: vcvt.u32.f32
-	%tmp1 = load <2 x float>* %A
+	%tmp1 = load <2 x float>, <2 x float>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> %tmp1, i32 1)
 	ret <2 x i32> %tmp2
 }
@@ -83,7 +83,7 @@
 define <2 x float> @vcvt_n_s32tof32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vcvt_n_s32tof32:
 ;CHECK: vcvt.f32.s32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
 	ret <2 x float> %tmp2
 }
@@ -91,7 +91,7 @@
 define <2 x float> @vcvt_n_u32tof32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vcvt_n_u32tof32:
 ;CHECK: vcvt.f32.u32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> %tmp1, i32 1)
 	ret <2 x float> %tmp2
 }
@@ -104,7 +104,7 @@
 define <4 x i32> @vcvtQ_n_f32tos32(<4 x float>* %A) nounwind {
 ;CHECK-LABEL: vcvtQ_n_f32tos32:
 ;CHECK: vcvt.s32.f32
-	%tmp1 = load <4 x float>* %A
+	%tmp1 = load <4 x float>, <4 x float>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> %tmp1, i32 1)
 	ret <4 x i32> %tmp2
 }
@@ -112,7 +112,7 @@
 define <4 x i32> @vcvtQ_n_f32tou32(<4 x float>* %A) nounwind {
 ;CHECK-LABEL: vcvtQ_n_f32tou32:
 ;CHECK: vcvt.u32.f32
-	%tmp1 = load <4 x float>* %A
+	%tmp1 = load <4 x float>, <4 x float>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> %tmp1, i32 1)
 	ret <4 x i32> %tmp2
 }
@@ -120,7 +120,7 @@
 define <4 x float> @vcvtQ_n_s32tof32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vcvtQ_n_s32tof32:
 ;CHECK: vcvt.f32.s32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
 	ret <4 x float> %tmp2
 }
@@ -128,7 +128,7 @@
 define <4 x float> @vcvtQ_n_u32tof32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vcvtQ_n_u32tof32:
 ;CHECK: vcvt.f32.u32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> %tmp1, i32 1)
 	ret <4 x float> %tmp2
 }
@@ -141,7 +141,7 @@
 define <4 x float> @vcvt_f16tof32(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vcvt_f16tof32:
 ;CHECK: vcvt.f32.f16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> %tmp1)
 	ret <4 x float> %tmp2
 }
@@ -149,7 +149,7 @@
 define <4 x i16> @vcvt_f32tof16(<4 x float>* %A) nounwind {
 ;CHECK-LABEL: vcvt_f32tof16:
 ;CHECK: vcvt.f16.f32
-	%tmp1 = load <4 x float>* %A
+	%tmp1 = load <4 x float>, <4 x float>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %tmp1)
 	ret <4 x i16> %tmp2
 }
diff --git a/llvm/test/CodeGen/ARM/vcvt_combine.ll b/llvm/test/CodeGen/ARM/vcvt_combine.ll
index 07ba230..0c856e8 100644
--- a/llvm/test/CodeGen/ARM/vcvt_combine.ll
+++ b/llvm/test/CodeGen/ARM/vcvt_combine.ll
@@ -7,7 +7,7 @@
 ; CHECK-NOT: vmul
 define void @t0() nounwind {
 entry:
-  %tmp = load float* @in, align 4
+  %tmp = load float, float* @in, align 4
   %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
   %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
   %mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00>
@@ -23,7 +23,7 @@
 ; CHECK-NOT: vmul
 define void @t1() nounwind {
 entry:
-  %tmp = load float* @in, align 4
+  %tmp = load float, float* @in, align 4
   %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
   %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
   %mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00>
@@ -39,7 +39,7 @@
 ; CHECK: vmul
 define void @t2() nounwind {
 entry:
-  %tmp = load float* @in, align 4
+  %tmp = load float, float* @in, align 4
   %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
   %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
   %mul.i = fmul <2 x float> %vecinit2.i, <float 0x401B333340000000, float 0x401B333340000000>
@@ -53,7 +53,7 @@
 ; CHECK: vmul
 define void @t3() nounwind {
 entry:
-  %tmp = load float* @in, align 4
+  %tmp = load float, float* @in, align 4
   %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
   %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
   %mul.i = fmul <2 x float> %vecinit2.i, <float 0x4200000000000000, float 0x4200000000000000>
@@ -67,7 +67,7 @@
 ; CHECK-NOT: vmul
 define void @t4() nounwind {
 entry:
-  %tmp = load float* @in, align 4
+  %tmp = load float, float* @in, align 4
   %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
   %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
   %mul.i = fmul <2 x float> %vecinit2.i, <float 0x41F0000000000000, float 0x41F0000000000000>
@@ -81,7 +81,7 @@
 ; CHECK-NOT: vmul
 define void @t5() nounwind {
 entry:
-  %tmp = load float* @in, align 4
+  %tmp = load float, float* @in, align 4
   %vecinit.i = insertelement <4 x float> undef, float %tmp, i32 0
   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %tmp, i32 1
   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %tmp, i32 2
diff --git a/llvm/test/CodeGen/ARM/vdiv_combine.ll b/llvm/test/CodeGen/ARM/vdiv_combine.ll
index 96807f7..8c6e4ba 100644
--- a/llvm/test/CodeGen/ARM/vdiv_combine.ll
+++ b/llvm/test/CodeGen/ARM/vdiv_combine.ll
@@ -11,7 +11,7 @@
 ; CHECK-NOT: {{vdiv|vmul}}
 define void @t1() nounwind {
 entry:
-  %tmp = load i32* @iin, align 4
+  %tmp = load i32, i32* @iin, align 4
   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
   %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -27,7 +27,7 @@
 ; CHECK-NOT: {{vdiv|vmul}}
 define void @t2() nounwind {
 entry:
-  %tmp = load i32* @uin, align 4
+  %tmp = load i32, i32* @uin, align 4
   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
   %vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -41,7 +41,7 @@
 ; CHECK: {{vdiv|vmul}}
 define void @t3() nounwind {
 entry:
-  %tmp = load i32* @iin, align 4
+  %tmp = load i32, i32* @iin, align 4
   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
   %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -55,7 +55,7 @@
 ; CHECK: {{vdiv|vmul}}
 define void @t4() nounwind {
 entry:
-  %tmp = load i32* @iin, align 4
+  %tmp = load i32, i32* @iin, align 4
   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
   %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -69,7 +69,7 @@
 ; CHECK-NOT: {{vdiv|vmul}}
 define void @t5() nounwind {
 entry:
-  %tmp = load i32* @iin, align 4
+  %tmp = load i32, i32* @iin, align 4
   %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
   %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
   %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
@@ -83,7 +83,7 @@
 ; CHECK-NOT: {{vdiv|vmul}}
 define void @t6() nounwind {
 entry:
-  %tmp = load i32* @iin, align 4
+  %tmp = load i32, i32* @iin, align 4
   %vecinit.i = insertelement <4 x i32> undef, i32 %tmp, i32 0
   %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %tmp, i32 1
   %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %tmp, i32 2
diff --git a/llvm/test/CodeGen/ARM/vdup.ll b/llvm/test/CodeGen/ARM/vdup.ll
index 6f8b3dd..36eebbf 100644
--- a/llvm/test/CodeGen/ARM/vdup.ll
+++ b/llvm/test/CodeGen/ARM/vdup.ll
@@ -166,7 +166,7 @@
 define <8 x i8> @vduplane8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vduplane8:
 ;CHECK: vdup.8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
 	ret <8 x i8> %tmp2
 }
@@ -174,7 +174,7 @@
 define <4 x i16> @vduplane16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vduplane16:
 ;CHECK: vdup.16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
 	ret <4 x i16> %tmp2
 }
@@ -182,7 +182,7 @@
 define <2 x i32> @vduplane32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vduplane32:
 ;CHECK: vdup.32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
 	ret <2 x i32> %tmp2
 }
@@ -190,7 +190,7 @@
 define <2 x float> @vduplanefloat(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: vduplanefloat:
 ;CHECK: vdup.32
-	%tmp1 = load <2 x float>* %A
+	%tmp1 = load <2 x float>, <2 x float>* %A
 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> < i32 1, i32 1 >
 	ret <2 x float> %tmp2
 }
@@ -198,7 +198,7 @@
 define <16 x i8> @vduplaneQ8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vduplaneQ8:
 ;CHECK: vdup.8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <16 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
 	ret <16 x i8> %tmp2
 }
@@ -206,7 +206,7 @@
 define <8 x i16> @vduplaneQ16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vduplaneQ16:
 ;CHECK: vdup.16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> < i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1 >
 	ret <8 x i16> %tmp2
 }
@@ -214,7 +214,7 @@
 define <4 x i32> @vduplaneQ32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vduplaneQ32:
 ;CHECK: vdup.32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
 	ret <4 x i32> %tmp2
 }
@@ -222,7 +222,7 @@
 define <4 x float> @vduplaneQfloat(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: vduplaneQfloat:
 ;CHECK: vdup.32
-	%tmp1 = load <2 x float>* %A
+	%tmp1 = load <2 x float>, <2 x float>* %A
 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >
 	ret <4 x float> %tmp2
 }
diff --git a/llvm/test/CodeGen/ARM/vector-DAGCombine.ll b/llvm/test/CodeGen/ARM/vector-DAGCombine.ll
index 566e955..29f4bb9 100644
--- a/llvm/test/CodeGen/ARM/vector-DAGCombine.ll
+++ b/llvm/test/CodeGen/ARM/vector-DAGCombine.ll
@@ -21,7 +21,7 @@
 define void @test_illegal_build_vector() nounwind {
 entry:
   store <2 x i64> undef, <2 x i64>* undef, align 16
-  %0 = load <16 x i8>* undef, align 16            ; <<16 x i8>> [#uses=1]
+  %0 = load <16 x i8>, <16 x i8>* undef, align 16            ; <<16 x i8>> [#uses=1]
   %1 = or <16 x i8> zeroinitializer, %0           ; <<16 x i8>> [#uses=1]
   store <16 x i8> %1, <16 x i8>* undef, align 16
   ret void
@@ -63,7 +63,7 @@
 ; Test trying to do a ShiftCombine on illegal types.
 ; The vector should be split first.
 define void @lshrIllegalType(<8 x i32>* %A) nounwind {
-       %tmp1 = load <8 x i32>* %A
+       %tmp1 = load <8 x i32>, <8 x i32>* %A
        %tmp2 = lshr <8 x i32> %tmp1, < i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
        store <8 x i32> %tmp2, <8 x i32>* %A
        ret void
@@ -89,7 +89,7 @@
 define void @i64_buildvector(i64* %ptr, <2 x i64>* %vp) nounwind {
 ; CHECK: i64_buildvector
 ; CHECK: vldr
-  %t0 = load i64* %ptr, align 4
+  %t0 = load i64, i64* %ptr, align 4
   %t1 = insertelement <2 x i64> undef, i64 %t0, i32 0
   store <2 x i64> %t1, <2 x i64>* %vp
   ret void
@@ -98,8 +98,8 @@
 define void @i64_insertelement(i64* %ptr, <2 x i64>* %vp) nounwind {
 ; CHECK: i64_insertelement
 ; CHECK: vldr
-  %t0 = load i64* %ptr, align 4
-  %vec = load <2 x i64>* %vp
+  %t0 = load i64, i64* %ptr, align 4
+  %vec = load <2 x i64>, <2 x i64>* %vp
   %t1 = insertelement <2 x i64> %vec, i64 %t0, i32 0
   store <2 x i64> %t1, <2 x i64>* %vp
   ret void
@@ -108,7 +108,7 @@
 define void @i64_extractelement(i64* %ptr, <2 x i64>* %vp) nounwind {
 ; CHECK: i64_extractelement
 ; CHECK: vstr
-  %vec = load <2 x i64>* %vp
+  %vec = load <2 x i64>, <2 x i64>* %vp
   %t1 = extractelement <2 x i64> %vec, i32 0
   store i64 %t1, i64* %ptr
   ret void
@@ -116,7 +116,7 @@
 
 ; Test trying to do an AND Combine on illegal types.
 define void @andVec(<3 x i8>* %A) nounwind {
-  %tmp = load <3 x i8>* %A, align 4
+  %tmp = load <3 x i8>, <3 x i8>* %A, align 4
   %and = and <3 x i8> %tmp, <i8 7, i8 7, i8 7>
   store <3 x i8> %and, <3 x i8>* %A
   ret void
@@ -125,7 +125,7 @@
 
 ; Test trying to do an OR Combine on illegal types.
 define void @orVec(<3 x i8>* %A) nounwind {
-  %tmp = load <3 x i8>* %A, align 4
+  %tmp = load <3 x i8>, <3 x i8>* %A, align 4
   %or = or <3 x i8> %tmp, <i8 7, i8 7, i8 7>
   store <3 x i8> %or, <3 x i8>* %A
   ret void
@@ -146,7 +146,7 @@
 ; shuffles.
 ; CHECK-LABEL: reverse_v8i16:
 define void @reverse_v8i16(<8 x i16>* %loadaddr, <8 x i16>* %storeaddr) {
-  %v0 = load <8 x i16>* %loadaddr
+  %v0 = load <8 x i16>, <8 x i16>* %loadaddr
   ; CHECK: vrev64.16
   ; CHECK: vext.16
   %v1 = shufflevector <8 x i16> %v0, <8 x i16> undef,
@@ -159,7 +159,7 @@
 ; shuffles.
 ; CHECK-LABEL: reverse_v16i8:
 define void @reverse_v16i8(<16 x i8>* %loadaddr, <16 x i8>* %storeaddr) {
-  %v0 = load <16 x i8>* %loadaddr
+  %v0 = load <16 x i8>, <16 x i8>* %loadaddr
   ; CHECK: vrev64.8
   ; CHECK: vext.8
   %v1 = shufflevector <16 x i8> %v0, <16 x i8> undef,
@@ -180,9 +180,9 @@
 define <8 x i16> @t3(i8 zeroext %xf, i8* nocapture %sp0, i8* nocapture %sp1, i32* nocapture %outp) {
 entry:
   %pix_sp0.0.cast = bitcast i8* %sp0 to i32*
-  %pix_sp0.0.copyload = load i32* %pix_sp0.0.cast, align 1
+  %pix_sp0.0.copyload = load i32, i32* %pix_sp0.0.cast, align 1
   %pix_sp1.0.cast = bitcast i8* %sp1 to i32*
-  %pix_sp1.0.copyload = load i32* %pix_sp1.0.cast, align 1
+  %pix_sp1.0.copyload = load i32, i32* %pix_sp1.0.cast, align 1
   %vecinit = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
   %vecinit1 = insertelement <2 x i32> %vecinit, i32 %pix_sp1.0.copyload, i32 1
   %0 = bitcast <2 x i32> %vecinit1 to <8 x i8>
@@ -200,7 +200,7 @@
 ; CHECK: vld1.32 {{{d[0-9]+}}[0]}, [r0]
 entry:
   %pix_sp0.0.cast = bitcast i8* %sp0 to i32*
-  %pix_sp0.0.copyload = load i32* %pix_sp0.0.cast, align 1
+  %pix_sp0.0.copyload = load i32, i32* %pix_sp0.0.cast, align 1
   %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
   %0 = bitcast <2 x i32> %vec to <8 x i8>
   %vmull.i = tail call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %0, <8 x i8> %0)
@@ -219,11 +219,11 @@
 define <8 x i16> @t5(i8* nocapture %sp0, i8* nocapture %sp1, i8* nocapture %sp2) {
 entry:
   %pix_sp0.0.cast = bitcast i8* %sp0 to i32*
-  %pix_sp0.0.copyload = load i32* %pix_sp0.0.cast, align 1
+  %pix_sp0.0.copyload = load i32, i32* %pix_sp0.0.cast, align 1
   %pix_sp1.0.cast = bitcast i8* %sp1 to i32*
-  %pix_sp1.0.copyload = load i32* %pix_sp1.0.cast, align 1
+  %pix_sp1.0.copyload = load i32, i32* %pix_sp1.0.cast, align 1
   %pix_sp2.0.cast = bitcast i8* %sp2 to i32*
-  %pix_sp2.0.copyload = load i32* %pix_sp2.0.cast, align 1
+  %pix_sp2.0.copyload = load i32, i32* %pix_sp2.0.cast, align 1
   %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 1
   %vecinit1 = insertelement <2 x i32> %vec, i32 %pix_sp1.0.copyload, i32 0
   %vecinit2 = insertelement <2 x i32> %vec, i32 %pix_sp2.0.copyload, i32 0
diff --git a/llvm/test/CodeGen/ARM/vector-extend-narrow.ll b/llvm/test/CodeGen/ARM/vector-extend-narrow.ll
index f321896..7e2751b 100644
--- a/llvm/test/CodeGen/ARM/vector-extend-narrow.ll
+++ b/llvm/test/CodeGen/ARM/vector-extend-narrow.ll
@@ -4,7 +4,7 @@
 define float @f(<4 x i16>* nocapture %in) {
   ; CHECK: vldr
   ; CHECK: vmovl.u16
-  %1 = load <4 x i16>* %in
+  %1 = load <4 x i16>, <4 x i16>* %in
   ; CHECK: vcvt.f32.u32
   %2 = uitofp <4 x i16> %1 to <4 x float>
   %3 = extractelement <4 x float> %2, i32 0
@@ -25,7 +25,7 @@
   ; CHECK: vld1
   ; CHECK: vmovl.u8
   ; CHECK: vmovl.u16
-  %1 = load <4 x i8>* %in
+  %1 = load <4 x i8>, <4 x i8>* %in
   ; CHECK: vcvt.f32.u32
   %2 = uitofp <4 x i8> %1 to <4 x float>
   %3 = extractelement <4 x float> %2, i32 0
@@ -58,7 +58,7 @@
   ; CHECK: vrecps
   ; CHECK: vmul
   ; CHECK: vmovn
-  %1 = load <4 x i8>* %x, align 4
+  %1 = load <4 x i8>, <4 x i8>* %x, align 4
   %2 = sdiv <4 x i8> zeroinitializer, %1
   ret <4 x i8> %2
 }
@@ -68,7 +68,7 @@
   ; CHECK: vmovl.u8
   ; CHECK: vmovl.u16
   ; CHECK-NOT: vand
-  %1 = load <4 x i8>* %in, align 4
+  %1 = load <4 x i8>, <4 x i8>* %in, align 4
   %2 = zext <4 x i8> %1 to <4 x i32>
   ret <4 x i32> %2
 }
diff --git a/llvm/test/CodeGen/ARM/vector-load.ll b/llvm/test/CodeGen/ARM/vector-load.ll
index f5cf70b..17f134f 100644
--- a/llvm/test/CodeGen/ARM/vector-load.ll
+++ b/llvm/test/CodeGen/ARM/vector-load.ll
@@ -6,16 +6,16 @@
 define <8 x i8> @load_v8i8(<8 x i8>** %ptr) {
 ;CHECK-LABEL: load_v8i8:
 ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <8 x i8>** %ptr
-	%lA = load <8 x i8>* %A, align 1
+	%A = load <8 x i8>*, <8 x i8>** %ptr
+	%lA = load <8 x i8>, <8 x i8>* %A, align 1
 	ret <8 x i8> %lA
 }
 
 define <8 x i8> @load_v8i8_update(<8 x i8>** %ptr) {
 ;CHECK-LABEL: load_v8i8_update:
 ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <8 x i8>** %ptr
-	%lA = load <8 x i8>* %A, align 1
+	%A = load <8 x i8>*, <8 x i8>** %ptr
+	%lA = load <8 x i8>, <8 x i8>* %A, align 1
 	%inc = getelementptr <8 x i8>, <8 x i8>* %A, i38 1
         store <8 x i8>* %inc, <8 x i8>** %ptr
 	ret <8 x i8> %lA
@@ -24,16 +24,16 @@
 define <4 x i16> @load_v4i16(<4 x i16>** %ptr) {
 ;CHECK-LABEL: load_v4i16:
 ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <4 x i16>** %ptr
-	%lA = load <4 x i16>* %A, align 1
+	%A = load <4 x i16>*, <4 x i16>** %ptr
+	%lA = load <4 x i16>, <4 x i16>* %A, align 1
 	ret <4 x i16> %lA
 }
 
 define <4 x i16> @load_v4i16_update(<4 x i16>** %ptr) {
 ;CHECK-LABEL: load_v4i16_update:
 ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <4 x i16>** %ptr
-	%lA = load <4 x i16>* %A, align 1
+	%A = load <4 x i16>*, <4 x i16>** %ptr
+	%lA = load <4 x i16>, <4 x i16>* %A, align 1
 	%inc = getelementptr <4 x i16>, <4 x i16>* %A, i34 1
         store <4 x i16>* %inc, <4 x i16>** %ptr
 	ret <4 x i16> %lA
@@ -42,16 +42,16 @@
 define <2 x i32> @load_v2i32(<2 x i32>** %ptr) {
 ;CHECK-LABEL: load_v2i32:
 ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <2 x i32>** %ptr
-	%lA = load <2 x i32>* %A, align 1
+	%A = load <2 x i32>*, <2 x i32>** %ptr
+	%lA = load <2 x i32>, <2 x i32>* %A, align 1
 	ret <2 x i32> %lA
 }
 
 define <2 x i32> @load_v2i32_update(<2 x i32>** %ptr) {
 ;CHECK-LABEL: load_v2i32_update:
 ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x i32>** %ptr
-	%lA = load <2 x i32>* %A, align 1
+	%A = load <2 x i32>*, <2 x i32>** %ptr
+	%lA = load <2 x i32>, <2 x i32>* %A, align 1
 	%inc = getelementptr <2 x i32>, <2 x i32>* %A, i32 1
         store <2 x i32>* %inc, <2 x i32>** %ptr
 	ret <2 x i32> %lA
@@ -60,16 +60,16 @@
 define <2 x float> @load_v2f32(<2 x float>** %ptr) {
 ;CHECK-LABEL: load_v2f32:
 ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <2 x float>** %ptr
-	%lA = load <2 x float>* %A, align 1
+	%A = load <2 x float>*, <2 x float>** %ptr
+	%lA = load <2 x float>, <2 x float>* %A, align 1
 	ret <2 x float> %lA
 }
 
 define <2 x float> @load_v2f32_update(<2 x float>** %ptr) {
 ;CHECK-LABEL: load_v2f32_update:
 ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x float>** %ptr
-	%lA = load <2 x float>* %A, align 1
+	%A = load <2 x float>*, <2 x float>** %ptr
+	%lA = load <2 x float>, <2 x float>* %A, align 1
 	%inc = getelementptr <2 x float>, <2 x float>* %A, i32 1
         store <2 x float>* %inc, <2 x float>** %ptr
 	ret <2 x float> %lA
@@ -78,16 +78,16 @@
 define <1 x i64> @load_v1i64(<1 x i64>** %ptr) {
 ;CHECK-LABEL: load_v1i64:
 ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <1 x i64>** %ptr
-	%lA = load <1 x i64>* %A, align 1
+	%A = load <1 x i64>*, <1 x i64>** %ptr
+	%lA = load <1 x i64>, <1 x i64>* %A, align 1
 	ret <1 x i64> %lA
 }
 
 define <1 x i64> @load_v1i64_update(<1 x i64>** %ptr) {
 ;CHECK-LABEL: load_v1i64_update:
 ;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <1 x i64>** %ptr
-	%lA = load <1 x i64>* %A, align 1
+	%A = load <1 x i64>*, <1 x i64>** %ptr
+	%lA = load <1 x i64>, <1 x i64>* %A, align 1
 	%inc = getelementptr <1 x i64>, <1 x i64>* %A, i31 1
         store <1 x i64>* %inc, <1 x i64>** %ptr
 	ret <1 x i64> %lA
@@ -96,16 +96,16 @@
 define <16 x i8> @load_v16i8(<16 x i8>** %ptr) {
 ;CHECK-LABEL: load_v16i8:
 ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <16 x i8>** %ptr
-	%lA = load <16 x i8>* %A, align 1
+	%A = load <16 x i8>*, <16 x i8>** %ptr
+	%lA = load <16 x i8>, <16 x i8>* %A, align 1
 	ret <16 x i8> %lA
 }
 
 define <16 x i8> @load_v16i8_update(<16 x i8>** %ptr) {
 ;CHECK-LABEL: load_v16i8_update:
 ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <16 x i8>** %ptr
-	%lA = load <16 x i8>* %A, align 1
+	%A = load <16 x i8>*, <16 x i8>** %ptr
+	%lA = load <16 x i8>, <16 x i8>* %A, align 1
 	%inc = getelementptr <16 x i8>, <16 x i8>* %A, i316 1
         store <16 x i8>* %inc, <16 x i8>** %ptr
 	ret <16 x i8> %lA
@@ -114,16 +114,16 @@
 define <8 x i16> @load_v8i16(<8 x i16>** %ptr) {
 ;CHECK-LABEL: load_v8i16:
 ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <8 x i16>** %ptr
-	%lA = load <8 x i16>* %A, align 1
+	%A = load <8 x i16>*, <8 x i16>** %ptr
+	%lA = load <8 x i16>, <8 x i16>* %A, align 1
 	ret <8 x i16> %lA
 }
 
 define <8 x i16> @load_v8i16_update(<8 x i16>** %ptr) {
 ;CHECK-LABEL: load_v8i16_update:
 ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <8 x i16>** %ptr
-	%lA = load <8 x i16>* %A, align 1
+	%A = load <8 x i16>*, <8 x i16>** %ptr
+	%lA = load <8 x i16>, <8 x i16>* %A, align 1
 	%inc = getelementptr <8 x i16>, <8 x i16>* %A, i38 1
         store <8 x i16>* %inc, <8 x i16>** %ptr
 	ret <8 x i16> %lA
@@ -132,16 +132,16 @@
 define <4 x i32> @load_v4i32(<4 x i32>** %ptr) {
 ;CHECK-LABEL: load_v4i32:
 ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <4 x i32>** %ptr
-	%lA = load <4 x i32>* %A, align 1
+	%A = load <4 x i32>*, <4 x i32>** %ptr
+	%lA = load <4 x i32>, <4 x i32>* %A, align 1
 	ret <4 x i32> %lA
 }
 
 define <4 x i32> @load_v4i32_update(<4 x i32>** %ptr) {
 ;CHECK-LABEL: load_v4i32_update:
 ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <4 x i32>** %ptr
-	%lA = load <4 x i32>* %A, align 1
+	%A = load <4 x i32>*, <4 x i32>** %ptr
+	%lA = load <4 x i32>, <4 x i32>* %A, align 1
 	%inc = getelementptr <4 x i32>, <4 x i32>* %A, i34 1
         store <4 x i32>* %inc, <4 x i32>** %ptr
 	ret <4 x i32> %lA
@@ -150,16 +150,16 @@
 define <4 x float> @load_v4f32(<4 x float>** %ptr) {
 ;CHECK-LABEL: load_v4f32:
 ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <4 x float>** %ptr
-	%lA = load <4 x float>* %A, align 1
+	%A = load <4 x float>*, <4 x float>** %ptr
+	%lA = load <4 x float>, <4 x float>* %A, align 1
 	ret <4 x float> %lA
 }
 
 define <4 x float> @load_v4f32_update(<4 x float>** %ptr) {
 ;CHECK-LABEL: load_v4f32_update:
 ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <4 x float>** %ptr
-	%lA = load <4 x float>* %A, align 1
+	%A = load <4 x float>*, <4 x float>** %ptr
+	%lA = load <4 x float>, <4 x float>* %A, align 1
 	%inc = getelementptr <4 x float>, <4 x float>* %A, i34 1
         store <4 x float>* %inc, <4 x float>** %ptr
 	ret <4 x float> %lA
@@ -168,16 +168,16 @@
 define <2 x i64> @load_v2i64(<2 x i64>** %ptr) {
 ;CHECK-LABEL: load_v2i64:
 ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <2 x i64>** %ptr
-	%lA = load <2 x i64>* %A, align 1
+	%A = load <2 x i64>*, <2 x i64>** %ptr
+	%lA = load <2 x i64>, <2 x i64>* %A, align 1
 	ret <2 x i64> %lA
 }
 
 define <2 x i64> @load_v2i64_update(<2 x i64>** %ptr) {
 ;CHECK-LABEL: load_v2i64_update:
 ;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x i64>** %ptr
-	%lA = load <2 x i64>* %A, align 1
+	%A = load <2 x i64>*, <2 x i64>** %ptr
+	%lA = load <2 x i64>, <2 x i64>* %A, align 1
 	%inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
         store <2 x i64>* %inc, <2 x i64>** %ptr
 	ret <2 x i64> %lA
@@ -187,8 +187,8 @@
 define <2 x i64> @load_v2i64_update_aligned2(<2 x i64>** %ptr) {
 ;CHECK-LABEL: load_v2i64_update_aligned2:
 ;CHECK: vld1.16 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x i64>** %ptr
-	%lA = load <2 x i64>* %A, align 2
+	%A = load <2 x i64>*, <2 x i64>** %ptr
+	%lA = load <2 x i64>, <2 x i64>* %A, align 2
 	%inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
         store <2 x i64>* %inc, <2 x i64>** %ptr
 	ret <2 x i64> %lA
@@ -197,8 +197,8 @@
 define <2 x i64> @load_v2i64_update_aligned4(<2 x i64>** %ptr) {
 ;CHECK-LABEL: load_v2i64_update_aligned4:
 ;CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x i64>** %ptr
-	%lA = load <2 x i64>* %A, align 4
+	%A = load <2 x i64>*, <2 x i64>** %ptr
+	%lA = load <2 x i64>, <2 x i64>* %A, align 4
 	%inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
         store <2 x i64>* %inc, <2 x i64>** %ptr
 	ret <2 x i64> %lA
@@ -207,8 +207,8 @@
 define <2 x i64> @load_v2i64_update_aligned8(<2 x i64>** %ptr) {
 ;CHECK-LABEL: load_v2i64_update_aligned8:
 ;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x i64>** %ptr
-	%lA = load <2 x i64>* %A, align 8
+	%A = load <2 x i64>*, <2 x i64>** %ptr
+	%lA = load <2 x i64>, <2 x i64>* %A, align 8
 	%inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
         store <2 x i64>* %inc, <2 x i64>** %ptr
 	ret <2 x i64> %lA
@@ -217,8 +217,8 @@
 define <2 x i64> @load_v2i64_update_aligned16(<2 x i64>** %ptr) {
 ;CHECK-LABEL: load_v2i64_update_aligned16:
 ;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:128]!
-	%A = load <2 x i64>** %ptr
-	%lA = load <2 x i64>* %A, align 16
+	%A = load <2 x i64>*, <2 x i64>** %ptr
+	%lA = load <2 x i64>, <2 x i64>* %A, align 16
 	%inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
         store <2 x i64>* %inc, <2 x i64>** %ptr
 	ret <2 x i64> %lA
@@ -230,8 +230,8 @@
 ;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [{{r[0-9]+}}:32]
 ;CHECK: vmovl.u8        {{q[0-9]+}}, {{d[0-9]+}}
 ;CHECK: vmovl.u16       {{q[0-9]+}}, {{d[0-9]+}}
-	%A = load <4 x i8>** %ptr
-	%lA = load <4 x i8>* %A, align 4
+	%A = load <4 x i8>*, <4 x i8>** %ptr
+	%lA = load <4 x i8>, <4 x i8>* %A, align 4
         %zlA = zext <4 x i8> %lA to <4 x i32>
 	ret <4 x i32> %zlA
 }
@@ -244,8 +244,8 @@
 ;CHECK: str.w   r[[INCREG]], [r0]
 ;CHECK: vmovl.u8        {{q[0-9]+}}, {{d[0-9]+}}
 ;CHECK: vmovl.u16       {{q[0-9]+}}, {{d[0-9]+}}
-	%A = load <4 x i8>** %ptr
-	%lA = load <4 x i8>* %A, align 4
+	%A = load <4 x i8>*, <4 x i8>** %ptr
+	%lA = load <4 x i8>, <4 x i8>* %A, align 4
 	%inc = getelementptr <4 x i8>, <4 x i8>* %A, i38 4
         store <4 x i8>* %inc, <4 x i8>** %ptr
         %zlA = zext <4 x i8> %lA to <4 x i32>
diff --git a/llvm/test/CodeGen/ARM/vector-promotion.ll b/llvm/test/CodeGen/ARM/vector-promotion.ll
index 42ceb60..1dabee3 100644
--- a/llvm/test/CodeGen/ARM/vector-promotion.ll
+++ b/llvm/test/CodeGen/ARM/vector-promotion.ll
@@ -3,7 +3,7 @@
 ; RUN: llc -mtriple=thumbv7-apple-ios %s -o - -mattr=+neon | FileCheck --check-prefix=ASM %s
 
 ; IR-BOTH-LABEL: @simpleOneInstructionPromotion
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
 ; IR-BOTH-NEXT: [[VECTOR_OR:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[LOAD]], <i32 undef, i32 1>
 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[VECTOR_OR]], i32 1
 ; IR-BOTH-NEXT: store i32 [[EXTRACT]], i32* %dest
@@ -16,7 +16,7 @@
 ; ASM-NEXT: vst1.32 {[[LOAD]][1]}, [r1:32]
 ; ASM-NEXT: bx
 define void @simpleOneInstructionPromotion(<2 x i32>* %addr1, i32* %dest) {
-  %in1 = load <2 x i32>* %addr1, align 8
+  %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
   %extract = extractelement <2 x i32> %in1, i32 1
   %out = or i32 %extract, 1
   store i32 %out, i32* %dest, align 4
@@ -24,7 +24,7 @@
 }
 
 ; IR-BOTH-LABEL: @unsupportedInstructionForPromotion
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 0
 ; IR-BOTH-NEXT: [[CMP:%[a-zA-Z_0-9-]+]] = icmp eq i32 [[EXTRACT]], %in2
 ; IR-BOTH-NEXT: store i1 [[CMP]], i1* %dest
@@ -35,7 +35,7 @@
 ; ASM: vmov.32 {{r[0-9]+}}, [[LOAD]]
 ; ASM: bx
 define void @unsupportedInstructionForPromotion(<2 x i32>* %addr1, i32 %in2, i1* %dest) {
-  %in1 = load <2 x i32>* %addr1, align 8
+  %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
   %extract = extractelement <2 x i32> %in1, i32 0
   %out = icmp eq i32 %extract, %in2
   store i1 %out, i1* %dest, align 4
@@ -44,7 +44,7 @@
 
 
 ; IR-BOTH-LABEL: @unsupportedChainInDifferentBBs
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 0
 ; IR-BOTH-NEXT: br i1 %bool, label %bb2, label %end
 ; BB2
@@ -58,7 +58,7 @@
 ; ASM: bx
 define void @unsupportedChainInDifferentBBs(<2 x i32>* %addr1, i32* %dest, i1 %bool) {
 bb1:
-  %in1 = load <2 x i32>* %addr1, align 8
+  %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
   %extract = extractelement <2 x i32> %in1, i32 0
   br i1 %bool, label %bb2, label %end
 bb2: 
@@ -70,7 +70,7 @@
 }
 
 ; IR-LABEL: @chainOfInstructionsToPromote
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
 ; IR-BOTH-NEXT: [[VECTOR_OR1:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[LOAD]], <i32 1, i32 undef>
 ; IR-BOTH-NEXT: [[VECTOR_OR2:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR1]], <i32 1, i32 undef>
 ; IR-BOTH-NEXT: [[VECTOR_OR3:%[a-zA-Z_0-9-]+]] = or <2 x i32> [[VECTOR_OR2]], <i32 1, i32 undef>
@@ -87,7 +87,7 @@
 ; ASM-NOT: vmov.32 {{r[0-9]+}}, [[LOAD]]
 ; ASM: bx
 define void @chainOfInstructionsToPromote(<2 x i32>* %addr1, i32* %dest) {
-  %in1 = load <2 x i32>* %addr1, align 8
+  %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
   %extract = extractelement <2 x i32> %in1, i32 0
   %out1 = or i32 %extract, 1
   %out2 = or i32 %out1, 1
@@ -101,7 +101,7 @@
 }
 
 ; IR-BOTH-LABEL: @unsupportedMultiUses
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
 ; IR-BOTH-NEXT: [[OR:%[a-zA-Z_0-9-]+]] = or i32 [[EXTRACT]], 1
 ; IR-BOTH-NEXT: store i32 [[OR]], i32* %dest
@@ -112,7 +112,7 @@
 ; ASM: vmov.32 {{r[0-9]+}}, [[LOAD]]
 ; ASM: bx
 define i32 @unsupportedMultiUses(<2 x i32>* %addr1, i32* %dest) {
-  %in1 = load <2 x i32>* %addr1, align 8
+  %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
   %extract = extractelement <2 x i32> %in1, i32 1
   %out = or i32 %extract, 1
   store i32 %out, i32* %dest, align 4
@@ -122,7 +122,7 @@
 ; Check that we promote with a splat constant when this is a division.
 ; The NORMAL mode does not promote anything as divisions are not legal.
 ; IR-BOTH-LABEL: @udivCase
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
 ; Scalar version:
 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = udiv i32 [[EXTRACT]], 7
@@ -133,7 +133,7 @@
 ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
 ; IR-BOTH-NEXT: ret
 define void @udivCase(<2 x i32>* %addr1, i32* %dest) {
-  %in1 = load <2 x i32>* %addr1, align 8
+  %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
   %extract = extractelement <2 x i32> %in1, i32 1
   %out = udiv i32 %extract, 7
   store i32 %out, i32* %dest, align 4
@@ -141,7 +141,7 @@
 }
 
 ; IR-BOTH-LABEL: @uremCase
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
 ; Scalar version:
 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = urem i32 [[EXTRACT]], 7
@@ -152,7 +152,7 @@
 ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
 ; IR-BOTH-NEXT: ret 
 define void @uremCase(<2 x i32>* %addr1, i32* %dest) {
-  %in1 = load <2 x i32>* %addr1, align 8
+  %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
   %extract = extractelement <2 x i32> %in1, i32 1
   %out = urem i32 %extract, 7
   store i32 %out, i32* %dest, align 4
@@ -160,7 +160,7 @@
 }
 
 ; IR-BOTH-LABEL: @sdivCase
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
 ; Scalar version:
 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = sdiv i32 [[EXTRACT]], 7
@@ -171,7 +171,7 @@
 ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
 ; IR-BOTH-NEXT: ret 
 define void @sdivCase(<2 x i32>* %addr1, i32* %dest) {
-  %in1 = load <2 x i32>* %addr1, align 8
+  %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
   %extract = extractelement <2 x i32> %in1, i32 1
   %out = sdiv i32 %extract, 7
   store i32 %out, i32* %dest, align 4
@@ -179,7 +179,7 @@
 }
 
 ; IR-BOTH-LABEL: @sremCase
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
 ; Scalar version:
 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = srem i32 [[EXTRACT]], 7
@@ -190,7 +190,7 @@
 ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
 ; IR-BOTH-NEXT: ret 
 define void @sremCase(<2 x i32>* %addr1, i32* %dest) {
-  %in1 = load <2 x i32>* %addr1, align 8
+  %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
   %extract = extractelement <2 x i32> %in1, i32 1
   %out = srem i32 %extract, 7
   store i32 %out, i32* %dest, align 4
@@ -198,7 +198,7 @@
 }
 
 ; IR-BOTH-LABEL: @fdivCase
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
 ; Scalar version:  
 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = fdiv float [[EXTRACT]], 7.0
@@ -209,7 +209,7 @@
 ; IR-BOTH-NEXT: store float [[RES]], float* %dest
 ; IR-BOTH-NEXT: ret
 define void @fdivCase(<2 x float>* %addr1, float* %dest) {
-  %in1 = load <2 x float>* %addr1, align 8   
+  %in1 = load <2 x float>, <2 x float>* %addr1, align 8   
   %extract = extractelement <2 x float> %in1, i32 1
   %out = fdiv float %extract, 7.0
   store float %out, float* %dest, align 4
@@ -217,7 +217,7 @@
 }
 
 ; IR-BOTH-LABEL: @fremCase
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
 ; Scalar version:  
 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem float [[EXTRACT]], 7.0
@@ -228,7 +228,7 @@
 ; IR-BOTH-NEXT: store float [[RES]], float* %dest
 ; IR-BOTH-NEXT: ret
 define void @fremCase(<2 x float>* %addr1, float* %dest) {
-  %in1 = load <2 x float>* %addr1, align 8   
+  %in1 = load <2 x float>, <2 x float>* %addr1, align 8   
   %extract = extractelement <2 x float> %in1, i32 1
   %out = frem float %extract, 7.0
   store float %out, float* %dest, align 4
@@ -238,13 +238,13 @@
 ; Check that we do not promote when we may introduce undefined behavior
 ; like division by zero.
 ; IR-BOTH-LABEL: @undefDivCase
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
 ; IR-BOTH-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = udiv i32 7, [[EXTRACT]]
 ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
 ; IR-BOTH-NEXT: ret
 define void @undefDivCase(<2 x i32>* %addr1, i32* %dest) {
-  %in1 = load <2 x i32>* %addr1, align 8
+  %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
   %extract = extractelement <2 x i32> %in1, i32 1
   %out = udiv i32 7, %extract
   store i32 %out, i32* %dest, align 4
@@ -255,13 +255,13 @@
 ; Check that we do not promote when we may introduce undefined behavior
 ; like division by zero.
 ; IR-BOTH-LABEL: @undefRemCase
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 1
 ; IR-BOTH-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = srem i32 7, [[EXTRACT]]
 ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
 ; IR-BOTH-NEXT: ret
 define void @undefRemCase(<2 x i32>* %addr1, i32* %dest) {
-  %in1 = load <2 x i32>* %addr1, align 8
+  %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
   %extract = extractelement <2 x i32> %in1, i32 1
   %out = srem i32 7, %extract
   store i32 %out, i32* %dest, align 4
@@ -271,7 +271,7 @@
 ; Check that we use an undef mask for undefined behavior if the fast-math
 ; flag is set.
 ; IR-BOTH-LABEL: @undefConstantFRemCaseWithFastMath
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
 ; Scalar version:  
 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem nnan float [[EXTRACT]], 7.0
@@ -282,7 +282,7 @@
 ; IR-BOTH-NEXT: store float [[RES]], float* %dest
 ; IR-BOTH-NEXT: ret
 define void @undefConstantFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest) {
-  %in1 = load <2 x float>* %addr1, align 8   
+  %in1 = load <2 x float>, <2 x float>* %addr1, align 8   
   %extract = extractelement <2 x float> %in1, i32 1
   %out = frem nnan float %extract, 7.0
   store float %out, float* %dest, align 4
@@ -292,7 +292,7 @@
 ; Check that we use an undef mask for undefined behavior if the fast-math
 ; flag is set.
 ; IR-BOTH-LABEL: @undefVectorFRemCaseWithFastMath
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
 ; Scalar version:  
 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = frem nnan float 7.000000e+00, [[EXTRACT]]
@@ -303,7 +303,7 @@
 ; IR-BOTH-NEXT: store float [[RES]], float* %dest
 ; IR-BOTH-NEXT: ret
 define void @undefVectorFRemCaseWithFastMath(<2 x float>* %addr1, float* %dest) {
-  %in1 = load <2 x float>* %addr1, align 8   
+  %in1 = load <2 x float>, <2 x float>* %addr1, align 8   
   %extract = extractelement <2 x float> %in1, i32 1
   %out = frem nnan float 7.0, %extract
   store float %out, float* %dest, align 4
@@ -314,7 +314,7 @@
 ; This requires the STRESS mode, as floating point values are
 ; not promoted on armv7.
 ; IR-BOTH-LABEL: @simpleOneInstructionPromotionFloat
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x float>, <2 x float>* %addr1
 ; Scalar version: 
 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x float> [[LOAD]], i32 1
 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = fadd float [[EXTRACT]], 1.0
@@ -325,7 +325,7 @@
 ; IR-BOTH-NEXT: store float [[RES]], float* %dest
 ; IR-BOTH-NEXT: ret
 define void @simpleOneInstructionPromotionFloat(<2 x float>* %addr1, float* %dest) {
-  %in1 = load <2 x float>* %addr1, align 8
+  %in1 = load <2 x float>, <2 x float>* %addr1, align 8
   %extract = extractelement <2 x float> %in1, i32 1
   %out = fadd float %extract, 1.0
   store float %out, float* %dest, align 4
@@ -337,7 +337,7 @@
 ; This requires the STRESS mode, as variable indices are expensive
 ; to lower.
 ; IR-BOTH-LABEL: @simpleOneInstructionPromotionVariableIdx
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <2 x i32>, <2 x i32>* %addr1
 ; Scalar version:
 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <2 x i32> [[LOAD]], i32 %idx
 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = or i32 [[EXTRACT]], 1
@@ -348,7 +348,7 @@
 ; IR-BOTH-NEXT: store i32 [[RES]], i32* %dest
 ; IR-BOTH-NEXT: ret
 define void @simpleOneInstructionPromotionVariableIdx(<2 x i32>* %addr1, i32* %dest, i32 %idx) {
-  %in1 = load <2 x i32>* %addr1, align 8
+  %in1 = load <2 x i32>, <2 x i32>* %addr1, align 8
   %extract = extractelement <2 x i32> %in1, i32 %idx
   %out = or i32 %extract, 1
   store i32 %out, i32* %dest, align 4
@@ -360,7 +360,7 @@
 ; as legal or custom, although the actual assembly is better if we were
 ; promoting it.
 ; IR-BOTH-LABEL: @simpleOneInstructionPromotion8x8
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <8 x i8>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <8 x i8>, <8 x i8>* %addr1
 ; Scalar version:  
 ; IR-NORMAL-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <8 x i8> [[LOAD]], i32 1
 ; IR-NORMAL-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = or i8 [[EXTRACT]], 1
@@ -371,7 +371,7 @@
 ; IR-BOTH-NEXT: store i8 [[RES]], i8* %dest
 ; IR-BOTH-NEXT: ret
 define void @simpleOneInstructionPromotion8x8(<8 x i8>* %addr1, i8* %dest) {
-  %in1 = load <8 x i8>* %addr1, align 8
+  %in1 = load <8 x i8>, <8 x i8>* %addr1, align 8
   %extract = extractelement <8 x i8> %in1, i32 1
   %out = or i8 %extract, 1
   store i8 %out, i8* %dest, align 4
@@ -381,7 +381,7 @@
 ; Check that we optimized the sequence correctly when it can be
 ; lowered on a Q register.
 ; IR-BOTH-LABEL: @simpleOneInstructionPromotion
-; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <4 x i32>* %addr1
+; IR-BOTH: [[LOAD:%[a-zA-Z_0-9-]+]] = load <4 x i32>, <4 x i32>* %addr1
 ; IR-BOTH-NEXT: [[VECTOR_OR:%[a-zA-Z_0-9-]+]] = or <4 x i32> [[LOAD]], <i32 undef, i32 1, i32 undef, i32 undef>
 ; IR-BOTH-NEXT: [[EXTRACT:%[a-zA-Z_0-9-]+]] = extractelement <4 x i32> [[VECTOR_OR]], i32 1
 ; IR-BOTH-NEXT: store i32 [[EXTRACT]], i32* %dest
@@ -395,7 +395,7 @@
 ; ASM-NEXT: vst1.32 {[[LOAD]][1]}, [r1]
 ; ASM-NEXT: bx
 define void @simpleOneInstructionPromotion4x32(<4 x i32>* %addr1, i32* %dest) {
-  %in1 = load <4 x i32>* %addr1, align 8
+  %in1 = load <4 x i32>, <4 x i32>* %addr1, align 8
   %extract = extractelement <4 x i32> %in1, i32 1
   %out = or i32 %extract, 1
   store i32 %out, i32* %dest, align 1
diff --git a/llvm/test/CodeGen/ARM/vector-spilling.ll b/llvm/test/CodeGen/ARM/vector-spilling.ll
index c138bc3..b8058c8 100644
--- a/llvm/test/CodeGen/ARM/vector-spilling.ll
+++ b/llvm/test/CodeGen/ARM/vector-spilling.ll
@@ -11,16 +11,16 @@
 define void @test(<8 x i64>* %src) #0 {
 entry:
   %0 = getelementptr inbounds <8 x i64>, <8 x i64>* %src, i32 0
-  %1 = load <8 x i64>* %0, align 8
+  %1 = load <8 x i64>, <8 x i64>* %0, align 8
 
   %2 = getelementptr inbounds <8 x i64>, <8 x i64>* %src, i32 1
-  %3 = load <8 x i64>* %2, align 8
+  %3 = load <8 x i64>, <8 x i64>* %2, align 8
 
   %4 = getelementptr inbounds <8 x i64>, <8 x i64>* %src, i32 2
-  %5 = load <8 x i64>* %4, align 8
+  %5 = load <8 x i64>, <8 x i64>* %4, align 8
 
   %6 = getelementptr inbounds <8 x i64>, <8 x i64>* %src, i32 3
-  %7 = load <8 x i64>* %6, align 8
+  %7 = load <8 x i64>, <8 x i64>* %6, align 8
 
   %8 = shufflevector <8 x i64> %1, <8 x i64> %3, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   %9 = shufflevector <8 x i64> %1, <8 x i64> %3, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
diff --git a/llvm/test/CodeGen/ARM/vector-store.ll b/llvm/test/CodeGen/ARM/vector-store.ll
index b5ac5bb..30baa9a 100644
--- a/llvm/test/CodeGen/ARM/vector-store.ll
+++ b/llvm/test/CodeGen/ARM/vector-store.ll
@@ -6,7 +6,7 @@
 define void @store_v8i8(<8 x i8>** %ptr, <8 x i8> %val) {
 ;CHECK-LABEL: store_v8i8:
 ;CHECK: str r1, [r0]
-	%A = load <8 x i8>** %ptr
+	%A = load <8 x i8>*, <8 x i8>** %ptr
 	store  <8 x i8> %val, <8 x i8>* %A, align 1
 	ret void
 }
@@ -14,7 +14,7 @@
 define void @store_v8i8_update(<8 x i8>** %ptr, <8 x i8> %val) {
 ;CHECK-LABEL: store_v8i8_update:
 ;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <8 x i8>** %ptr
+	%A = load <8 x i8>*, <8 x i8>** %ptr
 	store  <8 x i8> %val, <8 x i8>* %A, align 1
 	%inc = getelementptr <8 x i8>, <8 x i8>* %A, i38 1
         store <8 x i8>* %inc, <8 x i8>** %ptr
@@ -24,7 +24,7 @@
 define void @store_v4i16(<4 x i16>** %ptr, <4 x i16> %val) {
 ;CHECK-LABEL: store_v4i16:
 ;CHECK: str r1, [r0]
-	%A = load <4 x i16>** %ptr
+	%A = load <4 x i16>*, <4 x i16>** %ptr
 	store  <4 x i16> %val, <4 x i16>* %A, align 1
 	ret void
 }
@@ -32,7 +32,7 @@
 define void @store_v4i16_update(<4 x i16>** %ptr, <4 x i16> %val) {
 ;CHECK-LABEL: store_v4i16_update:
 ;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <4 x i16>** %ptr
+	%A = load <4 x i16>*, <4 x i16>** %ptr
 	store  <4 x i16> %val, <4 x i16>* %A, align 1
 	%inc = getelementptr <4 x i16>, <4 x i16>* %A, i34 1
         store <4 x i16>* %inc, <4 x i16>** %ptr
@@ -42,7 +42,7 @@
 define void @store_v2i32(<2 x i32>** %ptr, <2 x i32> %val) {
 ;CHECK-LABEL: store_v2i32:
 ;CHECK: str r1, [r0]
-	%A = load <2 x i32>** %ptr
+	%A = load <2 x i32>*, <2 x i32>** %ptr
 	store  <2 x i32> %val, <2 x i32>* %A, align 1
 	ret void
 }
@@ -50,7 +50,7 @@
 define void @store_v2i32_update(<2 x i32>** %ptr, <2 x i32> %val) {
 ;CHECK-LABEL: store_v2i32_update:
 ;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x i32>** %ptr
+	%A = load <2 x i32>*, <2 x i32>** %ptr
 	store  <2 x i32> %val, <2 x i32>* %A, align 1
 	%inc = getelementptr <2 x i32>, <2 x i32>* %A, i32 1
         store <2 x i32>* %inc, <2 x i32>** %ptr
@@ -60,7 +60,7 @@
 define void @store_v2f32(<2 x float>** %ptr, <2 x float> %val) {
 ;CHECK-LABEL: store_v2f32:
 ;CHECK: str r1, [r0]
-	%A = load <2 x float>** %ptr
+	%A = load <2 x float>*, <2 x float>** %ptr
 	store  <2 x float> %val, <2 x float>* %A, align 1
 	ret void
 }
@@ -68,7 +68,7 @@
 define void @store_v2f32_update(<2 x float>** %ptr, <2 x float> %val) {
 ;CHECK-LABEL: store_v2f32_update:
 ;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x float>** %ptr
+	%A = load <2 x float>*, <2 x float>** %ptr
 	store  <2 x float> %val, <2 x float>* %A, align 1
 	%inc = getelementptr <2 x float>, <2 x float>* %A, i32 1
         store <2 x float>* %inc, <2 x float>** %ptr
@@ -78,7 +78,7 @@
 define void @store_v1i64(<1 x i64>** %ptr, <1 x i64> %val) {
 ;CHECK-LABEL: store_v1i64:
 ;CHECK: str r1, [r0]
-	%A = load <1 x i64>** %ptr
+	%A = load <1 x i64>*, <1 x i64>** %ptr
 	store  <1 x i64> %val, <1 x i64>* %A, align 1
 	ret void
 }
@@ -86,7 +86,7 @@
 define void @store_v1i64_update(<1 x i64>** %ptr, <1 x i64> %val) {
 ;CHECK-LABEL: store_v1i64_update:
 ;CHECK: vst1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <1 x i64>** %ptr
+	%A = load <1 x i64>*, <1 x i64>** %ptr
 	store  <1 x i64> %val, <1 x i64>* %A, align 1
 	%inc = getelementptr <1 x i64>, <1 x i64>* %A, i31 1
         store <1 x i64>* %inc, <1 x i64>** %ptr
@@ -96,7 +96,7 @@
 define void @store_v16i8(<16 x i8>** %ptr, <16 x i8> %val) {
 ;CHECK-LABEL: store_v16i8:
 ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <16 x i8>** %ptr
+	%A = load <16 x i8>*, <16 x i8>** %ptr
 	store  <16 x i8> %val, <16 x i8>* %A, align 1
 	ret void
 }
@@ -104,7 +104,7 @@
 define void @store_v16i8_update(<16 x i8>** %ptr, <16 x i8> %val) {
 ;CHECK-LABEL: store_v16i8_update:
 ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <16 x i8>** %ptr
+	%A = load <16 x i8>*, <16 x i8>** %ptr
 	store  <16 x i8> %val, <16 x i8>* %A, align 1
 	%inc = getelementptr <16 x i8>, <16 x i8>* %A, i316 1
         store <16 x i8>* %inc, <16 x i8>** %ptr
@@ -114,7 +114,7 @@
 define void @store_v8i16(<8 x i16>** %ptr, <8 x i16> %val) {
 ;CHECK-LABEL: store_v8i16:
 ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <8 x i16>** %ptr
+	%A = load <8 x i16>*, <8 x i16>** %ptr
 	store  <8 x i16> %val, <8 x i16>* %A, align 1
 	ret void
 }
@@ -122,7 +122,7 @@
 define void @store_v8i16_update(<8 x i16>** %ptr, <8 x i16> %val) {
 ;CHECK-LABEL: store_v8i16_update:
 ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <8 x i16>** %ptr
+	%A = load <8 x i16>*, <8 x i16>** %ptr
 	store  <8 x i16> %val, <8 x i16>* %A, align 1
 	%inc = getelementptr <8 x i16>, <8 x i16>* %A, i38 1
         store <8 x i16>* %inc, <8 x i16>** %ptr
@@ -132,7 +132,7 @@
 define void @store_v4i32(<4 x i32>** %ptr, <4 x i32> %val) {
 ;CHECK-LABEL: store_v4i32:
 ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <4 x i32>** %ptr
+	%A = load <4 x i32>*, <4 x i32>** %ptr
 	store  <4 x i32> %val, <4 x i32>* %A, align 1
 	ret void
 }
@@ -140,7 +140,7 @@
 define void @store_v4i32_update(<4 x i32>** %ptr, <4 x i32> %val) {
 ;CHECK-LABEL: store_v4i32_update:
 ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <4 x i32>** %ptr
+	%A = load <4 x i32>*, <4 x i32>** %ptr
 	store  <4 x i32> %val, <4 x i32>* %A, align 1
 	%inc = getelementptr <4 x i32>, <4 x i32>* %A, i34 1
         store <4 x i32>* %inc, <4 x i32>** %ptr
@@ -150,7 +150,7 @@
 define void @store_v4f32(<4 x float>** %ptr, <4 x float> %val) {
 ;CHECK-LABEL: store_v4f32:
 ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <4 x float>** %ptr
+	%A = load <4 x float>*, <4 x float>** %ptr
 	store  <4 x float> %val, <4 x float>* %A, align 1
 	ret void
 }
@@ -158,7 +158,7 @@
 define void @store_v4f32_update(<4 x float>** %ptr, <4 x float> %val) {
 ;CHECK-LABEL: store_v4f32_update:
 ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <4 x float>** %ptr
+	%A = load <4 x float>*, <4 x float>** %ptr
 	store  <4 x float> %val, <4 x float>* %A, align 1
 	%inc = getelementptr <4 x float>, <4 x float>* %A, i34 1
         store <4 x float>* %inc, <4 x float>** %ptr
@@ -168,7 +168,7 @@
 define void @store_v2i64(<2 x i64>** %ptr, <2 x i64> %val) {
 ;CHECK-LABEL: store_v2i64:
 ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
-	%A = load <2 x i64>** %ptr
+	%A = load <2 x i64>*, <2 x i64>** %ptr
 	store  <2 x i64> %val, <2 x i64>* %A, align 1
 	ret void
 }
@@ -176,7 +176,7 @@
 define void @store_v2i64_update(<2 x i64>** %ptr, <2 x i64> %val) {
 ;CHECK-LABEL: store_v2i64_update:
 ;CHECK: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x i64>** %ptr
+	%A = load <2 x i64>*, <2 x i64>** %ptr
 	store  <2 x i64> %val, <2 x i64>* %A, align 1
 	%inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
         store <2 x i64>* %inc, <2 x i64>** %ptr
@@ -186,7 +186,7 @@
 define void @store_v2i64_update_aligned2(<2 x i64>** %ptr, <2 x i64> %val) {
 ;CHECK-LABEL: store_v2i64_update_aligned2:
 ;CHECK: vst1.16 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x i64>** %ptr
+	%A = load <2 x i64>*, <2 x i64>** %ptr
 	store  <2 x i64> %val, <2 x i64>* %A, align 2
 	%inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
         store <2 x i64>* %inc, <2 x i64>** %ptr
@@ -196,7 +196,7 @@
 define void @store_v2i64_update_aligned4(<2 x i64>** %ptr, <2 x i64> %val) {
 ;CHECK-LABEL: store_v2i64_update_aligned4:
 ;CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x i64>** %ptr
+	%A = load <2 x i64>*, <2 x i64>** %ptr
 	store  <2 x i64> %val, <2 x i64>* %A, align 4
 	%inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
         store <2 x i64>* %inc, <2 x i64>** %ptr
@@ -206,7 +206,7 @@
 define void @store_v2i64_update_aligned8(<2 x i64>** %ptr, <2 x i64> %val) {
 ;CHECK-LABEL: store_v2i64_update_aligned8:
 ;CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
-	%A = load <2 x i64>** %ptr
+	%A = load <2 x i64>*, <2 x i64>** %ptr
 	store  <2 x i64> %val, <2 x i64>* %A, align 8
 	%inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
         store <2 x i64>* %inc, <2 x i64>** %ptr
@@ -216,7 +216,7 @@
 define void @store_v2i64_update_aligned16(<2 x i64>** %ptr, <2 x i64> %val) {
 ;CHECK-LABEL: store_v2i64_update_aligned16:
 ;CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}:128]!
-	%A = load <2 x i64>** %ptr
+	%A = load <2 x i64>*, <2 x i64>** %ptr
 	store  <2 x i64> %val, <2 x i64>* %A, align 16
 	%inc = getelementptr <2 x i64>, <2 x i64>* %A, i32 1
         store <2 x i64>* %inc, <2 x i64>** %ptr
@@ -232,7 +232,7 @@
 ;CHECK: vuzp.8  [[VECLO]], {{d[0-9]+}}
 ;CHECK: ldr     r[[PTRREG:[0-9]+]], [r0]
 ;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32]
-	%A = load <4 x i8>** %ptr
+	%A = load <4 x i8>*, <4 x i8>** %ptr
         %trunc = trunc <4 x i32> %val to <4 x i8>
 	store  <4 x i8> %trunc, <4 x i8>* %A, align 4
 	ret void
@@ -249,7 +249,7 @@
 ;CHECK: ldr     r[[PTRREG:[0-9]+]], [r0]
 ;CHECK: vst1.32 {[[VECLO]][0]}, [r[[PTRREG]]:32], [[IMM16]]
 ;CHECK: str     r[[PTRREG]], [r0]
-	%A = load <4 x i8>** %ptr
+	%A = load <4 x i8>*, <4 x i8>** %ptr
         %trunc = trunc <4 x i32> %val to <4 x i8>
 	store  <4 x i8> %trunc, <4 x i8>* %A, align 4
 	%inc = getelementptr <4 x i8>, <4 x i8>* %A, i38 4
diff --git a/llvm/test/CodeGen/ARM/vext.ll b/llvm/test/CodeGen/ARM/vext.ll
index 4407451..72ecf0e 100644
--- a/llvm/test/CodeGen/ARM/vext.ll
+++ b/llvm/test/CodeGen/ARM/vext.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @test_vextd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: test_vextd:
 ;CHECK: vext
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <8 x i8> @test_vextRd(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: test_vextRd:
 ;CHECK: vext
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
 	ret <8 x i8> %tmp3
 }
@@ -21,8 +21,8 @@
 define <16 x i8> @test_vextq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: test_vextq:
 ;CHECK: vext
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
 	ret <16 x i8> %tmp3
 }
@@ -30,8 +30,8 @@
 define <16 x i8> @test_vextRq(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: test_vextRq:
 ;CHECK: vext
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
 	ret <16 x i8> %tmp3
 }
@@ -39,8 +39,8 @@
 define <4 x i16> @test_vextd16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: test_vextd16:
 ;CHECK: vext
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 	ret <4 x i16> %tmp3
 }
@@ -48,8 +48,8 @@
 define <4 x i32> @test_vextq32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: test_vextq32:
 ;CHECK: vext
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 	ret <4 x i32> %tmp3
 }
@@ -59,8 +59,8 @@
 define <8 x i8> @test_vextd_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: test_vextd_undef:
 ;CHECK: vext
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 3, i32 undef, i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10>
 	ret <8 x i8> %tmp3
 }
@@ -68,8 +68,8 @@
 define <16 x i8> @test_vextRq_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: test_vextRq_undef:
 ;CHECK: vext
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 23, i32 24, i32 25, i32 26, i32 undef, i32 undef, i32 29, i32 30, i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 undef, i32 6>
 	ret <16 x i8> %tmp3
 }
@@ -118,8 +118,8 @@
 ;CHECK: vext.16
 ;CHECK-NOT: vext.16
 ;CHECK: vzip.16
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 3, i32 8, i32 5, i32 9>
         ret <4 x i16> %tmp3
 }
@@ -128,8 +128,8 @@
 define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: test_undef:
 ;CHECK: vzip.16
-        %tmp1 = load <8 x i16>* %A
-        %tmp2 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
+        %tmp2 = load <8 x i16>, <8 x i16>* %B
         %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 undef, i32 8, i32 5, i32 9>
         ret <4 x i16> %tmp3
 }
@@ -143,7 +143,7 @@
 ;CHECK: vmov.16 [[REG]][1]
 ;CHECK: vmov.16 [[REG]][2]
 ;CHECK: vmov.16 [[REG]][3]
-        %tmp1 = load <32 x i16>* %B
+        %tmp1 = load <32 x i16>, <32 x i16>* %B
         %tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
         ret <4 x i16> %tmp2
 }
@@ -156,7 +156,7 @@
 ;CHECK: vmov.16 [[REG]][1]
 ;CHECK: vmov.16 [[REG]][2]
 ;CHECK: vmov.16 [[REG]][3]
-        %tmp1 = load <8 x i16>* %B
+        %tmp1 = load <8 x i16>, <8 x i16>* %B
         %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
         ret <4 x i16> %tmp2
 }
@@ -174,8 +174,8 @@
 ;CHECK: vmov.16 [[REG2]][1]
 ;CHECK: vmov.16 [[REG2]][2]
 ;CHECK: vmov.16 [[REG2]][3]
-       %tmp1 = load <8 x i16>* %A
-       %tmp2 = load <8 x i16>* %B
+       %tmp1 = load <8 x i16>, <8 x i16>* %A
+       %tmp2 = load <8 x i16>, <8 x i16>* %B
        %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 7, i32 5, i32 13, i32 3, i32 2, i32 2, i32 9>
        ret <8 x i16> %tmp3
 }
@@ -185,7 +185,7 @@
 define arm_aapcscc void @test_elem_mismatch(<2 x i64>* nocapture %src, <4 x i16>* nocapture %dest) nounwind {
 ; CHECK-LABEL: test_elem_mismatch:
 ; CHECK: vstr
-  %tmp0 = load <2 x i64>* %src, align 16
+  %tmp0 = load <2 x i64>, <2 x i64>* %src, align 16
   %tmp1 = bitcast <2 x i64> %tmp0 to <4 x i32>
   %tmp2 = extractelement <4 x i32> %tmp1, i32 0
   %tmp3 = extractelement <4 x i32> %tmp1, i32 2
diff --git a/llvm/test/CodeGen/ARM/vfcmp.ll b/llvm/test/CodeGen/ARM/vfcmp.ll
index 4b2fea9..8673b7d 100644
--- a/llvm/test/CodeGen/ARM/vfcmp.ll
+++ b/llvm/test/CodeGen/ARM/vfcmp.ll
@@ -7,8 +7,8 @@
 ;CHECK-LABEL: vcunef32:
 ;CHECK: vceq.f32
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = fcmp une <2 x float> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -18,8 +18,8 @@
 define <2 x i32> @vcoltf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vcoltf32:
 ;CHECK: vcgt.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = fcmp olt <2 x float> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -29,8 +29,8 @@
 define <2 x i32> @vcolef32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vcolef32:
 ;CHECK: vcge.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = fcmp ole <2 x float> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -41,8 +41,8 @@
 ;CHECK-LABEL: vcugef32:
 ;CHECK: vcgt.f32
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = fcmp uge <2 x float> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -53,8 +53,8 @@
 ;CHECK-LABEL: vculef32:
 ;CHECK: vcgt.f32
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = fcmp ule <2 x float> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -65,8 +65,8 @@
 ;CHECK-LABEL: vcugtf32:
 ;CHECK: vcge.f32
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = fcmp ugt <2 x float> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -77,8 +77,8 @@
 ;CHECK-LABEL: vcultf32:
 ;CHECK: vcge.f32
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = fcmp ult <2 x float> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -91,8 +91,8 @@
 ;CHECK-NEXT: vcgt.f32
 ;CHECK-NEXT: vorr
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = fcmp ueq <2 x float> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -104,8 +104,8 @@
 ;CHECK: vcgt.f32
 ;CHECK-NEXT: vcgt.f32
 ;CHECK-NEXT: vorr
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = fcmp one <2 x float> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -118,8 +118,8 @@
 ;CHECK-NEXT: vcgt.f32
 ;CHECK-NEXT: vorr
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = fcmp uno <2 x float> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -131,8 +131,8 @@
 ;CHECK: vcge.f32
 ;CHECK-NEXT: vcgt.f32
 ;CHECK-NEXT: vorr
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = fcmp ord <2 x float> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
diff --git a/llvm/test/CodeGen/ARM/vfp.ll b/llvm/test/CodeGen/ARM/vfp.ll
index 57ff9d3..31b55e8 100644
--- a/llvm/test/CodeGen/ARM/vfp.ll
+++ b/llvm/test/CodeGen/ARM/vfp.ll
@@ -2,8 +2,8 @@
 ; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+vfp2 -disable-post-ra -regalloc=basic | FileCheck %s
 
 define void @test(float* %P, double* %D) {
-	%A = load float* %P		; <float> [#uses=1]
-	%B = load double* %D		; <double> [#uses=1]
+	%A = load float, float* %P		; <float> [#uses=1]
+	%B = load double, double* %D		; <double> [#uses=1]
 	store float %A, float* %P
 	store double %B, double* %D
 	ret void
@@ -15,11 +15,11 @@
 
 define void @test_abs(float* %P, double* %D) {
 ;CHECK-LABEL: test_abs:
-	%a = load float* %P		; <float> [#uses=1]
+	%a = load float, float* %P		; <float> [#uses=1]
 ;CHECK: vabs.f32
 	%b = call float @fabsf( float %a ) readnone	; <float> [#uses=1]
 	store float %b, float* %P
-	%A = load double* %D		; <double> [#uses=1]
+	%A = load double, double* %D		; <double> [#uses=1]
 ;CHECK: vabs.f64
 	%B = call double @fabs( double %A ) readnone	; <double> [#uses=1]
 	store double %B, double* %D
@@ -28,10 +28,10 @@
 
 define void @test_add(float* %P, double* %D) {
 ;CHECK-LABEL: test_add:
-	%a = load float* %P		; <float> [#uses=2]
+	%a = load float, float* %P		; <float> [#uses=2]
 	%b = fadd float %a, %a		; <float> [#uses=1]
 	store float %b, float* %P
-	%A = load double* %D		; <double> [#uses=2]
+	%A = load double, double* %D		; <double> [#uses=2]
 	%B = fadd double %A, %A		; <double> [#uses=1]
 	store double %B, double* %D
 	ret void
@@ -39,11 +39,11 @@
 
 define void @test_ext_round(float* %P, double* %D) {
 ;CHECK-LABEL: test_ext_round:
-	%a = load float* %P		; <float> [#uses=1]
+	%a = load float, float* %P		; <float> [#uses=1]
 ;CHECK: vcvt.f64.f32
 ;CHECK: vcvt.f32.f64
 	%b = fpext float %a to double		; <double> [#uses=1]
-	%A = load double* %D		; <double> [#uses=1]
+	%A = load double, double* %D		; <double> [#uses=1]
 	%B = fptrunc double %A to float		; <float> [#uses=1]
 	store double %b, double* %D
 	store float %B, float* %P
@@ -52,9 +52,9 @@
 
 define void @test_fma(float* %P1, float* %P2, float* %P3) {
 ;CHECK-LABEL: test_fma:
-	%a1 = load float* %P1		; <float> [#uses=1]
-	%a2 = load float* %P2		; <float> [#uses=1]
-	%a3 = load float* %P3		; <float> [#uses=1]
+	%a1 = load float, float* %P1		; <float> [#uses=1]
+	%a2 = load float, float* %P2		; <float> [#uses=1]
+	%a3 = load float, float* %P3		; <float> [#uses=1]
 ;CHECK: vnmls.f32
 	%X = fmul float %a1, %a2		; <float> [#uses=1]
 	%Y = fsub float %X, %a3		; <float> [#uses=1]
@@ -64,7 +64,7 @@
 
 define i32 @test_ftoi(float* %P1) {
 ;CHECK-LABEL: test_ftoi:
-	%a1 = load float* %P1		; <float> [#uses=1]
+	%a1 = load float, float* %P1		; <float> [#uses=1]
 ;CHECK: vcvt.s32.f32
 	%b1 = fptosi float %a1 to i32		; <i32> [#uses=1]
 	ret i32 %b1
@@ -72,7 +72,7 @@
 
 define i32 @test_ftou(float* %P1) {
 ;CHECK-LABEL: test_ftou:
-	%a1 = load float* %P1		; <float> [#uses=1]
+	%a1 = load float, float* %P1		; <float> [#uses=1]
 ;CHECK: vcvt.u32.f32
 	%b1 = fptoui float %a1 to i32		; <i32> [#uses=1]
 	ret i32 %b1
@@ -80,7 +80,7 @@
 
 define i32 @test_dtoi(double* %P1) {
 ;CHECK-LABEL: test_dtoi:
-	%a1 = load double* %P1		; <double> [#uses=1]
+	%a1 = load double, double* %P1		; <double> [#uses=1]
 ;CHECK: vcvt.s32.f64
 	%b1 = fptosi double %a1 to i32		; <i32> [#uses=1]
 	ret i32 %b1
@@ -88,7 +88,7 @@
 
 define i32 @test_dtou(double* %P1) {
 ;CHECK-LABEL: test_dtou:
-	%a1 = load double* %P1		; <double> [#uses=1]
+	%a1 = load double, double* %P1		; <double> [#uses=1]
 ;CHECK: vcvt.u32.f64
 	%b1 = fptoui double %a1 to i32		; <i32> [#uses=1]
 	ret i32 %b1
@@ -113,9 +113,9 @@
 define void @test_cmp(float* %glob, i32 %X) {
 ;CHECK-LABEL: test_cmp:
 entry:
-	%tmp = load float* %glob		; <float> [#uses=2]
+	%tmp = load float, float* %glob		; <float> [#uses=2]
 	%tmp3 = getelementptr float, float* %glob, i32 2		; <float*> [#uses=1]
-	%tmp4 = load float* %tmp3		; <float> [#uses=2]
+	%tmp4 = load float, float* %tmp3		; <float> [#uses=2]
 	%tmp.upgrd.1 = fcmp oeq float %tmp, %tmp4		; <i1> [#uses=1]
 	%tmp5 = fcmp uno float %tmp, %tmp4		; <i1> [#uses=1]
 	%tmp6 = or i1 %tmp.upgrd.1, %tmp5		; <i1> [#uses=1]
@@ -141,7 +141,7 @@
 define void @test_cmpfp0(float* %glob, i32 %X) {
 ;CHECK-LABEL: test_cmpfp0:
 entry:
-	%tmp = load float* %glob		; <float> [#uses=1]
+	%tmp = load float, float* %glob		; <float> [#uses=1]
 ;CHECK: vcmpe.f32
 	%tmp.upgrd.3 = fcmp ogt float %tmp, 0.000000e+00		; <i1> [#uses=1]
 	br i1 %tmp.upgrd.3, label %cond_true, label %cond_false
diff --git a/llvm/test/CodeGen/ARM/vget_lane.ll b/llvm/test/CodeGen/ARM/vget_lane.ll
index 2518ee2..d4cbfad 100644
--- a/llvm/test/CodeGen/ARM/vget_lane.ll
+++ b/llvm/test/CodeGen/ARM/vget_lane.ll
@@ -5,7 +5,7 @@
 define i32 @vget_lanes8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vget_lanes8:
 ;CHECK: vmov.s8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = extractelement <8 x i8> %tmp1, i32 1
 	%tmp3 = sext i8 %tmp2 to i32
 	ret i32 %tmp3
@@ -14,7 +14,7 @@
 define i32 @vget_lanes16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vget_lanes16:
 ;CHECK: vmov.s16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = extractelement <4 x i16> %tmp1, i32 1
 	%tmp3 = sext i16 %tmp2 to i32
 	ret i32 %tmp3
@@ -23,7 +23,7 @@
 define i32 @vget_laneu8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vget_laneu8:
 ;CHECK: vmov.u8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = extractelement <8 x i8> %tmp1, i32 1
 	%tmp3 = zext i8 %tmp2 to i32
 	ret i32 %tmp3
@@ -32,7 +32,7 @@
 define i32 @vget_laneu16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vget_laneu16:
 ;CHECK: vmov.u16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = extractelement <4 x i16> %tmp1, i32 1
 	%tmp3 = zext i16 %tmp2 to i32
 	ret i32 %tmp3
@@ -42,7 +42,7 @@
 define i32 @vget_lanei32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vget_lanei32:
 ;CHECK: vmov.32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = add <2 x i32> %tmp1, %tmp1
 	%tmp3 = extractelement <2 x i32> %tmp2, i32 1
 	ret i32 %tmp3
@@ -51,7 +51,7 @@
 define i32 @vgetQ_lanes8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vgetQ_lanes8:
 ;CHECK: vmov.s8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = extractelement <16 x i8> %tmp1, i32 1
 	%tmp3 = sext i8 %tmp2 to i32
 	ret i32 %tmp3
@@ -60,7 +60,7 @@
 define i32 @vgetQ_lanes16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vgetQ_lanes16:
 ;CHECK: vmov.s16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = extractelement <8 x i16> %tmp1, i32 1
 	%tmp3 = sext i16 %tmp2 to i32
 	ret i32 %tmp3
@@ -69,7 +69,7 @@
 define i32 @vgetQ_laneu8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vgetQ_laneu8:
 ;CHECK: vmov.u8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = extractelement <16 x i8> %tmp1, i32 1
 	%tmp3 = zext i8 %tmp2 to i32
 	ret i32 %tmp3
@@ -78,7 +78,7 @@
 define i32 @vgetQ_laneu16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vgetQ_laneu16:
 ;CHECK: vmov.u16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = extractelement <8 x i16> %tmp1, i32 1
 	%tmp3 = zext i16 %tmp2 to i32
 	ret i32 %tmp3
@@ -88,7 +88,7 @@
 define i32 @vgetQ_lanei32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vgetQ_lanei32:
 ;CHECK: vmov.32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = add <4 x i32> %tmp1, %tmp1
 	%tmp3 = extractelement <4 x i32> %tmp2, i32 1
 	ret i32 %tmp3
@@ -100,7 +100,7 @@
   %arg0_uint16x4_t = alloca <4 x i16>             ; <<4 x i16>*> [#uses=1]
   %out_uint16_t = alloca i16                      ; <i16*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  %0 = load <4 x i16>* %arg0_uint16x4_t, align 8  ; <<4 x i16>> [#uses=1]
+  %0 = load <4 x i16>, <4 x i16>* %arg0_uint16x4_t, align 8  ; <<4 x i16>> [#uses=1]
   %1 = extractelement <4 x i16> %0, i32 1         ; <i16> [#uses=1]
   %2 = add i16 %1, %1
   store i16 %2, i16* %out_uint16_t, align 2
@@ -116,7 +116,7 @@
   %arg0_uint8x8_t = alloca <8 x i8>               ; <<8 x i8>*> [#uses=1]
   %out_uint8_t = alloca i8                        ; <i8*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  %0 = load <8 x i8>* %arg0_uint8x8_t, align 8    ; <<8 x i8>> [#uses=1]
+  %0 = load <8 x i8>, <8 x i8>* %arg0_uint8x8_t, align 8    ; <<8 x i8>> [#uses=1]
   %1 = extractelement <8 x i8> %0, i32 1          ; <i8> [#uses=1]
   %2 = add i8 %1, %1
   store i8 %2, i8* %out_uint8_t, align 1
@@ -132,7 +132,7 @@
   %arg0_uint16x8_t = alloca <8 x i16>             ; <<8 x i16>*> [#uses=1]
   %out_uint16_t = alloca i16                      ; <i16*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  %0 = load <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1]
+  %0 = load <8 x i16>, <8 x i16>* %arg0_uint16x8_t, align 16 ; <<8 x i16>> [#uses=1]
   %1 = extractelement <8 x i16> %0, i32 1         ; <i16> [#uses=1]
   %2 = add i16 %1, %1
   store i16 %2, i16* %out_uint16_t, align 2
@@ -148,7 +148,7 @@
   %arg0_uint8x16_t = alloca <16 x i8>             ; <<16 x i8>*> [#uses=1]
   %out_uint8_t = alloca i8                        ; <i8*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  %0 = load <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1]
+  %0 = load <16 x i8>, <16 x i8>* %arg0_uint8x16_t, align 16 ; <<16 x i8>> [#uses=1]
   %1 = extractelement <16 x i8> %0, i32 1         ; <i8> [#uses=1]
   %2 = add i8 %1, %1
   store i8 %2, i8* %out_uint8_t, align 1
@@ -161,7 +161,7 @@
 define <8 x i8> @vset_lane8(<8 x i8>* %A, i8 %B) nounwind {
 ;CHECK-LABEL: vset_lane8:
 ;CHECK: vmov.8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = insertelement <8 x i8> %tmp1, i8 %B, i32 1
 	ret <8 x i8> %tmp2
 }
@@ -169,7 +169,7 @@
 define <4 x i16> @vset_lane16(<4 x i16>* %A, i16 %B) nounwind {
 ;CHECK-LABEL: vset_lane16:
 ;CHECK: vmov.16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = insertelement <4 x i16> %tmp1, i16 %B, i32 1
 	ret <4 x i16> %tmp2
 }
@@ -177,7 +177,7 @@
 define <2 x i32> @vset_lane32(<2 x i32>* %A, i32 %B) nounwind {
 ;CHECK-LABEL: vset_lane32:
 ;CHECK: vmov.32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = insertelement <2 x i32> %tmp1, i32 %B, i32 1
 	ret <2 x i32> %tmp2
 }
@@ -185,7 +185,7 @@
 define <16 x i8> @vsetQ_lane8(<16 x i8>* %A, i8 %B) nounwind {
 ;CHECK-LABEL: vsetQ_lane8:
 ;CHECK: vmov.8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = insertelement <16 x i8> %tmp1, i8 %B, i32 1
 	ret <16 x i8> %tmp2
 }
@@ -193,7 +193,7 @@
 define <8 x i16> @vsetQ_lane16(<8 x i16>* %A, i16 %B) nounwind {
 ;CHECK-LABEL: vsetQ_lane16:
 ;CHECK: vmov.16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = insertelement <8 x i16> %tmp1, i16 %B, i32 1
 	ret <8 x i16> %tmp2
 }
@@ -201,7 +201,7 @@
 define <4 x i32> @vsetQ_lane32(<4 x i32>* %A, i32 %B) nounwind {
 ;CHECK-LABEL: vsetQ_lane32:
 ;CHECK: vmov.32 d{{.*}}[1], r1
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = insertelement <4 x i32> %tmp1, i32 %B, i32 1
 	ret <4 x i32> %tmp2
 }
@@ -219,14 +219,14 @@
 ; be an immediate constant.  Make sure a variable lane number is handled.
 
 define i32 @vget_variable_lanes8(<8 x i8>* %A, i32 %B) nounwind {
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = extractelement <8 x i8> %tmp1, i32 %B
 	%tmp3 = sext i8 %tmp2 to i32
 	ret i32 %tmp3
 }
 
 define i32 @vgetQ_variable_lanei32(<4 x i32>* %A, i32 %B) nounwind {
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = add <4 x i32> %tmp1, %tmp1
 	%tmp3 = extractelement <4 x i32> %tmp2, i32 %B
 	ret i32 %tmp3
diff --git a/llvm/test/CodeGen/ARM/vhadd.ll b/llvm/test/CodeGen/ARM/vhadd.ll
index 6183db3..01e239d 100644
--- a/llvm/test/CodeGen/ARM/vhadd.ll
+++ b/llvm/test/CodeGen/ARM/vhadd.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @vhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vhadds8:
 ;CHECK: vhadd.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @vhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vhadds16:
 ;CHECK: vhadd.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @vhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vhadds32:
 ;CHECK: vhadd.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -30,8 +30,8 @@
 define <8 x i8> @vhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vhaddu8:
 ;CHECK: vhadd.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -39,8 +39,8 @@
 define <4 x i16> @vhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vhaddu16:
 ;CHECK: vhadd.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -48,8 +48,8 @@
 define <2 x i32> @vhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vhaddu32:
 ;CHECK: vhadd.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -57,8 +57,8 @@
 define <16 x i8> @vhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vhaddQs8:
 ;CHECK: vhadd.s8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -66,8 +66,8 @@
 define <8 x i16> @vhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vhaddQs16:
 ;CHECK: vhadd.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -75,8 +75,8 @@
 define <4 x i32> @vhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vhaddQs32:
 ;CHECK: vhadd.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -84,8 +84,8 @@
 define <16 x i8> @vhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vhaddQu8:
 ;CHECK: vhadd.u8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -93,8 +93,8 @@
 define <8 x i16> @vhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vhaddQu16:
 ;CHECK: vhadd.u16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -102,8 +102,8 @@
 define <4 x i32> @vhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vhaddQu32:
 ;CHECK: vhadd.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -127,8 +127,8 @@
 define <8 x i8> @vrhadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vrhadds8:
 ;CHECK: vrhadd.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -136,8 +136,8 @@
 define <4 x i16> @vrhadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vrhadds16:
 ;CHECK: vrhadd.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -145,8 +145,8 @@
 define <2 x i32> @vrhadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vrhadds32:
 ;CHECK: vrhadd.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -154,8 +154,8 @@
 define <8 x i8> @vrhaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vrhaddu8:
 ;CHECK: vrhadd.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -163,8 +163,8 @@
 define <4 x i16> @vrhaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vrhaddu16:
 ;CHECK: vrhadd.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -172,8 +172,8 @@
 define <2 x i32> @vrhaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vrhaddu32:
 ;CHECK: vrhadd.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -181,8 +181,8 @@
 define <16 x i8> @vrhaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vrhaddQs8:
 ;CHECK: vrhadd.s8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -190,8 +190,8 @@
 define <8 x i16> @vrhaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vrhaddQs16:
 ;CHECK: vrhadd.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -199,8 +199,8 @@
 define <4 x i32> @vrhaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vrhaddQs32:
 ;CHECK: vrhadd.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -208,8 +208,8 @@
 define <16 x i8> @vrhaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vrhaddQu8:
 ;CHECK: vrhadd.u8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -217,8 +217,8 @@
 define <8 x i16> @vrhaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vrhaddQu16:
 ;CHECK: vrhadd.u16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -226,8 +226,8 @@
 define <4 x i32> @vrhaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vrhaddQu32:
 ;CHECK: vrhadd.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
diff --git a/llvm/test/CodeGen/ARM/vhsub.ll b/llvm/test/CodeGen/ARM/vhsub.ll
index f1a0cb2..7b3b29a 100644
--- a/llvm/test/CodeGen/ARM/vhsub.ll
+++ b/llvm/test/CodeGen/ARM/vhsub.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @vhsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vhsubs8:
 ;CHECK: vhsub.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @vhsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vhsubs16:
 ;CHECK: vhsub.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @vhsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vhsubs32:
 ;CHECK: vhsub.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -30,8 +30,8 @@
 define <8 x i8> @vhsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vhsubu8:
 ;CHECK: vhsub.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -39,8 +39,8 @@
 define <4 x i16> @vhsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vhsubu16:
 ;CHECK: vhsub.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -48,8 +48,8 @@
 define <2 x i32> @vhsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vhsubu32:
 ;CHECK: vhsub.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -57,8 +57,8 @@
 define <16 x i8> @vhsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vhsubQs8:
 ;CHECK: vhsub.s8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -66,8 +66,8 @@
 define <8 x i16> @vhsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vhsubQs16:
 ;CHECK: vhsub.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -75,8 +75,8 @@
 define <4 x i32> @vhsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vhsubQs32:
 ;CHECK: vhsub.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -84,8 +84,8 @@
 define <16 x i8> @vhsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vhsubQu8:
 ;CHECK: vhsub.u8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -93,8 +93,8 @@
 define <8 x i16> @vhsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vhsubQu16:
 ;CHECK: vhsub.u16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -102,8 +102,8 @@
 define <4 x i32> @vhsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vhsubQu32:
 ;CHECK: vhsub.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
diff --git a/llvm/test/CodeGen/ARM/vicmp.ll b/llvm/test/CodeGen/ARM/vicmp.ll
index bebb320..21b104a 100644
--- a/llvm/test/CodeGen/ARM/vicmp.ll
+++ b/llvm/test/CodeGen/ARM/vicmp.ll
@@ -10,8 +10,8 @@
 ;CHECK-LABEL: vcnei8:
 ;CHECK: vceq.i8
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = icmp ne <8 x i8> %tmp1, %tmp2
         %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
 	ret <8 x i8> %tmp4
@@ -21,8 +21,8 @@
 ;CHECK-LABEL: vcnei16:
 ;CHECK: vceq.i16
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = icmp ne <4 x i16> %tmp1, %tmp2
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
 	ret <4 x i16> %tmp4
@@ -32,8 +32,8 @@
 ;CHECK-LABEL: vcnei32:
 ;CHECK: vceq.i32
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = icmp ne <2 x i32> %tmp1, %tmp2
         %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
 	ret <2 x i32> %tmp4
@@ -43,8 +43,8 @@
 ;CHECK-LABEL: vcneQi8:
 ;CHECK: vceq.i8
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = icmp ne <16 x i8> %tmp1, %tmp2
         %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
 	ret <16 x i8> %tmp4
@@ -54,8 +54,8 @@
 ;CHECK-LABEL: vcneQi16:
 ;CHECK: vceq.i16
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = icmp ne <8 x i16> %tmp1, %tmp2
         %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
 	ret <8 x i16> %tmp4
@@ -65,8 +65,8 @@
 ;CHECK-LABEL: vcneQi32:
 ;CHECK: vceq.i32
 ;CHECK-NEXT: vmvn
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = icmp ne <4 x i32> %tmp1, %tmp2
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
@@ -75,8 +75,8 @@
 define <16 x i8> @vcltQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vcltQs8:
 ;CHECK: vcgt.s8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = icmp slt <16 x i8> %tmp1, %tmp2
         %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
 	ret <16 x i8> %tmp4
@@ -85,8 +85,8 @@
 define <4 x i16> @vcles16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vcles16:
 ;CHECK: vcge.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = icmp sle <4 x i16> %tmp1, %tmp2
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
 	ret <4 x i16> %tmp4
@@ -95,8 +95,8 @@
 define <4 x i16> @vcltu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vcltu16:
 ;CHECK: vcgt.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = icmp ult <4 x i16> %tmp1, %tmp2
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
 	ret <4 x i16> %tmp4
@@ -105,8 +105,8 @@
 define <4 x i32> @vcleQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vcleQu32:
 ;CHECK: vcge.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = icmp ule <4 x i32> %tmp1, %tmp2
         %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
 	ret <4 x i32> %tmp4
diff --git a/llvm/test/CodeGen/ARM/vld1.ll b/llvm/test/CodeGen/ARM/vld1.ll
index 24dc1ab..8064ea4 100644
--- a/llvm/test/CodeGen/ARM/vld1.ll
+++ b/llvm/test/CodeGen/ARM/vld1.ll
@@ -23,7 +23,7 @@
 define <4 x i16> @vld1i16_update(i16** %ptr) nounwind {
 ;CHECK-LABEL: vld1i16_update:
 ;CHECK: vld1.16 {d16}, [{{r[0-9]+}}]!
-	%A = load i16** %ptr
+	%A = load i16*, i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1)
 	%tmp2 = getelementptr i16, i16* %A, i32 4
@@ -43,7 +43,7 @@
 define <2 x i32> @vld1i32_update(i32** %ptr, i32 %inc) nounwind {
 ;CHECK-LABEL: vld1i32_update:
 ;CHECK: vld1.32 {d16}, [{{r[0-9]+}}], {{r[0-9]+}}
-	%A = load i32** %ptr
+	%A = load i32*, i32** %ptr
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1)
 	%tmp2 = getelementptr i32, i32* %A, i32 %inc
@@ -79,7 +79,7 @@
 define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {
 ;CHECK-LABEL: vld1Qi8_update:
 ;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}:64]!
-	%A = load i8** %ptr
+	%A = load i8*, i8** %ptr
 	%tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
 	%tmp2 = getelementptr i8, i8* %A, i32 16
 	store i8* %tmp2, i8** %ptr
diff --git a/llvm/test/CodeGen/ARM/vld2.ll b/llvm/test/CodeGen/ARM/vld2.ll
index 26097cf..391b491 100644
--- a/llvm/test/CodeGen/ARM/vld2.ll
+++ b/llvm/test/CodeGen/ARM/vld2.ll
@@ -60,7 +60,7 @@
 define <2 x float> @vld2f_update(float** %ptr) nounwind {
 ;CHECK-LABEL: vld2f_update:
 ;CHECK: vld2.32 {d16, d17}, [r1]!
-	%A = load float** %ptr
+	%A = load float*, float** %ptr
 	%tmp0 = bitcast float* %A to i8*
 	%tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1)
 	%tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0
@@ -98,7 +98,7 @@
 define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
 ;CHECK-LABEL: vld2Qi8_update:
 ;CHECK: vld2.8 {d16, d17, d18, d19}, [r2:128], r1
-	%A = load i8** %ptr
+	%A = load i8*, i8** %ptr
 	%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 16)
         %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
         %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
diff --git a/llvm/test/CodeGen/ARM/vld3.ll b/llvm/test/CodeGen/ARM/vld3.ll
index a98b35e..0d14179 100644
--- a/llvm/test/CodeGen/ARM/vld3.ll
+++ b/llvm/test/CodeGen/ARM/vld3.ll
@@ -38,7 +38,7 @@
 define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind {
 ;CHECK-LABEL: vld3i16_update:
 ;CHECK: vld3.16 {d16, d17, d18}, [{{r[0-9]+}}], {{r[0-9]+}}
-	%A = load i16** %ptr
+	%A = load i16*, i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1)
 	%tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0
@@ -137,7 +137,7 @@
 ;CHECK-LABEL: vld3Qi32_update:
 ;CHECK: vld3.32 {d16, d18, d20}, [r[[R:[0-9]+]]]!
 ;CHECK: vld3.32 {d17, d19, d21}, [r[[R]]]!
-	%A = load i32** %ptr
+	%A = load i32*, i32** %ptr
 	%tmp0 = bitcast i32* %A to i8*
 	%tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1)
 	%tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0
diff --git a/llvm/test/CodeGen/ARM/vld4.ll b/llvm/test/CodeGen/ARM/vld4.ll
index e244e4b..575e0fa 100644
--- a/llvm/test/CodeGen/ARM/vld4.ll
+++ b/llvm/test/CodeGen/ARM/vld4.ll
@@ -26,7 +26,7 @@
 define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind {
 ;CHECK-LABEL: vld4i8_update:
 ;CHECK: vld4.8 {d16, d17, d18, d19}, [r2:128], r1
-	%A = load i8** %ptr
+	%A = load i8*, i8** %ptr
 	%tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 16)
 	%tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
 	%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
@@ -126,7 +126,7 @@
 ;CHECK-LABEL: vld4Qi16_update:
 ;CHECK: vld4.16 {d16, d18, d20, d22}, [r1:64]!
 ;CHECK: vld4.16 {d17, d19, d21, d23}, [r1:64]!
-	%A = load i16** %ptr
+	%A = load i16*, i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
 	%tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 8)
 	%tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0
diff --git a/llvm/test/CodeGen/ARM/vlddup.ll b/llvm/test/CodeGen/ARM/vlddup.ll
index caf00a4..09304d8 100644
--- a/llvm/test/CodeGen/ARM/vlddup.ll
+++ b/llvm/test/CodeGen/ARM/vlddup.ll
@@ -4,7 +4,7 @@
 ;CHECK-LABEL: vld1dupi8:
 ;Check the (default) alignment value.
 ;CHECK: vld1.8 {d16[]}, [r0]
-	%tmp1 = load i8* %A, align 8
+	%tmp1 = load i8, i8* %A, align 8
 	%tmp2 = insertelement <8 x i8> undef, i8 %tmp1, i32 0
 	%tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <8 x i32> zeroinitializer
         ret <8 x i8> %tmp3
@@ -14,7 +14,7 @@
 ;CHECK-LABEL: vld1dupi16:
 ;Check the alignment value.  Max for this instruction is 16 bits:
 ;CHECK: vld1.16 {d16[]}, [r0:16]
-	%tmp1 = load i16* %A, align 8
+	%tmp1 = load i16, i16* %A, align 8
 	%tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0
 	%tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer
         ret <4 x i16> %tmp3
@@ -24,7 +24,7 @@
 ;CHECK-LABEL: vld1dupi32:
 ;Check the alignment value.  Max for this instruction is 32 bits:
 ;CHECK: vld1.32 {d16[]}, [r0:32]
-	%tmp1 = load i32* %A, align 8
+	%tmp1 = load i32, i32* %A, align 8
 	%tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
 	%tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
         ret <2 x i32> %tmp3
@@ -33,7 +33,7 @@
 define <2 x float> @vld1dupf(float* %A) nounwind {
 ;CHECK-LABEL: vld1dupf:
 ;CHECK: vld1.32 {d16[]}, [r0:32]
-	%tmp0 = load float* %A
+	%tmp0 = load float, float* %A
         %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
         %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
         ret <2 x float> %tmp2
@@ -43,7 +43,7 @@
 ;CHECK-LABEL: vld1dupQi8:
 ;Check the (default) alignment value.
 ;CHECK: vld1.8 {d16[], d17[]}, [r0]
-	%tmp1 = load i8* %A, align 8
+	%tmp1 = load i8, i8* %A, align 8
 	%tmp2 = insertelement <16 x i8> undef, i8 %tmp1, i32 0
 	%tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer
         ret <16 x i8> %tmp3
@@ -52,7 +52,7 @@
 define <4 x float> @vld1dupQf(float* %A) nounwind {
 ;CHECK-LABEL: vld1dupQf:
 ;CHECK: vld1.32 {d16[], d17[]}, [r0:32]
-        %tmp0 = load float* %A
+        %tmp0 = load float, float* %A
         %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
         %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
         ret <4 x float> %tmp2
@@ -93,7 +93,7 @@
 define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
 ;CHECK-LABEL: vld2dupi16_update:
 ;CHECK: vld2.16 {d16[], d17[]}, [r1]!
-	%A = load i16** %ptr
+	%A = load i16*, i16** %ptr
         %A2 = bitcast i16* %A to i8*
 	%tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
 	%tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
@@ -130,7 +130,7 @@
 define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind {
 ;CHECK-LABEL: vld3dupi8_update:
 ;CHECK: vld3.8 {d16[], d17[], d18[]}, [r2], r1
-	%A = load i8** %ptr
+	%A = load i8*, i8** %ptr
 	%tmp0 = tail call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 8)
 	%tmp1 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 0
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
@@ -171,7 +171,7 @@
 define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
 ;CHECK-LABEL: vld4dupi16_update:
 ;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]!
-	%A = load i16** %ptr
+	%A = load i16*, i16** %ptr
         %A2 = bitcast i16* %A to i8*
 	%tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1)
 	%tmp1 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 0
diff --git a/llvm/test/CodeGen/ARM/vldlane.ll b/llvm/test/CodeGen/ARM/vldlane.ll
index 14d4e3f..ac2be7f8 100644
--- a/llvm/test/CodeGen/ARM/vldlane.ll
+++ b/llvm/test/CodeGen/ARM/vldlane.ll
@@ -7,8 +7,8 @@
 ;CHECK-LABEL: vld1lanei8:
 ;Check the (default) alignment value.
 ;CHECK: vld1.8 {d16[3]}, [r0]
-	%tmp1 = load <8 x i8>* %B
-	%tmp2 = load i8* %A, align 8
+	%tmp1 = load <8 x i8>, <8 x i8>* %B
+	%tmp2 = load i8, i8* %A, align 8
 	%tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3
         ret <8 x i8> %tmp3
 }
@@ -17,8 +17,8 @@
 ;CHECK-LABEL: vld1lanei16:
 ;Check the alignment value.  Max for this instruction is 16 bits:
 ;CHECK: vld1.16 {d16[2]}, [r0:16]
-	%tmp1 = load <4 x i16>* %B
-	%tmp2 = load i16* %A, align 8
+	%tmp1 = load <4 x i16>, <4 x i16>* %B
+	%tmp2 = load i16, i16* %A, align 8
 	%tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2
         ret <4 x i16> %tmp3
 }
@@ -27,8 +27,8 @@
 ;CHECK-LABEL: vld1lanei32:
 ;Check the alignment value.  Max for this instruction is 32 bits:
 ;CHECK: vld1.32 {d16[1]}, [r0:32]
-	%tmp1 = load <2 x i32>* %B
-	%tmp2 = load i32* %A, align 8
+	%tmp1 = load <2 x i32>, <2 x i32>* %B
+	%tmp2 = load i32, i32* %A, align 8
 	%tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
         ret <2 x i32> %tmp3
 }
@@ -37,8 +37,8 @@
 ;CHECK-LABEL: vld1lanei32a32:
 ;Check the alignment value.  Legal values are none or :32.
 ;CHECK: vld1.32 {d16[1]}, [r0:32]
-	%tmp1 = load <2 x i32>* %B
-	%tmp2 = load i32* %A, align 4
+	%tmp1 = load <2 x i32>, <2 x i32>* %B
+	%tmp2 = load i32, i32* %A, align 4
 	%tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
         ret <2 x i32> %tmp3
 }
@@ -46,8 +46,8 @@
 define <2 x float> @vld1lanef(float* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vld1lanef:
 ;CHECK: vld1.32 {d16[1]}, [r0:32]
-	%tmp1 = load <2 x float>* %B
-	%tmp2 = load float* %A, align 4
+	%tmp1 = load <2 x float>, <2 x float>* %B
+	%tmp2 = load float, float* %A, align 4
 	%tmp3 = insertelement <2 x float> %tmp1, float %tmp2, i32 1
 	ret <2 x float> %tmp3
 }
@@ -55,8 +55,8 @@
 define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vld1laneQi8:
 ;CHECK: vld1.8 {d17[1]}, [r0]
-	%tmp1 = load <16 x i8>* %B
-	%tmp2 = load i8* %A, align 8
+	%tmp1 = load <16 x i8>, <16 x i8>* %B
+	%tmp2 = load i8, i8* %A, align 8
 	%tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 9
 	ret <16 x i8> %tmp3
 }
@@ -64,8 +64,8 @@
 define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vld1laneQi16:
 ;CHECK: vld1.16 {d17[1]}, [r0:16]
-	%tmp1 = load <8 x i16>* %B
-	%tmp2 = load i16* %A, align 8
+	%tmp1 = load <8 x i16>, <8 x i16>* %B
+	%tmp2 = load i16, i16* %A, align 8
 	%tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5
 	ret <8 x i16> %tmp3
 }
@@ -73,8 +73,8 @@
 define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vld1laneQi32:
 ;CHECK: vld1.32 {d17[1]}, [r0:32]
-	%tmp1 = load <4 x i32>* %B
-	%tmp2 = load i32* %A, align 8
+	%tmp1 = load <4 x i32>, <4 x i32>* %B
+	%tmp2 = load i32, i32* %A, align 8
 	%tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3
 	ret <4 x i32> %tmp3
 }
@@ -82,8 +82,8 @@
 define <4 x float> @vld1laneQf(float* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: vld1laneQf:
 ;CHECK: vld1.32 {d16[0]}, [r0:32]
-	%tmp1 = load <4 x float>* %B
-	%tmp2 = load float* %A
+	%tmp1 = load <4 x float>, <4 x float>* %B
+	%tmp2 = load float, float* %A
 	%tmp3 = insertelement <4 x float> %tmp1, float %tmp2, i32 0
 	ret <4 x float> %tmp3
 }
@@ -101,7 +101,7 @@
 ;CHECK-LABEL: vld2lanei8:
 ;Check the alignment value.  Max for this instruction is 16 bits:
 ;CHECK: vld2.8 {d16[1], d17[1]}, [r0:16]
-	%tmp1 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %B
 	%tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
         %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
@@ -114,7 +114,7 @@
 ;Check the alignment value.  Max for this instruction is 32 bits:
 ;CHECK: vld2.16 {d16[1], d17[1]}, [r0:32]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %B
 	%tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
         %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
@@ -126,7 +126,7 @@
 ;CHECK-LABEL: vld2lanei32:
 ;CHECK: vld2.32
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %B
 	%tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
         %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
@@ -138,9 +138,9 @@
 define <2 x i32> @vld2lanei32_update(i32** %ptr, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vld2lanei32_update:
 ;CHECK: vld2.32 {d16[1], d17[1]}, [{{r[0-9]+}}]!
-	%A = load i32** %ptr
+	%A = load i32*, i32** %ptr
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %B
 	%tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
 	%tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0
 	%tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1
@@ -154,7 +154,7 @@
 ;CHECK-LABEL: vld2lanef:
 ;CHECK: vld2.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %B
 	%tmp2 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
         %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_float32x2x2_t %tmp2, 1
@@ -167,7 +167,7 @@
 ;Check the (default) alignment.
 ;CHECK: vld2.16 {d17[1], d19[1]}, [{{r[0-9]+}}]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %B
 	%tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
         %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1
@@ -180,7 +180,7 @@
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;CHECK: vld2.32 {d17[0], d19[0]}, [{{r[0-9]+}}:64]
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %B
 	%tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
         %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
@@ -192,7 +192,7 @@
 ;CHECK-LABEL: vld2laneQf:
 ;CHECK: vld2.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %B
 	%tmp2 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
         %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_float32x4x2_t %tmp2, 1
@@ -221,7 +221,7 @@
 define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vld3lanei8:
 ;CHECK: vld3.8
-	%tmp1 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %B
 	%tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
         %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
@@ -236,7 +236,7 @@
 ;Check the (default) alignment value.  VLD3 does not support alignment.
 ;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %B
 	%tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
         %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1
@@ -250,7 +250,7 @@
 ;CHECK-LABEL: vld3lanei32:
 ;CHECK: vld3.32
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %B
 	%tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
         %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1
@@ -264,7 +264,7 @@
 ;CHECK-LABEL: vld3lanef:
 ;CHECK: vld3.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %B
 	%tmp2 = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
         %tmp3 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_float32x2x3_t %tmp2, 1
@@ -279,7 +279,7 @@
 ;Check the (default) alignment value.  VLD3 does not support alignment.
 ;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %B
 	%tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
         %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
@@ -293,9 +293,9 @@
 define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
 ;CHECK-LABEL: vld3laneQi16_update:
 ;CHECK: vld3.16 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}], {{r[0-9]+}}
-	%A = load i16** %ptr
+	%A = load i16*, i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %B
 	%tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8)
 	%tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0
 	%tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1
@@ -311,7 +311,7 @@
 ;CHECK-LABEL: vld3laneQi32:
 ;CHECK: vld3.32
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %B
 	%tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1)
         %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1
@@ -325,7 +325,7 @@
 ;CHECK-LABEL: vld3laneQf:
 ;CHECK: vld3.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %B
 	%tmp2 = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
         %tmp3 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_float32x4x3_t %tmp2, 1
@@ -357,7 +357,7 @@
 ;CHECK-LABEL: vld4lanei8:
 ;Check the alignment value.  Max for this instruction is 32 bits:
 ;CHECK: vld4.8 {d{{.*}}[1], d{{.*}}[1], d{{.*}}[1], d{{.*}}[1]}, [{{r[0-9]+}}:32]
-	%tmp1 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %B
 	%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
         %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
@@ -373,8 +373,8 @@
 define <8 x i8> @vld4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vld4lanei8_update:
 ;CHECK: vld4.8 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:32]!
-	%A = load i8** %ptr
-	%tmp1 = load <8 x i8>* %B
+	%A = load i8*, i8** %ptr
+	%tmp1 = load <8 x i8>, <8 x i8>* %B
 	%tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
 	%tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
 	%tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
@@ -394,7 +394,7 @@
 ;being loaded is ignored.
 ;CHECK: vld4.16 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %B
 	%tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4)
         %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1
@@ -412,7 +412,7 @@
 ;it is smaller than the total size of the memory being loaded.
 ;CHECK: vld4.32 {d16[1], d17[1], d18[1], d19[1]}, [{{r[0-9]+}}:64]
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %B
 	%tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8)
         %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1
@@ -428,7 +428,7 @@
 ;CHECK-LABEL: vld4lanef:
 ;CHECK: vld4.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %B
 	%tmp2 = call %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
         %tmp3 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_float32x2x4_t %tmp2, 1
@@ -445,7 +445,7 @@
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;CHECK: vld4.16 {d16[1], d18[1], d20[1], d22[1]}, [{{r[0-9]+}}:64]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %B
 	%tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16)
         %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1
@@ -462,7 +462,7 @@
 ;Check the (default) alignment.
 ;CHECK: vld4.32 {d17[0], d19[0], d21[0], d23[0]}, [{{r[0-9]+}}]
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %B
 	%tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
         %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1
@@ -478,7 +478,7 @@
 ;CHECK-LABEL: vld4laneQf:
 ;CHECK: vld4.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %B
 	%tmp2 = call %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
         %tmp3 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_float32x4x4_t %tmp2, 1
diff --git a/llvm/test/CodeGen/ARM/vldm-liveness.ll b/llvm/test/CodeGen/ARM/vldm-liveness.ll
index 060044f..e114e69 100644
--- a/llvm/test/CodeGen/ARM/vldm-liveness.ll
+++ b/llvm/test/CodeGen/ARM/vldm-liveness.ll
@@ -23,13 +23,13 @@
 ; CHECK: vldmia r0, {s0, s1}
 ; CHECK: vldr s2, [r0, #16]
    %off0 = getelementptr float, float* %ptr, i32 0
-   %val0 = load float* %off0
+   %val0 = load float, float* %off0
    %off1 = getelementptr float, float* %ptr, i32 1
-   %val1 = load float* %off1
+   %val1 = load float, float* %off1
    %off4 = getelementptr float, float* %ptr, i32 4
-   %val4 = load float* %off4
+   %val4 = load float, float* %off4
    %off2 = getelementptr float, float* %ptr, i32 2
-   %val2 = load float* %off2
+   %val2 = load float, float* %off2
 
    %vec1 = insertelement <4 x float> undef, float %val0, i32 0
    %vec2 = insertelement <4 x float> %vec1, float %val1, i32 1
diff --git a/llvm/test/CodeGen/ARM/vldm-sched-a9.ll b/llvm/test/CodeGen/ARM/vldm-sched-a9.ll
index 368c26e..0e0cf97 100644
--- a/llvm/test/CodeGen/ARM/vldm-sched-a9.ll
+++ b/llvm/test/CodeGen/ARM/vldm-sched-a9.ll
@@ -13,9 +13,9 @@
   %arrayidx39 = getelementptr inbounds i64, i64* %src, i32 13
   %vecinit285 = shufflevector <16 x i64> undef, <16 x i64> <i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
   store <16 x i64> %vecinit285, <16 x i64>* undef, align 128
-  %0 = load i64* undef, align 8
+  %0 = load i64, i64* undef, align 8
   %vecinit379 = insertelement <16 x i64> undef, i64 %0, i32 9
-  %1 = load i64* undef, align 8
+  %1 = load i64, i64* undef, align 8
   %vecinit419 = insertelement <16 x i64> undef, i64 %1, i32 15
   store <16 x i64> %vecinit419, <16 x i64>* undef, align 128
   %vecinit579 = insertelement <16 x i64> undef, i64 0, i32 4
@@ -23,14 +23,14 @@
   %vecinit584 = insertelement <16 x i64> %vecinit582, i64 undef, i32 9
   %vecinit586 = insertelement <16 x i64> %vecinit584, i64 0, i32 10
   %vecinit589 = shufflevector <16 x i64> %vecinit586, <16 x i64> <i64 12, i64 13, i64 14, i64 15, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 16, i32 17, i32 18, i32 19, i32 undef>
-  %2 = load i64* undef, align 8
+  %2 = load i64, i64* undef, align 8
   %vecinit591 = insertelement <16 x i64> %vecinit589, i64 %2, i32 15
   store <16 x i64> %vecinit591, <16 x i64>* undef, align 128
   %vecinit694 = shufflevector <16 x i64> undef, <16 x i64> <i64 13, i64 14, i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
   store <16 x i64> %vecinit694, <16 x i64>* undef, align 128
-  %3 = load i64* undef, align 8
+  %3 = load i64, i64* undef, align 8
   %vecinit1331 = insertelement <16 x i64> undef, i64 %3, i32 14
-  %4 = load i64* undef, align 8
+  %4 = load i64, i64* undef, align 8
   %vecinit1468 = insertelement <16 x i64> undef, i64 %4, i32 11
   %vecinit1471 = shufflevector <16 x i64> %vecinit1468, <16 x i64> <i64 13, i64 14, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 undef, i32 undef>
   %vecinit1474 = shufflevector <16 x i64> %vecinit1471, <16 x i64> <i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
@@ -45,14 +45,14 @@
   %vecinit1599 = insertelement <16 x i64> %vecinit1597, i64 undef, i32 8
   %vecinit1601 = insertelement <16 x i64> %vecinit1599, i64 undef, i32 9
   %vecinit1603 = insertelement <16 x i64> %vecinit1601, i64 undef, i32 10
-  %5 = load i64* undef, align 8
+  %5 = load i64, i64* undef, align 8
   %vecinit1605 = insertelement <16 x i64> %vecinit1603, i64 %5, i32 11
   %vecinit1608 = shufflevector <16 x i64> %vecinit1605, <16 x i64> <i64 13, i64 14, i64 15, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 undef>
-  %6 = load i64* undef, align 8
+  %6 = load i64, i64* undef, align 8
   %vecinit1610 = insertelement <16 x i64> %vecinit1608, i64 %6, i32 15
   store <16 x i64> %vecinit1610, <16 x i64>* undef, align 128
   %vecinit2226 = shufflevector <16 x i64> undef, <16 x i64> <i64 6, i64 7, i64 8, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %7 = load i64* undef, align 8
+  %7 = load i64, i64* undef, align 8
   %vecinit2228 = insertelement <16 x i64> %vecinit2226, i64 %7, i32 8
   %vecinit2230 = insertelement <16 x i64> %vecinit2228, i64 undef, i32 9
   %vecinit2233 = shufflevector <16 x i64> %vecinit2230, <16 x i64> <i64 11, i64 12, i64 13, i64 14, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef>
@@ -62,7 +62,7 @@
   %vecinit2249 = shufflevector <16 x i64> %vecinit2246, <16 x i64> <i64 7, i64 8, i64 9, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 16, i32 17, i32 18, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %vecinit2252 = shufflevector <16 x i64> %vecinit2249, <16 x i64> <i64 10, i64 11, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %vecinit2255 = shufflevector <16 x i64> %vecinit2252, <16 x i64> <i64 12, i64 13, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 16, i32 17, i32 undef, i32 undef, i32 undef>
-  %8 = load i64* %arrayidx39, align 8
+  %8 = load i64, i64* %arrayidx39, align 8
   %vecinit2257 = insertelement <16 x i64> %vecinit2255, i64 %8, i32 13
   %vecinit2260 = shufflevector <16 x i64> %vecinit2257, <16 x i64> <i64 15, i64 16, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 16, i32 17>
   store <16 x i64> %vecinit2260, <16 x i64>* null, align 128
diff --git a/llvm/test/CodeGen/ARM/vminmax.ll b/llvm/test/CodeGen/ARM/vminmax.ll
index 1167ebe..011bfd7 100644
--- a/llvm/test/CodeGen/ARM/vminmax.ll
+++ b/llvm/test/CodeGen/ARM/vminmax.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @vmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vmins8:
 ;CHECK: vmin.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @vmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vmins16:
 ;CHECK: vmin.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @vmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vmins32:
 ;CHECK: vmin.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -30,8 +30,8 @@
 define <8 x i8> @vminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vminu8:
 ;CHECK: vmin.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -39,8 +39,8 @@
 define <4 x i16> @vminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vminu16:
 ;CHECK: vmin.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -48,8 +48,8 @@
 define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vminu32:
 ;CHECK: vmin.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -57,8 +57,8 @@
 define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vminf32:
 ;CHECK: vmin.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
@@ -66,8 +66,8 @@
 define <16 x i8> @vminQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vminQs8:
 ;CHECK: vmin.s8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -75,8 +75,8 @@
 define <8 x i16> @vminQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vminQs16:
 ;CHECK: vmin.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -84,8 +84,8 @@
 define <4 x i32> @vminQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vminQs32:
 ;CHECK: vmin.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -93,8 +93,8 @@
 define <16 x i8> @vminQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vminQu8:
 ;CHECK: vmin.u8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -102,8 +102,8 @@
 define <8 x i16> @vminQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vminQu16:
 ;CHECK: vmin.u16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -111,8 +111,8 @@
 define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vminQu32:
 ;CHECK: vmin.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -120,8 +120,8 @@
 define <4 x float> @vminQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: vminQf32:
 ;CHECK: vmin.f32
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x float> %tmp3
 }
@@ -149,8 +149,8 @@
 define <8 x i8> @vmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vmaxs8:
 ;CHECK: vmax.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -158,8 +158,8 @@
 define <4 x i16> @vmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vmaxs16:
 ;CHECK: vmax.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -167,8 +167,8 @@
 define <2 x i32> @vmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vmaxs32:
 ;CHECK: vmax.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -176,8 +176,8 @@
 define <8 x i8> @vmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vmaxu8:
 ;CHECK: vmax.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -185,8 +185,8 @@
 define <4 x i16> @vmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vmaxu16:
 ;CHECK: vmax.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -194,8 +194,8 @@
 define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vmaxu32:
 ;CHECK: vmax.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -203,8 +203,8 @@
 define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vmaxf32:
 ;CHECK: vmax.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
@@ -212,8 +212,8 @@
 define <16 x i8> @vmaxQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vmaxQs8:
 ;CHECK: vmax.s8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -221,8 +221,8 @@
 define <8 x i16> @vmaxQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vmaxQs16:
 ;CHECK: vmax.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -230,8 +230,8 @@
 define <4 x i32> @vmaxQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vmaxQs32:
 ;CHECK: vmax.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -239,8 +239,8 @@
 define <16 x i8> @vmaxQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vmaxQu8:
 ;CHECK: vmax.u8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -248,8 +248,8 @@
 define <8 x i16> @vmaxQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vmaxQu16:
 ;CHECK: vmax.u16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -257,8 +257,8 @@
 define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vmaxQu32:
 ;CHECK: vmax.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -266,8 +266,8 @@
 define <4 x float> @vmaxQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: vmaxQf32:
 ;CHECK: vmax.f32
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x float> %tmp3
 }
diff --git a/llvm/test/CodeGen/ARM/vminmaxnm.ll b/llvm/test/CodeGen/ARM/vminmaxnm.ll
index 39289a0..a183284 100644
--- a/llvm/test/CodeGen/ARM/vminmaxnm.ll
+++ b/llvm/test/CodeGen/ARM/vminmaxnm.ll
@@ -4,8 +4,8 @@
 define <4 x float> @vmaxnmq(<4 x float>* %A, <4 x float>* %B) nounwind {
 ; CHECK-LABEL: vmaxnmq:
 ; CHECK: vmaxnm.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-  %tmp1 = load <4 x float>* %A
-  %tmp2 = load <4 x float>* %B
+  %tmp1 = load <4 x float>, <4 x float>* %A
+  %tmp2 = load <4 x float>, <4 x float>* %B
   %tmp3 = call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
   ret <4 x float> %tmp3
 }
@@ -13,8 +13,8 @@
 define <2 x float> @vmaxnmd(<2 x float>* %A, <2 x float>* %B) nounwind {
 ; CHECK-LABEL: vmaxnmd:
 ; CHECK: vmaxnm.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %tmp1 = load <2 x float>* %A
-  %tmp2 = load <2 x float>* %B
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
   %tmp3 = call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
   ret <2 x float> %tmp3
 }
@@ -22,8 +22,8 @@
 define <4 x float> @vminnmq(<4 x float>* %A, <4 x float>* %B) nounwind {
 ; CHECK-LABEL: vminnmq:
 ; CHECK: vminnm.f32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-  %tmp1 = load <4 x float>* %A
-  %tmp2 = load <4 x float>* %B
+  %tmp1 = load <4 x float>, <4 x float>* %A
+  %tmp2 = load <4 x float>, <4 x float>* %B
   %tmp3 = call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
   ret <4 x float> %tmp3
 }
@@ -31,8 +31,8 @@
 define <2 x float> @vminnmd(<2 x float>* %A, <2 x float>* %B) nounwind {
 ; CHECK-LABEL: vminnmd:
 ; CHECK: vminnm.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
-  %tmp1 = load <2 x float>* %A
-  %tmp2 = load <2 x float>* %B
+  %tmp1 = load <2 x float>, <2 x float>* %A
+  %tmp2 = load <2 x float>, <2 x float>* %B
   %tmp3 = call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
   ret <2 x float> %tmp3
 }
diff --git a/llvm/test/CodeGen/ARM/vmla.ll b/llvm/test/CodeGen/ARM/vmla.ll
index 6073fc5..8ca33a9 100644
--- a/llvm/test/CodeGen/ARM/vmla.ll
+++ b/llvm/test/CodeGen/ARM/vmla.ll
@@ -3,9 +3,9 @@
 define <8 x i8> @vmlai8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
 ;CHECK-LABEL: vmlai8:
 ;CHECK: vmla.i8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
-	%tmp3 = load <8 x i8>* %C
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
+	%tmp3 = load <8 x i8>, <8 x i8>* %C
 	%tmp4 = mul <8 x i8> %tmp2, %tmp3
 	%tmp5 = add <8 x i8> %tmp1, %tmp4
 	ret <8 x i8> %tmp5
@@ -14,9 +14,9 @@
 define <4 x i16> @vmlai16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ;CHECK-LABEL: vmlai16:
 ;CHECK: vmla.i16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
-	%tmp3 = load <4 x i16>* %C
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
+	%tmp3 = load <4 x i16>, <4 x i16>* %C
 	%tmp4 = mul <4 x i16> %tmp2, %tmp3
 	%tmp5 = add <4 x i16> %tmp1, %tmp4
 	ret <4 x i16> %tmp5
@@ -25,9 +25,9 @@
 define <2 x i32> @vmlai32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ;CHECK-LABEL: vmlai32:
 ;CHECK: vmla.i32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
-	%tmp3 = load <2 x i32>* %C
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
+	%tmp3 = load <2 x i32>, <2 x i32>* %C
 	%tmp4 = mul <2 x i32> %tmp2, %tmp3
 	%tmp5 = add <2 x i32> %tmp1, %tmp4
 	ret <2 x i32> %tmp5
@@ -36,9 +36,9 @@
 define <2 x float> @vmlaf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
 ;CHECK-LABEL: vmlaf32:
 ;CHECK: vmla.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
-	%tmp3 = load <2 x float>* %C
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
+	%tmp3 = load <2 x float>, <2 x float>* %C
 	%tmp4 = fmul <2 x float> %tmp2, %tmp3
 	%tmp5 = fadd <2 x float> %tmp1, %tmp4
 	ret <2 x float> %tmp5
@@ -47,9 +47,9 @@
 define <16 x i8> @vmlaQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind {
 ;CHECK-LABEL: vmlaQi8:
 ;CHECK: vmla.i8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
-	%tmp3 = load <16 x i8>* %C
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
+	%tmp3 = load <16 x i8>, <16 x i8>* %C
 	%tmp4 = mul <16 x i8> %tmp2, %tmp3
 	%tmp5 = add <16 x i8> %tmp1, %tmp4
 	ret <16 x i8> %tmp5
@@ -58,9 +58,9 @@
 define <8 x i16> @vmlaQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
 ;CHECK-LABEL: vmlaQi16:
 ;CHECK: vmla.i16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
-	%tmp3 = load <8 x i16>* %C
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
+	%tmp3 = load <8 x i16>, <8 x i16>* %C
 	%tmp4 = mul <8 x i16> %tmp2, %tmp3
 	%tmp5 = add <8 x i16> %tmp1, %tmp4
 	ret <8 x i16> %tmp5
@@ -69,9 +69,9 @@
 define <4 x i32> @vmlaQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: vmlaQi32:
 ;CHECK: vmla.i32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
-	%tmp3 = load <4 x i32>* %C
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
+	%tmp3 = load <4 x i32>, <4 x i32>* %C
 	%tmp4 = mul <4 x i32> %tmp2, %tmp3
 	%tmp5 = add <4 x i32> %tmp1, %tmp4
 	ret <4 x i32> %tmp5
@@ -80,9 +80,9 @@
 define <4 x float> @vmlaQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
 ;CHECK-LABEL: vmlaQf32:
 ;CHECK: vmla.f32
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
-	%tmp3 = load <4 x float>* %C
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
+	%tmp3 = load <4 x float>, <4 x float>* %C
 	%tmp4 = fmul <4 x float> %tmp2, %tmp3
 	%tmp5 = fadd <4 x float> %tmp1, %tmp4
 	ret <4 x float> %tmp5
@@ -91,9 +91,9 @@
 define <8 x i16> @vmlals8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ;CHECK-LABEL: vmlals8:
 ;CHECK: vmlal.s8
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i8>* %B
-	%tmp3 = load <8 x i8>* %C
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
+	%tmp3 = load <8 x i8>, <8 x i8>* %C
 	%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
 	%tmp5 = sext <8 x i8> %tmp3 to <8 x i16>
 	%tmp6 = mul <8 x i16> %tmp4, %tmp5
@@ -104,9 +104,9 @@
 define <4 x i32> @vmlals16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ;CHECK-LABEL: vmlals16:
 ;CHECK: vmlal.s16
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i16>* %B
-	%tmp3 = load <4 x i16>* %C
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
+	%tmp3 = load <4 x i16>, <4 x i16>* %C
 	%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
 	%tmp5 = sext <4 x i16> %tmp3 to <4 x i32>
 	%tmp6 = mul <4 x i32> %tmp4, %tmp5
@@ -117,9 +117,9 @@
 define <2 x i64> @vmlals32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ;CHECK-LABEL: vmlals32:
 ;CHECK: vmlal.s32
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i32>* %B
-	%tmp3 = load <2 x i32>* %C
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
+	%tmp3 = load <2 x i32>, <2 x i32>* %C
 	%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
 	%tmp5 = sext <2 x i32> %tmp3 to <2 x i64>
 	%tmp6 = mul <2 x i64> %tmp4, %tmp5
@@ -130,9 +130,9 @@
 define <8 x i16> @vmlalu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ;CHECK-LABEL: vmlalu8:
 ;CHECK: vmlal.u8
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i8>* %B
-	%tmp3 = load <8 x i8>* %C
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
+	%tmp3 = load <8 x i8>, <8 x i8>* %C
 	%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
 	%tmp5 = zext <8 x i8> %tmp3 to <8 x i16>
 	%tmp6 = mul <8 x i16> %tmp4, %tmp5
@@ -143,9 +143,9 @@
 define <4 x i32> @vmlalu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ;CHECK-LABEL: vmlalu16:
 ;CHECK: vmlal.u16
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i16>* %B
-	%tmp3 = load <4 x i16>* %C
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
+	%tmp3 = load <4 x i16>, <4 x i16>* %C
 	%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
 	%tmp5 = zext <4 x i16> %tmp3 to <4 x i32>
 	%tmp6 = mul <4 x i32> %tmp4, %tmp5
@@ -156,9 +156,9 @@
 define <2 x i64> @vmlalu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ;CHECK-LABEL: vmlalu32:
 ;CHECK: vmlal.u32
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i32>* %B
-	%tmp3 = load <2 x i32>* %C
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
+	%tmp3 = load <2 x i32>, <2 x i32>* %C
 	%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
 	%tmp5 = zext <2 x i32> %tmp3 to <2 x i64>
 	%tmp6 = mul <2 x i64> %tmp4, %tmp5
diff --git a/llvm/test/CodeGen/ARM/vmls.ll b/llvm/test/CodeGen/ARM/vmls.ll
index f86739c..d149281 100644
--- a/llvm/test/CodeGen/ARM/vmls.ll
+++ b/llvm/test/CodeGen/ARM/vmls.ll
@@ -3,9 +3,9 @@
 define <8 x i8> @vmlsi8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8> * %C) nounwind {
 ;CHECK-LABEL: vmlsi8:
 ;CHECK: vmls.i8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
-	%tmp3 = load <8 x i8>* %C
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
+	%tmp3 = load <8 x i8>, <8 x i8>* %C
 	%tmp4 = mul <8 x i8> %tmp2, %tmp3
 	%tmp5 = sub <8 x i8> %tmp1, %tmp4
 	ret <8 x i8> %tmp5
@@ -14,9 +14,9 @@
 define <4 x i16> @vmlsi16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ;CHECK-LABEL: vmlsi16:
 ;CHECK: vmls.i16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
-	%tmp3 = load <4 x i16>* %C
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
+	%tmp3 = load <4 x i16>, <4 x i16>* %C
 	%tmp4 = mul <4 x i16> %tmp2, %tmp3
 	%tmp5 = sub <4 x i16> %tmp1, %tmp4
 	ret <4 x i16> %tmp5
@@ -25,9 +25,9 @@
 define <2 x i32> @vmlsi32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ;CHECK-LABEL: vmlsi32:
 ;CHECK: vmls.i32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
-	%tmp3 = load <2 x i32>* %C
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
+	%tmp3 = load <2 x i32>, <2 x i32>* %C
 	%tmp4 = mul <2 x i32> %tmp2, %tmp3
 	%tmp5 = sub <2 x i32> %tmp1, %tmp4
 	ret <2 x i32> %tmp5
@@ -36,9 +36,9 @@
 define <2 x float> @vmlsf32(<2 x float>* %A, <2 x float>* %B, <2 x float>* %C) nounwind {
 ;CHECK-LABEL: vmlsf32:
 ;CHECK: vmls.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
-	%tmp3 = load <2 x float>* %C
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
+	%tmp3 = load <2 x float>, <2 x float>* %C
 	%tmp4 = fmul <2 x float> %tmp2, %tmp3
 	%tmp5 = fsub <2 x float> %tmp1, %tmp4
 	ret <2 x float> %tmp5
@@ -47,9 +47,9 @@
 define <16 x i8> @vmlsQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8> * %C) nounwind {
 ;CHECK-LABEL: vmlsQi8:
 ;CHECK: vmls.i8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
-	%tmp3 = load <16 x i8>* %C
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
+	%tmp3 = load <16 x i8>, <16 x i8>* %C
 	%tmp4 = mul <16 x i8> %tmp2, %tmp3
 	%tmp5 = sub <16 x i8> %tmp1, %tmp4
 	ret <16 x i8> %tmp5
@@ -58,9 +58,9 @@
 define <8 x i16> @vmlsQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
 ;CHECK-LABEL: vmlsQi16:
 ;CHECK: vmls.i16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
-	%tmp3 = load <8 x i16>* %C
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
+	%tmp3 = load <8 x i16>, <8 x i16>* %C
 	%tmp4 = mul <8 x i16> %tmp2, %tmp3
 	%tmp5 = sub <8 x i16> %tmp1, %tmp4
 	ret <8 x i16> %tmp5
@@ -69,9 +69,9 @@
 define <4 x i32> @vmlsQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
 ;CHECK-LABEL: vmlsQi32:
 ;CHECK: vmls.i32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
-	%tmp3 = load <4 x i32>* %C
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
+	%tmp3 = load <4 x i32>, <4 x i32>* %C
 	%tmp4 = mul <4 x i32> %tmp2, %tmp3
 	%tmp5 = sub <4 x i32> %tmp1, %tmp4
 	ret <4 x i32> %tmp5
@@ -80,9 +80,9 @@
 define <4 x float> @vmlsQf32(<4 x float>* %A, <4 x float>* %B, <4 x float>* %C) nounwind {
 ;CHECK-LABEL: vmlsQf32:
 ;CHECK: vmls.f32
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
-	%tmp3 = load <4 x float>* %C
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
+	%tmp3 = load <4 x float>, <4 x float>* %C
 	%tmp4 = fmul <4 x float> %tmp2, %tmp3
 	%tmp5 = fsub <4 x float> %tmp1, %tmp4
 	ret <4 x float> %tmp5
@@ -91,9 +91,9 @@
 define <8 x i16> @vmlsls8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ;CHECK-LABEL: vmlsls8:
 ;CHECK: vmlsl.s8
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i8>* %B
-	%tmp3 = load <8 x i8>* %C
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
+	%tmp3 = load <8 x i8>, <8 x i8>* %C
 	%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
 	%tmp5 = sext <8 x i8> %tmp3 to <8 x i16>
 	%tmp6 = mul <8 x i16> %tmp4, %tmp5
@@ -104,9 +104,9 @@
 define <4 x i32> @vmlsls16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ;CHECK-LABEL: vmlsls16:
 ;CHECK: vmlsl.s16
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i16>* %B
-	%tmp3 = load <4 x i16>* %C
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
+	%tmp3 = load <4 x i16>, <4 x i16>* %C
 	%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
 	%tmp5 = sext <4 x i16> %tmp3 to <4 x i32>
 	%tmp6 = mul <4 x i32> %tmp4, %tmp5
@@ -117,9 +117,9 @@
 define <2 x i64> @vmlsls32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ;CHECK-LABEL: vmlsls32:
 ;CHECK: vmlsl.s32
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i32>* %B
-	%tmp3 = load <2 x i32>* %C
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
+	%tmp3 = load <2 x i32>, <2 x i32>* %C
 	%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
 	%tmp5 = sext <2 x i32> %tmp3 to <2 x i64>
 	%tmp6 = mul <2 x i64> %tmp4, %tmp5
@@ -130,9 +130,9 @@
 define <8 x i16> @vmlslu8(<8 x i16>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ;CHECK-LABEL: vmlslu8:
 ;CHECK: vmlsl.u8
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i8>* %B
-	%tmp3 = load <8 x i8>* %C
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
+	%tmp3 = load <8 x i8>, <8 x i8>* %C
 	%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
 	%tmp5 = zext <8 x i8> %tmp3 to <8 x i16>
 	%tmp6 = mul <8 x i16> %tmp4, %tmp5
@@ -143,9 +143,9 @@
 define <4 x i32> @vmlslu16(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ;CHECK-LABEL: vmlslu16:
 ;CHECK: vmlsl.u16
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i16>* %B
-	%tmp3 = load <4 x i16>* %C
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
+	%tmp3 = load <4 x i16>, <4 x i16>* %C
 	%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
 	%tmp5 = zext <4 x i16> %tmp3 to <4 x i32>
 	%tmp6 = mul <4 x i32> %tmp4, %tmp5
@@ -156,9 +156,9 @@
 define <2 x i64> @vmlslu32(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ;CHECK-LABEL: vmlslu32:
 ;CHECK: vmlsl.u32
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i32>* %B
-	%tmp3 = load <2 x i32>* %C
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
+	%tmp3 = load <2 x i32>, <2 x i32>* %C
 	%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
 	%tmp5 = zext <2 x i32> %tmp3 to <2 x i64>
 	%tmp6 = mul <2 x i64> %tmp4, %tmp5
diff --git a/llvm/test/CodeGen/ARM/vmov.ll b/llvm/test/CodeGen/ARM/vmov.ll
index ae4a8f9..b7a23b7 100644
--- a/llvm/test/CodeGen/ARM/vmov.ll
+++ b/llvm/test/CodeGen/ARM/vmov.ll
@@ -191,7 +191,7 @@
 define <8 x i16> @vmovls8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vmovls8:
 ;CHECK: vmovl.s8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
 	ret <8 x i16> %tmp2
 }
@@ -199,7 +199,7 @@
 define <4 x i32> @vmovls16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vmovls16:
 ;CHECK: vmovl.s16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
 	ret <4 x i32> %tmp2
 }
@@ -207,7 +207,7 @@
 define <2 x i64> @vmovls32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vmovls32:
 ;CHECK: vmovl.s32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
 	ret <2 x i64> %tmp2
 }
@@ -215,7 +215,7 @@
 define <8 x i16> @vmovlu8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vmovlu8:
 ;CHECK: vmovl.u8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = zext <8 x i8> %tmp1 to <8 x i16>
 	ret <8 x i16> %tmp2
 }
@@ -223,7 +223,7 @@
 define <4 x i32> @vmovlu16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vmovlu16:
 ;CHECK: vmovl.u16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = zext <4 x i16> %tmp1 to <4 x i32>
 	ret <4 x i32> %tmp2
 }
@@ -231,7 +231,7 @@
 define <2 x i64> @vmovlu32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vmovlu32:
 ;CHECK: vmovl.u32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = zext <2 x i32> %tmp1 to <2 x i64>
 	ret <2 x i64> %tmp2
 }
@@ -239,7 +239,7 @@
 define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vmovni16:
 ;CHECK: vmovn.i16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = trunc <8 x i16> %tmp1 to <8 x i8>
 	ret <8 x i8> %tmp2
 }
@@ -247,7 +247,7 @@
 define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vmovni32:
 ;CHECK: vmovn.i32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
 	ret <4 x i16> %tmp2
 }
@@ -255,7 +255,7 @@
 define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vmovni64:
 ;CHECK: vmovn.i64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = trunc <2 x i64> %tmp1 to <2 x i32>
 	ret <2 x i32> %tmp2
 }
@@ -263,7 +263,7 @@
 define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqmovns16:
 ;CHECK: vqmovn.s16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %tmp1)
 	ret <8 x i8> %tmp2
 }
@@ -271,7 +271,7 @@
 define <4 x i16> @vqmovns32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vqmovns32:
 ;CHECK: vqmovn.s32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %tmp1)
 	ret <4 x i16> %tmp2
 }
@@ -279,7 +279,7 @@
 define <2 x i32> @vqmovns64(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vqmovns64:
 ;CHECK: vqmovn.s64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %tmp1)
 	ret <2 x i32> %tmp2
 }
@@ -287,7 +287,7 @@
 define <8 x i8> @vqmovnu16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqmovnu16:
 ;CHECK: vqmovn.u16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %tmp1)
 	ret <8 x i8> %tmp2
 }
@@ -295,7 +295,7 @@
 define <4 x i16> @vqmovnu32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vqmovnu32:
 ;CHECK: vqmovn.u32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %tmp1)
 	ret <4 x i16> %tmp2
 }
@@ -303,7 +303,7 @@
 define <2 x i32> @vqmovnu64(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vqmovnu64:
 ;CHECK: vqmovn.u64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %tmp1)
 	ret <2 x i32> %tmp2
 }
@@ -311,7 +311,7 @@
 define <8 x i8> @vqmovuns16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqmovuns16:
 ;CHECK: vqmovun.s16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %tmp1)
 	ret <8 x i8> %tmp2
 }
@@ -319,7 +319,7 @@
 define <4 x i16> @vqmovuns32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vqmovuns32:
 ;CHECK: vqmovun.s32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %tmp1)
 	ret <4 x i16> %tmp2
 }
@@ -327,7 +327,7 @@
 define <2 x i32> @vqmovuns64(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vqmovuns64:
 ;CHECK: vqmovun.s64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %tmp1)
 	ret <2 x i32> %tmp2
 }
@@ -348,7 +348,7 @@
 ; Radar 8598391.
 define void @noTruncStore(<4 x i32>* %a, <4 x i16>* %b) nounwind {
 ;CHECK: vmovn
-  %tmp1 = load <4 x i32>* %a, align 16
+  %tmp1 = load <4 x i32>, <4 x i32>* %a, align 16
   %tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
   store <4 x i16> %tmp2, <4 x i16>* %b, align 8
   ret void
@@ -376,7 +376,7 @@
 entry:
 ;CHECK-LABEL: v_mov_v4f32_undef:
 ;CHECK: vmov.f32 q{{.*}}, #1.000000e+00
-  %a = load <4 x float> *%p
+  %a = load <4 x float> , <4 x float> *%p
   %b = fadd <4 x float> %a, <float undef, float 1.0, float 1.0, float 1.0>
   store <4 x float> %b, <4 x float> *%p
   ret void
diff --git a/llvm/test/CodeGen/ARM/vmul.ll b/llvm/test/CodeGen/ARM/vmul.ll
index a9e5ad5..c3e41ca 100644
--- a/llvm/test/CodeGen/ARM/vmul.ll
+++ b/llvm/test/CodeGen/ARM/vmul.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vmuli8:
 ;CHECK: vmul.i8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = mul <8 x i8> %tmp1, %tmp2
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @vmuli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vmuli16:
 ;CHECK: vmul.i16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = mul <4 x i16> %tmp1, %tmp2
 	ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @vmuli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vmuli32:
 ;CHECK: vmul.i32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = mul <2 x i32> %tmp1, %tmp2
 	ret <2 x i32> %tmp3
 }
@@ -30,8 +30,8 @@
 define <2 x float> @vmulf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vmulf32:
 ;CHECK: vmul.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = fmul <2 x float> %tmp1, %tmp2
 	ret <2 x float> %tmp3
 }
@@ -39,8 +39,8 @@
 define <8 x i8> @vmulp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vmulp8:
 ;CHECK: vmul.p8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -48,8 +48,8 @@
 define <16 x i8> @vmulQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vmulQi8:
 ;CHECK: vmul.i8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = mul <16 x i8> %tmp1, %tmp2
 	ret <16 x i8> %tmp3
 }
@@ -57,8 +57,8 @@
 define <8 x i16> @vmulQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vmulQi16:
 ;CHECK: vmul.i16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = mul <8 x i16> %tmp1, %tmp2
 	ret <8 x i16> %tmp3
 }
@@ -66,8 +66,8 @@
 define <4 x i32> @vmulQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vmulQi32:
 ;CHECK: vmul.i32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = mul <4 x i32> %tmp1, %tmp2
 	ret <4 x i32> %tmp3
 }
@@ -75,8 +75,8 @@
 define <4 x float> @vmulQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: vmulQf32:
 ;CHECK: vmul.f32
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = fmul <4 x float> %tmp1, %tmp2
 	ret <4 x float> %tmp3
 }
@@ -84,8 +84,8 @@
 define <16 x i8> @vmulQp8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vmulQp8:
 ;CHECK: vmul.p8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -150,8 +150,8 @@
 define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vmulls8:
 ;CHECK: vmull.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
 	%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
 	%tmp5 = mul <8 x i16> %tmp3, %tmp4
@@ -161,8 +161,8 @@
 define <8 x i16> @vmulls8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vmulls8_int:
 ;CHECK: vmull.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -170,8 +170,8 @@
 define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vmulls16:
 ;CHECK: vmull.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
 	%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
 	%tmp5 = mul <4 x i32> %tmp3, %tmp4
@@ -181,8 +181,8 @@
 define <4 x i32> @vmulls16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vmulls16_int:
 ;CHECK: vmull.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -190,8 +190,8 @@
 define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vmulls32:
 ;CHECK: vmull.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
 	%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
 	%tmp5 = mul <2 x i64> %tmp3, %tmp4
@@ -201,8 +201,8 @@
 define <2 x i64> @vmulls32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vmulls32_int:
 ;CHECK: vmull.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i64> %tmp3
 }
@@ -210,8 +210,8 @@
 define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vmullu8:
 ;CHECK: vmull.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
 	%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
 	%tmp5 = mul <8 x i16> %tmp3, %tmp4
@@ -221,8 +221,8 @@
 define <8 x i16> @vmullu8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vmullu8_int:
 ;CHECK: vmull.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -230,8 +230,8 @@
 define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vmullu16:
 ;CHECK: vmull.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
 	%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
 	%tmp5 = mul <4 x i32> %tmp3, %tmp4
@@ -241,8 +241,8 @@
 define <4 x i32> @vmullu16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vmullu16_int:
 ;CHECK: vmull.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -250,8 +250,8 @@
 define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vmullu32:
 ;CHECK: vmull.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
 	%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
 	%tmp5 = mul <2 x i64> %tmp3, %tmp4
@@ -261,8 +261,8 @@
 define <2 x i64> @vmullu32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vmullu32_int:
 ;CHECK: vmull.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i64> %tmp3
 }
@@ -270,8 +270,8 @@
 define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vmullp8:
 ;CHECK: vmull.p8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -560,7 +560,7 @@
 for.body33:                                       ; preds = %for.body33, %for.body33.lr.ph
   %add45 = add i32 undef, undef
   %vld155 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* undef, i32 1)
-  %0 = load i32** undef, align 4
+  %0 = load i32*, i32** undef, align 4
   %shuffle.i250 = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
   %1 = bitcast <1 x i64> %shuffle.i250 to <8 x i8>
   %vmovl.i249 = zext <8 x i8> %1 to <8 x i16>
@@ -616,7 +616,7 @@
 ; PR15970
 define void @no_illegal_types_vmull_sext(<4 x i32> %a) {
 entry:
-  %wide.load283.i = load <4 x i8>* undef, align 1
+  %wide.load283.i = load <4 x i8>, <4 x i8>* undef, align 1
   %0 = sext <4 x i8> %wide.load283.i to <4 x i32>
   %1 = sub nsw <4 x i32> %0, %a
   %2 = mul nsw <4 x i32> %1, %1
@@ -626,7 +626,7 @@
 }
 define void @no_illegal_types_vmull_zext(<4 x i32> %a) {
 entry:
-  %wide.load283.i = load <4 x i8>* undef, align 1
+  %wide.load283.i = load <4 x i8>, <4 x i8>* undef, align 1
   %0 = zext <4 x i8> %wide.load283.i to <4 x i32>
   %1 = sub nsw <4 x i32> %0, %a
   %2 = mul nsw <4 x i32> %1, %1
@@ -642,8 +642,8 @@
 ;   and used a vector * scalar instruction.
 ; CHECK: vldr  {{s[0-9]+}}, [r2]
 ; CHECK: vmul.f32  q8, q8, d0[0]
-  %tmp = load float* %src, align 4
-  %tmp5 = load <4 x float>* %a, align 4
+  %tmp = load float, float* %src, align 4
+  %tmp5 = load <4 x float>, <4 x float>* %a, align 4
   %tmp6 = insertelement <4 x float> undef, float %tmp, i32 0
   %tmp7 = insertelement <4 x float> %tmp6, float %tmp, i32 1
   %tmp8 = insertelement <4 x float> %tmp7, float %tmp, i32 2
diff --git a/llvm/test/CodeGen/ARM/vneg.ll b/llvm/test/CodeGen/ARM/vneg.ll
index 4d548dd..24a585f 100644
--- a/llvm/test/CodeGen/ARM/vneg.ll
+++ b/llvm/test/CodeGen/ARM/vneg.ll
@@ -3,7 +3,7 @@
 define <8 x i8> @vnegs8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vnegs8:
 ;CHECK: vneg.s8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = sub <8 x i8> zeroinitializer, %tmp1
 	ret <8 x i8> %tmp2
 }
@@ -11,7 +11,7 @@
 define <4 x i16> @vnegs16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vnegs16:
 ;CHECK: vneg.s16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = sub <4 x i16> zeroinitializer, %tmp1
 	ret <4 x i16> %tmp2
 }
@@ -19,7 +19,7 @@
 define <2 x i32> @vnegs32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vnegs32:
 ;CHECK: vneg.s32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = sub <2 x i32> zeroinitializer, %tmp1
 	ret <2 x i32> %tmp2
 }
@@ -27,7 +27,7 @@
 define <2 x float> @vnegf32(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: vnegf32:
 ;CHECK: vneg.f32
-	%tmp1 = load <2 x float>* %A
+	%tmp1 = load <2 x float>, <2 x float>* %A
 	%tmp2 = fsub <2 x float> < float -0.000000e+00, float -0.000000e+00 >, %tmp1
 	ret <2 x float> %tmp2
 }
@@ -35,7 +35,7 @@
 define <16 x i8> @vnegQs8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vnegQs8:
 ;CHECK: vneg.s8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = sub <16 x i8> zeroinitializer, %tmp1
 	ret <16 x i8> %tmp2
 }
@@ -43,7 +43,7 @@
 define <8 x i16> @vnegQs16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vnegQs16:
 ;CHECK: vneg.s16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = sub <8 x i16> zeroinitializer, %tmp1
 	ret <8 x i16> %tmp2
 }
@@ -51,7 +51,7 @@
 define <4 x i32> @vnegQs32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vnegQs32:
 ;CHECK: vneg.s32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = sub <4 x i32> zeroinitializer, %tmp1
 	ret <4 x i32> %tmp2
 }
@@ -59,7 +59,7 @@
 define <4 x float> @vnegQf32(<4 x float>* %A) nounwind {
 ;CHECK-LABEL: vnegQf32:
 ;CHECK: vneg.f32
-	%tmp1 = load <4 x float>* %A
+	%tmp1 = load <4 x float>, <4 x float>* %A
 	%tmp2 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1
 	ret <4 x float> %tmp2
 }
@@ -67,7 +67,7 @@
 define <8 x i8> @vqnegs8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vqnegs8:
 ;CHECK: vqneg.s8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %tmp1)
 	ret <8 x i8> %tmp2
 }
@@ -75,7 +75,7 @@
 define <4 x i16> @vqnegs16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqnegs16:
 ;CHECK: vqneg.s16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %tmp1)
 	ret <4 x i16> %tmp2
 }
@@ -83,7 +83,7 @@
 define <2 x i32> @vqnegs32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vqnegs32:
 ;CHECK: vqneg.s32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %tmp1)
 	ret <2 x i32> %tmp2
 }
@@ -91,7 +91,7 @@
 define <16 x i8> @vqnegQs8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vqnegQs8:
 ;CHECK: vqneg.s8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %tmp1)
 	ret <16 x i8> %tmp2
 }
@@ -99,7 +99,7 @@
 define <8 x i16> @vqnegQs16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqnegQs16:
 ;CHECK: vqneg.s16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %tmp1)
 	ret <8 x i16> %tmp2
 }
@@ -107,7 +107,7 @@
 define <4 x i32> @vqnegQs32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vqnegQs32:
 ;CHECK: vqneg.s32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %tmp1)
 	ret <4 x i32> %tmp2
 }
diff --git a/llvm/test/CodeGen/ARM/vpadal.ll b/llvm/test/CodeGen/ARM/vpadal.ll
index ffeac73..ffb6924 100644
--- a/llvm/test/CodeGen/ARM/vpadal.ll
+++ b/llvm/test/CodeGen/ARM/vpadal.ll
@@ -3,8 +3,8 @@
 define <4 x i16> @vpadals8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vpadals8:
 ;CHECK: vpadal.s8
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -12,8 +12,8 @@
 define <2 x i32> @vpadals16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vpadals16:
 ;CHECK: vpadal.s16
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -21,8 +21,8 @@
 define <1 x i64> @vpadals32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vpadals32:
 ;CHECK: vpadal.s32
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2)
 	ret <1 x i64> %tmp3
 }
@@ -30,8 +30,8 @@
 define <4 x i16> @vpadalu8(<4 x i16>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vpadalu8:
 ;CHECK: vpadal.u8
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %tmp1, <8 x i8> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -39,8 +39,8 @@
 define <2 x i32> @vpadalu16(<2 x i32>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vpadalu16:
 ;CHECK: vpadal.u16
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %tmp1, <4 x i16> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -48,8 +48,8 @@
 define <1 x i64> @vpadalu32(<1 x i64>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vpadalu32:
 ;CHECK: vpadal.u32
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %tmp1, <2 x i32> %tmp2)
 	ret <1 x i64> %tmp3
 }
@@ -57,8 +57,8 @@
 define <8 x i16> @vpadalQs8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vpadalQs8:
 ;CHECK: vpadal.s8
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -66,8 +66,8 @@
 define <4 x i32> @vpadalQs16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vpadalQs16:
 ;CHECK: vpadal.s16
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -75,8 +75,8 @@
 define <2 x i64> @vpadalQs32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vpadalQs32:
 ;CHECK: vpadal.s32
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2)
 	ret <2 x i64> %tmp3
 }
@@ -84,8 +84,8 @@
 define <8 x i16> @vpadalQu8(<8 x i16>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vpadalQu8:
 ;CHECK: vpadal.u8
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %tmp1, <16 x i8> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -93,8 +93,8 @@
 define <4 x i32> @vpadalQu16(<4 x i32>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vpadalQu16:
 ;CHECK: vpadal.u16
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %tmp1, <8 x i16> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -102,8 +102,8 @@
 define <2 x i64> @vpadalQu32(<2 x i64>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vpadalQu32:
 ;CHECK: vpadal.u32
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %tmp1, <4 x i32> %tmp2)
 	ret <2 x i64> %tmp3
 }
diff --git a/llvm/test/CodeGen/ARM/vpadd.ll b/llvm/test/CodeGen/ARM/vpadd.ll
index 01cb1c7..e362ce3 100644
--- a/llvm/test/CodeGen/ARM/vpadd.ll
+++ b/llvm/test/CodeGen/ARM/vpadd.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vpaddi8:
 ;CHECK: vpadd.i8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vpaddi16:
 ;CHECK: vpadd.i16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vpaddi32:
 ;CHECK: vpadd.i32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -30,8 +30,8 @@
 define <2 x float> @vpaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vpaddf32:
 ;CHECK: vpadd.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
@@ -45,7 +45,7 @@
 define <4 x i16> @vpaddls8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vpaddls8:
 ;CHECK: vpaddl.s8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %tmp1)
 	ret <4 x i16> %tmp2
 }
@@ -53,7 +53,7 @@
 define <2 x i32> @vpaddls16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vpaddls16:
 ;CHECK: vpaddl.s16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %tmp1)
 	ret <2 x i32> %tmp2
 }
@@ -61,7 +61,7 @@
 define <1 x i64> @vpaddls32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vpaddls32:
 ;CHECK: vpaddl.s32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %tmp1)
 	ret <1 x i64> %tmp2
 }
@@ -69,7 +69,7 @@
 define <4 x i16> @vpaddlu8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vpaddlu8:
 ;CHECK: vpaddl.u8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %tmp1)
 	ret <4 x i16> %tmp2
 }
@@ -77,7 +77,7 @@
 define <2 x i32> @vpaddlu16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vpaddlu16:
 ;CHECK: vpaddl.u16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %tmp1)
 	ret <2 x i32> %tmp2
 }
@@ -85,7 +85,7 @@
 define <1 x i64> @vpaddlu32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vpaddlu32:
 ;CHECK: vpaddl.u32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %tmp1)
 	ret <1 x i64> %tmp2
 }
@@ -93,7 +93,7 @@
 define <8 x i16> @vpaddlQs8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vpaddlQs8:
 ;CHECK: vpaddl.s8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %tmp1)
 	ret <8 x i16> %tmp2
 }
@@ -101,7 +101,7 @@
 define <4 x i32> @vpaddlQs16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vpaddlQs16:
 ;CHECK: vpaddl.s16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %tmp1)
 	ret <4 x i32> %tmp2
 }
@@ -109,7 +109,7 @@
 define <2 x i64> @vpaddlQs32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vpaddlQs32:
 ;CHECK: vpaddl.s32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %tmp1)
 	ret <2 x i64> %tmp2
 }
@@ -117,7 +117,7 @@
 define <8 x i16> @vpaddlQu8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vpaddlQu8:
 ;CHECK: vpaddl.u8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %tmp1)
 	ret <8 x i16> %tmp2
 }
@@ -125,7 +125,7 @@
 define <4 x i32> @vpaddlQu16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vpaddlQu16:
 ;CHECK: vpaddl.u16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %tmp1)
 	ret <4 x i32> %tmp2
 }
@@ -133,7 +133,7 @@
 define <2 x i64> @vpaddlQu32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vpaddlQu32:
 ;CHECK: vpaddl.u32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %tmp1)
 	ret <2 x i64> %tmp2
 }
@@ -143,9 +143,9 @@
 ; CHECK: vpaddl.s8
   %cbcr = alloca <16 x i8>, align 16
   %X = alloca <8 x i8>, align 8
-  %tmp = load <16 x i8>* %cbcr
+  %tmp = load <16 x i8>, <16 x i8>* %cbcr
   %tmp1 = shufflevector <16 x i8> %tmp, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-  %tmp2 = load <16 x i8>* %cbcr
+  %tmp2 = load <16 x i8>, <16 x i8>* %cbcr
   %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
   %add = add <8 x i8> %tmp3, %tmp1
   store <8 x i8> %add, <8 x i8>* %X, align 8
diff --git a/llvm/test/CodeGen/ARM/vpminmax.ll b/llvm/test/CodeGen/ARM/vpminmax.ll
index 0b893e5..9ea8c69 100644
--- a/llvm/test/CodeGen/ARM/vpminmax.ll
+++ b/llvm/test/CodeGen/ARM/vpminmax.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @vpmins8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vpmins8:
 ;CHECK: vpmin.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @vpmins16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vpmins16:
 ;CHECK: vpmin.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @vpmins32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vpmins32:
 ;CHECK: vpmin.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -30,8 +30,8 @@
 define <8 x i8> @vpminu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vpminu8:
 ;CHECK: vpmin.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -39,8 +39,8 @@
 define <4 x i16> @vpminu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vpminu16:
 ;CHECK: vpmin.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -48,8 +48,8 @@
 define <2 x i32> @vpminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vpminu32:
 ;CHECK: vpmin.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -57,8 +57,8 @@
 define <2 x float> @vpminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vpminf32:
 ;CHECK: vpmin.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
@@ -76,8 +76,8 @@
 define <8 x i8> @vpmaxs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vpmaxs8:
 ;CHECK: vpmax.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -85,8 +85,8 @@
 define <4 x i16> @vpmaxs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vpmaxs16:
 ;CHECK: vpmax.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -94,8 +94,8 @@
 define <2 x i32> @vpmaxs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vpmaxs32:
 ;CHECK: vpmax.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -103,8 +103,8 @@
 define <8 x i8> @vpmaxu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vpmaxu8:
 ;CHECK: vpmax.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -112,8 +112,8 @@
 define <4 x i16> @vpmaxu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vpmaxu16:
 ;CHECK: vpmax.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -121,8 +121,8 @@
 define <2 x i32> @vpmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vpmaxu32:
 ;CHECK: vpmax.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -130,8 +130,8 @@
 define <2 x float> @vpmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vpmaxf32:
 ;CHECK: vpmax.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
diff --git a/llvm/test/CodeGen/ARM/vqadd.ll b/llvm/test/CodeGen/ARM/vqadd.ll
index 81acc8b..d1e90cb 100644
--- a/llvm/test/CodeGen/ARM/vqadd.ll
+++ b/llvm/test/CodeGen/ARM/vqadd.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @vqadds8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vqadds8:
 ;CHECK: vqadd.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @vqadds16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqadds16:
 ;CHECK: vqadd.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @vqadds32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqadds32:
 ;CHECK: vqadd.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -30,8 +30,8 @@
 define <1 x i64> @vqadds64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vqadds64:
 ;CHECK: vqadd.s64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
 	ret <1 x i64> %tmp3
 }
@@ -39,8 +39,8 @@
 define <8 x i8> @vqaddu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vqaddu8:
 ;CHECK: vqadd.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -48,8 +48,8 @@
 define <4 x i16> @vqaddu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqaddu16:
 ;CHECK: vqadd.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -57,8 +57,8 @@
 define <2 x i32> @vqaddu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqaddu32:
 ;CHECK: vqadd.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -66,8 +66,8 @@
 define <1 x i64> @vqaddu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vqaddu64:
 ;CHECK: vqadd.u64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
 	ret <1 x i64> %tmp3
 }
@@ -75,8 +75,8 @@
 define <16 x i8> @vqaddQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vqaddQs8:
 ;CHECK: vqadd.s8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -84,8 +84,8 @@
 define <8 x i16> @vqaddQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqaddQs16:
 ;CHECK: vqadd.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -93,8 +93,8 @@
 define <4 x i32> @vqaddQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqaddQs32:
 ;CHECK: vqadd.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -102,8 +102,8 @@
 define <2 x i64> @vqaddQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vqaddQs64:
 ;CHECK: vqadd.s64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
@@ -111,8 +111,8 @@
 define <16 x i8> @vqaddQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vqaddQu8:
 ;CHECK: vqadd.u8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -120,8 +120,8 @@
 define <8 x i16> @vqaddQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqaddQu16:
 ;CHECK: vqadd.u16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -129,8 +129,8 @@
 define <4 x i32> @vqaddQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqaddQu32:
 ;CHECK: vqadd.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -138,8 +138,8 @@
 define <2 x i64> @vqaddQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vqaddQu64:
 ;CHECK: vqadd.u64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
diff --git a/llvm/test/CodeGen/ARM/vqdmul.ll b/llvm/test/CodeGen/ARM/vqdmul.ll
index d298167..6da0800 100644
--- a/llvm/test/CodeGen/ARM/vqdmul.ll
+++ b/llvm/test/CodeGen/ARM/vqdmul.ll
@@ -5,8 +5,8 @@
 define <4 x i16> @vqdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqdmulhs16:
 ;CHECK: vqdmulh.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -14,8 +14,8 @@
 define <2 x i32> @vqdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqdmulhs32:
 ;CHECK: vqdmulh.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -23,8 +23,8 @@
 define <8 x i16> @vqdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqdmulhQs16:
 ;CHECK: vqdmulh.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -32,8 +32,8 @@
 define <4 x i32> @vqdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqdmulhQs32:
 ;CHECK: vqdmulh.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -83,8 +83,8 @@
 define <4 x i16> @vqrdmulhs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqrdmulhs16:
 ;CHECK: vqrdmulh.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -92,8 +92,8 @@
 define <2 x i32> @vqrdmulhs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqrdmulhs32:
 ;CHECK: vqrdmulh.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -101,8 +101,8 @@
 define <8 x i16> @vqrdmulhQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqrdmulhQs16:
 ;CHECK: vqrdmulh.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -110,8 +110,8 @@
 define <4 x i32> @vqrdmulhQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqrdmulhQs32:
 ;CHECK: vqrdmulh.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -161,8 +161,8 @@
 define <4 x i32> @vqdmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqdmulls16:
 ;CHECK: vqdmull.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -170,8 +170,8 @@
 define <2 x i64> @vqdmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqdmulls32:
 ;CHECK: vqdmull.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i64> %tmp3
 }
@@ -200,9 +200,9 @@
 define <4 x i32> @vqdmlals16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ;CHECK-LABEL: vqdmlals16_natural:
 ;CHECK: vqdmlal.s16
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i16>* %B
-        %tmp3 = load <4 x i16>* %C
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
+        %tmp3 = load <4 x i16>, <4 x i16>* %C
         %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp2, <4 x i16> %tmp3)
         %tmp5 = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4)
         ret <4 x i32> %tmp5
@@ -211,9 +211,9 @@
 define <2 x i64> @vqdmlals32_natural(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ;CHECK-LABEL: vqdmlals32_natural:
 ;CHECK: vqdmlal.s32
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i32>* %B
-        %tmp3 = load <2 x i32>* %C
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
+        %tmp3 = load <2 x i32>, <2 x i32>* %C
         %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp2, <2 x i32> %tmp3)
         %tmp5 = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4)
         ret <2 x i64> %tmp5
@@ -245,9 +245,9 @@
 define <4 x i32> @vqdmlsls16_natural(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
 ;CHECK-LABEL: vqdmlsls16_natural:
 ;CHECK: vqdmlsl.s16
-        %tmp1 = load <4 x i32>* %A
-        %tmp2 = load <4 x i16>* %B
-        %tmp3 = load <4 x i16>* %C
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
+        %tmp2 = load <4 x i16>, <4 x i16>* %B
+        %tmp3 = load <4 x i16>, <4 x i16>* %C
         %tmp4 = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %tmp2, <4 x i16> %tmp3)
         %tmp5 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp4)
         ret <4 x i32> %tmp5
@@ -256,9 +256,9 @@
 define <2 x i64> @vqdmlsls32_natural(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
 ;CHECK-LABEL: vqdmlsls32_natural:
 ;CHECK: vqdmlsl.s32
-        %tmp1 = load <2 x i64>* %A
-        %tmp2 = load <2 x i32>* %B
-        %tmp3 = load <2 x i32>* %C
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
+        %tmp2 = load <2 x i32>, <2 x i32>* %B
+        %tmp3 = load <2 x i32>, <2 x i32>* %C
         %tmp4 = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %tmp2, <2 x i32> %tmp3)
         %tmp5 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp4)
         ret <2 x i64> %tmp5
diff --git a/llvm/test/CodeGen/ARM/vqshl.ll b/llvm/test/CodeGen/ARM/vqshl.ll
index 4afef6d..6a6d9af 100644
--- a/llvm/test/CodeGen/ARM/vqshl.ll
+++ b/llvm/test/CodeGen/ARM/vqshl.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @vqshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vqshls8:
 ;CHECK: vqshl.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @vqshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqshls16:
 ;CHECK: vqshl.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @vqshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqshls32:
 ;CHECK: vqshl.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -30,8 +30,8 @@
 define <1 x i64> @vqshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vqshls64:
 ;CHECK: vqshl.s64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
 	ret <1 x i64> %tmp3
 }
@@ -39,8 +39,8 @@
 define <8 x i8> @vqshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vqshlu8:
 ;CHECK: vqshl.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -48,8 +48,8 @@
 define <4 x i16> @vqshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqshlu16:
 ;CHECK: vqshl.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -57,8 +57,8 @@
 define <2 x i32> @vqshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqshlu32:
 ;CHECK: vqshl.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -66,8 +66,8 @@
 define <1 x i64> @vqshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vqshlu64:
 ;CHECK: vqshl.u64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
 	ret <1 x i64> %tmp3
 }
@@ -75,8 +75,8 @@
 define <16 x i8> @vqshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vqshlQs8:
 ;CHECK: vqshl.s8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -84,8 +84,8 @@
 define <8 x i16> @vqshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqshlQs16:
 ;CHECK: vqshl.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -93,8 +93,8 @@
 define <4 x i32> @vqshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqshlQs32:
 ;CHECK: vqshl.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -102,8 +102,8 @@
 define <2 x i64> @vqshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vqshlQs64:
 ;CHECK: vqshl.s64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
@@ -111,8 +111,8 @@
 define <16 x i8> @vqshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vqshlQu8:
 ;CHECK: vqshl.u8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -120,8 +120,8 @@
 define <8 x i16> @vqshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqshlQu16:
 ;CHECK: vqshl.u16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -129,8 +129,8 @@
 define <4 x i32> @vqshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqshlQu32:
 ;CHECK: vqshl.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -138,8 +138,8 @@
 define <2 x i64> @vqshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vqshlQu64:
 ;CHECK: vqshl.u64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
@@ -147,7 +147,7 @@
 define <8 x i8> @vqshls_n8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vqshls_n8:
 ;CHECK: vqshl.s8{{.*#7}}
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <8 x i8> %tmp2
 }
@@ -155,7 +155,7 @@
 define <4 x i16> @vqshls_n16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqshls_n16:
 ;CHECK: vqshl.s16{{.*#15}}
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
 	ret <4 x i16> %tmp2
 }
@@ -163,7 +163,7 @@
 define <2 x i32> @vqshls_n32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vqshls_n32:
 ;CHECK: vqshl.s32{{.*#31}}
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
 	ret <2 x i32> %tmp2
 }
@@ -171,7 +171,7 @@
 define <1 x i64> @vqshls_n64(<1 x i64>* %A) nounwind {
 ;CHECK-LABEL: vqshls_n64:
 ;CHECK: vqshl.s64{{.*#63}}
-	%tmp1 = load <1 x i64>* %A
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
 	ret <1 x i64> %tmp2
 }
@@ -179,7 +179,7 @@
 define <8 x i8> @vqshlu_n8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vqshlu_n8:
 ;CHECK: vqshl.u8{{.*#7}}
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <8 x i8> %tmp2
 }
@@ -187,7 +187,7 @@
 define <4 x i16> @vqshlu_n16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqshlu_n16:
 ;CHECK: vqshl.u16{{.*#15}}
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
 	ret <4 x i16> %tmp2
 }
@@ -195,7 +195,7 @@
 define <2 x i32> @vqshlu_n32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vqshlu_n32:
 ;CHECK: vqshl.u32{{.*#31}}
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
 	ret <2 x i32> %tmp2
 }
@@ -203,7 +203,7 @@
 define <1 x i64> @vqshlu_n64(<1 x i64>* %A) nounwind {
 ;CHECK-LABEL: vqshlu_n64:
 ;CHECK: vqshl.u64{{.*#63}}
-	%tmp1 = load <1 x i64>* %A
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
 	ret <1 x i64> %tmp2
 }
@@ -211,7 +211,7 @@
 define <8 x i8> @vqshlsu_n8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vqshlsu_n8:
 ;CHECK: vqshlu.s8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <8 x i8> %tmp2
 }
@@ -219,7 +219,7 @@
 define <4 x i16> @vqshlsu_n16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqshlsu_n16:
 ;CHECK: vqshlu.s16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
 	ret <4 x i16> %tmp2
 }
@@ -227,7 +227,7 @@
 define <2 x i32> @vqshlsu_n32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vqshlsu_n32:
 ;CHECK: vqshlu.s32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
 	ret <2 x i32> %tmp2
 }
@@ -235,7 +235,7 @@
 define <1 x i64> @vqshlsu_n64(<1 x i64>* %A) nounwind {
 ;CHECK-LABEL: vqshlsu_n64:
 ;CHECK: vqshlu.s64
-	%tmp1 = load <1 x i64>* %A
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
 	ret <1 x i64> %tmp2
 }
@@ -243,7 +243,7 @@
 define <16 x i8> @vqshlQs_n8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vqshlQs_n8:
 ;CHECK: vqshl.s8{{.*#7}}
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <16 x i8> %tmp2
 }
@@ -251,7 +251,7 @@
 define <8 x i16> @vqshlQs_n16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqshlQs_n16:
 ;CHECK: vqshl.s16{{.*#15}}
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
 	ret <8 x i16> %tmp2
 }
@@ -259,7 +259,7 @@
 define <4 x i32> @vqshlQs_n32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vqshlQs_n32:
 ;CHECK: vqshl.s32{{.*#31}}
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
 	ret <4 x i32> %tmp2
 }
@@ -267,7 +267,7 @@
 define <2 x i64> @vqshlQs_n64(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vqshlQs_n64:
 ;CHECK: vqshl.s64{{.*#63}}
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
 	ret <2 x i64> %tmp2
 }
@@ -275,7 +275,7 @@
 define <16 x i8> @vqshlQu_n8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vqshlQu_n8:
 ;CHECK: vqshl.u8{{.*#7}}
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <16 x i8> %tmp2
 }
@@ -283,7 +283,7 @@
 define <8 x i16> @vqshlQu_n16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqshlQu_n16:
 ;CHECK: vqshl.u16{{.*#15}}
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
 	ret <8 x i16> %tmp2
 }
@@ -291,7 +291,7 @@
 define <4 x i32> @vqshlQu_n32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vqshlQu_n32:
 ;CHECK: vqshl.u32{{.*#31}}
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
 	ret <4 x i32> %tmp2
 }
@@ -299,7 +299,7 @@
 define <2 x i64> @vqshlQu_n64(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vqshlQu_n64:
 ;CHECK: vqshl.u64{{.*#63}}
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
 	ret <2 x i64> %tmp2
 }
@@ -307,7 +307,7 @@
 define <16 x i8> @vqshlQsu_n8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vqshlQsu_n8:
 ;CHECK: vqshlu.s8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <16 x i8> %tmp2
 }
@@ -315,7 +315,7 @@
 define <8 x i16> @vqshlQsu_n16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqshlQsu_n16:
 ;CHECK: vqshlu.s16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
 	ret <8 x i16> %tmp2
 }
@@ -323,7 +323,7 @@
 define <4 x i32> @vqshlQsu_n32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vqshlQsu_n32:
 ;CHECK: vqshlu.s32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
 	ret <4 x i32> %tmp2
 }
@@ -331,7 +331,7 @@
 define <2 x i64> @vqshlQsu_n64(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vqshlQsu_n64:
 ;CHECK: vqshlu.s64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
 	ret <2 x i64> %tmp2
 }
@@ -369,8 +369,8 @@
 define <8 x i8> @vqrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vqrshls8:
 ;CHECK: vqrshl.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -378,8 +378,8 @@
 define <4 x i16> @vqrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqrshls16:
 ;CHECK: vqrshl.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -387,8 +387,8 @@
 define <2 x i32> @vqrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqrshls32:
 ;CHECK: vqrshl.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -396,8 +396,8 @@
 define <1 x i64> @vqrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vqrshls64:
 ;CHECK: vqrshl.s64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
 	ret <1 x i64> %tmp3
 }
@@ -405,8 +405,8 @@
 define <8 x i8> @vqrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vqrshlu8:
 ;CHECK: vqrshl.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -414,8 +414,8 @@
 define <4 x i16> @vqrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqrshlu16:
 ;CHECK: vqrshl.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -423,8 +423,8 @@
 define <2 x i32> @vqrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqrshlu32:
 ;CHECK: vqrshl.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -432,8 +432,8 @@
 define <1 x i64> @vqrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vqrshlu64:
 ;CHECK: vqrshl.u64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
 	ret <1 x i64> %tmp3
 }
@@ -441,8 +441,8 @@
 define <16 x i8> @vqrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vqrshlQs8:
 ;CHECK: vqrshl.s8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -450,8 +450,8 @@
 define <8 x i16> @vqrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqrshlQs16:
 ;CHECK: vqrshl.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -459,8 +459,8 @@
 define <4 x i32> @vqrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqrshlQs32:
 ;CHECK: vqrshl.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -468,8 +468,8 @@
 define <2 x i64> @vqrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vqrshlQs64:
 ;CHECK: vqrshl.s64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
@@ -477,8 +477,8 @@
 define <16 x i8> @vqrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vqrshlQu8:
 ;CHECK: vqrshl.u8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -486,8 +486,8 @@
 define <8 x i16> @vqrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqrshlQu16:
 ;CHECK: vqrshl.u16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -495,8 +495,8 @@
 define <4 x i32> @vqrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqrshlQu32:
 ;CHECK: vqrshl.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -504,8 +504,8 @@
 define <2 x i64> @vqrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vqrshlQu64:
 ;CHECK: vqrshl.u64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
diff --git a/llvm/test/CodeGen/ARM/vqshrn.ll b/llvm/test/CodeGen/ARM/vqshrn.ll
index f02482c..b4b5e96 100644
--- a/llvm/test/CodeGen/ARM/vqshrn.ll
+++ b/llvm/test/CodeGen/ARM/vqshrn.ll
@@ -3,7 +3,7 @@
 define <8 x i8> @vqshrns8(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqshrns8:
 ;CHECK: vqshrn.s16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
 	ret <8 x i8> %tmp2
 }
@@ -11,7 +11,7 @@
 define <4 x i16> @vqshrns16(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vqshrns16:
 ;CHECK: vqshrn.s32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
 	ret <4 x i16> %tmp2
 }
@@ -19,7 +19,7 @@
 define <2 x i32> @vqshrns32(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vqshrns32:
 ;CHECK: vqshrn.s64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
 	ret <2 x i32> %tmp2
 }
@@ -27,7 +27,7 @@
 define <8 x i8> @vqshrnu8(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqshrnu8:
 ;CHECK: vqshrn.u16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
 	ret <8 x i8> %tmp2
 }
@@ -35,7 +35,7 @@
 define <4 x i16> @vqshrnu16(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vqshrnu16:
 ;CHECK: vqshrn.u32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
 	ret <4 x i16> %tmp2
 }
@@ -43,7 +43,7 @@
 define <2 x i32> @vqshrnu32(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vqshrnu32:
 ;CHECK: vqshrn.u64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
 	ret <2 x i32> %tmp2
 }
@@ -51,7 +51,7 @@
 define <8 x i8> @vqshruns8(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqshruns8:
 ;CHECK: vqshrun.s16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
 	ret <8 x i8> %tmp2
 }
@@ -59,7 +59,7 @@
 define <4 x i16> @vqshruns16(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vqshruns16:
 ;CHECK: vqshrun.s32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
 	ret <4 x i16> %tmp2
 }
@@ -67,7 +67,7 @@
 define <2 x i32> @vqshruns32(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vqshruns32:
 ;CHECK: vqshrun.s64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
 	ret <2 x i32> %tmp2
 }
@@ -87,7 +87,7 @@
 define <8 x i8> @vqrshrns8(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqrshrns8:
 ;CHECK: vqrshrn.s16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
 	ret <8 x i8> %tmp2
 }
@@ -95,7 +95,7 @@
 define <4 x i16> @vqrshrns16(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vqrshrns16:
 ;CHECK: vqrshrn.s32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
 	ret <4 x i16> %tmp2
 }
@@ -103,7 +103,7 @@
 define <2 x i32> @vqrshrns32(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vqrshrns32:
 ;CHECK: vqrshrn.s64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
 	ret <2 x i32> %tmp2
 }
@@ -111,7 +111,7 @@
 define <8 x i8> @vqrshrnu8(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqrshrnu8:
 ;CHECK: vqrshrn.u16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
 	ret <8 x i8> %tmp2
 }
@@ -119,7 +119,7 @@
 define <4 x i16> @vqrshrnu16(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vqrshrnu16:
 ;CHECK: vqrshrn.u32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
 	ret <4 x i16> %tmp2
 }
@@ -127,7 +127,7 @@
 define <2 x i32> @vqrshrnu32(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vqrshrnu32:
 ;CHECK: vqrshrn.u64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
 	ret <2 x i32> %tmp2
 }
@@ -135,7 +135,7 @@
 define <8 x i8> @vqrshruns8(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vqrshruns8:
 ;CHECK: vqrshrun.s16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
 	ret <8 x i8> %tmp2
 }
@@ -143,7 +143,7 @@
 define <4 x i16> @vqrshruns16(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vqrshruns16:
 ;CHECK: vqrshrun.s32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
 	ret <4 x i16> %tmp2
 }
@@ -151,7 +151,7 @@
 define <2 x i32> @vqrshruns32(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vqrshruns32:
 ;CHECK: vqrshrun.s64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
 	ret <2 x i32> %tmp2
 }
diff --git a/llvm/test/CodeGen/ARM/vqsub.ll b/llvm/test/CodeGen/ARM/vqsub.ll
index 4af4380..40963ce 100644
--- a/llvm/test/CodeGen/ARM/vqsub.ll
+++ b/llvm/test/CodeGen/ARM/vqsub.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @vqsubs8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vqsubs8:
 ;CHECK: vqsub.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @vqsubs16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqsubs16:
 ;CHECK: vqsub.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @vqsubs32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqsubs32:
 ;CHECK: vqsub.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -30,8 +30,8 @@
 define <1 x i64> @vqsubs64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vqsubs64:
 ;CHECK: vqsub.s64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
 	ret <1 x i64> %tmp3
 }
@@ -39,8 +39,8 @@
 define <8 x i8> @vqsubu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vqsubu8:
 ;CHECK: vqsub.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -48,8 +48,8 @@
 define <4 x i16> @vqsubu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqsubu16:
 ;CHECK: vqsub.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -57,8 +57,8 @@
 define <2 x i32> @vqsubu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqsubu32:
 ;CHECK: vqsub.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -66,8 +66,8 @@
 define <1 x i64> @vqsubu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vqsubu64:
 ;CHECK: vqsub.u64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
 	ret <1 x i64> %tmp3
 }
@@ -75,8 +75,8 @@
 define <16 x i8> @vqsubQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vqsubQs8:
 ;CHECK: vqsub.s8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -84,8 +84,8 @@
 define <8 x i16> @vqsubQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqsubQs16:
 ;CHECK: vqsub.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -93,8 +93,8 @@
 define <4 x i32> @vqsubQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqsubQs32:
 ;CHECK: vqsub.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -102,8 +102,8 @@
 define <2 x i64> @vqsubQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vqsubQs64:
 ;CHECK: vqsub.s64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
@@ -111,8 +111,8 @@
 define <16 x i8> @vqsubQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vqsubQu8:
 ;CHECK: vqsub.u8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -120,8 +120,8 @@
 define <8 x i16> @vqsubQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vqsubQu16:
 ;CHECK: vqsub.u16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -129,8 +129,8 @@
 define <4 x i32> @vqsubQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vqsubQu32:
 ;CHECK: vqsub.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -138,8 +138,8 @@
 define <2 x i64> @vqsubQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vqsubQu64:
 ;CHECK: vqsub.u64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
diff --git a/llvm/test/CodeGen/ARM/vrec.ll b/llvm/test/CodeGen/ARM/vrec.ll
index 91979e5..a7ebd79 100644
--- a/llvm/test/CodeGen/ARM/vrec.ll
+++ b/llvm/test/CodeGen/ARM/vrec.ll
@@ -3,7 +3,7 @@
 define <2 x i32> @vrecpei32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vrecpei32:
 ;CHECK: vrecpe.u32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %tmp1)
 	ret <2 x i32> %tmp2
 }
@@ -11,7 +11,7 @@
 define <4 x i32> @vrecpeQi32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vrecpeQi32:
 ;CHECK: vrecpe.u32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %tmp1)
 	ret <4 x i32> %tmp2
 }
@@ -19,7 +19,7 @@
 define <2 x float> @vrecpef32(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: vrecpef32:
 ;CHECK: vrecpe.f32
-	%tmp1 = load <2 x float>* %A
+	%tmp1 = load <2 x float>, <2 x float>* %A
 	%tmp2 = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %tmp1)
 	ret <2 x float> %tmp2
 }
@@ -27,7 +27,7 @@
 define <4 x float> @vrecpeQf32(<4 x float>* %A) nounwind {
 ;CHECK-LABEL: vrecpeQf32:
 ;CHECK: vrecpe.f32
-	%tmp1 = load <4 x float>* %A
+	%tmp1 = load <4 x float>, <4 x float>* %A
 	%tmp2 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp1)
 	ret <4 x float> %tmp2
 }
@@ -41,8 +41,8 @@
 define <2 x float> @vrecpsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vrecpsf32:
 ;CHECK: vrecps.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
@@ -50,8 +50,8 @@
 define <4 x float> @vrecpsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: vrecpsQf32:
 ;CHECK: vrecps.f32
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x float> %tmp3
 }
@@ -62,7 +62,7 @@
 define <2 x i32> @vrsqrtei32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vrsqrtei32:
 ;CHECK: vrsqrte.u32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %tmp1)
 	ret <2 x i32> %tmp2
 }
@@ -70,7 +70,7 @@
 define <4 x i32> @vrsqrteQi32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vrsqrteQi32:
 ;CHECK: vrsqrte.u32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %tmp1)
 	ret <4 x i32> %tmp2
 }
@@ -78,7 +78,7 @@
 define <2 x float> @vrsqrtef32(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: vrsqrtef32:
 ;CHECK: vrsqrte.f32
-	%tmp1 = load <2 x float>* %A
+	%tmp1 = load <2 x float>, <2 x float>* %A
 	%tmp2 = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %tmp1)
 	ret <2 x float> %tmp2
 }
@@ -86,7 +86,7 @@
 define <4 x float> @vrsqrteQf32(<4 x float>* %A) nounwind {
 ;CHECK-LABEL: vrsqrteQf32:
 ;CHECK: vrsqrte.f32
-	%tmp1 = load <4 x float>* %A
+	%tmp1 = load <4 x float>, <4 x float>* %A
 	%tmp2 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %tmp1)
 	ret <4 x float> %tmp2
 }
@@ -100,8 +100,8 @@
 define <2 x float> @vrsqrtsf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vrsqrtsf32:
 ;CHECK: vrsqrts.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
@@ -109,8 +109,8 @@
 define <4 x float> @vrsqrtsQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: vrsqrtsQf32:
 ;CHECK: vrsqrts.f32
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x float> %tmp3
 }
diff --git a/llvm/test/CodeGen/ARM/vrev.ll b/llvm/test/CodeGen/ARM/vrev.ll
index 51d4f99..a20d4b6 100644
--- a/llvm/test/CodeGen/ARM/vrev.ll
+++ b/llvm/test/CodeGen/ARM/vrev.ll
@@ -3,7 +3,7 @@
 define <8 x i8> @test_vrev64D8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: test_vrev64D8:
 ;CHECK: vrev64.8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 	ret <8 x i8> %tmp2
 }
@@ -11,7 +11,7 @@
 define <4 x i16> @test_vrev64D16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: test_vrev64D16:
 ;CHECK: vrev64.16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 	ret <4 x i16> %tmp2
 }
@@ -19,7 +19,7 @@
 define <2 x i32> @test_vrev64D32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: test_vrev64D32:
 ;CHECK: vrev64.32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
 	ret <2 x i32> %tmp2
 }
@@ -27,7 +27,7 @@
 define <2 x float> @test_vrev64Df(<2 x float>* %A) nounwind {
 ;CHECK-LABEL: test_vrev64Df:
 ;CHECK: vrev64.32
-	%tmp1 = load <2 x float>* %A
+	%tmp1 = load <2 x float>, <2 x float>* %A
 	%tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
 	ret <2 x float> %tmp2
 }
@@ -35,7 +35,7 @@
 define <16 x i8> @test_vrev64Q8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: test_vrev64Q8:
 ;CHECK: vrev64.8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
 	ret <16 x i8> %tmp2
 }
@@ -43,7 +43,7 @@
 define <8 x i16> @test_vrev64Q16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: test_vrev64Q16:
 ;CHECK: vrev64.16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
 	ret <8 x i16> %tmp2
 }
@@ -51,7 +51,7 @@
 define <4 x i32> @test_vrev64Q32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: test_vrev64Q32:
 ;CHECK: vrev64.32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
 	ret <4 x i32> %tmp2
 }
@@ -59,7 +59,7 @@
 define <4 x float> @test_vrev64Qf(<4 x float>* %A) nounwind {
 ;CHECK-LABEL: test_vrev64Qf:
 ;CHECK: vrev64.32
-	%tmp1 = load <4 x float>* %A
+	%tmp1 = load <4 x float>, <4 x float>* %A
 	%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
 	ret <4 x float> %tmp2
 }
@@ -67,7 +67,7 @@
 define <8 x i8> @test_vrev32D8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: test_vrev32D8:
 ;CHECK: vrev32.8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
 	ret <8 x i8> %tmp2
 }
@@ -75,7 +75,7 @@
 define <4 x i16> @test_vrev32D16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: test_vrev32D16:
 ;CHECK: vrev32.16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
 	ret <4 x i16> %tmp2
 }
@@ -83,7 +83,7 @@
 define <16 x i8> @test_vrev32Q8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: test_vrev32Q8:
 ;CHECK: vrev32.8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
 	ret <16 x i8> %tmp2
 }
@@ -91,7 +91,7 @@
 define <8 x i16> @test_vrev32Q16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: test_vrev32Q16:
 ;CHECK: vrev32.16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
 	ret <8 x i16> %tmp2
 }
@@ -99,7 +99,7 @@
 define <8 x i8> @test_vrev16D8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: test_vrev16D8:
 ;CHECK: vrev16.8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
 	ret <8 x i8> %tmp2
 }
@@ -107,7 +107,7 @@
 define <16 x i8> @test_vrev16Q8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: test_vrev16Q8:
 ;CHECK: vrev16.8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
 	ret <16 x i8> %tmp2
 }
@@ -117,7 +117,7 @@
 define <8 x i8> @test_vrev64D8_undef(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: test_vrev64D8_undef:
 ;CHECK: vrev64.8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 7, i32 undef, i32 undef, i32 4, i32 3, i32 2, i32 1, i32 0>
 	ret <8 x i8> %tmp2
 }
@@ -125,7 +125,7 @@
 define <8 x i16> @test_vrev32Q16_undef(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: test_vrev32Q16_undef:
 ;CHECK: vrev32.16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 2, i32 5, i32 4, i32 7, i32 undef>
 	ret <8 x i16> %tmp2
 }
@@ -136,7 +136,7 @@
 ;CHECK-LABEL: test_with_vcombine:
 ;CHECK-NOT: vext
 ;CHECK: vrev64.32
-  %tmp1 = load <4 x float>* %v, align 16
+  %tmp1 = load <4 x float>, <4 x float>* %v, align 16
   %tmp2 = bitcast <4 x float> %tmp1 to <2 x double>
   %tmp3 = extractelement <2 x double> %tmp2, i32 0
   %tmp4 = bitcast double %tmp3 to <2 x float>
@@ -155,7 +155,7 @@
 ; CHECK: vst1.32
 entry:
   %0 = bitcast <4 x i16>* %source to <8 x i16>*
-  %tmp2 = load <8 x i16>* %0, align 4
+  %tmp2 = load <8 x i16>, <8 x i16>* %0, align 4
   %tmp3 = extractelement <8 x i16> %tmp2, i32 6
   %tmp5 = insertelement <2 x i16> undef, i16 %tmp3, i32 0
   %tmp9 = extractelement <8 x i16> %tmp2, i32 5
@@ -171,7 +171,7 @@
 ; CHECK: vrev64.32
 entry:
   %0 = bitcast float* %source to <4 x float>*
-  %tmp2 = load <4 x float>* %0, align 4
+  %tmp2 = load <4 x float>, <4 x float>* %0, align 4
   %tmp5 = shufflevector <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x float> %tmp2, <4 x i32> <i32 0, i32 7, i32 0, i32 0>
   %arrayidx8 = getelementptr inbounds <4 x float>, <4 x float>* %dest, i32 11
   store <4 x float> %tmp5, <4 x float>* %arrayidx8, align 4
diff --git a/llvm/test/CodeGen/ARM/vselect_imax.ll b/llvm/test/CodeGen/ARM/vselect_imax.ll
index e999034..0eb0510 100644
--- a/llvm/test/CodeGen/ARM/vselect_imax.ll
+++ b/llvm/test/CodeGen/ARM/vselect_imax.ll
@@ -18,8 +18,8 @@
 ; CHECK-LABEL: func_blend10:
 define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2,
                            %T1_10* %blend, %T0_10* %storeaddr) {
-  %v0 = load %T0_10* %loadaddr
-  %v1 = load %T0_10* %loadaddr2
+  %v0 = load %T0_10, %T0_10* %loadaddr
+  %v1 = load %T0_10, %T0_10* %loadaddr2
   %c = icmp slt %T0_10 %v0, %v1
 ; CHECK: vbsl
 ; CHECK: vbsl
@@ -34,8 +34,8 @@
 ; CHECK-LABEL: func_blend14:
 define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
                            %T1_14* %blend, %T0_14* %storeaddr) {
-  %v0 = load %T0_14* %loadaddr
-  %v1 = load %T0_14* %loadaddr2
+  %v0 = load %T0_14, %T0_14* %loadaddr
+  %v1 = load %T0_14, %T0_14* %loadaddr2
   %c = icmp slt %T0_14 %v0, %v1
 ; CHECK: vbsl
 ; CHECK: vbsl
@@ -52,8 +52,8 @@
                            %T1_15* %blend, %T0_15* %storeaddr) {
 ; CHECK: vbsl
 ; CHECK: vbsl
-  %v0 = load %T0_15* %loadaddr
-  %v1 = load %T0_15* %loadaddr2
+  %v0 = load %T0_15, %T0_15* %loadaddr
+  %v1 = load %T0_15, %T0_15* %loadaddr2
   %c = icmp slt %T0_15 %v0, %v1
 ; COST: func_blend15
 ; COST: cost of 82 {{.*}} select
@@ -68,8 +68,8 @@
                            %T1_18* %blend, %T0_18* %storeaddr) {
 ; CHECK: vbsl
 ; CHECK: vbsl
-  %v0 = load %T0_18* %loadaddr
-  %v1 = load %T0_18* %loadaddr2
+  %v0 = load %T0_18, %T0_18* %loadaddr
+  %v1 = load %T0_18, %T0_18* %loadaddr2
   %c = icmp slt %T0_18 %v0, %v1
 ; COST: func_blend18
 ; COST: cost of 19 {{.*}} select
@@ -86,8 +86,8 @@
 ; CHECK: vbsl
 ; CHECK: vbsl
 ; CHECK: vbsl
-  %v0 = load %T0_19* %loadaddr
-  %v1 = load %T0_19* %loadaddr2
+  %v0 = load %T0_19, %T0_19* %loadaddr
+  %v1 = load %T0_19, %T0_19* %loadaddr2
   %c = icmp slt %T0_19 %v0, %v1
 ; COST: func_blend19
 ; COST: cost of 50 {{.*}} select
@@ -108,8 +108,8 @@
 ; CHECK: vbsl
 ; CHECK: vbsl
 ; CHECK: vbsl
-  %v0 = load %T0_20* %loadaddr
-  %v1 = load %T0_20* %loadaddr2
+  %v0 = load %T0_20, %T0_20* %loadaddr
+  %v1 = load %T0_20, %T0_20* %loadaddr2
   %c = icmp slt %T0_20 %v0, %v1
 ; COST: func_blend20
 ; COST: cost of 100 {{.*}} select
diff --git a/llvm/test/CodeGen/ARM/vshift.ll b/llvm/test/CodeGen/ARM/vshift.ll
index 618a137..31e4cb0 100644
--- a/llvm/test/CodeGen/ARM/vshift.ll
+++ b/llvm/test/CodeGen/ARM/vshift.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vshls8:
 ;CHECK: vshl.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shl <8 x i8> %tmp1, %tmp2
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vshls16:
 ;CHECK: vshl.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = shl <4 x i16> %tmp1, %tmp2
 	ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vshls32:
 ;CHECK: vshl.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = shl <2 x i32> %tmp1, %tmp2
 	ret <2 x i32> %tmp3
 }
@@ -30,8 +30,8 @@
 define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vshls64:
 ;CHECK: vshl.u64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = shl <1 x i64> %tmp1, %tmp2
 	ret <1 x i64> %tmp3
 }
@@ -39,7 +39,7 @@
 define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vshli8:
 ;CHECK: vshl.i8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = shl <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
 	ret <8 x i8> %tmp2
 }
@@ -47,7 +47,7 @@
 define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vshli16:
 ;CHECK: vshl.i16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = shl <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
 	ret <4 x i16> %tmp2
 }
@@ -55,7 +55,7 @@
 define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vshli32:
 ;CHECK: vshl.i32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = shl <2 x i32> %tmp1, < i32 31, i32 31 >
 	ret <2 x i32> %tmp2
 }
@@ -63,7 +63,7 @@
 define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
 ;CHECK-LABEL: vshli64:
 ;CHECK: vshl.i64
-	%tmp1 = load <1 x i64>* %A
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = shl <1 x i64> %tmp1, < i64 63 >
 	ret <1 x i64> %tmp2
 }
@@ -71,8 +71,8 @@
 define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vshlQs8:
 ;CHECK: vshl.u8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shl <16 x i8> %tmp1, %tmp2
 	ret <16 x i8> %tmp3
 }
@@ -80,8 +80,8 @@
 define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vshlQs16:
 ;CHECK: vshl.u16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shl <8 x i16> %tmp1, %tmp2
 	ret <8 x i16> %tmp3
 }
@@ -89,8 +89,8 @@
 define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vshlQs32:
 ;CHECK: vshl.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = shl <4 x i32> %tmp1, %tmp2
 	ret <4 x i32> %tmp3
 }
@@ -98,8 +98,8 @@
 define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vshlQs64:
 ;CHECK: vshl.u64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = shl <2 x i64> %tmp1, %tmp2
 	ret <2 x i64> %tmp3
 }
@@ -107,7 +107,7 @@
 define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vshlQi8:
 ;CHECK: vshl.i8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = shl <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
 	ret <16 x i8> %tmp2
 }
@@ -115,7 +115,7 @@
 define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vshlQi16:
 ;CHECK: vshl.i16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = shl <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
 	ret <8 x i16> %tmp2
 }
@@ -123,7 +123,7 @@
 define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vshlQi32:
 ;CHECK: vshl.i32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = shl <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
 	ret <4 x i32> %tmp2
 }
@@ -131,7 +131,7 @@
 define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vshlQi64:
 ;CHECK: vshl.i64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = shl <2 x i64> %tmp1, < i64 63, i64 63 >
 	ret <2 x i64> %tmp2
 }
@@ -140,8 +140,8 @@
 ;CHECK-LABEL: vlshru8:
 ;CHECK: vneg.s8
 ;CHECK: vshl.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = lshr <8 x i8> %tmp1, %tmp2
 	ret <8 x i8> %tmp3
 }
@@ -150,8 +150,8 @@
 ;CHECK-LABEL: vlshru16:
 ;CHECK: vneg.s16
 ;CHECK: vshl.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = lshr <4 x i16> %tmp1, %tmp2
 	ret <4 x i16> %tmp3
 }
@@ -160,8 +160,8 @@
 ;CHECK-LABEL: vlshru32:
 ;CHECK: vneg.s32
 ;CHECK: vshl.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = lshr <2 x i32> %tmp1, %tmp2
 	ret <2 x i32> %tmp3
 }
@@ -170,8 +170,8 @@
 ;CHECK-LABEL: vlshru64:
 ;CHECK: vsub.i64
 ;CHECK: vshl.u64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = lshr <1 x i64> %tmp1, %tmp2
 	ret <1 x i64> %tmp3
 }
@@ -179,7 +179,7 @@
 define <8 x i8> @vlshri8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vlshri8:
 ;CHECK: vshr.u8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = lshr <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
 	ret <8 x i8> %tmp2
 }
@@ -187,7 +187,7 @@
 define <4 x i16> @vlshri16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vlshri16:
 ;CHECK: vshr.u16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = lshr <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
 	ret <4 x i16> %tmp2
 }
@@ -195,7 +195,7 @@
 define <2 x i32> @vlshri32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vlshri32:
 ;CHECK: vshr.u32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = lshr <2 x i32> %tmp1, < i32 31, i32 31 >
 	ret <2 x i32> %tmp2
 }
@@ -203,7 +203,7 @@
 define <1 x i64> @vlshri64(<1 x i64>* %A) nounwind {
 ;CHECK-LABEL: vlshri64:
 ;CHECK: vshr.u64
-	%tmp1 = load <1 x i64>* %A
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = lshr <1 x i64> %tmp1, < i64 63 >
 	ret <1 x i64> %tmp2
 }
@@ -212,8 +212,8 @@
 ;CHECK-LABEL: vlshrQu8:
 ;CHECK: vneg.s8
 ;CHECK: vshl.u8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = lshr <16 x i8> %tmp1, %tmp2
 	ret <16 x i8> %tmp3
 }
@@ -222,8 +222,8 @@
 ;CHECK-LABEL: vlshrQu16:
 ;CHECK: vneg.s16
 ;CHECK: vshl.u16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = lshr <8 x i16> %tmp1, %tmp2
 	ret <8 x i16> %tmp3
 }
@@ -232,8 +232,8 @@
 ;CHECK-LABEL: vlshrQu32:
 ;CHECK: vneg.s32
 ;CHECK: vshl.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = lshr <4 x i32> %tmp1, %tmp2
 	ret <4 x i32> %tmp3
 }
@@ -242,8 +242,8 @@
 ;CHECK-LABEL: vlshrQu64:
 ;CHECK: vsub.i64
 ;CHECK: vshl.u64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = lshr <2 x i64> %tmp1, %tmp2
 	ret <2 x i64> %tmp3
 }
@@ -251,7 +251,7 @@
 define <16 x i8> @vlshrQi8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vlshrQi8:
 ;CHECK: vshr.u8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = lshr <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
 	ret <16 x i8> %tmp2
 }
@@ -259,7 +259,7 @@
 define <8 x i16> @vlshrQi16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vlshrQi16:
 ;CHECK: vshr.u16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = lshr <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
 	ret <8 x i16> %tmp2
 }
@@ -267,7 +267,7 @@
 define <4 x i32> @vlshrQi32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vlshrQi32:
 ;CHECK: vshr.u32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = lshr <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
 	ret <4 x i32> %tmp2
 }
@@ -275,7 +275,7 @@
 define <2 x i64> @vlshrQi64(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vlshrQi64:
 ;CHECK: vshr.u64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = lshr <2 x i64> %tmp1, < i64 63, i64 63 >
 	ret <2 x i64> %tmp2
 }
@@ -291,8 +291,8 @@
 ;CHECK-LABEL: vashrs8:
 ;CHECK: vneg.s8
 ;CHECK: vshl.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = ashr <8 x i8> %tmp1, %tmp2
 	ret <8 x i8> %tmp3
 }
@@ -301,8 +301,8 @@
 ;CHECK-LABEL: vashrs16:
 ;CHECK: vneg.s16
 ;CHECK: vshl.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = ashr <4 x i16> %tmp1, %tmp2
 	ret <4 x i16> %tmp3
 }
@@ -311,8 +311,8 @@
 ;CHECK-LABEL: vashrs32:
 ;CHECK: vneg.s32
 ;CHECK: vshl.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = ashr <2 x i32> %tmp1, %tmp2
 	ret <2 x i32> %tmp3
 }
@@ -321,8 +321,8 @@
 ;CHECK-LABEL: vashrs64:
 ;CHECK: vsub.i64
 ;CHECK: vshl.s64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = ashr <1 x i64> %tmp1, %tmp2
 	ret <1 x i64> %tmp3
 }
@@ -330,7 +330,7 @@
 define <8 x i8> @vashri8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vashri8:
 ;CHECK: vshr.s8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = ashr <8 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
 	ret <8 x i8> %tmp2
 }
@@ -338,7 +338,7 @@
 define <4 x i16> @vashri16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vashri16:
 ;CHECK: vshr.s16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = ashr <4 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15 >
 	ret <4 x i16> %tmp2
 }
@@ -346,7 +346,7 @@
 define <2 x i32> @vashri32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vashri32:
 ;CHECK: vshr.s32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = ashr <2 x i32> %tmp1, < i32 31, i32 31 >
 	ret <2 x i32> %tmp2
 }
@@ -354,7 +354,7 @@
 define <1 x i64> @vashri64(<1 x i64>* %A) nounwind {
 ;CHECK-LABEL: vashri64:
 ;CHECK: vshr.s64
-	%tmp1 = load <1 x i64>* %A
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = ashr <1 x i64> %tmp1, < i64 63 >
 	ret <1 x i64> %tmp2
 }
@@ -363,8 +363,8 @@
 ;CHECK-LABEL: vashrQs8:
 ;CHECK: vneg.s8
 ;CHECK: vshl.s8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = ashr <16 x i8> %tmp1, %tmp2
 	ret <16 x i8> %tmp3
 }
@@ -373,8 +373,8 @@
 ;CHECK-LABEL: vashrQs16:
 ;CHECK: vneg.s16
 ;CHECK: vshl.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = ashr <8 x i16> %tmp1, %tmp2
 	ret <8 x i16> %tmp3
 }
@@ -383,8 +383,8 @@
 ;CHECK-LABEL: vashrQs32:
 ;CHECK: vneg.s32
 ;CHECK: vshl.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = ashr <4 x i32> %tmp1, %tmp2
 	ret <4 x i32> %tmp3
 }
@@ -393,8 +393,8 @@
 ;CHECK-LABEL: vashrQs64:
 ;CHECK: vsub.i64
 ;CHECK: vshl.s64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = ashr <2 x i64> %tmp1, %tmp2
 	ret <2 x i64> %tmp3
 }
@@ -402,7 +402,7 @@
 define <16 x i8> @vashrQi8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vashrQi8:
 ;CHECK: vshr.s8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = ashr <16 x i8> %tmp1, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
 	ret <16 x i8> %tmp2
 }
@@ -410,7 +410,7 @@
 define <8 x i16> @vashrQi16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vashrQi16:
 ;CHECK: vshr.s16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = ashr <8 x i16> %tmp1, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
 	ret <8 x i16> %tmp2
 }
@@ -418,7 +418,7 @@
 define <4 x i32> @vashrQi32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vashrQi32:
 ;CHECK: vshr.s32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = ashr <4 x i32> %tmp1, < i32 31, i32 31, i32 31, i32 31 >
 	ret <4 x i32> %tmp2
 }
@@ -426,7 +426,7 @@
 define <2 x i64> @vashrQi64(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vashrQi64:
 ;CHECK: vshr.s64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = ashr <2 x i64> %tmp1, < i64 63, i64 63 >
 	ret <2 x i64> %tmp2
 }
diff --git a/llvm/test/CodeGen/ARM/vshiftins.ll b/llvm/test/CodeGen/ARM/vshiftins.ll
index 9526c32..2948737 100644
--- a/llvm/test/CodeGen/ARM/vshiftins.ll
+++ b/llvm/test/CodeGen/ARM/vshiftins.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @vsli8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsli8:
 ;CHECK: vsli.8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @vsli16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vsli16:
 ;CHECK: vsli.16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
 	ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @vsli32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vsli32:
 ;CHECK: vsli.32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 31, i32 31 >)
 	ret <2 x i32> %tmp3
 }
@@ -30,8 +30,8 @@
 define <1 x i64> @vsli64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vsli64:
 ;CHECK: vsli.64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 63 >)
 	ret <1 x i64> %tmp3
 }
@@ -39,8 +39,8 @@
 define <16 x i8> @vsliQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsliQ8:
 ;CHECK: vsli.8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <16 x i8> %tmp3
 }
@@ -48,8 +48,8 @@
 define <8 x i16> @vsliQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vsliQ16:
 ;CHECK: vsli.16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
 	ret <8 x i16> %tmp3
 }
@@ -57,8 +57,8 @@
 define <4 x i32> @vsliQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vsliQ32:
 ;CHECK: vsli.32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
 	ret <4 x i32> %tmp3
 }
@@ -66,8 +66,8 @@
 define <2 x i64> @vsliQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vsliQ64:
 ;CHECK: vsli.64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 63, i64 63 >)
 	ret <2 x i64> %tmp3
 }
@@ -75,8 +75,8 @@
 define <8 x i8> @vsri8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsri8:
 ;CHECK: vsri.8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
 	ret <8 x i8> %tmp3
 }
@@ -84,8 +84,8 @@
 define <4 x i16> @vsri16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vsri16:
 ;CHECK: vsri.16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
 	ret <4 x i16> %tmp3
 }
@@ -93,8 +93,8 @@
 define <2 x i32> @vsri32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vsri32:
 ;CHECK: vsri.32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
 	ret <2 x i32> %tmp3
 }
@@ -102,8 +102,8 @@
 define <1 x i64> @vsri64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vsri64:
 ;CHECK: vsri.64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, <1 x i64> < i64 -64 >)
 	ret <1 x i64> %tmp3
 }
@@ -111,8 +111,8 @@
 define <16 x i8> @vsriQ8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsriQ8:
 ;CHECK: vsri.8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
 	ret <16 x i8> %tmp3
 }
@@ -120,8 +120,8 @@
 define <8 x i16> @vsriQ16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vsriQ16:
 ;CHECK: vsri.16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
 	ret <8 x i16> %tmp3
 }
@@ -129,8 +129,8 @@
 define <4 x i32> @vsriQ32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vsriQ32:
 ;CHECK: vsri.32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
 	ret <4 x i32> %tmp3
 }
@@ -138,8 +138,8 @@
 define <2 x i64> @vsriQ64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vsriQ64:
 ;CHECK: vsri.64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
 	ret <2 x i64> %tmp3
 }
diff --git a/llvm/test/CodeGen/ARM/vshl.ll b/llvm/test/CodeGen/ARM/vshl.ll
index 6228652..ef76e3d 100644
--- a/llvm/test/CodeGen/ARM/vshl.ll
+++ b/llvm/test/CodeGen/ARM/vshl.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @vshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vshls8:
 ;CHECK: vshl.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @vshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vshls16:
 ;CHECK: vshl.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @vshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vshls32:
 ;CHECK: vshl.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -30,8 +30,8 @@
 define <1 x i64> @vshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vshls64:
 ;CHECK: vshl.s64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
 	ret <1 x i64> %tmp3
 }
@@ -39,8 +39,8 @@
 define <8 x i8> @vshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vshlu8:
 ;CHECK: vshl.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -48,8 +48,8 @@
 define <4 x i16> @vshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vshlu16:
 ;CHECK: vshl.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -57,8 +57,8 @@
 define <2 x i32> @vshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vshlu32:
 ;CHECK: vshl.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -66,8 +66,8 @@
 define <1 x i64> @vshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vshlu64:
 ;CHECK: vshl.u64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
 	ret <1 x i64> %tmp3
 }
@@ -75,8 +75,8 @@
 define <16 x i8> @vshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vshlQs8:
 ;CHECK: vshl.s8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -84,8 +84,8 @@
 define <8 x i16> @vshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vshlQs16:
 ;CHECK: vshl.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -93,8 +93,8 @@
 define <4 x i32> @vshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vshlQs32:
 ;CHECK: vshl.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -102,8 +102,8 @@
 define <2 x i64> @vshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vshlQs64:
 ;CHECK: vshl.s64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
@@ -111,8 +111,8 @@
 define <16 x i8> @vshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vshlQu8:
 ;CHECK: vshl.u8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -120,8 +120,8 @@
 define <8 x i16> @vshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vshlQu16:
 ;CHECK: vshl.u16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -129,8 +129,8 @@
 define <4 x i32> @vshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vshlQu32:
 ;CHECK: vshl.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -138,8 +138,8 @@
 define <2 x i64> @vshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vshlQu64:
 ;CHECK: vshl.u64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
@@ -150,7 +150,7 @@
 define <8 x i8> @vshli8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vshli8:
 ;CHECK: vshl.i8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <8 x i8> %tmp2
 }
@@ -158,7 +158,7 @@
 define <4 x i16> @vshli16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vshli16:
 ;CHECK: vshl.i16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 15, i16 15, i16 15, i16 15 >)
 	ret <4 x i16> %tmp2
 }
@@ -166,7 +166,7 @@
 define <2 x i32> @vshli32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vshli32:
 ;CHECK: vshl.i32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 31, i32 31 >)
 	ret <2 x i32> %tmp2
 }
@@ -174,7 +174,7 @@
 define <1 x i64> @vshli64(<1 x i64>* %A) nounwind {
 ;CHECK-LABEL: vshli64:
 ;CHECK: vshl.i64
-	%tmp1 = load <1 x i64>* %A
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 63 >)
 	ret <1 x i64> %tmp2
 }
@@ -182,7 +182,7 @@
 define <16 x i8> @vshlQi8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vshlQi8:
 ;CHECK: vshl.i8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >)
 	ret <16 x i8> %tmp2
 }
@@ -190,7 +190,7 @@
 define <8 x i16> @vshlQi16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vshlQi16:
 ;CHECK: vshl.i16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >)
 	ret <8 x i16> %tmp2
 }
@@ -198,7 +198,7 @@
 define <4 x i32> @vshlQi32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vshlQi32:
 ;CHECK: vshl.i32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 31, i32 31, i32 31, i32 31 >)
 	ret <4 x i32> %tmp2
 }
@@ -206,7 +206,7 @@
 define <2 x i64> @vshlQi64(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vshlQi64:
 ;CHECK: vshl.i64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 63, i64 63 >)
 	ret <2 x i64> %tmp2
 }
@@ -216,7 +216,7 @@
 define <8 x i8> @vshrs8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vshrs8:
 ;CHECK: vshr.s8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
 	ret <8 x i8> %tmp2
 }
@@ -224,7 +224,7 @@
 define <4 x i16> @vshrs16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vshrs16:
 ;CHECK: vshr.s16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
 	ret <4 x i16> %tmp2
 }
@@ -232,7 +232,7 @@
 define <2 x i32> @vshrs32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vshrs32:
 ;CHECK: vshr.s32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
 	ret <2 x i32> %tmp2
 }
@@ -240,7 +240,7 @@
 define <1 x i64> @vshrs64(<1 x i64>* %A) nounwind {
 ;CHECK-LABEL: vshrs64:
 ;CHECK: vshr.s64
-	%tmp1 = load <1 x i64>* %A
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
 	ret <1 x i64> %tmp2
 }
@@ -248,7 +248,7 @@
 define <8 x i8> @vshru8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vshru8:
 ;CHECK: vshr.u8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
 	ret <8 x i8> %tmp2
 }
@@ -256,7 +256,7 @@
 define <4 x i16> @vshru16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vshru16:
 ;CHECK: vshr.u16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
 	ret <4 x i16> %tmp2
 }
@@ -264,7 +264,7 @@
 define <2 x i32> @vshru32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vshru32:
 ;CHECK: vshr.u32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
 	ret <2 x i32> %tmp2
 }
@@ -272,7 +272,7 @@
 define <1 x i64> @vshru64(<1 x i64>* %A) nounwind {
 ;CHECK-LABEL: vshru64:
 ;CHECK: vshr.u64
-	%tmp1 = load <1 x i64>* %A
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
 	ret <1 x i64> %tmp2
 }
@@ -280,7 +280,7 @@
 define <16 x i8> @vshrQs8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vshrQs8:
 ;CHECK: vshr.s8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
 	ret <16 x i8> %tmp2
 }
@@ -288,7 +288,7 @@
 define <8 x i16> @vshrQs16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vshrQs16:
 ;CHECK: vshr.s16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
 	ret <8 x i16> %tmp2
 }
@@ -296,7 +296,7 @@
 define <4 x i32> @vshrQs32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vshrQs32:
 ;CHECK: vshr.s32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
 	ret <4 x i32> %tmp2
 }
@@ -304,7 +304,7 @@
 define <2 x i64> @vshrQs64(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vshrQs64:
 ;CHECK: vshr.s64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
 	ret <2 x i64> %tmp2
 }
@@ -312,7 +312,7 @@
 define <16 x i8> @vshrQu8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vshrQu8:
 ;CHECK: vshr.u8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
 	ret <16 x i8> %tmp2
 }
@@ -320,7 +320,7 @@
 define <8 x i16> @vshrQu16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vshrQu16:
 ;CHECK: vshr.u16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
 	ret <8 x i16> %tmp2
 }
@@ -328,7 +328,7 @@
 define <4 x i32> @vshrQu32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vshrQu32:
 ;CHECK: vshr.u32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
 	ret <4 x i32> %tmp2
 }
@@ -336,7 +336,7 @@
 define <2 x i64> @vshrQu64(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vshrQu64:
 ;CHECK: vshr.u64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
 	ret <2 x i64> %tmp2
 }
@@ -364,8 +364,8 @@
 define <8 x i8> @vrshls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vrshls8:
 ;CHECK: vrshl.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -373,8 +373,8 @@
 define <4 x i16> @vrshls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vrshls16:
 ;CHECK: vrshl.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -382,8 +382,8 @@
 define <2 x i32> @vrshls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vrshls32:
 ;CHECK: vrshl.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -391,8 +391,8 @@
 define <1 x i64> @vrshls64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vrshls64:
 ;CHECK: vrshl.s64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
 	ret <1 x i64> %tmp3
 }
@@ -400,8 +400,8 @@
 define <8 x i8> @vrshlu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vrshlu8:
 ;CHECK: vrshl.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -409,8 +409,8 @@
 define <4 x i16> @vrshlu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vrshlu16:
 ;CHECK: vrshl.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -418,8 +418,8 @@
 define <2 x i32> @vrshlu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vrshlu32:
 ;CHECK: vrshl.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -427,8 +427,8 @@
 define <1 x i64> @vrshlu64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vrshlu64:
 ;CHECK: vrshl.u64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
 	ret <1 x i64> %tmp3
 }
@@ -436,8 +436,8 @@
 define <16 x i8> @vrshlQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vrshlQs8:
 ;CHECK: vrshl.s8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -445,8 +445,8 @@
 define <8 x i16> @vrshlQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vrshlQs16:
 ;CHECK: vrshl.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -454,8 +454,8 @@
 define <4 x i32> @vrshlQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vrshlQs32:
 ;CHECK: vrshl.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -463,8 +463,8 @@
 define <2 x i64> @vrshlQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vrshlQs64:
 ;CHECK: vrshl.s64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
@@ -472,8 +472,8 @@
 define <16 x i8> @vrshlQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vrshlQu8:
 ;CHECK: vrshl.u8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
 	ret <16 x i8> %tmp3
 }
@@ -481,8 +481,8 @@
 define <8 x i16> @vrshlQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vrshlQu16:
 ;CHECK: vrshl.u16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i16> %tmp3
 }
@@ -490,8 +490,8 @@
 define <4 x i32> @vrshlQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vrshlQu32:
 ;CHECK: vrshl.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i32> %tmp3
 }
@@ -499,8 +499,8 @@
 define <2 x i64> @vrshlQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vrshlQu64:
 ;CHECK: vrshl.u64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i64> %tmp3
 }
@@ -508,7 +508,7 @@
 define <8 x i8> @vrshrs8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vrshrs8:
 ;CHECK: vrshr.s8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
 	ret <8 x i8> %tmp2
 }
@@ -516,7 +516,7 @@
 define <4 x i16> @vrshrs16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vrshrs16:
 ;CHECK: vrshr.s16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
 	ret <4 x i16> %tmp2
 }
@@ -524,7 +524,7 @@
 define <2 x i32> @vrshrs32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vrshrs32:
 ;CHECK: vrshr.s32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
 	ret <2 x i32> %tmp2
 }
@@ -532,7 +532,7 @@
 define <1 x i64> @vrshrs64(<1 x i64>* %A) nounwind {
 ;CHECK-LABEL: vrshrs64:
 ;CHECK: vrshr.s64
-	%tmp1 = load <1 x i64>* %A
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
 	ret <1 x i64> %tmp2
 }
@@ -540,7 +540,7 @@
 define <8 x i8> @vrshru8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vrshru8:
 ;CHECK: vrshr.u8
-	%tmp1 = load <8 x i8>* %A
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp1, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
 	ret <8 x i8> %tmp2
 }
@@ -548,7 +548,7 @@
 define <4 x i16> @vrshru16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vrshru16:
 ;CHECK: vrshr.u16
-	%tmp1 = load <4 x i16>* %A
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp1, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
 	ret <4 x i16> %tmp2
 }
@@ -556,7 +556,7 @@
 define <2 x i32> @vrshru32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vrshru32:
 ;CHECK: vrshr.u32
-	%tmp1 = load <2 x i32>* %A
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp1, <2 x i32> < i32 -32, i32 -32 >)
 	ret <2 x i32> %tmp2
 }
@@ -564,7 +564,7 @@
 define <1 x i64> @vrshru64(<1 x i64>* %A) nounwind {
 ;CHECK-LABEL: vrshru64:
 ;CHECK: vrshr.u64
-	%tmp1 = load <1 x i64>* %A
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp1, <1 x i64> < i64 -64 >)
 	ret <1 x i64> %tmp2
 }
@@ -572,7 +572,7 @@
 define <16 x i8> @vrshrQs8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vrshrQs8:
 ;CHECK: vrshr.s8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
 	ret <16 x i8> %tmp2
 }
@@ -580,7 +580,7 @@
 define <8 x i16> @vrshrQs16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vrshrQs16:
 ;CHECK: vrshr.s16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
 	ret <8 x i16> %tmp2
 }
@@ -588,7 +588,7 @@
 define <4 x i32> @vrshrQs32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vrshrQs32:
 ;CHECK: vrshr.s32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
 	ret <4 x i32> %tmp2
 }
@@ -596,7 +596,7 @@
 define <2 x i64> @vrshrQs64(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vrshrQs64:
 ;CHECK: vrshr.s64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
 	ret <2 x i64> %tmp2
 }
@@ -604,7 +604,7 @@
 define <16 x i8> @vrshrQu8(<16 x i8>* %A) nounwind {
 ;CHECK-LABEL: vrshrQu8:
 ;CHECK: vrshr.u8
-	%tmp1 = load <16 x i8>* %A
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp1, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
 	ret <16 x i8> %tmp2
 }
@@ -612,7 +612,7 @@
 define <8 x i16> @vrshrQu16(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vrshrQu16:
 ;CHECK: vrshr.u16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp1, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
 	ret <8 x i16> %tmp2
 }
@@ -620,7 +620,7 @@
 define <4 x i32> @vrshrQu32(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vrshrQu32:
 ;CHECK: vrshr.u32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp1, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
 	ret <4 x i32> %tmp2
 }
@@ -628,7 +628,7 @@
 define <2 x i64> @vrshrQu64(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vrshrQu64:
 ;CHECK: vrshr.u64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp1, <2 x i64> < i64 -64, i64 -64 >)
 	ret <2 x i64> %tmp2
 }
diff --git a/llvm/test/CodeGen/ARM/vshll.ll b/llvm/test/CodeGen/ARM/vshll.ll
index 27873eb..a823013 100644
--- a/llvm/test/CodeGen/ARM/vshll.ll
+++ b/llvm/test/CodeGen/ARM/vshll.ll
@@ -3,7 +3,7 @@
 define <8 x i16> @vshlls8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vshlls8:
 ;CHECK: vshll.s8
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %sext = sext <8 x i8> %tmp1 to <8 x i16>
         %shift = shl <8 x i16> %sext, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
         ret <8 x i16> %shift
@@ -12,7 +12,7 @@
 define <4 x i32> @vshlls16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vshlls16:
 ;CHECK: vshll.s16
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %sext = sext <4 x i16> %tmp1 to <4 x i32>
         %shift = shl <4 x i32> %sext, <i32 15, i32 15, i32 15, i32 15>
         ret <4 x i32> %shift
@@ -21,7 +21,7 @@
 define <2 x i64> @vshlls32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vshlls32:
 ;CHECK: vshll.s32
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %sext = sext <2 x i32> %tmp1 to <2 x i64>
         %shift = shl <2 x i64> %sext, <i64 31, i64 31>
         ret <2 x i64> %shift
@@ -30,7 +30,7 @@
 define <8 x i16> @vshllu8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vshllu8:
 ;CHECK: vshll.u8
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %zext = zext <8 x i8> %tmp1 to <8 x i16>
         %shift = shl <8 x i16> %zext, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
         ret <8 x i16> %shift
@@ -39,7 +39,7 @@
 define <4 x i32> @vshllu16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vshllu16:
 ;CHECK: vshll.u16
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %zext = zext <4 x i16> %tmp1 to <4 x i32>
         %shift = shl <4 x i32> %zext, <i32 15, i32 15, i32 15, i32 15>
         ret <4 x i32> %shift
@@ -48,7 +48,7 @@
 define <2 x i64> @vshllu32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vshllu32:
 ;CHECK: vshll.u32
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %zext = zext <2 x i32> %tmp1 to <2 x i64>
         %shift = shl <2 x i64> %zext, <i64 31, i64 31>
         ret <2 x i64> %shift
@@ -59,7 +59,7 @@
 define <8 x i16> @vshlli8(<8 x i8>* %A) nounwind {
 ;CHECK-LABEL: vshlli8:
 ;CHECK: vshll.i8
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %sext = sext <8 x i8> %tmp1 to <8 x i16>
         %shift = shl <8 x i16> %sext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
         ret <8 x i16> %shift
@@ -68,7 +68,7 @@
 define <4 x i32> @vshlli16(<4 x i16>* %A) nounwind {
 ;CHECK-LABEL: vshlli16:
 ;CHECK: vshll.i16
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %zext = zext <4 x i16> %tmp1 to <4 x i32>
         %shift = shl <4 x i32> %zext, <i32 16, i32 16, i32 16, i32 16>
         ret <4 x i32> %shift
@@ -77,7 +77,7 @@
 define <2 x i64> @vshlli32(<2 x i32>* %A) nounwind {
 ;CHECK-LABEL: vshlli32:
 ;CHECK: vshll.i32
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %zext = zext <2 x i32> %tmp1 to <2 x i64>
         %shift = shl <2 x i64> %zext, <i64 32, i64 32>
         ret <2 x i64> %shift
@@ -89,7 +89,7 @@
 ; CHECK-LABEL: vshllu8_bad:
 ; CHECK: vmovl.u8
 ; CHECK: vshl.i16
-        %tmp1 = load <8 x i8>* %A
+        %tmp1 = load <8 x i8>, <8 x i8>* %A
         %zext = zext <8 x i8> %tmp1 to <8 x i16>
         %shift = shl <8 x i16> %zext, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
         ret <8 x i16> %shift
@@ -99,7 +99,7 @@
 ; CHECK-LABEL: vshlls16_bad:
 ; CHECK: vmovl.s16
 ; CHECK: vshl.i32
-        %tmp1 = load <4 x i16>* %A
+        %tmp1 = load <4 x i16>, <4 x i16>* %A
         %sext = sext <4 x i16> %tmp1 to <4 x i32>
         %shift = shl <4 x i32> %sext, <i32 17, i32 17, i32 17, i32 17>
         ret <4 x i32> %shift
@@ -109,7 +109,7 @@
 ; CHECK-LABEL: vshllu32_bad:
 ; CHECK: vmovl.u32
 ; CHECK: vshl.i64
-        %tmp1 = load <2 x i32>* %A
+        %tmp1 = load <2 x i32>, <2 x i32>* %A
         %zext = zext <2 x i32> %tmp1 to <2 x i64>
         %shift = shl <2 x i64> %zext, <i64 33, i64 33>
         ret <2 x i64> %shift
diff --git a/llvm/test/CodeGen/ARM/vshrn.ll b/llvm/test/CodeGen/ARM/vshrn.ll
index 8aa009a..e033486 100644
--- a/llvm/test/CodeGen/ARM/vshrn.ll
+++ b/llvm/test/CodeGen/ARM/vshrn.ll
@@ -3,7 +3,7 @@
 define <8 x i8> @vshrns8(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vshrns8:
 ;CHECK: vshrn.i16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp2 = lshr <8 x i16> %tmp1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
         %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
 	ret <8 x i8> %tmp3
@@ -12,7 +12,7 @@
 define <4 x i16> @vshrns16(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vshrns16:
 ;CHECK: vshrn.i32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp2 = ashr <4 x i32> %tmp1, <i32 16, i32 16, i32 16, i32 16>
         %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
 	ret <4 x i16> %tmp3
@@ -21,7 +21,7 @@
 define <2 x i32> @vshrns32(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vshrns32:
 ;CHECK: vshrn.i64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp2 = ashr <2 x i64> %tmp1, <i64 32, i64 32>
         %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
 	ret <2 x i32> %tmp3
@@ -31,7 +31,7 @@
 ; CHECK-LABEL: vshrns8_bad:
 ; CHECK: vshr.s16
 ; CHECK: vmovn.i16
-        %tmp1 = load <8 x i16>* %A
+        %tmp1 = load <8 x i16>, <8 x i16>* %A
         %tmp2 = ashr <8 x i16> %tmp1, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
         %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
         ret <8 x i8> %tmp3
@@ -41,7 +41,7 @@
 ; CHECK-LABEL: vshrns16_bad:
 ; CHECK: vshr.u32
 ; CHECK: vmovn.i32
-        %tmp1 = load <4 x i32>* %A
+        %tmp1 = load <4 x i32>, <4 x i32>* %A
         %tmp2 = lshr <4 x i32> %tmp1, <i32 17, i32 17, i32 17, i32 17>
         %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
         ret <4 x i16> %tmp3
@@ -51,7 +51,7 @@
 ; CHECK-LABEL: vshrns32_bad:
 ; CHECK: vshr.u64
 ; CHECK: vmovn.i64
-        %tmp1 = load <2 x i64>* %A
+        %tmp1 = load <2 x i64>, <2 x i64>* %A
         %tmp2 = lshr <2 x i64> %tmp1, <i64 33, i64 33>
         %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
         ret <2 x i32> %tmp3
@@ -60,7 +60,7 @@
 define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vrshrns8:
 ;CHECK: vrshrn.i16
-	%tmp1 = load <8 x i16>* %A
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
 	ret <8 x i8> %tmp2
 }
@@ -68,7 +68,7 @@
 define <4 x i16> @vrshrns16(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vrshrns16:
 ;CHECK: vrshrn.i32
-	%tmp1 = load <4 x i32>* %A
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
 	ret <4 x i16> %tmp2
 }
@@ -76,7 +76,7 @@
 define <2 x i32> @vrshrns32(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vrshrns32:
 ;CHECK: vrshrn.i64
-	%tmp1 = load <2 x i64>* %A
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
 	ret <2 x i32> %tmp2
 }
diff --git a/llvm/test/CodeGen/ARM/vsra.ll b/llvm/test/CodeGen/ARM/vsra.ll
index fa5985a..cb758fa 100644
--- a/llvm/test/CodeGen/ARM/vsra.ll
+++ b/llvm/test/CodeGen/ARM/vsra.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @vsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsras8:
 ;CHECK: vsra.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = ashr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
     %tmp4 = add <8 x i8> %tmp1, %tmp3
 	ret <8 x i8> %tmp4
@@ -13,8 +13,8 @@
 define <4 x i16> @vsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vsras16:
 ;CHECK: vsra.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = ashr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 >
         %tmp4 = add <4 x i16> %tmp1, %tmp3
 	ret <4 x i16> %tmp4
@@ -23,8 +23,8 @@
 define <2 x i32> @vsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vsras32:
 ;CHECK: vsra.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = ashr <2 x i32> %tmp2, < i32 31, i32 31 >
         %tmp4 = add <2 x i32> %tmp1, %tmp3
 	ret <2 x i32> %tmp4
@@ -33,8 +33,8 @@
 define <1 x i64> @vsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vsras64:
 ;CHECK: vsra.s64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = ashr <1 x i64> %tmp2, < i64 63 >
         %tmp4 = add <1 x i64> %tmp1, %tmp3
 	ret <1 x i64> %tmp4
@@ -43,8 +43,8 @@
 define <16 x i8> @vsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsraQs8:
 ;CHECK: vsra.s8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = ashr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
         %tmp4 = add <16 x i8> %tmp1, %tmp3
 	ret <16 x i8> %tmp4
@@ -53,8 +53,8 @@
 define <8 x i16> @vsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vsraQs16:
 ;CHECK: vsra.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = ashr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
         %tmp4 = add <8 x i16> %tmp1, %tmp3
 	ret <8 x i16> %tmp4
@@ -63,8 +63,8 @@
 define <4 x i32> @vsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vsraQs32:
 ;CHECK: vsra.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = ashr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 >
         %tmp4 = add <4 x i32> %tmp1, %tmp3
 	ret <4 x i32> %tmp4
@@ -73,8 +73,8 @@
 define <2 x i64> @vsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vsraQs64:
 ;CHECK: vsra.s64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = ashr <2 x i64> %tmp2, < i64 63, i64 63 >
         %tmp4 = add <2 x i64> %tmp1, %tmp3
 	ret <2 x i64> %tmp4
@@ -83,8 +83,8 @@
 define <8 x i8> @vsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsrau8:
 ;CHECK: vsra.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = lshr <8 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
         %tmp4 = add <8 x i8> %tmp1, %tmp3
 	ret <8 x i8> %tmp4
@@ -93,8 +93,8 @@
 define <4 x i16> @vsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vsrau16:
 ;CHECK: vsra.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = lshr <4 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15 >
         %tmp4 = add <4 x i16> %tmp1, %tmp3
 	ret <4 x i16> %tmp4
@@ -103,8 +103,8 @@
 define <2 x i32> @vsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vsrau32:
 ;CHECK: vsra.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = lshr <2 x i32> %tmp2, < i32 31, i32 31 >
         %tmp4 = add <2 x i32> %tmp1, %tmp3
 	ret <2 x i32> %tmp4
@@ -113,8 +113,8 @@
 define <1 x i64> @vsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vsrau64:
 ;CHECK: vsra.u64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = lshr <1 x i64> %tmp2, < i64 63 >
         %tmp4 = add <1 x i64> %tmp1, %tmp3
 	ret <1 x i64> %tmp4
@@ -123,8 +123,8 @@
 define <16 x i8> @vsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsraQu8:
 ;CHECK: vsra.u8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = lshr <16 x i8> %tmp2, < i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7 >
         %tmp4 = add <16 x i8> %tmp1, %tmp3
 	ret <16 x i8> %tmp4
@@ -133,8 +133,8 @@
 define <8 x i16> @vsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vsraQu16:
 ;CHECK: vsra.u16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = lshr <8 x i16> %tmp2, < i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15 >
         %tmp4 = add <8 x i16> %tmp1, %tmp3
 	ret <8 x i16> %tmp4
@@ -143,8 +143,8 @@
 define <4 x i32> @vsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vsraQu32:
 ;CHECK: vsra.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = lshr <4 x i32> %tmp2, < i32 31, i32 31, i32 31, i32 31 >
         %tmp4 = add <4 x i32> %tmp1, %tmp3
 	ret <4 x i32> %tmp4
@@ -153,8 +153,8 @@
 define <2 x i64> @vsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vsraQu64:
 ;CHECK: vsra.u64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = lshr <2 x i64> %tmp2, < i64 63, i64 63 >
         %tmp4 = add <2 x i64> %tmp1, %tmp3
 	ret <2 x i64> %tmp4
@@ -163,8 +163,8 @@
 define <8 x i8> @vrsras8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vrsras8:
 ;CHECK: vrsra.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
         %tmp4 = add <8 x i8> %tmp1, %tmp3
 	ret <8 x i8> %tmp4
@@ -173,8 +173,8 @@
 define <4 x i16> @vrsras16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vrsras16:
 ;CHECK: vrsra.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
         %tmp4 = add <4 x i16> %tmp1, %tmp3
 	ret <4 x i16> %tmp4
@@ -183,8 +183,8 @@
 define <2 x i32> @vrsras32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vrsras32:
 ;CHECK: vrsra.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
         %tmp4 = add <2 x i32> %tmp1, %tmp3
 	ret <2 x i32> %tmp4
@@ -193,8 +193,8 @@
 define <1 x i64> @vrsras64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vrsras64:
 ;CHECK: vrsra.s64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >)
         %tmp4 = add <1 x i64> %tmp1, %tmp3
 	ret <1 x i64> %tmp4
@@ -203,8 +203,8 @@
 define <8 x i8> @vrsrau8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vrsrau8:
 ;CHECK: vrsra.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %tmp2, <8 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
         %tmp4 = add <8 x i8> %tmp1, %tmp3
 	ret <8 x i8> %tmp4
@@ -213,8 +213,8 @@
 define <4 x i16> @vrsrau16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vrsrau16:
 ;CHECK: vrsra.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %tmp2, <4 x i16> < i16 -16, i16 -16, i16 -16, i16 -16 >)
         %tmp4 = add <4 x i16> %tmp1, %tmp3
 	ret <4 x i16> %tmp4
@@ -223,8 +223,8 @@
 define <2 x i32> @vrsrau32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vrsrau32:
 ;CHECK: vrsra.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %tmp2, <2 x i32> < i32 -32, i32 -32 >)
         %tmp4 = add <2 x i32> %tmp1, %tmp3
 	ret <2 x i32> %tmp4
@@ -233,8 +233,8 @@
 define <1 x i64> @vrsrau64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vrsrau64:
 ;CHECK: vrsra.u64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %tmp2, <1 x i64> < i64 -64 >)
         %tmp4 = add <1 x i64> %tmp1, %tmp3
 	ret <1 x i64> %tmp4
@@ -243,8 +243,8 @@
 define <16 x i8> @vrsraQs8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vrsraQs8:
 ;CHECK: vrsra.s8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
         %tmp4 = add <16 x i8> %tmp1, %tmp3
 	ret <16 x i8> %tmp4
@@ -253,8 +253,8 @@
 define <8 x i16> @vrsraQs16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vrsraQs16:
 ;CHECK: vrsra.s16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
         %tmp4 = add <8 x i16> %tmp1, %tmp3
 	ret <8 x i16> %tmp4
@@ -263,8 +263,8 @@
 define <4 x i32> @vrsraQs32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vrsraQs32:
 ;CHECK: vrsra.s32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
         %tmp4 = add <4 x i32> %tmp1, %tmp3
 	ret <4 x i32> %tmp4
@@ -273,8 +273,8 @@
 define <2 x i64> @vrsraQs64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vrsraQs64:
 ;CHECK: vrsra.s64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
         %tmp4 = add <2 x i64> %tmp1, %tmp3
 	ret <2 x i64> %tmp4
@@ -283,8 +283,8 @@
 define <16 x i8> @vrsraQu8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vrsraQu8:
 ;CHECK: vrsra.u8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %tmp2, <16 x i8> < i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8, i8 -8 >)
         %tmp4 = add <16 x i8> %tmp1, %tmp3
 	ret <16 x i8> %tmp4
@@ -293,8 +293,8 @@
 define <8 x i16> @vrsraQu16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vrsraQu16:
 ;CHECK: vrsra.u16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %tmp2, <8 x i16> < i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16 >)
         %tmp4 = add <8 x i16> %tmp1, %tmp3
 	ret <8 x i16> %tmp4
@@ -303,8 +303,8 @@
 define <4 x i32> @vrsraQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vrsraQu32:
 ;CHECK: vrsra.u32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %tmp2, <4 x i32> < i32 -32, i32 -32, i32 -32, i32 -32 >)
         %tmp4 = add <4 x i32> %tmp1, %tmp3
 	ret <4 x i32> %tmp4
@@ -313,8 +313,8 @@
 define <2 x i64> @vrsraQu64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vrsraQu64:
 ;CHECK: vrsra.u64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %tmp2, <2 x i64> < i64 -64, i64 -64 >)
         %tmp4 = add <2 x i64> %tmp1, %tmp3
 	ret <2 x i64> %tmp4
diff --git a/llvm/test/CodeGen/ARM/vst1.ll b/llvm/test/CodeGen/ARM/vst1.ll
index 723eb1d..f605fa4 100644
--- a/llvm/test/CodeGen/ARM/vst1.ll
+++ b/llvm/test/CodeGen/ARM/vst1.ll
@@ -4,7 +4,7 @@
 ;CHECK-LABEL: vst1i8:
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;CHECK: vst1.8 {d16}, [r0:64]
-	%tmp1 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %B
 	call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 16)
 	ret void
 }
@@ -13,7 +13,7 @@
 ;CHECK-LABEL: vst1i16:
 ;CHECK: vst1.16
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %B
 	call void @llvm.arm.neon.vst1.v4i16(i8* %tmp0, <4 x i16> %tmp1, i32 1)
 	ret void
 }
@@ -22,7 +22,7 @@
 ;CHECK-LABEL: vst1i32:
 ;CHECK: vst1.32
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %B
 	call void @llvm.arm.neon.vst1.v2i32(i8* %tmp0, <2 x i32> %tmp1, i32 1)
 	ret void
 }
@@ -31,7 +31,7 @@
 ;CHECK-LABEL: vst1f:
 ;CHECK: vst1.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %B
 	call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1)
 	ret void
 }
@@ -40,9 +40,9 @@
 define void @vst1f_update(float** %ptr, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vst1f_update:
 ;CHECK: vst1.32 {d16}, [r1]!
-	%A = load float** %ptr
+	%A = load float*, float** %ptr
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %B
 	call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1)
 	%tmp2 = getelementptr float, float* %A, i32 2
 	store float* %tmp2, float** %ptr
@@ -53,7 +53,7 @@
 ;CHECK-LABEL: vst1i64:
 ;CHECK: vst1.64
 	%tmp0 = bitcast i64* %A to i8*
-	%tmp1 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %B
 	call void @llvm.arm.neon.vst1.v1i64(i8* %tmp0, <1 x i64> %tmp1, i32 1)
 	ret void
 }
@@ -62,7 +62,7 @@
 ;CHECK-LABEL: vst1Qi8:
 ;Check the alignment value.  Max for this instruction is 128 bits:
 ;CHECK: vst1.8 {d16, d17}, [r0:64]
-	%tmp1 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %B
 	call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 8)
 	ret void
 }
@@ -72,7 +72,7 @@
 ;Check the alignment value.  Max for this instruction is 128 bits:
 ;CHECK: vst1.16 {d16, d17}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %B
 	call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 32)
 	ret void
 }
@@ -81,9 +81,9 @@
 define void @vst1Qi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
 ;CHECK-LABEL: vst1Qi16_update:
 ;CHECK: vst1.16 {d16, d17}, [r1:64], r2
-	%A = load i16** %ptr
+	%A = load i16*, i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %B
 	call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 8)
 	%tmp2 = getelementptr i16, i16* %A, i32 %inc
 	store i16* %tmp2, i16** %ptr
@@ -94,7 +94,7 @@
 ;CHECK-LABEL: vst1Qi32:
 ;CHECK: vst1.32
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %B
 	call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1, i32 1)
 	ret void
 }
@@ -103,7 +103,7 @@
 ;CHECK-LABEL: vst1Qf:
 ;CHECK: vst1.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %B
 	call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1, i32 1)
 	ret void
 }
@@ -112,7 +112,7 @@
 ;CHECK-LABEL: vst1Qi64:
 ;CHECK: vst1.64
 	%tmp0 = bitcast i64* %A to i8*
-	%tmp1 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %B
 	call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1, i32 1)
 	ret void
 }
@@ -121,7 +121,7 @@
 ;CHECK-LABEL: vst1Qf64:
 ;CHECK: vst1.64
 	%tmp0 = bitcast double* %A to i8*
-	%tmp1 = load <2 x double>* %B
+	%tmp1 = load <2 x double>, <2 x double>* %B
 	call void @llvm.arm.neon.vst1.v2f64(i8* %tmp0, <2 x double> %tmp1, i32 1)
 	ret void
 }
diff --git a/llvm/test/CodeGen/ARM/vst2.ll b/llvm/test/CodeGen/ARM/vst2.ll
index 2130e13..17c8a4b 100644
--- a/llvm/test/CodeGen/ARM/vst2.ll
+++ b/llvm/test/CodeGen/ARM/vst2.ll
@@ -4,7 +4,7 @@
 ;CHECK-LABEL: vst2i8:
 ;Check the alignment value.  Max for this instruction is 128 bits:
 ;CHECK: vst2.8 {d16, d17}, [r0:64]
-	%tmp1 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %B
 	call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
 	ret void
 }
@@ -13,8 +13,8 @@
 define void @vst2i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
 ;CHECK-LABEL: vst2i8_update:
 ;CHECK: vst2.8 {d16, d17}, [r1], r2
-	%A = load i8** %ptr
-	%tmp1 = load <8 x i8>* %B
+	%A = load i8*, i8** %ptr
+	%tmp1 = load <8 x i8>, <8 x i8>* %B
 	call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 4)
 	%tmp2 = getelementptr i8, i8* %A, i32 %inc
 	store i8* %tmp2, i8** %ptr
@@ -26,7 +26,7 @@
 ;Check the alignment value.  Max for this instruction is 128 bits:
 ;CHECK: vst2.16 {d16, d17}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %B
 	call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 32)
 	ret void
 }
@@ -35,7 +35,7 @@
 ;CHECK-LABEL: vst2i32:
 ;CHECK: vst2.32
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %B
 	call void @llvm.arm.neon.vst2.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
 	ret void
 }
@@ -44,7 +44,7 @@
 ;CHECK-LABEL: vst2f:
 ;CHECK: vst2.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %B
 	call void @llvm.arm.neon.vst2.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
 	ret void
 }
@@ -54,7 +54,7 @@
 ;Check the alignment value.  Max for this instruction is 128 bits:
 ;CHECK: vst1.64 {d16, d17}, [r0:128]
 	%tmp0 = bitcast i64* %A to i8*
-	%tmp1 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %B
 	call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 32)
 	ret void
 }
@@ -63,9 +63,9 @@
 define void @vst2i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vst2i64_update:
 ;CHECK: vst1.64 {d16, d17}, [r1:64]!
-	%A = load i64** %ptr
+	%A = load i64*, i64** %ptr
 	%tmp0 = bitcast i64* %A to i8*
-	%tmp1 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %B
 	call void @llvm.arm.neon.vst2.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 8)
 	%tmp2 = getelementptr i64, i64* %A, i32 2
 	store i64* %tmp2, i64** %ptr
@@ -76,7 +76,7 @@
 ;CHECK-LABEL: vst2Qi8:
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vst2.8 {d16, d17, d18, d19}, [r0:64]
-	%tmp1 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %B
 	call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 8)
 	ret void
 }
@@ -86,7 +86,7 @@
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vst2.16 {d16, d17, d18, d19}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %B
 	call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 16)
 	ret void
 }
@@ -96,7 +96,7 @@
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vst2.32 {d16, d17, d18, d19}, [r0:256]
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %B
 	call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 64)
 	ret void
 }
@@ -105,7 +105,7 @@
 ;CHECK-LABEL: vst2Qf:
 ;CHECK: vst2.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %B
 	call void @llvm.arm.neon.vst2.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
 	ret void
 }
@@ -113,7 +113,7 @@
 define i8* @vst2update(i8* %out, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vst2update:
 ;CHECK: vst2.16 {d16, d17}, [r0]!
-	%tmp1 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %B
 	tail call void @llvm.arm.neon.vst2.v4i16(i8* %out, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 2)
 	%t5 = getelementptr inbounds i8, i8* %out, i32 16
 	ret i8* %t5
@@ -122,7 +122,7 @@
 define i8* @vst2update2(i8 * %out, <4 x float> * %this) nounwind optsize ssp align 2 {
 ;CHECK-LABEL: vst2update2:
 ;CHECK: vst2.32 {d16, d17, d18, d19}, [r0]!
-  %tmp1 = load <4 x float>* %this
+  %tmp1 = load <4 x float>, <4 x float>* %this
   call void @llvm.arm.neon.vst2.v4f32(i8* %out, <4 x float> %tmp1, <4 x float> %tmp1, i32 4) nounwind
   %tmp2 = getelementptr inbounds i8, i8* %out, i32  32
   ret i8* %tmp2
diff --git a/llvm/test/CodeGen/ARM/vst3.ll b/llvm/test/CodeGen/ARM/vst3.ll
index 3a861dd..691ee3b 100644
--- a/llvm/test/CodeGen/ARM/vst3.ll
+++ b/llvm/test/CodeGen/ARM/vst3.ll
@@ -5,7 +5,7 @@
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;This test runs at -O0 so do not check for specific register numbers.
 ;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
-	%tmp1 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %B
 	call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 32)
 	ret void
 }
@@ -14,7 +14,7 @@
 ;CHECK-LABEL: vst3i16:
 ;CHECK: vst3.16
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %B
 	call void @llvm.arm.neon.vst3.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
 	ret void
 }
@@ -23,7 +23,7 @@
 ;CHECK-LABEL: vst3i32:
 ;CHECK: vst3.32
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %B
 	call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
 	ret void
 }
@@ -32,9 +32,9 @@
 define void @vst3i32_update(i32** %ptr, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vst3i32_update:
 ;CHECK: vst3.32 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
-	%A = load i32** %ptr
+	%A = load i32*, i32** %ptr
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %B
 	call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
 	%tmp2 = getelementptr i32, i32* %A, i32 6
 	store i32* %tmp2, i32** %ptr
@@ -45,7 +45,7 @@
 ;CHECK-LABEL: vst3f:
 ;CHECK: vst3.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %B
 	call void @llvm.arm.neon.vst3.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
 	ret void
 }
@@ -56,7 +56,7 @@
 ;This test runs at -O0 so do not check for specific register numbers.
 ;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
 	%tmp0 = bitcast i64* %A to i8*
-	%tmp1 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %B
 	call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 16)
 	ret void
 }
@@ -64,9 +64,9 @@
 define void @vst3i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vst3i64_update
 ;CHECK: vst1.64	{d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
-        %A = load i64** %ptr
+        %A = load i64*, i64** %ptr
         %tmp0 = bitcast i64* %A to i8*
-        %tmp1 = load <1 x i64>* %B
+        %tmp1 = load <1 x i64>, <1 x i64>* %B
         call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
         %tmp2 = getelementptr i64, i64* %A, i32 3
         store i64* %tmp2, i64** %ptr
@@ -79,7 +79,7 @@
 ;This test runs at -O0 so do not check for specific register numbers.
 ;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]!
 ;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
-	%tmp1 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %B
 	call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 32)
 	ret void
 }
@@ -89,7 +89,7 @@
 ;CHECK: vst3.16
 ;CHECK: vst3.16
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %B
 	call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
 	ret void
 }
@@ -99,9 +99,9 @@
 ;CHECK-LABEL: vst3Qi16_update:
 ;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
 ;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
-	%A = load i16** %ptr
+	%A = load i16*, i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %B
 	call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
 	%tmp2 = getelementptr i16, i16* %A, i32 24
 	store i16* %tmp2, i16** %ptr
@@ -113,7 +113,7 @@
 ;CHECK: vst3.32
 ;CHECK: vst3.32
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %B
 	call void @llvm.arm.neon.vst3.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
 	ret void
 }
@@ -123,7 +123,7 @@
 ;CHECK: vst3.32
 ;CHECK: vst3.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %B
 	call void @llvm.arm.neon.vst3.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
 	ret void
 }
diff --git a/llvm/test/CodeGen/ARM/vst4.ll b/llvm/test/CodeGen/ARM/vst4.ll
index e865884..c343c6c 100644
--- a/llvm/test/CodeGen/ARM/vst4.ll
+++ b/llvm/test/CodeGen/ARM/vst4.ll
@@ -4,7 +4,7 @@
 ;CHECK-LABEL: vst4i8:
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vst4.8 {d16, d17, d18, d19}, [r0:64]
-	%tmp1 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %B
 	call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
 	ret void
 }
@@ -13,8 +13,8 @@
 define void @vst4i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind {
 ;CHECK-LABEL: vst4i8_update:
 ;CHECK: vst4.8 {d16, d17, d18, d19}, [r1:128], r2
-	%A = load i8** %ptr
-	%tmp1 = load <8 x i8>* %B
+	%A = load i8*, i8** %ptr
+	%tmp1 = load <8 x i8>, <8 x i8>* %B
 	call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 16)
 	%tmp2 = getelementptr i8, i8* %A, i32 %inc
 	store i8* %tmp2, i8** %ptr
@@ -26,7 +26,7 @@
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vst4.16 {d16, d17, d18, d19}, [r0:128]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %B
 	call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 16)
 	ret void
 }
@@ -36,7 +36,7 @@
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vst4.32 {d16, d17, d18, d19}, [r0:256]
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %B
 	call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 32)
 	ret void
 }
@@ -45,7 +45,7 @@
 ;CHECK-LABEL: vst4f:
 ;CHECK: vst4.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %B
 	call void @llvm.arm.neon.vst4.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
 	ret void
 }
@@ -55,7 +55,7 @@
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vst1.64 {d16, d17, d18, d19}, [r0:256]
 	%tmp0 = bitcast i64* %A to i8*
-	%tmp1 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %B
 	call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 64)
 	ret void
 }
@@ -63,9 +63,9 @@
 define void @vst4i64_update(i64** %ptr, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vst4i64_update:
 ;CHECK: vst1.64	{d16, d17, d18, d19}, [r1]!
-        %A = load i64** %ptr
+        %A = load i64*, i64** %ptr
         %tmp0 = bitcast i64* %A to i8*
-        %tmp1 = load <1 x i64>* %B
+        %tmp1 = load <1 x i64>, <1 x i64>* %B
         call void @llvm.arm.neon.vst4.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
         %tmp2 = getelementptr i64, i64* %A, i32 4
         store i64* %tmp2, i64** %ptr
@@ -77,7 +77,7 @@
 ;Check the alignment value.  Max for this instruction is 256 bits:
 ;CHECK: vst4.8 {d16, d18, d20, d22}, [r0:256]!
 ;CHECK: vst4.8 {d17, d19, d21, d23}, [r0:256]
-	%tmp1 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %B
 	call void @llvm.arm.neon.vst4.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 64)
 	ret void
 }
@@ -88,7 +88,7 @@
 ;CHECK: vst4.16 {d16, d18, d20, d22}, [r0]!
 ;CHECK: vst4.16 {d17, d19, d21, d23}, [r0]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %B
 	call void @llvm.arm.neon.vst4.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
 	ret void
 }
@@ -98,7 +98,7 @@
 ;CHECK: vst4.32
 ;CHECK: vst4.32
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %B
 	call void @llvm.arm.neon.vst4.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
 	ret void
 }
@@ -108,7 +108,7 @@
 ;CHECK: vst4.32
 ;CHECK: vst4.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %B
 	call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
 	ret void
 }
@@ -118,9 +118,9 @@
 ;CHECK-LABEL: vst4Qf_update:
 ;CHECK: vst4.32 {d16, d18, d20, d22}, [r1]!
 ;CHECK: vst4.32 {d17, d19, d21, d23}, [r1]!
-	%A = load float** %ptr
+	%A = load float*, float** %ptr
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %B
 	call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
 	%tmp2 = getelementptr float, float* %A, i32 16
 	store float* %tmp2, float** %ptr
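
Every hunk in the vst2/vst3/vst4 files above is the same mechanical rewrite: the load's result type is now spelled out before the pointer operand, separated by a comma. A few lines of Python can sanity-check that a migrated test file contains no leftover one-token loads (a hypothetical review helper, not part of this patch; the pattern is deliberately simpler than a full parser and only catches %- and @-rooted operands):

import re
import sys

# The migrated form always has a comma after the result type, so a type
# followed directly by '*' and an operand indicates an unmigrated load.
old_form = re.compile(r"=\s*load (?:atomic )?(?:volatile )?[^,]*\*\s*[%@]")

for path in sys.argv[1:]:
    with open(path) as f:
        for lineno, line in enumerate(f, 1):
            if old_form.search(line):
                sys.stdout.write("%s:%d: %s" % (path, lineno, line))

Running it over the files touched here should print nothing once the hunks above are applied.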
diff --git a/llvm/test/CodeGen/ARM/vstlane.ll b/llvm/test/CodeGen/ARM/vstlane.ll
index af1ba9b..a457541 100644
--- a/llvm/test/CodeGen/ARM/vstlane.ll
+++ b/llvm/test/CodeGen/ARM/vstlane.ll
@@ -4,7 +4,7 @@
 ;CHECK-LABEL: vst1lanei8:
 ;Check the (default) alignment.
 ;CHECK: vst1.8 {d16[3]}, [r0]
-	%tmp1 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %B
         %tmp2 = extractelement <8 x i8> %tmp1, i32 3
         store i8 %tmp2, i8* %A, align 8
 	ret void
@@ -14,8 +14,8 @@
 define void @vst1lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vst1lanei8_update:
 ;CHECK: vst1.8 {d16[3]}, [{{r[0-9]}}]!
-	%A = load i8** %ptr
-	%tmp1 = load <8 x i8>* %B
+	%A = load i8*, i8** %ptr
+	%tmp1 = load <8 x i8>, <8 x i8>* %B
 	%tmp2 = extractelement <8 x i8> %tmp1, i32 3
 	store i8 %tmp2, i8* %A, align 8
 	%tmp3 = getelementptr i8, i8* %A, i32 1
@@ -27,7 +27,7 @@
 ;CHECK-LABEL: vst1lanei16:
 ;Check the alignment value.  Max for this instruction is 16 bits:
 ;CHECK: vst1.16 {d16[2]}, [r0:16]
-	%tmp1 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %B
         %tmp2 = extractelement <4 x i16> %tmp1, i32 2
         store i16 %tmp2, i16* %A, align 8
 	ret void
@@ -37,7 +37,7 @@
 ;CHECK-LABEL: vst1lanei32:
 ;Check the alignment value.  Max for this instruction is 32 bits:
 ;CHECK: vst1.32 {d16[1]}, [r0:32]
-	%tmp1 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %B
         %tmp2 = extractelement <2 x i32> %tmp1, i32 1
         store i32 %tmp2, i32* %A, align 8
 	ret void
@@ -46,7 +46,7 @@
 define void @vst1lanef(float* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vst1lanef:
 ;CHECK: vst1.32 {d16[1]}, [r0:32]
-	%tmp1 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %B
         %tmp2 = extractelement <2 x float> %tmp1, i32 1
         store float %tmp2, float* %A
 	ret void
@@ -56,7 +56,7 @@
 ;CHECK-LABEL: vst1laneQi8:
 ; // Can use scalar load. No need to use vectors.
 ; // CHE-CK: vst1.8 {d17[1]}, [r0]
-	%tmp1 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %B
         %tmp2 = extractelement <16 x i8> %tmp1, i32 9
         store i8 %tmp2, i8* %A, align 8
 	ret void
@@ -65,7 +65,7 @@
 define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vst1laneQi16:
 ;CHECK: vst1.16 {d17[1]}, [r0:16]
-	%tmp1 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %B
         %tmp2 = extractelement <8 x i16> %tmp1, i32 5
         store i16 %tmp2, i16* %A, align 8
 	ret void
@@ -75,7 +75,7 @@
 ;CHECK-LABEL: vst1laneQi32:
 ; // Can use scalar load. No need to use vectors.
 ; // CHE-CK: vst1.32 {d17[1]}, [r0:32]
-	%tmp1 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %B
         %tmp2 = extractelement <4 x i32> %tmp1, i32 3
         store i32 %tmp2, i32* %A, align 8
 	ret void
@@ -86,8 +86,8 @@
 ;CHECK-LABEL: vst1laneQi32_update:
 ; // Can use scalar load. No need to use vectors.
 ; // CHE-CK: vst1.32 {d17[1]}, [r1:32]!
-	%A = load i32** %ptr
-	%tmp1 = load <4 x i32>* %B
+	%A = load i32*, i32** %ptr
+	%tmp1 = load <4 x i32>, <4 x i32>* %B
 	%tmp2 = extractelement <4 x i32> %tmp1, i32 3
 	store i32 %tmp2, i32* %A, align 8
 	%tmp3 = getelementptr i32, i32* %A, i32 1
@@ -99,7 +99,7 @@
 ;CHECK-LABEL: vst1laneQf:
 ; // Can use scalar load. No need to use vectors.
 ; // CHE-CK: vst1.32 {d17[1]}, [r0]
-	%tmp1 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %B
         %tmp2 = extractelement <4 x float> %tmp1, i32 3
         store float %tmp2, float* %A
 	ret void
@@ -109,7 +109,7 @@
 ;CHECK-LABEL: vst2lanei8:
 ;Check the alignment value.  Max for this instruction is 16 bits:
 ;CHECK: vst2.8 {d16[1], d17[1]}, [r0:16]
-	%tmp1 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %B
 	call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
 	ret void
 }
@@ -119,7 +119,7 @@
 ;Check the alignment value.  Max for this instruction is 32 bits:
 ;CHECK: vst2.16 {d16[1], d17[1]}, [r0:32]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %B
 	call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
 	ret void
 }
@@ -128,9 +128,9 @@
 define void @vst2lanei16_update(i16** %ptr, <4 x i16>* %B, i32 %inc) nounwind {
 ;CHECK-LABEL: vst2lanei16_update:
 ;CHECK: vst2.16 {d16[1], d17[1]}, [r1], r2
-	%A = load i16** %ptr
+	%A = load i16*, i16** %ptr
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %B
 	call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 2)
 	%tmp2 = getelementptr i16, i16* %A, i32 %inc
 	store i16* %tmp2, i16** %ptr
@@ -141,7 +141,7 @@
 ;CHECK-LABEL: vst2lanei32:
 ;CHECK: vst2.32
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %B
 	call void @llvm.arm.neon.vst2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
 	ret void
 }
@@ -150,7 +150,7 @@
 ;CHECK-LABEL: vst2lanef:
 ;CHECK: vst2.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %B
 	call void @llvm.arm.neon.vst2lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
 	ret void
 }
@@ -160,7 +160,7 @@
 ;Check the (default) alignment.
 ;CHECK: vst2.16 {d17[1], d19[1]}, [r0]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %B
 	call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
 	ret void
 }
@@ -170,7 +170,7 @@
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;CHECK: vst2.32 {d17[0], d19[0]}, [r0:64]
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %B
 	call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
 	ret void
 }
@@ -179,7 +179,7 @@
 ;CHECK-LABEL: vst2laneQf:
 ;CHECK: vst2.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %B
 	call void @llvm.arm.neon.vst2lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 3, i32 1)
 	ret void
 }
@@ -196,7 +196,7 @@
 define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vst3lanei8:
 ;CHECK: vst3.8
-	%tmp1 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %B
 	call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
 	ret void
 }
@@ -206,7 +206,7 @@
 ;Check the (default) alignment value.  VST3 does not support alignment.
 ;CHECK: vst3.16 {d16[1], d17[1], d18[1]}, [r0]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %B
 	call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
 	ret void
 }
@@ -215,7 +215,7 @@
 ;CHECK-LABEL: vst3lanei32:
 ;CHECK: vst3.32
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %B
 	call void @llvm.arm.neon.vst3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
 	ret void
 }
@@ -224,7 +224,7 @@
 ;CHECK-LABEL: vst3lanef:
 ;CHECK: vst3.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %B
 	call void @llvm.arm.neon.vst3lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
 	ret void
 }
@@ -234,7 +234,7 @@
 ;Check the (default) alignment value.  VST3 does not support alignment.
 ;CHECK: vst3.16 {d17[2], d19[2], d21[2]}, [r0]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %B
 	call void @llvm.arm.neon.vst3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6, i32 8)
 	ret void
 }
@@ -243,7 +243,7 @@
 ;CHECK-LABEL: vst3laneQi32:
 ;CHECK: vst3.32
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %B
 	call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1)
 	ret void
 }
@@ -252,9 +252,9 @@
 define void @vst3laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vst3laneQi32_update:
 ;CHECK: vst3.32 {d16[0], d18[0], d20[0]}, [r1]!
-	%A = load i32** %ptr
+	%A = load i32*, i32** %ptr
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %B
 	call void @llvm.arm.neon.vst3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1)
 	%tmp2 = getelementptr i32, i32* %A, i32 3
 	store i32* %tmp2, i32** %ptr
@@ -265,7 +265,7 @@
 ;CHECK-LABEL: vst3laneQf:
 ;CHECK: vst3.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %B
 	call void @llvm.arm.neon.vst3lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
 	ret void
 }
@@ -284,7 +284,7 @@
 ;CHECK-LABEL: vst4lanei8:
 ;Check the alignment value.  Max for this instruction is 32 bits:
 ;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0:32]
-	%tmp1 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %B
 	call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
 	ret void
 }
@@ -293,8 +293,8 @@
 define void @vst4lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vst4lanei8_update:
 ;CHECK: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r1:32]!
-	%A = load i8** %ptr
-	%tmp1 = load <8 x i8>* %B
+	%A = load i8*, i8** %ptr
+	%tmp1 = load <8 x i8>, <8 x i8>* %B
 	call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
 	%tmp2 = getelementptr i8, i8* %A, i32 4
 	store i8* %tmp2, i8** %ptr
@@ -305,7 +305,7 @@
 ;CHECK-LABEL: vst4lanei16:
 ;CHECK: vst4.16
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %B
 	call void @llvm.arm.neon.vst4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
 	ret void
 }
@@ -315,7 +315,7 @@
 ;Check the alignment value.  Max for this instruction is 128 bits:
 ;CHECK: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0:128]
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %B
 	call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16)
 	ret void
 }
@@ -324,7 +324,7 @@
 ;CHECK-LABEL: vst4lanef:
 ;CHECK: vst4.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %B
 	call void @llvm.arm.neon.vst4lane.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
 	ret void
 }
@@ -334,7 +334,7 @@
 ;Check the alignment value.  Max for this instruction is 64 bits:
 ;CHECK: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0:64]
 	%tmp0 = bitcast i16* %A to i8*
-	%tmp1 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %B
 	call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16)
 	ret void
 }
@@ -344,7 +344,7 @@
 ;Check the (default) alignment.
 ;CHECK: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
 	%tmp0 = bitcast i32* %A to i8*
-	%tmp1 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %B
 	call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
 	ret void
 }
@@ -353,7 +353,7 @@
 ;CHECK-LABEL: vst4laneQf:
 ;CHECK: vst4.32
 	%tmp0 = bitcast float* %A to i8*
-	%tmp1 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %B
 	call void @llvm.arm.neon.vst4lane.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
 	ret void
 }
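
One detail worth noting in the *_update tests above: when the loaded value is itself a pointer, the two types in the migrated form differ by exactly one level of indirection, with the result type first and the operand's type second, e.g.:

	%A = load i32*, i32** %ptr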
diff --git a/llvm/test/CodeGen/ARM/vsub.ll b/llvm/test/CodeGen/ARM/vsub.ll
index d1a094b..75fb7d4 100644
--- a/llvm/test/CodeGen/ARM/vsub.ll
+++ b/llvm/test/CodeGen/ARM/vsub.ll
@@ -3,8 +3,8 @@
 define <8 x i8> @vsubi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsubi8:
 ;CHECK: vsub.i8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = sub <8 x i8> %tmp1, %tmp2
 	ret <8 x i8> %tmp3
 }
@@ -12,8 +12,8 @@
 define <4 x i16> @vsubi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vsubi16:
 ;CHECK: vsub.i16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = sub <4 x i16> %tmp1, %tmp2
 	ret <4 x i16> %tmp3
 }
@@ -21,8 +21,8 @@
 define <2 x i32> @vsubi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vsubi32:
 ;CHECK: vsub.i32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = sub <2 x i32> %tmp1, %tmp2
 	ret <2 x i32> %tmp3
 }
@@ -30,8 +30,8 @@
 define <1 x i64> @vsubi64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
 ;CHECK-LABEL: vsubi64:
 ;CHECK: vsub.i64
-	%tmp1 = load <1 x i64>* %A
-	%tmp2 = load <1 x i64>* %B
+	%tmp1 = load <1 x i64>, <1 x i64>* %A
+	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = sub <1 x i64> %tmp1, %tmp2
 	ret <1 x i64> %tmp3
 }
@@ -39,8 +39,8 @@
 define <2 x float> @vsubf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: vsubf32:
 ;CHECK: vsub.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = fsub <2 x float> %tmp1, %tmp2
 	ret <2 x float> %tmp3
 }
@@ -48,8 +48,8 @@
 define <16 x i8> @vsubQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsubQi8:
 ;CHECK: vsub.i8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = sub <16 x i8> %tmp1, %tmp2
 	ret <16 x i8> %tmp3
 }
@@ -57,8 +57,8 @@
 define <8 x i16> @vsubQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vsubQi16:
 ;CHECK: vsub.i16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = sub <8 x i16> %tmp1, %tmp2
 	ret <8 x i16> %tmp3
 }
@@ -66,8 +66,8 @@
 define <4 x i32> @vsubQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vsubQi32:
 ;CHECK: vsub.i32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = sub <4 x i32> %tmp1, %tmp2
 	ret <4 x i32> %tmp3
 }
@@ -75,8 +75,8 @@
 define <2 x i64> @vsubQi64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vsubQi64:
 ;CHECK: vsub.i64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = sub <2 x i64> %tmp1, %tmp2
 	ret <2 x i64> %tmp3
 }
@@ -84,8 +84,8 @@
 define <4 x float> @vsubQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK-LABEL: vsubQf32:
 ;CHECK: vsub.f32
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = fsub <4 x float> %tmp1, %tmp2
 	ret <4 x float> %tmp3
 }
@@ -120,8 +120,8 @@
 define <8 x i8> @vrsubhni16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: vrsubhni16:
 ;CHECK: vrsubhn.i16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %tmp1, <8 x i16> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -129,8 +129,8 @@
 define <4 x i16> @vrsubhni32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK-LABEL: vrsubhni32:
 ;CHECK: vrsubhn.i32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %tmp1, <4 x i32> %tmp2)
 	ret <4 x i16> %tmp3
 }
@@ -138,8 +138,8 @@
 define <2 x i32> @vrsubhni64(<2 x i64>* %A, <2 x i64>* %B) nounwind {
 ;CHECK-LABEL: vrsubhni64:
 ;CHECK: vrsubhn.i64
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i64>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %tmp1, <2 x i64> %tmp2)
 	ret <2 x i32> %tmp3
 }
@@ -151,8 +151,8 @@
 define <8 x i16> @vsubls8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsubls8:
 ;CHECK: vsubl.s8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = sext <8 x i8> %tmp1 to <8 x i16>
 	%tmp4 = sext <8 x i8> %tmp2 to <8 x i16>
 	%tmp5 = sub <8 x i16> %tmp3, %tmp4
@@ -162,8 +162,8 @@
 define <4 x i32> @vsubls16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vsubls16:
 ;CHECK: vsubl.s16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = sext <4 x i16> %tmp1 to <4 x i32>
 	%tmp4 = sext <4 x i16> %tmp2 to <4 x i32>
 	%tmp5 = sub <4 x i32> %tmp3, %tmp4
@@ -173,8 +173,8 @@
 define <2 x i64> @vsubls32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vsubls32:
 ;CHECK: vsubl.s32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = sext <2 x i32> %tmp1 to <2 x i64>
 	%tmp4 = sext <2 x i32> %tmp2 to <2 x i64>
 	%tmp5 = sub <2 x i64> %tmp3, %tmp4
@@ -184,8 +184,8 @@
 define <8 x i16> @vsublu8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsublu8:
 ;CHECK: vsubl.u8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = zext <8 x i8> %tmp1 to <8 x i16>
 	%tmp4 = zext <8 x i8> %tmp2 to <8 x i16>
 	%tmp5 = sub <8 x i16> %tmp3, %tmp4
@@ -195,8 +195,8 @@
 define <4 x i32> @vsublu16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vsublu16:
 ;CHECK: vsubl.u16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = zext <4 x i16> %tmp1 to <4 x i32>
 	%tmp4 = zext <4 x i16> %tmp2 to <4 x i32>
 	%tmp5 = sub <4 x i32> %tmp3, %tmp4
@@ -206,8 +206,8 @@
 define <2 x i64> @vsublu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vsublu32:
 ;CHECK: vsubl.u32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = zext <2 x i32> %tmp1 to <2 x i64>
 	%tmp4 = zext <2 x i32> %tmp2 to <2 x i64>
 	%tmp5 = sub <2 x i64> %tmp3, %tmp4
@@ -217,8 +217,8 @@
 define <8 x i16> @vsubws8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsubws8:
 ;CHECK: vsubw.s8
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = sext <8 x i8> %tmp2 to <8 x i16>
 	%tmp4 = sub <8 x i16> %tmp1, %tmp3
 	ret <8 x i16> %tmp4
@@ -227,8 +227,8 @@
 define <4 x i32> @vsubws16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vsubws16:
 ;CHECK: vsubw.s16
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = sext <4 x i16> %tmp2 to <4 x i32>
 	%tmp4 = sub <4 x i32> %tmp1, %tmp3
 	ret <4 x i32> %tmp4
@@ -237,8 +237,8 @@
 define <2 x i64> @vsubws32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vsubws32:
 ;CHECK: vsubw.s32
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = sext <2 x i32> %tmp2 to <2 x i64>
 	%tmp4 = sub <2 x i64> %tmp1, %tmp3
 	ret <2 x i64> %tmp4
@@ -247,8 +247,8 @@
 define <8 x i16> @vsubwu8(<8 x i16>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vsubwu8:
 ;CHECK: vsubw.u8
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = zext <8 x i8> %tmp2 to <8 x i16>
 	%tmp4 = sub <8 x i16> %tmp1, %tmp3
 	ret <8 x i16> %tmp4
@@ -257,8 +257,8 @@
 define <4 x i32> @vsubwu16(<4 x i32>* %A, <4 x i16>* %B) nounwind {
 ;CHECK-LABEL: vsubwu16:
 ;CHECK: vsubw.u16
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = zext <4 x i16> %tmp2 to <4 x i32>
 	%tmp4 = sub <4 x i32> %tmp1, %tmp3
 	ret <4 x i32> %tmp4
@@ -267,8 +267,8 @@
 define <2 x i64> @vsubwu32(<2 x i64>* %A, <2 x i32>* %B) nounwind {
 ;CHECK-LABEL: vsubwu32:
 ;CHECK: vsubw.u32
-	%tmp1 = load <2 x i64>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i64>, <2 x i64>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = zext <2 x i32> %tmp2 to <2 x i64>
 	%tmp4 = sub <2 x i64> %tmp1, %tmp3
 	ret <2 x i64> %tmp4
diff --git a/llvm/test/CodeGen/ARM/vtbl.ll b/llvm/test/CodeGen/ARM/vtbl.ll
index 32258a3..e4dd572 100644
--- a/llvm/test/CodeGen/ARM/vtbl.ll
+++ b/llvm/test/CodeGen/ARM/vtbl.ll
@@ -7,8 +7,8 @@
 define <8 x i8> @vtbl1(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK-LABEL: vtbl1:
 ;CHECK: vtbl.8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %tmp1, <8 x i8> %tmp2)
 	ret <8 x i8> %tmp3
 }
@@ -16,8 +16,8 @@
 define <8 x i8> @vtbl2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B) nounwind {
 ;CHECK-LABEL: vtbl2:
 ;CHECK: vtbl.8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load %struct.__neon_int8x8x2_t* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load %struct.__neon_int8x8x2_t, %struct.__neon_int8x8x2_t* %B
         %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
 	%tmp5 = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4)
@@ -27,8 +27,8 @@
 define <8 x i8> @vtbl3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B) nounwind {
 ;CHECK-LABEL: vtbl3:
 ;CHECK: vtbl.8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load %struct.__neon_int8x8x3_t* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load %struct.__neon_int8x8x3_t, %struct.__neon_int8x8x3_t* %B
         %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
         %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
@@ -39,8 +39,8 @@
 define <8 x i8> @vtbl4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B) nounwind {
 ;CHECK-LABEL: vtbl4:
 ;CHECK: vtbl.8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load %struct.__neon_int8x8x4_t* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load %struct.__neon_int8x8x4_t, %struct.__neon_int8x8x4_t* %B
         %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
         %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
@@ -52,9 +52,9 @@
 define <8 x i8> @vtbx1(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 ;CHECK-LABEL: vtbx1:
 ;CHECK: vtbx.8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
-	%tmp3 = load <8 x i8>* %C
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
+	%tmp3 = load <8 x i8>, <8 x i8>* %C
 	%tmp4 = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i8> %tmp3)
 	ret <8 x i8> %tmp4
 }
@@ -62,11 +62,11 @@
 define <8 x i8> @vtbx2(<8 x i8>* %A, %struct.__neon_int8x8x2_t* %B, <8 x i8>* %C) nounwind {
 ;CHECK-LABEL: vtbx2:
 ;CHECK: vtbx.8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load %struct.__neon_int8x8x2_t* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load %struct.__neon_int8x8x2_t, %struct.__neon_int8x8x2_t* %B
         %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
-	%tmp5 = load <8 x i8>* %C
+	%tmp5 = load <8 x i8>, <8 x i8>* %C
 	%tmp6 = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5)
 	ret <8 x i8> %tmp6
 }
@@ -74,12 +74,12 @@
 define <8 x i8> @vtbx3(<8 x i8>* %A, %struct.__neon_int8x8x3_t* %B, <8 x i8>* %C) nounwind {
 ;CHECK-LABEL: vtbx3:
 ;CHECK: vtbx.8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load %struct.__neon_int8x8x3_t* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load %struct.__neon_int8x8x3_t, %struct.__neon_int8x8x3_t* %B
         %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1
         %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2
-	%tmp6 = load <8 x i8>* %C
+	%tmp6 = load <8 x i8>, <8 x i8>* %C
 	%tmp7 = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6)
 	ret <8 x i8> %tmp7
 }
@@ -87,13 +87,13 @@
 define <8 x i8> @vtbx4(<8 x i8>* %A, %struct.__neon_int8x8x4_t* %B, <8 x i8>* %C) nounwind {
 ;CHECK-LABEL: vtbx4:
 ;CHECK: vtbx.8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load %struct.__neon_int8x8x4_t* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load %struct.__neon_int8x8x4_t, %struct.__neon_int8x8x4_t* %B
         %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0
         %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1
         %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2
         %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3
-	%tmp7 = load <8 x i8>* %C
+	%tmp7 = load <8 x i8>, <8 x i8>* %C
 	%tmp8 = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %tmp1, <8 x i8> %tmp3, <8 x i8> %tmp4, <8 x i8> %tmp5, <8 x i8> %tmp6, <8 x i8> %tmp7)
 	ret <8 x i8> %tmp8
 }
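
The vtbl/vtbx hunks show the same rewrite applied to loads of first-class aggregates: the struct type is simply written twice, once as the explicit result type and once inside the pointer type of the operand, e.g.:

	%tmp2 = load %struct.__neon_int8x8x2_t, %struct.__neon_int8x8x2_t* %B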
diff --git a/llvm/test/CodeGen/ARM/vtrn.ll b/llvm/test/CodeGen/ARM/vtrn.ll
index cdae7f8..caa5bec 100644
--- a/llvm/test/CodeGen/ARM/vtrn.ll
+++ b/llvm/test/CodeGen/ARM/vtrn.ll
@@ -4,8 +4,8 @@
 ;CHECK-LABEL: vtrni8:
 ;CHECK: vtrn.8
 ;CHECK-NEXT: vadd.i8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
         %tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -16,8 +16,8 @@
 ;CHECK-LABEL: vtrni16:
 ;CHECK: vtrn.16
 ;CHECK-NEXT: vadd.i16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 	%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
         %tmp5 = add <4 x i16> %tmp3, %tmp4
@@ -28,8 +28,8 @@
 ;CHECK-LABEL: vtrni32:
 ;CHECK: vtrn.32
 ;CHECK-NEXT: vadd.i32
-	%tmp1 = load <2 x i32>* %A
-	%tmp2 = load <2 x i32>* %B
+	%tmp1 = load <2 x i32>, <2 x i32>* %A
+	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 0, i32 2>
 	%tmp4 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <2 x i32> <i32 1, i32 3>
         %tmp5 = add <2 x i32> %tmp3, %tmp4
@@ -40,8 +40,8 @@
 ;CHECK-LABEL: vtrnf:
 ;CHECK: vtrn.32
 ;CHECK-NEXT: vadd.f32
-	%tmp1 = load <2 x float>* %A
-	%tmp2 = load <2 x float>* %B
+	%tmp1 = load <2 x float>, <2 x float>* %A
+	%tmp2 = load <2 x float>, <2 x float>* %B
 	%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 0, i32 2>
 	%tmp4 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <2 x i32> <i32 1, i32 3>
         %tmp5 = fadd <2 x float> %tmp3, %tmp4
@@ -52,8 +52,8 @@
 ;CHECK-LABEL: vtrnQi8:
 ;CHECK: vtrn.8
 ;CHECK-NEXT: vadd.i8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
 	%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
         %tmp5 = add <16 x i8> %tmp3, %tmp4
@@ -64,8 +64,8 @@
 ;CHECK-LABEL: vtrnQi16:
 ;CHECK: vtrn.16
 ;CHECK-NEXT: vadd.i16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
 	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
         %tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -76,8 +76,8 @@
 ;CHECK-LABEL: vtrnQi32:
 ;CHECK: vtrn.32
 ;CHECK-NEXT: vadd.i32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 	%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
         %tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -88,8 +88,8 @@
 ;CHECK-LABEL: vtrnQf:
 ;CHECK: vtrn.32
 ;CHECK-NEXT: vadd.f32
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
 	%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
         %tmp5 = fadd <4 x float> %tmp3, %tmp4
@@ -102,8 +102,8 @@
 ;CHECK-LABEL: vtrni8_undef:
 ;CHECK: vtrn.8
 ;CHECK-NEXT: vadd.i8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 2, i32 10, i32 undef, i32 12, i32 6, i32 14>
 	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 undef, i32 undef, i32 15>
         %tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -114,8 +114,8 @@
 ;CHECK-LABEL: vtrnQi16_undef:
 ;CHECK: vtrn.16
 ;CHECK-NEXT: vadd.i16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 undef, i32 undef, i32 4, i32 12, i32 6, i32 14>
 	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 undef, i32 3, i32 11, i32 5, i32 13, i32 undef, i32 undef>
         %tmp5 = add <8 x i16> %tmp3, %tmp4
diff --git a/llvm/test/CodeGen/ARM/vuzp.ll b/llvm/test/CodeGen/ARM/vuzp.ll
index 832be6c..7a7306a265 100644
--- a/llvm/test/CodeGen/ARM/vuzp.ll
+++ b/llvm/test/CodeGen/ARM/vuzp.ll
@@ -4,8 +4,8 @@
 ;CHECK-LABEL: vuzpi8:
 ;CHECK: vuzp.8
 ;CHECK-NEXT: vadd.i8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
         %tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -16,8 +16,8 @@
 ;CHECK-LABEL: vuzpi16:
 ;CHECK: vuzp.16
 ;CHECK-NEXT: vadd.i16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 	%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
         %tmp5 = add <4 x i16> %tmp3, %tmp4
@@ -30,8 +30,8 @@
 ;CHECK-LABEL: vuzpQi8:
 ;CHECK: vuzp.8
 ;CHECK-NEXT: vadd.i8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
 	%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
         %tmp5 = add <16 x i8> %tmp3, %tmp4
@@ -42,8 +42,8 @@
 ;CHECK-LABEL: vuzpQi16:
 ;CHECK: vuzp.16
 ;CHECK-NEXT: vadd.i16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
         %tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -54,8 +54,8 @@
 ;CHECK-LABEL: vuzpQi32:
 ;CHECK: vuzp.32
 ;CHECK-NEXT: vadd.i32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 	%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
         %tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -66,8 +66,8 @@
 ;CHECK-LABEL: vuzpQf:
 ;CHECK: vuzp.32
 ;CHECK-NEXT: vadd.f32
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 	%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
         %tmp5 = fadd <4 x float> %tmp3, %tmp4
@@ -80,8 +80,8 @@
 ;CHECK-LABEL: vuzpi8_undef:
 ;CHECK: vuzp.8
 ;CHECK-NEXT: vadd.i8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 2, i32 undef, i32 undef, i32 8, i32 10, i32 12, i32 14>
 	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15>
         %tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -92,8 +92,8 @@
 ;CHECK-LABEL: vuzpQi16_undef:
 ;CHECK: vuzp.16
 ;CHECK-NEXT: vadd.i16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 undef, i32 4, i32 undef, i32 8, i32 10, i32 12, i32 14>
 	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 1, i32 3, i32 5, i32 undef, i32 undef, i32 11, i32 13, i32 15>
         %tmp5 = add <8 x i16> %tmp3, %tmp4
diff --git a/llvm/test/CodeGen/ARM/vzip.ll b/llvm/test/CodeGen/ARM/vzip.ll
index f74dc62..a1b5b45 100644
--- a/llvm/test/CodeGen/ARM/vzip.ll
+++ b/llvm/test/CodeGen/ARM/vzip.ll
@@ -4,8 +4,8 @@
 ;CHECK-LABEL: vzipi8:
 ;CHECK: vzip.8
 ;CHECK-NEXT: vadd.i8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
         %tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -16,8 +16,8 @@
 ;CHECK-LABEL: vzipi16:
 ;CHECK: vzip.16
 ;CHECK-NEXT: vadd.i16
-	%tmp1 = load <4 x i16>* %A
-	%tmp2 = load <4 x i16>* %B
+	%tmp1 = load <4 x i16>, <4 x i16>* %A
+	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 	%tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
         %tmp5 = add <4 x i16> %tmp3, %tmp4
@@ -30,8 +30,8 @@
 ;CHECK-LABEL: vzipQi8:
 ;CHECK: vzip.8
 ;CHECK-NEXT: vadd.i8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
 	%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
         %tmp5 = add <16 x i8> %tmp3, %tmp4
@@ -42,8 +42,8 @@
 ;CHECK-LABEL: vzipQi16:
 ;CHECK: vzip.16
 ;CHECK-NEXT: vadd.i16
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
 	%tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
         %tmp5 = add <8 x i16> %tmp3, %tmp4
@@ -54,8 +54,8 @@
 ;CHECK-LABEL: vzipQi32:
 ;CHECK: vzip.32
 ;CHECK-NEXT: vadd.i32
-	%tmp1 = load <4 x i32>* %A
-	%tmp2 = load <4 x i32>* %B
+	%tmp1 = load <4 x i32>, <4 x i32>* %A
+	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 	%tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
         %tmp5 = add <4 x i32> %tmp3, %tmp4
@@ -66,8 +66,8 @@
 ;CHECK-LABEL: vzipQf:
 ;CHECK: vzip.32
 ;CHECK-NEXT: vadd.f32
-	%tmp1 = load <4 x float>* %A
-	%tmp2 = load <4 x float>* %B
+	%tmp1 = load <4 x float>, <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %B
 	%tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
 	%tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
         %tmp5 = fadd <4 x float> %tmp3, %tmp4
@@ -80,8 +80,8 @@
 ;CHECK-LABEL: vzipi8_undef:
 ;CHECK: vzip.8
 ;CHECK-NEXT: vadd.i8
-	%tmp1 = load <8 x i8>* %A
-	%tmp2 = load <8 x i8>* %B
+	%tmp1 = load <8 x i8>, <8 x i8>* %A
+	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
 	%tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
         %tmp5 = add <8 x i8> %tmp3, %tmp4
@@ -92,8 +92,8 @@
 ;CHECK-LABEL: vzipQi8_undef:
 ;CHECK: vzip.8
 ;CHECK-NEXT: vadd.i8
-	%tmp1 = load <16 x i8>* %A
-	%tmp2 = load <16 x i8>* %B
+	%tmp1 = load <16 x i8>, <16 x i8>* %A
+	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
 	%tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
         %tmp5 = add <16 x i8> %tmp3, %tmp4
diff --git a/llvm/test/CodeGen/ARM/zextload_demandedbits.ll b/llvm/test/CodeGen/ARM/zextload_demandedbits.ll
index 0b41265..ba7393c 100644
--- a/llvm/test/CodeGen/ARM/zextload_demandedbits.ll
+++ b/llvm/test/CodeGen/ARM/zextload_demandedbits.ll
@@ -15,7 +15,7 @@
 define void @quux(%struct.eggs* %arg) {
 bb:
   %tmp1 = getelementptr inbounds %struct.eggs, %struct.eggs* %arg, i32 0, i32 1
-  %0 = load i16* %tmp1, align 2
+  %0 = load i16, i16* %tmp1, align 2
   %tobool = icmp eq i16 %0, 0
   br i1 %tobool, label %bb16, label %bb3
 
diff --git a/llvm/test/CodeGen/BPF/basictest.ll b/llvm/test/CodeGen/BPF/basictest.ll
index f789532..c0b6af4 100644
--- a/llvm/test/CodeGen/BPF/basictest.ll
+++ b/llvm/test/CodeGen/BPF/basictest.ll
@@ -20,7 +20,7 @@
 
 @G = external global i8
 define zeroext i8 @loadG() {
-  %tmp = load i8* @G
+  %tmp = load i8, i8* @G
   ret i8 %tmp
 ; CHECK-LABEL: loadG:
 ; CHECK: ld_64 r1
diff --git a/llvm/test/CodeGen/BPF/ex1.ll b/llvm/test/CodeGen/BPF/ex1.ll
index 9dd26ea3..0145851 100644
--- a/llvm/test/CodeGen/BPF/ex1.ll
+++ b/llvm/test/CodeGen/BPF/ex1.ll
@@ -14,7 +14,7 @@
   %1 = getelementptr inbounds [3 x i8], [3 x i8]* %devname, i64 0, i64 0
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* getelementptr inbounds ([3 x i8]* @bpf_prog1.devname, i64 0, i64 0), i64 3, i32 1, i1 false)
   %2 = getelementptr inbounds %struct.bpf_context, %struct.bpf_context* %ctx, i64 0, i32 0
-  %3 = load i64* %2, align 8
+  %3 = load i64, i64* %2, align 8
   %4 = inttoptr i64 %3 to %struct.sk_buff*
   %5 = getelementptr inbounds %struct.sk_buff, %struct.sk_buff* %4, i64 0, i32 2
   %6 = bitcast i64* %5 to i8*
diff --git a/llvm/test/CodeGen/BPF/intrinsics.ll b/llvm/test/CodeGen/BPF/intrinsics.ll
index 9a078fb..e0f050e 100644
--- a/llvm/test/CodeGen/BPF/intrinsics.ll
+++ b/llvm/test/CodeGen/BPF/intrinsics.ll
@@ -4,11 +4,11 @@
 define i32 @ld_b(i64 %foo, i64* nocapture %bar, i8* %ctx, i8* %ctx2) #0 {
   %1 = tail call i64 @llvm.bpf.load.byte(i8* %ctx, i64 123) #2
   %2 = add i64 %1, %foo
-  %3 = load volatile i64* %bar, align 8
+  %3 = load volatile i64, i64* %bar, align 8
   %4 = add i64 %2, %3
   %5 = tail call i64 @llvm.bpf.load.byte(i8* %ctx2, i64 %foo) #2
   %6 = add i64 %4, %5
-  %7 = load volatile i64* %bar, align 8
+  %7 = load volatile i64, i64* %bar, align 8
   %8 = add i64 %6, %7
   %9 = trunc i64 %8 to i32
   ret i32 %9
diff --git a/llvm/test/CodeGen/BPF/load.ll b/llvm/test/CodeGen/BPF/load.ll
index e3bcce1..fcfce49 100644
--- a/llvm/test/CodeGen/BPF/load.ll
+++ b/llvm/test/CodeGen/BPF/load.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=bpf | FileCheck %s
 
 define i16 @am1(i16* %a) nounwind {
-  %1 = load i16* %a
+  %1 = load i16, i16* %a
   ret i16 %1
 }
 ; CHECK-LABEL: am1:
@@ -10,14 +10,14 @@
 @foo = external global i16
 
 define i16 @am2() nounwind {
-  %1 = load i16* @foo
+  %1 = load i16, i16* @foo
   ret i16 %1
 }
 ; CHECK-LABEL: am2:
 ; CHECK: ldh r0, 0(r1)
 
 define i16 @am4() nounwind {
-  %1 = load volatile i16* inttoptr(i16 32 to i16*)
+  %1 = load volatile i16, i16* inttoptr(i16 32 to i16*)
   ret i16 %1
 }
 ; CHECK-LABEL: am4:
@@ -26,7 +26,7 @@
 
 define i16 @am5(i16* %a) nounwind {
   %1 = getelementptr i16, i16* %a, i16 2
-  %2 = load i16* %1
+  %2 = load i16, i16* %1
   ret i16 %2
 }
 ; CHECK-LABEL: am5:
@@ -36,7 +36,7 @@
 @baz = common global %S zeroinitializer, align 1
 
 define i16 @am6() nounwind {
-  %1 = load i16* getelementptr (%S* @baz, i32 0, i32 1)
+  %1 = load i16, i16* getelementptr (%S* @baz, i32 0, i32 1)
   ret i16 %1
 }
 ; CHECK-LABEL: am6:
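
The BPF/load.ll hunks also exercise operands that are constant expressions rather than SSA values. The explicit result type is inserted in exactly the same way, while the constant expression operand itself (the inttoptr and the constant getelementptr below) keeps the spelling it had before this patch:

  %1 = load volatile i16, i16* inttoptr(i16 32 to i16*)
  %1 = load i16, i16* getelementptr (%S* @baz, i32 0, i32 1)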
diff --git a/llvm/test/CodeGen/BPF/loops.ll b/llvm/test/CodeGen/BPF/loops.ll
index fd47f2f..4798d78 100644
--- a/llvm/test/CodeGen/BPF/loops.ll
+++ b/llvm/test/CodeGen/BPF/loops.ll
@@ -11,7 +11,7 @@
   %arrayidx = getelementptr i16, i16* %a, i16 %i.010   ; <i16*> [#uses=1]
 ; CHECK-LABEL: add:
 ; CHECK: add r{{[0-9]+}}, r{{[0-9]+}}
-  %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
+  %tmp4 = load i16, i16* %arrayidx                     ; <i16> [#uses=1]
   %add = add i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
   %inc = add i16 %i.010, 1                        ; <i16> [#uses=2]
   %exitcond = icmp eq i16 %inc, %n                ; <i1> [#uses=1]
@@ -33,7 +33,7 @@
   %arrayidx = getelementptr i16, i16* %a, i16 %i.010   ; <i16*> [#uses=1]
 ; CHECK-LABEL: sub:
 ; CHECK: sub r{{[0-9]+}}, r{{[0-9]+}}
-  %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
+  %tmp4 = load i16, i16* %arrayidx                     ; <i16> [#uses=1]
   %add = sub i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
   %inc = add i16 %i.010, 1                        ; <i16> [#uses=2]
   %exitcond = icmp eq i16 %inc, %n                ; <i1> [#uses=1]
@@ -55,7 +55,7 @@
   %arrayidx = getelementptr i16, i16* %a, i16 %i.010   ; <i16*> [#uses=1]
 ; CHECK-LABEL: or:
 ; CHECK: or r{{[0-9]+}}, r{{[0-9]+}}
-  %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
+  %tmp4 = load i16, i16* %arrayidx                     ; <i16> [#uses=1]
   %add = or i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
   %inc = add i16 %i.010, 1                        ; <i16> [#uses=2]
   %exitcond = icmp eq i16 %inc, %n                ; <i1> [#uses=1]
@@ -77,7 +77,7 @@
   %arrayidx = getelementptr i16, i16* %a, i16 %i.010   ; <i16*> [#uses=1]
 ; CHECK-LABEL: xor:
 ; CHECK: xor r{{[0-9]+}}, r{{[0-9]+}}
-  %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
+  %tmp4 = load i16, i16* %arrayidx                     ; <i16> [#uses=1]
   %add = xor i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
   %inc = add i16 %i.010, 1                        ; <i16> [#uses=2]
   %exitcond = icmp eq i16 %inc, %n                ; <i1> [#uses=1]
@@ -99,7 +99,7 @@
   %arrayidx = getelementptr i16, i16* %a, i16 %i.010   ; <i16*> [#uses=1]
 ; CHECK-LABEL: and:
 ; CHECK: and r{{[0-9]+}}, r{{[0-9]+}}
-  %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
+  %tmp4 = load i16, i16* %arrayidx                     ; <i16> [#uses=1]
   %add = and i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
   %inc = add i16 %i.010, 1                        ; <i16> [#uses=2]
   %exitcond = icmp eq i16 %inc, %n                ; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/BPF/struct_ret1.ll b/llvm/test/CodeGen/BPF/struct_ret1.ll
index 1477c56..708f88d 100644
--- a/llvm/test/CodeGen/BPF/struct_ret1.ll
+++ b/llvm/test/CodeGen/BPF/struct_ret1.ll
@@ -9,8 +9,8 @@
 ; Function Attrs: nounwind readonly uwtable
 define { i64, i32 } @bar(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) #0 {
 entry:
-  %retval.sroa.0.0.copyload = load i64* bitcast (%struct.S* @s to i64*), align 4
-  %retval.sroa.2.0.copyload = load i32* getelementptr inbounds (%struct.S* @s, i64 0, i32 2), align 4
+  %retval.sroa.0.0.copyload = load i64, i64* bitcast (%struct.S* @s to i64*), align 4
+  %retval.sroa.2.0.copyload = load i32, i32* getelementptr inbounds (%struct.S* @s, i64 0, i32 2), align 4
   %.fca.0.insert = insertvalue { i64, i32 } undef, i64 %retval.sroa.0.0.copyload, 0
   %.fca.1.insert = insertvalue { i64, i32 } %.fca.0.insert, i32 %retval.sroa.2.0.copyload, 1
   ret { i64, i32 } %.fca.1.insert
diff --git a/llvm/test/CodeGen/CPP/2009-05-01-Long-Double.ll b/llvm/test/CodeGen/CPP/2009-05-01-Long-Double.ll
index 0b2d882..ae18582 100644
--- a/llvm/test/CodeGen/CPP/2009-05-01-Long-Double.ll
+++ b/llvm/test/CodeGen/CPP/2009-05-01-Long-Double.ll
@@ -6,7 +6,7 @@
 	%call = call i32 (...)* @other_func()		; <i32> [#uses=1]
 	%conv = sitofp i32 %call to x86_fp80		; <x86_fp80> [#uses=1]
 	store x86_fp80 %conv, x86_fp80* %retval
-	%0 = load x86_fp80* %retval		; <x86_fp80> [#uses=1]
+	%0 = load x86_fp80, x86_fp80* %retval		; <x86_fp80> [#uses=1]
 	ret x86_fp80 %0
 }
 
diff --git a/llvm/test/CodeGen/CPP/2009-05-04-CondBr.ll b/llvm/test/CodeGen/CPP/2009-05-04-CondBr.ll
index feb2cf7..9ce1e5f 100644
--- a/llvm/test/CodeGen/CPP/2009-05-04-CondBr.ll
+++ b/llvm/test/CodeGen/CPP/2009-05-04-CondBr.ll
@@ -6,10 +6,10 @@
 	%retval = alloca i32		; <i32*> [#uses=2]
 	%a.addr = alloca i32		; <i32*> [#uses=8]
 	store i32 %a, i32* %a.addr
-	%tmp = load i32* %a.addr		; <i32> [#uses=1]
+	%tmp = load i32, i32* %a.addr		; <i32> [#uses=1]
 	%inc = add i32 %tmp, 1		; <i32> [#uses=1]
 	store i32 %inc, i32* %a.addr
-	%tmp1 = load i32* %a.addr		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* %a.addr		; <i32> [#uses=1]
 	%cmp = icmp slt i32 %tmp1, 3		; <i1> [#uses=1]
 	br i1 %cmp, label %if.then, label %if.end
 
@@ -18,11 +18,11 @@
 	br label %if.end
 
 if.end:		; preds = %if.then, %entry
-	%tmp2 = load i32* %a.addr		; <i32> [#uses=1]
+	%tmp2 = load i32, i32* %a.addr		; <i32> [#uses=1]
 	%inc3 = add i32 %tmp2, 1		; <i32> [#uses=1]
 	store i32 %inc3, i32* %a.addr
-	%tmp4 = load i32* %a.addr		; <i32> [#uses=1]
+	%tmp4 = load i32, i32* %a.addr		; <i32> [#uses=1]
 	store i32 %tmp4, i32* %retval
-	%0 = load i32* %retval		; <i32> [#uses=1]
+	%0 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %0
 }
diff --git a/llvm/test/CodeGen/Generic/2003-05-28-ManyArgs.ll b/llvm/test/CodeGen/Generic/2003-05-28-ManyArgs.ll
index fcff331..c2ffc79 100644
--- a/llvm/test/CodeGen/Generic/2003-05-28-ManyArgs.ll
+++ b/llvm/test/CodeGen/Generic/2003-05-28-ManyArgs.ll
@@ -51,93 +51,93 @@
 	%tmp.112 = getelementptr %struct..s_placer_opts, %struct..s_placer_opts* %placer_opts, i64 0, i32 0		; <i32*> [#uses=1]
 	%tmp.114 = getelementptr %struct..s_placer_opts, %struct..s_placer_opts* %placer_opts, i64 0, i32 6		; <i32*> [#uses=1]
 	%tmp.118 = getelementptr %struct..s_router_opts, %struct..s_router_opts* %router_opts, i64 0, i32 7		; <i32*> [#uses=1]
-	%tmp.135 = load i32* %operation		; <i32> [#uses=1]
-	%tmp.137 = load i32* %tmp.112		; <i32> [#uses=1]
+	%tmp.135 = load i32, i32* %operation		; <i32> [#uses=1]
+	%tmp.137 = load i32, i32* %tmp.112		; <i32> [#uses=1]
 	%tmp.138 = getelementptr %struct..s_placer_opts, %struct..s_placer_opts* %placer_opts, i64 0, i32 1		; <float*> [#uses=1]
-	%tmp.139 = load float* %tmp.138		; <float> [#uses=1]
+	%tmp.139 = load float, float* %tmp.138		; <float> [#uses=1]
 	%tmp.140 = getelementptr %struct..s_placer_opts, %struct..s_placer_opts* %placer_opts, i64 0, i32 2		; <i32*> [#uses=1]
-	%tmp.141 = load i32* %tmp.140		; <i32> [#uses=1]
+	%tmp.141 = load i32, i32* %tmp.140		; <i32> [#uses=1]
 	%tmp.142 = getelementptr %struct..s_placer_opts, %struct..s_placer_opts* %placer_opts, i64 0, i32 3		; <i32*> [#uses=1]
-	%tmp.143 = load i32* %tmp.142		; <i32> [#uses=1]
-	%tmp.145 = load i8** %tmp.101		; <i8*> [#uses=1]
+	%tmp.143 = load i32, i32* %tmp.142		; <i32> [#uses=1]
+	%tmp.145 = load i8*, i8** %tmp.101		; <i8*> [#uses=1]
 	%tmp.146 = getelementptr %struct..s_placer_opts, %struct..s_placer_opts* %placer_opts, i64 0, i32 5		; <i32*> [#uses=1]
-	%tmp.147 = load i32* %tmp.146		; <i32> [#uses=1]
-	%tmp.149 = load i32* %tmp.114		; <i32> [#uses=1]
-	%tmp.154 = load i32* %full_stats		; <i32> [#uses=1]
-	%tmp.155 = load i32* %verify_binary_search		; <i32> [#uses=1]
+	%tmp.147 = load i32, i32* %tmp.146		; <i32> [#uses=1]
+	%tmp.149 = load i32, i32* %tmp.114		; <i32> [#uses=1]
+	%tmp.154 = load i32, i32* %full_stats		; <i32> [#uses=1]
+	%tmp.155 = load i32, i32* %verify_binary_search		; <i32> [#uses=1]
 	%tmp.156 = getelementptr %struct..s_annealing_sched, %struct..s_annealing_sched* %annealing_sched, i64 0, i32 0		; <i32*> [#uses=1]
-	%tmp.157 = load i32* %tmp.156		; <i32> [#uses=1]
+	%tmp.157 = load i32, i32* %tmp.156		; <i32> [#uses=1]
 	%tmp.158 = getelementptr %struct..s_annealing_sched, %struct..s_annealing_sched* %annealing_sched, i64 0, i32 1		; <float*> [#uses=1]
-	%tmp.159 = load float* %tmp.158		; <float> [#uses=1]
+	%tmp.159 = load float, float* %tmp.158		; <float> [#uses=1]
 	%tmp.160 = getelementptr %struct..s_annealing_sched, %struct..s_annealing_sched* %annealing_sched, i64 0, i32 2		; <float*> [#uses=1]
-	%tmp.161 = load float* %tmp.160		; <float> [#uses=1]
+	%tmp.161 = load float, float* %tmp.160		; <float> [#uses=1]
 	%tmp.162 = getelementptr %struct..s_annealing_sched, %struct..s_annealing_sched* %annealing_sched, i64 0, i32 3		; <float*> [#uses=1]
-	%tmp.163 = load float* %tmp.162		; <float> [#uses=1]
+	%tmp.163 = load float, float* %tmp.162		; <float> [#uses=1]
 	%tmp.164 = getelementptr %struct..s_annealing_sched, %struct..s_annealing_sched* %annealing_sched, i64 0, i32 4		; <float*> [#uses=1]
-	%tmp.165 = load float* %tmp.164		; <float> [#uses=1]
+	%tmp.165 = load float, float* %tmp.164		; <float> [#uses=1]
 	%tmp.166 = getelementptr %struct..s_router_opts, %struct..s_router_opts* %router_opts, i64 0, i32 0		; <float*> [#uses=1]
-	%tmp.167 = load float* %tmp.166		; <float> [#uses=1]
+	%tmp.167 = load float, float* %tmp.166		; <float> [#uses=1]
 	%tmp.168 = getelementptr %struct..s_router_opts, %struct..s_router_opts* %router_opts, i64 0, i32 1		; <float*> [#uses=1]
-	%tmp.169 = load float* %tmp.168		; <float> [#uses=1]
+	%tmp.169 = load float, float* %tmp.168		; <float> [#uses=1]
 	%tmp.170 = getelementptr %struct..s_router_opts, %struct..s_router_opts* %router_opts, i64 0, i32 2		; <float*> [#uses=1]
-	%tmp.171 = load float* %tmp.170		; <float> [#uses=1]
+	%tmp.171 = load float, float* %tmp.170		; <float> [#uses=1]
 	%tmp.172 = getelementptr %struct..s_router_opts, %struct..s_router_opts* %router_opts, i64 0, i32 3		; <float*> [#uses=1]
-	%tmp.173 = load float* %tmp.172		; <float> [#uses=1]
+	%tmp.173 = load float, float* %tmp.172		; <float> [#uses=1]
 	%tmp.174 = getelementptr %struct..s_router_opts, %struct..s_router_opts* %router_opts, i64 0, i32 4		; <float*> [#uses=1]
-	%tmp.175 = load float* %tmp.174		; <float> [#uses=1]
+	%tmp.175 = load float, float* %tmp.174		; <float> [#uses=1]
 	%tmp.176 = getelementptr %struct..s_router_opts, %struct..s_router_opts* %router_opts, i64 0, i32 5		; <i32*> [#uses=1]
-	%tmp.177 = load i32* %tmp.176		; <i32> [#uses=1]
+	%tmp.177 = load i32, i32* %tmp.176		; <i32> [#uses=1]
 	%tmp.178 = getelementptr %struct..s_router_opts, %struct..s_router_opts* %router_opts, i64 0, i32 6		; <i32*> [#uses=1]
-	%tmp.179 = load i32* %tmp.178		; <i32> [#uses=1]
-	%tmp.181 = load i32* %tmp.118		; <i32> [#uses=1]
+	%tmp.179 = load i32, i32* %tmp.178		; <i32> [#uses=1]
+	%tmp.181 = load i32, i32* %tmp.118		; <i32> [#uses=1]
 	%tmp.182 = getelementptr %struct..s_router_opts, %struct..s_router_opts* %router_opts, i64 0, i32 8		; <i32*> [#uses=1]
-	%tmp.183 = load i32* %tmp.182		; <i32> [#uses=1]
+	%tmp.183 = load i32, i32* %tmp.182		; <i32> [#uses=1]
 	%tmp.184 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 0		; <i32*> [#uses=1]
-	%tmp.185 = load i32* %tmp.184		; <i32> [#uses=1]
+	%tmp.185 = load i32, i32* %tmp.184		; <i32> [#uses=1]
 	%tmp.186 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 1		; <float*> [#uses=1]
-	%tmp.187 = load float* %tmp.186		; <float> [#uses=1]
+	%tmp.187 = load float, float* %tmp.186		; <float> [#uses=1]
 	%tmp.188 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 2		; <float*> [#uses=1]
-	%tmp.189 = load float* %tmp.188		; <float> [#uses=1]
+	%tmp.189 = load float, float* %tmp.188		; <float> [#uses=1]
 	%tmp.190 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 3		; <float*> [#uses=1]
-	%tmp.191 = load float* %tmp.190		; <float> [#uses=1]
+	%tmp.191 = load float, float* %tmp.190		; <float> [#uses=1]
 	%tmp.192 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 4		; <i32*> [#uses=1]
-	%tmp.193 = load i32* %tmp.192		; <i32> [#uses=1]
+	%tmp.193 = load i32, i32* %tmp.192		; <i32> [#uses=1]
 	%tmp.194 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 5		; <i32*> [#uses=1]
-	%tmp.195 = load i32* %tmp.194		; <i32> [#uses=1]
+	%tmp.195 = load i32, i32* %tmp.194		; <i32> [#uses=1]
 	%tmp.196 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 6		; <i16*> [#uses=1]
-	%tmp.197 = load i16* %tmp.196		; <i16> [#uses=1]
+	%tmp.197 = load i16, i16* %tmp.196		; <i16> [#uses=1]
 	%tmp.198 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 7		; <i16*> [#uses=1]
-	%tmp.199 = load i16* %tmp.198		; <i16> [#uses=1]
+	%tmp.199 = load i16, i16* %tmp.198		; <i16> [#uses=1]
 	%tmp.200 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 8		; <i16*> [#uses=1]
-	%tmp.201 = load i16* %tmp.200		; <i16> [#uses=1]
+	%tmp.201 = load i16, i16* %tmp.200		; <i16> [#uses=1]
 	%tmp.202 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 9		; <float*> [#uses=1]
-	%tmp.203 = load float* %tmp.202		; <float> [#uses=1]
+	%tmp.203 = load float, float* %tmp.202		; <float> [#uses=1]
 	%tmp.204 = getelementptr %struct..s_det_routing_arch, %struct..s_det_routing_arch* %det_routing_arch, i64 0, i32 10		; <float*> [#uses=1]
-	%tmp.205 = load float* %tmp.204		; <float> [#uses=1]
-	%tmp.206 = load %struct..s_segment_inf** %segment_inf		; <%struct..s_segment_inf*> [#uses=1]
-	%tmp.208 = load i32* %tmp.109		; <i32> [#uses=1]
+	%tmp.205 = load float, float* %tmp.204		; <float> [#uses=1]
+	%tmp.206 = load %struct..s_segment_inf*, %struct..s_segment_inf** %segment_inf		; <%struct..s_segment_inf*> [#uses=1]
+	%tmp.208 = load i32, i32* %tmp.109		; <i32> [#uses=1]
 	%tmp.209 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 1		; <float*> [#uses=1]
-	%tmp.210 = load float* %tmp.209		; <float> [#uses=1]
+	%tmp.210 = load float, float* %tmp.209		; <float> [#uses=1]
 	%tmp.211 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 2		; <float*> [#uses=1]
-	%tmp.212 = load float* %tmp.211		; <float> [#uses=1]
+	%tmp.212 = load float, float* %tmp.211		; <float> [#uses=1]
 	%tmp.213 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 3		; <float*> [#uses=1]
-	%tmp.214 = load float* %tmp.213		; <float> [#uses=1]
+	%tmp.214 = load float, float* %tmp.213		; <float> [#uses=1]
 	%tmp.215 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 4		; <float*> [#uses=1]
-	%tmp.216 = load float* %tmp.215		; <float> [#uses=1]
+	%tmp.216 = load float, float* %tmp.215		; <float> [#uses=1]
 	%tmp.217 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 5		; <float*> [#uses=1]
-	%tmp.218 = load float* %tmp.217		; <float> [#uses=1]
+	%tmp.218 = load float, float* %tmp.217		; <float> [#uses=1]
 	%tmp.219 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 6		; <float*> [#uses=1]
-	%tmp.220 = load float* %tmp.219		; <float> [#uses=1]
+	%tmp.220 = load float, float* %tmp.219		; <float> [#uses=1]
 	%tmp.221 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 7		; <float*> [#uses=1]
-	%tmp.222 = load float* %tmp.221		; <float> [#uses=1]
+	%tmp.222 = load float, float* %tmp.221		; <float> [#uses=1]
 	%tmp.223 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 8		; <float*> [#uses=1]
-	%tmp.224 = load float* %tmp.223		; <float> [#uses=1]
+	%tmp.224 = load float, float* %tmp.223		; <float> [#uses=1]
 	%tmp.225 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 9		; <float*> [#uses=1]
-	%tmp.226 = load float* %tmp.225		; <float> [#uses=1]
+	%tmp.226 = load float, float* %tmp.225		; <float> [#uses=1]
 	%tmp.227 = getelementptr { i32, float, float, float, float, float, float, float, float, float, float }, { i32, float, float, float, float, float, float, float, float, float, float }* %timing_inf, i64 0, i32 10		; <float*> [#uses=1]
-	%tmp.228 = load float* %tmp.227		; <float> [#uses=1]
+	%tmp.228 = load float, float* %tmp.227		; <float> [#uses=1]
 	call void @place_and_route( i32 %tmp.135, i32 %tmp.137, float %tmp.139, i32 %tmp.141, i32 %tmp.143, i8* %tmp.145, i32 %tmp.147, i32 %tmp.149, i8* %tmp.107, i8* %tmp.105, i8* %tmp.106, i8* %tmp.108, i32 %tmp.154, i32 %tmp.155, i32 %tmp.157, float %tmp.159, float %tmp.161, float %tmp.163, float %tmp.165, float %tmp.167, float %tmp.169, float %tmp.171, float %tmp.173, float %tmp.175, i32 %tmp.177, i32 %tmp.179, i32 %tmp.181, i32 %tmp.183, i32 %tmp.185, float %tmp.187, float %tmp.189, float %tmp.191, i32 %tmp.193, i32 %tmp.195, i16 %tmp.197, i16 %tmp.199, i16 %tmp.201, float %tmp.203, float %tmp.205, %struct..s_segment_inf* %tmp.206, i32 %tmp.208, float %tmp.210, float %tmp.212, float %tmp.214, float %tmp.216, float %tmp.218, float %tmp.220, float %tmp.222, float %tmp.224, float %tmp.226, float %tmp.228 )
-	%tmp.231 = load i32* %show_graphics		; <i32> [#uses=1]
+	%tmp.231 = load i32, i32* %show_graphics		; <i32> [#uses=1]
 	%tmp.232 = icmp ne i32 %tmp.231, 0		; <i1> [#uses=1]
 	br i1 %tmp.232, label %then.2, label %endif.2
 
diff --git a/llvm/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll b/llvm/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll
index 06147ad..9e3d254 100644
--- a/llvm/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll
+++ b/llvm/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll
@@ -22,7 +22,7 @@
 
 define internal i32 @OpenOutput(i8* %filename.1) {
 entry:
-        %tmp.0 = load %FileType** @Output               ; <%FileType*> [#uses=1]
+        %tmp.0 = load %FileType*, %FileType** @Output               ; <%FileType*> [#uses=1]
         %tmp.4 = getelementptr %FileType, %FileType* %tmp.0, i64 1         ; <%FileType*> [#uses=1]
         %addrOfGlobal = getelementptr [16 x %FileType], [16 x %FileType]* @OutputFiles, i64 0             ; <[16 x %FileType]*> [#uses=1]
         %constantGEP = getelementptr [16 x %FileType], [16 x %FileType]* %addrOfGlobal, i64 1             ; <[16 x %FileType]*> [#uses=1]
diff --git a/llvm/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll b/llvm/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
index 6904b2c..360bf05 100644
--- a/llvm/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
+++ b/llvm/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
@@ -27,7 +27,7 @@
 loopentry:              ; preds = %loopentry, %entry
         %i = phi i64 [ 0, %entry ], [ %inc.i, %loopentry ]              ; <i64> [#uses=3]
         %cptr = getelementptr [6 x i8], [6 x i8]* @yy_ec, i64 0, i64 %i           ; <i8*> [#uses=1]
-        %c = load i8* %cptr             ; <i8> [#uses=1]
+        %c = load i8, i8* %cptr             ; <i8> [#uses=1]
         %ignore = call i32 (i8*, ...)* @printf( i8* getelementptr ([8 x i8]* @.str_3, i64 0, i64 0), i64 %i )        ; <i32> [#uses=0]
         %ignore2 = call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @.str_4, i64 0, i64 0), i8 %c )        ; <i32> [#uses=0]
         %inc.i = add i64 %i, 1          ; <i64> [#uses=2]
diff --git a/llvm/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll b/llvm/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll
index d4a4cf8..8dfdd01 100644
--- a/llvm/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll
+++ b/llvm/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll
@@ -3,8 +3,8 @@
 @global_long_2 = linkonce global i64 49         ; <i64*> [#uses=1]
 
 define i32 @main() {
-        %l1 = load i64* @global_long_1          ; <i64> [#uses=1]
-        %l2 = load i64* @global_long_2          ; <i64> [#uses=1]
+        %l1 = load i64, i64* @global_long_1          ; <i64> [#uses=1]
+        %l2 = load i64, i64* @global_long_2          ; <i64> [#uses=1]
         %cond = icmp sle i64 %l1, %l2           ; <i1> [#uses=1]
         %cast2 = zext i1 %cond to i32           ; <i32> [#uses=1]
         %RV = sub i32 1, %cast2         ; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll b/llvm/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll
index 5508272..8a43b6a 100644
--- a/llvm/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll
+++ b/llvm/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll
@@ -42,7 +42,7 @@
 	ret void
 
 else.1.i56:		; preds = %then.0.i40
-	%tmp.28.i = load i32* @G		; <i32> [#uses=1]
+	%tmp.28.i = load i32, i32* @G		; <i32> [#uses=1]
 	%tmp.29.i = icmp eq i32 %tmp.28.i, 1		; <i1> [#uses=1]
 	br i1 %tmp.29.i, label %shortcirc_next.i, label %shortcirc_done.i
 
diff --git a/llvm/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll b/llvm/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll
index d8ff9f3..554cd2e 100644
--- a/llvm/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll
+++ b/llvm/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll
@@ -37,7 +37,7 @@
 cond_next18:		; preds = %cond_next12, %cond_true
 	%tmp20 = bitcast %struct.tree_node* %tmp2 to %struct.tree_type*		; <%struct.tree_type*> [#uses=1]
 	%tmp21 = getelementptr %struct.tree_type, %struct.tree_type* %tmp20, i32 0, i32 17		; <%struct.tree_node**> [#uses=1]
-	%tmp22 = load %struct.tree_node** %tmp21		; <%struct.tree_node*> [#uses=6]
+	%tmp22 = load %struct.tree_node*, %struct.tree_node** %tmp21		; <%struct.tree_node*> [#uses=6]
 	%tmp24 = icmp eq %struct.tree_node* %tmp22, %tmp23		; <i1> [#uses=1]
 	br i1 %tmp24, label %return, label %cond_next28
 
@@ -45,7 +45,7 @@
 	%tmp30 = bitcast %struct.tree_node* %tmp2 to %struct.tree_common*		; <%struct.tree_common*> [#uses=1]
 	%tmp = getelementptr %struct.tree_common, %struct.tree_common* %tmp30, i32 0, i32 2		; <i8*> [#uses=1]
 	%tmp.upgrd.1 = bitcast i8* %tmp to i32*		; <i32*> [#uses=1]
-	%tmp.upgrd.2 = load i32* %tmp.upgrd.1		; <i32> [#uses=1]
+	%tmp.upgrd.2 = load i32, i32* %tmp.upgrd.1		; <i32> [#uses=1]
 	%tmp32 = trunc i32 %tmp.upgrd.2 to i8		; <i8> [#uses=1]
 	%tmp33 = icmp eq i8 %tmp32, 7		; <i1> [#uses=1]
 	br i1 %tmp33, label %cond_true34, label %cond_next84
@@ -69,23 +69,23 @@
 	br i1 %tmp.upgrd.6, label %return, label %cond_true92
 
 cond_true92.preheader:		; preds = %entry
-	%tmp7 = load %struct.tree_node** @void_type_node		; <%struct.tree_node*> [#uses=1]
-	%tmp23 = load %struct.tree_node** @float_type_node		; <%struct.tree_node*> [#uses=1]
-	%tmp39 = load %struct.tree_node** @char_type_node		; <%struct.tree_node*> [#uses=1]
-	%tmp48 = load %struct.tree_node** @signed_char_type_node		; <%struct.tree_node*> [#uses=1]
-	%tmp57 = load %struct.tree_node** @unsigned_char_type_node		; <%struct.tree_node*> [#uses=1]
-	%tmp66 = load %struct.tree_node** @short_integer_type_node		; <%struct.tree_node*> [#uses=1]
-	%tmp75 = load %struct.tree_node** @short_unsigned_type_node		; <%struct.tree_node*> [#uses=1]
+	%tmp7 = load %struct.tree_node*, %struct.tree_node** @void_type_node		; <%struct.tree_node*> [#uses=1]
+	%tmp23 = load %struct.tree_node*, %struct.tree_node** @float_type_node		; <%struct.tree_node*> [#uses=1]
+	%tmp39 = load %struct.tree_node*, %struct.tree_node** @char_type_node		; <%struct.tree_node*> [#uses=1]
+	%tmp48 = load %struct.tree_node*, %struct.tree_node** @signed_char_type_node		; <%struct.tree_node*> [#uses=1]
+	%tmp57 = load %struct.tree_node*, %struct.tree_node** @unsigned_char_type_node		; <%struct.tree_node*> [#uses=1]
+	%tmp66 = load %struct.tree_node*, %struct.tree_node** @short_integer_type_node		; <%struct.tree_node*> [#uses=1]
+	%tmp75 = load %struct.tree_node*, %struct.tree_node** @short_unsigned_type_node		; <%struct.tree_node*> [#uses=1]
 	br label %cond_true92
 
 cond_true92:		; preds = %cond_true92.preheader, %cond_next84, %cond_true34
 	%t.0.0 = phi %struct.tree_node* [ %parms, %cond_true92.preheader ], [ %tmp6, %cond_true34 ], [ %tmp6, %cond_next84 ]		; <%struct.tree_node*> [#uses=2]
 	%tmp.upgrd.4 = bitcast %struct.tree_node* %t.0.0 to %struct.tree_list*		; <%struct.tree_list*> [#uses=1]
 	%tmp.upgrd.5 = getelementptr %struct.tree_list, %struct.tree_list* %tmp.upgrd.4, i32 0, i32 2		; <%struct.tree_node**> [#uses=1]
-	%tmp2 = load %struct.tree_node** %tmp.upgrd.5		; <%struct.tree_node*> [#uses=5]
+	%tmp2 = load %struct.tree_node*, %struct.tree_node** %tmp.upgrd.5		; <%struct.tree_node*> [#uses=5]
 	%tmp4 = bitcast %struct.tree_node* %t.0.0 to %struct.tree_common*		; <%struct.tree_common*> [#uses=1]
 	%tmp5 = getelementptr %struct.tree_common, %struct.tree_common* %tmp4, i32 0, i32 0		; <%struct.tree_node**> [#uses=1]
-	%tmp6 = load %struct.tree_node** %tmp5		; <%struct.tree_node*> [#uses=3]
+	%tmp6 = load %struct.tree_node*, %struct.tree_node** %tmp5		; <%struct.tree_node*> [#uses=3]
 	%tmp.upgrd.6 = icmp eq %struct.tree_node* %tmp6, null		; <i1> [#uses=3]
 	br i1 %tmp.upgrd.6, label %cond_true, label %cond_next12
 
diff --git a/llvm/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll b/llvm/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll
index 8465b82..f68dc32 100644
--- a/llvm/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll
+++ b/llvm/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll
@@ -22,8 +22,8 @@
 	]
 
 then.10:		; preds = %else.3, %else.3
-	%tmp.112 = load i16* null		; <i16> [#uses=2]
-	%tmp.113 = load i16* @G		; <i16> [#uses=2]
+	%tmp.112 = load i16, i16* null		; <i16> [#uses=2]
+	%tmp.113 = load i16, i16* @G		; <i16> [#uses=2]
 	%tmp.114 = icmp ugt i16 %tmp.112, %tmp.113		; <i1> [#uses=1]
 	%tmp.120 = icmp ult i16 %tmp.112, %tmp.113		; <i1> [#uses=1]
 	%bothcond = and i1 %tmp.114, %tmp.120		; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll b/llvm/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll
index 9b4016b..80be64c 100644
--- a/llvm/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll
+++ b/llvm/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll
@@ -11,7 +11,7 @@
 cond_true23:		; preds = %entry
 	%tmp138 = getelementptr %struct.cl_perfunc_opts, %struct.cl_perfunc_opts* @cl_pf_opts, i32 0, i32 8		; <i8*> [#uses=1]
 	%tmp138.upgrd.1 = bitcast i8* %tmp138 to i32*		; <i32*> [#uses=2]
-	%tmp139 = load i32* %tmp138.upgrd.1		; <i32> [#uses=1]
+	%tmp139 = load i32, i32* %tmp138.upgrd.1		; <i32> [#uses=1]
 	%tmp140 = shl i32 1, 27		; <i32> [#uses=1]
 	%tmp141 = and i32 %tmp140, 134217728		; <i32> [#uses=1]
 	%tmp142 = and i32 %tmp139, -134217729		; <i32> [#uses=1]
@@ -19,7 +19,7 @@
 	store i32 %tmp143, i32* %tmp138.upgrd.1
 	%tmp144 = getelementptr %struct.cl_perfunc_opts, %struct.cl_perfunc_opts* @cl_pf_opts, i32 0, i32 8		; <i8*> [#uses=1]
 	%tmp144.upgrd.2 = bitcast i8* %tmp144 to i32*		; <i32*> [#uses=1]
-	%tmp145 = load i32* %tmp144.upgrd.2		; <i32> [#uses=1]
+	%tmp145 = load i32, i32* %tmp144.upgrd.2		; <i32> [#uses=1]
 	%tmp146 = shl i32 %tmp145, 22		; <i32> [#uses=1]
 	%tmp147 = lshr i32 %tmp146, 31		; <i32> [#uses=1]
 	%tmp147.upgrd.3 = trunc i32 %tmp147 to i8		; <i8> [#uses=1]
diff --git a/llvm/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll b/llvm/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll
index c4f2fb0..bdd9787 100644
--- a/llvm/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll
+++ b/llvm/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll
@@ -169,7 +169,7 @@
 	br i1 %tmp781, label %cond_next784, label %bb790
 
 cond_next784:		; preds = %cond_next778
-	%tmp785 = load i32* @ix86_cpu		; <i32> [#uses=1]
+	%tmp785 = load i32, i32* @ix86_cpu		; <i32> [#uses=1]
 	%tmp786 = icmp eq i32 %tmp785, 5		; <i1> [#uses=1]
 	br i1 %tmp786, label %UnifiedReturnBlock, label %bb790
 
@@ -208,7 +208,7 @@
 	ret void
 
 bb1648:		; preds = %cond_true.i, %cond_true.i, %cond_true.i, %cond_true.i
-	%tmp1650 = load i32* @which_alternative		; <i32> [#uses=1]
+	%tmp1650 = load i32, i32* @which_alternative		; <i32> [#uses=1]
 	switch i32 %tmp1650, label %bb1701 [
 		 i32 0, label %cond_next1675
 		 i32 1, label %cond_next1675
@@ -219,7 +219,7 @@
 	ret void
 
 bb1701:		; preds = %bb1648
-	%tmp1702 = load i32* @which_alternative		; <i32> [#uses=1]
+	%tmp1702 = load i32, i32* @which_alternative		; <i32> [#uses=1]
 	switch i32 %tmp1702, label %bb1808 [
 		 i32 0, label %cond_next1727
 		 i32 1, label %cond_next1727
@@ -237,7 +237,7 @@
 	ret void
 
 bb1876:		; preds = %bb1808
-	%tmp1877signed = load i32* @which_alternative		; <i32> [#uses=4]
+	%tmp1877signed = load i32, i32* @which_alternative		; <i32> [#uses=4]
 	%tmp1877 = bitcast i32 %tmp1877signed to i32		; <i32> [#uses=1]
 	%bothcond699 = icmp ult i32 %tmp1877, 2		; <i1> [#uses=1]
 	%tmp1888 = icmp eq i32 %tmp1877signed, 2		; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll b/llvm/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
index 05746f3..109a146 100644
--- a/llvm/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
+++ b/llvm/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
@@ -78,11 +78,11 @@
 	ret void
 
 bb1567:		; preds = %cond_true1254
-	%tmp1580 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 3)		; <i64> [#uses=1]
-	%tmp1591 = load i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 4)		; <i64> [#uses=1]
+	%tmp1580 = load i64, i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 3)		; <i64> [#uses=1]
+	%tmp1591 = load i64, i64* getelementptr (%struct.CHESS_POSITION* @search, i32 0, i32 4)		; <i64> [#uses=1]
 	%tmp1572 = tail call fastcc i32 @FirstOne( )		; <i32> [#uses=5]
 	%tmp1582 = getelementptr [64 x i32], [64 x i32]* @bishop_shift_rl45, i32 0, i32 %tmp1572		; <i32*> [#uses=1]
-	%tmp1583 = load i32* %tmp1582		; <i32> [#uses=1]
+	%tmp1583 = load i32, i32* %tmp1582		; <i32> [#uses=1]
 	%tmp1583.upgrd.1 = trunc i32 %tmp1583 to i8		; <i8> [#uses=1]
 	%shift.upgrd.2 = zext i8 %tmp1583.upgrd.1 to i64		; <i64> [#uses=1]
 	%tmp1584 = lshr i64 %tmp1580, %shift.upgrd.2		; <i64> [#uses=1]
@@ -90,9 +90,9 @@
 	%tmp1585 = and i32 %tmp1584.upgrd.3, 255		; <i32> [#uses=1]
 	%gep.upgrd.4 = zext i32 %tmp1585 to i64		; <i64> [#uses=1]
 	%tmp1587 = getelementptr [64 x [256 x i32]], [64 x [256 x i32]]* @bishop_mobility_rl45, i32 0, i32 %tmp1572, i64 %gep.upgrd.4		; <i32*> [#uses=1]
-	%tmp1588 = load i32* %tmp1587		; <i32> [#uses=1]
+	%tmp1588 = load i32, i32* %tmp1587		; <i32> [#uses=1]
 	%tmp1593 = getelementptr [64 x i32], [64 x i32]* @bishop_shift_rr45, i32 0, i32 %tmp1572		; <i32*> [#uses=1]
-	%tmp1594 = load i32* %tmp1593		; <i32> [#uses=1]
+	%tmp1594 = load i32, i32* %tmp1593		; <i32> [#uses=1]
 	%tmp1594.upgrd.5 = trunc i32 %tmp1594 to i8		; <i8> [#uses=1]
 	%shift.upgrd.6 = zext i8 %tmp1594.upgrd.5 to i64		; <i64> [#uses=1]
 	%tmp1595 = lshr i64 %tmp1591, %shift.upgrd.6		; <i64> [#uses=1]
@@ -100,11 +100,11 @@
 	%tmp1596 = and i32 %tmp1595.upgrd.7, 255		; <i32> [#uses=1]
 	%gep.upgrd.8 = zext i32 %tmp1596 to i64		; <i64> [#uses=1]
 	%tmp1598 = getelementptr [64 x [256 x i32]], [64 x [256 x i32]]* @bishop_mobility_rr45, i32 0, i32 %tmp1572, i64 %gep.upgrd.8		; <i32*> [#uses=1]
-	%tmp1599 = load i32* %tmp1598		; <i32> [#uses=1]
+	%tmp1599 = load i32, i32* %tmp1598		; <i32> [#uses=1]
 	%tmp1600.neg = sub i32 0, %tmp1588		; <i32> [#uses=1]
 	%tmp1602 = sub i32 %tmp1600.neg, %tmp1599		; <i32> [#uses=1]
 	%tmp1604 = getelementptr [64 x i8], [64 x i8]* @black_outpost, i32 0, i32 %tmp1572		; <i8*> [#uses=1]
-	%tmp1605 = load i8* %tmp1604		; <i8> [#uses=1]
+	%tmp1605 = load i8, i8* %tmp1604		; <i8> [#uses=1]
 	%tmp1606 = icmp eq i8 %tmp1605, 0		; <i1> [#uses=1]
 	br i1 %tmp1606, label %cond_next1637, label %cond_true1607
 
diff --git a/llvm/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll b/llvm/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll
index 26d0f4f..40f91b2 100644
--- a/llvm/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll
+++ b/llvm/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll
@@ -10,9 +10,9 @@
 	br i1 false, label %cond_true48, label %cond_next80
 
 cond_true48:		; preds = %bb.preheader
-	%tmp = load i8* null		; <i8> [#uses=1]
+	%tmp = load i8, i8* null		; <i8> [#uses=1]
 	%tmp51 = zext i8 %tmp to i16		; <i16> [#uses=1]
-	%tmp99 = load i8* null		; <i8> [#uses=1]
+	%tmp99 = load i8, i8* null		; <i8> [#uses=1]
 	%tmp54 = bitcast i8 %tmp99 to i8		; <i8> [#uses=1]
 	%tmp54.upgrd.1 = zext i8 %tmp54 to i32		; <i32> [#uses=1]
 	%tmp55 = lshr i32 %tmp54.upgrd.1, 3		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll b/llvm/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll
index 255b120..aa6793b 100644
--- a/llvm/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll
+++ b/llvm/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll
@@ -4,8 +4,8 @@
 declare i1 @foo()
 
 define i32 @test(i32* %A, i32* %B) {
-	%a = load i32* %A
-	%b = load i32* %B
+	%a = load i32, i32* %A
+	%b = load i32, i32* %B
 	%cond = call i1 @foo()
 	%c = select i1 %cond, i32 %a, i32 %b
 	ret i32 %c
diff --git a/llvm/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll b/llvm/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll
index 314bb05..4558f09 100644
--- a/llvm/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll
+++ b/llvm/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll
@@ -4,24 +4,24 @@
 define i32 @f(i16* %pc) {
 entry:
 	%acc = alloca i64, align 8		; <i64*> [#uses=4]
-	%tmp97 = load i64* %acc, align 8		; <i64> [#uses=1]
+	%tmp97 = load i64, i64* %acc, align 8		; <i64> [#uses=1]
 	%tmp98 = and i64 %tmp97, 4294967295		; <i64> [#uses=1]
-	%tmp99 = load i64* null, align 8		; <i64> [#uses=1]
+	%tmp99 = load i64, i64* null, align 8		; <i64> [#uses=1]
 	%tmp100 = and i64 %tmp99, 4294967295		; <i64> [#uses=1]
 	%tmp101 = mul i64 %tmp98, %tmp100		; <i64> [#uses=1]
 	%tmp103 = lshr i64 %tmp101, 0		; <i64> [#uses=1]
-	%tmp104 = load i64* %acc, align 8		; <i64> [#uses=1]
+	%tmp104 = load i64, i64* %acc, align 8		; <i64> [#uses=1]
 	%.cast105 = zext i32 32 to i64		; <i64> [#uses=1]
 	%tmp106 = lshr i64 %tmp104, %.cast105		; <i64> [#uses=1]
-	%tmp107 = load i64* null, align 8		; <i64> [#uses=1]
+	%tmp107 = load i64, i64* null, align 8		; <i64> [#uses=1]
 	%tmp108 = and i64 %tmp107, 4294967295		; <i64> [#uses=1]
 	%tmp109 = mul i64 %tmp106, %tmp108		; <i64> [#uses=1]
 	%tmp112 = add i64 %tmp109, 0		; <i64> [#uses=1]
 	%tmp116 = add i64 %tmp112, 0		; <i64> [#uses=1]
 	%tmp117 = add i64 %tmp103, %tmp116		; <i64> [#uses=1]
-	%tmp118 = load i64* %acc, align 8		; <i64> [#uses=1]
+	%tmp118 = load i64, i64* %acc, align 8		; <i64> [#uses=1]
 	%tmp120 = lshr i64 %tmp118, 0		; <i64> [#uses=1]
-	%tmp121 = load i64* null, align 8		; <i64> [#uses=1]
+	%tmp121 = load i64, i64* null, align 8		; <i64> [#uses=1]
 	%tmp123 = lshr i64 %tmp121, 0		; <i64> [#uses=1]
 	%tmp124 = mul i64 %tmp120, %tmp123		; <i64> [#uses=1]
 	%tmp126 = shl i64 %tmp124, 0		; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/Generic/2008-01-30-LoadCrash.ll b/llvm/test/CodeGen/Generic/2008-01-30-LoadCrash.ll
index e33ec1f..f24d1bc 100644
--- a/llvm/test/CodeGen/Generic/2008-01-30-LoadCrash.ll
+++ b/llvm/test/CodeGen/Generic/2008-01-30-LoadCrash.ll
@@ -9,7 +9,7 @@
 bb41:		; preds = %bb20
 	%tmp8182 = trunc i64 %tmp42.rle to i32		; <i32> [#uses=1]
 	%tmp83 = getelementptr [63 x i8], [63 x i8]* @letters.3100, i32 0, i32 %tmp8182		; <i8*> [#uses=1]
-	%tmp84 = load i8* %tmp83, align 1		; <i8> [#uses=1]
+	%tmp84 = load i8, i8* %tmp83, align 1		; <i8> [#uses=1]
 	store i8 %tmp84, i8* null, align 1
 	%tmp90 = urem i64 %tmp42.rle, 62		; <i64> [#uses=1]
 	%tmp9091 = trunc i64 %tmp90 to i32		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/Generic/2008-02-25-NegateZero.ll b/llvm/test/CodeGen/Generic/2008-02-25-NegateZero.ll
index 97db667..35c0f20 100644
--- a/llvm/test/CodeGen/Generic/2008-02-25-NegateZero.ll
+++ b/llvm/test/CodeGen/Generic/2008-02-25-NegateZero.ll
@@ -3,8 +3,8 @@
 
 define void @test() {
 entry:
-	%tmp98 = load float* null, align 4		; <float> [#uses=1]
-	%tmp106 = load float* null, align 4		; <float> [#uses=1]
+	%tmp98 = load float, float* null, align 4		; <float> [#uses=1]
+	%tmp106 = load float, float* null, align 4		; <float> [#uses=1]
 	%tmp113 = fadd float %tmp98, %tmp106		; <float> [#uses=1]
 	%tmp119 = fsub float %tmp113, 0.000000e+00		; <float> [#uses=1]
 	call void (i32, ...)* @foo( i32 0, float 0.000000e+00, float %tmp119 ) nounwind 
diff --git a/llvm/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll b/llvm/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
index 45b561a..c18e3c9 100644
--- a/llvm/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
+++ b/llvm/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
@@ -4,7 +4,7 @@
 @m = external global <2 x double>
 
 define double @vector_ex() nounwind {
-       %v = load <2 x double>* @m
+       %v = load <2 x double>, <2 x double>* @m
        %x = extractelement <2 x double> %v, i32 1
        ret double %x
 }
diff --git a/llvm/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll b/llvm/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
index b62f811..78f97ee 100644
--- a/llvm/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
+++ b/llvm/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
@@ -5,7 +5,7 @@
 
 define i32 @test(i128* %P) nounwind {
 entry:
-	%tmp48 = load i128* %P
+	%tmp48 = load i128, i128* %P
 	%and49 = and i128 %tmp48, 18446744073709551616		; <i128> [#uses=1]
 	%tobool = icmp ne i128 %and49, 0		; <i1> [#uses=1]
 	br i1 %tobool, label %if.then50, label %if.end61
@@ -19,7 +19,7 @@
 
 define i32 @test2(i320* %P) nounwind {
 entry:
-	%tmp48 = load i320* %P
+	%tmp48 = load i320, i320* %P
 	%and49 = and i320 %tmp48, 25108406941546723055343157692830665664409421777856138051584
 	%tobool = icmp ne i320 %and49, 0		; <i1> [#uses=1]
 	br i1 %tobool, label %if.then50, label %if.end61
diff --git a/llvm/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll b/llvm/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll
index cd446d5..5cc48c2 100644
--- a/llvm/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll
+++ b/llvm/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll
@@ -5,8 +5,8 @@
 
 define void @f(i256* nocapture %a, i256* nocapture %b, i256* nocapture %cc, i256* nocapture %dd) nounwind uwtable noinline ssp {
 entry:
-  %c = load i256* %cc
-  %d = load i256* %dd
+  %c = load i256, i256* %cc
+  %d = load i256, i256* %dd
   %add = add nsw i256 %c, %d
   store i256 %add, i256* %a, align 8
   %or = or i256 %c, 1606938044258990275541962092341162602522202993782792835301376
diff --git a/llvm/test/CodeGen/Generic/2012-06-08-APIntCrash.ll b/llvm/test/CodeGen/Generic/2012-06-08-APIntCrash.ll
index 2c096bf..88ca936 100644
--- a/llvm/test/CodeGen/Generic/2012-06-08-APIntCrash.ll
+++ b/llvm/test/CodeGen/Generic/2012-06-08-APIntCrash.ll
@@ -2,7 +2,7 @@
 
 define void @test1(<8 x i32>* %ptr)
 {
-	%1 = load <8 x i32>* %ptr, align 32
+	%1 = load <8 x i32>, <8 x i32>* %ptr, align 32
 	%2 = and <8 x i32> %1, <i32 0, i32 0, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 -1>
 	store <8 x i32> %2, <8 x i32>* %ptr, align 16
 	ret void
diff --git a/llvm/test/CodeGen/Generic/2014-02-05-OpaqueConstants.ll b/llvm/test/CodeGen/Generic/2014-02-05-OpaqueConstants.ll
index 263cc3a..1497bbb 100644
--- a/llvm/test/CodeGen/Generic/2014-02-05-OpaqueConstants.ll
+++ b/llvm/test/CodeGen/Generic/2014-02-05-OpaqueConstants.ll
@@ -9,7 +9,7 @@
 ; Function Attrs: nounwind ssp uwtable
 define void @fn() {
   store i32* inttoptr (i64 68719476735 to i32*), i32** @a, align 8
-  %1 = load i32* @c, align 4
+  %1 = load i32, i32* @c, align 4
   %2 = sext i32 %1 to i64
   %3 = lshr i64 %2, 12
   %4 = and i64 %3, 68719476735
diff --git a/llvm/test/CodeGen/Generic/APIntLoadStore.ll b/llvm/test/CodeGen/Generic/APIntLoadStore.ll
index 7c71a33..f8d22f5 100644
--- a/llvm/test/CodeGen/Generic/APIntLoadStore.ll
+++ b/llvm/test/CodeGen/Generic/APIntLoadStore.ll
@@ -513,1537 +513,1537 @@
 @i256_s = external global i256		; <i256*> [#uses=1]
 
 define void @i1_ls() nounwind  {
-	%tmp = load i1* @i1_l		; <i1> [#uses=1]
+	%tmp = load i1, i1* @i1_l		; <i1> [#uses=1]
 	store i1 %tmp, i1* @i1_s
 	ret void
 }
 
 define void @i2_ls() nounwind  {
-	%tmp = load i2* @i2_l		; <i2> [#uses=1]
+	%tmp = load i2, i2* @i2_l		; <i2> [#uses=1]
 	store i2 %tmp, i2* @i2_s
 	ret void
 }
 
 define void @i3_ls() nounwind  {
-	%tmp = load i3* @i3_l		; <i3> [#uses=1]
+	%tmp = load i3, i3* @i3_l		; <i3> [#uses=1]
 	store i3 %tmp, i3* @i3_s
 	ret void
 }
 
 define void @i4_ls() nounwind  {
-	%tmp = load i4* @i4_l		; <i4> [#uses=1]
+	%tmp = load i4, i4* @i4_l		; <i4> [#uses=1]
 	store i4 %tmp, i4* @i4_s
 	ret void
 }
 
 define void @i5_ls() nounwind  {
-	%tmp = load i5* @i5_l		; <i5> [#uses=1]
+	%tmp = load i5, i5* @i5_l		; <i5> [#uses=1]
 	store i5 %tmp, i5* @i5_s
 	ret void
 }
 
 define void @i6_ls() nounwind  {
-	%tmp = load i6* @i6_l		; <i6> [#uses=1]
+	%tmp = load i6, i6* @i6_l		; <i6> [#uses=1]
 	store i6 %tmp, i6* @i6_s
 	ret void
 }
 
 define void @i7_ls() nounwind  {
-	%tmp = load i7* @i7_l		; <i7> [#uses=1]
+	%tmp = load i7, i7* @i7_l		; <i7> [#uses=1]
 	store i7 %tmp, i7* @i7_s
 	ret void
 }
 
 define void @i8_ls() nounwind  {
-	%tmp = load i8* @i8_l		; <i8> [#uses=1]
+	%tmp = load i8, i8* @i8_l		; <i8> [#uses=1]
 	store i8 %tmp, i8* @i8_s
 	ret void
 }
 
 define void @i9_ls() nounwind  {
-	%tmp = load i9* @i9_l		; <i9> [#uses=1]
+	%tmp = load i9, i9* @i9_l		; <i9> [#uses=1]
 	store i9 %tmp, i9* @i9_s
 	ret void
 }
 
 define void @i10_ls() nounwind  {
-	%tmp = load i10* @i10_l		; <i10> [#uses=1]
+	%tmp = load i10, i10* @i10_l		; <i10> [#uses=1]
 	store i10 %tmp, i10* @i10_s
 	ret void
 }
 
 define void @i11_ls() nounwind  {
-	%tmp = load i11* @i11_l		; <i11> [#uses=1]
+	%tmp = load i11, i11* @i11_l		; <i11> [#uses=1]
 	store i11 %tmp, i11* @i11_s
 	ret void
 }
 
 define void @i12_ls() nounwind  {
-	%tmp = load i12* @i12_l		; <i12> [#uses=1]
+	%tmp = load i12, i12* @i12_l		; <i12> [#uses=1]
 	store i12 %tmp, i12* @i12_s
 	ret void
 }
 
 define void @i13_ls() nounwind  {
-	%tmp = load i13* @i13_l		; <i13> [#uses=1]
+	%tmp = load i13, i13* @i13_l		; <i13> [#uses=1]
 	store i13 %tmp, i13* @i13_s
 	ret void
 }
 
 define void @i14_ls() nounwind  {
-	%tmp = load i14* @i14_l		; <i14> [#uses=1]
+	%tmp = load i14, i14* @i14_l		; <i14> [#uses=1]
 	store i14 %tmp, i14* @i14_s
 	ret void
 }
 
 define void @i15_ls() nounwind  {
-	%tmp = load i15* @i15_l		; <i15> [#uses=1]
+	%tmp = load i15, i15* @i15_l		; <i15> [#uses=1]
 	store i15 %tmp, i15* @i15_s
 	ret void
 }
 
 define void @i16_ls() nounwind  {
-	%tmp = load i16* @i16_l		; <i16> [#uses=1]
+	%tmp = load i16, i16* @i16_l		; <i16> [#uses=1]
 	store i16 %tmp, i16* @i16_s
 	ret void
 }
 
 define void @i17_ls() nounwind  {
-	%tmp = load i17* @i17_l		; <i17> [#uses=1]
+	%tmp = load i17, i17* @i17_l		; <i17> [#uses=1]
 	store i17 %tmp, i17* @i17_s
 	ret void
 }
 
 define void @i18_ls() nounwind  {
-	%tmp = load i18* @i18_l		; <i18> [#uses=1]
+	%tmp = load i18, i18* @i18_l		; <i18> [#uses=1]
 	store i18 %tmp, i18* @i18_s
 	ret void
 }
 
 define void @i19_ls() nounwind  {
-	%tmp = load i19* @i19_l		; <i19> [#uses=1]
+	%tmp = load i19, i19* @i19_l		; <i19> [#uses=1]
 	store i19 %tmp, i19* @i19_s
 	ret void
 }
 
 define void @i20_ls() nounwind  {
-	%tmp = load i20* @i20_l		; <i20> [#uses=1]
+	%tmp = load i20, i20* @i20_l		; <i20> [#uses=1]
 	store i20 %tmp, i20* @i20_s
 	ret void
 }
 
 define void @i21_ls() nounwind  {
-	%tmp = load i21* @i21_l		; <i21> [#uses=1]
+	%tmp = load i21, i21* @i21_l		; <i21> [#uses=1]
 	store i21 %tmp, i21* @i21_s
 	ret void
 }
 
 define void @i22_ls() nounwind  {
-	%tmp = load i22* @i22_l		; <i22> [#uses=1]
+	%tmp = load i22, i22* @i22_l		; <i22> [#uses=1]
 	store i22 %tmp, i22* @i22_s
 	ret void
 }
 
 define void @i23_ls() nounwind  {
-	%tmp = load i23* @i23_l		; <i23> [#uses=1]
+	%tmp = load i23, i23* @i23_l		; <i23> [#uses=1]
 	store i23 %tmp, i23* @i23_s
 	ret void
 }
 
 define void @i24_ls() nounwind  {
-	%tmp = load i24* @i24_l		; <i24> [#uses=1]
+	%tmp = load i24, i24* @i24_l		; <i24> [#uses=1]
 	store i24 %tmp, i24* @i24_s
 	ret void
 }
 
 define void @i25_ls() nounwind  {
-	%tmp = load i25* @i25_l		; <i25> [#uses=1]
+	%tmp = load i25, i25* @i25_l		; <i25> [#uses=1]
 	store i25 %tmp, i25* @i25_s
 	ret void
 }
 
 define void @i26_ls() nounwind  {
-	%tmp = load i26* @i26_l		; <i26> [#uses=1]
+	%tmp = load i26, i26* @i26_l		; <i26> [#uses=1]
 	store i26 %tmp, i26* @i26_s
 	ret void
 }
 
 define void @i27_ls() nounwind  {
-	%tmp = load i27* @i27_l		; <i27> [#uses=1]
+	%tmp = load i27, i27* @i27_l		; <i27> [#uses=1]
 	store i27 %tmp, i27* @i27_s
 	ret void
 }
 
 define void @i28_ls() nounwind  {
-	%tmp = load i28* @i28_l		; <i28> [#uses=1]
+	%tmp = load i28, i28* @i28_l		; <i28> [#uses=1]
 	store i28 %tmp, i28* @i28_s
 	ret void
 }
 
 define void @i29_ls() nounwind  {
-	%tmp = load i29* @i29_l		; <i29> [#uses=1]
+	%tmp = load i29, i29* @i29_l		; <i29> [#uses=1]
 	store i29 %tmp, i29* @i29_s
 	ret void
 }
 
 define void @i30_ls() nounwind  {
-	%tmp = load i30* @i30_l		; <i30> [#uses=1]
+	%tmp = load i30, i30* @i30_l		; <i30> [#uses=1]
 	store i30 %tmp, i30* @i30_s
 	ret void
 }
 
 define void @i31_ls() nounwind  {
-	%tmp = load i31* @i31_l		; <i31> [#uses=1]
+	%tmp = load i31, i31* @i31_l		; <i31> [#uses=1]
 	store i31 %tmp, i31* @i31_s
 	ret void
 }
 
 define void @i32_ls() nounwind  {
-	%tmp = load i32* @i32_l		; <i32> [#uses=1]
+	%tmp = load i32, i32* @i32_l		; <i32> [#uses=1]
 	store i32 %tmp, i32* @i32_s
 	ret void
 }
 
 define void @i33_ls() nounwind  {
-	%tmp = load i33* @i33_l		; <i33> [#uses=1]
+	%tmp = load i33, i33* @i33_l		; <i33> [#uses=1]
 	store i33 %tmp, i33* @i33_s
 	ret void
 }
 
 define void @i34_ls() nounwind  {
-	%tmp = load i34* @i34_l		; <i34> [#uses=1]
+	%tmp = load i34, i34* @i34_l		; <i34> [#uses=1]
 	store i34 %tmp, i34* @i34_s
 	ret void
 }
 
 define void @i35_ls() nounwind  {
-	%tmp = load i35* @i35_l		; <i35> [#uses=1]
+	%tmp = load i35, i35* @i35_l		; <i35> [#uses=1]
 	store i35 %tmp, i35* @i35_s
 	ret void
 }
 
 define void @i36_ls() nounwind  {
-	%tmp = load i36* @i36_l		; <i36> [#uses=1]
+	%tmp = load i36, i36* @i36_l		; <i36> [#uses=1]
 	store i36 %tmp, i36* @i36_s
 	ret void
 }
 
 define void @i37_ls() nounwind  {
-	%tmp = load i37* @i37_l		; <i37> [#uses=1]
+	%tmp = load i37, i37* @i37_l		; <i37> [#uses=1]
 	store i37 %tmp, i37* @i37_s
 	ret void
 }
 
 define void @i38_ls() nounwind  {
-	%tmp = load i38* @i38_l		; <i38> [#uses=1]
+	%tmp = load i38, i38* @i38_l		; <i38> [#uses=1]
 	store i38 %tmp, i38* @i38_s
 	ret void
 }
 
 define void @i39_ls() nounwind  {
-	%tmp = load i39* @i39_l		; <i39> [#uses=1]
+	%tmp = load i39, i39* @i39_l		; <i39> [#uses=1]
 	store i39 %tmp, i39* @i39_s
 	ret void
 }
 
 define void @i40_ls() nounwind  {
-	%tmp = load i40* @i40_l		; <i40> [#uses=1]
+	%tmp = load i40, i40* @i40_l		; <i40> [#uses=1]
 	store i40 %tmp, i40* @i40_s
 	ret void
 }
 
 define void @i41_ls() nounwind  {
-	%tmp = load i41* @i41_l		; <i41> [#uses=1]
+	%tmp = load i41, i41* @i41_l		; <i41> [#uses=1]
 	store i41 %tmp, i41* @i41_s
 	ret void
 }
 
 define void @i42_ls() nounwind  {
-	%tmp = load i42* @i42_l		; <i42> [#uses=1]
+	%tmp = load i42, i42* @i42_l		; <i42> [#uses=1]
 	store i42 %tmp, i42* @i42_s
 	ret void
 }
 
 define void @i43_ls() nounwind  {
-	%tmp = load i43* @i43_l		; <i43> [#uses=1]
+	%tmp = load i43, i43* @i43_l		; <i43> [#uses=1]
 	store i43 %tmp, i43* @i43_s
 	ret void
 }
 
 define void @i44_ls() nounwind  {
-	%tmp = load i44* @i44_l		; <i44> [#uses=1]
+	%tmp = load i44, i44* @i44_l		; <i44> [#uses=1]
 	store i44 %tmp, i44* @i44_s
 	ret void
 }
 
 define void @i45_ls() nounwind  {
-	%tmp = load i45* @i45_l		; <i45> [#uses=1]
+	%tmp = load i45, i45* @i45_l		; <i45> [#uses=1]
 	store i45 %tmp, i45* @i45_s
 	ret void
 }
 
 define void @i46_ls() nounwind  {
-	%tmp = load i46* @i46_l		; <i46> [#uses=1]
+	%tmp = load i46, i46* @i46_l		; <i46> [#uses=1]
 	store i46 %tmp, i46* @i46_s
 	ret void
 }
 
 define void @i47_ls() nounwind  {
-	%tmp = load i47* @i47_l		; <i47> [#uses=1]
+	%tmp = load i47, i47* @i47_l		; <i47> [#uses=1]
 	store i47 %tmp, i47* @i47_s
 	ret void
 }
 
 define void @i48_ls() nounwind  {
-	%tmp = load i48* @i48_l		; <i48> [#uses=1]
+	%tmp = load i48, i48* @i48_l		; <i48> [#uses=1]
 	store i48 %tmp, i48* @i48_s
 	ret void
 }
 
 define void @i49_ls() nounwind  {
-	%tmp = load i49* @i49_l		; <i49> [#uses=1]
+	%tmp = load i49, i49* @i49_l		; <i49> [#uses=1]
 	store i49 %tmp, i49* @i49_s
 	ret void
 }
 
 define void @i50_ls() nounwind  {
-	%tmp = load i50* @i50_l		; <i50> [#uses=1]
+	%tmp = load i50, i50* @i50_l		; <i50> [#uses=1]
 	store i50 %tmp, i50* @i50_s
 	ret void
 }
 
 define void @i51_ls() nounwind  {
-	%tmp = load i51* @i51_l		; <i51> [#uses=1]
+	%tmp = load i51, i51* @i51_l		; <i51> [#uses=1]
 	store i51 %tmp, i51* @i51_s
 	ret void
 }
 
 define void @i52_ls() nounwind  {
-	%tmp = load i52* @i52_l		; <i52> [#uses=1]
+	%tmp = load i52, i52* @i52_l		; <i52> [#uses=1]
 	store i52 %tmp, i52* @i52_s
 	ret void
 }
 
 define void @i53_ls() nounwind  {
-	%tmp = load i53* @i53_l		; <i53> [#uses=1]
+	%tmp = load i53, i53* @i53_l		; <i53> [#uses=1]
 	store i53 %tmp, i53* @i53_s
 	ret void
 }
 
 define void @i54_ls() nounwind  {
-	%tmp = load i54* @i54_l		; <i54> [#uses=1]
+	%tmp = load i54, i54* @i54_l		; <i54> [#uses=1]
 	store i54 %tmp, i54* @i54_s
 	ret void
 }
 
 define void @i55_ls() nounwind  {
-	%tmp = load i55* @i55_l		; <i55> [#uses=1]
+	%tmp = load i55, i55* @i55_l		; <i55> [#uses=1]
 	store i55 %tmp, i55* @i55_s
 	ret void
 }
 
 define void @i56_ls() nounwind  {
-	%tmp = load i56* @i56_l		; <i56> [#uses=1]
+	%tmp = load i56, i56* @i56_l		; <i56> [#uses=1]
 	store i56 %tmp, i56* @i56_s
 	ret void
 }
 
 define void @i57_ls() nounwind  {
-	%tmp = load i57* @i57_l		; <i57> [#uses=1]
+	%tmp = load i57, i57* @i57_l		; <i57> [#uses=1]
 	store i57 %tmp, i57* @i57_s
 	ret void
 }
 
 define void @i58_ls() nounwind  {
-	%tmp = load i58* @i58_l		; <i58> [#uses=1]
+	%tmp = load i58, i58* @i58_l		; <i58> [#uses=1]
 	store i58 %tmp, i58* @i58_s
 	ret void
 }
 
 define void @i59_ls() nounwind  {
-	%tmp = load i59* @i59_l		; <i59> [#uses=1]
+	%tmp = load i59, i59* @i59_l		; <i59> [#uses=1]
 	store i59 %tmp, i59* @i59_s
 	ret void
 }
 
 define void @i60_ls() nounwind  {
-	%tmp = load i60* @i60_l		; <i60> [#uses=1]
+	%tmp = load i60, i60* @i60_l		; <i60> [#uses=1]
 	store i60 %tmp, i60* @i60_s
 	ret void
 }
 
 define void @i61_ls() nounwind  {
-	%tmp = load i61* @i61_l		; <i61> [#uses=1]
+	%tmp = load i61, i61* @i61_l		; <i61> [#uses=1]
 	store i61 %tmp, i61* @i61_s
 	ret void
 }
 
 define void @i62_ls() nounwind  {
-	%tmp = load i62* @i62_l		; <i62> [#uses=1]
+	%tmp = load i62, i62* @i62_l		; <i62> [#uses=1]
 	store i62 %tmp, i62* @i62_s
 	ret void
 }
 
 define void @i63_ls() nounwind  {
-	%tmp = load i63* @i63_l		; <i63> [#uses=1]
+	%tmp = load i63, i63* @i63_l		; <i63> [#uses=1]
 	store i63 %tmp, i63* @i63_s
 	ret void
 }
 
 define void @i64_ls() nounwind  {
-	%tmp = load i64* @i64_l		; <i64> [#uses=1]
+	%tmp = load i64, i64* @i64_l		; <i64> [#uses=1]
 	store i64 %tmp, i64* @i64_s
 	ret void
 }
 
 define void @i65_ls() nounwind  {
-	%tmp = load i65* @i65_l		; <i65> [#uses=1]
+	%tmp = load i65, i65* @i65_l		; <i65> [#uses=1]
 	store i65 %tmp, i65* @i65_s
 	ret void
 }
 
 define void @i66_ls() nounwind  {
-	%tmp = load i66* @i66_l		; <i66> [#uses=1]
+	%tmp = load i66, i66* @i66_l		; <i66> [#uses=1]
 	store i66 %tmp, i66* @i66_s
 	ret void
 }
 
 define void @i67_ls() nounwind  {
-	%tmp = load i67* @i67_l		; <i67> [#uses=1]
+	%tmp = load i67, i67* @i67_l		; <i67> [#uses=1]
 	store i67 %tmp, i67* @i67_s
 	ret void
 }
 
 define void @i68_ls() nounwind  {
-	%tmp = load i68* @i68_l		; <i68> [#uses=1]
+	%tmp = load i68, i68* @i68_l		; <i68> [#uses=1]
 	store i68 %tmp, i68* @i68_s
 	ret void
 }
 
 define void @i69_ls() nounwind  {
-	%tmp = load i69* @i69_l		; <i69> [#uses=1]
+	%tmp = load i69, i69* @i69_l		; <i69> [#uses=1]
 	store i69 %tmp, i69* @i69_s
 	ret void
 }
 
 define void @i70_ls() nounwind  {
-	%tmp = load i70* @i70_l		; <i70> [#uses=1]
+	%tmp = load i70, i70* @i70_l		; <i70> [#uses=1]
 	store i70 %tmp, i70* @i70_s
 	ret void
 }
 
 define void @i71_ls() nounwind  {
-	%tmp = load i71* @i71_l		; <i71> [#uses=1]
+	%tmp = load i71, i71* @i71_l		; <i71> [#uses=1]
 	store i71 %tmp, i71* @i71_s
 	ret void
 }
 
 define void @i72_ls() nounwind  {
-	%tmp = load i72* @i72_l		; <i72> [#uses=1]
+	%tmp = load i72, i72* @i72_l		; <i72> [#uses=1]
 	store i72 %tmp, i72* @i72_s
 	ret void
 }
 
 define void @i73_ls() nounwind  {
-	%tmp = load i73* @i73_l		; <i73> [#uses=1]
+	%tmp = load i73, i73* @i73_l		; <i73> [#uses=1]
 	store i73 %tmp, i73* @i73_s
 	ret void
 }
 
 define void @i74_ls() nounwind  {
-	%tmp = load i74* @i74_l		; <i74> [#uses=1]
+	%tmp = load i74, i74* @i74_l		; <i74> [#uses=1]
 	store i74 %tmp, i74* @i74_s
 	ret void
 }
 
 define void @i75_ls() nounwind  {
-	%tmp = load i75* @i75_l		; <i75> [#uses=1]
+	%tmp = load i75, i75* @i75_l		; <i75> [#uses=1]
 	store i75 %tmp, i75* @i75_s
 	ret void
 }
 
 define void @i76_ls() nounwind  {
-	%tmp = load i76* @i76_l		; <i76> [#uses=1]
+	%tmp = load i76, i76* @i76_l		; <i76> [#uses=1]
 	store i76 %tmp, i76* @i76_s
 	ret void
 }
 
 define void @i77_ls() nounwind  {
-	%tmp = load i77* @i77_l		; <i77> [#uses=1]
+	%tmp = load i77, i77* @i77_l		; <i77> [#uses=1]
 	store i77 %tmp, i77* @i77_s
 	ret void
 }
 
 define void @i78_ls() nounwind  {
-	%tmp = load i78* @i78_l		; <i78> [#uses=1]
+	%tmp = load i78, i78* @i78_l		; <i78> [#uses=1]
 	store i78 %tmp, i78* @i78_s
 	ret void
 }
 
 define void @i79_ls() nounwind  {
-	%tmp = load i79* @i79_l		; <i79> [#uses=1]
+	%tmp = load i79, i79* @i79_l		; <i79> [#uses=1]
 	store i79 %tmp, i79* @i79_s
 	ret void
 }
 
 define void @i80_ls() nounwind  {
-	%tmp = load i80* @i80_l		; <i80> [#uses=1]
+	%tmp = load i80, i80* @i80_l		; <i80> [#uses=1]
 	store i80 %tmp, i80* @i80_s
 	ret void
 }
 
 define void @i81_ls() nounwind  {
-	%tmp = load i81* @i81_l		; <i81> [#uses=1]
+	%tmp = load i81, i81* @i81_l		; <i81> [#uses=1]
 	store i81 %tmp, i81* @i81_s
 	ret void
 }
 
 define void @i82_ls() nounwind  {
-	%tmp = load i82* @i82_l		; <i82> [#uses=1]
+	%tmp = load i82, i82* @i82_l		; <i82> [#uses=1]
 	store i82 %tmp, i82* @i82_s
 	ret void
 }
 
 define void @i83_ls() nounwind  {
-	%tmp = load i83* @i83_l		; <i83> [#uses=1]
+	%tmp = load i83, i83* @i83_l		; <i83> [#uses=1]
 	store i83 %tmp, i83* @i83_s
 	ret void
 }
 
 define void @i84_ls() nounwind  {
-	%tmp = load i84* @i84_l		; <i84> [#uses=1]
+	%tmp = load i84, i84* @i84_l		; <i84> [#uses=1]
 	store i84 %tmp, i84* @i84_s
 	ret void
 }
 
 define void @i85_ls() nounwind  {
-	%tmp = load i85* @i85_l		; <i85> [#uses=1]
+	%tmp = load i85, i85* @i85_l		; <i85> [#uses=1]
 	store i85 %tmp, i85* @i85_s
 	ret void
 }
 
 define void @i86_ls() nounwind  {
-	%tmp = load i86* @i86_l		; <i86> [#uses=1]
+	%tmp = load i86, i86* @i86_l		; <i86> [#uses=1]
 	store i86 %tmp, i86* @i86_s
 	ret void
 }
 
 define void @i87_ls() nounwind  {
-	%tmp = load i87* @i87_l		; <i87> [#uses=1]
+	%tmp = load i87, i87* @i87_l		; <i87> [#uses=1]
 	store i87 %tmp, i87* @i87_s
 	ret void
 }
 
 define void @i88_ls() nounwind  {
-	%tmp = load i88* @i88_l		; <i88> [#uses=1]
+	%tmp = load i88, i88* @i88_l		; <i88> [#uses=1]
 	store i88 %tmp, i88* @i88_s
 	ret void
 }
 
 define void @i89_ls() nounwind  {
-	%tmp = load i89* @i89_l		; <i89> [#uses=1]
+	%tmp = load i89, i89* @i89_l		; <i89> [#uses=1]
 	store i89 %tmp, i89* @i89_s
 	ret void
 }
 
 define void @i90_ls() nounwind  {
-	%tmp = load i90* @i90_l		; <i90> [#uses=1]
+	%tmp = load i90, i90* @i90_l		; <i90> [#uses=1]
 	store i90 %tmp, i90* @i90_s
 	ret void
 }
 
 define void @i91_ls() nounwind  {
-	%tmp = load i91* @i91_l		; <i91> [#uses=1]
+	%tmp = load i91, i91* @i91_l		; <i91> [#uses=1]
 	store i91 %tmp, i91* @i91_s
 	ret void
 }
 
 define void @i92_ls() nounwind  {
-	%tmp = load i92* @i92_l		; <i92> [#uses=1]
+	%tmp = load i92, i92* @i92_l		; <i92> [#uses=1]
 	store i92 %tmp, i92* @i92_s
 	ret void
 }
 
 define void @i93_ls() nounwind  {
-	%tmp = load i93* @i93_l		; <i93> [#uses=1]
+	%tmp = load i93, i93* @i93_l		; <i93> [#uses=1]
 	store i93 %tmp, i93* @i93_s
 	ret void
 }
 
 define void @i94_ls() nounwind  {
-	%tmp = load i94* @i94_l		; <i94> [#uses=1]
+	%tmp = load i94, i94* @i94_l		; <i94> [#uses=1]
 	store i94 %tmp, i94* @i94_s
 	ret void
 }
 
 define void @i95_ls() nounwind  {
-	%tmp = load i95* @i95_l		; <i95> [#uses=1]
+	%tmp = load i95, i95* @i95_l		; <i95> [#uses=1]
 	store i95 %tmp, i95* @i95_s
 	ret void
 }
 
 define void @i96_ls() nounwind  {
-	%tmp = load i96* @i96_l		; <i96> [#uses=1]
+	%tmp = load i96, i96* @i96_l		; <i96> [#uses=1]
 	store i96 %tmp, i96* @i96_s
 	ret void
 }
 
 define void @i97_ls() nounwind  {
-	%tmp = load i97* @i97_l		; <i97> [#uses=1]
+	%tmp = load i97, i97* @i97_l		; <i97> [#uses=1]
 	store i97 %tmp, i97* @i97_s
 	ret void
 }
 
 define void @i98_ls() nounwind  {
-	%tmp = load i98* @i98_l		; <i98> [#uses=1]
+	%tmp = load i98, i98* @i98_l		; <i98> [#uses=1]
 	store i98 %tmp, i98* @i98_s
 	ret void
 }
 
 define void @i99_ls() nounwind  {
-	%tmp = load i99* @i99_l		; <i99> [#uses=1]
+	%tmp = load i99, i99* @i99_l		; <i99> [#uses=1]
 	store i99 %tmp, i99* @i99_s
 	ret void
 }
 
 define void @i100_ls() nounwind  {
-	%tmp = load i100* @i100_l		; <i100> [#uses=1]
+	%tmp = load i100, i100* @i100_l		; <i100> [#uses=1]
 	store i100 %tmp, i100* @i100_s
 	ret void
 }
 
 define void @i101_ls() nounwind  {
-	%tmp = load i101* @i101_l		; <i101> [#uses=1]
+	%tmp = load i101, i101* @i101_l		; <i101> [#uses=1]
 	store i101 %tmp, i101* @i101_s
 	ret void
 }
 
 define void @i102_ls() nounwind  {
-	%tmp = load i102* @i102_l		; <i102> [#uses=1]
+	%tmp = load i102, i102* @i102_l		; <i102> [#uses=1]
 	store i102 %tmp, i102* @i102_s
 	ret void
 }
 
 define void @i103_ls() nounwind  {
-	%tmp = load i103* @i103_l		; <i103> [#uses=1]
+	%tmp = load i103, i103* @i103_l		; <i103> [#uses=1]
 	store i103 %tmp, i103* @i103_s
 	ret void
 }
 
 define void @i104_ls() nounwind  {
-	%tmp = load i104* @i104_l		; <i104> [#uses=1]
+	%tmp = load i104, i104* @i104_l		; <i104> [#uses=1]
 	store i104 %tmp, i104* @i104_s
 	ret void
 }
 
 define void @i105_ls() nounwind  {
-	%tmp = load i105* @i105_l		; <i105> [#uses=1]
+	%tmp = load i105, i105* @i105_l		; <i105> [#uses=1]
 	store i105 %tmp, i105* @i105_s
 	ret void
 }
 
 define void @i106_ls() nounwind  {
-	%tmp = load i106* @i106_l		; <i106> [#uses=1]
+	%tmp = load i106, i106* @i106_l		; <i106> [#uses=1]
 	store i106 %tmp, i106* @i106_s
 	ret void
 }
 
 define void @i107_ls() nounwind  {
-	%tmp = load i107* @i107_l		; <i107> [#uses=1]
+	%tmp = load i107, i107* @i107_l		; <i107> [#uses=1]
 	store i107 %tmp, i107* @i107_s
 	ret void
 }
 
 define void @i108_ls() nounwind  {
-	%tmp = load i108* @i108_l		; <i108> [#uses=1]
+	%tmp = load i108, i108* @i108_l		; <i108> [#uses=1]
 	store i108 %tmp, i108* @i108_s
 	ret void
 }
 
 define void @i109_ls() nounwind  {
-	%tmp = load i109* @i109_l		; <i109> [#uses=1]
+	%tmp = load i109, i109* @i109_l		; <i109> [#uses=1]
 	store i109 %tmp, i109* @i109_s
 	ret void
 }
 
 define void @i110_ls() nounwind  {
-	%tmp = load i110* @i110_l		; <i110> [#uses=1]
+	%tmp = load i110, i110* @i110_l		; <i110> [#uses=1]
 	store i110 %tmp, i110* @i110_s
 	ret void
 }
 
 define void @i111_ls() nounwind  {
-	%tmp = load i111* @i111_l		; <i111> [#uses=1]
+	%tmp = load i111, i111* @i111_l		; <i111> [#uses=1]
 	store i111 %tmp, i111* @i111_s
 	ret void
 }
 
 define void @i112_ls() nounwind  {
-	%tmp = load i112* @i112_l		; <i112> [#uses=1]
+	%tmp = load i112, i112* @i112_l		; <i112> [#uses=1]
 	store i112 %tmp, i112* @i112_s
 	ret void
 }
 
 define void @i113_ls() nounwind  {
-	%tmp = load i113* @i113_l		; <i113> [#uses=1]
+	%tmp = load i113, i113* @i113_l		; <i113> [#uses=1]
 	store i113 %tmp, i113* @i113_s
 	ret void
 }
 
 define void @i114_ls() nounwind  {
-	%tmp = load i114* @i114_l		; <i114> [#uses=1]
+	%tmp = load i114, i114* @i114_l		; <i114> [#uses=1]
 	store i114 %tmp, i114* @i114_s
 	ret void
 }
 
 define void @i115_ls() nounwind  {
-	%tmp = load i115* @i115_l		; <i115> [#uses=1]
+	%tmp = load i115, i115* @i115_l		; <i115> [#uses=1]
 	store i115 %tmp, i115* @i115_s
 	ret void
 }
 
 define void @i116_ls() nounwind  {
-	%tmp = load i116* @i116_l		; <i116> [#uses=1]
+	%tmp = load i116, i116* @i116_l		; <i116> [#uses=1]
 	store i116 %tmp, i116* @i116_s
 	ret void
 }
 
 define void @i117_ls() nounwind  {
-	%tmp = load i117* @i117_l		; <i117> [#uses=1]
+	%tmp = load i117, i117* @i117_l		; <i117> [#uses=1]
 	store i117 %tmp, i117* @i117_s
 	ret void
 }
 
 define void @i118_ls() nounwind  {
-	%tmp = load i118* @i118_l		; <i118> [#uses=1]
+	%tmp = load i118, i118* @i118_l		; <i118> [#uses=1]
 	store i118 %tmp, i118* @i118_s
 	ret void
 }
 
 define void @i119_ls() nounwind  {
-	%tmp = load i119* @i119_l		; <i119> [#uses=1]
+	%tmp = load i119, i119* @i119_l		; <i119> [#uses=1]
 	store i119 %tmp, i119* @i119_s
 	ret void
 }
 
 define void @i120_ls() nounwind  {
-	%tmp = load i120* @i120_l		; <i120> [#uses=1]
+	%tmp = load i120, i120* @i120_l		; <i120> [#uses=1]
 	store i120 %tmp, i120* @i120_s
 	ret void
 }
 
 define void @i121_ls() nounwind  {
-	%tmp = load i121* @i121_l		; <i121> [#uses=1]
+	%tmp = load i121, i121* @i121_l		; <i121> [#uses=1]
 	store i121 %tmp, i121* @i121_s
 	ret void
 }
 
 define void @i122_ls() nounwind  {
-	%tmp = load i122* @i122_l		; <i122> [#uses=1]
+	%tmp = load i122, i122* @i122_l		; <i122> [#uses=1]
 	store i122 %tmp, i122* @i122_s
 	ret void
 }
 
 define void @i123_ls() nounwind  {
-	%tmp = load i123* @i123_l		; <i123> [#uses=1]
+	%tmp = load i123, i123* @i123_l		; <i123> [#uses=1]
 	store i123 %tmp, i123* @i123_s
 	ret void
 }
 
 define void @i124_ls() nounwind  {
-	%tmp = load i124* @i124_l		; <i124> [#uses=1]
+	%tmp = load i124, i124* @i124_l		; <i124> [#uses=1]
 	store i124 %tmp, i124* @i124_s
 	ret void
 }
 
 define void @i125_ls() nounwind  {
-	%tmp = load i125* @i125_l		; <i125> [#uses=1]
+	%tmp = load i125, i125* @i125_l		; <i125> [#uses=1]
 	store i125 %tmp, i125* @i125_s
 	ret void
 }
 
 define void @i126_ls() nounwind  {
-	%tmp = load i126* @i126_l		; <i126> [#uses=1]
+	%tmp = load i126, i126* @i126_l		; <i126> [#uses=1]
 	store i126 %tmp, i126* @i126_s
 	ret void
 }
 
 define void @i127_ls() nounwind  {
-	%tmp = load i127* @i127_l		; <i127> [#uses=1]
+	%tmp = load i127, i127* @i127_l		; <i127> [#uses=1]
 	store i127 %tmp, i127* @i127_s
 	ret void
 }
 
 define void @i128_ls() nounwind  {
-	%tmp = load i128* @i128_l		; <i128> [#uses=1]
+	%tmp = load i128, i128* @i128_l		; <i128> [#uses=1]
 	store i128 %tmp, i128* @i128_s
 	ret void
 }
 
 define void @i129_ls() nounwind  {
-	%tmp = load i129* @i129_l		; <i129> [#uses=1]
+	%tmp = load i129, i129* @i129_l		; <i129> [#uses=1]
 	store i129 %tmp, i129* @i129_s
 	ret void
 }
 
 define void @i130_ls() nounwind  {
-	%tmp = load i130* @i130_l		; <i130> [#uses=1]
+	%tmp = load i130, i130* @i130_l		; <i130> [#uses=1]
 	store i130 %tmp, i130* @i130_s
 	ret void
 }
 
 define void @i131_ls() nounwind  {
-	%tmp = load i131* @i131_l		; <i131> [#uses=1]
+	%tmp = load i131, i131* @i131_l		; <i131> [#uses=1]
 	store i131 %tmp, i131* @i131_s
 	ret void
 }
 
 define void @i132_ls() nounwind  {
-	%tmp = load i132* @i132_l		; <i132> [#uses=1]
+	%tmp = load i132, i132* @i132_l		; <i132> [#uses=1]
 	store i132 %tmp, i132* @i132_s
 	ret void
 }
 
 define void @i133_ls() nounwind  {
-	%tmp = load i133* @i133_l		; <i133> [#uses=1]
+	%tmp = load i133, i133* @i133_l		; <i133> [#uses=1]
 	store i133 %tmp, i133* @i133_s
 	ret void
 }
 
 define void @i134_ls() nounwind  {
-	%tmp = load i134* @i134_l		; <i134> [#uses=1]
+	%tmp = load i134, i134* @i134_l		; <i134> [#uses=1]
 	store i134 %tmp, i134* @i134_s
 	ret void
 }
 
 define void @i135_ls() nounwind  {
-	%tmp = load i135* @i135_l		; <i135> [#uses=1]
+	%tmp = load i135, i135* @i135_l		; <i135> [#uses=1]
 	store i135 %tmp, i135* @i135_s
 	ret void
 }
 
 define void @i136_ls() nounwind  {
-	%tmp = load i136* @i136_l		; <i136> [#uses=1]
+	%tmp = load i136, i136* @i136_l		; <i136> [#uses=1]
 	store i136 %tmp, i136* @i136_s
 	ret void
 }
 
 define void @i137_ls() nounwind  {
-	%tmp = load i137* @i137_l		; <i137> [#uses=1]
+	%tmp = load i137, i137* @i137_l		; <i137> [#uses=1]
 	store i137 %tmp, i137* @i137_s
 	ret void
 }
 
 define void @i138_ls() nounwind  {
-	%tmp = load i138* @i138_l		; <i138> [#uses=1]
+	%tmp = load i138, i138* @i138_l		; <i138> [#uses=1]
 	store i138 %tmp, i138* @i138_s
 	ret void
 }
 
 define void @i139_ls() nounwind  {
-	%tmp = load i139* @i139_l		; <i139> [#uses=1]
+	%tmp = load i139, i139* @i139_l		; <i139> [#uses=1]
 	store i139 %tmp, i139* @i139_s
 	ret void
 }
 
 define void @i140_ls() nounwind  {
-	%tmp = load i140* @i140_l		; <i140> [#uses=1]
+	%tmp = load i140, i140* @i140_l		; <i140> [#uses=1]
 	store i140 %tmp, i140* @i140_s
 	ret void
 }
 
 define void @i141_ls() nounwind  {
-	%tmp = load i141* @i141_l		; <i141> [#uses=1]
+	%tmp = load i141, i141* @i141_l		; <i141> [#uses=1]
 	store i141 %tmp, i141* @i141_s
 	ret void
 }
 
 define void @i142_ls() nounwind  {
-	%tmp = load i142* @i142_l		; <i142> [#uses=1]
+	%tmp = load i142, i142* @i142_l		; <i142> [#uses=1]
 	store i142 %tmp, i142* @i142_s
 	ret void
 }
 
 define void @i143_ls() nounwind  {
-	%tmp = load i143* @i143_l		; <i143> [#uses=1]
+	%tmp = load i143, i143* @i143_l		; <i143> [#uses=1]
 	store i143 %tmp, i143* @i143_s
 	ret void
 }
 
 define void @i144_ls() nounwind  {
-	%tmp = load i144* @i144_l		; <i144> [#uses=1]
+	%tmp = load i144, i144* @i144_l		; <i144> [#uses=1]
 	store i144 %tmp, i144* @i144_s
 	ret void
 }
 
 define void @i145_ls() nounwind  {
-	%tmp = load i145* @i145_l		; <i145> [#uses=1]
+	%tmp = load i145, i145* @i145_l		; <i145> [#uses=1]
 	store i145 %tmp, i145* @i145_s
 	ret void
 }
 
 define void @i146_ls() nounwind  {
-	%tmp = load i146* @i146_l		; <i146> [#uses=1]
+	%tmp = load i146, i146* @i146_l		; <i146> [#uses=1]
 	store i146 %tmp, i146* @i146_s
 	ret void
 }
 
 define void @i147_ls() nounwind  {
-	%tmp = load i147* @i147_l		; <i147> [#uses=1]
+	%tmp = load i147, i147* @i147_l		; <i147> [#uses=1]
 	store i147 %tmp, i147* @i147_s
 	ret void
 }
 
 define void @i148_ls() nounwind  {
-	%tmp = load i148* @i148_l		; <i148> [#uses=1]
+	%tmp = load i148, i148* @i148_l		; <i148> [#uses=1]
 	store i148 %tmp, i148* @i148_s
 	ret void
 }
 
 define void @i149_ls() nounwind  {
-	%tmp = load i149* @i149_l		; <i149> [#uses=1]
+	%tmp = load i149, i149* @i149_l		; <i149> [#uses=1]
 	store i149 %tmp, i149* @i149_s
 	ret void
 }
 
 define void @i150_ls() nounwind  {
-	%tmp = load i150* @i150_l		; <i150> [#uses=1]
+	%tmp = load i150, i150* @i150_l		; <i150> [#uses=1]
 	store i150 %tmp, i150* @i150_s
 	ret void
 }
 
 define void @i151_ls() nounwind  {
-	%tmp = load i151* @i151_l		; <i151> [#uses=1]
+	%tmp = load i151, i151* @i151_l		; <i151> [#uses=1]
 	store i151 %tmp, i151* @i151_s
 	ret void
 }
 
 define void @i152_ls() nounwind  {
-	%tmp = load i152* @i152_l		; <i152> [#uses=1]
+	%tmp = load i152, i152* @i152_l		; <i152> [#uses=1]
 	store i152 %tmp, i152* @i152_s
 	ret void
 }
 
 define void @i153_ls() nounwind  {
-	%tmp = load i153* @i153_l		; <i153> [#uses=1]
+	%tmp = load i153, i153* @i153_l		; <i153> [#uses=1]
 	store i153 %tmp, i153* @i153_s
 	ret void
 }
 
 define void @i154_ls() nounwind  {
-	%tmp = load i154* @i154_l		; <i154> [#uses=1]
+	%tmp = load i154, i154* @i154_l		; <i154> [#uses=1]
 	store i154 %tmp, i154* @i154_s
 	ret void
 }
 
 define void @i155_ls() nounwind  {
-	%tmp = load i155* @i155_l		; <i155> [#uses=1]
+	%tmp = load i155, i155* @i155_l		; <i155> [#uses=1]
 	store i155 %tmp, i155* @i155_s
 	ret void
 }
 
 define void @i156_ls() nounwind  {
-	%tmp = load i156* @i156_l		; <i156> [#uses=1]
+	%tmp = load i156, i156* @i156_l		; <i156> [#uses=1]
 	store i156 %tmp, i156* @i156_s
 	ret void
 }
 
 define void @i157_ls() nounwind  {
-	%tmp = load i157* @i157_l		; <i157> [#uses=1]
+	%tmp = load i157, i157* @i157_l		; <i157> [#uses=1]
 	store i157 %tmp, i157* @i157_s
 	ret void
 }
 
 define void @i158_ls() nounwind  {
-	%tmp = load i158* @i158_l		; <i158> [#uses=1]
+	%tmp = load i158, i158* @i158_l		; <i158> [#uses=1]
 	store i158 %tmp, i158* @i158_s
 	ret void
 }
 
 define void @i159_ls() nounwind  {
-	%tmp = load i159* @i159_l		; <i159> [#uses=1]
+	%tmp = load i159, i159* @i159_l		; <i159> [#uses=1]
 	store i159 %tmp, i159* @i159_s
 	ret void
 }
 
 define void @i160_ls() nounwind  {
-	%tmp = load i160* @i160_l		; <i160> [#uses=1]
+	%tmp = load i160, i160* @i160_l		; <i160> [#uses=1]
 	store i160 %tmp, i160* @i160_s
 	ret void
 }
 
 define void @i161_ls() nounwind  {
-	%tmp = load i161* @i161_l		; <i161> [#uses=1]
+	%tmp = load i161, i161* @i161_l		; <i161> [#uses=1]
 	store i161 %tmp, i161* @i161_s
 	ret void
 }
 
 define void @i162_ls() nounwind  {
-	%tmp = load i162* @i162_l		; <i162> [#uses=1]
+	%tmp = load i162, i162* @i162_l		; <i162> [#uses=1]
 	store i162 %tmp, i162* @i162_s
 	ret void
 }
 
 define void @i163_ls() nounwind  {
-	%tmp = load i163* @i163_l		; <i163> [#uses=1]
+	%tmp = load i163, i163* @i163_l		; <i163> [#uses=1]
 	store i163 %tmp, i163* @i163_s
 	ret void
 }
 
 define void @i164_ls() nounwind  {
-	%tmp = load i164* @i164_l		; <i164> [#uses=1]
+	%tmp = load i164, i164* @i164_l		; <i164> [#uses=1]
 	store i164 %tmp, i164* @i164_s
 	ret void
 }
 
 define void @i165_ls() nounwind  {
-	%tmp = load i165* @i165_l		; <i165> [#uses=1]
+	%tmp = load i165, i165* @i165_l		; <i165> [#uses=1]
 	store i165 %tmp, i165* @i165_s
 	ret void
 }
 
 define void @i166_ls() nounwind  {
-	%tmp = load i166* @i166_l		; <i166> [#uses=1]
+	%tmp = load i166, i166* @i166_l		; <i166> [#uses=1]
 	store i166 %tmp, i166* @i166_s
 	ret void
 }
 
 define void @i167_ls() nounwind  {
-	%tmp = load i167* @i167_l		; <i167> [#uses=1]
+	%tmp = load i167, i167* @i167_l		; <i167> [#uses=1]
 	store i167 %tmp, i167* @i167_s
 	ret void
 }
 
 define void @i168_ls() nounwind  {
-	%tmp = load i168* @i168_l		; <i168> [#uses=1]
+	%tmp = load i168, i168* @i168_l		; <i168> [#uses=1]
 	store i168 %tmp, i168* @i168_s
 	ret void
 }
 
 define void @i169_ls() nounwind  {
-	%tmp = load i169* @i169_l		; <i169> [#uses=1]
+	%tmp = load i169, i169* @i169_l		; <i169> [#uses=1]
 	store i169 %tmp, i169* @i169_s
 	ret void
 }
 
 define void @i170_ls() nounwind  {
-	%tmp = load i170* @i170_l		; <i170> [#uses=1]
+	%tmp = load i170, i170* @i170_l		; <i170> [#uses=1]
 	store i170 %tmp, i170* @i170_s
 	ret void
 }
 
 define void @i171_ls() nounwind  {
-	%tmp = load i171* @i171_l		; <i171> [#uses=1]
+	%tmp = load i171, i171* @i171_l		; <i171> [#uses=1]
 	store i171 %tmp, i171* @i171_s
 	ret void
 }
 
 define void @i172_ls() nounwind  {
-	%tmp = load i172* @i172_l		; <i172> [#uses=1]
+	%tmp = load i172, i172* @i172_l		; <i172> [#uses=1]
 	store i172 %tmp, i172* @i172_s
 	ret void
 }
 
 define void @i173_ls() nounwind  {
-	%tmp = load i173* @i173_l		; <i173> [#uses=1]
+	%tmp = load i173, i173* @i173_l		; <i173> [#uses=1]
 	store i173 %tmp, i173* @i173_s
 	ret void
 }
 
 define void @i174_ls() nounwind  {
-	%tmp = load i174* @i174_l		; <i174> [#uses=1]
+	%tmp = load i174, i174* @i174_l		; <i174> [#uses=1]
 	store i174 %tmp, i174* @i174_s
 	ret void
 }
 
 define void @i175_ls() nounwind  {
-	%tmp = load i175* @i175_l		; <i175> [#uses=1]
+	%tmp = load i175, i175* @i175_l		; <i175> [#uses=1]
 	store i175 %tmp, i175* @i175_s
 	ret void
 }
 
 define void @i176_ls() nounwind  {
-	%tmp = load i176* @i176_l		; <i176> [#uses=1]
+	%tmp = load i176, i176* @i176_l		; <i176> [#uses=1]
 	store i176 %tmp, i176* @i176_s
 	ret void
 }
 
 define void @i177_ls() nounwind  {
-	%tmp = load i177* @i177_l		; <i177> [#uses=1]
+	%tmp = load i177, i177* @i177_l		; <i177> [#uses=1]
 	store i177 %tmp, i177* @i177_s
 	ret void
 }
 
 define void @i178_ls() nounwind  {
-	%tmp = load i178* @i178_l		; <i178> [#uses=1]
+	%tmp = load i178, i178* @i178_l		; <i178> [#uses=1]
 	store i178 %tmp, i178* @i178_s
 	ret void
 }
 
 define void @i179_ls() nounwind  {
-	%tmp = load i179* @i179_l		; <i179> [#uses=1]
+	%tmp = load i179, i179* @i179_l		; <i179> [#uses=1]
 	store i179 %tmp, i179* @i179_s
 	ret void
 }
 
 define void @i180_ls() nounwind  {
-	%tmp = load i180* @i180_l		; <i180> [#uses=1]
+	%tmp = load i180, i180* @i180_l		; <i180> [#uses=1]
 	store i180 %tmp, i180* @i180_s
 	ret void
 }
 
 define void @i181_ls() nounwind  {
-	%tmp = load i181* @i181_l		; <i181> [#uses=1]
+	%tmp = load i181, i181* @i181_l		; <i181> [#uses=1]
 	store i181 %tmp, i181* @i181_s
 	ret void
 }
 
 define void @i182_ls() nounwind  {
-	%tmp = load i182* @i182_l		; <i182> [#uses=1]
+	%tmp = load i182, i182* @i182_l		; <i182> [#uses=1]
 	store i182 %tmp, i182* @i182_s
 	ret void
 }
 
 define void @i183_ls() nounwind  {
-	%tmp = load i183* @i183_l		; <i183> [#uses=1]
+	%tmp = load i183, i183* @i183_l		; <i183> [#uses=1]
 	store i183 %tmp, i183* @i183_s
 	ret void
 }
 
 define void @i184_ls() nounwind  {
-	%tmp = load i184* @i184_l		; <i184> [#uses=1]
+	%tmp = load i184, i184* @i184_l		; <i184> [#uses=1]
 	store i184 %tmp, i184* @i184_s
 	ret void
 }
 
 define void @i185_ls() nounwind  {
-	%tmp = load i185* @i185_l		; <i185> [#uses=1]
+	%tmp = load i185, i185* @i185_l		; <i185> [#uses=1]
 	store i185 %tmp, i185* @i185_s
 	ret void
 }
 
 define void @i186_ls() nounwind  {
-	%tmp = load i186* @i186_l		; <i186> [#uses=1]
+	%tmp = load i186, i186* @i186_l		; <i186> [#uses=1]
 	store i186 %tmp, i186* @i186_s
 	ret void
 }
 
 define void @i187_ls() nounwind  {
-	%tmp = load i187* @i187_l		; <i187> [#uses=1]
+	%tmp = load i187, i187* @i187_l		; <i187> [#uses=1]
 	store i187 %tmp, i187* @i187_s
 	ret void
 }
 
 define void @i188_ls() nounwind  {
-	%tmp = load i188* @i188_l		; <i188> [#uses=1]
+	%tmp = load i188, i188* @i188_l		; <i188> [#uses=1]
 	store i188 %tmp, i188* @i188_s
 	ret void
 }
 
 define void @i189_ls() nounwind  {
-	%tmp = load i189* @i189_l		; <i189> [#uses=1]
+	%tmp = load i189, i189* @i189_l		; <i189> [#uses=1]
 	store i189 %tmp, i189* @i189_s
 	ret void
 }
 
 define void @i190_ls() nounwind  {
-	%tmp = load i190* @i190_l		; <i190> [#uses=1]
+	%tmp = load i190, i190* @i190_l		; <i190> [#uses=1]
 	store i190 %tmp, i190* @i190_s
 	ret void
 }
 
 define void @i191_ls() nounwind  {
-	%tmp = load i191* @i191_l		; <i191> [#uses=1]
+	%tmp = load i191, i191* @i191_l		; <i191> [#uses=1]
 	store i191 %tmp, i191* @i191_s
 	ret void
 }
 
 define void @i192_ls() nounwind  {
-	%tmp = load i192* @i192_l		; <i192> [#uses=1]
+	%tmp = load i192, i192* @i192_l		; <i192> [#uses=1]
 	store i192 %tmp, i192* @i192_s
 	ret void
 }
 
 define void @i193_ls() nounwind  {
-	%tmp = load i193* @i193_l		; <i193> [#uses=1]
+	%tmp = load i193, i193* @i193_l		; <i193> [#uses=1]
 	store i193 %tmp, i193* @i193_s
 	ret void
 }
 
 define void @i194_ls() nounwind  {
-	%tmp = load i194* @i194_l		; <i194> [#uses=1]
+	%tmp = load i194, i194* @i194_l		; <i194> [#uses=1]
 	store i194 %tmp, i194* @i194_s
 	ret void
 }
 
 define void @i195_ls() nounwind  {
-	%tmp = load i195* @i195_l		; <i195> [#uses=1]
+	%tmp = load i195, i195* @i195_l		; <i195> [#uses=1]
 	store i195 %tmp, i195* @i195_s
 	ret void
 }
 
 define void @i196_ls() nounwind  {
-	%tmp = load i196* @i196_l		; <i196> [#uses=1]
+	%tmp = load i196, i196* @i196_l		; <i196> [#uses=1]
 	store i196 %tmp, i196* @i196_s
 	ret void
 }
 
 define void @i197_ls() nounwind  {
-	%tmp = load i197* @i197_l		; <i197> [#uses=1]
+	%tmp = load i197, i197* @i197_l		; <i197> [#uses=1]
 	store i197 %tmp, i197* @i197_s
 	ret void
 }
 
 define void @i198_ls() nounwind  {
-	%tmp = load i198* @i198_l		; <i198> [#uses=1]
+	%tmp = load i198, i198* @i198_l		; <i198> [#uses=1]
 	store i198 %tmp, i198* @i198_s
 	ret void
 }
 
 define void @i199_ls() nounwind  {
-	%tmp = load i199* @i199_l		; <i199> [#uses=1]
+	%tmp = load i199, i199* @i199_l		; <i199> [#uses=1]
 	store i199 %tmp, i199* @i199_s
 	ret void
 }
 
 define void @i200_ls() nounwind  {
-	%tmp = load i200* @i200_l		; <i200> [#uses=1]
+	%tmp = load i200, i200* @i200_l		; <i200> [#uses=1]
 	store i200 %tmp, i200* @i200_s
 	ret void
 }
 
 define void @i201_ls() nounwind  {
-	%tmp = load i201* @i201_l		; <i201> [#uses=1]
+	%tmp = load i201, i201* @i201_l		; <i201> [#uses=1]
 	store i201 %tmp, i201* @i201_s
 	ret void
 }
 
 define void @i202_ls() nounwind  {
-	%tmp = load i202* @i202_l		; <i202> [#uses=1]
+	%tmp = load i202, i202* @i202_l		; <i202> [#uses=1]
 	store i202 %tmp, i202* @i202_s
 	ret void
 }
 
 define void @i203_ls() nounwind  {
-	%tmp = load i203* @i203_l		; <i203> [#uses=1]
+	%tmp = load i203, i203* @i203_l		; <i203> [#uses=1]
 	store i203 %tmp, i203* @i203_s
 	ret void
 }
 
 define void @i204_ls() nounwind  {
-	%tmp = load i204* @i204_l		; <i204> [#uses=1]
+	%tmp = load i204, i204* @i204_l		; <i204> [#uses=1]
 	store i204 %tmp, i204* @i204_s
 	ret void
 }
 
 define void @i205_ls() nounwind  {
-	%tmp = load i205* @i205_l		; <i205> [#uses=1]
+	%tmp = load i205, i205* @i205_l		; <i205> [#uses=1]
 	store i205 %tmp, i205* @i205_s
 	ret void
 }
 
 define void @i206_ls() nounwind  {
-	%tmp = load i206* @i206_l		; <i206> [#uses=1]
+	%tmp = load i206, i206* @i206_l		; <i206> [#uses=1]
 	store i206 %tmp, i206* @i206_s
 	ret void
 }
 
 define void @i207_ls() nounwind  {
-	%tmp = load i207* @i207_l		; <i207> [#uses=1]
+	%tmp = load i207, i207* @i207_l		; <i207> [#uses=1]
 	store i207 %tmp, i207* @i207_s
 	ret void
 }
 
 define void @i208_ls() nounwind  {
-	%tmp = load i208* @i208_l		; <i208> [#uses=1]
+	%tmp = load i208, i208* @i208_l		; <i208> [#uses=1]
 	store i208 %tmp, i208* @i208_s
 	ret void
 }
 
 define void @i209_ls() nounwind  {
-	%tmp = load i209* @i209_l		; <i209> [#uses=1]
+	%tmp = load i209, i209* @i209_l		; <i209> [#uses=1]
 	store i209 %tmp, i209* @i209_s
 	ret void
 }
 
 define void @i210_ls() nounwind  {
-	%tmp = load i210* @i210_l		; <i210> [#uses=1]
+	%tmp = load i210, i210* @i210_l		; <i210> [#uses=1]
 	store i210 %tmp, i210* @i210_s
 	ret void
 }
 
 define void @i211_ls() nounwind  {
-	%tmp = load i211* @i211_l		; <i211> [#uses=1]
+	%tmp = load i211, i211* @i211_l		; <i211> [#uses=1]
 	store i211 %tmp, i211* @i211_s
 	ret void
 }
 
 define void @i212_ls() nounwind  {
-	%tmp = load i212* @i212_l		; <i212> [#uses=1]
+	%tmp = load i212, i212* @i212_l		; <i212> [#uses=1]
 	store i212 %tmp, i212* @i212_s
 	ret void
 }
 
 define void @i213_ls() nounwind  {
-	%tmp = load i213* @i213_l		; <i213> [#uses=1]
+	%tmp = load i213, i213* @i213_l		; <i213> [#uses=1]
 	store i213 %tmp, i213* @i213_s
 	ret void
 }
 
 define void @i214_ls() nounwind  {
-	%tmp = load i214* @i214_l		; <i214> [#uses=1]
+	%tmp = load i214, i214* @i214_l		; <i214> [#uses=1]
 	store i214 %tmp, i214* @i214_s
 	ret void
 }
 
 define void @i215_ls() nounwind  {
-	%tmp = load i215* @i215_l		; <i215> [#uses=1]
+	%tmp = load i215, i215* @i215_l		; <i215> [#uses=1]
 	store i215 %tmp, i215* @i215_s
 	ret void
 }
 
 define void @i216_ls() nounwind  {
-	%tmp = load i216* @i216_l		; <i216> [#uses=1]
+	%tmp = load i216, i216* @i216_l		; <i216> [#uses=1]
 	store i216 %tmp, i216* @i216_s
 	ret void
 }
 
 define void @i217_ls() nounwind  {
-	%tmp = load i217* @i217_l		; <i217> [#uses=1]
+	%tmp = load i217, i217* @i217_l		; <i217> [#uses=1]
 	store i217 %tmp, i217* @i217_s
 	ret void
 }
 
 define void @i218_ls() nounwind  {
-	%tmp = load i218* @i218_l		; <i218> [#uses=1]
+	%tmp = load i218, i218* @i218_l		; <i218> [#uses=1]
 	store i218 %tmp, i218* @i218_s
 	ret void
 }
 
 define void @i219_ls() nounwind  {
-	%tmp = load i219* @i219_l		; <i219> [#uses=1]
+	%tmp = load i219, i219* @i219_l		; <i219> [#uses=1]
 	store i219 %tmp, i219* @i219_s
 	ret void
 }
 
 define void @i220_ls() nounwind  {
-	%tmp = load i220* @i220_l		; <i220> [#uses=1]
+	%tmp = load i220, i220* @i220_l		; <i220> [#uses=1]
 	store i220 %tmp, i220* @i220_s
 	ret void
 }
 
 define void @i221_ls() nounwind  {
-	%tmp = load i221* @i221_l		; <i221> [#uses=1]
+	%tmp = load i221, i221* @i221_l		; <i221> [#uses=1]
 	store i221 %tmp, i221* @i221_s
 	ret void
 }
 
 define void @i222_ls() nounwind  {
-	%tmp = load i222* @i222_l		; <i222> [#uses=1]
+	%tmp = load i222, i222* @i222_l		; <i222> [#uses=1]
 	store i222 %tmp, i222* @i222_s
 	ret void
 }
 
 define void @i223_ls() nounwind  {
-	%tmp = load i223* @i223_l		; <i223> [#uses=1]
+	%tmp = load i223, i223* @i223_l		; <i223> [#uses=1]
 	store i223 %tmp, i223* @i223_s
 	ret void
 }
 
 define void @i224_ls() nounwind  {
-	%tmp = load i224* @i224_l		; <i224> [#uses=1]
+	%tmp = load i224, i224* @i224_l		; <i224> [#uses=1]
 	store i224 %tmp, i224* @i224_s
 	ret void
 }
 
 define void @i225_ls() nounwind  {
-	%tmp = load i225* @i225_l		; <i225> [#uses=1]
+	%tmp = load i225, i225* @i225_l		; <i225> [#uses=1]
 	store i225 %tmp, i225* @i225_s
 	ret void
 }
 
 define void @i226_ls() nounwind  {
-	%tmp = load i226* @i226_l		; <i226> [#uses=1]
+	%tmp = load i226, i226* @i226_l		; <i226> [#uses=1]
 	store i226 %tmp, i226* @i226_s
 	ret void
 }
 
 define void @i227_ls() nounwind  {
-	%tmp = load i227* @i227_l		; <i227> [#uses=1]
+	%tmp = load i227, i227* @i227_l		; <i227> [#uses=1]
 	store i227 %tmp, i227* @i227_s
 	ret void
 }
 
 define void @i228_ls() nounwind  {
-	%tmp = load i228* @i228_l		; <i228> [#uses=1]
+	%tmp = load i228, i228* @i228_l		; <i228> [#uses=1]
 	store i228 %tmp, i228* @i228_s
 	ret void
 }
 
 define void @i229_ls() nounwind  {
-	%tmp = load i229* @i229_l		; <i229> [#uses=1]
+	%tmp = load i229, i229* @i229_l		; <i229> [#uses=1]
 	store i229 %tmp, i229* @i229_s
 	ret void
 }
 
 define void @i230_ls() nounwind  {
-	%tmp = load i230* @i230_l		; <i230> [#uses=1]
+	%tmp = load i230, i230* @i230_l		; <i230> [#uses=1]
 	store i230 %tmp, i230* @i230_s
 	ret void
 }
 
 define void @i231_ls() nounwind  {
-	%tmp = load i231* @i231_l		; <i231> [#uses=1]
+	%tmp = load i231, i231* @i231_l		; <i231> [#uses=1]
 	store i231 %tmp, i231* @i231_s
 	ret void
 }
 
 define void @i232_ls() nounwind  {
-	%tmp = load i232* @i232_l		; <i232> [#uses=1]
+	%tmp = load i232, i232* @i232_l		; <i232> [#uses=1]
 	store i232 %tmp, i232* @i232_s
 	ret void
 }
 
 define void @i233_ls() nounwind  {
-	%tmp = load i233* @i233_l		; <i233> [#uses=1]
+	%tmp = load i233, i233* @i233_l		; <i233> [#uses=1]
 	store i233 %tmp, i233* @i233_s
 	ret void
 }
 
 define void @i234_ls() nounwind  {
-	%tmp = load i234* @i234_l		; <i234> [#uses=1]
+	%tmp = load i234, i234* @i234_l		; <i234> [#uses=1]
 	store i234 %tmp, i234* @i234_s
 	ret void
 }
 
 define void @i235_ls() nounwind  {
-	%tmp = load i235* @i235_l		; <i235> [#uses=1]
+	%tmp = load i235, i235* @i235_l		; <i235> [#uses=1]
 	store i235 %tmp, i235* @i235_s
 	ret void
 }
 
 define void @i236_ls() nounwind  {
-	%tmp = load i236* @i236_l		; <i236> [#uses=1]
+	%tmp = load i236, i236* @i236_l		; <i236> [#uses=1]
 	store i236 %tmp, i236* @i236_s
 	ret void
 }
 
 define void @i237_ls() nounwind  {
-	%tmp = load i237* @i237_l		; <i237> [#uses=1]
+	%tmp = load i237, i237* @i237_l		; <i237> [#uses=1]
 	store i237 %tmp, i237* @i237_s
 	ret void
 }
 
 define void @i238_ls() nounwind  {
-	%tmp = load i238* @i238_l		; <i238> [#uses=1]
+	%tmp = load i238, i238* @i238_l		; <i238> [#uses=1]
 	store i238 %tmp, i238* @i238_s
 	ret void
 }
 
 define void @i239_ls() nounwind  {
-	%tmp = load i239* @i239_l		; <i239> [#uses=1]
+	%tmp = load i239, i239* @i239_l		; <i239> [#uses=1]
 	store i239 %tmp, i239* @i239_s
 	ret void
 }
 
 define void @i240_ls() nounwind  {
-	%tmp = load i240* @i240_l		; <i240> [#uses=1]
+	%tmp = load i240, i240* @i240_l		; <i240> [#uses=1]
 	store i240 %tmp, i240* @i240_s
 	ret void
 }
 
 define void @i241_ls() nounwind  {
-	%tmp = load i241* @i241_l		; <i241> [#uses=1]
+	%tmp = load i241, i241* @i241_l		; <i241> [#uses=1]
 	store i241 %tmp, i241* @i241_s
 	ret void
 }
 
 define void @i242_ls() nounwind  {
-	%tmp = load i242* @i242_l		; <i242> [#uses=1]
+	%tmp = load i242, i242* @i242_l		; <i242> [#uses=1]
 	store i242 %tmp, i242* @i242_s
 	ret void
 }
 
 define void @i243_ls() nounwind  {
-	%tmp = load i243* @i243_l		; <i243> [#uses=1]
+	%tmp = load i243, i243* @i243_l		; <i243> [#uses=1]
 	store i243 %tmp, i243* @i243_s
 	ret void
 }
 
 define void @i244_ls() nounwind  {
-	%tmp = load i244* @i244_l		; <i244> [#uses=1]
+	%tmp = load i244, i244* @i244_l		; <i244> [#uses=1]
 	store i244 %tmp, i244* @i244_s
 	ret void
 }
 
 define void @i245_ls() nounwind  {
-	%tmp = load i245* @i245_l		; <i245> [#uses=1]
+	%tmp = load i245, i245* @i245_l		; <i245> [#uses=1]
 	store i245 %tmp, i245* @i245_s
 	ret void
 }
 
 define void @i246_ls() nounwind  {
-	%tmp = load i246* @i246_l		; <i246> [#uses=1]
+	%tmp = load i246, i246* @i246_l		; <i246> [#uses=1]
 	store i246 %tmp, i246* @i246_s
 	ret void
 }
 
 define void @i247_ls() nounwind  {
-	%tmp = load i247* @i247_l		; <i247> [#uses=1]
+	%tmp = load i247, i247* @i247_l		; <i247> [#uses=1]
 	store i247 %tmp, i247* @i247_s
 	ret void
 }
 
 define void @i248_ls() nounwind  {
-	%tmp = load i248* @i248_l		; <i248> [#uses=1]
+	%tmp = load i248, i248* @i248_l		; <i248> [#uses=1]
 	store i248 %tmp, i248* @i248_s
 	ret void
 }
 
 define void @i249_ls() nounwind  {
-	%tmp = load i249* @i249_l		; <i249> [#uses=1]
+	%tmp = load i249, i249* @i249_l		; <i249> [#uses=1]
 	store i249 %tmp, i249* @i249_s
 	ret void
 }
 
 define void @i250_ls() nounwind  {
-	%tmp = load i250* @i250_l		; <i250> [#uses=1]
+	%tmp = load i250, i250* @i250_l		; <i250> [#uses=1]
 	store i250 %tmp, i250* @i250_s
 	ret void
 }
 
 define void @i251_ls() nounwind  {
-	%tmp = load i251* @i251_l		; <i251> [#uses=1]
+	%tmp = load i251, i251* @i251_l		; <i251> [#uses=1]
 	store i251 %tmp, i251* @i251_s
 	ret void
 }
 
 define void @i252_ls() nounwind  {
-	%tmp = load i252* @i252_l		; <i252> [#uses=1]
+	%tmp = load i252, i252* @i252_l		; <i252> [#uses=1]
 	store i252 %tmp, i252* @i252_s
 	ret void
 }
 
 define void @i253_ls() nounwind  {
-	%tmp = load i253* @i253_l		; <i253> [#uses=1]
+	%tmp = load i253, i253* @i253_l		; <i253> [#uses=1]
 	store i253 %tmp, i253* @i253_s
 	ret void
 }
 
 define void @i254_ls() nounwind  {
-	%tmp = load i254* @i254_l		; <i254> [#uses=1]
+	%tmp = load i254, i254* @i254_l		; <i254> [#uses=1]
 	store i254 %tmp, i254* @i254_s
 	ret void
 }
 
 define void @i255_ls() nounwind  {
-	%tmp = load i255* @i255_l		; <i255> [#uses=1]
+	%tmp = load i255, i255* @i255_l		; <i255> [#uses=1]
 	store i255 %tmp, i255* @i255_s
 	ret void
 }
 
 define void @i256_ls() nounwind  {
-	%tmp = load i256* @i256_l		; <i256> [#uses=1]
+	%tmp = load i256, i256* @i256_l		; <i256> [#uses=1]
 	store i256 %tmp, i256* @i256_s
 	ret void
 }
diff --git a/llvm/test/CodeGen/Generic/badFoldGEP.ll b/llvm/test/CodeGen/Generic/badFoldGEP.ll
index 318cc91..8150390 100644
--- a/llvm/test/CodeGen/Generic/badFoldGEP.ll
+++ b/llvm/test/CodeGen/Generic/badFoldGEP.ll
@@ -21,7 +21,7 @@
         %cann-indvar-idxcast = sext i32 %argc to i64            ; <i64> [#uses=1]
         %reg841 = getelementptr [497 x %Domain], [497 x %Domain]* @domain_array, i64 0, i64 %cann-indvar-idxcast, i32 3          ; <i32*> [#uses=1]
         %reg846 = getelementptr i32, i32* %reg841, i64 1             ; <i32*> [#uses=1]
-        %reg820 = load i32* %reg846             ; <i32> [#uses=1]
+        %reg820 = load i32, i32* %reg846             ; <i32> [#uses=1]
         ret i32 %reg820
 }
 
diff --git a/llvm/test/CodeGen/Generic/builtin-expect.ll b/llvm/test/CodeGen/Generic/builtin-expect.ll
index e8cd07b..2f76acf 100644
--- a/llvm/test/CodeGen/Generic/builtin-expect.ll
+++ b/llvm/test/CodeGen/Generic/builtin-expect.ll
@@ -5,7 +5,7 @@
   %retval = alloca i32, align 4
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  %tmp = load i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
   %cmp = icmp sgt i32 %tmp, 1
   %conv = zext i1 %cmp to i32
   %conv1 = sext i32 %conv to i64
@@ -23,7 +23,7 @@
   br label %return
 
 return:                                           ; preds = %if.end, %if.then
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
@@ -36,7 +36,7 @@
   %retval = alloca i32, align 4
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  %tmp = load i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
   %conv = sext i32 %tmp to i64
   %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
   %tobool = icmp ne i64 %expval, 0
@@ -52,7 +52,7 @@
   br label %return
 
 return:                                           ; preds = %if.end, %if.then
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
@@ -61,7 +61,7 @@
   %retval = alloca i32, align 4
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  %tmp = load i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
   %tobool = icmp ne i32 %tmp, 0
   %lnot = xor i1 %tobool, true
   %lnot.ext = zext i1 %lnot to i32
@@ -80,7 +80,7 @@
   br label %return
 
 return:                                           ; preds = %if.end, %if.then
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
@@ -89,7 +89,7 @@
   %retval = alloca i32, align 4
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  %tmp = load i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
   %tobool = icmp ne i32 %tmp, 0
   %lnot = xor i1 %tobool, true
   %lnot1 = xor i1 %lnot, true
@@ -109,7 +109,7 @@
   br label %return
 
 return:                                           ; preds = %if.end, %if.then
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
@@ -118,7 +118,7 @@
   %retval = alloca i32, align 4
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  %tmp = load i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
   %cmp = icmp slt i32 %tmp, 0
   %conv = zext i1 %cmp to i32
   %conv1 = sext i32 %conv to i64
@@ -136,7 +136,7 @@
   br label %return
 
 return:                                           ; preds = %if.end, %if.then
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
@@ -145,7 +145,7 @@
   %retval = alloca i32, align 4
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  %tmp = load i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
   %conv = sext i32 %tmp to i64
   %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
   switch i64 %expval, label %sw.epilog [
@@ -162,7 +162,7 @@
   br label %return
 
 return:                                           ; preds = %sw.epilog, %sw.bb
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
@@ -171,7 +171,7 @@
   %retval = alloca i32, align 4
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  %tmp = load i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
   %conv = sext i32 %tmp to i64
   %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
   switch i64 %expval, label %sw.epilog [
@@ -180,7 +180,7 @@
   ]
 
 sw.bb:                                            ; preds = %entry, %entry
-  %tmp1 = load i32* %x.addr, align 4
+  %tmp1 = load i32, i32* %x.addr, align 4
   store i32 %tmp1, i32* %retval
   br label %return
 
@@ -189,7 +189,7 @@
   br label %return
 
 return:                                           ; preds = %sw.epilog, %sw.bb
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
@@ -198,7 +198,7 @@
   %retval = alloca i32, align 4
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  %tmp = load i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
   %cmp = icmp sgt i32 %tmp, 1
   %conv = zext i1 %cmp to i32
   %expval = call i32 @llvm.expect.i32(i32 %conv, i32 1)
@@ -215,7 +215,7 @@
   br label %return
 
 return:                                           ; preds = %if.end, %if.then
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
diff --git a/llvm/test/CodeGen/Generic/cast-fp.ll b/llvm/test/CodeGen/Generic/cast-fp.ll
index 53ed6a4..3b03096 100644
--- a/llvm/test/CodeGen/Generic/cast-fp.ll
+++ b/llvm/test/CodeGen/Generic/cast-fp.ll
@@ -10,7 +10,7 @@
 declare i32 @printf(i8*, ...)
 
 define i32 @main() {
-	%a = load double* @A		; <double> [#uses=4]
+	%a = load double, double* @A		; <double> [#uses=4]
 	%a_fs = getelementptr [8 x i8], [8 x i8]* @a_fstr, i64 0, i64 0		; <i8*> [#uses=1]
 	call i32 (i8*, ...)* @printf( i8* %a_fs, double %a )		; <i32>:1 [#uses=0]
 	%a_d2l = fptosi double %a to i64		; <i64> [#uses=1]
@@ -23,7 +23,7 @@
 	call i32 (i8*, ...)* @printf( i8* %a_ds, i8 %a_d2sb )		; <i32>:4 [#uses=0]
 	%a_d2i2sb = trunc i32 %a_d2i to i8		; <i8> [#uses=1]
 	call i32 (i8*, ...)* @printf( i8* %a_ds, i8 %a_d2i2sb )		; <i32>:5 [#uses=0]
-	%b = load i32* @B		; <i32> [#uses=2]
+	%b = load i32, i32* @B		; <i32> [#uses=2]
 	%b_ds = getelementptr [8 x i8], [8 x i8]* @b_dstr, i64 0, i64 0		; <i8*> [#uses=1]
 	call i32 (i8*, ...)* @printf( i8* %b_ds, i32 %b )		; <i32>:6 [#uses=0]
 	%b_i2d = sitofp i32 %b to double		; <double> [#uses=1]
diff --git a/llvm/test/CodeGen/Generic/constindices.ll b/llvm/test/CodeGen/Generic/constindices.ll
index f4b98d1..3b43db0 100644
--- a/llvm/test/CodeGen/Generic/constindices.ll
+++ b/llvm/test/CodeGen/Generic/constindices.ll
@@ -28,12 +28,12 @@
         %fptrA2 = getelementptr %MixedA, %MixedA* %fptrA1, i64 0, i32 1          ; <[15 x i32]*> [#uses=1]
         %fptrA3 = getelementptr [15 x i32], [15 x i32]* %fptrA2, i64 0, i64 8               ; <i32*> [#uses=1]
         store i32 5, i32* %fptrA3
-        %sqrtTwo = load float* %I1              ; <float> [#uses=1]
-        %exp = load float* %I2          ; <float> [#uses=1]
+        %sqrtTwo = load float, float* %I1              ; <float> [#uses=1]
+        %exp = load float, float* %I2          ; <float> [#uses=1]
         %I3 = getelementptr %MixedA, %MixedA* %ArrayA, i64 1, i32 0              ; <float*> [#uses=1]
-        %pi = load float* %I3           ; <float> [#uses=1]
+        %pi = load float, float* %I3           ; <float> [#uses=1]
         %I4 = getelementptr %MixedB, %MixedB* %ArrayB, i64 2, i32 1, i32 0               ; <float*> [#uses=1]
-        %five = load float* %I4         ; <float> [#uses=1]
+        %five = load float, float* %I4         ; <float> [#uses=1]
         %dsqrtTwo = fpext float %sqrtTwo to double              ; <double> [#uses=1]
         %dexp = fpext float %exp to double              ; <double> [#uses=1]
         %dpi = fpext float %pi to double                ; <double> [#uses=1]
diff --git a/llvm/test/CodeGen/Generic/crash.ll b/llvm/test/CodeGen/Generic/crash.ll
index abcef7d..ae51428 100644
--- a/llvm/test/CodeGen/Generic/crash.ll
+++ b/llvm/test/CodeGen/Generic/crash.ll
@@ -13,15 +13,15 @@
 
 define void @Parse_Camera(%struct.CAMERA** nocapture %Camera_Ptr) nounwind {
 entry:
-%.pre = load %struct.CAMERA** %Camera_Ptr, align 4
+%.pre = load %struct.CAMERA*, %struct.CAMERA** %Camera_Ptr, align 4
 %0 = getelementptr inbounds %struct.CAMERA, %struct.CAMERA* %.pre, i32 0, i32 1, i32 0
 %1 = getelementptr inbounds %struct.CAMERA, %struct.CAMERA* %.pre, i32 0, i32 1, i32 2
 br label %bb32
 
 bb32:                                             ; preds = %bb6
-%2 = load double* %0, align 4
-%3 = load double* %1, align 4
-%4 = load double* %0, align 4
+%2 = load double, double* %0, align 4
+%3 = load double, double* %1, align 4
+%4 = load double, double* %0, align 4
 call void @Parse_Vector(double* %0) nounwind
 %5 = call i32 @llvm.objectsize.i32.p0i8(i8* undef, i1 false)
 %6 = icmp eq i32 %5, -1
diff --git a/llvm/test/CodeGen/Generic/dag-combine-crash.ll b/llvm/test/CodeGen/Generic/dag-combine-crash.ll
index a7810b5..45abd1d 100644
--- a/llvm/test/CodeGen/Generic/dag-combine-crash.ll
+++ b/llvm/test/CodeGen/Generic/dag-combine-crash.ll
@@ -5,7 +5,7 @@
   br label %block.i.i
 
 block.i.i:
-  %tmpbb = load i8* undef
+  %tmpbb = load i8, i8* undef
   %tmp54 = zext i8 %tmpbb to i64
   %tmp59 = and i64 %tmp54, 8
   %tmp60 = add i64 %tmp59, 3691045929300498764
diff --git a/llvm/test/CodeGen/Generic/empty-load-store.ll b/llvm/test/CodeGen/Generic/empty-load-store.ll
index bca7305..32ece8b 100644
--- a/llvm/test/CodeGen/Generic/empty-load-store.ll
+++ b/llvm/test/CodeGen/Generic/empty-load-store.ll
@@ -8,11 +8,11 @@
         %retval = alloca i32
         store i32 0, i32* %retval
         %local_foo = alloca {  }
-        load {  }* @current_foo
+        load {  }, {  }* @current_foo
         store {  } %0, {  }* %local_foo
         br label %return
 
 return:
-        load i32* %retval
+        load i32, i32* %retval
         ret i32 %1
 }
diff --git a/llvm/test/CodeGen/Generic/empty-phi.ll b/llvm/test/CodeGen/Generic/empty-phi.ll
index 8d5f3b96..f9191b9 100644
--- a/llvm/test/CodeGen/Generic/empty-phi.ll
+++ b/llvm/test/CodeGen/Generic/empty-phi.ll
@@ -11,7 +11,7 @@
   br i1 %1, label %bb2, label %bb3
 
 bb2:
-  %load = load [0 x { i8*, i64, i64 }]* undef, align 8
+  %load = load [0 x { i8*, i64, i64 }], [0 x { i8*, i64, i64 }]* undef, align 8
   br label %bb1
 
 bb3:
diff --git a/llvm/test/CodeGen/Generic/fp-to-int-invalid.ll b/llvm/test/CodeGen/Generic/fp-to-int-invalid.ll
index cdcc3a2..6a37660 100644
--- a/llvm/test/CodeGen/Generic/fp-to-int-invalid.ll
+++ b/llvm/test/CodeGen/Generic/fp-to-int-invalid.ll
@@ -7,9 +7,9 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store i8* %result, i8** %result_addr
 	store float 0x40B2AFA160000000, float* %test, align 4
-	%0 = load float* %test, align 4		; <float> [#uses=1]
+	%0 = load float, float* %test, align 4		; <float> [#uses=1]
 	%1 = fptosi float %0 to i8		; <i8> [#uses=1]
-	%2 = load i8** %result_addr, align 4		; <i8*> [#uses=1]
+	%2 = load i8*, i8** %result_addr, align 4		; <i8*> [#uses=1]
 	store i8 %1, i8* %2, align 1
 	br label %return
 
diff --git a/llvm/test/CodeGen/Generic/fwdtwice.ll b/llvm/test/CodeGen/Generic/fwdtwice.ll
index 6b38f04..0c0bfad 100644
--- a/llvm/test/CodeGen/Generic/fwdtwice.ll
+++ b/llvm/test/CodeGen/Generic/fwdtwice.ll
@@ -18,7 +18,7 @@
 
 Top:            ; preds = %Top, %0
         store i32 %Num, i32* %Num.upgrd.1
-        %reg108 = load i32* %Num.upgrd.1                ; <i32> [#uses=1]
+        %reg108 = load i32, i32* %Num.upgrd.1                ; <i32> [#uses=1]
         %cast1006 = bitcast i32 %reg108 to i32          ; <i32> [#uses=1]
         %cond1001 = icmp ule i32 %cast1006, 0           ; <i1> [#uses=1]
         br i1 %cond1001, label %bb6, label %Top
diff --git a/llvm/test/CodeGen/Generic/global-ret0.ll b/llvm/test/CodeGen/Generic/global-ret0.ll
index 74bff87..a2a24b0 100644
--- a/llvm/test/CodeGen/Generic/global-ret0.ll
+++ b/llvm/test/CodeGen/Generic/global-ret0.ll
@@ -3,6 +3,6 @@
 @g = global i32 0               ; <i32*> [#uses=1]
 
 define i32 @main() {
-        %h = load i32* @g               ; <i32> [#uses=1]
+        %h = load i32, i32* @g               ; <i32> [#uses=1]
         ret i32 %h
 }
diff --git a/llvm/test/CodeGen/Generic/inline-asm-mem-clobber.ll b/llvm/test/CodeGen/Generic/inline-asm-mem-clobber.ll
index 5aa827a..be1e0a3 100644
--- a/llvm/test/CodeGen/Generic/inline-asm-mem-clobber.ll
+++ b/llvm/test/CodeGen/Generic/inline-asm-mem-clobber.ll
@@ -8,13 +8,13 @@
   %rv = alloca i32, align 4
   store i8* %p, i8** %p.addr, align 8
   store i32 0, i32* @G, align 4
-  %0 = load i8** %p.addr, align 8
+  %0 = load i8*, i8** %p.addr, align 8
 ; CHECK: blah
   %1 = call i32 asm "blah", "=r,r,~{memory}"(i8* %0) nounwind
 ; CHECK: @G
   store i32 %1, i32* %rv, align 4
-  %2 = load i32* %rv, align 4
-  %3 = load i32* @G, align 4
+  %2 = load i32, i32* %rv, align 4
+  %3 = load i32, i32* @G, align 4
   %add = add nsw i32 %2, %3
   ret i32 %add
 }
diff --git a/llvm/test/CodeGen/Generic/pr2625.ll b/llvm/test/CodeGen/Generic/pr2625.ll
index 3e3dc4b..c745603 100644
--- a/llvm/test/CodeGen/Generic/pr2625.ll
+++ b/llvm/test/CodeGen/Generic/pr2625.ll
@@ -7,11 +7,11 @@
         store { i32, { i32 } }* %0, { i32, { i32 } }** %state
         %retval = alloca i32            ; <i32*> [#uses=2]
         store i32 0, i32* %retval
-        load { i32, { i32 } }** %state          ; <{ i32, { i32 } }*>:1 [#uses=1]
+        load { i32, { i32 } }*, { i32, { i32 } }** %state          ; <{ i32, { i32 } }*>:1 [#uses=1]
         store { i32, { i32 } } zeroinitializer, { i32, { i32 } }* %1
         br label %return
 
 return:         ; preds = %entry
-        load i32* %retval               ; <i32>:2 [#uses=1]
+        load i32, i32* %retval               ; <i32>:2 [#uses=1]
         ret i32 %2
 }
diff --git a/llvm/test/CodeGen/Generic/print-arith-fp.ll b/llvm/test/CodeGen/Generic/print-arith-fp.ll
index 8d49895..b00229c 100644
--- a/llvm/test/CodeGen/Generic/print-arith-fp.ll
+++ b/llvm/test/CodeGen/Generic/print-arith-fp.ll
@@ -18,8 +18,8 @@
 declare i32 @printf(i8*, ...)
 
 define i32 @main() {
-	%a = load double* @A		; <double> [#uses=12]
-	%b = load double* @B		; <double> [#uses=12]
+	%a = load double, double* @A		; <double> [#uses=12]
+	%b = load double, double* @B		; <double> [#uses=12]
 	%a_s = getelementptr [8 x i8], [8 x i8]* @a_str, i64 0, i64 0		; <i8*> [#uses=1]
 	%b_s = getelementptr [8 x i8], [8 x i8]* @b_str, i64 0, i64 0		; <i8*> [#uses=1]
 	call i32 (i8*, ...)* @printf( i8* %a_s, double %a )		; <i32>:1 [#uses=0]
diff --git a/llvm/test/CodeGen/Generic/print-arith-int.ll b/llvm/test/CodeGen/Generic/print-arith-int.ll
index fa3aa5c..2e176e4 100644
--- a/llvm/test/CodeGen/Generic/print-arith-int.ll
+++ b/llvm/test/CodeGen/Generic/print-arith-int.ll
@@ -23,8 +23,8 @@
 declare i32 @printf(i8*, ...)
 
 define i32 @main() {
-	%a = load i32* @A		; <i32> [#uses=16]
-	%b = load i32* @B		; <i32> [#uses=17]
+	%a = load i32, i32* @A		; <i32> [#uses=16]
+	%b = load i32, i32* @B		; <i32> [#uses=17]
 	%a_s = getelementptr [8 x i8], [8 x i8]* @a_str, i64 0, i64 0		; <i8*> [#uses=1]
 	%b_s = getelementptr [8 x i8], [8 x i8]* @b_str, i64 0, i64 0		; <i8*> [#uses=1]
 	call i32 (i8*, ...)* @printf( i8* %a_s, i32 %a )		; <i32>:1 [#uses=0]
diff --git a/llvm/test/CodeGen/Generic/print-mul-exp.ll b/llvm/test/CodeGen/Generic/print-mul-exp.ll
index ce397bf..a08333d 100644
--- a/llvm/test/CodeGen/Generic/print-mul-exp.ll
+++ b/llvm/test/CodeGen/Generic/print-mul-exp.ll
@@ -7,7 +7,7 @@
 declare i32 @printf(i8*, ...)
 
 define i32 @main() {
-	%a = load i32* @A		; <i32> [#uses=21]
+	%a = load i32, i32* @A		; <i32> [#uses=21]
 	%a_s = getelementptr [8 x i8], [8 x i8]* @a_str, i64 0, i64 0		; <i8*> [#uses=1]
 	%a_mul_s = getelementptr [13 x i8], [13 x i8]* @a_mul_str, i64 0, i64 0		; <i8*> [#uses=20]
 	call i32 (i8*, ...)* @printf( i8* %a_s, i32 %a )		; <i32>:1 [#uses=0]
diff --git a/llvm/test/CodeGen/Generic/print-mul.ll b/llvm/test/CodeGen/Generic/print-mul.ll
index 782d664..06f2b40 100644
--- a/llvm/test/CodeGen/Generic/print-mul.ll
+++ b/llvm/test/CodeGen/Generic/print-mul.ll
@@ -10,8 +10,8 @@
 
 define i32 @main() {
 entry:
-	%a = load i32* @A		; <i32> [#uses=2]
-	%b = load i32* @B		; <i32> [#uses=1]
+	%a = load i32, i32* @A		; <i32> [#uses=2]
+	%b = load i32, i32* @B		; <i32> [#uses=1]
 	%a_s = getelementptr [8 x i8], [8 x i8]* @a_str, i64 0, i64 0		; <i8*> [#uses=1]
 	%b_s = getelementptr [8 x i8], [8 x i8]* @b_str, i64 0, i64 0		; <i8*> [#uses=1]
 	%a_mul_s = getelementptr [13 x i8], [13 x i8]* @a_mul_str, i64 0, i64 0		; <i8*> [#uses=1]
diff --git a/llvm/test/CodeGen/Generic/print-shift.ll b/llvm/test/CodeGen/Generic/print-shift.ll
index 489c889..af14f77 100644
--- a/llvm/test/CodeGen/Generic/print-shift.ll
+++ b/llvm/test/CodeGen/Generic/print-shift.ll
@@ -10,8 +10,8 @@
 
 define i32 @main() {
 entry:
-        %a = load i32* @A               ; <i32> [#uses=2]
-        %b = load i32* @B               ; <i32> [#uses=1]
+        %a = load i32, i32* @A               ; <i32> [#uses=2]
+        %b = load i32, i32* @B               ; <i32> [#uses=1]
         %a_s = getelementptr [8 x i8], [8 x i8]* @a_str, i64 0, i64 0             ; <i8*> [#uses=1]
         %b_s = getelementptr [8 x i8], [8 x i8]* @b_str, i64 0, i64 0             ; <i8*> [#uses=1]
         %a_shl_s = getelementptr [14 x i8], [14 x i8]* @a_shl_str, i64 0, i64 0            ; <i8*> [#uses=1]
diff --git a/llvm/test/CodeGen/Generic/select.ll b/llvm/test/CodeGen/Generic/select.ll
index e4f0cf9..1958cd9 100644
--- a/llvm/test/CodeGen/Generic/select.ll
+++ b/llvm/test/CodeGen/Generic/select.ll
@@ -70,7 +70,7 @@
         %castsmall = trunc i64 1 to i32         ; <i32> [#uses=1]
         %usebig = add i32 %castbig, %castsmall          ; <i32> [#uses=0]
         %castglob = bitcast i32* @AConst to i64*                ; <i64*> [#uses=1]
-        %dummyl = load i64* %castglob           ; <i64> [#uses=0]
+        %dummyl = load i64, i64* %castglob           ; <i64> [#uses=0]
         %castnull = inttoptr i64 0 to i32*              ; <i32*> [#uses=1]
         ret i32* %castnull
 }
@@ -155,7 +155,7 @@
         %cast116 = ptrtoint i32* %A to i64              ; <i64> [#uses=1]
         %reg116 = add i64 %cast116, %cast115            ; <i64> [#uses=1]
         %castPtr = inttoptr i64 %reg116 to i32*         ; <i32*> [#uses=1]
-        %reg118 = load i32* %castPtr            ; <i32> [#uses=1]
+        %reg118 = load i32, i32* %castPtr            ; <i32> [#uses=1]
         %cast117 = sext i32 %reg118 to i64              ; <i64> [#uses=2]
         %reg159 = add i64 1234567, %cast117             ; <i64> [#uses=0]
         %reg160 = add i64 7654321, %cast117             ; <i64> [#uses=0]
@@ -181,7 +181,7 @@
 ;
 define i32 @checkFoldGEP(%Domain* %D, i64 %idx) {
         %reg841 = getelementptr %Domain, %Domain* %D, i64 0, i32 1               ; <i32*> [#uses=1]
-        %reg820 = load i32* %reg841             ; <i32> [#uses=1]
+        %reg820 = load i32, i32* %reg841             ; <i32> [#uses=1]
         ret i32 %reg820
 }
 
diff --git a/llvm/test/CodeGen/Generic/undef-phi.ll b/llvm/test/CodeGen/Generic/undef-phi.ll
index 067f34a..03f3a6a 100644
--- a/llvm/test/CodeGen/Generic/undef-phi.ll
+++ b/llvm/test/CodeGen/Generic/undef-phi.ll
@@ -14,13 +14,13 @@
 for.body:
   %stack.addr.02 = phi %struct.xx_stack* [ %0, %for.body ], [ %stack, %entry ]
   %next = getelementptr inbounds %struct.xx_stack, %struct.xx_stack* %stack.addr.02, i64 0, i32 1
-  %0 = load %struct.xx_stack** %next, align 8
+  %0 = load %struct.xx_stack*, %struct.xx_stack** %next, align 8
   %tobool = icmp eq %struct.xx_stack* %0, null
   br i1 %tobool, label %for.end, label %for.body
 
 for.end:
   %top.0.lcssa = phi %struct.xx_stack* [ undef, %entry ], [ %stack.addr.02, %for.body ]
   %first = getelementptr inbounds %struct.xx_stack, %struct.xx_stack* %top.0.lcssa, i64 0, i32 0
-  %1 = load i32* %first, align 4
+  %1 = load i32, i32* %first, align 4
   ret i32 %1
 }
diff --git a/llvm/test/CodeGen/Generic/v-split.ll b/llvm/test/CodeGen/Generic/v-split.ll
index 634b562..00c62f3 100644
--- a/llvm/test/CodeGen/Generic/v-split.ll
+++ b/llvm/test/CodeGen/Generic/v-split.ll
@@ -2,8 +2,8 @@
 %f8 = type <8 x float>
 
 define void @test_f8(%f8 *%P, %f8* %Q, %f8 *%S) {
-  %p = load %f8* %P
-  %q = load %f8* %Q
+  %p = load %f8, %f8* %P
+  %q = load %f8, %f8* %Q
   %R = fadd %f8 %p, %q
   store %f8 %R, %f8 *%S
   ret void
diff --git a/llvm/test/CodeGen/Generic/vector-casts.ll b/llvm/test/CodeGen/Generic/vector-casts.ll
index a26918b..fee72b6 100644
--- a/llvm/test/CodeGen/Generic/vector-casts.ll
+++ b/llvm/test/CodeGen/Generic/vector-casts.ll
@@ -2,43 +2,43 @@
 ; PR2671
 
 define void @a(<2 x double>* %p, <2 x i8>* %q) {
-        %t = load <2 x double>* %p
+        %t = load <2 x double>, <2 x double>* %p
 	%r = fptosi <2 x double> %t to <2 x i8>
         store <2 x i8> %r, <2 x i8>* %q
 	ret void
 }
 define void @b(<2 x double>* %p, <2 x i8>* %q) {
-        %t = load <2 x double>* %p
+        %t = load <2 x double>, <2 x double>* %p
 	%r = fptoui <2 x double> %t to <2 x i8>
         store <2 x i8> %r, <2 x i8>* %q
 	ret void
 }
 define void @c(<2 x i8>* %p, <2 x double>* %q) {
-        %t = load <2 x i8>* %p
+        %t = load <2 x i8>, <2 x i8>* %p
 	%r = sitofp <2 x i8> %t to <2 x double>
         store <2 x double> %r, <2 x double>* %q
 	ret void
 }
 define void @d(<2 x i8>* %p, <2 x double>* %q) {
-        %t = load <2 x i8>* %p
+        %t = load <2 x i8>, <2 x i8>* %p
 	%r = uitofp <2 x i8> %t to <2 x double>
         store <2 x double> %r, <2 x double>* %q
 	ret void
 }
 define void @e(<2 x i8>* %p, <2 x i16>* %q) {
-        %t = load <2 x i8>* %p
+        %t = load <2 x i8>, <2 x i8>* %p
 	%r = sext <2 x i8> %t to <2 x i16>
         store <2 x i16> %r, <2 x i16>* %q
 	ret void
 }
 define void @f(<2 x i8>* %p, <2 x i16>* %q) {
-        %t = load <2 x i8>* %p
+        %t = load <2 x i8>, <2 x i8>* %p
 	%r = zext <2 x i8> %t to <2 x i16>
         store <2 x i16> %r, <2 x i16>* %q
 	ret void
 }
 define void @g(<2 x i16>* %p, <2 x i8>* %q) {
-        %t = load <2 x i16>* %p
+        %t = load <2 x i16>, <2 x i16>* %p
 	%r = trunc <2 x i16> %t to <2 x i8>
         store <2 x i8> %r, <2 x i8>* %q
 	ret void
diff --git a/llvm/test/CodeGen/Generic/vector-identity-shuffle.ll b/llvm/test/CodeGen/Generic/vector-identity-shuffle.ll
index 332d6d8..d933f22 100644
--- a/llvm/test/CodeGen/Generic/vector-identity-shuffle.ll
+++ b/llvm/test/CodeGen/Generic/vector-identity-shuffle.ll
@@ -2,7 +2,7 @@
 
 
 define void @test(<4 x float>* %tmp2.i) {
-        %tmp2.i.upgrd.1 = load <4 x float>* %tmp2.i             ; <<4 x float>> [#uses=4]
+        %tmp2.i.upgrd.1 = load <4 x float>, <4 x float>* %tmp2.i             ; <<4 x float>> [#uses=4]
         %xFloat0.48 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 0         ; <float> [#uses=1]
         %inFloat0.49 = insertelement <4 x float> undef, float %xFloat0.48, i32 0                ; <<4 x float>> [#uses=1]
         %xFloat1.50 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 1         ; <float> [#uses=1]
diff --git a/llvm/test/CodeGen/Generic/vector.ll b/llvm/test/CodeGen/Generic/vector.ll
index 2d8298f..9479efd 100644
--- a/llvm/test/CodeGen/Generic/vector.ll
+++ b/llvm/test/CodeGen/Generic/vector.ll
@@ -12,48 +12,48 @@
 ;;; TEST HANDLING OF VARIOUS VECTOR SIZES
 
 define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
-	%p = load %f1* %P		; <%f1> [#uses=1]
-	%q = load %f1* %Q		; <%f1> [#uses=1]
+	%p = load %f1, %f1* %P		; <%f1> [#uses=1]
+	%q = load %f1, %f1* %Q		; <%f1> [#uses=1]
 	%R = fadd %f1 %p, %q		; <%f1> [#uses=1]
 	store %f1 %R, %f1* %S
 	ret void
 }
 
 define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
-	%p = load %f2* %P		; <%f2> [#uses=1]
-	%q = load %f2* %Q		; <%f2> [#uses=1]
+	%p = load %f2, %f2* %P		; <%f2> [#uses=1]
+	%q = load %f2, %f2* %Q		; <%f2> [#uses=1]
 	%R = fadd %f2 %p, %q		; <%f2> [#uses=1]
 	store %f2 %R, %f2* %S
 	ret void
 }
 
 define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
-	%p = load %f4* %P		; <%f4> [#uses=1]
-	%q = load %f4* %Q		; <%f4> [#uses=1]
+	%p = load %f4, %f4* %P		; <%f4> [#uses=1]
+	%q = load %f4, %f4* %Q		; <%f4> [#uses=1]
 	%R = fadd %f4 %p, %q		; <%f4> [#uses=1]
 	store %f4 %R, %f4* %S
 	ret void
 }
 
 define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
-	%p = load %f8* %P		; <%f8> [#uses=1]
-	%q = load %f8* %Q		; <%f8> [#uses=1]
+	%p = load %f8, %f8* %P		; <%f8> [#uses=1]
+	%q = load %f8, %f8* %Q		; <%f8> [#uses=1]
 	%R = fadd %f8 %p, %q		; <%f8> [#uses=1]
 	store %f8 %R, %f8* %S
 	ret void
 }
 
 define void @test_fmul(%f8* %P, %f8* %Q, %f8* %S) {
-	%p = load %f8* %P		; <%f8> [#uses=1]
-	%q = load %f8* %Q		; <%f8> [#uses=1]
+	%p = load %f8, %f8* %P		; <%f8> [#uses=1]
+	%q = load %f8, %f8* %Q		; <%f8> [#uses=1]
 	%R = fmul %f8 %p, %q		; <%f8> [#uses=1]
 	store %f8 %R, %f8* %S
 	ret void
 }
 
 define void @test_div(%f8* %P, %f8* %Q, %f8* %S) {
-	%p = load %f8* %P		; <%f8> [#uses=1]
-	%q = load %f8* %Q		; <%f8> [#uses=1]
+	%p = load %f8, %f8* %P		; <%f8> [#uses=1]
+	%q = load %f8, %f8* %Q		; <%f8> [#uses=1]
 	%R = fdiv %f8 %p, %q		; <%f8> [#uses=1]
 	store %f8 %R, %f8* %S
 	ret void
@@ -63,21 +63,21 @@
 
 
 define void @test_cst(%f4* %P, %f4* %S) {
-	%p = load %f4* %P		; <%f4> [#uses=1]
+	%p = load %f4, %f4* %P		; <%f4> [#uses=1]
 	%R = fadd %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float 2.000000e+00, float 4.500000e+00 >		; <%f4> [#uses=1]
 	store %f4 %R, %f4* %S
 	ret void
 }
 
 define void @test_zero(%f4* %P, %f4* %S) {
-	%p = load %f4* %P		; <%f4> [#uses=1]
+	%p = load %f4, %f4* %P		; <%f4> [#uses=1]
 	%R = fadd %f4 %p, zeroinitializer		; <%f4> [#uses=1]
 	store %f4 %R, %f4* %S
 	ret void
 }
 
 define void @test_undef(%f4* %P, %f4* %S) {
-	%p = load %f4* %P		; <%f4> [#uses=1]
+	%p = load %f4, %f4* %P		; <%f4> [#uses=1]
 	%R = fadd %f4 %p, undef		; <%f4> [#uses=1]
 	store %f4 %R, %f4* %S
 	ret void
@@ -102,19 +102,19 @@
 }
 
 define float @test_extract_elt(%f8* %P) {
-	%p = load %f8* %P		; <%f8> [#uses=1]
+	%p = load %f8, %f8* %P		; <%f8> [#uses=1]
 	%R = extractelement %f8 %p, i32 3		; <float> [#uses=1]
 	ret float %R
 }
 
 define double @test_extract_elt2(%d8* %P) {
-	%p = load %d8* %P		; <%d8> [#uses=1]
+	%p = load %d8, %d8* %P		; <%d8> [#uses=1]
 	%R = extractelement %d8 %p, i32 3		; <double> [#uses=1]
 	ret double %R
 }
 
 define void @test_cast_1(%f4* %b, %i4* %a) {
-	%tmp = load %f4* %b		; <%f4> [#uses=1]
+	%tmp = load %f4, %f4* %b		; <%f4> [#uses=1]
 	%tmp2 = fadd %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >		; <%f4> [#uses=1]
 	%tmp3 = bitcast %f4 %tmp2 to %i4		; <%i4> [#uses=1]
 	%tmp4 = add %i4 %tmp3, < i32 1, i32 2, i32 3, i32 4 >		; <%i4> [#uses=1]
@@ -123,7 +123,7 @@
 }
 
 define void @test_cast_2(%f8* %a, <8 x i32>* %b) {
-	%T = load %f8* %a		; <%f8> [#uses=1]
+	%T = load %f8, %f8* %a		; <%f8> [#uses=1]
 	%T2 = bitcast %f8 %T to <8 x i32>		; <<8 x i32>> [#uses=1]
 	store <8 x i32> %T2, <8 x i32>* %b
 	ret void
@@ -136,7 +136,7 @@
 	%tmp2 = insertelement %f4 %tmp, float %X, i32 1		; <%f4> [#uses=1]
 	%tmp4 = insertelement %f4 %tmp2, float %X, i32 2		; <%f4> [#uses=1]
 	%tmp6 = insertelement %f4 %tmp4, float %X, i32 3		; <%f4> [#uses=1]
-	%q = load %f4* %Q		; <%f4> [#uses=1]
+	%q = load %f4, %f4* %Q		; <%f4> [#uses=1]
 	%R = fadd %f4 %q, %tmp6		; <%f4> [#uses=1]
 	store %f4 %R, %f4* %P
 	ret void
@@ -147,7 +147,7 @@
 	%tmp2 = insertelement %i4 %tmp, i32 %X, i32 1		; <%i4> [#uses=1]
 	%tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2		; <%i4> [#uses=1]
 	%tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3		; <%i4> [#uses=1]
-	%q = load %i4* %Q		; <%i4> [#uses=1]
+	%q = load %i4, %i4* %Q		; <%i4> [#uses=1]
 	%R = add %i4 %q, %tmp6		; <%i4> [#uses=1]
 	store %i4 %R, %i4* %P
 	ret void
diff --git a/llvm/test/CodeGen/Hexagon/BranchPredict.ll b/llvm/test/CodeGen/Hexagon/BranchPredict.ll
index 5d56449..0cd616b 100644
--- a/llvm/test/CodeGen/Hexagon/BranchPredict.ll
+++ b/llvm/test/CodeGen/Hexagon/BranchPredict.ll
@@ -53,7 +53,7 @@
 define i32 @foo_bar(i32 %a, i16 signext %b) nounwind {
 ; CHECK: if{{ *}}(!cmp.eq(r{{[0-9]*}}.new, #0)) jump:nt
 entry:
-  %0 = load i32* @j, align 4
+  %0 = load i32, i32* @j, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %if.else, label %if.then, !prof !0
 
diff --git a/llvm/test/CodeGen/Hexagon/absaddr-store.ll b/llvm/test/CodeGen/Hexagon/absaddr-store.ll
index 5c2554d..3be4b1c 100644
--- a/llvm/test/CodeGen/Hexagon/absaddr-store.ll
+++ b/llvm/test/CodeGen/Hexagon/absaddr-store.ll
@@ -9,7 +9,7 @@
 define zeroext i8 @absStoreByte() nounwind {
 ; CHECK: memb(##b){{ *}}={{ *}}r{{[0-9]+}}
 entry:
-  %0 = load i8* @b, align 1
+  %0 = load i8, i8* @b, align 1
   %conv = zext i8 %0 to i32
   %mul = mul nsw i32 100, %conv
   %conv1 = trunc i32 %mul to i8
@@ -20,7 +20,7 @@
 define signext i16 @absStoreHalf() nounwind {
 ; CHECK: memh(##c){{ *}}={{ *}}r{{[0-9]+}}
 entry:
-  %0 = load i16* @c, align 2
+  %0 = load i16, i16* @c, align 2
   %conv = sext i16 %0 to i32
   %mul = mul nsw i32 100, %conv
   %conv1 = trunc i32 %mul to i16
@@ -31,7 +31,7 @@
 define i32 @absStoreWord() nounwind {
 ; CHECK: memw(##a){{ *}}={{ *}}r{{[0-9]+}}
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %mul = mul nsw i32 100, %0
   store i32 %mul, i32* @a, align 4
   ret i32 %mul
diff --git a/llvm/test/CodeGen/Hexagon/absimm.ll b/llvm/test/CodeGen/Hexagon/absimm.ll
index b8f5edc..07adb3fe 100644
--- a/llvm/test/CodeGen/Hexagon/absimm.ll
+++ b/llvm/test/CodeGen/Hexagon/absimm.ll
@@ -12,7 +12,7 @@
 define i32* @f2(i32* nocapture %i) nounwind {
 entry:
 ; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(##786432)
-  %0 = load volatile i32* inttoptr (i32 786432 to i32*), align 262144
+  %0 = load volatile i32, i32* inttoptr (i32 786432 to i32*), align 262144
   %1 = inttoptr i32 %0 to i32*
   ret i32* %1
   }
diff --git a/llvm/test/CodeGen/Hexagon/always-ext.ll b/llvm/test/CodeGen/Hexagon/always-ext.ll
index e164e9a..8b4b2f5 100644
--- a/llvm/test/CodeGen/Hexagon/always-ext.ll
+++ b/llvm/test/CodeGen/Hexagon/always-ext.ll
@@ -24,8 +24,8 @@
   br i1 undef, label %for.body.us, label %for.end
 
 for.body.us:                                      ; preds = %entry
-  %0 = load %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111** null, align 4
-  %1 = load i32* undef, align 4
+  %0 = load %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111*, %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111** null, align 4
+  %1 = load i32, i32* undef, align 4
   %cmp.i.us = icmp slt i32 %1, 1024
   br i1 %cmp.i.us, label %CuSuiteAdd.exit.us, label %cond.false6.i.us
 
diff --git a/llvm/test/CodeGen/Hexagon/block-addr.ll b/llvm/test/CodeGen/Hexagon/block-addr.ll
index dc0d6e6..902765e 100644
--- a/llvm/test/CodeGen/Hexagon/block-addr.ll
+++ b/llvm/test/CodeGen/Hexagon/block-addr.ll
@@ -10,7 +10,7 @@
   br label %while.body
 
 while.body:
-  %ret.0.load17 = load volatile i32* %ret, align 4
+  %ret.0.load17 = load volatile i32, i32* %ret, align 4
   switch i32 %ret.0.load17, label %label6 [
     i32 0, label %label0
     i32 1, label %label1
@@ -21,37 +21,37 @@
   ]
 
 label0:
-  %ret.0.load18 = load volatile i32* %ret, align 4
+  %ret.0.load18 = load volatile i32, i32* %ret, align 4
   %inc = add nsw i32 %ret.0.load18, 1
   store volatile i32 %inc, i32* %ret, align 4
   br label %while.body
 
 label1:
-  %ret.0.load19 = load volatile i32* %ret, align 4
+  %ret.0.load19 = load volatile i32, i32* %ret, align 4
   %inc2 = add nsw i32 %ret.0.load19, 1
   store volatile i32 %inc2, i32* %ret, align 4
   br label %while.body
 
 label2:
-  %ret.0.load20 = load volatile i32* %ret, align 4
+  %ret.0.load20 = load volatile i32, i32* %ret, align 4
   %inc4 = add nsw i32 %ret.0.load20, 1
   store volatile i32 %inc4, i32* %ret, align 4
   br label %while.body
 
 label3:
-  %ret.0.load21 = load volatile i32* %ret, align 4
+  %ret.0.load21 = load volatile i32, i32* %ret, align 4
   %inc6 = add nsw i32 %ret.0.load21, 1
   store volatile i32 %inc6, i32* %ret, align 4
   br label %while.body
 
 label4:
-  %ret.0.load22 = load volatile i32* %ret, align 4
+  %ret.0.load22 = load volatile i32, i32* %ret, align 4
   %inc8 = add nsw i32 %ret.0.load22, 1
   store volatile i32 %inc8, i32* %ret, align 4
   br label %while.body
 
 label5:
-  %ret.0.load23 = load volatile i32* %ret, align 4
+  %ret.0.load23 = load volatile i32, i32* %ret, align 4
   %inc10 = add nsw i32 %ret.0.load23, 1
   store volatile i32 %inc10, i32* %ret, align 4
   br label %while.body
diff --git a/llvm/test/CodeGen/Hexagon/cext-check.ll b/llvm/test/CodeGen/Hexagon/cext-check.ll
index 9fc3d22..19b91c5 100644
--- a/llvm/test/CodeGen/Hexagon/cext-check.ll
+++ b/llvm/test/CodeGen/Hexagon/cext-check.ll
@@ -7,19 +7,19 @@
 ; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}{{ *}}+{{ *}}##4092)
 ; CHECK-NOT: r{{[0-9]+}}{{ *}}={{ *}}add(r{{[0-9]+}}{{ *}},{{ *}}##300)
 entry:
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %tobool = icmp ne i32 %0, 0
   br i1 %tobool, label %if.then, label %if.end
 
 if.then:
   %arrayidx1 = getelementptr inbounds i32, i32* %a, i32 2000
-  %1 = load i32* %arrayidx1, align 4
+  %1 = load i32, i32* %arrayidx1, align 4
   %add = add nsw i32 %1, 300000
   br label %return
 
 if.end:
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1023
-  %2 = load i32* %arrayidx2, align 4
+  %2 = load i32, i32* %arrayidx2, align 4
   %add3 = add nsw i32 %2, 300
   br label %return
 
@@ -39,14 +39,14 @@
 
 if.then:
   %arrayidx = getelementptr inbounds i8, i8* %a, i32 1023
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 300000
   br label %return
 
 if.end:
   %arrayidx1 = getelementptr inbounds i8, i8* %a, i32 1024
-  %1 = load i8* %arrayidx1, align 1
+  %1 = load i8, i8* %arrayidx1, align 1
   %conv2 = zext i8 %1 to i32
   %add3 = add nsw i32 %conv2, 6000
   br label %return
diff --git a/llvm/test/CodeGen/Hexagon/cext-valid-packet2.ll b/llvm/test/CodeGen/Hexagon/cext-valid-packet2.ll
index 03904d9..2eba743 100644
--- a/llvm/test/CodeGen/Hexagon/cext-valid-packet2.ll
+++ b/llvm/test/CodeGen/Hexagon/cext-valid-packet2.ll
@@ -10,24 +10,24 @@
 define i32 @test(i32* nocapture %a, i32* nocapture %b, i32 %c) nounwind {
 entry:
   %add = add nsw i32 %c, 200002
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %add1 = add nsw i32 %0, 200000
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 3000
   store i32 %add1, i32* %arrayidx2, align 4
-  %1 = load i32* %b, align 4
+  %1 = load i32, i32* %b, align 4
   %add4 = add nsw i32 %1, 200001
   %arrayidx5 = getelementptr inbounds i32, i32* %a, i32 1
   store i32 %add4, i32* %arrayidx5, align 4
   %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 1
-  %2 = load i32* %arrayidx7, align 4
+  %2 = load i32, i32* %arrayidx7, align 4
   %cmp = icmp sgt i32 %add4, %2
   br i1 %cmp, label %if.then, label %if.else
 
 if.then:                                          ; preds = %entry
   %arrayidx8 = getelementptr inbounds i32, i32* %a, i32 2
-  %3 = load i32* %arrayidx8, align 4
+  %3 = load i32, i32* %arrayidx8, align 4
   %arrayidx9 = getelementptr inbounds i32, i32* %b, i32 2000
-  %4 = load i32* %arrayidx9, align 4
+  %4 = load i32, i32* %arrayidx9, align 4
   %sub = sub nsw i32 %3, %4
   %arrayidx10 = getelementptr inbounds i32, i32* %a, i32 4000
   store i32 %sub, i32* %arrayidx10, align 4
diff --git a/llvm/test/CodeGen/Hexagon/cmp_pred2.ll b/llvm/test/CodeGen/Hexagon/cmp_pred2.ll
index a20b9f0..28f3e1b 100644
--- a/llvm/test/CodeGen/Hexagon/cmp_pred2.ll
+++ b/llvm/test/CodeGen/Hexagon/cmp_pred2.ll
@@ -11,7 +11,7 @@
   br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
 
 entry.if.end_crit_edge:
-  %.pre = load i32* @c, align 4
+  %.pre = load i32, i32* @c, align 4
   br label %if.end
 
 if.then:
@@ -32,7 +32,7 @@
   br i1 %cmp, label %entry.if.end_crit_edge, label %if.then
 
 entry.if.end_crit_edge:
-  %.pre = load i32* @c, align 4
+  %.pre = load i32, i32* @c, align 4
   br label %if.end
 
 if.then:
@@ -53,7 +53,7 @@
   br i1 %cmp, label %entry.if.end_crit_edge, label %if.then
 
 entry.if.end_crit_edge:
-  %.pre = load i32* @c, align 4
+  %.pre = load i32, i32* @c, align 4
   br label %if.end
 
 if.then:
@@ -73,7 +73,7 @@
   br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
 
 entry.if.end_crit_edge:
-  %.pre = load i32* @c, align 4
+  %.pre = load i32, i32* @c, align 4
   br label %if.end
 
 if.then:
diff --git a/llvm/test/CodeGen/Hexagon/cmpb_pred.ll b/llvm/test/CodeGen/Hexagon/cmpb_pred.ll
index 0960da1..cf0c5a8 100644
--- a/llvm/test/CodeGen/Hexagon/cmpb_pred.ll
+++ b/llvm/test/CodeGen/Hexagon/cmpb_pred.ll
@@ -16,7 +16,7 @@
 define i32 @Func_3b(i32) nounwind readonly {
 entry:
 ; CHECK-NOT: mux
-  %1 = load i8* @Enum_global, align 1
+  %1 = load i8, i8* @Enum_global, align 1
   %2 = trunc i32 %0 to i8
   %cmp = icmp ne i8 %1, %2
   %selv = zext i1 %cmp to i32
@@ -35,7 +35,7 @@
 define i32 @Func_3d(i32) nounwind readonly {
 entry:
 ; CHECK-NOT: mux
-  %1 = load i8* @Enum_global, align 1
+  %1 = load i8, i8* @Enum_global, align 1
   %2 = trunc i32 %0 to i8
   %cmp = icmp eq i8 %1, %2
   %selv = zext i1 %cmp to i32
@@ -45,7 +45,7 @@
 define i32 @Func_3e(i32) nounwind readonly {
 entry:
 ; CHECK-NOT: mux
-  %1 = load i8* @Enum_global, align 1
+  %1 = load i8, i8* @Enum_global, align 1
   %2 = trunc i32 %0 to i8
   %cmp = icmp eq i8 %1, %2
   %selv = zext i1 %cmp to i32
diff --git a/llvm/test/CodeGen/Hexagon/combine.ll b/llvm/test/CodeGen/Hexagon/combine.ll
index 7219985..2e320d9 100644
--- a/llvm/test/CodeGen/Hexagon/combine.ll
+++ b/llvm/test/CodeGen/Hexagon/combine.ll
@@ -6,8 +6,8 @@
 
 define void @foo() nounwind {
 entry:
-  %0 = load i32* @j, align 4
-  %1 = load i64* @k, align 8
+  %0 = load i32, i32* @j, align 4
+  %1 = load i64, i64* @k, align 8
   %conv = trunc i64 %1 to i32
   %2 = call i64 @llvm.hexagon.A2.combinew(i32 %0, i32 %conv)
   store i64 %2, i64* @k, align 8
diff --git a/llvm/test/CodeGen/Hexagon/combine_ir.ll b/llvm/test/CodeGen/Hexagon/combine_ir.ll
index 35e997b..634a5c8 100644
--- a/llvm/test/CodeGen/Hexagon/combine_ir.ll
+++ b/llvm/test/CodeGen/Hexagon/combine_ir.ll
@@ -4,7 +4,7 @@
 
 define void @word(i32* nocapture %a) nounwind {
 entry:
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %1 = zext i32 %0 to i64
   tail call void @bar(i64 %1) nounwind
   ret void
@@ -17,10 +17,10 @@
 
 define void @halfword(i16* nocapture %a) nounwind {
 entry:
-  %0 = load i16* %a, align 2
+  %0 = load i16, i16* %a, align 2
   %1 = zext i16 %0 to i64
   %add.ptr = getelementptr inbounds i16, i16* %a, i32 1
-  %2 = load i16* %add.ptr, align 2
+  %2 = load i16, i16* %add.ptr, align 2
   %3 = zext i16 %2 to i64
   %4 = shl nuw nsw i64 %3, 16
   %ins = or i64 %4, %1
@@ -33,10 +33,10 @@
 
 define void @byte(i8* nocapture %a) nounwind {
 entry:
-  %0 = load i8* %a, align 1
+  %0 = load i8, i8* %a, align 1
   %1 = zext i8 %0 to i64
   %add.ptr = getelementptr inbounds i8, i8* %a, i32 1
-  %2 = load i8* %add.ptr, align 1
+  %2 = load i8, i8* %add.ptr, align 1
   %3 = zext i8 %2 to i64
   %4 = shl nuw nsw i64 %3, 8
   %ins = or i64 %4, %1
diff --git a/llvm/test/CodeGen/Hexagon/convertdptoint.ll b/llvm/test/CodeGen/Hexagon/convertdptoint.ll
index fa068c4..a09c2fd 100644
--- a/llvm/test/CodeGen/Hexagon/convertdptoint.ll
+++ b/llvm/test/CodeGen/Hexagon/convertdptoint.ll
@@ -14,13 +14,13 @@
   store i32 0, i32* %retval
   store double 1.540000e+01, double* %a, align 8
   store double 9.100000e+00, double* %b, align 8
-  %0 = load double* %a, align 8
-  %1 = load double* %b, align 8
+  %0 = load double, double* %a, align 8
+  %1 = load double, double* %b, align 8
   %add = fadd double %0, %1
   store double %add, double* %c, align 8
-  %2 = load double* %c, align 8
+  %2 = load double, double* %c, align 8
   %conv = fptosi double %2 to i32
   store i32 %conv, i32* %i, align 4
-  %3 = load i32* %i, align 4
+  %3 = load i32, i32* %i, align 4
   ret i32 %3
 }
diff --git a/llvm/test/CodeGen/Hexagon/convertdptoll.ll b/llvm/test/CodeGen/Hexagon/convertdptoll.ll
index 1b4dd86..f46d46c 100644
--- a/llvm/test/CodeGen/Hexagon/convertdptoll.ll
+++ b/llvm/test/CodeGen/Hexagon/convertdptoll.ll
@@ -14,14 +14,14 @@
   store i32 0, i32* %retval
   store double 1.540000e+01, double* %a, align 8
   store double 9.100000e+00, double* %b, align 8
-  %0 = load double* %a, align 8
-  %1 = load double* %b, align 8
+  %0 = load double, double* %a, align 8
+  %1 = load double, double* %b, align 8
   %add = fadd double %0, %1
   store double %add, double* %c, align 8
-  %2 = load double* %c, align 8
+  %2 = load double, double* %c, align 8
   %conv = fptosi double %2 to i64
   store i64 %conv, i64* %i, align 8
-  %3 = load i64* %i, align 8
+  %3 = load i64, i64* %i, align 8
   %conv1 = trunc i64 %3 to i32
   ret i32 %conv1
 }
diff --git a/llvm/test/CodeGen/Hexagon/convertsptoint.ll b/llvm/test/CodeGen/Hexagon/convertsptoint.ll
index b8a9d6c..7593e57 100644
--- a/llvm/test/CodeGen/Hexagon/convertsptoint.ll
+++ b/llvm/test/CodeGen/Hexagon/convertsptoint.ll
@@ -14,13 +14,13 @@
   store i32 0, i32* %retval
   store float 0x402ECCCCC0000000, float* %a, align 4
   store float 0x4022333340000000, float* %b, align 4
-  %0 = load float* %a, align 4
-  %1 = load float* %b, align 4
+  %0 = load float, float* %a, align 4
+  %1 = load float, float* %b, align 4
   %add = fadd float %0, %1
   store float %add, float* %c, align 4
-  %2 = load float* %c, align 4
+  %2 = load float, float* %c, align 4
   %conv = fptosi float %2 to i32
   store i32 %conv, i32* %i, align 4
-  %3 = load i32* %i, align 4
+  %3 = load i32, i32* %i, align 4
   ret i32 %3
 }
diff --git a/llvm/test/CodeGen/Hexagon/convertsptoll.ll b/llvm/test/CodeGen/Hexagon/convertsptoll.ll
index 1c4df94..d8432cb 100644
--- a/llvm/test/CodeGen/Hexagon/convertsptoll.ll
+++ b/llvm/test/CodeGen/Hexagon/convertsptoll.ll
@@ -14,14 +14,14 @@
   store i32 0, i32* %retval
   store float 0x402ECCCCC0000000, float* %a, align 4
   store float 0x4022333340000000, float* %b, align 4
-  %0 = load float* %a, align 4
-  %1 = load float* %b, align 4
+  %0 = load float, float* %a, align 4
+  %1 = load float, float* %b, align 4
   %add = fadd float %0, %1
   store float %add, float* %c, align 4
-  %2 = load float* %c, align 4
+  %2 = load float, float* %c, align 4
   %conv = fptosi float %2 to i64
   store i64 %conv, i64* %i, align 8
-  %3 = load i64* %i, align 8
+  %3 = load i64, i64* %i, align 8
   %conv1 = trunc i64 %3 to i32
   ret i32 %conv1
 }
diff --git a/llvm/test/CodeGen/Hexagon/dadd.ll b/llvm/test/CodeGen/Hexagon/dadd.ll
index a86a90c..5fcd705 100644
--- a/llvm/test/CodeGen/Hexagon/dadd.ll
+++ b/llvm/test/CodeGen/Hexagon/dadd.ll
@@ -11,8 +11,8 @@
   %c = alloca double, align 8
   store double 1.540000e+01, double* %a, align 8
   store double 9.100000e+00, double* %b, align 8
-  %0 = load double* %a, align 8
-  %1 = load double* %b, align 8
+  %0 = load double, double* %a, align 8
+  %1 = load double, double* %b, align 8
   %add = fadd double %0, %1
   store double %add, double* %c, align 8
   ret i32 0
diff --git a/llvm/test/CodeGen/Hexagon/dmul.ll b/llvm/test/CodeGen/Hexagon/dmul.ll
index cbe0d7f..1b79e0a 100644
--- a/llvm/test/CodeGen/Hexagon/dmul.ll
+++ b/llvm/test/CodeGen/Hexagon/dmul.ll
@@ -10,8 +10,8 @@
   %c = alloca double, align 8
   store double 1.540000e+01, double* %a, align 8
   store double 9.100000e+00, double* %b, align 8
-  %0 = load double* %b, align 8
-  %1 = load double* %a, align 8
+  %0 = load double, double* %b, align 8
+  %1 = load double, double* %a, align 8
   %mul = fmul double %0, %1
   store double %mul, double* %c, align 8
   ret i32 0
diff --git a/llvm/test/CodeGen/Hexagon/double.ll b/llvm/test/CodeGen/Hexagon/double.ll
index c3b6f37..b4d025c 100644
--- a/llvm/test/CodeGen/Hexagon/double.ll
+++ b/llvm/test/CodeGen/Hexagon/double.ll
@@ -10,13 +10,13 @@
   store double* %acc, double** %acc.addr, align 4
   store double %num, double* %num.addr, align 8
   store double %num2, double* %num2.addr, align 8
-  %0 = load double** %acc.addr, align 4
-  %1 = load double* %0
-  %2 = load double* %num.addr, align 8
+  %0 = load double*, double** %acc.addr, align 4
+  %1 = load double, double* %0
+  %2 = load double, double* %num.addr, align 8
   %add = fadd double %1, %2
-  %3 = load double* %num2.addr, align 8
+  %3 = load double, double* %num2.addr, align 8
   %sub = fsub double %add, %3
-  %4 = load double** %acc.addr, align 4
+  %4 = load double*, double** %acc.addr, align 4
   store double %sub, double* %4
   ret void
 }
diff --git a/llvm/test/CodeGen/Hexagon/doubleconvert-ieee-rnd-near.ll b/llvm/test/CodeGen/Hexagon/doubleconvert-ieee-rnd-near.ll
index 54e7ce3..6bf8224 100644
--- a/llvm/test/CodeGen/Hexagon/doubleconvert-ieee-rnd-near.ll
+++ b/llvm/test/CodeGen/Hexagon/doubleconvert-ieee-rnd-near.ll
@@ -14,13 +14,13 @@
   store i32 0, i32* %retval
   store double 1.540000e+01, double* %a, align 8
   store double 9.100000e+00, double* %b, align 8
-  %0 = load double* %a, align 8
-  %1 = load double* %b, align 8
+  %0 = load double, double* %a, align 8
+  %1 = load double, double* %b, align 8
   %add = fadd double %0, %1
   store double %add, double* %c, align 8
-  %2 = load double* %c, align 8
+  %2 = load double, double* %c, align 8
   %conv = fptosi double %2 to i32
   store i32 %conv, i32* %i, align 4
-  %3 = load i32* %i, align 4
+  %3 = load i32, i32* %i, align 4
   ret i32 %3
 }
diff --git a/llvm/test/CodeGen/Hexagon/dsub.ll b/llvm/test/CodeGen/Hexagon/dsub.ll
index f271492..8b37301 100644
--- a/llvm/test/CodeGen/Hexagon/dsub.ll
+++ b/llvm/test/CodeGen/Hexagon/dsub.ll
@@ -10,8 +10,8 @@
   %c = alloca double, align 8
   store double 1.540000e+01, double* %a, align 8
   store double 9.100000e+00, double* %b, align 8
-  %0 = load double* %b, align 8
-  %1 = load double* %a, align 8
+  %0 = load double, double* %b, align 8
+  %1 = load double, double* %a, align 8
   %sub = fsub double %0, %1
   store double %sub, double* %c, align 8
   ret i32 0
diff --git a/llvm/test/CodeGen/Hexagon/extload-combine.ll b/llvm/test/CodeGen/Hexagon/extload-combine.ll
index b3b8bf0..519177f 100644
--- a/llvm/test/CodeGen/Hexagon/extload-combine.ll
+++ b/llvm/test/CodeGen/Hexagon/extload-combine.ll
@@ -19,7 +19,7 @@
 ; CHECK: combine(#0, [[VAR]])
 entry:
   store i16 0, i16* @a, align 2
-  %0 = load i16* @b, align 2
+  %0 = load i16, i16* @b, align 2
   %conv2 = zext i16 %0 to i64
   ret i64 %conv2
 }
@@ -30,7 +30,7 @@
 ; CHECK: sxtw([[VAR1]])
 entry:
   store i16 0, i16* @a, align 2
-  %0 = load i16* @c, align 2
+  %0 = load i16, i16* @c, align 2
   %conv2 = sext i16 %0 to i64
   ret i64 %conv2
 }
@@ -41,7 +41,7 @@
 ; CHECK: combine(#0, [[VAR2]])
 entry:
   store i8 0, i8* @char_a, align 1
-  %0 = load i8* @char_b, align 1
+  %0 = load i8, i8* @char_b, align 1
   %conv2 = zext i8 %0 to i64
   ret i64 %conv2
 }
@@ -52,7 +52,7 @@
 ; CHECK: sxtw([[VAR3]])
 entry:
   store i8 0, i8* @char_a, align 1
-  %0 = load i8* @char_c, align 1
+  %0 = load i8, i8* @char_c, align 1
   %conv2 = sext i8 %0 to i64
   ret i64 %conv2
 }
@@ -63,7 +63,7 @@
 ; CHECK: combine(#0, [[VAR4]])
 entry:
   store i32 0, i32* @int_a, align 4
-  %0 = load i32* @int_b, align 4
+  %0 = load i32, i32* @int_b, align 4
   %conv = zext i32 %0 to i64
   ret i64 %conv
 }
@@ -74,7 +74,7 @@
 ; CHECK: sxtw([[VAR5]])
 entry:
   store i32 0, i32* @int_a, align 4
-  %0 = load i32* @int_c, align 4
+  %0 = load i32, i32* @int_c, align 4
   %conv = sext i32 %0 to i64
   ret i64 %conv
 }
diff --git a/llvm/test/CodeGen/Hexagon/fadd.ll b/llvm/test/CodeGen/Hexagon/fadd.ll
index b95e147..6cf0fbb 100644
--- a/llvm/test/CodeGen/Hexagon/fadd.ll
+++ b/llvm/test/CodeGen/Hexagon/fadd.ll
@@ -10,8 +10,8 @@
   %c = alloca float, align 4
   store float 0x402ECCCCC0000000, float* %a, align 4
   store float 0x4022333340000000, float* %b, align 4
-  %0 = load float* %a, align 4
-  %1 = load float* %b, align 4
+  %0 = load float, float* %a, align 4
+  %1 = load float, float* %b, align 4
   %add = fadd float %0, %1
   store float %add, float* %c, align 4
   ret i32 0
diff --git a/llvm/test/CodeGen/Hexagon/fcmp.ll b/llvm/test/CodeGen/Hexagon/fcmp.ll
index e7b649e..5cf3c57 100644
--- a/llvm/test/CodeGen/Hexagon/fcmp.ll
+++ b/llvm/test/CodeGen/Hexagon/fcmp.ll
@@ -8,7 +8,7 @@
   %retval = alloca i32, align 4
   %y.addr = alloca float, align 4
   store float %y, float* %y.addr, align 4
-  %0 = load float* %y.addr, align 4
+  %0 = load float, float* %y.addr, align 4
   %cmp = fcmp ogt float %0, 0x406AD7EFA0000000
   br i1 %cmp, label %if.then, label %if.else
 
@@ -21,7 +21,7 @@
   br label %return
 
 return:                                           ; preds = %if.else, %if.then
-  %1 = load i32* %retval
+  %1 = load i32, i32* %retval
   ret i32 %1
 }
 
@@ -31,7 +31,7 @@
   %a = alloca float, align 4
   store i32 0, i32* %retval
   store float 0x40012E0A00000000, float* %a, align 4
-  %0 = load float* %a, align 4
+  %0 = load float, float* %a, align 4
   %call = call i32 @foo(float %0)
   ret i32 %call
 }
diff --git a/llvm/test/CodeGen/Hexagon/float.ll b/llvm/test/CodeGen/Hexagon/float.ll
index bec9f58..03d1fbf 100644
--- a/llvm/test/CodeGen/Hexagon/float.ll
+++ b/llvm/test/CodeGen/Hexagon/float.ll
@@ -10,13 +10,13 @@
   store float* %acc, float** %acc.addr, align 4
   store float %num, float* %num.addr, align 4
   store float %num2, float* %num2.addr, align 4
-  %0 = load float** %acc.addr, align 4
-  %1 = load float* %0
-  %2 = load float* %num.addr, align 4
+  %0 = load float*, float** %acc.addr, align 4
+  %1 = load float, float* %0
+  %2 = load float, float* %num.addr, align 4
   %add = fadd float %1, %2
-  %3 = load float* %num2.addr, align 4
+  %3 = load float, float* %num2.addr, align 4
   %sub = fsub float %add, %3
-  %4 = load float** %acc.addr, align 4
+  %4 = load float*, float** %acc.addr, align 4
   store float %sub, float* %4
   ret void
 }
diff --git a/llvm/test/CodeGen/Hexagon/floatconvert-ieee-rnd-near.ll b/llvm/test/CodeGen/Hexagon/floatconvert-ieee-rnd-near.ll
index bec9f58..03d1fbf 100644
--- a/llvm/test/CodeGen/Hexagon/floatconvert-ieee-rnd-near.ll
+++ b/llvm/test/CodeGen/Hexagon/floatconvert-ieee-rnd-near.ll
@@ -10,13 +10,13 @@
   store float* %acc, float** %acc.addr, align 4
   store float %num, float* %num.addr, align 4
   store float %num2, float* %num2.addr, align 4
-  %0 = load float** %acc.addr, align 4
-  %1 = load float* %0
-  %2 = load float* %num.addr, align 4
+  %0 = load float*, float** %acc.addr, align 4
+  %1 = load float, float* %0
+  %2 = load float, float* %num.addr, align 4
   %add = fadd float %1, %2
-  %3 = load float* %num2.addr, align 4
+  %3 = load float, float* %num2.addr, align 4
   %sub = fsub float %add, %3
-  %4 = load float** %acc.addr, align 4
+  %4 = load float*, float** %acc.addr, align 4
   store float %sub, float* %4
   ret void
 }
diff --git a/llvm/test/CodeGen/Hexagon/fmul.ll b/llvm/test/CodeGen/Hexagon/fmul.ll
index 4766845..4f55d0b 100644
--- a/llvm/test/CodeGen/Hexagon/fmul.ll
+++ b/llvm/test/CodeGen/Hexagon/fmul.ll
@@ -11,8 +11,8 @@
   %c = alloca float, align 4
   store float 0x402ECCCCC0000000, float* %a, align 4
   store float 0x4022333340000000, float* %b, align 4
-  %0 = load float* %b, align 4
-  %1 = load float* %a, align 4
+  %0 = load float, float* %b, align 4
+  %1 = load float, float* %a, align 4
   %mul = fmul float %0, %1
   store float %mul, float* %c, align 4
   ret i32 0
diff --git a/llvm/test/CodeGen/Hexagon/frame.ll b/llvm/test/CodeGen/Hexagon/frame.ll
index dc87c73..e87acb8c 100644
--- a/llvm/test/CodeGen/Hexagon/frame.ll
+++ b/llvm/test/CodeGen/Hexagon/frame.ll
@@ -10,14 +10,14 @@
 define i32 @foo() nounwind {
 entry:
   %i = alloca i32, align 4
-  %0 = load i32* @num, align 4
+  %0 = load i32, i32* @num, align 4
   store i32 %0, i32* %i, align 4
-  %1 = load i32* %i, align 4
-  %2 = load i32* @acc, align 4
+  %1 = load i32, i32* %i, align 4
+  %2 = load i32, i32* @acc, align 4
   %mul = mul nsw i32 %1, %2
-  %3 = load i32* @num2, align 4
+  %3 = load i32, i32* @num2, align 4
   %add = add nsw i32 %mul, %3
   store i32 %add, i32* %i, align 4
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   ret i32 %4
 }
diff --git a/llvm/test/CodeGen/Hexagon/fsub.ll b/llvm/test/CodeGen/Hexagon/fsub.ll
index 07c866f..ca7bdc4 100644
--- a/llvm/test/CodeGen/Hexagon/fsub.ll
+++ b/llvm/test/CodeGen/Hexagon/fsub.ll
@@ -10,8 +10,8 @@
   %c = alloca float, align 4
   store float 0x402ECCCCC0000000, float* %a, align 4
   store float 0x4022333340000000, float* %b, align 4
-  %0 = load float* %b, align 4
-  %1 = load float* %a, align 4
+  %0 = load float, float* %b, align 4
+  %1 = load float, float* %a, align 4
   %sub = fsub float %0, %1
   store float %sub, float* %c, align 4
   ret i32 0
diff --git a/llvm/test/CodeGen/Hexagon/fusedandshift.ll b/llvm/test/CodeGen/Hexagon/fusedandshift.ll
index 022b3c6..59a1e1d 100644
--- a/llvm/test/CodeGen/Hexagon/fusedandshift.ll
+++ b/llvm/test/CodeGen/Hexagon/fusedandshift.ll
@@ -5,7 +5,7 @@
 
 define i32 @main(i16* %a, i16* %b) nounwind {
   entry:
-  %0 = load i16* %a, align 2
+  %0 = load i16, i16* %a, align 2
   %conv1 = sext i16 %0 to i32
   %shr1 = ashr i32 %conv1, 3
   %and1 = and i32 %shr1, 15
diff --git a/llvm/test/CodeGen/Hexagon/gp-plus-offset-load.ll b/llvm/test/CodeGen/Hexagon/gp-plus-offset-load.ll
index a1b80a6..583f67a 100644
--- a/llvm/test/CodeGen/Hexagon/gp-plus-offset-load.ll
+++ b/llvm/test/CodeGen/Hexagon/gp-plus-offset-load.ll
@@ -12,7 +12,7 @@
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
-  %0 = load i32* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 3), align 4
+  %0 = load i32, i32* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 3), align 4
   store i32 %0, i32* %ival, align 4
   br label %if.end
 
@@ -27,7 +27,7 @@
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
-  %0 = load i8* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 1), align 1
+  %0 = load i8, i8* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 1), align 1
   store i8 %0, i8* %ival, align 1
   br label %if.end
 
@@ -42,7 +42,7 @@
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
-  %0 = load i16* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 2), align 2
+  %0 = load i16, i16* getelementptr inbounds (%struct.struc* @foo, i32 0, i32 2), align 2
   store i16 %0, i16* %ival, align 2
   br label %if.end
 
diff --git a/llvm/test/CodeGen/Hexagon/gp-rel.ll b/llvm/test/CodeGen/Hexagon/gp-rel.ll
index 561869e..bb7cb18 100644
--- a/llvm/test/CodeGen/Hexagon/gp-rel.ll
+++ b/llvm/test/CodeGen/Hexagon/gp-rel.ll
@@ -10,14 +10,14 @@
 ; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(#a)
 ; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(#b)
 ; CHECK: if{{ *}}(p{{[0-3]}}) memw(##c){{ *}}={{ *}}r{{[0-9]+}}
-  %0 = load i32* @a, align 4
-  %1 = load i32* @b, align 4
+  %0 = load i32, i32* @a, align 4
+  %1 = load i32, i32* @b, align 4
   %add = add nsw i32 %1, %0
   %cmp = icmp eq i32 %0, %1
   br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
 
 entry.if.end_crit_edge:
-  %.pre = load i32* @c, align 4
+  %.pre = load i32, i32* @c, align 4
   br label %if.end
 
 if.then:
diff --git a/llvm/test/CodeGen/Hexagon/hwloop-cleanup.ll b/llvm/test/CodeGen/Hexagon/hwloop-cleanup.ll
index 8112407..643fe11 100644
--- a/llvm/test/CodeGen/Hexagon/hwloop-cleanup.ll
+++ b/llvm/test/CodeGen/Hexagon/hwloop-cleanup.ll
@@ -20,7 +20,7 @@
   %sum.03 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
   %arrayidx.phi = phi i32* [ %arrayidx.inc, %for.body ], [ %b, %for.body.preheader ]
   %i.02 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
-  %0 = load i32* %arrayidx.phi, align 4
+  %0 = load i32, i32* %arrayidx.phi, align 4
   %add = add nsw i32 %0, %sum.03
   %inc = add nsw i32 %i.02, 1
   %exitcond = icmp eq i32 %inc, %n
@@ -50,7 +50,7 @@
   %sum.02 = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx.phi = phi i32* [ %b, %entry ], [ %arrayidx.inc, %for.body ]
   %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %0 = load i32* %arrayidx.phi, align 4
+  %0 = load i32, i32* %arrayidx.phi, align 4
   %add = add nsw i32 %0, %sum.02
   %inc = add nsw i32 %i.01, 1
   %exitcond = icmp eq i32 %inc, 40
diff --git a/llvm/test/CodeGen/Hexagon/hwloop-dbg.ll b/llvm/test/CodeGen/Hexagon/hwloop-dbg.ll
index 7dfea5d..ca7da9e 100644
--- a/llvm/test/CodeGen/Hexagon/hwloop-dbg.ll
+++ b/llvm/test/CodeGen/Hexagon/hwloop-dbg.ll
@@ -19,7 +19,7 @@
   %b.addr.01 = phi i32* [ %b, %entry ], [ %incdec.ptr, %for.body ]
   %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.01, i32 1, !dbg !21
   tail call void @llvm.dbg.value(metadata i32* %incdec.ptr, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !21
-  %0 = load i32* %b.addr.01, align 4, !dbg !21
+  %0 = load i32, i32* %b.addr.01, align 4, !dbg !21
   store i32 %0, i32* %arrayidx.phi, align 4, !dbg !21
   %inc = add nsw i32 %i.02, 1, !dbg !26
   tail call void @llvm.dbg.value(metadata i32 %inc, i64 0, metadata !15, metadata !{!"0x102"}), !dbg !26
diff --git a/llvm/test/CodeGen/Hexagon/hwloop-le.ll b/llvm/test/CodeGen/Hexagon/hwloop-le.ll
index 984a263..85a1b3d 100644
--- a/llvm/test/CodeGen/Hexagon/hwloop-le.ll
+++ b/llvm/test/CodeGen/Hexagon/hwloop-le.ll
@@ -15,7 +15,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 28395, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -44,7 +44,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 9073, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -73,7 +73,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 21956, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -102,7 +102,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 16782, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -131,7 +131,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 19097, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -160,7 +160,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -189,7 +189,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -218,7 +218,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -247,7 +247,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -276,7 +276,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -305,7 +305,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -334,7 +334,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -363,7 +363,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -392,7 +392,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -421,7 +421,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
diff --git a/llvm/test/CodeGen/Hexagon/hwloop-lt.ll b/llvm/test/CodeGen/Hexagon/hwloop-lt.ll
index 23be6fe..804f764 100644
--- a/llvm/test/CodeGen/Hexagon/hwloop-lt.ll
+++ b/llvm/test/CodeGen/Hexagon/hwloop-lt.ll
@@ -15,7 +15,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 8531, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -44,7 +44,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 9152, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -73,7 +73,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 18851, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -102,7 +102,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 25466, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -131,7 +131,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 9295, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -160,7 +160,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -189,7 +189,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -218,7 +218,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -247,7 +247,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -276,7 +276,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -305,7 +305,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -334,7 +334,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -363,7 +363,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -392,7 +392,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -421,7 +421,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
diff --git a/llvm/test/CodeGen/Hexagon/hwloop-ne.ll b/llvm/test/CodeGen/Hexagon/hwloop-ne.ll
index 8f512a2..12ef3b5 100644
--- a/llvm/test/CodeGen/Hexagon/hwloop-ne.ll
+++ b/llvm/test/CodeGen/Hexagon/hwloop-ne.ll
@@ -15,7 +15,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 32623, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -44,7 +44,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 29554, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -73,7 +73,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 15692, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -102,7 +102,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 10449, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -131,7 +131,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 32087, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -160,7 +160,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -189,7 +189,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -218,7 +218,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -247,7 +247,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -276,7 +276,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -305,7 +305,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -334,7 +334,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -363,7 +363,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -392,7 +392,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -421,7 +421,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
diff --git a/llvm/test/CodeGen/Hexagon/i16_VarArg.ll b/llvm/test/CodeGen/Hexagon/i16_VarArg.ll
index 9914e01..41cecec 100644
--- a/llvm/test/CodeGen/Hexagon/i16_VarArg.ll
+++ b/llvm/test/CodeGen/Hexagon/i16_VarArg.ll
@@ -20,8 +20,8 @@
 declare i32 @printf(i8*, ...)
 
 define i32 @main() {
-        %a = load double* @A
-        %b = load double* @B
+        %a = load double, double* @A
+        %b = load double, double* @B
         %lt_r = fcmp olt double %a, %b
         %le_r = fcmp ole double %a, %b
         %gt_r = fcmp ogt double %a, %b
diff --git a/llvm/test/CodeGen/Hexagon/i1_VarArg.ll b/llvm/test/CodeGen/Hexagon/i1_VarArg.ll
index 408943d..8b5625c9 100644
--- a/llvm/test/CodeGen/Hexagon/i1_VarArg.ll
+++ b/llvm/test/CodeGen/Hexagon/i1_VarArg.ll
@@ -20,8 +20,8 @@
 declare i32 @printf(i8*, ...)
 
 define i32 @main() {
-        %a = load double* @A
-        %b = load double* @B
+        %a = load double, double* @A
+        %b = load double, double* @B
         %lt_r = fcmp olt double %a, %b
         %le_r = fcmp ole double %a, %b
         %gt_r = fcmp ogt double %a, %b
diff --git a/llvm/test/CodeGen/Hexagon/i8_VarArg.ll b/llvm/test/CodeGen/Hexagon/i8_VarArg.ll
index f3dec92..7283ba4 100644
--- a/llvm/test/CodeGen/Hexagon/i8_VarArg.ll
+++ b/llvm/test/CodeGen/Hexagon/i8_VarArg.ll
@@ -20,8 +20,8 @@
 declare i32 @printf(i8*, ...)
 
 define i32 @main() {
-        %a = load double* @A
-        %b = load double* @B
+        %a = load double, double* @A
+        %b = load double, double* @B
         %lt_r = fcmp olt double %a, %b
         %le_r = fcmp ole double %a, %b
         %gt_r = fcmp ogt double %a, %b
diff --git a/llvm/test/CodeGen/Hexagon/idxload-with-zero-offset.ll b/llvm/test/CodeGen/Hexagon/idxload-with-zero-offset.ll
index aa834ce..f1a9d38 100644
--- a/llvm/test/CodeGen/Hexagon/idxload-with-zero-offset.ll
+++ b/llvm/test/CodeGen/Hexagon/idxload-with-zero-offset.ll
@@ -8,7 +8,7 @@
 entry:
   %tmp = add i32 %n, %m
   %scevgep9 = getelementptr i32, i32* %a, i32 %tmp
-  %val = load i32* %scevgep9, align 4
+  %val = load i32, i32* %scevgep9, align 4
   ret i32 %val
 }
 
@@ -19,7 +19,7 @@
 entry:
   %tmp = add i32 %n, %m
   %scevgep9 = getelementptr i16, i16* %a, i32 %tmp
-  %val = load i16* %scevgep9, align 2
+  %val = load i16, i16* %scevgep9, align 2
   ret i16 %val
 }
 
@@ -30,7 +30,7 @@
 entry:
   %tmp = add i32 %n, %m
   %scevgep9 = getelementptr i16, i16* %a, i32 %tmp
-  %val = load i16* %scevgep9, align 2
+  %val = load i16, i16* %scevgep9, align 2
   %conv = sext i16 %val to i32
   ret i32 %conv
 }
@@ -42,7 +42,7 @@
 entry:
   %tmp = add i32 %n, %m
   %scevgep9 = getelementptr i8, i8* %a, i32 %tmp
-  %val = load i8* %scevgep9, align 1
+  %val = load i8, i8* %scevgep9, align 1
   ret i8 %val
 }
 
@@ -53,7 +53,7 @@
 entry:
   %tmp = add i32 %n, %m
   %scevgep9 = getelementptr i8, i8* %a, i32 %tmp
-  %val = load i8* %scevgep9, align 1
+  %val = load i8, i8* %scevgep9, align 1
   %conv = sext i8 %val to i32
   ret i32 %conv
 }
@@ -65,6 +65,6 @@
 entry:
   %tmp = add i32 %n, %m
   %scevgep9 = getelementptr i64, i64* %a, i32 %tmp
-  %val = load i64* %scevgep9, align 8
+  %val = load i64, i64* %scevgep9, align 8
   ret i64 %val
 }
diff --git a/llvm/test/CodeGen/Hexagon/macint.ll b/llvm/test/CodeGen/Hexagon/macint.ll
index b3b9d0e..458a537 100644
--- a/llvm/test/CodeGen/Hexagon/macint.ll
+++ b/llvm/test/CodeGen/Hexagon/macint.ll
@@ -5,7 +5,7 @@
 
 define i32 @main(i32* %a, i32* %b) nounwind {
   entry:
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %div = udiv i32 %0, 10000
   %rem = urem i32 %div, 10
   store i32 %rem, i32* %b, align 4
diff --git a/llvm/test/CodeGen/Hexagon/memops.ll b/llvm/test/CodeGen/Hexagon/memops.ll
index 6a02028..e4a8bf7 100644
--- a/llvm/test/CodeGen/Hexagon/memops.ll
+++ b/llvm/test/CodeGen/Hexagon/memops.ll
@@ -4,7 +4,7 @@
 define void @memop_unsigned_char_add5(i8* nocapture %p) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
-  %0 = load i8* %p, align 1
+  %0 = load i8, i8* %p, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 5
   %conv1 = trunc i32 %add to i8
@@ -16,7 +16,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %conv = zext i8 %x to i32
-  %0 = load i8* %p, align 1
+  %0 = load i8, i8* %p, align 1
   %conv1 = zext i8 %0 to i32
   %add = add nsw i32 %conv1, %conv
   %conv2 = trunc i32 %add to i8
@@ -28,7 +28,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %conv = zext i8 %x to i32
-  %0 = load i8* %p, align 1
+  %0 = load i8, i8* %p, align 1
   %conv1 = zext i8 %0 to i32
   %sub = sub nsw i32 %conv1, %conv
   %conv2 = trunc i32 %sub to i8
@@ -39,7 +39,7 @@
 define void @memop_unsigned_char_or(i8* nocapture %p, i8 zeroext %x) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
-  %0 = load i8* %p, align 1
+  %0 = load i8, i8* %p, align 1
   %or3 = or i8 %0, %x
   store i8 %or3, i8* %p, align 1
   ret void
@@ -48,7 +48,7 @@
 define void @memop_unsigned_char_and(i8* nocapture %p, i8 zeroext %x) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
-  %0 = load i8* %p, align 1
+  %0 = load i8, i8* %p, align 1
   %and3 = and i8 %0, %x
   store i8 %and3, i8* %p, align 1
   ret void
@@ -57,7 +57,7 @@
 define void @memop_unsigned_char_clrbit(i8* nocapture %p) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
-  %0 = load i8* %p, align 1
+  %0 = load i8, i8* %p, align 1
   %conv = zext i8 %0 to i32
   %and = and i32 %conv, 223
   %conv1 = trunc i32 %and to i8
@@ -68,7 +68,7 @@
 define void @memop_unsigned_char_setbit(i8* nocapture %p) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
-  %0 = load i8* %p, align 1
+  %0 = load i8, i8* %p, align 1
   %conv = zext i8 %0 to i32
   %or = or i32 %conv, 128
   %conv1 = trunc i32 %or to i8
@@ -80,7 +80,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 5
   %conv1 = trunc i32 %add to i8
@@ -93,7 +93,7 @@
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %conv = zext i8 %x to i32
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv1 = zext i8 %0 to i32
   %add = add nsw i32 %conv1, %conv
   %conv2 = trunc i32 %add to i8
@@ -106,7 +106,7 @@
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %conv = zext i8 %x to i32
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv1 = zext i8 %0 to i32
   %sub = sub nsw i32 %conv1, %conv
   %conv2 = trunc i32 %sub to i8
@@ -118,7 +118,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %or3 = or i8 %0, %x
   store i8 %or3, i8* %add.ptr, align 1
   ret void
@@ -128,7 +128,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %and3 = and i8 %0, %x
   store i8 %and3, i8* %add.ptr, align 1
   ret void
@@ -138,7 +138,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv = zext i8 %0 to i32
   %and = and i32 %conv, 223
   %conv1 = trunc i32 %and to i8
@@ -150,7 +150,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv = zext i8 %0 to i32
   %or = or i32 %conv, 128
   %conv1 = trunc i32 %or to i8
@@ -162,7 +162,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 5
   %conv1 = trunc i32 %add to i8
@@ -175,7 +175,7 @@
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}r{{[0-9]+}}
   %conv = zext i8 %x to i32
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv1 = zext i8 %0 to i32
   %add = add nsw i32 %conv1, %conv
   %conv2 = trunc i32 %add to i8
@@ -188,7 +188,7 @@
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}-={{ *}}r{{[0-9]+}}
   %conv = zext i8 %x to i32
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv1 = zext i8 %0 to i32
   %sub = sub nsw i32 %conv1, %conv
   %conv2 = trunc i32 %sub to i8
@@ -200,7 +200,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %or3 = or i8 %0, %x
   store i8 %or3, i8* %add.ptr, align 1
   ret void
@@ -210,7 +210,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %and3 = and i8 %0, %x
   store i8 %and3, i8* %add.ptr, align 1
   ret void
@@ -220,7 +220,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv = zext i8 %0 to i32
   %and = and i32 %conv, 223
   %conv1 = trunc i32 %and to i8
@@ -232,7 +232,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv = zext i8 %0 to i32
   %or = or i32 %conv, 128
   %conv1 = trunc i32 %or to i8
@@ -243,7 +243,7 @@
 define void @memop_signed_char_add5(i8* nocapture %p) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
-  %0 = load i8* %p, align 1
+  %0 = load i8, i8* %p, align 1
   %conv2 = zext i8 %0 to i32
   %add = add nsw i32 %conv2, 5
   %conv1 = trunc i32 %add to i8
@@ -255,7 +255,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %conv4 = zext i8 %x to i32
-  %0 = load i8* %p, align 1
+  %0 = load i8, i8* %p, align 1
   %conv13 = zext i8 %0 to i32
   %add = add nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %add to i8
@@ -267,7 +267,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %conv4 = zext i8 %x to i32
-  %0 = load i8* %p, align 1
+  %0 = load i8, i8* %p, align 1
   %conv13 = zext i8 %0 to i32
   %sub = sub nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %sub to i8
@@ -278,7 +278,7 @@
 define void @memop_signed_char_or(i8* nocapture %p, i8 signext %x) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
-  %0 = load i8* %p, align 1
+  %0 = load i8, i8* %p, align 1
   %or3 = or i8 %0, %x
   store i8 %or3, i8* %p, align 1
   ret void
@@ -287,7 +287,7 @@
 define void @memop_signed_char_and(i8* nocapture %p, i8 signext %x) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
-  %0 = load i8* %p, align 1
+  %0 = load i8, i8* %p, align 1
   %and3 = and i8 %0, %x
   store i8 %and3, i8* %p, align 1
   ret void
@@ -296,7 +296,7 @@
 define void @memop_signed_char_clrbit(i8* nocapture %p) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
-  %0 = load i8* %p, align 1
+  %0 = load i8, i8* %p, align 1
   %conv2 = zext i8 %0 to i32
   %and = and i32 %conv2, 223
   %conv1 = trunc i32 %and to i8
@@ -307,7 +307,7 @@
 define void @memop_signed_char_setbit(i8* nocapture %p) nounwind {
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
-  %0 = load i8* %p, align 1
+  %0 = load i8, i8* %p, align 1
   %conv2 = zext i8 %0 to i32
   %or = or i32 %conv2, 128
   %conv1 = trunc i32 %or to i8
@@ -319,7 +319,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv2 = zext i8 %0 to i32
   %add = add nsw i32 %conv2, 5
   %conv1 = trunc i32 %add to i8
@@ -332,7 +332,7 @@
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %conv4 = zext i8 %x to i32
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv13 = zext i8 %0 to i32
   %add = add nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %add to i8
@@ -345,7 +345,7 @@
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %conv4 = zext i8 %x to i32
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv13 = zext i8 %0 to i32
   %sub = sub nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %sub to i8
@@ -357,7 +357,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %or3 = or i8 %0, %x
   store i8 %or3, i8* %add.ptr, align 1
   ret void
@@ -367,7 +367,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %and3 = and i8 %0, %x
   store i8 %and3, i8* %add.ptr, align 1
   ret void
@@ -377,7 +377,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv2 = zext i8 %0 to i32
   %and = and i32 %conv2, 223
   %conv1 = trunc i32 %and to i8
@@ -389,7 +389,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 %i
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv2 = zext i8 %0 to i32
   %or = or i32 %conv2, 128
   %conv1 = trunc i32 %or to i8
@@ -401,7 +401,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv2 = zext i8 %0 to i32
   %add = add nsw i32 %conv2, 5
   %conv1 = trunc i32 %add to i8
@@ -414,7 +414,7 @@
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}+={{ *}}r{{[0-9]+}}
   %conv4 = zext i8 %x to i32
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv13 = zext i8 %0 to i32
   %add = add nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %add to i8
@@ -427,7 +427,7 @@
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}-={{ *}}r{{[0-9]+}}
   %conv4 = zext i8 %x to i32
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv13 = zext i8 %0 to i32
   %sub = sub nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %sub to i8
@@ -439,7 +439,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %or3 = or i8 %0, %x
   store i8 %or3, i8* %add.ptr, align 1
   ret void
@@ -449,7 +449,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %and3 = and i8 %0, %x
   store i8 %and3, i8* %add.ptr, align 1
   ret void
@@ -459,7 +459,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv2 = zext i8 %0 to i32
   %and = and i32 %conv2, 223
   %conv1 = trunc i32 %and to i8
@@ -471,7 +471,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#5){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 5
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv2 = zext i8 %0 to i32
   %or = or i32 %conv2, 128
   %conv1 = trunc i32 %or to i8
@@ -482,7 +482,7 @@
 define void @memop_unsigned_short_add5(i16* nocapture %p) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
-  %0 = load i16* %p, align 2
+  %0 = load i16, i16* %p, align 2
   %conv = zext i16 %0 to i32
   %add = add nsw i32 %conv, 5
   %conv1 = trunc i32 %add to i16
@@ -494,7 +494,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %conv = zext i16 %x to i32
-  %0 = load i16* %p, align 2
+  %0 = load i16, i16* %p, align 2
   %conv1 = zext i16 %0 to i32
   %add = add nsw i32 %conv1, %conv
   %conv2 = trunc i32 %add to i16
@@ -506,7 +506,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %conv = zext i16 %x to i32
-  %0 = load i16* %p, align 2
+  %0 = load i16, i16* %p, align 2
   %conv1 = zext i16 %0 to i32
   %sub = sub nsw i32 %conv1, %conv
   %conv2 = trunc i32 %sub to i16
@@ -517,7 +517,7 @@
 define void @memop_unsigned_short_or(i16* nocapture %p, i16 zeroext %x) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
-  %0 = load i16* %p, align 2
+  %0 = load i16, i16* %p, align 2
   %or3 = or i16 %0, %x
   store i16 %or3, i16* %p, align 2
   ret void
@@ -526,7 +526,7 @@
 define void @memop_unsigned_short_and(i16* nocapture %p, i16 zeroext %x) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
-  %0 = load i16* %p, align 2
+  %0 = load i16, i16* %p, align 2
   %and3 = and i16 %0, %x
   store i16 %and3, i16* %p, align 2
   ret void
@@ -535,7 +535,7 @@
 define void @memop_unsigned_short_clrbit(i16* nocapture %p) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
-  %0 = load i16* %p, align 2
+  %0 = load i16, i16* %p, align 2
   %conv = zext i16 %0 to i32
   %and = and i32 %conv, 65503
   %conv1 = trunc i32 %and to i16
@@ -546,7 +546,7 @@
 define void @memop_unsigned_short_setbit(i16* nocapture %p) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
-  %0 = load i16* %p, align 2
+  %0 = load i16, i16* %p, align 2
   %conv = zext i16 %0 to i32
   %or = or i32 %conv, 128
   %conv1 = trunc i32 %or to i16
@@ -558,7 +558,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv = zext i16 %0 to i32
   %add = add nsw i32 %conv, 5
   %conv1 = trunc i32 %add to i16
@@ -571,7 +571,7 @@
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %conv = zext i16 %x to i32
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv1 = zext i16 %0 to i32
   %add = add nsw i32 %conv1, %conv
   %conv2 = trunc i32 %add to i16
@@ -584,7 +584,7 @@
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %conv = zext i16 %x to i32
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv1 = zext i16 %0 to i32
   %sub = sub nsw i32 %conv1, %conv
   %conv2 = trunc i32 %sub to i16
@@ -596,7 +596,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %or3 = or i16 %0, %x
   store i16 %or3, i16* %add.ptr, align 2
   ret void
@@ -606,7 +606,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %and3 = and i16 %0, %x
   store i16 %and3, i16* %add.ptr, align 2
   ret void
@@ -616,7 +616,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv = zext i16 %0 to i32
   %and = and i32 %conv, 65503
   %conv1 = trunc i32 %and to i16
@@ -628,7 +628,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv = zext i16 %0 to i32
   %or = or i32 %conv, 128
   %conv1 = trunc i32 %or to i16
@@ -640,7 +640,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv = zext i16 %0 to i32
   %add = add nsw i32 %conv, 5
   %conv1 = trunc i32 %add to i16
@@ -653,7 +653,7 @@
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}r{{[0-9]+}}
   %conv = zext i16 %x to i32
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv1 = zext i16 %0 to i32
   %add = add nsw i32 %conv1, %conv
   %conv2 = trunc i32 %add to i16
@@ -666,7 +666,7 @@
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}r{{[0-9]+}}
   %conv = zext i16 %x to i32
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv1 = zext i16 %0 to i32
   %sub = sub nsw i32 %conv1, %conv
   %conv2 = trunc i32 %sub to i16
@@ -678,7 +678,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %or3 = or i16 %0, %x
   store i16 %or3, i16* %add.ptr, align 2
   ret void
@@ -688,7 +688,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %and3 = and i16 %0, %x
   store i16 %and3, i16* %add.ptr, align 2
   ret void
@@ -698,7 +698,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv = zext i16 %0 to i32
   %and = and i32 %conv, 65503
   %conv1 = trunc i32 %and to i16
@@ -710,7 +710,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv = zext i16 %0 to i32
   %or = or i32 %conv, 128
   %conv1 = trunc i32 %or to i16
@@ -721,7 +721,7 @@
 define void @memop_signed_short_add5(i16* nocapture %p) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
-  %0 = load i16* %p, align 2
+  %0 = load i16, i16* %p, align 2
   %conv2 = zext i16 %0 to i32
   %add = add nsw i32 %conv2, 5
   %conv1 = trunc i32 %add to i16
@@ -733,7 +733,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %conv4 = zext i16 %x to i32
-  %0 = load i16* %p, align 2
+  %0 = load i16, i16* %p, align 2
   %conv13 = zext i16 %0 to i32
   %add = add nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %add to i16
@@ -745,7 +745,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %conv4 = zext i16 %x to i32
-  %0 = load i16* %p, align 2
+  %0 = load i16, i16* %p, align 2
   %conv13 = zext i16 %0 to i32
   %sub = sub nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %sub to i16
@@ -756,7 +756,7 @@
 define void @memop_signed_short_or(i16* nocapture %p, i16 signext %x) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
-  %0 = load i16* %p, align 2
+  %0 = load i16, i16* %p, align 2
   %or3 = or i16 %0, %x
   store i16 %or3, i16* %p, align 2
   ret void
@@ -765,7 +765,7 @@
 define void @memop_signed_short_and(i16* nocapture %p, i16 signext %x) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
-  %0 = load i16* %p, align 2
+  %0 = load i16, i16* %p, align 2
   %and3 = and i16 %0, %x
   store i16 %and3, i16* %p, align 2
   ret void
@@ -774,7 +774,7 @@
 define void @memop_signed_short_clrbit(i16* nocapture %p) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
-  %0 = load i16* %p, align 2
+  %0 = load i16, i16* %p, align 2
   %conv2 = zext i16 %0 to i32
   %and = and i32 %conv2, 65503
   %conv1 = trunc i32 %and to i16
@@ -785,7 +785,7 @@
 define void @memop_signed_short_setbit(i16* nocapture %p) nounwind {
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
-  %0 = load i16* %p, align 2
+  %0 = load i16, i16* %p, align 2
   %conv2 = zext i16 %0 to i32
   %or = or i32 %conv2, 128
   %conv1 = trunc i32 %or to i16
@@ -797,7 +797,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv2 = zext i16 %0 to i32
   %add = add nsw i32 %conv2, 5
   %conv1 = trunc i32 %add to i16
@@ -810,7 +810,7 @@
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %conv4 = zext i16 %x to i32
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv13 = zext i16 %0 to i32
   %add = add nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %add to i16
@@ -823,7 +823,7 @@
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %conv4 = zext i16 %x to i32
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv13 = zext i16 %0 to i32
   %sub = sub nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %sub to i16
@@ -835,7 +835,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %or3 = or i16 %0, %x
   store i16 %or3, i16* %add.ptr, align 2
   ret void
@@ -845,7 +845,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %and3 = and i16 %0, %x
   store i16 %and3, i16* %add.ptr, align 2
   ret void
@@ -855,7 +855,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv2 = zext i16 %0 to i32
   %and = and i32 %conv2, 65503
   %conv1 = trunc i32 %and to i16
@@ -867,7 +867,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 %i
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv2 = zext i16 %0 to i32
   %or = or i32 %conv2, 128
   %conv1 = trunc i32 %or to i16
@@ -879,7 +879,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv2 = zext i16 %0 to i32
   %add = add nsw i32 %conv2, 5
   %conv1 = trunc i32 %add to i16
@@ -892,7 +892,7 @@
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}+={{ *}}r{{[0-9]+}}
   %conv4 = zext i16 %x to i32
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv13 = zext i16 %0 to i32
   %add = add nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %add to i16
@@ -905,7 +905,7 @@
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}r{{[0-9]+}}
   %conv4 = zext i16 %x to i32
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv13 = zext i16 %0 to i32
   %sub = sub nsw i32 %conv13, %conv4
   %conv2 = trunc i32 %sub to i16
@@ -917,7 +917,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %or3 = or i16 %0, %x
   store i16 %or3, i16* %add.ptr, align 2
   ret void
@@ -927,7 +927,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %and3 = and i16 %0, %x
   store i16 %and3, i16* %add.ptr, align 2
   ret void
@@ -937,7 +937,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv2 = zext i16 %0 to i32
   %and = and i32 %conv2, 65503
   %conv1 = trunc i32 %and to i16
@@ -949,7 +949,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 5
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv2 = zext i16 %0 to i32
   %or = or i32 %conv2, 128
   %conv1 = trunc i32 %or to i16
@@ -960,7 +960,7 @@
 define void @memop_signed_int_add5(i32* nocapture %p) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
-  %0 = load i32* %p, align 4
+  %0 = load i32, i32* %p, align 4
   %add = add i32 %0, 5
   store i32 %add, i32* %p, align 4
   ret void
@@ -969,7 +969,7 @@
 define void @memop_signed_int_add(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
-  %0 = load i32* %p, align 4
+  %0 = load i32, i32* %p, align 4
   %add = add i32 %0, %x
   store i32 %add, i32* %p, align 4
   ret void
@@ -978,7 +978,7 @@
 define void @memop_signed_int_sub(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
-  %0 = load i32* %p, align 4
+  %0 = load i32, i32* %p, align 4
   %sub = sub i32 %0, %x
   store i32 %sub, i32* %p, align 4
   ret void
@@ -987,7 +987,7 @@
 define void @memop_signed_int_or(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
-  %0 = load i32* %p, align 4
+  %0 = load i32, i32* %p, align 4
   %or = or i32 %0, %x
   store i32 %or, i32* %p, align 4
   ret void
@@ -996,7 +996,7 @@
 define void @memop_signed_int_and(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
-  %0 = load i32* %p, align 4
+  %0 = load i32, i32* %p, align 4
   %and = and i32 %0, %x
   store i32 %and, i32* %p, align 4
   ret void
@@ -1005,7 +1005,7 @@
 define void @memop_signed_int_clrbit(i32* nocapture %p) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
-  %0 = load i32* %p, align 4
+  %0 = load i32, i32* %p, align 4
   %and = and i32 %0, -33
   store i32 %and, i32* %p, align 4
   ret void
@@ -1014,7 +1014,7 @@
 define void @memop_signed_int_setbit(i32* nocapture %p) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
-  %0 = load i32* %p, align 4
+  %0 = load i32, i32* %p, align 4
   %or = or i32 %0, 128
   store i32 %or, i32* %p, align 4
   ret void
@@ -1024,7 +1024,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %add = add i32 %0, 5
   store i32 %add, i32* %add.ptr, align 4
   ret void
@@ -1034,7 +1034,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %add = add i32 %0, %x
   store i32 %add, i32* %add.ptr, align 4
   ret void
@@ -1044,7 +1044,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %sub = sub i32 %0, %x
   store i32 %sub, i32* %add.ptr, align 4
   ret void
@@ -1054,7 +1054,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %or = or i32 %0, %x
   store i32 %or, i32* %add.ptr, align 4
   ret void
@@ -1064,7 +1064,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %and = and i32 %0, %x
   store i32 %and, i32* %add.ptr, align 4
   ret void
@@ -1074,7 +1074,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %and = and i32 %0, -33
   store i32 %and, i32* %add.ptr, align 4
   ret void
@@ -1084,7 +1084,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %or = or i32 %0, 128
   store i32 %or, i32* %add.ptr, align 4
   ret void
@@ -1094,7 +1094,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %add = add i32 %0, 5
   store i32 %add, i32* %add.ptr, align 4
   ret void
@@ -1104,7 +1104,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %add = add i32 %0, %x
   store i32 %add, i32* %add.ptr, align 4
   ret void
@@ -1114,7 +1114,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %sub = sub i32 %0, %x
   store i32 %sub, i32* %add.ptr, align 4
   ret void
@@ -1124,7 +1124,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %or = or i32 %0, %x
   store i32 %or, i32* %add.ptr, align 4
   ret void
@@ -1134,7 +1134,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %and = and i32 %0, %x
   store i32 %and, i32* %add.ptr, align 4
   ret void
@@ -1144,7 +1144,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %and = and i32 %0, -33
   store i32 %and, i32* %add.ptr, align 4
   ret void
@@ -1154,7 +1154,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %or = or i32 %0, 128
   store i32 %or, i32* %add.ptr, align 4
   ret void
@@ -1163,7 +1163,7 @@
 define void @memop_unsigned_int_add5(i32* nocapture %p) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
-  %0 = load i32* %p, align 4
+  %0 = load i32, i32* %p, align 4
   %add = add nsw i32 %0, 5
   store i32 %add, i32* %p, align 4
   ret void
@@ -1172,7 +1172,7 @@
 define void @memop_unsigned_int_add(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
-  %0 = load i32* %p, align 4
+  %0 = load i32, i32* %p, align 4
   %add = add nsw i32 %0, %x
   store i32 %add, i32* %p, align 4
   ret void
@@ -1181,7 +1181,7 @@
 define void @memop_unsigned_int_sub(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
-  %0 = load i32* %p, align 4
+  %0 = load i32, i32* %p, align 4
   %sub = sub nsw i32 %0, %x
   store i32 %sub, i32* %p, align 4
   ret void
@@ -1190,7 +1190,7 @@
 define void @memop_unsigned_int_or(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
-  %0 = load i32* %p, align 4
+  %0 = load i32, i32* %p, align 4
   %or = or i32 %0, %x
   store i32 %or, i32* %p, align 4
   ret void
@@ -1199,7 +1199,7 @@
 define void @memop_unsigned_int_and(i32* nocapture %p, i32 %x) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
-  %0 = load i32* %p, align 4
+  %0 = load i32, i32* %p, align 4
   %and = and i32 %0, %x
   store i32 %and, i32* %p, align 4
   ret void
@@ -1208,7 +1208,7 @@
 define void @memop_unsigned_int_clrbit(i32* nocapture %p) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
-  %0 = load i32* %p, align 4
+  %0 = load i32, i32* %p, align 4
   %and = and i32 %0, -33
   store i32 %and, i32* %p, align 4
   ret void
@@ -1217,7 +1217,7 @@
 define void @memop_unsigned_int_setbit(i32* nocapture %p) nounwind {
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
-  %0 = load i32* %p, align 4
+  %0 = load i32, i32* %p, align 4
   %or = or i32 %0, 128
   store i32 %or, i32* %p, align 4
   ret void
@@ -1227,7 +1227,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %add = add nsw i32 %0, 5
   store i32 %add, i32* %add.ptr, align 4
   ret void
@@ -1237,7 +1237,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}+={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %add = add nsw i32 %0, %x
   store i32 %add, i32* %add.ptr, align 4
   ret void
@@ -1247,7 +1247,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}-={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %sub = sub nsw i32 %0, %x
   store i32 %sub, i32* %add.ptr, align 4
   ret void
@@ -1257,7 +1257,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %or = or i32 %0, %x
   store i32 %or, i32* %add.ptr, align 4
   ret void
@@ -1267,7 +1267,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %and = and i32 %0, %x
   store i32 %and, i32* %add.ptr, align 4
   ret void
@@ -1277,7 +1277,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %and = and i32 %0, -33
   store i32 %and, i32* %add.ptr, align 4
   ret void
@@ -1287,7 +1287,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#0){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 %i
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %or = or i32 %0, 128
   store i32 %or, i32* %add.ptr, align 4
   ret void
@@ -1297,7 +1297,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}#5
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %add = add nsw i32 %0, 5
   store i32 %add, i32* %add.ptr, align 4
   ret void
@@ -1307,7 +1307,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}+={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %add = add nsw i32 %0, %x
   store i32 %add, i32* %add.ptr, align 4
   ret void
@@ -1317,7 +1317,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %sub = sub nsw i32 %0, %x
   store i32 %sub, i32* %add.ptr, align 4
   ret void
@@ -1327,7 +1327,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}|={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %or = or i32 %0, %x
   store i32 %or, i32* %add.ptr, align 4
   ret void
@@ -1337,7 +1337,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}&={{ *}}r{{[0-9]+}}
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %and = and i32 %0, %x
   store i32 %and, i32* %add.ptr, align 4
   ret void
@@ -1347,7 +1347,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}clrbit({{ *}}#5{{ *}})
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %and = and i32 %0, -33
   store i32 %and, i32* %add.ptr, align 4
   ret void
@@ -1357,7 +1357,7 @@
 entry:
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}={{ *}}setbit({{ *}}#7{{ *}})
   %add.ptr = getelementptr inbounds i32, i32* %p, i32 5
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   %or = or i32 %0, 128
   store i32 %or, i32* %add.ptr, align 4
   ret void
diff --git a/llvm/test/CodeGen/Hexagon/memops1.ll b/llvm/test/CodeGen/Hexagon/memops1.ll
index 3ba1a3e..37e885b 100644
--- a/llvm/test/CodeGen/Hexagon/memops1.ll
+++ b/llvm/test/CodeGen/Hexagon/memops1.ll
@@ -7,9 +7,9 @@
 ; CHECK:  memw(r{{[0-9]+}}{{ *}}+{{ *}}#40){{ *}}-={{ *}}#1
   %p.addr = alloca i32*, align 4
   store i32* %p, i32** %p.addr, align 4
-  %0 = load i32** %p.addr, align 4
+  %0 = load i32*, i32** %p.addr, align 4
   %add.ptr = getelementptr inbounds i32, i32* %0, i32 10
-  %1 = load i32* %add.ptr, align 4
+  %1 = load i32, i32* %add.ptr, align 4
   %sub = sub nsw i32 %1, 1
   store i32 %sub, i32* %add.ptr, align 4
   ret void
@@ -22,11 +22,11 @@
   %i.addr = alloca i32, align 4
   store i32* %p, i32** %p.addr, align 4
   store i32 %i, i32* %i.addr, align 4
-  %0 = load i32** %p.addr, align 4
-  %1 = load i32* %i.addr, align 4
+  %0 = load i32*, i32** %p.addr, align 4
+  %1 = load i32, i32* %i.addr, align 4
   %add.ptr = getelementptr inbounds i32, i32* %0, i32 %1
   %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 10
-  %2 = load i32* %add.ptr1, align 4
+  %2 = load i32, i32* %add.ptr1, align 4
   %sub = sub nsw i32 %2, 1
   store i32 %sub, i32* %add.ptr1, align 4
   ret void
diff --git a/llvm/test/CodeGen/Hexagon/memops2.ll b/llvm/test/CodeGen/Hexagon/memops2.ll
index 5a0532f..f9f8a24 100644
--- a/llvm/test/CodeGen/Hexagon/memops2.ll
+++ b/llvm/test/CodeGen/Hexagon/memops2.ll
@@ -6,7 +6,7 @@
 entry:
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}#1
   %add.ptr = getelementptr inbounds i16, i16* %p, i32 10
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   %conv2 = zext i16 %0 to i32
   %sub = add nsw i32 %conv2, 65535
   %conv1 = trunc i32 %sub to i16
@@ -19,7 +19,7 @@
 ; CHECK:  memh(r{{[0-9]+}}{{ *}}+{{ *}}#20){{ *}}-={{ *}}#1
   %add.ptr.sum = add i32 %i, 10
   %add.ptr1 = getelementptr inbounds i16, i16* %p, i32 %add.ptr.sum
-  %0 = load i16* %add.ptr1, align 2
+  %0 = load i16, i16* %add.ptr1, align 2
   %conv3 = zext i16 %0 to i32
   %sub = add nsw i32 %conv3, 65535
   %conv2 = trunc i32 %sub to i16
diff --git a/llvm/test/CodeGen/Hexagon/memops3.ll b/llvm/test/CodeGen/Hexagon/memops3.ll
index 1e80baf..6cd7fdc 100644
--- a/llvm/test/CodeGen/Hexagon/memops3.ll
+++ b/llvm/test/CodeGen/Hexagon/memops3.ll
@@ -6,7 +6,7 @@
 entry:
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}#1
   %add.ptr = getelementptr inbounds i8, i8* %p, i32 10
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   %conv = zext i8 %0 to i32
   %sub = add nsw i32 %conv, 255
   %conv1 = trunc i32 %sub to i8
@@ -19,7 +19,7 @@
 ; CHECK:  memb(r{{[0-9]+}}{{ *}}+{{ *}}#10){{ *}}-={{ *}}#1
   %add.ptr.sum = add i32 %i, 10
   %add.ptr1 = getelementptr inbounds i8, i8* %p, i32 %add.ptr.sum
-  %0 = load i8* %add.ptr1, align 1
+  %0 = load i8, i8* %add.ptr1, align 1
   %conv = zext i8 %0 to i32
   %sub = add nsw i32 %conv, 255
   %conv2 = trunc i32 %sub to i8
diff --git a/llvm/test/CodeGen/Hexagon/misaligned-access.ll b/llvm/test/CodeGen/Hexagon/misaligned-access.ll
index 4dafb44..f4b0cb9 100644
--- a/llvm/test/CodeGen/Hexagon/misaligned-access.ll
+++ b/llvm/test/CodeGen/Hexagon/misaligned-access.ll
@@ -7,10 +7,10 @@
 define i32 @CSDRSEARCH_executeSearchManager() #0 {
 entry:
   %temp = alloca i32, align 4
-  %0 = load i32* @temp1, align 4
+  %0 = load i32, i32* @temp1, align 4
   store i32 %0, i32* %temp, align 4
   %1 = bitcast i32* %temp to i64*
-  %2 = load i64* %1, align 8
+  %2 = load i64, i64* %1, align 8
   %call = call i32 @_hi(i64 %2)
   ret i32 %call
 }
diff --git a/llvm/test/CodeGen/Hexagon/mpy.ll b/llvm/test/CodeGen/Hexagon/mpy.ll
index d5c5ae3..3ecf7d4 100644
--- a/llvm/test/CodeGen/Hexagon/mpy.ll
+++ b/llvm/test/CodeGen/Hexagon/mpy.ll
@@ -9,10 +9,10 @@
   store i32 %acc, i32* %acc.addr, align 4
   store i32 %num, i32* %num.addr, align 4
   store i32 %num2, i32* %num2.addr, align 4
-  %0 = load i32* %num.addr, align 4
-  %1 = load i32* %acc.addr, align 4
+  %0 = load i32, i32* %num.addr, align 4
+  %1 = load i32, i32* %acc.addr, align 4
   %mul = mul nsw i32 %0, %1
-  %2 = load i32* %num2.addr, align 4
+  %2 = load i32, i32* %num2.addr, align 4
   %add = add nsw i32 %mul, %2
   store i32 %add, i32* %num.addr, align 4
   ret void
diff --git a/llvm/test/CodeGen/Hexagon/newvaluejump.ll b/llvm/test/CodeGen/Hexagon/newvaluejump.ll
index 9c7ca55..3e1ee17 100644
--- a/llvm/test/CodeGen/Hexagon/newvaluejump.ll
+++ b/llvm/test/CodeGen/Hexagon/newvaluejump.ll
@@ -9,10 +9,10 @@
 ; CHECK: if (cmp.eq(r{{[0-9]+}}.new, #0)) jump{{.}}
   %addr1 = alloca i32, align 4
   %addr2 = alloca i32, align 4
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   store i32 %0, i32* %addr1, align 4
   call void @bar(i32 1, i32 2)
-  %1 = load i32* @j, align 4
+  %1 = load i32, i32* @j, align 4
   %tobool = icmp ne i32 %1, 0
   br i1 %tobool, label %if.then, label %if.else
 
diff --git a/llvm/test/CodeGen/Hexagon/newvaluejump2.ll b/llvm/test/CodeGen/Hexagon/newvaluejump2.ll
index 3d50ea5..36a0db1 100644
--- a/llvm/test/CodeGen/Hexagon/newvaluejump2.ll
+++ b/llvm/test/CodeGen/Hexagon/newvaluejump2.ll
@@ -7,9 +7,9 @@
 entry:
 ; CHECK: if (cmp.gt(r{{[0-9]+}}.new, r{{[0-9]+}})) jump:{{[t|nt]}} .LBB{{[0-9]+}}_{{[0-9]+}}
   %Reg2 = alloca i8, align 1
-  %0 = load i8* %Reg2, align 1
+  %0 = load i8, i8* %Reg2, align 1
   %conv0 = zext i8 %0 to i32
-  %1 = load i8* @Reg, align 1
+  %1 = load i8, i8* @Reg, align 1
   %conv1 = zext i8 %1 to i32
   %tobool = icmp sle i32 %conv0, %conv1
   br i1 %tobool, label %if.then, label %if.else
diff --git a/llvm/test/CodeGen/Hexagon/newvaluestore.ll b/llvm/test/CodeGen/Hexagon/newvaluestore.ll
index 93cf347..13cbba2 100644
--- a/llvm/test/CodeGen/Hexagon/newvaluestore.ll
+++ b/llvm/test/CodeGen/Hexagon/newvaluestore.ll
@@ -11,11 +11,11 @@
   %number1 = alloca i32, align 4
   %number2 = alloca i32, align 4
   %number3 = alloca i32, align 4
-  %0 = load i32 * @i, align 4
+  %0 = load i32, i32* @i, align 4
   store i32 %0, i32* %number1, align 4
-  %1 = load i32 * @j, align 4
+  %1 = load i32, i32* @j, align 4
   store i32 %1, i32* %number2, align 4
-  %2 = load i32 * @k, align 4
+  %2 = load i32, i32* @k, align 4
   store i32 %2, i32* %number3, align 4
   ret i32 %0
 }
diff --git a/llvm/test/CodeGen/Hexagon/opt-fabs.ll b/llvm/test/CodeGen/Hexagon/opt-fabs.ll
index 31b56fd..da657e4 100644
--- a/llvm/test/CodeGen/Hexagon/opt-fabs.ll
+++ b/llvm/test/CodeGen/Hexagon/opt-fabs.ll
@@ -7,7 +7,7 @@
 entry:
   %x.addr = alloca float, align 4
   store float %x, float* %x.addr, align 4
-  %0 = load float* %x.addr, align 4
+  %0 = load float, float* %x.addr, align 4
   %call = call float @fabsf(float %0) readnone
   ret float %call
 }
diff --git a/llvm/test/CodeGen/Hexagon/opt-fneg.ll b/llvm/test/CodeGen/Hexagon/opt-fneg.ll
index 479b4b6..9789578 100644
--- a/llvm/test/CodeGen/Hexagon/opt-fneg.ll
+++ b/llvm/test/CodeGen/Hexagon/opt-fneg.ll
@@ -6,7 +6,7 @@
 ; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}}, #31)
   %x.addr = alloca float, align 4
   store float %x, float* %x.addr, align 4
-  %0 = load float* %x.addr, align 4
+  %0 = load float, float* %x.addr, align 4
   %sub = fsub float -0.000000e+00, %0
   ret float %sub
 }
diff --git a/llvm/test/CodeGen/Hexagon/postinc-load.ll b/llvm/test/CodeGen/Hexagon/postinc-load.ll
index 547c7c6..a9d9879 100644
--- a/llvm/test/CodeGen/Hexagon/postinc-load.ll
+++ b/llvm/test/CodeGen/Hexagon/postinc-load.ll
@@ -12,8 +12,8 @@
   %arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
   %arrayidx1.phi = phi i16* [ %b, %entry ], [ %arrayidx1.inc, %for.body ]
   %sum.03 = phi i32 [ 0, %entry ], [ %add2, %for.body ]
-  %0 = load i32* %arrayidx.phi, align 4
-  %1 = load i16* %arrayidx1.phi, align 2
+  %0 = load i32, i32* %arrayidx.phi, align 4
+  %1 = load i16, i16* %arrayidx1.phi, align 2
   %conv = sext i16 %1 to i32
   %add = add i32 %0, %sum.03
   %add2 = add i32 %add, %conv
diff --git a/llvm/test/CodeGen/Hexagon/postinc-store.ll b/llvm/test/CodeGen/Hexagon/postinc-store.ll
index b836f715..6315ca1 100644
--- a/llvm/test/CodeGen/Hexagon/postinc-store.ll
+++ b/llvm/test/CodeGen/Hexagon/postinc-store.ll
@@ -11,8 +11,8 @@
   %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 10, %entry ]
   %arrayidx.phi = phi i32* [ %a, %entry ], [ %arrayidx.inc, %for.body ]
   %arrayidx1.phi = phi i16* [ %b, %entry ], [ %arrayidx1.inc, %for.body ]
-  %0 = load i32* %arrayidx.phi, align 4
-  %1 = load i16* %arrayidx1.phi, align 2
+  %0 = load i32, i32* %arrayidx.phi, align 4
+  %1 = load i16, i16* %arrayidx1.phi, align 2
   %conv = sext i16 %1 to i32
   %factor = mul i32 %0, 2
   %add3 = add i32 %factor, %conv
diff --git a/llvm/test/CodeGen/Hexagon/pred-gp.ll b/llvm/test/CodeGen/Hexagon/pred-gp.ll
index 299bd86..3868e09 100644
--- a/llvm/test/CodeGen/Hexagon/pred-gp.ll
+++ b/llvm/test/CodeGen/Hexagon/pred-gp.ll
@@ -14,11 +14,11 @@
   br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
 
 entry.if.end_crit_edge:
-  %.pre = load i32* @c, align 4
+  %.pre = load i32, i32* @c, align 4
   br label %if.end
 
 if.then:
-  %0 = load i32* @d, align 4
+  %0 = load i32, i32* @d, align 4
   store i32 %0, i32* @c, align 4
   br label %if.end
 
diff --git a/llvm/test/CodeGen/Hexagon/pred-instrs.ll b/llvm/test/CodeGen/Hexagon/pred-instrs.ll
index 800073e..e0a75f1 100644
--- a/llvm/test/CodeGen/Hexagon/pred-instrs.ll
+++ b/llvm/test/CodeGen/Hexagon/pred-instrs.ll
@@ -25,6 +25,6 @@
 if.end:                                           ; preds = %if.else, %if.then
   %storemerge = phi i32 [ %and, %if.else ], [ %shl, %if.then ]
   store i32 %storemerge, i32* @a, align 4
-  %0 = load i32* @d, align 4
+  %0 = load i32, i32* @d, align 4
   ret i32 %0
 }
diff --git a/llvm/test/CodeGen/Hexagon/remove_lsr.ll b/llvm/test/CodeGen/Hexagon/remove_lsr.ll
index 640fdb5..3b85c48 100644
--- a/llvm/test/CodeGen/Hexagon/remove_lsr.ll
+++ b/llvm/test/CodeGen/Hexagon/remove_lsr.ll
@@ -54,9 +54,9 @@
   %7 = trunc i64 %6 to i32
   %8 = tail call i32 @llvm.hexagon.C2.mux(i32 %conv8, i32 %5, i32 %7)
   store i32 %8, i32* %lsr.iv2931, align 4
-  %srcval = load i64* %lsr.iv27, align 8
-  %9 = load i8* %lsr.iv40, align 1
-  %10 = load i8* %lsr.iv37, align 1
+  %srcval = load i64, i64* %lsr.iv27, align 8
+  %9 = load i8, i8* %lsr.iv40, align 1
+  %10 = load i8, i8* %lsr.iv37, align 1
   %lftr.wideiv = trunc i32 %lsr.iv42 to i8
   %exitcond = icmp eq i8 %lftr.wideiv, 32
   %scevgep26 = getelementptr %union.vect64, %union.vect64* %lsr.iv, i32 1
diff --git a/llvm/test/CodeGen/Hexagon/static.ll b/llvm/test/CodeGen/Hexagon/static.ll
index 683a4c2..760b8b5 100644
--- a/llvm/test/CodeGen/Hexagon/static.ll
+++ b/llvm/test/CodeGen/Hexagon/static.ll
@@ -10,10 +10,10 @@
 
 define void @foo() nounwind {
 entry:
-  %0 = load i32* @num, align 4
-  %1 = load i32* @acc, align 4
+  %0 = load i32, i32* @num, align 4
+  %1 = load i32, i32* @acc, align 4
   %mul = mul nsw i32 %0, %1
-  %2 = load i32* @val, align 4
+  %2 = load i32, i32* @val, align 4
   %add = add nsw i32 %mul, %2
   store i32 %add, i32* @num, align 4
   ret void
diff --git a/llvm/test/CodeGen/Hexagon/struct_args.ll b/llvm/test/CodeGen/Hexagon/struct_args.ll
index f91300b..95b76c7 100644
--- a/llvm/test/CodeGen/Hexagon/struct_args.ll
+++ b/llvm/test/CodeGen/Hexagon/struct_args.ll
@@ -8,7 +8,7 @@
 
 define void @foo() nounwind {
 entry:
-  %0 = load i64* bitcast (%struct.small* @s1 to i64*), align 1
+  %0 = load i64, i64* bitcast (%struct.small* @s1 to i64*), align 1
   call void @bar(i64 %0)
   ret void
 }
diff --git a/llvm/test/CodeGen/Hexagon/tfr-to-combine.ll b/llvm/test/CodeGen/Hexagon/tfr-to-combine.ll
index e3057cd..d22d685 100644
--- a/llvm/test/CodeGen/Hexagon/tfr-to-combine.ll
+++ b/llvm/test/CodeGen/Hexagon/tfr-to-combine.ll
@@ -20,7 +20,7 @@
 ; CHECK: combine(#0, r{{[0-9]+}})
 entry:
   store i16 0, i16* @a, align 2
-  %0 = load i16* @c, align 2
+  %0 = load i16, i16* @c, align 2
   %conv2 = zext i16 %0 to i64
   ret i64 %conv2
 }
diff --git a/llvm/test/CodeGen/Hexagon/union-1.ll b/llvm/test/CodeGen/Hexagon/union-1.ll
index 0056586..8f2ff28 100644
--- a/llvm/test/CodeGen/Hexagon/union-1.ll
+++ b/llvm/test/CodeGen/Hexagon/union-1.ll
@@ -5,10 +5,10 @@
 
 define void @word(i32* nocapture %a) nounwind {
 entry:
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %1 = zext i32 %0 to i64
   %add.ptr = getelementptr inbounds i32, i32* %a, i32 1
-  %2 = load i32* %add.ptr, align 4
+  %2 = load i32, i32* %add.ptr, align 4
   %3 = zext i32 %2 to i64
   %4 = shl nuw i64 %3, 32
   %ins = or i64 %4, %1
diff --git a/llvm/test/CodeGen/Hexagon/vaddh.ll b/llvm/test/CodeGen/Hexagon/vaddh.ll
index 01d2041..88194b7 100644
--- a/llvm/test/CodeGen/Hexagon/vaddh.ll
+++ b/llvm/test/CodeGen/Hexagon/vaddh.ll
@@ -6,8 +6,8 @@
 
 define void @foo() nounwind {
 entry:
-  %0 = load i32* @j, align 4
-  %1 = load i32* @k, align 4
+  %0 = load i32, i32* @j, align 4
+  %1 = load i32, i32* @k, align 4
   %2 = call i32 @llvm.hexagon.A2.svaddh(i32 %0, i32 %1)
   store i32 %2, i32* @k, align 4
   ret void
diff --git a/llvm/test/CodeGen/Hexagon/validate-offset.ll b/llvm/test/CodeGen/Hexagon/validate-offset.ll
index 9e7d0aa..8de006c 100644
--- a/llvm/test/CodeGen/Hexagon/validate-offset.ll
+++ b/llvm/test/CodeGen/Hexagon/validate-offset.ll
@@ -11,26 +11,26 @@
   %b.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
   store i32 %b, i32* %b.addr, align 4
-  %0 = load i32* %a.addr, align 4
-  %1 = load i32* %b.addr, align 4
+  %0 = load i32, i32* %a.addr, align 4
+  %1 = load i32, i32* %b.addr, align 4
   %cmp = icmp sgt i32 %0, %1
   br i1 %cmp, label %if.then, label %if.else
 
 if.then:
-  %2 = load i32* %a.addr, align 4
-  %3 = load i32* %b.addr, align 4
+  %2 = load i32, i32* %a.addr, align 4
+  %3 = load i32, i32* %b.addr, align 4
   %add = add nsw i32 %2, %3
   store i32 %add, i32* %retval
   br label %return
 
 if.else:
-  %4 = load i32* %a.addr, align 4
-  %5 = load i32* %b.addr, align 4
+  %4 = load i32, i32* %a.addr, align 4
+  %5 = load i32, i32* %b.addr, align 4
   %sub = sub nsw i32 %4, %5
   store i32 %sub, i32* %retval
   br label %return
 
 return:
-  %6 = load i32* %retval
+  %6 = load i32, i32* %retval
   ret i32 %6
 }
diff --git a/llvm/test/CodeGen/Hexagon/zextloadi1.ll b/llvm/test/CodeGen/Hexagon/zextloadi1.ll
index b58d933..9ce7bea 100644
--- a/llvm/test/CodeGen/Hexagon/zextloadi1.ll
+++ b/llvm/test/CodeGen/Hexagon/zextloadi1.ll
@@ -13,13 +13,13 @@
 @i129_s = external global i129
 
 define void @i129_ls() nounwind  {
-        %tmp = load i129* @i129_l
+        %tmp = load i129, i129* @i129_l
         store i129 %tmp, i129* @i129_s
         ret void
 }
 
 define void @i65_ls() nounwind  {
-        %tmp = load i65* @i65_l
+        %tmp = load i65, i65* @i65_l
         store i65 %tmp, i65* @i65_s
         ret void
 }
diff --git a/llvm/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll b/llvm/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
index 4c7d2d0..38e9832 100644
--- a/llvm/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
+++ b/llvm/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
@@ -7,7 +7,7 @@
 
 define void @uip_arp_arpin() nounwind {
 entry:
-	%tmp = load volatile i16* @uip_len		; <i16> [#uses=1]
+	%tmp = load volatile i16, i16* @uip_len		; <i16> [#uses=1]
 	%cmp = icmp ult i16 %tmp, 42		; <i1> [#uses=1]
 	store volatile i16 0, i16* @uip_len
 	br i1 %cmp, label %if.then, label %if.end
diff --git a/llvm/test/CodeGen/MSP430/2009-05-17-Rot.ll b/llvm/test/CodeGen/MSP430/2009-05-17-Rot.ll
index d622aa7..30b3739 100644
--- a/llvm/test/CodeGen/MSP430/2009-05-17-Rot.ll
+++ b/llvm/test/CodeGen/MSP430/2009-05-17-Rot.ll
@@ -4,14 +4,14 @@
         %retval = alloca i16
         %x = alloca i16
         store i16 %x.arg, i16* %x
-        %1 = load i16* %x
+        %1 = load i16, i16* %x
         %2 = shl i16 %1, 1
-        %3 = load i16* %x
+        %3 = load i16, i16* %x
         %4 = lshr i16 %3, 15
         %5 = or i16 %2, %4
         store i16 %5, i16* %retval
         br label %return
 return:
-        %6 = load i16* %retval
+        %6 = load i16, i16* %retval
         ret i16 %6
 }
diff --git a/llvm/test/CodeGen/MSP430/2009-05-17-Shift.ll b/llvm/test/CodeGen/MSP430/2009-05-17-Shift.ll
index e23df78..2e3dd55 100644
--- a/llvm/test/CodeGen/MSP430/2009-05-17-Shift.ll
+++ b/llvm/test/CodeGen/MSP430/2009-05-17-Shift.ll
@@ -4,12 +4,12 @@
         %retval = alloca i16
         %x = alloca i16
         store i16 %x.arg, i16* %x
-        %1 = load i16* %x
+        %1 = load i16, i16* %x
         %2 = lshr i16 %1, 2
         store i16 %2, i16* %retval
         br label %return
 return:
-        %3 = load i16* %retval
+        %3 = load i16, i16* %retval
         ret i16 %3
 
 }
diff --git a/llvm/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll b/llvm/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
index e8c0d14..ca54ff0 100644
--- a/llvm/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
+++ b/llvm/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
@@ -7,7 +7,7 @@
 entry:
   %result = alloca i16, align 1                   ; <i16*> [#uses=2]
   store volatile i16 0, i16* %result
-  %tmp = load volatile i16* %result               ; <i16> [#uses=1]
+  %tmp = load volatile i16, i16* %result               ; <i16> [#uses=1]
   ret i16 %tmp
 }
 
@@ -23,7 +23,7 @@
 while.end:                                        ; preds = %while.cond
   %result.i = alloca i16, align 1                 ; <i16*> [#uses=2]
   store volatile i16 0, i16* %result.i
-  %tmp.i = load volatile i16* %result.i           ; <i16> [#uses=0]
+  %tmp.i = load volatile i16, i16* %result.i           ; <i16> [#uses=0]
   ret i16 0
 }
 
diff --git a/llvm/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll b/llvm/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
index 9fab482..72ba335 100644
--- a/llvm/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
+++ b/llvm/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
@@ -11,12 +11,12 @@
   %x.addr = alloca i8                             ; <i8*> [#uses=2]
   %tmp = alloca i8, align 1                       ; <i8*> [#uses=2]
   store i8 %x, i8* %x.addr
-  %tmp1 = load volatile i8* @"\010x0021"          ; <i8> [#uses=1]
+  %tmp1 = load volatile i8, i8* @"\010x0021"          ; <i8> [#uses=1]
   store i8 %tmp1, i8* %tmp
-  %tmp2 = load i8* %x.addr                        ; <i8> [#uses=1]
+  %tmp2 = load i8, i8* %x.addr                        ; <i8> [#uses=1]
   store volatile i8 %tmp2, i8* @"\010x0021"
-  %tmp3 = load i8* %tmp                           ; <i8> [#uses=1]
+  %tmp3 = load i8, i8* %tmp                           ; <i8> [#uses=1]
   store i8 %tmp3, i8* %retval
-  %0 = load i8* %retval                           ; <i8> [#uses=1]
+  %0 = load i8, i8* %retval                           ; <i8> [#uses=1]
   ret i8 %0
 }
diff --git a/llvm/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll b/llvm/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
index c1a186a..6dfbbfc 100644
--- a/llvm/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
+++ b/llvm/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
@@ -4,7 +4,7 @@
 entry:
 	%r = alloca i8		; <i8*> [#uses=2]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	load volatile i8* %r, align 1		; <i8>:0 [#uses=1]
+	load volatile i8, i8* %r, align 1		; <i8>:0 [#uses=1]
 	or i8 %0, 1		; <i8>:1 [#uses=1]
 	store volatile i8 %1, i8* %r, align 1
 	br label %return
diff --git a/llvm/test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll b/llvm/test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll
index d5ff29c..04b087e 100644
--- a/llvm/test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll
+++ b/llvm/test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll
@@ -46,8 +46,8 @@
   br i1 undef, label %do.body, label %while.body41.i
 
 while.body41.i:                                   ; preds = %while.cond36.i
-  %tmp43.i = load i8** @foo                      ; <i8*> [#uses=2]
-  %tmp44.i = load i8* %tmp43.i                    ; <i8> [#uses=1]
+  %tmp43.i = load i8*, i8** @foo                      ; <i8*> [#uses=2]
+  %tmp44.i = load i8, i8* %tmp43.i                    ; <i8> [#uses=1]
   %ptrincdec50.i = getelementptr inbounds i8, i8* %tmp43.i, i16 1 ; <i8*> [#uses=1]
   store i8* %ptrincdec50.i, i8** @foo
   %cmp55.i = icmp eq i8 %tmp44.i, %c              ; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/MSP430/2010-05-01-CombinerAnd.ll b/llvm/test/CodeGen/MSP430/2010-05-01-CombinerAnd.ll
index 9910037..907d6ab 100644
--- a/llvm/test/CodeGen/MSP430/2010-05-01-CombinerAnd.ll
+++ b/llvm/test/CodeGen/MSP430/2010-05-01-CombinerAnd.ll
@@ -19,7 +19,7 @@
   br i1 %0, label %while.body, label %while.end
 
 while.body:                                       ; preds = %land.end
-  %tmp4 = load i16* undef                         ; <i16> [#uses=0]
+  %tmp4 = load i16, i16* undef                         ; <i16> [#uses=0]
   br label %while.cond
 
 while.end:                                        ; preds = %land.end
diff --git a/llvm/test/CodeGen/MSP430/AddrMode-bis-rx.ll b/llvm/test/CodeGen/MSP430/AddrMode-bis-rx.ll
index 532a25c..1b0a508 100644
--- a/llvm/test/CodeGen/MSP430/AddrMode-bis-rx.ll
+++ b/llvm/test/CodeGen/MSP430/AddrMode-bis-rx.ll
@@ -3,7 +3,7 @@
 target triple = "msp430-generic-generic"
 
 define i16 @am1(i16 %x, i16* %a) nounwind {
-	%1 = load i16* %a
+	%1 = load i16, i16* %a
 	%2 = or i16 %1,%x
 	ret i16 %2
 }
@@ -13,7 +13,7 @@
 @foo = external global i16
 
 define i16 @am2(i16 %x) nounwind {
-	%1 = load i16* @foo
+	%1 = load i16, i16* @foo
 	%2 = or i16 %1,%x
 	ret i16 %2
 }
@@ -24,7 +24,7 @@
 
 define i8 @am3(i8 %x, i16 %n) nounwind {
 	%1 = getelementptr [2 x i8], [2 x i8]* @bar, i16 0, i16 %n
-	%2 = load i8* %1
+	%2 = load i8, i8* %1
 	%3 = or i8 %2,%x
 	ret i8 %3
 }
@@ -32,7 +32,7 @@
 ; CHECK:		bis.b	bar(r14), r15
 
 define i16 @am4(i16 %x) nounwind {
-	%1 = load volatile i16* inttoptr(i16 32 to i16*)
+	%1 = load volatile i16, i16* inttoptr(i16 32 to i16*)
 	%2 = or i16 %1,%x
 	ret i16 %2
 }
@@ -41,7 +41,7 @@
 
 define i16 @am5(i16 %x, i16* %a) nounwind {
 	%1 = getelementptr i16, i16* %a, i16 2
-	%2 = load i16* %1
+	%2 = load i16, i16* %1
 	%3 = or i16 %2,%x
 	ret i16 %3
 }
@@ -52,7 +52,7 @@
 @baz = common global %S zeroinitializer, align 1
 
 define i16 @am6(i16 %x) nounwind {
-	%1 = load i16* getelementptr (%S* @baz, i32 0, i32 1)
+	%1 = load i16, i16* getelementptr (%S* @baz, i32 0, i32 1)
 	%2 = or i16 %1,%x
 	ret i16 %2
 }
@@ -65,7 +65,7 @@
 define i8 @am7(i8 %x, i16 %n) nounwind {
 	%1 = getelementptr %T, %T* @duh, i32 0, i32 1
 	%2 = getelementptr [2 x i8], [2 x i8]* %1, i16 0, i16 %n
-	%3= load i8* %2
+	%3 = load i8, i8* %2
 	%4 = or i8 %3,%x
 	ret i8 %4
 }
diff --git a/llvm/test/CodeGen/MSP430/AddrMode-bis-xr.ll b/llvm/test/CodeGen/MSP430/AddrMode-bis-xr.ll
index c40ba1b..b40a48e 100644
--- a/llvm/test/CodeGen/MSP430/AddrMode-bis-xr.ll
+++ b/llvm/test/CodeGen/MSP430/AddrMode-bis-xr.ll
@@ -3,7 +3,7 @@
 target triple = "msp430-generic-generic"
 
 define void @am1(i16* %a, i16 %x) nounwind {
-	%1 = load i16* %a
+	%1 = load i16, i16* %a
 	%2 = or i16 %x, %1
 	store i16 %2, i16* %a
 	ret void
@@ -14,7 +14,7 @@
 @foo = external global i16
 
 define void @am2(i16 %x) nounwind {
-	%1 = load i16* @foo
+	%1 = load i16, i16* @foo
 	%2 = or i16 %x, %1
 	store i16 %2, i16* @foo
 	ret void
@@ -26,7 +26,7 @@
 
 define void @am3(i16 %i, i8 %x) nounwind {
 	%1 = getelementptr [2 x i8], [2 x i8]* @bar, i16 0, i16 %i
-	%2 = load i8* %1
+	%2 = load i8, i8* %1
 	%3 = or i8 %x, %2
 	store i8 %3, i8* %1
 	ret void
@@ -35,7 +35,7 @@
 ; CHECK:		bis.b	r14, bar(r15)
 
 define void @am4(i16 %x) nounwind {
-	%1 = load volatile i16* inttoptr(i16 32 to i16*)
+	%1 = load volatile i16, i16* inttoptr(i16 32 to i16*)
 	%2 = or i16 %x, %1
 	store volatile i16 %2, i16* inttoptr(i16 32 to i16*)
 	ret void
@@ -45,7 +45,7 @@
 
 define void @am5(i16* %a, i16 %x) readonly {
 	%1 = getelementptr inbounds i16, i16* %a, i16 2
-	%2 = load i16* %1
+	%2 = load i16, i16* %1
 	%3 = or i16 %x, %2
 	store i16 %3, i16* %1
 	ret void
@@ -57,7 +57,7 @@
 @baz = common global %S zeroinitializer
 
 define void @am6(i16 %x) nounwind {
-	%1 = load i16* getelementptr (%S* @baz, i32 0, i32 1)
+	%1 = load i16, i16* getelementptr (%S* @baz, i32 0, i32 1)
 	%2 = or i16 %x, %1
 	store i16 %2, i16* getelementptr (%S* @baz, i32 0, i32 1)
 	ret void
@@ -71,7 +71,7 @@
 define void @am7(i16 %n, i8 %x) nounwind {
 	%1 = getelementptr %T, %T* @duh, i32 0, i32 1
 	%2 = getelementptr [2 x i8], [2 x i8]* %1, i16 0, i16 %n
-	%3 = load i8* %2
+	%3 = load i8, i8* %2
 	%4 = or i8 %x, %3
 	store i8 %4, i8* %2
 	ret void
diff --git a/llvm/test/CodeGen/MSP430/AddrMode-mov-rx.ll b/llvm/test/CodeGen/MSP430/AddrMode-mov-rx.ll
index 0c2b965..90c5744 100644
--- a/llvm/test/CodeGen/MSP430/AddrMode-mov-rx.ll
+++ b/llvm/test/CodeGen/MSP430/AddrMode-mov-rx.ll
@@ -3,7 +3,7 @@
 target triple = "msp430-generic-generic"
 
 define i16 @am1(i16* %a) nounwind {
-	%1 = load i16* %a
+	%1 = load i16, i16* %a
 	ret i16 %1
 }
 ; CHECK-LABEL: am1:
@@ -12,7 +12,7 @@
 @foo = external global i16
 
 define i16 @am2() nounwind {
-	%1 = load i16* @foo
+	%1 = load i16, i16* @foo
 	ret i16 %1
 }
 ; CHECK-LABEL: am2:
@@ -22,14 +22,14 @@
 
 define i8 @am3(i16 %n) nounwind {
 	%1 = getelementptr [2 x i8], [2 x i8]* @bar, i16 0, i16 %n
-	%2 = load i8* %1
+	%2 = load i8, i8* %1
 	ret i8 %2
 }
 ; CHECK-LABEL: am3:
 ; CHECK:		mov.b	bar(r15), r15
 
 define i16 @am4() nounwind {
-	%1 = load volatile i16* inttoptr(i16 32 to i16*)
+	%1 = load volatile i16, i16* inttoptr(i16 32 to i16*)
 	ret i16 %1
 }
 ; CHECK-LABEL: am4:
@@ -37,7 +37,7 @@
 
 define i16 @am5(i16* %a) nounwind {
 	%1 = getelementptr i16, i16* %a, i16 2
-	%2 = load i16* %1
+	%2 = load i16, i16* %1
 	ret i16 %2
 }
 ; CHECK-LABEL: am5:
@@ -47,7 +47,7 @@
 @baz = common global %S zeroinitializer, align 1
 
 define i16 @am6() nounwind {
-	%1 = load i16* getelementptr (%S* @baz, i32 0, i32 1)
+	%1 = load i16, i16* getelementptr (%S* @baz, i32 0, i32 1)
 	ret i16 %1
 }
 ; CHECK-LABEL: am6:
@@ -59,7 +59,7 @@
 define i8 @am7(i16 %n) nounwind {
 	%1 = getelementptr %T, %T* @duh, i32 0, i32 1
 	%2 = getelementptr [2 x i8], [2 x i8]* %1, i16 0, i16 %n
-	%3= load i8* %2
+	%3= load i8, i8* %2
 	ret i8 %3
 }
 ; CHECK-LABEL: am7:
diff --git a/llvm/test/CodeGen/MSP430/Inst16mi.ll b/llvm/test/CodeGen/MSP430/Inst16mi.ll
index e9ab75c..38c16f2 100644
--- a/llvm/test/CodeGen/MSP430/Inst16mi.ll
+++ b/llvm/test/CodeGen/MSP430/Inst16mi.ll
@@ -14,7 +14,7 @@
 define void @add() nounwind {
 ; CHECK-LABEL: add:
 ; CHECK: add.w	#2, &foo
-	%1 = load i16* @foo
+	%1 = load i16, i16* @foo
 	%2 = add i16 %1, 2
 	store i16 %2, i16 * @foo
 	ret void
@@ -23,7 +23,7 @@
 define void @and() nounwind {
 ; CHECK-LABEL: and:
 ; CHECK: and.w	#2, &foo
-	%1 = load i16* @foo
+	%1 = load i16, i16* @foo
 	%2 = and i16 %1, 2
 	store i16 %2, i16 * @foo
 	ret void
@@ -32,7 +32,7 @@
 define void @bis() nounwind {
 ; CHECK-LABEL: bis:
 ; CHECK: bis.w	#2, &foo
-	%1 = load i16* @foo
+	%1 = load i16, i16* @foo
 	%2 = or i16 %1, 2
 	store i16 %2, i16 * @foo
 	ret void
@@ -41,7 +41,7 @@
 define void @xor() nounwind {
 ; CHECK-LABEL: xor:
 ; CHECK: xor.w	#2, &foo
-	%1 = load i16* @foo
+	%1 = load i16, i16* @foo
 	%2 = xor i16 %1, 2
 	store i16 %2, i16 * @foo
 	ret void
diff --git a/llvm/test/CodeGen/MSP430/Inst16mm.ll b/llvm/test/CodeGen/MSP430/Inst16mm.ll
index 5c93e37..c75e1be 100644
--- a/llvm/test/CodeGen/MSP430/Inst16mm.ll
+++ b/llvm/test/CodeGen/MSP430/Inst16mm.ll
@@ -7,7 +7,7 @@
 define void @mov() nounwind {
 ; CHECK-LABEL: mov:
 ; CHECK: mov.w	&bar, &foo
-        %1 = load i16* @bar
+        %1 = load i16, i16* @bar
         store i16 %1, i16* @foo
         ret void
 }
@@ -15,8 +15,8 @@
 define void @add() nounwind {
 ; CHECK-LABEL: add:
 ; CHECK: add.w	&bar, &foo
-	%1 = load i16* @bar
-	%2 = load i16* @foo
+	%1 = load i16, i16* @bar
+	%2 = load i16, i16* @foo
 	%3 = add i16 %2, %1
 	store i16 %3, i16* @foo
 	ret void
@@ -25,8 +25,8 @@
 define void @and() nounwind {
 ; CHECK-LABEL: and:
 ; CHECK: and.w	&bar, &foo
-	%1 = load i16* @bar
-	%2 = load i16* @foo
+	%1 = load i16, i16* @bar
+	%2 = load i16, i16* @foo
 	%3 = and i16 %2, %1
 	store i16 %3, i16* @foo
 	ret void
@@ -35,8 +35,8 @@
 define void @bis() nounwind {
 ; CHECK-LABEL: bis:
 ; CHECK: bis.w	&bar, &foo
-	%1 = load i16* @bar
-	%2 = load i16* @foo
+	%1 = load i16, i16* @bar
+	%2 = load i16, i16* @foo
 	%3 = or i16 %2, %1
 	store i16 %3, i16* @foo
 	ret void
@@ -45,8 +45,8 @@
 define void @xor() nounwind {
 ; CHECK-LABEL: xor:
 ; CHECK: xor.w	&bar, &foo
-	%1 = load i16* @bar
-	%2 = load i16* @foo
+	%1 = load i16, i16* @bar
+	%2 = load i16, i16* @foo
 	%3 = xor i16 %2, %1
 	store i16 %3, i16* @foo
 	ret void
@@ -58,10 +58,10 @@
  %x = alloca i32, align 2                        ; <i32*> [#uses=1]
  %y = alloca i32, align 2                        ; <i32*> [#uses=1]
  store i16 0, i16* %retval
- %tmp = load i32* %y                             ; <i32> [#uses=1]
+ %tmp = load i32, i32* %y                             ; <i32> [#uses=1]
  store i32 %tmp, i32* %x
  store i16 0, i16* %retval
- %0 = load i16* %retval                          ; <i16> [#uses=1]
+ %0 = load i16, i16* %retval                          ; <i16> [#uses=1]
  ret i16 %0
 ; CHECK-LABEL: mov2:
 ; CHECK:	mov.w	2(r1), 6(r1)
diff --git a/llvm/test/CodeGen/MSP430/Inst16mr.ll b/llvm/test/CodeGen/MSP430/Inst16mr.ll
index 2010048..50dc4c0 100644
--- a/llvm/test/CodeGen/MSP430/Inst16mr.ll
+++ b/llvm/test/CodeGen/MSP430/Inst16mr.ll
@@ -13,7 +13,7 @@
 define void @add(i16 %a) nounwind {
 ; CHECK-LABEL: add:
 ; CHECK: add.w	r15, &foo
-	%1 = load i16* @foo
+	%1 = load i16, i16* @foo
 	%2 = add i16 %a, %1
 	store i16 %2, i16* @foo
 	ret void
@@ -22,7 +22,7 @@
 define void @and(i16 %a) nounwind {
 ; CHECK-LABEL: and:
 ; CHECK: and.w	r15, &foo
-	%1 = load i16* @foo
+	%1 = load i16, i16* @foo
 	%2 = and i16 %a, %1
 	store i16 %2, i16* @foo
 	ret void
@@ -31,7 +31,7 @@
 define void @bis(i16 %a) nounwind {
 ; CHECK-LABEL: bis:
 ; CHECK: bis.w	r15, &foo
-	%1 = load i16* @foo
+	%1 = load i16, i16* @foo
 	%2 = or i16 %a, %1
 	store i16 %2, i16* @foo
 	ret void
@@ -41,7 +41,7 @@
 ; CHECK-LABEL: bic:
 ; CHECK: bic.w   r15, &foo
         %1 = xor i16 %m, -1
-        %2 = load i16* @foo
+        %2 = load i16, i16* @foo
         %3 = and i16 %2, %1
         store i16 %3, i16* @foo
         ret void
@@ -50,7 +50,7 @@
 define void @xor(i16 %a) nounwind {
 ; CHECK-LABEL: xor:
 ; CHECK: xor.w	r15, &foo
-	%1 = load i16* @foo
+	%1 = load i16, i16* @foo
 	%2 = xor i16 %a, %1
 	store i16 %2, i16* @foo
 	ret void
diff --git a/llvm/test/CodeGen/MSP430/Inst16rm.ll b/llvm/test/CodeGen/MSP430/Inst16rm.ll
index e6c5261..4f6998e 100644
--- a/llvm/test/CodeGen/MSP430/Inst16rm.ll
+++ b/llvm/test/CodeGen/MSP430/Inst16rm.ll
@@ -6,7 +6,7 @@
 define i16 @add(i16 %a) nounwind {
 ; CHECK-LABEL: add:
 ; CHECK: add.w	&foo, r15
-	%1 = load i16* @foo
+	%1 = load i16, i16* @foo
 	%2 = add i16 %a, %1
 	ret i16 %2
 }
@@ -14,7 +14,7 @@
 define i16 @and(i16 %a) nounwind {
 ; CHECK-LABEL: and:
 ; CHECK: and.w	&foo, r15
-	%1 = load i16* @foo
+	%1 = load i16, i16* @foo
 	%2 = and i16 %a, %1
 	ret i16 %2
 }
@@ -22,7 +22,7 @@
 define i16 @bis(i16 %a) nounwind {
 ; CHECK-LABEL: bis:
 ; CHECK: bis.w	&foo, r15
-	%1 = load i16* @foo
+	%1 = load i16, i16* @foo
 	%2 = or i16 %a, %1
 	ret i16 %2
 }
@@ -30,7 +30,7 @@
 define i16  @bic(i16 %a) nounwind {
 ; CHECK-LABEL: bic:
 ; CHECK: bic.w	&foo, r15
-        %1 = load i16* @foo
+        %1 = load i16, i16* @foo
         %2 = xor i16 %1, -1
         %3 = and i16 %a, %2
         ret i16 %3
@@ -39,7 +39,7 @@
 define i16 @xor(i16 %a) nounwind {
 ; CHECK-LABEL: xor:
 ; CHECK: xor.w	&foo, r15
-	%1 = load i16* @foo
+	%1 = load i16, i16* @foo
 	%2 = xor i16 %a, %1
 	ret i16 %2
 }
diff --git a/llvm/test/CodeGen/MSP430/Inst8mi.ll b/llvm/test/CodeGen/MSP430/Inst8mi.ll
index a2c7b71..ff22d7e 100644
--- a/llvm/test/CodeGen/MSP430/Inst8mi.ll
+++ b/llvm/test/CodeGen/MSP430/Inst8mi.ll
@@ -13,7 +13,7 @@
 define void @add() nounwind {
 ; CHECK-LABEL: add:
 ; CHECK: add.b	#2, &foo
-	%1 = load i8* @foo
+	%1 = load i8, i8* @foo
 	%2 = add i8 %1, 2
 	store i8 %2, i8 * @foo
 	ret void
@@ -22,7 +22,7 @@
 define void @and() nounwind {
 ; CHECK-LABEL: and:
 ; CHECK: and.b	#2, &foo
-	%1 = load i8* @foo
+	%1 = load i8, i8* @foo
 	%2 = and i8 %1, 2
 	store i8 %2, i8 * @foo
 	ret void
@@ -31,7 +31,7 @@
 define void @bis() nounwind {
 ; CHECK-LABEL: bis:
 ; CHECK: bis.b	#2, &foo
-	%1 = load i8* @foo
+	%1 = load i8, i8* @foo
 	%2 = or i8 %1, 2
 	store i8 %2, i8 * @foo
 	ret void
@@ -40,7 +40,7 @@
 define void @xor() nounwind {
 ; CHECK-LABEL: xor:
 ; CHECK: xor.b	#2, &foo
-	%1 = load i8* @foo
+	%1 = load i8, i8* @foo
 	%2 = xor i8 %1, 2
 	store i8 %2, i8 * @foo
 	ret void
diff --git a/llvm/test/CodeGen/MSP430/Inst8mm.ll b/llvm/test/CodeGen/MSP430/Inst8mm.ll
index d1ce8bc..b9848dc 100644
--- a/llvm/test/CodeGen/MSP430/Inst8mm.ll
+++ b/llvm/test/CodeGen/MSP430/Inst8mm.ll
@@ -8,7 +8,7 @@
 define void @mov() nounwind {
 ; CHECK-LABEL: mov:
 ; CHECK: mov.b	&bar, &foo
-        %1 = load i8* @bar
+        %1 = load i8, i8* @bar
         store i8 %1, i8* @foo
         ret void
 }
@@ -16,8 +16,8 @@
 define void @add() nounwind {
 ; CHECK-LABEL: add:
 ; CHECK: add.b	&bar, &foo
-	%1 = load i8* @bar
-	%2 = load i8* @foo
+	%1 = load i8, i8* @bar
+	%2 = load i8, i8* @foo
 	%3 = add i8 %2, %1
 	store i8 %3, i8* @foo
 	ret void
@@ -26,8 +26,8 @@
 define void @and() nounwind {
 ; CHECK-LABEL: and:
 ; CHECK: and.b	&bar, &foo
-	%1 = load i8* @bar
-	%2 = load i8* @foo
+	%1 = load i8, i8* @bar
+	%2 = load i8, i8* @foo
 	%3 = and i8 %2, %1
 	store i8 %3, i8* @foo
 	ret void
@@ -36,8 +36,8 @@
 define void @bis() nounwind {
 ; CHECK-LABEL: bis:
 ; CHECK: bis.b	&bar, &foo
-	%1 = load i8* @bar
-	%2 = load i8* @foo
+	%1 = load i8, i8* @bar
+	%2 = load i8, i8* @foo
 	%3 = or i8 %2, %1
 	store i8 %3, i8* @foo
 	ret void
@@ -46,8 +46,8 @@
 define void @xor() nounwind {
 ; CHECK-LABEL: xor:
 ; CHECK: xor.b	&bar, &foo
-	%1 = load i8* @bar
-	%2 = load i8* @foo
+	%1 = load i8, i8* @bar
+	%2 = load i8, i8* @foo
 	%3 = xor i8 %2, %1
 	store i8 %3, i8* @foo
 	ret void
diff --git a/llvm/test/CodeGen/MSP430/Inst8mr.ll b/llvm/test/CodeGen/MSP430/Inst8mr.ll
index 0b35667..f03c7e1 100644
--- a/llvm/test/CodeGen/MSP430/Inst8mr.ll
+++ b/llvm/test/CodeGen/MSP430/Inst8mr.ll
@@ -13,7 +13,7 @@
 define void @and(i8 %a) nounwind {
 ; CHECK-LABEL: and:
 ; CHECK: and.b	r15, &foo
-	%1 = load i8* @foo
+	%1 = load i8, i8* @foo
 	%2 = and i8 %a, %1
 	store i8 %2, i8* @foo
 	ret void
@@ -22,7 +22,7 @@
 define void @add(i8 %a) nounwind {
 ; CHECK-LABEL: add:
 ; CHECK: add.b	r15, &foo
-	%1 = load i8* @foo
+	%1 = load i8, i8* @foo
 	%2 = add i8 %a, %1
 	store i8 %2, i8* @foo
 	ret void
@@ -31,7 +31,7 @@
 define void @bis(i8 %a) nounwind {
 ; CHECK-LABEL: bis:
 ; CHECK: bis.b	r15, &foo
-	%1 = load i8* @foo
+	%1 = load i8, i8* @foo
 	%2 = or i8 %a, %1
 	store i8 %2, i8* @foo
 	ret void
@@ -41,7 +41,7 @@
 ; CHECK-LABEL: bic:
 ; CHECK: bic.b   r15, &foo
         %1 = xor i8 %m, -1
-        %2 = load i8* @foo
+        %2 = load i8, i8* @foo
         %3 = and i8 %2, %1
         store i8 %3, i8* @foo
         ret void
@@ -50,7 +50,7 @@
 define void @xor(i8 %a) nounwind {
 ; CHECK-LABEL: xor:
 ; CHECK: xor.b	r15, &foo
-	%1 = load i8* @foo
+	%1 = load i8, i8* @foo
 	%2 = xor i8 %a, %1
 	store i8 %2, i8* @foo
 	ret void
diff --git a/llvm/test/CodeGen/MSP430/Inst8rm.ll b/llvm/test/CodeGen/MSP430/Inst8rm.ll
index 308163e..e1a9703 100644
--- a/llvm/test/CodeGen/MSP430/Inst8rm.ll
+++ b/llvm/test/CodeGen/MSP430/Inst8rm.ll
@@ -6,7 +6,7 @@
 define i8 @add(i8 %a) nounwind {
 ; CHECK-LABEL: add:
 ; CHECK: add.b	&foo, r15
-	%1 = load i8* @foo
+	%1 = load i8, i8* @foo
 	%2 = add i8 %a, %1
 	ret i8 %2
 }
@@ -14,7 +14,7 @@
 define i8 @and(i8 %a) nounwind {
 ; CHECK-LABEL: and:
 ; CHECK: and.b	&foo, r15
-	%1 = load i8* @foo
+	%1 = load i8, i8* @foo
 	%2 = and i8 %a, %1
 	ret i8 %2
 }
@@ -22,7 +22,7 @@
 define i8 @bis(i8 %a) nounwind {
 ; CHECK-LABEL: bis:
 ; CHECK: bis.b	&foo, r15
-	%1 = load i8* @foo
+	%1 = load i8, i8* @foo
 	%2 = or i8 %a, %1
 	ret i8 %2
 }
@@ -30,7 +30,7 @@
 define i8  @bic(i8 %a) nounwind {
 ; CHECK-LABEL: bic:
 ; CHECK: bic.b  &foo, r15
-        %1 = load i8* @foo
+        %1 = load i8, i8* @foo
         %2 = xor i8 %1, -1
         %3 = and i8 %a, %2
         ret i8 %3
@@ -39,7 +39,7 @@
 define i8 @xor(i8 %a) nounwind {
 ; CHECK-LABEL: xor:
 ; CHECK: xor.b	&foo, r15
-	%1 = load i8* @foo
+	%1 = load i8, i8* @foo
 	%2 = xor i8 %a, %1
 	ret i8 %2
 }
diff --git a/llvm/test/CodeGen/MSP430/bit.ll b/llvm/test/CodeGen/MSP430/bit.ll
index 2ffc191..45964f9 100644
--- a/llvm/test/CodeGen/MSP430/bit.ll
+++ b/llvm/test/CodeGen/MSP430/bit.ll
@@ -33,7 +33,7 @@
 ; CHECK: bit.b	#15, r15
 
 define i8 @bitbmi() nounwind {
-	%t1 = load i8* @foo8
+	%t1 = load i8, i8* @foo8
 	%t2 = and i8 %t1, 15
 	%t3 = icmp ne i8 %t2, 0
 	%t4 = zext i1 %t3 to i8
@@ -43,7 +43,7 @@
 ; CHECK: bit.b	#15, &foo8
 
 define i8 @bitbim() nounwind {
-	%t1 = load i8* @foo8
+	%t1 = load i8, i8* @foo8
 	%t2 = and i8 15, %t1
 	%t3 = icmp ne i8 %t2, 0
 	%t4 = zext i1 %t3 to i8
@@ -53,7 +53,7 @@
 ; CHECK: bit.b	#15, &foo8
 
 define i8 @bitbrm(i8 %a) nounwind {
-	%t1 = load i8* @foo8
+	%t1 = load i8, i8* @foo8
 	%t2 = and i8 %a, %t1
 	%t3 = icmp ne i8 %t2, 0
 	%t4 = zext i1 %t3 to i8
@@ -63,7 +63,7 @@
 ; CHECK: bit.b	&foo8, r15
 
 define i8 @bitbmr(i8 %a) nounwind {
-	%t1 = load i8* @foo8
+	%t1 = load i8, i8* @foo8
 	%t2 = and i8 %t1, %a
 	%t3 = icmp ne i8 %t2, 0
 	%t4 = zext i1 %t3 to i8
@@ -73,8 +73,8 @@
 ; CHECK: bit.b	r15, &foo8
 
 define i8 @bitbmm() nounwind {
-	%t1 = load i8* @foo8
-	%t2 = load i8* @bar8
+	%t1 = load i8, i8* @foo8
+	%t2 = load i8, i8* @bar8
 	%t3 = and i8 %t1, %t2
 	%t4 = icmp ne i8 %t3, 0
 	%t5 = zext i1 %t4 to i8
@@ -114,7 +114,7 @@
 ; CHECK: bit.w	#4080, r15
 
 define i16 @bitwmi() nounwind {
-	%t1 = load i16* @foo16
+	%t1 = load i16, i16* @foo16
 	%t2 = and i16 %t1, 4080
 	%t3 = icmp ne i16 %t2, 0
 	%t4 = zext i1 %t3 to i16
@@ -124,7 +124,7 @@
 ; CHECK: bit.w	#4080, &foo16
 
 define i16 @bitwim() nounwind {
-	%t1 = load i16* @foo16
+	%t1 = load i16, i16* @foo16
 	%t2 = and i16 4080, %t1
 	%t3 = icmp ne i16 %t2, 0
 	%t4 = zext i1 %t3 to i16
@@ -134,7 +134,7 @@
 ; CHECK: bit.w	#4080, &foo16
 
 define i16 @bitwrm(i16 %a) nounwind {
-	%t1 = load i16* @foo16
+	%t1 = load i16, i16* @foo16
 	%t2 = and i16 %a, %t1
 	%t3 = icmp ne i16 %t2, 0
 	%t4 = zext i1 %t3 to i16
@@ -144,7 +144,7 @@
 ; CHECK: bit.w	&foo16, r15
 
 define i16 @bitwmr(i16 %a) nounwind {
-	%t1 = load i16* @foo16
+	%t1 = load i16, i16* @foo16
 	%t2 = and i16 %t1, %a
 	%t3 = icmp ne i16 %t2, 0
 	%t4 = zext i1 %t3 to i16
@@ -154,8 +154,8 @@
 ; CHECK: bit.w	r15, &foo16
 
 define i16 @bitwmm() nounwind {
-	%t1 = load i16* @foo16
-	%t2 = load i16* @bar16
+	%t1 = load i16, i16* @foo16
+	%t2 = load i16, i16* @bar16
 	%t3 = and i16 %t1, %t2
 	%t4 = icmp ne i16 %t3, 0
 	%t5 = zext i1 %t4 to i16
diff --git a/llvm/test/CodeGen/MSP430/byval.ll b/llvm/test/CodeGen/MSP430/byval.ll
index 3be05b5..410a6b0 100644
--- a/llvm/test/CodeGen/MSP430/byval.ll
+++ b/llvm/test/CodeGen/MSP430/byval.ll
@@ -11,7 +11,7 @@
 ; CHECK-LABEL: callee:
 ; CHECK: mov.w 2(r1), r15
   %0 = getelementptr inbounds %struct.Foo, %struct.Foo* %f, i32 0, i32 0
-  %1 = load i16* %0, align 2
+  %1 = load i16, i16* %0, align 2
   ret i16 %1
 }
 
diff --git a/llvm/test/CodeGen/MSP430/indirectbr.ll b/llvm/test/CodeGen/MSP430/indirectbr.ll
index 4816c4a..af1a466 100644
--- a/llvm/test/CodeGen/MSP430/indirectbr.ll
+++ b/llvm/test/CodeGen/MSP430/indirectbr.ll
@@ -5,7 +5,7 @@
 
 define internal i16 @foo(i16 %i) nounwind {
 entry:
-  %0 = load i8** @nextaddr, align 4               ; <i8*> [#uses=2]
+  %0 = load i8*, i8** @nextaddr, align 4               ; <i8*> [#uses=2]
   %1 = icmp eq i8* %0, null                       ; <i1> [#uses=1]
   br i1 %1, label %bb3, label %bb2
 
@@ -15,7 +15,7 @@
 
 bb3:                                              ; preds = %entry
   %2 = getelementptr inbounds [5 x i8*], [5 x i8*]* @C.0.2070, i16 0, i16 %i ; <i8**> [#uses=1]
-  %gotovar.4.0.pre = load i8** %2, align 4        ; <i8*> [#uses=1]
+  %gotovar.4.0.pre = load i8*, i8** %2, align 4        ; <i8*> [#uses=1]
   br label %bb2
 
 L5:                                               ; preds = %bb2
diff --git a/llvm/test/CodeGen/MSP430/indirectbr2.ll b/llvm/test/CodeGen/MSP430/indirectbr2.ll
index 796687e..b0b4f1c 100644
--- a/llvm/test/CodeGen/MSP430/indirectbr2.ll
+++ b/llvm/test/CodeGen/MSP430/indirectbr2.ll
@@ -4,7 +4,7 @@
 define internal i16 @foo(i16 %i) nounwind {
 entry:
   %tmp1 = getelementptr inbounds [5 x i8*], [5 x i8*]* @C.0.2070, i16 0, i16 %i ; <i8**> [#uses=1]
-  %gotovar.4.0 = load i8** %tmp1, align 4        ; <i8*> [#uses=1]
+  %gotovar.4.0 = load i8*, i8** %tmp1, align 4        ; <i8*> [#uses=1]
 ; CHECK: br .LC.0.2070(r12)
   indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1]
 
diff --git a/llvm/test/CodeGen/MSP430/inline-asm.ll b/llvm/test/CodeGen/MSP430/inline-asm.ll
index 0e7886a..f4d6292 100644
--- a/llvm/test/CodeGen/MSP430/inline-asm.ll
+++ b/llvm/test/CodeGen/MSP430/inline-asm.ll
@@ -20,7 +20,7 @@
 }
 
 define void @mem() nounwind {
-        %fooval = load i16* @foo
+        %fooval = load i16, i16* @foo
         call void asm sideeffect "bic\09$0,r2", "m"(i16 %fooval) nounwind
         ret void
 }
diff --git a/llvm/test/CodeGen/MSP430/jumptable.ll b/llvm/test/CodeGen/MSP430/jumptable.ll
index 239d79e..4ba930b 100644
--- a/llvm/test/CodeGen/MSP430/jumptable.ll
+++ b/llvm/test/CodeGen/MSP430/jumptable.ll
@@ -10,7 +10,7 @@
   %retval = alloca i16, align 2
   %i.addr = alloca i16, align 2
   store i16 %i, i16* %i.addr, align 2
-  %0 = load i16* %i.addr, align 2
+  %0 = load i16, i16* %i.addr, align 2
 ; CHECK: mov.w #2, r14
 ; CHECK: call #__mulhi3hw_noint
 ; CHECK: br .LJTI0_0(r15)
@@ -42,7 +42,7 @@
   br label %return
 
 return:                                           ; preds = %sw.default, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb
-  %1 = load i16* %retval
+  %1 = load i16, i16* %retval
   ret i16 %1
 ; CHECK: ret
 }
diff --git a/llvm/test/CodeGen/MSP430/memset.ll b/llvm/test/CodeGen/MSP430/memset.ll
index bf105446..76cfb29 100644
--- a/llvm/test/CodeGen/MSP430/memset.ll
+++ b/llvm/test/CodeGen/MSP430/memset.ll
@@ -8,7 +8,7 @@
 define void @test() nounwind {
 entry:
 ; CHECK-LABEL: test:
-  %0 = load i8** @buf, align 2
+  %0 = load i8*, i8** @buf, align 2
 ; CHECK: mov.w &buf, r15
 ; CHECK-NEXT: mov.w #5, r14
 ; CHECK-NEXT: mov.w #128, r13
diff --git a/llvm/test/CodeGen/MSP430/misched-msp430.ll b/llvm/test/CodeGen/MSP430/misched-msp430.ll
index c8541ef..3d18fa0 100644
--- a/llvm/test/CodeGen/MSP430/misched-msp430.ll
+++ b/llvm/test/CodeGen/MSP430/misched-msp430.ll
@@ -14,7 +14,7 @@
 ; CHECK: ret
 define void @f() {
 entry:
-  %0 = load i16* @y, align 2
+  %0 = load i16, i16* @y, align 2
   store i16 %0, i16* @x, align 2
   ret void
 }
diff --git a/llvm/test/CodeGen/MSP430/mult-alt-generic-msp430.ll b/llvm/test/CodeGen/MSP430/mult-alt-generic-msp430.ll
index 342afed..a4fea62 100644
--- a/llvm/test/CodeGen/MSP430/mult-alt-generic-msp430.ll
+++ b/llvm/test/CodeGen/MSP430/mult-alt-generic-msp430.ll
@@ -33,10 +33,10 @@
   %in1 = alloca i16, align 2
   store i16 0, i16* %out0, align 2
   store i16 1, i16* %in1, align 2
-  %tmp = load i16* %in1, align 2
+  %tmp = load i16, i16* %in1, align 2
   %0 = call i16 asm "foo $1,$0", "=r,<r"(i16 %tmp) nounwind
   store i16 %0, i16* %out0, align 2
-  %tmp1 = load i16* %in1, align 2
+  %tmp1 = load i16, i16* %in1, align 2
   %1 = call i16 asm "foo $1,$0", "=r,r<"(i16 %tmp1) nounwind
   store i16 %1, i16* %out0, align 2
   ret void
@@ -48,10 +48,10 @@
   %in1 = alloca i16, align 2
   store i16 0, i16* %out0, align 2
   store i16 1, i16* %in1, align 2
-  %tmp = load i16* %in1, align 2
+  %tmp = load i16, i16* %in1, align 2
   %0 = call i16 asm "foo $1,$0", "=r,>r"(i16 %tmp) nounwind
   store i16 %0, i16* %out0, align 2
-  %tmp1 = load i16* %in1, align 2
+  %tmp1 = load i16, i16* %in1, align 2
   %1 = call i16 asm "foo $1,$0", "=r,r>"(i16 %tmp1) nounwind
   store i16 %1, i16* %out0, align 2
   ret void
@@ -63,7 +63,7 @@
   %in1 = alloca i16, align 2
   store i16 0, i16* %out0, align 2
   store i16 1, i16* %in1, align 2
-  %tmp = load i16* %in1, align 2
+  %tmp = load i16, i16* %in1, align 2
   %0 = call i16 asm "foo $1,$0", "=r,r"(i16 %tmp) nounwind
   store i16 %0, i16* %out0, align 2
   ret void
@@ -120,10 +120,10 @@
   %in1 = alloca i16, align 2
   store i16 0, i16* %out0, align 2
   store i16 1, i16* %in1, align 2
-  %tmp = load i16* %in1, align 2
+  %tmp = load i16, i16* %in1, align 2
   %0 = call i16 asm "foo $1,$0", "=r,imr"(i16 %tmp) nounwind
   store i16 %0, i16* %out0, align 2
-  %tmp1 = load i16* @min1, align 2
+  %tmp1 = load i16, i16* @min1, align 2
   %1 = call i16 asm "foo $1,$0", "=r,imr"(i16 %tmp1) nounwind
   store i16 %1, i16* %out0, align 2
   %2 = call i16 asm "foo $1,$0", "=r,imr"(i16 1) nounwind
@@ -137,10 +137,10 @@
   %in1 = alloca i16, align 2
   store i16 0, i16* %out0, align 2
   store i16 1, i16* %in1, align 2
-  %tmp = load i16* %in1, align 2
+  %tmp = load i16, i16* %in1, align 2
   %0 = call i16 asm "foo $1,$0", "=r,X"(i16 %tmp) nounwind
   store i16 %0, i16* %out0, align 2
-  %tmp1 = load i16* @min1, align 2
+  %tmp1 = load i16, i16* @min1, align 2
   %1 = call i16 asm "foo $1,$0", "=r,X"(i16 %tmp1) nounwind
   store i16 %1, i16* %out0, align 2
   %2 = call i16 asm "foo $1,$0", "=r,X"(i16 1) nounwind
@@ -166,7 +166,7 @@
 
 define void @multi_m() nounwind {
 entry:
-  %tmp = load i16* @min1, align 2
+  %tmp = load i16, i16* @min1, align 2
   call void asm "foo $1,$0", "=*m|r,m|r"(i16* @mout0, i16 %tmp) nounwind
   ret void
 }
@@ -191,10 +191,10 @@
   %in1 = alloca i16, align 2
   store i16 0, i16* %out0, align 2
   store i16 1, i16* %in1, align 2
-  %tmp = load i16* %in1, align 2
+  %tmp = load i16, i16* %in1, align 2
   %0 = call i16 asm "foo $1,$0", "=r|r,r|<r"(i16 %tmp) nounwind
   store i16 %0, i16* %out0, align 2
-  %tmp1 = load i16* %in1, align 2
+  %tmp1 = load i16, i16* %in1, align 2
   %1 = call i16 asm "foo $1,$0", "=r|r,r|r<"(i16 %tmp1) nounwind
   store i16 %1, i16* %out0, align 2
   ret void
@@ -206,10 +206,10 @@
   %in1 = alloca i16, align 2
   store i16 0, i16* %out0, align 2
   store i16 1, i16* %in1, align 2
-  %tmp = load i16* %in1, align 2
+  %tmp = load i16, i16* %in1, align 2
   %0 = call i16 asm "foo $1,$0", "=r|r,r|>r"(i16 %tmp) nounwind
   store i16 %0, i16* %out0, align 2
-  %tmp1 = load i16* %in1, align 2
+  %tmp1 = load i16, i16* %in1, align 2
   %1 = call i16 asm "foo $1,$0", "=r|r,r|r>"(i16 %tmp1) nounwind
   store i16 %1, i16* %out0, align 2
   ret void
@@ -221,7 +221,7 @@
   %in1 = alloca i16, align 2
   store i16 0, i16* %out0, align 2
   store i16 1, i16* %in1, align 2
-  %tmp = load i16* %in1, align 2
+  %tmp = load i16, i16* %in1, align 2
   %0 = call i16 asm "foo $1,$0", "=r|r,r|m"(i16 %tmp) nounwind
   store i16 %0, i16* %out0, align 2
   ret void
@@ -278,10 +278,10 @@
   %in1 = alloca i16, align 2
   store i16 0, i16* %out0, align 2
   store i16 1, i16* %in1, align 2
-  %tmp = load i16* %in1, align 2
+  %tmp = load i16, i16* %in1, align 2
   %0 = call i16 asm "foo $1,$0", "=r|r,r|imr"(i16 %tmp) nounwind
   store i16 %0, i16* %out0, align 2
-  %tmp1 = load i16* @min1, align 2
+  %tmp1 = load i16, i16* @min1, align 2
   %1 = call i16 asm "foo $1,$0", "=r|r,r|imr"(i16 %tmp1) nounwind
   store i16 %1, i16* %out0, align 2
   %2 = call i16 asm "foo $1,$0", "=r|r,r|imr"(i16 1) nounwind
@@ -295,10 +295,10 @@
   %in1 = alloca i16, align 2
   store i16 0, i16* %out0, align 2
   store i16 1, i16* %in1, align 2
-  %tmp = load i16* %in1, align 2
+  %tmp = load i16, i16* %in1, align 2
   %0 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16 %tmp) nounwind
   store i16 %0, i16* %out0, align 2
-  %tmp1 = load i16* @min1, align 2
+  %tmp1 = load i16, i16* @min1, align 2
   %1 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16 %tmp1) nounwind
   store i16 %1, i16* %out0, align 2
   %2 = call i16 asm "foo $1,$0", "=r|r,r|X"(i16 1) nounwind
diff --git a/llvm/test/CodeGen/MSP430/postinc.ll b/llvm/test/CodeGen/MSP430/postinc.ll
index ee5cef8..75a927f 100644
--- a/llvm/test/CodeGen/MSP430/postinc.ll
+++ b/llvm/test/CodeGen/MSP430/postinc.ll
@@ -13,7 +13,7 @@
   %arrayidx = getelementptr i16, i16* %a, i16 %i.010   ; <i16*> [#uses=1]
 ; CHECK-LABEL: add:
 ; CHECK: add.w @r{{[0-9]+}}+, r{{[0-9]+}}
-  %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
+  %tmp4 = load i16, i16* %arrayidx                     ; <i16> [#uses=1]
   %add = add i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
   %inc = add i16 %i.010, 1                        ; <i16> [#uses=2]
   %exitcond = icmp eq i16 %inc, %n                ; <i1> [#uses=1]
@@ -35,7 +35,7 @@
   %arrayidx = getelementptr i16, i16* %a, i16 %i.010   ; <i16*> [#uses=1]
 ; CHECK-LABEL: sub:
 ; CHECK: sub.w @r{{[0-9]+}}+, r{{[0-9]+}}
-  %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
+  %tmp4 = load i16, i16* %arrayidx                     ; <i16> [#uses=1]
   %add = sub i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
   %inc = add i16 %i.010, 1                        ; <i16> [#uses=2]
   %exitcond = icmp eq i16 %inc, %n                ; <i1> [#uses=1]
@@ -57,7 +57,7 @@
   %arrayidx = getelementptr i16, i16* %a, i16 %i.010   ; <i16*> [#uses=1]
 ; CHECK-LABEL: or:
 ; CHECK: bis.w @r{{[0-9]+}}+, r{{[0-9]+}}
-  %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
+  %tmp4 = load i16, i16* %arrayidx                     ; <i16> [#uses=1]
   %add = or i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
   %inc = add i16 %i.010, 1                        ; <i16> [#uses=2]
   %exitcond = icmp eq i16 %inc, %n                ; <i1> [#uses=1]
@@ -79,7 +79,7 @@
   %arrayidx = getelementptr i16, i16* %a, i16 %i.010   ; <i16*> [#uses=1]
 ; CHECK-LABEL: xor:
 ; CHECK: xor.w @r{{[0-9]+}}+, r{{[0-9]+}}
-  %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
+  %tmp4 = load i16, i16* %arrayidx                     ; <i16> [#uses=1]
   %add = xor i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
   %inc = add i16 %i.010, 1                        ; <i16> [#uses=2]
   %exitcond = icmp eq i16 %inc, %n                ; <i1> [#uses=1]
@@ -101,7 +101,7 @@
   %arrayidx = getelementptr i16, i16* %a, i16 %i.010   ; <i16*> [#uses=1]
 ; CHECK-LABEL: and:
 ; CHECK: and.w @r{{[0-9]+}}+, r{{[0-9]+}}
-  %tmp4 = load i16* %arrayidx                     ; <i16> [#uses=1]
+  %tmp4 = load i16, i16* %arrayidx                     ; <i16> [#uses=1]
   %add = and i16 %tmp4, %sum.09                   ; <i16> [#uses=2]
   %inc = add i16 %i.010, 1                        ; <i16> [#uses=2]
   %exitcond = icmp eq i16 %inc, %n                ; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/Mips/2008-07-15-SmallSection.ll b/llvm/test/CodeGen/Mips/2008-07-15-SmallSection.ll
index cbc3ecf..3c6f380 100644
--- a/llvm/test/CodeGen/Mips/2008-07-15-SmallSection.ll
+++ b/llvm/test/CodeGen/Mips/2008-07-15-SmallSection.ll
@@ -27,8 +27,8 @@
 
 define i32 @A1() nounwind {
 entry:
-  load i32* getelementptr (%struct.anon* @foo, i32 0, i32 0), align 8 
-  load i32* getelementptr (%struct.anon* @foo, i32 0, i32 1), align 4 
+  load i32, i32* getelementptr (%struct.anon* @foo, i32 0, i32 0), align 8 
+  load i32, i32* getelementptr (%struct.anon* @foo, i32 0, i32 1), align 4 
   add i32 %1, %0
   ret i32 %2
 }
diff --git a/llvm/test/CodeGen/Mips/2008-08-01-AsmInline.ll b/llvm/test/CodeGen/Mips/2008-08-01-AsmInline.ll
index ae06ffe..5edba02 100644
--- a/llvm/test/CodeGen/Mips/2008-08-01-AsmInline.ll
+++ b/llvm/test/CodeGen/Mips/2008-08-01-AsmInline.ll
@@ -26,8 +26,8 @@
 define void @foo0() nounwind {
 entry:
 ; CHECK: addu
-  %0 = load i32* @gi1, align 4
-  %1 = load i32* @gi0, align 4
+  %0 = load i32, i32* @gi1, align 4
+  %1 = load i32, i32* @gi0, align 4
   %2 = tail call i32 asm "addu $0, $1, $2", "=r,r,r"(i32 %0, i32 %1) nounwind
   store i32 %2, i32* @gi2, align 4
   ret void
@@ -36,7 +36,7 @@
 define void @foo2() nounwind {
 entry:
 ; CHECK: neg.s
-  %0 = load float* @gf1, align 4
+  %0 = load float, float* @gf1, align 4
   %1 = tail call float asm "neg.s $0, $1", "=f,f"(float %0) nounwind
   store float %1, float* @gf0, align 4
   ret void
@@ -45,7 +45,7 @@
 define void @foo3() nounwind {
 entry:
 ; CHECK: neg.d
-  %0 = load double* @gd1, align 8
+  %0 = load double, double* @gd1, align 8
   %1 = tail call double asm "neg.d $0, $1", "=f,f"(double %0) nounwind
   store double %1, double* @gd0, align 8
   ret void
@@ -64,7 +64,7 @@
 entry:
   %0 = tail call i32 asm sideeffect "ulh $0,16($$sp)\0A\09", "=r,~{$2}"()
   store i32 %0, i32* @gi2, align 4
-  %1 = load float* @gf0, align 4
+  %1 = load float, float* @gf0, align 4
   %2 = tail call double asm sideeffect "cvt.d.s $0, $1\0A\09", "=f,f,~{$f0}"(float %1)
   store double %2, double* @gd0, align 8
   ret void
diff --git a/llvm/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll b/llvm/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
index c41d521..592e574 100644
--- a/llvm/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
+++ b/llvm/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
@@ -7,12 +7,12 @@
         %retval = alloca double         ; <double*> [#uses=3]
         store double 0.000000e+00, double* %retval
         %r = alloca double              ; <double*> [#uses=1]
-        load double* %r         ; <double>:0 [#uses=1]
+        load double, double* %r         ; <double>:0 [#uses=1]
         store double %0, double* %retval
         br label %return
 
 return:         ; preds = %entry
-        load double* %retval            ; <double>:1 [#uses=1]
+        load double, double* %retval            ; <double>:1 [#uses=1]
         ret double %1
 }
 
diff --git a/llvm/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll b/llvm/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll
index 6e44747..eaf6ddc 100644
--- a/llvm/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll
+++ b/llvm/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll
@@ -8,7 +8,7 @@
 continue.outer:         ; preds = %case4, %entry
         %p.0.ph.rec = phi i32 [ 0, %entry ], [ %indvar.next, %case4 ]          ; <i32> [#uses=2]
         %p.0.ph = getelementptr i8, i8* %0, i32 %p.0.ph.rec         ; <i8*> [#uses=1]
-        %1 = load i8* %p.0.ph           ; <i8> [#uses=1]
+        %1 = load i8, i8* %p.0.ph           ; <i8> [#uses=1]
         switch i8 %1, label %infloop [
                 i8 0, label %return.split
                 i8 76, label %case4
diff --git a/llvm/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll b/llvm/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
index 32584e4..9cebfcd 100644
--- a/llvm/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
+++ b/llvm/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
@@ -13,16 +13,16 @@
 define double @_erand48_r(%struct._reent* %r, i16* %xseed) nounwind {
 entry:
 	tail call void @__dorand48( %struct._reent* %r, i16* %xseed ) nounwind
-	load i16* %xseed, align 2		; <i16>:0 [#uses=1]
+	load i16, i16* %xseed, align 2		; <i16>:0 [#uses=1]
 	uitofp i16 %0 to double		; <double>:1 [#uses=1]
 	tail call double @ldexp( double %1, i32 -48 ) nounwind		; <double>:2 [#uses=1]
 	getelementptr i16, i16* %xseed, i32 1		; <i16*>:3 [#uses=1]
-	load i16* %3, align 2		; <i16>:4 [#uses=1]
+	load i16, i16* %3, align 2		; <i16>:4 [#uses=1]
 	uitofp i16 %4 to double		; <double>:5 [#uses=1]
 	tail call double @ldexp( double %5, i32 -32 ) nounwind		; <double>:6 [#uses=1]
 	fadd double %2, %6		; <double>:7 [#uses=1]
 	getelementptr i16, i16* %xseed, i32 2		; <i16*>:8 [#uses=1]
-	load i16* %8, align 2		; <i16>:9 [#uses=1]
+	load i16, i16* %8, align 2		; <i16>:9 [#uses=1]
 	uitofp i16 %9 to double		; <double>:10 [#uses=1]
 	tail call double @ldexp( double %10, i32 -16 ) nounwind		; <double>:11 [#uses=1]
 	fadd double %7, %11		; <double>:12 [#uses=1]
@@ -35,18 +35,18 @@
 
 define double @erand48(i16* %xseed) nounwind {
 entry:
-	load %struct._reent** @_impure_ptr, align 4		; <%struct._reent*>:0 [#uses=1]
+	load %struct._reent*, %struct._reent** @_impure_ptr, align 4		; <%struct._reent*>:0 [#uses=1]
 	tail call void @__dorand48( %struct._reent* %0, i16* %xseed ) nounwind
-	load i16* %xseed, align 2		; <i16>:1 [#uses=1]
+	load i16, i16* %xseed, align 2		; <i16>:1 [#uses=1]
 	uitofp i16 %1 to double		; <double>:2 [#uses=1]
 	tail call double @ldexp( double %2, i32 -48 ) nounwind		; <double>:3 [#uses=1]
 	getelementptr i16, i16* %xseed, i32 1		; <i16*>:4 [#uses=1]
-	load i16* %4, align 2		; <i16>:5 [#uses=1]
+	load i16, i16* %4, align 2		; <i16>:5 [#uses=1]
 	uitofp i16 %5 to double		; <double>:6 [#uses=1]
 	tail call double @ldexp( double %6, i32 -32 ) nounwind		; <double>:7 [#uses=1]
 	fadd double %3, %7		; <double>:8 [#uses=1]
 	getelementptr i16, i16* %xseed, i32 2		; <i16*>:9 [#uses=1]
-	load i16* %9, align 2		; <i16>:10 [#uses=1]
+	load i16, i16* %9, align 2		; <i16>:10 [#uses=1]
 	uitofp i16 %10 to double		; <double>:11 [#uses=1]
 	tail call double @ldexp( double %11, i32 -16 ) nounwind		; <double>:12 [#uses=1]
 	fadd double %8, %12		; <double>:13 [#uses=1]
diff --git a/llvm/test/CodeGen/Mips/2010-07-20-Switch.ll b/llvm/test/CodeGen/Mips/2010-07-20-Switch.ll
index 5c84077..fd0254e 100644
--- a/llvm/test/CodeGen/Mips/2010-07-20-Switch.ll
+++ b/llvm/test/CodeGen/Mips/2010-07-20-Switch.ll
@@ -15,7 +15,7 @@
 entry:
   %x = alloca i32, align 4                        ; <i32*> [#uses=2]
   store volatile i32 2, i32* %x, align 4
-  %0 = load volatile i32* %x, align 4             ; <i32> [#uses=1]
+  %0 = load volatile i32, i32* %x, align 4             ; <i32> [#uses=1]
 ; STATIC-O32: sll $[[R0:[0-9]+]], ${{[0-9]+}}, 2
 ; STATIC-O32: lui $[[R1:[0-9]+]], %hi($JTI0_0)
 ; STATIC-O32: addu $[[R2:[0-9]+]], $[[R0]], $[[R1]]
diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/br1.ll b/llvm/test/CodeGen/Mips/Fast-ISel/br1.ll
index bc508c8..11842dd 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/br1.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/br1.ll
@@ -10,7 +10,7 @@
 ; Function Attrs: nounwind
 define void @br() #0 {
 entry:
-  %0 = load i32* @b, align 4
+  %0 = load i32, i32* @b, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %if.end, label %if.then
 
diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll b/llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll
index de5f758..f80cb82 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll
@@ -84,13 +84,13 @@
 ; CHECK-LABEL: cxiiiiconv
 ; mips32r2-LABEL:  cxiiiiconv
 ; mips32-LABEL:  cxiiiiconv
-  %0 = load i8* @c1, align 1
+  %0 = load i8, i8* @c1, align 1
   %conv = sext i8 %0 to i32
-  %1 = load i8* @uc1, align 1
+  %1 = load i8, i8* @uc1, align 1
   %conv1 = zext i8 %1 to i32
-  %2 = load i16* @s1, align 2
+  %2 = load i16, i16* @s1, align 2
   %conv2 = sext i16 %2 to i32
-  %3 = load i16* @us1, align 2
+  %3 = load i16, i16* @us1, align 2
   %conv3 = zext i16 %3 to i32
   call void @xiiii(i32 %conv, i32 %conv1, i32 %conv2, i32 %conv3)
 ; CHECK:        addu    $[[REG_GP:[0-9]+]], ${{[0-9]+}}, ${{[0-9+]}}
diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/fpcmpa.ll b/llvm/test/CodeGen/Mips/Fast-ISel/fpcmpa.ll
index 4cbfe00..72de888 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/fpcmpa.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/fpcmpa.ll
@@ -12,8 +12,8 @@
 ; Function Attrs: nounwind
 define void @feq1()  {
 entry:
-  %0 = load float* @f1, align 4
-  %1 = load float* @f2, align 4
+  %0 = load float, float* @f1, align 4
+  %1 = load float, float* @f2, align 4
   %cmp = fcmp oeq float %0, %1
 ; CHECK-LABEL:  feq1:
 ; CHECK-DAG:    lw      $[[REG_F2_GOT:[0-9]+]], %got(f2)(${{[0-9]+}})
@@ -33,8 +33,8 @@
 ; Function Attrs: nounwind
 define void @fne1()  {
 entry:
-  %0 = load float* @f1, align 4
-  %1 = load float* @f2, align 4
+  %0 = load float, float* @f1, align 4
+  %1 = load float, float* @f2, align 4
   %cmp = fcmp une float %0, %1
 ; CHECK-LABEL:  fne1:
 ; CHECK-DAG:    lw      $[[REG_F2_GOT:[0-9]+]], %got(f2)(${{[0-9]+}})
@@ -53,8 +53,8 @@
 ; Function Attrs: nounwind
 define void @flt1()  {
 entry:
-  %0 = load float* @f1, align 4
-  %1 = load float* @f2, align 4
+  %0 = load float, float* @f1, align 4
+  %1 = load float, float* @f2, align 4
   %cmp = fcmp olt float %0, %1
 ; CHECK-LABEL:  flt1:
 ; CHECK-DAG:    lw      $[[REG_F2_GOT:[0-9]+]], %got(f2)(${{[0-9]+}})
@@ -74,8 +74,8 @@
 ; Function Attrs: nounwind
 define void @fgt1()  {
 entry:
-  %0 = load float* @f1, align 4
-  %1 = load float* @f2, align 4
+  %0 = load float, float* @f1, align 4
+  %1 = load float, float* @f2, align 4
   %cmp = fcmp ogt float %0, %1
 ; CHECK-LABEL: fgt1:
 ; CHECK-DAG:    lw      $[[REG_F2_GOT:[0-9]+]], %got(f2)(${{[0-9]+}})
@@ -94,8 +94,8 @@
 ; Function Attrs: nounwind
 define void @fle1()  {
 entry:
-  %0 = load float* @f1, align 4
-  %1 = load float* @f2, align 4
+  %0 = load float, float* @f1, align 4
+  %1 = load float, float* @f2, align 4
   %cmp = fcmp ole float %0, %1
 ; CHECK-LABEL:  fle1:
 ; CHECK-DAG:    lw      $[[REG_F2_GOT:[0-9]+]], %got(f2)(${{[0-9]+}})
@@ -114,8 +114,8 @@
 ; Function Attrs: nounwind
 define void @fge1()  {
 entry:
-  %0 = load float* @f1, align 4
-  %1 = load float* @f2, align 4
+  %0 = load float, float* @f1, align 4
+  %1 = load float, float* @f2, align 4
   %cmp = fcmp oge float %0, %1
 ; CHECK-LABEL:  fge1:
 ; CHECK-DAG:    lw      $[[REG_F2_GOT:[0-9]+]], %got(f2)(${{[0-9]+}})
@@ -134,8 +134,8 @@
 ; Function Attrs: nounwind
 define void @deq1()  {
 entry:
-  %0 = load double* @d1, align 8
-  %1 = load double* @d2, align 8
+  %0 = load double, double* @d1, align 8
+  %1 = load double, double* @d2, align 8
   %cmp = fcmp oeq double %0, %1
 ; CHECK-LABEL:  deq1:
 ; CHECK-DAG:    lw      $[[REG_D2_GOT:[0-9]+]], %got(d2)(${{[0-9]+}})
@@ -154,8 +154,8 @@
 ; Function Attrs: nounwind
 define void @dne1()  {
 entry:
-  %0 = load double* @d1, align 8
-  %1 = load double* @d2, align 8
+  %0 = load double, double* @d1, align 8
+  %1 = load double, double* @d2, align 8
   %cmp = fcmp une double %0, %1
 ; CHECK-LABEL:  dne1:
 ; CHECK-DAG:    lw      $[[REG_D2_GOT:[0-9]+]], %got(d2)(${{[0-9]+}})
@@ -174,8 +174,8 @@
 ; Function Attrs: nounwind
 define void @dlt1()  {
 entry:
-  %0 = load double* @d1, align 8
-  %1 = load double* @d2, align 8
+  %0 = load double, double* @d1, align 8
+  %1 = load double, double* @d2, align 8
   %cmp = fcmp olt double %0, %1
 ; CHECK-LABEL:  dlt1:
 ; CHECK-DAG:    lw      $[[REG_D2_GOT:[0-9]+]], %got(d2)(${{[0-9]+}})
@@ -194,8 +194,8 @@
 ; Function Attrs: nounwind
 define void @dgt1()  {
 entry:
-  %0 = load double* @d1, align 8
-  %1 = load double* @d2, align 8
+  %0 = load double, double* @d1, align 8
+  %1 = load double, double* @d2, align 8
   %cmp = fcmp ogt double %0, %1
 ; CHECK-LABEL:  dgt1:
 ; CHECK-DAG:    lw      $[[REG_D2_GOT:[0-9]+]], %got(d2)(${{[0-9]+}})
@@ -214,8 +214,8 @@
 ; Function Attrs: nounwind
 define void @dle1()  {
 entry:
-  %0 = load double* @d1, align 8
-  %1 = load double* @d2, align 8
+  %0 = load double, double* @d1, align 8
+  %1 = load double, double* @d2, align 8
   %cmp = fcmp ole double %0, %1
 ; CHECK-LABEL:  dle1:
 ; CHECK-DAG:    lw      $[[REG_D2_GOT:[0-9]+]], %got(d2)(${{[0-9]+}})
@@ -234,8 +234,8 @@
 ; Function Attrs: nounwind
 define void @dge1()  {
 entry:
-  %0 = load double* @d1, align 8
-  %1 = load double* @d2, align 8
+  %0 = load double, double* @d1, align 8
+  %1 = load double, double* @d2, align 8
   %cmp = fcmp oge double %0, %1
 ; CHECK-LABEL:  dge1:
 ; CHECK-DAG:    lw      $[[REG_D2_GOT:[0-9]+]], %got(d2)(${{[0-9]+}})
diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/fpext.ll b/llvm/test/CodeGen/Mips/Fast-ISel/fpext.ll
index 8b2570a..5ac2249 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/fpext.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/fpext.ll
@@ -10,7 +10,7 @@
 ; Function Attrs: nounwind
 define void @dv() #0 {
 entry:
-  %0 = load float* @f, align 4
+  %0 = load float, float* @f, align 4
   %conv = fpext float %0 to double
 ; CHECK: cvt.d.s  $f{{[0-9]+}}, $f{{[0-9]+}}
   store double %conv, double* @d_f, align 8
diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/fpintconv.ll b/llvm/test/CodeGen/Mips/Fast-ISel/fpintconv.ll
index 5a2cd78..a94ef50 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/fpintconv.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/fpintconv.ll
@@ -14,7 +14,7 @@
 define void @ifv() {
 entry:
 ; CHECK-LABEL:   .ent  ifv
-  %0 = load float* @f, align 4
+  %0 = load float, float* @f, align 4
   %conv = fptosi float %0 to i32
 ; CHECK:   trunc.w.s  $f[[REG:[0-9]+]], $f{{[0-9]+}}
 ; CHECK:   mfc1	${{[0-9]+}}, $f[[REG]]
@@ -26,7 +26,7 @@
 define void @idv() {
 entry:
 ; CHECK-LABEL:   .ent  idv
-  %0 = load double* @d, align 8
+  %0 = load double, double* @d, align 8
   %conv = fptosi double %0 to i32
 ; CHECK:   trunc.w.d  $f[[REG:[0-9]+]], $f{{[0-9]+}}
 ; CHECK:   mfc1	${{[0-9]+}}, $f[[REG]]
diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/fptrunc.ll b/llvm/test/CodeGen/Mips/Fast-ISel/fptrunc.ll
index f9739e1..2eec4c3 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/fptrunc.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/fptrunc.ll
@@ -10,7 +10,7 @@
 ; Function Attrs: nounwind
 define void @fv() #0 {
 entry:
-  %0 = load double* @d, align 8
+  %0 = load double, double* @d, align 8
   %conv = fptrunc double %0 to float
 ; CHECK: cvt.s.d  $f{{[0-9]+}}, $f{{[0-9]+}}
   store float %conv, float* @f, align 4
diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/icmpa.ll b/llvm/test/CodeGen/Mips/Fast-ISel/icmpa.ll
index d2bca3a..670a8d5 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/icmpa.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/icmpa.ll
@@ -14,8 +14,8 @@
 entry:
 ; CHECK-LABEL:  .ent  eq
 
-  %0 = load i32* @c, align 4
-  %1 = load i32* @d, align 4
+  %0 = load i32, i32* @c, align 4
+  %1 = load i32, i32* @d, align 4
   %cmp = icmp eq i32 %0, %1
   %conv = zext i1 %cmp to i32
 ; CHECK-DAG:  lw	$[[REG_D_GOT:[0-9+]]], %got(d)(${{[0-9]+}})
@@ -35,8 +35,8 @@
 define void @ne()  {
 entry:
 ; CHECK-LABEL:  .ent  ne
-  %0 = load i32* @c, align 4
-  %1 = load i32* @d, align 4
+  %0 = load i32, i32* @c, align 4
+  %1 = load i32, i32* @d, align 4
   %cmp = icmp ne i32 %0, %1
   %conv = zext i1 %cmp to i32
 ; CHECK-DAG:  lw	$[[REG_D_GOT:[0-9+]]], %got(d)(${{[0-9]+}})
@@ -56,8 +56,8 @@
 define void @ugt()  {
 entry:
 ; CHECK-LABEL:  .ent  ugt
-  %0 = load i32* @uc, align 4
-  %1 = load i32* @ud, align 4
+  %0 = load i32, i32* @uc, align 4
+  %1 = load i32, i32* @ud, align 4
   %cmp = icmp ugt i32 %0, %1
   %conv = zext i1 %cmp to i32
 ; CHECK-DAG:  lw	$[[REG_UD_GOT:[0-9+]]], %got(ud)(${{[0-9]+}})
@@ -76,8 +76,8 @@
 define void @ult()  {
 entry:
 ; CHECK-LABEL:  .ent  ult
-  %0 = load i32* @uc, align 4
-  %1 = load i32* @ud, align 4
+  %0 = load i32, i32* @uc, align 4
+  %1 = load i32, i32* @ud, align 4
   %cmp = icmp ult i32 %0, %1
   %conv = zext i1 %cmp to i32
 ; CHECK-DAG:  lw	$[[REG_UD_GOT:[0-9+]]], %got(ud)(${{[0-9]+}})
@@ -95,8 +95,8 @@
 define void @uge()  {
 entry:
 ; CHECK-LABEL:  .ent  uge
-  %0 = load i32* @uc, align 4
-  %1 = load i32* @ud, align 4
+  %0 = load i32, i32* @uc, align 4
+  %1 = load i32, i32* @ud, align 4
   %cmp = icmp uge i32 %0, %1
   %conv = zext i1 %cmp to i32
 ; CHECK-DAG:  lw	$[[REG_UD_GOT:[0-9+]]], %got(ud)(${{[0-9]+}})
@@ -115,8 +115,8 @@
 define void @ule()  {
 entry:
 ; CHECK-LABEL:  .ent  ule
-  %0 = load i32* @uc, align 4
-  %1 = load i32* @ud, align 4
+  %0 = load i32, i32* @uc, align 4
+  %1 = load i32, i32* @ud, align 4
   %cmp = icmp ule i32 %0, %1
   %conv = zext i1 %cmp to i32
 ; CHECK-DAG:  lw	$[[REG_UD_GOT:[0-9+]]], %got(ud)(${{[0-9]+}})
@@ -135,8 +135,8 @@
 define void @sgt()  {
 entry:
 ; CHECK-LABEL:  .ent sgt
-  %0 = load i32* @c, align 4
-  %1 = load i32* @d, align 4
+  %0 = load i32, i32* @c, align 4
+  %1 = load i32, i32* @d, align 4
   %cmp = icmp sgt i32 %0, %1
   %conv = zext i1 %cmp to i32
 ; CHECK-DAG:  lw	$[[REG_D_GOT:[0-9+]]], %got(d)(${{[0-9]+}})
@@ -154,8 +154,8 @@
 define void @slt()  {
 entry:
 ; CHECK-LABEL:  .ent slt
-  %0 = load i32* @c, align 4
-  %1 = load i32* @d, align 4
+  %0 = load i32, i32* @c, align 4
+  %1 = load i32, i32* @d, align 4
   %cmp = icmp slt i32 %0, %1
   %conv = zext i1 %cmp to i32
 ; CHECK-DAG:  lw	$[[REG_D_GOT:[0-9+]]], %got(d)(${{[0-9]+}})
@@ -173,8 +173,8 @@
 define void @sge()  {
 entry:
 ; CHECK-LABEL:  .ent sge
-  %0 = load i32* @c, align 4
-  %1 = load i32* @d, align 4
+  %0 = load i32, i32* @c, align 4
+  %1 = load i32, i32* @d, align 4
   %cmp = icmp sge i32 %0, %1
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @b1, align 4
@@ -193,8 +193,8 @@
 define void @sle()  {
 entry:
 ; CHECK-LABEL:  .ent sle
-  %0 = load i32* @c, align 4
-  %1 = load i32* @d, align 4
+  %0 = load i32, i32* @c, align 4
+  %1 = load i32, i32* @d, align 4
   %cmp = icmp sle i32 %0, %1
   %conv = zext i1 %cmp to i32
 ; CHECK-DAG:  lw	$[[REG_D_GOT:[0-9+]]], %got(d)(${{[0-9]+}})
diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/loadstore2.ll b/llvm/test/CodeGen/Mips/Fast-ISel/loadstore2.ll
index c649f61..3daf03d 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/loadstore2.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/loadstore2.ll
@@ -21,7 +21,7 @@
 ; Function Attrs: nounwind
 define void @cfoo() #0 {
 entry:
-  %0 = load i8* @c2, align 1
+  %0 = load i8, i8* @c2, align 1
   store i8 %0, i8* @c1, align 1
 ; CHECK-LABEL:	cfoo:
 ; CHECK:	lbu	$[[REGc:[0-9]+]], 0(${{[0-9]+}})
@@ -34,7 +34,7 @@
 ; Function Attrs: nounwind
 define void @sfoo() #0 {
 entry:
-  %0 = load i16* @s2, align 2
+  %0 = load i16, i16* @s2, align 2
   store i16 %0, i16* @s1, align 2
 ; CHECK-LABEL:	sfoo:
 ; CHECK:	lhu	$[[REGs:[0-9]+]], 0(${{[0-9]+}})
@@ -46,7 +46,7 @@
 ; Function Attrs: nounwind
 define void @ifoo() #0 {
 entry:
-  %0 = load i32* @i2, align 4
+  %0 = load i32, i32* @i2, align 4
   store i32 %0, i32* @i1, align 4
 ; CHECK-LABEL:	ifoo:
 ; CHECK:	lw	$[[REGi:[0-9]+]], 0(${{[0-9]+}})
@@ -58,7 +58,7 @@
 ; Function Attrs: nounwind
 define void @ffoo() #0 {
 entry:
-  %0 = load float* @f2, align 4
+  %0 = load float, float* @f2, align 4
   store float %0, float* @f1, align 4
 ; CHECK-LABEL:	ffoo:
 ; CHECK:	lwc1	$f[[REGf:[0-9]+]], 0(${{[0-9]+}})
@@ -71,7 +71,7 @@
 ; Function Attrs: nounwind
 define void @dfoo() #0 {
 entry:
-  %0 = load double* @d2, align 8
+  %0 = load double, double* @d2, align 8
   store double %0, double* @d1, align 8
 ; CHECK-LABEL:        dfoo:
 ; CHECK:        ldc1    $f[[REGd:[0-9]+]], 0(${{[0-9]+}})
diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/loadstoreconv.ll b/llvm/test/CodeGen/Mips/Fast-ISel/loadstoreconv.ll
index ca56520..acba132 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/loadstoreconv.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/loadstoreconv.ll
@@ -28,11 +28,11 @@
 define void @_Z3b_iv()  {
 entry:
 ; CHECK-LABEL:   .ent  _Z3b_iv
-  %0 = load i8* @b1, align 1
+  %0 = load i8, i8* @b1, align 1
   %tobool = trunc i8 %0 to i1
   %frombool = zext i1 %tobool to i8
   store i8 %frombool, i8* @b2, align 1
-  %1 = load i8* @b2, align 1
+  %1 = load i8, i8* @b2, align 1
   %tobool1 = trunc i8 %1 to i1
   %conv = zext i1 %tobool1 to i32
   store i32 %conv, i32* @i, align 4
@@ -51,10 +51,10 @@
 entry:
 ; CHECK-LABEL:  .ent  _Z4uc_iv
 
-  %0 = load i8* @uc1, align 1
+  %0 = load i8, i8* @uc1, align 1
   %conv = zext i8 %0 to i32
   store i32 %conv, i32* @i, align 4
-  %1 = load i8* @uc2, align 1
+  %1 = load i8, i8* @uc2, align 1
   %conv1 = zext i8 %1 to i32
 ; CHECK:   lbu  $[[REG1:[0-9]+]], 0(${{[0-9]+}})
 ; CHECK:  andi  ${{[0-9]+}}, $[[REG1]], 255
@@ -71,10 +71,10 @@
 ; mips32r2-LABEL:  .ent  _Z4sc_iv
 ; mips32-LABEL:  .ent  _Z4sc_iv
 
-  %0 = load i8* @sc1, align 1
+  %0 = load i8, i8* @sc1, align 1
   %conv = sext i8 %0 to i32
   store i32 %conv, i32* @i, align 4
-  %1 = load i8* @sc2, align 1
+  %1 = load i8, i8* @sc2, align 1
   %conv1 = sext i8 %1 to i32
   store i32 %conv1, i32* @j, align 4
 ; mips32r2:  lbu  $[[REG1:[0-9]+]], 0(${{[0-9]+}})
@@ -91,10 +91,10 @@
 define void @_Z4us_iv()  {
 entry:
 ; CHECK-LABEL:  .ent  _Z4us_iv
-  %0 = load i16* @us1, align 2
+  %0 = load i16, i16* @us1, align 2
   %conv = zext i16 %0 to i32
   store i32 %conv, i32* @i, align 4
-  %1 = load i16* @us2, align 2
+  %1 = load i16, i16* @us2, align 2
   %conv1 = zext i16 %1 to i32
   store i32 %conv1, i32* @j, align 4
   ret void
@@ -109,10 +109,10 @@
 ; mips32r2-LABEL:  .ent  _Z4ss_iv
 ; mips32=LABEL:  .ent  _Z4ss_iv
 
-  %0 = load i16* @ss1, align 2
+  %0 = load i16, i16* @ss1, align 2
   %conv = sext i16 %0 to i32
   store i32 %conv, i32* @i, align 4
-  %1 = load i16* @ss2, align 2
+  %1 = load i16, i16* @ss2, align 2
   %conv1 = sext i16 %1 to i32
   store i32 %conv1, i32* @j, align 4
 ; mips32r2:  lhu  $[[REG1:[0-9]+]], 0(${{[0-9]+}})
@@ -129,7 +129,7 @@
 define void @_Z4b_ssv()  {
 entry:
 ; CHECK-LABEL:  .ent  _Z4b_ssv
-  %0 = load i8* @b2, align 1
+  %0 = load i8, i8* @b2, align 1
   %tobool = trunc i8 %0 to i1
   %conv = zext i1 %tobool to i16
   store i16 %conv, i16* @ssi, align 2
@@ -143,10 +143,10 @@
 define void @_Z5uc_ssv()  {
 entry:
 ; CHECK-LABEL:  .ent  _Z5uc_ssv
-  %0 = load i8* @uc1, align 1
+  %0 = load i8, i8* @uc1, align 1
   %conv = zext i8 %0 to i16
   store i16 %conv, i16* @ssi, align 2
-  %1 = load i8* @uc2, align 1
+  %1 = load i8, i8* @uc2, align 1
   %conv1 = zext i8 %1 to i16
 ; CHECK:   lbu  $[[REG1:[0-9]+]], 0(${{[0-9]+}})
 ; CHECK:  andi  ${{[0-9]+}}, $[[REG1]], 255
@@ -161,10 +161,10 @@
 entry:
 ; mips32r2-LABEL:  .ent  _Z5sc_ssv
 ; mips32-LABEL:  .ent  _Z5sc_ssv
-  %0 = load i8* @sc1, align 1
+  %0 = load i8, i8* @sc1, align 1
   %conv = sext i8 %0 to i16
   store i16 %conv, i16* @ssi, align 2
-  %1 = load i8* @sc2, align 1
+  %1 = load i8, i8* @sc2, align 1
   %conv1 = sext i8 %1 to i16
   store i16 %conv1, i16* @ssj, align 2
 ; mips32r2:  lbu  $[[REG1:[0-9]+]], 0(${{[0-9]+}})
diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/overflt.ll b/llvm/test/CodeGen/Mips/Fast-ISel/overflt.ll
index 3792510..db01362 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/overflt.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/overflt.ll
@@ -12,7 +12,7 @@
 define void @foo() {
 entry:
 ; CHECK-LABEL:   .ent  foo
-  %0 = load float** @y, align 4
+  %0 = load float*, float** @y, align 4
   %arrayidx = getelementptr inbounds float, float* %0, i32 64000
   store float 5.500000e+00, float* %arrayidx, align 4
 ; CHECK:        lui     $[[REG_FPCONST_INT:[0-9]+]], 16560
@@ -31,9 +31,9 @@
 define void @goo() {
 entry:
 ; CHECK-LABEL:   .ent  goo
-  %0 = load float** @y, align 4
+  %0 = load float*, float** @y, align 4
   %arrayidx = getelementptr inbounds float, float* %0, i32 64000
-  %1 = load float* %arrayidx, align 4
+  %1 = load float, float* %arrayidx, align 4
   store float %1, float* @result, align 4
 ; CHECK-DAG:    lw      $[[REG_RESULT:[0-9]+]], %got(result)(${{[0-9]+}})
 ; CHECK-DAG:    lw      $[[REG_Y_GOT:[0-9]+]], %got(y)(${{[0-9]+}})
diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/retabi.ll b/llvm/test/CodeGen/Mips/Fast-ISel/retabi.ll
index 109a7f6..ce0ca34 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/retabi.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/retabi.ll
@@ -11,7 +11,7 @@
 define i32 @reti() {
 entry:
 ; CHECK-LABEL: reti:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   ret i32 %0
 ; CHECK:        lui     $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
 ; CHECK:        addiu   $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
@@ -25,7 +25,7 @@
 define signext i16 @rets() {
 entry:
 ; CHECK-LABEL: rets:
-  %0 = load i16* @s, align 2
+  %0 = load i16, i16* @s, align 2
   ret i16 %0
 ; CHECK:        lui     $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
 ; CHECK:        addiu   $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
@@ -40,7 +40,7 @@
 define signext i8 @retc() {
 entry:
 ; CHECK-LABEL: retc:
-  %0 = load i8* @c, align 1
+  %0 = load i8, i8* @c, align 1
   ret i8 %0
 ; CHECK:        lui     $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
 ; CHECK:        addiu   $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
@@ -55,7 +55,7 @@
 define float @retf() {
 entry:
 ; CHECK-LABEL: retf:
-  %0 = load float* @f, align 4
+  %0 = load float, float* @f, align 4
   ret float %0
 ; CHECK:        lui     $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
 ; CHECK:        addiu   $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
@@ -69,7 +69,7 @@
 define double @retd() {
 entry:
 ; CHECK-LABEL: retd:
-  %0 = load double* @d, align 8
+  %0 = load double, double* @d, align 8
   ret double %0
 ; CHECK:        lui     $[[REG_GPa:[0-9]+]], %hi(_gp_disp)
 ; CHECK:        addiu   $[[REG_GPb:[0-9]+]], $[[REG_GPa]], %lo(_gp_disp)
diff --git a/llvm/test/CodeGen/Mips/Fast-ISel/shift.ll b/llvm/test/CodeGen/Mips/Fast-ISel/shift.ll
index 18fd5ac..df1c827 100644
--- a/llvm/test/CodeGen/Mips/Fast-ISel/shift.ll
+++ b/llvm/test/CodeGen/Mips/Fast-ISel/shift.ll
@@ -9,7 +9,7 @@
 entry:
   %foo = alloca %struct.s, align 4
   %0 = bitcast %struct.s* %foo to i32*
-  %bf.load = load i32* %0, align 4
+  %bf.load = load i32, i32* %0, align 4
   %bf.lshr = lshr i32 %bf.load, 2
   %cmp = icmp ne i32 %bf.lshr, 2
   br i1 %cmp, label %if.then, label %if.end
diff --git a/llvm/test/CodeGen/Mips/addi.ll b/llvm/test/CodeGen/Mips/addi.ll
index 01d409e..b6af2ee 100644
--- a/llvm/test/CodeGen/Mips/addi.ll
+++ b/llvm/test/CodeGen/Mips/addi.ll
@@ -8,16 +8,16 @@
 
 define void @foo() nounwind {
 entry:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %add = add nsw i32 %0, 5
   store i32 %add, i32* @i, align 4
-  %1 = load i32* @j, align 4
+  %1 = load i32, i32* @j, align 4
   %sub = sub nsw i32 %1, 5
   store i32 %sub, i32* @j, align 4
-  %2 = load i32* @k, align 4
+  %2 = load i32, i32* @k, align 4
   %add1 = add nsw i32 %2, 10000
   store i32 %add1, i32* @k, align 4
-  %3 = load i32* @l, align 4
+  %3 = load i32, i32* @l, align 4
   %sub2 = sub nsw i32 %3, 10000
   store i32 %sub2, i32* @l, align 4
 ; 16: 	addiu	${{[0-9]+}}, 5	# 16 bit inst
diff --git a/llvm/test/CodeGen/Mips/addressing-mode.ll b/llvm/test/CodeGen/Mips/addressing-mode.ll
index e4e3a27..81e0620 100644
--- a/llvm/test/CodeGen/Mips/addressing-mode.ll
+++ b/llvm/test/CodeGen/Mips/addressing-mode.ll
@@ -21,9 +21,9 @@
   %s.120 = phi i32 [ %s.022, %for.cond1.preheader ], [ %add7, %for.body3 ]
   %j.019 = phi i32 [ 0, %for.cond1.preheader ], [ %add8, %for.body3 ]
   %arrayidx4 = getelementptr inbounds [256 x i32], [256 x i32]* %a, i32 %i.021, i32 %j.019
-  %0 = load i32* %arrayidx4, align 4
+  %0 = load i32, i32* %arrayidx4, align 4
   %arrayidx6 = getelementptr inbounds [256 x i32], [256 x i32]* %b, i32 %i.021, i32 %j.019
-  %1 = load i32* %arrayidx6, align 4
+  %1 = load i32, i32* %arrayidx6, align 4
   %add = add i32 %0, %s.120
   %add7 = add i32 %add, %1
   %add8 = add nsw i32 %j.019, %m
diff --git a/llvm/test/CodeGen/Mips/align16.ll b/llvm/test/CodeGen/Mips/align16.ll
index 580a89c..f385adf 100644
--- a/llvm/test/CodeGen/Mips/align16.ll
+++ b/llvm/test/CodeGen/Mips/align16.ll
@@ -15,10 +15,10 @@
   %x = alloca i32, align 8
   %zz = alloca i32, align 4
   %z = alloca i32, align 4
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %arrayidx = getelementptr inbounds [512 x i32], [512 x i32]* %y, i32 0, i32 10
   store i32 %0, i32* %arrayidx, align 4
-  %1 = load i32* @i, align 4
+  %1 = load i32, i32* @i, align 4
   store i32 %1, i32* %x, align 8
   call void @p(i32* %x)
   %arrayidx1 = getelementptr inbounds [512 x i32], [512 x i32]* %y, i32 0, i32 10
diff --git a/llvm/test/CodeGen/Mips/alloca.ll b/llvm/test/CodeGen/Mips/alloca.ll
index 0700ea3..9f2cef1 100644
--- a/llvm/test/CodeGen/Mips/alloca.ll
+++ b/llvm/test/CodeGen/Mips/alloca.ll
@@ -59,23 +59,23 @@
 ; CHECK: lw  $25, %call16(printf)
 
   %.pre-phi = phi i32* [ %2, %if.else ], [ %.pre, %if.then ]
-  %tmp7 = load i32* %0, align 4
+  %tmp7 = load i32, i32* %0, align 4
   %arrayidx9 = getelementptr inbounds i8, i8* %tmp1, i32 4
   %3 = bitcast i8* %arrayidx9 to i32*
-  %tmp10 = load i32* %3, align 4
+  %tmp10 = load i32, i32* %3, align 4
   %arrayidx12 = getelementptr inbounds i8, i8* %tmp1, i32 8
   %4 = bitcast i8* %arrayidx12 to i32*
-  %tmp13 = load i32* %4, align 4
-  %tmp16 = load i32* %.pre-phi, align 4
+  %tmp13 = load i32, i32* %4, align 4
+  %tmp16 = load i32, i32* %.pre-phi, align 4
   %arrayidx18 = getelementptr inbounds i8, i8* %tmp1, i32 16
   %5 = bitcast i8* %arrayidx18 to i32*
-  %tmp19 = load i32* %5, align 4
+  %tmp19 = load i32, i32* %5, align 4
   %arrayidx21 = getelementptr inbounds i8, i8* %tmp1, i32 20
   %6 = bitcast i8* %arrayidx21 to i32*
-  %tmp22 = load i32* %6, align 4
+  %tmp22 = load i32, i32* %6, align 4
   %arrayidx24 = getelementptr inbounds i8, i8* %tmp1, i32 24
   %7 = bitcast i8* %arrayidx24 to i32*
-  %tmp25 = load i32* %7, align 4
+  %tmp25 = load i32, i32* %7, align 4
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str, i32 0, i32 0), i32 %tmp7, i32 %tmp10, i32 %tmp13, i32 %tmp16, i32 %tmp19, i32 %tmp22, i32 %tmp25) nounwind
   ret i32 0
 }
diff --git a/llvm/test/CodeGen/Mips/alloca16.ll b/llvm/test/CodeGen/Mips/alloca16.ll
index 67ec2f9..be8cc74 100644
--- a/llvm/test/CodeGen/Mips/alloca16.ll
+++ b/llvm/test/CodeGen/Mips/alloca16.ll
@@ -12,7 +12,7 @@
 entry:
   %foo.addr = alloca i32, align 4
   store i32 %foo, i32* %foo.addr, align 4
-  %0 = load i32* %foo.addr, align 4
+  %0 = load i32, i32* %foo.addr, align 4
   store i32 %0, i32* @t, align 4
   ret void
 }
@@ -28,46 +28,46 @@
   %sssi = alloca i32, align 4
   %ip = alloca i32*, align 4
   %sssj = alloca i32, align 4
-  %0 = load i32* @iiii, align 4
+  %0 = load i32, i32* @iiii, align 4
   store i32 %0, i32* %sssi, align 4
-  %1 = load i32* @kkkk, align 4
+  %1 = load i32, i32* @kkkk, align 4
   %mul = mul nsw i32 %1, 100
   %2 = alloca i8, i32 %mul
   %3 = bitcast i8* %2 to i32*
   store i32* %3, i32** %ip, align 4
-  %4 = load i32* @jjjj, align 4
+  %4 = load i32, i32* @jjjj, align 4
   store i32 %4, i32* %sssj, align 4
-  %5 = load i32* @jjjj, align 4
-  %6 = load i32* @iiii, align 4
-  %7 = load i32** %ip, align 4
+  %5 = load i32, i32* @jjjj, align 4
+  %6 = load i32, i32* @iiii, align 4
+  %7 = load i32*, i32** %ip, align 4
   %arrayidx = getelementptr inbounds i32, i32* %7, i32 %6
   store i32 %5, i32* %arrayidx, align 4
-  %8 = load i32* @kkkk, align 4
-  %9 = load i32* @jjjj, align 4
-  %10 = load i32** %ip, align 4
+  %8 = load i32, i32* @kkkk, align 4
+  %9 = load i32, i32* @jjjj, align 4
+  %10 = load i32*, i32** %ip, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %10, i32 %9
   store i32 %8, i32* %arrayidx1, align 4
-  %11 = load i32* @iiii, align 4
-  %12 = load i32* @kkkk, align 4
-  %13 = load i32** %ip, align 4
+  %11 = load i32, i32* @iiii, align 4
+  %12 = load i32, i32* @kkkk, align 4
+  %13 = load i32*, i32** %ip, align 4
   %arrayidx2 = getelementptr inbounds i32, i32* %13, i32 %12
   store i32 %11, i32* %arrayidx2, align 4
-  %14 = load i32** %ip, align 4
+  %14 = load i32*, i32** %ip, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %14, i32 25
-  %15 = load i32* %arrayidx3, align 4
+  %15 = load i32, i32* %arrayidx3, align 4
   store i32 %15, i32* @riii, align 4
-  %16 = load i32** %ip, align 4
+  %16 = load i32*, i32** %ip, align 4
   %arrayidx4 = getelementptr inbounds i32, i32* %16, i32 35
-  %17 = load i32* %arrayidx4, align 4
+  %17 = load i32, i32* %arrayidx4, align 4
   store i32 %17, i32* @rjjj, align 4
-  %18 = load i32** %ip, align 4
+  %18 = load i32*, i32** %ip, align 4
   %arrayidx5 = getelementptr inbounds i32, i32* %18, i32 100
-  %19 = load i32* %arrayidx5, align 4
+  %19 = load i32, i32* %arrayidx5, align 4
   store i32 %19, i32* @rkkk, align 4
-  %20 = load i32* @t, align 4
-  %21 = load i32** %ip, align 4
+  %20 = load i32, i32* @t, align 4
+  %21 = load i32*, i32** %ip, align 4
   %arrayidx6 = getelementptr inbounds i32, i32* %21, i32 %20
-  %22 = load i32* %arrayidx6, align 4
+  %22 = load i32, i32* %arrayidx6, align 4
 ; 16: 	addiu $sp, -16
   call void @temp(i32 %22)
 ; 16: 	addiu $sp, 16
diff --git a/llvm/test/CodeGen/Mips/and1.ll b/llvm/test/CodeGen/Mips/and1.ll
index 4ff1204..67aef67 100644
--- a/llvm/test/CodeGen/Mips/and1.ll
+++ b/llvm/test/CodeGen/Mips/and1.ll
@@ -6,8 +6,8 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @x, align 4
-  %1 = load i32* @y, align 4
+  %0 = load i32, i32* @x, align 4
+  %1 = load i32, i32* @y, align 4
   %and = and i32 %0, %1
 ; 16:	and	${{[0-9]+}}, ${{[0-9]+}}
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %and)
diff --git a/llvm/test/CodeGen/Mips/atomic.ll b/llvm/test/CodeGen/Mips/atomic.ll
index ccfeb00..ccd9b26 100644
--- a/llvm/test/CodeGen/Mips/atomic.ll
+++ b/llvm/test/CodeGen/Mips/atomic.ll
@@ -54,7 +54,7 @@
 entry:
   %newval.addr = alloca i32, align 4
   store i32 %newval, i32* %newval.addr, align 4
-  %tmp = load i32* %newval.addr, align 4
+  %tmp = load i32, i32* %newval.addr, align 4
   %0 = atomicrmw xchg i32* @x, i32 %tmp monotonic
   ret i32 %0
 
@@ -74,7 +74,7 @@
 entry:
   %newval.addr = alloca i32, align 4
   store i32 %newval, i32* %newval.addr, align 4
-  %tmp = load i32* %newval.addr, align 4
+  %tmp = load i32, i32* %newval.addr, align 4
   %0 = cmpxchg i32* @x, i32 %oldval, i32 %tmp monotonic monotonic
   %1 = extractvalue { i32, i1 } %0, 0
   ret i32 %1
diff --git a/llvm/test/CodeGen/Mips/atomicops.ll b/llvm/test/CodeGen/Mips/atomicops.ll
index c264152..c1093cf 100644
--- a/llvm/test/CodeGen/Mips/atomicops.ll
+++ b/llvm/test/CodeGen/Mips/atomicops.ll
@@ -18,14 +18,14 @@
   store volatile i32 0, i32* %x, align 4
   %0 = atomicrmw add i32* %x, i32 1 seq_cst
   %add.i = add nsw i32 %0, 2
-  %1 = load volatile i32* %x, align 4
+  %1 = load volatile i32, i32* %x, align 4
   %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %add.i, i32 %1) nounwind
   %pair = cmpxchg i32* %x, i32 1, i32 2 seq_cst seq_cst
   %2 = extractvalue { i32, i1 } %pair, 0
-  %3 = load volatile i32* %x, align 4
+  %3 = load volatile i32, i32* %x, align 4
   %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %2, i32 %3) nounwind
   %4 = atomicrmw xchg i32* %x, i32 1 seq_cst
-  %5 = load volatile i32* %x, align 4
+  %5 = load volatile i32, i32* %x, align 4
   %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %4, i32 %5) nounwind
 ; 16-LABEL: main:
 ; 16:	lw	${{[0-9]+}}, %call16(__sync_synchronize)(${{[0-9]+}})
diff --git a/llvm/test/CodeGen/Mips/beqzc.ll b/llvm/test/CodeGen/Mips/beqzc.ll
index 4a294c2..afb66a9 100644
--- a/llvm/test/CodeGen/Mips/beqzc.ll
+++ b/llvm/test/CodeGen/Mips/beqzc.ll
@@ -6,7 +6,7 @@
 ; Function Attrs: nounwind optsize
 define i32 @main() #0 {
 entry:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %cmp = icmp eq i32 %0, 0
   %. = select i1 %cmp, i32 10, i32 55
   store i32 %., i32* @j, align 4
diff --git a/llvm/test/CodeGen/Mips/beqzc1.ll b/llvm/test/CodeGen/Mips/beqzc1.ll
index 8f929a8..fe0dd2a 100644
--- a/llvm/test/CodeGen/Mips/beqzc1.ll
+++ b/llvm/test/CodeGen/Mips/beqzc1.ll
@@ -6,7 +6,7 @@
 ; Function Attrs: nounwind optsize
 define i32 @main() #0 {
 entry:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %cmp = icmp eq i32 %0, 0
   br i1 %cmp, label %if.then, label %if.end
 
diff --git a/llvm/test/CodeGen/Mips/biggot.ll b/llvm/test/CodeGen/Mips/biggot.ll
index da287ee..b56ce6b 100644
--- a/llvm/test/CodeGen/Mips/biggot.ll
+++ b/llvm/test/CodeGen/Mips/biggot.ll
@@ -20,7 +20,7 @@
 ; N64: daddu  $[[R3:[0-9]+]], $[[R2]], ${{[a-z0-9]+}}
 ; N64: ld  ${{[0-9]+}}, %call_lo(foo0)($[[R3]])
 
-  %0 = load i32* @v0, align 4
+  %0 = load i32, i32* @v0, align 4
   tail call void @foo0(i32 %0) nounwind
   ret void
 }
diff --git a/llvm/test/CodeGen/Mips/brconeq.ll b/llvm/test/CodeGen/Mips/brconeq.ll
index 6133915..f555528 100644
--- a/llvm/test/CodeGen/Mips/brconeq.ll
+++ b/llvm/test/CodeGen/Mips/brconeq.ll
@@ -6,8 +6,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @i, align 4
-  %1 = load i32* @j, align 4
+  %0 = load i32, i32* @i, align 4
+  %1 = load i32, i32* @j, align 4
   %cmp = icmp eq i32 %0, %1
 ; 16:	cmp	${{[0-9]+}}, ${{[0-9]+}}
 ; 16:	bteqz	$[[LABEL:[0-9A-Ba-b_]+]]
diff --git a/llvm/test/CodeGen/Mips/brconeqk.ll b/llvm/test/CodeGen/Mips/brconeqk.ll
index 2c0e72d..59edae8 100644
--- a/llvm/test/CodeGen/Mips/brconeqk.ll
+++ b/llvm/test/CodeGen/Mips/brconeqk.ll
@@ -5,7 +5,7 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %cmp = icmp eq i32 %0, 10
   br i1 %cmp, label %if.end, label %if.then
 ; 16:	cmpi	${{[0-9]+}}, {{[0-9]+}}
diff --git a/llvm/test/CodeGen/Mips/brconeqz.ll b/llvm/test/CodeGen/Mips/brconeqz.ll
index 5586e7b..22c5664 100644
--- a/llvm/test/CodeGen/Mips/brconeqz.ll
+++ b/llvm/test/CodeGen/Mips/brconeqz.ll
@@ -5,7 +5,7 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %cmp = icmp eq i32 %0, 0
   br i1 %cmp, label %if.end, label %if.then
 ; 16:	beqz	${{[0-9]+}}, $[[LABEL:[0-9A-Ba-b_]+]]
diff --git a/llvm/test/CodeGen/Mips/brconge.ll b/llvm/test/CodeGen/Mips/brconge.ll
index 02f0a63..46d1984 100644
--- a/llvm/test/CodeGen/Mips/brconge.ll
+++ b/llvm/test/CodeGen/Mips/brconge.ll
@@ -8,8 +8,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @i, align 4
-  %1 = load i32* @j, align 4
+  %0 = load i32, i32* @i, align 4
+  %1 = load i32, i32* @j, align 4
   %cmp = icmp slt i32 %0, %1
   br i1 %cmp, label %if.then, label %if.end
 
@@ -22,7 +22,7 @@
   br label %if.end
 
 if.end:                                           ; preds = %if.then, %entry
-  %2 = load i32* @k, align 4
+  %2 = load i32, i32* @k, align 4
   %cmp1 = icmp slt i32 %0, %2
   br i1 %cmp1, label %if.then2, label %if.end3
 
diff --git a/llvm/test/CodeGen/Mips/brcongt.ll b/llvm/test/CodeGen/Mips/brcongt.ll
index 767b51b..cefacb8 100644
--- a/llvm/test/CodeGen/Mips/brcongt.ll
+++ b/llvm/test/CodeGen/Mips/brcongt.ll
@@ -7,8 +7,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @i, align 4
-  %1 = load i32* @j, align 4
+  %0 = load i32, i32* @i, align 4
+  %1 = load i32, i32* @j, align 4
   %cmp = icmp sgt i32 %0, %1
   br i1 %cmp, label %if.end, label %if.then
 ; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
diff --git a/llvm/test/CodeGen/Mips/brconle.ll b/llvm/test/CodeGen/Mips/brconle.ll
index 854b248..e1f15ec 100644
--- a/llvm/test/CodeGen/Mips/brconle.ll
+++ b/llvm/test/CodeGen/Mips/brconle.ll
@@ -8,8 +8,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @j, align 4
-  %1 = load i32* @i, align 4
+  %0 = load i32, i32* @j, align 4
+  %1 = load i32, i32* @i, align 4
   %cmp = icmp sgt i32 %0, %1
   br i1 %cmp, label %if.then, label %if.end
 
@@ -22,7 +22,7 @@
   br label %if.end
 
 if.end:                                           ; preds = %if.then, %entry
-  %2 = load i32* @k, align 4
+  %2 = load i32, i32* @k, align 4
   %cmp1 = icmp sgt i32 %1, %2
   br i1 %cmp1, label %if.then2, label %if.end3
 
diff --git a/llvm/test/CodeGen/Mips/brconlt.ll b/llvm/test/CodeGen/Mips/brconlt.ll
index 931a3e8..049f35c 100644
--- a/llvm/test/CodeGen/Mips/brconlt.ll
+++ b/llvm/test/CodeGen/Mips/brconlt.ll
@@ -7,8 +7,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @j, align 4
-  %1 = load i32* @i, align 4
+  %0 = load i32, i32* @j, align 4
+  %1 = load i32, i32* @i, align 4
   %cmp = icmp slt i32 %0, %1
   br i1 %cmp, label %if.end, label %if.then
 
diff --git a/llvm/test/CodeGen/Mips/brconne.ll b/llvm/test/CodeGen/Mips/brconne.ll
index 5d5bde3..b260320 100644
--- a/llvm/test/CodeGen/Mips/brconne.ll
+++ b/llvm/test/CodeGen/Mips/brconne.ll
@@ -6,8 +6,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @j, align 4
-  %1 = load i32* @i, align 4
+  %0 = load i32, i32* @j, align 4
+  %1 = load i32, i32* @i, align 4
   %cmp = icmp eq i32 %0, %1
   br i1 %cmp, label %if.then, label %if.end
 ; 16:	cmp	${{[0-9]+}}, ${{[0-9]+}}
diff --git a/llvm/test/CodeGen/Mips/brconnek.ll b/llvm/test/CodeGen/Mips/brconnek.ll
index 6208d7c..778a5cce 100644
--- a/llvm/test/CodeGen/Mips/brconnek.ll
+++ b/llvm/test/CodeGen/Mips/brconnek.ll
@@ -5,7 +5,7 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @j, align 4
+  %0 = load i32, i32* @j, align 4
   %cmp = icmp eq i32 %0, 5
   br i1 %cmp, label %if.then, label %if.end
 
diff --git a/llvm/test/CodeGen/Mips/brconnez.ll b/llvm/test/CodeGen/Mips/brconnez.ll
index 47db790..754714b 100644
--- a/llvm/test/CodeGen/Mips/brconnez.ll
+++ b/llvm/test/CodeGen/Mips/brconnez.ll
@@ -5,7 +5,7 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @j, align 4
+  %0 = load i32, i32* @j, align 4
   %cmp = icmp eq i32 %0, 0
   br i1 %cmp, label %if.then, label %if.end
 
diff --git a/llvm/test/CodeGen/Mips/brdelayslot.ll b/llvm/test/CodeGen/Mips/brdelayslot.ll
index bcaba79..0f46619 100644
--- a/llvm/test/CodeGen/Mips/brdelayslot.ll
+++ b/llvm/test/CodeGen/Mips/brdelayslot.ll
@@ -54,18 +54,18 @@
 
 define void @foo5(i32 %a) nounwind {
 entry:
-  %0 = load i32* @g2, align 4
+  %0 = load i32, i32* @g2, align 4
   %tobool = icmp eq i32 %a, 0
   br i1 %tobool, label %if.else, label %if.then
 
 if.then:
-  %1 = load i32* @g1, align 4
+  %1 = load i32, i32* @g1, align 4
   %add = add nsw i32 %1, %0
   store i32 %add, i32* @g1, align 4
   br label %if.end
 
 if.else:
-  %2 = load i32* @g3, align 4
+  %2 = load i32, i32* @g3, align 4
   %sub = sub nsw i32 %2, %0
   store i32 %sub, i32* @g3, align 4
   br label %if.end
@@ -99,9 +99,9 @@
 define i32 @foo8(i32 %a) nounwind {
 entry:
   store i32 %a, i32* @g1, align 4
-  %0 = load void ()** @foo9, align 4
+  %0 = load void ()*, void ()** @foo9, align 4
   tail call void %0() nounwind
-  %1 = load i32* @g1, align 4
+  %1 = load i32, i32* @g1, align 4
   %add = add nsw i32 %1, %a
   ret i32 %add
 }
@@ -145,7 +145,7 @@
   %s.06 = phi i32 [ %add, %for.body ], [ 0, %entry ]
   %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.05
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %s.06
   %inc = add nsw i32 %i.05, 1
   %exitcond = icmp eq i32 %inc, %n
diff --git a/llvm/test/CodeGen/Mips/brind.ll b/llvm/test/CodeGen/Mips/brind.ll
index 970dd99..8aee61e 100644
--- a/llvm/test/CodeGen/Mips/brind.ll
+++ b/llvm/test/CodeGen/Mips/brind.ll
@@ -27,7 +27,7 @@
   %puts7 = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str7, i32 0, i32 0))
   %inc = add i32 %i.2, 1
   %arrayidx = getelementptr inbounds [5 x i8*], [5 x i8*]* @main.L, i32 0, i32 %i.2
-  %0 = load i8** %arrayidx, align 4
+  %0 = load i8*, i8** %arrayidx, align 4
   indirectbr i8* %0, [label %L1, label %L2, label %L3, label %L4]
 ; 16: 	jrc	 ${{[0-9]+}}
 L4:                                               ; preds = %L3
diff --git a/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll b/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll
index d1f07a6..1087e53 100644
--- a/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll
+++ b/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-byte.ll
@@ -140,10 +140,10 @@
 entry:
   %ss.addr = alloca %struct.SmallStruct_1b*, align 8
   store %struct.SmallStruct_1b* %ss, %struct.SmallStruct_1b** %ss.addr, align 8
-  %0 = load %struct.SmallStruct_1b** %ss.addr, align 8
+  %0 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss.addr, align 8
   %1 = bitcast %struct.SmallStruct_1b* %0 to { i8 }*
   %2 = getelementptr { i8 }, { i8 }* %1, i32 0, i32 0
-  %3 = load i8* %2, align 1
+  %3 = load i8, i8* %2, align 1
   call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i8 inreg %3)
   ret void
  ; CHECK-LABEL: smallStruct_1b: 
@@ -154,10 +154,10 @@
 entry:
   %ss.addr = alloca %struct.SmallStruct_2b*, align 8
   store %struct.SmallStruct_2b* %ss, %struct.SmallStruct_2b** %ss.addr, align 8
-  %0 = load %struct.SmallStruct_2b** %ss.addr, align 8
+  %0 = load %struct.SmallStruct_2b*, %struct.SmallStruct_2b** %ss.addr, align 8
   %1 = bitcast %struct.SmallStruct_2b* %0 to { i16 }*
   %2 = getelementptr { i16 }, { i16 }* %1, i32 0, i32 0
-  %3 = load i16* %2, align 1
+  %3 = load i16, i16* %2, align 1
   call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i16 inreg %3)
   ret void
  ; CHECK-LABEL: smallStruct_2b:
@@ -169,12 +169,12 @@
   %ss.addr = alloca %struct.SmallStruct_3b*, align 8
   %.coerce = alloca { i24 }
   store %struct.SmallStruct_3b* %ss, %struct.SmallStruct_3b** %ss.addr, align 8
-  %0 = load %struct.SmallStruct_3b** %ss.addr, align 8
+  %0 = load %struct.SmallStruct_3b*, %struct.SmallStruct_3b** %ss.addr, align 8
   %1 = bitcast { i24 }* %.coerce to i8*
   %2 = bitcast %struct.SmallStruct_3b* %0 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 3, i32 0, i1 false)
   %3 = getelementptr { i24 }, { i24 }* %.coerce, i32 0, i32 0
-  %4 = load i24* %3, align 1
+  %4 = load i24, i24* %3, align 1
   call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i24 inreg %4)
   ret void
  ; CHECK-LABEL: smallStruct_3b:
@@ -187,10 +187,10 @@
 entry:
   %ss.addr = alloca %struct.SmallStruct_4b*, align 8
   store %struct.SmallStruct_4b* %ss, %struct.SmallStruct_4b** %ss.addr, align 8
-  %0 = load %struct.SmallStruct_4b** %ss.addr, align 8
+  %0 = load %struct.SmallStruct_4b*, %struct.SmallStruct_4b** %ss.addr, align 8
   %1 = bitcast %struct.SmallStruct_4b* %0 to { i32 }*
   %2 = getelementptr { i32 }, { i32 }* %1, i32 0, i32 0
-  %3 = load i32* %2, align 1
+  %3 = load i32, i32* %2, align 1
   call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 inreg %3)
   ret void
  ; CHECK-LABEL: smallStruct_4b:
@@ -202,12 +202,12 @@
   %ss.addr = alloca %struct.SmallStruct_5b*, align 8
   %.coerce = alloca { i40 }
   store %struct.SmallStruct_5b* %ss, %struct.SmallStruct_5b** %ss.addr, align 8
-  %0 = load %struct.SmallStruct_5b** %ss.addr, align 8
+  %0 = load %struct.SmallStruct_5b*, %struct.SmallStruct_5b** %ss.addr, align 8
   %1 = bitcast { i40 }* %.coerce to i8*
   %2 = bitcast %struct.SmallStruct_5b* %0 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 5, i32 0, i1 false)
   %3 = getelementptr { i40 }, { i40 }* %.coerce, i32 0, i32 0
-  %4 = load i40* %3, align 1
+  %4 = load i40, i40* %3, align 1
   call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i40 inreg %4)
   ret void
  ; CHECK-LABEL: smallStruct_5b:
@@ -219,12 +219,12 @@
   %ss.addr = alloca %struct.SmallStruct_6b*, align 8
   %.coerce = alloca { i48 }
   store %struct.SmallStruct_6b* %ss, %struct.SmallStruct_6b** %ss.addr, align 8
-  %0 = load %struct.SmallStruct_6b** %ss.addr, align 8
+  %0 = load %struct.SmallStruct_6b*, %struct.SmallStruct_6b** %ss.addr, align 8
   %1 = bitcast { i48 }* %.coerce to i8*
   %2 = bitcast %struct.SmallStruct_6b* %0 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i32 0, i1 false)
   %3 = getelementptr { i48 }, { i48 }* %.coerce, i32 0, i32 0
-  %4 = load i48* %3, align 1
+  %4 = load i48, i48* %3, align 1
   call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
   ret void
  ; CHECK-LABEL: smallStruct_6b:
@@ -236,12 +236,12 @@
   %ss.addr = alloca %struct.SmallStruct_7b*, align 8
   %.coerce = alloca { i56 }
   store %struct.SmallStruct_7b* %ss, %struct.SmallStruct_7b** %ss.addr, align 8
-  %0 = load %struct.SmallStruct_7b** %ss.addr, align 8
+  %0 = load %struct.SmallStruct_7b*, %struct.SmallStruct_7b** %ss.addr, align 8
   %1 = bitcast { i56 }* %.coerce to i8*
   %2 = bitcast %struct.SmallStruct_7b* %0 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 7, i32 0, i1 false)
   %3 = getelementptr { i56 }, { i56 }* %.coerce, i32 0, i32 0
-  %4 = load i56* %3, align 1
+  %4 = load i56, i56* %3, align 1
   call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i56 inreg %4)
   ret void
  ; CHECK-LABEL: smallStruct_7b:
@@ -252,10 +252,10 @@
 entry:
   %ss.addr = alloca %struct.SmallStruct_8b*, align 8
   store %struct.SmallStruct_8b* %ss, %struct.SmallStruct_8b** %ss.addr, align 8
-  %0 = load %struct.SmallStruct_8b** %ss.addr, align 8
+  %0 = load %struct.SmallStruct_8b*, %struct.SmallStruct_8b** %ss.addr, align 8
   %1 = bitcast %struct.SmallStruct_8b* %0 to { i64 }*
   %2 = getelementptr { i64 }, { i64 }* %1, i32 0, i32 0
-  %3 = load i64* %2, align 1
+  %3 = load i64, i64* %2, align 1
   call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 inreg %3)
   ret void
  ; CHECK-LABEL: smallStruct_8b:
@@ -267,14 +267,14 @@
   %ss.addr = alloca %struct.SmallStruct_9b*, align 8
   %.coerce = alloca { i64, i8 }
   store %struct.SmallStruct_9b* %ss, %struct.SmallStruct_9b** %ss.addr, align 8
-  %0 = load %struct.SmallStruct_9b** %ss.addr, align 8
+  %0 = load %struct.SmallStruct_9b*, %struct.SmallStruct_9b** %ss.addr, align 8
   %1 = bitcast { i64, i8 }* %.coerce to i8*
   %2 = bitcast %struct.SmallStruct_9b* %0 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 9, i32 0, i1 false)
   %3 = getelementptr { i64, i8 }, { i64, i8 }* %.coerce, i32 0, i32 0
-  %4 = load i64* %3, align 1
+  %4 = load i64, i64* %3, align 1
   %5 = getelementptr { i64, i8 }, { i64, i8 }* %.coerce, i32 0, i32 1
-  %6 = load i8* %5, align 1
+  %6 = load i8, i8* %5, align 1
   call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 inreg %4, i8 inreg %6)
   ret void
  ; CHECK-LABEL: smallStruct_9b:
diff --git a/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll b/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll
index c5e4e93..674adcc 100644
--- a/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll
+++ b/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-combinations.ll
@@ -74,10 +74,10 @@
 entry:
   %ss.addr = alloca %struct.SmallStruct_1b1s*, align 8
   store %struct.SmallStruct_1b1s* %ss, %struct.SmallStruct_1b1s** %ss.addr, align 8
-  %0 = load %struct.SmallStruct_1b1s** %ss.addr, align 8
+  %0 = load %struct.SmallStruct_1b1s*, %struct.SmallStruct_1b1s** %ss.addr, align 8
   %1 = bitcast %struct.SmallStruct_1b1s* %0 to { i32 }*
   %2 = getelementptr { i32 }, { i32 }* %1, i32 0, i32 0
-  %3 = load i32* %2, align 1
+  %3 = load i32, i32* %2, align 1
   call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i32 inreg %3)
   ret void
  ; CHECK-LABEL: smallStruct_1b1s:
@@ -88,10 +88,10 @@
 entry:
   %ss.addr = alloca %struct.SmallStruct_1b1i*, align 8
   store %struct.SmallStruct_1b1i* %ss, %struct.SmallStruct_1b1i** %ss.addr, align 8
-  %0 = load %struct.SmallStruct_1b1i** %ss.addr, align 8
+  %0 = load %struct.SmallStruct_1b1i*, %struct.SmallStruct_1b1i** %ss.addr, align 8
   %1 = bitcast %struct.SmallStruct_1b1i* %0 to { i64 }*
   %2 = getelementptr { i64 }, { i64 }* %1, i32 0, i32 0
-  %3 = load i64* %2, align 1
+  %3 = load i64, i64* %2, align 1
   call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 inreg %3)
   ret void
  ; CHECK-LABEL: smallStruct_1b1i:
@@ -103,12 +103,12 @@
   %ss.addr = alloca %struct.SmallStruct_1b1s1b*, align 8
   %.coerce = alloca { i48 }
   store %struct.SmallStruct_1b1s1b* %ss, %struct.SmallStruct_1b1s1b** %ss.addr, align 8
-  %0 = load %struct.SmallStruct_1b1s1b** %ss.addr, align 8
+  %0 = load %struct.SmallStruct_1b1s1b*, %struct.SmallStruct_1b1s1b** %ss.addr, align 8
   %1 = bitcast { i48 }* %.coerce to i8*
   %2 = bitcast %struct.SmallStruct_1b1s1b* %0 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i32 0, i1 false)
   %3 = getelementptr { i48 }, { i48 }* %.coerce, i32 0, i32 0
-  %4 = load i48* %3, align 1
+  %4 = load i48, i48* %3, align 1
   call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
   ret void
  ; CHECK-LABEL: smallStruct_1b1s1b:
@@ -121,10 +121,10 @@
 entry:
   %ss.addr = alloca %struct.SmallStruct_1s1i*, align 8
   store %struct.SmallStruct_1s1i* %ss, %struct.SmallStruct_1s1i** %ss.addr, align 8
-  %0 = load %struct.SmallStruct_1s1i** %ss.addr, align 8
+  %0 = load %struct.SmallStruct_1s1i*, %struct.SmallStruct_1s1i** %ss.addr, align 8
   %1 = bitcast %struct.SmallStruct_1s1i* %0 to { i64 }*
   %2 = getelementptr { i64 }, { i64 }* %1, i32 0, i32 0
-  %3 = load i64* %2, align 1
+  %3 = load i64, i64* %2, align 1
   call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 inreg %3)
   ret void
  ; CHECK-LABEL: smallStruct_1s1i:
@@ -136,12 +136,12 @@
   %ss.addr = alloca %struct.SmallStruct_3b1s*, align 8
   %.coerce = alloca { i48 }
   store %struct.SmallStruct_3b1s* %ss, %struct.SmallStruct_3b1s** %ss.addr, align 8
-  %0 = load %struct.SmallStruct_3b1s** %ss.addr, align 8
+  %0 = load %struct.SmallStruct_3b1s*, %struct.SmallStruct_3b1s** %ss.addr, align 8
   %1 = bitcast { i48 }* %.coerce to i8*
   %2 = bitcast %struct.SmallStruct_3b1s* %0 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 6, i32 0, i1 false)
   %3 = getelementptr { i48 }, { i48 }* %.coerce, i32 0, i32 0
-  %4 = load i48* %3, align 1
+  %4 = load i48, i48* %3, align 1
   call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i48 inreg %4)
   ret void
  ; CHECK-LABEL: smallStruct_3b1s:
diff --git a/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll b/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll
index a9e8563..2242358 100644
--- a/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll
+++ b/llvm/test/CodeGen/Mips/cconv/arguments-varargs-small-structs-multiple-args.ll
@@ -110,42 +110,42 @@
   store %struct.SmallStruct_1b* %ss7, %struct.SmallStruct_1b** %ss7.addr, align 8
   store %struct.SmallStruct_1b* %ss8, %struct.SmallStruct_1b** %ss8.addr, align 8
   store %struct.SmallStruct_1b* %ss9, %struct.SmallStruct_1b** %ss9.addr, align 8
-  %0 = load %struct.SmallStruct_1b** %ss1.addr, align 8
-  %1 = load %struct.SmallStruct_1b** %ss2.addr, align 8
-  %2 = load %struct.SmallStruct_1b** %ss3.addr, align 8
-  %3 = load %struct.SmallStruct_1b** %ss4.addr, align 8
-  %4 = load %struct.SmallStruct_1b** %ss5.addr, align 8
-  %5 = load %struct.SmallStruct_1b** %ss6.addr, align 8
-  %6 = load %struct.SmallStruct_1b** %ss7.addr, align 8
-  %7 = load %struct.SmallStruct_1b** %ss8.addr, align 8
-  %8 = load %struct.SmallStruct_1b** %ss9.addr, align 8
+  %0 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss1.addr, align 8
+  %1 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss2.addr, align 8
+  %2 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss3.addr, align 8
+  %3 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss4.addr, align 8
+  %4 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss5.addr, align 8
+  %5 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss6.addr, align 8
+  %6 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss7.addr, align 8
+  %7 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss8.addr, align 8
+  %8 = load %struct.SmallStruct_1b*, %struct.SmallStruct_1b** %ss9.addr, align 8
   %9 = bitcast %struct.SmallStruct_1b* %0 to { i8 }*
   %10 = getelementptr { i8 }, { i8 }* %9, i32 0, i32 0
-  %11 = load i8* %10, align 1
+  %11 = load i8, i8* %10, align 1
   %12 = bitcast %struct.SmallStruct_1b* %1 to { i8 }*
   %13 = getelementptr { i8 }, { i8 }* %12, i32 0, i32 0
-  %14 = load i8* %13, align 1
+  %14 = load i8, i8* %13, align 1
   %15 = bitcast %struct.SmallStruct_1b* %2 to { i8 }*
   %16 = getelementptr { i8 }, { i8 }* %15, i32 0, i32 0
-  %17 = load i8* %16, align 1
+  %17 = load i8, i8* %16, align 1
   %18 = bitcast %struct.SmallStruct_1b* %3 to { i8 }*
   %19 = getelementptr { i8 }, { i8 }* %18, i32 0, i32 0
-  %20 = load i8* %19, align 1
+  %20 = load i8, i8* %19, align 1
   %21 = bitcast %struct.SmallStruct_1b* %4 to { i8 }*
   %22 = getelementptr { i8 }, { i8 }* %21, i32 0, i32 0
-  %23 = load i8* %22, align 1
+  %23 = load i8, i8* %22, align 1
   %24 = bitcast %struct.SmallStruct_1b* %5 to { i8 }*
   %25 = getelementptr { i8 }, { i8 }* %24, i32 0, i32 0
-  %26 = load i8* %25, align 1
+  %26 = load i8, i8* %25, align 1
   %27 = bitcast %struct.SmallStruct_1b* %6 to { i8 }*
   %28 = getelementptr { i8 }, { i8 }* %27, i32 0, i32 0
-  %29 = load i8* %28, align 1
+  %29 = load i8, i8* %28, align 1
   %30 = bitcast %struct.SmallStruct_1b* %7 to { i8 }*
   %31 = getelementptr { i8 }, { i8 }* %30, i32 0, i32 0
-  %32 = load i8* %31, align 1
+  %32 = load i8, i8* %31, align 1
   %33 = bitcast %struct.SmallStruct_1b* %8 to { i8 }*
   %34 = getelementptr { i8 }, { i8 }* %33, i32 0, i32 0
-  %35 = load i8* %34, align 1
+  %35 = load i8, i8* %34, align 1
   call void (i8*, ...)* @varArgF_SmallStruct(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i8 inreg %11, i8 inreg %14, i8 inreg %17, i8 inreg %20, i8 inreg %23, i8 inreg %26, i8 inreg %29, i8 inreg %32, i8 inreg %35)
   ret void
  ; CHECK-LABEL: smallStruct_1b_x9:
diff --git a/llvm/test/CodeGen/Mips/cconv/return-float.ll b/llvm/test/CodeGen/Mips/cconv/return-float.ll
index 8c4c31c..4355a55 100644
--- a/llvm/test/CodeGen/Mips/cconv/return-float.ll
+++ b/llvm/test/CodeGen/Mips/cconv/return-float.ll
@@ -21,7 +21,7 @@
 
 define float @retfloat() nounwind {
 entry:
-        %0 = load volatile float* @float
+        %0 = load volatile float, float* @float
         ret float %0
 }
 
@@ -35,7 +35,7 @@
 
 define double @retdouble() nounwind {
 entry:
-        %0 = load volatile double* @double
+        %0 = load volatile double, double* @double
         ret double %0
 }
 
diff --git a/llvm/test/CodeGen/Mips/cconv/return-hard-float.ll b/llvm/test/CodeGen/Mips/cconv/return-hard-float.ll
index f0aeb12..14853c8 100644
--- a/llvm/test/CodeGen/Mips/cconv/return-hard-float.ll
+++ b/llvm/test/CodeGen/Mips/cconv/return-hard-float.ll
@@ -24,7 +24,7 @@
 
 define float @retfloat() nounwind {
 entry:
-        %0 = load volatile float* @float
+        %0 = load volatile float, float* @float
         ret float %0
 }
 
@@ -38,7 +38,7 @@
 
 define double @retdouble() nounwind {
 entry:
-        %0 = load volatile double* @double
+        %0 = load volatile double, double* @double
         ret double %0
 }
 
@@ -50,7 +50,7 @@
 
 define { double, double } @retComplexDouble() #0 {
   %retval = alloca { double, double }, align 8
-  %1 = load { double, double }* %retval
+  %1 = load { double, double }, { double, double }* %retval
   ret { double, double } %1
 }
 
diff --git a/llvm/test/CodeGen/Mips/cconv/return-hard-fp128.ll b/llvm/test/CodeGen/Mips/cconv/return-hard-fp128.ll
index 05dacfe..34e9647 100644
--- a/llvm/test/CodeGen/Mips/cconv/return-hard-fp128.ll
+++ b/llvm/test/CodeGen/Mips/cconv/return-hard-fp128.ll
@@ -13,7 +13,7 @@
 
 define fp128 @retldouble() nounwind {
 entry:
-        %0 = load volatile fp128* @fp128
+        %0 = load volatile fp128, fp128* @fp128
         ret fp128 %0
 }
 
diff --git a/llvm/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll b/llvm/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
index 4ce26b1..c4c8f10 100644
--- a/llvm/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
+++ b/llvm/test/CodeGen/Mips/cconv/return-hard-struct-f128.ll
@@ -10,7 +10,7 @@
 
 define inreg {fp128} @ret_struct_fp128() nounwind {
 entry:
-        %0 = load volatile {fp128}* @struct_fp128
+        %0 = load volatile {fp128}, {fp128}* @struct_fp128
         ret {fp128} %0
 }
 
diff --git a/llvm/test/CodeGen/Mips/cconv/return-struct.ll b/llvm/test/CodeGen/Mips/cconv/return-struct.ll
index 3d591df..68af9e3 100644
--- a/llvm/test/CodeGen/Mips/cconv/return-struct.ll
+++ b/llvm/test/CodeGen/Mips/cconv/return-struct.ll
@@ -22,7 +22,7 @@
 
 define inreg {i8} @ret_struct_i8() nounwind {
 entry:
-        %0 = load volatile {i8}* @struct_byte
+        %0 = load volatile {i8}, {i8}* @struct_byte
         ret {i8} %0
 }
 
@@ -54,7 +54,7 @@
         %0 = bitcast {i8,i8}* %retval to i8*
         call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds ({i8,i8}* @struct_2byte, i32 0, i32 0), i64 2, i32 1, i1 false)
         %1 = bitcast {i8,i8}* %retval to {i16}*
-        %2 = load volatile {i16}* %1
+        %2 = load volatile {i16}, {i16}* %1
         ret {i16} %2
 }
 
@@ -91,7 +91,7 @@
 ; missed by the CCPromoteToType and the shift didn't happen.
 define inreg {i48} @ret_struct_3xi16() nounwind {
 entry:
-        %0 = load volatile i48* bitcast ({[3 x i16]}* @struct_3xi16 to i48*), align 2
+        %0 = load volatile i48, i48* bitcast ({[3 x i16]}* @struct_3xi16 to i48*), align 2
         %1 = insertvalue {i48} undef, i48 %0, 0
         ret {i48} %1
 }
@@ -174,7 +174,7 @@
 ; This time we let the backend lower the sret argument.
 define {[6 x i32]} @ret_struct_6xi32() {
 entry:
-        %0 = load volatile {[6 x i32]}* @struct_6xi32, align 2
+        %0 = load volatile {[6 x i32]}, {[6 x i32]}* @struct_6xi32, align 2
         ret {[6 x i32]} %0
 }
 
diff --git a/llvm/test/CodeGen/Mips/cconv/return.ll b/llvm/test/CodeGen/Mips/cconv/return.ll
index 516026d..a537672 100644
--- a/llvm/test/CodeGen/Mips/cconv/return.ll
+++ b/llvm/test/CodeGen/Mips/cconv/return.ll
@@ -24,7 +24,7 @@
 
 define i8 @reti8() nounwind {
 entry:
-        %0 = load volatile i8* @byte
+        %0 = load volatile i8, i8* @byte
         ret i8 %0
 }
 
@@ -38,7 +38,7 @@
 
 define i32 @reti32() nounwind {
 entry:
-        %0 = load volatile i32* @word
+        %0 = load volatile i32, i32* @word
         ret i32 %0
 }
 
@@ -52,7 +52,7 @@
 
 define i64 @reti64() nounwind {
 entry:
-        %0 = load volatile i64* @dword
+        %0 = load volatile i64, i64* @dword
         ret i64 %0
 }
 
diff --git a/llvm/test/CodeGen/Mips/cfi_offset.ll b/llvm/test/CodeGen/Mips/cfi_offset.ll
index e23855b..6e78344 100644
--- a/llvm/test/CodeGen/Mips/cfi_offset.ll
+++ b/llvm/test/CodeGen/Mips/cfi_offset.ll
@@ -32,8 +32,8 @@
 ; CHECK:  .cfi_offset 31, -20
 ; CHECK:  .cfi_offset 16, -24
 
-    %val1 = load volatile double* @var
-    %val2 = load volatile double* @var
+    %val1 = load volatile double, double* @var
+    %val2 = load volatile double, double* @var
     call void (...)* @foo() nounwind
     store volatile double %val1, double* @var
     store volatile double %val2, double* @var
diff --git a/llvm/test/CodeGen/Mips/ci2.ll b/llvm/test/CodeGen/Mips/ci2.ll
index e2068fd..63ed683 100644
--- a/llvm/test/CodeGen/Mips/ci2.ll
+++ b/llvm/test/CodeGen/Mips/ci2.ll
@@ -8,7 +8,7 @@
 define void @foo() #0 {
 entry:
   store i32 305419896, i32* @i, align 4
-  %0 = load i32* @b, align 4
+  %0 = load i32, i32* @b, align 4
   %tobool = icmp ne i32 %0, 0
   br i1 %tobool, label %if.then, label %if.else
 
diff --git a/llvm/test/CodeGen/Mips/cmov.ll b/llvm/test/CodeGen/Mips/cmov.ll
index b12c2df..b018f28 100644
--- a/llvm/test/CodeGen/Mips/cmov.ll
+++ b/llvm/test/CodeGen/Mips/cmov.ll
@@ -41,7 +41,7 @@
 define i32* @cmov1(i32 signext %s) nounwind readonly {
 entry:
   %tobool = icmp ne i32 %s, 0
-  %tmp1 = load i32** @i3, align 4
+  %tmp1 = load i32*, i32** @i3, align 4
   %cond = select i1 %tobool, i32* getelementptr inbounds ([3 x i32]* @i1, i32 0, i32 0), i32* %tmp1
   ret i32* %cond
 }
@@ -81,8 +81,8 @@
 define i32 @cmov2(i32 signext %s) nounwind readonly {
 entry:
   %tobool = icmp ne i32 %s, 0
-  %tmp1 = load i32* @c, align 4
-  %tmp2 = load i32* @d, align 4
+  %tmp1 = load i32, i32* @c, align 4
+  %tmp2 = load i32, i32* @d, align 4
   %cond = select i1 %tobool, i32 %tmp1, i32 %tmp2
   ret i32 %cond
 }
diff --git a/llvm/test/CodeGen/Mips/cmplarge.ll b/llvm/test/CodeGen/Mips/cmplarge.ll
index 43fc10d..7901906 100644
--- a/llvm/test/CodeGen/Mips/cmplarge.ll
+++ b/llvm/test/CodeGen/Mips/cmplarge.ll
@@ -10,7 +10,7 @@
 define void @getSubImagesLuma(%struct.StorablePicture* nocapture %s) #0 {
 entry:
   %size_y = getelementptr inbounds %struct.StorablePicture, %struct.StorablePicture* %s, i32 0, i32 1
-  %0 = load i32* %size_y, align 4
+  %0 = load i32, i32* %size_y, align 4
   %sub = add nsw i32 %0, -1
   %add5 = add nsw i32 %0, 20
   %cmp6 = icmp sgt i32 %add5, -20
@@ -20,7 +20,7 @@
   %j.07 = phi i32 [ %inc, %for.body ], [ -20, %entry ]
   %call = tail call i32 bitcast (i32 (...)* @iClip3 to i32 (i32, i32, i32)*)(i32 0, i32 %sub, i32 %j.07) #2
   %inc = add nsw i32 %j.07, 1
-  %1 = load i32* %size_y, align 4
+  %1 = load i32, i32* %size_y, align 4
   %add = add nsw i32 %1, 20
   %cmp = icmp slt i32 %inc, %add
   br i1 %cmp, label %for.body, label %for.end
diff --git a/llvm/test/CodeGen/Mips/const4a.ll b/llvm/test/CodeGen/Mips/const4a.ll
index ac6795b..9022eb4 100644
--- a/llvm/test/CodeGen/Mips/const4a.ll
+++ b/llvm/test/CodeGen/Mips/const4a.ll
@@ -14,7 +14,7 @@
 define void @t() #0 {
 entry:
   store i32 -559023410, i32* @i, align 4
-  %0 = load i32* @b, align 4
+  %0 = load i32, i32* @b, align 4
 ; no-load-relax:	lw	${{[0-9]+}}, $CPI0_1	# 16 bit inst
   %tobool = icmp ne i32 %0, 0
   br i1 %tobool, label %if.then, label %if.else
diff --git a/llvm/test/CodeGen/Mips/ctlz.ll b/llvm/test/CodeGen/Mips/ctlz.ll
index 1f87166..96af197 100644
--- a/llvm/test/CodeGen/Mips/ctlz.ll
+++ b/llvm/test/CodeGen/Mips/ctlz.ll
@@ -9,7 +9,7 @@
 entry:
   %retval = alloca i32, align 4
   store i32 0, i32* %retval
-  %0 = load i32* @x, align 4
+  %0 = load i32, i32* @x, align 4
   %1 = call i32 @llvm.ctlz.i32(i32 %0, i1 true)
   store i32 %1, i32* @y, align 4
   ret i32 0
diff --git a/llvm/test/CodeGen/Mips/disable-tail-merge.ll b/llvm/test/CodeGen/Mips/disable-tail-merge.ll
index b4c093a..9396db7 100644
--- a/llvm/test/CodeGen/Mips/disable-tail-merge.ll
+++ b/llvm/test/CodeGen/Mips/disable-tail-merge.ll
@@ -9,20 +9,20 @@
 define i32 @test1(i32 %a) {
 entry:
   %tobool = icmp eq i32 %a, 0
-  %0 = load i32* @g0, align 4
+  %0 = load i32, i32* @g0, align 4
   br i1 %tobool, label %if.else, label %if.then
 
 if.then:
   %add = add nsw i32 %0, 1
   store i32 %add, i32* @g0, align 4
-  %1 = load i32* @g1, align 4
+  %1 = load i32, i32* @g1, align 4
   %add1 = add nsw i32 %1, 23
   br label %if.end
 
 if.else:
   %add2 = add nsw i32 %0, 11
   store i32 %add2, i32* @g0, align 4
-  %2 = load i32* @g1, align 4
+  %2 = load i32, i32* @g1, align 4
   %add3 = add nsw i32 %2, 23
   br label %if.end
 
diff --git a/llvm/test/CodeGen/Mips/div.ll b/llvm/test/CodeGen/Mips/div.ll
index 00e2c19..731841c 100644
--- a/llvm/test/CodeGen/Mips/div.ll
+++ b/llvm/test/CodeGen/Mips/div.ll
@@ -6,8 +6,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @iiii, align 4
-  %1 = load i32* @jjjj, align 4
+  %0 = load i32, i32* @iiii, align 4
+  %1 = load i32, i32* @jjjj, align 4
   %div = sdiv i32 %0, %1
 ; 16:	div	$zero, ${{[0-9]+}}, ${{[0-9]+}}
 ; 16: 	mflo	${{[0-9]+}}
diff --git a/llvm/test/CodeGen/Mips/div_rem.ll b/llvm/test/CodeGen/Mips/div_rem.ll
index 950192e..e64529c 100644
--- a/llvm/test/CodeGen/Mips/div_rem.ll
+++ b/llvm/test/CodeGen/Mips/div_rem.ll
@@ -7,8 +7,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @iiii, align 4
-  %1 = load i32* @jjjj, align 4
+  %0 = load i32, i32* @iiii, align 4
+  %1 = load i32, i32* @jjjj, align 4
   %div = sdiv i32 %0, %1
   store i32 %div, i32* @kkkk, align 4
   %rem = srem i32 %0, %1
diff --git a/llvm/test/CodeGen/Mips/divrem.ll b/llvm/test/CodeGen/Mips/divrem.ll
index a9cfe0f..918db05 100644
--- a/llvm/test/CodeGen/Mips/divrem.ll
+++ b/llvm/test/CodeGen/Mips/divrem.ll
@@ -220,8 +220,8 @@
 ; FIXME: It's not clear what this is supposed to test.
 define i32 @killFlags() {
 entry:
-  %0 = load i32* @g0, align 4
-  %1 = load i32* @g1, align 4
+  %0 = load i32, i32* @g0, align 4
+  %1 = load i32, i32* @g1, align 4
   %div = sdiv i32 %0, %1
   ret i32 %div
 }
diff --git a/llvm/test/CodeGen/Mips/divu.ll b/llvm/test/CodeGen/Mips/divu.ll
index b96a439..5bc765a 100644
--- a/llvm/test/CodeGen/Mips/divu.ll
+++ b/llvm/test/CodeGen/Mips/divu.ll
@@ -6,8 +6,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @iiii, align 4
-  %1 = load i32* @jjjj, align 4
+  %0 = load i32, i32* @iiii, align 4
+  %1 = load i32, i32* @jjjj, align 4
   %div = udiv i32 %0, %1
 ; 16:	divu	$zero, ${{[0-9]+}}, ${{[0-9]+}}
 ; 16: 	mflo	${{[0-9]+}}
diff --git a/llvm/test/CodeGen/Mips/divu_remu.ll b/llvm/test/CodeGen/Mips/divu_remu.ll
index a6c1563..a079440 100644
--- a/llvm/test/CodeGen/Mips/divu_remu.ll
+++ b/llvm/test/CodeGen/Mips/divu_remu.ll
@@ -8,8 +8,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @iiii, align 4
-  %1 = load i32* @jjjj, align 4
+  %0 = load i32, i32* @iiii, align 4
+  %1 = load i32, i32* @jjjj, align 4
   %div = udiv i32 %0, %1
   store i32 %div, i32* @kkkk, align 4
   %rem = urem i32 %0, %1
diff --git a/llvm/test/CodeGen/Mips/dsp-patterns.ll b/llvm/test/CodeGen/Mips/dsp-patterns.ll
index 067003a..837c0d8 100644
--- a/llvm/test/CodeGen/Mips/dsp-patterns.ll
+++ b/llvm/test/CodeGen/Mips/dsp-patterns.ll
@@ -7,7 +7,7 @@
 define zeroext i8 @test_lbux(i8* nocapture %b, i32 %i) {
 entry:
   %add.ptr = getelementptr inbounds i8, i8* %b, i32 %i
-  %0 = load i8* %add.ptr, align 1
+  %0 = load i8, i8* %add.ptr, align 1
   ret i8 %0
 }
 
@@ -17,7 +17,7 @@
 define signext i16 @test_lhx(i16* nocapture %b, i32 %i) {
 entry:
   %add.ptr = getelementptr inbounds i16, i16* %b, i32 %i
-  %0 = load i16* %add.ptr, align 2
+  %0 = load i16, i16* %add.ptr, align 2
   ret i16 %0
 }
 
@@ -27,7 +27,7 @@
 define i32 @test_lwx(i32* nocapture %b, i32 %i) {
 entry:
   %add.ptr = getelementptr inbounds i32, i32* %b, i32 %i
-  %0 = load i32* %add.ptr, align 4
+  %0 = load i32, i32* %add.ptr, align 4
   ret i32 %0
 }
 
diff --git a/llvm/test/CodeGen/Mips/dsp-vec-load-store.ll b/llvm/test/CodeGen/Mips/dsp-vec-load-store.ll
index 7e4a8fe..f925180 100644
--- a/llvm/test/CodeGen/Mips/dsp-vec-load-store.ll
+++ b/llvm/test/CodeGen/Mips/dsp-vec-load-store.ll
@@ -5,7 +5,7 @@
 
 define void @extend_load_trunc_store_v2i8() {
 entry:
-  %0 = load <2 x i8>* @g1, align 2
+  %0 = load <2 x i8>, <2 x i8>* @g1, align 2
   store <2 x i8> %0, <2 x i8>* @g0, align 2
   ret void
 }
diff --git a/llvm/test/CodeGen/Mips/eh.ll b/llvm/test/CodeGen/Mips/eh.ll
index fc9e2ef..03bc199 100644
--- a/llvm/test/CodeGen/Mips/eh.ll
+++ b/llvm/test/CodeGen/Mips/eh.ll
@@ -37,7 +37,7 @@
 catch:                                            ; preds = %lpad
   %3 = tail call i8* @__cxa_begin_catch(i8* %exn) nounwind
   %4 = bitcast i8* %3 to double*
-  %exn.scalar = load double* %4, align 8
+  %exn.scalar = load double, double* %4, align 8
   %add = fadd double %exn.scalar, %i2
   store double %add, double* @g1, align 8
   tail call void @__cxa_end_catch() nounwind
diff --git a/llvm/test/CodeGen/Mips/emit-big-cst.ll b/llvm/test/CodeGen/Mips/emit-big-cst.ll
index a168743..9bc96c8 100644
--- a/llvm/test/CodeGen/Mips/emit-big-cst.ll
+++ b/llvm/test/CodeGen/Mips/emit-big-cst.ll
@@ -10,7 +10,7 @@
 
 define void @accessBig(i64* %storage) {
   %addr = bitcast i64* %storage to i82*
-  %bigLoadedCst = load volatile i82* @bigCst
+  %bigLoadedCst = load volatile i82, i82* @bigCst
   %tmp = add i82 %bigLoadedCst, 1
   store i82 %tmp, i82* %addr
   ret void
diff --git a/llvm/test/CodeGen/Mips/ex2.ll b/llvm/test/CodeGen/Mips/ex2.ll
index 6d024c2..eb72a7a 100644
--- a/llvm/test/CodeGen/Mips/ex2.ll
+++ b/llvm/test/CodeGen/Mips/ex2.ll
@@ -22,7 +22,7 @@
   unreachable
 
 return:                                           ; No predecessors!
-  %1 = load i32* %retval
+  %1 = load i32, i32* %retval
   ret i32 %1
 }
 
diff --git a/llvm/test/CodeGen/Mips/extins.ll b/llvm/test/CodeGen/Mips/extins.ll
index efaeeea..6604f89 100644
--- a/llvm/test/CodeGen/Mips/extins.ll
+++ b/llvm/test/CodeGen/Mips/extins.ll
@@ -16,7 +16,7 @@
 ; 16-NOT: ins ${{[0-9]+}}
   %and = shl i32 %s, 5
   %shl = and i32 %and, 16352
-  %tmp3 = load i32* %d, align 4
+  %tmp3 = load i32, i32* %d, align 4
   %and5 = and i32 %tmp3, -16353
   %or = or i32 %and5, %shl
   store i32 %or, i32* %d, align 4
diff --git a/llvm/test/CodeGen/Mips/f16abs.ll b/llvm/test/CodeGen/Mips/f16abs.ll
index 0fba9c4..8389832 100644
--- a/llvm/test/CodeGen/Mips/f16abs.ll
+++ b/llvm/test/CodeGen/Mips/f16abs.ll
@@ -11,12 +11,12 @@
 ; Function Attrs: nounwind optsize
 define i32 @main() #0 {
 entry:
-  %0 = load double* @y, align 8
+  %0 = load double, double* @y, align 8
   %call = tail call double @fabs(double %0) #2
   store double %call, double* @x, align 8
 ; static-NOT: 	.ent	__call_stub_fp_fabs
 ; static-NOT: 	jal fabs
-  %1 = load float* @y1, align 4
+  %1 = load float, float* @y1, align 4
   %call2 = tail call float @fabsf(float %1) #2
   store float %call2, float* @x1, align 4
 ; static-NOT: 	.ent	__call_stub_fp_fabsf
diff --git a/llvm/test/CodeGen/Mips/fastcc.ll b/llvm/test/CodeGen/Mips/fastcc.ll
index 6b022c5..a47a1f7e 100644
--- a/llvm/test/CodeGen/Mips/fastcc.ll
+++ b/llvm/test/CodeGen/Mips/fastcc.ll
@@ -108,23 +108,23 @@
 ; CHECK-NACL-NOT: lw  $15
 ; CHECK-NACL-NOT: lw  $24
 
-  %0 = load i32* @gi0, align 4
-  %1 = load i32* @gi1, align 4
-  %2 = load i32* @gi2, align 4
-  %3 = load i32* @gi3, align 4
-  %4 = load i32* @gi4, align 4
-  %5 = load i32* @gi5, align 4
-  %6 = load i32* @gi6, align 4
-  %7 = load i32* @gi7, align 4
-  %8 = load i32* @gi8, align 4
-  %9 = load i32* @gi9, align 4
-  %10 = load i32* @gi10, align 4
-  %11 = load i32* @gi11, align 4
-  %12 = load i32* @gi12, align 4
-  %13 = load i32* @gi13, align 4
-  %14 = load i32* @gi14, align 4
-  %15 = load i32* @gi15, align 4
-  %16 = load i32* @gi16, align 4
+  %0 = load i32, i32* @gi0, align 4
+  %1 = load i32, i32* @gi1, align 4
+  %2 = load i32, i32* @gi2, align 4
+  %3 = load i32, i32* @gi3, align 4
+  %4 = load i32, i32* @gi4, align 4
+  %5 = load i32, i32* @gi5, align 4
+  %6 = load i32, i32* @gi6, align 4
+  %7 = load i32, i32* @gi7, align 4
+  %8 = load i32, i32* @gi8, align 4
+  %9 = load i32, i32* @gi9, align 4
+  %10 = load i32, i32* @gi10, align 4
+  %11 = load i32, i32* @gi11, align 4
+  %12 = load i32, i32* @gi12, align 4
+  %13 = load i32, i32* @gi13, align 4
+  %14 = load i32, i32* @gi14, align 4
+  %15 = load i32, i32* @gi15, align 4
+  %16 = load i32, i32* @gi16, align 4
   tail call fastcc void @callee0(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32 %14, i32 %15, i32 %16)
   ret void
 }
@@ -196,27 +196,27 @@
 ; CHECK: lwc1  $f1
 ; CHECK: lwc1  $f0
 
-  %0 = load float* @gfa0, align 4
-  %1 = load float* @gfa1, align 4
-  %2 = load float* @gfa2, align 4
-  %3 = load float* @gfa3, align 4
-  %4 = load float* @gfa4, align 4
-  %5 = load float* @gfa5, align 4
-  %6 = load float* @gfa6, align 4
-  %7 = load float* @gfa7, align 4
-  %8 = load float* @gfa8, align 4
-  %9 = load float* @gfa9, align 4
-  %10 = load float* @gfa10, align 4
-  %11 = load float* @gfa11, align 4
-  %12 = load float* @gfa12, align 4
-  %13 = load float* @gfa13, align 4
-  %14 = load float* @gfa14, align 4
-  %15 = load float* @gfa15, align 4
-  %16 = load float* @gfa16, align 4
-  %17 = load float* @gfa17, align 4
-  %18 = load float* @gfa18, align 4
-  %19 = load float* @gfa19, align 4
-  %20 = load float* @gfa20, align 4
+  %0 = load float, float* @gfa0, align 4
+  %1 = load float, float* @gfa1, align 4
+  %2 = load float, float* @gfa2, align 4
+  %3 = load float, float* @gfa3, align 4
+  %4 = load float, float* @gfa4, align 4
+  %5 = load float, float* @gfa5, align 4
+  %6 = load float, float* @gfa6, align 4
+  %7 = load float, float* @gfa7, align 4
+  %8 = load float, float* @gfa8, align 4
+  %9 = load float, float* @gfa9, align 4
+  %10 = load float, float* @gfa10, align 4
+  %11 = load float, float* @gfa11, align 4
+  %12 = load float, float* @gfa12, align 4
+  %13 = load float, float* @gfa13, align 4
+  %14 = load float, float* @gfa14, align 4
+  %15 = load float, float* @gfa15, align 4
+  %16 = load float, float* @gfa16, align 4
+  %17 = load float, float* @gfa17, align 4
+  %18 = load float, float* @gfa18, align 4
+  %19 = load float, float* @gfa19, align 4
+  %20 = load float, float* @gfa20, align 4
   tail call fastcc void @callee1(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13, float %14, float %15, float %16, float %17, float %18, float %19, float %20)
   ret void
 }
@@ -292,17 +292,17 @@
 ; NOODDSPREG-DAG:    lwc1    $[[F0:f[0-9]*[02468]]], 40($[[R0]])
 ; NOODDSPREG-DAG:    swc1    $[[F0]], 0($sp)
 
-  %0 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 0), align 4
-  %1 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 1), align 4
-  %2 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 2), align 4
-  %3 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 3), align 4
-  %4 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 4), align 4
-  %5 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 5), align 4
-  %6 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 6), align 4
-  %7 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 7), align 4
-  %8 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 8), align 4
-  %9 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 9), align 4
-  %10 = load float* getelementptr ([11 x float]* @fa, i32 0, i32 10), align 4
+  %0 = load float, float* getelementptr ([11 x float]* @fa, i32 0, i32 0), align 4
+  %1 = load float, float* getelementptr ([11 x float]* @fa, i32 0, i32 1), align 4
+  %2 = load float, float* getelementptr ([11 x float]* @fa, i32 0, i32 2), align 4
+  %3 = load float, float* getelementptr ([11 x float]* @fa, i32 0, i32 3), align 4
+  %4 = load float, float* getelementptr ([11 x float]* @fa, i32 0, i32 4), align 4
+  %5 = load float, float* getelementptr ([11 x float]* @fa, i32 0, i32 5), align 4
+  %6 = load float, float* getelementptr ([11 x float]* @fa, i32 0, i32 6), align 4
+  %7 = load float, float* getelementptr ([11 x float]* @fa, i32 0, i32 7), align 4
+  %8 = load float, float* getelementptr ([11 x float]* @fa, i32 0, i32 8), align 4
+  %9 = load float, float* getelementptr ([11 x float]* @fa, i32 0, i32 9), align 4
+  %10 = load float, float* getelementptr ([11 x float]* @fa, i32 0, i32 10), align 4
   tail call fastcc void @callee2(float %0, float %1, float %2, float %3,
                                  float %4, float %5, float %6, float %7,
                                  float %8, float %9, float %10)
@@ -373,17 +373,17 @@
 ; FP64-NOODDSPREG-DAG:    ldc1    $[[F0:f[0-9]*[02468]]], 80($[[R0]])
 ; FP64-NOODDSPREG-DAG:    sdc1    $[[F0]], 0($sp)
 
-  %0 = load double* getelementptr ([11 x double]* @da, i32 0, i32 0), align 8
-  %1 = load double* getelementptr ([11 x double]* @da, i32 0, i32 1), align 8
-  %2 = load double* getelementptr ([11 x double]* @da, i32 0, i32 2), align 8
-  %3 = load double* getelementptr ([11 x double]* @da, i32 0, i32 3), align 8
-  %4 = load double* getelementptr ([11 x double]* @da, i32 0, i32 4), align 8
-  %5 = load double* getelementptr ([11 x double]* @da, i32 0, i32 5), align 8
-  %6 = load double* getelementptr ([11 x double]* @da, i32 0, i32 6), align 8
-  %7 = load double* getelementptr ([11 x double]* @da, i32 0, i32 7), align 8
-  %8 = load double* getelementptr ([11 x double]* @da, i32 0, i32 8), align 8
-  %9 = load double* getelementptr ([11 x double]* @da, i32 0, i32 9), align 8
-  %10 = load double* getelementptr ([11 x double]* @da, i32 0, i32 10), align 8
+  %0 = load double, double* getelementptr ([11 x double]* @da, i32 0, i32 0), align 8
+  %1 = load double, double* getelementptr ([11 x double]* @da, i32 0, i32 1), align 8
+  %2 = load double, double* getelementptr ([11 x double]* @da, i32 0, i32 2), align 8
+  %3 = load double, double* getelementptr ([11 x double]* @da, i32 0, i32 3), align 8
+  %4 = load double, double* getelementptr ([11 x double]* @da, i32 0, i32 4), align 8
+  %5 = load double, double* getelementptr ([11 x double]* @da, i32 0, i32 5), align 8
+  %6 = load double, double* getelementptr ([11 x double]* @da, i32 0, i32 6), align 8
+  %7 = load double, double* getelementptr ([11 x double]* @da, i32 0, i32 7), align 8
+  %8 = load double, double* getelementptr ([11 x double]* @da, i32 0, i32 8), align 8
+  %9 = load double, double* getelementptr ([11 x double]* @da, i32 0, i32 9), align 8
+  %10 = load double, double* getelementptr ([11 x double]* @da, i32 0, i32 10), align 8
   tail call fastcc void @callee3(double %0, double %1, double %2, double %3,
                                  double %4, double %5, double %6, double %7,
                                  double %8, double %9, double %10)
diff --git a/llvm/test/CodeGen/Mips/fixdfsf.ll b/llvm/test/CodeGen/Mips/fixdfsf.ll
index 4271ac2..8695799 100644
--- a/llvm/test/CodeGen/Mips/fixdfsf.ll
+++ b/llvm/test/CodeGen/Mips/fixdfsf.ll
@@ -7,7 +7,7 @@
 ; Function Attrs: nounwind optsize
 define void @foo()  {
 entry:
-  %0 = load double* @x, align 8
+  %0 = load double, double* @x, align 8
   %conv = fptoui double %0 to i32
   store i32 %conv, i32* @y, align 4
 ; pic1:	lw	${{[0-9]+}}, %call16(__fixunsdfsi)(${{[0-9]+}})
diff --git a/llvm/test/CodeGen/Mips/fp-indexed-ls.ll b/llvm/test/CodeGen/Mips/fp-indexed-ls.ll
index 3ff9b37..ee6a7ed 100644
--- a/llvm/test/CodeGen/Mips/fp-indexed-ls.ll
+++ b/llvm/test/CodeGen/Mips/fp-indexed-ls.ll
@@ -46,7 +46,7 @@
 ; CHECK-NACL-NOT: lwxc1
 
   %arrayidx = getelementptr inbounds float, float* %b, i32 %o
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   ret float %0
 }
 
@@ -77,7 +77,7 @@
 ; CHECK-NACL-NOT: ldxc1
 
   %arrayidx = getelementptr inbounds double, double* %b, i32 %o
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   ret double %0
 }
 
@@ -101,7 +101,7 @@
 ; MIPS64R6-NOT:  luxc1
 
   %arrayidx1 = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
-  %0 = load float* %arrayidx1, align 1
+  %0 = load float, float* %arrayidx1, align 1
   ret float %0
 }
 
@@ -129,7 +129,7 @@
 
 ; CHECK-NACL-NOT: swxc1
 
-  %0 = load float* @gf, align 4
+  %0 = load float, float* @gf, align 4
   %arrayidx = getelementptr inbounds float, float* %b, i32 %o
   store float %0, float* %arrayidx, align 4
   ret void
@@ -159,7 +159,7 @@
 
 ; CHECK-NACL-NOT: sdxc1
 
-  %0 = load double* @gd, align 8
+  %0 = load double, double* @gd, align 8
   %arrayidx = getelementptr inbounds double, double* %b, i32 %o
   store double %0, double* %arrayidx, align 8
   ret void
@@ -179,7 +179,7 @@
 
 ; MIPS64R6-NOT:  suxc1
 
-  %0 = load float* @gf, align 4
+  %0 = load float, float* @gf, align 4
   %arrayidx1 = getelementptr inbounds [4 x %struct.S], [4 x %struct.S]* @s, i32 0, i32 %b, i32 0, i32 %c
   store float %0, float* %arrayidx1, align 1
   ret void
@@ -200,7 +200,7 @@
 ; MIPS64R6-NOT:  luxc1
 
   %arrayidx1 = getelementptr inbounds [4 x %struct.S2], [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
-  %0 = load double* %arrayidx1, align 1
+  %0 = load double, double* %arrayidx1, align 1
   ret double %0
 }
 
@@ -218,7 +218,7 @@
 
 ; MIPS64R6-NOT:  suxc1
 
-  %0 = load double* @gd, align 8
+  %0 = load double, double* @gd, align 8
   %arrayidx1 = getelementptr inbounds [4 x %struct.S2], [4 x %struct.S2]* @s2, i32 0, i32 %b, i32 0, i32 %c
   store double %0, double* %arrayidx1, align 1
   ret void
@@ -238,7 +238,7 @@
 
 ; MIPS64R6-NOT:  luxc1
 
-  %0 = load float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
+  %0 = load float, float* getelementptr inbounds (%struct.S3* @s3, i32 0, i32 1), align 1
   ret float %0
 }
 
diff --git a/llvm/test/CodeGen/Mips/fp-spill-reload.ll b/llvm/test/CodeGen/Mips/fp-spill-reload.ll
index 418a74c..4a53ad8 100644
--- a/llvm/test/CodeGen/Mips/fp-spill-reload.ll
+++ b/llvm/test/CodeGen/Mips/fp-spill-reload.ll
@@ -5,27 +5,27 @@
 entry:
 ; CHECK: sw  $fp
 ; CHECK: lw  $fp
-  %0 = load i32* %b, align 4
+  %0 = load i32, i32* %b, align 4
   %arrayidx.1 = getelementptr inbounds i32, i32* %b, i32 1
-  %1 = load i32* %arrayidx.1, align 4
+  %1 = load i32, i32* %arrayidx.1, align 4
   %add.1 = add nsw i32 %1, 1
   %arrayidx.2 = getelementptr inbounds i32, i32* %b, i32 2
-  %2 = load i32* %arrayidx.2, align 4
+  %2 = load i32, i32* %arrayidx.2, align 4
   %add.2 = add nsw i32 %2, 2
   %arrayidx.3 = getelementptr inbounds i32, i32* %b, i32 3
-  %3 = load i32* %arrayidx.3, align 4
+  %3 = load i32, i32* %arrayidx.3, align 4
   %add.3 = add nsw i32 %3, 3
   %arrayidx.4 = getelementptr inbounds i32, i32* %b, i32 4
-  %4 = load i32* %arrayidx.4, align 4
+  %4 = load i32, i32* %arrayidx.4, align 4
   %add.4 = add nsw i32 %4, 4
   %arrayidx.5 = getelementptr inbounds i32, i32* %b, i32 5
-  %5 = load i32* %arrayidx.5, align 4
+  %5 = load i32, i32* %arrayidx.5, align 4
   %add.5 = add nsw i32 %5, 5
   %arrayidx.6 = getelementptr inbounds i32, i32* %b, i32 6
-  %6 = load i32* %arrayidx.6, align 4
+  %6 = load i32, i32* %arrayidx.6, align 4
   %add.6 = add nsw i32 %6, 6
   %arrayidx.7 = getelementptr inbounds i32, i32* %b, i32 7
-  %7 = load i32* %arrayidx.7, align 4
+  %7 = load i32, i32* %arrayidx.7, align 4
   %add.7 = add nsw i32 %7, 7
   call void @foo2(i32 %0, i32 %add.1, i32 %add.2, i32 %add.3, i32 %add.4, i32 %add.5, i32 %add.6, i32 %add.7) nounwind
   call void bitcast (void (...)* @foo1 to void ()*)() nounwind
diff --git a/llvm/test/CodeGen/Mips/fp16instrinsmc.ll b/llvm/test/CodeGen/Mips/fp16instrinsmc.ll
index 84d3814..797be26 100644
--- a/llvm/test/CodeGen/Mips/fp16instrinsmc.ll
+++ b/llvm/test/CodeGen/Mips/fp16instrinsmc.ll
@@ -23,8 +23,8 @@
 ; fmask: .set	reorder
 ; fmask: .end	foo1
 entry:
-  %0 = load float* @x, align 4
-  %1 = load float* @one, align 4
+  %0 = load float, float* @x, align 4
+  %1 = load float, float* @one, align 4
   %call = call float @copysignf(float %0, float %1) #2
   store float %call, float* @y, align 4
   ret void
@@ -39,8 +39,8 @@
 ; fmask:	save	{{.*}}
 ; fmask:	.end	foo2
 entry:
-  %0 = load float* @x, align 4
-  %1 = load float* @negone, align 4
+  %0 = load float, float* @x, align 4
+  %1 = load float, float* @negone, align 4
   %call = call float @copysignf(float %0, float %1) #2
   store float %call, float* @y, align 4
   ret void
@@ -57,8 +57,8 @@
 ; fmask: .set	macro
 ; fmask: .set	reorder
 ; fmask: .end	foo3
-  %0 = load double* @xd, align 8
-  %1 = load float* @oned, align 4
+  %0 = load double, double* @xd, align 8
+  %1 = load float, float* @oned, align 4
   %conv = fpext float %1 to double
   %call = call double @copysign(double %0, double %conv) #2
   store double %call, double* @yd, align 8
@@ -74,8 +74,8 @@
 ; fmask:	.ent	foo4
 ; fmask:	save	{{.*}}
 ; fmask:	.end	foo4
-  %0 = load double* @xd, align 8
-  %1 = load double* @negoned, align 8
+  %0 = load double, double* @xd, align 8
+  %1 = load double, double* @negoned, align 8
   %call = call double @copysign(double %0, double %1) #2
   store double %call, double* @yd, align 8
   ret void
@@ -84,7 +84,7 @@
 ; Function Attrs: nounwind
 define void @foo5() #0 {
 entry:
-  %0 = load float* @xn, align 4
+  %0 = load float, float* @xn, align 4
   %call = call float @fabsf(float %0) #2
   store float %call, float* @y, align 4
   ret void
@@ -96,7 +96,7 @@
 ; Function Attrs: nounwind
 define void @foo6() #0 {
 entry:
-  %0 = load double* @xdn, align 8
+  %0 = load double, double* @xdn, align 8
   %call = call double @fabs(double %0) #2
   store double %call, double* @yd, align 8
   ret void
@@ -108,7 +108,7 @@
 ; Function Attrs: nounwind
 define void @foo7() #0 {
 entry:
-  %0 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
   %call = call float @sinf(float %0) #3
 ;pic:	lw	${{[0-9]+}}, %call16(sinf)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -122,7 +122,7 @@
 ; Function Attrs: nounwind
 define void @foo8() #0 {
 entry:
-  %0 = load double* @xd, align 8
+  %0 = load double, double* @xd, align 8
   %call = call double @sin(double %0) #3
 ;pic:	lw	${{[0-9]+}}, %call16(sin)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
@@ -136,7 +136,7 @@
 ; Function Attrs: nounwind
 define void @foo9() #0 {
 entry:
-  %0 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
   %call = call float @cosf(float %0) #3
 ;pic:	lw	${{[0-9]+}}, %call16(cosf)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -150,7 +150,7 @@
 ; Function Attrs: nounwind
 define void @foo10() #0 {
 entry:
-  %0 = load double* @xd, align 8
+  %0 = load double, double* @xd, align 8
   %call = call double @cos(double %0) #3
 ;pic:	lw	${{[0-9]+}}, %call16(cos)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
@@ -164,7 +164,7 @@
 ; Function Attrs: nounwind
 define void @foo11() #0 {
 entry:
-  %0 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
   %call = call float @sqrtf(float %0) #3
 ;pic:	lw	${{[0-9]+}}, %call16(sqrtf)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -178,7 +178,7 @@
 ; Function Attrs: nounwind
 define void @foo12() #0 {
 entry:
-  %0 = load double* @xd, align 8
+  %0 = load double, double* @xd, align 8
   %call = call double @sqrt(double %0) #3
 ;pic:	lw	${{[0-9]+}}, %call16(sqrt)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
@@ -192,7 +192,7 @@
 ; Function Attrs: nounwind
 define void @foo13() #0 {
 entry:
-  %0 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
   %call = call float @floorf(float %0) #2
 ;pic:	lw	${{[0-9]+}}, %call16(floorf)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -206,7 +206,7 @@
 ; Function Attrs: nounwind
 define void @foo14() #0 {
 entry:
-  %0 = load double* @xd, align 8
+  %0 = load double, double* @xd, align 8
   %call = call double @floor(double %0) #2
 ;pic:	lw	${{[0-9]+}}, %call16(floor)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
@@ -220,7 +220,7 @@
 ; Function Attrs: nounwind
 define void @foo15() #0 {
 entry:
-  %0 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
   %call = call float @nearbyintf(float %0) #2
 ;pic:	lw	${{[0-9]+}}, %call16(nearbyintf)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -234,7 +234,7 @@
 ; Function Attrs: nounwind
 define void @foo16() #0 {
 entry:
-  %0 = load double* @xd, align 8
+  %0 = load double, double* @xd, align 8
   %call = call double @nearbyint(double %0) #2
 ;pic:	lw	${{[0-9]+}}, %call16(nearbyint)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
@@ -248,7 +248,7 @@
 ; Function Attrs: nounwind
 define void @foo17() #0 {
 entry:
-  %0 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
   %call = call float @ceilf(float %0) #2
 ;pic:	lw	${{[0-9]+}}, %call16(ceilf)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -262,7 +262,7 @@
 ; Function Attrs: nounwind
 define void @foo18() #0 {
 entry:
-  %0 = load double* @xd, align 8
+  %0 = load double, double* @xd, align 8
   %call = call double @ceil(double %0) #2
 ;pic:	lw	${{[0-9]+}}, %call16(ceil)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
@@ -276,7 +276,7 @@
 ; Function Attrs: nounwind
 define void @foo19() #0 {
 entry:
-  %0 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
   %call = call float @rintf(float %0) #2
 ;pic:	lw	${{[0-9]+}}, %call16(rintf)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -290,7 +290,7 @@
 ; Function Attrs: nounwind
 define void @foo20() #0 {
 entry:
-  %0 = load double* @xd, align 8
+  %0 = load double, double* @xd, align 8
   %call = call double @rint(double %0) #2
 ;pic:	lw	${{[0-9]+}}, %call16(rint)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
@@ -304,7 +304,7 @@
 ; Function Attrs: nounwind
 define void @foo21() #0 {
 entry:
-  %0 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
   %call = call float @truncf(float %0) #2
 ;pic:	lw	${{[0-9]+}}, %call16(truncf)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -318,7 +318,7 @@
 ; Function Attrs: nounwind
 define void @foo22() #0 {
 entry:
-  %0 = load double* @xd, align 8
+  %0 = load double, double* @xd, align 8
   %call = call double @trunc(double %0) #2
 ;pic:	lw	${{[0-9]+}}, %call16(trunc)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
@@ -332,7 +332,7 @@
 ; Function Attrs: nounwind
 define void @foo23() #0 {
 entry:
-  %0 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
   %call = call float @log2f(float %0) #3
 ;pic:	lw	${{[0-9]+}}, %call16(log2f)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -346,7 +346,7 @@
 ; Function Attrs: nounwind
 define void @foo24() #0 {
 entry:
-  %0 = load double* @xd, align 8
+  %0 = load double, double* @xd, align 8
   %call = call double @log2(double %0) #3
 ;pic:	lw	${{[0-9]+}}, %call16(log2)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
@@ -360,7 +360,7 @@
 ; Function Attrs: nounwind
 define void @foo25() #0 {
 entry:
-  %0 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
   %call = call float @exp2f(float %0) #3
 ;pic:	lw	${{[0-9]+}}, %call16(exp2f)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_sf_1)(${{[0-9]+}})
@@ -374,7 +374,7 @@
 ; Function Attrs: nounwind
 define void @foo26() #0 {
 entry:
-  %0 = load double* @xd, align 8
+  %0 = load double, double* @xd, align 8
   %call = call double @exp2(double %0) #3
 ;pic:	lw	${{[0-9]+}}, %call16(exp2)(${{[0-9]+}})
 ;pic:	lw	${{[0-9]+}}, %got(__mips16_call_stub_df_2)(${{[0-9]+}})
diff --git a/llvm/test/CodeGen/Mips/fp16static.ll b/llvm/test/CodeGen/Mips/fp16static.ll
index beb063d..4e5059e 100644
--- a/llvm/test/CodeGen/Mips/fp16static.ll
+++ b/llvm/test/CodeGen/Mips/fp16static.ll
@@ -4,8 +4,8 @@
 
 define void @foo() nounwind {
 entry:
-  %0 = load float* @x, align 4
-  %1 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
+  %1 = load float, float* @x, align 4
   %mul = fmul float %0, %1
   store float %mul, float* @x, align 4
 ; CHECK-STATIC16: jal	__mips16_mulsf3
diff --git a/llvm/test/CodeGen/Mips/fpneeded.ll b/llvm/test/CodeGen/Mips/fpneeded.ll
index fdd8e8f..a89e2a5 100644
--- a/llvm/test/CodeGen/Mips/fpneeded.ll
+++ b/llvm/test/CodeGen/Mips/fpneeded.ll
@@ -76,8 +76,8 @@
 define void @foo1() #0 {
 entry:
   store float 1.000000e+00, float* @zz, align 4
-  %0 = load float* @y, align 4
-  %1 = load float* @x, align 4
+  %0 = load float, float* @y, align 4
+  %1 = load float, float* @x, align 4
   %add = fadd float %0, %1
   store float %add, float* @z, align 4
   ret void
@@ -96,7 +96,7 @@
 
 define void @foo2() #0 {
 entry:
-  %0 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
   call void @vf(float %0)
   ret void
 }
diff --git a/llvm/test/CodeGen/Mips/fpnotneeded.ll b/llvm/test/CodeGen/Mips/fpnotneeded.ll
index e12d7ba..02b8e8a 100644
--- a/llvm/test/CodeGen/Mips/fpnotneeded.ll
+++ b/llvm/test/CodeGen/Mips/fpnotneeded.ll
@@ -19,7 +19,7 @@
 
 define i32 @iv() #0 {
 entry:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   ret i32 %0
 }
 
diff --git a/llvm/test/CodeGen/Mips/global-address.ll b/llvm/test/CodeGen/Mips/global-address.ll
index ae6afeb..ecf5e56 100644
--- a/llvm/test/CodeGen/Mips/global-address.ll
+++ b/llvm/test/CodeGen/Mips/global-address.ll
@@ -33,9 +33,9 @@
 ; STATIC-N64: lw  ${{[0-9]+}}, %got_ofst(s1)($[[R1]])
 ; STATIC-N64: ld  ${{[0-9]+}}, %got_disp(g1)
 
-  %0 = load i32* @s1, align 4
+  %0 = load i32, i32* @s1, align 4
   tail call void @foo1(i32 %0) nounwind
-  %1 = load i32* @g1, align 4
+  %1 = load i32, i32* @g1, align 4
   store i32 %1, i32* @s1, align 4
   %add = add nsw i32 %1, 2
   store i32 %add, i32* @g1, align 4
diff --git a/llvm/test/CodeGen/Mips/gpreg-lazy-binding.ll b/llvm/test/CodeGen/Mips/gpreg-lazy-binding.ll
index 3a636d8..800a74f 100644
--- a/llvm/test/CodeGen/Mips/gpreg-lazy-binding.ll
+++ b/llvm/test/CodeGen/Mips/gpreg-lazy-binding.ll
@@ -19,7 +19,7 @@
 
 define internal fastcc void @internalFunc() nounwind noinline {
 entry:
-  %0 = load i32* @g, align 4
+  %0 = load i32, i32* @g, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* @g, align 4
   ret void
diff --git a/llvm/test/CodeGen/Mips/gprestore.ll b/llvm/test/CodeGen/Mips/gprestore.ll
index cbcf0c9..0b005ab 100644
--- a/llvm/test/CodeGen/Mips/gprestore.ll
+++ b/llvm/test/CodeGen/Mips/gprestore.ll
@@ -18,10 +18,10 @@
 ; CHECK-NOT: got({{.*}})($gp)
 ; CHECK: lw $gp
   tail call void (...)* @f1() nounwind
-  %tmp = load i32* @p, align 4
+  %tmp = load i32, i32* @p, align 4
   tail call void @f2(i32 %tmp) nounwind
-  %tmp1 = load i32* @q, align 4
-  %tmp2 = load i32* @r, align 4
+  %tmp1 = load i32, i32* @q, align 4
+  %tmp2 = load i32, i32* @r, align 4
   tail call void @f3(i32 %tmp1, i32 %tmp2) nounwind
   ret void
 }
diff --git a/llvm/test/CodeGen/Mips/hf16_1.ll b/llvm/test/CodeGen/Mips/hf16_1.ll
index 9879cd5..103fd2d 100644
--- a/llvm/test/CodeGen/Mips/hf16_1.ll
+++ b/llvm/test/CodeGen/Mips/hf16_1.ll
@@ -11,96 +11,96 @@
 
 define void @foo() nounwind {
 entry:
-  %0 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
   call void @v_sf(float %0)
-  %1 = load double* @xd, align 8
+  %1 = load double, double* @xd, align 8
   call void @v_df(double %1)
-  %2 = load float* @x, align 4
-  %3 = load float* @y, align 4
+  %2 = load float, float* @x, align 4
+  %3 = load float, float* @y, align 4
   call void @v_sf_sf(float %2, float %3)
-  %4 = load double* @xd, align 8
-  %5 = load float* @x, align 4
+  %4 = load double, double* @xd, align 8
+  %5 = load float, float* @x, align 4
   call void @v_df_sf(double %4, float %5)
-  %6 = load double* @xd, align 8
-  %7 = load double* @yd, align 8
+  %6 = load double, double* @xd, align 8
+  %7 = load double, double* @yd, align 8
   call void @v_df_df(double %6, double %7)
   %call = call float @sf_v()
-  %8 = load float* @x, align 4
+  %8 = load float, float* @x, align 4
   %call1 = call float @sf_sf(float %8)
-  %9 = load double* @xd, align 8
+  %9 = load double, double* @xd, align 8
   %call2 = call float @sf_df(double %9)
-  %10 = load float* @x, align 4
-  %11 = load float* @y, align 4
+  %10 = load float, float* @x, align 4
+  %11 = load float, float* @y, align 4
   %call3 = call float @sf_sf_sf(float %10, float %11)
-  %12 = load double* @xd, align 8
-  %13 = load float* @x, align 4
+  %12 = load double, double* @xd, align 8
+  %13 = load float, float* @x, align 4
   %call4 = call float @sf_df_sf(double %12, float %13)
-  %14 = load double* @xd, align 8
-  %15 = load double* @yd, align 8
+  %14 = load double, double* @xd, align 8
+  %15 = load double, double* @yd, align 8
   %call5 = call float @sf_df_df(double %14, double %15)
   %call6 = call double @df_v()
-  %16 = load float* @x, align 4
+  %16 = load float, float* @x, align 4
   %call7 = call double @df_sf(float %16)
-  %17 = load double* @xd, align 8
+  %17 = load double, double* @xd, align 8
   %call8 = call double @df_df(double %17)
-  %18 = load float* @x, align 4
-  %19 = load float* @y, align 4
+  %18 = load float, float* @x, align 4
+  %19 = load float, float* @y, align 4
   %call9 = call double @df_sf_sf(float %18, float %19)
-  %20 = load double* @xd, align 8
-  %21 = load float* @x, align 4
+  %20 = load double, double* @xd, align 8
+  %21 = load float, float* @x, align 4
   %call10 = call double @df_df_sf(double %20, float %21)
-  %22 = load double* @xd, align 8
-  %23 = load double* @yd, align 8
+  %22 = load double, double* @xd, align 8
+  %23 = load double, double* @yd, align 8
   %call11 = call double @df_df_df(double %22, double %23)
   %call12 = call { float, float } @sc_v()
   %24 = extractvalue { float, float } %call12, 0
   %25 = extractvalue { float, float } %call12, 1
-  %26 = load float* @x, align 4
+  %26 = load float, float* @x, align 4
   %call13 = call { float, float } @sc_sf(float %26)
   %27 = extractvalue { float, float } %call13, 0
   %28 = extractvalue { float, float } %call13, 1
-  %29 = load double* @xd, align 8
+  %29 = load double, double* @xd, align 8
   %call14 = call { float, float } @sc_df(double %29)
   %30 = extractvalue { float, float } %call14, 0
   %31 = extractvalue { float, float } %call14, 1
-  %32 = load float* @x, align 4
-  %33 = load float* @y, align 4
+  %32 = load float, float* @x, align 4
+  %33 = load float, float* @y, align 4
   %call15 = call { float, float } @sc_sf_sf(float %32, float %33)
   %34 = extractvalue { float, float } %call15, 0
   %35 = extractvalue { float, float } %call15, 1
-  %36 = load double* @xd, align 8
-  %37 = load float* @x, align 4
+  %36 = load double, double* @xd, align 8
+  %37 = load float, float* @x, align 4
   %call16 = call { float, float } @sc_df_sf(double %36, float %37)
   %38 = extractvalue { float, float } %call16, 0
   %39 = extractvalue { float, float } %call16, 1
-  %40 = load double* @xd, align 8
-  %41 = load double* @yd, align 8
+  %40 = load double, double* @xd, align 8
+  %41 = load double, double* @yd, align 8
   %call17 = call { float, float } @sc_df_df(double %40, double %41)
   %42 = extractvalue { float, float } %call17, 0
   %43 = extractvalue { float, float } %call17, 1
   %call18 = call { double, double } @dc_v()
   %44 = extractvalue { double, double } %call18, 0
   %45 = extractvalue { double, double } %call18, 1
-  %46 = load float* @x, align 4
+  %46 = load float, float* @x, align 4
   %call19 = call { double, double } @dc_sf(float %46)
   %47 = extractvalue { double, double } %call19, 0
   %48 = extractvalue { double, double } %call19, 1
-  %49 = load double* @xd, align 8
+  %49 = load double, double* @xd, align 8
   %call20 = call { double, double } @dc_df(double %49)
   %50 = extractvalue { double, double } %call20, 0
   %51 = extractvalue { double, double } %call20, 1
-  %52 = load float* @x, align 4
-  %53 = load float* @y, align 4
+  %52 = load float, float* @x, align 4
+  %53 = load float, float* @y, align 4
   %call21 = call { double, double } @dc_sf_sf(float %52, float %53)
   %54 = extractvalue { double, double } %call21, 0
   %55 = extractvalue { double, double } %call21, 1
-  %56 = load double* @xd, align 8
-  %57 = load float* @x, align 4
+  %56 = load double, double* @xd, align 8
+  %57 = load float, float* @x, align 4
   %call22 = call { double, double } @dc_df_sf(double %56, float %57)
   %58 = extractvalue { double, double } %call22, 0
   %59 = extractvalue { double, double } %call22, 1
-  %60 = load double* @xd, align 8
-  %61 = load double* @yd, align 8
+  %60 = load double, double* @xd, align 8
+  %61 = load double, double* @yd, align 8
   %call23 = call { double, double } @dc_df_df(double %60, double %61)
   %62 = extractvalue { double, double } %call23, 0
   %63 = extractvalue { double, double } %call23, 1
diff --git a/llvm/test/CodeGen/Mips/hf16call32.ll b/llvm/test/CodeGen/Mips/hf16call32.ll
index aec9c71..035479c 100644
--- a/llvm/test/CodeGen/Mips/hf16call32.ll
+++ b/llvm/test/CodeGen/Mips/hf16call32.ll
@@ -67,50 +67,50 @@
   store i32 0, i32* %retval
   call void @clear()
   store float 1.500000e+00, float* @lx, align 4
-  %0 = load float* @lx, align 4
+  %0 = load float, float* @lx, align 4
   call void @v_sf(float %0)
-  %1 = load float* @x, align 4
+  %1 = load float, float* @x, align 4
   %conv = fpext float %1 to double
-  %2 = load float* @lx, align 4
+  %2 = load float, float* @lx, align 4
   %conv1 = fpext float %2 to double
-  %3 = load float* @x, align 4
-  %4 = load float* @lx, align 4
+  %3 = load float, float* @x, align 4
+  %4 = load float, float* @lx, align 4
   %cmp = fcmp oeq float %3, %4
   %conv2 = zext i1 %cmp to i32
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %conv, double %conv1, i32 %conv2)
   call void @clear()
   store double 0x41678C29C0000000, double* @lxd, align 8
-  %5 = load double* @lxd, align 8
+  %5 = load double, double* @lxd, align 8
   call void @v_df(double %5)
-  %6 = load double* @xd, align 8
-  %7 = load double* @lxd, align 8
-  %8 = load double* @xd, align 8
-  %9 = load double* @lxd, align 8
+  %6 = load double, double* @xd, align 8
+  %7 = load double, double* @lxd, align 8
+  %8 = load double, double* @xd, align 8
+  %9 = load double, double* @lxd, align 8
   %cmp3 = fcmp oeq double %8, %9
   %conv4 = zext i1 %cmp3 to i32
   %call5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %6, double %7, i32 %conv4)
   call void @clear()
   store float 9.000000e+00, float* @lx, align 4
   store float 1.000000e+01, float* @ly, align 4
-  %10 = load float* @lx, align 4
-  %11 = load float* @ly, align 4
+  %10 = load float, float* @lx, align 4
+  %11 = load float, float* @ly, align 4
   call void @v_sf_sf(float %10, float %11)
-  %12 = load float* @x, align 4
+  %12 = load float, float* @x, align 4
   %conv6 = fpext float %12 to double
-  %13 = load float* @lx, align 4
+  %13 = load float, float* @lx, align 4
   %conv7 = fpext float %13 to double
-  %14 = load float* @y, align 4
+  %14 = load float, float* @y, align 4
   %conv8 = fpext float %14 to double
-  %15 = load float* @ly, align 4
+  %15 = load float, float* @ly, align 4
   %conv9 = fpext float %15 to double
-  %16 = load float* @x, align 4
-  %17 = load float* @lx, align 4
+  %16 = load float, float* @x, align 4
+  %17 = load float, float* @lx, align 4
   %cmp10 = fcmp oeq float %16, %17
   br i1 %cmp10, label %land.rhs, label %land.end
 
 land.rhs:                                         ; preds = %entry
-  %18 = load float* @y, align 4
-  %19 = load float* @ly, align 4
+  %18 = load float, float* @y, align 4
+  %19 = load float, float* @ly, align 4
   %cmp12 = fcmp oeq float %18, %19
   br label %land.end
 
@@ -121,21 +121,21 @@
   call void @clear()
   store float 0x3FFE666660000000, float* @lx, align 4
   store double 0x4007E613249FF279, double* @lyd, align 8
-  %21 = load float* @lx, align 4
-  %22 = load double* @lyd, align 8
+  %21 = load float, float* @lx, align 4
+  %22 = load double, double* @lyd, align 8
   call void @v_sf_df(float %21, double %22)
-  %23 = load float* @x, align 4
+  %23 = load float, float* @x, align 4
   %conv15 = fpext float %23 to double
-  %24 = load float* @lx, align 4
+  %24 = load float, float* @lx, align 4
   %conv16 = fpext float %24 to double
-  %25 = load double* @yd, align 8
-  %26 = load double* @lyd, align 8
-  %27 = load float* @x, align 4
-  %28 = load float* @lx, align 4
+  %25 = load double, double* @yd, align 8
+  %26 = load double, double* @lyd, align 8
+  %27 = load float, float* @x, align 4
+  %28 = load float, float* @lx, align 4
   %cmp17 = fcmp oeq float %27, %28
   %conv18 = zext i1 %cmp17 to i32
-  %29 = load double* @yd, align 8
-  %30 = load double* @lyd, align 8
+  %29 = load double, double* @yd, align 8
+  %30 = load double, double* @lyd, align 8
   %cmp19 = fcmp oeq double %29, %30
   %conv20 = zext i1 %cmp19 to i32
   %and = and i32 %conv18, %conv20
@@ -143,21 +143,21 @@
   call void @clear()
   store double 0x4194E54F94000000, double* @lxd, align 8
   store float 7.600000e+01, float* @ly, align 4
-  %31 = load double* @lxd, align 8
-  %32 = load float* @ly, align 4
+  %31 = load double, double* @lxd, align 8
+  %32 = load float, float* @ly, align 4
   call void @v_df_sf(double %31, float %32)
-  %33 = load double* @xd, align 8
-  %34 = load double* @lxd, align 8
-  %35 = load float* @y, align 4
+  %33 = load double, double* @xd, align 8
+  %34 = load double, double* @lxd, align 8
+  %35 = load float, float* @y, align 4
   %conv22 = fpext float %35 to double
-  %36 = load float* @ly, align 4
+  %36 = load float, float* @ly, align 4
   %conv23 = fpext float %36 to double
-  %37 = load double* @xd, align 8
-  %38 = load double* @lxd, align 8
+  %37 = load double, double* @xd, align 8
+  %38 = load double, double* @lxd, align 8
   %cmp24 = fcmp oeq double %37, %38
   %conv25 = zext i1 %cmp24 to i32
-  %39 = load float* @y, align 4
-  %40 = load float* @ly, align 4
+  %39 = load float, float* @y, align 4
+  %40 = load float, float* @ly, align 4
   %cmp26 = fcmp oeq float %39, %40
   %conv27 = zext i1 %cmp26 to i32
   %and28 = and i32 %conv25, %conv27
@@ -165,19 +165,19 @@
   call void @clear()
   store double 7.365198e+07, double* @lxd, align 8
   store double 0x416536CD80000000, double* @lyd, align 8
-  %41 = load double* @lxd, align 8
-  %42 = load double* @lyd, align 8
+  %41 = load double, double* @lxd, align 8
+  %42 = load double, double* @lyd, align 8
   call void @v_df_df(double %41, double %42)
-  %43 = load double* @xd, align 8
-  %44 = load double* @lxd, align 8
-  %45 = load double* @yd, align 8
-  %46 = load double* @lyd, align 8
-  %47 = load double* @xd, align 8
-  %48 = load double* @lxd, align 8
+  %43 = load double, double* @xd, align 8
+  %44 = load double, double* @lxd, align 8
+  %45 = load double, double* @yd, align 8
+  %46 = load double, double* @lyd, align 8
+  %47 = load double, double* @xd, align 8
+  %48 = load double, double* @lxd, align 8
   %cmp30 = fcmp oeq double %47, %48
   %conv31 = zext i1 %cmp30 to i32
-  %49 = load double* @yd, align 8
-  %50 = load double* @lyd, align 8
+  %49 = load double, double* @yd, align 8
+  %50 = load double, double* @lyd, align 8
   %cmp32 = fcmp oeq double %49, %50
   %conv33 = zext i1 %cmp32 to i32
   %and34 = and i32 %conv31, %conv33
@@ -186,35 +186,35 @@
   store float 0x4016666660000000, float* @ret_sf, align 4
   %call36 = call float @sf_v()
   store float %call36, float* @lret_sf, align 4
-  %51 = load float* @ret_sf, align 4
+  %51 = load float, float* @ret_sf, align 4
   %conv37 = fpext float %51 to double
-  %52 = load float* @lret_sf, align 4
+  %52 = load float, float* @lret_sf, align 4
   %conv38 = fpext float %52 to double
-  %53 = load float* @ret_sf, align 4
-  %54 = load float* @lret_sf, align 4
+  %53 = load float, float* @ret_sf, align 4
+  %54 = load float, float* @lret_sf, align 4
   %cmp39 = fcmp oeq float %53, %54
   %conv40 = zext i1 %cmp39 to i32
   %call41 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %conv37, double %conv38, i32 %conv40)
   call void @clear()
   store float 4.587300e+06, float* @ret_sf, align 4
   store float 3.420000e+02, float* @lx, align 4
-  %55 = load float* @lx, align 4
+  %55 = load float, float* @lx, align 4
   %call42 = call float @sf_sf(float %55)
   store float %call42, float* @lret_sf, align 4
-  %56 = load float* @ret_sf, align 4
+  %56 = load float, float* @ret_sf, align 4
   %conv43 = fpext float %56 to double
-  %57 = load float* @lret_sf, align 4
+  %57 = load float, float* @lret_sf, align 4
   %conv44 = fpext float %57 to double
-  %58 = load float* @x, align 4
+  %58 = load float, float* @x, align 4
   %conv45 = fpext float %58 to double
-  %59 = load float* @lx, align 4
+  %59 = load float, float* @lx, align 4
   %conv46 = fpext float %59 to double
-  %60 = load float* @ret_sf, align 4
-  %61 = load float* @lret_sf, align 4
+  %60 = load float, float* @ret_sf, align 4
+  %61 = load float, float* @lret_sf, align 4
   %cmp47 = fcmp oeq float %60, %61
   %conv48 = zext i1 %cmp47 to i32
-  %62 = load float* @x, align 4
-  %63 = load float* @lx, align 4
+  %62 = load float, float* @x, align 4
+  %63 = load float, float* @lx, align 4
   %cmp49 = fcmp oeq float %62, %63
   %conv50 = zext i1 %cmp49 to i32
   %and51 = and i32 %conv48, %conv50
@@ -222,21 +222,21 @@
   call void @clear()
   store float 4.445910e+06, float* @ret_sf, align 4
   store double 0x419A7DB294000000, double* @lxd, align 8
-  %64 = load double* @lxd, align 8
+  %64 = load double, double* @lxd, align 8
   %call53 = call float @sf_df(double %64)
   store float %call53, float* @lret_sf, align 4
-  %65 = load float* @ret_sf, align 4
+  %65 = load float, float* @ret_sf, align 4
   %conv54 = fpext float %65 to double
-  %66 = load float* @lret_sf, align 4
+  %66 = load float, float* @lret_sf, align 4
   %conv55 = fpext float %66 to double
-  %67 = load double* @xd, align 8
-  %68 = load double* @lxd, align 8
-  %69 = load float* @ret_sf, align 4
-  %70 = load float* @lret_sf, align 4
+  %67 = load double, double* @xd, align 8
+  %68 = load double, double* @lxd, align 8
+  %69 = load float, float* @ret_sf, align 4
+  %70 = load float, float* @lret_sf, align 4
   %cmp56 = fcmp oeq float %69, %70
   %conv57 = zext i1 %cmp56 to i32
-  %71 = load double* @xd, align 8
-  %72 = load double* @lxd, align 8
+  %71 = load double, double* @xd, align 8
+  %72 = load double, double* @lxd, align 8
   %cmp58 = fcmp oeq double %71, %72
   %conv59 = zext i1 %cmp58 to i32
   %and60 = and i32 %conv57, %conv59
@@ -245,36 +245,36 @@
   store float 0x3FFF4BC6A0000000, float* @ret_sf, align 4
   store float 4.445500e+03, float* @lx, align 4
   store float 0x4068ACCCC0000000, float* @ly, align 4
-  %73 = load float* @lx, align 4
-  %74 = load float* @ly, align 4
+  %73 = load float, float* @lx, align 4
+  %74 = load float, float* @ly, align 4
   %call62 = call float @sf_sf_sf(float %73, float %74)
   store float %call62, float* @lret_sf, align 4
-  %75 = load float* @ret_sf, align 4
+  %75 = load float, float* @ret_sf, align 4
   %conv63 = fpext float %75 to double
-  %76 = load float* @lret_sf, align 4
+  %76 = load float, float* @lret_sf, align 4
   %conv64 = fpext float %76 to double
-  %77 = load float* @x, align 4
+  %77 = load float, float* @x, align 4
   %conv65 = fpext float %77 to double
-  %78 = load float* @lx, align 4
+  %78 = load float, float* @lx, align 4
   %conv66 = fpext float %78 to double
-  %79 = load float* @y, align 4
+  %79 = load float, float* @y, align 4
   %conv67 = fpext float %79 to double
-  %80 = load float* @ly, align 4
+  %80 = load float, float* @ly, align 4
   %conv68 = fpext float %80 to double
-  %81 = load float* @ret_sf, align 4
-  %82 = load float* @lret_sf, align 4
+  %81 = load float, float* @ret_sf, align 4
+  %82 = load float, float* @lret_sf, align 4
   %cmp69 = fcmp oeq float %81, %82
   br i1 %cmp69, label %land.lhs.true, label %land.end76
 
 land.lhs.true:                                    ; preds = %land.end
-  %83 = load float* @x, align 4
-  %84 = load float* @lx, align 4
+  %83 = load float, float* @x, align 4
+  %84 = load float, float* @lx, align 4
   %cmp71 = fcmp oeq float %83, %84
   br i1 %cmp71, label %land.rhs73, label %land.end76
 
 land.rhs73:                                       ; preds = %land.lhs.true
-  %85 = load float* @y, align 4
-  %86 = load float* @ly, align 4
+  %85 = load float, float* @y, align 4
+  %86 = load float, float* @ly, align 4
   %cmp74 = fcmp oeq float %85, %86
   br label %land.end76
 
@@ -286,34 +286,34 @@
   store float 9.991300e+04, float* @ret_sf, align 4
   store float 1.114500e+04, float* @lx, align 4
   store double 9.994445e+07, double* @lyd, align 8
-  %88 = load float* @lx, align 4
-  %89 = load double* @lyd, align 8
+  %88 = load float, float* @lx, align 4
+  %89 = load double, double* @lyd, align 8
   %call79 = call float @sf_sf_df(float %88, double %89)
   store float %call79, float* @lret_sf, align 4
-  %90 = load float* @ret_sf, align 4
+  %90 = load float, float* @ret_sf, align 4
   %conv80 = fpext float %90 to double
-  %91 = load float* @lret_sf, align 4
+  %91 = load float, float* @lret_sf, align 4
   %conv81 = fpext float %91 to double
-  %92 = load float* @x, align 4
+  %92 = load float, float* @x, align 4
   %conv82 = fpext float %92 to double
-  %93 = load float* @lx, align 4
+  %93 = load float, float* @lx, align 4
   %conv83 = fpext float %93 to double
-  %94 = load double* @yd, align 8
-  %95 = load double* @lyd, align 8
-  %96 = load float* @ret_sf, align 4
-  %97 = load float* @lret_sf, align 4
+  %94 = load double, double* @yd, align 8
+  %95 = load double, double* @lyd, align 8
+  %96 = load float, float* @ret_sf, align 4
+  %97 = load float, float* @lret_sf, align 4
   %cmp84 = fcmp oeq float %96, %97
   br i1 %cmp84, label %land.lhs.true86, label %land.end92
 
 land.lhs.true86:                                  ; preds = %land.end76
-  %98 = load float* @x, align 4
-  %99 = load float* @lx, align 4
+  %98 = load float, float* @x, align 4
+  %99 = load float, float* @lx, align 4
   %cmp87 = fcmp oeq float %98, %99
   br i1 %cmp87, label %land.rhs89, label %land.end92
 
 land.rhs89:                                       ; preds = %land.lhs.true86
-  %100 = load double* @yd, align 8
-  %101 = load double* @lyd, align 8
+  %100 = load double, double* @yd, align 8
+  %101 = load double, double* @lyd, align 8
   %cmp90 = fcmp oeq double %100, %101
   br label %land.end92
 
@@ -325,34 +325,34 @@
   store float 0x417CCC7A00000000, float* @ret_sf, align 4
   store double 0x4172034530000000, double* @lxd, align 8
   store float 4.456200e+04, float* @ly, align 4
-  %103 = load double* @lxd, align 8
-  %104 = load float* @ly, align 4
+  %103 = load double, double* @lxd, align 8
+  %104 = load float, float* @ly, align 4
   %call95 = call float @sf_df_sf(double %103, float %104)
   store float %call95, float* @lret_sf, align 4
-  %105 = load float* @ret_sf, align 4
+  %105 = load float, float* @ret_sf, align 4
   %conv96 = fpext float %105 to double
-  %106 = load float* @lret_sf, align 4
+  %106 = load float, float* @lret_sf, align 4
   %conv97 = fpext float %106 to double
-  %107 = load double* @xd, align 8
-  %108 = load double* @lxd, align 8
-  %109 = load float* @y, align 4
+  %107 = load double, double* @xd, align 8
+  %108 = load double, double* @lxd, align 8
+  %109 = load float, float* @y, align 4
   %conv98 = fpext float %109 to double
-  %110 = load float* @ly, align 4
+  %110 = load float, float* @ly, align 4
   %conv99 = fpext float %110 to double
-  %111 = load float* @ret_sf, align 4
-  %112 = load float* @lret_sf, align 4
+  %111 = load float, float* @ret_sf, align 4
+  %112 = load float, float* @lret_sf, align 4
   %cmp100 = fcmp oeq float %111, %112
   br i1 %cmp100, label %land.lhs.true102, label %land.end108
 
 land.lhs.true102:                                 ; preds = %land.end92
-  %113 = load double* @xd, align 8
-  %114 = load double* @lxd, align 8
+  %113 = load double, double* @xd, align 8
+  %114 = load double, double* @lxd, align 8
   %cmp103 = fcmp oeq double %113, %114
   br i1 %cmp103, label %land.rhs105, label %land.end108
 
 land.rhs105:                                      ; preds = %land.lhs.true102
-  %115 = load float* @y, align 4
-  %116 = load float* @ly, align 4
+  %115 = load float, float* @y, align 4
+  %116 = load float, float* @ly, align 4
   %cmp106 = fcmp oeq float %115, %116
   br label %land.end108
 
@@ -364,32 +364,32 @@
   store float 3.987721e+06, float* @ret_sf, align 4
   store double 0x3FF1F49F6DDDC2D8, double* @lxd, align 8
   store double 0x409129F306A2B170, double* @lyd, align 8
-  %118 = load double* @lxd, align 8
-  %119 = load double* @lyd, align 8
+  %118 = load double, double* @lxd, align 8
+  %119 = load double, double* @lyd, align 8
   %call111 = call float @sf_df_df(double %118, double %119)
   store float %call111, float* @lret_sf, align 4
-  %120 = load float* @ret_sf, align 4
+  %120 = load float, float* @ret_sf, align 4
   %conv112 = fpext float %120 to double
-  %121 = load float* @lret_sf, align 4
+  %121 = load float, float* @lret_sf, align 4
   %conv113 = fpext float %121 to double
-  %122 = load double* @xd, align 8
-  %123 = load double* @lxd, align 8
-  %124 = load double* @yd, align 8
-  %125 = load double* @lyd, align 8
-  %126 = load float* @ret_sf, align 4
-  %127 = load float* @lret_sf, align 4
+  %122 = load double, double* @xd, align 8
+  %123 = load double, double* @lxd, align 8
+  %124 = load double, double* @yd, align 8
+  %125 = load double, double* @lyd, align 8
+  %126 = load float, float* @ret_sf, align 4
+  %127 = load float, float* @lret_sf, align 4
   %cmp114 = fcmp oeq float %126, %127
   br i1 %cmp114, label %land.lhs.true116, label %land.end122
 
 land.lhs.true116:                                 ; preds = %land.end108
-  %128 = load double* @xd, align 8
-  %129 = load double* @lxd, align 8
+  %128 = load double, double* @xd, align 8
+  %129 = load double, double* @lxd, align 8
   %cmp117 = fcmp oeq double %128, %129
   br i1 %cmp117, label %land.rhs119, label %land.end122
 
 land.rhs119:                                      ; preds = %land.lhs.true116
-  %130 = load double* @yd, align 8
-  %131 = load double* @lyd, align 8
+  %130 = load double, double* @yd, align 8
+  %131 = load double, double* @lyd, align 8
   %cmp120 = fcmp oeq double %130, %131
   br label %land.end122
 
@@ -401,31 +401,31 @@
   store double 1.561234e+01, double* @ret_df, align 8
   %call125 = call double @df_v()
   store double %call125, double* @lret_df, align 8
-  %133 = load double* @ret_df, align 8
-  %134 = load double* @lret_df, align 8
-  %135 = load double* @ret_df, align 8
-  %136 = load double* @lret_df, align 8
+  %133 = load double, double* @ret_df, align 8
+  %134 = load double, double* @lret_df, align 8
+  %135 = load double, double* @ret_df, align 8
+  %136 = load double, double* @lret_df, align 8
   %cmp126 = fcmp oeq double %135, %136
   %conv127 = zext i1 %cmp126 to i32
   %call128 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i32 0, i32 0), double %133, double %134, i32 %conv127)
   call void @clear()
   store double 1.345873e+01, double* @ret_df, align 8
   store float 3.434520e+05, float* @lx, align 4
-  %137 = load float* @lx, align 4
+  %137 = load float, float* @lx, align 4
   %call129 = call double @df_sf(float %137)
   store double %call129, double* @lret_df, align 8
-  %138 = load double* @ret_df, align 8
-  %139 = load double* @lret_df, align 8
-  %140 = load float* @x, align 4
+  %138 = load double, double* @ret_df, align 8
+  %139 = load double, double* @lret_df, align 8
+  %140 = load float, float* @x, align 4
   %conv130 = fpext float %140 to double
-  %141 = load float* @lx, align 4
+  %141 = load float, float* @lx, align 4
   %conv131 = fpext float %141 to double
-  %142 = load double* @ret_df, align 8
-  %143 = load double* @lret_df, align 8
+  %142 = load double, double* @ret_df, align 8
+  %143 = load double, double* @lret_df, align 8
   %cmp132 = fcmp oeq double %142, %143
   %conv133 = zext i1 %cmp132 to i32
-  %144 = load float* @x, align 4
-  %145 = load float* @lx, align 4
+  %144 = load float, float* @x, align 4
+  %145 = load float, float* @lx, align 4
   %cmp134 = fcmp oeq float %144, %145
   %conv135 = zext i1 %cmp134 to i32
   %and136 = and i32 %conv133, %conv135
@@ -433,19 +433,19 @@
   call void @clear()
   store double 0x4084F3AB7AA25D8D, double* @ret_df, align 8
   store double 0x4114F671D2F1A9FC, double* @lxd, align 8
-  %146 = load double* @lxd, align 8
+  %146 = load double, double* @lxd, align 8
   %call138 = call double @df_df(double %146)
   store double %call138, double* @lret_df, align 8
-  %147 = load double* @ret_df, align 8
-  %148 = load double* @lret_df, align 8
-  %149 = load double* @xd, align 8
-  %150 = load double* @lxd, align 8
-  %151 = load double* @ret_df, align 8
-  %152 = load double* @lret_df, align 8
+  %147 = load double, double* @ret_df, align 8
+  %148 = load double, double* @lret_df, align 8
+  %149 = load double, double* @xd, align 8
+  %150 = load double, double* @lxd, align 8
+  %151 = load double, double* @ret_df, align 8
+  %152 = load double, double* @lret_df, align 8
   %cmp139 = fcmp oeq double %151, %152
   %conv140 = zext i1 %cmp139 to i32
-  %153 = load double* @xd, align 8
-  %154 = load double* @lxd, align 8
+  %153 = load double, double* @xd, align 8
+  %154 = load double, double* @lxd, align 8
   %cmp141 = fcmp oeq double %153, %154
   %conv142 = zext i1 %cmp141 to i32
   %and143 = and i32 %conv140, %conv142
@@ -454,34 +454,34 @@
   store double 6.781956e+03, double* @ret_df, align 8
   store float 4.445500e+03, float* @lx, align 4
   store float 0x4068ACCCC0000000, float* @ly, align 4
-  %155 = load float* @lx, align 4
-  %156 = load float* @ly, align 4
+  %155 = load float, float* @lx, align 4
+  %156 = load float, float* @ly, align 4
   %call145 = call double @df_sf_sf(float %155, float %156)
   store double %call145, double* @lret_df, align 8
-  %157 = load double* @ret_df, align 8
-  %158 = load double* @lret_df, align 8
-  %159 = load float* @x, align 4
+  %157 = load double, double* @ret_df, align 8
+  %158 = load double, double* @lret_df, align 8
+  %159 = load float, float* @x, align 4
   %conv146 = fpext float %159 to double
-  %160 = load float* @lx, align 4
+  %160 = load float, float* @lx, align 4
   %conv147 = fpext float %160 to double
-  %161 = load float* @y, align 4
+  %161 = load float, float* @y, align 4
   %conv148 = fpext float %161 to double
-  %162 = load float* @ly, align 4
+  %162 = load float, float* @ly, align 4
   %conv149 = fpext float %162 to double
-  %163 = load double* @ret_df, align 8
-  %164 = load double* @lret_df, align 8
+  %163 = load double, double* @ret_df, align 8
+  %164 = load double, double* @lret_df, align 8
   %cmp150 = fcmp oeq double %163, %164
   br i1 %cmp150, label %land.lhs.true152, label %land.end158
 
 land.lhs.true152:                                 ; preds = %land.end122
-  %165 = load float* @x, align 4
-  %166 = load float* @lx, align 4
+  %165 = load float, float* @x, align 4
+  %166 = load float, float* @lx, align 4
   %cmp153 = fcmp oeq float %165, %166
   br i1 %cmp153, label %land.rhs155, label %land.end158
 
 land.rhs155:                                      ; preds = %land.lhs.true152
-  %167 = load float* @y, align 4
-  %168 = load float* @ly, align 4
+  %167 = load float, float* @y, align 4
+  %168 = load float, float* @ly, align 4
   %cmp156 = fcmp oeq float %167, %168
   br label %land.end158
 
@@ -493,32 +493,32 @@
   store double 1.889130e+05, double* @ret_df, align 8
   store float 9.111450e+05, float* @lx, align 4
   store double 0x4185320A58000000, double* @lyd, align 8
-  %170 = load float* @lx, align 4
-  %171 = load double* @lyd, align 8
+  %170 = load float, float* @lx, align 4
+  %171 = load double, double* @lyd, align 8
   %call161 = call double @df_sf_df(float %170, double %171)
   store double %call161, double* @lret_df, align 8
-  %172 = load double* @ret_df, align 8
-  %173 = load double* @lret_df, align 8
-  %174 = load float* @x, align 4
+  %172 = load double, double* @ret_df, align 8
+  %173 = load double, double* @lret_df, align 8
+  %174 = load float, float* @x, align 4
   %conv162 = fpext float %174 to double
-  %175 = load float* @lx, align 4
+  %175 = load float, float* @lx, align 4
   %conv163 = fpext float %175 to double
-  %176 = load double* @yd, align 8
-  %177 = load double* @lyd, align 8
-  %178 = load double* @ret_df, align 8
-  %179 = load double* @lret_df, align 8
+  %176 = load double, double* @yd, align 8
+  %177 = load double, double* @lyd, align 8
+  %178 = load double, double* @ret_df, align 8
+  %179 = load double, double* @lret_df, align 8
   %cmp164 = fcmp oeq double %178, %179
   br i1 %cmp164, label %land.lhs.true166, label %land.end172
 
 land.lhs.true166:                                 ; preds = %land.end158
-  %180 = load float* @x, align 4
-  %181 = load float* @lx, align 4
+  %180 = load float, float* @x, align 4
+  %181 = load float, float* @lx, align 4
   %cmp167 = fcmp oeq float %180, %181
   br i1 %cmp167, label %land.rhs169, label %land.end172
 
 land.rhs169:                                      ; preds = %land.lhs.true166
-  %182 = load double* @yd, align 8
-  %183 = load double* @lyd, align 8
+  %182 = load double, double* @yd, align 8
+  %183 = load double, double* @lyd, align 8
   %cmp170 = fcmp oeq double %182, %183
   br label %land.end172
 
@@ -530,32 +530,32 @@
   store double 0x418B2DB900000000, double* @ret_df, align 8
   store double 0x41B1EF2ED3000000, double* @lxd, align 8
   store float 1.244562e+06, float* @ly, align 4
-  %185 = load double* @lxd, align 8
-  %186 = load float* @ly, align 4
+  %185 = load double, double* @lxd, align 8
+  %186 = load float, float* @ly, align 4
   %call175 = call double @df_df_sf(double %185, float %186)
   store double %call175, double* @lret_df, align 8
-  %187 = load double* @ret_df, align 8
-  %188 = load double* @lret_df, align 8
-  %189 = load double* @xd, align 8
-  %190 = load double* @lxd, align 8
-  %191 = load float* @y, align 4
+  %187 = load double, double* @ret_df, align 8
+  %188 = load double, double* @lret_df, align 8
+  %189 = load double, double* @xd, align 8
+  %190 = load double, double* @lxd, align 8
+  %191 = load float, float* @y, align 4
   %conv176 = fpext float %191 to double
-  %192 = load float* @ly, align 4
+  %192 = load float, float* @ly, align 4
   %conv177 = fpext float %192 to double
-  %193 = load double* @ret_df, align 8
-  %194 = load double* @lret_df, align 8
+  %193 = load double, double* @ret_df, align 8
+  %194 = load double, double* @lret_df, align 8
   %cmp178 = fcmp oeq double %193, %194
   br i1 %cmp178, label %land.lhs.true180, label %land.end186
 
 land.lhs.true180:                                 ; preds = %land.end172
-  %195 = load double* @xd, align 8
-  %196 = load double* @lxd, align 8
+  %195 = load double, double* @xd, align 8
+  %196 = load double, double* @lxd, align 8
   %cmp181 = fcmp oeq double %195, %196
   br i1 %cmp181, label %land.rhs183, label %land.end186
 
 land.rhs183:                                      ; preds = %land.lhs.true180
-  %197 = load float* @y, align 4
-  %198 = load float* @ly, align 4
+  %197 = load float, float* @y, align 4
+  %198 = load float, float* @ly, align 4
   %cmp184 = fcmp oeq float %197, %198
   br label %land.end186
 
@@ -567,30 +567,30 @@
   store double 3.987721e+06, double* @ret_df, align 8
   store double 5.223560e+00, double* @lxd, align 8
   store double 0x40B7D37CC1A8AC5C, double* @lyd, align 8
-  %200 = load double* @lxd, align 8
-  %201 = load double* @lyd, align 8
+  %200 = load double, double* @lxd, align 8
+  %201 = load double, double* @lyd, align 8
   %call189 = call double @df_df_df(double %200, double %201)
   store double %call189, double* @lret_df, align 8
-  %202 = load double* @ret_df, align 8
-  %203 = load double* @lret_df, align 8
-  %204 = load double* @xd, align 8
-  %205 = load double* @lxd, align 8
-  %206 = load double* @yd, align 8
-  %207 = load double* @lyd, align 8
-  %208 = load double* @ret_df, align 8
-  %209 = load double* @lret_df, align 8
+  %202 = load double, double* @ret_df, align 8
+  %203 = load double, double* @lret_df, align 8
+  %204 = load double, double* @xd, align 8
+  %205 = load double, double* @lxd, align 8
+  %206 = load double, double* @yd, align 8
+  %207 = load double, double* @lyd, align 8
+  %208 = load double, double* @ret_df, align 8
+  %209 = load double, double* @lret_df, align 8
   %cmp190 = fcmp oeq double %208, %209
   br i1 %cmp190, label %land.lhs.true192, label %land.end198
 
 land.lhs.true192:                                 ; preds = %land.end186
-  %210 = load double* @xd, align 8
-  %211 = load double* @lxd, align 8
+  %210 = load double, double* @xd, align 8
+  %211 = load double, double* @lxd, align 8
   %cmp193 = fcmp oeq double %210, %211
   br i1 %cmp193, label %land.rhs195, label %land.end198
 
 land.rhs195:                                      ; preds = %land.lhs.true192
-  %212 = load double* @yd, align 8
-  %213 = load double* @lyd, align 8
+  %212 = load double, double* @yd, align 8
+  %213 = load double, double* @lyd, align 8
   %cmp196 = fcmp oeq double %212, %213
   br label %land.end198
 
@@ -606,26 +606,26 @@
   %216 = extractvalue { float, float } %call201, 1
   store float %215, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
   store float %216, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
-  %ret_sc.real = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
-  %ret_sc.imag = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+  %ret_sc.real = load float, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+  %ret_sc.imag = load float, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
   %conv202 = fpext float %ret_sc.real to double
   %conv203 = fpext float %ret_sc.imag to double
-  %ret_sc.real204 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
-  %ret_sc.imag205 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+  %ret_sc.real204 = load float, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+  %ret_sc.imag205 = load float, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
   %conv206 = fpext float %ret_sc.real204 to double
   %conv207 = fpext float %ret_sc.imag205 to double
-  %lret_sc.real = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
-  %lret_sc.imag = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+  %lret_sc.real = load float, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+  %lret_sc.imag = load float, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
   %conv208 = fpext float %lret_sc.real to double
   %conv209 = fpext float %lret_sc.imag to double
-  %lret_sc.real210 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
-  %lret_sc.imag211 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+  %lret_sc.real210 = load float, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+  %lret_sc.imag211 = load float, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
   %conv212 = fpext float %lret_sc.real210 to double
   %conv213 = fpext float %lret_sc.imag211 to double
-  %ret_sc.real214 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
-  %ret_sc.imag215 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
-  %lret_sc.real216 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
-  %lret_sc.imag217 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+  %ret_sc.real214 = load float, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+  %ret_sc.imag215 = load float, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+  %lret_sc.real216 = load float, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+  %lret_sc.imag217 = load float, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
   %cmp.r = fcmp oeq float %ret_sc.real214, %lret_sc.real216
   %cmp.i = fcmp oeq float %ret_sc.imag215, %lret_sc.imag217
   %and.ri = and i1 %cmp.r, %cmp.i
@@ -635,44 +635,44 @@
   store float 0x3FF7A99300000000, float* @lx, align 4
   store float 4.500000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
   store float 7.000000e+00, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
-  %217 = load float* @lx, align 4
+  %217 = load float, float* @lx, align 4
   %call220 = call { float, float } @sc_sf(float %217)
   %218 = extractvalue { float, float } %call220, 0
   %219 = extractvalue { float, float } %call220, 1
   store float %218, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
   store float %219, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
-  %ret_sc.real221 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
-  %ret_sc.imag222 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+  %ret_sc.real221 = load float, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+  %ret_sc.imag222 = load float, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
   %conv223 = fpext float %ret_sc.real221 to double
   %conv224 = fpext float %ret_sc.imag222 to double
-  %ret_sc.real225 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
-  %ret_sc.imag226 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+  %ret_sc.real225 = load float, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+  %ret_sc.imag226 = load float, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
   %conv227 = fpext float %ret_sc.real225 to double
   %conv228 = fpext float %ret_sc.imag226 to double
-  %lret_sc.real229 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
-  %lret_sc.imag230 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+  %lret_sc.real229 = load float, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+  %lret_sc.imag230 = load float, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
   %conv231 = fpext float %lret_sc.real229 to double
   %conv232 = fpext float %lret_sc.imag230 to double
-  %lret_sc.real233 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
-  %lret_sc.imag234 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+  %lret_sc.real233 = load float, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+  %lret_sc.imag234 = load float, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
   %conv235 = fpext float %lret_sc.real233 to double
   %conv236 = fpext float %lret_sc.imag234 to double
-  %220 = load float* @x, align 4
+  %220 = load float, float* @x, align 4
   %conv237 = fpext float %220 to double
-  %221 = load float* @lx, align 4
+  %221 = load float, float* @lx, align 4
   %conv238 = fpext float %221 to double
-  %ret_sc.real239 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
-  %ret_sc.imag240 = load float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
-  %lret_sc.real241 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
-  %lret_sc.imag242 = load float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
+  %ret_sc.real239 = load float, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 0)
+  %ret_sc.imag240 = load float, float* getelementptr inbounds ({ float, float }* @ret_sc, i32 0, i32 1)
+  %lret_sc.real241 = load float, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 0)
+  %lret_sc.imag242 = load float, float* getelementptr inbounds ({ float, float }* @lret_sc, i32 0, i32 1)
   %cmp.r243 = fcmp oeq float %ret_sc.real239, %lret_sc.real241
   %cmp.i244 = fcmp oeq float %ret_sc.imag240, %lret_sc.imag242
   %and.ri245 = and i1 %cmp.r243, %cmp.i244
   br i1 %and.ri245, label %land.rhs247, label %land.end250
 
 land.rhs247:                                      ; preds = %land.end198
-  %222 = load float* @x, align 4
-  %223 = load float* @lx, align 4
+  %222 = load float, float* @x, align 4
+  %223 = load float, float* @lx, align 4
   %cmp248 = fcmp oeq float %222, %223
   br label %land.end250
 
@@ -688,18 +688,18 @@
   %226 = extractvalue { double, double } %call253, 1
   store double %225, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
   store double %226, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
-  %ret_dc.real = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
-  %ret_dc.imag = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
-  %ret_dc.real254 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
-  %ret_dc.imag255 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
-  %lret_dc.real = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
-  %lret_dc.imag = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
-  %lret_dc.real256 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
-  %lret_dc.imag257 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
-  %ret_dc.real258 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
-  %ret_dc.imag259 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
-  %lret_dc.real260 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
-  %lret_dc.imag261 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+  %ret_dc.real = load double, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+  %ret_dc.imag = load double, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+  %ret_dc.real254 = load double, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+  %ret_dc.imag255 = load double, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+  %lret_dc.real = load double, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+  %lret_dc.imag = load double, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+  %lret_dc.real256 = load double, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+  %lret_dc.imag257 = load double, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+  %ret_dc.real258 = load double, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+  %ret_dc.imag259 = load double, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+  %lret_dc.real260 = load double, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+  %lret_dc.imag261 = load double, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
   %cmp.r262 = fcmp oeq double %ret_dc.real258, %lret_dc.real260
   %cmp.i263 = fcmp oeq double %ret_dc.imag259, %lret_dc.imag261
   %and.ri264 = and i1 %cmp.r262, %cmp.i263
@@ -709,36 +709,36 @@
   store double 0x40AAF6F532617C1C, double* @lxd, align 8
   store double 4.444500e+03, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
   store double 7.888000e+03, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
-  %227 = load float* @lx, align 4
+  %227 = load float, float* @lx, align 4
   %call267 = call { double, double } @dc_sf(float %227)
   %228 = extractvalue { double, double } %call267, 0
   %229 = extractvalue { double, double } %call267, 1
   store double %228, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
   store double %229, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
-  %ret_dc.real268 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
-  %ret_dc.imag269 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
-  %ret_dc.real270 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
-  %ret_dc.imag271 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
-  %lret_dc.real272 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
-  %lret_dc.imag273 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
-  %lret_dc.real274 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
-  %lret_dc.imag275 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
-  %230 = load float* @x, align 4
+  %ret_dc.real268 = load double, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+  %ret_dc.imag269 = load double, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+  %ret_dc.real270 = load double, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+  %ret_dc.imag271 = load double, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+  %lret_dc.real272 = load double, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+  %lret_dc.imag273 = load double, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+  %lret_dc.real274 = load double, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+  %lret_dc.imag275 = load double, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+  %230 = load float, float* @x, align 4
   %conv276 = fpext float %230 to double
-  %231 = load float* @lx, align 4
+  %231 = load float, float* @lx, align 4
   %conv277 = fpext float %231 to double
-  %ret_dc.real278 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
-  %ret_dc.imag279 = load double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
-  %lret_dc.real280 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
-  %lret_dc.imag281 = load double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
+  %ret_dc.real278 = load double, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 0)
+  %ret_dc.imag279 = load double, double* getelementptr inbounds ({ double, double }* @ret_dc, i32 0, i32 1)
+  %lret_dc.real280 = load double, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 0)
+  %lret_dc.imag281 = load double, double* getelementptr inbounds ({ double, double }* @lret_dc, i32 0, i32 1)
   %cmp.r282 = fcmp oeq double %ret_dc.real278, %lret_dc.real280
   %cmp.i283 = fcmp oeq double %ret_dc.imag279, %lret_dc.imag281
   %and.ri284 = and i1 %cmp.r282, %cmp.i283
   br i1 %and.ri284, label %land.rhs286, label %land.end289
 
 land.rhs286:                                      ; preds = %land.end250
-  %232 = load float* @x, align 4
-  %233 = load float* @lx, align 4
+  %232 = load float, float* @x, align 4
+  %233 = load float, float* @lx, align 4
   %cmp287 = fcmp oeq float %232, %233
   br label %land.end289
 
@@ -746,7 +746,7 @@
   %234 = phi i1 [ false, %land.end250 ], [ %cmp287, %land.rhs286 ]
   %land.ext290 = zext i1 %234 to i32
   %call291 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([24 x i8]* @.str4, i32 0, i32 0), double %ret_dc.real268, double %ret_dc.imag271, double %lret_dc.real272, double %lret_dc.imag275, double %conv276, double %conv277, i32 %land.ext290)
-  %235 = load i32* %retval
+  %235 = load i32, i32* %retval
   ret i32 %235
 }
 
diff --git a/llvm/test/CodeGen/Mips/hf16call32_body.ll b/llvm/test/CodeGen/Mips/hf16call32_body.ll
index adac314..d06256c 100644
--- a/llvm/test/CodeGen/Mips/hf16call32_body.ll
+++ b/llvm/test/CodeGen/Mips/hf16call32_body.ll
@@ -14,7 +14,7 @@
 entry:
   %p.addr = alloca float, align 4
   store float %p, float* %p.addr, align 4
-  %0 = load float* %p.addr, align 4
+  %0 = load float, float* %p.addr, align 4
   store float %0, float* @x, align 4
   ret void
 }
@@ -33,7 +33,7 @@
 entry:
   %p.addr = alloca double, align 8
   store double %p, double* %p.addr, align 8
-  %0 = load double* %p.addr, align 8
+  %0 = load double, double* %p.addr, align 8
   store double %0, double* @xd, align 8
   ret void
 }
@@ -54,9 +54,9 @@
   %p2.addr = alloca float, align 4
   store float %p1, float* %p1.addr, align 4
   store float %p2, float* %p2.addr, align 4
-  %0 = load float* %p1.addr, align 4
+  %0 = load float, float* %p1.addr, align 4
   store float %0, float* @x, align 4
-  %1 = load float* %p2.addr, align 4
+  %1 = load float, float* %p2.addr, align 4
   store float %1, float* @y, align 4
   ret void
 }
@@ -77,9 +77,9 @@
   %p2.addr = alloca double, align 8
   store float %p1, float* %p1.addr, align 4
   store double %p2, double* %p2.addr, align 8
-  %0 = load float* %p1.addr, align 4
+  %0 = load float, float* %p1.addr, align 4
   store float %0, float* @x, align 4
-  %1 = load double* %p2.addr, align 8
+  %1 = load double, double* %p2.addr, align 8
   store double %1, double* @yd, align 8
   ret void
 }
@@ -101,9 +101,9 @@
   %p2.addr = alloca float, align 4
   store double %p1, double* %p1.addr, align 8
   store float %p2, float* %p2.addr, align 4
-  %0 = load double* %p1.addr, align 8
+  %0 = load double, double* %p1.addr, align 8
   store double %0, double* @xd, align 8
-  %1 = load float* %p2.addr, align 4
+  %1 = load float, float* %p2.addr, align 4
   store float %1, float* @y, align 4
   ret void
 }
@@ -125,9 +125,9 @@
   %p2.addr = alloca double, align 8
   store double %p1, double* %p1.addr, align 8
   store double %p2, double* %p2.addr, align 8
-  %0 = load double* %p1.addr, align 8
+  %0 = load double, double* %p1.addr, align 8
   store double %0, double* @xd, align 8
-  %1 = load double* %p2.addr, align 8
+  %1 = load double, double* %p2.addr, align 8
   store double %1, double* @yd, align 8
   ret void
 }
@@ -146,7 +146,7 @@
 ; Function Attrs: nounwind
 define float @sf_v() #0 {
 entry:
-  %0 = load float* @ret_sf, align 4
+  %0 = load float, float* @ret_sf, align 4
   ret float %0
 }
 
@@ -155,9 +155,9 @@
 entry:
   %p.addr = alloca float, align 4
   store float %p, float* %p.addr, align 4
-  %0 = load float* %p.addr, align 4
+  %0 = load float, float* %p.addr, align 4
   store float %0, float* @x, align 4
-  %1 = load float* @ret_sf, align 4
+  %1 = load float, float* @ret_sf, align 4
   ret float %1
 }
 
@@ -176,9 +176,9 @@
 entry:
   %p.addr = alloca double, align 8
   store double %p, double* %p.addr, align 8
-  %0 = load double* %p.addr, align 8
+  %0 = load double, double* %p.addr, align 8
   store double %0, double* @xd, align 8
-  %1 = load float* @ret_sf, align 4
+  %1 = load float, float* @ret_sf, align 4
   ret float %1
 }
 
@@ -198,11 +198,11 @@
   %p2.addr = alloca float, align 4
   store float %p1, float* %p1.addr, align 4
   store float %p2, float* %p2.addr, align 4
-  %0 = load float* %p1.addr, align 4
+  %0 = load float, float* %p1.addr, align 4
   store float %0, float* @x, align 4
-  %1 = load float* %p2.addr, align 4
+  %1 = load float, float* %p2.addr, align 4
   store float %1, float* @y, align 4
-  %2 = load float* @ret_sf, align 4
+  %2 = load float, float* @ret_sf, align 4
   ret float %2
 }
 
@@ -222,11 +222,11 @@
   %p2.addr = alloca double, align 8
   store float %p1, float* %p1.addr, align 4
   store double %p2, double* %p2.addr, align 8
-  %0 = load float* %p1.addr, align 4
+  %0 = load float, float* %p1.addr, align 4
   store float %0, float* @x, align 4
-  %1 = load double* %p2.addr, align 8
+  %1 = load double, double* %p2.addr, align 8
   store double %1, double* @yd, align 8
-  %2 = load float* @ret_sf, align 4
+  %2 = load float, float* @ret_sf, align 4
   ret float %2
 }
 
@@ -247,11 +247,11 @@
   %p2.addr = alloca float, align 4
   store double %p1, double* %p1.addr, align 8
   store float %p2, float* %p2.addr, align 4
-  %0 = load double* %p1.addr, align 8
+  %0 = load double, double* %p1.addr, align 8
   store double %0, double* @xd, align 8
-  %1 = load float* %p2.addr, align 4
+  %1 = load float, float* %p2.addr, align 4
   store float %1, float* @y, align 4
-  %2 = load float* @ret_sf, align 4
+  %2 = load float, float* @ret_sf, align 4
   ret float %2
 }
 
@@ -272,11 +272,11 @@
   %p2.addr = alloca double, align 8
   store double %p1, double* %p1.addr, align 8
   store double %p2, double* %p2.addr, align 8
-  %0 = load double* %p1.addr, align 8
+  %0 = load double, double* %p1.addr, align 8
   store double %0, double* @xd, align 8
-  %1 = load double* %p2.addr, align 8
+  %1 = load double, double* %p2.addr, align 8
   store double %1, double* @yd, align 8
-  %2 = load float* @ret_sf, align 4
+  %2 = load float, float* @ret_sf, align 4
   ret float %2
 }
 
diff --git a/llvm/test/CodeGen/Mips/hf1_body.ll b/llvm/test/CodeGen/Mips/hf1_body.ll
index 5acfe86..71a1b96 100644
--- a/llvm/test/CodeGen/Mips/hf1_body.ll
+++ b/llvm/test/CodeGen/Mips/hf1_body.ll
@@ -7,7 +7,7 @@
 entry:
   %p.addr = alloca float, align 4
   store float %p, float* %p.addr, align 4
-  %0 = load float* %p.addr, align 4
+  %0 = load float, float* %p.addr, align 4
   store float %0, float* @x, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/Mips/hfptrcall.ll b/llvm/test/CodeGen/Mips/hfptrcall.ll
index fd0e359..1df58a3 100644
--- a/llvm/test/CodeGen/Mips/hfptrcall.ll
+++ b/llvm/test/CodeGen/Mips/hfptrcall.ll
@@ -38,7 +38,7 @@
   %imag = getelementptr inbounds { float, float }, { float, float }* %retval, i32 0, i32 1
   store float 5.000000e+00, float* %real
   store float 9.900000e+01, float* %imag
-  %0 = load { float, float }* %retval
+  %0 = load { float, float }, { float, float }* %retval
   ret { float, float } %0
 }
 
@@ -54,7 +54,7 @@
   %imag = getelementptr inbounds { double, double }, { double, double }* %retval, i32 0, i32 1
   store double 0x416BC8B0A0000000, double* %real
   store double 0x41CDCCB763800000, double* %imag
-  %0 = load { double, double }* %retval
+  %0 = load { double, double }, { double, double }* %retval
   ret { double, double } %0
 }
 
@@ -65,42 +65,42 @@
 ; Function Attrs: nounwind
 define i32 @main() #0 {
 entry:
-  %0 = load float ()** @ptrsv, align 4
+  %0 = load float ()*, float ()** @ptrsv, align 4
   %call = call float %0()
   store float %call, float* @x, align 4
-  %1 = load float* @x, align 4
+  %1 = load float, float* @x, align 4
   %conv = fpext float %1 to double
   %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double %conv)
-  %2 = load double ()** @ptrdv, align 4
+  %2 = load double ()*, double ()** @ptrdv, align 4
   %call2 = call double %2()
   store double %call2, double* @xd, align 8
-  %3 = load double* @xd, align 8
+  %3 = load double, double* @xd, align 8
   %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), double %3)
-  %4 = load { float, float } ()** @ptrscv, align 4
+  %4 = load { float, float } ()*, { float, float } ()** @ptrscv, align 4
   %call4 = call { float, float } %4()
   %5 = extractvalue { float, float } %call4, 0
   %6 = extractvalue { float, float } %call4, 1
   store float %5, float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 0)
   store float %6, float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 1)
-  %xy.real = load float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 0)
-  %xy.imag = load float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 1)
+  %xy.real = load float, float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 0)
+  %xy.imag = load float, float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 1)
   %conv5 = fpext float %xy.real to double
   %conv6 = fpext float %xy.imag to double
-  %xy.real7 = load float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 0)
-  %xy.imag8 = load float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 1)
+  %xy.real7 = load float, float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 0)
+  %xy.imag8 = load float, float* getelementptr inbounds ({ float, float }* @xy, i32 0, i32 1)
   %conv9 = fpext float %xy.real7 to double
   %conv10 = fpext float %xy.imag8 to double
   %call11 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str1, i32 0, i32 0), double %conv5, double %conv10)
-  %7 = load { double, double } ()** @ptrdcv, align 4
+  %7 = load { double, double } ()*, { double, double } ()** @ptrdcv, align 4
   %call12 = call { double, double } %7()
   %8 = extractvalue { double, double } %call12, 0
   %9 = extractvalue { double, double } %call12, 1
   store double %8, double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 0)
   store double %9, double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 1)
-  %xyd.real = load double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 0)
-  %xyd.imag = load double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 1)
-  %xyd.real13 = load double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 0)
-  %xyd.imag14 = load double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 1)
+  %xyd.real = load double, double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 0)
+  %xyd.imag = load double, double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 1)
+  %xyd.real13 = load double, double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 0)
+  %xyd.imag14 = load double, double* getelementptr inbounds ({ double, double }* @xyd, i32 0, i32 1)
   %call15 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str1, i32 0, i32 0), double %xyd.real, double %xyd.imag14)
   ret i32 0
 }
diff --git a/llvm/test/CodeGen/Mips/inlineasm-assembler-directives.ll b/llvm/test/CodeGen/Mips/inlineasm-assembler-directives.ll
index e4a6d1e..88ceed4 100644
--- a/llvm/test/CodeGen/Mips/inlineasm-assembler-directives.ll
+++ b/llvm/test/CodeGen/Mips/inlineasm-assembler-directives.ll
@@ -16,7 +16,7 @@
   %a = alloca i32, align 4
   %b = alloca i32, align 4
   store i32 20, i32* %a, align 4
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %1 = call i32 asm sideeffect "addi $$9, $1, 8\0A\09subi $0, $$9, 6", "=r,r,~{$1}"(i32 %0)
   store i32 %1, i32* %b, align 4
   ret void
diff --git a/llvm/test/CodeGen/Mips/inlineasm-operand-code.ll b/llvm/test/CodeGen/Mips/inlineasm-operand-code.ll
index 3d9dec7..fd726fc 100644
--- a/llvm/test/CodeGen/Mips/inlineasm-operand-code.ll
+++ b/llvm/test/CodeGen/Mips/inlineasm-operand-code.ll
@@ -125,7 +125,7 @@
 ;CHECK_BIG_32:       #APP
 ;CHECK_BIG_32:       or ${{[0-9]+}},$[[SECOND]],${{[0-9]+}}
 ;CHECK_BIG_32:       #NO_APP
-  %bosco = load i64* getelementptr inbounds (%union.u_tag* @uval, i32 0, i32 0), align 8
+  %bosco = load i64, i64* getelementptr inbounds (%union.u_tag* @uval, i32 0, i32 0), align 8
   %trunc1 = trunc i64 %bosco to i32
   tail call i32 asm sideeffect "or $0,${1:D},$2", "=r,r,r"(i64 %bosco, i32 %trunc1) nounwind
   ret i32 0
@@ -149,7 +149,7 @@
 ;CHECK_BIG_32:       #APP
 ;CHECK_BIG_32:       or ${{[0-9]+}},$[[SECOND]],${{[0-9]+}}
 ;CHECK_BIG_32:       #NO_APP
-  %bosco = load i64* getelementptr inbounds (%union.u_tag* @uval, i32 0, i32 0), align 8
+  %bosco = load i64, i64* getelementptr inbounds (%union.u_tag* @uval, i32 0, i32 0), align 8
   %trunc1 = trunc i64 %bosco to i32
   tail call i32 asm sideeffect "or $0,${1:L},$2", "=r,r,r"(i64 %bosco, i32 %trunc1) nounwind
   ret i32 0
@@ -173,7 +173,7 @@
 ;CHECK_BIG_32:       #APP
 ;CHECK_BIG_32:       or ${{[0-9]+}},$[[FIRST]],${{[0-9]+}}
 ;CHECK_BIG_32:       #NO_APP
-  %bosco = load i64* getelementptr inbounds (%union.u_tag* @uval, i32 0, i32 0), align 8
+  %bosco = load i64, i64* getelementptr inbounds (%union.u_tag* @uval, i32 0, i32 0), align 8
   %trunc1 = trunc i64 %bosco to i32
   tail call i32 asm sideeffect "or $0,${1:M},$2", "=r,r,r"(i64 %bosco, i32 %trunc1) nounwind
   ret i32 0
diff --git a/llvm/test/CodeGen/Mips/inlineasm64.ll b/llvm/test/CodeGen/Mips/inlineasm64.ll
index a8e949b..82abdf8 100644
--- a/llvm/test/CodeGen/Mips/inlineasm64.ll
+++ b/llvm/test/CodeGen/Mips/inlineasm64.ll
@@ -8,8 +8,8 @@
 entry:
 ; CHECK: foo1
 ; CHECK: daddu
-  %0 = load i64* @gl1, align 8
-  %1 = load i64* @gl0, align 8
+  %0 = load i64, i64* @gl1, align 8
+  %1 = load i64, i64* @gl0, align 8
   %2 = tail call i64 asm "daddu $0, $1, $2", "=r,r,r"(i64 %0, i64 %1) nounwind
   store i64 %2, i64* @gl2, align 8
   ret void
diff --git a/llvm/test/CodeGen/Mips/internalfunc.ll b/llvm/test/CodeGen/Mips/internalfunc.ll
index 863375a..0320e28 100644
--- a/llvm/test/CodeGen/Mips/internalfunc.ll
+++ b/llvm/test/CodeGen/Mips/internalfunc.ll
@@ -20,7 +20,7 @@
   br i1 %tobool, label %if.end, label %if.then
 
 if.then:                                          ; preds = %entry
-  %tmp1 = load void (...)** @caller.sf1, align 4
+  %tmp1 = load void (...)*, void (...)** @caller.sf1, align 4
   tail call void (...)* %tmp1() nounwind
   br label %if.end
 
@@ -30,7 +30,7 @@
 ; CHECK: lw  $[[R3:[0-9]+]], %got(caller.sf1)
 ; CHECK: sw  ${{[0-9]+}}, %lo(caller.sf1)($[[R3]])
   %tobool3 = icmp ne i32 %a0, 0
-  %tmp4 = load void (...)** @gf1, align 4
+  %tmp4 = load void (...)*, void (...)** @gf1, align 4
   %cond = select i1 %tobool3, void (...)* %tmp4, void (...)* bitcast (void ()* @sf2 to void (...)*)
   store void (...)* %cond, void (...)** @caller.sf1, align 4
   ret void
diff --git a/llvm/test/CodeGen/Mips/jtstat.ll b/llvm/test/CodeGen/Mips/jtstat.ll
index 01afc08..35f71cf 100644
--- a/llvm/test/CodeGen/Mips/jtstat.ll
+++ b/llvm/test/CodeGen/Mips/jtstat.ll
@@ -8,7 +8,7 @@
 entry:
   %i.addr = alloca i32, align 4
   store i32 %i, i32* %i.addr, align 4
-  %0 = load i32* %i.addr, align 4
+  %0 = load i32, i32* %i.addr, align 4
   switch i32 %0, label %sw.epilog [
     i32 115, label %sw.bb
     i32 105, label %sw.bb1
diff --git a/llvm/test/CodeGen/Mips/l3mc.ll b/llvm/test/CodeGen/Mips/l3mc.ll
index 3bfb389..6aeed04 100644
--- a/llvm/test/CodeGen/Mips/l3mc.ll
+++ b/llvm/test/CodeGen/Mips/l3mc.ll
@@ -42,28 +42,28 @@
 ; Function Attrs: nounwind
 define void @_Z3foov() #0 {
 entry:
-  %0 = load double* @d1, align 8
+  %0 = load double, double* @d1, align 8
   %conv = fptosi double %0 to i64
   store i64 %conv, i64* @ll1, align 8
-  %1 = load double* @d2, align 8
+  %1 = load double, double* @d2, align 8
   %conv1 = fptoui double %1 to i64
   store i64 %conv1, i64* @ull1, align 8
-  %2 = load float* @f1, align 4
+  %2 = load float, float* @f1, align 4
   %conv2 = fptosi float %2 to i64
   store i64 %conv2, i64* @ll2, align 8
-  %3 = load float* @f2, align 4
+  %3 = load float, float* @f2, align 4
   %conv3 = fptoui float %3 to i64
   store i64 %conv3, i64* @ull2, align 8
-  %4 = load double* @d3, align 8
+  %4 = load double, double* @d3, align 8
   %conv4 = fptosi double %4 to i32
   store i32 %conv4, i32* @l1, align 4
-  %5 = load double* @d4, align 8
+  %5 = load double, double* @d4, align 8
   %conv5 = fptoui double %5 to i32
   store i32 %conv5, i32* @ul1, align 4
-  %6 = load float* @f3, align 4
+  %6 = load float, float* @f3, align 4
   %conv6 = fptosi float %6 to i32
   store i32 %conv6, i32* @l2, align 4
-  %7 = load float* @f4, align 4
+  %7 = load float, float* @f4, align 4
   %conv7 = fptoui float %7 to i32
   store i32 %conv7, i32* @ul2, align 4
   ret void
@@ -72,28 +72,28 @@
 ; Function Attrs: nounwind
 define void @_Z3goov() #0 {
 entry:
-  %0 = load i64* @ll1, align 8
+  %0 = load i64, i64* @ll1, align 8
   %conv = sitofp i64 %0 to double
   store double %conv, double* @d1, align 8
-  %1 = load i64* @ull1, align 8
+  %1 = load i64, i64* @ull1, align 8
   %conv1 = uitofp i64 %1 to double
   store double %conv1, double* @d2, align 8
-  %2 = load i64* @ll2, align 8
+  %2 = load i64, i64* @ll2, align 8
   %conv2 = sitofp i64 %2 to float
   store float %conv2, float* @f1, align 4
-  %3 = load i64* @ull2, align 8
+  %3 = load i64, i64* @ull2, align 8
   %conv3 = uitofp i64 %3 to float
   store float %conv3, float* @f2, align 4
-  %4 = load i32* @l1, align 4
+  %4 = load i32, i32* @l1, align 4
   %conv4 = sitofp i32 %4 to double
   store double %conv4, double* @d3, align 8
-  %5 = load i32* @ul1, align 4
+  %5 = load i32, i32* @ul1, align 4
   %conv5 = uitofp i32 %5 to double
   store double %conv5, double* @d4, align 8
-  %6 = load i32* @l2, align 4
+  %6 = load i32, i32* @l2, align 4
   %conv6 = sitofp i32 %6 to float
   store float %conv6, float* @f3, align 4
-  %7 = load i32* @ul2, align 4
+  %7 = load i32, i32* @ul2, align 4
   %conv7 = uitofp i32 %7 to float
   store float %conv7, float* @f4, align 4
   ret void
diff --git a/llvm/test/CodeGen/Mips/lb1.ll b/llvm/test/CodeGen/Mips/lb1.ll
index aac2767..2db28a6 100644
--- a/llvm/test/CodeGen/Mips/lb1.ll
+++ b/llvm/test/CodeGen/Mips/lb1.ll
@@ -6,11 +6,11 @@
 define i32 @main() nounwind {
 entry:
   %i = alloca i32, align 4
-  %0 = load i8* @c, align 1
+  %0 = load i8, i8* @c, align 1
 ; 16:	lb	${{[0-9]+}}, 0(${{[0-9]+}})
   %conv = sext i8 %0 to i32
   store i32 %conv, i32* %i, align 4
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
   ret i32 0
 }
diff --git a/llvm/test/CodeGen/Mips/lbu1.ll b/llvm/test/CodeGen/Mips/lbu1.ll
index 63e0cca..369babf 100644
--- a/llvm/test/CodeGen/Mips/lbu1.ll
+++ b/llvm/test/CodeGen/Mips/lbu1.ll
@@ -6,11 +6,11 @@
 define i32 @main() nounwind {
 entry:
   %i = alloca i32, align 4
-  %0 = load i8* @c, align 1
+  %0 = load i8, i8* @c, align 1
   %conv = zext i8 %0 to i32
 ; 16:	lbu	${{[0-9]+}}, 0(${{[0-9]+}})
   store i32 %conv, i32* %i, align 4
-  %1 = load i8* @c, align 1
+  %1 = load i8, i8* @c, align 1
   %conv1 = zext i8 %1 to i32
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %conv1)
   ret i32 0
diff --git a/llvm/test/CodeGen/Mips/lcb2.ll b/llvm/test/CodeGen/Mips/lcb2.ll
index 59b96e6..716a6bb 100644
--- a/llvm/test/CodeGen/Mips/lcb2.ll
+++ b/llvm/test/CodeGen/Mips/lcb2.ll
@@ -9,7 +9,7 @@
 ; Function Attrs: nounwind optsize
 define i32 @bnez() #0 {
 entry:
-  %0 = load i32* @i, align 4, !tbaa !1
+  %0 = load i32, i32* @i, align 4, !tbaa !1
   %cmp = icmp eq i32 %0, 0
   br i1 %cmp, label %if.then, label %if.end
 
@@ -31,7 +31,7 @@
 ; Function Attrs: nounwind optsize
 define i32 @beqz() #0 {
 entry:
-  %0 = load i32* @i, align 4, !tbaa !1
+  %0 = load i32, i32* @i, align 4, !tbaa !1
   %cmp = icmp eq i32 %0, 0
   br i1 %cmp, label %if.then, label %if.else
 
@@ -60,8 +60,8 @@
 ; Function Attrs: nounwind optsize
 define void @bteqz() #0 {
 entry:
-  %0 = load i32* @i, align 4, !tbaa !1
-  %1 = load i32* @j, align 4, !tbaa !1
+  %0 = load i32, i32* @i, align 4, !tbaa !1
+  %1 = load i32, i32* @j, align 4, !tbaa !1
   %cmp = icmp eq i32 %0, %1
   br i1 %cmp, label %if.then, label %if.else
 
@@ -90,15 +90,15 @@
 ; Function Attrs: nounwind optsize
 define void @btz() #0 {
 entry:
-  %0 = load i32* @i, align 4, !tbaa !1
-  %1 = load i32* @j, align 4, !tbaa !1
+  %0 = load i32, i32* @i, align 4, !tbaa !1
+  %1 = load i32, i32* @j, align 4, !tbaa !1
   %cmp1 = icmp sgt i32 %0, %1
   br i1 %cmp1, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry, %if.then
   tail call void asm sideeffect ".space 60000", ""() #1, !srcloc !10
-  %2 = load i32* @i, align 4, !tbaa !1
-  %3 = load i32* @j, align 4, !tbaa !1
+  %2 = load i32, i32* @i, align 4, !tbaa !1
+  %3 = load i32, i32* @j, align 4, !tbaa !1
   %cmp = icmp sgt i32 %2, %3
   br i1 %cmp, label %if.then, label %if.end
 
diff --git a/llvm/test/CodeGen/Mips/lcb3c.ll b/llvm/test/CodeGen/Mips/lcb3c.ll
index eb83291..d6e259c7 100644
--- a/llvm/test/CodeGen/Mips/lcb3c.ll
+++ b/llvm/test/CodeGen/Mips/lcb3c.ll
@@ -7,7 +7,7 @@
 ; Function Attrs: nounwind
 define i32 @s() #0 {
 entry:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %cmp = icmp eq i32 %0, 0
   br i1 %cmp, label %if.then, label %if.else
 
@@ -30,7 +30,7 @@
 ; Function Attrs: nounwind
 define i32 @b() #0 {
 entry:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %cmp = icmp eq i32 %0, 0
   br i1 %cmp, label %if.then, label %if.else
 
diff --git a/llvm/test/CodeGen/Mips/lcb4a.ll b/llvm/test/CodeGen/Mips/lcb4a.ll
index fbcadd2..0285ae1 100644
--- a/llvm/test/CodeGen/Mips/lcb4a.ll
+++ b/llvm/test/CodeGen/Mips/lcb4a.ll
@@ -7,7 +7,7 @@
 ; Function Attrs: nounwind optsize
 define i32 @foo() #0 {
 entry:
-  %0 = load i32* @i, align 4, !tbaa !1
+  %0 = load i32, i32* @i, align 4, !tbaa !1
   %cmp = icmp eq i32 %0, 0
   br i1 %cmp, label %if.then, label %if.else
 
@@ -32,7 +32,7 @@
 ; Function Attrs: nounwind optsize
 define i32 @goo() #0 {
 entry:
-  %0 = load i32* @i, align 4, !tbaa !1
+  %0 = load i32, i32* @i, align 4, !tbaa !1
   %cmp = icmp eq i32 %0, 0
   br i1 %cmp, label %if.then, label %if.else
 
diff --git a/llvm/test/CodeGen/Mips/lcb5.ll b/llvm/test/CodeGen/Mips/lcb5.ll
index b2a8d1d..172ecb3 100644
--- a/llvm/test/CodeGen/Mips/lcb5.ll
+++ b/llvm/test/CodeGen/Mips/lcb5.ll
@@ -7,7 +7,7 @@
 ; Function Attrs: nounwind optsize
 define i32 @x0() #0 {
 entry:
-  %0 = load i32* @i, align 4, !tbaa !1
+  %0 = load i32, i32* @i, align 4, !tbaa !1
   %cmp = icmp eq i32 %0, 0
   br i1 %cmp, label %if.then, label %if.else
 
@@ -33,7 +33,7 @@
 ; Function Attrs: nounwind optsize
 define i32 @x1() #0 {
 entry:
-  %0 = load i32* @i, align 4, !tbaa !1
+  %0 = load i32, i32* @i, align 4, !tbaa !1
   %cmp = icmp eq i32 %0, 0
   br i1 %cmp, label %if.then, label %if.else
 
@@ -61,7 +61,7 @@
 ; Function Attrs: nounwind optsize
 define i32 @y0() #0 {
 entry:
-  %0 = load i32* @i, align 4, !tbaa !1
+  %0 = load i32, i32* @i, align 4, !tbaa !1
   %cmp = icmp eq i32 %0, 0
   br i1 %cmp, label %if.then, label %if.else
 
@@ -86,7 +86,7 @@
 ; Function Attrs: nounwind optsize
 define i32 @y1() #0 {
 entry:
-  %0 = load i32* @i, align 4, !tbaa !1
+  %0 = load i32, i32* @i, align 4, !tbaa !1
   %cmp = icmp eq i32 %0, 0
   br i1 %cmp, label %if.then, label %if.else
 
@@ -114,8 +114,8 @@
 ; Function Attrs: nounwind optsize
 define void @z0() #0 {
 entry:
-  %0 = load i32* @i, align 4, !tbaa !1
-  %1 = load i32* @j, align 4, !tbaa !1
+  %0 = load i32, i32* @i, align 4, !tbaa !1
+  %1 = load i32, i32* @j, align 4, !tbaa !1
   %cmp = icmp eq i32 %0, %1
   br i1 %cmp, label %if.then, label %if.else
 
@@ -140,8 +140,8 @@
 ; Function Attrs: nounwind optsize
 define void @z1() #0 {
 entry:
-  %0 = load i32* @i, align 4, !tbaa !1
-  %1 = load i32* @j, align 4, !tbaa !1
+  %0 = load i32, i32* @i, align 4, !tbaa !1
+  %1 = load i32, i32* @j, align 4, !tbaa !1
   %cmp = icmp eq i32 %0, %1
   br i1 %cmp, label %if.then, label %if.else
 
@@ -169,15 +169,15 @@
 ; Function Attrs: nounwind optsize
 define void @z3() #0 {
 entry:
-  %0 = load i32* @i, align 4, !tbaa !1
-  %1 = load i32* @j, align 4, !tbaa !1
+  %0 = load i32, i32* @i, align 4, !tbaa !1
+  %1 = load i32, i32* @j, align 4, !tbaa !1
   %cmp1 = icmp sgt i32 %0, %1
   br i1 %cmp1, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry, %if.then
   tail call void asm sideeffect ".space 10000", ""() #1, !srcloc !17
-  %2 = load i32* @i, align 4, !tbaa !1
-  %3 = load i32* @j, align 4, !tbaa !1
+  %2 = load i32, i32* @i, align 4, !tbaa !1
+  %3 = load i32, i32* @j, align 4, !tbaa !1
   %cmp = icmp sgt i32 %2, %3
   br i1 %cmp, label %if.then, label %if.end
 
@@ -192,15 +192,15 @@
 ; Function Attrs: nounwind optsize
 define void @z4() #0 {
 entry:
-  %0 = load i32* @i, align 4, !tbaa !1
-  %1 = load i32* @j, align 4, !tbaa !1
+  %0 = load i32, i32* @i, align 4, !tbaa !1
+  %1 = load i32, i32* @j, align 4, !tbaa !1
   %cmp1 = icmp sgt i32 %0, %1
   br i1 %cmp1, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry, %if.then
   tail call void asm sideeffect ".space 10000000", ""() #1, !srcloc !18
-  %2 = load i32* @i, align 4, !tbaa !1
-  %3 = load i32* @j, align 4, !tbaa !1
+  %2 = load i32, i32* @i, align 4, !tbaa !1
+  %3 = load i32, i32* @j, align 4, !tbaa !1
   %cmp = icmp sgt i32 %2, %3
   br i1 %cmp, label %if.then, label %if.end
 
diff --git a/llvm/test/CodeGen/Mips/lh1.ll b/llvm/test/CodeGen/Mips/lh1.ll
index 1f95b09..4e2fb98 100644
--- a/llvm/test/CodeGen/Mips/lh1.ll
+++ b/llvm/test/CodeGen/Mips/lh1.ll
@@ -6,11 +6,11 @@
 define i32 @main() nounwind {
 entry:
   %i = alloca i32, align 4
-  %0 = load i16* @s, align 2
+  %0 = load i16, i16* @s, align 2
   %conv = sext i16 %0 to i32
 ; 16:	lh	${{[0-9]+}}, 0(${{[0-9]+}})
   store i32 %conv, i32* %i, align 4
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
   ret i32 0
 }
diff --git a/llvm/test/CodeGen/Mips/lhu1.ll b/llvm/test/CodeGen/Mips/lhu1.ll
index 0cfcede..bd6d0c0 100644
--- a/llvm/test/CodeGen/Mips/lhu1.ll
+++ b/llvm/test/CodeGen/Mips/lhu1.ll
@@ -7,11 +7,11 @@
 define i32 @main() nounwind {
 entry:
   %i = alloca i32, align 4
-  %0 = load i16* @s, align 2
+  %0 = load i16, i16* @s, align 2
   %conv = zext i16 %0 to i32
 ; 16:	lhu	${{[0-9]+}}, 0(${{[0-9]+}})
   store i32 %conv, i32* %i, align 4
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
   ret i32 0
 }
diff --git a/llvm/test/CodeGen/Mips/llcarry.ll b/llvm/test/CodeGen/Mips/llcarry.ll
index 7763dae..f4120ec 100644
--- a/llvm/test/CodeGen/Mips/llcarry.ll
+++ b/llvm/test/CodeGen/Mips/llcarry.ll
@@ -9,8 +9,8 @@
 
 define void @test1() nounwind {
 entry:
-  %0 = load i64* @i, align 8
-  %1 = load i64* @j, align 8
+  %0 = load i64, i64* @i, align 8
+  %1 = load i64, i64* @j, align 8
   %add = add nsw i64 %1, %0
   store i64 %add, i64* @k, align 8
 ; 16:	addu	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
@@ -23,8 +23,8 @@
 
 define void @test2() nounwind {
 entry:
-  %0 = load i64* @i, align 8
-  %1 = load i64* @j, align 8
+  %0 = load i64, i64* @i, align 8
+  %1 = load i64, i64* @j, align 8
   %sub = sub nsw i64 %0, %1
 ; 16:	subu	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
 ; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
@@ -37,7 +37,7 @@
 
 define void @test3() nounwind {
 entry:
-  %0 = load i64* @ii, align 8
+  %0 = load i64, i64* @ii, align 8
   %add = add nsw i64 %0, 15
 ; 16:	addiu	${{[0-9]+}}, 15
 ; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
diff --git a/llvm/test/CodeGen/Mips/load-store-left-right.ll b/llvm/test/CodeGen/Mips/load-store-left-right.ll
index b8e6e83..ade0d98 100644
--- a/llvm/test/CodeGen/Mips/load-store-left-right.ll
+++ b/llvm/test/CodeGen/Mips/load-store-left-right.ll
@@ -43,7 +43,7 @@
 ; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(si)(
 ; MIPS64R6:      lw $2, 0($[[PTR]])
 
-  %0 = load i32* getelementptr inbounds (%struct.SI* @si, i32 0, i32 0), align 1
+  %0 = load i32, i32* getelementptr inbounds (%struct.SI* @si, i32 0, i32 0), align 1
   ret i32 %0
 }
 
@@ -100,7 +100,7 @@
 ; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(sll)(
 ; MIPS64R6:      ld $2, 0($[[PTR]])
 
-  %0 = load i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
+  %0 = load i64, i64* getelementptr inbounds (%struct.SLL* @sll, i64 0, i32 0), align 1
   ret i64 %0
 }
 
@@ -129,7 +129,7 @@
 ; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(si)(
 ; MIPS64R6:      lw $2, 0($[[PTR]])
 
-  %0 = load i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
+  %0 = load i32, i32* getelementptr inbounds (%struct.SI* @si, i64 0, i32 0), align 1
   %conv = sext i32 %0 to i64
   ret i64 %conv
 }
@@ -165,7 +165,7 @@
 ; MIPS64R6:      ld $[[PTR:[0-9]+]], %got_disp(sui)(
 ; MIPS64R6:      lwu $2, 0($[[PTR]])
 
-  %0 = load i32* getelementptr inbounds (%struct.SUI* @sui, i64 0, i32 0), align 1
+  %0 = load i32, i32* getelementptr inbounds (%struct.SUI* @sui, i64 0, i32 0), align 1
   %conv = zext i32 %0 to i64
   ret i64 %conv
 }
@@ -257,7 +257,7 @@
 ; ALL-DAG:       lbu $[[R1:[0-9]+]], 1($[[PTR]])
 ; ALL-DAG:       sb $[[R1]], 3($[[PTR]])
 
-  %0 = load %struct.S0* getelementptr inbounds (%struct.S0* @struct_s0, i32 0), align 1
+  %0 = load %struct.S0, %struct.S0* getelementptr inbounds (%struct.S0* @struct_s0, i32 0), align 1
   store %struct.S0 %0, %struct.S0* getelementptr inbounds (%struct.S0* @struct_s0, i32 1), align 1
   ret void
 }
@@ -300,7 +300,7 @@
 ; MIPS64R6-DAG:  lhu $[[R1:[0-9]+]], 2($[[PTR]])
 ; MIPS64R6-DAG:  sh $[[R1]], 6($[[PTR]])
 
-  %0 = load %struct.S1* getelementptr inbounds (%struct.S1* @struct_s1, i32 0), align 1
+  %0 = load %struct.S1, %struct.S1* getelementptr inbounds (%struct.S1* @struct_s1, i32 0), align 1
   store %struct.S1 %0, %struct.S1* getelementptr inbounds (%struct.S1* @struct_s1, i32 1), align 1
   ret void
 }
@@ -361,7 +361,7 @@
 ; MIPS64R6-DAG:  lw $[[R1:[0-9]+]], 4($[[PTR]])
 ; MIPS64R6-DAG:  sw $[[R1]],       12($[[PTR]])
 
-  %0 = load %struct.S2* getelementptr inbounds (%struct.S2* @struct_s2, i32 0), align 1
+  %0 = load %struct.S2, %struct.S2* getelementptr inbounds (%struct.S2* @struct_s2, i32 0), align 1
   store %struct.S2 %0, %struct.S2* getelementptr inbounds (%struct.S2* @struct_s2, i32 1), align 1
   ret void
 }
diff --git a/llvm/test/CodeGen/Mips/machineverifier.ll b/llvm/test/CodeGen/Mips/machineverifier.ll
index c673fe5..d496b83 100644
--- a/llvm/test/CodeGen/Mips/machineverifier.ll
+++ b/llvm/test/CodeGen/Mips/machineverifier.ll
@@ -6,7 +6,7 @@
 
 define void @foo() nounwind {
 entry:
-  %0 = load i32* @g, align 4
+  %0 = load i32, i32* @g, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %if.end, label %if.then
 
diff --git a/llvm/test/CodeGen/Mips/mbrsize4a.ll b/llvm/test/CodeGen/Mips/mbrsize4a.ll
index 15e1f47..ad8eb64 100644
--- a/llvm/test/CodeGen/Mips/mbrsize4a.ll
+++ b/llvm/test/CodeGen/Mips/mbrsize4a.ll
@@ -21,7 +21,7 @@
   br label %z
 
 return:                                           ; No predecessors!
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 ; jal16: 	jal	$BB{{[0-9]+}}_{{[0-9]+}}
 }
diff --git a/llvm/test/CodeGen/Mips/micromips-addiu.ll b/llvm/test/CodeGen/Mips/micromips-addiu.ll
index c5bee34..66550f4 100644
--- a/llvm/test/CodeGen/Mips/micromips-addiu.ll
+++ b/llvm/test/CodeGen/Mips/micromips-addiu.ll
@@ -8,17 +8,17 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @x, align 4
+  %0 = load i32, i32* @x, align 4
   %addiu1 = add i32 %0, -7
   %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
                                   ([7 x i8]* @.str, i32 0, i32 0), i32 %addiu1)
 
-  %1 = load i32* @y, align 4
+  %1 = load i32, i32* @y, align 4
   %addiu2 = add i32 %1, 55
   %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
                                   ([7 x i8]* @.str, i32 0, i32 0), i32 %addiu2)
 
-  %2 = load i32* @z, align 4
+  %2 = load i32, i32* @z, align 4
   %addiu3 = add i32 %2, 24
   %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
                                   ([7 x i8]* @.str, i32 0, i32 0), i32 %addiu3)
diff --git a/llvm/test/CodeGen/Mips/micromips-and16.ll b/llvm/test/CodeGen/Mips/micromips-and16.ll
index 4eacf18..d0a16ac 100644
--- a/llvm/test/CodeGen/Mips/micromips-and16.ll
+++ b/llvm/test/CodeGen/Mips/micromips-and16.ll
@@ -8,8 +8,8 @@
   %b = alloca i32, align 4
   %c = alloca i32, align 4
   store i32 0, i32* %retval
-  %0 = load i32* %b, align 4
-  %1 = load i32* %c, align 4
+  %0 = load i32, i32* %b, align 4
+  %1 = load i32, i32* %c, align 4
   %and = and i32 %0, %1
   store i32 %and, i32* %a, align 4
   ret i32 0
diff --git a/llvm/test/CodeGen/Mips/micromips-andi.ll b/llvm/test/CodeGen/Mips/micromips-andi.ll
index b82d2b0..1507c75 100644
--- a/llvm/test/CodeGen/Mips/micromips-andi.ll
+++ b/llvm/test/CodeGen/Mips/micromips-andi.ll
@@ -7,12 +7,12 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @x, align 4
+  %0 = load i32, i32* @x, align 4
   %and1 = and i32 %0, 4
   %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
                                   ([7 x i8]* @.str, i32 0, i32 0), i32 %and1)
 
-  %1 = load i32* @y, align 4
+  %1 = load i32, i32* @y, align 4
   %and2 = and i32 %1, 5
   %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
                                   ([7 x i8]* @.str, i32 0, i32 0), i32 %and2)
diff --git a/llvm/test/CodeGen/Mips/micromips-compact-branches.ll b/llvm/test/CodeGen/Mips/micromips-compact-branches.ll
index 670f9a0..c689944 100644
--- a/llvm/test/CodeGen/Mips/micromips-compact-branches.ll
+++ b/llvm/test/CodeGen/Mips/micromips-compact-branches.ll
@@ -4,7 +4,7 @@
 define void @main() nounwind uwtable {
 entry:
   %x = alloca i32, align 4
-  %0 = load i32* %x, align 4
+  %0 = load i32, i32* %x, align 4
   %cmp = icmp eq i32 %0, 0
   br i1 %cmp, label %if.then, label %if.end
 
diff --git a/llvm/test/CodeGen/Mips/micromips-delay-slot-jr.ll b/llvm/test/CodeGen/Mips/micromips-delay-slot-jr.ll
index c01e670..fa121f8 100644
--- a/llvm/test/CodeGen/Mips/micromips-delay-slot-jr.ll
+++ b/llvm/test/CodeGen/Mips/micromips-delay-slot-jr.ll
@@ -14,7 +14,7 @@
   %puts = tail call i32 @puts(i8* getelementptr inbounds ([2 x i8]* @str, i32 0, i32 0))
   %inc = add i32 %i.0, 1
   %arrayidx = getelementptr inbounds [3 x i8*], [3 x i8*]* @main.L, i32 0, i32 %i.0
-  %0 = load i8** %arrayidx, align 4, !tbaa !1
+  %0 = load i8*, i8** %arrayidx, align 4, !tbaa !1
   indirectbr i8* %0, [label %L1, label %L2]
 
 L2:                                               ; preds = %L1
diff --git a/llvm/test/CodeGen/Mips/micromips-delay-slot.ll b/llvm/test/CodeGen/Mips/micromips-delay-slot.ll
index b5f6c56..ef65462 100644
--- a/llvm/test/CodeGen/Mips/micromips-delay-slot.ll
+++ b/llvm/test/CodeGen/Mips/micromips-delay-slot.ll
@@ -6,7 +6,7 @@
 entry:
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
-  %0 = load i32* %a.addr, align 4
+  %0 = load i32, i32* %a.addr, align 4
   %shl = shl i32 %0, 2
   %call = call i32 @bar(i32 signext %shl)
   ret i32 %call
diff --git a/llvm/test/CodeGen/Mips/micromips-gp-rc.ll b/llvm/test/CodeGen/Mips/micromips-gp-rc.ll
index 945917a..f139f7a 100644
--- a/llvm/test/CodeGen/Mips/micromips-gp-rc.ll
+++ b/llvm/test/CodeGen/Mips/micromips-gp-rc.ll
@@ -6,7 +6,7 @@
 ; Function Attrs: noreturn nounwind
 define void @foo() #0 {
 entry:
-  %0 = load i32* @g, align 4
+  %0 = load i32, i32* @g, align 4
   tail call void @exit(i32 signext %0)
   unreachable
 }
diff --git a/llvm/test/CodeGen/Mips/micromips-jal.ll b/llvm/test/CodeGen/Mips/micromips-jal.ll
index fccc229..51832fe 100644
--- a/llvm/test/CodeGen/Mips/micromips-jal.ll
+++ b/llvm/test/CodeGen/Mips/micromips-jal.ll
@@ -7,8 +7,8 @@
   %b.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
   store i32 %b, i32* %b.addr, align 4
-  %0 = load i32* %a.addr, align 4
-  %1 = load i32* %b.addr, align 4
+  %0 = load i32, i32* %a.addr, align 4
+  %1 = load i32, i32* %b.addr, align 4
   %add = add nsw i32 %0, %1
   ret i32 %add
 }
@@ -20,11 +20,11 @@
   %y = alloca i32, align 4
   %z = alloca i32, align 4
   store i32 0, i32* %retval
-  %0 = load i32* %y, align 4
-  %1 = load i32* %z, align 4
+  %0 = load i32, i32* %y, align 4
+  %1 = load i32, i32* %z, align 4
   %call = call i32 @sum(i32 %0, i32 %1)
   store i32 %call, i32* %x, align 4
-  %2 = load i32* %x, align 4
+  %2 = load i32, i32* %x, align 4
   ret i32 %2
 }
 
diff --git a/llvm/test/CodeGen/Mips/micromips-load-effective-address.ll b/llvm/test/CodeGen/Mips/micromips-load-effective-address.ll
index afba760..4704580 100644
--- a/llvm/test/CodeGen/Mips/micromips-load-effective-address.ll
+++ b/llvm/test/CodeGen/Mips/micromips-load-effective-address.ll
@@ -7,10 +7,10 @@
   %y.addr = alloca i32*, align 8
   store i32* %x, i32** %x.addr, align 8
   store i32* %y, i32** %y.addr, align 8
-  %0 = load i32** %x.addr, align 8
-  %1 = load i32* %0, align 4
-  %2 = load i32** %y.addr, align 8
-  %3 = load i32* %2, align 4
+  %0 = load i32*, i32** %x.addr, align 8
+  %1 = load i32, i32* %0, align 4
+  %2 = load i32*, i32** %y.addr, align 8
+  %3 = load i32, i32* %2, align 4
   %add = add nsw i32 %1, %3
   ret i32 %add
 }
diff --git a/llvm/test/CodeGen/Mips/micromips-or16.ll b/llvm/test/CodeGen/Mips/micromips-or16.ll
index ab7e79a..82ea9c6 100644
--- a/llvm/test/CodeGen/Mips/micromips-or16.ll
+++ b/llvm/test/CodeGen/Mips/micromips-or16.ll
@@ -8,8 +8,8 @@
   %b = alloca i32, align 4
   %c = alloca i32, align 4
   store i32 0, i32* %retval
-  %0 = load i32* %b, align 4
-  %1 = load i32* %c, align 4
+  %0 = load i32, i32* %b, align 4
+  %1 = load i32, i32* %c, align 4
   %or = or i32 %0, %1
   store i32 %or, i32* %a, align 4
   ret i32 0
diff --git a/llvm/test/CodeGen/Mips/micromips-rdhwr-directives.ll b/llvm/test/CodeGen/Mips/micromips-rdhwr-directives.ll
index af40a87..ebe4ddd 100644
--- a/llvm/test/CodeGen/Mips/micromips-rdhwr-directives.ll
+++ b/llvm/test/CodeGen/Mips/micromips-rdhwr-directives.ll
@@ -10,6 +10,6 @@
 ; CHECK: rdhwr
 ; CHECK: .set  pop
 
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   ret i32 %0
 }
diff --git a/llvm/test/CodeGen/Mips/micromips-shift.ll b/llvm/test/CodeGen/Mips/micromips-shift.ll
index 8215010..ed1bcbb 100644
--- a/llvm/test/CodeGen/Mips/micromips-shift.ll
+++ b/llvm/test/CodeGen/Mips/micromips-shift.ll
@@ -8,11 +8,11 @@
 
 define i32 @shift_left() nounwind {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %shl = shl i32 %0, 4
   store i32 %shl, i32* @b, align 4
 
-  %1 = load i32* @c, align 4
+  %1 = load i32, i32* @c, align 4
   %shl1 = shl i32 %1, 10
   store i32 %shl1, i32* @d, align 4
 
@@ -29,11 +29,11 @@
 
 define i32 @shift_right() nounwind {
 entry:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %shr = lshr i32 %0, 4
   store i32 %shr, i32* @j, align 4
 
-  %1 = load i32* @m, align 4
+  %1 = load i32, i32* @m, align 4
   %shr1 = lshr i32 %1, 10
   store i32 %shr1, i32* @n, align 4
 
diff --git a/llvm/test/CodeGen/Mips/micromips-sw-lw-16.ll b/llvm/test/CodeGen/Mips/micromips-sw-lw-16.ll
index 7ea4413..3583726 100644
--- a/llvm/test/CodeGen/Mips/micromips-sw-lw-16.ll
+++ b/llvm/test/CodeGen/Mips/micromips-sw-lw-16.ll
@@ -6,16 +6,16 @@
 entry:
   %p.addr = alloca i32*, align 4
   store i32* %p, i32** %p.addr, align 4
-  %0 = load i32** %p.addr, align 4
-  %1 = load i32* %0, align 4
+  %0 = load i32*, i32** %p.addr, align 4
+  %1 = load i32, i32* %0, align 4
   %add = add nsw i32 7, %1
-  %2 = load i32** %p.addr, align 4
+  %2 = load i32*, i32** %p.addr, align 4
   store i32 %add, i32* %2, align 4
-  %3 = load i32** %p.addr, align 4
+  %3 = load i32*, i32** %p.addr, align 4
   %add.ptr = getelementptr inbounds i32, i32* %3, i32 1
-  %4 = load i32* %add.ptr, align 4
+  %4 = load i32, i32* %add.ptr, align 4
   %add1 = add nsw i32 7, %4
-  %5 = load i32** %p.addr, align 4
+  %5 = load i32*, i32** %p.addr, align 4
   %add.ptr2 = getelementptr inbounds i32, i32* %5, i32 1
   store i32 %add1, i32* %add.ptr2, align 4
   ret void
diff --git a/llvm/test/CodeGen/Mips/micromips-xor16.ll b/llvm/test/CodeGen/Mips/micromips-xor16.ll
index 9915112..53c75ac 100644
--- a/llvm/test/CodeGen/Mips/micromips-xor16.ll
+++ b/llvm/test/CodeGen/Mips/micromips-xor16.ll
@@ -8,8 +8,8 @@
   %b = alloca i32, align 4
   %c = alloca i32, align 4
   store i32 0, i32* %retval
-  %0 = load i32* %b, align 4
-  %1 = load i32* %c, align 4
+  %0 = load i32, i32* %b, align 4
+  %1 = load i32, i32* %c, align 4
   %xor = xor i32 %0, %1
   store i32 %xor, i32* %a, align 4
   ret i32 0
diff --git a/llvm/test/CodeGen/Mips/mips16_32_8.ll b/llvm/test/CodeGen/Mips/mips16_32_8.ll
index 2f5bc21..5f03bf3 100644
--- a/llvm/test/CodeGen/Mips/mips16_32_8.ll
+++ b/llvm/test/CodeGen/Mips/mips16_32_8.ll
@@ -22,11 +22,11 @@
 define void @nofoo() #1 {
 entry:
   store i32 20, i32* @i, align 4
-  %0 = load float* @x, align 4
-  %1 = load float* @y, align 4
+  %0 = load float, float* @x, align 4
+  %1 = load float, float* @y, align 4
   %add = fadd float %0, %1
   store float %add, float* @f, align 4
-  %2 = load float* @f, align 4
+  %2 = load float, float* @f, align 4
   %conv = fpext float %2 to double
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), double %conv)
   ret void
@@ -48,10 +48,10 @@
 define i32 @main() #3 {
 entry:
   call void @foo()
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str1, i32 0, i32 0), i32 %0)
   call void @nofoo()
-  %1 = load i32* @i, align 4
+  %1 = load i32, i32* @i, align 4
   %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str2, i32 0, i32 0), i32 %1)
   ret i32 0
 }
diff --git a/llvm/test/CodeGen/Mips/mips16_fpret.ll b/llvm/test/CodeGen/Mips/mips16_fpret.ll
index 635b28d..bf232c9 100644
--- a/llvm/test/CodeGen/Mips/mips16_fpret.ll
+++ b/llvm/test/CodeGen/Mips/mips16_fpret.ll
@@ -11,7 +11,7 @@
 
 define float @foox()  {
 entry:
-  %0 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
   ret float %0
 ; 1: 	.ent	foox
 ; 1:	lw	$2, %lo(x)(${{[0-9]+}})
@@ -20,7 +20,7 @@
 
 define double @foodx()  {
 entry:
-  %0 = load double* @dx, align 8
+  %0 = load double, double* @dx, align 8
   ret double %0
 ; 1: 	.ent	foodx
 ; 1: 	lw	$2, %lo(dx)(${{[0-9]+}})
@@ -34,13 +34,13 @@
 define { float, float } @foocx()  {
 entry:
   %retval = alloca { float, float }, align 4
-  %cx.real = load float* getelementptr inbounds ({ float, float }* @cx, i32 0, i32 0)
-  %cx.imag = load float* getelementptr inbounds ({ float, float }* @cx, i32 0, i32 1)
+  %cx.real = load float, float* getelementptr inbounds ({ float, float }* @cx, i32 0, i32 0)
+  %cx.imag = load float, float* getelementptr inbounds ({ float, float }* @cx, i32 0, i32 1)
   %real = getelementptr inbounds { float, float }, { float, float }* %retval, i32 0, i32 0
   %imag = getelementptr inbounds { float, float }, { float, float }* %retval, i32 0, i32 1
   store float %cx.real, float* %real
   store float %cx.imag, float* %imag
-  %0 = load { float, float }* %retval
+  %0 = load { float, float }, { float, float }* %retval
   ret { float, float } %0
 ; 1: 	.ent	foocx
 ; 1: 	lw	$2, %lo(cx)(${{[0-9]+}})
@@ -53,13 +53,13 @@
 define { double, double } @foodcx()  {
 entry:
   %retval = alloca { double, double }, align 8
-  %dcx.real = load double* getelementptr inbounds ({ double, double }* @dcx, i32 0, i32 0)
-  %dcx.imag = load double* getelementptr inbounds ({ double, double }* @dcx, i32 0, i32 1)
+  %dcx.real = load double, double* getelementptr inbounds ({ double, double }* @dcx, i32 0, i32 0)
+  %dcx.imag = load double, double* getelementptr inbounds ({ double, double }* @dcx, i32 0, i32 1)
   %real = getelementptr inbounds { double, double }, { double, double }* %retval, i32 0, i32 0
   %imag = getelementptr inbounds { double, double }, { double, double }* %retval, i32 0, i32 1
   store double %dcx.real, double* %real
   store double %dcx.imag, double* %imag
-  %0 = load { double, double }* %retval
+  %0 = load { double, double }, { double, double }* %retval
   ret { double, double } %0
 ; 1: 	.ent	foodcx
 ; 1: 	lw	${{[0-9]}}, %lo(dcx)(${{[0-9]+}})
diff --git a/llvm/test/CodeGen/Mips/mips16ex.ll b/llvm/test/CodeGen/Mips/mips16ex.ll
index 3f70c72..983d4da 100644
--- a/llvm/test/CodeGen/Mips/mips16ex.ll
+++ b/llvm/test/CodeGen/Mips/mips16ex.ll
@@ -33,18 +33,18 @@
   br label %catch.dispatch
 
 catch.dispatch:                                   ; preds = %lpad
-  %sel = load i32* %ehselector.slot
+  %sel = load i32, i32* %ehselector.slot
   %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind
   %matches = icmp eq i32 %sel, %4
   br i1 %matches, label %catch, label %eh.resume
 
 catch:                                            ; preds = %catch.dispatch
-  %exn = load i8** %exn.slot
+  %exn = load i8*, i8** %exn.slot
   %5 = call i8* @__cxa_begin_catch(i8* %exn) nounwind
   %6 = bitcast i8* %5 to i32*
-  %exn.scalar = load i32* %6
+  %exn.scalar = load i32, i32* %6
   store i32 %exn.scalar, i32* %e, align 4
-  %7 = load i32* %e, align 4
+  %7 = load i32, i32* %e, align 4
   %call2 = invoke i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([15 x i8]* @.str1, i32 0, i32 0), i32 %7)
           to label %invoke.cont unwind label %lpad1
 
@@ -66,8 +66,8 @@
   br label %eh.resume
 
 eh.resume:                                        ; preds = %lpad1, %catch.dispatch
-  %exn3 = load i8** %exn.slot
-  %sel4 = load i32* %ehselector.slot
+  %exn3 = load i8*, i8** %exn.slot
+  %sel4 = load i32, i32* %ehselector.slot
   %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn3, 0
   %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %sel4, 1
   resume { i8*, i32 } %lpad.val5
diff --git a/llvm/test/CodeGen/Mips/mips16fpe.ll b/llvm/test/CodeGen/Mips/mips16fpe.ll
index 987980e..f8b916d 100644
--- a/llvm/test/CodeGen/Mips/mips16fpe.ll
+++ b/llvm/test/CodeGen/Mips/mips16fpe.ll
@@ -42,8 +42,8 @@
 define void @test_addsf3() nounwind {
 entry:
 ;16hf-LABEL: test_addsf3:
-  %0 = load float* @x, align 4
-  %1 = load float* @y, align 4
+  %0 = load float, float* @x, align 4
+  %1 = load float, float* @y, align 4
   %add = fadd float %0, %1
   store float %add, float* @addsf3_result, align 4
 ;16hf:  lw	${{[0-9]+}}, %call16(__mips16_addsf3)(${{[0-9]+}})
@@ -53,8 +53,8 @@
 define void @test_adddf3() nounwind {
 entry:
 ;16hf-LABEL: test_adddf3:
-  %0 = load double* @xd, align 8
-  %1 = load double* @yd, align 8
+  %0 = load double, double* @xd, align 8
+  %1 = load double, double* @yd, align 8
   %add = fadd double %0, %1
   store double %add, double* @adddf3_result, align 8
 ;16hf:  lw	${{[0-9]+}}, %call16(__mips16_adddf3)(${{[0-9]+}})
@@ -64,8 +64,8 @@
 define void @test_subsf3() nounwind {
 entry:
 ;16hf-LABEL: test_subsf3:
-  %0 = load float* @x, align 4
-  %1 = load float* @y, align 4
+  %0 = load float, float* @x, align 4
+  %1 = load float, float* @y, align 4
   %sub = fsub float %0, %1
   store float %sub, float* @subsf3_result, align 4
 ;16hf:  lw	${{[0-9]+}}, %call16(__mips16_subsf3)(${{[0-9]+}})
@@ -75,8 +75,8 @@
 define void @test_subdf3() nounwind {
 entry:
 ;16hf-LABEL: test_subdf3:
-  %0 = load double* @xd, align 8
-  %1 = load double* @yd, align 8
+  %0 = load double, double* @xd, align 8
+  %1 = load double, double* @yd, align 8
   %sub = fsub double %0, %1
   store double %sub, double* @subdf3_result, align 8
 ;16hf:  lw	${{[0-9]+}}, %call16(__mips16_subdf3)(${{[0-9]+}})
@@ -86,8 +86,8 @@
 define void @test_mulsf3() nounwind {
 entry:
 ;16hf-LABEL: test_mulsf3:
-  %0 = load float* @x, align 4
-  %1 = load float* @y, align 4
+  %0 = load float, float* @x, align 4
+  %1 = load float, float* @y, align 4
   %mul = fmul float %0, %1
   store float %mul, float* @mulsf3_result, align 4
 ;16hf:  lw	${{[0-9]+}}, %call16(__mips16_mulsf3)(${{[0-9]+}})
@@ -97,8 +97,8 @@
 define void @test_muldf3() nounwind {
 entry:
 ;16hf-LABEL: test_muldf3:
-  %0 = load double* @xd, align 8
-  %1 = load double* @yd, align 8
+  %0 = load double, double* @xd, align 8
+  %1 = load double, double* @yd, align 8
   %mul = fmul double %0, %1
   store double %mul, double* @muldf3_result, align 8
 ;16hf:  lw	${{[0-9]+}}, %call16(__mips16_muldf3)(${{[0-9]+}})
@@ -108,8 +108,8 @@
 define void @test_divsf3() nounwind {
 entry:
 ;16hf-LABEL: test_divsf3:
-  %0 = load float* @y, align 4
-  %1 = load float* @x, align 4
+  %0 = load float, float* @y, align 4
+  %1 = load float, float* @x, align 4
   %div = fdiv float %0, %1
   store float %div, float* @divsf3_result, align 4
 ;16hf:  lw	${{[0-9]+}}, %call16(__mips16_divsf3)(${{[0-9]+}})
@@ -119,9 +119,9 @@
 define void @test_divdf3() nounwind {
 entry:
 ;16hf-LABEL: test_divdf3:
-  %0 = load double* @yd, align 8
+  %0 = load double, double* @yd, align 8
   %mul = fmul double %0, 2.000000e+00
-  %1 = load double* @xd, align 8
+  %1 = load double, double* @xd, align 8
   %div = fdiv double %mul, %1
   store double %div, double* @divdf3_result, align 8
 ;16hf:  lw	${{[0-9]+}}, %call16(__mips16_divdf3)(${{[0-9]+}})
@@ -131,7 +131,7 @@
 define void @test_extendsfdf2() nounwind {
 entry:
 ;16hf-LABEL: test_extendsfdf2:
-  %0 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
   %conv = fpext float %0 to double
   store double %conv, double* @extendsfdf2_result, align 8
 ;16hf:  lw	${{[0-9]+}}, %call16(__mips16_extendsfdf2)(${{[0-9]+}})
@@ -141,7 +141,7 @@
 define void @test_truncdfsf2() nounwind {
 entry:
 ;16hf-LABEL: test_truncdfsf2:
-  %0 = load double* @xd2, align 8
+  %0 = load double, double* @xd2, align 8
   %conv = fptrunc double %0 to float
   store float %conv, float* @truncdfsf2_result, align 4
 ;16hf:  lw	${{[0-9]+}}, %call16(__mips16_truncdfsf2)(${{[0-9]+}})
@@ -151,7 +151,7 @@
 define void @test_fix_truncsfsi() nounwind {
 entry:
 ;16hf-LABEL: test_fix_truncsfsi:
-  %0 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
   %conv = fptosi float %0 to i32
   store i32 %conv, i32* @fix_truncsfsi_result, align 4
 ;16hf:  lw	${{[0-9]+}}, %call16(__mips16_fix_truncsfsi)(${{[0-9]+}})
@@ -161,7 +161,7 @@
 define void @test_fix_truncdfsi() nounwind {
 entry:
 ;16hf-LABEL: test_fix_truncdfsi:
-  %0 = load double* @xd, align 8
+  %0 = load double, double* @xd, align 8
   %conv = fptosi double %0 to i32
   store i32 %conv, i32* @fix_truncdfsi_result, align 4
 ;16hf:  lw	${{[0-9]+}}, %call16(__mips16_fix_truncdfsi)(${{[0-9]+}})
@@ -171,7 +171,7 @@
 define void @test_floatsisf() nounwind {
 entry:
 ;16hf-LABEL: test_floatsisf:
-  %0 = load i32* @si, align 4
+  %0 = load i32, i32* @si, align 4
   %conv = sitofp i32 %0 to float
   store float %conv, float* @floatsisf_result, align 4
 ;16hf:  lw	${{[0-9]+}}, %call16(__mips16_floatsisf)(${{[0-9]+}})
@@ -181,7 +181,7 @@
 define void @test_floatsidf() nounwind {
 entry:
 ;16hf-LABEL: test_floatsidf:
-  %0 = load i32* @si, align 4
+  %0 = load i32, i32* @si, align 4
   %conv = sitofp i32 %0 to double
   store double %conv, double* @floatsidf_result, align 8
 ;16hf:  lw	${{[0-9]+}}, %call16(__mips16_floatsidf)(${{[0-9]+}})
@@ -191,7 +191,7 @@
 define void @test_floatunsisf() nounwind {
 entry:
 ;16hf-LABEL: test_floatunsisf:
-  %0 = load i32* @ui, align 4
+  %0 = load i32, i32* @ui, align 4
   %conv = uitofp i32 %0 to float
   store float %conv, float* @floatunsisf_result, align 4
 ;16hf:  lw	${{[0-9]+}}, %call16(__mips16_floatunsisf)(${{[0-9]+}})
@@ -201,7 +201,7 @@
 define void @test_floatunsidf() nounwind {
 entry:
 ;16hf-LABEL: test_floatunsidf:
-  %0 = load i32* @ui, align 4
+  %0 = load i32, i32* @ui, align 4
   %conv = uitofp i32 %0 to double
   store double %conv, double* @floatunsidf_result, align 8
 ;16hf:  lw	${{[0-9]+}}, %call16(__mips16_floatunsidf)(${{[0-9]+}})
@@ -211,8 +211,8 @@
 define void @test_eqsf2() nounwind {
 entry:
 ;16hf-LABEL: test_eqsf2:
-  %0 = load float* @x, align 4
-  %1 = load float* @xx, align 4
+  %0 = load float, float* @x, align 4
+  %1 = load float, float* @xx, align 4
   %cmp = fcmp oeq float %0, %1
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @eqsf2_result, align 4
@@ -223,8 +223,8 @@
 define void @test_eqdf2() nounwind {
 entry:
 ;16hf-LABEL: test_eqdf2:
-  %0 = load double* @xd, align 8
-  %1 = load double* @xxd, align 8
+  %0 = load double, double* @xd, align 8
+  %1 = load double, double* @xxd, align 8
   %cmp = fcmp oeq double %0, %1
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @eqdf2_result, align 4
@@ -235,8 +235,8 @@
 define void @test_nesf2() nounwind {
 entry:
 ;16hf-LABEL: test_nesf2:
-  %0 = load float* @x, align 4
-  %1 = load float* @y, align 4
+  %0 = load float, float* @x, align 4
+  %1 = load float, float* @y, align 4
   %cmp = fcmp une float %0, %1
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @nesf2_result, align 4
@@ -247,8 +247,8 @@
 define void @test_nedf2() nounwind {
 entry:
 ;16hf-LABEL: test_nedf2:
-  %0 = load double* @xd, align 8
-  %1 = load double* @yd, align 8
+  %0 = load double, double* @xd, align 8
+  %1 = load double, double* @yd, align 8
   %cmp = fcmp une double %0, %1
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @nedf2_result, align 4
@@ -259,10 +259,10 @@
 define void @test_gesf2() nounwind {
 entry:
 ;16hf-LABEL: test_gesf2:
-  %0 = load float* @x, align 4
-  %1 = load float* @xx, align 4
+  %0 = load float, float* @x, align 4
+  %1 = load float, float* @xx, align 4
   %cmp = fcmp oge float %0, %1
-  %2 = load float* @y, align 4
+  %2 = load float, float* @y, align 4
   %cmp1 = fcmp oge float %2, %0
   %and3 = and i1 %cmp, %cmp1
   %and = zext i1 %and3 to i32
@@ -274,10 +274,10 @@
 define void @test_gedf2() nounwind {
 entry:
 ;16hf-LABEL: test_gedf2:
-  %0 = load double* @xd, align 8
-  %1 = load double* @xxd, align 8
+  %0 = load double, double* @xd, align 8
+  %1 = load double, double* @xxd, align 8
   %cmp = fcmp oge double %0, %1
-  %2 = load double* @yd, align 8
+  %2 = load double, double* @yd, align 8
   %cmp1 = fcmp oge double %2, %0
   %and3 = and i1 %cmp, %cmp1
   %and = zext i1 %and3 to i32
@@ -289,10 +289,10 @@
 define void @test_ltsf2() nounwind {
 entry:
 ;16hf-LABEL: test_ltsf2:
-  %0 = load float* @x, align 4
-  %1 = load float* @xx, align 4
+  %0 = load float, float* @x, align 4
+  %1 = load float, float* @xx, align 4
   %lnot = fcmp uge float %0, %1
-  %2 = load float* @y, align 4
+  %2 = load float, float* @y, align 4
   %cmp1 = fcmp olt float %0, %2
   %and2 = and i1 %lnot, %cmp1
   %and = zext i1 %and2 to i32
@@ -305,10 +305,10 @@
 define void @test_ltdf2() nounwind {
 entry:
 ;16hf-LABEL: test_ltdf2:
-  %0 = load double* @xd, align 8
-  %1 = load double* @xxd, align 8
+  %0 = load double, double* @xd, align 8
+  %1 = load double, double* @xxd, align 8
   %lnot = fcmp uge double %0, %1
-  %2 = load double* @yd, align 8
+  %2 = load double, double* @yd, align 8
   %cmp1 = fcmp olt double %0, %2
   %and2 = and i1 %lnot, %cmp1
   %and = zext i1 %and2 to i32
@@ -321,10 +321,10 @@
 define void @test_lesf2() nounwind {
 entry:
 ;16hf-LABEL: test_lesf2:
-  %0 = load float* @x, align 4
-  %1 = load float* @xx, align 4
+  %0 = load float, float* @x, align 4
+  %1 = load float, float* @xx, align 4
   %cmp = fcmp ole float %0, %1
-  %2 = load float* @y, align 4
+  %2 = load float, float* @y, align 4
   %cmp1 = fcmp ole float %0, %2
   %and3 = and i1 %cmp, %cmp1
   %and = zext i1 %and3 to i32
@@ -336,10 +336,10 @@
 define void @test_ledf2() nounwind {
 entry:
 ;16hf-LABEL: test_ledf2:
-  %0 = load double* @xd, align 8
-  %1 = load double* @xxd, align 8
+  %0 = load double, double* @xd, align 8
+  %1 = load double, double* @xxd, align 8
   %cmp = fcmp ole double %0, %1
-  %2 = load double* @yd, align 8
+  %2 = load double, double* @yd, align 8
   %cmp1 = fcmp ole double %0, %2
   %and3 = and i1 %cmp, %cmp1
   %and = zext i1 %and3 to i32
@@ -351,10 +351,10 @@
 define void @test_gtsf2() nounwind {
 entry:
 ;16hf-LABEL: test_gtsf2:
-  %0 = load float* @x, align 4
-  %1 = load float* @xx, align 4
+  %0 = load float, float* @x, align 4
+  %1 = load float, float* @xx, align 4
   %lnot = fcmp ule float %0, %1
-  %2 = load float* @y, align 4
+  %2 = load float, float* @y, align 4
   %cmp1 = fcmp ogt float %2, %0
   %and2 = and i1 %lnot, %cmp1
   %and = zext i1 %and2 to i32
@@ -366,10 +366,10 @@
 define void @test_gtdf2() nounwind {
 entry:
 ;16hf-LABEL: test_gtdf2:
-  %0 = load double* @xd, align 8
-  %1 = load double* @xxd, align 8
+  %0 = load double, double* @xd, align 8
+  %1 = load double, double* @xxd, align 8
   %lnot = fcmp ule double %0, %1
-  %2 = load double* @yd, align 8
+  %2 = load double, double* @yd, align 8
   %cmp1 = fcmp ogt double %2, %0
   %and2 = and i1 %lnot, %cmp1
   %and = zext i1 %and2 to i32
diff --git a/llvm/test/CodeGen/Mips/mips64-f128-call.ll b/llvm/test/CodeGen/Mips/mips64-f128-call.ll
index 455e540..9a093e6 100644
--- a/llvm/test/CodeGen/Mips/mips64-f128-call.ll
+++ b/llvm/test/CodeGen/Mips/mips64-f128-call.ll
@@ -19,7 +19,7 @@
 
 define void @foo1() {
 entry:
-  %0 = load fp128* @gld0, align 16
+  %0 = load fp128, fp128* @gld0, align 16
   tail call void @foo2(fp128 %0)
   ret void
 }
@@ -38,7 +38,7 @@
 entry:
   %call = tail call fp128 @foo4()
   store fp128 %call, fp128* @gld0, align 16
-  %0 = load fp128* @gld1, align 16
+  %0 = load fp128, fp128* @gld1, align 16
   ret fp128 %0
 }
 
diff --git a/llvm/test/CodeGen/Mips/mips64-f128.ll b/llvm/test/CodeGen/Mips/mips64-f128.ll
index 6987d4a..9dd41e3 100644
--- a/llvm/test/CodeGen/Mips/mips64-f128.ll
+++ b/llvm/test/CodeGen/Mips/mips64-f128.ll
@@ -18,8 +18,8 @@
 
 define fp128 @addLD() {
 entry:
-  %0 = load fp128* @gld0, align 16
-  %1 = load fp128* @gld1, align 16
+  %0 = load fp128, fp128* @gld0, align 16
+  %1 = load fp128, fp128* @gld1, align 16
   %add = fadd fp128 %0, %1
   ret fp128 %add
 }
@@ -29,8 +29,8 @@
 
 define fp128 @subLD() {
 entry:
-  %0 = load fp128* @gld0, align 16
-  %1 = load fp128* @gld1, align 16
+  %0 = load fp128, fp128* @gld0, align 16
+  %1 = load fp128, fp128* @gld1, align 16
   %sub = fsub fp128 %0, %1
   ret fp128 %sub
 }
@@ -40,8 +40,8 @@
 
 define fp128 @mulLD() {
 entry:
-  %0 = load fp128* @gld0, align 16
-  %1 = load fp128* @gld1, align 16
+  %0 = load fp128, fp128* @gld0, align 16
+  %1 = load fp128, fp128* @gld1, align 16
   %mul = fmul fp128 %0, %1
   ret fp128 %mul
 }
@@ -51,8 +51,8 @@
 
 define fp128 @divLD() {
 entry:
-  %0 = load fp128* @gld0, align 16
-  %1 = load fp128* @gld1, align 16
+  %0 = load fp128, fp128* @gld0, align 16
+  %1 = load fp128, fp128* @gld1, align 16
   %div = fdiv fp128 %0, %1
   ret fp128 %div
 }
@@ -247,7 +247,7 @@
 
 define fp128 @libcall1_fabsl() {
 entry:
-  %0 = load fp128* @gld0, align 16
+  %0 = load fp128, fp128* @gld0, align 16
   %call = tail call fp128 @fabsl(fp128 %0) nounwind readnone
   ret fp128 %call
 }
@@ -259,7 +259,7 @@
 
 define fp128 @libcall1_ceill() {
 entry:
-  %0 = load fp128* @gld0, align 16
+  %0 = load fp128, fp128* @gld0, align 16
   %call = tail call fp128 @ceill(fp128 %0) nounwind readnone
   ret fp128 %call
 }
@@ -271,7 +271,7 @@
 
 define fp128 @libcall1_sinl() {
 entry:
-  %0 = load fp128* @gld0, align 16
+  %0 = load fp128, fp128* @gld0, align 16
   %call = tail call fp128 @sinl(fp128 %0) nounwind
   ret fp128 %call
 }
@@ -283,7 +283,7 @@
 
 define fp128 @libcall1_cosl() {
 entry:
-  %0 = load fp128* @gld0, align 16
+  %0 = load fp128, fp128* @gld0, align 16
   %call = tail call fp128 @cosl(fp128 %0) nounwind
   ret fp128 %call
 }
@@ -295,7 +295,7 @@
 
 define fp128 @libcall1_expl() {
 entry:
-  %0 = load fp128* @gld0, align 16
+  %0 = load fp128, fp128* @gld0, align 16
   %call = tail call fp128 @expl(fp128 %0) nounwind
   ret fp128 %call
 }
@@ -307,7 +307,7 @@
 
 define fp128 @libcall1_exp2l() {
 entry:
-  %0 = load fp128* @gld0, align 16
+  %0 = load fp128, fp128* @gld0, align 16
   %call = tail call fp128 @exp2l(fp128 %0) nounwind
   ret fp128 %call
 }
@@ -319,7 +319,7 @@
 
 define fp128 @libcall1_logl() {
 entry:
-  %0 = load fp128* @gld0, align 16
+  %0 = load fp128, fp128* @gld0, align 16
   %call = tail call fp128 @logl(fp128 %0) nounwind
   ret fp128 %call
 }
@@ -331,7 +331,7 @@
 
 define fp128 @libcall1_log2l() {
 entry:
-  %0 = load fp128* @gld0, align 16
+  %0 = load fp128, fp128* @gld0, align 16
   %call = tail call fp128 @log2l(fp128 %0) nounwind
   ret fp128 %call
 }
@@ -343,7 +343,7 @@
 
 define fp128 @libcall1_log10l() {
 entry:
-  %0 = load fp128* @gld0, align 16
+  %0 = load fp128, fp128* @gld0, align 16
   %call = tail call fp128 @log10l(fp128 %0) nounwind
   ret fp128 %call
 }
@@ -355,7 +355,7 @@
 
 define fp128 @libcall1_nearbyintl() {
 entry:
-  %0 = load fp128* @gld0, align 16
+  %0 = load fp128, fp128* @gld0, align 16
   %call = tail call fp128 @nearbyintl(fp128 %0) nounwind readnone
   ret fp128 %call
 }
@@ -367,7 +367,7 @@
 
 define fp128 @libcall1_floorl() {
 entry:
-  %0 = load fp128* @gld0, align 16
+  %0 = load fp128, fp128* @gld0, align 16
   %call = tail call fp128 @floorl(fp128 %0) nounwind readnone
   ret fp128 %call
 }
@@ -379,7 +379,7 @@
 
 define fp128 @libcall1_sqrtl() {
 entry:
-  %0 = load fp128* @gld0, align 16
+  %0 = load fp128, fp128* @gld0, align 16
   %call = tail call fp128 @sqrtl(fp128 %0) nounwind
   ret fp128 %call
 }
@@ -391,7 +391,7 @@
 
 define fp128 @libcall1_rintl() {
 entry:
-  %0 = load fp128* @gld0, align 16
+  %0 = load fp128, fp128* @gld0, align 16
   %call = tail call fp128 @rintl(fp128 %0) nounwind readnone
   ret fp128 %call
 }
@@ -424,8 +424,8 @@
 
 define fp128 @libcall2_copysignl() {
 entry:
-  %0 = load fp128* @gld0, align 16
-  %1 = load fp128* @gld1, align 16
+  %0 = load fp128, fp128* @gld0, align 16
+  %1 = load fp128, fp128* @gld1, align 16
   %call = tail call fp128 @copysignl(fp128 %0, fp128 %1) nounwind readnone
   ret fp128 %call
 }
@@ -437,8 +437,8 @@
 
 define fp128 @libcall2_powl() {
 entry:
-  %0 = load fp128* @gld0, align 16
-  %1 = load fp128* @gld1, align 16
+  %0 = load fp128, fp128* @gld0, align 16
+  %1 = load fp128, fp128* @gld1, align 16
   %call = tail call fp128 @powl(fp128 %0, fp128 %1) nounwind
   ret fp128 %call
 }
@@ -450,8 +450,8 @@
 
 define fp128 @libcall2_fmodl() {
 entry:
-  %0 = load fp128* @gld0, align 16
-  %1 = load fp128* @gld1, align 16
+  %0 = load fp128, fp128* @gld0, align 16
+  %1 = load fp128, fp128* @gld1, align 16
   %call = tail call fp128 @fmodl(fp128 %0, fp128 %1) nounwind
   ret fp128 %call
 }
@@ -463,9 +463,9 @@
 
 define fp128 @libcall3_fmal() {
 entry:
-  %0 = load fp128* @gld0, align 16
-  %1 = load fp128* @gld2, align 16
-  %2 = load fp128* @gld1, align 16
+  %0 = load fp128, fp128* @gld0, align 16
+  %1 = load fp128, fp128* @gld2, align 16
+  %2 = load fp128, fp128* @gld1, align 16
   %3 = tail call fp128 @llvm.fma.f128(fp128 %0, fp128 %2, fp128 %1)
   ret fp128 %3
 }
@@ -539,7 +539,7 @@
 
 define fp128 @load_LD_LD() {
 entry:
-  %0 = load fp128* @gld1, align 16
+  %0 = load fp128, fp128* @gld1, align 16
   ret fp128 %0
 }
 
@@ -551,7 +551,7 @@
 
 define fp128 @load_LD_float() {
 entry:
-  %0 = load float* @gf1, align 4
+  %0 = load float, float* @gf1, align 4
   %conv = fpext float %0 to fp128
   ret fp128 %conv
 }
@@ -564,7 +564,7 @@
 
 define fp128 @load_LD_double() {
 entry:
-  %0 = load double* @gd1, align 8
+  %0 = load double, double* @gd1, align 8
   %conv = fpext double %0 to fp128
   ret fp128 %conv
 }
@@ -579,7 +579,7 @@
 
 define void @store_LD_LD() {
 entry:
-  %0 = load fp128* @gld1, align 16
+  %0 = load fp128, fp128* @gld1, align 16
   store fp128 %0, fp128* @gld0, align 16
   ret void
 }
@@ -595,7 +595,7 @@
 
 define void @store_LD_float() {
 entry:
-  %0 = load fp128* @gld1, align 16
+  %0 = load fp128, fp128* @gld1, align 16
   %conv = fptrunc fp128 %0 to float
   store float %conv, float* @gf1, align 4
   ret void
@@ -612,7 +612,7 @@
 
 define void @store_LD_double() {
 entry:
-  %0 = load fp128* @gld1, align 16
+  %0 = load fp128, fp128* @gld1, align 16
   %conv = fptrunc fp128 %0 to double
   store double %conv, double* @gd1, align 8
   ret void
diff --git a/llvm/test/CodeGen/Mips/mips64directive.ll b/llvm/test/CodeGen/Mips/mips64directive.ll
index c4ba534..b1052f7 100644
--- a/llvm/test/CodeGen/Mips/mips64directive.ll
+++ b/llvm/test/CodeGen/Mips/mips64directive.ll
@@ -6,7 +6,7 @@
 ; CHECK: 8byte
 define i64 @foo1() nounwind readonly {
 entry:
-  %0 = load i64* @gl, align 8
+  %0 = load i64, i64* @gl, align 8
   ret i64 %0
 }
 
diff --git a/llvm/test/CodeGen/Mips/mips64fpldst.ll b/llvm/test/CodeGen/Mips/mips64fpldst.ll
index 5d62156..55d5c77 100644
--- a/llvm/test/CodeGen/Mips/mips64fpldst.ll
+++ b/llvm/test/CodeGen/Mips/mips64fpldst.ll
@@ -16,7 +16,7 @@
 ; CHECK-N32: funcfl1
 ; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(f0)
 ; CHECK-N32: lwc1 $f{{[0-9]+}}, 0($[[R0]]) 
-  %0 = load float* @f0, align 4
+  %0 = load float, float* @f0, align 4
   ret float %0
 }
 
@@ -28,7 +28,7 @@
 ; CHECK-N32: funcfl2
 ; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(d0)
 ; CHECK-N32: ldc1 $f{{[0-9]+}}, 0($[[R0]]) 
-  %0 = load double* @d0, align 8 
+  %0 = load double, double* @d0, align 8 
   ret double %0
 }
 
@@ -40,7 +40,7 @@
 ; CHECK-N32: funcfs1
 ; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(f0)
 ; CHECK-N32: swc1 $f{{[0-9]+}}, 0($[[R0]]) 
-  %0 = load float* @f1, align 4 
+  %0 = load float, float* @f1, align 4 
   store float %0, float* @f0, align 4 
   ret void
 }
@@ -53,7 +53,7 @@
 ; CHECK-N32: funcfs2
 ; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(d0)
 ; CHECK-N32: sdc1 $f{{[0-9]+}}, 0($[[R0]]) 
-  %0 = load double* @d1, align 8 
+  %0 = load double, double* @d1, align 8 
   store double %0, double* @d0, align 8 
   ret void
 }
diff --git a/llvm/test/CodeGen/Mips/mips64instrs.ll b/llvm/test/CodeGen/Mips/mips64instrs.ll
index ed617be..d64cdce 100644
--- a/llvm/test/CodeGen/Mips/mips64instrs.ll
+++ b/llvm/test/CodeGen/Mips/mips64instrs.ll
@@ -123,8 +123,8 @@
 ; GPRMULDIV:     ddiv $2, $[[T0]], $[[T1]]
 ; GPRMULDIV:     teq $[[T1]], $zero, 7
 
-  %0 = load i64* @gll0, align 8
-  %1 = load i64* @gll1, align 8
+  %0 = load i64, i64* @gll0, align 8
+  %1 = load i64, i64* @gll1, align 8
   %div = sdiv i64 %0, %1
   ret i64 %div
 }
@@ -144,8 +144,8 @@
 ; GPRMULDIV:     ddivu $2, $[[T0]], $[[T1]]
 ; GPRMULDIV:     teq $[[T1]], $zero, 7
 
-  %0 = load i64* @gll0, align 8
-  %1 = load i64* @gll1, align 8
+  %0 = load i64, i64* @gll0, align 8
+  %1 = load i64, i64* @gll1, align 8
   %div = udiv i64 %0, %1
   ret i64 %div
 }
diff --git a/llvm/test/CodeGen/Mips/mips64intldst.ll b/llvm/test/CodeGen/Mips/mips64intldst.ll
index 1ceafc1..658ab88 100644
--- a/llvm/test/CodeGen/Mips/mips64intldst.ll
+++ b/llvm/test/CodeGen/Mips/mips64intldst.ll
@@ -20,7 +20,7 @@
 ; CHECK-N32: func1
 ; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(c)
 ; CHECK-N32: lb ${{[0-9]+}}, 0($[[R0]])
-  %0 = load i8* @c, align 4
+  %0 = load i8, i8* @c, align 4
   %conv = sext i8 %0 to i64
   ret i64 %conv
 }
@@ -33,7 +33,7 @@
 ; CHECK-N32: func2
 ; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(s)
 ; CHECK-N32: lh ${{[0-9]+}}, 0($[[R0]])
-  %0 = load i16* @s, align 4
+  %0 = load i16, i16* @s, align 4
   %conv = sext i16 %0 to i64
   ret i64 %conv
 }
@@ -46,7 +46,7 @@
 ; CHECK-N32: func3
 ; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(i)
 ; CHECK-N32: lw ${{[0-9]+}}, 0($[[R0]])
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %conv = sext i32 %0 to i64
   ret i64 %conv
 }
@@ -59,7 +59,7 @@
 ; CHECK-N32: func4
 ; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(l)
 ; CHECK-N32: ld ${{[0-9]+}}, 0($[[R0]])
-  %0 = load i64* @l, align 8
+  %0 = load i64, i64* @l, align 8
   ret i64 %0
 }
 
@@ -71,7 +71,7 @@
 ; CHECK-N32: ufunc1
 ; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(uc)
 ; CHECK-N32: lbu ${{[0-9]+}}, 0($[[R0]])
-  %0 = load i8* @uc, align 4
+  %0 = load i8, i8* @uc, align 4
   %conv = zext i8 %0 to i64
   ret i64 %conv
 }
@@ -84,7 +84,7 @@
 ; CHECK-N32: ufunc2
 ; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(us)
 ; CHECK-N32: lhu ${{[0-9]+}}, 0($[[R0]])
-  %0 = load i16* @us, align 4
+  %0 = load i16, i16* @us, align 4
   %conv = zext i16 %0 to i64
   ret i64 %conv
 }
@@ -97,7 +97,7 @@
 ; CHECK-N32: ufunc3
 ; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(ui)
 ; CHECK-N32: lwu ${{[0-9]+}}, 0($[[R0]])
-  %0 = load i32* @ui, align 4
+  %0 = load i32, i32* @ui, align 4
   %conv = zext i32 %0 to i64
   ret i64 %conv
 }
@@ -110,7 +110,7 @@
 ; CHECK-N32: sfunc1
 ; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(c)
 ; CHECK-N32: sb ${{[0-9]+}}, 0($[[R0]])
-  %0 = load i64* @l1, align 8
+  %0 = load i64, i64* @l1, align 8
   %conv = trunc i64 %0 to i8
   store i8 %conv, i8* @c, align 4
   ret void
@@ -124,7 +124,7 @@
 ; CHECK-N32: sfunc2
 ; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(s)
 ; CHECK-N32: sh ${{[0-9]+}}, 0($[[R0]])
-  %0 = load i64* @l1, align 8
+  %0 = load i64, i64* @l1, align 8
   %conv = trunc i64 %0 to i16
   store i16 %conv, i16* @s, align 4
   ret void
@@ -138,7 +138,7 @@
 ; CHECK-N32: sfunc3
 ; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(i)
 ; CHECK-N32: sw ${{[0-9]+}}, 0($[[R0]])
-  %0 = load i64* @l1, align 8
+  %0 = load i64, i64* @l1, align 8
   %conv = trunc i64 %0 to i32
   store i32 %conv, i32* @i, align 4
   ret void
@@ -152,7 +152,7 @@
 ; CHECK-N32: sfunc4
 ; CHECK-N32: lw $[[R0:[0-9]+]], %got_disp(l)
 ; CHECK-N32: sd ${{[0-9]+}}, 0($[[R0]])
-  %0 = load i64* @l1, align 8
+  %0 = load i64, i64* @l1, align 8
   store i64 %0, i64* @l, align 8
   ret void
 }
diff --git a/llvm/test/CodeGen/Mips/mips64sinttofpsf.ll b/llvm/test/CodeGen/Mips/mips64sinttofpsf.ll
index d3d4603..7bd75bb 100644
--- a/llvm/test/CodeGen/Mips/mips64sinttofpsf.ll
+++ b/llvm/test/CodeGen/Mips/mips64sinttofpsf.ll
@@ -5,7 +5,7 @@
 entry:
   %x = alloca i32, align 4
   store volatile i32 -32, i32* %x, align 4
-  %0 = load volatile i32* %x, align 4
+  %0 = load volatile i32, i32* %x, align 4
   %conv = sitofp i32 %0 to double
   ret double %conv
 
diff --git a/llvm/test/CodeGen/Mips/mipslopat.ll b/llvm/test/CodeGen/Mips/mipslopat.ll
index 1f433b9..63b68c1 100644
--- a/llvm/test/CodeGen/Mips/mipslopat.ll
+++ b/llvm/test/CodeGen/Mips/mipslopat.ll
@@ -6,10 +6,10 @@
 
 define void @simple_vol_file() nounwind {
 entry:
-  %tmp = load volatile i32** @stat_vol_ptr_int, align 4
+  %tmp = load volatile i32*, i32** @stat_vol_ptr_int, align 4
   %0 = bitcast i32* %tmp to i8*
   call void @llvm.prefetch(i8* %0, i32 0, i32 0, i32 1)
-  %tmp1 = load i32** @stat_ptr_vol_int, align 4
+  %tmp1 = load i32*, i32** @stat_ptr_vol_int, align 4
   %1 = bitcast i32* %tmp1 to i8*
   call void @llvm.prefetch(i8* %1, i32 0, i32 0, i32 1)
   ret void
diff --git a/llvm/test/CodeGen/Mips/misha.ll b/llvm/test/CodeGen/Mips/misha.ll
index 3000b5c..23ad7f6 100644
--- a/llvm/test/CodeGen/Mips/misha.ll
+++ b/llvm/test/CodeGen/Mips/misha.ll
@@ -8,7 +8,7 @@
   br i1 %cmp8, label %for.end, label %for.body.lr.ph
 
 for.body.lr.ph:                                   ; preds = %entry
-  %.pre = load i8* %to, align 1
+  %.pre = load i8, i8* %to, align 1
   br label %for.body
 
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
@@ -16,7 +16,7 @@
   %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
   %from.addr.09 = phi i8* [ %from, %for.body.lr.ph ], [ %incdec.ptr, %for.body ]
   %incdec.ptr = getelementptr inbounds i8, i8* %from.addr.09, i32 1
-  %2 = load i8* %from.addr.09, align 1
+  %2 = load i8, i8* %from.addr.09, align 1
   %conv27 = zext i8 %2 to i32
   %conv36 = zext i8 %1 to i32
   %add = add nsw i32 %conv36, %conv27
@@ -44,7 +44,7 @@
   br i1 %cmp8, label %for.end, label %for.body.lr.ph
 
 for.body.lr.ph:                                   ; preds = %entry
-  %.pre = load i16* %to, align 2
+  %.pre = load i16, i16* %to, align 2
   br label %for.body
 
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
@@ -52,7 +52,7 @@
   %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
   %from.addr.09 = phi i16* [ %from, %for.body.lr.ph ], [ %incdec.ptr, %for.body ]
   %incdec.ptr = getelementptr inbounds i16, i16* %from.addr.09, i32 1
-  %2 = load i16* %from.addr.09, align 2
+  %2 = load i16, i16* %from.addr.09, align 2
   %conv27 = zext i16 %2 to i32
   %conv36 = zext i16 %1 to i32
   %add = add nsw i32 %conv36, %conv27
diff --git a/llvm/test/CodeGen/Mips/mno-ldc1-sdc1.ll b/llvm/test/CodeGen/Mips/mno-ldc1-sdc1.ll
index f42850f..c7eda33 100644
--- a/llvm/test/CodeGen/Mips/mno-ldc1-sdc1.ll
+++ b/llvm/test/CodeGen/Mips/mno-ldc1-sdc1.ll
@@ -111,7 +111,7 @@
 
 define double @test_ldc1() {
 entry:
-  %0 = load double* @g0, align 8
+  %0 = load double, double* @g0, align 8
   ret double %0
 }
 
@@ -213,7 +213,7 @@
 define double @test_ldxc1(double* nocapture readonly %a, i32 %i) {
 entry:
   %arrayidx = getelementptr inbounds double, double* %a, i32 %i
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   ret double %0
 }
 
diff --git a/llvm/test/CodeGen/Mips/msa/2r.ll b/llvm/test/CodeGen/Mips/msa/2r.ll
index da35ad8..501936c 100644
--- a/llvm/test/CodeGen/Mips/msa/2r.ll
+++ b/llvm/test/CodeGen/Mips/msa/2r.ll
@@ -8,7 +8,7 @@
 
 define void @llvm_mips_nloc_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_nloc_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_nloc_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.nloc.b(<16 x i8> %0)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_nloc_b_RES
   ret void
@@ -29,7 +29,7 @@
 
 define void @llvm_mips_nloc_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_nloc_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_nloc_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.nloc.h(<8 x i16> %0)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_nloc_h_RES
   ret void
@@ -50,7 +50,7 @@
 
 define void @llvm_mips_nloc_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_nloc_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_nloc_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.nloc.w(<4 x i32> %0)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_nloc_w_RES
   ret void
@@ -71,7 +71,7 @@
 
 define void @llvm_mips_nloc_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_nloc_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_nloc_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.nloc.d(<2 x i64> %0)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_nloc_d_RES
   ret void
@@ -92,7 +92,7 @@
 
 define void @llvm_mips_nlzc_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_nlzc_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_nlzc_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.nlzc.b(<16 x i8> %0)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_nlzc_b_RES
   ret void
@@ -113,7 +113,7 @@
 
 define void @llvm_mips_nlzc_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_nlzc_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_nlzc_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.nlzc.h(<8 x i16> %0)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_nlzc_h_RES
   ret void
@@ -134,7 +134,7 @@
 
 define void @llvm_mips_nlzc_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_nlzc_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_nlzc_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.nlzc.w(<4 x i32> %0)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_nlzc_w_RES
   ret void
@@ -155,7 +155,7 @@
 
 define void @llvm_mips_nlzc_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_nlzc_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_nlzc_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.nlzc.d(<2 x i64> %0)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_nlzc_d_RES
   ret void
@@ -176,7 +176,7 @@
 
 define void @llvm_mips_pcnt_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_pcnt_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_pcnt_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.pcnt.b(<16 x i8> %0)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_pcnt_b_RES
   ret void
@@ -197,7 +197,7 @@
 
 define void @llvm_mips_pcnt_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_pcnt_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_pcnt_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.pcnt.h(<8 x i16> %0)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_pcnt_h_RES
   ret void
@@ -218,7 +218,7 @@
 
 define void @llvm_mips_pcnt_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_pcnt_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_pcnt_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.pcnt.w(<4 x i32> %0)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_pcnt_w_RES
   ret void
@@ -239,7 +239,7 @@
 
 define void @llvm_mips_pcnt_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_pcnt_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_pcnt_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.pcnt.d(<2 x i64> %0)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_pcnt_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/2r_vector_scalar.ll b/llvm/test/CodeGen/Mips/msa/2r_vector_scalar.ll
index 64e459e..ddcd3cf 100644
--- a/llvm/test/CodeGen/Mips/msa/2r_vector_scalar.ll
+++ b/llvm/test/CodeGen/Mips/msa/2r_vector_scalar.ll
@@ -15,7 +15,7 @@
 
 define void @llvm_mips_fill_b_test() nounwind {
 entry:
-  %0 = load i32* @llvm_mips_fill_b_ARG1
+  %0 = load i32, i32* @llvm_mips_fill_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.fill.b(i32 %0)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_fill_b_RES
   ret void
@@ -35,7 +35,7 @@
 
 define void @llvm_mips_fill_h_test() nounwind {
 entry:
-  %0 = load i32* @llvm_mips_fill_h_ARG1
+  %0 = load i32, i32* @llvm_mips_fill_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.fill.h(i32 %0)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_fill_h_RES
   ret void
@@ -55,7 +55,7 @@
 
 define void @llvm_mips_fill_w_test() nounwind {
 entry:
-  %0 = load i32* @llvm_mips_fill_w_ARG1
+  %0 = load i32, i32* @llvm_mips_fill_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.fill.w(i32 %0)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_fill_w_RES
   ret void
@@ -75,7 +75,7 @@
 
 define void @llvm_mips_fill_d_test() nounwind {
 entry:
-  %0 = load i64* @llvm_mips_fill_d_ARG1
+  %0 = load i64, i64* @llvm_mips_fill_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.fill.d(i64 %0)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_fill_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/2rf.ll b/llvm/test/CodeGen/Mips/msa/2rf.ll
index b361ef5..1dbfbda 100644
--- a/llvm/test/CodeGen/Mips/msa/2rf.ll
+++ b/llvm/test/CodeGen/Mips/msa/2rf.ll
@@ -8,7 +8,7 @@
 
 define void @llvm_mips_flog2_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_flog2_w_ARG1
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_flog2_w_ARG1
   %1 = tail call <4 x float> @llvm.mips.flog2.w(<4 x float> %0)
   store <4 x float> %1, <4 x float>* @llvm_mips_flog2_w_RES
   ret void
@@ -29,7 +29,7 @@
 
 define void @llvm_mips_flog2_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_flog2_d_ARG1
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_flog2_d_ARG1
   %1 = tail call <2 x double> @llvm.mips.flog2.d(<2 x double> %0)
   store <2 x double> %1, <2 x double>* @llvm_mips_flog2_d_RES
   ret void
@@ -47,7 +47,7 @@
 
 define void @flog2_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_flog2_w_ARG1
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_flog2_w_ARG1
   %1 = tail call <4 x float> @llvm.log2.v4f32(<4 x float> %0)
   store <4 x float> %1, <4 x float>* @llvm_mips_flog2_w_RES
   ret void
@@ -65,7 +65,7 @@
 
 define void @flog2_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_flog2_d_ARG1
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_flog2_d_ARG1
   %1 = tail call <2 x double> @llvm.log2.v2f64(<2 x double> %0)
   store <2 x double> %1, <2 x double>* @llvm_mips_flog2_d_RES
   ret void
@@ -86,7 +86,7 @@
 
 define void @llvm_mips_frint_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_frint_w_ARG1
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_frint_w_ARG1
   %1 = tail call <4 x float> @llvm.mips.frint.w(<4 x float> %0)
   store <4 x float> %1, <4 x float>* @llvm_mips_frint_w_RES
   ret void
@@ -107,7 +107,7 @@
 
 define void @llvm_mips_frint_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_frint_d_ARG1
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_frint_d_ARG1
   %1 = tail call <2 x double> @llvm.mips.frint.d(<2 x double> %0)
   store <2 x double> %1, <2 x double>* @llvm_mips_frint_d_RES
   ret void
@@ -125,7 +125,7 @@
 
 define void @frint_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_frint_w_ARG1
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_frint_w_ARG1
   %1 = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %0)
   store <4 x float> %1, <4 x float>* @llvm_mips_frint_w_RES
   ret void
@@ -143,7 +143,7 @@
 
 define void @frint_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_frint_d_ARG1
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_frint_d_ARG1
   %1 = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %0)
   store <2 x double> %1, <2 x double>* @llvm_mips_frint_d_RES
   ret void
@@ -164,7 +164,7 @@
 
 define void @llvm_mips_frcp_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_frcp_w_ARG1
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_frcp_w_ARG1
   %1 = tail call <4 x float> @llvm.mips.frcp.w(<4 x float> %0)
   store <4 x float> %1, <4 x float>* @llvm_mips_frcp_w_RES
   ret void
@@ -185,7 +185,7 @@
 
 define void @llvm_mips_frcp_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_frcp_d_ARG1
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_frcp_d_ARG1
   %1 = tail call <2 x double> @llvm.mips.frcp.d(<2 x double> %0)
   store <2 x double> %1, <2 x double>* @llvm_mips_frcp_d_RES
   ret void
@@ -206,7 +206,7 @@
 
 define void @llvm_mips_frsqrt_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_frsqrt_w_ARG1
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_frsqrt_w_ARG1
   %1 = tail call <4 x float> @llvm.mips.frsqrt.w(<4 x float> %0)
   store <4 x float> %1, <4 x float>* @llvm_mips_frsqrt_w_RES
   ret void
@@ -227,7 +227,7 @@
 
 define void @llvm_mips_frsqrt_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_frsqrt_d_ARG1
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_frsqrt_d_ARG1
   %1 = tail call <2 x double> @llvm.mips.frsqrt.d(<2 x double> %0)
   store <2 x double> %1, <2 x double>* @llvm_mips_frsqrt_d_RES
   ret void
@@ -248,7 +248,7 @@
 
 define void @llvm_mips_fsqrt_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fsqrt_w_ARG1
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fsqrt_w_ARG1
   %1 = tail call <4 x float> @llvm.mips.fsqrt.w(<4 x float> %0)
   store <4 x float> %1, <4 x float>* @llvm_mips_fsqrt_w_RES
   ret void
@@ -269,7 +269,7 @@
 
 define void @llvm_mips_fsqrt_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fsqrt_d_ARG1
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fsqrt_d_ARG1
   %1 = tail call <2 x double> @llvm.mips.fsqrt.d(<2 x double> %0)
   store <2 x double> %1, <2 x double>* @llvm_mips_fsqrt_d_RES
   ret void
@@ -287,7 +287,7 @@
 
 define void @fsqrt_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fsqrt_w_ARG1
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fsqrt_w_ARG1
   %1 = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %0)
   store <4 x float> %1, <4 x float>* @llvm_mips_fsqrt_w_RES
   ret void
@@ -305,7 +305,7 @@
 
 define void @fsqrt_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fsqrt_d_ARG1
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fsqrt_d_ARG1
   %1 = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> %0)
   store <2 x double> %1, <2 x double>* @llvm_mips_fsqrt_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/2rf_exup.ll b/llvm/test/CodeGen/Mips/msa/2rf_exup.ll
index 8d7cc36..fd81ff6 100644
--- a/llvm/test/CodeGen/Mips/msa/2rf_exup.ll
+++ b/llvm/test/CodeGen/Mips/msa/2rf_exup.ll
@@ -9,7 +9,7 @@
 
 define void @llvm_mips_fexupl_w_test() nounwind {
 entry:
-  %0 = load <8 x half>* @llvm_mips_fexupl_w_ARG1
+  %0 = load <8 x half>, <8 x half>* @llvm_mips_fexupl_w_ARG1
   %1 = tail call <4 x float> @llvm.mips.fexupl.w(<8 x half> %0)
   store <4 x float> %1, <4 x float>* @llvm_mips_fexupl_w_RES
   ret void
@@ -28,7 +28,7 @@
 
 define void @llvm_mips_fexupl_d_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fexupl_d_ARG1
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fexupl_d_ARG1
   %1 = tail call <2 x double> @llvm.mips.fexupl.d(<4 x float> %0)
   store <2 x double> %1, <2 x double>* @llvm_mips_fexupl_d_RES
   ret void
@@ -47,7 +47,7 @@
 
 define void @llvm_mips_fexupr_w_test() nounwind {
 entry:
-  %0 = load <8 x half>* @llvm_mips_fexupr_w_ARG1
+  %0 = load <8 x half>, <8 x half>* @llvm_mips_fexupr_w_ARG1
   %1 = tail call <4 x float> @llvm.mips.fexupr.w(<8 x half> %0)
   store <4 x float> %1, <4 x float>* @llvm_mips_fexupr_w_RES
   ret void
@@ -66,7 +66,7 @@
 
 define void @llvm_mips_fexupr_d_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fexupr_d_ARG1
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fexupr_d_ARG1
   %1 = tail call <2 x double> @llvm.mips.fexupr.d(<4 x float> %0)
   store <2 x double> %1, <2 x double>* @llvm_mips_fexupr_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/2rf_float_int.ll b/llvm/test/CodeGen/Mips/msa/2rf_float_int.ll
index 3b5dfda..3690158 100644
--- a/llvm/test/CodeGen/Mips/msa/2rf_float_int.ll
+++ b/llvm/test/CodeGen/Mips/msa/2rf_float_int.ll
@@ -9,7 +9,7 @@
 
 define void @llvm_mips_ffint_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_ffint_s_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ffint_s_w_ARG1
   %1 = tail call <4 x float> @llvm.mips.ffint.s.w(<4 x i32> %0)
   store <4 x float> %1, <4 x float>* @llvm_mips_ffint_s_w_RES
   ret void
@@ -30,7 +30,7 @@
 
 define void @llvm_mips_ffint_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_ffint_s_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ffint_s_d_ARG1
   %1 = tail call <2 x double> @llvm.mips.ffint.s.d(<2 x i64> %0)
   store <2 x double> %1, <2 x double>* @llvm_mips_ffint_s_d_RES
   ret void
@@ -51,7 +51,7 @@
 
 define void @llvm_mips_ffint_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_ffint_u_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ffint_u_w_ARG1
   %1 = tail call <4 x float> @llvm.mips.ffint.u.w(<4 x i32> %0)
   store <4 x float> %1, <4 x float>* @llvm_mips_ffint_u_w_RES
   ret void
@@ -72,7 +72,7 @@
 
 define void @llvm_mips_ffint_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_ffint_u_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ffint_u_d_ARG1
   %1 = tail call <2 x double> @llvm.mips.ffint.u.d(<2 x i64> %0)
   store <2 x double> %1, <2 x double>* @llvm_mips_ffint_u_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/2rf_fq.ll b/llvm/test/CodeGen/Mips/msa/2rf_fq.ll
index 021dd93..05c649e 100644
--- a/llvm/test/CodeGen/Mips/msa/2rf_fq.ll
+++ b/llvm/test/CodeGen/Mips/msa/2rf_fq.ll
@@ -9,7 +9,7 @@
 
 define void @llvm_mips_ffql_w_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_ffql_w_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ffql_w_ARG1
   %1 = tail call <4 x float> @llvm.mips.ffql.w(<8 x i16> %0)
   store <4 x float> %1, <4 x float>* @llvm_mips_ffql_w_RES
   ret void
@@ -28,7 +28,7 @@
 
 define void @llvm_mips_ffql_d_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_ffql_d_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ffql_d_ARG1
   %1 = tail call <2 x double> @llvm.mips.ffql.d(<4 x i32> %0)
   store <2 x double> %1, <2 x double>* @llvm_mips_ffql_d_RES
   ret void
@@ -47,7 +47,7 @@
 
 define void @llvm_mips_ffqr_w_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_ffqr_w_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ffqr_w_ARG1
   %1 = tail call <4 x float> @llvm.mips.ffqr.w(<8 x i16> %0)
   store <4 x float> %1, <4 x float>* @llvm_mips_ffqr_w_RES
   ret void
@@ -66,7 +66,7 @@
 
 define void @llvm_mips_ffqr_d_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_ffqr_d_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ffqr_d_ARG1
   %1 = tail call <2 x double> @llvm.mips.ffqr.d(<4 x i32> %0)
   store <2 x double> %1, <2 x double>* @llvm_mips_ffqr_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/2rf_int_float.ll b/llvm/test/CodeGen/Mips/msa/2rf_int_float.ll
index 4665ae0..77d1404 100644
--- a/llvm/test/CodeGen/Mips/msa/2rf_int_float.ll
+++ b/llvm/test/CodeGen/Mips/msa/2rf_int_float.ll
@@ -10,7 +10,7 @@
 
 define void @llvm_mips_fclass_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fclass_w_ARG1
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fclass_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.fclass.w(<4 x float> %0)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_fclass_w_RES
   ret void
@@ -31,7 +31,7 @@
 
 define void @llvm_mips_fclass_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fclass_d_ARG1
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fclass_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.fclass.d(<2 x double> %0)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_fclass_d_RES
   ret void
@@ -52,7 +52,7 @@
 
 define void @llvm_mips_ftrunc_s_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_ftrunc_s_w_ARG1
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_ftrunc_s_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.ftrunc.s.w(<4 x float> %0)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_ftrunc_s_w_RES
   ret void
@@ -73,7 +73,7 @@
 
 define void @llvm_mips_ftrunc_s_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_ftrunc_s_d_ARG1
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_ftrunc_s_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.ftrunc.s.d(<2 x double> %0)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_ftrunc_s_d_RES
   ret void
@@ -94,7 +94,7 @@
 
 define void @llvm_mips_ftrunc_u_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_ftrunc_u_w_ARG1
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_ftrunc_u_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.ftrunc.u.w(<4 x float> %0)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_ftrunc_u_w_RES
   ret void
@@ -115,7 +115,7 @@
 
 define void @llvm_mips_ftrunc_u_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_ftrunc_u_d_ARG1
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_ftrunc_u_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.ftrunc.u.d(<2 x double> %0)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_ftrunc_u_d_RES
   ret void
@@ -136,7 +136,7 @@
 
 define void @llvm_mips_ftint_s_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_ftint_s_w_ARG1
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_ftint_s_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.ftint.s.w(<4 x float> %0)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_ftint_s_w_RES
   ret void
@@ -157,7 +157,7 @@
 
 define void @llvm_mips_ftint_s_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_ftint_s_d_ARG1
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_ftint_s_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.ftint.s.d(<2 x double> %0)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_ftint_s_d_RES
   ret void
@@ -178,7 +178,7 @@
 
 define void @llvm_mips_ftint_u_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_ftint_u_w_ARG1
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_ftint_u_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.ftint.u.w(<4 x float> %0)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_ftint_u_w_RES
   ret void
@@ -199,7 +199,7 @@
 
 define void @llvm_mips_ftint_u_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_ftint_u_d_ARG1
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_ftint_u_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.ftint.u.d(<2 x double> %0)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_ftint_u_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/2rf_tq.ll b/llvm/test/CodeGen/Mips/msa/2rf_tq.ll
index 6f3c508..9b7f02a 100644
--- a/llvm/test/CodeGen/Mips/msa/2rf_tq.ll
+++ b/llvm/test/CodeGen/Mips/msa/2rf_tq.ll
@@ -10,8 +10,8 @@
 
 define void @llvm_mips_ftq_h_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_ftq_h_ARG1
-  %1 = load <4 x float>* @llvm_mips_ftq_h_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_ftq_h_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_ftq_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.ftq.h(<4 x float> %0, <4 x float> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_ftq_h_RES
   ret void
@@ -32,8 +32,8 @@
 
 define void @llvm_mips_ftq_w_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_ftq_w_ARG1
-  %1 = load <2 x double>* @llvm_mips_ftq_w_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_ftq_w_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_ftq_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.ftq.w(<2 x double> %0, <2 x double> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_ftq_w_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/3r-a.ll b/llvm/test/CodeGen/Mips/msa/3r-a.ll
index dab15b6..db772f9 100644
--- a/llvm/test/CodeGen/Mips/msa/3r-a.ll
+++ b/llvm/test/CodeGen/Mips/msa/3r-a.ll
@@ -15,8 +15,8 @@
 
 define void @llvm_mips_add_a_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_add_a_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_add_a_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_add_a_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_add_a_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.add.a.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_add_a_b_RES
   ret void
@@ -40,8 +40,8 @@
 
 define void @llvm_mips_add_a_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_add_a_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_add_a_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_add_a_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_add_a_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.add.a.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_add_a_h_RES
   ret void
@@ -65,8 +65,8 @@
 
 define void @llvm_mips_add_a_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_add_a_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_add_a_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_add_a_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_add_a_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.add.a.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_add_a_w_RES
   ret void
@@ -90,8 +90,8 @@
 
 define void @llvm_mips_add_a_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_add_a_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_add_a_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_add_a_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_add_a_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.add.a.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_add_a_d_RES
   ret void
@@ -115,8 +115,8 @@
 
 define void @llvm_mips_adds_a_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_adds_a_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_adds_a_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_adds_a_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_adds_a_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.adds.a.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_adds_a_b_RES
   ret void
@@ -140,8 +140,8 @@
 
 define void @llvm_mips_adds_a_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_adds_a_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_adds_a_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_adds_a_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_adds_a_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.adds.a.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_adds_a_h_RES
   ret void
@@ -165,8 +165,8 @@
 
 define void @llvm_mips_adds_a_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_adds_a_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_adds_a_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_adds_a_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_adds_a_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.adds.a.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_adds_a_w_RES
   ret void
@@ -190,8 +190,8 @@
 
 define void @llvm_mips_adds_a_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_adds_a_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_adds_a_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_adds_a_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_adds_a_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.adds.a.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_adds_a_d_RES
   ret void
@@ -215,8 +215,8 @@
 
 define void @llvm_mips_adds_s_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_adds_s_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_adds_s_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_adds_s_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_adds_s_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.adds.s.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_adds_s_b_RES
   ret void
@@ -240,8 +240,8 @@
 
 define void @llvm_mips_adds_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_adds_s_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_adds_s_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_adds_s_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_adds_s_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.adds.s.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_adds_s_h_RES
   ret void
@@ -265,8 +265,8 @@
 
 define void @llvm_mips_adds_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_adds_s_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_adds_s_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_adds_s_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_adds_s_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.adds.s.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_adds_s_w_RES
   ret void
@@ -290,8 +290,8 @@
 
 define void @llvm_mips_adds_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_adds_s_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_adds_s_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_adds_s_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_adds_s_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.adds.s.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_adds_s_d_RES
   ret void
@@ -315,8 +315,8 @@
 
 define void @llvm_mips_adds_u_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_adds_u_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_adds_u_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_adds_u_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_adds_u_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.adds.u.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_adds_u_b_RES
   ret void
@@ -340,8 +340,8 @@
 
 define void @llvm_mips_adds_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_adds_u_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_adds_u_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_adds_u_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_adds_u_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.adds.u.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_adds_u_h_RES
   ret void
@@ -365,8 +365,8 @@
 
 define void @llvm_mips_adds_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_adds_u_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_adds_u_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_adds_u_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_adds_u_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.adds.u.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_adds_u_w_RES
   ret void
@@ -390,8 +390,8 @@
 
 define void @llvm_mips_adds_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_adds_u_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_adds_u_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_adds_u_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_adds_u_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.adds.u.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_adds_u_d_RES
   ret void
@@ -415,8 +415,8 @@
 
 define void @llvm_mips_addv_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_addv_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_addv_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_addv_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_addv_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_addv_b_RES
   ret void
@@ -440,8 +440,8 @@
 
 define void @llvm_mips_addv_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_addv_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_addv_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_addv_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_addv_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_addv_h_RES
   ret void
@@ -465,8 +465,8 @@
 
 define void @llvm_mips_addv_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_addv_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_addv_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_addv_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_addv_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_addv_w_RES
   ret void
@@ -490,8 +490,8 @@
 
 define void @llvm_mips_addv_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_addv_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_addv_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_addv_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_addv_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_addv_d_RES
   ret void
@@ -512,8 +512,8 @@
 
 define void @addv_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_addv_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_addv_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_addv_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_addv_b_ARG2
   %2 = add <16 x i8> %0, %1
   store <16 x i8> %2, <16 x i8>* @llvm_mips_addv_b_RES
   ret void
@@ -532,8 +532,8 @@
 
 define void @addv_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_addv_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_addv_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_addv_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_addv_h_ARG2
   %2 = add <8 x i16> %0, %1
   store <8 x i16> %2, <8 x i16>* @llvm_mips_addv_h_RES
   ret void
@@ -552,8 +552,8 @@
 
 define void @addv_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_addv_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_addv_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_addv_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_addv_w_ARG2
   %2 = add <4 x i32> %0, %1
   store <4 x i32> %2, <4 x i32>* @llvm_mips_addv_w_RES
   ret void
@@ -572,8 +572,8 @@
 
 define void @addv_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_addv_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_addv_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_addv_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_addv_d_ARG2
   %2 = add <2 x i64> %0, %1
   store <2 x i64> %2, <2 x i64>* @llvm_mips_addv_d_RES
   ret void
@@ -595,8 +595,8 @@
 
 define void @llvm_mips_asub_s_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_asub_s_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_asub_s_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_asub_s_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_asub_s_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.asub.s.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_asub_s_b_RES
   ret void
@@ -620,8 +620,8 @@
 
 define void @llvm_mips_asub_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_asub_s_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_asub_s_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_asub_s_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_asub_s_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.asub.s.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_asub_s_h_RES
   ret void
@@ -645,8 +645,8 @@
 
 define void @llvm_mips_asub_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_asub_s_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_asub_s_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_asub_s_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_asub_s_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.asub.s.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_asub_s_w_RES
   ret void
@@ -670,8 +670,8 @@
 
 define void @llvm_mips_asub_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_asub_s_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_asub_s_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_asub_s_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_asub_s_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.asub.s.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_asub_s_d_RES
   ret void
@@ -695,8 +695,8 @@
 
 define void @llvm_mips_asub_u_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_asub_u_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_asub_u_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_asub_u_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_asub_u_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.asub.u.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_asub_u_b_RES
   ret void
@@ -720,8 +720,8 @@
 
 define void @llvm_mips_asub_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_asub_u_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_asub_u_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_asub_u_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_asub_u_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.asub.u.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_asub_u_h_RES
   ret void
@@ -745,8 +745,8 @@
 
 define void @llvm_mips_asub_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_asub_u_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_asub_u_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_asub_u_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_asub_u_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.asub.u.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_asub_u_w_RES
   ret void
@@ -770,8 +770,8 @@
 
 define void @llvm_mips_asub_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_asub_u_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_asub_u_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_asub_u_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_asub_u_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.asub.u.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_asub_u_d_RES
   ret void
@@ -795,8 +795,8 @@
 
 define void @llvm_mips_ave_s_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_ave_s_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_ave_s_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_ave_s_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_ave_s_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.ave.s.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_ave_s_b_RES
   ret void
@@ -820,8 +820,8 @@
 
 define void @llvm_mips_ave_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_ave_s_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_ave_s_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ave_s_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_ave_s_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.ave.s.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_ave_s_h_RES
   ret void
@@ -845,8 +845,8 @@
 
 define void @llvm_mips_ave_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_ave_s_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_ave_s_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ave_s_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_ave_s_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.ave.s.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_ave_s_w_RES
   ret void
@@ -870,8 +870,8 @@
 
 define void @llvm_mips_ave_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_ave_s_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_ave_s_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ave_s_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_ave_s_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.ave.s.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_ave_s_d_RES
   ret void
@@ -895,8 +895,8 @@
 
 define void @llvm_mips_ave_u_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_ave_u_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_ave_u_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_ave_u_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_ave_u_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.ave.u.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_ave_u_b_RES
   ret void
@@ -920,8 +920,8 @@
 
 define void @llvm_mips_ave_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_ave_u_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_ave_u_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ave_u_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_ave_u_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.ave.u.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_ave_u_h_RES
   ret void
@@ -945,8 +945,8 @@
 
 define void @llvm_mips_ave_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_ave_u_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_ave_u_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ave_u_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_ave_u_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.ave.u.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_ave_u_w_RES
   ret void
@@ -970,8 +970,8 @@
 
 define void @llvm_mips_ave_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_ave_u_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_ave_u_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ave_u_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_ave_u_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.ave.u.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_ave_u_d_RES
   ret void
@@ -995,8 +995,8 @@
 
 define void @llvm_mips_aver_s_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_aver_s_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_aver_s_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_aver_s_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_aver_s_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.aver.s.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_aver_s_b_RES
   ret void
@@ -1020,8 +1020,8 @@
 
 define void @llvm_mips_aver_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_aver_s_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_aver_s_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_aver_s_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_aver_s_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.aver.s.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_aver_s_h_RES
   ret void
@@ -1045,8 +1045,8 @@
 
 define void @llvm_mips_aver_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_aver_s_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_aver_s_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_aver_s_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_aver_s_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.aver.s.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_aver_s_w_RES
   ret void
@@ -1070,8 +1070,8 @@
 
 define void @llvm_mips_aver_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_aver_s_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_aver_s_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_aver_s_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_aver_s_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.aver.s.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_aver_s_d_RES
   ret void
@@ -1095,8 +1095,8 @@
 
 define void @llvm_mips_aver_u_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_aver_u_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_aver_u_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_aver_u_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_aver_u_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.aver.u.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_aver_u_b_RES
   ret void
@@ -1120,8 +1120,8 @@
 
 define void @llvm_mips_aver_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_aver_u_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_aver_u_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_aver_u_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_aver_u_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.aver.u.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_aver_u_h_RES
   ret void
@@ -1145,8 +1145,8 @@
 
 define void @llvm_mips_aver_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_aver_u_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_aver_u_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_aver_u_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_aver_u_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.aver.u.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_aver_u_w_RES
   ret void
@@ -1170,8 +1170,8 @@
 
 define void @llvm_mips_aver_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_aver_u_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_aver_u_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_aver_u_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_aver_u_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.aver.u.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_aver_u_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/3r-b.ll b/llvm/test/CodeGen/Mips/msa/3r-b.ll
index a05d19b..2ecdc42 100644
--- a/llvm/test/CodeGen/Mips/msa/3r-b.ll
+++ b/llvm/test/CodeGen/Mips/msa/3r-b.ll
@@ -10,8 +10,8 @@
 
 define void @llvm_mips_bclr_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_bclr_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_bclr_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bclr_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bclr_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.bclr.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_bclr_b_RES
   ret void
@@ -32,8 +32,8 @@
 
 define void @llvm_mips_bclr_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_bclr_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_bclr_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bclr_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_bclr_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.bclr.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_bclr_h_RES
   ret void
@@ -54,8 +54,8 @@
 
 define void @llvm_mips_bclr_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_bclr_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_bclr_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bclr_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_bclr_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.bclr.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_bclr_w_RES
   ret void
@@ -76,8 +76,8 @@
 
 define void @llvm_mips_bclr_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_bclr_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_bclr_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bclr_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_bclr_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.bclr.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_bclr_d_RES
   ret void
@@ -99,9 +99,9 @@
 
 define void @llvm_mips_binsl_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_binsl_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_binsl_b_ARG2
-  %2 = load <16 x i8>* @llvm_mips_binsl_b_ARG3
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_binsl_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_binsl_b_ARG2
+  %2 = load <16 x i8>, <16 x i8>* @llvm_mips_binsl_b_ARG3
   %3 = tail call <16 x i8> @llvm.mips.binsl.b(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
   store <16 x i8> %3, <16 x i8>* @llvm_mips_binsl_b_RES
   ret void
@@ -127,9 +127,9 @@
 
 define void @llvm_mips_binsl_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_binsl_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_binsl_h_ARG2
-  %2 = load <8 x i16>* @llvm_mips_binsl_h_ARG3
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_binsl_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_binsl_h_ARG2
+  %2 = load <8 x i16>, <8 x i16>* @llvm_mips_binsl_h_ARG3
   %3 = tail call <8 x i16> @llvm.mips.binsl.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
   store <8 x i16> %3, <8 x i16>* @llvm_mips_binsl_h_RES
   ret void
@@ -155,9 +155,9 @@
 
 define void @llvm_mips_binsl_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_binsl_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_binsl_w_ARG2
-  %2 = load <4 x i32>* @llvm_mips_binsl_w_ARG3
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_binsl_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_binsl_w_ARG2
+  %2 = load <4 x i32>, <4 x i32>* @llvm_mips_binsl_w_ARG3
   %3 = tail call <4 x i32> @llvm.mips.binsl.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
   store <4 x i32> %3, <4 x i32>* @llvm_mips_binsl_w_RES
   ret void
@@ -183,9 +183,9 @@
 
 define void @llvm_mips_binsl_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_binsl_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_binsl_d_ARG2
-  %2 = load <2 x i64>* @llvm_mips_binsl_d_ARG3
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_binsl_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_binsl_d_ARG2
+  %2 = load <2 x i64>, <2 x i64>* @llvm_mips_binsl_d_ARG3
   %3 = tail call <2 x i64> @llvm.mips.binsl.d(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2)
   store <2 x i64> %3, <2 x i64>* @llvm_mips_binsl_d_RES
   ret void
@@ -211,9 +211,9 @@
 
 define void @llvm_mips_binsr_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_binsr_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_binsr_b_ARG2
-  %2 = load <16 x i8>* @llvm_mips_binsr_b_ARG3
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_binsr_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_binsr_b_ARG2
+  %2 = load <16 x i8>, <16 x i8>* @llvm_mips_binsr_b_ARG3
   %3 = tail call <16 x i8> @llvm.mips.binsr.b(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
   store <16 x i8> %3, <16 x i8>* @llvm_mips_binsr_b_RES
   ret void
@@ -239,9 +239,9 @@
 
 define void @llvm_mips_binsr_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_binsr_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_binsr_h_ARG2
-  %2 = load <8 x i16>* @llvm_mips_binsr_h_ARG3
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_binsr_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_binsr_h_ARG2
+  %2 = load <8 x i16>, <8 x i16>* @llvm_mips_binsr_h_ARG3
   %3 = tail call <8 x i16> @llvm.mips.binsr.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
   store <8 x i16> %3, <8 x i16>* @llvm_mips_binsr_h_RES
   ret void
@@ -267,9 +267,9 @@
 
 define void @llvm_mips_binsr_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_binsr_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_binsr_w_ARG2
-  %2 = load <4 x i32>* @llvm_mips_binsr_w_ARG3
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_binsr_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_binsr_w_ARG2
+  %2 = load <4 x i32>, <4 x i32>* @llvm_mips_binsr_w_ARG3
   %3 = tail call <4 x i32> @llvm.mips.binsr.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
   store <4 x i32> %3, <4 x i32>* @llvm_mips_binsr_w_RES
   ret void
@@ -295,9 +295,9 @@
 
 define void @llvm_mips_binsr_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_binsr_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_binsr_d_ARG2
-  %2 = load <2 x i64>* @llvm_mips_binsr_d_ARG3
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_binsr_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_binsr_d_ARG2
+  %2 = load <2 x i64>, <2 x i64>* @llvm_mips_binsr_d_ARG3
   %3 = tail call <2 x i64> @llvm.mips.binsr.d(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2)
   store <2 x i64> %3, <2 x i64>* @llvm_mips_binsr_d_RES
   ret void
@@ -322,8 +322,8 @@
 
 define void @llvm_mips_bneg_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_bneg_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_bneg_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bneg_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bneg_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.bneg.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_bneg_b_RES
   ret void
@@ -344,8 +344,8 @@
 
 define void @llvm_mips_bneg_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_bneg_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_bneg_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bneg_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_bneg_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.bneg.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_bneg_h_RES
   ret void
@@ -366,8 +366,8 @@
 
 define void @llvm_mips_bneg_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_bneg_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_bneg_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bneg_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_bneg_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.bneg.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_bneg_w_RES
   ret void
@@ -388,8 +388,8 @@
 
 define void @llvm_mips_bneg_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_bneg_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_bneg_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bneg_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_bneg_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.bneg.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_bneg_d_RES
   ret void
@@ -410,8 +410,8 @@
 
 define void @llvm_mips_bset_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_bset_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_bset_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bset_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bset_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.bset.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_bset_b_RES
   ret void
@@ -432,8 +432,8 @@
 
 define void @llvm_mips_bset_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_bset_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_bset_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bset_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_bset_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.bset.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_bset_h_RES
   ret void
@@ -454,8 +454,8 @@
 
 define void @llvm_mips_bset_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_bset_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_bset_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bset_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_bset_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.bset.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_bset_w_RES
   ret void
@@ -476,8 +476,8 @@
 
 define void @llvm_mips_bset_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_bset_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_bset_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bset_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_bset_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.bset.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_bset_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/3r-c.ll b/llvm/test/CodeGen/Mips/msa/3r-c.ll
index 6ec92c2..a3913e0 100644
--- a/llvm/test/CodeGen/Mips/msa/3r-c.ll
+++ b/llvm/test/CodeGen/Mips/msa/3r-c.ll
@@ -10,8 +10,8 @@
 
 define void @llvm_mips_ceq_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_ceq_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_ceq_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_ceq_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_ceq_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.ceq.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_ceq_b_RES
   ret void
@@ -32,8 +32,8 @@
 
 define void @llvm_mips_ceq_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_ceq_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_ceq_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ceq_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_ceq_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.ceq.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_ceq_h_RES
   ret void
@@ -54,8 +54,8 @@
 
 define void @llvm_mips_ceq_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_ceq_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_ceq_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ceq_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_ceq_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.ceq.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_ceq_w_RES
   ret void
@@ -76,8 +76,8 @@
 
 define void @llvm_mips_ceq_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_ceq_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_ceq_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ceq_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_ceq_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.ceq.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_ceq_d_RES
   ret void
@@ -98,8 +98,8 @@
 
 define void @llvm_mips_cle_s_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_cle_s_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_cle_s_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_cle_s_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_cle_s_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.cle.s.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_cle_s_b_RES
   ret void
@@ -120,8 +120,8 @@
 
 define void @llvm_mips_cle_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_cle_s_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_cle_s_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_cle_s_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_cle_s_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.cle.s.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_cle_s_h_RES
   ret void
@@ -142,8 +142,8 @@
 
 define void @llvm_mips_cle_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_cle_s_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_cle_s_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_cle_s_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_cle_s_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.cle.s.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_cle_s_w_RES
   ret void
@@ -164,8 +164,8 @@
 
 define void @llvm_mips_cle_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_cle_s_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_cle_s_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_cle_s_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_cle_s_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.cle.s.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_cle_s_d_RES
   ret void
@@ -186,8 +186,8 @@
 
 define void @llvm_mips_cle_u_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_cle_u_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_cle_u_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_cle_u_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_cle_u_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.cle.u.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_cle_u_b_RES
   ret void
@@ -208,8 +208,8 @@
 
 define void @llvm_mips_cle_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_cle_u_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_cle_u_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_cle_u_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_cle_u_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.cle.u.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_cle_u_h_RES
   ret void
@@ -230,8 +230,8 @@
 
 define void @llvm_mips_cle_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_cle_u_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_cle_u_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_cle_u_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_cle_u_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.cle.u.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_cle_u_w_RES
   ret void
@@ -252,8 +252,8 @@
 
 define void @llvm_mips_cle_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_cle_u_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_cle_u_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_cle_u_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_cle_u_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.cle.u.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_cle_u_d_RES
   ret void
@@ -274,8 +274,8 @@
 
 define void @llvm_mips_clt_s_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_clt_s_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_clt_s_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_clt_s_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_clt_s_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.clt.s.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_clt_s_b_RES
   ret void
@@ -296,8 +296,8 @@
 
 define void @llvm_mips_clt_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_clt_s_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_clt_s_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_clt_s_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_clt_s_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.clt.s.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_clt_s_h_RES
   ret void
@@ -318,8 +318,8 @@
 
 define void @llvm_mips_clt_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_clt_s_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_clt_s_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_clt_s_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_clt_s_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.clt.s.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_clt_s_w_RES
   ret void
@@ -340,8 +340,8 @@
 
 define void @llvm_mips_clt_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_clt_s_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_clt_s_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_clt_s_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_clt_s_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.clt.s.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_clt_s_d_RES
   ret void
@@ -362,8 +362,8 @@
 
 define void @llvm_mips_clt_u_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_clt_u_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_clt_u_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_clt_u_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_clt_u_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.clt.u.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_clt_u_b_RES
   ret void
@@ -384,8 +384,8 @@
 
 define void @llvm_mips_clt_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_clt_u_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_clt_u_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_clt_u_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_clt_u_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.clt.u.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_clt_u_h_RES
   ret void
@@ -406,8 +406,8 @@
 
 define void @llvm_mips_clt_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_clt_u_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_clt_u_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_clt_u_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_clt_u_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.clt.u.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_clt_u_w_RES
   ret void
@@ -428,8 +428,8 @@
 
 define void @llvm_mips_clt_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_clt_u_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_clt_u_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_clt_u_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_clt_u_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.clt.u.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_clt_u_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/3r-d.ll b/llvm/test/CodeGen/Mips/msa/3r-d.ll
index 0099554..4fc32b7 100644
--- a/llvm/test/CodeGen/Mips/msa/3r-d.ll
+++ b/llvm/test/CodeGen/Mips/msa/3r-d.ll
@@ -10,8 +10,8 @@
 
 define void @llvm_mips_div_s_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_div_s_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_div_s_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_div_s_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_div_s_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.div.s.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_div_s_b_RES
   ret void
@@ -32,8 +32,8 @@
 
 define void @llvm_mips_div_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_div_s_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_div_s_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_div_s_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_div_s_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.div.s.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_div_s_h_RES
   ret void
@@ -54,8 +54,8 @@
 
 define void @llvm_mips_div_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_div_s_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_div_s_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_div_s_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_div_s_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.div.s.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_div_s_w_RES
   ret void
@@ -76,8 +76,8 @@
 
 define void @llvm_mips_div_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_div_s_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_div_s_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_div_s_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_div_s_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.div.s.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_div_s_d_RES
   ret void
@@ -95,8 +95,8 @@
 
 define void @div_s_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_div_s_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_div_s_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_div_s_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_div_s_b_ARG2
   %2 = sdiv <16 x i8> %0, %1
   store <16 x i8> %2, <16 x i8>* @llvm_mips_div_s_b_RES
   ret void
@@ -111,8 +111,8 @@
 
 define void @div_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_div_s_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_div_s_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_div_s_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_div_s_h_ARG2
   %2 = sdiv <8 x i16> %0, %1
   store <8 x i16> %2, <8 x i16>* @llvm_mips_div_s_h_RES
   ret void
@@ -127,8 +127,8 @@
 
 define void @div_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_div_s_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_div_s_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_div_s_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_div_s_w_ARG2
   %2 = sdiv <4 x i32> %0, %1
   store <4 x i32> %2, <4 x i32>* @llvm_mips_div_s_w_RES
   ret void
@@ -143,8 +143,8 @@
 
 define void @div_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_div_s_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_div_s_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_div_s_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_div_s_d_ARG2
   %2 = sdiv <2 x i64> %0, %1
   store <2 x i64> %2, <2 x i64>* @llvm_mips_div_s_d_RES
   ret void
@@ -163,8 +163,8 @@
 
 define void @llvm_mips_div_u_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_div_u_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_div_u_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_div_u_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_div_u_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.div.u.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_div_u_b_RES
   ret void
@@ -185,8 +185,8 @@
 
 define void @llvm_mips_div_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_div_u_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_div_u_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_div_u_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_div_u_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.div.u.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_div_u_h_RES
   ret void
@@ -207,8 +207,8 @@
 
 define void @llvm_mips_div_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_div_u_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_div_u_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_div_u_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_div_u_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.div.u.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_div_u_w_RES
   ret void
@@ -229,8 +229,8 @@
 
 define void @llvm_mips_div_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_div_u_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_div_u_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_div_u_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_div_u_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.div.u.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_div_u_d_RES
   ret void
@@ -248,8 +248,8 @@
 
 define void @div_u_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_div_u_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_div_u_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_div_u_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_div_u_b_ARG2
   %2 = udiv <16 x i8> %0, %1
   store <16 x i8> %2, <16 x i8>* @llvm_mips_div_u_b_RES
   ret void
@@ -264,8 +264,8 @@
 
 define void @div_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_div_u_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_div_u_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_div_u_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_div_u_h_ARG2
   %2 = udiv <8 x i16> %0, %1
   store <8 x i16> %2, <8 x i16>* @llvm_mips_div_u_h_RES
   ret void
@@ -280,8 +280,8 @@
 
 define void @div_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_div_u_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_div_u_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_div_u_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_div_u_w_ARG2
   %2 = udiv <4 x i32> %0, %1
   store <4 x i32> %2, <4 x i32>* @llvm_mips_div_u_w_RES
   ret void
@@ -296,8 +296,8 @@
 
 define void @div_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_div_u_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_div_u_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_div_u_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_div_u_d_ARG2
   %2 = udiv <2 x i64> %0, %1
   store <2 x i64> %2, <2 x i64>* @llvm_mips_div_u_d_RES
   ret void
@@ -326,8 +326,8 @@
 
 define void @llvm_mips_dotp_s_h_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_dotp_s_h_ARG1
-  %1 = load <16 x i8>* @llvm_mips_dotp_s_h_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_dotp_s_h_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_dotp_s_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.dotp.s.h(<16 x i8> %0, <16 x i8> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_dotp_s_h_RES
   ret void
@@ -353,8 +353,8 @@
 
 define void @llvm_mips_dotp_s_w_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_dotp_s_w_ARG1
-  %1 = load <8 x i16>* @llvm_mips_dotp_s_w_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_dotp_s_w_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_dotp_s_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.dotp.s.w(<8 x i16> %0, <8 x i16> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_dotp_s_w_RES
   ret void
@@ -377,8 +377,8 @@
 
 define void @llvm_mips_dotp_s_d_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_dotp_s_d_ARG1
-  %1 = load <4 x i32>* @llvm_mips_dotp_s_d_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_dotp_s_d_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_dotp_s_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.dotp.s.d(<4 x i32> %0, <4 x i32> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_dotp_s_d_RES
   ret void
@@ -409,8 +409,8 @@
 
 define void @llvm_mips_dotp_u_h_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_dotp_u_h_ARG1
-  %1 = load <16 x i8>* @llvm_mips_dotp_u_h_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_dotp_u_h_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_dotp_u_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.dotp.u.h(<16 x i8> %0, <16 x i8> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_dotp_u_h_RES
   ret void
@@ -436,8 +436,8 @@
 
 define void @llvm_mips_dotp_u_w_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_dotp_u_w_ARG1
-  %1 = load <8 x i16>* @llvm_mips_dotp_u_w_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_dotp_u_w_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_dotp_u_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.dotp.u.w(<8 x i16> %0, <8 x i16> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_dotp_u_w_RES
   ret void
@@ -460,8 +460,8 @@
 
 define void @llvm_mips_dotp_u_d_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_dotp_u_d_ARG1
-  %1 = load <4 x i32>* @llvm_mips_dotp_u_d_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_dotp_u_d_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_dotp_u_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.dotp.u.d(<4 x i32> %0, <4 x i32> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_dotp_u_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/3r-i.ll b/llvm/test/CodeGen/Mips/msa/3r-i.ll
index 2ef3047..7147b75 100644
--- a/llvm/test/CodeGen/Mips/msa/3r-i.ll
+++ b/llvm/test/CodeGen/Mips/msa/3r-i.ll
@@ -10,8 +10,8 @@
 
 define void @llvm_mips_ilvev_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_ilvev_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_ilvev_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_ilvev_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_ilvev_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.ilvev.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvev_b_RES
   ret void
@@ -32,8 +32,8 @@
 
 define void @llvm_mips_ilvev_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_ilvev_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_ilvev_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ilvev_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_ilvev_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.ilvev.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvev_h_RES
   ret void
@@ -54,8 +54,8 @@
 
 define void @llvm_mips_ilvev_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_ilvev_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_ilvev_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ilvev_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_ilvev_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.ilvev.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvev_w_RES
   ret void
@@ -76,8 +76,8 @@
 
 define void @llvm_mips_ilvev_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_ilvev_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_ilvev_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ilvev_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_ilvev_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.ilvev.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvev_d_RES
   ret void
@@ -98,8 +98,8 @@
 
 define void @llvm_mips_ilvl_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_ilvl_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_ilvl_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_ilvl_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_ilvl_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.ilvl.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvl_b_RES
   ret void
@@ -120,8 +120,8 @@
 
 define void @llvm_mips_ilvl_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_ilvl_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_ilvl_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ilvl_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_ilvl_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.ilvl.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvl_h_RES
   ret void
@@ -142,8 +142,8 @@
 
 define void @llvm_mips_ilvl_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_ilvl_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_ilvl_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ilvl_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_ilvl_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.ilvl.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvl_w_RES
   ret void
@@ -164,8 +164,8 @@
 
 define void @llvm_mips_ilvl_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_ilvl_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_ilvl_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ilvl_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_ilvl_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.ilvl.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvl_d_RES
   ret void
@@ -186,8 +186,8 @@
 
 define void @llvm_mips_ilvod_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_ilvod_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_ilvod_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_ilvod_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_ilvod_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.ilvod.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvod_b_RES
   ret void
@@ -208,8 +208,8 @@
 
 define void @llvm_mips_ilvod_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_ilvod_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_ilvod_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ilvod_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_ilvod_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.ilvod.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvod_h_RES
   ret void
@@ -230,8 +230,8 @@
 
 define void @llvm_mips_ilvod_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_ilvod_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_ilvod_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ilvod_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_ilvod_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.ilvod.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvod_w_RES
   ret void
@@ -252,8 +252,8 @@
 
 define void @llvm_mips_ilvod_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_ilvod_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_ilvod_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ilvod_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_ilvod_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.ilvod.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvod_d_RES
   ret void
@@ -274,8 +274,8 @@
 
 define void @llvm_mips_ilvr_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_ilvr_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_ilvr_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_ilvr_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_ilvr_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.ilvr.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_ilvr_b_RES
   ret void
@@ -296,8 +296,8 @@
 
 define void @llvm_mips_ilvr_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_ilvr_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_ilvr_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ilvr_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_ilvr_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.ilvr.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_ilvr_h_RES
   ret void
@@ -318,8 +318,8 @@
 
 define void @llvm_mips_ilvr_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_ilvr_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_ilvr_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ilvr_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_ilvr_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.ilvr.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_ilvr_w_RES
   ret void
@@ -340,8 +340,8 @@
 
 define void @llvm_mips_ilvr_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_ilvr_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_ilvr_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ilvr_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_ilvr_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.ilvr.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_ilvr_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/3r-m.ll b/llvm/test/CodeGen/Mips/msa/3r-m.ll
index ddfd720..39b4f7d 100644
--- a/llvm/test/CodeGen/Mips/msa/3r-m.ll
+++ b/llvm/test/CodeGen/Mips/msa/3r-m.ll
@@ -10,8 +10,8 @@
 
 define void @llvm_mips_max_a_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_max_a_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_max_a_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_max_a_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_max_a_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.max.a.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_max_a_b_RES
   ret void
@@ -32,8 +32,8 @@
 
 define void @llvm_mips_max_a_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_max_a_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_max_a_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_max_a_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_max_a_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.max.a.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_max_a_h_RES
   ret void
@@ -54,8 +54,8 @@
 
 define void @llvm_mips_max_a_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_max_a_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_max_a_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_max_a_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_max_a_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.max.a.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_max_a_w_RES
   ret void
@@ -76,8 +76,8 @@
 
 define void @llvm_mips_max_a_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_max_a_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_max_a_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_max_a_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_max_a_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.max.a.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_max_a_d_RES
   ret void
@@ -98,8 +98,8 @@
 
 define void @llvm_mips_max_s_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_max_s_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_max_s_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_max_s_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_max_s_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.max.s.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_max_s_b_RES
   ret void
@@ -120,8 +120,8 @@
 
 define void @llvm_mips_max_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_max_s_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_max_s_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_max_s_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_max_s_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.max.s.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_max_s_h_RES
   ret void
@@ -142,8 +142,8 @@
 
 define void @llvm_mips_max_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_max_s_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_max_s_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_max_s_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_max_s_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.max.s.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_max_s_w_RES
   ret void
@@ -164,8 +164,8 @@
 
 define void @llvm_mips_max_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_max_s_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_max_s_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_max_s_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_max_s_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.max.s.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_max_s_d_RES
   ret void
@@ -186,8 +186,8 @@
 
 define void @llvm_mips_max_u_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_max_u_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_max_u_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_max_u_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_max_u_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.max.u.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_max_u_b_RES
   ret void
@@ -208,8 +208,8 @@
 
 define void @llvm_mips_max_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_max_u_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_max_u_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_max_u_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_max_u_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.max.u.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_max_u_h_RES
   ret void
@@ -230,8 +230,8 @@
 
 define void @llvm_mips_max_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_max_u_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_max_u_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_max_u_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_max_u_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.max.u.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_max_u_w_RES
   ret void
@@ -252,8 +252,8 @@
 
 define void @llvm_mips_max_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_max_u_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_max_u_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_max_u_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_max_u_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.max.u.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_max_u_d_RES
   ret void
@@ -274,8 +274,8 @@
 
 define void @llvm_mips_min_a_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_min_a_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_min_a_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_min_a_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_min_a_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.min.a.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_min_a_b_RES
   ret void
@@ -296,8 +296,8 @@
 
 define void @llvm_mips_min_a_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_min_a_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_min_a_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_min_a_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_min_a_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.min.a.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_min_a_h_RES
   ret void
@@ -318,8 +318,8 @@
 
 define void @llvm_mips_min_a_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_min_a_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_min_a_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_min_a_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_min_a_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.min.a.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_min_a_w_RES
   ret void
@@ -340,8 +340,8 @@
 
 define void @llvm_mips_min_a_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_min_a_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_min_a_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_min_a_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_min_a_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.min.a.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_min_a_d_RES
   ret void
@@ -362,8 +362,8 @@
 
 define void @llvm_mips_min_s_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_min_s_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_min_s_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_min_s_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_min_s_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.min.s.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_min_s_b_RES
   ret void
@@ -384,8 +384,8 @@
 
 define void @llvm_mips_min_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_min_s_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_min_s_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_min_s_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_min_s_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.min.s.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_min_s_h_RES
   ret void
@@ -406,8 +406,8 @@
 
 define void @llvm_mips_min_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_min_s_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_min_s_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_min_s_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_min_s_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.min.s.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_min_s_w_RES
   ret void
@@ -428,8 +428,8 @@
 
 define void @llvm_mips_min_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_min_s_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_min_s_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_min_s_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_min_s_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.min.s.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_min_s_d_RES
   ret void
@@ -450,8 +450,8 @@
 
 define void @llvm_mips_min_u_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_min_u_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_min_u_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_min_u_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_min_u_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.min.u.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_min_u_b_RES
   ret void
@@ -472,8 +472,8 @@
 
 define void @llvm_mips_min_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_min_u_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_min_u_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_min_u_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_min_u_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.min.u.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_min_u_h_RES
   ret void
@@ -494,8 +494,8 @@
 
 define void @llvm_mips_min_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_min_u_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_min_u_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_min_u_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_min_u_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.min.u.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_min_u_w_RES
   ret void
@@ -516,8 +516,8 @@
 
 define void @llvm_mips_min_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_min_u_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_min_u_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_min_u_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_min_u_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.min.u.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_min_u_d_RES
   ret void
@@ -538,8 +538,8 @@
 
 define void @llvm_mips_mod_s_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_mod_s_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_mod_s_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_mod_s_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_mod_s_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.mod.s.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_mod_s_b_RES
   ret void
@@ -560,8 +560,8 @@
 
 define void @llvm_mips_mod_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_mod_s_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_mod_s_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_mod_s_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_mod_s_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.mod.s.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_mod_s_h_RES
   ret void
@@ -582,8 +582,8 @@
 
 define void @llvm_mips_mod_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_mod_s_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_mod_s_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_mod_s_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_mod_s_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.mod.s.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_mod_s_w_RES
   ret void
@@ -604,8 +604,8 @@
 
 define void @llvm_mips_mod_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_mod_s_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_mod_s_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_mod_s_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_mod_s_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.mod.s.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_mod_s_d_RES
   ret void
@@ -626,8 +626,8 @@
 
 define void @llvm_mips_mod_u_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_mod_u_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_mod_u_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_mod_u_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_mod_u_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.mod.u.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_mod_u_b_RES
   ret void
@@ -648,8 +648,8 @@
 
 define void @llvm_mips_mod_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_mod_u_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_mod_u_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_mod_u_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_mod_u_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.mod.u.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_mod_u_h_RES
   ret void
@@ -670,8 +670,8 @@
 
 define void @llvm_mips_mod_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_mod_u_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_mod_u_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_mod_u_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_mod_u_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.mod.u.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_mod_u_w_RES
   ret void
@@ -692,8 +692,8 @@
 
 define void @llvm_mips_mod_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_mod_u_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_mod_u_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_mod_u_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_mod_u_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.mod.u.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_mod_u_d_RES
   ret void
@@ -714,8 +714,8 @@
 
 define void @llvm_mips_mulv_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_mulv_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_mulv_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_mulv_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_mulv_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.mulv.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_mulv_b_RES
   ret void
@@ -736,8 +736,8 @@
 
 define void @llvm_mips_mulv_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_mulv_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_mulv_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_mulv_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_mulv_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.mulv.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_mulv_h_RES
   ret void
@@ -758,8 +758,8 @@
 
 define void @llvm_mips_mulv_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_mulv_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_mulv_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_mulv_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_mulv_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.mulv.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_mulv_w_RES
   ret void
@@ -780,8 +780,8 @@
 
 define void @llvm_mips_mulv_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_mulv_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_mulv_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_mulv_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_mulv_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.mulv.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_mulv_d_RES
   ret void
@@ -798,8 +798,8 @@
 
 define void @mulv_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_mulv_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_mulv_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_mulv_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_mulv_b_ARG2
   %2 = mul <16 x i8> %0, %1
   store <16 x i8> %2, <16 x i8>* @llvm_mips_mulv_b_RES
   ret void
@@ -814,8 +814,8 @@
 
 define void @mulv_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_mulv_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_mulv_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_mulv_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_mulv_h_ARG2
   %2 = mul <8 x i16> %0, %1
   store <8 x i16> %2, <8 x i16>* @llvm_mips_mulv_h_RES
   ret void
@@ -830,8 +830,8 @@
 
 define void @mulv_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_mulv_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_mulv_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_mulv_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_mulv_w_ARG2
   %2 = mul <4 x i32> %0, %1
   store <4 x i32> %2, <4 x i32>* @llvm_mips_mulv_w_RES
   ret void
@@ -846,8 +846,8 @@
 
 define void @mulv_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_mulv_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_mulv_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_mulv_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_mulv_d_ARG2
   %2 = mul <2 x i64> %0, %1
   store <2 x i64> %2, <2 x i64>* @llvm_mips_mulv_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/3r-p.ll b/llvm/test/CodeGen/Mips/msa/3r-p.ll
index 852023b..70b98aa 100644
--- a/llvm/test/CodeGen/Mips/msa/3r-p.ll
+++ b/llvm/test/CodeGen/Mips/msa/3r-p.ll
@@ -10,8 +10,8 @@
 
 define void @llvm_mips_pckev_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_pckev_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_pckev_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_pckev_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_pckev_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.pckev.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_pckev_b_RES
   ret void
@@ -32,8 +32,8 @@
 
 define void @llvm_mips_pckev_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_pckev_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_pckev_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_pckev_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_pckev_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.pckev.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_pckev_h_RES
   ret void
@@ -54,8 +54,8 @@
 
 define void @llvm_mips_pckev_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_pckev_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_pckev_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_pckev_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_pckev_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.pckev.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_pckev_w_RES
   ret void
@@ -76,8 +76,8 @@
 
 define void @llvm_mips_pckev_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_pckev_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_pckev_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_pckev_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_pckev_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.pckev.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_pckev_d_RES
   ret void
@@ -98,8 +98,8 @@
 
 define void @llvm_mips_pckod_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_pckod_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_pckod_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_pckod_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_pckod_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.pckod.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_pckod_b_RES
   ret void
@@ -120,8 +120,8 @@
 
 define void @llvm_mips_pckod_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_pckod_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_pckod_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_pckod_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_pckod_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.pckod.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_pckod_h_RES
   ret void
@@ -142,8 +142,8 @@
 
 define void @llvm_mips_pckod_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_pckod_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_pckod_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_pckod_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_pckod_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.pckod.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_pckod_w_RES
   ret void
@@ -164,8 +164,8 @@
 
 define void @llvm_mips_pckod_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_pckod_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_pckod_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_pckod_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_pckod_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.pckod.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_pckod_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/3r-s.ll b/llvm/test/CodeGen/Mips/msa/3r-s.ll
index 581c3bf..d04c5ff 100644
--- a/llvm/test/CodeGen/Mips/msa/3r-s.ll
+++ b/llvm/test/CodeGen/Mips/msa/3r-s.ll
@@ -11,9 +11,9 @@
 
 define void @llvm_mips_sld_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_sld_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_sld_b_ARG2
-  %2 = load i32* @llvm_mips_sld_b_ARG3
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sld_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_sld_b_ARG2
+  %2 = load i32, i32* @llvm_mips_sld_b_ARG3
   %3 = tail call <16 x i8> @llvm.mips.sld.b(<16 x i8> %0, <16 x i8> %1, i32 %2)
   store <16 x i8> %3, <16 x i8>* @llvm_mips_sld_b_RES
   ret void
@@ -39,9 +39,9 @@
 
 define void @llvm_mips_sld_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_sld_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_sld_h_ARG2
-  %2 = load i32* @llvm_mips_sld_h_ARG3
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sld_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_sld_h_ARG2
+  %2 = load i32, i32* @llvm_mips_sld_h_ARG3
   %3 = tail call <8 x i16> @llvm.mips.sld.h(<8 x i16> %0, <8 x i16> %1, i32 %2)
   store <8 x i16> %3, <8 x i16>* @llvm_mips_sld_h_RES
   ret void
@@ -67,9 +67,9 @@
 
 define void @llvm_mips_sld_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_sld_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_sld_w_ARG2
-  %2 = load i32* @llvm_mips_sld_w_ARG3
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sld_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_sld_w_ARG2
+  %2 = load i32, i32* @llvm_mips_sld_w_ARG3
   %3 = tail call <4 x i32> @llvm.mips.sld.w(<4 x i32> %0, <4 x i32> %1, i32 %2)
   store <4 x i32> %3, <4 x i32>* @llvm_mips_sld_w_RES
   ret void
@@ -95,9 +95,9 @@
 
 define void @llvm_mips_sld_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_sld_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_sld_d_ARG2
-  %2 = load i32* @llvm_mips_sld_d_ARG3
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sld_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_sld_d_ARG2
+  %2 = load i32, i32* @llvm_mips_sld_d_ARG3
   %3 = tail call <2 x i64> @llvm.mips.sld.d(<2 x i64> %0, <2 x i64> %1, i32 %2)
   store <2 x i64> %3, <2 x i64>* @llvm_mips_sld_d_RES
   ret void
@@ -122,8 +122,8 @@
 
 define void @llvm_mips_sll_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_sll_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_sll_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sll_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_sll_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.sll.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_sll_b_RES
   ret void
@@ -146,8 +146,8 @@
 
 define void @llvm_mips_sll_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_sll_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_sll_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sll_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_sll_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.sll.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_sll_h_RES
   ret void
@@ -170,8 +170,8 @@
 
 define void @llvm_mips_sll_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_sll_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_sll_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sll_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_sll_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.sll.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_sll_w_RES
   ret void
@@ -194,8 +194,8 @@
 
 define void @llvm_mips_sll_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_sll_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_sll_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sll_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_sll_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.sll.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_sll_d_RES
   ret void
@@ -214,8 +214,8 @@
 
 define void @sll_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_sll_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_sll_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sll_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_sll_b_ARG2
   %2 = shl <16 x i8> %0, %1
   store <16 x i8> %2, <16 x i8>* @llvm_mips_sll_b_RES
   ret void
@@ -232,8 +232,8 @@
 
 define void @sll_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_sll_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_sll_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sll_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_sll_h_ARG2
   %2 = shl <8 x i16> %0, %1
   store <8 x i16> %2, <8 x i16>* @llvm_mips_sll_h_RES
   ret void
@@ -250,8 +250,8 @@
 
 define void @sll_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_sll_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_sll_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sll_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_sll_w_ARG2
   %2 = shl <4 x i32> %0, %1
   store <4 x i32> %2, <4 x i32>* @llvm_mips_sll_w_RES
   ret void
@@ -268,8 +268,8 @@
 
 define void @sll_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_sll_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_sll_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sll_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_sll_d_ARG2
   %2 = shl <2 x i64> %0, %1
   store <2 x i64> %2, <2 x i64>* @llvm_mips_sll_d_RES
   ret void
@@ -290,8 +290,8 @@
 
 define void @llvm_mips_sra_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_sra_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_sra_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sra_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_sra_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.sra.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_sra_b_RES
   ret void
@@ -314,8 +314,8 @@
 
 define void @llvm_mips_sra_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_sra_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_sra_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sra_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_sra_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.sra.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_sra_h_RES
   ret void
@@ -338,8 +338,8 @@
 
 define void @llvm_mips_sra_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_sra_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_sra_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sra_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_sra_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.sra.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_sra_w_RES
   ret void
@@ -362,8 +362,8 @@
 
 define void @llvm_mips_sra_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_sra_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_sra_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sra_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_sra_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.sra.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_sra_d_RES
   ret void
@@ -383,8 +383,8 @@
 
 define void @sra_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_sra_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_sra_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sra_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_sra_b_ARG2
   %2 = ashr <16 x i8> %0, %1
   store <16 x i8> %2, <16 x i8>* @llvm_mips_sra_b_RES
   ret void
@@ -401,8 +401,8 @@
 
 define void @sra_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_sra_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_sra_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sra_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_sra_h_ARG2
   %2 = ashr <8 x i16> %0, %1
   store <8 x i16> %2, <8 x i16>* @llvm_mips_sra_h_RES
   ret void
@@ -419,8 +419,8 @@
 
 define void @sra_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_sra_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_sra_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sra_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_sra_w_ARG2
   %2 = ashr <4 x i32> %0, %1
   store <4 x i32> %2, <4 x i32>* @llvm_mips_sra_w_RES
   ret void
@@ -437,8 +437,8 @@
 
 define void @sra_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_sra_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_sra_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sra_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_sra_d_ARG2
   %2 = ashr <2 x i64> %0, %1
   store <2 x i64> %2, <2 x i64>* @llvm_mips_sra_d_RES
   ret void
@@ -459,8 +459,8 @@
 
 define void @llvm_mips_srar_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_srar_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_srar_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_srar_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_srar_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.srar.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_srar_b_RES
   ret void
@@ -483,8 +483,8 @@
 
 define void @llvm_mips_srar_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_srar_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_srar_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_srar_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_srar_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.srar.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_srar_h_RES
   ret void
@@ -507,8 +507,8 @@
 
 define void @llvm_mips_srar_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_srar_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_srar_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_srar_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_srar_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.srar.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_srar_w_RES
   ret void
@@ -531,8 +531,8 @@
 
 define void @llvm_mips_srar_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_srar_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_srar_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_srar_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_srar_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.srar.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_srar_d_RES
   ret void
@@ -555,8 +555,8 @@
 
 define void @llvm_mips_srl_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_srl_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_srl_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_srl_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_srl_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.srl.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_srl_b_RES
   ret void
@@ -579,8 +579,8 @@
 
 define void @llvm_mips_srl_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_srl_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_srl_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_srl_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_srl_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.srl.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_srl_h_RES
   ret void
@@ -603,8 +603,8 @@
 
 define void @llvm_mips_srl_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_srl_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_srl_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_srl_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_srl_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.srl.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_srl_w_RES
   ret void
@@ -627,8 +627,8 @@
 
 define void @llvm_mips_srl_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_srl_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_srl_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_srl_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_srl_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.srl.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_srl_d_RES
   ret void
@@ -651,8 +651,8 @@
 
 define void @llvm_mips_srlr_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_srlr_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_srlr_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_srlr_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_srlr_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.srlr.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_srlr_b_RES
   ret void
@@ -675,8 +675,8 @@
 
 define void @llvm_mips_srlr_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_srlr_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_srlr_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_srlr_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_srlr_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.srlr.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_srlr_h_RES
   ret void
@@ -699,8 +699,8 @@
 
 define void @llvm_mips_srlr_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_srlr_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_srlr_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_srlr_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_srlr_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.srlr.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_srlr_w_RES
   ret void
@@ -723,8 +723,8 @@
 
 define void @llvm_mips_srlr_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_srlr_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_srlr_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_srlr_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_srlr_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.srlr.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_srlr_d_RES
   ret void
@@ -744,8 +744,8 @@
 
 define void @srl_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_srl_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_srl_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_srl_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_srl_b_ARG2
   %2 = lshr <16 x i8> %0, %1
   store <16 x i8> %2, <16 x i8>* @llvm_mips_srl_b_RES
   ret void
@@ -762,8 +762,8 @@
 
 define void @srl_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_srl_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_srl_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_srl_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_srl_h_ARG2
   %2 = lshr <8 x i16> %0, %1
   store <8 x i16> %2, <8 x i16>* @llvm_mips_srl_h_RES
   ret void
@@ -780,8 +780,8 @@
 
 define void @srl_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_srl_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_srl_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_srl_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_srl_w_ARG2
   %2 = lshr <4 x i32> %0, %1
   store <4 x i32> %2, <4 x i32>* @llvm_mips_srl_w_RES
   ret void
@@ -798,8 +798,8 @@
 
 define void @srl_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_srl_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_srl_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_srl_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_srl_d_ARG2
   %2 = lshr <2 x i64> %0, %1
   store <2 x i64> %2, <2 x i64>* @llvm_mips_srl_d_RES
   ret void
@@ -820,8 +820,8 @@
 
 define void @llvm_mips_subs_s_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_subs_s_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_subs_s_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_subs_s_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_subs_s_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.subs.s.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_subs_s_b_RES
   ret void
@@ -844,8 +844,8 @@
 
 define void @llvm_mips_subs_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_subs_s_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_subs_s_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_subs_s_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_subs_s_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.subs.s.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_subs_s_h_RES
   ret void
@@ -868,8 +868,8 @@
 
 define void @llvm_mips_subs_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_subs_s_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_subs_s_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_subs_s_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_subs_s_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.subs.s.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_subs_s_w_RES
   ret void
@@ -892,8 +892,8 @@
 
 define void @llvm_mips_subs_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_subs_s_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_subs_s_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_subs_s_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_subs_s_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.subs.s.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_subs_s_d_RES
   ret void
@@ -916,8 +916,8 @@
 
 define void @llvm_mips_subs_u_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_subs_u_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_subs_u_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_subs_u_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_subs_u_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.subs.u.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_subs_u_b_RES
   ret void
@@ -940,8 +940,8 @@
 
 define void @llvm_mips_subs_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_subs_u_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_subs_u_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_subs_u_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_subs_u_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.subs.u.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_subs_u_h_RES
   ret void
@@ -964,8 +964,8 @@
 
 define void @llvm_mips_subs_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_subs_u_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_subs_u_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_subs_u_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_subs_u_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.subs.u.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_subs_u_w_RES
   ret void
@@ -988,8 +988,8 @@
 
 define void @llvm_mips_subs_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_subs_u_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_subs_u_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_subs_u_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_subs_u_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.subs.u.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_subs_u_d_RES
   ret void
@@ -1012,8 +1012,8 @@
 
 define void @llvm_mips_subsus_u_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_subsus_u_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_subsus_u_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_subsus_u_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_subsus_u_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.subsus.u.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_subsus_u_b_RES
   ret void
@@ -1036,8 +1036,8 @@
 
 define void @llvm_mips_subsus_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_subsus_u_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_subsus_u_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_subsus_u_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_subsus_u_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.subsus.u.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_subsus_u_h_RES
   ret void
@@ -1060,8 +1060,8 @@
 
 define void @llvm_mips_subsus_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_subsus_u_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_subsus_u_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_subsus_u_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_subsus_u_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.subsus.u.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_subsus_u_w_RES
   ret void
@@ -1084,8 +1084,8 @@
 
 define void @llvm_mips_subsus_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_subsus_u_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_subsus_u_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_subsus_u_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_subsus_u_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.subsus.u.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_subsus_u_d_RES
   ret void
@@ -1108,8 +1108,8 @@
 
 define void @llvm_mips_subsuu_s_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_subsuu_s_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_subsuu_s_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_subsuu_s_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_subsuu_s_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.subsuu.s.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_subsuu_s_b_RES
   ret void
@@ -1132,8 +1132,8 @@
 
 define void @llvm_mips_subsuu_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_subsuu_s_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_subsuu_s_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_subsuu_s_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_subsuu_s_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.subsuu.s.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_subsuu_s_h_RES
   ret void
@@ -1156,8 +1156,8 @@
 
 define void @llvm_mips_subsuu_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_subsuu_s_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_subsuu_s_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_subsuu_s_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_subsuu_s_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.subsuu.s.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_subsuu_s_w_RES
   ret void
@@ -1180,8 +1180,8 @@
 
 define void @llvm_mips_subsuu_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_subsuu_s_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_subsuu_s_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_subsuu_s_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_subsuu_s_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.subsuu.s.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_subsuu_s_d_RES
   ret void
@@ -1204,8 +1204,8 @@
 
 define void @llvm_mips_subv_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_subv_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_subv_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_subv_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_subv_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.subv.b(<16 x i8> %0, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_subv_b_RES
   ret void
@@ -1228,8 +1228,8 @@
 
 define void @llvm_mips_subv_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_subv_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_subv_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_subv_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_subv_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.subv.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_subv_h_RES
   ret void
@@ -1252,8 +1252,8 @@
 
 define void @llvm_mips_subv_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_subv_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_subv_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_subv_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_subv_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.subv.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_subv_w_RES
   ret void
@@ -1276,8 +1276,8 @@
 
 define void @llvm_mips_subv_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_subv_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_subv_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_subv_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_subv_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.subv.d(<2 x i64> %0, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_subv_d_RES
   ret void
@@ -1297,8 +1297,8 @@
 
 define void @subv_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_subv_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_subv_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_subv_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_subv_b_ARG2
   %2 = sub <16 x i8> %0, %1
   store <16 x i8> %2, <16 x i8>* @llvm_mips_subv_b_RES
   ret void
@@ -1315,8 +1315,8 @@
 
 define void @subv_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_subv_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_subv_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_subv_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_subv_h_ARG2
   %2 = sub <8 x i16> %0, %1
   store <8 x i16> %2, <8 x i16>* @llvm_mips_subv_h_RES
   ret void
@@ -1333,8 +1333,8 @@
 
 define void @subv_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_subv_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_subv_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_subv_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_subv_w_ARG2
   %2 = sub <4 x i32> %0, %1
   store <4 x i32> %2, <4 x i32>* @llvm_mips_subv_w_RES
   ret void
@@ -1351,8 +1351,8 @@
 
 define void @subv_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_subv_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_subv_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_subv_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_subv_d_ARG2
   %2 = sub <2 x i64> %0, %1
   store <2 x i64> %2, <2 x i64>* @llvm_mips_subv_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/3r-v.ll b/llvm/test/CodeGen/Mips/msa/3r-v.ll
index c9693f9..2d36da4 100644
--- a/llvm/test/CodeGen/Mips/msa/3r-v.ll
+++ b/llvm/test/CodeGen/Mips/msa/3r-v.ll
@@ -11,9 +11,9 @@
 
 define void @llvm_mips_vshf_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_vshf_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_vshf_b_ARG2
-  %2 = load <16 x i8>* @llvm_mips_vshf_b_ARG3
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_vshf_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_vshf_b_ARG2
+  %2 = load <16 x i8>, <16 x i8>* @llvm_mips_vshf_b_ARG3
   %3 = tail call <16 x i8> @llvm.mips.vshf.b(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
   store <16 x i8> %3, <16 x i8>* @llvm_mips_vshf_b_RES
   ret void
@@ -36,9 +36,9 @@
 
 define void @llvm_mips_vshf_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_vshf_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_vshf_h_ARG2
-  %2 = load <8 x i16>* @llvm_mips_vshf_h_ARG3
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_vshf_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_vshf_h_ARG2
+  %2 = load <8 x i16>, <8 x i16>* @llvm_mips_vshf_h_ARG3
   %3 = tail call <8 x i16> @llvm.mips.vshf.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
   store <8 x i16> %3, <8 x i16>* @llvm_mips_vshf_h_RES
   ret void
@@ -61,9 +61,9 @@
 
 define void @llvm_mips_vshf_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_vshf_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_vshf_w_ARG2
-  %2 = load <4 x i32>* @llvm_mips_vshf_w_ARG3
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_vshf_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_vshf_w_ARG2
+  %2 = load <4 x i32>, <4 x i32>* @llvm_mips_vshf_w_ARG3
   %3 = tail call <4 x i32> @llvm.mips.vshf.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
   store <4 x i32> %3, <4 x i32>* @llvm_mips_vshf_w_RES
   ret void
@@ -86,9 +86,9 @@
 
 define void @llvm_mips_vshf_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_vshf_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_vshf_d_ARG2
-  %2 = load <2 x i64>* @llvm_mips_vshf_d_ARG3
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_vshf_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_vshf_d_ARG2
+  %2 = load <2 x i64>, <2 x i64>* @llvm_mips_vshf_d_ARG3
   %3 = tail call <2 x i64> @llvm.mips.vshf.d(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2)
   store <2 x i64> %3, <2 x i64>* @llvm_mips_vshf_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/3r_4r.ll b/llvm/test/CodeGen/Mips/msa/3r_4r.ll
index b7fd728..73d104c 100644
--- a/llvm/test/CodeGen/Mips/msa/3r_4r.ll
+++ b/llvm/test/CodeGen/Mips/msa/3r_4r.ll
@@ -11,9 +11,9 @@
 
 define void @llvm_mips_maddv_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_maddv_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_maddv_b_ARG2
-  %2 = load <16 x i8>* @llvm_mips_maddv_b_ARG3
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_maddv_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_maddv_b_ARG2
+  %2 = load <16 x i8>, <16 x i8>* @llvm_mips_maddv_b_ARG3
   %3 = tail call <16 x i8> @llvm.mips.maddv.b(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
   store <16 x i8> %3, <16 x i8>* @llvm_mips_maddv_b_RES
   ret void
@@ -36,9 +36,9 @@
 
 define void @llvm_mips_maddv_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_maddv_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_maddv_h_ARG2
-  %2 = load <8 x i16>* @llvm_mips_maddv_h_ARG3
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_maddv_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_maddv_h_ARG2
+  %2 = load <8 x i16>, <8 x i16>* @llvm_mips_maddv_h_ARG3
   %3 = tail call <8 x i16> @llvm.mips.maddv.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
   store <8 x i16> %3, <8 x i16>* @llvm_mips_maddv_h_RES
   ret void
@@ -61,9 +61,9 @@
 
 define void @llvm_mips_maddv_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_maddv_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_maddv_w_ARG2
-  %2 = load <4 x i32>* @llvm_mips_maddv_w_ARG3
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_maddv_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_maddv_w_ARG2
+  %2 = load <4 x i32>, <4 x i32>* @llvm_mips_maddv_w_ARG3
   %3 = tail call <4 x i32> @llvm.mips.maddv.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
   store <4 x i32> %3, <4 x i32>* @llvm_mips_maddv_w_RES
   ret void
@@ -86,9 +86,9 @@
 
 define void @llvm_mips_maddv_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_maddv_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_maddv_d_ARG2
-  %2 = load <2 x i64>* @llvm_mips_maddv_d_ARG3
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_maddv_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_maddv_d_ARG2
+  %2 = load <2 x i64>, <2 x i64>* @llvm_mips_maddv_d_ARG3
   %3 = tail call <2 x i64> @llvm.mips.maddv.d(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2)
   store <2 x i64> %3, <2 x i64>* @llvm_mips_maddv_d_RES
   ret void
@@ -111,9 +111,9 @@
 
 define void @llvm_mips_msubv_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_msubv_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_msubv_b_ARG2
-  %2 = load <16 x i8>* @llvm_mips_msubv_b_ARG3
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_msubv_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_msubv_b_ARG2
+  %2 = load <16 x i8>, <16 x i8>* @llvm_mips_msubv_b_ARG3
   %3 = tail call <16 x i8> @llvm.mips.msubv.b(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
   store <16 x i8> %3, <16 x i8>* @llvm_mips_msubv_b_RES
   ret void
@@ -136,9 +136,9 @@
 
 define void @llvm_mips_msubv_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_msubv_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_msubv_h_ARG2
-  %2 = load <8 x i16>* @llvm_mips_msubv_h_ARG3
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_msubv_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_msubv_h_ARG2
+  %2 = load <8 x i16>, <8 x i16>* @llvm_mips_msubv_h_ARG3
   %3 = tail call <8 x i16> @llvm.mips.msubv.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
   store <8 x i16> %3, <8 x i16>* @llvm_mips_msubv_h_RES
   ret void
@@ -161,9 +161,9 @@
 
 define void @llvm_mips_msubv_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_msubv_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_msubv_w_ARG2
-  %2 = load <4 x i32>* @llvm_mips_msubv_w_ARG3
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_msubv_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_msubv_w_ARG2
+  %2 = load <4 x i32>, <4 x i32>* @llvm_mips_msubv_w_ARG3
   %3 = tail call <4 x i32> @llvm.mips.msubv.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
   store <4 x i32> %3, <4 x i32>* @llvm_mips_msubv_w_RES
   ret void
@@ -186,9 +186,9 @@
 
 define void @llvm_mips_msubv_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_msubv_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_msubv_d_ARG2
-  %2 = load <2 x i64>* @llvm_mips_msubv_d_ARG3
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_msubv_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_msubv_d_ARG2
+  %2 = load <2 x i64>, <2 x i64>* @llvm_mips_msubv_d_ARG3
   %3 = tail call <2 x i64> @llvm.mips.msubv.d(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2)
   store <2 x i64> %3, <2 x i64>* @llvm_mips_msubv_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/3r_4r_widen.ll b/llvm/test/CodeGen/Mips/msa/3r_4r_widen.ll
index 7063e45..fe248ee 100644
--- a/llvm/test/CodeGen/Mips/msa/3r_4r_widen.ll
+++ b/llvm/test/CodeGen/Mips/msa/3r_4r_widen.ll
@@ -12,9 +12,9 @@
 
 define void @llvm_mips_dpadd_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_dpadd_s_h_ARG1
-  %1 = load <16 x i8>* @llvm_mips_dpadd_s_h_ARG2
-  %2 = load <16 x i8>* @llvm_mips_dpadd_s_h_ARG3
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_s_h_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_s_h_ARG2
+  %2 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_s_h_ARG3
   %3 = tail call <8 x i16> @llvm.mips.dpadd.s.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2)
   store <8 x i16> %3, <8 x i16>* @llvm_mips_dpadd_s_h_RES
   ret void
@@ -37,9 +37,9 @@
 
 define void @llvm_mips_dpadd_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_dpadd_s_w_ARG1
-  %1 = load <8 x i16>* @llvm_mips_dpadd_s_w_ARG2
-  %2 = load <8 x i16>* @llvm_mips_dpadd_s_w_ARG3
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_s_w_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_s_w_ARG2
+  %2 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_s_w_ARG3
   %3 = tail call <4 x i32> @llvm.mips.dpadd.s.w(<4 x i32> %0, <8 x i16> %1, <8 x i16> %2)
   store <4 x i32> %3, <4 x i32>* @llvm_mips_dpadd_s_w_RES
   ret void
@@ -62,9 +62,9 @@
 
 define void @llvm_mips_dpadd_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_dpadd_s_d_ARG1
-  %1 = load <4 x i32>* @llvm_mips_dpadd_s_d_ARG2
-  %2 = load <4 x i32>* @llvm_mips_dpadd_s_d_ARG3
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_dpadd_s_d_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_s_d_ARG2
+  %2 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_s_d_ARG3
   %3 = tail call <2 x i64> @llvm.mips.dpadd.s.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2)
   store <2 x i64> %3, <2 x i64>* @llvm_mips_dpadd_s_d_RES
   ret void
@@ -87,9 +87,9 @@
 
 define void @llvm_mips_dpadd_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_dpadd_u_h_ARG1
-  %1 = load <16 x i8>* @llvm_mips_dpadd_u_h_ARG2
-  %2 = load <16 x i8>* @llvm_mips_dpadd_u_h_ARG3
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_u_h_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_u_h_ARG2
+  %2 = load <16 x i8>, <16 x i8>* @llvm_mips_dpadd_u_h_ARG3
   %3 = tail call <8 x i16> @llvm.mips.dpadd.u.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2)
   store <8 x i16> %3, <8 x i16>* @llvm_mips_dpadd_u_h_RES
   ret void
@@ -112,9 +112,9 @@
 
 define void @llvm_mips_dpadd_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_dpadd_u_w_ARG1
-  %1 = load <8 x i16>* @llvm_mips_dpadd_u_w_ARG2
-  %2 = load <8 x i16>* @llvm_mips_dpadd_u_w_ARG3
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_u_w_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_u_w_ARG2
+  %2 = load <8 x i16>, <8 x i16>* @llvm_mips_dpadd_u_w_ARG3
   %3 = tail call <4 x i32> @llvm.mips.dpadd.u.w(<4 x i32> %0, <8 x i16> %1, <8 x i16> %2)
   store <4 x i32> %3, <4 x i32>* @llvm_mips_dpadd_u_w_RES
   ret void
@@ -137,9 +137,9 @@
 
 define void @llvm_mips_dpadd_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_dpadd_u_d_ARG1
-  %1 = load <4 x i32>* @llvm_mips_dpadd_u_d_ARG2
-  %2 = load <4 x i32>* @llvm_mips_dpadd_u_d_ARG3
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_dpadd_u_d_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_u_d_ARG2
+  %2 = load <4 x i32>, <4 x i32>* @llvm_mips_dpadd_u_d_ARG3
   %3 = tail call <2 x i64> @llvm.mips.dpadd.u.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2)
   store <2 x i64> %3, <2 x i64>* @llvm_mips_dpadd_u_d_RES
   ret void
@@ -162,9 +162,9 @@
 
 define void @llvm_mips_dpsub_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_dpsub_s_h_ARG1
-  %1 = load <16 x i8>* @llvm_mips_dpsub_s_h_ARG2
-  %2 = load <16 x i8>* @llvm_mips_dpsub_s_h_ARG3
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_dpsub_s_h_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_dpsub_s_h_ARG2
+  %2 = load <16 x i8>, <16 x i8>* @llvm_mips_dpsub_s_h_ARG3
   %3 = tail call <8 x i16> @llvm.mips.dpsub.s.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2)
   store <8 x i16> %3, <8 x i16>* @llvm_mips_dpsub_s_h_RES
   ret void
@@ -187,9 +187,9 @@
 
 define void @llvm_mips_dpsub_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_dpsub_s_w_ARG1
-  %1 = load <8 x i16>* @llvm_mips_dpsub_s_w_ARG2
-  %2 = load <8 x i16>* @llvm_mips_dpsub_s_w_ARG3
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_dpsub_s_w_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_dpsub_s_w_ARG2
+  %2 = load <8 x i16>, <8 x i16>* @llvm_mips_dpsub_s_w_ARG3
   %3 = tail call <4 x i32> @llvm.mips.dpsub.s.w(<4 x i32> %0, <8 x i16> %1, <8 x i16> %2)
   store <4 x i32> %3, <4 x i32>* @llvm_mips_dpsub_s_w_RES
   ret void
@@ -212,9 +212,9 @@
 
 define void @llvm_mips_dpsub_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_dpsub_s_d_ARG1
-  %1 = load <4 x i32>* @llvm_mips_dpsub_s_d_ARG2
-  %2 = load <4 x i32>* @llvm_mips_dpsub_s_d_ARG3
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_dpsub_s_d_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_dpsub_s_d_ARG2
+  %2 = load <4 x i32>, <4 x i32>* @llvm_mips_dpsub_s_d_ARG3
   %3 = tail call <2 x i64> @llvm.mips.dpsub.s.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2)
   store <2 x i64> %3, <2 x i64>* @llvm_mips_dpsub_s_d_RES
   ret void
@@ -237,9 +237,9 @@
 
 define void @llvm_mips_dpsub_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_dpsub_u_h_ARG1
-  %1 = load <16 x i8>* @llvm_mips_dpsub_u_h_ARG2
-  %2 = load <16 x i8>* @llvm_mips_dpsub_u_h_ARG3
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_dpsub_u_h_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_dpsub_u_h_ARG2
+  %2 = load <16 x i8>, <16 x i8>* @llvm_mips_dpsub_u_h_ARG3
   %3 = tail call <8 x i16> @llvm.mips.dpsub.u.h(<8 x i16> %0, <16 x i8> %1, <16 x i8> %2)
   store <8 x i16> %3, <8 x i16>* @llvm_mips_dpsub_u_h_RES
   ret void
@@ -262,9 +262,9 @@
 
 define void @llvm_mips_dpsub_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_dpsub_u_w_ARG1
-  %1 = load <8 x i16>* @llvm_mips_dpsub_u_w_ARG2
-  %2 = load <8 x i16>* @llvm_mips_dpsub_u_w_ARG3
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_dpsub_u_w_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_dpsub_u_w_ARG2
+  %2 = load <8 x i16>, <8 x i16>* @llvm_mips_dpsub_u_w_ARG3
   %3 = tail call <4 x i32> @llvm.mips.dpsub.u.w(<4 x i32> %0, <8 x i16> %1, <8 x i16> %2)
   store <4 x i32> %3, <4 x i32>* @llvm_mips_dpsub_u_w_RES
   ret void
@@ -287,9 +287,9 @@
 
 define void @llvm_mips_dpsub_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_dpsub_u_d_ARG1
-  %1 = load <4 x i32>* @llvm_mips_dpsub_u_d_ARG2
-  %2 = load <4 x i32>* @llvm_mips_dpsub_u_d_ARG3
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_dpsub_u_d_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_dpsub_u_d_ARG2
+  %2 = load <4 x i32>, <4 x i32>* @llvm_mips_dpsub_u_d_ARG3
   %3 = tail call <2 x i64> @llvm.mips.dpsub.u.d(<2 x i64> %0, <4 x i32> %1, <4 x i32> %2)
   store <2 x i64> %3, <2 x i64>* @llvm_mips_dpsub_u_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/3r_splat.ll b/llvm/test/CodeGen/Mips/msa/3r_splat.ll
index 6b0cb26..56d26b0 100644
--- a/llvm/test/CodeGen/Mips/msa/3r_splat.ll
+++ b/llvm/test/CodeGen/Mips/msa/3r_splat.ll
@@ -11,7 +11,7 @@
 
 define void @llvm_mips_splat_b_test(i32 %a) nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_splat_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_splat_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.splat.b(<16 x i8> %0, i32 %a)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_splat_b_RES
   ret void
@@ -32,7 +32,7 @@
 
 define void @llvm_mips_splat_h_test(i32 %a) nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_splat_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_splat_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.splat.h(<8 x i16> %0, i32 %a)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_splat_h_RES
   ret void
@@ -53,7 +53,7 @@
 
 define void @llvm_mips_splat_w_test(i32 %a) nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_splat_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_splat_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.splat.w(<4 x i32> %0, i32 %a)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_splat_w_RES
   ret void
@@ -74,7 +74,7 @@
 
 define void @llvm_mips_splat_d_test(i32 %a) nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_splat_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_splat_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.splat.d(<2 x i64> %0, i32 %a)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_splat_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/3rf.ll b/llvm/test/CodeGen/Mips/msa/3rf.ll
index ae665af..dce0c27 100644
--- a/llvm/test/CodeGen/Mips/msa/3rf.ll
+++ b/llvm/test/CodeGen/Mips/msa/3rf.ll
@@ -9,8 +9,8 @@
 
 define void @llvm_mips_fadd_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fadd_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fadd_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fadd_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fadd_w_ARG2
   %2 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %1)
   store <4 x float> %2, <4 x float>* @llvm_mips_fadd_w_RES
   ret void
@@ -31,8 +31,8 @@
 
 define void @llvm_mips_fadd_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fadd_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fadd_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fadd_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fadd_d_ARG2
   %2 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %1)
   store <2 x double> %2, <2 x double>* @llvm_mips_fadd_d_RES
   ret void
@@ -49,8 +49,8 @@
 
 define void @fadd_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fadd_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fadd_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fadd_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fadd_w_ARG2
   %2 = fadd <4 x float> %0, %1
   store <4 x float> %2, <4 x float>* @llvm_mips_fadd_w_RES
   ret void
@@ -65,8 +65,8 @@
 
 define void @fadd_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fadd_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fadd_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fadd_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fadd_d_ARG2
   %2 = fadd <2 x double> %0, %1
   store <2 x double> %2, <2 x double>* @llvm_mips_fadd_d_RES
   ret void
@@ -85,8 +85,8 @@
 
 define void @llvm_mips_fdiv_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fdiv_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fdiv_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fdiv_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fdiv_w_ARG2
   %2 = tail call <4 x float> @llvm.mips.fdiv.w(<4 x float> %0, <4 x float> %1)
   store <4 x float> %2, <4 x float>* @llvm_mips_fdiv_w_RES
   ret void
@@ -107,8 +107,8 @@
 
 define void @llvm_mips_fdiv_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fdiv_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fdiv_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fdiv_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fdiv_d_ARG2
   %2 = tail call <2 x double> @llvm.mips.fdiv.d(<2 x double> %0, <2 x double> %1)
   store <2 x double> %2, <2 x double>* @llvm_mips_fdiv_d_RES
   ret void
@@ -125,8 +125,8 @@
 
 define void @fdiv_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fdiv_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fdiv_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fdiv_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fdiv_w_ARG2
   %2 = fdiv <4 x float> %0, %1
   store <4 x float> %2, <4 x float>* @llvm_mips_fdiv_w_RES
   ret void
@@ -141,8 +141,8 @@
 
 define void @fdiv_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fdiv_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fdiv_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fdiv_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fdiv_d_ARG2
   %2 = fdiv <2 x double> %0, %1
   store <2 x double> %2, <2 x double>* @llvm_mips_fdiv_d_RES
   ret void
@@ -161,8 +161,8 @@
 
 define void @llvm_mips_fmin_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fmin_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fmin_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fmin_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fmin_w_ARG2
   %2 = tail call <4 x float> @llvm.mips.fmin.w(<4 x float> %0, <4 x float> %1)
   store <4 x float> %2, <4 x float>* @llvm_mips_fmin_w_RES
   ret void
@@ -183,8 +183,8 @@
 
 define void @llvm_mips_fmin_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fmin_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fmin_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fmin_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fmin_d_ARG2
   %2 = tail call <2 x double> @llvm.mips.fmin.d(<2 x double> %0, <2 x double> %1)
   store <2 x double> %2, <2 x double>* @llvm_mips_fmin_d_RES
   ret void
@@ -205,8 +205,8 @@
 
 define void @llvm_mips_fmin_a_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fmin_a_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fmin_a_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fmin_a_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fmin_a_w_ARG2
   %2 = tail call <4 x float> @llvm.mips.fmin.a.w(<4 x float> %0, <4 x float> %1)
   store <4 x float> %2, <4 x float>* @llvm_mips_fmin_a_w_RES
   ret void
@@ -227,8 +227,8 @@
 
 define void @llvm_mips_fmin_a_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fmin_a_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fmin_a_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fmin_a_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fmin_a_d_ARG2
   %2 = tail call <2 x double> @llvm.mips.fmin.a.d(<2 x double> %0, <2 x double> %1)
   store <2 x double> %2, <2 x double>* @llvm_mips_fmin_a_d_RES
   ret void
@@ -249,8 +249,8 @@
 
 define void @llvm_mips_fmax_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fmax_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fmax_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fmax_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fmax_w_ARG2
   %2 = tail call <4 x float> @llvm.mips.fmax.w(<4 x float> %0, <4 x float> %1)
   store <4 x float> %2, <4 x float>* @llvm_mips_fmax_w_RES
   ret void
@@ -271,8 +271,8 @@
 
 define void @llvm_mips_fmax_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fmax_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fmax_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fmax_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fmax_d_ARG2
   %2 = tail call <2 x double> @llvm.mips.fmax.d(<2 x double> %0, <2 x double> %1)
   store <2 x double> %2, <2 x double>* @llvm_mips_fmax_d_RES
   ret void
@@ -293,8 +293,8 @@
 
 define void @llvm_mips_fmax_a_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fmax_a_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fmax_a_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fmax_a_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fmax_a_w_ARG2
   %2 = tail call <4 x float> @llvm.mips.fmax.a.w(<4 x float> %0, <4 x float> %1)
   store <4 x float> %2, <4 x float>* @llvm_mips_fmax_a_w_RES
   ret void
@@ -315,8 +315,8 @@
 
 define void @llvm_mips_fmax_a_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fmax_a_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fmax_a_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fmax_a_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fmax_a_d_ARG2
   %2 = tail call <2 x double> @llvm.mips.fmax.a.d(<2 x double> %0, <2 x double> %1)
   store <2 x double> %2, <2 x double>* @llvm_mips_fmax_a_d_RES
   ret void
@@ -337,8 +337,8 @@
 
 define void @llvm_mips_fmul_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fmul_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fmul_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fmul_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fmul_w_ARG2
   %2 = tail call <4 x float> @llvm.mips.fmul.w(<4 x float> %0, <4 x float> %1)
   store <4 x float> %2, <4 x float>* @llvm_mips_fmul_w_RES
   ret void
@@ -359,8 +359,8 @@
 
 define void @llvm_mips_fmul_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fmul_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fmul_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fmul_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fmul_d_ARG2
   %2 = tail call <2 x double> @llvm.mips.fmul.d(<2 x double> %0, <2 x double> %1)
   store <2 x double> %2, <2 x double>* @llvm_mips_fmul_d_RES
   ret void
@@ -377,8 +377,8 @@
 
 define void @fmul_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fmul_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fmul_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fmul_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fmul_w_ARG2
   %2 = fmul <4 x float> %0, %1
   store <4 x float> %2, <4 x float>* @llvm_mips_fmul_w_RES
   ret void
@@ -393,8 +393,8 @@
 
 define void @fmul_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fmul_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fmul_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fmul_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fmul_d_ARG2
   %2 = fmul <2 x double> %0, %1
   store <2 x double> %2, <2 x double>* @llvm_mips_fmul_d_RES
   ret void
@@ -413,8 +413,8 @@
 
 define void @llvm_mips_fsub_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fsub_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fsub_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fsub_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fsub_w_ARG2
   %2 = tail call <4 x float> @llvm.mips.fsub.w(<4 x float> %0, <4 x float> %1)
   store <4 x float> %2, <4 x float>* @llvm_mips_fsub_w_RES
   ret void
@@ -435,8 +435,8 @@
 
 define void @llvm_mips_fsub_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fsub_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fsub_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fsub_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fsub_d_ARG2
   %2 = tail call <2 x double> @llvm.mips.fsub.d(<2 x double> %0, <2 x double> %1)
   store <2 x double> %2, <2 x double>* @llvm_mips_fsub_d_RES
   ret void
@@ -454,8 +454,8 @@
 
 define void @fsub_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fsub_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fsub_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fsub_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fsub_w_ARG2
   %2 = fsub <4 x float> %0, %1
   store <4 x float> %2, <4 x float>* @llvm_mips_fsub_w_RES
   ret void
@@ -470,8 +470,8 @@
 
 define void @fsub_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fsub_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fsub_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fsub_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fsub_d_ARG2
   %2 = fsub <2 x double> %0, %1
   store <2 x double> %2, <2 x double>* @llvm_mips_fsub_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/3rf_4rf.ll b/llvm/test/CodeGen/Mips/msa/3rf_4rf.ll
index 67ef7fd..f1a3002 100644
--- a/llvm/test/CodeGen/Mips/msa/3rf_4rf.ll
+++ b/llvm/test/CodeGen/Mips/msa/3rf_4rf.ll
@@ -11,9 +11,9 @@
 
 define void @llvm_mips_fmadd_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fmadd_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fmadd_w_ARG2
-  %2 = load <4 x float>* @llvm_mips_fmadd_w_ARG3
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fmadd_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fmadd_w_ARG2
+  %2 = load <4 x float>, <4 x float>* @llvm_mips_fmadd_w_ARG3
   %3 = tail call <4 x float> @llvm.mips.fmadd.w(<4 x float> %0, <4 x float> %1, <4 x float> %2)
   store <4 x float> %3, <4 x float>* @llvm_mips_fmadd_w_RES
   ret void
@@ -36,9 +36,9 @@
 
 define void @llvm_mips_fmadd_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fmadd_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fmadd_d_ARG2
-  %2 = load <2 x double>* @llvm_mips_fmadd_d_ARG3
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fmadd_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fmadd_d_ARG2
+  %2 = load <2 x double>, <2 x double>* @llvm_mips_fmadd_d_ARG3
   %3 = tail call <2 x double> @llvm.mips.fmadd.d(<2 x double> %0, <2 x double> %1, <2 x double> %2)
   store <2 x double> %3, <2 x double>* @llvm_mips_fmadd_d_RES
   ret void
@@ -61,9 +61,9 @@
 
 define void @llvm_mips_fmsub_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fmsub_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fmsub_w_ARG2
-  %2 = load <4 x float>* @llvm_mips_fmsub_w_ARG3
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fmsub_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fmsub_w_ARG2
+  %2 = load <4 x float>, <4 x float>* @llvm_mips_fmsub_w_ARG3
   %3 = tail call <4 x float> @llvm.mips.fmsub.w(<4 x float> %0, <4 x float> %1, <4 x float> %2)
   store <4 x float> %3, <4 x float>* @llvm_mips_fmsub_w_RES
   ret void
@@ -86,9 +86,9 @@
 
 define void @llvm_mips_fmsub_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fmsub_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fmsub_d_ARG2
-  %2 = load <2 x double>* @llvm_mips_fmsub_d_ARG3
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fmsub_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fmsub_d_ARG2
+  %2 = load <2 x double>, <2 x double>* @llvm_mips_fmsub_d_ARG3
   %3 = tail call <2 x double> @llvm.mips.fmsub.d(<2 x double> %0, <2 x double> %1, <2 x double> %2)
   store <2 x double> %3, <2 x double>* @llvm_mips_fmsub_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/3rf_4rf_q.ll b/llvm/test/CodeGen/Mips/msa/3rf_4rf_q.ll
index de28be0..704c4b7 100644
--- a/llvm/test/CodeGen/Mips/msa/3rf_4rf_q.ll
+++ b/llvm/test/CodeGen/Mips/msa/3rf_4rf_q.ll
@@ -11,9 +11,9 @@
 
 define void @llvm_mips_madd_q_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_madd_q_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_madd_q_h_ARG2
-  %2 = load <8 x i16>* @llvm_mips_madd_q_h_ARG3
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_madd_q_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_madd_q_h_ARG2
+  %2 = load <8 x i16>, <8 x i16>* @llvm_mips_madd_q_h_ARG3
   %3 = tail call <8 x i16> @llvm.mips.madd.q.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
   store <8 x i16> %3, <8 x i16>* @llvm_mips_madd_q_h_RES
   ret void
@@ -36,9 +36,9 @@
 
 define void @llvm_mips_madd_q_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_madd_q_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_madd_q_w_ARG2
-  %2 = load <4 x i32>* @llvm_mips_madd_q_w_ARG3
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_madd_q_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_madd_q_w_ARG2
+  %2 = load <4 x i32>, <4 x i32>* @llvm_mips_madd_q_w_ARG3
   %3 = tail call <4 x i32> @llvm.mips.madd.q.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
   store <4 x i32> %3, <4 x i32>* @llvm_mips_madd_q_w_RES
   ret void
@@ -61,9 +61,9 @@
 
 define void @llvm_mips_maddr_q_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_maddr_q_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_maddr_q_h_ARG2
-  %2 = load <8 x i16>* @llvm_mips_maddr_q_h_ARG3
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_maddr_q_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_maddr_q_h_ARG2
+  %2 = load <8 x i16>, <8 x i16>* @llvm_mips_maddr_q_h_ARG3
   %3 = tail call <8 x i16> @llvm.mips.maddr.q.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
   store <8 x i16> %3, <8 x i16>* @llvm_mips_maddr_q_h_RES
   ret void
@@ -86,9 +86,9 @@
 
 define void @llvm_mips_maddr_q_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_maddr_q_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_maddr_q_w_ARG2
-  %2 = load <4 x i32>* @llvm_mips_maddr_q_w_ARG3
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_maddr_q_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_maddr_q_w_ARG2
+  %2 = load <4 x i32>, <4 x i32>* @llvm_mips_maddr_q_w_ARG3
   %3 = tail call <4 x i32> @llvm.mips.maddr.q.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
   store <4 x i32> %3, <4 x i32>* @llvm_mips_maddr_q_w_RES
   ret void
@@ -111,9 +111,9 @@
 
 define void @llvm_mips_msub_q_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_msub_q_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_msub_q_h_ARG2
-  %2 = load <8 x i16>* @llvm_mips_msub_q_h_ARG3
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_msub_q_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_msub_q_h_ARG2
+  %2 = load <8 x i16>, <8 x i16>* @llvm_mips_msub_q_h_ARG3
   %3 = tail call <8 x i16> @llvm.mips.msub.q.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
   store <8 x i16> %3, <8 x i16>* @llvm_mips_msub_q_h_RES
   ret void
@@ -136,9 +136,9 @@
 
 define void @llvm_mips_msub_q_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_msub_q_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_msub_q_w_ARG2
-  %2 = load <4 x i32>* @llvm_mips_msub_q_w_ARG3
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_msub_q_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_msub_q_w_ARG2
+  %2 = load <4 x i32>, <4 x i32>* @llvm_mips_msub_q_w_ARG3
   %3 = tail call <4 x i32> @llvm.mips.msub.q.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
   store <4 x i32> %3, <4 x i32>* @llvm_mips_msub_q_w_RES
   ret void
@@ -161,9 +161,9 @@
 
 define void @llvm_mips_msubr_q_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_msubr_q_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_msubr_q_h_ARG2
-  %2 = load <8 x i16>* @llvm_mips_msubr_q_h_ARG3
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_msubr_q_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_msubr_q_h_ARG2
+  %2 = load <8 x i16>, <8 x i16>* @llvm_mips_msubr_q_h_ARG3
   %3 = tail call <8 x i16> @llvm.mips.msubr.q.h(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
   store <8 x i16> %3, <8 x i16>* @llvm_mips_msubr_q_h_RES
   ret void
@@ -186,9 +186,9 @@
 
 define void @llvm_mips_msubr_q_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_msubr_q_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_msubr_q_w_ARG2
-  %2 = load <4 x i32>* @llvm_mips_msubr_q_w_ARG3
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_msubr_q_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_msubr_q_w_ARG2
+  %2 = load <4 x i32>, <4 x i32>* @llvm_mips_msubr_q_w_ARG3
   %3 = tail call <4 x i32> @llvm.mips.msubr.q.w(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2)
   store <4 x i32> %3, <4 x i32>* @llvm_mips_msubr_q_w_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/3rf_exdo.ll b/llvm/test/CodeGen/Mips/msa/3rf_exdo.ll
index 8a7f268..1b1b2e9 100644
--- a/llvm/test/CodeGen/Mips/msa/3rf_exdo.ll
+++ b/llvm/test/CodeGen/Mips/msa/3rf_exdo.ll
@@ -10,8 +10,8 @@
 
 define void @llvm_mips_fexdo_h_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fexdo_h_ARG1
-  %1 = load <4 x float>* @llvm_mips_fexdo_h_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fexdo_h_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fexdo_h_ARG2
   %2 = tail call <8 x half> @llvm.mips.fexdo.h(<4 x float> %0, <4 x float> %1)
   store <8 x half> %2, <8 x half>* @llvm_mips_fexdo_h_RES
   ret void
@@ -32,8 +32,8 @@
 
 define void @llvm_mips_fexdo_w_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fexdo_w_ARG1
-  %1 = load <2 x double>* @llvm_mips_fexdo_w_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fexdo_w_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fexdo_w_ARG2
   %2 = tail call <4 x float> @llvm.mips.fexdo.w(<2 x double> %0, <2 x double> %1)
   store <4 x float> %2, <4 x float>* @llvm_mips_fexdo_w_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/3rf_float_int.ll b/llvm/test/CodeGen/Mips/msa/3rf_float_int.ll
index 7b01e17..2bd056d 100644
--- a/llvm/test/CodeGen/Mips/msa/3rf_float_int.ll
+++ b/llvm/test/CodeGen/Mips/msa/3rf_float_int.ll
@@ -10,8 +10,8 @@
 
 define void @llvm_mips_fexp2_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fexp2_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_fexp2_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fexp2_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_fexp2_w_ARG2
   %2 = tail call <4 x float> @llvm.mips.fexp2.w(<4 x float> %0, <4 x i32> %1)
   store <4 x float> %2, <4 x float>* @llvm_mips_fexp2_w_RES
   ret void
@@ -32,8 +32,8 @@
 
 define void @llvm_mips_fexp2_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fexp2_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_fexp2_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fexp2_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_fexp2_d_ARG2
   %2 = tail call <2 x double> @llvm.mips.fexp2.d(<2 x double> %0, <2 x i64> %1)
   store <2 x double> %2, <2 x double>* @llvm_mips_fexp2_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/3rf_int_float.ll b/llvm/test/CodeGen/Mips/msa/3rf_int_float.ll
index 5624771..545e543 100644
--- a/llvm/test/CodeGen/Mips/msa/3rf_int_float.ll
+++ b/llvm/test/CodeGen/Mips/msa/3rf_int_float.ll
@@ -10,8 +10,8 @@
 
 define void @llvm_mips_fcaf_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fcaf_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fcaf_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fcaf_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fcaf_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fcaf.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fcaf_w_RES
   ret void
@@ -32,8 +32,8 @@
 
 define void @llvm_mips_fcaf_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fcaf_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fcaf_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fcaf_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fcaf_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fcaf.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fcaf_d_RES
   ret void
@@ -54,8 +54,8 @@
 
 define void @llvm_mips_fceq_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fceq_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fceq_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fceq_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fceq_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fceq.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fceq_w_RES
   ret void
@@ -76,8 +76,8 @@
 
 define void @llvm_mips_fceq_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fceq_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fceq_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fceq_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fceq_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fceq.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fceq_d_RES
   ret void
@@ -98,8 +98,8 @@
 
 define void @llvm_mips_fcle_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fcle_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fcle_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fcle_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fcle_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fcle.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fcle_w_RES
   ret void
@@ -120,8 +120,8 @@
 
 define void @llvm_mips_fcle_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fcle_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fcle_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fcle_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fcle_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fcle.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fcle_d_RES
   ret void
@@ -142,8 +142,8 @@
 
 define void @llvm_mips_fclt_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fclt_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fclt_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fclt_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fclt_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fclt.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fclt_w_RES
   ret void
@@ -164,8 +164,8 @@
 
 define void @llvm_mips_fclt_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fclt_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fclt_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fclt_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fclt_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fclt.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fclt_d_RES
   ret void
@@ -186,8 +186,8 @@
 
 define void @llvm_mips_fcor_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fcor_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fcor_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fcor_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fcor_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fcor.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fcor_w_RES
   ret void
@@ -208,8 +208,8 @@
 
 define void @llvm_mips_fcor_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fcor_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fcor_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fcor_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fcor_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fcor.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fcor_d_RES
   ret void
@@ -230,8 +230,8 @@
 
 define void @llvm_mips_fcne_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fcne_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fcne_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fcne_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fcne_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fcne.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fcne_w_RES
   ret void
@@ -252,8 +252,8 @@
 
 define void @llvm_mips_fcne_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fcne_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fcne_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fcne_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fcne_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fcne.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fcne_d_RES
   ret void
@@ -274,8 +274,8 @@
 
 define void @llvm_mips_fcueq_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fcueq_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fcueq_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fcueq_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fcueq_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fcueq.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fcueq_w_RES
   ret void
@@ -296,8 +296,8 @@
 
 define void @llvm_mips_fcueq_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fcueq_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fcueq_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fcueq_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fcueq_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fcueq.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fcueq_d_RES
   ret void
@@ -318,8 +318,8 @@
 
 define void @llvm_mips_fcult_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fcult_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fcult_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fcult_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fcult_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fcult.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fcult_w_RES
   ret void
@@ -340,8 +340,8 @@
 
 define void @llvm_mips_fcult_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fcult_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fcult_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fcult_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fcult_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fcult.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fcult_d_RES
   ret void
@@ -362,8 +362,8 @@
 
 define void @llvm_mips_fcule_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fcule_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fcule_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fcule_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fcule_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fcule.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fcule_w_RES
   ret void
@@ -384,8 +384,8 @@
 
 define void @llvm_mips_fcule_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fcule_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fcule_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fcule_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fcule_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fcule.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fcule_d_RES
   ret void
@@ -406,8 +406,8 @@
 
 define void @llvm_mips_fcun_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fcun_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fcun_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fcun_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fcun_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fcun.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fcun_w_RES
   ret void
@@ -428,8 +428,8 @@
 
 define void @llvm_mips_fcun_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fcun_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fcun_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fcun_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fcun_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fcun.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fcun_d_RES
   ret void
@@ -450,8 +450,8 @@
 
 define void @llvm_mips_fcune_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fcune_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fcune_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fcune_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fcune_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fcune.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fcune_w_RES
   ret void
@@ -472,8 +472,8 @@
 
 define void @llvm_mips_fcune_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fcune_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fcune_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fcune_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fcune_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fcune.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fcune_d_RES
   ret void
@@ -494,8 +494,8 @@
 
 define void @llvm_mips_fsaf_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fsaf_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fsaf_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fsaf_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fsaf_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fsaf.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fsaf_w_RES
   ret void
@@ -516,8 +516,8 @@
 
 define void @llvm_mips_fsaf_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fsaf_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fsaf_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fsaf_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fsaf_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fsaf.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fsaf_d_RES
   ret void
@@ -538,8 +538,8 @@
 
 define void @llvm_mips_fseq_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fseq_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fseq_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fseq_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fseq_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fseq.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fseq_w_RES
   ret void
@@ -560,8 +560,8 @@
 
 define void @llvm_mips_fseq_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fseq_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fseq_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fseq_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fseq_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fseq.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fseq_d_RES
   ret void
@@ -582,8 +582,8 @@
 
 define void @llvm_mips_fsle_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fsle_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fsle_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fsle_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fsle_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fsle.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fsle_w_RES
   ret void
@@ -604,8 +604,8 @@
 
 define void @llvm_mips_fsle_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fsle_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fsle_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fsle_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fsle_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fsle.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fsle_d_RES
   ret void
@@ -626,8 +626,8 @@
 
 define void @llvm_mips_fslt_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fslt_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fslt_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fslt_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fslt_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fslt.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fslt_w_RES
   ret void
@@ -648,8 +648,8 @@
 
 define void @llvm_mips_fslt_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fslt_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fslt_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fslt_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fslt_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fslt.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fslt_d_RES
   ret void
@@ -670,8 +670,8 @@
 
 define void @llvm_mips_fsor_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fsor_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fsor_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fsor_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fsor_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fsor.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fsor_w_RES
   ret void
@@ -692,8 +692,8 @@
 
 define void @llvm_mips_fsor_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fsor_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fsor_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fsor_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fsor_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fsor.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fsor_d_RES
   ret void
@@ -714,8 +714,8 @@
 
 define void @llvm_mips_fsne_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fsne_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fsne_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fsne_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fsne_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fsne.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fsne_w_RES
   ret void
@@ -736,8 +736,8 @@
 
 define void @llvm_mips_fsne_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fsne_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fsne_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fsne_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fsne_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fsne.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fsne_d_RES
   ret void
@@ -758,8 +758,8 @@
 
 define void @llvm_mips_fsueq_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fsueq_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fsueq_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fsueq_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fsueq_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fsueq.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fsueq_w_RES
   ret void
@@ -780,8 +780,8 @@
 
 define void @llvm_mips_fsueq_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fsueq_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fsueq_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fsueq_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fsueq_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fsueq.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fsueq_d_RES
   ret void
@@ -802,8 +802,8 @@
 
 define void @llvm_mips_fsult_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fsult_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fsult_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fsult_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fsult_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fsult.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fsult_w_RES
   ret void
@@ -824,8 +824,8 @@
 
 define void @llvm_mips_fsult_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fsult_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fsult_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fsult_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fsult_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fsult.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fsult_d_RES
   ret void
@@ -846,8 +846,8 @@
 
 define void @llvm_mips_fsule_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fsule_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fsule_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fsule_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fsule_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fsule.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fsule_w_RES
   ret void
@@ -868,8 +868,8 @@
 
 define void @llvm_mips_fsule_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fsule_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fsule_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fsule_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fsule_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fsule.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fsule_d_RES
   ret void
@@ -890,8 +890,8 @@
 
 define void @llvm_mips_fsun_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fsun_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fsun_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fsun_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fsun_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fsun.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fsun_w_RES
   ret void
@@ -912,8 +912,8 @@
 
 define void @llvm_mips_fsun_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fsun_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fsun_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fsun_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fsun_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fsun.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fsun_d_RES
   ret void
@@ -934,8 +934,8 @@
 
 define void @llvm_mips_fsune_w_test() nounwind {
 entry:
-  %0 = load <4 x float>* @llvm_mips_fsune_w_ARG1
-  %1 = load <4 x float>* @llvm_mips_fsune_w_ARG2
+  %0 = load <4 x float>, <4 x float>* @llvm_mips_fsune_w_ARG1
+  %1 = load <4 x float>, <4 x float>* @llvm_mips_fsune_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.fsune.w(<4 x float> %0, <4 x float> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_fsune_w_RES
   ret void
@@ -956,8 +956,8 @@
 
 define void @llvm_mips_fsune_d_test() nounwind {
 entry:
-  %0 = load <2 x double>* @llvm_mips_fsune_d_ARG1
-  %1 = load <2 x double>* @llvm_mips_fsune_d_ARG2
+  %0 = load <2 x double>, <2 x double>* @llvm_mips_fsune_d_ARG1
+  %1 = load <2 x double>, <2 x double>* @llvm_mips_fsune_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.fsune.d(<2 x double> %0, <2 x double> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_fsune_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/3rf_q.ll b/llvm/test/CodeGen/Mips/msa/3rf_q.ll
index f7000ee..c8b0a50 100644
--- a/llvm/test/CodeGen/Mips/msa/3rf_q.ll
+++ b/llvm/test/CodeGen/Mips/msa/3rf_q.ll
@@ -10,8 +10,8 @@
 
 define void @llvm_mips_mul_q_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_mul_q_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_mul_q_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_mul_q_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_mul_q_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.mul.q.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_mul_q_h_RES
   ret void
@@ -32,8 +32,8 @@
 
 define void @llvm_mips_mul_q_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_mul_q_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_mul_q_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_mul_q_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_mul_q_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.mul.q.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_mul_q_w_RES
   ret void
@@ -54,8 +54,8 @@
 
 define void @llvm_mips_mulr_q_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_mulr_q_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_mulr_q_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_mulr_q_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_mulr_q_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.mulr.q.h(<8 x i16> %0, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_mulr_q_h_RES
   ret void
@@ -76,8 +76,8 @@
 
 define void @llvm_mips_mulr_q_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_mulr_q_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_mulr_q_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_mulr_q_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_mulr_q_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.mulr.q.w(<4 x i32> %0, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_mulr_q_w_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/arithmetic.ll b/llvm/test/CodeGen/Mips/msa/arithmetic.ll
index 09ee502..3ecd0e4 100644
--- a/llvm/test/CodeGen/Mips/msa/arithmetic.ll
+++ b/llvm/test/CodeGen/Mips/msa/arithmetic.ll
@@ -4,9 +4,9 @@
 define void @add_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: add_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = add <16 x i8> %1, %2
   ; CHECK-DAG: addv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -20,9 +20,9 @@
 define void @add_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: add_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = add <8 x i16> %1, %2
   ; CHECK-DAG: addv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -36,9 +36,9 @@
 define void @add_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: add_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = add <4 x i32> %1, %2
   ; CHECK-DAG: addv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -52,9 +52,9 @@
 define void @add_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: add_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = add <2 x i64> %1, %2
   ; CHECK-DAG: addv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -68,7 +68,7 @@
 define void @add_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: add_v16i8_i:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = add <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
                           i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -83,7 +83,7 @@
 define void @add_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: add_v8i16_i:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = add <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1,
                           i16 1, i16 1, i16 1, i16 1>
@@ -98,7 +98,7 @@
 define void @add_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: add_v4i32_i:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = add <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   ; CHECK-DAG: addvi.w [[R3:\$w[0-9]+]], [[R1]], 1
@@ -112,7 +112,7 @@
 define void @add_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: add_v2i64_i:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = add <2 x i64> %1, <i64 1, i64 1>
   ; CHECK-DAG: addvi.d [[R3:\$w[0-9]+]], [[R1]], 1
@@ -126,9 +126,9 @@
 define void @sub_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: sub_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = sub <16 x i8> %1, %2
   ; CHECK-DAG: subv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -142,9 +142,9 @@
 define void @sub_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: sub_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = sub <8 x i16> %1, %2
   ; CHECK-DAG: subv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -158,9 +158,9 @@
 define void @sub_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: sub_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = sub <4 x i32> %1, %2
   ; CHECK-DAG: subv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -174,9 +174,9 @@
 define void @sub_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: sub_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = sub <2 x i64> %1, %2
   ; CHECK-DAG: subv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -190,7 +190,7 @@
 define void @sub_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: sub_v16i8_i:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = sub <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
                           i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -205,7 +205,7 @@
 define void @sub_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: sub_v8i16_i:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = sub <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1,
                           i16 1, i16 1, i16 1, i16 1>
@@ -220,7 +220,7 @@
 define void @sub_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: sub_v4i32_i:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = sub <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   ; CHECK-DAG: subvi.w [[R3:\$w[0-9]+]], [[R1]], 1
@@ -234,7 +234,7 @@
 define void @sub_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: sub_v2i64_i:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = sub <2 x i64> %1, <i64 1, i64 1>
   ; CHECK-DAG: subvi.d [[R3:\$w[0-9]+]], [[R1]], 1
@@ -248,9 +248,9 @@
 define void @mul_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: mul_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = mul <16 x i8> %1, %2
   ; CHECK-DAG: mulv.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -264,9 +264,9 @@
 define void @mul_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: mul_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = mul <8 x i16> %1, %2
   ; CHECK-DAG: mulv.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -280,9 +280,9 @@
 define void @mul_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: mul_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = mul <4 x i32> %1, %2
   ; CHECK-DAG: mulv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -296,9 +296,9 @@
 define void @mul_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: mul_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = mul <2 x i64> %1, %2
   ; CHECK-DAG: mulv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -313,11 +313,11 @@
                          <16 x i8>* %c) nounwind {
   ; CHECK: maddv_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <16 x i8>* %c
+  %3 = load <16 x i8>, <16 x i8>* %c
   ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
   %4 = mul <16 x i8> %2, %3
   %5 = add <16 x i8> %4, %1
@@ -333,11 +333,11 @@
                          <8 x i16>* %c) nounwind {
   ; CHECK: maddv_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <8 x i16>* %c
+  %3 = load <8 x i16>, <8 x i16>* %c
   ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
   %4 = mul <8 x i16> %2, %3
   %5 = add <8 x i16> %4, %1
@@ -353,11 +353,11 @@
                          <4 x i32>* %c) nounwind {
   ; CHECK: maddv_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <4 x i32>* %c
+  %3 = load <4 x i32>, <4 x i32>* %c
   ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
   %4 = mul <4 x i32> %2, %3
   %5 = add <4 x i32> %4, %1
@@ -373,11 +373,11 @@
                          <2 x i64>* %c) nounwind {
   ; CHECK: maddv_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <2 x i64>* %c
+  %3 = load <2 x i64>, <2 x i64>* %c
   ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
   %4 = mul <2 x i64> %2, %3
   %5 = add <2 x i64> %4, %1
@@ -393,11 +393,11 @@
                          <16 x i8>* %c) nounwind {
   ; CHECK: msubv_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <16 x i8>* %c
+  %3 = load <16 x i8>, <16 x i8>* %c
   ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
   %4 = mul <16 x i8> %2, %3
   %5 = sub <16 x i8> %1, %4
@@ -413,11 +413,11 @@
                          <8 x i16>* %c) nounwind {
   ; CHECK: msubv_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <8 x i16>* %c
+  %3 = load <8 x i16>, <8 x i16>* %c
   ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
   %4 = mul <8 x i16> %2, %3
   %5 = sub <8 x i16> %1, %4
@@ -433,11 +433,11 @@
                          <4 x i32>* %c) nounwind {
   ; CHECK: msubv_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <4 x i32>* %c
+  %3 = load <4 x i32>, <4 x i32>* %c
   ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
   %4 = mul <4 x i32> %2, %3
   %5 = sub <4 x i32> %1, %4
@@ -453,11 +453,11 @@
                          <2 x i64>* %c) nounwind {
   ; CHECK: msubv_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <2 x i64>* %c
+  %3 = load <2 x i64>, <2 x i64>* %c
   ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
   %4 = mul <2 x i64> %2, %3
   %5 = sub <2 x i64> %1, %4
@@ -472,9 +472,9 @@
 define void @div_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: div_s_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = sdiv <16 x i8> %1, %2
   ; CHECK-DAG: div_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -488,9 +488,9 @@
 define void @div_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: div_s_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = sdiv <8 x i16> %1, %2
   ; CHECK-DAG: div_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -504,9 +504,9 @@
 define void @div_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: div_s_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = sdiv <4 x i32> %1, %2
   ; CHECK-DAG: div_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -520,9 +520,9 @@
 define void @div_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: div_s_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = sdiv <2 x i64> %1, %2
   ; CHECK-DAG: div_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -536,9 +536,9 @@
 define void @div_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: div_u_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = udiv <16 x i8> %1, %2
   ; CHECK-DAG: div_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -552,9 +552,9 @@
 define void @div_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: div_u_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = udiv <8 x i16> %1, %2
   ; CHECK-DAG: div_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -568,9 +568,9 @@
 define void @div_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: div_u_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = udiv <4 x i32> %1, %2
   ; CHECK-DAG: div_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -584,9 +584,9 @@
 define void @div_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: div_u_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = udiv <2 x i64> %1, %2
   ; CHECK-DAG: div_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -600,9 +600,9 @@
 define void @mod_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: mod_s_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = srem <16 x i8> %1, %2
   ; CHECK-DAG: mod_s.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -616,9 +616,9 @@
 define void @mod_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: mod_s_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = srem <8 x i16> %1, %2
   ; CHECK-DAG: mod_s.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -632,9 +632,9 @@
 define void @mod_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: mod_s_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = srem <4 x i32> %1, %2
   ; CHECK-DAG: mod_s.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -648,9 +648,9 @@
 define void @mod_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: mod_s_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = srem <2 x i64> %1, %2
   ; CHECK-DAG: mod_s.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -664,9 +664,9 @@
 define void @mod_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: mod_u_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = urem <16 x i8> %1, %2
   ; CHECK-DAG: mod_u.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -680,9 +680,9 @@
 define void @mod_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: mod_u_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = urem <8 x i16> %1, %2
   ; CHECK-DAG: mod_u.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -696,9 +696,9 @@
 define void @mod_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: mod_u_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = urem <4 x i32> %1, %2
   ; CHECK-DAG: mod_u.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -712,9 +712,9 @@
 define void @mod_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: mod_u_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = urem <2 x i64> %1, %2
   ; CHECK-DAG: mod_u.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
diff --git a/llvm/test/CodeGen/Mips/msa/arithmetic_float.ll b/llvm/test/CodeGen/Mips/msa/arithmetic_float.ll
index 9aae284..d2ead53 100644
--- a/llvm/test/CodeGen/Mips/msa/arithmetic_float.ll
+++ b/llvm/test/CodeGen/Mips/msa/arithmetic_float.ll
@@ -4,9 +4,9 @@
 define void @add_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: add_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = fadd <4 x float> %1, %2
   ; CHECK-DAG: fadd.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -20,9 +20,9 @@
 define void @add_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: add_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = fadd <2 x double> %1, %2
   ; CHECK-DAG: fadd.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -36,9 +36,9 @@
 define void @sub_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: sub_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = fsub <4 x float> %1, %2
   ; CHECK-DAG: fsub.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -52,9 +52,9 @@
 define void @sub_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: sub_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = fsub <2 x double> %1, %2
   ; CHECK-DAG: fsub.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -68,9 +68,9 @@
 define void @mul_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: mul_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = fmul <4 x float> %1, %2
   ; CHECK-DAG: fmul.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -84,9 +84,9 @@
 define void @mul_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: mul_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = fmul <2 x double> %1, %2
   ; CHECK-DAG: fmul.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -101,11 +101,11 @@
                        <4 x float>* %c) nounwind {
   ; CHECK: fma_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <4 x float>* %c
+  %3 = load <4 x float>, <4 x float>* %c
   ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
   %4 = tail call <4 x float> @llvm.fma.v4f32 (<4 x float> %1, <4 x float> %2,
                                               <4 x float> %3)
@@ -121,11 +121,11 @@
                        <2 x double>* %c) nounwind {
   ; CHECK: fma_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <2 x double>* %c
+  %3 = load <2 x double>, <2 x double>* %c
   ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
   %4 = tail call <2 x double> @llvm.fma.v2f64 (<2 x double> %1, <2 x double> %2,
                                                <2 x double> %3)
@@ -141,11 +141,11 @@
                        <4 x float>* %c) nounwind {
   ; CHECK: fmsub_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <4 x float>* %c
+  %3 = load <4 x float>, <4 x float>* %c
   ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
   %4 = fmul <4 x float> %2, %3
   %5 = fsub <4 x float> %1, %4
@@ -161,11 +161,11 @@
                        <2 x double>* %c) nounwind {
   ; CHECK: fmsub_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <2 x double>* %c
+  %3 = load <2 x double>, <2 x double>* %c
   ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
   %4 = fmul <2 x double> %2, %3
   %5 = fsub <2 x double> %1, %4
@@ -180,9 +180,9 @@
 define void @fdiv_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: fdiv_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = fdiv <4 x float> %1, %2
   ; CHECK-DAG: fdiv.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -196,9 +196,9 @@
 define void @fdiv_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: fdiv_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = fdiv <2 x double> %1, %2
   ; CHECK-DAG: fdiv.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -212,7 +212,7 @@
 define void @fabs_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
   ; CHECK: fabs_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <4 x float> @llvm.fabs.v4f32 (<4 x float> %1)
   ; CHECK-DAG: fmax_a.w [[R3:\$w[0-9]+]], [[R1]], [[R1]]
@@ -226,7 +226,7 @@
 define void @fabs_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
   ; CHECK: fabs_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <2 x double> @llvm.fabs.v2f64 (<2 x double> %1)
   ; CHECK-DAG: fmax_a.d [[R3:\$w[0-9]+]], [[R1]], [[R1]]
@@ -240,7 +240,7 @@
 define void @fexp2_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
   ; CHECK: fexp2_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
   ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
@@ -256,7 +256,7 @@
 define void @fexp2_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
   ; CHECK: fexp2_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
   ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
@@ -272,7 +272,7 @@
 define void @fexp2_v4f32_2(<4 x float>* %c, <4 x float>* %a) nounwind {
   ; CHECK: fexp2_v4f32_2:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <4 x float> @llvm.exp2.v4f32 (<4 x float> %1)
   %3 = fmul <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>, %2
@@ -289,7 +289,7 @@
 define void @fexp2_v2f64_2(<2 x double>* %c, <2 x double>* %a) nounwind {
   ; CHECK: fexp2_v2f64_2:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <2 x double> @llvm.exp2.v2f64 (<2 x double> %1)
   %3 = fmul <2 x double> <double 2.0, double 2.0>, %2
@@ -306,7 +306,7 @@
 define void @fsqrt_v4f32(<4 x float>* %c, <4 x float>* %a) nounwind {
   ; CHECK: fsqrt_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %1)
   ; CHECK-DAG: fsqrt.w [[R3:\$w[0-9]+]], [[R1]]
@@ -320,7 +320,7 @@
 define void @fsqrt_v2f64(<2 x double>* %c, <2 x double>* %a) nounwind {
   ; CHECK: fsqrt_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %1)
   ; CHECK-DAG: fsqrt.d [[R3:\$w[0-9]+]], [[R1]]
@@ -334,7 +334,7 @@
 define void @ffint_u_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: ffint_u_v4f32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = uitofp <4 x i32> %1 to <4 x float>
   ; CHECK-DAG: ffint_u.w [[R3:\$w[0-9]+]], [[R1]]
@@ -348,7 +348,7 @@
 define void @ffint_u_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: ffint_u_v2f64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = uitofp <2 x i64> %1 to <2 x double>
   ; CHECK-DAG: ffint_u.d [[R3:\$w[0-9]+]], [[R1]]
@@ -362,7 +362,7 @@
 define void @ffint_s_v4f32(<4 x float>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: ffint_s_v4f32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = sitofp <4 x i32> %1 to <4 x float>
   ; CHECK-DAG: ffint_s.w [[R3:\$w[0-9]+]], [[R1]]
@@ -376,7 +376,7 @@
 define void @ffint_s_v2f64(<2 x double>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: ffint_s_v2f64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = sitofp <2 x i64> %1 to <2 x double>
   ; CHECK-DAG: ffint_s.d [[R3:\$w[0-9]+]], [[R1]]
@@ -390,7 +390,7 @@
 define void @ftrunc_u_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
   ; CHECK: ftrunc_u_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = fptoui <4 x float> %1 to <4 x i32>
   ; CHECK-DAG: ftrunc_u.w [[R3:\$w[0-9]+]], [[R1]]
@@ -404,7 +404,7 @@
 define void @ftrunc_u_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
   ; CHECK: ftrunc_u_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = fptoui <2 x double> %1 to <2 x i64>
   ; CHECK-DAG: ftrunc_u.d [[R3:\$w[0-9]+]], [[R1]]
@@ -418,7 +418,7 @@
 define void @ftrunc_s_v4f32(<4 x i32>* %c, <4 x float>* %a) nounwind {
   ; CHECK: ftrunc_s_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = fptosi <4 x float> %1 to <4 x i32>
   ; CHECK-DAG: ftrunc_s.w [[R3:\$w[0-9]+]], [[R1]]
@@ -432,7 +432,7 @@
 define void @ftrunc_s_v2f64(<2 x i64>* %c, <2 x double>* %a) nounwind {
   ; CHECK: ftrunc_s_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = fptosi <2 x double> %1 to <2 x i64>
   ; CHECK-DAG: ftrunc_s.d [[R3:\$w[0-9]+]], [[R1]]
diff --git a/llvm/test/CodeGen/Mips/msa/basic_operations.ll b/llvm/test/CodeGen/Mips/msa/basic_operations.ll
index dbdf42b..97525be0 100644
--- a/llvm/test/CodeGen/Mips/msa/basic_operations.ll
+++ b/llvm/test/CodeGen/Mips/msa/basic_operations.ll
@@ -258,7 +258,7 @@
 define i32 @extract_sext_v16i8() nounwind {
   ; MIPS32-AE-LABEL: extract_sext_v16i8:
 
-  %1 = load <16 x i8>* @v16i8
+  %1 = load <16 x i8>, <16 x i8>* @v16i8
   ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]],
 
   %2 = add <16 x i8> %1, %1
@@ -277,7 +277,7 @@
 define i32 @extract_sext_v8i16() nounwind {
   ; MIPS32-AE-LABEL: extract_sext_v8i16:
 
-  %1 = load <8 x i16>* @v8i16
+  %1 = load <8 x i16>, <8 x i16>* @v8i16
   ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]],
 
   %2 = add <8 x i16> %1, %1
@@ -296,7 +296,7 @@
 define i32 @extract_sext_v4i32() nounwind {
   ; MIPS32-AE-LABEL: extract_sext_v4i32:
 
-  %1 = load <4 x i32>* @v4i32
+  %1 = load <4 x i32>, <4 x i32>* @v4i32
   ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
 
   %2 = add <4 x i32> %1, %1
@@ -312,7 +312,7 @@
 define i64 @extract_sext_v2i64() nounwind {
   ; MIPS32-AE-LABEL: extract_sext_v2i64:
 
-  %1 = load <2 x i64>* @v2i64
+  %1 = load <2 x i64>, <2 x i64>* @v2i64
   ; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]],
 
   %2 = add <2 x i64> %1, %1
@@ -331,7 +331,7 @@
 define i32 @extract_zext_v16i8() nounwind {
   ; MIPS32-AE-LABEL: extract_zext_v16i8:
 
-  %1 = load <16 x i8>* @v16i8
+  %1 = load <16 x i8>, <16 x i8>* @v16i8
   ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]],
 
   %2 = add <16 x i8> %1, %1
@@ -349,7 +349,7 @@
 define i32 @extract_zext_v8i16() nounwind {
   ; MIPS32-AE-LABEL: extract_zext_v8i16:
 
-  %1 = load <8 x i16>* @v8i16
+  %1 = load <8 x i16>, <8 x i16>* @v8i16
   ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]],
 
   %2 = add <8 x i16> %1, %1
@@ -367,7 +367,7 @@
 define i32 @extract_zext_v4i32() nounwind {
   ; MIPS32-AE-LABEL: extract_zext_v4i32:
 
-  %1 = load <4 x i32>* @v4i32
+  %1 = load <4 x i32>, <4 x i32>* @v4i32
   ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
 
   %2 = add <4 x i32> %1, %1
@@ -383,7 +383,7 @@
 define i64 @extract_zext_v2i64() nounwind {
   ; MIPS32-AE-LABEL: extract_zext_v2i64:
 
-  %1 = load <2 x i64>* @v2i64
+  %1 = load <2 x i64>, <2 x i64>* @v2i64
   ; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]],
 
   %2 = add <2 x i64> %1, %1
@@ -401,14 +401,14 @@
 define i32 @extract_sext_v16i8_vidx() nounwind {
   ; MIPS32-AE-LABEL: extract_sext_v16i8_vidx:
 
-  %1 = load <16 x i8>* @v16i8
+  %1 = load <16 x i8>, <16 x i8>* @v16i8
   ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v16i8)(
   ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]], 0([[PTR_V]])
 
   %2 = add <16 x i8> %1, %1
   ; MIPS32-AE-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 
-  %3 = load i32* @i32
+  %3 = load i32, i32* @i32
   ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
@@ -425,14 +425,14 @@
 define i32 @extract_sext_v8i16_vidx() nounwind {
   ; MIPS32-AE-LABEL: extract_sext_v8i16_vidx:
 
-  %1 = load <8 x i16>* @v8i16
+  %1 = load <8 x i16>, <8 x i16>* @v8i16
   ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v8i16)(
   ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]], 0([[PTR_V]])
 
   %2 = add <8 x i16> %1, %1
   ; MIPS32-AE-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 
-  %3 = load i32* @i32
+  %3 = load i32, i32* @i32
   ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
@@ -449,14 +449,14 @@
 define i32 @extract_sext_v4i32_vidx() nounwind {
   ; MIPS32-AE-LABEL: extract_sext_v4i32_vidx:
 
-  %1 = load <4 x i32>* @v4i32
+  %1 = load <4 x i32>, <4 x i32>* @v4i32
   ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4i32)(
   ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
 
   %2 = add <4 x i32> %1, %1
   ; MIPS32-AE-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 
-  %3 = load i32* @i32
+  %3 = load i32, i32* @i32
   ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
@@ -472,14 +472,14 @@
 define i64 @extract_sext_v2i64_vidx() nounwind {
   ; MIPS32-AE-LABEL: extract_sext_v2i64_vidx:
 
-  %1 = load <2 x i64>* @v2i64
+  %1 = load <2 x i64>, <2 x i64>* @v2i64
   ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2i64)(
   ; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
 
   %2 = add <2 x i64> %1, %1
   ; MIPS32-AE-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 
-  %3 = load i32* @i32
+  %3 = load i32, i32* @i32
   ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
@@ -497,14 +497,14 @@
 define i32 @extract_zext_v16i8_vidx() nounwind {
   ; MIPS32-AE-LABEL: extract_zext_v16i8_vidx:
 
-  %1 = load <16 x i8>* @v16i8
+  %1 = load <16 x i8>, <16 x i8>* @v16i8
   ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v16i8)(
   ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]], 0([[PTR_V]])
 
   %2 = add <16 x i8> %1, %1
   ; MIPS32-AE-DAG: addv.b [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 
-  %3 = load i32* @i32
+  %3 = load i32, i32* @i32
   ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
@@ -521,14 +521,14 @@
 define i32 @extract_zext_v8i16_vidx() nounwind {
   ; MIPS32-AE-LABEL: extract_zext_v8i16_vidx:
 
-  %1 = load <8 x i16>* @v8i16
+  %1 = load <8 x i16>, <8 x i16>* @v8i16
   ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v8i16)(
   ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]], 0([[PTR_V]])
 
   %2 = add <8 x i16> %1, %1
   ; MIPS32-AE-DAG: addv.h [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 
-  %3 = load i32* @i32
+  %3 = load i32, i32* @i32
   ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
@@ -545,14 +545,14 @@
 define i32 @extract_zext_v4i32_vidx() nounwind {
   ; MIPS32-AE-LABEL: extract_zext_v4i32_vidx:
 
-  %1 = load <4 x i32>* @v4i32
+  %1 = load <4 x i32>, <4 x i32>* @v4i32
   ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4i32)(
   ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
 
   %2 = add <4 x i32> %1, %1
   ; MIPS32-AE-DAG: addv.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 
-  %3 = load i32* @i32
+  %3 = load i32, i32* @i32
   ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
@@ -568,14 +568,14 @@
 define i64 @extract_zext_v2i64_vidx() nounwind {
   ; MIPS32-AE-LABEL: extract_zext_v2i64_vidx:
 
-  %1 = load <2 x i64>* @v2i64
+  %1 = load <2 x i64>, <2 x i64>* @v2i64
   ; MIPS32-AE-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2i64)(
   ; MIPS32-AE-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
 
   %2 = add <2 x i64> %1, %1
   ; MIPS32-AE-DAG: addv.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 
-  %3 = load i32* @i32
+  %3 = load i32, i32* @i32
   ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
@@ -593,7 +593,7 @@
 define void @insert_v16i8(i32 %a) nounwind {
   ; MIPS32-AE-LABEL: insert_v16i8:
 
-  %1 = load <16 x i8>* @v16i8
+  %1 = load <16 x i8>, <16 x i8>* @v16i8
   ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]],
 
   %a2 = trunc i32 %a to i8
@@ -615,7 +615,7 @@
 define void @insert_v8i16(i32 %a) nounwind {
   ; MIPS32-AE-LABEL: insert_v8i16:
 
-  %1 = load <8 x i16>* @v8i16
+  %1 = load <8 x i16>, <8 x i16>* @v8i16
   ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]],
 
   %a2 = trunc i32 %a to i16
@@ -637,7 +637,7 @@
 define void @insert_v4i32(i32 %a) nounwind {
   ; MIPS32-AE-LABEL: insert_v4i32:
 
-  %1 = load <4 x i32>* @v4i32
+  %1 = load <4 x i32>, <4 x i32>* @v4i32
   ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
 
   ; MIPS32-AE-NOT: andi
@@ -656,7 +656,7 @@
 define void @insert_v2i64(i64 %a) nounwind {
   ; MIPS32-AE-LABEL: insert_v2i64:
 
-  %1 = load <2 x i64>* @v2i64
+  %1 = load <2 x i64>, <2 x i64>* @v2i64
   ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
 
   ; MIPS32-AE-NOT: andi
@@ -676,10 +676,10 @@
 define void @insert_v16i8_vidx(i32 %a) nounwind {
   ; MIPS32-AE: insert_v16i8_vidx:
 
-  %1 = load <16 x i8>* @v16i8
+  %1 = load <16 x i8>, <16 x i8>* @v16i8
   ; MIPS32-AE-DAG: ld.b [[R1:\$w[0-9]+]],
 
-  %2 = load i32* @i32
+  %2 = load i32, i32* @i32
   ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
@@ -705,10 +705,10 @@
 define void @insert_v8i16_vidx(i32 %a) nounwind {
   ; MIPS32-AE: insert_v8i16_vidx:
 
-  %1 = load <8 x i16>* @v8i16
+  %1 = load <8 x i16>, <8 x i16>* @v8i16
   ; MIPS32-AE-DAG: ld.h [[R1:\$w[0-9]+]],
 
-  %2 = load i32* @i32
+  %2 = load i32, i32* @i32
   ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
@@ -735,10 +735,10 @@
 define void @insert_v4i32_vidx(i32 %a) nounwind {
   ; MIPS32-AE: insert_v4i32_vidx:
 
-  %1 = load <4 x i32>* @v4i32
+  %1 = load <4 x i32>, <4 x i32>* @v4i32
   ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
 
-  %2 = load i32* @i32
+  %2 = load i32, i32* @i32
   ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
@@ -762,10 +762,10 @@
 define void @insert_v2i64_vidx(i64 %a) nounwind {
   ; MIPS32-AE: insert_v2i64_vidx:
 
-  %1 = load <2 x i64>* @v2i64
+  %1 = load <2 x i64>, <2 x i64>* @v2i64
   ; MIPS32-AE-DAG: ld.w [[R1:\$w[0-9]+]],
 
-  %2 = load i32* @i32
+  %2 = load i32, i32* @i32
   ; MIPS32-AE-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; MIPS32-AE-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
diff --git a/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll b/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll
index a0c9d29..53c1f11 100644
--- a/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll
+++ b/llvm/test/CodeGen/Mips/msa/basic_operations_float.ll
@@ -75,7 +75,7 @@
 define void @nonconst_v4f32() nounwind {
   ; MIPS32-LABEL: nonconst_v4f32:
 
-  %1 = load float *@f32
+  %1 = load float, float *@f32
   %2 = insertelement <4 x float> undef, float %1, i32 0
   %3 = insertelement <4 x float> %2, float %1, i32 1
   %4 = insertelement <4 x float> %3, float %1, i32 2
@@ -91,7 +91,7 @@
 define void @nonconst_v2f64() nounwind {
   ; MIPS32-LABEL: nonconst_v2f64:
 
-  %1 = load double *@f64
+  %1 = load double, double *@f64
   %2 = insertelement <2 x double> undef, double %1, i32 0
   %3 = insertelement <2 x double> %2, double %1, i32 1
   store volatile <2 x double> %3, <2 x double>*@v2f64
@@ -105,7 +105,7 @@
 define float @extract_v4f32() nounwind {
   ; MIPS32-LABEL: extract_v4f32:
 
-  %1 = load <4 x float>* @v4f32
+  %1 = load <4 x float>, <4 x float>* @v4f32
   ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
 
   %2 = fadd <4 x float> %1, %1
@@ -123,7 +123,7 @@
 define float @extract_v4f32_elt0() nounwind {
   ; MIPS32-LABEL: extract_v4f32_elt0:
 
-  %1 = load <4 x float>* @v4f32
+  %1 = load <4 x float>, <4 x float>* @v4f32
   ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
 
   %2 = fadd <4 x float> %1, %1
@@ -141,7 +141,7 @@
 define float @extract_v4f32_elt2() nounwind {
   ; MIPS32-LABEL: extract_v4f32_elt2:
 
-  %1 = load <4 x float>* @v4f32
+  %1 = load <4 x float>, <4 x float>* @v4f32
   ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
 
   %2 = fadd <4 x float> %1, %1
@@ -159,14 +159,14 @@
 define float @extract_v4f32_vidx() nounwind {
   ; MIPS32-LABEL: extract_v4f32_vidx:
 
-  %1 = load <4 x float>* @v4f32
+  %1 = load <4 x float>, <4 x float>* @v4f32
   ; MIPS32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4f32)(
   ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
 
   %2 = fadd <4 x float> %1, %1
   ; MIPS32-DAG: fadd.w [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 
-  %3 = load i32* @i32
+  %3 = load i32, i32* @i32
   ; MIPS32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; MIPS32-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
@@ -180,7 +180,7 @@
 define double @extract_v2f64() nounwind {
   ; MIPS32-LABEL: extract_v2f64:
 
-  %1 = load <2 x double>* @v2f64
+  %1 = load <2 x double>, <2 x double>* @v2f64
   ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]],
 
   %2 = fadd <2 x double> %1, %1
@@ -203,7 +203,7 @@
 define double @extract_v2f64_elt0() nounwind {
   ; MIPS32-LABEL: extract_v2f64_elt0:
 
-  %1 = load <2 x double>* @v2f64
+  %1 = load <2 x double>, <2 x double>* @v2f64
   ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]],
 
   %2 = fadd <2 x double> %1, %1
@@ -224,14 +224,14 @@
 define double @extract_v2f64_vidx() nounwind {
   ; MIPS32-LABEL: extract_v2f64_vidx:
 
-  %1 = load <2 x double>* @v2f64
+  %1 = load <2 x double>, <2 x double>* @v2f64
   ; MIPS32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2f64)(
   ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
 
   %2 = fadd <2 x double> %1, %1
   ; MIPS32-DAG: fadd.d [[R2:\$w[0-9]+]], [[R1]], [[R1]]
 
-  %3 = load i32* @i32
+  %3 = load i32, i32* @i32
   ; MIPS32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; MIPS32-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
@@ -245,7 +245,7 @@
 define void @insert_v4f32(float %a) nounwind {
   ; MIPS32-LABEL: insert_v4f32:
 
-  %1 = load <4 x float>* @v4f32
+  %1 = load <4 x float>, <4 x float>* @v4f32
   ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]],
 
   %2 = insertelement <4 x float> %1, float %a, i32 1
@@ -262,7 +262,7 @@
 define void @insert_v2f64(double %a) nounwind {
   ; MIPS32-LABEL: insert_v2f64:
 
-  %1 = load <2 x double>* @v2f64
+  %1 = load <2 x double>, <2 x double>* @v2f64
   ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]],
 
   %2 = insertelement <2 x double> %1, double %a, i32 1
@@ -279,11 +279,11 @@
 define void @insert_v4f32_vidx(float %a) nounwind {
   ; MIPS32-LABEL: insert_v4f32_vidx:
 
-  %1 = load <4 x float>* @v4f32
+  %1 = load <4 x float>, <4 x float>* @v4f32
   ; MIPS32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v4f32)(
   ; MIPS32-DAG: ld.w [[R1:\$w[0-9]+]], 0([[PTR_V]])
 
-  %2 = load i32* @i32
+  %2 = load i32, i32* @i32
   ; MIPS32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; MIPS32-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
@@ -305,11 +305,11 @@
 define void @insert_v2f64_vidx(double %a) nounwind {
   ; MIPS32-LABEL: insert_v2f64_vidx:
 
-  %1 = load <2 x double>* @v2f64
+  %1 = load <2 x double>, <2 x double>* @v2f64
   ; MIPS32-DAG: lw [[PTR_V:\$[0-9]+]], %got(v2f64)(
   ; MIPS32-DAG: ld.d [[R1:\$w[0-9]+]], 0([[PTR_V]])
 
-  %2 = load i32* @i32
+  %2 = load i32, i32* @i32
   ; MIPS32-DAG: lw [[PTR_I:\$[0-9]+]], %got(i32)(
   ; MIPS32-DAG: lw [[IDX:\$[0-9]+]], 0([[PTR_I]])
 
diff --git a/llvm/test/CodeGen/Mips/msa/bit.ll b/llvm/test/CodeGen/Mips/msa/bit.ll
index 59ddbe1..f005730 100644
--- a/llvm/test/CodeGen/Mips/msa/bit.ll
+++ b/llvm/test/CodeGen/Mips/msa/bit.ll
@@ -8,7 +8,7 @@
 
 define void @llvm_mips_sat_s_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_sat_s_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sat_s_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.sat.s.b(<16 x i8> %0, i32 7)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_sat_s_b_RES
   ret void
@@ -27,7 +27,7 @@
 
 define void @llvm_mips_sat_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_sat_s_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sat_s_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.sat.s.h(<8 x i16> %0, i32 7)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_sat_s_h_RES
   ret void
@@ -46,7 +46,7 @@
 
 define void @llvm_mips_sat_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_sat_s_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sat_s_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.sat.s.w(<4 x i32> %0, i32 7)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_sat_s_w_RES
   ret void
@@ -65,7 +65,7 @@
 
 define void @llvm_mips_sat_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_sat_s_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sat_s_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.sat.s.d(<2 x i64> %0, i32 7)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_sat_s_d_RES
   ret void
@@ -84,7 +84,7 @@
 
 define void @llvm_mips_sat_u_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_sat_u_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sat_u_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.sat.u.b(<16 x i8> %0, i32 7)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_sat_u_b_RES
   ret void
@@ -103,7 +103,7 @@
 
 define void @llvm_mips_sat_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_sat_u_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sat_u_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.sat.u.h(<8 x i16> %0, i32 7)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_sat_u_h_RES
   ret void
@@ -122,7 +122,7 @@
 
 define void @llvm_mips_sat_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_sat_u_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sat_u_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.sat.u.w(<4 x i32> %0, i32 7)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_sat_u_w_RES
   ret void
@@ -141,7 +141,7 @@
 
 define void @llvm_mips_sat_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_sat_u_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sat_u_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.sat.u.d(<2 x i64> %0, i32 7)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_sat_u_d_RES
   ret void
@@ -160,7 +160,7 @@
 
 define void @llvm_mips_slli_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_slli_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_slli_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.slli.b(<16 x i8> %0, i32 7)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_slli_b_RES
   ret void
@@ -179,7 +179,7 @@
 
 define void @llvm_mips_slli_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_slli_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_slli_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.slli.h(<8 x i16> %0, i32 7)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_slli_h_RES
   ret void
@@ -198,7 +198,7 @@
 
 define void @llvm_mips_slli_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_slli_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_slli_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.slli.w(<4 x i32> %0, i32 7)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_slli_w_RES
   ret void
@@ -217,7 +217,7 @@
 
 define void @llvm_mips_slli_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_slli_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_slli_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.slli.d(<2 x i64> %0, i32 7)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_slli_d_RES
   ret void
@@ -236,7 +236,7 @@
 
 define void @llvm_mips_srai_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_srai_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_srai_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.srai.b(<16 x i8> %0, i32 7)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_srai_b_RES
   ret void
@@ -255,7 +255,7 @@
 
 define void @llvm_mips_srai_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_srai_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_srai_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.srai.h(<8 x i16> %0, i32 7)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_srai_h_RES
   ret void
@@ -274,7 +274,7 @@
 
 define void @llvm_mips_srai_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_srai_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_srai_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.srai.w(<4 x i32> %0, i32 7)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_srai_w_RES
   ret void
@@ -293,7 +293,7 @@
 
 define void @llvm_mips_srai_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_srai_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_srai_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.srai.d(<2 x i64> %0, i32 7)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_srai_d_RES
   ret void
@@ -312,7 +312,7 @@
 
 define void @llvm_mips_srari_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_srari_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_srari_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.srari.b(<16 x i8> %0, i32 7)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_srari_b_RES
   ret void
@@ -331,7 +331,7 @@
 
 define void @llvm_mips_srari_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_srari_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_srari_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.srari.h(<8 x i16> %0, i32 7)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_srari_h_RES
   ret void
@@ -350,7 +350,7 @@
 
 define void @llvm_mips_srari_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_srari_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_srari_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.srari.w(<4 x i32> %0, i32 7)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_srari_w_RES
   ret void
@@ -369,7 +369,7 @@
 
 define void @llvm_mips_srari_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_srari_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_srari_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.srari.d(<2 x i64> %0, i32 7)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_srari_d_RES
   ret void
@@ -388,7 +388,7 @@
 
 define void @llvm_mips_srli_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_srli_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_srli_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.srli.b(<16 x i8> %0, i32 7)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_srli_b_RES
   ret void
@@ -407,7 +407,7 @@
 
 define void @llvm_mips_srli_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_srli_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_srli_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.srli.h(<8 x i16> %0, i32 7)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_srli_h_RES
   ret void
@@ -426,7 +426,7 @@
 
 define void @llvm_mips_srli_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_srli_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_srli_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.srli.w(<4 x i32> %0, i32 7)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_srli_w_RES
   ret void
@@ -445,7 +445,7 @@
 
 define void @llvm_mips_srli_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_srli_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_srli_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.srli.d(<2 x i64> %0, i32 7)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_srli_d_RES
   ret void
@@ -464,7 +464,7 @@
 
 define void @llvm_mips_srlri_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_srlri_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_srlri_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.srlri.b(<16 x i8> %0, i32 7)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_srlri_b_RES
   ret void
@@ -483,7 +483,7 @@
 
 define void @llvm_mips_srlri_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_srlri_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_srlri_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.srlri.h(<8 x i16> %0, i32 7)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_srlri_h_RES
   ret void
@@ -502,7 +502,7 @@
 
 define void @llvm_mips_srlri_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_srlri_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_srlri_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.srlri.w(<4 x i32> %0, i32 7)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_srlri_w_RES
   ret void
@@ -521,7 +521,7 @@
 
 define void @llvm_mips_srlri_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_srlri_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_srlri_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.srlri.d(<2 x i64> %0, i32 7)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_srlri_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/bitcast.ll b/llvm/test/CodeGen/Mips/msa/bitcast.ll
index 8e880ec..837cc28 100644
--- a/llvm/test/CodeGen/Mips/msa/bitcast.ll
+++ b/llvm/test/CodeGen/Mips/msa/bitcast.ll
@@ -5,7 +5,7 @@
 
 define void @v16i8_to_v16i8(<16 x i8>* %src, <16 x i8>* %dst) nounwind {
 entry:
-  %0 = load volatile <16 x i8>* %src
+  %0 = load volatile <16 x i8>, <16 x i8>* %src
   %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
   %2 = bitcast <16 x i8> %1 to <16 x i8>
   %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
@@ -29,7 +29,7 @@
 
 define void @v16i8_to_v8i16(<16 x i8>* %src, <8 x i16>* %dst) nounwind {
 entry:
-  %0 = load volatile <16 x i8>* %src
+  %0 = load volatile <16 x i8>, <16 x i8>* %src
   %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
   %2 = bitcast <16 x i8> %1 to <8 x i16>
   %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
@@ -56,7 +56,7 @@
 ; are no operations for v8f16 to put in the way.
 define void @v16i8_to_v8f16(<16 x i8>* %src, <8 x half>* %dst) nounwind {
 entry:
-  %0 = load volatile <16 x i8>* %src
+  %0 = load volatile <16 x i8>, <16 x i8>* %src
   %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
   %2 = bitcast <16 x i8> %1 to <8 x half>
   store <8 x half> %2, <8 x half>* %dst
@@ -77,7 +77,7 @@
 
 define void @v16i8_to_v4i32(<16 x i8>* %src, <4 x i32>* %dst) nounwind {
 entry:
-  %0 = load volatile <16 x i8>* %src
+  %0 = load volatile <16 x i8>, <16 x i8>* %src
   %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
   %2 = bitcast <16 x i8> %1 to <4 x i32>
   %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
@@ -102,7 +102,7 @@
 
 define void @v16i8_to_v4f32(<16 x i8>* %src, <4 x float>* %dst) nounwind {
 entry:
-  %0 = load volatile <16 x i8>* %src
+  %0 = load volatile <16 x i8>, <16 x i8>* %src
   %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
   %2 = bitcast <16 x i8> %1 to <4 x float>
   %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
@@ -127,7 +127,7 @@
 
 define void @v16i8_to_v2i64(<16 x i8>* %src, <2 x i64>* %dst) nounwind {
 entry:
-  %0 = load volatile <16 x i8>* %src
+  %0 = load volatile <16 x i8>, <16 x i8>* %src
   %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
   %2 = bitcast <16 x i8> %1 to <2 x i64>
   %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
@@ -153,7 +153,7 @@
 
 define void @v16i8_to_v2f64(<16 x i8>* %src, <2 x double>* %dst) nounwind {
 entry:
-  %0 = load volatile <16 x i8>* %src
+  %0 = load volatile <16 x i8>, <16 x i8>* %src
   %1 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0, <16 x i8> %0)
   %2 = bitcast <16 x i8> %1 to <2 x double>
   %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
@@ -179,7 +179,7 @@
 
 define void @v8i16_to_v16i8(<8 x i16>* %src, <16 x i8>* %dst) nounwind {
 entry:
-  %0 = load volatile <8 x i16>* %src
+  %0 = load volatile <8 x i16>, <8 x i16>* %src
   %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
   %2 = bitcast <8 x i16> %1 to <16 x i8>
   %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
@@ -204,7 +204,7 @@
 
 define void @v8i16_to_v8i16(<8 x i16>* %src, <8 x i16>* %dst) nounwind {
 entry:
-  %0 = load volatile <8 x i16>* %src
+  %0 = load volatile <8 x i16>, <8 x i16>* %src
   %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
   %2 = bitcast <8 x i16> %1 to <8 x i16>
   %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
@@ -230,7 +230,7 @@
 ; are no operations for v8f16 to put in the way.
 define void @v8i16_to_v8f16(<8 x i16>* %src, <8 x half>* %dst) nounwind {
 entry:
-  %0 = load volatile <8 x i16>* %src
+  %0 = load volatile <8 x i16>, <8 x i16>* %src
   %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
   %2 = bitcast <8 x i16> %1 to <8 x half>
   store <8 x half> %2, <8 x half>* %dst
@@ -251,7 +251,7 @@
 
 define void @v8i16_to_v4i32(<8 x i16>* %src, <4 x i32>* %dst) nounwind {
 entry:
-  %0 = load volatile <8 x i16>* %src
+  %0 = load volatile <8 x i16>, <8 x i16>* %src
   %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
   %2 = bitcast <8 x i16> %1 to <4 x i32>
   %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
@@ -276,7 +276,7 @@
 
 define void @v8i16_to_v4f32(<8 x i16>* %src, <4 x float>* %dst) nounwind {
 entry:
-  %0 = load volatile <8 x i16>* %src
+  %0 = load volatile <8 x i16>, <8 x i16>* %src
   %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
   %2 = bitcast <8 x i16> %1 to <4 x float>
   %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
@@ -301,7 +301,7 @@
 
 define void @v8i16_to_v2i64(<8 x i16>* %src, <2 x i64>* %dst) nounwind {
 entry:
-  %0 = load volatile <8 x i16>* %src
+  %0 = load volatile <8 x i16>, <8 x i16>* %src
   %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
   %2 = bitcast <8 x i16> %1 to <2 x i64>
   %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
@@ -326,7 +326,7 @@
 
 define void @v8i16_to_v2f64(<8 x i16>* %src, <2 x double>* %dst) nounwind {
 entry:
-  %0 = load volatile <8 x i16>* %src
+  %0 = load volatile <8 x i16>, <8 x i16>* %src
   %1 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0, <8 x i16> %0)
   %2 = bitcast <8 x i16> %1 to <2 x double>
   %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
@@ -354,7 +354,7 @@
 ; are no operations for v8f16 to put in the way.
 define void @v8f16_to_v16i8(<8 x half>* %src, <16 x i8>* %dst) nounwind {
 entry:
-  %0 = load volatile <8 x half>* %src
+  %0 = load volatile <8 x half>, <8 x half>* %src
   %1 = bitcast <8 x half> %0 to <16 x i8>
   %2 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %1, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* %dst
@@ -378,7 +378,7 @@
 ; are no operations for v8f16 to put in the way.
 define void @v8f16_to_v8i16(<8 x half>* %src, <8 x i16>* %dst) nounwind {
 entry:
-  %0 = load volatile <8 x half>* %src
+  %0 = load volatile <8 x half>, <8 x half>* %src
   %1 = bitcast <8 x half> %0 to <8 x i16>
   %2 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %1, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* %dst
@@ -403,7 +403,7 @@
 ; are no operations for v8f16 to put in the way.
 define void @v8f16_to_v8f16(<8 x half>* %src, <8 x half>* %dst) nounwind {
 entry:
-  %0 = load volatile <8 x half>* %src
+  %0 = load volatile <8 x half>, <8 x half>* %src
   %1 = bitcast <8 x half> %0 to <8 x half>
   store <8 x half> %1, <8 x half>* %dst
   ret void
@@ -423,7 +423,7 @@
 ; are no operations for v8f16 to put in the way.
 define void @v8f16_to_v4i32(<8 x half>* %src, <4 x i32>* %dst) nounwind {
 entry:
-  %0 = load volatile <8 x half>* %src
+  %0 = load volatile <8 x half>, <8 x half>* %src
   %1 = bitcast <8 x half> %0 to <4 x i32>
   %2 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %1, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* %dst
@@ -447,7 +447,7 @@
 ; are no operations for v8f16 to put in the way.
 define void @v8f16_to_v4f32(<8 x half>* %src, <4 x float>* %dst) nounwind {
 entry:
-  %0 = load volatile <8 x half>* %src
+  %0 = load volatile <8 x half>, <8 x half>* %src
   %1 = bitcast <8 x half> %0 to <4 x float>
   %2 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %1, <4 x float> %1)
   store <4 x float> %2, <4 x float>* %dst
@@ -471,7 +471,7 @@
 ; are no operations for v8f16 to put in the way.
 define void @v8f16_to_v2i64(<8 x half>* %src, <2 x i64>* %dst) nounwind {
 entry:
-  %0 = load volatile <8 x half>* %src
+  %0 = load volatile <8 x half>, <8 x half>* %src
   %1 = bitcast <8 x half> %0 to <2 x i64>
   %2 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %1, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* %dst
@@ -495,7 +495,7 @@
 ; are no operations for v8f16 to put in the way.
 define void @v8f16_to_v2f64(<8 x half>* %src, <2 x double>* %dst) nounwind {
 entry:
-  %0 = load volatile <8 x half>* %src
+  %0 = load volatile <8 x half>, <8 x half>* %src
   %1 = bitcast <8 x half> %0 to <2 x double>
   %2 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %1, <2 x double> %1)
   store <2 x double> %2, <2 x double>* %dst
@@ -518,7 +518,7 @@
 
 define void @v4i32_to_v16i8(<4 x i32>* %src, <16 x i8>* %dst) nounwind {
 entry:
-  %0 = load volatile <4 x i32>* %src
+  %0 = load volatile <4 x i32>, <4 x i32>* %src
   %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
   %2 = bitcast <4 x i32> %1 to <16 x i8>
   %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
@@ -543,7 +543,7 @@
 
 define void @v4i32_to_v8i16(<4 x i32>* %src, <8 x i16>* %dst) nounwind {
 entry:
-  %0 = load volatile <4 x i32>* %src
+  %0 = load volatile <4 x i32>, <4 x i32>* %src
   %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
   %2 = bitcast <4 x i32> %1 to <8 x i16>
   %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
@@ -570,7 +570,7 @@
 ; are no operations for v8f16 to put in the way.
 define void @v4i32_to_v8f16(<4 x i32>* %src, <8 x half>* %dst) nounwind {
 entry:
-  %0 = load volatile <4 x i32>* %src
+  %0 = load volatile <4 x i32>, <4 x i32>* %src
   %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
   %2 = bitcast <4 x i32> %1 to <8 x half>
   store <8 x half> %2, <8 x half>* %dst
@@ -591,7 +591,7 @@
 
 define void @v4i32_to_v4i32(<4 x i32>* %src, <4 x i32>* %dst) nounwind {
 entry:
-  %0 = load volatile <4 x i32>* %src
+  %0 = load volatile <4 x i32>, <4 x i32>* %src
   %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
   %2 = bitcast <4 x i32> %1 to <4 x i32>
   %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
@@ -615,7 +615,7 @@
 
 define void @v4i32_to_v4f32(<4 x i32>* %src, <4 x float>* %dst) nounwind {
 entry:
-  %0 = load volatile <4 x i32>* %src
+  %0 = load volatile <4 x i32>, <4 x i32>* %src
   %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
   %2 = bitcast <4 x i32> %1 to <4 x float>
   %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
@@ -639,7 +639,7 @@
 
 define void @v4i32_to_v2i64(<4 x i32>* %src, <2 x i64>* %dst) nounwind {
 entry:
-  %0 = load volatile <4 x i32>* %src
+  %0 = load volatile <4 x i32>, <4 x i32>* %src
   %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
   %2 = bitcast <4 x i32> %1 to <2 x i64>
   %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
@@ -664,7 +664,7 @@
 
 define void @v4i32_to_v2f64(<4 x i32>* %src, <2 x double>* %dst) nounwind {
 entry:
-  %0 = load volatile <4 x i32>* %src
+  %0 = load volatile <4 x i32>, <4 x i32>* %src
   %1 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %0)
   %2 = bitcast <4 x i32> %1 to <2 x double>
   %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
@@ -689,7 +689,7 @@
 
 define void @v4f32_to_v16i8(<4 x float>* %src, <16 x i8>* %dst) nounwind {
 entry:
-  %0 = load volatile <4 x float>* %src
+  %0 = load volatile <4 x float>, <4 x float>* %src
   %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
   %2 = bitcast <4 x float> %1 to <16 x i8>
   %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
@@ -714,7 +714,7 @@
 
 define void @v4f32_to_v8i16(<4 x float>* %src, <8 x i16>* %dst) nounwind {
 entry:
-  %0 = load volatile <4 x float>* %src
+  %0 = load volatile <4 x float>, <4 x float>* %src
   %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
   %2 = bitcast <4 x float> %1 to <8 x i16>
   %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
@@ -741,7 +741,7 @@
 ; are no operations for v8f16 to put in the way.
 define void @v4f32_to_v8f16(<4 x float>* %src, <8 x half>* %dst) nounwind {
 entry:
-  %0 = load volatile <4 x float>* %src
+  %0 = load volatile <4 x float>, <4 x float>* %src
   %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
   %2 = bitcast <4 x float> %1 to <8 x half>
   store <8 x half> %2, <8 x half>* %dst
@@ -762,7 +762,7 @@
 
 define void @v4f32_to_v4i32(<4 x float>* %src, <4 x i32>* %dst) nounwind {
 entry:
-  %0 = load volatile <4 x float>* %src
+  %0 = load volatile <4 x float>, <4 x float>* %src
   %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
   %2 = bitcast <4 x float> %1 to <4 x i32>
   %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
@@ -786,7 +786,7 @@
 
 define void @v4f32_to_v4f32(<4 x float>* %src, <4 x float>* %dst) nounwind {
 entry:
-  %0 = load volatile <4 x float>* %src
+  %0 = load volatile <4 x float>, <4 x float>* %src
   %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
   %2 = bitcast <4 x float> %1 to <4 x float>
   %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
@@ -810,7 +810,7 @@
 
 define void @v4f32_to_v2i64(<4 x float>* %src, <2 x i64>* %dst) nounwind {
 entry:
-  %0 = load volatile <4 x float>* %src
+  %0 = load volatile <4 x float>, <4 x float>* %src
   %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
   %2 = bitcast <4 x float> %1 to <2 x i64>
   %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
@@ -835,7 +835,7 @@
 
 define void @v4f32_to_v2f64(<4 x float>* %src, <2 x double>* %dst) nounwind {
 entry:
-  %0 = load volatile <4 x float>* %src
+  %0 = load volatile <4 x float>, <4 x float>* %src
   %1 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %0, <4 x float> %0)
   %2 = bitcast <4 x float> %1 to <2 x double>
   %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
@@ -860,7 +860,7 @@
 
 define void @v2i64_to_v16i8(<2 x i64>* %src, <16 x i8>* %dst) nounwind {
 entry:
-  %0 = load volatile <2 x i64>* %src
+  %0 = load volatile <2 x i64>, <2 x i64>* %src
   %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
   %2 = bitcast <2 x i64> %1 to <16 x i8>
   %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
@@ -886,7 +886,7 @@
 
 define void @v2i64_to_v8i16(<2 x i64>* %src, <8 x i16>* %dst) nounwind {
 entry:
-  %0 = load volatile <2 x i64>* %src
+  %0 = load volatile <2 x i64>, <2 x i64>* %src
   %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
   %2 = bitcast <2 x i64> %1 to <8 x i16>
   %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
@@ -913,7 +913,7 @@
 ; are no operations for v8f16 to put in the way.
 define void @v2i64_to_v8f16(<2 x i64>* %src, <8 x half>* %dst) nounwind {
 entry:
-  %0 = load volatile <2 x i64>* %src
+  %0 = load volatile <2 x i64>, <2 x i64>* %src
   %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
   %2 = bitcast <2 x i64> %1 to <8 x half>
   store <8 x half> %2, <8 x half>* %dst
@@ -934,7 +934,7 @@
 
 define void @v2i64_to_v4i32(<2 x i64>* %src, <4 x i32>* %dst) nounwind {
 entry:
-  %0 = load volatile <2 x i64>* %src
+  %0 = load volatile <2 x i64>, <2 x i64>* %src
   %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
   %2 = bitcast <2 x i64> %1 to <4 x i32>
   %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
@@ -959,7 +959,7 @@
 
 define void @v2i64_to_v4f32(<2 x i64>* %src, <4 x float>* %dst) nounwind {
 entry:
-  %0 = load volatile <2 x i64>* %src
+  %0 = load volatile <2 x i64>, <2 x i64>* %src
   %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
   %2 = bitcast <2 x i64> %1 to <4 x float>
   %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
@@ -984,7 +984,7 @@
 
 define void @v2i64_to_v2i64(<2 x i64>* %src, <2 x i64>* %dst) nounwind {
 entry:
-  %0 = load volatile <2 x i64>* %src
+  %0 = load volatile <2 x i64>, <2 x i64>* %src
   %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
   %2 = bitcast <2 x i64> %1 to <2 x i64>
   %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
@@ -1008,7 +1008,7 @@
 
 define void @v2i64_to_v2f64(<2 x i64>* %src, <2 x double>* %dst) nounwind {
 entry:
-  %0 = load volatile <2 x i64>* %src
+  %0 = load volatile <2 x i64>, <2 x i64>* %src
   %1 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0, <2 x i64> %0)
   %2 = bitcast <2 x i64> %1 to <2 x double>
   %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
@@ -1032,7 +1032,7 @@
 
 define void @v2f64_to_v16i8(<2 x double>* %src, <16 x i8>* %dst) nounwind {
 entry:
-  %0 = load volatile <2 x double>* %src
+  %0 = load volatile <2 x double>, <2 x double>* %src
   %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
   %2 = bitcast <2 x double> %1 to <16 x i8>
   %3 = tail call <16 x i8> @llvm.mips.addv.b(<16 x i8> %2, <16 x i8> %2)
@@ -1058,7 +1058,7 @@
 
 define void @v2f64_to_v8i16(<2 x double>* %src, <8 x i16>* %dst) nounwind {
 entry:
-  %0 = load volatile <2 x double>* %src
+  %0 = load volatile <2 x double>, <2 x double>* %src
   %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
   %2 = bitcast <2 x double> %1 to <8 x i16>
   %3 = tail call <8 x i16> @llvm.mips.addv.h(<8 x i16> %2, <8 x i16> %2)
@@ -1085,7 +1085,7 @@
 ; are no operations for v8f16 to put in the way.
 define void @v2f64_to_v8f16(<2 x double>* %src, <8 x half>* %dst) nounwind {
 entry:
-  %0 = load volatile <2 x double>* %src
+  %0 = load volatile <2 x double>, <2 x double>* %src
   %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
   %2 = bitcast <2 x double> %1 to <8 x half>
   store <8 x half> %2, <8 x half>* %dst
@@ -1106,7 +1106,7 @@
 
 define void @v2f64_to_v4i32(<2 x double>* %src, <4 x i32>* %dst) nounwind {
 entry:
-  %0 = load volatile <2 x double>* %src
+  %0 = load volatile <2 x double>, <2 x double>* %src
   %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
   %2 = bitcast <2 x double> %1 to <4 x i32>
   %3 = tail call <4 x i32> @llvm.mips.addv.w(<4 x i32> %2, <4 x i32> %2)
@@ -1131,7 +1131,7 @@
 
 define void @v2f64_to_v4f32(<2 x double>* %src, <4 x float>* %dst) nounwind {
 entry:
-  %0 = load volatile <2 x double>* %src
+  %0 = load volatile <2 x double>, <2 x double>* %src
   %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
   %2 = bitcast <2 x double> %1 to <4 x float>
   %3 = tail call <4 x float> @llvm.mips.fadd.w(<4 x float> %2, <4 x float> %2)
@@ -1156,7 +1156,7 @@
 
 define void @v2f64_to_v2i64(<2 x double>* %src, <2 x i64>* %dst) nounwind {
 entry:
-  %0 = load volatile <2 x double>* %src
+  %0 = load volatile <2 x double>, <2 x double>* %src
   %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
   %2 = bitcast <2 x double> %1 to <2 x i64>
   %3 = tail call <2 x i64> @llvm.mips.addv.d(<2 x i64> %2, <2 x i64> %2)
@@ -1180,7 +1180,7 @@
 
 define void @v2f64_to_v2f64(<2 x double>* %src, <2 x double>* %dst) nounwind {
 entry:
-  %0 = load volatile <2 x double>* %src
+  %0 = load volatile <2 x double>, <2 x double>* %src
   %1 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %0, <2 x double> %0)
   %2 = bitcast <2 x double> %1 to <2 x double>
   %3 = tail call <2 x double> @llvm.mips.fadd.d(<2 x double> %2, <2 x double> %2)
diff --git a/llvm/test/CodeGen/Mips/msa/bitwise.ll b/llvm/test/CodeGen/Mips/msa/bitwise.ll
index 5d57198..2a260b2 100644
--- a/llvm/test/CodeGen/Mips/msa/bitwise.ll
+++ b/llvm/test/CodeGen/Mips/msa/bitwise.ll
@@ -4,9 +4,9 @@
 define void @and_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: and_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = and <16 x i8> %1, %2
   ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -20,9 +20,9 @@
 define void @and_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: and_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = and <8 x i16> %1, %2
   ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -36,9 +36,9 @@
 define void @and_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: and_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = and <4 x i32> %1, %2
   ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -52,9 +52,9 @@
 define void @and_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: and_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = and <2 x i64> %1, %2
   ; CHECK-DAG: and.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -68,7 +68,7 @@
 define void @and_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: and_v16i8_i:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = and <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   ; CHECK-DAG: andi.b [[R4:\$w[0-9]+]], [[R1]], 1
@@ -82,7 +82,7 @@
 define void @and_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: and_v8i16_i:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = and <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1
@@ -97,7 +97,7 @@
 define void @and_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: and_v4i32_i:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = and <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1
@@ -112,7 +112,7 @@
 define void @and_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: and_v2i64_i:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = and <2 x i64> %1, <i64 1, i64 1>
   ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1
@@ -127,9 +127,9 @@
 define void @or_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: or_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = or <16 x i8> %1, %2
   ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -143,9 +143,9 @@
 define void @or_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: or_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = or <8 x i16> %1, %2
   ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -159,9 +159,9 @@
 define void @or_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: or_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = or <4 x i32> %1, %2
   ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -175,9 +175,9 @@
 define void @or_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: or_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = or <2 x i64> %1, %2
   ; CHECK-DAG: or.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -191,7 +191,7 @@
 define void @or_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: or_v16i8_i:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = or <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ; CHECK-DAG: ori.b [[R4:\$w[0-9]+]], [[R1]], 3
@@ -205,7 +205,7 @@
 define void @or_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: or_v8i16_i:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = or <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3
@@ -220,7 +220,7 @@
 define void @or_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: or_v4i32_i:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = or <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
   ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3
@@ -235,7 +235,7 @@
 define void @or_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: or_v2i64_i:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = or <2 x i64> %1, <i64 3, i64 3>
   ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3
@@ -250,9 +250,9 @@
 define void @nor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: nor_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = or <16 x i8> %1, %2
   %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -267,9 +267,9 @@
 define void @nor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: nor_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = or <8 x i16> %1, %2
   %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -284,9 +284,9 @@
 define void @nor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: nor_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = or <4 x i32> %1, %2
   %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -301,9 +301,9 @@
 define void @nor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: nor_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = or <2 x i64> %1, %2
   %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
@@ -318,7 +318,7 @@
 define void @nor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: nor_v16i8_i:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = or <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -333,7 +333,7 @@
 define void @nor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: nor_v8i16_i:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = or <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %3 = xor <8 x i16> %2, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -349,7 +349,7 @@
 define void @nor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: nor_v4i32_i:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = or <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   %3 = xor <4 x i32> %2, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -365,7 +365,7 @@
 define void @nor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: nor_v2i64_i:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = or <2 x i64> %1, <i64 1, i64 1>
   %3 = xor <2 x i64> %2, <i64 -1, i64 -1>
@@ -381,9 +381,9 @@
 define void @xor_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: xor_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = xor <16 x i8> %1, %2
   ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -397,9 +397,9 @@
 define void @xor_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: xor_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = xor <8 x i16> %1, %2
   ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -413,9 +413,9 @@
 define void @xor_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: xor_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = xor <4 x i32> %1, %2
   ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -429,9 +429,9 @@
 define void @xor_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: xor_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = xor <2 x i64> %1, %2
   ; CHECK-DAG: xor.v [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -445,7 +445,7 @@
 define void @xor_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: xor_v16i8_i:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = xor <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ; CHECK-DAG: xori.b [[R4:\$w[0-9]+]], [[R1]], 3
@@ -459,7 +459,7 @@
 define void @xor_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: xor_v8i16_i:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = xor <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
   ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3
@@ -474,7 +474,7 @@
 define void @xor_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: xor_v4i32_i:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = xor <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
   ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3
@@ -489,7 +489,7 @@
 define void @xor_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: xor_v2i64_i:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = xor <2 x i64> %1, <i64 3, i64 3>
   ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3
@@ -504,9 +504,9 @@
 define void @sll_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: sll_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = shl <16 x i8> %1, %2
   ; CHECK-DAG: sll.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -520,9 +520,9 @@
 define void @sll_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: sll_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = shl <8 x i16> %1, %2
   ; CHECK-DAG: sll.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -536,9 +536,9 @@
 define void @sll_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: sll_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = shl <4 x i32> %1, %2
   ; CHECK-DAG: sll.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -552,9 +552,9 @@
 define void @sll_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: sll_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = shl <2 x i64> %1, %2
   ; CHECK-DAG: sll.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -568,7 +568,7 @@
 define void @sll_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: sll_v16i8_i:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = shl <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   ; CHECK-DAG: slli.b [[R4:\$w[0-9]+]], [[R1]], 1
@@ -582,7 +582,7 @@
 define void @sll_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: sll_v8i16_i:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = shl <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   ; CHECK-DAG: slli.h [[R4:\$w[0-9]+]], [[R1]], 1
@@ -596,7 +596,7 @@
 define void @sll_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: sll_v4i32_i:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = shl <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   ; CHECK-DAG: slli.w [[R4:\$w[0-9]+]], [[R1]], 1
@@ -610,7 +610,7 @@
 define void @sll_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: sll_v2i64_i:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = shl <2 x i64> %1, <i64 1, i64 1>
   ; CHECK-DAG: slli.d [[R4:\$w[0-9]+]], [[R1]], 1
@@ -624,9 +624,9 @@
 define void @sra_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: sra_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = ashr <16 x i8> %1, %2
   ; CHECK-DAG: sra.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -640,9 +640,9 @@
 define void @sra_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: sra_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = ashr <8 x i16> %1, %2
   ; CHECK-DAG: sra.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -656,9 +656,9 @@
 define void @sra_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: sra_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = ashr <4 x i32> %1, %2
   ; CHECK-DAG: sra.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -672,9 +672,9 @@
 define void @sra_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: sra_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = ashr <2 x i64> %1, %2
   ; CHECK-DAG: sra.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -688,7 +688,7 @@
 define void @sra_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: sra_v16i8_i:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = ashr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   ; CHECK-DAG: srai.b [[R4:\$w[0-9]+]], [[R1]], 1
@@ -702,7 +702,7 @@
 define void @sra_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: sra_v8i16_i:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = ashr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   ; CHECK-DAG: srai.h [[R4:\$w[0-9]+]], [[R1]], 1
@@ -716,7 +716,7 @@
 define void @sra_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: sra_v4i32_i:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = ashr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   ; CHECK-DAG: srai.w [[R4:\$w[0-9]+]], [[R1]], 1
@@ -730,7 +730,7 @@
 define void @sra_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: sra_v2i64_i:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = ashr <2 x i64> %1, <i64 1, i64 1>
   ; CHECK-DAG: srai.d [[R4:\$w[0-9]+]], [[R1]], 1
@@ -744,9 +744,9 @@
 define void @srl_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: srl_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = lshr <16 x i8> %1, %2
   ; CHECK-DAG: srl.b [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -760,9 +760,9 @@
 define void @srl_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: srl_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = lshr <8 x i16> %1, %2
   ; CHECK-DAG: srl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -776,9 +776,9 @@
 define void @srl_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: srl_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = lshr <4 x i32> %1, %2
   ; CHECK-DAG: srl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -792,9 +792,9 @@
 define void @srl_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: srl_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = lshr <2 x i64> %1, %2
   ; CHECK-DAG: srl.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -808,7 +808,7 @@
 define void @srl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: srl_v16i8_i:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = lshr <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   ; CHECK-DAG: srli.b [[R4:\$w[0-9]+]], [[R1]], 1
@@ -822,7 +822,7 @@
 define void @srl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: srl_v8i16_i:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = lshr <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   ; CHECK-DAG: srli.h [[R4:\$w[0-9]+]], [[R1]], 1
@@ -836,7 +836,7 @@
 define void @srl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: srl_v4i32_i:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = lshr <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   ; CHECK-DAG: srli.w [[R4:\$w[0-9]+]], [[R1]], 1
@@ -850,7 +850,7 @@
 define void @srl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: srl_v2i64_i:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = lshr <2 x i64> %1, <i64 1, i64 1>
   ; CHECK-DAG: srli.d [[R4:\$w[0-9]+]], [[R1]], 1
@@ -864,7 +864,7 @@
 define void @ctpop_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: ctpop_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <16 x i8> @llvm.ctpop.v16i8 (<16 x i8> %1)
   ; CHECK-DAG: pcnt.b [[R3:\$w[0-9]+]], [[R1]]
@@ -878,7 +878,7 @@
 define void @ctpop_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: ctpop_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <8 x i16> @llvm.ctpop.v8i16 (<8 x i16> %1)
   ; CHECK-DAG: pcnt.h [[R3:\$w[0-9]+]], [[R1]]
@@ -892,7 +892,7 @@
 define void @ctpop_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: ctpop_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <4 x i32> @llvm.ctpop.v4i32 (<4 x i32> %1)
   ; CHECK-DAG: pcnt.w [[R3:\$w[0-9]+]], [[R1]]
@@ -906,7 +906,7 @@
 define void @ctpop_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: ctpop_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <2 x i64> @llvm.ctpop.v2i64 (<2 x i64> %1)
   ; CHECK-DAG: pcnt.d [[R3:\$w[0-9]+]], [[R1]]
@@ -920,7 +920,7 @@
 define void @ctlz_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: ctlz_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <16 x i8> @llvm.ctlz.v16i8 (<16 x i8> %1)
   ; CHECK-DAG: nlzc.b [[R3:\$w[0-9]+]], [[R1]]
@@ -934,7 +934,7 @@
 define void @ctlz_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: ctlz_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <8 x i16> @llvm.ctlz.v8i16 (<8 x i16> %1)
   ; CHECK-DAG: nlzc.h [[R3:\$w[0-9]+]], [[R1]]
@@ -948,7 +948,7 @@
 define void @ctlz_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: ctlz_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <4 x i32> @llvm.ctlz.v4i32 (<4 x i32> %1)
   ; CHECK-DAG: nlzc.w [[R3:\$w[0-9]+]], [[R1]]
@@ -962,7 +962,7 @@
 define void @ctlz_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: ctlz_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = tail call <2 x i64> @llvm.ctlz.v2i64 (<2 x i64> %1)
   ; CHECK-DAG: nlzc.d [[R3:\$w[0-9]+]], [[R1]]
@@ -976,11 +976,11 @@
 define void @bsel_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %m) nounwind {
   ; CHECK: bsel_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <16 x i8>* %m
+  %3 = load <16 x i8>, <16 x i8>* %m
   ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
   %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1,
                           i8 -1, i8 -1, i8 -1, i8 -1,
@@ -1002,9 +1002,9 @@
 define void @bsel_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %m) nounwind {
   ; CHECK: bsel_v16i8_i:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %m
+  %2 = load <16 x i8>, <16 x i8>* %m
   ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($6)
   %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1,
                           i8 -1, i8 -1, i8 -1, i8 -1,
@@ -1027,9 +1027,9 @@
 define void @bsel_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: bsel_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = and <8 x i16> %1, <i16 6, i16 6, i16 6, i16 6,
                           i16 6, i16 6, i16 6, i16 6>
@@ -1048,9 +1048,9 @@
 define void @bsel_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: bsel_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = and <4 x i32> %1, <i32 6, i32 6, i32 6, i32 6>
   %4 = and <4 x i32> %2, <i32 4294967289, i32 4294967289, i32 4294967289, i32 4294967289>
@@ -1067,9 +1067,9 @@
 define void @bsel_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: bsel_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = and <2 x i64> %1, <i64 6, i64 6>
   %4 = and <2 x i64> %2, <i64 18446744073709551609, i64 18446744073709551609>
@@ -1086,9 +1086,9 @@
 define void @binsl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: binsl_v16i8_i:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = and <16 x i8> %1, <i8 192, i8 192, i8 192, i8 192,
                           i8 192, i8 192, i8 192, i8 192,
@@ -1110,9 +1110,9 @@
 define void @binsl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: binsl_v8i16_i:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = and <8 x i16> %1, <i16 49152, i16 49152, i16 49152, i16 49152,
                           i16 49152, i16 49152, i16 49152, i16 49152>
@@ -1130,9 +1130,9 @@
 define void @binsl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: binsl_v4i32_i:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = and <4 x i32> %1, <i32 3221225472, i32 3221225472, i32 3221225472, i32 3221225472>
   %4 = and <4 x i32> %2, <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
@@ -1148,9 +1148,9 @@
 define void @binsl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: binsl_v2i64_i:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = and <2 x i64> %1, <i64 18446744073709551608, i64 18446744073709551608>
   %4 = and <2 x i64> %2, <i64 7, i64 7>
@@ -1170,9 +1170,9 @@
 define void @binsr_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: binsr_v16i8_i:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = and <16 x i8> %1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3,
                           i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1192,9 +1192,9 @@
 define void @binsr_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: binsr_v8i16_i:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = and <8 x i16> %1, <i16 3, i16 3, i16 3, i16 3,
                           i16 3, i16 3, i16 3, i16 3>
@@ -1212,9 +1212,9 @@
 define void @binsr_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: binsr_v4i32_i:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = and <4 x i32> %1, <i32 3, i32 3, i32 3, i32 3>
   %4 = and <4 x i32> %2, <i32 4294967292, i32 4294967292, i32 4294967292, i32 4294967292>
@@ -1230,9 +1230,9 @@
 define void @binsr_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: binsr_v2i64_i:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = and <2 x i64> %1, <i64 3, i64 3>
   %4 = and <2 x i64> %2, <i64 18446744073709551612, i64 18446744073709551612>
@@ -1248,9 +1248,9 @@
 define void @bclr_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: bclr_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
   %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -1266,9 +1266,9 @@
 define void @bclr_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: bclr_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
   %4 = xor <8 x i16> %3, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -1284,9 +1284,9 @@
 define void @bclr_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: bclr_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
   %4 = xor <4 x i32> %3, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1302,9 +1302,9 @@
 define void @bclr_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: bclr_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = shl <2 x i64> <i64 1, i64 1>, %2
   %4 = xor <2 x i64> %3, <i64 -1, i64 -1>
@@ -1320,9 +1320,9 @@
 define void @bset_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: bset_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
   %4 = or <16 x i8> %1, %3
@@ -1337,9 +1337,9 @@
 define void @bset_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: bset_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
   %4 = or <8 x i16> %1, %3
@@ -1354,9 +1354,9 @@
 define void @bset_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: bset_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
   %4 = or <4 x i32> %1, %3
@@ -1371,9 +1371,9 @@
 define void @bset_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: bset_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = shl <2 x i64> <i64 1, i64 1>, %2
   %4 = or <2 x i64> %1, %3
@@ -1388,9 +1388,9 @@
 define void @bneg_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: bneg_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = shl <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, %2
   %4 = xor <16 x i8> %1, %3
@@ -1405,9 +1405,9 @@
 define void @bneg_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: bneg_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, %2
   %4 = xor <8 x i16> %1, %3
@@ -1422,9 +1422,9 @@
 define void @bneg_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: bneg_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %2
   %4 = xor <4 x i32> %1, %3
@@ -1439,9 +1439,9 @@
 define void @bneg_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: bneg_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = shl <2 x i64> <i64 1, i64 1>, %2
   %4 = xor <2 x i64> %1, %3
@@ -1456,7 +1456,7 @@
 define void @bclri_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: bclri_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = xor <16 x i8> <i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8, i8  8>,
                      <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -1473,7 +1473,7 @@
 define void @bclri_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: bclri_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = xor <8 x i16> <i16  8, i16  8, i16  8, i16  8, i16  8, i16  8, i16  8, i16  8>,
                      <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -1489,7 +1489,7 @@
 define void @bclri_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: bclri_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = xor <4 x i32> <i32  8, i32  8, i32  8, i32  8>,
                      <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -1505,7 +1505,7 @@
 define void @bclri_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: bclri_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = xor <2 x i64> <i64  8, i64  8>,
                      <i64 -1, i64 -1>
@@ -1521,7 +1521,7 @@
 define void @bseti_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: bseti_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = or <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
   ; CHECK-DAG: bseti.b [[R3:\$w[0-9]+]], [[R1]], 3
@@ -1535,7 +1535,7 @@
 define void @bseti_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: bseti_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = or <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
   ; CHECK-DAG: bseti.h [[R3:\$w[0-9]+]], [[R1]], 3
@@ -1549,7 +1549,7 @@
 define void @bseti_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: bseti_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = or <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
   ; CHECK-DAG: bseti.w [[R3:\$w[0-9]+]], [[R1]], 3
@@ -1563,7 +1563,7 @@
 define void @bseti_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: bseti_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = or <2 x i64> %1, <i64 8, i64 8>
   ; CHECK-DAG: bseti.d [[R3:\$w[0-9]+]], [[R1]], 3
@@ -1577,7 +1577,7 @@
 define void @bnegi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: bnegi_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = xor <16 x i8> %1, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
   ; CHECK-DAG: bnegi.b [[R3:\$w[0-9]+]], [[R1]], 3
@@ -1591,7 +1591,7 @@
 define void @bnegi_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: bnegi_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = xor <8 x i16> %1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
   ; CHECK-DAG: bnegi.h [[R3:\$w[0-9]+]], [[R1]], 3
@@ -1605,7 +1605,7 @@
 define void @bnegi_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: bnegi_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = xor <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
   ; CHECK-DAG: bnegi.w [[R3:\$w[0-9]+]], [[R1]], 3
@@ -1619,7 +1619,7 @@
 define void @bnegi_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: bnegi_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = xor <2 x i64> %1, <i64 8, i64 8>
   ; CHECK-DAG: bnegi.d [[R3:\$w[0-9]+]], [[R1]], 3
diff --git a/llvm/test/CodeGen/Mips/msa/compare.ll b/llvm/test/CodeGen/Mips/msa/compare.ll
index 87ca148..bc4f6e7 100644
--- a/llvm/test/CodeGen/Mips/msa/compare.ll
+++ b/llvm/test/CodeGen/Mips/msa/compare.ll
@@ -4,9 +4,9 @@
 define void @ceq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: ceq_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp eq <16 x i8> %1, %2
   %4 = sext <16 x i1> %3 to <16 x i8>
@@ -21,9 +21,9 @@
 define void @ceq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: ceq_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp eq <8 x i16> %1, %2
   %4 = sext <8 x i1> %3 to <8 x i16>
@@ -38,9 +38,9 @@
 define void @ceq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: ceq_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp eq <4 x i32> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
@@ -55,9 +55,9 @@
 define void @ceq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: ceq_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp eq <2 x i64> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
@@ -72,9 +72,9 @@
 define void @cle_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: cle_s_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sle <16 x i8> %1, %2
   %4 = sext <16 x i1> %3 to <16 x i8>
@@ -89,9 +89,9 @@
 define void @cle_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: cle_s_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sle <8 x i16> %1, %2
   %4 = sext <8 x i1> %3 to <8 x i16>
@@ -106,9 +106,9 @@
 define void @cle_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: cle_s_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sle <4 x i32> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
@@ -123,9 +123,9 @@
 define void @cle_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: cle_s_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sle <2 x i64> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
@@ -140,9 +140,9 @@
 define void @cle_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: cle_u_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ule <16 x i8> %1, %2
   %4 = sext <16 x i1> %3 to <16 x i8>
@@ -157,9 +157,9 @@
 define void @cle_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: cle_u_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ule <8 x i16> %1, %2
   %4 = sext <8 x i1> %3 to <8 x i16>
@@ -174,9 +174,9 @@
 define void @cle_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: cle_u_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ule <4 x i32> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
@@ -191,9 +191,9 @@
 define void @cle_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: cle_u_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ule <2 x i64> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
@@ -208,9 +208,9 @@
 define void @clt_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: clt_s_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp slt <16 x i8> %1, %2
   %4 = sext <16 x i1> %3 to <16 x i8>
@@ -225,9 +225,9 @@
 define void @clt_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: clt_s_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp slt <8 x i16> %1, %2
   %4 = sext <8 x i1> %3 to <8 x i16>
@@ -242,9 +242,9 @@
 define void @clt_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: clt_s_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp slt <4 x i32> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
@@ -259,9 +259,9 @@
 define void @clt_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: clt_s_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp slt <2 x i64> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
@@ -276,9 +276,9 @@
 define void @clt_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: clt_u_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ult <16 x i8> %1, %2
   %4 = sext <16 x i1> %3 to <16 x i8>
@@ -293,9 +293,9 @@
 define void @clt_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: clt_u_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ult <8 x i16> %1, %2
   %4 = sext <8 x i1> %3 to <8 x i16>
@@ -310,9 +310,9 @@
 define void @clt_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: clt_u_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ult <4 x i32> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
@@ -327,9 +327,9 @@
 define void @clt_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: clt_u_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ult <2 x i64> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
@@ -345,9 +345,9 @@
 ; issues in this area.
 define void @cne_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: cne_v16i8:
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ne <16 x i8> %1, %2
   %4 = sext <16 x i1> %3 to <16 x i8>
@@ -365,9 +365,9 @@
 define void @cne_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: cne_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ne <8 x i16> %1, %2
   %4 = sext <8 x i1> %3 to <8 x i16>
@@ -387,9 +387,9 @@
 define void @cne_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: cne_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ne <4 x i32> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
@@ -409,9 +409,9 @@
 define void @cne_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: cne_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ne <2 x i64> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
@@ -429,7 +429,7 @@
 define void @ceqi_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: ceqi_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp eq <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %3 = sext <16 x i1> %2 to <16 x i8>
@@ -444,7 +444,7 @@
 define void @ceqi_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: ceqi_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp eq <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %3 = sext <8 x i1> %2 to <8 x i16>
@@ -459,7 +459,7 @@
 define void @ceqi_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: ceqi_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp eq <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   %3 = sext <4 x i1> %2 to <4 x i32>
@@ -474,7 +474,7 @@
 define void @ceqi_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: ceqi_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp eq <2 x i64> %1, <i64 1, i64 1>
   %3 = sext <2 x i1> %2 to <2 x i64>
@@ -489,7 +489,7 @@
 define void @clei_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: clei_s_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp sle <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %3 = sext <16 x i1> %2 to <16 x i8>
@@ -504,7 +504,7 @@
 define void @clei_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: clei_s_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp sle <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %3 = sext <8 x i1> %2 to <8 x i16>
@@ -519,7 +519,7 @@
 define void @clei_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: clei_s_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp sle <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   %3 = sext <4 x i1> %2 to <4 x i32>
@@ -534,7 +534,7 @@
 define void @clei_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: clei_s_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp sle <2 x i64> %1, <i64 1, i64 1>
   %3 = sext <2 x i1> %2 to <2 x i64>
@@ -549,7 +549,7 @@
 define void @clei_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: clei_u_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ule <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %3 = sext <16 x i1> %2 to <16 x i8>
@@ -564,7 +564,7 @@
 define void @clei_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: clei_u_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ule <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %3 = sext <8 x i1> %2 to <8 x i16>
@@ -579,7 +579,7 @@
 define void @clei_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: clei_u_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ule <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   %3 = sext <4 x i1> %2 to <4 x i32>
@@ -594,7 +594,7 @@
 define void @clei_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: clei_u_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ule <2 x i64> %1, <i64 1, i64 1>
   %3 = sext <2 x i1> %2 to <2 x i64>
@@ -609,7 +609,7 @@
 define void @clti_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: clti_s_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp slt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %3 = sext <16 x i1> %2 to <16 x i8>
@@ -624,7 +624,7 @@
 define void @clti_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: clti_s_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp slt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %3 = sext <8 x i1> %2 to <8 x i16>
@@ -639,7 +639,7 @@
 define void @clti_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: clti_s_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp slt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   %3 = sext <4 x i1> %2 to <4 x i32>
@@ -654,7 +654,7 @@
 define void @clti_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: clti_s_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp slt <2 x i64> %1, <i64 1, i64 1>
   %3 = sext <2 x i1> %2 to <2 x i64>
@@ -669,7 +669,7 @@
 define void @clti_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: clti_u_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ult <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %3 = sext <16 x i1> %2 to <16 x i8>
@@ -684,7 +684,7 @@
 define void @clti_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: clti_u_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ult <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %3 = sext <8 x i1> %2 to <8 x i16>
@@ -699,7 +699,7 @@
 define void @clti_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: clti_u_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ult <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   %3 = sext <4 x i1> %2 to <4 x i32>
@@ -714,7 +714,7 @@
 define void @clti_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: clti_u_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ult <2 x i64> %1, <i64 1, i64 1>
   %3 = sext <2 x i1> %2 to <2 x i64>
@@ -730,11 +730,11 @@
                         <16 x i8>* %c) nounwind {
   ; CHECK: bsel_s_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <16 x i8>* %c
+  %3 = load <16 x i8>, <16 x i8>* %c
   ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
   %4 = icmp sgt <16 x i8> %1, %2
   ; CHECK-DAG: clt_s.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -752,11 +752,11 @@
                         <8 x i16>* %c) nounwind {
   ; CHECK: bsel_s_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <8 x i16>* %c
+  %3 = load <8 x i16>, <8 x i16>* %c
   ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
   %4 = icmp sgt <8 x i16> %1, %2
   ; CHECK-DAG: clt_s.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -774,11 +774,11 @@
                         <4 x i32>* %c) nounwind {
   ; CHECK: bsel_s_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <4 x i32>* %c
+  %3 = load <4 x i32>, <4 x i32>* %c
   ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
   %4 = icmp sgt <4 x i32> %1, %2
   ; CHECK-DAG: clt_s.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -796,11 +796,11 @@
                         <2 x i64>* %c) nounwind {
   ; CHECK: bsel_s_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <2 x i64>* %c
+  %3 = load <2 x i64>, <2 x i64>* %c
   ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
   %4 = icmp sgt <2 x i64> %1, %2
   ; CHECK-DAG: clt_s.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -818,11 +818,11 @@
                         <16 x i8>* %c) nounwind {
   ; CHECK: bsel_u_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <16 x i8>* %c
+  %3 = load <16 x i8>, <16 x i8>* %c
   ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
   %4 = icmp ugt <16 x i8> %1, %2
   ; CHECK-DAG: clt_u.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -840,11 +840,11 @@
                         <8 x i16>* %c) nounwind {
   ; CHECK: bsel_u_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <8 x i16>* %c
+  %3 = load <8 x i16>, <8 x i16>* %c
   ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7)
   %4 = icmp ugt <8 x i16> %1, %2
   ; CHECK-DAG: clt_u.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -862,11 +862,11 @@
                         <4 x i32>* %c) nounwind {
   ; CHECK: bsel_u_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <4 x i32>* %c
+  %3 = load <4 x i32>, <4 x i32>* %c
   ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
   %4 = icmp ugt <4 x i32> %1, %2
   ; CHECK-DAG: clt_u.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -884,11 +884,11 @@
                         <2 x i64>* %c) nounwind {
   ; CHECK: bsel_u_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <2 x i64>* %c
+  %3 = load <2 x i64>, <2 x i64>* %c
   ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
   %4 = icmp ugt <2 x i64> %1, %2
   ; CHECK-DAG: clt_u.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -906,9 +906,9 @@
                         <16 x i8>* %c) nounwind {
   ; CHECK: bseli_s_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sgt <16 x i8> %1, %2
   ; CHECK-DAG: clt_s.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -925,9 +925,9 @@
                         <8 x i16>* %c) nounwind {
   ; CHECK: bseli_s_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sgt <8 x i16> %1, %2
   ; CHECK-DAG: clt_s.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -945,9 +945,9 @@
                         <4 x i32>* %c) nounwind {
   ; CHECK: bseli_s_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sgt <4 x i32> %1, %2
   ; CHECK-DAG: clt_s.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -965,9 +965,9 @@
                         <2 x i64>* %c) nounwind {
   ; CHECK: bseli_s_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sgt <2 x i64> %1, %2
   ; CHECK-DAG: clt_s.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -985,9 +985,9 @@
                         <16 x i8>* %c) nounwind {
   ; CHECK: bseli_u_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ugt <16 x i8> %1, %2
   ; CHECK-DAG: clt_u.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -1004,9 +1004,9 @@
                         <8 x i16>* %c) nounwind {
   ; CHECK: bseli_u_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ugt <8 x i16> %1, %2
   ; CHECK-DAG: clt_u.h [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -1024,9 +1024,9 @@
                         <4 x i32>* %c) nounwind {
   ; CHECK: bseli_u_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ugt <4 x i32> %1, %2
   ; CHECK-DAG: clt_u.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -1044,9 +1044,9 @@
                         <2 x i64>* %c) nounwind {
   ; CHECK: bseli_u_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ugt <2 x i64> %1, %2
   ; CHECK-DAG: clt_u.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -1063,9 +1063,9 @@
 define void @max_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: max_s_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sgt <16 x i8> %1, %2
   %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
@@ -1080,9 +1080,9 @@
 define void @max_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: max_s_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sgt <8 x i16> %1, %2
   %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
@@ -1097,9 +1097,9 @@
 define void @max_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: max_s_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sgt <4 x i32> %1, %2
   %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
@@ -1114,9 +1114,9 @@
 define void @max_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: max_s_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sgt <2 x i64> %1, %2
   %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
@@ -1131,9 +1131,9 @@
 define void @max_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: max_u_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ugt <16 x i8> %1, %2
   %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
@@ -1148,9 +1148,9 @@
 define void @max_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: max_u_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ugt <8 x i16> %1, %2
   %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
@@ -1165,9 +1165,9 @@
 define void @max_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: max_u_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ugt <4 x i32> %1, %2
   %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
@@ -1182,9 +1182,9 @@
 define void @max_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: max_u_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ugt <2 x i64> %1, %2
   %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
@@ -1199,9 +1199,9 @@
 define void @max_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: max_s_eq_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sge <16 x i8> %1, %2
   %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
@@ -1216,9 +1216,9 @@
 define void @max_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: max_s_eq_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sge <8 x i16> %1, %2
   %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
@@ -1233,9 +1233,9 @@
 define void @max_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: max_s_eq_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sge <4 x i32> %1, %2
   %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
@@ -1250,9 +1250,9 @@
 define void @max_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: max_s_eq_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sge <2 x i64> %1, %2
   %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
@@ -1267,9 +1267,9 @@
 define void @max_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: max_u_eq_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp uge <16 x i8> %1, %2
   %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
@@ -1284,9 +1284,9 @@
 define void @max_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: max_u_eq_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp uge <8 x i16> %1, %2
   %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
@@ -1301,9 +1301,9 @@
 define void @max_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: max_u_eq_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp uge <4 x i32> %1, %2
   %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
@@ -1318,9 +1318,9 @@
 define void @max_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: max_u_eq_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp uge <2 x i64> %1, %2
   %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
@@ -1335,7 +1335,7 @@
 define void @maxi_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: maxi_s_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp sgt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -1350,7 +1350,7 @@
 define void @maxi_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: maxi_s_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp sgt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1365,7 +1365,7 @@
 define void @maxi_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: maxi_s_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp sgt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1380,7 +1380,7 @@
 define void @maxi_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: maxi_s_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp sgt <2 x i64> %1, <i64 1, i64 1>
   %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
@@ -1395,7 +1395,7 @@
 define void @maxi_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: maxi_u_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ugt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -1410,7 +1410,7 @@
 define void @maxi_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: maxi_u_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ugt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1425,7 +1425,7 @@
 define void @maxi_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: maxi_u_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ugt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1440,7 +1440,7 @@
 define void @maxi_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: maxi_u_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ugt <2 x i64> %1, <i64 1, i64 1>
   %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
@@ -1455,7 +1455,7 @@
 define void @maxi_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: maxi_s_eq_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp sge <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -1470,7 +1470,7 @@
 define void @maxi_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: maxi_s_eq_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp sge <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1485,7 +1485,7 @@
 define void @maxi_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: maxi_s_eq_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp sge <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1500,7 +1500,7 @@
 define void @maxi_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: maxi_s_eq_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp sge <2 x i64> %1, <i64 1, i64 1>
   %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
@@ -1515,7 +1515,7 @@
 define void @maxi_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: maxi_u_eq_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp uge <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -1530,7 +1530,7 @@
 define void @maxi_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: maxi_u_eq_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp uge <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1545,7 +1545,7 @@
 define void @maxi_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: maxi_u_eq_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp uge <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1560,7 +1560,7 @@
 define void @maxi_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: maxi_u_eq_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp uge <2 x i64> %1, <i64 1, i64 1>
   %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
@@ -1575,9 +1575,9 @@
 define void @min_s_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: min_s_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sle <16 x i8> %1, %2
   %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
@@ -1592,9 +1592,9 @@
 define void @min_s_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: min_s_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp slt <8 x i16> %1, %2
   %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
@@ -1609,9 +1609,9 @@
 define void @min_s_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: min_s_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp slt <4 x i32> %1, %2
   %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
@@ -1626,9 +1626,9 @@
 define void @min_s_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: min_s_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp slt <2 x i64> %1, %2
   %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
@@ -1643,9 +1643,9 @@
 define void @min_u_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: min_u_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ult <16 x i8> %1, %2
   %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
@@ -1660,9 +1660,9 @@
 define void @min_u_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: min_u_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ult <8 x i16> %1, %2
   %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
@@ -1677,9 +1677,9 @@
 define void @min_u_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: min_u_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ult <4 x i32> %1, %2
   %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
@@ -1694,9 +1694,9 @@
 define void @min_u_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: min_u_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ult <2 x i64> %1, %2
   %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
@@ -1711,9 +1711,9 @@
 define void @min_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: min_s_eq_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sle <16 x i8> %1, %2
   %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
@@ -1728,9 +1728,9 @@
 define void @min_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: min_s_eq_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sle <8 x i16> %1, %2
   %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
@@ -1745,9 +1745,9 @@
 define void @min_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: min_s_eq_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sle <4 x i32> %1, %2
   %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
@@ -1762,9 +1762,9 @@
 define void @min_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: min_s_eq_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp sle <2 x i64> %1, %2
   %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
@@ -1779,9 +1779,9 @@
 define void @min_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: min_u_eq_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ule <16 x i8> %1, %2
   %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> %2
@@ -1796,9 +1796,9 @@
 define void @min_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: min_u_eq_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ule <8 x i16> %1, %2
   %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> %2
@@ -1813,9 +1813,9 @@
 define void @min_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: min_u_eq_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ule <4 x i32> %1, %2
   %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> %2
@@ -1830,9 +1830,9 @@
 define void @min_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: min_u_eq_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = icmp ule <2 x i64> %1, %2
   %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> %2
@@ -1847,7 +1847,7 @@
 define void @mini_s_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: mini_s_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp slt <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -1862,7 +1862,7 @@
 define void @mini_s_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: mini_s_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp slt <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1877,7 +1877,7 @@
 define void @mini_s_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: mini_s_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp slt <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1892,7 +1892,7 @@
 define void @mini_s_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: mini_s_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp slt <2 x i64> %1, <i64 1, i64 1>
   %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
@@ -1907,7 +1907,7 @@
 define void @mini_u_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: mini_u_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ult <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -1922,7 +1922,7 @@
 define void @mini_u_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: mini_u_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ult <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1937,7 +1937,7 @@
 define void @mini_u_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: mini_u_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ult <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -1952,7 +1952,7 @@
 define void @mini_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: mini_u_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ult <2 x i64> %1, <i64 1, i64 1>
   %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
@@ -1967,7 +1967,7 @@
 define void @mini_s_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: mini_s_eq_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp sle <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -1982,7 +1982,7 @@
 define void @mini_s_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: mini_s_eq_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp sle <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -1997,7 +1997,7 @@
 define void @mini_s_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: mini_s_eq_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp sle <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -2012,7 +2012,7 @@
 define void @mini_s_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: mini_s_eq_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp sle <2 x i64> %1, <i64 1, i64 1>
   %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
@@ -2027,7 +2027,7 @@
 define void @mini_u_eq_v16i8(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: mini_u_eq_v16i8:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ule <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -2042,7 +2042,7 @@
 define void @mini_u_eq_v8i16(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: mini_u_eq_v8i16:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ule <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -2057,7 +2057,7 @@
 define void @mini_u_eq_v4i32(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: mini_u_eq_v4i32:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ule <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
   %3 = select <4 x i1> %2, <4 x i32> %1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -2072,7 +2072,7 @@
 define void @mini_u_eq_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: mini_u_eq_v2i64:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = icmp ule <2 x i64> %1, <i64 1, i64 1>
   %3 = select <2 x i1> %2, <2 x i64> %1, <2 x i64> <i64 1, i64 1>
diff --git a/llvm/test/CodeGen/Mips/msa/compare_float.ll b/llvm/test/CodeGen/Mips/msa/compare_float.ll
index e93221b..3229d02 100644
--- a/llvm/test/CodeGen/Mips/msa/compare_float.ll
+++ b/llvm/test/CodeGen/Mips/msa/compare_float.ll
@@ -9,8 +9,8 @@
 define void @false_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: false_v4f32:
 
-  %1 = load <4 x float>* %a
-  %2 = load <4 x float>* %b
+  %1 = load <4 x float>, <4 x float>* %a
+  %2 = load <4 x float>, <4 x float>* %b
   %3 = fcmp false <4 x float> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
   store <4 x i32> %4, <4 x i32>* %c
@@ -25,8 +25,8 @@
 define void @false_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: false_v2f64:
 
-  %1 = load <2 x double>* %a
-  %2 = load <2 x double>* %b
+  %1 = load <2 x double>, <2 x double>* %a
+  %2 = load <2 x double>, <2 x double>* %b
   %3 = fcmp false <2 x double> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
   store <2 x i64> %4, <2 x i64>* %c
@@ -41,9 +41,9 @@
 define void @oeq_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: oeq_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp oeq <4 x float> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
@@ -58,9 +58,9 @@
 define void @oeq_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: oeq_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp oeq <2 x double> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
@@ -75,9 +75,9 @@
 define void @oge_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: oge_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp oge <4 x float> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
@@ -92,9 +92,9 @@
 define void @oge_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: oge_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp oge <2 x double> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
@@ -109,9 +109,9 @@
 define void @ogt_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: ogt_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp ogt <4 x float> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
@@ -126,9 +126,9 @@
 define void @ogt_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: ogt_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp ogt <2 x double> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
@@ -143,9 +143,9 @@
 define void @ole_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: ole_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp ole <4 x float> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
@@ -160,9 +160,9 @@
 define void @ole_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: ole_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp ole <2 x double> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
@@ -177,9 +177,9 @@
 define void @olt_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: olt_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp olt <4 x float> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
@@ -194,9 +194,9 @@
 define void @olt_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: olt_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp olt <2 x double> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
@@ -211,9 +211,9 @@
 define void @one_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: one_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp one <4 x float> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
@@ -228,9 +228,9 @@
 define void @one_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: one_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp one <2 x double> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
@@ -245,9 +245,9 @@
 define void @ord_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: ord_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp ord <4 x float> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
@@ -262,9 +262,9 @@
 define void @ord_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: ord_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp ord <2 x double> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
@@ -279,9 +279,9 @@
 define void @ueq_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: ueq_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp ueq <4 x float> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
@@ -296,9 +296,9 @@
 define void @ueq_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: ueq_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp ueq <2 x double> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
@@ -313,9 +313,9 @@
 define void @uge_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: uge_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp uge <4 x float> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
@@ -330,9 +330,9 @@
 define void @uge_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: uge_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp uge <2 x double> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
@@ -347,9 +347,9 @@
 define void @ugt_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: ugt_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp ugt <4 x float> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
@@ -364,9 +364,9 @@
 define void @ugt_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: ugt_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp ugt <2 x double> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
@@ -381,9 +381,9 @@
 define void @ule_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: ule_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp ule <4 x float> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
@@ -398,9 +398,9 @@
 define void @ule_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: ule_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp ule <2 x double> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
@@ -415,9 +415,9 @@
 define void @ult_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: ult_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp ult <4 x float> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
@@ -432,9 +432,9 @@
 define void @ult_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: ult_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp ult <2 x double> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
@@ -449,9 +449,9 @@
 define void @uno_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: uno_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp uno <4 x float> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
@@ -466,9 +466,9 @@
 define void @uno_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: uno_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp uno <2 x double> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
@@ -483,8 +483,8 @@
 define void @true_v4f32(<4 x i32>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: true_v4f32:
 
-  %1 = load <4 x float>* %a
-  %2 = load <4 x float>* %b
+  %1 = load <4 x float>, <4 x float>* %a
+  %2 = load <4 x float>, <4 x float>* %b
   %3 = fcmp true <4 x float> %1, %2
   %4 = sext <4 x i1> %3 to <4 x i32>
   store <4 x i32> %4, <4 x i32>* %c
@@ -499,8 +499,8 @@
 define void @true_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: true_v2f64:
 
-  %1 = load <2 x double>* %a
-  %2 = load <2 x double>* %b
+  %1 = load <2 x double>, <2 x double>* %a
+  %2 = load <2 x double>, <2 x double>* %b
   %3 = fcmp true <2 x double> %1, %2
   %4 = sext <2 x i1> %3 to <2 x i64>
   store <2 x i64> %4, <2 x i64>* %c
@@ -516,11 +516,11 @@
                           <4 x float>* %c) nounwind {
   ; CHECK: bsel_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <4 x float>* %c
+  %3 = load <4 x float>, <4 x float>* %c
   ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7)
   %4 = fcmp ogt <4 x float> %1, %2
   ; CHECK-DAG: fclt.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -538,11 +538,11 @@
                           <2 x double>* %c) nounwind {
   ; CHECK: bsel_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
-  %3 = load <2 x double>* %c
+  %3 = load <2 x double>, <2 x double>* %c
   ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7)
   %4 = fcmp ogt <2 x double> %1, %2
   ; CHECK-DAG: fclt.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -560,9 +560,9 @@
                           <4 x float>* %c) nounwind {
   ; CHECK: bseli_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp ogt <4 x float> %1, %2
   ; CHECK-DAG: fclt.w [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -580,9 +580,9 @@
                           <2 x double>* %c) nounwind {
   ; CHECK: bseli_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = fcmp ogt <2 x double> %1, %2
   ; CHECK-DAG: fclt.d [[R4:\$w[0-9]+]], [[R2]], [[R1]]
@@ -599,9 +599,9 @@
 define void @max_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: max_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = tail call <4 x float> @llvm.mips.fmax.w(<4 x float> %1, <4 x float> %2)
   ; CHECK-DAG: fmax.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -615,9 +615,9 @@
 define void @max_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: max_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = tail call <2 x double> @llvm.mips.fmax.d(<2 x double> %1, <2 x double> %2)
   ; CHECK-DAG: fmax.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -631,9 +631,9 @@
 define void @min_v4f32(<4 x float>* %c, <4 x float>* %a, <4 x float>* %b) nounwind {
   ; CHECK: min_v4f32:
 
-  %1 = load <4 x float>* %a
+  %1 = load <4 x float>, <4 x float>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x float>* %b
+  %2 = load <4 x float>, <4 x float>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = tail call <4 x float> @llvm.mips.fmin.w(<4 x float> %1, <4 x float> %2)
   ; CHECK-DAG: fmin.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -647,9 +647,9 @@
 define void @min_v2f64(<2 x double>* %c, <2 x double>* %a, <2 x double>* %b) nounwind {
   ; CHECK: min_v2f64:
 
-  %1 = load <2 x double>* %a
+  %1 = load <2 x double>, <2 x double>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x double>* %b
+  %2 = load <2 x double>, <2 x double>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = tail call <2 x double> @llvm.mips.fmin.d(<2 x double> %1, <2 x double> %2)
   ; CHECK-DAG: fmin.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
diff --git a/llvm/test/CodeGen/Mips/msa/elm_copy.ll b/llvm/test/CodeGen/Mips/msa/elm_copy.ll
index 0dd75fa..2a0d74f 100644
--- a/llvm/test/CodeGen/Mips/msa/elm_copy.ll
+++ b/llvm/test/CodeGen/Mips/msa/elm_copy.ll
@@ -15,7 +15,7 @@
 
 define void @llvm_mips_copy_s_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_copy_s_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_copy_s_b_ARG1
   %1 = tail call i32 @llvm.mips.copy.s.b(<16 x i8> %0, i32 1)
   store i32 %1, i32* @llvm_mips_copy_s_b_RES
   ret void
@@ -38,7 +38,7 @@
 
 define void @llvm_mips_copy_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_copy_s_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_copy_s_h_ARG1
   %1 = tail call i32 @llvm.mips.copy.s.h(<8 x i16> %0, i32 1)
   store i32 %1, i32* @llvm_mips_copy_s_h_RES
   ret void
@@ -61,7 +61,7 @@
 
 define void @llvm_mips_copy_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_copy_s_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_copy_s_w_ARG1
   %1 = tail call i32 @llvm.mips.copy.s.w(<4 x i32> %0, i32 1)
   store i32 %1, i32* @llvm_mips_copy_s_w_RES
   ret void
@@ -84,7 +84,7 @@
 
 define void @llvm_mips_copy_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_copy_s_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_copy_s_d_ARG1
   %1 = tail call i64 @llvm.mips.copy.s.d(<2 x i64> %0, i32 1)
   store i64 %1, i64* @llvm_mips_copy_s_d_RES
   ret void
@@ -112,7 +112,7 @@
 
 define void @llvm_mips_copy_u_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_copy_u_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_copy_u_b_ARG1
   %1 = tail call i32 @llvm.mips.copy.u.b(<16 x i8> %0, i32 1)
   store i32 %1, i32* @llvm_mips_copy_u_b_RES
   ret void
@@ -135,7 +135,7 @@
 
 define void @llvm_mips_copy_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_copy_u_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_copy_u_h_ARG1
   %1 = tail call i32 @llvm.mips.copy.u.h(<8 x i16> %0, i32 1)
   store i32 %1, i32* @llvm_mips_copy_u_h_RES
   ret void
@@ -158,7 +158,7 @@
 
 define void @llvm_mips_copy_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_copy_u_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_copy_u_w_ARG1
   %1 = tail call i32 @llvm.mips.copy.u.w(<4 x i32> %0, i32 1)
   store i32 %1, i32* @llvm_mips_copy_u_w_RES
   ret void
@@ -181,7 +181,7 @@
 
 define void @llvm_mips_copy_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_copy_u_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_copy_u_d_ARG1
   %1 = tail call i64 @llvm.mips.copy.u.d(<2 x i64> %0, i32 1)
   store i64 %1, i64* @llvm_mips_copy_u_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/elm_insv.ll b/llvm/test/CodeGen/Mips/msa/elm_insv.ll
index c746e52..46e6289 100644
--- a/llvm/test/CodeGen/Mips/msa/elm_insv.ll
+++ b/llvm/test/CodeGen/Mips/msa/elm_insv.ll
@@ -16,8 +16,8 @@
 
 define void @llvm_mips_insert_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_insert_b_ARG1
-  %1 = load i32* @llvm_mips_insert_b_ARG3
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_insert_b_ARG1
+  %1 = load i32, i32* @llvm_mips_insert_b_ARG3
   %2 = tail call <16 x i8> @llvm.mips.insert.b(<16 x i8> %0, i32 1, i32 %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_insert_b_RES
   ret void
@@ -38,8 +38,8 @@
 
 define void @llvm_mips_insert_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_insert_h_ARG1
-  %1 = load i32* @llvm_mips_insert_h_ARG3
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_insert_h_ARG1
+  %1 = load i32, i32* @llvm_mips_insert_h_ARG3
   %2 = tail call <8 x i16> @llvm.mips.insert.h(<8 x i16> %0, i32 1, i32 %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_insert_h_RES
   ret void
@@ -60,8 +60,8 @@
 
 define void @llvm_mips_insert_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_insert_w_ARG1
-  %1 = load i32* @llvm_mips_insert_w_ARG3
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_insert_w_ARG1
+  %1 = load i32, i32* @llvm_mips_insert_w_ARG3
   %2 = tail call <4 x i32> @llvm.mips.insert.w(<4 x i32> %0, i32 1, i32 %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_insert_w_RES
   ret void
@@ -82,8 +82,8 @@
 
 define void @llvm_mips_insert_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_insert_d_ARG1
-  %1 = load i64* @llvm_mips_insert_d_ARG3
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_insert_d_ARG1
+  %1 = load i64, i64* @llvm_mips_insert_d_ARG3
   %2 = tail call <2 x i64> @llvm.mips.insert.d(<2 x i64> %0, i32 1, i64 %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_insert_d_RES
   ret void
@@ -110,8 +110,8 @@
 
 define void @llvm_mips_insve_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_insve_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_insve_b_ARG3
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_insve_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_insve_b_ARG3
   %2 = tail call <16 x i8> @llvm.mips.insve.b(<16 x i8> %0, i32 1, <16 x i8> %1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_insve_b_RES
   ret void
@@ -136,8 +136,8 @@
 
 define void @llvm_mips_insve_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_insve_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_insve_h_ARG3
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_insve_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_insve_h_ARG3
   %2 = tail call <8 x i16> @llvm.mips.insve.h(<8 x i16> %0, i32 1, <8 x i16> %1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_insve_h_RES
   ret void
@@ -162,8 +162,8 @@
 
 define void @llvm_mips_insve_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_insve_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_insve_w_ARG3
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_insve_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_insve_w_ARG3
   %2 = tail call <4 x i32> @llvm.mips.insve.w(<4 x i32> %0, i32 1, <4 x i32> %1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_insve_w_RES
   ret void
@@ -188,8 +188,8 @@
 
 define void @llvm_mips_insve_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_insve_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_insve_d_ARG3
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_insve_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_insve_d_ARG3
   %2 = tail call <2 x i64> @llvm.mips.insve.d(<2 x i64> %0, i32 1, <2 x i64> %1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_insve_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/elm_move.ll b/llvm/test/CodeGen/Mips/msa/elm_move.ll
index 98c06c7..9665b6d 100644
--- a/llvm/test/CodeGen/Mips/msa/elm_move.ll
+++ b/llvm/test/CodeGen/Mips/msa/elm_move.ll
@@ -9,7 +9,7 @@
 
 define void @llvm_mips_move_vb_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_move_vb_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_move_vb_ARG1
   %1 = tail call <16 x i8> @llvm.mips.move.v(<16 x i8> %0)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_move_vb_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/elm_shift_slide.ll b/llvm/test/CodeGen/Mips/msa/elm_shift_slide.ll
index 00a6544..87f15f1 100644
--- a/llvm/test/CodeGen/Mips/msa/elm_shift_slide.ll
+++ b/llvm/test/CodeGen/Mips/msa/elm_shift_slide.ll
@@ -10,8 +10,8 @@
 
 define void @llvm_mips_sldi_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_sldi_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_sldi_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_sldi_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_sldi_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.sldi.b(<16 x i8> %0, <16 x i8> %1, i32 1)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_sldi_b_RES
   ret void
@@ -31,8 +31,8 @@
 
 define void @llvm_mips_sldi_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_sldi_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_sldi_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_sldi_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_sldi_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.sldi.h(<8 x i16> %0, <8 x i16> %1, i32 1)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_sldi_h_RES
   ret void
@@ -52,8 +52,8 @@
 
 define void @llvm_mips_sldi_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_sldi_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_sldi_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_sldi_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_sldi_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.sldi.w(<4 x i32> %0, <4 x i32> %1, i32 1)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_sldi_w_RES
   ret void
@@ -73,8 +73,8 @@
 
 define void @llvm_mips_sldi_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_sldi_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_sldi_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_sldi_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_sldi_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.sldi.d(<2 x i64> %0, <2 x i64> %1, i32 1)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_sldi_d_RES
   ret void
@@ -93,7 +93,7 @@
 
 define void @llvm_mips_splati_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_splati_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_splati_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.splati.b(<16 x i8> %0, i32 1)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_splati_b_RES
   ret void
@@ -112,7 +112,7 @@
 
 define void @llvm_mips_splati_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_splati_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_splati_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.splati.h(<8 x i16> %0, i32 1)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_splati_h_RES
   ret void
@@ -131,7 +131,7 @@
 
 define void @llvm_mips_splati_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_splati_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_splati_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.splati.w(<4 x i32> %0, i32 1)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_splati_w_RES
   ret void
@@ -150,7 +150,7 @@
 
 define void @llvm_mips_splati_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_splati_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_splati_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.splati.d(<2 x i64> %0, i32 1)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_splati_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/frameindex.ll b/llvm/test/CodeGen/Mips/msa/frameindex.ll
index 3c01190..afd28ae 100644
--- a/llvm/test/CodeGen/Mips/msa/frameindex.ll
+++ b/llvm/test/CodeGen/Mips/msa/frameindex.ll
@@ -5,7 +5,7 @@
   ; MIPS32-AE: loadstore_v16i8_near:
 
   %1 = alloca <16 x i8>
-  %2 = load volatile <16 x i8>* %1
+  %2 = load volatile <16 x i8>, <16 x i8>* %1
   ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0($sp)
   store volatile <16 x i8> %2, <16 x i8>* %1
   ; MIPS32-AE: st.b [[R1]], 0($sp)
@@ -20,7 +20,7 @@
   %1 = alloca <16 x i8>
   %2 = alloca [496 x i8] ; Push the frame right up to 512 bytes
 
-  %3 = load volatile <16 x i8>* %1
+  %3 = load volatile <16 x i8>, <16 x i8>* %1
   ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 496($sp)
   store volatile <16 x i8> %3, <16 x i8>* %1
   ; MIPS32-AE: st.b [[R1]], 496($sp)
@@ -35,7 +35,7 @@
   %1 = alloca <16 x i8>
   %2 = alloca [497 x i8] ; Push the frame just over 512 bytes
 
-  %3 = load volatile <16 x i8>* %1
+  %3 = load volatile <16 x i8>, <16 x i8>* %1
   ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 512
   ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
   store volatile <16 x i8> %3, <16 x i8>* %1
@@ -52,7 +52,7 @@
   %1 = alloca <16 x i8>
   %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
 
-  %3 = load volatile <16 x i8>* %1
+  %3 = load volatile <16 x i8>, <16 x i8>* %1
   ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
   ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
   ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
@@ -71,7 +71,7 @@
   %1 = alloca <16 x i8>
   %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
 
-  %3 = load volatile <16 x i8>* %1
+  %3 = load volatile <16 x i8>, <16 x i8>* %1
   ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
   ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
   ; MIPS32-AE: ld.b [[R1:\$w[0-9]+]], 0([[BASE]])
@@ -88,7 +88,7 @@
   ; MIPS32-AE: loadstore_v8i16_near:
 
   %1 = alloca <8 x i16>
-  %2 = load volatile <8 x i16>* %1
+  %2 = load volatile <8 x i16>, <8 x i16>* %1
   ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0($sp)
   store volatile <8 x i16> %2, <8 x i16>* %1
   ; MIPS32-AE: st.h [[R1]], 0($sp)
@@ -106,7 +106,7 @@
   %4 = bitcast i8* %3 to [2 x <8 x i16>]*
   %5 = getelementptr [2 x <8 x i16>], [2 x <8 x i16>]* %4, i32 0, i32 0
 
-  %6 = load volatile <8 x i16>* %5
+  %6 = load volatile <8 x i16>, <8 x i16>* %5
   ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
   ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
   store volatile <8 x i16> %6, <8 x i16>* %5
@@ -123,7 +123,7 @@
   %1 = alloca <8 x i16>
   %2 = alloca [1008 x i8] ; Push the frame right up to 1024 bytes
 
-  %3 = load volatile <8 x i16>* %1
+  %3 = load volatile <8 x i16>, <8 x i16>* %1
   ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 1008($sp)
   store volatile <8 x i16> %3, <8 x i16>* %1
   ; MIPS32-AE: st.h [[R1]], 1008($sp)
@@ -138,7 +138,7 @@
   %1 = alloca <8 x i16>
   %2 = alloca [1009 x i8] ; Push the frame just over 1024 bytes
 
-  %3 = load volatile <8 x i16>* %1
+  %3 = load volatile <8 x i16>, <8 x i16>* %1
   ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1024
   ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
   store volatile <8 x i16> %3, <8 x i16>* %1
@@ -155,7 +155,7 @@
   %1 = alloca <8 x i16>
   %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
 
-  %3 = load volatile <8 x i16>* %1
+  %3 = load volatile <8 x i16>, <8 x i16>* %1
   ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
   ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
   ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
@@ -174,7 +174,7 @@
   %1 = alloca <8 x i16>
   %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
 
-  %3 = load volatile <8 x i16>* %1
+  %3 = load volatile <8 x i16>, <8 x i16>* %1
   ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
   ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
   ; MIPS32-AE: ld.h [[R1:\$w[0-9]+]], 0([[BASE]])
@@ -191,7 +191,7 @@
   ; MIPS32-AE: loadstore_v4i32_near:
 
   %1 = alloca <4 x i32>
-  %2 = load volatile <4 x i32>* %1
+  %2 = load volatile <4 x i32>, <4 x i32>* %1
   ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0($sp)
   store volatile <4 x i32> %2, <4 x i32>* %1
   ; MIPS32-AE: st.w [[R1]], 0($sp)
@@ -209,7 +209,7 @@
   %4 = bitcast i8* %3 to [2 x <4 x i32>]*
   %5 = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %4, i32 0, i32 0
 
-  %6 = load volatile <4 x i32>* %5
+  %6 = load volatile <4 x i32>, <4 x i32>* %5
   ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
   ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
   store volatile <4 x i32> %6, <4 x i32>* %5
@@ -226,7 +226,7 @@
   %1 = alloca <4 x i32>
   %2 = alloca [2032 x i8] ; Push the frame right up to 2048 bytes
 
-  %3 = load volatile <4 x i32>* %1
+  %3 = load volatile <4 x i32>, <4 x i32>* %1
   ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 2032($sp)
   store volatile <4 x i32> %3, <4 x i32>* %1
   ; MIPS32-AE: st.w [[R1]], 2032($sp)
@@ -241,7 +241,7 @@
   %1 = alloca <4 x i32>
   %2 = alloca [2033 x i8] ; Push the frame just over 2048 bytes
 
-  %3 = load volatile <4 x i32>* %1
+  %3 = load volatile <4 x i32>, <4 x i32>* %1
   ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 2048
   ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
   store volatile <4 x i32> %3, <4 x i32>* %1
@@ -258,7 +258,7 @@
   %1 = alloca <4 x i32>
   %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
 
-  %3 = load volatile <4 x i32>* %1
+  %3 = load volatile <4 x i32>, <4 x i32>* %1
   ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
   ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
   ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
@@ -277,7 +277,7 @@
   %1 = alloca <4 x i32>
   %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
 
-  %3 = load volatile <4 x i32>* %1
+  %3 = load volatile <4 x i32>, <4 x i32>* %1
   ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
   ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
   ; MIPS32-AE: ld.w [[R1:\$w[0-9]+]], 0([[BASE]])
@@ -294,7 +294,7 @@
   ; MIPS32-AE: loadstore_v2i64_near:
 
   %1 = alloca <2 x i64>
-  %2 = load volatile <2 x i64>* %1
+  %2 = load volatile <2 x i64>, <2 x i64>* %1
   ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0($sp)
   store volatile <2 x i64> %2, <2 x i64>* %1
   ; MIPS32-AE: st.d [[R1]], 0($sp)
@@ -312,7 +312,7 @@
   %4 = bitcast i8* %3 to [2 x <2 x i64>]*
   %5 = getelementptr [2 x <2 x i64>], [2 x <2 x i64>]* %4, i32 0, i32 0
 
-  %6 = load volatile <2 x i64>* %5
+  %6 = load volatile <2 x i64>, <2 x i64>* %5
   ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 1
   ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
   store volatile <2 x i64> %6, <2 x i64>* %5
@@ -329,7 +329,7 @@
   %1 = alloca <2 x i64>
   %2 = alloca [4080 x i8] ; Push the frame right up to 4096 bytes
 
-  %3 = load volatile <2 x i64>* %1
+  %3 = load volatile <2 x i64>, <2 x i64>* %1
   ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 4080($sp)
   store volatile <2 x i64> %3, <2 x i64>* %1
   ; MIPS32-AE: st.d [[R1]], 4080($sp)
@@ -344,7 +344,7 @@
   %1 = alloca <2 x i64>
   %2 = alloca [4081 x i8] ; Push the frame just over 4096 bytes
 
-  %3 = load volatile <2 x i64>* %1
+  %3 = load volatile <2 x i64>, <2 x i64>* %1
   ; MIPS32-AE: addiu [[BASE:\$([0-9]+|gp)]], $sp, 4096
   ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
   store volatile <2 x i64> %3, <2 x i64>* %1
@@ -361,7 +361,7 @@
   %1 = alloca <2 x i64>
   %2 = alloca [32752 x i8] ; Push the frame right up to 32768 bytes
 
-  %3 = load volatile <2 x i64>* %1
+  %3 = load volatile <2 x i64>, <2 x i64>* %1
   ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
   ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
   ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
@@ -380,7 +380,7 @@
   %1 = alloca <2 x i64>
   %2 = alloca [32753 x i8] ; Push the frame just over 32768 bytes
 
-  %3 = load volatile <2 x i64>* %1
+  %3 = load volatile <2 x i64>, <2 x i64>* %1
   ; MIPS32-AE: ori [[R2:\$([0-9]+|gp)]], $zero, 32768
   ; MIPS32-AE: addu [[BASE:\$([0-9]+|gp)]], $sp, [[R2]]
   ; MIPS32-AE: ld.d [[R1:\$w[0-9]+]], 0([[BASE]])
diff --git a/llvm/test/CodeGen/Mips/msa/i10.ll b/llvm/test/CodeGen/Mips/msa/i10.ll
index c5a9617..204884b 100644
--- a/llvm/test/CodeGen/Mips/msa/i10.ll
+++ b/llvm/test/CodeGen/Mips/msa/i10.ll
@@ -7,7 +7,7 @@
 
 define i32 @llvm_mips_bnz_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_bnz_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bnz_b_ARG1
   %1 = tail call i32 @llvm.mips.bnz.b(<16 x i8> %0)
   %2 = icmp eq i32 %1, 0
   br i1 %2, label %true, label %false
@@ -28,7 +28,7 @@
 
 define i32 @llvm_mips_bnz_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_bnz_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bnz_h_ARG1
   %1 = tail call i32 @llvm.mips.bnz.h(<8 x i16> %0)
   %2 = icmp eq i32 %1, 0
   br i1 %2, label %true, label %false
@@ -49,7 +49,7 @@
 
 define i32 @llvm_mips_bnz_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_bnz_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bnz_w_ARG1
   %1 = tail call i32 @llvm.mips.bnz.w(<4 x i32> %0)
   %2 = icmp eq i32 %1, 0
   br i1 %2, label %true, label %false
@@ -70,7 +70,7 @@
 
 define i32 @llvm_mips_bnz_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_bnz_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bnz_d_ARG1
   %1 = tail call i32 @llvm.mips.bnz.d(<2 x i64> %0)
   %2 = icmp eq i32 %1, 0
   br i1 %2, label %true, label %false
diff --git a/llvm/test/CodeGen/Mips/msa/i5-a.ll b/llvm/test/CodeGen/Mips/msa/i5-a.ll
index 0b50720..f9486b1 100644
--- a/llvm/test/CodeGen/Mips/msa/i5-a.ll
+++ b/llvm/test/CodeGen/Mips/msa/i5-a.ll
@@ -9,7 +9,7 @@
 
 define void @llvm_mips_addvi_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_addvi_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_addvi_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.addvi.b(<16 x i8> %0, i32 14)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_addvi_b_RES
   ret void
@@ -28,7 +28,7 @@
 
 define void @llvm_mips_addvi_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_addvi_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_addvi_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.addvi.h(<8 x i16> %0, i32 14)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_addvi_h_RES
   ret void
@@ -47,7 +47,7 @@
 
 define void @llvm_mips_addvi_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_addvi_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_addvi_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.addvi.w(<4 x i32> %0, i32 14)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_addvi_w_RES
   ret void
@@ -66,7 +66,7 @@
 
 define void @llvm_mips_addvi_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_addvi_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_addvi_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.addvi.d(<2 x i64> %0, i32 14)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_addvi_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/i5-b.ll b/llvm/test/CodeGen/Mips/msa/i5-b.ll
index da6be66..40ab095f 100644
--- a/llvm/test/CodeGen/Mips/msa/i5-b.ll
+++ b/llvm/test/CodeGen/Mips/msa/i5-b.ll
@@ -9,7 +9,7 @@
 
 define void @llvm_mips_bclri_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_bclri_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bclri_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.bclri.b(<16 x i8> %0, i32 7)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_bclri_b_RES
   ret void
@@ -29,7 +29,7 @@
 
 define void @llvm_mips_bclri_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_bclri_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bclri_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.bclri.h(<8 x i16> %0, i32 7)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_bclri_h_RES
   ret void
@@ -48,7 +48,7 @@
 
 define void @llvm_mips_bclri_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_bclri_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bclri_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.bclri.w(<4 x i32> %0, i32 7)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_bclri_w_RES
   ret void
@@ -67,7 +67,7 @@
 
 define void @llvm_mips_bclri_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_bclri_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bclri_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.bclri.d(<2 x i64> %0, i32 7)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_bclri_d_RES
   ret void
@@ -87,8 +87,8 @@
 
 define void @llvm_mips_binsli_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_binsli_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_binsli_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_binsli_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_binsli_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.binsli.b(<16 x i8> %0, <16 x i8> %1, i32 7)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_binsli_b_RES
   ret void
@@ -112,8 +112,8 @@
 
 define void @llvm_mips_binsli_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_binsli_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_binsli_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_binsli_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_binsli_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.binsli.h(<8 x i16> %0, <8 x i16> %1, i32 7)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_binsli_h_RES
   ret void
@@ -137,8 +137,8 @@
 
 define void @llvm_mips_binsli_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_binsli_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_binsli_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_binsli_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_binsli_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.binsli.w(<4 x i32> %0, <4 x i32> %1, i32 7)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_binsli_w_RES
   ret void
@@ -162,8 +162,8 @@
 
 define void @llvm_mips_binsli_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_binsli_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_binsli_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_binsli_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_binsli_d_ARG2
   ; TODO: We use a particularly wide mask here to work around a legalization
   ;       issue. If the mask doesn't fit within a 10-bit immediate, it gets
   ;       legalized into a constant pool. We should add a test to cover the
@@ -191,8 +191,8 @@
 
 define void @llvm_mips_binsri_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_binsri_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_binsri_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_binsri_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_binsri_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.binsri.b(<16 x i8> %0, <16 x i8> %1, i32 7)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_binsri_b_RES
   ret void
@@ -216,8 +216,8 @@
 
 define void @llvm_mips_binsri_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_binsri_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_binsri_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_binsri_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_binsri_h_ARG2
   %2 = tail call <8 x i16> @llvm.mips.binsri.h(<8 x i16> %0, <8 x i16> %1, i32 7)
   store <8 x i16> %2, <8 x i16>* @llvm_mips_binsri_h_RES
   ret void
@@ -241,8 +241,8 @@
 
 define void @llvm_mips_binsri_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_binsri_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_binsri_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_binsri_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_binsri_w_ARG2
   %2 = tail call <4 x i32> @llvm.mips.binsri.w(<4 x i32> %0, <4 x i32> %1, i32 7)
   store <4 x i32> %2, <4 x i32>* @llvm_mips_binsri_w_RES
   ret void
@@ -266,8 +266,8 @@
 
 define void @llvm_mips_binsri_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_binsri_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_binsri_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_binsri_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_binsri_d_ARG2
   %2 = tail call <2 x i64> @llvm.mips.binsri.d(<2 x i64> %0, <2 x i64> %1, i32 7)
   store <2 x i64> %2, <2 x i64>* @llvm_mips_binsri_d_RES
   ret void
@@ -290,7 +290,7 @@
 
 define void @llvm_mips_bnegi_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_bnegi_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bnegi_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.bnegi.b(<16 x i8> %0, i32 7)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_bnegi_b_RES
   ret void
@@ -309,7 +309,7 @@
 
 define void @llvm_mips_bnegi_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_bnegi_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bnegi_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.bnegi.h(<8 x i16> %0, i32 7)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_bnegi_h_RES
   ret void
@@ -328,7 +328,7 @@
 
 define void @llvm_mips_bnegi_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_bnegi_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bnegi_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.bnegi.w(<4 x i32> %0, i32 7)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_bnegi_w_RES
   ret void
@@ -347,7 +347,7 @@
 
 define void @llvm_mips_bnegi_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_bnegi_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bnegi_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.bnegi.d(<2 x i64> %0, i32 7)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_bnegi_d_RES
   ret void
@@ -366,7 +366,7 @@
 
 define void @llvm_mips_bseti_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_bseti_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bseti_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.bseti.b(<16 x i8> %0, i32 7)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_bseti_b_RES
   ret void
@@ -385,7 +385,7 @@
 
 define void @llvm_mips_bseti_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_bseti_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bseti_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.bseti.h(<8 x i16> %0, i32 7)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_bseti_h_RES
   ret void
@@ -404,7 +404,7 @@
 
 define void @llvm_mips_bseti_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_bseti_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bseti_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.bseti.w(<4 x i32> %0, i32 7)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_bseti_w_RES
   ret void
@@ -423,7 +423,7 @@
 
 define void @llvm_mips_bseti_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_bseti_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bseti_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.bseti.d(<2 x i64> %0, i32 7)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_bseti_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/i5-c.ll b/llvm/test/CodeGen/Mips/msa/i5-c.ll
index bf1578f..8158250 100644
--- a/llvm/test/CodeGen/Mips/msa/i5-c.ll
+++ b/llvm/test/CodeGen/Mips/msa/i5-c.ll
@@ -9,7 +9,7 @@
 
 define void @llvm_mips_ceqi_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_ceqi_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_ceqi_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.ceqi.b(<16 x i8> %0, i32 14)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_ceqi_b_RES
   ret void
@@ -28,7 +28,7 @@
 
 define void @llvm_mips_ceqi_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_ceqi_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_ceqi_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.ceqi.h(<8 x i16> %0, i32 14)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_ceqi_h_RES
   ret void
@@ -47,7 +47,7 @@
 
 define void @llvm_mips_ceqi_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_ceqi_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_ceqi_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.ceqi.w(<4 x i32> %0, i32 14)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_ceqi_w_RES
   ret void
@@ -66,7 +66,7 @@
 
 define void @llvm_mips_ceqi_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_ceqi_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_ceqi_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.ceqi.d(<2 x i64> %0, i32 14)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_ceqi_d_RES
   ret void
@@ -85,7 +85,7 @@
 
 define void @llvm_mips_clei_s_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_clei_s_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_clei_s_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.clei.s.b(<16 x i8> %0, i32 14)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_clei_s_b_RES
   ret void
@@ -104,7 +104,7 @@
 
 define void @llvm_mips_clei_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_clei_s_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_clei_s_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.clei.s.h(<8 x i16> %0, i32 14)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_clei_s_h_RES
   ret void
@@ -123,7 +123,7 @@
 
 define void @llvm_mips_clei_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_clei_s_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_clei_s_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.clei.s.w(<4 x i32> %0, i32 14)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_clei_s_w_RES
   ret void
@@ -142,7 +142,7 @@
 
 define void @llvm_mips_clei_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_clei_s_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_clei_s_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.clei.s.d(<2 x i64> %0, i32 14)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_clei_s_d_RES
   ret void
@@ -161,7 +161,7 @@
 
 define void @llvm_mips_clei_u_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_clei_u_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_clei_u_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.clei.u.b(<16 x i8> %0, i32 14)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_clei_u_b_RES
   ret void
@@ -180,7 +180,7 @@
 
 define void @llvm_mips_clei_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_clei_u_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_clei_u_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.clei.u.h(<8 x i16> %0, i32 14)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_clei_u_h_RES
   ret void
@@ -199,7 +199,7 @@
 
 define void @llvm_mips_clei_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_clei_u_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_clei_u_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.clei.u.w(<4 x i32> %0, i32 14)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_clei_u_w_RES
   ret void
@@ -218,7 +218,7 @@
 
 define void @llvm_mips_clei_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_clei_u_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_clei_u_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.clei.u.d(<2 x i64> %0, i32 14)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_clei_u_d_RES
   ret void
@@ -237,7 +237,7 @@
 
 define void @llvm_mips_clti_s_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_clti_s_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_clti_s_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.clti.s.b(<16 x i8> %0, i32 14)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_clti_s_b_RES
   ret void
@@ -256,7 +256,7 @@
 
 define void @llvm_mips_clti_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_clti_s_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_clti_s_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.clti.s.h(<8 x i16> %0, i32 14)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_clti_s_h_RES
   ret void
@@ -275,7 +275,7 @@
 
 define void @llvm_mips_clti_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_clti_s_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_clti_s_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.clti.s.w(<4 x i32> %0, i32 14)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_clti_s_w_RES
   ret void
@@ -294,7 +294,7 @@
 
 define void @llvm_mips_clti_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_clti_s_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_clti_s_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.clti.s.d(<2 x i64> %0, i32 14)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_clti_s_d_RES
   ret void
@@ -313,7 +313,7 @@
 
 define void @llvm_mips_clti_u_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_clti_u_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_clti_u_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.clti.u.b(<16 x i8> %0, i32 14)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_clti_u_b_RES
   ret void
@@ -332,7 +332,7 @@
 
 define void @llvm_mips_clti_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_clti_u_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_clti_u_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.clti.u.h(<8 x i16> %0, i32 14)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_clti_u_h_RES
   ret void
@@ -351,7 +351,7 @@
 
 define void @llvm_mips_clti_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_clti_u_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_clti_u_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.clti.u.w(<4 x i32> %0, i32 14)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_clti_u_w_RES
   ret void
@@ -370,7 +370,7 @@
 
 define void @llvm_mips_clti_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_clti_u_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_clti_u_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.clti.u.d(<2 x i64> %0, i32 14)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_clti_u_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/i5-m.ll b/llvm/test/CodeGen/Mips/msa/i5-m.ll
index 2766349..ba6e9d2 100644
--- a/llvm/test/CodeGen/Mips/msa/i5-m.ll
+++ b/llvm/test/CodeGen/Mips/msa/i5-m.ll
@@ -9,7 +9,7 @@
 
 define void @llvm_mips_maxi_s_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_maxi_s_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_maxi_s_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.maxi.s.b(<16 x i8> %0, i32 14)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_maxi_s_b_RES
   ret void
@@ -28,7 +28,7 @@
 
 define void @llvm_mips_maxi_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_maxi_s_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_maxi_s_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.maxi.s.h(<8 x i16> %0, i32 14)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_maxi_s_h_RES
   ret void
@@ -47,7 +47,7 @@
 
 define void @llvm_mips_maxi_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_maxi_s_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_maxi_s_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.maxi.s.w(<4 x i32> %0, i32 14)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_maxi_s_w_RES
   ret void
@@ -66,7 +66,7 @@
 
 define void @llvm_mips_maxi_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_maxi_s_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_maxi_s_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.maxi.s.d(<2 x i64> %0, i32 14)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_maxi_s_d_RES
   ret void
@@ -85,7 +85,7 @@
 
 define void @llvm_mips_maxi_u_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_maxi_u_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_maxi_u_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.maxi.u.b(<16 x i8> %0, i32 14)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_maxi_u_b_RES
   ret void
@@ -104,7 +104,7 @@
 
 define void @llvm_mips_maxi_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_maxi_u_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_maxi_u_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.maxi.u.h(<8 x i16> %0, i32 14)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_maxi_u_h_RES
   ret void
@@ -123,7 +123,7 @@
 
 define void @llvm_mips_maxi_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_maxi_u_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_maxi_u_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.maxi.u.w(<4 x i32> %0, i32 14)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_maxi_u_w_RES
   ret void
@@ -142,7 +142,7 @@
 
 define void @llvm_mips_maxi_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_maxi_u_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_maxi_u_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.maxi.u.d(<2 x i64> %0, i32 14)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_maxi_u_d_RES
   ret void
@@ -161,7 +161,7 @@
 
 define void @llvm_mips_mini_s_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_mini_s_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_mini_s_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.mini.s.b(<16 x i8> %0, i32 14)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_mini_s_b_RES
   ret void
@@ -180,7 +180,7 @@
 
 define void @llvm_mips_mini_s_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_mini_s_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_mini_s_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.mini.s.h(<8 x i16> %0, i32 14)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_mini_s_h_RES
   ret void
@@ -199,7 +199,7 @@
 
 define void @llvm_mips_mini_s_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_mini_s_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_mini_s_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.mini.s.w(<4 x i32> %0, i32 14)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_mini_s_w_RES
   ret void
@@ -218,7 +218,7 @@
 
 define void @llvm_mips_mini_s_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_mini_s_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_mini_s_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.mini.s.d(<2 x i64> %0, i32 14)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_mini_s_d_RES
   ret void
@@ -237,7 +237,7 @@
 
 define void @llvm_mips_mini_u_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_mini_u_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_mini_u_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.mini.u.b(<16 x i8> %0, i32 14)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_mini_u_b_RES
   ret void
@@ -256,7 +256,7 @@
 
 define void @llvm_mips_mini_u_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_mini_u_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_mini_u_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.mini.u.h(<8 x i16> %0, i32 14)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_mini_u_h_RES
   ret void
@@ -275,7 +275,7 @@
 
 define void @llvm_mips_mini_u_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_mini_u_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_mini_u_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.mini.u.w(<4 x i32> %0, i32 14)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_mini_u_w_RES
   ret void
@@ -294,7 +294,7 @@
 
 define void @llvm_mips_mini_u_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_mini_u_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_mini_u_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.mini.u.d(<2 x i64> %0, i32 14)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_mini_u_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/i5-s.ll b/llvm/test/CodeGen/Mips/msa/i5-s.ll
index 184172f..db331b1 100644
--- a/llvm/test/CodeGen/Mips/msa/i5-s.ll
+++ b/llvm/test/CodeGen/Mips/msa/i5-s.ll
@@ -9,7 +9,7 @@
 
 define void @llvm_mips_subvi_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_subvi_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_subvi_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.subvi.b(<16 x i8> %0, i32 14)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_subvi_b_RES
   ret void
@@ -28,7 +28,7 @@
 
 define void @llvm_mips_subvi_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_subvi_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_subvi_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.subvi.h(<8 x i16> %0, i32 14)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_subvi_h_RES
   ret void
@@ -47,7 +47,7 @@
 
 define void @llvm_mips_subvi_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_subvi_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_subvi_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.subvi.w(<4 x i32> %0, i32 14)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_subvi_w_RES
   ret void
@@ -66,7 +66,7 @@
 
 define void @llvm_mips_subvi_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_subvi_d_ARG1
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_subvi_d_ARG1
   %1 = tail call <2 x i64> @llvm.mips.subvi.d(<2 x i64> %0, i32 14)
   store <2 x i64> %1, <2 x i64>* @llvm_mips_subvi_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/i5_ld_st.ll b/llvm/test/CodeGen/Mips/msa/i5_ld_st.ll
index 7cc55f2..991bb84 100644
--- a/llvm/test/CodeGen/Mips/msa/i5_ld_st.ll
+++ b/llvm/test/CodeGen/Mips/msa/i5_ld_st.ll
@@ -81,7 +81,7 @@
 
 define void @llvm_mips_st_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_st_b_ARG
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_st_b_ARG
   %1 = bitcast <16 x i8>* @llvm_mips_st_b_RES to i8*
   tail call void @llvm.mips.st.b(<16 x i8> %0, i8* %1, i32 16)
   ret void
@@ -99,7 +99,7 @@
 
 define void @llvm_mips_st_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_st_h_ARG
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_st_h_ARG
   %1 = bitcast <8 x i16>* @llvm_mips_st_h_RES to i8*
   tail call void @llvm.mips.st.h(<8 x i16> %0, i8* %1, i32 16)
   ret void
@@ -117,7 +117,7 @@
 
 define void @llvm_mips_st_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_st_w_ARG
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_st_w_ARG
   %1 = bitcast <4 x i32>* @llvm_mips_st_w_RES to i8*
   tail call void @llvm.mips.st.w(<4 x i32> %0, i8* %1, i32 16)
   ret void
@@ -135,7 +135,7 @@
 
 define void @llvm_mips_st_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_st_d_ARG
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_st_d_ARG
   %1 = bitcast <2 x i64>* @llvm_mips_st_d_RES to i8*
   tail call void @llvm.mips.st.d(<2 x i64> %0, i8* %1, i32 16)
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/i8.ll b/llvm/test/CodeGen/Mips/msa/i8.ll
index d2931a7..4af9c58 100644
--- a/llvm/test/CodeGen/Mips/msa/i8.ll
+++ b/llvm/test/CodeGen/Mips/msa/i8.ll
@@ -8,7 +8,7 @@
 
 define void @llvm_mips_andi_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_andi_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_andi_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.andi.b(<16 x i8> %0, i32 25)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_andi_b_RES
   ret void
@@ -28,8 +28,8 @@
 
 define void @llvm_mips_bmnzi_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_bmnzi_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_bmnzi_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnzi_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 25)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
   ret void
@@ -52,8 +52,8 @@
 
 define void @llvm_mips_bmzi_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_bmzi_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_bmzi_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bmzi_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bmzi_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 25)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_bmzi_b_RES
   ret void
@@ -77,8 +77,8 @@
 
 define void @llvm_mips_bseli_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_bseli_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_bseli_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bseli_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bseli_b_ARG2
   %2 = tail call <16 x i8> @llvm.mips.bseli.b(<16 x i8> %0, <16 x i8> %1, i32 25)
   store <16 x i8> %2, <16 x i8>* @llvm_mips_bseli_b_RES
   ret void
@@ -100,7 +100,7 @@
 
 define void @llvm_mips_nori_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_nori_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_nori_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.nori.b(<16 x i8> %0, i32 25)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_nori_b_RES
   ret void
@@ -119,7 +119,7 @@
 
 define void @llvm_mips_ori_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_ori_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_ori_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.ori.b(<16 x i8> %0, i32 25)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_ori_b_RES
   ret void
@@ -138,7 +138,7 @@
 
 define void @llvm_mips_shf_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_shf_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_shf_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.shf.b(<16 x i8> %0, i32 25)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_shf_b_RES
   ret void
@@ -157,7 +157,7 @@
 
 define void @llvm_mips_shf_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_shf_h_ARG1
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_shf_h_ARG1
   %1 = tail call <8 x i16> @llvm.mips.shf.h(<8 x i16> %0, i32 25)
   store <8 x i16> %1, <8 x i16>* @llvm_mips_shf_h_RES
   ret void
@@ -176,7 +176,7 @@
 
 define void @llvm_mips_shf_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_shf_w_ARG1
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_shf_w_ARG1
   %1 = tail call <4 x i32> @llvm.mips.shf.w(<4 x i32> %0, i32 25)
   store <4 x i32> %1, <4 x i32>* @llvm_mips_shf_w_RES
   ret void
@@ -195,7 +195,7 @@
 
 define void @llvm_mips_xori_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_xori_b_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_xori_b_ARG1
   %1 = tail call <16 x i8> @llvm.mips.xori.b(<16 x i8> %0, i32 25)
   store <16 x i8> %1, <16 x i8>* @llvm_mips_xori_b_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/inline-asm.ll b/llvm/test/CodeGen/Mips/msa/inline-asm.ll
index 4a34273..85da87b 100644
--- a/llvm/test/CodeGen/Mips/msa/inline-asm.ll
+++ b/llvm/test/CodeGen/Mips/msa/inline-asm.ll
@@ -16,7 +16,7 @@
 define void @test2() nounwind {
 entry:
   ; CHECK-LABEL: test2:
-  %0 = load <4 x i32>* @v4i32_r
+  %0 = load <4 x i32>, <4 x i32>* @v4i32_r
   %1 = call <4 x i32> asm "addvi.w ${0:w}, ${1:w}, 1", "=f,f"(<4 x i32> %0)
   ; CHECK: addvi.w $w{{[1-3]?[0-9]}}, $w{{[1-3]?[0-9]}}, 1
   store <4 x i32> %1, <4 x i32>* @v4i32_r
@@ -26,7 +26,7 @@
 define void @test3() nounwind {
 entry:
   ; CHECK-LABEL: test3:
-  %0 = load <4 x i32>* @v4i32_r
+  %0 = load <4 x i32>, <4 x i32>* @v4i32_r
   %1 = call <4 x i32> asm sideeffect "addvi.w ${0:w}, ${1:w}, 1", "=f,f,~{$w0}"(<4 x i32> %0)
   ; CHECK: addvi.w $w{{([1-9]|[1-3][0-9])}}, $w{{([1-9]|[1-3][0-9])}}, 1
   store <4 x i32> %1, <4 x i32>* @v4i32_r
diff --git a/llvm/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll b/llvm/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll
index 4beaaa9..beb361b 100644
--- a/llvm/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll
+++ b/llvm/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll
@@ -14,7 +14,7 @@
   %A2 = alloca <1 x double>
   %A1 = alloca double
   %A = alloca i32
-  %L = load i8* %0
+  %L = load i8, i8* %0
   store i8 77, i8* %0
   %E = extractelement <8 x i64> zeroinitializer, i32 2
   %Shuff = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15, i32 1, i32 3>
@@ -24,7 +24,7 @@
   br label %CF
 
 CF:                                               ; preds = %CF, %CF78, %BB
-  %L5 = load i8* %Sl
+  %L5 = load i8, i8* %Sl
   store i8 %L, i8* %Sl
   %E6 = extractelement <8 x i32> zeroinitializer, i32 2
   %Shuff7 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff, <8 x i32> <i32 13, i32 15, i32 1, i32 3, i32 5, i32 7, i32 9, i32 undef>
@@ -33,7 +33,7 @@
   %FC = sitofp <8 x i64> zeroinitializer to <8 x float>
   %Sl9 = select i1 %Cmp, i8 77, i8 77
   %Cmp10 = icmp uge <8 x i64> %Shuff, zeroinitializer
-  %L11 = load i8* %0
+  %L11 = load i8, i8* %0
   store i8 %Sl9, i8* %0
   %E12 = extractelement <1 x i16> zeroinitializer, i32 0
   %Shuff13 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 undef, i32 3, i32 5, i32 7>
@@ -42,7 +42,7 @@
   %Tr = trunc <8 x i64> %Shuff to <8 x i32>
   %Sl16 = select i1 %Cmp, i8 77, i8 %5
   %Cmp17 = icmp ult <8 x i1> %Cmp10, %Cmp10
-  %L18 = load i8* %Sl
+  %L18 = load i8, i8* %Sl
   store i8 -1, i8* %Sl
   %E19 = extractelement <8 x i32> zeroinitializer, i32 3
   %Shuff20 = shufflevector <8 x float> %FC, <8 x float> %FC, <8 x i32> <i32 6, i32 8, i32 undef, i32 12, i32 14, i32 0, i32 2, i32 undef>
@@ -54,7 +54,7 @@
   br i1 %Cmp25, label %CF, label %CF78
 
 CF78:                                             ; preds = %CF
-  %L26 = load i8* %Sl
+  %L26 = load i8, i8* %Sl
   store i32 50347, i32* %A
   %E27 = extractelement <8 x i1> %Cmp10, i32 2
   br i1 %E27, label %CF, label %CF77
@@ -65,7 +65,7 @@
   %B30 = urem <8 x i32> %Tr, zeroinitializer
   %Tr31 = trunc i32 0 to i16
   %Sl32 = select i1 %Cmp, <2 x i1> zeroinitializer, <2 x i1> zeroinitializer
-  %L33 = load i8* %Sl
+  %L33 = load i8, i8* %Sl
   store i8 %L26, i8* %Sl
   %E34 = extractelement <4 x i32> zeroinitializer, i32 0
   %Shuff35 = shufflevector <1 x i16> zeroinitializer, <1 x i16> %B, <1 x i32> undef
@@ -73,7 +73,7 @@
   %B37 = srem <1 x i16> %I29, zeroinitializer
   %FC38 = sitofp <8 x i32> %B30 to <8 x double>
   %Sl39 = select i1 %Cmp, double 0.000000e+00, double %Sl24
-  %L40 = load i8* %Sl
+  %L40 = load i8, i8* %Sl
   store i8 %Sl16, i8* %Sl
   %E41 = extractelement <1 x i16> zeroinitializer, i32 0
   %Shuff42 = shufflevector <8 x i1> %Cmp17, <8 x i1> %Cmp10, <8 x i32> <i32 14, i32 undef, i32 2, i32 4, i32 undef, i32 8, i32 10, i32 12>
@@ -85,7 +85,7 @@
   br i1 %Cmp46, label %CF77, label %CF80
 
 CF80:                                             ; preds = %CF80, %CF77
-  %L47 = load i64* %PC
+  %L47 = load i64, i64* %PC
   store i8 77, i8* %Sl
   %E48 = extractelement <8 x i64> zeroinitializer, i32 2
   %Shuff49 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff7, <8 x i32> <i32 5, i32 7, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 3>
@@ -97,7 +97,7 @@
   br i1 %Cmp54, label %CF80, label %CF81
 
 CF81:                                             ; preds = %CF80
-  %L55 = load i8* %Sl
+  %L55 = load i8, i8* %Sl
   store i8 %Sl16, i8* %Sl
   %E56 = extractelement <1 x i16> %B, i32 0
   %Shuff57 = shufflevector <1 x i16> zeroinitializer, <1 x i16> zeroinitializer, <1 x i32> <i32 1>
@@ -105,7 +105,7 @@
   %B59 = srem i32 %E19, %E19
   %Sl60 = select i1 %Cmp, i8 77, i8 77
   %Cmp61 = icmp ult <1 x i16> zeroinitializer, %B
-  %L62 = load i8* %Sl
+  %L62 = load i8, i8* %Sl
   store i64 %L47, i64* %PC52
   %E63 = extractelement <4 x i32> %I43, i32 2
   %Shuff64 = shufflevector <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i32> <i32 undef, i32 undef, i32 1, i32 3>
@@ -117,7 +117,7 @@
   br i1 %Cmp69, label %CF77, label %CF79
 
 CF79:                                             ; preds = %CF81
-  %L70 = load i32* %A
+  %L70 = load i32, i32* %A
   store i64 %4, i64* %PC
   %E71 = extractelement <4 x i32> zeroinitializer, i32 0
   %Shuff72 = shufflevector <8 x i32> zeroinitializer, <8 x i32> %B44, <8 x i32> <i32 11, i32 undef, i32 15, i32 1, i32 3, i32 undef, i32 7, i32 9>
diff --git a/llvm/test/CodeGen/Mips/msa/llvm-stress-s1935737938.ll b/llvm/test/CodeGen/Mips/msa/llvm-stress-s1935737938.ll
index f9cab03..bdf6eaf 100644
--- a/llvm/test/CodeGen/Mips/msa/llvm-stress-s1935737938.ll
+++ b/llvm/test/CodeGen/Mips/msa/llvm-stress-s1935737938.ll
@@ -14,7 +14,7 @@
   %A2 = alloca i64
   %A1 = alloca i32
   %A = alloca <2 x i64>
-  %L = load i8* %0
+  %L = load i8, i8* %0
   store i8 -1, i8* %0
   %E = extractelement <2 x i32> zeroinitializer, i32 0
   %Shuff = shufflevector <2 x i32> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 3>
@@ -22,7 +22,7 @@
   %B = lshr i8 %L, -69
   %ZE = fpext float 0xBF2AA5FE80000000 to double
   %Sl = select i1 true, <1 x i64> <i64 -1>, <1 x i64> <i64 -1>
-  %L5 = load i8* %0
+  %L5 = load i8, i8* %0
   store i8 -69, i8* %0
   %E6 = extractelement <16 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, i32 14
   %Shuff7 = shufflevector <2 x i32> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 3>
@@ -31,7 +31,7 @@
   %FC = uitofp i32 %3 to double
   %Sl10 = select i1 true, <1 x i1> zeroinitializer, <1 x i1> zeroinitializer
   %Cmp = icmp ne <1 x i64> %I, <i64 -1>
-  %L11 = load i8* %0
+  %L11 = load i8, i8* %0
   store i8 %L11, i8* %0
   %E12 = extractelement <1 x i64> <i64 -1>, i32 0
   %Shuff13 = shufflevector <1 x i64> %Sl, <1 x i64> <i64 -1>, <1 x i32> <i32 1>
@@ -42,7 +42,7 @@
   br label %CF74
 
 CF74:                                             ; preds = %CF74, %CF80, %CF76, %BB
-  %L18 = load i8* %0
+  %L18 = load i8, i8* %0
   store i8 -69, i8* %0
   %E19 = extractelement <1 x i64> %Sl, i32 0
   %Shuff20 = shufflevector <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i32> <i32 12, i32 14, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10>
@@ -50,7 +50,7 @@
   %B22 = urem i32 135673, %3
   %FC23 = sitofp i8 %L to float
   %Sl24 = select i1 true, i8 %B, i8 %L18
-  %L25 = load i8* %0
+  %L25 = load i8, i8* %0
   store i8 %L, i8* %0
   %E26 = extractelement <2 x i32> %Shuff, i32 1
   %Shuff27 = shufflevector <2 x i32> zeroinitializer, <2 x i32> zeroinitializer, <2 x i32> <i32 2, i32 0>
@@ -62,7 +62,7 @@
   br i1 %Cmp31, label %CF74, label %CF80
 
 CF80:                                             ; preds = %CF74
-  %L32 = load i8* %0
+  %L32 = load i8, i8* %0
   store i8 -1, i8* %0
   %E33 = extractelement <2 x i32> zeroinitializer, i32 1
   %Shuff34 = shufflevector <1 x i64> %Shuff13, <1 x i64> <i64 -1>, <1 x i32> zeroinitializer
@@ -70,7 +70,7 @@
   %FC36 = sitofp <1 x i1> %Cmp to <1 x float>
   %Sl37 = select i1 true, <8 x i8> %Shuff20, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   %Cmp38 = icmp sgt <2 x i32> %I21, %Shuff27
-  %L39 = load i8* %0
+  %L39 = load i8, i8* %0
   store i8 %Sl24, i8* %0
   %E40 = extractelement <8 x i64> zeroinitializer, i32 1
   %Shuff41 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %Cmp38, <2 x i32> <i32 0, i32 2>
@@ -81,7 +81,7 @@
   br i1 %Cmp45, label %CF74, label %CF76
 
 CF76:                                             ; preds = %CF80
-  %L46 = load i8* %0
+  %L46 = load i8, i8* %0
   store i8 %L39, i8* %0
   %E47 = extractelement <2 x i32> %Shuff27, i32 0
   %Shuff48 = shufflevector <1 x i1> %Sl10, <1 x i1> %Sl10, <1 x i32> <i32 1>
@@ -92,7 +92,7 @@
   br i1 %Cmp52, label %CF74, label %CF75
 
 CF75:                                             ; preds = %CF75, %CF76
-  %L53 = load i8* %0
+  %L53 = load i8, i8* %0
   store i8 %L18, i8* %0
   %E54 = extractelement <8 x i8> %Shuff20, i32 5
   %Shuff55 = shufflevector <2 x i32> %Shuff, <2 x i32> zeroinitializer, <2 x i32> <i32 0, i32 2>
@@ -103,7 +103,7 @@
   br i1 %Cmp59, label %CF75, label %CF78
 
 CF78:                                             ; preds = %CF75
-  %L60 = load i8* %0
+  %L60 = load i8, i8* %0
   store i8 -69, i8* %0
   %E61 = extractelement <2 x i32> zeroinitializer, i32 0
   %Shuff62 = shufflevector <2 x i32> %Shuff7, <2 x i32> %I21, <2 x i32> <i32 1, i32 3>
@@ -115,7 +115,7 @@
   br label %CF
 
 CF:                                               ; preds = %CF, %CF78
-  %L68 = load i8* %0
+  %L68 = load i8, i8* %0
   store i64 %B57, i64* %2
   %E69 = extractelement <2 x i1> %Shuff41, i32 1
   br i1 %E69, label %CF, label %CF77
diff --git a/llvm/test/CodeGen/Mips/msa/llvm-stress-s2704903805.ll b/llvm/test/CodeGen/Mips/msa/llvm-stress-s2704903805.ll
index e14f405..8f23a8c 100644
--- a/llvm/test/CodeGen/Mips/msa/llvm-stress-s2704903805.ll
+++ b/llvm/test/CodeGen/Mips/msa/llvm-stress-s2704903805.ll
@@ -13,7 +13,7 @@
   %A2 = alloca i8
   %A1 = alloca i32
   %A = alloca i8
-  %L = load i8* %0
+  %L = load i8, i8* %0
   store i8 %5, i8* %0
   %E = extractelement <2 x i16> zeroinitializer, i32 0
   %Shuff = shufflevector <1 x i8> <i8 -1>, <1 x i8> <i8 -1>, <1 x i32> undef
@@ -25,7 +25,7 @@
   br label %CF83
 
 CF83:                                             ; preds = %BB
-  %L5 = load i8* %0
+  %L5 = load i8, i8* %0
   store i8 85, i8* %0
   %E6 = extractelement <1 x i8> <i8 -1>, i32 0
   %Shuff7 = shufflevector <2 x i16> zeroinitializer, <2 x i16> zeroinitializer, <2 x i32> <i32 1, i32 3>
@@ -37,7 +37,7 @@
   br label %CF
 
 CF:                                               ; preds = %CF, %CF81, %CF83
-  %L13 = load i8* %0
+  %L13 = load i8, i8* %0
   store i8 0, i8* %0
   %E14 = extractelement <2 x i64> zeroinitializer, i32 0
   %Shuff15 = shufflevector <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i32> <i32 3, i32 5, i32 7, i32 undef>
@@ -52,7 +52,7 @@
   br i1 %Cmp19, label %CF80, label %CF81
 
 CF81:                                             ; preds = %CF80
-  %L20 = load i8* %0
+  %L20 = load i8, i8* %0
   store i8 85, i8* %0
   %E21 = extractelement <1 x i8> <i8 -1>, i32 0
   %Shuff22 = shufflevector <1 x i8> <i8 -1>, <1 x i8> %Shuff, <1 x i32> zeroinitializer
@@ -60,7 +60,7 @@
   %FC24 = fptoui <4 x float> %FC to <4 x i16>
   %Sl25 = select i1 %Cmp, <2 x i32> zeroinitializer, <2 x i32> <i32 -1, i32 -1>
   %Cmp26 = icmp ult <4 x i64> %I16, %Shuff15
-  %L27 = load i8* %0
+  %L27 = load i8, i8* %0
   store i8 %L, i8* %0
   %E28 = extractelement <1 x i8> <i8 -1>, i32 0
   %Shuff29 = shufflevector <8 x i16> zeroinitializer, <8 x i16> zeroinitializer, <8 x i32> <i32 11, i32 undef, i32 15, i32 1, i32 3, i32 5, i32 undef, i32 9>
@@ -68,7 +68,7 @@
   %B31 = mul i8 %E28, 85
   %PC = bitcast i32* %A3 to i32*
   %Sl32 = select i1 %Cmp12, float %FC10, float 0x4712BFE680000000
-  %L33 = load i32* %PC
+  %L33 = load i32, i32* %PC
   store i32 %L33, i32* %PC
   %E34 = extractelement <2 x i16> zeroinitializer, i32 1
   %Shuff35 = shufflevector <1 x i8> %Shuff, <1 x i8> <i8 -1>, <1 x i32> zeroinitializer
@@ -79,7 +79,7 @@
   br i1 %Cmp39, label %CF, label %CF77
 
 CF77:                                             ; preds = %CF77, %CF81
-  %L40 = load i32* %PC
+  %L40 = load i32, i32* %PC
   store i32 %3, i32* %PC
   %E41 = extractelement <2 x i32> zeroinitializer, i32 0
   %Shuff42 = shufflevector <2 x i32> <i32 -1, i32 -1>, <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 3>
@@ -88,7 +88,7 @@
   %Se = sext i32 %3 to i64
   %Sl45 = select i1 true, <1 x i8> %Shuff, <1 x i8> %I43
   %Cmp46 = icmp sge <1 x i8> %I36, %Shuff
-  %L47 = load i32* %PC
+  %L47 = load i32, i32* %PC
   store i32 %L33, i32* %PC
   %E48 = extractelement <2 x i16> zeroinitializer, i32 0
   %Shuff49 = shufflevector <1 x i8> <i8 -1>, <1 x i8> <i8 -1>, <1 x i32> <i32 1>
@@ -100,7 +100,7 @@
   br i1 %Cmp54, label %CF77, label %CF78
 
 CF78:                                             ; preds = %CF78, %CF77
-  %L55 = load i32* %PC
+  %L55 = load i32, i32* %PC
   store i32 %L33, i32* %PC
   %E56 = extractelement <8 x i16> %Shuff29, i32 4
   %Shuff57 = shufflevector <1 x i8> <i8 -1>, <1 x i8> <i8 -1>, <1 x i32> <i32 1>
@@ -111,7 +111,7 @@
   br i1 %Cmp60, label %CF78, label %CF79
 
 CF79:                                             ; preds = %CF79, %CF78
-  %L61 = load i32* %PC
+  %L61 = load i32, i32* %PC
   store i32 %L33, i32* %A3
   %E62 = extractelement <4 x i64> %Shuff15, i32 1
   %Shuff63 = shufflevector <8 x i16> %Shuff29, <8 x i16> %Shuff29, <8 x i32> <i32 undef, i32 10, i32 12, i32 undef, i32 undef, i32 undef, i32 4, i32 6>
@@ -123,7 +123,7 @@
   br i1 %Cmp68, label %CF79, label %CF82
 
 CF82:                                             ; preds = %CF79
-  %L69 = load i32* %PC
+  %L69 = load i32, i32* %PC
   store i32 %L33, i32* %PC
   %E70 = extractelement <8 x i16> zeroinitializer, i32 3
   %Shuff71 = shufflevector <4 x i64> %Shuff15, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, <4 x i32> <i32 6, i32 undef, i32 2, i32 4>
diff --git a/llvm/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll b/llvm/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll
index 1a03e55..e3cf796 100644
--- a/llvm/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll
+++ b/llvm/test/CodeGen/Mips/msa/llvm-stress-s3861334421.ll
@@ -14,7 +14,7 @@
   %A2 = alloca i64
   %A1 = alloca i64
   %A = alloca double
-  %L = load i8* %0
+  %L = load i8, i8* %0
   store i8 -101, i8* %0
   %E = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 0
   %Shuff = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 undef, i32 1>
@@ -22,7 +22,7 @@
   %B = and i64 116376, 57247
   %FC = uitofp i8 7 to double
   %Sl = select i1 false, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-  %L5 = load i8* %0
+  %L5 = load i8, i8* %0
   store i8 %L, i8* %0
   %E6 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 3
   %Shuff7 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 6, i32 0>
@@ -33,7 +33,7 @@
   br label %CF
 
 CF:                                               ; preds = %CF, %BB
-  %L11 = load i8* %0
+  %L11 = load i8, i8* %0
   store i8 -87, i8* %0
   %E12 = extractelement <4 x i64> zeroinitializer, i32 0
   %Shuff13 = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 7, i32 9, i32 11, i32 13, i32 undef, i32 1, i32 3, i32 5>
@@ -45,7 +45,7 @@
   br i1 %Cmp18, label %CF, label %CF80
 
 CF80:                                             ; preds = %CF80, %CF88, %CF
-  %L19 = load i8* %0
+  %L19 = load i8, i8* %0
   store i8 -101, i8* %0
   %E20 = extractelement <4 x i64> zeroinitializer, i32 0
   %Shuff21 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff7, <4 x i32> <i32 7, i32 1, i32 3, i32 5>
@@ -56,7 +56,7 @@
   br i1 %Cmp25, label %CF80, label %CF83
 
 CF83:                                             ; preds = %CF83, %CF80
-  %L26 = load i8* %0
+  %L26 = load i8, i8* %0
   store i8 -87, i8* %0
   %E27 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 0
   %Shuff28 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 7, i32 1, i32 3, i32 5>
@@ -68,7 +68,7 @@
   br i1 %Cmp33, label %CF83, label %CF88
 
 CF88:                                             ; preds = %CF83
-  %L34 = load i8* %0
+  %L34 = load i8, i8* %0
   store i8 -87, i8* %0
   %E35 = extractelement <8 x i64> %Shuff, i32 7
   %Shuff36 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> %Shuff28, <4 x i32> <i32 2, i32 undef, i32 undef, i32 0>
@@ -80,7 +80,7 @@
   br i1 %Cmp40, label %CF80, label %CF81
 
 CF81:                                             ; preds = %CF81, %CF85, %CF87, %CF88
-  %L41 = load i8* %0
+  %L41 = load i8, i8* %0
   store i8 %L34, i8* %0
   %E42 = extractelement <8 x i64> %Shuff13, i32 6
   %Shuff43 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 7>
@@ -92,7 +92,7 @@
   br i1 %Cmp47, label %CF81, label %CF85
 
 CF85:                                             ; preds = %CF81
-  %L48 = load i8* %0
+  %L48 = load i8, i8* %0
   store i8 -101, i8* %0
   %E49 = extractelement <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i32 2
   %Shuff50 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 5, i32 7, i32 1, i32 3>
@@ -101,7 +101,7 @@
   %FC53 = uitofp i8 %L48 to double
   %Sl54 = select i1 %Cmp47, i32 %3, i32 %Sl24
   %Cmp55 = icmp ne <8 x i64> %Shuff13, zeroinitializer
-  %L56 = load i8* %0
+  %L56 = load i8, i8* %0
   store i8 %L11, i8* %0
   %E57 = extractelement <4 x i64> %Shuff21, i32 1
   %Shuff58 = shufflevector <8 x i64> %Shuff, <8 x i64> zeroinitializer, <8 x i32> <i32 4, i32 6, i32 undef, i32 10, i32 12, i32 undef, i32 0, i32 2>
@@ -113,7 +113,7 @@
 CF84:                                             ; preds = %CF84, %CF85
   %Sl62 = select i1 false, i8 %L, i8 %L48
   %Cmp63 = icmp ne <8 x i64> %I, zeroinitializer
-  %L64 = load i8* %0
+  %L64 = load i8, i8* %0
   store i8 %5, i8* %0
   %E65 = extractelement <8 x i1> %Cmp55, i32 0
   br i1 %E65, label %CF84, label %CF87
@@ -125,7 +125,7 @@
   %ZE69 = zext <8 x i8> %Sl32 to <8 x i64>
   %Sl70 = select i1 %Tr61, i64 %E20, i64 %E12
   %Cmp71 = icmp slt <8 x i64> %I, %Shuff
-  %L72 = load i8* %0
+  %L72 = load i8, i8* %0
   store i8 %L72, i8* %0
   %E73 = extractelement <8 x i1> %Cmp55, i32 6
   br i1 %E73, label %CF81, label %CF82
diff --git a/llvm/test/CodeGen/Mips/msa/llvm-stress-s3926023935.ll b/llvm/test/CodeGen/Mips/msa/llvm-stress-s3926023935.ll
index 96547d9..6f33810 100644
--- a/llvm/test/CodeGen/Mips/msa/llvm-stress-s3926023935.ll
+++ b/llvm/test/CodeGen/Mips/msa/llvm-stress-s3926023935.ll
@@ -14,7 +14,7 @@
   %A2 = alloca double
   %A1 = alloca float
   %A = alloca double
-  %L = load i8* %0
+  %L = load i8, i8* %0
   store i8 -123, i8* %0
   %E = extractelement <4 x i64> zeroinitializer, i32 1
   %Shuff = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -22,7 +22,7 @@
   %BC = bitcast i64 181325 to double
   %Sl = select i1 false, <2 x i32> zeroinitializer, <2 x i32> zeroinitializer
   %Cmp = icmp ne <4 x i64> zeroinitializer, zeroinitializer
-  %L5 = load i8* %0
+  %L5 = load i8, i8* %0
   store i8 %L, i8* %0
   %E6 = extractelement <4 x i64> zeroinitializer, i32 3
   %Shuff7 = shufflevector <2 x i16> zeroinitializer, <2 x i16> zeroinitializer, <2 x i32> <i32 2, i32 0>
@@ -33,7 +33,7 @@
   br label %CF80
 
 CF80:                                             ; preds = %BB
-  %L11 = load i8* %0
+  %L11 = load i8, i8* %0
   store i8 -123, i8* %0
   %E12 = extractelement <2 x i16> zeroinitializer, i32 1
   %Shuff13 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -42,7 +42,7 @@
   %PC = bitcast i1* %A4 to i64*
   %Sl16 = select i1 %Cmp10, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
   %Cmp17 = icmp ule <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %Sl16
-  %L18 = load double* %A2
+  %L18 = load double, double* %A2
   store i64 498254, i64* %PC
   %E19 = extractelement <4 x i64> zeroinitializer, i32 0
   %Shuff20 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %I, <2 x i32> <i32 3, i32 1>
@@ -51,7 +51,7 @@
   %ZE = zext <2 x i1> %Shuff20 to <2 x i32>
   %Sl23 = select i1 %Cmp10, <2 x i1> %Shuff20, <2 x i1> zeroinitializer
   %Cmp24 = icmp ult <2 x i32> zeroinitializer, zeroinitializer
-  %L25 = load i8* %0
+  %L25 = load i8, i8* %0
   store i8 %L25, i8* %0
   %E26 = extractelement <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, i32 3
   %Shuff27 = shufflevector <4 x i32> %Shuff, <4 x i32> %I14, <4 x i32> <i32 6, i32 0, i32 undef, i32 4>
@@ -63,7 +63,7 @@
 CF79:                                             ; preds = %CF80
   %Sl30 = select i1 false, i8 %B29, i8 -123
   %Cmp31 = icmp sge <2 x i1> %I, %I
-  %L32 = load i64* %PC
+  %L32 = load i64, i64* %PC
   store i8 -123, i8* %0
   %E33 = extractelement <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, i32 2
   %Shuff34 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff13, <4 x i32> <i32 5, i32 7, i32 1, i32 3>
@@ -75,7 +75,7 @@
   br label %CF
 
 CF:                                               ; preds = %CF, %CF79
-  %L40 = load double* %A
+  %L40 = load double, double* %A
   store i1 %Cmp39, i1* %PC37
   %E41 = extractelement <4 x i64> zeroinitializer, i32 3
   %Shuff42 = shufflevector <2 x i32> zeroinitializer, <2 x i32> %ZE, <2 x i32> <i32 2, i32 undef>
@@ -90,7 +90,7 @@
   br i1 %Cmp46, label %CF77, label %CF78
 
 CF78:                                             ; preds = %CF78, %CF83, %CF82, %CF77
-  %L47 = load i64* %PC
+  %L47 = load i64, i64* %PC
   store i8 -123, i8* %0
   %E48 = extractelement <4 x i64> zeroinitializer, i32 3
   %Shuff49 = shufflevector <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 6, i32 undef>
@@ -105,7 +105,7 @@
   br i1 %Cmp54, label %CF78, label %CF82
 
 CF82:                                             ; preds = %CF83
-  %L55 = load i64* %PC
+  %L55 = load i64, i64* %PC
   store i64 %L32, i64* %PC
   %E56 = extractelement <2 x i16> %Shuff7, i32 1
   %Shuff57 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 6, i32 0>
@@ -114,7 +114,7 @@
   %FC = sitofp i64 498254 to double
   %Sl60 = select i1 false, i64 %E6, i64 -1
   %Cmp61 = icmp sgt <4 x i32> %Shuff27, %I43
-  %L62 = load i64* %PC
+  %L62 = load i64, i64* %PC
   store i64 %Sl9, i64* %PC
   %E63 = extractelement <2 x i32> %ZE, i32 0
   %Shuff64 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff13, <4 x i32> <i32 1, i32 3, i32 undef, i32 7>
@@ -126,7 +126,7 @@
 
 CF81:                                             ; preds = %CF82
   %Cmp69 = icmp ne <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, %B36
-  %L70 = load i8* %0
+  %L70 = load i8, i8* %0
   store i64 %L55, i64* %PC
   %E71 = extractelement <4 x i32> %Shuff49, i32 1
   %Shuff72 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %Shuff34, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
diff --git a/llvm/test/CodeGen/Mips/msa/llvm-stress-s3997499501.ll b/llvm/test/CodeGen/Mips/msa/llvm-stress-s3997499501.ll
index bef75f3..181f72a 100644
--- a/llvm/test/CodeGen/Mips/msa/llvm-stress-s3997499501.ll
+++ b/llvm/test/CodeGen/Mips/msa/llvm-stress-s3997499501.ll
@@ -14,7 +14,7 @@
   %A2 = alloca float
   %A1 = alloca double
   %A = alloca double
-  %L = load i8* %0
+  %L = load i8, i8* %0
   store i8 97, i8* %0
   %E = extractelement <16 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, i32 14
   %Shuff = shufflevector <2 x i1> zeroinitializer, <2 x i1> zeroinitializer, <2 x i32> <i32 1, i32 3>
@@ -22,7 +22,7 @@
   %Tr = trunc <1 x i64> zeroinitializer to <1 x i8>
   %Sl = select i1 false, double* %A1, double* %A
   %Cmp = icmp ne <2 x i64> zeroinitializer, zeroinitializer
-  %L5 = load double* %Sl
+  %L5 = load double, double* %Sl
   store float -4.374162e+06, float* %A2
   %E6 = extractelement <4 x i64> zeroinitializer, i32 3
   %Shuff7 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %I, <4 x i32> <i32 2, i32 4, i32 6, i32 undef>
@@ -34,7 +34,7 @@
   br label %CF72
 
 CF72:                                             ; preds = %CF72, %CF80, %CF78, %BB
-  %L11 = load double* %Sl
+  %L11 = load double, double* %Sl
   store double 0.000000e+00, double* %Sl
   %E12 = extractelement <2 x i1> zeroinitializer, i32 0
   br i1 %E12, label %CF72, label %CF80
@@ -49,7 +49,7 @@
   br i1 %Cmp17, label %CF72, label %CF77
 
 CF77:                                             ; preds = %CF77, %CF80
-  %L18 = load double* %Sl
+  %L18 = load double, double* %Sl
   store double 0.000000e+00, double* %Sl
   %E19 = extractelement <2 x i1> zeroinitializer, i32 0
   br i1 %E19, label %CF77, label %CF78
@@ -60,7 +60,7 @@
   %B22 = sdiv <4 x i64> %Shuff7, zeroinitializer
   %FC = uitofp i8 97 to double
   %Sl23 = select i1 %Cmp10, <2 x i1> zeroinitializer, <2 x i1> zeroinitializer
-  %L24 = load double* %Sl
+  %L24 = load double, double* %Sl
   store float %Sl16, float* %PC
   %E25 = extractelement <2 x i1> %Shuff, i32 1
   br i1 %E25, label %CF72, label %CF76
@@ -71,7 +71,7 @@
   %B28 = mul <4 x i64> %I27, zeroinitializer
   %ZE = zext <8 x i1> zeroinitializer to <8 x i64>
   %Sl29 = select i1 %Cmp17, float -4.374162e+06, float -4.374162e+06
-  %L30 = load i8* %0
+  %L30 = load i8, i8* %0
   store double %L5, double* %Sl
   %E31 = extractelement <8 x i1> zeroinitializer, i32 5
   br label %CF
@@ -85,7 +85,7 @@
   br i1 %Cmp36, label %CF, label %CF74
 
 CF74:                                             ; preds = %CF74, %CF
-  %L37 = load float* %PC
+  %L37 = load float, float* %PC
   store double 0.000000e+00, double* %Sl
   %E38 = extractelement <2 x i1> %Sl23, i32 1
   br i1 %E38, label %CF74, label %CF75
@@ -95,7 +95,7 @@
   %I40 = insertelement <4 x i64> zeroinitializer, i64 %4, i32 2
   %Sl41 = select i1 %Cmp10, i32 0, i32 %3
   %Cmp42 = icmp ne <1 x i64> zeroinitializer, zeroinitializer
-  %L43 = load double* %Sl
+  %L43 = load double, double* %Sl
   store i64 %4, i64* %2
   %E44 = extractelement <2 x i1> %Shuff20, i32 1
   br i1 %E44, label %CF75, label %CF82
@@ -109,7 +109,7 @@
   br i1 %Cmp49, label %CF75, label %CF81
 
 CF81:                                             ; preds = %CF82
-  %L50 = load i8* %0
+  %L50 = load i8, i8* %0
   store double %L43, double* %Sl
   %E51 = extractelement <4 x i64> %Shuff7, i32 3
   %Shuff52 = shufflevector <4 x float> %BC34, <4 x float> %BC34, <4 x i32> <i32 2, i32 4, i32 6, i32 0>
@@ -117,7 +117,7 @@
   %B54 = fdiv double %L24, %L43
   %BC55 = bitcast <4 x i64> zeroinitializer to <4 x double>
   %Sl56 = select i1 false, i8 %5, i8 97
-  %L57 = load i8* %0
+  %L57 = load i8, i8* %0
   store i8 %L50, i8* %0
   %E58 = extractelement <2 x i1> %Shuff20, i32 1
   br i1 %E58, label %CF, label %CF73
@@ -129,7 +129,7 @@
   %PC62 = bitcast double* %A3 to float*
   %Sl63 = select i1 %Cmp10, <1 x i64> zeroinitializer, <1 x i64> zeroinitializer
   %Cmp64 = icmp ne <2 x i1> %Cmp, %Shuff
-  %L65 = load double* %A1
+  %L65 = load double, double* %A1
   store float -4.374162e+06, float* %PC62
   %E66 = extractelement <8 x i1> %I21, i32 3
   br i1 %E66, label %CF73, label %CF79
diff --git a/llvm/test/CodeGen/Mips/msa/llvm-stress-s525530439.ll b/llvm/test/CodeGen/Mips/msa/llvm-stress-s525530439.ll
index 697871d..c0bc905 100644
--- a/llvm/test/CodeGen/Mips/msa/llvm-stress-s525530439.ll
+++ b/llvm/test/CodeGen/Mips/msa/llvm-stress-s525530439.ll
@@ -14,7 +14,7 @@
   %A2 = alloca <1 x double>
   %A1 = alloca <8 x double>
   %A = alloca i64
-  %L = load i8* %0
+  %L = load i8, i8* %0
   store i64 33695, i64* %A
   %E = extractelement <4 x i32> zeroinitializer, i32 3
   %Shuff = shufflevector <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 2, i32 0>
@@ -22,7 +22,7 @@
   %B = lshr <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
   %ZE = fpext float 0x3B64A2B880000000 to double
   %Sl = select i1 true, i16 -1, i16 -11642
-  %L5 = load i8* %0
+  %L5 = load i8, i8* %0
   store i8 0, i8* %0
   %E6 = extractelement <4 x i32> zeroinitializer, i32 2
   %Shuff7 = shufflevector <8 x i1> zeroinitializer, <8 x i1> zeroinitializer, <8 x i32> <i32 undef, i32 7, i32 9, i32 11, i32 13, i32 15, i32 1, i32 undef>
@@ -31,7 +31,7 @@
   %BC = bitcast <2 x i32> <i32 -1, i32 -1> to <2 x float>
   %Sl10 = select i1 true, i32* %1, i32* %1
   %Cmp = icmp sge <8 x i64> zeroinitializer, zeroinitializer
-  %L11 = load i32* %Sl10
+  %L11 = load i32, i32* %Sl10
   store <1 x double> zeroinitializer, <1 x double>* %A2
   %E12 = extractelement <4 x i16> zeroinitializer, i32 0
   %Shuff13 = shufflevector <1 x i64> zeroinitializer, <1 x i64> zeroinitializer, <1 x i32> undef
@@ -43,7 +43,7 @@
   br label %CF75
 
 CF75:                                             ; preds = %CF75, %BB
-  %L19 = load i32* %Sl10
+  %L19 = load i32, i32* %Sl10
   store i32 %L11, i32* %Sl10
   %E20 = extractelement <4 x i32> zeroinitializer, i32 1
   %Shuff21 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %I8, <4 x i32> <i32 undef, i32 2, i32 4, i32 6>
@@ -55,7 +55,7 @@
   br i1 %Cmp26, label %CF75, label %CF76
 
 CF76:                                             ; preds = %CF75
-  %L27 = load i32* %Sl10
+  %L27 = load i32, i32* %Sl10
   store i32 439732, i32* %Sl10
   %E28 = extractelement <4 x i32> %Shuff21, i32 3
   %Shuff29 = shufflevector <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> <i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 0>
@@ -65,7 +65,7 @@
   br label %CF74
 
 CF74:                                             ; preds = %CF74, %CF80, %CF78, %CF76
-  %L33 = load i64* %2
+  %L33 = load i64, i64* %2
   store i32 71140, i32* %Sl10
   %E34 = extractelement <4 x i32> zeroinitializer, i32 1
   %Shuff35 = shufflevector <1 x i16> zeroinitializer, <1 x i16> zeroinitializer, <1 x i32> undef
@@ -76,7 +76,7 @@
   br i1 %Cmp39, label %CF74, label %CF80
 
 CF80:                                             ; preds = %CF74
-  %L40 = load i8* %0
+  %L40 = load i8, i8* %0
   store i32 0, i32* %Sl10
   %E41 = extractelement <8 x i64> zeroinitializer, i32 1
   %Shuff42 = shufflevector <1 x i16> %I14, <1 x i16> %I14, <1 x i32> undef
@@ -86,7 +86,7 @@
   br i1 %Sl44, label %CF74, label %CF78
 
 CF78:                                             ; preds = %CF80
-  %L45 = load i32* %Sl10
+  %L45 = load i32, i32* %Sl10
   store i8 %L5, i8* %0
   %E46 = extractelement <8 x i1> %Shuff7, i32 2
   br i1 %E46, label %CF74, label %CF77
@@ -101,7 +101,7 @@
   br i1 %Cmp52, label %CF77, label %CF79
 
 CF79:                                             ; preds = %CF77
-  %L53 = load i32* %Sl10
+  %L53 = load i32, i32* %Sl10
   store i8 %L40, i8* %0
   %E54 = extractelement <4 x i32> zeroinitializer, i32 1
   %Shuff55 = shufflevector <4 x i32> %Shuff21, <4 x i32> %I8, <4 x i32> <i32 4, i32 6, i32 undef, i32 2>
@@ -109,7 +109,7 @@
   %Tr = trunc <1 x i64> %Shuff13 to <1 x i16>
   %Sl57 = select i1 %Cmp18, <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 -1, i32 -1>
   %Cmp58 = icmp uge <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %I56
-  %L59 = load i8* %0
+  %L59 = load i8, i8* %0
   store <1 x double> zeroinitializer, <1 x double>* %A2
   %E60 = extractelement <4 x i32> zeroinitializer, i32 0
   %Shuff61 = shufflevector <4 x i32> %I8, <4 x i32> %I8, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>
@@ -121,7 +121,7 @@
   br label %CF
 
 CF:                                               ; preds = %CF79
-  %L66 = load i32* %Sl10
+  %L66 = load i32, i32* %Sl10
   store i32 %E6, i32* %PC
   %E67 = extractelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i32 2
   %Shuff68 = shufflevector <4 x i32> %Sl64, <4 x i32> %I8, <4 x i32> <i32 5, i32 undef, i32 1, i32 undef>
diff --git a/llvm/test/CodeGen/Mips/msa/llvm-stress-s997348632.ll b/llvm/test/CodeGen/Mips/msa/llvm-stress-s997348632.ll
index dc4200a..a3150e9a 100644
--- a/llvm/test/CodeGen/Mips/msa/llvm-stress-s997348632.ll
+++ b/llvm/test/CodeGen/Mips/msa/llvm-stress-s997348632.ll
@@ -14,14 +14,14 @@
   %A2 = alloca <4 x i1>
   %A1 = alloca <4 x i16>
   %A = alloca <2 x i32>
-  %L = load i8* %0
+  %L = load i8, i8* %0
   store i8 %L, i8* %0
   %E = extractelement <4 x i32> zeroinitializer, i32 0
   %Shuff = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 1, i32 3, i32 5>
   %I = insertelement <2 x i1> zeroinitializer, i1 false, i32 1
   %FC = sitofp <4 x i32> zeroinitializer to <4 x double>
   %Sl = select i1 false, <4 x i64> %Shuff, <4 x i64> %Shuff
-  %L5 = load i8* %0
+  %L5 = load i8, i8* %0
   store i8 %5, i8* %0
   %E6 = extractelement <1 x i16> zeroinitializer, i32 0
   %Shuff7 = shufflevector <2 x i1> %I, <2 x i1> %I, <2 x i32> <i32 1, i32 undef>
@@ -30,7 +30,7 @@
   %FC9 = fptoui float 0x406DB70180000000 to i64
   %Sl10 = select i1 false, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
   %Cmp = icmp ult <4 x i64> zeroinitializer, zeroinitializer
-  %L11 = load i8* %0
+  %L11 = load i8, i8* %0
   store i8 %L, i8* %0
   %E12 = extractelement <4 x i64> zeroinitializer, i32 2
   %Shuff13 = shufflevector <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> <i32 5, i32 7, i32 undef, i32 3>
@@ -42,7 +42,7 @@
   br label %CF
 
 CF:                                               ; preds = %CF, %CF79, %CF84, %BB
-  %L18 = load i8* %0
+  %L18 = load i8, i8* %0
   store i8 %L, i8* %0
   %E19 = extractelement <4 x i64> %Sl, i32 3
   %Shuff20 = shufflevector <2 x i1> %Shuff7, <2 x i1> %I, <2 x i32> <i32 2, i32 0>
@@ -54,7 +54,7 @@
   br i1 %Cmp25, label %CF, label %CF79
 
 CF79:                                             ; preds = %CF
-  %L26 = load i8* %0
+  %L26 = load i8, i8* %0
   store i8 %L26, i8* %0
   %E27 = extractelement <1 x i16> zeroinitializer, i32 0
   %Shuff28 = shufflevector <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> <i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11>
@@ -65,7 +65,7 @@
   br i1 %Cmp32, label %CF, label %CF78
 
 CF78:                                             ; preds = %CF78, %CF79
-  %L33 = load i8* %0
+  %L33 = load i8, i8* %0
   store i8 %L, i8* %0
   %E34 = extractelement <16 x i32> %Shuff28, i32 1
   %Shuff35 = shufflevector <4 x i64> zeroinitializer, <4 x i64> %I21, <4 x i32> <i32 undef, i32 6, i32 0, i32 2>
@@ -76,7 +76,7 @@
   br i1 %Cmp38, label %CF78, label %CF80
 
 CF80:                                             ; preds = %CF80, %CF82, %CF78
-  %L39 = load i8* %0
+  %L39 = load i8, i8* %0
   store i8 %L, i8* %0
   %E40 = extractelement <2 x i1> %Shuff20, i32 1
   br i1 %E40, label %CF80, label %CF82
@@ -87,7 +87,7 @@
   %B43 = sub i32 %E, 0
   %Sl44 = select i1 %Cmp32, <16 x i32> %Shuff28, <16 x i32> %Shuff28
   %Cmp45 = icmp sgt <4 x i64> zeroinitializer, %I21
-  %L46 = load i8* %0
+  %L46 = load i8, i8* %0
   store i8 %L11, i8* %0
   %E47 = extractelement <8 x i32> %Sl16, i32 4
   %Shuff48 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %Shuff7, <2 x i32> <i32 undef, i32 1>
@@ -99,7 +99,7 @@
 CF81:                                             ; preds = %CF81, %CF82
   %Sl52 = select i1 false, float -6.749110e+06, float 0x406DB70180000000
   %Cmp53 = icmp uge <2 x i32> <i32 -1, i32 -1>, <i32 -1, i32 -1>
-  %L54 = load i8* %0
+  %L54 = load i8, i8* %0
   store i8 %L5, i8* %0
   %E55 = extractelement <8 x i32> zeroinitializer, i32 7
   %Shuff56 = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 undef, i32 4, i32 6, i32 0>
@@ -108,7 +108,7 @@
   %FC59 = fptoui <4 x double> %I36 to <4 x i16>
   %Sl60 = select i1 %Cmp17, <2 x i1> %I, <2 x i1> %I57
   %Cmp61 = icmp ule <8 x i32> %B50, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
-  %L62 = load i8* %0
+  %L62 = load i8, i8* %0
   store i8 %L33, i8* %0
   %E63 = extractelement <4 x i64> %Shuff, i32 2
   %Shuff64 = shufflevector <4 x i64> %Shuff56, <4 x i64> %Shuff56, <4 x i32> <i32 5, i32 7, i32 1, i32 undef>
@@ -126,7 +126,7 @@
   br i1 %Cmp69, label %CF, label %CF77
 
 CF77:                                             ; preds = %CF84
-  %L70 = load i8* %0
+  %L70 = load i8, i8* %0
   store i8 %L, i8* %0
   %E71 = extractelement <4 x i64> %Shuff, i32 0
   %Shuff72 = shufflevector <2 x i1> zeroinitializer, <2 x i1> %I, <2 x i32> <i32 3, i32 1>
diff --git a/llvm/test/CodeGen/Mips/msa/shuffle.ll b/llvm/test/CodeGen/Mips/msa/shuffle.ll
index faeec5d..7feed92 100644
--- a/llvm/test/CodeGen/Mips/msa/shuffle.ll
+++ b/llvm/test/CodeGen/Mips/msa/shuffle.ll
@@ -4,7 +4,7 @@
 define void @vshf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: vshf_v16i8_0:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -20,7 +20,7 @@
 define void @vshf_v16i8_1(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: vshf_v16i8_1:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
@@ -34,8 +34,8 @@
 define void @vshf_v16i8_2(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: vshf_v16i8_2:
 
-  %1 = load <16 x i8>* %a
-  %2 = load <16 x i8>* %b
+  %1 = load <16 x i8>, <16 x i8>* %a
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 16>
   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -51,9 +51,9 @@
 define void @vshf_v16i8_3(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: vshf_v16i8_3:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 17, i32 24, i32 25, i32 18, i32 19, i32 20, i32 28, i32 19, i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -71,7 +71,7 @@
 define void @vshf_v16i8_4(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: vshf_v16i8_4:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <16 x i8> %1, <16 x i8> %1, <16 x i32> <i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17, i32 1, i32 17>
   ; CHECK-DAG: splati.b [[R3:\$w[0-9]+]], [[R1]][1]
@@ -85,7 +85,7 @@
 define void @vshf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: vshf_v8i16_0:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -101,7 +101,7 @@
 define void @vshf_v8i16_1(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: vshf_v8i16_1:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
@@ -115,8 +115,8 @@
 define void @vshf_v8i16_2(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: vshf_v8i16_2:
 
-  %1 = load <8 x i16>* %a
-  %2 = load <8 x i16>* %b
+  %1 = load <8 x i16>, <8 x i16>* %a
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 8>
   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -132,9 +132,9 @@
 define void @vshf_v8i16_3(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: vshf_v8i16_3:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 8, i32 9, i32 2, i32 3, i32 4, i32 12, i32 3>
   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -152,7 +152,7 @@
 define void @vshf_v8i16_4(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: vshf_v8i16_4:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <8 x i16> %1, <8 x i16> %1, <8 x i32> <i32 1, i32 9, i32 1, i32 9, i32 1, i32 9, i32 1, i32 9>
   ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][1]
@@ -169,7 +169,7 @@
 define void @vshf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: vshf_v4i32_0:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
@@ -183,7 +183,7 @@
 define void @vshf_v4i32_1(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: vshf_v4i32_1:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
@@ -197,8 +197,8 @@
 define void @vshf_v4i32_2(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: vshf_v4i32_2:
 
-  %1 = load <4 x i32>* %a
-  %2 = load <4 x i32>* %b
+  %1 = load <4 x i32>, <4 x i32>* %a
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 4, i32 5, i32 6, i32 4>
   ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R2]], 36
@@ -212,9 +212,9 @@
 define void @vshf_v4i32_3(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: vshf_v4i32_3:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 6, i32 4>
   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -232,7 +232,7 @@
 define void @vshf_v4i32_4(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: vshf_v4i32_4:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <4 x i32> %1, <4 x i32> %1, <4 x i32> <i32 1, i32 5, i32 5, i32 1>
   ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 85
@@ -246,7 +246,7 @@
 define void @vshf_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: vshf_v2i64_0:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -262,7 +262,7 @@
 define void @vshf_v2i64_1(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: vshf_v2i64_1:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
@@ -276,8 +276,8 @@
 define void @vshf_v2i64_2(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: vshf_v2i64_2:
 
-  %1 = load <2 x i64>* %a
-  %2 = load <2 x i64>* %b
+  %1 = load <2 x i64>, <2 x i64>* %a
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 3, i32 2>
   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -293,9 +293,9 @@
 define void @vshf_v2i64_3(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: vshf_v2i64_3:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 2>
   ; CHECK-DAG: addiu [[PTR_A:\$[0-9]+]], {{.*}}, %lo($
@@ -313,7 +313,7 @@
 define void @vshf_v2i64_4(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: vshf_v2i64_4:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <2 x i64> %1, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
@@ -327,7 +327,7 @@
 define void @shf_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: shf_v16i8_0:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 0, i32 5, i32 7, i32 6, i32 4, i32 9, i32 11, i32 10, i32 8, i32 13, i32 15, i32 14, i32 12>
   ; CHECK-DAG: shf.b [[R3:\$w[0-9]+]], [[R1]], 45
@@ -341,7 +341,7 @@
 define void @shf_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: shf_v8i16_0:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   ; CHECK-DAG: shf.h [[R3:\$w[0-9]+]], [[R1]], 27
@@ -355,7 +355,7 @@
 define void @shf_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: shf_v4i32_0:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   ; CHECK-DAG: shf.w [[R3:\$w[0-9]+]], [[R1]], 27
@@ -371,9 +371,9 @@
 define void @ilvev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: ilvev_v16i8_0:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                      <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
@@ -388,9 +388,9 @@
 define void @ilvev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: ilvev_v8i16_0:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
   ; CHECK-DAG: ilvev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -404,9 +404,9 @@
 define void @ilvev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: ilvev_v4i32_0:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   ; CHECK-DAG: ilvev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -420,9 +420,9 @@
 define void @ilvev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: ilvev_v2i64_0:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
   ; CHECK-DAG: ilvev.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -436,9 +436,9 @@
 define void @ilvod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: ilvod_v16i8_0:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                      <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
@@ -453,9 +453,9 @@
 define void @ilvod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: ilvod_v8i16_0:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   ; CHECK-DAG: ilvod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -469,9 +469,9 @@
 define void @ilvod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: ilvod_v4i32_0:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   ; CHECK-DAG: ilvod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -485,9 +485,9 @@
 define void @ilvod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: ilvod_v2i64_0:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
   ; CHECK-DAG: ilvod.d [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -501,9 +501,9 @@
 define void @ilvl_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: ilvl_v16i8_0:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                      <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -518,9 +518,9 @@
 define void @ilvl_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: ilvl_v8i16_0:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   ; CHECK-DAG: ilvl.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -534,9 +534,9 @@
 define void @ilvl_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: ilvl_v4i32_0:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
   ; CHECK-DAG: ilvl.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -550,9 +550,9 @@
 define void @ilvl_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: ilvl_v2i64_0:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
   ; ilvl.d and ilvev.d are equivalent for v2i64
@@ -567,9 +567,9 @@
 define void @ilvr_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: ilvr_v16i8_0:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                      <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
@@ -584,9 +584,9 @@
 define void @ilvr_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: ilvr_v8i16_0:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
   ; CHECK-DAG: ilvr.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -600,9 +600,9 @@
 define void @ilvr_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: ilvr_v4i32_0:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
   ; CHECK-DAG: ilvr.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -616,9 +616,9 @@
 define void @ilvr_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: ilvr_v2i64_0:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
   ; ilvr.d and ilvod.d are equivalent for v2i64
@@ -633,9 +633,9 @@
 define void @pckev_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: pckev_v16i8_0:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                      <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
@@ -650,9 +650,9 @@
 define void @pckev_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: pckev_v8i16_0:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   ; CHECK-DAG: pckev.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -666,9 +666,9 @@
 define void @pckev_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: pckev_v4i32_0:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
   ; CHECK-DAG: pckev.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -682,9 +682,9 @@
 define void @pckev_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: pckev_v2i64_0:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 0, i32 2>
   ; pckev.d and ilvev.d are equivalent for v2i64
@@ -699,9 +699,9 @@
 define void @pckod_v16i8_0(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
   ; CHECK: pckod_v16i8_0:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <16 x i8>* %b
+  %2 = load <16 x i8>, <16 x i8>* %b
   ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <16 x i8> %1, <16 x i8> %2,
                      <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
@@ -716,9 +716,9 @@
 define void @pckod_v8i16_0(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
   ; CHECK: pckod_v8i16_0:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <8 x i16>* %b
+  %2 = load <8 x i16>, <8 x i16>* %b
   ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
   ; CHECK-DAG: pckod.h [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -732,9 +732,9 @@
 define void @pckod_v4i32_0(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind {
   ; CHECK: pckod_v4i32_0:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <4 x i32>* %b
+  %2 = load <4 x i32>, <4 x i32>* %b
   ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
   ; CHECK-DAG: pckod.w [[R3:\$w[0-9]+]], [[R1]], [[R2]]
@@ -748,9 +748,9 @@
 define void @pckod_v2i64_0(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind {
   ; CHECK: pckod_v2i64_0:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = load <2 x i64>* %b
+  %2 = load <2 x i64>, <2 x i64>* %b
   ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6)
   %3 = shufflevector <2 x i64> %1, <2 x i64> %2, <2 x i32> <i32 1, i32 3>
   ; pckod.d and ilvod.d are equivalent for v2i64
@@ -765,7 +765,7 @@
 define void @splati_v16i8_0(<16 x i8>* %c, <16 x i8>* %a) nounwind {
   ; CHECK: splati_v16i8_0:
 
-  %1 = load <16 x i8>* %a
+  %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <16 x i8> %1, <16 x i8> undef,
                      <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
@@ -780,7 +780,7 @@
 define void @splati_v8i16_0(<8 x i16>* %c, <8 x i16>* %a) nounwind {
   ; CHECK: splati_v8i16_0:
 
-  %1 = load <8 x i16>* %a
+  %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
   ; CHECK-DAG: splati.h [[R3:\$w[0-9]+]], [[R1]][4]
@@ -794,7 +794,7 @@
 define void @splati_v4i32_0(<4 x i32>* %c, <4 x i32>* %a) nounwind {
   ; CHECK: splati_v4i32_0:
 
-  %1 = load <4 x i32>* %a
+  %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
   ; shf.w and splati.w are equivalent
@@ -809,7 +809,7 @@
 define void @splati_v2i64_0(<2 x i64>* %c, <2 x i64>* %a) nounwind {
   ; CHECK: splati_v2i64_0:
 
-  %1 = load <2 x i64>* %a
+  %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
   %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
   ; CHECK-DAG: splati.d [[R3:\$w[0-9]+]], [[R1]][1]
diff --git a/llvm/test/CodeGen/Mips/msa/spill.ll b/llvm/test/CodeGen/Mips/msa/spill.ll
index 085a16e..8c9a799 100644
--- a/llvm/test/CodeGen/Mips/msa/spill.ll
+++ b/llvm/test/CodeGen/Mips/msa/spill.ll
@@ -39,40 +39,40 @@
   %p31 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 31
   %p32 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 32
   %p33 = getelementptr <16 x i8>, <16 x i8>* %p0, i32 33
-  %0  = load <16 x i8>* %p0, align 16
-  %1  = load <16 x i8>* %p1, align 16
-  %2  = load <16 x i8>* %p2, align 16
-  %3  = load <16 x i8>* %p3, align 16
-  %4  = load <16 x i8>* %p4, align 16
-  %5  = load <16 x i8>* %p5, align 16
-  %6  = load <16 x i8>* %p6, align 16
-  %7  = load <16 x i8>* %p7, align 16
-  %8  = load <16 x i8>* %p8, align 16
-  %9  = load <16 x i8>* %p9, align 16
-  %10 = load <16 x i8>* %p10, align 16
-  %11 = load <16 x i8>* %p11, align 16
-  %12 = load <16 x i8>* %p12, align 16
-  %13 = load <16 x i8>* %p13, align 16
-  %14 = load <16 x i8>* %p14, align 16
-  %15 = load <16 x i8>* %p15, align 16
-  %16 = load <16 x i8>* %p16, align 16
-  %17 = load <16 x i8>* %p17, align 16
-  %18 = load <16 x i8>* %p18, align 16
-  %19 = load <16 x i8>* %p19, align 16
-  %20 = load <16 x i8>* %p20, align 16
-  %21 = load <16 x i8>* %p21, align 16
-  %22 = load <16 x i8>* %p22, align 16
-  %23 = load <16 x i8>* %p23, align 16
-  %24 = load <16 x i8>* %p24, align 16
-  %25 = load <16 x i8>* %p25, align 16
-  %26 = load <16 x i8>* %p26, align 16
-  %27 = load <16 x i8>* %p27, align 16
-  %28 = load <16 x i8>* %p28, align 16
-  %29 = load <16 x i8>* %p29, align 16
-  %30 = load <16 x i8>* %p30, align 16
-  %31 = load <16 x i8>* %p31, align 16
-  %32 = load <16 x i8>* %p32, align 16
-  %33 = load <16 x i8>* %p33, align 16
+  %0  = load <16 x i8>, <16 x i8>* %p0, align 16
+  %1  = load <16 x i8>, <16 x i8>* %p1, align 16
+  %2  = load <16 x i8>, <16 x i8>* %p2, align 16
+  %3  = load <16 x i8>, <16 x i8>* %p3, align 16
+  %4  = load <16 x i8>, <16 x i8>* %p4, align 16
+  %5  = load <16 x i8>, <16 x i8>* %p5, align 16
+  %6  = load <16 x i8>, <16 x i8>* %p6, align 16
+  %7  = load <16 x i8>, <16 x i8>* %p7, align 16
+  %8  = load <16 x i8>, <16 x i8>* %p8, align 16
+  %9  = load <16 x i8>, <16 x i8>* %p9, align 16
+  %10 = load <16 x i8>, <16 x i8>* %p10, align 16
+  %11 = load <16 x i8>, <16 x i8>* %p11, align 16
+  %12 = load <16 x i8>, <16 x i8>* %p12, align 16
+  %13 = load <16 x i8>, <16 x i8>* %p13, align 16
+  %14 = load <16 x i8>, <16 x i8>* %p14, align 16
+  %15 = load <16 x i8>, <16 x i8>* %p15, align 16
+  %16 = load <16 x i8>, <16 x i8>* %p16, align 16
+  %17 = load <16 x i8>, <16 x i8>* %p17, align 16
+  %18 = load <16 x i8>, <16 x i8>* %p18, align 16
+  %19 = load <16 x i8>, <16 x i8>* %p19, align 16
+  %20 = load <16 x i8>, <16 x i8>* %p20, align 16
+  %21 = load <16 x i8>, <16 x i8>* %p21, align 16
+  %22 = load <16 x i8>, <16 x i8>* %p22, align 16
+  %23 = load <16 x i8>, <16 x i8>* %p23, align 16
+  %24 = load <16 x i8>, <16 x i8>* %p24, align 16
+  %25 = load <16 x i8>, <16 x i8>* %p25, align 16
+  %26 = load <16 x i8>, <16 x i8>* %p26, align 16
+  %27 = load <16 x i8>, <16 x i8>* %p27, align 16
+  %28 = load <16 x i8>, <16 x i8>* %p28, align 16
+  %29 = load <16 x i8>, <16 x i8>* %p29, align 16
+  %30 = load <16 x i8>, <16 x i8>* %p30, align 16
+  %31 = load <16 x i8>, <16 x i8>* %p31, align 16
+  %32 = load <16 x i8>, <16 x i8>* %p32, align 16
+  %33 = load <16 x i8>, <16 x i8>* %p33, align 16
   %r1  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %0,   <16 x i8> %1)
   %r2  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r1,  <16 x i8> %2)
   %r3  = call <16 x i8> @llvm.mips.addv.b(<16 x i8> %r2,  <16 x i8> %3)
@@ -188,40 +188,40 @@
   %p31 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 31
   %p32 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 32
   %p33 = getelementptr <8 x i16>, <8 x i16>* %p0, i32 33
-  %0  = load <8 x i16>* %p0, align 16
-  %1  = load <8 x i16>* %p1, align 16
-  %2  = load <8 x i16>* %p2, align 16
-  %3  = load <8 x i16>* %p3, align 16
-  %4  = load <8 x i16>* %p4, align 16
-  %5  = load <8 x i16>* %p5, align 16
-  %6  = load <8 x i16>* %p6, align 16
-  %7  = load <8 x i16>* %p7, align 16
-  %8  = load <8 x i16>* %p8, align 16
-  %9  = load <8 x i16>* %p9, align 16
-  %10 = load <8 x i16>* %p10, align 16
-  %11 = load <8 x i16>* %p11, align 16
-  %12 = load <8 x i16>* %p12, align 16
-  %13 = load <8 x i16>* %p13, align 16
-  %14 = load <8 x i16>* %p14, align 16
-  %15 = load <8 x i16>* %p15, align 16
-  %16 = load <8 x i16>* %p16, align 16
-  %17 = load <8 x i16>* %p17, align 16
-  %18 = load <8 x i16>* %p18, align 16
-  %19 = load <8 x i16>* %p19, align 16
-  %20 = load <8 x i16>* %p20, align 16
-  %21 = load <8 x i16>* %p21, align 16
-  %22 = load <8 x i16>* %p22, align 16
-  %23 = load <8 x i16>* %p23, align 16
-  %24 = load <8 x i16>* %p24, align 16
-  %25 = load <8 x i16>* %p25, align 16
-  %26 = load <8 x i16>* %p26, align 16
-  %27 = load <8 x i16>* %p27, align 16
-  %28 = load <8 x i16>* %p28, align 16
-  %29 = load <8 x i16>* %p29, align 16
-  %30 = load <8 x i16>* %p30, align 16
-  %31 = load <8 x i16>* %p31, align 16
-  %32 = load <8 x i16>* %p32, align 16
-  %33 = load <8 x i16>* %p33, align 16
+  %0  = load <8 x i16>, <8 x i16>* %p0, align 16
+  %1  = load <8 x i16>, <8 x i16>* %p1, align 16
+  %2  = load <8 x i16>, <8 x i16>* %p2, align 16
+  %3  = load <8 x i16>, <8 x i16>* %p3, align 16
+  %4  = load <8 x i16>, <8 x i16>* %p4, align 16
+  %5  = load <8 x i16>, <8 x i16>* %p5, align 16
+  %6  = load <8 x i16>, <8 x i16>* %p6, align 16
+  %7  = load <8 x i16>, <8 x i16>* %p7, align 16
+  %8  = load <8 x i16>, <8 x i16>* %p8, align 16
+  %9  = load <8 x i16>, <8 x i16>* %p9, align 16
+  %10 = load <8 x i16>, <8 x i16>* %p10, align 16
+  %11 = load <8 x i16>, <8 x i16>* %p11, align 16
+  %12 = load <8 x i16>, <8 x i16>* %p12, align 16
+  %13 = load <8 x i16>, <8 x i16>* %p13, align 16
+  %14 = load <8 x i16>, <8 x i16>* %p14, align 16
+  %15 = load <8 x i16>, <8 x i16>* %p15, align 16
+  %16 = load <8 x i16>, <8 x i16>* %p16, align 16
+  %17 = load <8 x i16>, <8 x i16>* %p17, align 16
+  %18 = load <8 x i16>, <8 x i16>* %p18, align 16
+  %19 = load <8 x i16>, <8 x i16>* %p19, align 16
+  %20 = load <8 x i16>, <8 x i16>* %p20, align 16
+  %21 = load <8 x i16>, <8 x i16>* %p21, align 16
+  %22 = load <8 x i16>, <8 x i16>* %p22, align 16
+  %23 = load <8 x i16>, <8 x i16>* %p23, align 16
+  %24 = load <8 x i16>, <8 x i16>* %p24, align 16
+  %25 = load <8 x i16>, <8 x i16>* %p25, align 16
+  %26 = load <8 x i16>, <8 x i16>* %p26, align 16
+  %27 = load <8 x i16>, <8 x i16>* %p27, align 16
+  %28 = load <8 x i16>, <8 x i16>* %p28, align 16
+  %29 = load <8 x i16>, <8 x i16>* %p29, align 16
+  %30 = load <8 x i16>, <8 x i16>* %p30, align 16
+  %31 = load <8 x i16>, <8 x i16>* %p31, align 16
+  %32 = load <8 x i16>, <8 x i16>* %p32, align 16
+  %33 = load <8 x i16>, <8 x i16>* %p33, align 16
   %r1  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %0,   <8 x i16> %1)
   %r2  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r1,  <8 x i16> %2)
   %r3  = call <8 x i16> @llvm.mips.addv.h(<8 x i16> %r2,  <8 x i16> %3)
@@ -337,40 +337,40 @@
   %p31 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 31
   %p32 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 32
   %p33 = getelementptr <4 x i32>, <4 x i32>* %p0, i32 33
-  %0  = load <4 x i32>* %p0, align 16
-  %1  = load <4 x i32>* %p1, align 16
-  %2  = load <4 x i32>* %p2, align 16
-  %3  = load <4 x i32>* %p3, align 16
-  %4  = load <4 x i32>* %p4, align 16
-  %5  = load <4 x i32>* %p5, align 16
-  %6  = load <4 x i32>* %p6, align 16
-  %7  = load <4 x i32>* %p7, align 16
-  %8  = load <4 x i32>* %p8, align 16
-  %9  = load <4 x i32>* %p9, align 16
-  %10 = load <4 x i32>* %p10, align 16
-  %11 = load <4 x i32>* %p11, align 16
-  %12 = load <4 x i32>* %p12, align 16
-  %13 = load <4 x i32>* %p13, align 16
-  %14 = load <4 x i32>* %p14, align 16
-  %15 = load <4 x i32>* %p15, align 16
-  %16 = load <4 x i32>* %p16, align 16
-  %17 = load <4 x i32>* %p17, align 16
-  %18 = load <4 x i32>* %p18, align 16
-  %19 = load <4 x i32>* %p19, align 16
-  %20 = load <4 x i32>* %p20, align 16
-  %21 = load <4 x i32>* %p21, align 16
-  %22 = load <4 x i32>* %p22, align 16
-  %23 = load <4 x i32>* %p23, align 16
-  %24 = load <4 x i32>* %p24, align 16
-  %25 = load <4 x i32>* %p25, align 16
-  %26 = load <4 x i32>* %p26, align 16
-  %27 = load <4 x i32>* %p27, align 16
-  %28 = load <4 x i32>* %p28, align 16
-  %29 = load <4 x i32>* %p29, align 16
-  %30 = load <4 x i32>* %p30, align 16
-  %31 = load <4 x i32>* %p31, align 16
-  %32 = load <4 x i32>* %p32, align 16
-  %33 = load <4 x i32>* %p33, align 16
+  %0  = load <4 x i32>, <4 x i32>* %p0, align 16
+  %1  = load <4 x i32>, <4 x i32>* %p1, align 16
+  %2  = load <4 x i32>, <4 x i32>* %p2, align 16
+  %3  = load <4 x i32>, <4 x i32>* %p3, align 16
+  %4  = load <4 x i32>, <4 x i32>* %p4, align 16
+  %5  = load <4 x i32>, <4 x i32>* %p5, align 16
+  %6  = load <4 x i32>, <4 x i32>* %p6, align 16
+  %7  = load <4 x i32>, <4 x i32>* %p7, align 16
+  %8  = load <4 x i32>, <4 x i32>* %p8, align 16
+  %9  = load <4 x i32>, <4 x i32>* %p9, align 16
+  %10 = load <4 x i32>, <4 x i32>* %p10, align 16
+  %11 = load <4 x i32>, <4 x i32>* %p11, align 16
+  %12 = load <4 x i32>, <4 x i32>* %p12, align 16
+  %13 = load <4 x i32>, <4 x i32>* %p13, align 16
+  %14 = load <4 x i32>, <4 x i32>* %p14, align 16
+  %15 = load <4 x i32>, <4 x i32>* %p15, align 16
+  %16 = load <4 x i32>, <4 x i32>* %p16, align 16
+  %17 = load <4 x i32>, <4 x i32>* %p17, align 16
+  %18 = load <4 x i32>, <4 x i32>* %p18, align 16
+  %19 = load <4 x i32>, <4 x i32>* %p19, align 16
+  %20 = load <4 x i32>, <4 x i32>* %p20, align 16
+  %21 = load <4 x i32>, <4 x i32>* %p21, align 16
+  %22 = load <4 x i32>, <4 x i32>* %p22, align 16
+  %23 = load <4 x i32>, <4 x i32>* %p23, align 16
+  %24 = load <4 x i32>, <4 x i32>* %p24, align 16
+  %25 = load <4 x i32>, <4 x i32>* %p25, align 16
+  %26 = load <4 x i32>, <4 x i32>* %p26, align 16
+  %27 = load <4 x i32>, <4 x i32>* %p27, align 16
+  %28 = load <4 x i32>, <4 x i32>* %p28, align 16
+  %29 = load <4 x i32>, <4 x i32>* %p29, align 16
+  %30 = load <4 x i32>, <4 x i32>* %p30, align 16
+  %31 = load <4 x i32>, <4 x i32>* %p31, align 16
+  %32 = load <4 x i32>, <4 x i32>* %p32, align 16
+  %33 = load <4 x i32>, <4 x i32>* %p33, align 16
   %r1 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %0, <4 x i32> %1)
   %r2 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r1, <4 x i32> %2)
   %r3 = call <4 x i32> @llvm.mips.addv.w(<4 x i32> %r2, <4 x i32> %3)
@@ -486,40 +486,40 @@
   %p31 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 31
   %p32 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 32
   %p33 = getelementptr <2 x i64>, <2 x i64>* %p0, i32 33
-  %0  = load <2 x i64>* %p0, align 16
-  %1  = load <2 x i64>* %p1, align 16
-  %2  = load <2 x i64>* %p2, align 16
-  %3  = load <2 x i64>* %p3, align 16
-  %4  = load <2 x i64>* %p4, align 16
-  %5  = load <2 x i64>* %p5, align 16
-  %6  = load <2 x i64>* %p6, align 16
-  %7  = load <2 x i64>* %p7, align 16
-  %8  = load <2 x i64>* %p8, align 16
-  %9  = load <2 x i64>* %p9, align 16
-  %10 = load <2 x i64>* %p10, align 16
-  %11 = load <2 x i64>* %p11, align 16
-  %12 = load <2 x i64>* %p12, align 16
-  %13 = load <2 x i64>* %p13, align 16
-  %14 = load <2 x i64>* %p14, align 16
-  %15 = load <2 x i64>* %p15, align 16
-  %16 = load <2 x i64>* %p16, align 16
-  %17 = load <2 x i64>* %p17, align 16
-  %18 = load <2 x i64>* %p18, align 16
-  %19 = load <2 x i64>* %p19, align 16
-  %20 = load <2 x i64>* %p20, align 16
-  %21 = load <2 x i64>* %p21, align 16
-  %22 = load <2 x i64>* %p22, align 16
-  %23 = load <2 x i64>* %p23, align 16
-  %24 = load <2 x i64>* %p24, align 16
-  %25 = load <2 x i64>* %p25, align 16
-  %26 = load <2 x i64>* %p26, align 16
-  %27 = load <2 x i64>* %p27, align 16
-  %28 = load <2 x i64>* %p28, align 16
-  %29 = load <2 x i64>* %p29, align 16
-  %30 = load <2 x i64>* %p30, align 16
-  %31 = load <2 x i64>* %p31, align 16
-  %32 = load <2 x i64>* %p32, align 16
-  %33 = load <2 x i64>* %p33, align 16
+  %0  = load <2 x i64>, <2 x i64>* %p0, align 16
+  %1  = load <2 x i64>, <2 x i64>* %p1, align 16
+  %2  = load <2 x i64>, <2 x i64>* %p2, align 16
+  %3  = load <2 x i64>, <2 x i64>* %p3, align 16
+  %4  = load <2 x i64>, <2 x i64>* %p4, align 16
+  %5  = load <2 x i64>, <2 x i64>* %p5, align 16
+  %6  = load <2 x i64>, <2 x i64>* %p6, align 16
+  %7  = load <2 x i64>, <2 x i64>* %p7, align 16
+  %8  = load <2 x i64>, <2 x i64>* %p8, align 16
+  %9  = load <2 x i64>, <2 x i64>* %p9, align 16
+  %10 = load <2 x i64>, <2 x i64>* %p10, align 16
+  %11 = load <2 x i64>, <2 x i64>* %p11, align 16
+  %12 = load <2 x i64>, <2 x i64>* %p12, align 16
+  %13 = load <2 x i64>, <2 x i64>* %p13, align 16
+  %14 = load <2 x i64>, <2 x i64>* %p14, align 16
+  %15 = load <2 x i64>, <2 x i64>* %p15, align 16
+  %16 = load <2 x i64>, <2 x i64>* %p16, align 16
+  %17 = load <2 x i64>, <2 x i64>* %p17, align 16
+  %18 = load <2 x i64>, <2 x i64>* %p18, align 16
+  %19 = load <2 x i64>, <2 x i64>* %p19, align 16
+  %20 = load <2 x i64>, <2 x i64>* %p20, align 16
+  %21 = load <2 x i64>, <2 x i64>* %p21, align 16
+  %22 = load <2 x i64>, <2 x i64>* %p22, align 16
+  %23 = load <2 x i64>, <2 x i64>* %p23, align 16
+  %24 = load <2 x i64>, <2 x i64>* %p24, align 16
+  %25 = load <2 x i64>, <2 x i64>* %p25, align 16
+  %26 = load <2 x i64>, <2 x i64>* %p26, align 16
+  %27 = load <2 x i64>, <2 x i64>* %p27, align 16
+  %28 = load <2 x i64>, <2 x i64>* %p28, align 16
+  %29 = load <2 x i64>, <2 x i64>* %p29, align 16
+  %30 = load <2 x i64>, <2 x i64>* %p30, align 16
+  %31 = load <2 x i64>, <2 x i64>* %p31, align 16
+  %32 = load <2 x i64>, <2 x i64>* %p32, align 16
+  %33 = load <2 x i64>, <2 x i64>* %p33, align 16
   %r1  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %0,   <2 x i64> %1)
   %r2  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r1,  <2 x i64> %2)
   %r3  = call <2 x i64> @llvm.mips.addv.d(<2 x i64> %r2,  <2 x i64> %3)
diff --git a/llvm/test/CodeGen/Mips/msa/vec.ll b/llvm/test/CodeGen/Mips/msa/vec.ll
index d5b97f5..8790923 100644
--- a/llvm/test/CodeGen/Mips/msa/vec.ll
+++ b/llvm/test/CodeGen/Mips/msa/vec.ll
@@ -9,8 +9,8 @@
 
 define void @llvm_mips_and_v_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_and_v_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_and_v_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_and_v_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_and_v_b_ARG2
   %2 = bitcast <16 x i8> %0 to <16 x i8>
   %3 = bitcast <16 x i8> %1 to <16 x i8>
   %4 = tail call <16 x i8> @llvm.mips.and.v(<16 x i8> %2, <16 x i8> %3)
@@ -32,8 +32,8 @@
 
 define void @llvm_mips_and_v_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_and_v_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_and_v_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_and_v_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_and_v_h_ARG2
   %2 = bitcast <8 x i16> %0 to <16 x i8>
   %3 = bitcast <8 x i16> %1 to <16 x i8>
   %4 = tail call <16 x i8> @llvm.mips.and.v(<16 x i8> %2, <16 x i8> %3)
@@ -55,8 +55,8 @@
 
 define void @llvm_mips_and_v_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_and_v_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_and_v_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_and_v_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_and_v_w_ARG2
   %2 = bitcast <4 x i32> %0 to <16 x i8>
   %3 = bitcast <4 x i32> %1 to <16 x i8>
   %4 = tail call <16 x i8> @llvm.mips.and.v(<16 x i8> %2, <16 x i8> %3)
@@ -78,8 +78,8 @@
 
 define void @llvm_mips_and_v_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_and_v_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_and_v_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_and_v_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_and_v_d_ARG2
   %2 = bitcast <2 x i64> %0 to <16 x i8>
   %3 = bitcast <2 x i64> %1 to <16 x i8>
   %4 = tail call <16 x i8> @llvm.mips.and.v(<16 x i8> %2, <16 x i8> %3)
@@ -97,8 +97,8 @@
 ;
 define void @and_v_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_and_v_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_and_v_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_and_v_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_and_v_b_ARG2
   %2 = and <16 x i8> %0, %1
   store <16 x i8> %2, <16 x i8>* @llvm_mips_and_v_b_RES
   ret void
@@ -113,8 +113,8 @@
 ;
 define void @and_v_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_and_v_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_and_v_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_and_v_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_and_v_h_ARG2
   %2 = and <8 x i16> %0, %1
   store <8 x i16> %2, <8 x i16>* @llvm_mips_and_v_h_RES
   ret void
@@ -130,8 +130,8 @@
 
 define void @and_v_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_and_v_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_and_v_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_and_v_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_and_v_w_ARG2
   %2 = and <4 x i32> %0, %1
   store <4 x i32> %2, <4 x i32>* @llvm_mips_and_v_w_RES
   ret void
@@ -147,8 +147,8 @@
 
 define void @and_v_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_and_v_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_and_v_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_and_v_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_and_v_d_ARG2
   %2 = and <2 x i64> %0, %1
   store <2 x i64> %2, <2 x i64>* @llvm_mips_and_v_d_RES
   ret void
@@ -168,9 +168,9 @@
 
 define void @llvm_mips_bmnz_v_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_bmnz_v_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_bmnz_v_b_ARG2
-  %2 = load <16 x i8>* @llvm_mips_bmnz_v_b_ARG3
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnz_v_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnz_v_b_ARG2
+  %2 = load <16 x i8>, <16 x i8>* @llvm_mips_bmnz_v_b_ARG3
   %3 = bitcast <16 x i8> %0 to <16 x i8>
   %4 = bitcast <16 x i8> %1 to <16 x i8>
   %5 = bitcast <16 x i8> %2 to <16 x i8>
@@ -198,9 +198,9 @@
 
 define void @llvm_mips_bmnz_v_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_bmnz_v_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_bmnz_v_h_ARG2
-  %2 = load <8 x i16>* @llvm_mips_bmnz_v_h_ARG3
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bmnz_v_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_bmnz_v_h_ARG2
+  %2 = load <8 x i16>, <8 x i16>* @llvm_mips_bmnz_v_h_ARG3
   %3 = bitcast <8 x i16> %0 to <16 x i8>
   %4 = bitcast <8 x i16> %1 to <16 x i8>
   %5 = bitcast <8 x i16> %2 to <16 x i8>
@@ -228,9 +228,9 @@
 
 define void @llvm_mips_bmnz_v_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_bmnz_v_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_bmnz_v_w_ARG2
-  %2 = load <4 x i32>* @llvm_mips_bmnz_v_w_ARG3
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bmnz_v_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_bmnz_v_w_ARG2
+  %2 = load <4 x i32>, <4 x i32>* @llvm_mips_bmnz_v_w_ARG3
   %3 = bitcast <4 x i32> %0 to <16 x i8>
   %4 = bitcast <4 x i32> %1 to <16 x i8>
   %5 = bitcast <4 x i32> %2 to <16 x i8>
@@ -258,9 +258,9 @@
 
 define void @llvm_mips_bmnz_v_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_bmnz_v_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_bmnz_v_d_ARG2
-  %2 = load <2 x i64>* @llvm_mips_bmnz_v_d_ARG3
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bmnz_v_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_bmnz_v_d_ARG2
+  %2 = load <2 x i64>, <2 x i64>* @llvm_mips_bmnz_v_d_ARG3
   %3 = bitcast <2 x i64> %0 to <16 x i8>
   %4 = bitcast <2 x i64> %1 to <16 x i8>
   %5 = bitcast <2 x i64> %2 to <16 x i8>
@@ -288,9 +288,9 @@
 
 define void @llvm_mips_bmz_v_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_bmz_v_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_bmz_v_b_ARG2
-  %2 = load <16 x i8>* @llvm_mips_bmz_v_b_ARG3
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bmz_v_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bmz_v_b_ARG2
+  %2 = load <16 x i8>, <16 x i8>* @llvm_mips_bmz_v_b_ARG3
   %3 = bitcast <16 x i8> %0 to <16 x i8>
   %4 = bitcast <16 x i8> %1 to <16 x i8>
   %5 = bitcast <16 x i8> %2 to <16 x i8>
@@ -319,9 +319,9 @@
 
 define void @llvm_mips_bmz_v_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_bmz_v_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_bmz_v_h_ARG2
-  %2 = load <8 x i16>* @llvm_mips_bmz_v_h_ARG3
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bmz_v_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_bmz_v_h_ARG2
+  %2 = load <8 x i16>, <8 x i16>* @llvm_mips_bmz_v_h_ARG3
   %3 = bitcast <8 x i16> %0 to <16 x i8>
   %4 = bitcast <8 x i16> %1 to <16 x i8>
   %5 = bitcast <8 x i16> %2 to <16 x i8>
@@ -350,9 +350,9 @@
 
 define void @llvm_mips_bmz_v_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_bmz_v_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_bmz_v_w_ARG2
-  %2 = load <4 x i32>* @llvm_mips_bmz_v_w_ARG3
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bmz_v_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_bmz_v_w_ARG2
+  %2 = load <4 x i32>, <4 x i32>* @llvm_mips_bmz_v_w_ARG3
   %3 = bitcast <4 x i32> %0 to <16 x i8>
   %4 = bitcast <4 x i32> %1 to <16 x i8>
   %5 = bitcast <4 x i32> %2 to <16 x i8>
@@ -381,9 +381,9 @@
 
 define void @llvm_mips_bmz_v_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_bmz_v_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_bmz_v_d_ARG2
-  %2 = load <2 x i64>* @llvm_mips_bmz_v_d_ARG3
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bmz_v_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_bmz_v_d_ARG2
+  %2 = load <2 x i64>, <2 x i64>* @llvm_mips_bmz_v_d_ARG3
   %3 = bitcast <2 x i64> %0 to <16 x i8>
   %4 = bitcast <2 x i64> %1 to <16 x i8>
   %5 = bitcast <2 x i64> %2 to <16 x i8>
@@ -412,9 +412,9 @@
 
 define void @llvm_mips_bsel_v_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_bsel_v_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_bsel_v_b_ARG2
-  %2 = load <16 x i8>* @llvm_mips_bsel_v_b_ARG3
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bsel_v_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_bsel_v_b_ARG2
+  %2 = load <16 x i8>, <16 x i8>* @llvm_mips_bsel_v_b_ARG3
   %3 = bitcast <16 x i8> %0 to <16 x i8>
   %4 = bitcast <16 x i8> %1 to <16 x i8>
   %5 = bitcast <16 x i8> %2 to <16 x i8>
@@ -443,9 +443,9 @@
 
 define void @llvm_mips_bsel_v_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_bsel_v_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_bsel_v_h_ARG2
-  %2 = load <8 x i16>* @llvm_mips_bsel_v_h_ARG3
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_bsel_v_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_bsel_v_h_ARG2
+  %2 = load <8 x i16>, <8 x i16>* @llvm_mips_bsel_v_h_ARG3
   %3 = bitcast <8 x i16> %0 to <16 x i8>
   %4 = bitcast <8 x i16> %1 to <16 x i8>
   %5 = bitcast <8 x i16> %2 to <16 x i8>
@@ -474,9 +474,9 @@
 
 define void @llvm_mips_bsel_v_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_bsel_v_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_bsel_v_w_ARG2
-  %2 = load <4 x i32>* @llvm_mips_bsel_v_w_ARG3
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_bsel_v_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_bsel_v_w_ARG2
+  %2 = load <4 x i32>, <4 x i32>* @llvm_mips_bsel_v_w_ARG3
   %3 = bitcast <4 x i32> %0 to <16 x i8>
   %4 = bitcast <4 x i32> %1 to <16 x i8>
   %5 = bitcast <4 x i32> %2 to <16 x i8>
@@ -505,9 +505,9 @@
 
 define void @llvm_mips_bsel_v_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_bsel_v_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_bsel_v_d_ARG2
-  %2 = load <2 x i64>* @llvm_mips_bsel_v_d_ARG3
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_bsel_v_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_bsel_v_d_ARG2
+  %2 = load <2 x i64>, <2 x i64>* @llvm_mips_bsel_v_d_ARG3
   %3 = bitcast <2 x i64> %0 to <16 x i8>
   %4 = bitcast <2 x i64> %1 to <16 x i8>
   %5 = bitcast <2 x i64> %2 to <16 x i8>
@@ -535,8 +535,8 @@
 
 define void @llvm_mips_nor_v_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_nor_v_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_nor_v_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_nor_v_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_nor_v_b_ARG2
   %2 = bitcast <16 x i8> %0 to <16 x i8>
   %3 = bitcast <16 x i8> %1 to <16 x i8>
   %4 = tail call <16 x i8> @llvm.mips.nor.v(<16 x i8> %2, <16 x i8> %3)
@@ -558,8 +558,8 @@
 
 define void @llvm_mips_nor_v_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_nor_v_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_nor_v_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_nor_v_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_nor_v_h_ARG2
   %2 = bitcast <8 x i16> %0 to <16 x i8>
   %3 = bitcast <8 x i16> %1 to <16 x i8>
   %4 = tail call <16 x i8> @llvm.mips.nor.v(<16 x i8> %2, <16 x i8> %3)
@@ -581,8 +581,8 @@
 
 define void @llvm_mips_nor_v_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_nor_v_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_nor_v_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_nor_v_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_nor_v_w_ARG2
   %2 = bitcast <4 x i32> %0 to <16 x i8>
   %3 = bitcast <4 x i32> %1 to <16 x i8>
   %4 = tail call <16 x i8> @llvm.mips.nor.v(<16 x i8> %2, <16 x i8> %3)
@@ -604,8 +604,8 @@
 
 define void @llvm_mips_nor_v_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_nor_v_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_nor_v_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_nor_v_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_nor_v_d_ARG2
   %2 = bitcast <2 x i64> %0 to <16 x i8>
   %3 = bitcast <2 x i64> %1 to <16 x i8>
   %4 = tail call <16 x i8> @llvm.mips.nor.v(<16 x i8> %2, <16 x i8> %3)
@@ -627,8 +627,8 @@
 
 define void @llvm_mips_or_v_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_or_v_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_or_v_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_or_v_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_or_v_b_ARG2
   %2 = bitcast <16 x i8> %0 to <16 x i8>
   %3 = bitcast <16 x i8> %1 to <16 x i8>
   %4 = tail call <16 x i8> @llvm.mips.or.v(<16 x i8> %2, <16 x i8> %3)
@@ -650,8 +650,8 @@
 
 define void @llvm_mips_or_v_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_or_v_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_or_v_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_or_v_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_or_v_h_ARG2
   %2 = bitcast <8 x i16> %0 to <16 x i8>
   %3 = bitcast <8 x i16> %1 to <16 x i8>
   %4 = tail call <16 x i8> @llvm.mips.or.v(<16 x i8> %2, <16 x i8> %3)
@@ -673,8 +673,8 @@
 
 define void @llvm_mips_or_v_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_or_v_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_or_v_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_or_v_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_or_v_w_ARG2
   %2 = bitcast <4 x i32> %0 to <16 x i8>
   %3 = bitcast <4 x i32> %1 to <16 x i8>
   %4 = tail call <16 x i8> @llvm.mips.or.v(<16 x i8> %2, <16 x i8> %3)
@@ -696,8 +696,8 @@
 
 define void @llvm_mips_or_v_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_or_v_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_or_v_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_or_v_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_or_v_d_ARG2
   %2 = bitcast <2 x i64> %0 to <16 x i8>
   %3 = bitcast <2 x i64> %1 to <16 x i8>
   %4 = tail call <16 x i8> @llvm.mips.or.v(<16 x i8> %2, <16 x i8> %3)
@@ -715,8 +715,8 @@
 ;
 define void @or_v_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_or_v_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_or_v_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_or_v_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_or_v_b_ARG2
   %2 = or <16 x i8> %0, %1
   store <16 x i8> %2, <16 x i8>* @llvm_mips_or_v_b_RES
   ret void
@@ -731,8 +731,8 @@
 ;
 define void @or_v_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_or_v_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_or_v_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_or_v_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_or_v_h_ARG2
   %2 = or <8 x i16> %0, %1
   store <8 x i16> %2, <8 x i16>* @llvm_mips_or_v_h_RES
   ret void
@@ -748,8 +748,8 @@
 
 define void @or_v_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_or_v_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_or_v_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_or_v_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_or_v_w_ARG2
   %2 = or <4 x i32> %0, %1
   store <4 x i32> %2, <4 x i32>* @llvm_mips_or_v_w_RES
   ret void
@@ -765,8 +765,8 @@
 
 define void @or_v_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_or_v_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_or_v_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_or_v_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_or_v_d_ARG2
   %2 = or <2 x i64> %0, %1
   store <2 x i64> %2, <2 x i64>* @llvm_mips_or_v_d_RES
   ret void
@@ -785,8 +785,8 @@
 
 define void @llvm_mips_xor_v_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_xor_v_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_xor_v_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_xor_v_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_xor_v_b_ARG2
   %2 = bitcast <16 x i8> %0 to <16 x i8>
   %3 = bitcast <16 x i8> %1 to <16 x i8>
   %4 = tail call <16 x i8> @llvm.mips.xor.v(<16 x i8> %2, <16 x i8> %3)
@@ -808,8 +808,8 @@
 
 define void @llvm_mips_xor_v_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_xor_v_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_xor_v_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_xor_v_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_xor_v_h_ARG2
   %2 = bitcast <8 x i16> %0 to <16 x i8>
   %3 = bitcast <8 x i16> %1 to <16 x i8>
   %4 = tail call <16 x i8> @llvm.mips.xor.v(<16 x i8> %2, <16 x i8> %3)
@@ -831,8 +831,8 @@
 
 define void @llvm_mips_xor_v_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_xor_v_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_xor_v_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_xor_v_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_xor_v_w_ARG2
   %2 = bitcast <4 x i32> %0 to <16 x i8>
   %3 = bitcast <4 x i32> %1 to <16 x i8>
   %4 = tail call <16 x i8> @llvm.mips.xor.v(<16 x i8> %2, <16 x i8> %3)
@@ -854,8 +854,8 @@
 
 define void @llvm_mips_xor_v_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_xor_v_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_xor_v_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_xor_v_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_xor_v_d_ARG2
   %2 = bitcast <2 x i64> %0 to <16 x i8>
   %3 = bitcast <2 x i64> %1 to <16 x i8>
   %4 = tail call <16 x i8> @llvm.mips.xor.v(<16 x i8> %2, <16 x i8> %3)
@@ -873,8 +873,8 @@
 ;
 define void @xor_v_b_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_xor_v_b_ARG1
-  %1 = load <16 x i8>* @llvm_mips_xor_v_b_ARG2
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_xor_v_b_ARG1
+  %1 = load <16 x i8>, <16 x i8>* @llvm_mips_xor_v_b_ARG2
   %2 = xor <16 x i8> %0, %1
   store <16 x i8> %2, <16 x i8>* @llvm_mips_xor_v_b_RES
   ret void
@@ -889,8 +889,8 @@
 ;
 define void @xor_v_h_test() nounwind {
 entry:
-  %0 = load <8 x i16>* @llvm_mips_xor_v_h_ARG1
-  %1 = load <8 x i16>* @llvm_mips_xor_v_h_ARG2
+  %0 = load <8 x i16>, <8 x i16>* @llvm_mips_xor_v_h_ARG1
+  %1 = load <8 x i16>, <8 x i16>* @llvm_mips_xor_v_h_ARG2
   %2 = xor <8 x i16> %0, %1
   store <8 x i16> %2, <8 x i16>* @llvm_mips_xor_v_h_RES
   ret void
@@ -906,8 +906,8 @@
 
 define void @xor_v_w_test() nounwind {
 entry:
-  %0 = load <4 x i32>* @llvm_mips_xor_v_w_ARG1
-  %1 = load <4 x i32>* @llvm_mips_xor_v_w_ARG2
+  %0 = load <4 x i32>, <4 x i32>* @llvm_mips_xor_v_w_ARG1
+  %1 = load <4 x i32>, <4 x i32>* @llvm_mips_xor_v_w_ARG2
   %2 = xor <4 x i32> %0, %1
   store <4 x i32> %2, <4 x i32>* @llvm_mips_xor_v_w_RES
   ret void
@@ -923,8 +923,8 @@
 
 define void @xor_v_d_test() nounwind {
 entry:
-  %0 = load <2 x i64>* @llvm_mips_xor_v_d_ARG1
-  %1 = load <2 x i64>* @llvm_mips_xor_v_d_ARG2
+  %0 = load <2 x i64>, <2 x i64>* @llvm_mips_xor_v_d_ARG1
+  %1 = load <2 x i64>, <2 x i64>* @llvm_mips_xor_v_d_ARG2
   %2 = xor <2 x i64> %0, %1
   store <2 x i64> %2, <2 x i64>* @llvm_mips_xor_v_d_RES
   ret void
diff --git a/llvm/test/CodeGen/Mips/msa/vecs10.ll b/llvm/test/CodeGen/Mips/msa/vecs10.ll
index e22e075..f442f77 100644
--- a/llvm/test/CodeGen/Mips/msa/vecs10.ll
+++ b/llvm/test/CodeGen/Mips/msa/vecs10.ll
@@ -7,7 +7,7 @@
 
 define i32 @llvm_mips_bnz_v_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_bnz_v_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bnz_v_ARG1
   %1 = tail call i32 @llvm.mips.bnz.v(<16 x i8> %0)
   %2 = icmp eq i32 %1, 0
   br i1 %2, label %true, label %false
@@ -28,7 +28,7 @@
 
 define i32 @llvm_mips_bz_v_test() nounwind {
 entry:
-  %0 = load <16 x i8>* @llvm_mips_bz_v_ARG1
+  %0 = load <16 x i8>, <16 x i8>* @llvm_mips_bz_v_ARG1
   %1 = tail call i32 @llvm.mips.bz.v(<16 x i8> %0)
   %2 = icmp eq i32 %1, 0
   br i1 %2, label %true, label %false
diff --git a/llvm/test/CodeGen/Mips/mul.ll b/llvm/test/CodeGen/Mips/mul.ll
index 4ce801b..3231f9c 100644
--- a/llvm/test/CodeGen/Mips/mul.ll
+++ b/llvm/test/CodeGen/Mips/mul.ll
@@ -6,8 +6,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @iiii, align 4
-  %1 = load i32* @jjjj, align 4
+  %0 = load i32, i32* @iiii, align 4
+  %1 = load i32, i32* @jjjj, align 4
   %mul = mul nsw i32 %1, %0
 ; 16:	mult	${{[0-9]+}}, ${{[0-9]+}}
 ; 16: 	mflo	${{[0-9]+}}
diff --git a/llvm/test/CodeGen/Mips/mulll.ll b/llvm/test/CodeGen/Mips/mulll.ll
index e37b919..6e5ba64 100644
--- a/llvm/test/CodeGen/Mips/mulll.ll
+++ b/llvm/test/CodeGen/Mips/mulll.ll
@@ -6,8 +6,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i64* @iiii, align 8
-  %1 = load i64* @jjjj, align 8
+  %0 = load i64, i64* @iiii, align 8
+  %1 = load i64, i64* @jjjj, align 8
   %mul = mul nsw i64 %1, %0
   store i64 %mul, i64* @kkkk, align 8
 ; 16:	multu	${{[0-9]+}}, ${{[0-9]+}}
diff --git a/llvm/test/CodeGen/Mips/mulull.ll b/llvm/test/CodeGen/Mips/mulull.ll
index 4d23c69..c133448 100644
--- a/llvm/test/CodeGen/Mips/mulull.ll
+++ b/llvm/test/CodeGen/Mips/mulull.ll
@@ -7,8 +7,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i64* @iiii, align 8
-  %1 = load i64* @jjjj, align 8
+  %0 = load i64, i64* @iiii, align 8
+  %1 = load i64, i64* @jjjj, align 8
   %mul = mul nsw i64 %1, %0
   store i64 %mul, i64* @kkkk, align 8
 ; 16:	multu	${{[0-9]+}}, ${{[0-9]+}}
diff --git a/llvm/test/CodeGen/Mips/nacl-align.ll b/llvm/test/CodeGen/Mips/nacl-align.ll
index 892a7ed..ec8f3f0 100644
--- a/llvm/test/CodeGen/Mips/nacl-align.ll
+++ b/llvm/test/CodeGen/Mips/nacl-align.ll
@@ -68,7 +68,7 @@
 define i32 @test2(i32 %i) {
 entry:
   %elementptr = getelementptr inbounds [2 x i8*], [2 x i8*]* @bb_array, i32 0, i32 %i
-  %0 = load i8** %elementptr, align 4
+  %0 = load i8*, i8** %elementptr, align 4
   indirectbr i8* %0, [label %bb1, label %bb2]
 
 bb1:
diff --git a/llvm/test/CodeGen/Mips/nacl-branch-delay.ll b/llvm/test/CodeGen/Mips/nacl-branch-delay.ll
index d251eee..2927f39 100644
--- a/llvm/test/CodeGen/Mips/nacl-branch-delay.ll
+++ b/llvm/test/CodeGen/Mips/nacl-branch-delay.ll
@@ -10,7 +10,7 @@
 
 
 define void @test1() {
-  %1 = load i32* @x, align 4
+  %1 = load i32, i32* @x, align 4
   call void @f1(i32 %1)
   ret void
 
diff --git a/llvm/test/CodeGen/Mips/nacl-reserved-regs.ll b/llvm/test/CodeGen/Mips/nacl-reserved-regs.ll
index ae21283..efe2a663 100644
--- a/llvm/test/CodeGen/Mips/nacl-reserved-regs.ll
+++ b/llvm/test/CodeGen/Mips/nacl-reserved-regs.ll
@@ -5,22 +5,22 @@
 @var = external global i32
 
 define void @f() {
-  %val1 = load volatile i32* @var
-  %val2 = load volatile i32* @var
-  %val3 = load volatile i32* @var
-  %val4 = load volatile i32* @var
-  %val5 = load volatile i32* @var
-  %val6 = load volatile i32* @var
-  %val7 = load volatile i32* @var
-  %val8 = load volatile i32* @var
-  %val9 = load volatile i32* @var
-  %val10 = load volatile i32* @var
-  %val11 = load volatile i32* @var
-  %val12 = load volatile i32* @var
-  %val13 = load volatile i32* @var
-  %val14 = load volatile i32* @var
-  %val15 = load volatile i32* @var
-  %val16 = load volatile i32* @var
+  %val1 = load volatile i32, i32* @var
+  %val2 = load volatile i32, i32* @var
+  %val3 = load volatile i32, i32* @var
+  %val4 = load volatile i32, i32* @var
+  %val5 = load volatile i32, i32* @var
+  %val6 = load volatile i32, i32* @var
+  %val7 = load volatile i32, i32* @var
+  %val8 = load volatile i32, i32* @var
+  %val9 = load volatile i32, i32* @var
+  %val10 = load volatile i32, i32* @var
+  %val11 = load volatile i32, i32* @var
+  %val12 = load volatile i32, i32* @var
+  %val13 = load volatile i32, i32* @var
+  %val14 = load volatile i32, i32* @var
+  %val15 = load volatile i32, i32* @var
+  %val16 = load volatile i32, i32* @var
   store volatile i32 %val1, i32* @var
   store volatile i32 %val2, i32* @var
   store volatile i32 %val3, i32* @var
diff --git a/llvm/test/CodeGen/Mips/neg1.ll b/llvm/test/CodeGen/Mips/neg1.ll
index 281e626..c24d78b 100644
--- a/llvm/test/CodeGen/Mips/neg1.ll
+++ b/llvm/test/CodeGen/Mips/neg1.ll
@@ -5,7 +5,7 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %sub = sub nsw i32 0, %0
 ; 16:	neg	${{[0-9]+}}, ${{[0-9]+}}
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %sub)
diff --git a/llvm/test/CodeGen/Mips/no-odd-spreg-msa.ll b/llvm/test/CodeGen/Mips/no-odd-spreg-msa.ll
index 30dd1ff..cf79557 100644
--- a/llvm/test/CodeGen/Mips/no-odd-spreg-msa.ll
+++ b/llvm/test/CodeGen/Mips/no-odd-spreg-msa.ll
@@ -8,7 +8,7 @@
   ; Force the float into an odd-numbered register using named registers and
   ; load the vector.
   %b = call float asm sideeffect "mov.s $0, $1", "={$f13},{$f12}" (float %a)
-  %0 = load volatile <4 x float>* @v4f32
+  %0 = load volatile <4 x float>, <4 x float>* @v4f32
 
   ; Clobber all except $f12/$w12 and $f13
   ;
@@ -42,7 +42,7 @@
   ; Force the float into an odd-numbered register using named registers and
   ; load the vector.
   %b = call float asm sideeffect "mov.s $0, $1", "={$f13},{$f12}" (float %a)
-  %0 = load volatile <4 x float>* @v4f32
+  %0 = load volatile <4 x float>, <4 x float>* @v4f32
 
   ; Clobber all except $f12/$w12 and $f13
   ;
@@ -73,7 +73,7 @@
 
 define float @msa_extract_0() {
 entry:
-  %0 = load volatile <4 x float>* @v4f32
+  %0 = load volatile <4 x float>, <4 x float>* @v4f32
   %1 = call <4 x float> asm sideeffect "move.v $0, $1", "={$w13},{$w12}" (<4 x float> %0)
 
   ; Clobber all except $f12, and $f13
@@ -101,7 +101,7 @@
 
 define float @msa_extract_1() {
 entry:
-  %0 = load volatile <4 x float>* @v4f32
+  %0 = load volatile <4 x float>, <4 x float>* @v4f32
   %1 = call <4 x float> asm sideeffect "move.v $0, $1", "={$w13},{$w12}" (<4 x float> %0)
 
   ; Clobber all except $f13
diff --git a/llvm/test/CodeGen/Mips/nomips16.ll b/llvm/test/CodeGen/Mips/nomips16.ll
index 5f7d74e..418d8ea 100644
--- a/llvm/test/CodeGen/Mips/nomips16.ll
+++ b/llvm/test/CodeGen/Mips/nomips16.ll
@@ -6,7 +6,7 @@
 ; Function Attrs: nounwind
 define void @foo() #0 {
 entry:
-  %0 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
   %conv = fpext float %0 to double
   %add = fadd double %conv, 1.500000e+00
   %conv1 = fptrunc double %add to float
@@ -20,7 +20,7 @@
 ; Function Attrs: nounwind
 define void @nofoo() #1 {
 entry:
-  %0 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
   %conv = fpext float %0 to double
   %add = fadd double %conv, 3.900000e+00
   %conv1 = fptrunc double %add to float
diff --git a/llvm/test/CodeGen/Mips/not1.ll b/llvm/test/CodeGen/Mips/not1.ll
index 2163b23..52d29f0 100644
--- a/llvm/test/CodeGen/Mips/not1.ll
+++ b/llvm/test/CodeGen/Mips/not1.ll
@@ -6,7 +6,7 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @x, align 4
+  %0 = load i32, i32* @x, align 4
   %neg = xor i32 %0, -1
 ; 16:	not	${{[0-9]+}}, ${{[0-9]+}}
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %neg)
diff --git a/llvm/test/CodeGen/Mips/o32_cc_byval.ll b/llvm/test/CodeGen/Mips/o32_cc_byval.ll
index dde5caa..108c663 100644
--- a/llvm/test/CodeGen/Mips/o32_cc_byval.ll
+++ b/llvm/test/CodeGen/Mips/o32_cc_byval.ll
@@ -62,17 +62,17 @@
 ; CHECK: mfc1 $6, $f[[F0]]
 
   %i2 = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 5
-  %tmp = load i32* %i2, align 4
+  %tmp = load i32, i32* %i2, align 4
   %d = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 4
-  %tmp1 = load double* %d, align 8
+  %tmp1 = load double, double* %d, align 8
   %ll = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 3
-  %tmp2 = load i64* %ll, align 8
+  %tmp2 = load i64, i64* %ll, align 8
   %i = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 2
-  %tmp3 = load i32* %i, align 4
+  %tmp3 = load i32, i32* %i, align 4
   %s = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 1
-  %tmp4 = load i16* %s, align 2
+  %tmp4 = load i16, i16* %s, align 2
   %c = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 0
-  %tmp5 = load i8* %c, align 1
+  %tmp5 = load i8, i8* %c, align 1
   tail call void @callee4(i32 %tmp, double %tmp1, i64 %tmp2, i32 %tmp3, i16 signext %tmp4, i8 signext %tmp5, float %f) nounwind
   ret void
 }
@@ -91,9 +91,9 @@
 ; CHECK: sw  $[[R0]], 24($sp)
 
   %arrayidx = getelementptr inbounds %struct.S2, %struct.S2* %s2, i32 0, i32 0, i32 0
-  %tmp = load i32* %arrayidx, align 4
+  %tmp = load i32, i32* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds %struct.S2, %struct.S2* %s2, i32 0, i32 0, i32 3
-  %tmp3 = load i32* %arrayidx2, align 4
+  %tmp3 = load i32, i32* %arrayidx2, align 4
   tail call void @callee4(i32 %tmp, double 2.000000e+00, i64 3, i32 %tmp3, i16 signext 4, i8 signext 5, float 6.000000e+00) nounwind
   ret void
 }
@@ -111,11 +111,11 @@
 ; CHECK: sw  $[[R1]], 24($sp)
 
   %i = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 2
-  %tmp = load i32* %i, align 4
+  %tmp = load i32, i32* %i, align 4
   %i2 = getelementptr inbounds %struct.S1, %struct.S1* %s1, i32 0, i32 5
-  %tmp1 = load i32* %i2, align 4
+  %tmp1 = load i32, i32* %i2, align 4
   %c = getelementptr inbounds %struct.S3, %struct.S3* %s3, i32 0, i32 0
-  %tmp2 = load i8* %c, align 1
+  %tmp2 = load i8, i8* %c, align 1
   tail call void @callee4(i32 %tmp, double 2.000000e+00, i64 3, i32 %tmp1, i16 signext 4, i8 signext %tmp2, float 6.000000e+00) nounwind
   ret void
 }
diff --git a/llvm/test/CodeGen/Mips/o32_cc_vararg.ll b/llvm/test/CodeGen/Mips/o32_cc_vararg.ll
index 10972e88..b4597a3 100644
--- a/llvm/test/CodeGen/Mips/o32_cc_vararg.ll
+++ b/llvm/test/CodeGen/Mips/o32_cc_vararg.ll
@@ -24,7 +24,7 @@
   store i32 %0, i32* %b, align 4
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_end(i8* %ap2)
-  %tmp = load i32* %b, align 4
+  %tmp = load i32, i32* %b, align 4
   ret i32 %tmp
 
 ; CHECK-LABEL: va1:
@@ -50,7 +50,7 @@
   store double %0, double* %b, align 8
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_end(i8* %ap2)
-  %tmp = load double* %b, align 8
+  %tmp = load double, double* %b, align 8
   ret double %tmp
 
 ; CHECK-LABEL: va2:
@@ -78,7 +78,7 @@
   store i32 %0, i32* %b, align 4
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_end(i8* %ap2)
-  %tmp = load i32* %b, align 4
+  %tmp = load i32, i32* %b, align 4
   ret i32 %tmp
 
 ; CHECK-LABEL: va3:
@@ -101,7 +101,7 @@
   store double %0, double* %b, align 8
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_end(i8* %ap2)
-  %tmp = load double* %b, align 8
+  %tmp = load double, double* %b, align 8
   ret double %tmp
 
 ; CHECK-LABEL: va4:
@@ -129,7 +129,7 @@
   store i32 %0, i32* %d, align 4
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_end(i8* %ap2)
-  %tmp = load i32* %d, align 4
+  %tmp = load i32, i32* %d, align 4
   ret i32 %tmp
 
 ; CHECK-LABEL: va5:
@@ -155,7 +155,7 @@
   store double %0, double* %d, align 8
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_end(i8* %ap2)
-  %tmp = load double* %d, align 8
+  %tmp = load double, double* %d, align 8
   ret double %tmp
 
 ; CHECK-LABEL: va6:
@@ -183,7 +183,7 @@
   store i32 %0, i32* %c, align 4
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_end(i8* %ap2)
-  %tmp = load i32* %c, align 4
+  %tmp = load i32, i32* %c, align 4
   ret i32 %tmp
 
 ; CHECK-LABEL: va7:
@@ -206,7 +206,7 @@
   store double %0, double* %c, align 8
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_end(i8* %ap2)
-  %tmp = load double* %c, align 8
+  %tmp = load double, double* %c, align 8
   ret double %tmp
 
 ; CHECK-LABEL: va8:
@@ -232,7 +232,7 @@
   store i32 %0, i32* %d, align 4
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_end(i8* %ap2)
-  %tmp = load i32* %d, align 4
+  %tmp = load i32, i32* %d, align 4
   ret i32 %tmp
 
 ; CHECK-LABEL: va9:
@@ -257,7 +257,7 @@
   store double %0, double* %d, align 8
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_end(i8* %ap2)
-  %tmp = load double* %d, align 8
+  %tmp = load double, double* %d, align 8
   ret double %tmp
 
 ; CHECK-LABEL: va10:
diff --git a/llvm/test/CodeGen/Mips/optimize-pic-o0.ll b/llvm/test/CodeGen/Mips/optimize-pic-o0.ll
index 554d49e..454bc85 100644
--- a/llvm/test/CodeGen/Mips/optimize-pic-o0.ll
+++ b/llvm/test/CodeGen/Mips/optimize-pic-o0.ll
@@ -10,7 +10,7 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 10
   br i1 %cmp, label %for.body, label %for.end
 
@@ -20,13 +20,13 @@
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %inc = add nsw i32 %1, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %2 = load i32* %retval
+  %2 = load i32, i32* %retval
   ret i32 %2
 }
 
diff --git a/llvm/test/CodeGen/Mips/or1.ll b/llvm/test/CodeGen/Mips/or1.ll
index b1c3696..719356c 100644
--- a/llvm/test/CodeGen/Mips/or1.ll
+++ b/llvm/test/CodeGen/Mips/or1.ll
@@ -6,8 +6,8 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @x, align 4
-  %1 = load i32* @y, align 4
+  %0 = load i32, i32* @x, align 4
+  %1 = load i32, i32* @y, align 4
   %or = or i32 %0, %1
 ; 16:	or	${{[0-9]+}}, ${{[0-9]+}}
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %or)
diff --git a/llvm/test/CodeGen/Mips/prevent-hoisting.ll b/llvm/test/CodeGen/Mips/prevent-hoisting.ll
index 3d90243..8a84ff0 100644
--- a/llvm/test/CodeGen/Mips/prevent-hoisting.ll
+++ b/llvm/test/CodeGen/Mips/prevent-hoisting.ll
@@ -46,7 +46,7 @@
 
 define void @readLumaCoeff8x8_CABAC(%struct.img_par* %img, i32 %b8) {
 
-  %1 = load i32* undef, align 4
+  %1 = load i32, i32* undef, align 4
   br i1 false, label %2, label %3
 
 ; <label>:2                                       ; preds = %0
@@ -93,9 +93,9 @@
 
 ; <label>:15                                      ; preds = %14, %13
   %16 = getelementptr inbounds [0 x [20 x i32]], [0 x [20 x i32]]* @assignSE2partition, i32 0, i32 %1, i32 undef
-  %17 = load i32* %16, align 4
+  %17 = load i32, i32* %16, align 4
   %18 = getelementptr inbounds %struct.datapartition, %struct.datapartition* null, i32 %17, i32 2
-  %19 = load i32 (%struct.syntaxelement*, %struct.img_par*, %struct.datapartition*)** %18, align 4
+  %19 = load i32 (%struct.syntaxelement*, %struct.img_par*, %struct.datapartition*)*, i32 (%struct.syntaxelement*, %struct.img_par*, %struct.datapartition*)** %18, align 4
   %20 = call i32 %19(%struct.syntaxelement* undef, %struct.img_par* %img, %struct.datapartition* undef)
   br i1 false, label %.loopexit, label %21
 
@@ -123,17 +123,17 @@
 
 ; <label>:31                                      ; preds = %30, %29
   %32 = getelementptr inbounds [0 x [20 x i32]], [0 x [20 x i32]]* @assignSE2partition, i32 0, i32 %1, i32 undef
-  %33 = load i32* %32, align 4
+  %33 = load i32, i32* %32, align 4
   %34 = getelementptr inbounds %struct.datapartition, %struct.datapartition* null, i32 %33
   %35 = call i32 undef(%struct.syntaxelement* undef, %struct.img_par* %img, %struct.datapartition* %34)
   br i1 false, label %.loopexit, label %36
 
 ; <label>:36                                      ; preds = %31
-  %37 = load i32* undef, align 4
+  %37 = load i32, i32* undef, align 4
   %38 = add i32 %coef_ctr.29, 1
   %39 = add i32 %38, %37
   %40 = getelementptr inbounds [2 x i8], [2 x i8]* %7, i32 %39, i32 0
-  %41 = load i8* %40, align 1
+  %41 = load i8, i8* %40, align 1
   %42 = zext i8 %41 to i32
   %43 = add nsw i32 %42, %11
   %44 = getelementptr inbounds %struct.img_par, %struct.img_par* %img, i32 0, i32 27, i32 undef, i32 %43
diff --git a/llvm/test/CodeGen/Mips/private.ll b/llvm/test/CodeGen/Mips/private.ll
index 058db0b..5907dbd 100644
--- a/llvm/test/CodeGen/Mips/private.ll
+++ b/llvm/test/CodeGen/Mips/private.ll
@@ -15,6 +15,6 @@
 ; CHECK: lw $[[R0:[0-9]+]], %got($baz)($
 ; CHECK: lw ${{[0-9]+}}, %lo($baz)($[[R0]])
   call void @foo()
-  %1 = load i32* @baz, align 4
+  %1 = load i32, i32* @baz, align 4
   ret i32 %1
 }
diff --git a/llvm/test/CodeGen/Mips/ra-allocatable.ll b/llvm/test/CodeGen/Mips/ra-allocatable.ll
index afc5cb0..048d432 100644
--- a/llvm/test/CodeGen/Mips/ra-allocatable.ll
+++ b/llvm/test/CodeGen/Mips/ra-allocatable.ll
@@ -98,186 +98,186 @@
 ; CHECK: lw  $ra, {{[0-9]+}}($sp)            # 4-byte Folded Reload
 ; CHECK: jr  $ra
 
-  %0 = load i32* @a0, align 4
-  %1 = load i32** @b0, align 4
+  %0 = load i32, i32* @a0, align 4
+  %1 = load i32*, i32** @b0, align 4
   store i32 %0, i32* %1, align 4
-  %2 = load i32* @a1, align 4
-  %3 = load i32** @b1, align 4
+  %2 = load i32, i32* @a1, align 4
+  %3 = load i32*, i32** @b1, align 4
   store i32 %2, i32* %3, align 4
-  %4 = load i32* @a2, align 4
-  %5 = load i32** @b2, align 4
+  %4 = load i32, i32* @a2, align 4
+  %5 = load i32*, i32** @b2, align 4
   store i32 %4, i32* %5, align 4
-  %6 = load i32* @a3, align 4
-  %7 = load i32** @b3, align 4
+  %6 = load i32, i32* @a3, align 4
+  %7 = load i32*, i32** @b3, align 4
   store i32 %6, i32* %7, align 4
-  %8 = load i32* @a4, align 4
-  %9 = load i32** @b4, align 4
+  %8 = load i32, i32* @a4, align 4
+  %9 = load i32*, i32** @b4, align 4
   store i32 %8, i32* %9, align 4
-  %10 = load i32* @a5, align 4
-  %11 = load i32** @b5, align 4
+  %10 = load i32, i32* @a5, align 4
+  %11 = load i32*, i32** @b5, align 4
   store i32 %10, i32* %11, align 4
-  %12 = load i32* @a6, align 4
-  %13 = load i32** @b6, align 4
+  %12 = load i32, i32* @a6, align 4
+  %13 = load i32*, i32** @b6, align 4
   store i32 %12, i32* %13, align 4
-  %14 = load i32* @a7, align 4
-  %15 = load i32** @b7, align 4
+  %14 = load i32, i32* @a7, align 4
+  %15 = load i32*, i32** @b7, align 4
   store i32 %14, i32* %15, align 4
-  %16 = load i32* @a8, align 4
-  %17 = load i32** @b8, align 4
+  %16 = load i32, i32* @a8, align 4
+  %17 = load i32*, i32** @b8, align 4
   store i32 %16, i32* %17, align 4
-  %18 = load i32* @a9, align 4
-  %19 = load i32** @b9, align 4
+  %18 = load i32, i32* @a9, align 4
+  %19 = load i32*, i32** @b9, align 4
   store i32 %18, i32* %19, align 4
-  %20 = load i32* @a10, align 4
-  %21 = load i32** @b10, align 4
+  %20 = load i32, i32* @a10, align 4
+  %21 = load i32*, i32** @b10, align 4
   store i32 %20, i32* %21, align 4
-  %22 = load i32* @a11, align 4
-  %23 = load i32** @b11, align 4
+  %22 = load i32, i32* @a11, align 4
+  %23 = load i32*, i32** @b11, align 4
   store i32 %22, i32* %23, align 4
-  %24 = load i32* @a12, align 4
-  %25 = load i32** @b12, align 4
+  %24 = load i32, i32* @a12, align 4
+  %25 = load i32*, i32** @b12, align 4
   store i32 %24, i32* %25, align 4
-  %26 = load i32* @a13, align 4
-  %27 = load i32** @b13, align 4
+  %26 = load i32, i32* @a13, align 4
+  %27 = load i32*, i32** @b13, align 4
   store i32 %26, i32* %27, align 4
-  %28 = load i32* @a14, align 4
-  %29 = load i32** @b14, align 4
+  %28 = load i32, i32* @a14, align 4
+  %29 = load i32*, i32** @b14, align 4
   store i32 %28, i32* %29, align 4
-  %30 = load i32* @a15, align 4
-  %31 = load i32** @b15, align 4
+  %30 = load i32, i32* @a15, align 4
+  %31 = load i32*, i32** @b15, align 4
   store i32 %30, i32* %31, align 4
-  %32 = load i32* @a16, align 4
-  %33 = load i32** @b16, align 4
+  %32 = load i32, i32* @a16, align 4
+  %33 = load i32*, i32** @b16, align 4
   store i32 %32, i32* %33, align 4
-  %34 = load i32* @a17, align 4
-  %35 = load i32** @b17, align 4
+  %34 = load i32, i32* @a17, align 4
+  %35 = load i32*, i32** @b17, align 4
   store i32 %34, i32* %35, align 4
-  %36 = load i32* @a18, align 4
-  %37 = load i32** @b18, align 4
+  %36 = load i32, i32* @a18, align 4
+  %37 = load i32*, i32** @b18, align 4
   store i32 %36, i32* %37, align 4
-  %38 = load i32* @a19, align 4
-  %39 = load i32** @b19, align 4
+  %38 = load i32, i32* @a19, align 4
+  %39 = load i32*, i32** @b19, align 4
   store i32 %38, i32* %39, align 4
-  %40 = load i32* @a20, align 4
-  %41 = load i32** @b20, align 4
+  %40 = load i32, i32* @a20, align 4
+  %41 = load i32*, i32** @b20, align 4
   store i32 %40, i32* %41, align 4
-  %42 = load i32* @a21, align 4
-  %43 = load i32** @b21, align 4
+  %42 = load i32, i32* @a21, align 4
+  %43 = load i32*, i32** @b21, align 4
   store i32 %42, i32* %43, align 4
-  %44 = load i32* @a22, align 4
-  %45 = load i32** @b22, align 4
+  %44 = load i32, i32* @a22, align 4
+  %45 = load i32*, i32** @b22, align 4
   store i32 %44, i32* %45, align 4
-  %46 = load i32* @a23, align 4
-  %47 = load i32** @b23, align 4
+  %46 = load i32, i32* @a23, align 4
+  %47 = load i32*, i32** @b23, align 4
   store i32 %46, i32* %47, align 4
-  %48 = load i32* @a24, align 4
-  %49 = load i32** @b24, align 4
+  %48 = load i32, i32* @a24, align 4
+  %49 = load i32*, i32** @b24, align 4
   store i32 %48, i32* %49, align 4
-  %50 = load i32* @a25, align 4
-  %51 = load i32** @b25, align 4
+  %50 = load i32, i32* @a25, align 4
+  %51 = load i32*, i32** @b25, align 4
   store i32 %50, i32* %51, align 4
-  %52 = load i32* @a26, align 4
-  %53 = load i32** @b26, align 4
+  %52 = load i32, i32* @a26, align 4
+  %53 = load i32*, i32** @b26, align 4
   store i32 %52, i32* %53, align 4
-  %54 = load i32* @a27, align 4
-  %55 = load i32** @b27, align 4
+  %54 = load i32, i32* @a27, align 4
+  %55 = load i32*, i32** @b27, align 4
   store i32 %54, i32* %55, align 4
-  %56 = load i32* @a28, align 4
-  %57 = load i32** @b28, align 4
+  %56 = load i32, i32* @a28, align 4
+  %57 = load i32*, i32** @b28, align 4
   store i32 %56, i32* %57, align 4
-  %58 = load i32* @a29, align 4
-  %59 = load i32** @b29, align 4
+  %58 = load i32, i32* @a29, align 4
+  %59 = load i32*, i32** @b29, align 4
   store i32 %58, i32* %59, align 4
-  %60 = load i32* @a0, align 4
-  %61 = load i32** @c0, align 4
+  %60 = load i32, i32* @a0, align 4
+  %61 = load i32*, i32** @c0, align 4
   store i32 %60, i32* %61, align 4
-  %62 = load i32* @a1, align 4
-  %63 = load i32** @c1, align 4
+  %62 = load i32, i32* @a1, align 4
+  %63 = load i32*, i32** @c1, align 4
   store i32 %62, i32* %63, align 4
-  %64 = load i32* @a2, align 4
-  %65 = load i32** @c2, align 4
+  %64 = load i32, i32* @a2, align 4
+  %65 = load i32*, i32** @c2, align 4
   store i32 %64, i32* %65, align 4
-  %66 = load i32* @a3, align 4
-  %67 = load i32** @c3, align 4
+  %66 = load i32, i32* @a3, align 4
+  %67 = load i32*, i32** @c3, align 4
   store i32 %66, i32* %67, align 4
-  %68 = load i32* @a4, align 4
-  %69 = load i32** @c4, align 4
+  %68 = load i32, i32* @a4, align 4
+  %69 = load i32*, i32** @c4, align 4
   store i32 %68, i32* %69, align 4
-  %70 = load i32* @a5, align 4
-  %71 = load i32** @c5, align 4
+  %70 = load i32, i32* @a5, align 4
+  %71 = load i32*, i32** @c5, align 4
   store i32 %70, i32* %71, align 4
-  %72 = load i32* @a6, align 4
-  %73 = load i32** @c6, align 4
+  %72 = load i32, i32* @a6, align 4
+  %73 = load i32*, i32** @c6, align 4
   store i32 %72, i32* %73, align 4
-  %74 = load i32* @a7, align 4
-  %75 = load i32** @c7, align 4
+  %74 = load i32, i32* @a7, align 4
+  %75 = load i32*, i32** @c7, align 4
   store i32 %74, i32* %75, align 4
-  %76 = load i32* @a8, align 4
-  %77 = load i32** @c8, align 4
+  %76 = load i32, i32* @a8, align 4
+  %77 = load i32*, i32** @c8, align 4
   store i32 %76, i32* %77, align 4
-  %78 = load i32* @a9, align 4
-  %79 = load i32** @c9, align 4
+  %78 = load i32, i32* @a9, align 4
+  %79 = load i32*, i32** @c9, align 4
   store i32 %78, i32* %79, align 4
-  %80 = load i32* @a10, align 4
-  %81 = load i32** @c10, align 4
+  %80 = load i32, i32* @a10, align 4
+  %81 = load i32*, i32** @c10, align 4
   store i32 %80, i32* %81, align 4
-  %82 = load i32* @a11, align 4
-  %83 = load i32** @c11, align 4
+  %82 = load i32, i32* @a11, align 4
+  %83 = load i32*, i32** @c11, align 4
   store i32 %82, i32* %83, align 4
-  %84 = load i32* @a12, align 4
-  %85 = load i32** @c12, align 4
+  %84 = load i32, i32* @a12, align 4
+  %85 = load i32*, i32** @c12, align 4
   store i32 %84, i32* %85, align 4
-  %86 = load i32* @a13, align 4
-  %87 = load i32** @c13, align 4
+  %86 = load i32, i32* @a13, align 4
+  %87 = load i32*, i32** @c13, align 4
   store i32 %86, i32* %87, align 4
-  %88 = load i32* @a14, align 4
-  %89 = load i32** @c14, align 4
+  %88 = load i32, i32* @a14, align 4
+  %89 = load i32*, i32** @c14, align 4
   store i32 %88, i32* %89, align 4
-  %90 = load i32* @a15, align 4
-  %91 = load i32** @c15, align 4
+  %90 = load i32, i32* @a15, align 4
+  %91 = load i32*, i32** @c15, align 4
   store i32 %90, i32* %91, align 4
-  %92 = load i32* @a16, align 4
-  %93 = load i32** @c16, align 4
+  %92 = load i32, i32* @a16, align 4
+  %93 = load i32*, i32** @c16, align 4
   store i32 %92, i32* %93, align 4
-  %94 = load i32* @a17, align 4
-  %95 = load i32** @c17, align 4
+  %94 = load i32, i32* @a17, align 4
+  %95 = load i32*, i32** @c17, align 4
   store i32 %94, i32* %95, align 4
-  %96 = load i32* @a18, align 4
-  %97 = load i32** @c18, align 4
+  %96 = load i32, i32* @a18, align 4
+  %97 = load i32*, i32** @c18, align 4
   store i32 %96, i32* %97, align 4
-  %98 = load i32* @a19, align 4
-  %99 = load i32** @c19, align 4
+  %98 = load i32, i32* @a19, align 4
+  %99 = load i32*, i32** @c19, align 4
   store i32 %98, i32* %99, align 4
-  %100 = load i32* @a20, align 4
-  %101 = load i32** @c20, align 4
+  %100 = load i32, i32* @a20, align 4
+  %101 = load i32*, i32** @c20, align 4
   store i32 %100, i32* %101, align 4
-  %102 = load i32* @a21, align 4
-  %103 = load i32** @c21, align 4
+  %102 = load i32, i32* @a21, align 4
+  %103 = load i32*, i32** @c21, align 4
   store i32 %102, i32* %103, align 4
-  %104 = load i32* @a22, align 4
-  %105 = load i32** @c22, align 4
+  %104 = load i32, i32* @a22, align 4
+  %105 = load i32*, i32** @c22, align 4
   store i32 %104, i32* %105, align 4
-  %106 = load i32* @a23, align 4
-  %107 = load i32** @c23, align 4
+  %106 = load i32, i32* @a23, align 4
+  %107 = load i32*, i32** @c23, align 4
   store i32 %106, i32* %107, align 4
-  %108 = load i32* @a24, align 4
-  %109 = load i32** @c24, align 4
+  %108 = load i32, i32* @a24, align 4
+  %109 = load i32*, i32** @c24, align 4
   store i32 %108, i32* %109, align 4
-  %110 = load i32* @a25, align 4
-  %111 = load i32** @c25, align 4
+  %110 = load i32, i32* @a25, align 4
+  %111 = load i32*, i32** @c25, align 4
   store i32 %110, i32* %111, align 4
-  %112 = load i32* @a26, align 4
-  %113 = load i32** @c26, align 4
+  %112 = load i32, i32* @a26, align 4
+  %113 = load i32*, i32** @c26, align 4
   store i32 %112, i32* %113, align 4
-  %114 = load i32* @a27, align 4
-  %115 = load i32** @c27, align 4
+  %114 = load i32, i32* @a27, align 4
+  %115 = load i32*, i32** @c27, align 4
   store i32 %114, i32* %115, align 4
-  %116 = load i32* @a28, align 4
-  %117 = load i32** @c28, align 4
+  %116 = load i32, i32* @a28, align 4
+  %117 = load i32*, i32** @c28, align 4
   store i32 %116, i32* %117, align 4
-  %118 = load i32* @a29, align 4
-  %119 = load i32** @c29, align 4
+  %118 = load i32, i32* @a29, align 4
+  %119 = load i32*, i32** @c29, align 4
   store i32 %118, i32* %119, align 4
-  %120 = load i32* @a0, align 4
+  %120 = load i32, i32* @a0, align 4
   ret i32 %120
 }
diff --git a/llvm/test/CodeGen/Mips/rdhwr-directives.ll b/llvm/test/CodeGen/Mips/rdhwr-directives.ll
index 27010d4..ebc91ea 100644
--- a/llvm/test/CodeGen/Mips/rdhwr-directives.ll
+++ b/llvm/test/CodeGen/Mips/rdhwr-directives.ll
@@ -9,7 +9,7 @@
 ; CHECK: rdhwr 
 ; CHECK: .set  pop
 
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   ret i32 %0
 }
 
diff --git a/llvm/test/CodeGen/Mips/rem.ll b/llvm/test/CodeGen/Mips/rem.ll
index b18f85d..70f957c 100644
--- a/llvm/test/CodeGen/Mips/rem.ll
+++ b/llvm/test/CodeGen/Mips/rem.ll
@@ -7,8 +7,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @iiii, align 4
-  %1 = load i32* @jjjj, align 4
+  %0 = load i32, i32* @iiii, align 4
+  %1 = load i32, i32* @jjjj, align 4
   %rem = srem i32 %0, %1
 ; 16:	div	$zero, ${{[0-9]+}}, ${{[0-9]+}}
 ; 16: 	mfhi	${{[0-9]+}}
diff --git a/llvm/test/CodeGen/Mips/remu.ll b/llvm/test/CodeGen/Mips/remu.ll
index 472503c..1267972 100644
--- a/llvm/test/CodeGen/Mips/remu.ll
+++ b/llvm/test/CodeGen/Mips/remu.ll
@@ -7,8 +7,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @iiii, align 4
-  %1 = load i32* @jjjj, align 4
+  %0 = load i32, i32* @iiii, align 4
+  %1 = load i32, i32* @jjjj, align 4
   %rem = urem i32 %0, %1
 ; 16:	divu	$zero, ${{[0-9]+}}, ${{[0-9]+}}
 ; 16: 	mfhi	${{[0-9]+}}
diff --git a/llvm/test/CodeGen/Mips/s2rem.ll b/llvm/test/CodeGen/Mips/s2rem.ll
index 9edb5be..65e48fe 100644
--- a/llvm/test/CodeGen/Mips/s2rem.ll
+++ b/llvm/test/CodeGen/Mips/s2rem.ll
@@ -56,7 +56,7 @@
 ; Function Attrs: nounwind
 define void @fft() #0 {
 entry:
-  %0 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
   %call = call float @ff(float %0)
   store float %call, float* @x, align 4
   ret void
@@ -71,7 +71,7 @@
 ; Function Attrs: nounwind
 define void @vft() #0 {
 entry:
-  %0 = load float* @x, align 4
+  %0 = load float, float* @x, align 4
   call void @vf(float %0)
   ret void
 ; PIC: 	.ent	vft
diff --git a/llvm/test/CodeGen/Mips/sb1.ll b/llvm/test/CodeGen/Mips/sb1.ll
index e1a28d4..8c8cb2f 100644
--- a/llvm/test/CodeGen/Mips/sb1.ll
+++ b/llvm/test/CodeGen/Mips/sb1.ll
@@ -6,11 +6,11 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %conv = trunc i32 %0 to i8
   store i8 %conv, i8* @c, align 1
-  %1 = load i32* @i, align 4
-  %2 = load i8* @c, align 1
+  %1 = load i32, i32* @i, align 4
+  %2 = load i8, i8* @c, align 1
   %conv1 = sext i8 %2 to i32
 ; 16:	sb	${{[0-9]+}}, 0(${{[0-9]+}})
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 %1, i32 %conv1)
diff --git a/llvm/test/CodeGen/Mips/sel1c.ll b/llvm/test/CodeGen/Mips/sel1c.ll
index edd2e3e..991228d7 100644
--- a/llvm/test/CodeGen/Mips/sel1c.ll
+++ b/llvm/test/CodeGen/Mips/sel1c.ll
@@ -7,8 +7,8 @@
 ; Function Attrs: nounwind optsize
 define void @t() #0 {
 entry:
-  %0 = load i32* @i, align 4
-  %1 = load i32* @j, align 4
+  %0 = load i32, i32* @i, align 4
+  %1 = load i32, i32* @j, align 4
   %cmp = icmp eq i32 %0, %1
   %cond = select i1 %cmp, i32 1, i32 3
   store i32 %cond, i32* @k, align 4
diff --git a/llvm/test/CodeGen/Mips/sel2c.ll b/llvm/test/CodeGen/Mips/sel2c.ll
index 4b21124..ba95a51 100644
--- a/llvm/test/CodeGen/Mips/sel2c.ll
+++ b/llvm/test/CodeGen/Mips/sel2c.ll
@@ -7,8 +7,8 @@
 ; Function Attrs: nounwind optsize
 define void @t() #0 {
 entry:
-  %0 = load i32* @i, align 4
-  %1 = load i32* @j, align 4
+  %0 = load i32, i32* @i, align 4
+  %1 = load i32, i32* @j, align 4
   %cmp = icmp ne i32 %0, %1
   %cond = select i1 %cmp, i32 1, i32 3
   store i32 %cond, i32* @k, align 4
diff --git a/llvm/test/CodeGen/Mips/selTBteqzCmpi.ll b/llvm/test/CodeGen/Mips/selTBteqzCmpi.ll
index 9cb8227..5a72ea0 100644
--- a/llvm/test/CodeGen/Mips/selTBteqzCmpi.ll
+++ b/llvm/test/CodeGen/Mips/selTBteqzCmpi.ll
@@ -8,10 +8,10 @@
 
 define void @t() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp = icmp eq i32 %0, 10
-  %1 = load i32* @i, align 4
-  %2 = load i32* @j, align 4
+  %1 = load i32, i32* @i, align 4
+  %2 = load i32, i32* @j, align 4
   %cond = select i1 %cmp, i32 %1, i32 %2
   store i32 %cond, i32* @i, align 4
   ret void
diff --git a/llvm/test/CodeGen/Mips/selTBtnezCmpi.ll b/llvm/test/CodeGen/Mips/selTBtnezCmpi.ll
index bd334f5..b6407e6 100644
--- a/llvm/test/CodeGen/Mips/selTBtnezCmpi.ll
+++ b/llvm/test/CodeGen/Mips/selTBtnezCmpi.ll
@@ -8,10 +8,10 @@
 
 define void @t() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp = icmp ne i32 %0, 10
-  %1 = load i32* @i, align 4
-  %2 = load i32* @j, align 4
+  %1 = load i32, i32* @i, align 4
+  %2 = load i32, i32* @j, align 4
   %cond = select i1 %cmp, i32 %1, i32 %2
   store i32 %cond, i32* @i, align 4
   ret void
diff --git a/llvm/test/CodeGen/Mips/selTBtnezSlti.ll b/llvm/test/CodeGen/Mips/selTBtnezSlti.ll
index 593f6f2..2f1cdb8 100644
--- a/llvm/test/CodeGen/Mips/selTBtnezSlti.ll
+++ b/llvm/test/CodeGen/Mips/selTBtnezSlti.ll
@@ -8,10 +8,10 @@
 
 define void @t() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp = icmp slt i32 %0, 10
-  %1 = load i32* @j, align 4
-  %2 = load i32* @i, align 4
+  %1 = load i32, i32* @j, align 4
+  %2 = load i32, i32* @i, align 4
   %cond = select i1 %cmp, i32 %1, i32 %2
   store i32 %cond, i32* @i, align 4
   ret void
diff --git a/llvm/test/CodeGen/Mips/select.ll b/llvm/test/CodeGen/Mips/select.ll
index d6e1826..96bd378 100644
--- a/llvm/test/CodeGen/Mips/select.ll
+++ b/llvm/test/CodeGen/Mips/select.ll
@@ -700,8 +700,8 @@
 ; 64R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
 ; 64R6:          or $2, $[[NE]], $[[EQ]]
 
-  %tmp = load double* @d2, align 8
-  %tmp1 = load double* @d3, align 8
+  %tmp = load double, double* @d2, align 8
+  %tmp1 = load double, double* @d3, align 8
   %cmp = fcmp oeq double %tmp, %tmp1
   %cond = select i1 %cmp, i32 %f0, i32 %f1
   ret i32 %cond
@@ -777,8 +777,8 @@
 ; 64R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
 ; 64R6:          or $2, $[[NE]], $[[EQ]]
 
-  %tmp = load double* @d2, align 8
-  %tmp1 = load double* @d3, align 8
+  %tmp = load double, double* @d2, align 8
+  %tmp1 = load double, double* @d3, align 8
   %cmp = fcmp olt double %tmp, %tmp1
   %cond = select i1 %cmp, i32 %f0, i32 %f1
   ret i32 %cond
@@ -854,8 +854,8 @@
 ; 64R6:          selnez $[[NE:[0-9]+]], $4, $[[CCGPR]]
 ; 64R6:          or $2, $[[NE]], $[[EQ]]
 
-  %tmp = load double* @d2, align 8
-  %tmp1 = load double* @d3, align 8
+  %tmp = load double, double* @d2, align 8
+  %tmp1 = load double, double* @d3, align 8
   %cmp = fcmp ogt double %tmp, %tmp1
   %cond = select i1 %cmp, i32 %f0, i32 %f1
   ret i32 %cond
diff --git a/llvm/test/CodeGen/Mips/seleq.ll b/llvm/test/CodeGen/Mips/seleq.ll
index 9af422f..bd25358 100644
--- a/llvm/test/CodeGen/Mips/seleq.ll
+++ b/llvm/test/CodeGen/Mips/seleq.ll
@@ -12,65 +12,65 @@
 
 define void @calc_seleq() nounwind {
 entry:
-  %0 = load i32* @a, align 4
-  %1 = load i32* @b, align 4
+  %0 = load i32, i32* @a, align 4
+  %1 = load i32, i32* @b, align 4
   %cmp = icmp eq i32 %0, %1
   br i1 %cmp, label %cond.true, label %cond.false
 
 cond.true:                                        ; preds = %entry
-  %2 = load i32* @f, align 4
+  %2 = load i32, i32* @f, align 4
   br label %cond.end
 
 cond.false:                                       ; preds = %entry
-  %3 = load i32* @t, align 4
+  %3 = load i32, i32* @t, align 4
   br label %cond.end
 
 cond.end:                                         ; preds = %cond.false, %cond.true
   %cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ]
   store i32 %cond, i32* @z1, align 4
-  %4 = load i32* @b, align 4
-  %5 = load i32* @a, align 4
+  %4 = load i32, i32* @b, align 4
+  %5 = load i32, i32* @a, align 4
   %cmp1 = icmp eq i32 %4, %5
   br i1 %cmp1, label %cond.true2, label %cond.false3
 
 cond.true2:                                       ; preds = %cond.end
-  %6 = load i32* @f, align 4
+  %6 = load i32, i32* @f, align 4
   br label %cond.end4
 
 cond.false3:                                      ; preds = %cond.end
-  %7 = load i32* @t, align 4
+  %7 = load i32, i32* @t, align 4
   br label %cond.end4
 
 cond.end4:                                        ; preds = %cond.false3, %cond.true2
   %cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ]
   store i32 %cond5, i32* @z2, align 4
-  %8 = load i32* @c, align 4
-  %9 = load i32* @a, align 4
+  %8 = load i32, i32* @c, align 4
+  %9 = load i32, i32* @a, align 4
   %cmp6 = icmp eq i32 %8, %9
   br i1 %cmp6, label %cond.true7, label %cond.false8
 
 cond.true7:                                       ; preds = %cond.end4
-  %10 = load i32* @t, align 4
+  %10 = load i32, i32* @t, align 4
   br label %cond.end9
 
 cond.false8:                                      ; preds = %cond.end4
-  %11 = load i32* @f, align 4
+  %11 = load i32, i32* @f, align 4
   br label %cond.end9
 
 cond.end9:                                        ; preds = %cond.false8, %cond.true7
   %cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ]
   store i32 %cond10, i32* @z3, align 4
-  %12 = load i32* @a, align 4
-  %13 = load i32* @c, align 4
+  %12 = load i32, i32* @a, align 4
+  %13 = load i32, i32* @c, align 4
   %cmp11 = icmp eq i32 %12, %13
   br i1 %cmp11, label %cond.true12, label %cond.false13
 
 cond.true12:                                      ; preds = %cond.end9
-  %14 = load i32* @t, align 4
+  %14 = load i32, i32* @t, align 4
   br label %cond.end14
 
 cond.false13:                                     ; preds = %cond.end9
-  %15 = load i32* @f, align 4
+  %15 = load i32, i32* @f, align 4
   br label %cond.end14
 
 cond.end14:                                       ; preds = %cond.false13, %cond.true12
diff --git a/llvm/test/CodeGen/Mips/seleqk.ll b/llvm/test/CodeGen/Mips/seleqk.ll
index 3ca622d..2eeaa9e 100644
--- a/llvm/test/CodeGen/Mips/seleqk.ll
+++ b/llvm/test/CodeGen/Mips/seleqk.ll
@@ -12,61 +12,61 @@
 
 define void @calc_seleqk() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp = icmp eq i32 %0, 1
   br i1 %cmp, label %cond.true, label %cond.false
 
 cond.true:                                        ; preds = %entry
-  %1 = load i32* @t, align 4
+  %1 = load i32, i32* @t, align 4
   br label %cond.end
 
 cond.false:                                       ; preds = %entry
-  %2 = load i32* @f, align 4
+  %2 = load i32, i32* @f, align 4
   br label %cond.end
 
 cond.end:                                         ; preds = %cond.false, %cond.true
   %cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ]
   store i32 %cond, i32* @z1, align 4
-  %3 = load i32* @a, align 4
+  %3 = load i32, i32* @a, align 4
   %cmp1 = icmp eq i32 %3, 1000
   br i1 %cmp1, label %cond.true2, label %cond.false3
 
 cond.true2:                                       ; preds = %cond.end
-  %4 = load i32* @f, align 4
+  %4 = load i32, i32* @f, align 4
   br label %cond.end4
 
 cond.false3:                                      ; preds = %cond.end
-  %5 = load i32* @t, align 4
+  %5 = load i32, i32* @t, align 4
   br label %cond.end4
 
 cond.end4:                                        ; preds = %cond.false3, %cond.true2
   %cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ]
   store i32 %cond5, i32* @z2, align 4
-  %6 = load i32* @b, align 4
+  %6 = load i32, i32* @b, align 4
   %cmp6 = icmp eq i32 %6, 3
   br i1 %cmp6, label %cond.true7, label %cond.false8
 
 cond.true7:                                       ; preds = %cond.end4
-  %7 = load i32* @f, align 4
+  %7 = load i32, i32* @f, align 4
   br label %cond.end9
 
 cond.false8:                                      ; preds = %cond.end4
-  %8 = load i32* @t, align 4
+  %8 = load i32, i32* @t, align 4
   br label %cond.end9
 
 cond.end9:                                        ; preds = %cond.false8, %cond.true7
   %cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ]
   store i32 %cond10, i32* @z3, align 4
-  %9 = load i32* @b, align 4
+  %9 = load i32, i32* @b, align 4
   %cmp11 = icmp eq i32 %9, 1000
   br i1 %cmp11, label %cond.true12, label %cond.false13
 
 cond.true12:                                      ; preds = %cond.end9
-  %10 = load i32* @t, align 4
+  %10 = load i32, i32* @t, align 4
   br label %cond.end14
 
 cond.false13:                                     ; preds = %cond.end9
-  %11 = load i32* @f, align 4
+  %11 = load i32, i32* @f, align 4
   br label %cond.end14
 
 cond.end14:                                       ; preds = %cond.false13, %cond.true12
diff --git a/llvm/test/CodeGen/Mips/selgek.ll b/llvm/test/CodeGen/Mips/selgek.ll
index 8ab4046..38ad95e 100644
--- a/llvm/test/CodeGen/Mips/selgek.ll
+++ b/llvm/test/CodeGen/Mips/selgek.ll
@@ -13,61 +13,61 @@
 
 define void @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp = icmp sge i32 %0, 1000
   br i1 %cmp, label %cond.true, label %cond.false
 
 cond.true:                                        ; preds = %entry
-  %1 = load i32* @f, align 4
+  %1 = load i32, i32* @f, align 4
   br label %cond.end
 
 cond.false:                                       ; preds = %entry
-  %2 = load i32* @t, align 4
+  %2 = load i32, i32* @t, align 4
   br label %cond.end
 
 cond.end:                                         ; preds = %cond.false, %cond.true
   %cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ]
   store i32 %cond, i32* @z1, align 4
-  %3 = load i32* @b, align 4
+  %3 = load i32, i32* @b, align 4
   %cmp1 = icmp sge i32 %3, 1
   br i1 %cmp1, label %cond.true2, label %cond.false3
 
 cond.true2:                                       ; preds = %cond.end
-  %4 = load i32* @t, align 4
+  %4 = load i32, i32* @t, align 4
   br label %cond.end4
 
 cond.false3:                                      ; preds = %cond.end
-  %5 = load i32* @f, align 4
+  %5 = load i32, i32* @f, align 4
   br label %cond.end4
 
 cond.end4:                                        ; preds = %cond.false3, %cond.true2
   %cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ]
   store i32 %cond5, i32* @z2, align 4
-  %6 = load i32* @c, align 4
+  %6 = load i32, i32* @c, align 4
   %cmp6 = icmp sge i32 %6, 2
   br i1 %cmp6, label %cond.true7, label %cond.false8
 
 cond.true7:                                       ; preds = %cond.end4
-  %7 = load i32* @t, align 4
+  %7 = load i32, i32* @t, align 4
   br label %cond.end9
 
 cond.false8:                                      ; preds = %cond.end4
-  %8 = load i32* @f, align 4
+  %8 = load i32, i32* @f, align 4
   br label %cond.end9
 
 cond.end9:                                        ; preds = %cond.false8, %cond.true7
   %cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ]
   store i32 %cond10, i32* @z3, align 4
-  %9 = load i32* @a, align 4
+  %9 = load i32, i32* @a, align 4
   %cmp11 = icmp sge i32 %9, 2
   br i1 %cmp11, label %cond.true12, label %cond.false13
 
 cond.true12:                                      ; preds = %cond.end9
-  %10 = load i32* @t, align 4
+  %10 = load i32, i32* @t, align 4
   br label %cond.end14
 
 cond.false13:                                     ; preds = %cond.end9
-  %11 = load i32* @f, align 4
+  %11 = load i32, i32* @f, align 4
   br label %cond.end14
 
 cond.end14:                                       ; preds = %cond.false13, %cond.true12
diff --git a/llvm/test/CodeGen/Mips/selgt.ll b/llvm/test/CodeGen/Mips/selgt.ll
index 67b9b49..a2e1e39 100644
--- a/llvm/test/CodeGen/Mips/selgt.ll
+++ b/llvm/test/CodeGen/Mips/selgt.ll
@@ -14,71 +14,71 @@
 define i32 @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
 entry:
   %retval = alloca i32, align 4
-  %0 = load i32* @a, align 4
-  %1 = load i32* @b, align 4
+  %0 = load i32, i32* @a, align 4
+  %1 = load i32, i32* @b, align 4
   %cmp = icmp sgt i32 %0, %1
   br i1 %cmp, label %cond.true, label %cond.false
 
 cond.true:                                        ; preds = %entry
-  %2 = load i32* @f, align 4
+  %2 = load i32, i32* @f, align 4
   br label %cond.end
 
 cond.false:                                       ; preds = %entry
-  %3 = load i32* @t, align 4
+  %3 = load i32, i32* @t, align 4
   br label %cond.end
 
 cond.end:                                         ; preds = %cond.false, %cond.true
   %cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ]
   store i32 %cond, i32* @z1, align 4
-  %4 = load i32* @b, align 4
-  %5 = load i32* @a, align 4
+  %4 = load i32, i32* @b, align 4
+  %5 = load i32, i32* @a, align 4
   %cmp1 = icmp sgt i32 %4, %5
   br i1 %cmp1, label %cond.true2, label %cond.false3
 
 cond.true2:                                       ; preds = %cond.end
-  %6 = load i32* @t, align 4
+  %6 = load i32, i32* @t, align 4
   br label %cond.end4
 
 cond.false3:                                      ; preds = %cond.end
-  %7 = load i32* @f, align 4
+  %7 = load i32, i32* @f, align 4
   br label %cond.end4
 
 cond.end4:                                        ; preds = %cond.false3, %cond.true2
   %cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ]
   store i32 %cond5, i32* @z2, align 4
-  %8 = load i32* @c, align 4
-  %9 = load i32* @a, align 4
+  %8 = load i32, i32* @c, align 4
+  %9 = load i32, i32* @a, align 4
   %cmp6 = icmp sgt i32 %8, %9
   br i1 %cmp6, label %cond.true7, label %cond.false8
 
 cond.true7:                                       ; preds = %cond.end4
-  %10 = load i32* @f, align 4
+  %10 = load i32, i32* @f, align 4
   br label %cond.end9
 
 cond.false8:                                      ; preds = %cond.end4
-  %11 = load i32* @t, align 4
+  %11 = load i32, i32* @t, align 4
   br label %cond.end9
 
 cond.end9:                                        ; preds = %cond.false8, %cond.true7
   %cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ]
   store i32 %cond10, i32* @z3, align 4
-  %12 = load i32* @a, align 4
-  %13 = load i32* @c, align 4
+  %12 = load i32, i32* @a, align 4
+  %13 = load i32, i32* @c, align 4
   %cmp11 = icmp sgt i32 %12, %13
   br i1 %cmp11, label %cond.true12, label %cond.false13
 
 cond.true12:                                      ; preds = %cond.end9
-  %14 = load i32* @f, align 4
+  %14 = load i32, i32* @f, align 4
   br label %cond.end14
 
 cond.false13:                                     ; preds = %cond.end9
-  %15 = load i32* @t, align 4
+  %15 = load i32, i32* @t, align 4
   br label %cond.end14
 
 cond.end14:                                       ; preds = %cond.false13, %cond.true12
   %cond15 = phi i32 [ %14, %cond.true12 ], [ %15, %cond.false13 ]
   store i32 %cond15, i32* @z4, align 4
-  %16 = load i32* %retval
+  %16 = load i32, i32* %retval
   ret i32 %16
 }
 
diff --git a/llvm/test/CodeGen/Mips/selle.ll b/llvm/test/CodeGen/Mips/selle.ll
index b27df45..1adefb7 100644
--- a/llvm/test/CodeGen/Mips/selle.ll
+++ b/llvm/test/CodeGen/Mips/selle.ll
@@ -13,65 +13,65 @@
 
 define void @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
 entry:
-  %0 = load i32* @a, align 4
-  %1 = load i32* @b, align 4
+  %0 = load i32, i32* @a, align 4
+  %1 = load i32, i32* @b, align 4
   %cmp = icmp sle i32 %0, %1
   br i1 %cmp, label %cond.true, label %cond.false
 
 cond.true:                                        ; preds = %entry
-  %2 = load i32* @t, align 4
+  %2 = load i32, i32* @t, align 4
   br label %cond.end
 
 cond.false:                                       ; preds = %entry
-  %3 = load i32* @f, align 4
+  %3 = load i32, i32* @f, align 4
   br label %cond.end
 
 cond.end:                                         ; preds = %cond.false, %cond.true
   %cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ]
   store i32 %cond, i32* @z1, align 4
-  %4 = load i32* @b, align 4
-  %5 = load i32* @a, align 4
+  %4 = load i32, i32* @b, align 4
+  %5 = load i32, i32* @a, align 4
   %cmp1 = icmp sle i32 %4, %5
   br i1 %cmp1, label %cond.true2, label %cond.false3
 
 cond.true2:                                       ; preds = %cond.end
-  %6 = load i32* @f, align 4
+  %6 = load i32, i32* @f, align 4
   br label %cond.end4
 
 cond.false3:                                      ; preds = %cond.end
-  %7 = load i32* @t, align 4
+  %7 = load i32, i32* @t, align 4
   br label %cond.end4
 
 cond.end4:                                        ; preds = %cond.false3, %cond.true2
   %cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ]
   store i32 %cond5, i32* @z2, align 4
-  %8 = load i32* @c, align 4
-  %9 = load i32* @a, align 4
+  %8 = load i32, i32* @c, align 4
+  %9 = load i32, i32* @a, align 4
   %cmp6 = icmp sle i32 %8, %9
   br i1 %cmp6, label %cond.true7, label %cond.false8
 
 cond.true7:                                       ; preds = %cond.end4
-  %10 = load i32* @t, align 4
+  %10 = load i32, i32* @t, align 4
   br label %cond.end9
 
 cond.false8:                                      ; preds = %cond.end4
-  %11 = load i32* @f, align 4
+  %11 = load i32, i32* @f, align 4
   br label %cond.end9
 
 cond.end9:                                        ; preds = %cond.false8, %cond.true7
   %cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ]
   store i32 %cond10, i32* @z3, align 4
-  %12 = load i32* @a, align 4
-  %13 = load i32* @c, align 4
+  %12 = load i32, i32* @a, align 4
+  %13 = load i32, i32* @c, align 4
   %cmp11 = icmp sle i32 %12, %13
   br i1 %cmp11, label %cond.true12, label %cond.false13
 
 cond.true12:                                      ; preds = %cond.end9
-  %14 = load i32* @t, align 4
+  %14 = load i32, i32* @t, align 4
   br label %cond.end14
 
 cond.false13:                                     ; preds = %cond.end9
-  %15 = load i32* @f, align 4
+  %15 = load i32, i32* @f, align 4
   br label %cond.end14
 
 cond.end14:                                       ; preds = %cond.false13, %cond.true12
diff --git a/llvm/test/CodeGen/Mips/selltk.ll b/llvm/test/CodeGen/Mips/selltk.ll
index 1471b89..db9f8c1 100644
--- a/llvm/test/CodeGen/Mips/selltk.ll
+++ b/llvm/test/CodeGen/Mips/selltk.ll
@@ -13,61 +13,61 @@
 
 define void @calc_selltk() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp = icmp slt i32 %0, 1000
   br i1 %cmp, label %cond.true, label %cond.false
 
 cond.true:                                        ; preds = %entry
-  %1 = load i32* @t, align 4
+  %1 = load i32, i32* @t, align 4
   br label %cond.end
 
 cond.false:                                       ; preds = %entry
-  %2 = load i32* @f, align 4
+  %2 = load i32, i32* @f, align 4
   br label %cond.end
 
 cond.end:                                         ; preds = %cond.false, %cond.true
   %cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ]
   store i32 %cond, i32* @z1, align 4
-  %3 = load i32* @b, align 4
+  %3 = load i32, i32* @b, align 4
   %cmp1 = icmp slt i32 %3, 2
   br i1 %cmp1, label %cond.true2, label %cond.false3
 
 cond.true2:                                       ; preds = %cond.end
-  %4 = load i32* @f, align 4
+  %4 = load i32, i32* @f, align 4
   br label %cond.end4
 
 cond.false3:                                      ; preds = %cond.end
-  %5 = load i32* @t, align 4
+  %5 = load i32, i32* @t, align 4
   br label %cond.end4
 
 cond.end4:                                        ; preds = %cond.false3, %cond.true2
   %cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ]
   store i32 %cond5, i32* @z2, align 4
-  %6 = load i32* @c, align 4
+  %6 = load i32, i32* @c, align 4
   %cmp6 = icmp sgt i32 %6, 2
   br i1 %cmp6, label %cond.true7, label %cond.false8
 
 cond.true7:                                       ; preds = %cond.end4
-  %7 = load i32* @f, align 4
+  %7 = load i32, i32* @f, align 4
   br label %cond.end9
 
 cond.false8:                                      ; preds = %cond.end4
-  %8 = load i32* @t, align 4
+  %8 = load i32, i32* @t, align 4
   br label %cond.end9
 
 cond.end9:                                        ; preds = %cond.false8, %cond.true7
   %cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ]
   store i32 %cond10, i32* @z3, align 4
-  %9 = load i32* @a, align 4
+  %9 = load i32, i32* @a, align 4
   %cmp11 = icmp sgt i32 %9, 2
   br i1 %cmp11, label %cond.true12, label %cond.false13
 
 cond.true12:                                      ; preds = %cond.end9
-  %10 = load i32* @f, align 4
+  %10 = load i32, i32* @f, align 4
   br label %cond.end14
 
 cond.false13:                                     ; preds = %cond.end9
-  %11 = load i32* @t, align 4
+  %11 = load i32, i32* @t, align 4
   br label %cond.end14
 
 cond.end14:                                       ; preds = %cond.false13, %cond.true12
diff --git a/llvm/test/CodeGen/Mips/selne.ll b/llvm/test/CodeGen/Mips/selne.ll
index e3d82b8..9be99d6 100644
--- a/llvm/test/CodeGen/Mips/selne.ll
+++ b/llvm/test/CodeGen/Mips/selne.ll
@@ -13,65 +13,65 @@
 
 define void @calc_seleq() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
 entry:
-  %0 = load i32* @a, align 4
-  %1 = load i32* @b, align 4
+  %0 = load i32, i32* @a, align 4
+  %1 = load i32, i32* @b, align 4
   %cmp = icmp ne i32 %0, %1
   br i1 %cmp, label %cond.true, label %cond.false
 
 cond.true:                                        ; preds = %entry
-  %2 = load i32* @f, align 4
+  %2 = load i32, i32* @f, align 4
   br label %cond.end
 
 cond.false:                                       ; preds = %entry
-  %3 = load i32* @t, align 4
+  %3 = load i32, i32* @t, align 4
   br label %cond.end
 
 cond.end:                                         ; preds = %cond.false, %cond.true
   %cond = phi i32 [ %2, %cond.true ], [ %3, %cond.false ]
   store i32 %cond, i32* @z1, align 4
-  %4 = load i32* @b, align 4
-  %5 = load i32* @a, align 4
+  %4 = load i32, i32* @b, align 4
+  %5 = load i32, i32* @a, align 4
   %cmp1 = icmp ne i32 %4, %5
   br i1 %cmp1, label %cond.true2, label %cond.false3
 
 cond.true2:                                       ; preds = %cond.end
-  %6 = load i32* @f, align 4
+  %6 = load i32, i32* @f, align 4
   br label %cond.end4
 
 cond.false3:                                      ; preds = %cond.end
-  %7 = load i32* @t, align 4
+  %7 = load i32, i32* @t, align 4
   br label %cond.end4
 
 cond.end4:                                        ; preds = %cond.false3, %cond.true2
   %cond5 = phi i32 [ %6, %cond.true2 ], [ %7, %cond.false3 ]
   store i32 %cond5, i32* @z2, align 4
-  %8 = load i32* @c, align 4
-  %9 = load i32* @a, align 4
+  %8 = load i32, i32* @c, align 4
+  %9 = load i32, i32* @a, align 4
   %cmp6 = icmp ne i32 %8, %9
   br i1 %cmp6, label %cond.true7, label %cond.false8
 
 cond.true7:                                       ; preds = %cond.end4
-  %10 = load i32* @t, align 4
+  %10 = load i32, i32* @t, align 4
   br label %cond.end9
 
 cond.false8:                                      ; preds = %cond.end4
-  %11 = load i32* @f, align 4
+  %11 = load i32, i32* @f, align 4
   br label %cond.end9
 
 cond.end9:                                        ; preds = %cond.false8, %cond.true7
   %cond10 = phi i32 [ %10, %cond.true7 ], [ %11, %cond.false8 ]
   store i32 %cond10, i32* @z3, align 4
-  %12 = load i32* @a, align 4
-  %13 = load i32* @c, align 4
+  %12 = load i32, i32* @a, align 4
+  %13 = load i32, i32* @c, align 4
   %cmp11 = icmp ne i32 %12, %13
   br i1 %cmp11, label %cond.true12, label %cond.false13
 
 cond.true12:                                      ; preds = %cond.end9
-  %14 = load i32* @t, align 4
+  %14 = load i32, i32* @t, align 4
   br label %cond.end14
 
 cond.false13:                                     ; preds = %cond.end9
-  %15 = load i32* @f, align 4
+  %15 = load i32, i32* @f, align 4
   br label %cond.end14
 
 cond.end14:                                       ; preds = %cond.false13, %cond.true12
diff --git a/llvm/test/CodeGen/Mips/selnek.ll b/llvm/test/CodeGen/Mips/selnek.ll
index 64834b2..95d92f7 100644
--- a/llvm/test/CodeGen/Mips/selnek.ll
+++ b/llvm/test/CodeGen/Mips/selnek.ll
@@ -12,61 +12,61 @@
 
 define void @calc_z() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp = icmp ne i32 %0, 1
   br i1 %cmp, label %cond.true, label %cond.false
 
 cond.true:                                        ; preds = %entry
-  %1 = load i32* @f, align 4
+  %1 = load i32, i32* @f, align 4
   br label %cond.end
 
 cond.false:                                       ; preds = %entry
-  %2 = load i32* @t, align 4
+  %2 = load i32, i32* @t, align 4
   br label %cond.end
 
 cond.end:                                         ; preds = %cond.false, %cond.true
   %cond = phi i32 [ %1, %cond.true ], [ %2, %cond.false ]
   store i32 %cond, i32* @z1, align 4
-  %3 = load i32* @a, align 4
+  %3 = load i32, i32* @a, align 4
   %cmp1 = icmp ne i32 %3, 1000
   br i1 %cmp1, label %cond.true2, label %cond.false3
 
 cond.true2:                                       ; preds = %cond.end
-  %4 = load i32* @t, align 4
+  %4 = load i32, i32* @t, align 4
   br label %cond.end4
 
 cond.false3:                                      ; preds = %cond.end
-  %5 = load i32* @f, align 4
+  %5 = load i32, i32* @f, align 4
   br label %cond.end4
 
 cond.end4:                                        ; preds = %cond.false3, %cond.true2
   %cond5 = phi i32 [ %4, %cond.true2 ], [ %5, %cond.false3 ]
   store i32 %cond5, i32* @z2, align 4
-  %6 = load i32* @b, align 4
+  %6 = load i32, i32* @b, align 4
   %cmp6 = icmp ne i32 %6, 3
   br i1 %cmp6, label %cond.true7, label %cond.false8
 
 cond.true7:                                       ; preds = %cond.end4
-  %7 = load i32* @t, align 4
+  %7 = load i32, i32* @t, align 4
   br label %cond.end9
 
 cond.false8:                                      ; preds = %cond.end4
-  %8 = load i32* @f, align 4
+  %8 = load i32, i32* @f, align 4
   br label %cond.end9
 
 cond.end9:                                        ; preds = %cond.false8, %cond.true7
   %cond10 = phi i32 [ %7, %cond.true7 ], [ %8, %cond.false8 ]
   store i32 %cond10, i32* @z3, align 4
-  %9 = load i32* @b, align 4
+  %9 = load i32, i32* @b, align 4
   %cmp11 = icmp ne i32 %9, 1000
   br i1 %cmp11, label %cond.true12, label %cond.false13
 
 cond.true12:                                      ; preds = %cond.end9
-  %10 = load i32* @f, align 4
+  %10 = load i32, i32* @f, align 4
   br label %cond.end14
 
 cond.false13:                                     ; preds = %cond.end9
-  %11 = load i32* @t, align 4
+  %11 = load i32, i32* @t, align 4
   br label %cond.end14
 
 cond.end14:                                       ; preds = %cond.false13, %cond.true12
@@ -78,13 +78,13 @@
 define i32 @main() nounwind "target-cpu"="mips16" "target-features"="+mips16,+o32" {
 entry:
   call void @calc_z() "target-cpu"="mips16" "target-features"="+mips16,+o32"
-  %0 = load i32* @z1, align 4
+  %0 = load i32, i32* @z1, align 4
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %0) "target-cpu"="mips16" "target-features"="+mips16,+o32"
-  %1 = load i32* @z2, align 4
+  %1 = load i32, i32* @z2, align 4
   %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1) "target-cpu"="mips16" "target-features"="+mips16,+o32"
-  %2 = load i32* @z3, align 4
+  %2 = load i32, i32* @z3, align 4
   %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %2) "target-cpu"="mips16" "target-features"="+mips16,+o32"
-  %3 = load i32* @z4, align 4
+  %3 = load i32, i32* @z4, align 4
   %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %3) "target-cpu"="mips16" "target-features"="+mips16,+o32"
   ret i32 0
 }
diff --git a/llvm/test/CodeGen/Mips/selpat.ll b/llvm/test/CodeGen/Mips/selpat.ll
index 8eda8de..c682d81 100644
--- a/llvm/test/CodeGen/Mips/selpat.ll
+++ b/llvm/test/CodeGen/Mips/selpat.ll
@@ -12,18 +12,18 @@
 
 define void @calc_seleq() nounwind {
 entry:
-  %0 = load i32* @a, align 4
-  %1 = load i32* @b, align 4
+  %0 = load i32, i32* @a, align 4
+  %1 = load i32, i32* @b, align 4
   %cmp = icmp eq i32 %0, %1
-  %2 = load i32* @f, align 4
-  %3 = load i32* @t, align 4
+  %2 = load i32, i32* @f, align 4
+  %3 = load i32, i32* @t, align 4
   %cond = select i1 %cmp, i32 %2, i32 %3
   store i32 %cond, i32* @z1, align 4
 ; 16:	cmp	${{[0-9]+}}, ${{[0-9]+}}
 ; 16:	bteqz	$BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
   store i32 %cond, i32* @z2, align 4
-  %4 = load i32* @c, align 4
+  %4 = load i32, i32* @c, align 4
   %cmp6 = icmp eq i32 %4, %0
   %cond10 = select i1 %cmp6, i32 %3, i32 %2
   store i32 %cond10, i32* @z3, align 4
@@ -34,10 +34,10 @@
 
 define void @calc_seleqk() nounwind {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp = icmp eq i32 %0, 1
-  %1 = load i32* @t, align 4
-  %2 = load i32* @f, align 4
+  %1 = load i32, i32* @t, align 4
+  %2 = load i32, i32* @f, align 4
   %cond = select i1 %cmp, i32 %1, i32 %2
   store i32 %cond, i32* @z1, align 4
 ; 16:	cmpi	${{[0-9]+}}, 1
@@ -46,7 +46,7 @@
   %cmp1 = icmp eq i32 %0, 10
   %cond5 = select i1 %cmp1, i32 %2, i32 %1
   store i32 %cond5, i32* @z2, align 4
-  %3 = load i32* @b, align 4
+  %3 = load i32, i32* @b, align 4
   %cmp6 = icmp eq i32 %3, 3
   %cond10 = select i1 %cmp6, i32 %2, i32 %1
   store i32 %cond10, i32* @z3, align 4
@@ -61,19 +61,19 @@
 
 define void @calc_seleqz() nounwind {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp = icmp eq i32 %0, 0
-  %1 = load i32* @t, align 4
-  %2 = load i32* @f, align 4
+  %1 = load i32, i32* @t, align 4
+  %2 = load i32, i32* @f, align 4
   %cond = select i1 %cmp, i32 %1, i32 %2
   store i32 %cond, i32* @z1, align 4
 ; 16:	beqz	${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
-  %3 = load i32* @b, align 4
+  %3 = load i32, i32* @b, align 4
   %cmp1 = icmp eq i32 %3, 0
   %cond5 = select i1 %cmp1, i32 %2, i32 %1
   store i32 %cond5, i32* @z2, align 4
-  %4 = load i32* @c, align 4
+  %4 = load i32, i32* @c, align 4
   %cmp6 = icmp eq i32 %4, 0
   %cond10 = select i1 %cmp6, i32 %1, i32 %2
   store i32 %cond10, i32* @z3, align 4
@@ -83,11 +83,11 @@
 
 define void @calc_selge() nounwind {
 entry:
-  %0 = load i32* @a, align 4
-  %1 = load i32* @b, align 4
+  %0 = load i32, i32* @a, align 4
+  %1 = load i32, i32* @b, align 4
   %cmp = icmp sge i32 %0, %1
-  %2 = load i32* @f, align 4
-  %3 = load i32* @t, align 4
+  %2 = load i32, i32* @f, align 4
+  %3 = load i32, i32* @t, align 4
   %cond = select i1 %cmp, i32 %2, i32 %3
   store i32 %cond, i32* @z1, align 4
 ; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
@@ -96,7 +96,7 @@
   %cmp1 = icmp sge i32 %1, %0
   %cond5 = select i1 %cmp1, i32 %3, i32 %2
   store i32 %cond5, i32* @z2, align 4
-  %4 = load i32* @c, align 4
+  %4 = load i32, i32* @c, align 4
   %cmp6 = icmp sge i32 %4, %0
   %cond10 = select i1 %cmp6, i32 %3, i32 %2
   store i32 %cond10, i32* @z3, align 4
@@ -108,20 +108,20 @@
 
 define i32 @calc_selgt() nounwind {
 entry:
-  %0 = load i32* @a, align 4
-  %1 = load i32* @b, align 4
+  %0 = load i32, i32* @a, align 4
+  %1 = load i32, i32* @b, align 4
   %cmp = icmp sgt i32 %0, %1
 ; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
 ; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
-  %2 = load i32* @f, align 4
-  %3 = load i32* @t, align 4
+  %2 = load i32, i32* @f, align 4
+  %3 = load i32, i32* @t, align 4
   %cond = select i1 %cmp, i32 %2, i32 %3
   store i32 %cond, i32* @z1, align 4
   %cmp1 = icmp sgt i32 %1, %0
   %cond5 = select i1 %cmp1, i32 %3, i32 %2
   store i32 %cond5, i32* @z2, align 4
-  %4 = load i32* @c, align 4
+  %4 = load i32, i32* @c, align 4
   %cmp6 = icmp sgt i32 %4, %0
   %cond10 = select i1 %cmp6, i32 %2, i32 %3
   store i32 %cond10, i32* @z3, align 4
@@ -133,11 +133,11 @@
 
 define void @calc_selle() nounwind {
 entry:
-  %0 = load i32* @a, align 4
-  %1 = load i32* @b, align 4
+  %0 = load i32, i32* @a, align 4
+  %1 = load i32, i32* @b, align 4
   %cmp = icmp sle i32 %0, %1
-  %2 = load i32* @t, align 4
-  %3 = load i32* @f, align 4
+  %2 = load i32, i32* @t, align 4
+  %3 = load i32, i32* @f, align 4
   %cond = select i1 %cmp, i32 %2, i32 %3
   store i32 %cond, i32* @z1, align 4
 ; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
@@ -146,7 +146,7 @@
   %cmp1 = icmp sle i32 %1, %0
   %cond5 = select i1 %cmp1, i32 %3, i32 %2
   store i32 %cond5, i32* @z2, align 4
-  %4 = load i32* @c, align 4
+  %4 = load i32, i32* @c, align 4
   %cmp6 = icmp sle i32 %4, %0
   %cond10 = select i1 %cmp6, i32 %2, i32 %3
   store i32 %cond10, i32* @z3, align 4
@@ -158,20 +158,20 @@
 
 define void @calc_selltk() nounwind {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp = icmp slt i32 %0, 10
-  %1 = load i32* @t, align 4
-  %2 = load i32* @f, align 4
+  %1 = load i32, i32* @t, align 4
+  %2 = load i32, i32* @f, align 4
   %cond = select i1 %cmp, i32 %1, i32 %2
   store i32 %cond, i32* @z1, align 4
 ; 16:	slti	${{[0-9]+}}, {{[0-9]+}}
 ; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
-  %3 = load i32* @b, align 4
+  %3 = load i32, i32* @b, align 4
   %cmp1 = icmp slt i32 %3, 2
   %cond5 = select i1 %cmp1, i32 %2, i32 %1
   store i32 %cond5, i32* @z2, align 4
-  %4 = load i32* @c, align 4
+  %4 = load i32, i32* @c, align 4
   %cmp6 = icmp sgt i32 %4, 2
   %cond10 = select i1 %cmp6, i32 %2, i32 %1
   store i32 %cond10, i32* @z3, align 4
@@ -184,18 +184,18 @@
 
 define void @calc_selne() nounwind {
 entry:
-  %0 = load i32* @a, align 4
-  %1 = load i32* @b, align 4
+  %0 = load i32, i32* @a, align 4
+  %1 = load i32, i32* @b, align 4
   %cmp = icmp ne i32 %0, %1
-  %2 = load i32* @t, align 4
-  %3 = load i32* @f, align 4
+  %2 = load i32, i32* @t, align 4
+  %3 = load i32, i32* @f, align 4
   %cond = select i1 %cmp, i32 %2, i32 %3
   store i32 %cond, i32* @z1, align 4
 ; 16:	cmp	${{[0-9]+}}, ${{[0-9]+}}
 ; 16:	btnez	$BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
   store i32 %cond, i32* @z2, align 4
-  %4 = load i32* @c, align 4
+  %4 = load i32, i32* @c, align 4
   %cmp6 = icmp ne i32 %4, %0
   %cond10 = select i1 %cmp6, i32 %3, i32 %2
   store i32 %cond10, i32* @z3, align 4
@@ -205,10 +205,10 @@
 
 define void @calc_selnek() nounwind {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp = icmp ne i32 %0, 1
-  %1 = load i32* @f, align 4
-  %2 = load i32* @t, align 4
+  %1 = load i32, i32* @f, align 4
+  %2 = load i32, i32* @t, align 4
   %cond = select i1 %cmp, i32 %1, i32 %2
   store i32 %cond, i32* @z1, align 4
 ; 16:	cmpi	${{[0-9]+}}, 1
@@ -217,7 +217,7 @@
   %cmp1 = icmp ne i32 %0, 10
   %cond5 = select i1 %cmp1, i32 %2, i32 %1
   store i32 %cond5, i32* @z2, align 4
-  %3 = load i32* @b, align 4
+  %3 = load i32, i32* @b, align 4
   %cmp6 = icmp ne i32 %3, 3
   %cond10 = select i1 %cmp6, i32 %2, i32 %1
   store i32 %cond10, i32* @z3, align 4
@@ -232,19 +232,19 @@
 
 define void @calc_selnez() nounwind {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %cmp = icmp ne i32 %0, 0
-  %1 = load i32* @f, align 4
-  %2 = load i32* @t, align 4
+  %1 = load i32, i32* @f, align 4
+  %2 = load i32, i32* @t, align 4
   %cond = select i1 %cmp, i32 %1, i32 %2
   store i32 %cond, i32* @z1, align 4
 ; 16:	bnez	${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
-  %3 = load i32* @b, align 4
+  %3 = load i32, i32* @b, align 4
   %cmp1 = icmp ne i32 %3, 0
   %cond5 = select i1 %cmp1, i32 %2, i32 %1
   store i32 %cond5, i32* @z2, align 4
-  %4 = load i32* @c, align 4
+  %4 = load i32, i32* @c, align 4
   %cmp6 = icmp ne i32 %4, 0
   %cond10 = select i1 %cmp6, i32 %1, i32 %2
   store i32 %cond10, i32* @z3, align 4
@@ -254,19 +254,19 @@
 
 define void @calc_selnez2() nounwind {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %tobool = icmp ne i32 %0, 0
-  %1 = load i32* @f, align 4
-  %2 = load i32* @t, align 4
+  %1 = load i32, i32* @f, align 4
+  %2 = load i32, i32* @t, align 4
   %cond = select i1 %tobool, i32 %1, i32 %2
   store i32 %cond, i32* @z1, align 4
 ; 16:	bnez	${{[0-9]+}}, $BB{{[0-9]+}}_{{[0-9]}}
 ; 16: 	move    ${{[0-9]+}}, ${{[0-9]+}}
-  %3 = load i32* @b, align 4
+  %3 = load i32, i32* @b, align 4
   %tobool1 = icmp ne i32 %3, 0
   %cond5 = select i1 %tobool1, i32 %2, i32 %1
   store i32 %cond5, i32* @z2, align 4
-  %4 = load i32* @c, align 4
+  %4 = load i32, i32* @c, align 4
   %tobool6 = icmp ne i32 %4, 0
   %cond10 = select i1 %tobool6, i32 %1, i32 %2
   store i32 %cond10, i32* @z3, align 4
@@ -276,11 +276,11 @@
 
 define void @calc_seluge() nounwind {
 entry:
-  %0 = load i32* @a, align 4
-  %1 = load i32* @b, align 4
+  %0 = load i32, i32* @a, align 4
+  %1 = load i32, i32* @b, align 4
   %cmp = icmp uge i32 %0, %1
-  %2 = load i32* @f, align 4
-  %3 = load i32* @t, align 4
+  %2 = load i32, i32* @f, align 4
+  %3 = load i32, i32* @t, align 4
   %cond = select i1 %cmp, i32 %2, i32 %3
   store i32 %cond, i32* @z1, align 4
 ; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
@@ -289,7 +289,7 @@
   %cmp1 = icmp uge i32 %1, %0
   %cond5 = select i1 %cmp1, i32 %3, i32 %2
   store i32 %cond5, i32* @z2, align 4
-  %4 = load i32* @c, align 4
+  %4 = load i32, i32* @c, align 4
   %cmp6 = icmp uge i32 %4, %0
   %cond10 = select i1 %cmp6, i32 %3, i32 %2
   store i32 %cond10, i32* @z3, align 4
@@ -301,11 +301,11 @@
 
 define void @calc_selugt() nounwind {
 entry:
-  %0 = load i32* @a, align 4
-  %1 = load i32* @b, align 4
+  %0 = load i32, i32* @a, align 4
+  %1 = load i32, i32* @b, align 4
   %cmp = icmp ugt i32 %0, %1
-  %2 = load i32* @f, align 4
-  %3 = load i32* @t, align 4
+  %2 = load i32, i32* @f, align 4
+  %3 = load i32, i32* @t, align 4
   %cond = select i1 %cmp, i32 %2, i32 %3
   store i32 %cond, i32* @z1, align 4
 ; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
@@ -314,7 +314,7 @@
   %cmp1 = icmp ugt i32 %1, %0
   %cond5 = select i1 %cmp1, i32 %3, i32 %2
   store i32 %cond5, i32* @z2, align 4
-  %4 = load i32* @c, align 4
+  %4 = load i32, i32* @c, align 4
   %cmp6 = icmp ugt i32 %4, %0
   %cond10 = select i1 %cmp6, i32 %2, i32 %3
   store i32 %cond10, i32* @z3, align 4
@@ -326,11 +326,11 @@
 
 define void @calc_selule() nounwind {
 entry:
-  %0 = load i32* @a, align 4
-  %1 = load i32* @b, align 4
+  %0 = load i32, i32* @a, align 4
+  %1 = load i32, i32* @b, align 4
   %cmp = icmp ule i32 %0, %1
-  %2 = load i32* @t, align 4
-  %3 = load i32* @f, align 4
+  %2 = load i32, i32* @t, align 4
+  %3 = load i32, i32* @f, align 4
   %cond = select i1 %cmp, i32 %2, i32 %3
   store i32 %cond, i32* @z1, align 4
 ; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
@@ -339,7 +339,7 @@
   %cmp1 = icmp ule i32 %1, %0
   %cond5 = select i1 %cmp1, i32 %3, i32 %2
   store i32 %cond5, i32* @z2, align 4
-  %4 = load i32* @c, align 4
+  %4 = load i32, i32* @c, align 4
   %cmp6 = icmp ule i32 %4, %0
   %cond10 = select i1 %cmp6, i32 %2, i32 %3
   store i32 %cond10, i32* @z3, align 4
diff --git a/llvm/test/CodeGen/Mips/seteq.ll b/llvm/test/CodeGen/Mips/seteq.ll
index 5fadf78..8fad612 100644
--- a/llvm/test/CodeGen/Mips/seteq.ll
+++ b/llvm/test/CodeGen/Mips/seteq.ll
@@ -8,8 +8,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @i, align 4
-  %1 = load i32* @k, align 4
+  %0 = load i32, i32* @i, align 4
+  %1 = load i32, i32* @k, align 4
   %cmp = icmp eq i32 %0, %1
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
diff --git a/llvm/test/CodeGen/Mips/seteqz.ll b/llvm/test/CodeGen/Mips/seteqz.ll
index 80dc312..8e9a4be 100644
--- a/llvm/test/CodeGen/Mips/seteqz.ll
+++ b/llvm/test/CodeGen/Mips/seteqz.ll
@@ -7,13 +7,13 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %cmp = icmp eq i32 %0, 0
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
 ; 16:	sltiu	${{[0-9]+}}, 1
 ; 16:	move	${{[0-9]+}}, $24
-  %1 = load i32* @j, align 4
+  %1 = load i32, i32* @j, align 4
   %cmp1 = icmp eq i32 %1, 99
   %conv2 = zext i1 %cmp1 to i32
   store i32 %conv2, i32* @r2, align 4
diff --git a/llvm/test/CodeGen/Mips/setge.ll b/llvm/test/CodeGen/Mips/setge.ll
index 8869eb8..8fb7299 100644
--- a/llvm/test/CodeGen/Mips/setge.ll
+++ b/llvm/test/CodeGen/Mips/setge.ll
@@ -11,15 +11,15 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @k, align 4
-  %1 = load i32* @j, align 4
+  %0 = load i32, i32* @k, align 4
+  %1 = load i32, i32* @j, align 4
   %cmp = icmp sge i32 %0, %1
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
 ; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
 ; 16:	move	$[[REGISTER:[0-9]+]], $24
 ; 16:	xor	$[[REGISTER]], ${{[0-9]+}}
-  %2 = load i32* @m, align 4
+  %2 = load i32, i32* @m, align 4
   %cmp1 = icmp sge i32 %0, %2
   %conv2 = zext i1 %cmp1 to i32
   store i32 %conv2, i32* @r2, align 4
diff --git a/llvm/test/CodeGen/Mips/setgek.ll b/llvm/test/CodeGen/Mips/setgek.ll
index 18a0fcf..1148d1b 100644
--- a/llvm/test/CodeGen/Mips/setgek.ll
+++ b/llvm/test/CodeGen/Mips/setgek.ll
@@ -7,7 +7,7 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @k, align 4
+  %0 = load i32, i32* @k, align 4
   %cmp = icmp sgt i32 %0, -32769
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
diff --git a/llvm/test/CodeGen/Mips/setle.ll b/llvm/test/CodeGen/Mips/setle.ll
index 2df6774..fe4a2c3 100644
--- a/llvm/test/CodeGen/Mips/setle.ll
+++ b/llvm/test/CodeGen/Mips/setle.ll
@@ -10,15 +10,15 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @j, align 4
-  %1 = load i32* @k, align 4
+  %0 = load i32, i32* @j, align 4
+  %1 = load i32, i32* @k, align 4
   %cmp = icmp sle i32 %0, %1
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
 ; 16:	slt	${{[0-9]+}}, ${{[0-9]+}}
 ; 16:	move	$[[REGISTER:[0-9]+]], $24
 ; 16:	xor	$[[REGISTER]], ${{[0-9]+}}
-  %2 = load i32* @m, align 4
+  %2 = load i32, i32* @m, align 4
   %cmp1 = icmp sle i32 %2, %1
   %conv2 = zext i1 %cmp1 to i32
   store i32 %conv2, i32* @r2, align 4
diff --git a/llvm/test/CodeGen/Mips/setlt.ll b/llvm/test/CodeGen/Mips/setlt.ll
index 3dac74b..c4211e6 100644
--- a/llvm/test/CodeGen/Mips/setlt.ll
+++ b/llvm/test/CodeGen/Mips/setlt.ll
@@ -10,8 +10,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @j, align 4
-  %1 = load i32* @k, align 4
+  %0 = load i32, i32* @j, align 4
+  %1 = load i32, i32* @k, align 4
   %cmp = icmp slt i32 %0, %1
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
diff --git a/llvm/test/CodeGen/Mips/setltk.ll b/llvm/test/CodeGen/Mips/setltk.ll
index ecebc7e..8c00411 100644
--- a/llvm/test/CodeGen/Mips/setltk.ll
+++ b/llvm/test/CodeGen/Mips/setltk.ll
@@ -10,7 +10,7 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @j, align 4
+  %0 = load i32, i32* @j, align 4
   %cmp = icmp slt i32 %0, 10
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
diff --git a/llvm/test/CodeGen/Mips/setne.ll b/llvm/test/CodeGen/Mips/setne.ll
index 9e66901..484674e 100644
--- a/llvm/test/CodeGen/Mips/setne.ll
+++ b/llvm/test/CodeGen/Mips/setne.ll
@@ -8,8 +8,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @i, align 4
-  %1 = load i32* @k, align 4
+  %0 = load i32, i32* @i, align 4
+  %1 = load i32, i32* @k, align 4
   %cmp = icmp ne i32 %0, %1
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
diff --git a/llvm/test/CodeGen/Mips/setuge.ll b/llvm/test/CodeGen/Mips/setuge.ll
index 1c9b5bb..025b4dc 100644
--- a/llvm/test/CodeGen/Mips/setuge.ll
+++ b/llvm/test/CodeGen/Mips/setuge.ll
@@ -10,15 +10,15 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @k, align 4
-  %1 = load i32* @j, align 4
+  %0 = load i32, i32* @k, align 4
+  %1 = load i32, i32* @j, align 4
   %cmp = icmp uge i32 %0, %1
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
 ; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
 ; 16:	move    $[[REGISTER:[0-9]+]], $24
 ; 16:	xor	$[[REGISTER]], ${{[0-9]+}}
-  %2 = load i32* @m, align 4
+  %2 = load i32, i32* @m, align 4
   %cmp1 = icmp uge i32 %0, %2
   %conv2 = zext i1 %cmp1 to i32
   store i32 %conv2, i32* @r2, align 4
diff --git a/llvm/test/CodeGen/Mips/setugt.ll b/llvm/test/CodeGen/Mips/setugt.ll
index f10b47a..0ce317e 100644
--- a/llvm/test/CodeGen/Mips/setugt.ll
+++ b/llvm/test/CodeGen/Mips/setugt.ll
@@ -10,8 +10,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @k, align 4
-  %1 = load i32* @j, align 4
+  %0 = load i32, i32* @k, align 4
+  %1 = load i32, i32* @j, align 4
   %cmp = icmp ugt i32 %0, %1
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
diff --git a/llvm/test/CodeGen/Mips/setule.ll b/llvm/test/CodeGen/Mips/setule.ll
index a6d6bf0..4255fd2 100644
--- a/llvm/test/CodeGen/Mips/setule.ll
+++ b/llvm/test/CodeGen/Mips/setule.ll
@@ -10,15 +10,15 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @j, align 4
-  %1 = load i32* @k, align 4
+  %0 = load i32, i32* @j, align 4
+  %1 = load i32, i32* @k, align 4
   %cmp = icmp ule i32 %0, %1
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
 ; 16:	sltu	${{[0-9]+}}, ${{[0-9]+}}
 ; 16:	move	$[[REGISTER:[0-9]+]], $24
 ; 16:	xor	$[[REGISTER]], ${{[0-9]+}}
-  %2 = load i32* @m, align 4
+  %2 = load i32, i32* @m, align 4
   %cmp1 = icmp ule i32 %2, %1
   %conv2 = zext i1 %cmp1 to i32
   store i32 %conv2, i32* @r2, align 4
diff --git a/llvm/test/CodeGen/Mips/setult.ll b/llvm/test/CodeGen/Mips/setult.ll
index 00ee437..d30107e 100644
--- a/llvm/test/CodeGen/Mips/setult.ll
+++ b/llvm/test/CodeGen/Mips/setult.ll
@@ -10,8 +10,8 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @j, align 4
-  %1 = load i32* @k, align 4
+  %0 = load i32, i32* @j, align 4
+  %1 = load i32, i32* @k, align 4
   %cmp = icmp ult i32 %0, %1
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
diff --git a/llvm/test/CodeGen/Mips/setultk.ll b/llvm/test/CodeGen/Mips/setultk.ll
index eb9edba..1b79f10 100644
--- a/llvm/test/CodeGen/Mips/setultk.ll
+++ b/llvm/test/CodeGen/Mips/setultk.ll
@@ -10,7 +10,7 @@
 
 define void @test() nounwind {
 entry:
-  %0 = load i32* @j, align 4
+  %0 = load i32, i32* @j, align 4
   %cmp = icmp ult i32 %0, 10
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @r1, align 4
diff --git a/llvm/test/CodeGen/Mips/sh1.ll b/llvm/test/CodeGen/Mips/sh1.ll
index 1746ae2..976c68a 100644
--- a/llvm/test/CodeGen/Mips/sh1.ll
+++ b/llvm/test/CodeGen/Mips/sh1.ll
@@ -6,11 +6,11 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %conv = trunc i32 %0 to i16
   store i16 %conv, i16* @s, align 2
-  %1 = load i32* @i, align 4
-  %2 = load i16* @s, align 2
+  %1 = load i32, i32* @i, align 4
+  %2 = load i16, i16* @s, align 2
   %conv1 = sext i16 %2 to i32
 ; 16:	sh	${{[0-9]+}}, 0(${{[0-9]+}})
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %1, i32 %conv1)
diff --git a/llvm/test/CodeGen/Mips/simplebr.ll b/llvm/test/CodeGen/Mips/simplebr.ll
index a1d6367..08e153a 100644
--- a/llvm/test/CodeGen/Mips/simplebr.ll
+++ b/llvm/test/CodeGen/Mips/simplebr.ll
@@ -9,7 +9,7 @@
 ; Function Attrs: nounwind
 define void @foo() #0 {
 entry:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %tobool = icmp ne i32 %0, 0
   br i1 %tobool, label %if.then, label %if.else
 
diff --git a/llvm/test/CodeGen/Mips/sitofp-selectcc-opt.ll b/llvm/test/CodeGen/Mips/sitofp-selectcc-opt.ll
index 576cbd8..c60fceb 100644
--- a/llvm/test/CodeGen/Mips/sitofp-selectcc-opt.ll
+++ b/llvm/test/CodeGen/Mips/sitofp-selectcc-opt.ll
@@ -14,7 +14,7 @@
   %tobool1. = or i1 %tobool1, %not.tobool
   %lor.ext = zext i1 %tobool1. to i32
   %conv = sitofp i32 %lor.ext to double
-  %1 = load double* @foo12.d4, align 8
+  %1 = load double, double* @foo12.d4, align 8
   %add = fadd double %conv, %1
   store double %add, double* @foo12.d4, align 8
   ret double %add
diff --git a/llvm/test/CodeGen/Mips/sll1.ll b/llvm/test/CodeGen/Mips/sll1.ll
index fdcd38c..b23808c 100644
--- a/llvm/test/CodeGen/Mips/sll1.ll
+++ b/llvm/test/CodeGen/Mips/sll1.ll
@@ -7,11 +7,11 @@
 define i32 @main() nounwind {
 entry:
 ; 16:	sll	${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %shl = shl i32 %0, 4
 ; 16:	sll	${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
   store i32 %shl, i32* @j, align 4
-  %1 = load i32* @j, align 4
+  %1 = load i32, i32* @j, align 4
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
   ret i32 0
 }
diff --git a/llvm/test/CodeGen/Mips/sll2.ll b/llvm/test/CodeGen/Mips/sll2.ll
index c2af454..3a6222a 100644
--- a/llvm/test/CodeGen/Mips/sll2.ll
+++ b/llvm/test/CodeGen/Mips/sll2.ll
@@ -6,12 +6,12 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @i, align 4
-  %1 = load i32* @j, align 4
+  %0 = load i32, i32* @i, align 4
+  %1 = load i32, i32* @j, align 4
   %shl = shl i32 %0, %1
 ; 16:	sllv	${{[0-9]+}}, ${{[0-9]+}}
   store i32 %shl, i32* @i, align 4
-  %2 = load i32* @j, align 4
+  %2 = load i32, i32* @j, align 4
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %2)
   ret i32 0
 }
diff --git a/llvm/test/CodeGen/Mips/small-section-reserve-gp.ll b/llvm/test/CodeGen/Mips/small-section-reserve-gp.ll
index cbf0681..c4e3766 100644
--- a/llvm/test/CodeGen/Mips/small-section-reserve-gp.ll
+++ b/llvm/test/CodeGen/Mips/small-section-reserve-gp.ll
@@ -6,7 +6,7 @@
 define i32 @geti() nounwind readonly {
 entry:
 ; CHECK: lw ${{[0-9]+}}, %gp_rel(i)($gp)
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   ret i32 %0
 }
 
diff --git a/llvm/test/CodeGen/Mips/spill-copy-acreg.ll b/llvm/test/CodeGen/Mips/spill-copy-acreg.ll
index 6563a5c..fd160b6 100644
--- a/llvm/test/CodeGen/Mips/spill-copy-acreg.ll
+++ b/llvm/test/CodeGen/Mips/spill-copy-acreg.ll
@@ -6,7 +6,7 @@
 
 define i64 @test_acreg_copy(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
 entry:
-  %0 = load i64* @g1, align 8
+  %0 = load i64, i64* @g1, align 8
   %1 = tail call i64 @llvm.mips.maddu(i64 %0, i32 %a0, i32 %a1)
   %2 = tail call i64 @llvm.mips.maddu(i64 %0, i32 %a2, i32 %a3)
   store i64 %1, i64* @g1, align 8
@@ -32,8 +32,8 @@
   %sext = sext <2 x i1> %cmp3 to <2 x i16>
   store <2 x i16> %sext, <2 x i16>* @g4, align 4
   tail call void @foo1()
-  %2 = load <2 x i16>* @g5, align 4
-  %3 = load <2 x i16>* @g6, align 4
+  %2 = load <2 x i16>, <2 x i16>* @g5, align 4
+  %3 = load <2 x i16>, <2 x i16>* @g6, align 4
   %or = select <2 x i1> %cmp3, <2 x i16> %2, <2 x i16> %3
   %4 = bitcast <2 x i16> %or to i32
   %.fca.0.insert = insertvalue { i32 } undef, i32 %4, 0
diff --git a/llvm/test/CodeGen/Mips/sra1.ll b/llvm/test/CodeGen/Mips/sra1.ll
index 15bf8d6..6f74c2c 100644
--- a/llvm/test/CodeGen/Mips/sra1.ll
+++ b/llvm/test/CodeGen/Mips/sra1.ll
@@ -5,7 +5,7 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %shr = ashr i32 %0, 3
 ; 16:	sra	${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %shr)
diff --git a/llvm/test/CodeGen/Mips/sra2.ll b/llvm/test/CodeGen/Mips/sra2.ll
index 26bf19d..5c073af 100644
--- a/llvm/test/CodeGen/Mips/sra2.ll
+++ b/llvm/test/CodeGen/Mips/sra2.ll
@@ -6,8 +6,8 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @i, align 4
-  %1 = load i32* @j, align 4
+  %0 = load i32, i32* @i, align 4
+  %1 = load i32, i32* @j, align 4
   %shr = ashr i32 %0, %1
 ; 16:	srav	${{[0-9]+}}, ${{[0-9]+}}
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %shr)
diff --git a/llvm/test/CodeGen/Mips/srl1.ll b/llvm/test/CodeGen/Mips/srl1.ll
index 3474283..0e8b984 100644
--- a/llvm/test/CodeGen/Mips/srl1.ll
+++ b/llvm/test/CodeGen/Mips/srl1.ll
@@ -6,11 +6,11 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %shr = lshr i32 %0, 4
 ; 16:	srl	${{[0-9]+}}, ${{[0-9]+}}, {{[0-9]+}}
   store i32 %shr, i32* @j, align 4
-  %1 = load i32* @j, align 4
+  %1 = load i32, i32* @j, align 4
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %1)
   ret i32 0
 }
diff --git a/llvm/test/CodeGen/Mips/srl2.ll b/llvm/test/CodeGen/Mips/srl2.ll
index 26ec092..d1329c7 100644
--- a/llvm/test/CodeGen/Mips/srl2.ll
+++ b/llvm/test/CodeGen/Mips/srl2.ll
@@ -7,12 +7,12 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @i, align 4
-  %1 = load i32* @k, align 4
+  %0 = load i32, i32* @i, align 4
+  %1 = load i32, i32* @k, align 4
   %shr = lshr i32 %0, %1
 ; 16:	srlv	${{[0-9]+}}, ${{[0-9]+}}
   store i32 %shr, i32* @j, align 4
-  %2 = load i32* @j, align 4
+  %2 = load i32, i32* @j, align 4
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), i32 %2)
   ret i32 0
 }
diff --git a/llvm/test/CodeGen/Mips/stackcoloring.ll b/llvm/test/CodeGen/Mips/stackcoloring.ll
index 99d2b4a..5516b5a 100644
--- a/llvm/test/CodeGen/Mips/stackcoloring.ll
+++ b/llvm/test/CodeGen/Mips/stackcoloring.ll
@@ -18,9 +18,9 @@
 for.body:                                         ; preds = %for.body, %entry
   %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %v.04 = phi i32 [ 0, %entry ], [ %add, %for.body ]
-  %1 = load i32** @g1, align 4
+  %1 = load i32*, i32** @g1, align 4
   %arrayidx = getelementptr inbounds i32, i32* %1, i32 %i.05
-  %2 = load i32* %arrayidx, align 4
+  %2 = load i32, i32* %arrayidx, align 4
   %call = call i32 @foo2(i32 %2, i32* %arraydecay)
   %add = add nsw i32 %call, %v.04
   %inc = add nsw i32 %i.05, 1
diff --git a/llvm/test/CodeGen/Mips/stchar.ll b/llvm/test/CodeGen/Mips/stchar.ll
index 12eae34..70782ff 100644
--- a/llvm/test/CodeGen/Mips/stchar.ll
+++ b/llvm/test/CodeGen/Mips/stchar.ll
@@ -17,16 +17,16 @@
 
 define void @p2() nounwind {
 entry:
-  %0 = load i16** @sp, align 4
-  %1 = load i16* %0, align 2
-  %2 = load i8** @cp, align 4
-  %3 = load i8* %2, align 1
+  %0 = load i16*, i16** @sp, align 4
+  %1 = load i16, i16* %0, align 2
+  %2 = load i8*, i8** @cp, align 4
+  %3 = load i8, i8* %2, align 1
   %conv.i = sext i16 %1 to i32
   %conv1.i = sext i8 %3 to i32
   %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %conv.i, i32 %conv1.i) nounwind
-  %4 = load i16** @sp, align 4
+  %4 = load i16*, i16** @sp, align 4
   store i16 32, i16* %4, align 2
-  %5 = load i8** @cp, align 4
+  %5 = load i8*, i8** @cp, align 4
   store i8 97, i8* %5, align 1
   ret void
 }
@@ -40,12 +40,12 @@
   store i16* %s, i16** @sp, align 4
   store i8* %c, i8** @cp, align 4
   %call.i.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 16, i32 99) nounwind
-  %0 = load i16** @sp, align 4
+  %0 = load i16*, i16** @sp, align 4
   store i16 32, i16* %0, align 2
-  %1 = load i8** @cp, align 4
+  %1 = load i8*, i8** @cp, align 4
   store i8 97, i8* %1, align 1
-  %2 = load i16* %s, align 4
-  %3 = load i8* %c, align 4
+  %2 = load i16, i16* %s, align 4
+  %3 = load i8, i8* %c, align 4
   %conv.i = sext i16 %2 to i32
   %conv1.i = sext i8 %3 to i32
   %call.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %conv.i, i32 %conv1.i) nounwind
@@ -70,12 +70,12 @@
   store i16* %s.i, i16** @sp, align 4
   store i8* %c.i, i8** @cp, align 4
   %call.i.i.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 16, i32 99) nounwind
-  %1 = load i16** @sp, align 4
+  %1 = load i16*, i16** @sp, align 4
   store i16 32, i16* %1, align 2
-  %2 = load i8** @cp, align 4
+  %2 = load i8*, i8** @cp, align 4
   store i8 97, i8* %2, align 1
-  %3 = load i16* %s.i, align 4
-  %4 = load i8* %c.i, align 4
+  %3 = load i16, i16* %s.i, align 4
+  %4 = load i8, i8* %c.i, align 4
   %conv.i.i = sext i16 %3 to i32
   %conv1.i.i = sext i8 %4 to i32
   %call.i.i = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([9 x i8]* @.str, i32 0, i32 0), i32 %conv.i.i, i32 %conv1.i.i) nounwind
diff --git a/llvm/test/CodeGen/Mips/stldst.ll b/llvm/test/CodeGen/Mips/stldst.ll
index 4182b9e..3260639 100644
--- a/llvm/test/CodeGen/Mips/stldst.ll
+++ b/llvm/test/CodeGen/Mips/stldst.ll
@@ -12,21 +12,21 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @kkkk, align 4
-  %1 = load i32* @llll, align 4
+  %0 = load i32, i32* @kkkk, align 4
+  %1 = load i32, i32* @llll, align 4
   %add = add nsw i32 %0, 10
   %add1 = add nsw i32 %1, 10
-  %2 = load i32* @mmmm, align 4
+  %2 = load i32, i32* @mmmm, align 4
   %sub = add nsw i32 %2, -3
-  %3 = load i32* @nnnn, align 4
+  %3 = load i32, i32* @nnnn, align 4
   %add2 = add nsw i32 %3, 10
-  %4 = load i32* @oooo, align 4
+  %4 = load i32, i32* @oooo, align 4
   %add3 = add nsw i32 %4, 4
-  %5 = load i32* @pppp, align 4
+  %5 = load i32, i32* @pppp, align 4
   %sub4 = add nsw i32 %5, -5
-  %6 = load i32* @qqqq, align 4
+  %6 = load i32, i32* @qqqq, align 4
   %sub5 = add nsw i32 %6, -10
-  %7 = load i32* @rrrr, align 4
+  %7 = load i32, i32* @rrrr, align 4
   %add6 = add nsw i32 %7, 6
 
   %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([32 x i8]* @.str, i32 0, i32 0), i32 %sub5, i32 %add6, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) nounwind
diff --git a/llvm/test/CodeGen/Mips/sub1.ll b/llvm/test/CodeGen/Mips/sub1.ll
index 195750b..cfa7fa5 100644
--- a/llvm/test/CodeGen/Mips/sub1.ll
+++ b/llvm/test/CodeGen/Mips/sub1.ll
@@ -5,7 +5,7 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   %sub = sub nsw i32 %0, 5
 ; 16:	addiu	${{[0-9]+}}, -{{[0-9]+}}
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %sub)
diff --git a/llvm/test/CodeGen/Mips/sub2.ll b/llvm/test/CodeGen/Mips/sub2.ll
index 4f6bfcc..fc831c5 100644
--- a/llvm/test/CodeGen/Mips/sub2.ll
+++ b/llvm/test/CodeGen/Mips/sub2.ll
@@ -6,8 +6,8 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @j, align 4
-  %1 = load i32* @i, align 4
+  %0 = load i32, i32* @j, align 4
+  %1 = load i32, i32* @i, align 4
   %sub = sub nsw i32 %0, %1
 ; 16:	subu	${{[0-9]+}}, ${{[0-9]+}}, ${{[0-9]+}}
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %sub)
diff --git a/llvm/test/CodeGen/Mips/tailcall.ll b/llvm/test/CodeGen/Mips/tailcall.ll
index 30f47ab..01b2d73 100644
--- a/llvm/test/CodeGen/Mips/tailcall.ll
+++ b/llvm/test/CodeGen/Mips/tailcall.ll
@@ -85,16 +85,16 @@
 ; PIC16: jalrc
 ; PIC16: .end caller5
 
-  %0 = load i32* @g0, align 4
-  %1 = load i32* @g1, align 4
-  %2 = load i32* @g2, align 4
-  %3 = load i32* @g3, align 4
-  %4 = load i32* @g4, align 4
-  %5 = load i32* @g5, align 4
-  %6 = load i32* @g6, align 4
-  %7 = load i32* @g7, align 4
-  %8 = load i32* @g8, align 4
-  %9 = load i32* @g9, align 4
+  %0 = load i32, i32* @g0, align 4
+  %1 = load i32, i32* @g1, align 4
+  %2 = load i32, i32* @g2, align 4
+  %3 = load i32, i32* @g3, align 4
+  %4 = load i32, i32* @g4, align 4
+  %5 = load i32, i32* @g5, align 4
+  %6 = load i32, i32* @g6, align 4
+  %7 = load i32, i32* @g7, align 4
+  %8 = load i32, i32* @g8, align 4
+  %9 = load i32, i32* @g9, align 4
   %call = tail call fastcc i32 @callee5(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9)
   ret i32 %call
 }
diff --git a/llvm/test/CodeGen/Mips/tls.ll b/llvm/test/CodeGen/Mips/tls.ll
index b14ad5b..97e270f 100644
--- a/llvm/test/CodeGen/Mips/tls.ll
+++ b/llvm/test/CodeGen/Mips/tls.ll
@@ -10,7 +10,7 @@
 
 define i32 @f1() nounwind {
 entry:
-  %tmp = load i32* @t1, align 4
+  %tmp = load i32, i32* @t1, align 4
   ret i32 %tmp
 
 ; PIC-LABEL:       f1:
@@ -33,7 +33,7 @@
 
 define i32 @f2() nounwind {
 entry:
-  %tmp = load i32* @t2, align 4
+  %tmp = load i32, i32* @t2, align 4
   ret i32 %tmp
 
 ; PIC-LABEL:       f2:
@@ -69,7 +69,7 @@
 ; PIC:   addu    $[[R1:[0-9]+]], $[[R0]], $2
 ; PIC:   lw      ${{[0-9]+}}, %dtprel_lo(f3.i)($[[R1]])
 
-  %0 = load i32* @f3.i, align 4
+  %0 = load i32, i32* @f3.i, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* @f3.i, align 4
   ret i32 %inc
diff --git a/llvm/test/CodeGen/Mips/tls16.ll b/llvm/test/CodeGen/Mips/tls16.ll
index 861864b..3d324d7 100644
--- a/llvm/test/CodeGen/Mips/tls16.ll
+++ b/llvm/test/CodeGen/Mips/tls16.ll
@@ -4,7 +4,7 @@
 
 define i32 @foo() nounwind readonly {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
 ; PIC16:	lw	${{[0-9]+}}, %call16(__tls_get_addr)(${{[0-9]+}})
 ; PIC16:	addiu	${{[0-9]+}}, %tlsgd(a)
   ret i32 %0
diff --git a/llvm/test/CodeGen/Mips/tls16_2.ll b/llvm/test/CodeGen/Mips/tls16_2.ll
index b33e3c3..0a6a412 100644
--- a/llvm/test/CodeGen/Mips/tls16_2.ll
+++ b/llvm/test/CodeGen/Mips/tls16_2.ll
@@ -4,7 +4,7 @@
 
 define i8* @f(i8* nocapture %a) nounwind {
 entry:
-  %0 = load i32* @f.i, align 4
+  %0 = load i32, i32* @f.i, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* @f.i, align 4
   %1 = inttoptr i32 %inc to i8*
diff --git a/llvm/test/CodeGen/Mips/uitofp.ll b/llvm/test/CodeGen/Mips/uitofp.ll
index aff70c2..83c2069 100644
--- a/llvm/test/CodeGen/Mips/uitofp.ll
+++ b/llvm/test/CodeGen/Mips/uitofp.ll
@@ -5,7 +5,7 @@
   %b = alloca i32, align 4
   %a = alloca float, align 4
   store volatile i32 1, i32* %b, align 4
-  %0 = load volatile i32* %b, align 4
+  %0 = load volatile i32, i32* %b, align 4
   %conv = uitofp i32 %0 to float
   store float %conv, float* %a, align 4
   ret void
diff --git a/llvm/test/CodeGen/Mips/vector-load-store.ll b/llvm/test/CodeGen/Mips/vector-load-store.ll
index d889963..61cbc5a 100644
--- a/llvm/test/CodeGen/Mips/vector-load-store.ll
+++ b/llvm/test/CodeGen/Mips/vector-load-store.ll
@@ -10,7 +10,7 @@
 ; CHECK: lw
 ; CHECK: sw
 
-  %0 = load <2 x i16>* @g1, align 4
+  %0 = load <2 x i16>, <2 x i16>* @g1, align 4
   store <2 x i16> %0, <2 x i16>* @g0, align 4
   ret void
 }
@@ -20,7 +20,7 @@
 ; CHECK: lw
 ; CHECK: sw
 
-  %0 = load <4 x i8>* @g3, align 4
+  %0 = load <4 x i8>, <4 x i8>* @g3, align 4
   store <4 x i8> %0, <4 x i8>* @g2, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/Mips/vector-setcc.ll b/llvm/test/CodeGen/Mips/vector-setcc.ll
index aeff491..64b84e4 100644
--- a/llvm/test/CodeGen/Mips/vector-setcc.ll
+++ b/llvm/test/CodeGen/Mips/vector-setcc.ll
@@ -6,8 +6,8 @@
 
 define void @foo0() nounwind {
 entry:
-  %0 = load <4 x i32>* @a, align 16
-  %1 = load <4 x i32>* @b, align 16
+  %0 = load <4 x i32>, <4 x i32>* @a, align 16
+  %1 = load <4 x i32>, <4 x i32>* @b, align 16
   %cmp = icmp slt <4 x i32> %0, %1
   %sext = sext <4 x i1> %cmp to <4 x i32>
   store <4 x i32> %sext, <4 x i32>* @g0, align 16
diff --git a/llvm/test/CodeGen/Mips/xor1.ll b/llvm/test/CodeGen/Mips/xor1.ll
index f2c1316..b5ef1a1 100644
--- a/llvm/test/CodeGen/Mips/xor1.ll
+++ b/llvm/test/CodeGen/Mips/xor1.ll
@@ -6,8 +6,8 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @x, align 4
-  %1 = load i32* @y, align 4
+  %0 = load i32, i32* @x, align 4
+  %1 = load i32, i32* @y, align 4
   %xor = xor i32 %0, %1
 ; 16:	xor	${{[0-9]+}}, ${{[0-9]+}}
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), i32 %xor)
diff --git a/llvm/test/CodeGen/Mips/zeroreg.ll b/llvm/test/CodeGen/Mips/zeroreg.ll
index c766d3b..6baf9d4 100644
--- a/llvm/test/CodeGen/Mips/zeroreg.ll
+++ b/llvm/test/CodeGen/Mips/zeroreg.ll
@@ -25,7 +25,7 @@
 ; 64R6:          seleqz $2, $[[R0]], $4
 
   %tobool = icmp ne i32 %s, 0
-  %0 = load i32* @g1, align 4
+  %0 = load i32, i32* @g1, align 4
   %cond = select i1 %tobool, i32 0, i32 %0
   ret i32 %cond
 }
@@ -47,7 +47,7 @@
 ; 64R6:          selnez $2, $[[R0]], $4
 
   %tobool = icmp ne i32 %s, 0
-  %0 = load i32* @g1, align 4
+  %0 = load i32, i32* @g1, align 4
   %cond = select i1 %tobool, i32 %0, i32 0
   ret i32 %cond
 }
@@ -76,7 +76,7 @@
 ; 64R6:          seleqz $2, $[[R0]], $4
 
   %tobool = icmp ne i64 %s, 0
-  %0 = load i64* @g2, align 4
+  %0 = load i64, i64* @g2, align 4
   %cond = select i1 %tobool, i64 0, i64 %0
   ret i64 %cond
 }
@@ -103,7 +103,7 @@
 ; 64R6:          selnez $2, $[[R0]], $4
 
   %tobool = icmp ne i64 %s, 0
-  %0 = load i64* @g2, align 4
+  %0 = load i64, i64* @g2, align 4
   %cond = select i1 %tobool, i64 %0, i64 0
   ret i64 %cond
 }
diff --git a/llvm/test/CodeGen/NVPTX/access-non-generic.ll b/llvm/test/CodeGen/NVPTX/access-non-generic.ll
index e779c9e..f3ff93f 100644
--- a/llvm/test/CodeGen/NVPTX/access-non-generic.ll
+++ b/llvm/test/CodeGen/NVPTX/access-non-generic.ll
@@ -18,7 +18,7 @@
 ; IR-NOT: addrspacecast
 ; PTX-LABEL: ld_st_shared_f32(
   ; load cast
-  %1 = load float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
+  %1 = load float, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
 ; PTX: ld.shared.f32 %f{{[0-9]+}}, [scalar];
   ; store cast
   store float %v, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
@@ -29,7 +29,7 @@
 
   ; cast; load
   %2 = addrspacecast float addrspace(3)* @scalar to float*
-  %3 = load float* %2, align 4
+  %3 = load float, float* %2, align 4
 ; PTX: ld.shared.f32 %f{{[0-9]+}}, [scalar];
   ; cast; store
   store float %v, float* %2, align 4
@@ -38,7 +38,7 @@
 ; PTX: bar.sync 0;
 
   ; load gep cast
-  %4 = load float* getelementptr inbounds ([10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
+  %4 = load float, float* getelementptr inbounds ([10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
 ; PTX: ld.shared.f32 %f{{[0-9]+}}, [array+20];
   ; store gep cast
   store float %v, float* getelementptr inbounds ([10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
@@ -48,7 +48,7 @@
 
   ; gep cast; load
   %5 = getelementptr inbounds [10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5
-  %6 = load float* %5, align 4
+  %6 = load float, float* %5, align 4
 ; PTX: ld.shared.f32 %f{{[0-9]+}}, [array+20];
   ; gep cast; store
   store float %v, float* %5, align 4
@@ -59,7 +59,7 @@
   ; cast; gep; load
   %7 = addrspacecast [10 x float] addrspace(3)* @array to [10 x float]*
   %8 = getelementptr inbounds [10 x float], [10 x float]* %7, i32 0, i32 %i
-  %9 = load float* %8, align 4
+  %9 = load float, float* %8, align 4
 ; PTX: ld.shared.f32 %f{{[0-9]+}}, [%{{(r|rl|rd)[0-9]+}}];
   ; cast; gep; store
   store float %v, float* %8, align 4
@@ -78,10 +78,10 @@
 ; addrspacecast with a bitcast.
 define i32 @ld_int_from_float() {
 ; IR-LABEL: @ld_int_from_float
-; IR: load i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*)
+; IR: load i32, i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*)
 ; PTX-LABEL: ld_int_from_float(
 ; PTX: ld.shared.u{{(32|64)}}
-  %1 = load i32* addrspacecast(float addrspace(3)* @scalar to i32*), align 4
+  %1 = load i32, i32* addrspacecast(float addrspace(3)* @scalar to i32*), align 4
   ret i32 %1
 }
 
diff --git a/llvm/test/CodeGen/NVPTX/addrspacecast.ll b/llvm/test/CodeGen/NVPTX/addrspacecast.ll
index 03b9a98..42e67ca 100644
--- a/llvm/test/CodeGen/NVPTX/addrspacecast.ll
+++ b/llvm/test/CodeGen/NVPTX/addrspacecast.ll
@@ -10,7 +10,7 @@
 ; PTX64: cvta.global.u64
 ; PTX64: ld.u32
   %genptr = addrspacecast i32 addrspace(1)* %ptr to i32*
-  %val = load i32* %genptr
+  %val = load i32, i32* %genptr
   ret i32 %val
 }
 
@@ -22,7 +22,7 @@
 ; PTX64: cvta.shared.u64
 ; PTX64: ld.u32
   %genptr = addrspacecast i32 addrspace(3)* %ptr to i32*
-  %val = load i32* %genptr
+  %val = load i32, i32* %genptr
   ret i32 %val
 }
 
@@ -34,7 +34,7 @@
 ; PTX64: cvta.const.u64
 ; PTX64: ld.u32
   %genptr = addrspacecast i32 addrspace(4)* %ptr to i32*
-  %val = load i32* %genptr
+  %val = load i32, i32* %genptr
   ret i32 %val
 }
 
@@ -46,7 +46,7 @@
 ; PTX64: cvta.local.u64
 ; PTX64: ld.u32
   %genptr = addrspacecast i32 addrspace(5)* %ptr to i32*
-  %val = load i32* %genptr
+  %val = load i32, i32* %genptr
   ret i32 %val
 }
 
@@ -58,7 +58,7 @@
 ; PTX64: cvta.to.global.u64
 ; PTX64: ld.global.u32
   %specptr = addrspacecast i32* %ptr to i32 addrspace(1)*
-  %val = load i32 addrspace(1)* %specptr
+  %val = load i32, i32 addrspace(1)* %specptr
   ret i32 %val
 }
 
@@ -70,7 +70,7 @@
 ; PTX64: cvta.to.shared.u64
 ; PTX64: ld.shared.u32
   %specptr = addrspacecast i32* %ptr to i32 addrspace(3)*
-  %val = load i32 addrspace(3)* %specptr
+  %val = load i32, i32 addrspace(3)* %specptr
   ret i32 %val
 }
 
@@ -82,7 +82,7 @@
 ; PTX64: cvta.to.const.u64
 ; PTX64: ld.const.u32
   %specptr = addrspacecast i32* %ptr to i32 addrspace(4)*
-  %val = load i32 addrspace(4)* %specptr
+  %val = load i32, i32 addrspace(4)* %specptr
   ret i32 %val
 }
 
@@ -94,6 +94,6 @@
 ; PTX64: cvta.to.local.u64
 ; PTX64: ld.local.u32
   %specptr = addrspacecast i32* %ptr to i32 addrspace(5)*
-  %val = load i32 addrspace(5)* %specptr
+  %val = load i32, i32 addrspace(5)* %specptr
   ret i32 %val
 }
diff --git a/llvm/test/CodeGen/NVPTX/bug21465.ll b/llvm/test/CodeGen/NVPTX/bug21465.ll
index 4a1588b..76af386 100644
--- a/llvm/test/CodeGen/NVPTX/bug21465.ll
+++ b/llvm/test/CodeGen/NVPTX/bug21465.ll
@@ -12,7 +12,7 @@
 ; CHECK:   bitcast %struct.S* %input to i8*
 ; CHECK:   call i8 addrspace(101)* @llvm.nvvm.ptr.gen.to.param.p101i8.p0i8
   %b = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1
-  %0 = load i32* %b, align 4
+  %0 = load i32, i32* %b, align 4
   store i32 %0, i32* %output, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/NVPTX/bug22322.ll b/llvm/test/CodeGen/NVPTX/bug22322.ll
index 713c34f..97863b9 100644
--- a/llvm/test/CodeGen/NVPTX/bug22322.ll
+++ b/llvm/test/CodeGen/NVPTX/bug22322.ll
@@ -24,7 +24,7 @@
   store float %9, float* %ret_vec.sroa.8.i, align 4
 ; CHECK: setp.lt.f32     %p{{[0-9]+}}, %f{{[0-9]+}}, 0f00000000
   %10 = fcmp olt float %9, 0.000000e+00
-  %ret_vec.sroa.8.i.val = load float* %ret_vec.sroa.8.i, align 4
+  %ret_vec.sroa.8.i.val = load float, float* %ret_vec.sroa.8.i, align 4
   %11 = select i1 %10, float 0.000000e+00, float %ret_vec.sroa.8.i.val
   call void @llvm.lifetime.end(i64 4, i8* %6)
   %12 = getelementptr inbounds %class.float3, %class.float3* %dst, i64 %5, i32 0
diff --git a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
index 2fc36be4..58b1911 100644
--- a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
+++ b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
@@ -27,21 +27,21 @@
 ; CHECK: ld.f32 %f[[A0_REG:[0-9]+]], [%rd[[A_REG]]]
 ; CHECK: st.f32 [%SP+0], %f[[A0_REG]]
 
-  %0 = load float* %a, align 4
+  %0 = load float, float* %a, align 4
   %1 = bitcast [16 x i8]* %buf to float*
   store float %0, float* %1, align 4
   %arrayidx2 = getelementptr inbounds float, float* %a, i64 1
-  %2 = load float* %arrayidx2, align 4
+  %2 = load float, float* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 1
   %3 = bitcast i8* %arrayidx3 to float*
   store float %2, float* %3, align 4
   %arrayidx4 = getelementptr inbounds float, float* %a, i64 2
-  %4 = load float* %arrayidx4, align 4
+  %4 = load float, float* %arrayidx4, align 4
   %arrayidx5 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 2
   %5 = bitcast i8* %arrayidx5 to float*
   store float %4, float* %5, align 4
   %arrayidx6 = getelementptr inbounds float, float* %a, i64 3
-  %6 = load float* %arrayidx6, align 4
+  %6 = load float, float* %arrayidx6, align 4
   %arrayidx7 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 3
   %7 = bitcast i8* %arrayidx7 to float*
   store float %6, float* %7, align 4
diff --git a/llvm/test/CodeGen/NVPTX/fp16.ll b/llvm/test/CodeGen/NVPTX/fp16.ll
index 8770399..b85eed0 100644
--- a/llvm/test/CodeGen/NVPTX/fp16.ll
+++ b/llvm/test/CodeGen/NVPTX/fp16.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL: @test_convert_fp16_to_fp32
 ; CHECK: cvt.f32.f16
 define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
-  %val = load i16 addrspace(1)* %in, align 2
+  %val = load i16, i16 addrspace(1)* %in, align 2
   %cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
   store float %cvt, float addrspace(1)* %out, align 4
   ret void
@@ -18,7 +18,7 @@
 ; CHECK-LABEL: @test_convert_fp16_to_fp64
 ; CHECK: cvt.f64.f16
 define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
-  %val = load i16 addrspace(1)* %in, align 2
+  %val = load i16, i16 addrspace(1)* %in, align 2
   %cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone
   store double %cvt, double addrspace(1)* %out, align 4
   ret void
@@ -28,7 +28,7 @@
 ; CHECK-LABEL: @test_convert_fp32_to_fp16
 ; CHECK: cvt.rn.f16.f32
 define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
-  %val = load float addrspace(1)* %in, align 2
+  %val = load float, float addrspace(1)* %in, align 2
   %cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone
   store i16 %cvt, i16 addrspace(1)* %out, align 4
   ret void
@@ -38,7 +38,7 @@
 ; CHECK-LABEL: @test_convert_fp64_to_fp16
 ; CHECK: cvt.rn.f16.f64
 define void @test_convert_fp64_to_fp16(i16 addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
-  %val = load double addrspace(1)* %in, align 2
+  %val = load double, double addrspace(1)* %in, align 2
   %cvt = call i16 @llvm.convert.to.fp16.f64(double %val) nounwind readnone
   store i16 %cvt, i16 addrspace(1)* %out, align 4
   ret void
diff --git a/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll b/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll
index fb63d6e..66917d5 100644
--- a/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll
+++ b/llvm/test/CodeGen/NVPTX/generic-to-nvvm.ll
@@ -13,9 +13,9 @@
 
 define void @foo(i32* %a, i32* %b) {
 ; CHECK: cvta.global.u32
-  %ld1 = load i32* @myglobal
+  %ld1 = load i32, i32* @myglobal
 ; CHECK: cvta.global.u32
-  %ld2 = load i32* @myconst
+  %ld2 = load i32, i32* @myconst
   store i32 %ld1, i32* %a
   store i32 %ld2, i32* %b
   ret void
diff --git a/llvm/test/CodeGen/NVPTX/half.ll b/llvm/test/CodeGen/NVPTX/half.ll
index aa08cc7..b995241 100644
--- a/llvm/test/CodeGen/NVPTX/half.ll
+++ b/llvm/test/CodeGen/NVPTX/half.ll
@@ -4,7 +4,7 @@
 ; CHECK-LABEL: @test_load_store
 ; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
 ; CHECK: st.global.u16 [{{%r[0-9]+}}], [[TMP]]
-  %val = load half addrspace(1)* %in
+  %val = load half, half addrspace(1)* %in
   store half %val, half addrspace(1) * %out
   ret void
 }
@@ -13,7 +13,7 @@
 ; CHECK-LABEL: @test_bitcast_from_half
 ; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
 ; CHECK: st.global.u16 [{{%r[0-9]+}}], [[TMP]]
-  %val = load half addrspace(1) * %in
+  %val = load half, half addrspace(1) * %in
   %val_int = bitcast half %val to i16
   store i16 %val_int, i16 addrspace(1)* %out
   ret void
@@ -23,7 +23,7 @@
 ; CHECK-LABEL: @test_bitcast_to_half
 ; CHECK: ld.global.u16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}]
 ; CHECK: st.global.u16 [{{%r[0-9]+}}], [[TMP]]
-  %val = load i16 addrspace(1)* %in
+  %val = load i16, i16 addrspace(1)* %in
   %val_fp = bitcast i16 %val to half
   store half %val_fp, half addrspace(1)* %out
   ret void
@@ -33,7 +33,7 @@
 ; CHECK-LABEL: @test_extend32
 ; CHECK: cvt.f32.f16
 
-  %val16 = load half addrspace(1)* %in
+  %val16 = load half, half addrspace(1)* %in
   %val32 = fpext half %val16 to float
   store float %val32, float addrspace(1)* %out
   ret void
@@ -43,7 +43,7 @@
 ; CHECK-LABEL: @test_extend64
 ; CHECK: cvt.f64.f16
 
-  %val16 = load half addrspace(1)* %in
+  %val16 = load half, half addrspace(1)* %in
   %val64 = fpext half %val16 to double
   store double %val64, double addrspace(1)* %out
   ret void
@@ -53,7 +53,7 @@
 ; CHECK-LABEL: test_trunc32
 ; CHECK: cvt.rn.f16.f32
 
-  %val32 = load float addrspace(1)* %in
+  %val32 = load float, float addrspace(1)* %in
   %val16 = fptrunc float %val32 to half
   store half %val16, half addrspace(1)* %out
   ret void
@@ -63,7 +63,7 @@
 ; CHECK-LABEL: @test_trunc64
 ; CHECK: cvt.rn.f16.f64
 
-  %val32 = load double addrspace(1)* %in
+  %val32 = load double, double addrspace(1)* %in
   %val16 = fptrunc double %val32 to half
   store half %val16, half addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/NVPTX/i1-global.ll b/llvm/test/CodeGen/NVPTX/i1-global.ll
index e3fe08e..35d77b4 100644
--- a/llvm/test/CodeGen/NVPTX/i1-global.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-global.ll
@@ -8,7 +8,7 @@
 
 
 define void @foo(i1 %p, i32* %out) {
-  %ld = load i1 addrspace(1)* @mypred
+  %ld = load i1, i1 addrspace(1)* @mypred
   %val = zext i1 %ld to i32
   store i32 %val, i32* %out
   ret void
diff --git a/llvm/test/CodeGen/NVPTX/i8-param.ll b/llvm/test/CodeGen/NVPTX/i8-param.ll
index 84daa9f..6a1e3a0 100644
--- a/llvm/test/CodeGen/NVPTX/i8-param.ll
+++ b/llvm/test/CodeGen/NVPTX/i8-param.ll
@@ -13,7 +13,7 @@
 ; CHECK: .visible .func caller
 define void @caller(i8* %a) {
 ; CHECK: ld.u8
-  %val = load i8* %a
+  %val = load i8, i8* %a
   %ret = tail call i8 @callee(i8 %val)
 ; CHECK: ld.param.b32
   store i8 %ret, i8* %a
diff --git a/llvm/test/CodeGen/NVPTX/ld-addrspace.ll b/llvm/test/CodeGen/NVPTX/ld-addrspace.ll
index f33659c..0018e61 100644
--- a/llvm/test/CodeGen/NVPTX/ld-addrspace.ll
+++ b/llvm/test/CodeGen/NVPTX/ld-addrspace.ll
@@ -8,7 +8,7 @@
 ; PTX32: ret
 ; PTX64: ld.global.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i8 addrspace(1)* %ptr
+  %a = load i8, i8 addrspace(1)* %ptr
   ret i8 %a
 }
 
@@ -17,7 +17,7 @@
 ; PTX32: ret
 ; PTX64: ld.shared.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i8 addrspace(3)* %ptr
+  %a = load i8, i8 addrspace(3)* %ptr
   ret i8 %a
 }
 
@@ -26,7 +26,7 @@
 ; PTX32: ret
 ; PTX64: ld.local.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i8 addrspace(5)* %ptr
+  %a = load i8, i8 addrspace(5)* %ptr
   ret i8 %a
 }
 
@@ -36,7 +36,7 @@
 ; PTX32: ret
 ; PTX64: ld.global.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i16 addrspace(1)* %ptr
+  %a = load i16, i16 addrspace(1)* %ptr
   ret i16 %a
 }
 
@@ -45,7 +45,7 @@
 ; PTX32: ret
 ; PTX64: ld.shared.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i16 addrspace(3)* %ptr
+  %a = load i16, i16 addrspace(3)* %ptr
   ret i16 %a
 }
 
@@ -54,7 +54,7 @@
 ; PTX32: ret
 ; PTX64: ld.local.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i16 addrspace(5)* %ptr
+  %a = load i16, i16 addrspace(5)* %ptr
   ret i16 %a
 }
 
@@ -64,7 +64,7 @@
 ; PTX32: ret
 ; PTX64: ld.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i32 addrspace(1)* %ptr
+  %a = load i32, i32 addrspace(1)* %ptr
   ret i32 %a
 }
 
@@ -73,7 +73,7 @@
 ; PTX32: ret
 ; PTX64: ld.shared.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i32 addrspace(3)* %ptr
+  %a = load i32, i32 addrspace(3)* %ptr
   ret i32 %a
 }
 
@@ -82,7 +82,7 @@
 ; PTX32: ret
 ; PTX64: ld.local.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i32 addrspace(5)* %ptr
+  %a = load i32, i32 addrspace(5)* %ptr
   ret i32 %a
 }
 
@@ -92,7 +92,7 @@
 ; PTX32: ret
 ; PTX64: ld.global.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i64 addrspace(1)* %ptr
+  %a = load i64, i64 addrspace(1)* %ptr
   ret i64 %a
 }
 
@@ -101,7 +101,7 @@
 ; PTX32: ret
 ; PTX64: ld.shared.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i64 addrspace(3)* %ptr
+  %a = load i64, i64 addrspace(3)* %ptr
   ret i64 %a
 }
 
@@ -110,7 +110,7 @@
 ; PTX32: ret
 ; PTX64: ld.local.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i64 addrspace(5)* %ptr
+  %a = load i64, i64 addrspace(5)* %ptr
   ret i64 %a
 }
 
@@ -120,7 +120,7 @@
 ; PTX32: ret
 ; PTX64: ld.global.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load float addrspace(1)* %ptr
+  %a = load float, float addrspace(1)* %ptr
   ret float %a
 }
 
@@ -129,7 +129,7 @@
 ; PTX32: ret
 ; PTX64: ld.shared.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load float addrspace(3)* %ptr
+  %a = load float, float addrspace(3)* %ptr
   ret float %a
 }
 
@@ -138,7 +138,7 @@
 ; PTX32: ret
 ; PTX64: ld.local.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load float addrspace(5)* %ptr
+  %a = load float, float addrspace(5)* %ptr
   ret float %a
 }
 
@@ -148,7 +148,7 @@
 ; PTX32: ret
 ; PTX64: ld.global.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load double addrspace(1)* %ptr
+  %a = load double, double addrspace(1)* %ptr
   ret double %a
 }
 
@@ -157,7 +157,7 @@
 ; PTX32: ret
 ; PTX64: ld.shared.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load double addrspace(3)* %ptr
+  %a = load double, double addrspace(3)* %ptr
   ret double %a
 }
 
@@ -166,6 +166,6 @@
 ; PTX32: ret
 ; PTX64: ld.local.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load double addrspace(5)* %ptr
+  %a = load double, double addrspace(5)* %ptr
   ret double %a
 }
diff --git a/llvm/test/CodeGen/NVPTX/ld-generic.ll b/llvm/test/CodeGen/NVPTX/ld-generic.ll
index d629e0e..44cfe65 100644
--- a/llvm/test/CodeGen/NVPTX/ld-generic.ll
+++ b/llvm/test/CodeGen/NVPTX/ld-generic.ll
@@ -8,7 +8,7 @@
 ; PTX32: ret
 ; PTX64: ld.u8 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i8 addrspace(0)* %ptr
+  %a = load i8, i8 addrspace(0)* %ptr
   ret i8 %a
 }
 
@@ -18,7 +18,7 @@
 ; PTX32: ret
 ; PTX64: ld.u16 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i16 addrspace(0)* %ptr
+  %a = load i16, i16 addrspace(0)* %ptr
   ret i16 %a
 }
 
@@ -28,7 +28,7 @@
 ; PTX32: ret
 ; PTX64: ld.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i32 addrspace(0)* %ptr
+  %a = load i32, i32 addrspace(0)* %ptr
   ret i32 %a
 }
 
@@ -38,7 +38,7 @@
 ; PTX32: ret
 ; PTX64: ld.u64 %rd{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load i64 addrspace(0)* %ptr
+  %a = load i64, i64 addrspace(0)* %ptr
   ret i64 %a
 }
 
@@ -48,7 +48,7 @@
 ; PTX32: ret
 ; PTX64: ld.f32 %f{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load float addrspace(0)* %ptr
+  %a = load float, float addrspace(0)* %ptr
   ret float %a
 }
 
@@ -58,6 +58,6 @@
 ; PTX32: ret
 ; PTX64: ld.f64 %fd{{[0-9]+}}, [%rd{{[0-9]+}}]
 ; PTX64: ret
-  %a = load double addrspace(0)* %ptr
+  %a = load double, double addrspace(0)* %ptr
   ret double %a
 }
diff --git a/llvm/test/CodeGen/NVPTX/load-sext-i1.ll b/llvm/test/CodeGen/NVPTX/load-sext-i1.ll
index 6dc7bd8..9fc98a4 100644
--- a/llvm/test/CodeGen/NVPTX/load-sext-i1.ll
+++ b/llvm/test/CodeGen/NVPTX/load-sext-i1.ll
@@ -7,7 +7,7 @@
 ; CHECK: ld.u8
 ; CHECK-NOT: ld.u1
   %t1 = getelementptr i1, i1* %a1, i32 %a2
-  %t2 = load i1* %t1
+  %t2 = load i1, i1* %t1
   %t3 = sext i1 %t2 to i32
   store i32 %t3, i32* %arg3
   ret void
diff --git a/llvm/test/CodeGen/NVPTX/machine-sink.ll b/llvm/test/CodeGen/NVPTX/machine-sink.ll
index 3614bea..65ba141 100644
--- a/llvm/test/CodeGen/NVPTX/machine-sink.ll
+++ b/llvm/test/CodeGen/NVPTX/machine-sink.ll
@@ -14,8 +14,8 @@
 define float @post_dominate(float %x, i1 %cond) {
 ; CHECK-LABEL: post_dominate(
 entry:
-  %0 = load float* addrspacecast (float addrspace(3)* @scalar1 to float*), align 4
-  %1 = load float* addrspacecast (float addrspace(3)* @scalar2 to float*), align 4
+  %0 = load float, float* addrspacecast (float addrspace(3)* @scalar1 to float*), align 4
+  %1 = load float, float* addrspacecast (float addrspace(3)* @scalar2 to float*), align 4
 ; CHECK: ld.shared.f32
 ; CHECK: ld.shared.f32
   %2 = fmul float %0, %0
diff --git a/llvm/test/CodeGen/NVPTX/misaligned-vector-ldst.ll b/llvm/test/CodeGen/NVPTX/misaligned-vector-ldst.ll
index 90c9c43..2ad72b0 100644
--- a/llvm/test/CodeGen/NVPTX/misaligned-vector-ldst.ll
+++ b/llvm/test/CodeGen/NVPTX/misaligned-vector-ldst.ll
@@ -10,7 +10,7 @@
 ; CHECK-NOT: ld.f32
 ; CHECK: ld.u8
   %cast = bitcast i8* %p1 to <4 x float>*
-  %r = load <4 x float>* %cast, align 1
+  %r = load <4 x float>, <4 x float>* %cast, align 1
   ret <4 x float> %r
 }
 
@@ -20,7 +20,7 @@
 ; CHECK-NOT: ld.v2
 ; CHECK: ld.f32
   %cast = bitcast i8* %p1 to <4 x float>*
-  %r = load <4 x float>* %cast, align 4
+  %r = load <4 x float>, <4 x float>* %cast, align 4
   ret <4 x float> %r
 }
 
@@ -29,7 +29,7 @@
 ; CHECK-NOT: ld.v4
 ; CHECK: ld.v2
   %cast = bitcast i8* %p1 to <4 x float>*
-  %r = load <4 x float>* %cast, align 8
+  %r = load <4 x float>, <4 x float>* %cast, align 8
   ret <4 x float> %r
 }
 
@@ -37,7 +37,7 @@
 define <4 x float> @t4(i8* %p1) {
 ; CHECK: ld.v4
   %cast = bitcast i8* %p1 to <4 x float>*
-  %r = load <4 x float>* %cast, align 16
+  %r = load <4 x float>, <4 x float>* %cast, align 16
   ret <4 x float> %r
 }
 
diff --git a/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll b/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
index fcb88ea..2fec31b 100644
--- a/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
+++ b/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
@@ -11,16 +11,16 @@
 entry:
   %output.addr = alloca float*, align 8
   store float* %output, float** %output.addr, align 8
-  %0 = load float** %output.addr, align 8
+  %0 = load float*, float** %output.addr, align 8
   %arrayidx = getelementptr inbounds float, float* %0, i64 0
-  %1 = load float* %arrayidx, align 4
+  %1 = load float, float* %arrayidx, align 4
   %conv = fpext float %1 to double
   %cmp = fcmp olt double %conv, 1.000000e+01
   br i1 %cmp, label %if.then, label %if.else
 
 if.then:                                          ; preds = %entry
-  %2 = load float** %output.addr, align 8
-  %3 = load float* %2, align 4
+  %2 = load float*, float** %output.addr, align 8
+  %3 = load float, float* %2, align 4
   %conv1 = fpext float %3 to double
   %add = fadd double %conv1, 1.000000e+00
   %conv2 = fptrunc double %add to float
@@ -28,8 +28,8 @@
   br label %if.end
 
 if.else:                                          ; preds = %entry
-  %4 = load float** %output.addr, align 8
-  %5 = load float* %4, align 4
+  %4 = load float*, float** %output.addr, align 8
+  %5 = load float, float* %4, align 4
   %conv3 = fpext float %5 to double
   %add4 = fadd double %conv3, 2.000000e+00
   %conv5 = fptrunc double %add4 to float
@@ -38,16 +38,16 @@
 
 if.end:                                           ; preds = %if.else, %if.then
   call void @llvm.cuda.syncthreads()
-  %6 = load float** %output.addr, align 8
+  %6 = load float*, float** %output.addr, align 8
   %arrayidx6 = getelementptr inbounds float, float* %6, i64 0
-  %7 = load float* %arrayidx6, align 4
+  %7 = load float, float* %arrayidx6, align 4
   %conv7 = fpext float %7 to double
   %cmp8 = fcmp olt double %conv7, 1.000000e+01
   br i1 %cmp8, label %if.then9, label %if.else13
 
 if.then9:                                         ; preds = %if.end
-  %8 = load float** %output.addr, align 8
-  %9 = load float* %8, align 4
+  %8 = load float*, float** %output.addr, align 8
+  %9 = load float, float* %8, align 4
   %conv10 = fpext float %9 to double
   %add11 = fadd double %conv10, 3.000000e+00
   %conv12 = fptrunc double %add11 to float
@@ -55,8 +55,8 @@
   br label %if.end17
 
 if.else13:                                        ; preds = %if.end
-  %10 = load float** %output.addr, align 8
-  %11 = load float* %10, align 4
+  %10 = load float*, float** %output.addr, align 8
+  %11 = load float, float* %10, align 4
   %conv14 = fpext float %11 to double
   %add15 = fadd double %conv14, 4.000000e+00
   %conv16 = fptrunc double %add15 to float
diff --git a/llvm/test/CodeGen/NVPTX/nounroll.ll b/llvm/test/CodeGen/NVPTX/nounroll.ll
index 3e606f5..e80a4a2 100644
--- a/llvm/test/CodeGen/NVPTX/nounroll.ll
+++ b/llvm/test/CodeGen/NVPTX/nounroll.ll
@@ -18,7 +18,7 @@
   %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %idxprom = sext i32 %i.06 to i64
   %arrayidx = getelementptr inbounds float, float* %input, i64 %idxprom
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
 ; CHECK: ld.f32
   %arrayidx2 = getelementptr inbounds float, float* %output, i64 %idxprom
   store float %0, float* %arrayidx2, align 4
diff --git a/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll b/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll
index cc67a6f..d4f7c3b 100644
--- a/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll
+++ b/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll
@@ -19,7 +19,7 @@
 ; PTX64: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1;
 ; PTX64: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1;
 
-  %t1 = load i1* %a
+  %t1 = load i1, i1* %a
   %t2 = select i1 %t1, i8 1, i8 2
   store i8 %t2, i8* %b
   ret void
diff --git a/llvm/test/CodeGen/NVPTX/pr16278.ll b/llvm/test/CodeGen/NVPTX/pr16278.ll
index 5432a84..a836eaf 100644
--- a/llvm/test/CodeGen/NVPTX/pr16278.ll
+++ b/llvm/test/CodeGen/NVPTX/pr16278.ll
@@ -5,6 +5,6 @@
 
 define float @foo() {
 ; CHECK: ld.const.f32
-  %val = load float addrspace(4)* @one_f
+  %val = load float, float addrspace(4)* @one_f
   ret float %val
 }
diff --git a/llvm/test/CodeGen/NVPTX/refl1.ll b/llvm/test/CodeGen/NVPTX/refl1.ll
index e8782ea..0432b67 100644
--- a/llvm/test/CodeGen/NVPTX/refl1.ll
+++ b/llvm/test/CodeGen/NVPTX/refl1.ll
@@ -5,7 +5,7 @@
 ; Function Attrs: nounwind
 ; CHECK: .entry foo
 define void @foo(float* nocapture %a) #0 {
-  %val = load float* %a
+  %val = load float, float* %a
   %tan = tail call fastcc float @__nv_fast_tanf(float %val)
   store float %tan, float* %a
   ret void
diff --git a/llvm/test/CodeGen/NVPTX/sched1.ll b/llvm/test/CodeGen/NVPTX/sched1.ll
index eb0f8ce..fb01eb2 100644
--- a/llvm/test/CodeGen/NVPTX/sched1.ll
+++ b/llvm/test/CodeGen/NVPTX/sched1.ll
@@ -12,13 +12,13 @@
 ; CHECK-NEXT: add.s32
 ; CHECK-NEXT: add.s32
   %ptr0 = getelementptr i32, i32* %a, i32 0
-  %val0 = load i32* %ptr0
+  %val0 = load i32, i32* %ptr0
   %ptr1 = getelementptr i32, i32* %a, i32 1
-  %val1 = load i32* %ptr1
+  %val1 = load i32, i32* %ptr1
   %ptr2 = getelementptr i32, i32* %a, i32 2
-  %val2 = load i32* %ptr2
+  %val2 = load i32, i32* %ptr2
   %ptr3 = getelementptr i32, i32* %a, i32 3
-  %val3 = load i32* %ptr3
+  %val3 = load i32, i32* %ptr3
 
   %t0 = add i32 %val0, %val1
   %t1 = add i32 %t0, %val2
diff --git a/llvm/test/CodeGen/NVPTX/sched2.ll b/llvm/test/CodeGen/NVPTX/sched2.ll
index 4d7f00e..91ed778 100644
--- a/llvm/test/CodeGen/NVPTX/sched2.ll
+++ b/llvm/test/CodeGen/NVPTX/sched2.ll
@@ -13,13 +13,13 @@
 ; CHECK-NEXT: add.s32
 ; CHECK-NEXT: add.s32
   %ptr0 = getelementptr <2 x i32>, <2 x i32>* %a, i32 0
-  %val0 = load <2 x i32>* %ptr0
+  %val0 = load <2 x i32>, <2 x i32>* %ptr0
   %ptr1 = getelementptr <2 x i32>, <2 x i32>* %a, i32 1
-  %val1 = load <2 x i32>* %ptr1
+  %val1 = load <2 x i32>, <2 x i32>* %ptr1
   %ptr2 = getelementptr <2 x i32>, <2 x i32>* %a, i32 2
-  %val2 = load <2 x i32>* %ptr2
+  %val2 = load <2 x i32>, <2 x i32>* %ptr2
   %ptr3 = getelementptr <2 x i32>, <2 x i32>* %a, i32 3
-  %val3 = load <2 x i32>* %ptr3
+  %val3 = load <2 x i32>, <2 x i32>* %ptr3
 
   %t0 = add <2 x i32> %val0, %val1
   %t1 = add <2 x i32> %t0, %val2
diff --git a/llvm/test/CodeGen/NVPTX/shift-parts.ll b/llvm/test/CodeGen/NVPTX/shift-parts.ll
index 748297c..b4d408f 100644
--- a/llvm/test/CodeGen/NVPTX/shift-parts.ll
+++ b/llvm/test/CodeGen/NVPTX/shift-parts.ll
@@ -12,8 +12,8 @@
 ; CHECK: setp.gt.s32
 ; CHECK: selp.b64
 ; CHECK: shl.b64
-  %amt = load i128* %amtptr
-  %a = load i128* %val
+  %amt = load i128, i128* %amtptr
+  %a = load i128, i128* %val
   %val0 = shl i128 %a, %amt
   store i128 %val0, i128* %val
   ret void
@@ -30,8 +30,8 @@
 ; CHECK: setp.gt.s32
 ; CHECK: selp.b64
 ; CHECK: shr.s64
-  %amt = load i128* %amtptr
-  %a = load i128* %val
+  %amt = load i128, i128* %amtptr
+  %a = load i128, i128* %val
   %val0 = ashr i128 %a, %amt
   store i128 %val0, i128* %val
   ret void
diff --git a/llvm/test/CodeGen/NVPTX/simple-call.ll b/llvm/test/CodeGen/NVPTX/simple-call.ll
index 1b41361..da65686 100644
--- a/llvm/test/CodeGen/NVPTX/simple-call.ll
+++ b/llvm/test/CodeGen/NVPTX/simple-call.ll
@@ -11,7 +11,7 @@
 
 ; CHECK: .entry kernel_func
 define void @kernel_func(float* %a) {
-  %val = load float* %a
+  %val = load float, float* %a
 ; CHECK: call.uni (retval0),
 ; CHECK: device_func,
   %mul = call float @device_func(float %val)
diff --git a/llvm/test/CodeGen/NVPTX/vector-compare.ll b/llvm/test/CodeGen/NVPTX/vector-compare.ll
index 2180499..2992b0e 100644
--- a/llvm/test/CodeGen/NVPTX/vector-compare.ll
+++ b/llvm/test/CodeGen/NVPTX/vector-compare.ll
@@ -6,8 +6,8 @@
 ; tried to promote <2 x i1> to <2 x i8> and instruction selection failed.
 
 define void @foo(<2 x i32>* %a, <2 x i32>* %b, i32* %r1, i32* %r2) {
-  %aval = load <2 x i32>* %a
-  %bval = load <2 x i32>* %b
+  %aval = load <2 x i32>, <2 x i32>* %a
+  %bval = load <2 x i32>, <2 x i32>* %b
   %res = icmp slt <2 x i32> %aval, %bval
   %t1 = extractelement <2 x i1> %res, i32 0
   %t2 = extractelement <2 x i1> %res, i32 1
diff --git a/llvm/test/CodeGen/NVPTX/vector-loads.ll b/llvm/test/CodeGen/NVPTX/vector-loads.ll
index 58882bf..d703489 100644
--- a/llvm/test/CodeGen/NVPTX/vector-loads.ll
+++ b/llvm/test/CodeGen/NVPTX/vector-loads.ll
@@ -10,7 +10,7 @@
 define void @foo(<2 x float>* %a) {
 ; CHECK: .func foo
 ; CHECK: ld.v2.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}}
-  %t1 = load <2 x float>* %a
+  %t1 = load <2 x float>, <2 x float>* %a
   %t2 = fmul <2 x float> %t1, %t1
   store <2 x float> %t2, <2 x float>* %a
   ret void
@@ -19,7 +19,7 @@
 define void @foo2(<4 x float>* %a) {
 ; CHECK: .func foo2
 ; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
-  %t1 = load <4 x float>* %a
+  %t1 = load <4 x float>, <4 x float>* %a
   %t2 = fmul <4 x float> %t1, %t1
   store <4 x float> %t2, <4 x float>* %a
   ret void
@@ -29,7 +29,7 @@
 ; CHECK: .func foo3
 ; CHECK: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
 ; CHECK-NEXT: ld.v4.f32 {%f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}}
-  %t1 = load <8 x float>* %a
+  %t1 = load <8 x float>, <8 x float>* %a
   %t2 = fmul <8 x float> %t1, %t1
   store <8 x float> %t2, <8 x float>* %a
   ret void
@@ -40,7 +40,7 @@
 define void @foo4(<2 x i32>* %a) {
 ; CHECK: .func foo4
 ; CHECK: ld.v2.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}}
-  %t1 = load <2 x i32>* %a
+  %t1 = load <2 x i32>, <2 x i32>* %a
   %t2 = mul <2 x i32> %t1, %t1
   store <2 x i32> %t2, <2 x i32>* %a
   ret void
@@ -49,7 +49,7 @@
 define void @foo5(<4 x i32>* %a) {
 ; CHECK: .func foo5
 ; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
-  %t1 = load <4 x i32>* %a
+  %t1 = load <4 x i32>, <4 x i32>* %a
   %t2 = mul <4 x i32> %t1, %t1
   store <4 x i32> %t2, <4 x i32>* %a
   ret void
@@ -59,7 +59,7 @@
 ; CHECK: .func foo6
 ; CHECK: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
 ; CHECK-NEXT: ld.v4.u32 {%r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}}
-  %t1 = load <8 x i32>* %a
+  %t1 = load <8 x i32>, <8 x i32>* %a
   %t2 = mul <8 x i32> %t1, %t1
   store <8 x i32> %t2, <8 x i32>* %a
   ret void
diff --git a/llvm/test/CodeGen/NVPTX/vector-select.ll b/llvm/test/CodeGen/NVPTX/vector-select.ll
index 11893df..1e81031c6 100644
--- a/llvm/test/CodeGen/NVPTX/vector-select.ll
+++ b/llvm/test/CodeGen/NVPTX/vector-select.ll
@@ -6,9 +6,9 @@
 
 define void @foo(<2 x i32> addrspace(1)* %def_a, <2 x i32> addrspace(1)* %def_b, <2 x i32> addrspace(1)* %def_c) {
 entry:
-  %tmp4 = load <2 x i32> addrspace(1)* %def_a
-  %tmp6 = load <2 x i32> addrspace(1)* %def_c
-  %tmp8 = load <2 x i32> addrspace(1)* %def_b
+  %tmp4 = load <2 x i32>, <2 x i32> addrspace(1)* %def_a
+  %tmp6 = load <2 x i32>, <2 x i32> addrspace(1)* %def_c
+  %tmp8 = load <2 x i32>, <2 x i32> addrspace(1)* %def_b
   %0 = icmp sge <2 x i32> %tmp4, zeroinitializer
   %cond = select <2 x i1> %0, <2 x i32> %tmp6, <2 x i32> %tmp8
   store <2 x i32> %cond, <2 x i32> addrspace(1)* %def_c
diff --git a/llvm/test/CodeGen/NVPTX/weak-global.ll b/llvm/test/CodeGen/NVPTX/weak-global.ll
index 2bef4c5..a64f9f4 100644
--- a/llvm/test/CodeGen/NVPTX/weak-global.ll
+++ b/llvm/test/CodeGen/NVPTX/weak-global.ll
@@ -4,6 +4,6 @@
 @g = common addrspace(1) global i32 zeroinitializer
 
 define i32 @func0() {
-  %val = load i32 addrspace(1)* @g
+  %val = load i32, i32 addrspace(1)* @g
   ret i32 %val
 }
diff --git a/llvm/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll b/llvm/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll
index 047a12b..0f56ac9 100644
--- a/llvm/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll
+++ b/llvm/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll
@@ -7,7 +7,7 @@
         %ap = alloca i8*                ; <i8**> [#uses=2]
         %va.upgrd.1 = bitcast i8** %ap to i8*           ; <i8*> [#uses=1]
         call void @llvm.va_start( i8* %va.upgrd.1 )
-        %tmp.1 = load i8** %ap          ; <i8*> [#uses=1]
+        %tmp.1 = load i8*, i8** %ap          ; <i8*> [#uses=1]
         %tmp.0 = call double @foo( i8* %tmp.1 )         ; <double> [#uses=0]
         ret void
 }
diff --git a/llvm/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll b/llvm/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
index fbf2540..fde3303 100644
--- a/llvm/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
+++ b/llvm/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
@@ -4,11 +4,11 @@
         %zero = alloca i32              ; <i32*> [#uses=2]
         %b = alloca i32         ; <i32*> [#uses=1]
         store i32 0, i32* %zero
-        %tmp = load i32* %zero          ; <i32> [#uses=1]
+        %tmp = load i32, i32* %zero          ; <i32> [#uses=1]
         %tmp5 = bitcast i32 %tmp to i32         ; <i32> [#uses=1]
         %tmp6.u = add i32 %tmp5, 32             ; <i32> [#uses=1]
         %tmp6 = bitcast i32 %tmp6.u to i32              ; <i32> [#uses=1]
-        %tmp7 = load i64* null          ; <i64> [#uses=1]
+        %tmp7 = load i64, i64* null          ; <i64> [#uses=1]
         %tmp6.upgrd.1 = trunc i32 %tmp6 to i8           ; <i8> [#uses=1]
         %shift.upgrd.2 = zext i8 %tmp6.upgrd.1 to i64           ; <i64> [#uses=1]
         %tmp8 = ashr i64 %tmp7, %shift.upgrd.2          ; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll b/llvm/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
index 7e84538..80827dc 100644
--- a/llvm/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
+++ b/llvm/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
@@ -2,7 +2,7 @@
 ; RUN:   grep "vspltish v.*, 10"
 
 define void @test(<8 x i16>* %P) {
-        %tmp = load <8 x i16>* %P               ; <<8 x i16>> [#uses=1]
+        %tmp = load <8 x i16>, <8 x i16>* %P               ; <<8 x i16>> [#uses=1]
         %tmp1 = add <8 x i16> %tmp, < i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10 >          ; <<8 x i16>> [#uses=1]
         store <8 x i16> %tmp1, <8 x i16>* %P
         ret void
diff --git a/llvm/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll b/llvm/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll
index 48d1faa..50d64f4 100644
--- a/llvm/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll
+++ b/llvm/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll
@@ -21,31 +21,31 @@
 	ret void
 cond_next92:		; preds = %bb30
 	%tmp173 = getelementptr %struct.attr_desc, %struct.attr_desc* null, i32 0, i32 4		; <i32*> [#uses=2]
-	%tmp174 = load i32* %tmp173		; <i32> [#uses=1]
+	%tmp174 = load i32, i32* %tmp173		; <i32> [#uses=1]
 	%tmp177 = and i32 %tmp174, -9		; <i32> [#uses=1]
 	store i32 %tmp177, i32* %tmp173
 	%tmp180 = getelementptr %struct.attr_desc, %struct.attr_desc* null, i32 0, i32 4		; <i32*> [#uses=1]
-	%tmp181 = load i32* %tmp180		; <i32> [#uses=1]
+	%tmp181 = load i32, i32* %tmp180		; <i32> [#uses=1]
 	%tmp185 = getelementptr %struct.attr_desc, %struct.attr_desc* null, i32 0, i32 4		; <i32*> [#uses=2]
-	%tmp186 = load i32* %tmp185		; <i32> [#uses=1]
+	%tmp186 = load i32, i32* %tmp185		; <i32> [#uses=1]
 	%tmp183187 = shl i32 %tmp181, 1		; <i32> [#uses=1]
 	%tmp188 = and i32 %tmp183187, 16		; <i32> [#uses=1]
 	%tmp190 = and i32 %tmp186, -17		; <i32> [#uses=1]
 	%tmp191 = or i32 %tmp190, %tmp188		; <i32> [#uses=1]
 	store i32 %tmp191, i32* %tmp185
 	%tmp193 = getelementptr %struct.attr_desc, %struct.attr_desc* null, i32 0, i32 4		; <i32*> [#uses=1]
-	%tmp194 = load i32* %tmp193		; <i32> [#uses=1]
+	%tmp194 = load i32, i32* %tmp193		; <i32> [#uses=1]
 	%tmp198 = getelementptr %struct.attr_desc, %struct.attr_desc* null, i32 0, i32 4		; <i32*> [#uses=2]
-	%tmp199 = load i32* %tmp198		; <i32> [#uses=1]
+	%tmp199 = load i32, i32* %tmp198		; <i32> [#uses=1]
 	%tmp196200 = shl i32 %tmp194, 2		; <i32> [#uses=1]
 	%tmp201 = and i32 %tmp196200, 64		; <i32> [#uses=1]
 	%tmp203 = and i32 %tmp199, -65		; <i32> [#uses=1]
 	%tmp204 = or i32 %tmp203, %tmp201		; <i32> [#uses=1]
 	store i32 %tmp204, i32* %tmp198
 	%tmp206 = getelementptr %struct.attr_desc, %struct.attr_desc* null, i32 0, i32 4		; <i32*> [#uses=1]
-	%tmp207 = load i32* %tmp206		; <i32> [#uses=1]
+	%tmp207 = load i32, i32* %tmp206		; <i32> [#uses=1]
 	%tmp211 = getelementptr %struct.attr_desc, %struct.attr_desc* null, i32 0, i32 4		; <i32*> [#uses=2]
-	%tmp212 = load i32* %tmp211		; <i32> [#uses=1]
+	%tmp212 = load i32, i32* %tmp211		; <i32> [#uses=1]
 	%tmp209213 = shl i32 %tmp207, 1		; <i32> [#uses=1]
 	%tmp214 = and i32 %tmp209213, 128		; <i32> [#uses=1]
 	%tmp216 = and i32 %tmp212, -129		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll b/llvm/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
index 913c31d..792c271 100644
--- a/llvm/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
+++ b/llvm/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
@@ -4,14 +4,14 @@
 @vals = external global i32*            ; <i32**> [#uses=1]
 
 define i32 @test(i32 %i) {
-        %tmp = load i8** @lens          ; <i8*> [#uses=1]
+        %tmp = load i8*, i8** @lens          ; <i8*> [#uses=1]
         %tmp1 = getelementptr i8, i8* %tmp, i32 %i          ; <i8*> [#uses=1]
-        %tmp.upgrd.1 = load i8* %tmp1           ; <i8> [#uses=1]
+        %tmp.upgrd.1 = load i8, i8* %tmp1           ; <i8> [#uses=1]
         %tmp2 = zext i8 %tmp.upgrd.1 to i32             ; <i32> [#uses=1]
-        %tmp3 = load i32** @vals                ; <i32*> [#uses=1]
+        %tmp3 = load i32*, i32** @vals                ; <i32*> [#uses=1]
         %tmp5 = sub i32 1, %tmp2                ; <i32> [#uses=1]
         %tmp6 = getelementptr i32, i32* %tmp3, i32 %tmp5             ; <i32*> [#uses=1]
-        %tmp7 = load i32* %tmp6         ; <i32> [#uses=1]
+        %tmp7 = load i32, i32* %tmp6         ; <i32> [#uses=1]
         ret i32 %tmp7
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll b/llvm/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll
index 65dd568..4b28764 100644
--- a/llvm/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll
+++ b/llvm/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=ppc32
 
 define void @img2buf(i32 %symbol_size_in_bytes, i16* %ui16) nounwind {
-        %tmp93 = load i16* null         ; <i16> [#uses=1]
+        %tmp93 = load i16, i16* null         ; <i16> [#uses=1]
         %tmp99 = call i16 @llvm.bswap.i16( i16 %tmp93 )         ; <i16> [#uses=1]
         store i16 %tmp99, i16* %ui16
         ret void
diff --git a/llvm/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll b/llvm/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
index cb76b5c..c63fd9a 100644
--- a/llvm/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
+++ b/llvm/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
@@ -5,7 +5,7 @@
 
 define fastcc void @immed_double_const(i32 %i0, i32 %i1) {
 entry:
-	%tmp1 = load i32* null		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* null		; <i32> [#uses=1]
 	switch i32 %tmp1, label %bb103 [
 		 i32 1, label %bb
 		 i32 3, label %bb
diff --git a/llvm/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll b/llvm/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
index 6d9a3fa..9660d45 100644
--- a/llvm/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
+++ b/llvm/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
@@ -10,7 +10,7 @@
 cond_true:              ; preds = %entry
         ret void
 cond_next71:            ; preds = %entry
-        %tmp73.b = load i1* @qsz.b              ; <i1> [#uses=1]
+        %tmp73.b = load i1, i1* @qsz.b              ; <i1> [#uses=1]
         %ii.4.ph = select i1 %tmp73.b, i64 4, i64 0             ; <i64> [#uses=1]
         br label %bb139
 bb82:           ; preds = %bb139
diff --git a/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll b/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
index c779288..57c3531 100644
--- a/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
+++ b/llvm/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
@@ -8,19 +8,19 @@
 	%ctz_x = alloca i32, align 4		; <i32*> [#uses=3]
 	%ctz_c = alloca i32, align 4		; <i32*> [#uses=2]
 	store i32 61440, i32* %ctz_x
-	%tmp = load i32* %ctz_x		; <i32> [#uses=1]
+	%tmp = load i32, i32* %ctz_x		; <i32> [#uses=1]
 	%tmp1 = sub i32 0, %tmp		; <i32> [#uses=1]
-	%tmp2 = load i32* %ctz_x		; <i32> [#uses=1]
+	%tmp2 = load i32, i32* %ctz_x		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp1, %tmp2		; <i32> [#uses=1]
 	%tmp4 = call i32 asm "$(cntlz$|cntlzw$) $0,$1", "=r,r,~{dirflag},~{fpsr},~{flags}"( i32 %tmp3 )		; <i32> [#uses=1]
 	store i32 %tmp4, i32* %ctz_c
-	%tmp5 = load i32* %ctz_c		; <i32> [#uses=1]
+	%tmp5 = load i32, i32* %ctz_c		; <i32> [#uses=1]
 	store i32 %tmp5, i32* %temp
-	%tmp6 = load i32* %temp		; <i32> [#uses=1]
+	%tmp6 = load i32, i32* %temp		; <i32> [#uses=1]
 	store i32 %tmp6, i32* %retval
 	br label %return
 
 return:		; preds = %entry
-	%retval2 = load i32* %retval		; <i32> [#uses=1]
+	%retval2 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval2
 }
diff --git a/llvm/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll b/llvm/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
index 3624b51..2db87fc 100644
--- a/llvm/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
+++ b/llvm/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
@@ -3,7 +3,7 @@
 target triple = "powerpc64-unknown-linux-gnu"
 
 define i32 @_ZNK4llvm5APInt17countLeadingZerosEv(i64 *%t) nounwind {
-        %tmp19 = load i64* %t
+        %tmp19 = load i64, i64* %t
         %tmp22 = tail call i64 @llvm.ctlz.i64( i64 %tmp19, i1 true )             ; <i64> [#uses=1]
         %tmp23 = trunc i64 %tmp22 to i32
         %tmp89 = add i32 %tmp23, -64          ; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll b/llvm/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
index 90fbac7..de445f4 100644
--- a/llvm/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
+++ b/llvm/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
@@ -552,10 +552,10 @@
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 8, i32 1		; <<4 x float>*>:548 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 8, i32 2		; <<4 x float>*>:549 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 8, i32 3		; <<4 x float>*>:550 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:551 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:551 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 1		; <<4 x float>*>:552 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 2		; <<4 x float>*>:553 [#uses=1]
-	load <4 x float>* %553		; <<4 x float>>:554 [#uses=1]
+	load <4 x float>, <4 x float>* %553		; <<4 x float>>:554 [#uses=1]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 3		; <<4 x float>*>:555 [#uses=0]
 	shufflevector <4 x float> %554, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:556 [#uses=1]
 	call <4 x i32> @llvm.ppc.altivec.vcmpgtfp( <4 x float> zeroinitializer, <4 x float> %556 )		; <<4 x i32>>:557 [#uses=0]
@@ -566,7 +566,7 @@
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3		; <<4 x float>*>:561 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1		; <<4 x float>*>:562 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 2		; <<4 x float>*>:563 [#uses=0]
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:564 [#uses=0]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:564 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:565 [#uses=1]
 	store <4 x float> %565, <4 x float>* null
 	icmp eq i32 0, 0		; <i1>:566 [#uses=1]
@@ -584,23 +584,23 @@
 
 xPIF.exit:		; preds = %.critedge7898, %xOperationInitMasks.exit
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 1		; <<4 x float>*>:571 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:572 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:572 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:573 [#uses=0]
 	icmp eq i32 0, 0		; <i1>:574 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 1		; <<4 x float>*>:575 [#uses=0]
-	load <4 x float>* %0		; <<4 x float>>:576 [#uses=0]
+	load <4 x float>, <4 x float>* %0		; <<4 x float>>:576 [#uses=0]
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer )		; <i32>:577 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 0		; <<4 x float>*>:578 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 1		; <<4 x float>*>:579 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 2		; <<4 x float>*>:580 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 2, i32 3		; <<4 x float>*>:581 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3		; <<4 x float>*>:582 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:583 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:583 [#uses=1]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1		; <<4 x float>*>:584 [#uses=1]
-	load <4 x float>* %584		; <<4 x float>>:585 [#uses=1]
-	load <4 x float>* null		; <<4 x float>>:586 [#uses=0]
+	load <4 x float>, <4 x float>* %584		; <<4 x float>>:585 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:586 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3		; <<4 x float>*>:587 [#uses=1]
-	load <4 x float>* %587		; <<4 x float>>:588 [#uses=1]
+	load <4 x float>, <4 x float>* %587		; <<4 x float>>:588 [#uses=1]
 	shufflevector <4 x float> %583, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x float>>:589 [#uses=1]
 	shufflevector <4 x float> %585, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x float>>:590 [#uses=1]
 	shufflevector <4 x float> %588, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x float>>:591 [#uses=1]
@@ -609,31 +609,31 @@
 	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:594 [#uses=1]
 	fmul <4 x float> zeroinitializer, %591		; <<4 x float>>:595 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0		; <<4 x float>*>:596 [#uses=2]
-	load <4 x float>* %596		; <<4 x float>>:597 [#uses=0]
+	load <4 x float>, <4 x float>* %596		; <<4 x float>>:597 [#uses=0]
 	store <4 x float> zeroinitializer, <4 x float>* %596
-	load <4 x float>* null		; <<4 x float>>:598 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:598 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2		; <<4 x float>*>:599 [#uses=0]
 	shufflevector <4 x float> %594, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:600 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3		; <<4 x float>*>:601 [#uses=2]
-	load <4 x float>* %601		; <<4 x float>>:602 [#uses=0]
+	load <4 x float>, <4 x float>* %601		; <<4 x float>>:602 [#uses=0]
 	store <4 x float> zeroinitializer, <4 x float>* %601
-	load <4 x float>* null		; <<4 x float>>:603 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:604 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:603 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:604 [#uses=1]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2		; <<4 x float>*>:605 [#uses=1]
-	load <4 x float>* %605		; <<4 x float>>:606 [#uses=1]
+	load <4 x float>, <4 x float>* %605		; <<4 x float>>:606 [#uses=1]
 	fsub <4 x float> zeroinitializer, %604		; <<4 x float>>:607 [#uses=2]
 	fsub <4 x float> zeroinitializer, %606		; <<4 x float>>:608 [#uses=2]
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer )		; <i32>:609 [#uses=0]
 	br i1 false, label %617, label %610
 
 ; <label>:610		; preds = %xPIF.exit
-	load <4 x float>* null		; <<4 x float>>:611 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:611 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1		; <<4 x float>*>:612 [#uses=2]
-	load <4 x float>* %612		; <<4 x float>>:613 [#uses=1]
+	load <4 x float>, <4 x float>* %612		; <<4 x float>>:613 [#uses=1]
 	shufflevector <4 x float> %607, <4 x float> %613, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:614 [#uses=1]
 	store <4 x float> %614, <4 x float>* %612
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3		; <<4 x float>*>:615 [#uses=2]
-	load <4 x float>* %615		; <<4 x float>>:616 [#uses=0]
+	load <4 x float>, <4 x float>* %615		; <<4 x float>>:616 [#uses=0]
 	store <4 x float> zeroinitializer, <4 x float>* %615
 	br label %xST.exit400
 
@@ -650,17 +650,17 @@
 	br label %625
 
 ; <label>:625		; preds = %622, %617
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:626 [#uses=0]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:626 [#uses=0]
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer )		; <i32>:627 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2		; <<4 x float>*>:628 [#uses=1]
-	load <4 x float>* %628		; <<4 x float>>:629 [#uses=0]
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:630 [#uses=0]
+	load <4 x float>, <4 x float>* %628		; <<4 x float>>:629 [#uses=0]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:630 [#uses=0]
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer )		; <i32>:631 [#uses=1]
 	icmp eq i32 %631, 0		; <i1>:632 [#uses=1]
 	br i1 %632, label %xST.exit400, label %633
 
 ; <label>:633		; preds = %625
-	load <4 x float>* null		; <<4 x float>>:634 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:634 [#uses=1]
 	shufflevector <4 x float> zeroinitializer, <4 x float> %634, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:635 [#uses=1]
 	store <4 x float> %635, <4 x float>* null
 	br label %xST.exit400
@@ -668,10 +668,10 @@
 xST.exit400:		; preds = %633, %625, %610
 	%.17218 = phi <4 x float> [ zeroinitializer, %610 ], [ %608, %633 ], [ %608, %625 ]		; <<4 x float>> [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 0		; <<4 x float>*>:636 [#uses=1]
-	load <4 x float>* %636		; <<4 x float>>:637 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:638 [#uses=2]
+	load <4 x float>, <4 x float>* %636		; <<4 x float>>:637 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:638 [#uses=2]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2		; <<4 x float>*>:639 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:640 [#uses=2]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:640 [#uses=2]
 	fmul <4 x float> %638, %638		; <<4 x float>>:641 [#uses=1]
 	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:642 [#uses=0]
 	fmul <4 x float> %640, %640		; <<4 x float>>:643 [#uses=2]
@@ -694,7 +694,7 @@
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0		; <<4 x float>*>:658 [#uses=0]
 	shufflevector <4 x float> %653, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:659 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1		; <<4 x float>*>:660 [#uses=1]
-	load <4 x float>* %660		; <<4 x float>>:661 [#uses=0]
+	load <4 x float>, <4 x float>* %660		; <<4 x float>>:661 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2		; <<4 x float>*>:662 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3		; <<4 x float>*>:663 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:664 [#uses=0]
@@ -705,7 +705,7 @@
 	br i1 false, label %669, label %667
 
 ; <label>:667		; preds = %665
-	load <4 x float>* null		; <<4 x float>>:668 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:668 [#uses=0]
 	br label %669
 
 ; <label>:669		; preds = %667, %665
@@ -714,11 +714,11 @@
 
 xST.exit402:		; preds = %669, %657
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0		; <<4 x float>*>:671 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:672 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:672 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2		; <<4 x float>*>:673 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1		; <<4 x float>*>:674 [#uses=1]
-	load <4 x float>* %674		; <<4 x float>>:675 [#uses=1]
-	load <4 x float>* null		; <<4 x float>>:676 [#uses=0]
+	load <4 x float>, <4 x float>* %674		; <<4 x float>>:675 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:676 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:677 [#uses=1]
 	shufflevector <4 x float> %675, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:678 [#uses=1]
 	fmul <4 x float> zeroinitializer, %677		; <<4 x float>>:679 [#uses=0]
@@ -729,7 +729,7 @@
 
 ; <label>:683		; preds = %xST.exit402
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 1		; <<4 x float>*>:684 [#uses=1]
-	load <4 x float>* %684		; <<4 x float>>:685 [#uses=0]
+	load <4 x float>, <4 x float>* %684		; <<4 x float>>:685 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2		; <<4 x float>*>:686 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3		; <<4 x float>*>:687 [#uses=0]
 	shufflevector <4 x float> %681, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:688 [#uses=0]
@@ -737,7 +737,7 @@
 
 ; <label>:689		; preds = %xST.exit402
 	shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> zeroinitializer		; <<4 x i32>>:690 [#uses=0]
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:691 [#uses=1]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:691 [#uses=1]
 	shufflevector <4 x i32> %691, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x i32>>:692 [#uses=1]
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %692, <4 x i32> zeroinitializer )		; <i32>:693 [#uses=1]
 	icmp eq i32 %693, 0		; <i1>:694 [#uses=0]
@@ -747,48 +747,48 @@
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3		; <<4 x float>*>:695 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:696 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:697 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:698 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:698 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2		; <<4 x float>*>:699 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:700 [#uses=1]
 	fadd <4 x float> zeroinitializer, %700		; <<4 x float>>:701 [#uses=0]
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:702 [#uses=1]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:702 [#uses=1]
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %702, <4 x i32> zeroinitializer )		; <i32>:703 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1		; <<4 x float>*>:704 [#uses=2]
-	load <4 x float>* %704		; <<4 x float>>:705 [#uses=0]
+	load <4 x float>, <4 x float>* %704		; <<4 x float>>:705 [#uses=0]
 	store <4 x float> zeroinitializer, <4 x float>* %704
-	load <4 x float>* null		; <<4 x float>>:706 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:706 [#uses=0]
 	store <4 x float> zeroinitializer, <4 x float>* null
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3		; <<4 x float>*>:707 [#uses=2]
-	load <4 x float>* %707		; <<4 x float>>:708 [#uses=0]
+	load <4 x float>, <4 x float>* %707		; <<4 x float>>:708 [#uses=0]
 	store <4 x float> zeroinitializer, <4 x float>* %707
-	load <4 x float>* null		; <<4 x float>>:709 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:710 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:711 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:709 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:710 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:711 [#uses=1]
 	shufflevector <4 x float> %711, <4 x float> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 >		; <<4 x float>>:712 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1		; <<4 x float>*>:713 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2		; <<4 x float>*>:714 [#uses=1]
-	load <4 x float>* %714		; <<4 x float>>:715 [#uses=0]
+	load <4 x float>, <4 x float>* %714		; <<4 x float>>:715 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:716 [#uses=0]
 	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:717 [#uses=1]
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:718 [#uses=0]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:718 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 0		; <<4 x float>*>:719 [#uses=1]
 	store <4 x float> zeroinitializer, <4 x float>* %719
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 1		; <<4 x float>*>:720 [#uses=1]
 	shufflevector <4 x float> %717, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:721 [#uses=1]
 	store <4 x float> %721, <4 x float>* %720
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2		; <<4 x float>*>:722 [#uses=1]
-	load <4 x float>* %722		; <<4 x float>>:723 [#uses=1]
+	load <4 x float>, <4 x float>* %722		; <<4 x float>>:723 [#uses=1]
 	shufflevector <4 x float> zeroinitializer, <4 x float> %723, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:724 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3		; <<4 x float>*>:725 [#uses=1]
 	store <4 x float> zeroinitializer, <4 x float>* %725
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 2		; <<4 x float>*>:726 [#uses=1]
-	load <4 x float>* %726		; <<4 x float>>:727 [#uses=0]
+	load <4 x float>, <4 x float>* %726		; <<4 x float>>:727 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 5, i32 3		; <<4 x float>*>:728 [#uses=1]
-	load <4 x float>* %728		; <<4 x float>>:729 [#uses=0]
+	load <4 x float>, <4 x float>* %728		; <<4 x float>>:729 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0		; <<4 x float>*>:730 [#uses=1]
-	load <4 x float>* %730		; <<4 x float>>:731 [#uses=0]
+	load <4 x float>, <4 x float>* %730		; <<4 x float>>:731 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1		; <<4 x float>*>:732 [#uses=1]
-	load <4 x float>* %732		; <<4 x float>>:733 [#uses=0]
+	load <4 x float>, <4 x float>* %732		; <<4 x float>>:733 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3		; <<4 x float>*>:734 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:735 [#uses=1]
 	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:736 [#uses=1]
@@ -798,26 +798,26 @@
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer )		; <i32>:740 [#uses=1]
 	icmp eq i32 %740, 0		; <i1>:741 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 0		; <<4 x float>*>:742 [#uses=2]
-	load <4 x float>* %742		; <<4 x float>>:743 [#uses=1]
+	load <4 x float>, <4 x float>* %742		; <<4 x float>>:743 [#uses=1]
 	shufflevector <4 x float> %736, <4 x float> %743, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:744 [#uses=1]
 	store <4 x float> %744, <4 x float>* %742
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1		; <<4 x float>*>:745 [#uses=1]
-	load <4 x float>* %745		; <<4 x float>>:746 [#uses=1]
+	load <4 x float>, <4 x float>* %745		; <<4 x float>>:746 [#uses=1]
 	shufflevector <4 x float> %737, <4 x float> %746, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:747 [#uses=0]
 	shufflevector <4 x float> %738, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:748 [#uses=1]
 	store <4 x float> %748, <4 x float>* null
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3		; <<4 x float>*>:749 [#uses=1]
-	load <4 x float>* %749		; <<4 x float>>:750 [#uses=1]
+	load <4 x float>, <4 x float>* %749		; <<4 x float>>:750 [#uses=1]
 	shufflevector <4 x float> %739, <4 x float> %750, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:751 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0		; <<4 x float>*>:752 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1		; <<4 x float>*>:753 [#uses=1]
-	load <4 x float>* %753		; <<4 x float>>:754 [#uses=0]
+	load <4 x float>, <4 x float>* %753		; <<4 x float>>:754 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2		; <<4 x float>*>:755 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:756 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:756 [#uses=1]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:757 [#uses=1]
 	shufflevector <4 x float> %756, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:758 [#uses=1]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2		; <<4 x float>*>:759 [#uses=1]
-	load <4 x float>* %759		; <<4 x float>>:760 [#uses=0]
+	load <4 x float>, <4 x float>* %759		; <<4 x float>>:760 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3		; <<4 x float>*>:761 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:762 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:763 [#uses=1]
@@ -828,11 +828,11 @@
 
 ; <label>:767		; preds = %xST.exit405
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1		; <<4 x float>*>:768 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:769 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:769 [#uses=1]
 	shufflevector <4 x float> zeroinitializer, <4 x float> %769, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:770 [#uses=1]
 	store <4 x float> %770, <4 x float>* null
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3		; <<4 x float>*>:771 [#uses=1]
-	load <4 x float>* %771		; <<4 x float>>:772 [#uses=0]
+	load <4 x float>, <4 x float>* %771		; <<4 x float>>:772 [#uses=0]
 	br label %xST.exit422
 
 ; <label>:773		; preds = %xST.exit405
@@ -851,19 +851,19 @@
 	br label %xST.exit431
 
 ; <label>:780		; preds = %xST.exit422
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:781 [#uses=0]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:781 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2		; <<4 x float>*>:782 [#uses=2]
-	load <4 x float>* %782		; <<4 x float>>:783 [#uses=0]
+	load <4 x float>, <4 x float>* %782		; <<4 x float>>:783 [#uses=0]
 	store <4 x float> zeroinitializer, <4 x float>* %782
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:784 [#uses=1]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:784 [#uses=1]
 	shufflevector <4 x i32> %784, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x i32>>:785 [#uses=0]
 	icmp eq i32 0, 0		; <i1>:786 [#uses=0]
 	br label %xST.exit431
 
 xST.exit431:		; preds = %780, %777
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2		; <<4 x float>*>:787 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:788 [#uses=0]
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:789 [#uses=2]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:788 [#uses=0]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:789 [#uses=2]
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %789, <4 x i32> zeroinitializer )		; <i32>:790 [#uses=1]
 	icmp eq i32 %790, 0		; <i1>:791 [#uses=0]
 	shufflevector <4 x i32> %789, <4 x i32> undef, <4 x i32> zeroinitializer		; <<4 x i32>>:792 [#uses=1]
@@ -872,7 +872,7 @@
 	br i1 %794, label %797, label %795
 
 ; <label>:795		; preds = %xST.exit431
-	load <4 x float>* null		; <<4 x float>>:796 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:796 [#uses=0]
 	store <4 x float> zeroinitializer, <4 x float>* null
 	br label %797
 
@@ -882,26 +882,26 @@
 	br i1 false, label %xST.exit434, label %799
 
 ; <label>:799		; preds = %797
-	load <4 x float>* null		; <<4 x float>>:800 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:800 [#uses=0]
 	store <4 x float> zeroinitializer, <4 x float>* null
 	br label %xST.exit434
 
 xST.exit434:		; preds = %799, %797
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:801 [#uses=1]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:801 [#uses=1]
 	shufflevector <4 x i32> %801, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 >		; <<4 x i32>>:802 [#uses=0]
 	shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x i32>>:803 [#uses=0]
 	icmp eq i32 0, 0		; <i1>:804 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 0		; <<4 x float>*>:805 [#uses=1]
-	load <4 x float>* %805		; <<4 x float>>:806 [#uses=0]
+	load <4 x float>, <4 x float>* %805		; <<4 x float>>:806 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1		; <<4 x float>*>:807 [#uses=1]
-	load <4 x float>* %807		; <<4 x float>>:808 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:809 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:810 [#uses=0]
+	load <4 x float>, <4 x float>* %807		; <<4 x float>>:808 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:809 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:810 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0		; <<4 x float>*>:811 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2		; <<4 x float>*>:812 [#uses=1]
-	load <4 x float>* %812		; <<4 x float>>:813 [#uses=0]
+	load <4 x float>, <4 x float>* %812		; <<4 x float>>:813 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3		; <<4 x float>*>:814 [#uses=1]
-	load <4 x float>* %814		; <<4 x float>>:815 [#uses=0]
+	load <4 x float>, <4 x float>* %814		; <<4 x float>>:815 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:816 [#uses=0]
 	unreachable
 
@@ -909,11 +909,11 @@
 	store <4 x i32> < i32 -1, i32 -1, i32 -1, i32 -1 >, <4 x i32>* %.sub7896
 	store <4 x i32> zeroinitializer, <4 x i32>* null
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 1		; <<4 x float>*>:817 [#uses=1]
-	load <4 x float>* %817		; <<4 x float>>:818 [#uses=1]
+	load <4 x float>, <4 x float>* %817		; <<4 x float>>:818 [#uses=1]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2		; <<4 x float>*>:819 [#uses=1]
-	load <4 x float>* %819		; <<4 x float>>:820 [#uses=1]
+	load <4 x float>, <4 x float>* %819		; <<4 x float>>:820 [#uses=1]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3		; <<4 x float>*>:821 [#uses=1]
-	load <4 x float>* %821		; <<4 x float>>:822 [#uses=1]
+	load <4 x float>, <4 x float>* %821		; <<4 x float>>:822 [#uses=1]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:823 [#uses=1]
 	shufflevector <4 x float> %818, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:824 [#uses=1]
 	shufflevector <4 x float> %820, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:825 [#uses=1]
@@ -921,10 +921,10 @@
 	shufflevector <4 x float> %823, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:827 [#uses=0]
 	shufflevector <4 x float> %824, <4 x float> zeroinitializer, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:828 [#uses=1]
 	store <4 x float> %828, <4 x float>* null
-	load <4 x float>* null		; <<4 x float>>:829 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:829 [#uses=1]
 	shufflevector <4 x float> %825, <4 x float> %829, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:830 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3		; <<4 x float>*>:831 [#uses=2]
-	load <4 x float>* %831		; <<4 x float>>:832 [#uses=1]
+	load <4 x float>, <4 x float>* %831		; <<4 x float>>:832 [#uses=1]
 	shufflevector <4 x float> %826, <4 x float> %832, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:833 [#uses=1]
 	store <4 x float> %833, <4 x float>* %831
 	br label %xLS.exit449
@@ -959,13 +959,13 @@
 	%.17735 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07734, %1215 ]		; <<4 x float>> [#uses=2]
 	%.17770 = phi <4 x float> [ undef, %xPBRK.exit ], [ %.07769, %1215 ]		; <<4 x float>> [#uses=2]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 0		; <<4 x float>*>:834 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:835 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:835 [#uses=1]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2		; <<4 x float>*>:836 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3		; <<4 x float>*>:837 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:838 [#uses=0]
 	shufflevector <4 x float> %835, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:839 [#uses=1]
 	getelementptr <4 x float>, <4 x float>* null, i32 878		; <<4 x float>*>:840 [#uses=1]
-	load <4 x float>* %840		; <<4 x float>>:841 [#uses=0]
+	load <4 x float>, <4 x float>* %840		; <<4 x float>>:841 [#uses=0]
 	call <4 x float> @llvm.ppc.altivec.vcfsx( <4 x i32> zeroinitializer, i32 0 )		; <<4 x float>>:842 [#uses=1]
 	shufflevector <4 x float> %842, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:843 [#uses=2]
 	call <4 x i32> @llvm.ppc.altivec.vcmpgtfp( <4 x float> %843, <4 x float> %839 )		; <<4 x i32>>:844 [#uses=1]
@@ -990,7 +990,7 @@
 
 ; <label>:856		; preds = %854
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0		; <<4 x float>*>:857 [#uses=2]
-	load <4 x float>* %857		; <<4 x float>>:858 [#uses=0]
+	load <4 x float>, <4 x float>* %857		; <<4 x float>>:858 [#uses=0]
 	store <4 x float> zeroinitializer, <4 x float>* %857
 	br label %859
 
@@ -1005,7 +1005,7 @@
 	br label %864
 
 ; <label>:864		; preds = %861, %859
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:865 [#uses=1]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:865 [#uses=1]
 	shufflevector <4 x i32> %865, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 >		; <<4 x i32>>:866 [#uses=0]
 	br i1 false, label %868, label %867
 
@@ -1020,7 +1020,7 @@
 xST.exit451:		; preds = %868, %849
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 0		; <<4 x float>*>:870 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1		; <<4 x float>*>:871 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:872 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:872 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:873 [#uses=1]
 	bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>>:874 [#uses=1]
 	xor <4 x i32> %874, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>>:875 [#uses=0]
@@ -1029,7 +1029,7 @@
 	bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>>:878 [#uses=1]
 	xor <4 x i32> %878, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>>:879 [#uses=1]
 	bitcast <4 x i32> %879 to <4 x float>		; <<4 x float>>:880 [#uses=0]
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:881 [#uses=1]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:881 [#uses=1]
 	icmp eq i32 0, 0		; <i1>:882 [#uses=1]
 	br i1 %882, label %888, label %883
 
@@ -1061,18 +1061,18 @@
 	br label %898
 
 ; <label>:898		; preds = %897, %894
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:899 [#uses=0]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:899 [#uses=0]
 	br i1 false, label %xST.exit453, label %900
 
 ; <label>:900		; preds = %898
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3		; <<4 x float>*>:901 [#uses=1]
-	load <4 x float>* %901		; <<4 x float>>:902 [#uses=1]
+	load <4 x float>, <4 x float>* %901		; <<4 x float>>:902 [#uses=1]
 	shufflevector <4 x float> zeroinitializer, <4 x float> %902, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:903 [#uses=0]
 	br label %xST.exit453
 
 xST.exit453:		; preds = %900, %898, %883
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 1		; <<4 x float>*>:904 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:905 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:905 [#uses=1]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 189, i32 3		; <<4 x float>*>:906 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:907 [#uses=1]
 	shufflevector <4 x float> %905, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:908 [#uses=1]
@@ -1080,15 +1080,15 @@
 	bitcast <4 x float> %908 to <4 x i32>		; <<4 x i32>>:910 [#uses=0]
 	bitcast <4 x float> %907 to <4 x i32>		; <<4 x i32>>:911 [#uses=0]
 	bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>>:912 [#uses=0]
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:913 [#uses=0]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:913 [#uses=0]
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 2, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer )		; <i32>:914 [#uses=0]
 	br i1 false, label %915, label %xPIF.exit455
 
 ; <label>:915		; preds = %xST.exit453
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:916 [#uses=0]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:916 [#uses=0]
 	getelementptr [4 x <4 x i32>], [4 x <4 x i32>]* null, i32 0, i32 3		; <<4 x i32>*>:917 [#uses=1]
 	store <4 x i32> zeroinitializer, <4 x i32>* %917
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:918 [#uses=1]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:918 [#uses=1]
 	and <4 x i32> %918, zeroinitializer		; <<4 x i32>>:919 [#uses=0]
 	br label %.critedge7899
 
@@ -1102,15 +1102,15 @@
 
 xPIF.exit455:		; preds = %xST.exit453
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 0		; <<4 x float>*>:922 [#uses=1]
-	load <4 x float>* %922		; <<4 x float>>:923 [#uses=0]
+	load <4 x float>, <4 x float>* %922		; <<4 x float>>:923 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 1		; <<4 x float>*>:924 [#uses=1]
-	load <4 x float>* %924		; <<4 x float>>:925 [#uses=0]
+	load <4 x float>, <4 x float>* %924		; <<4 x float>>:925 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 2		; <<4 x float>*>:926 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 186, i32 3		; <<4 x float>*>:927 [#uses=0]
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:928 [#uses=0]
 	bitcast { { i16, i16, i32 } }* %1 to <4 x float>*		; <<4 x float>*>:929 [#uses=0]
 	bitcast <4 x float> zeroinitializer to <4 x i32>		; <<4 x i32>>:930 [#uses=0]
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:931 [#uses=0]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:931 [#uses=0]
 	icmp eq i32 0, 0		; <i1>:932 [#uses=1]
 	br i1 %932, label %934, label %933
 
@@ -1131,11 +1131,11 @@
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %938, <4 x i32> zeroinitializer )		; <i32>:939 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 188, i32 2		; <<4 x float>*>:940 [#uses=1]
 	store <4 x float> zeroinitializer, <4 x float>* %940
-	load <4 x float>* null		; <<4 x float>>:941 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:941 [#uses=1]
 	shufflevector <4 x float> zeroinitializer, <4 x float> %941, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:942 [#uses=1]
 	store <4 x float> %942, <4 x float>* null
 	shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:943 [#uses=0]
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:944 [#uses=0]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:944 [#uses=0]
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer )		; <i32>:945 [#uses=0]
 	br i1 false, label %947, label %946
 
@@ -1170,7 +1170,7 @@
 	br label %xStoreDestAddressWithMask.exit461
 
 xStoreDestAddressWithMask.exit461:		; preds = %958, %955
-	load <4 x float>* %0		; <<4 x float>>:960 [#uses=0]
+	load <4 x float>, <4 x float>* %0		; <<4 x float>>:960 [#uses=0]
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer )		; <i32>:961 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 0		; <<4 x float>*>:962 [#uses=0]
 	br i1 false, label %968, label %xST.exit463
@@ -1179,7 +1179,7 @@
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 1		; <<4 x float>*>:963 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 2		; <<4 x float>*>:964 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 3, i32 3		; <<4 x float>*>:965 [#uses=0]
-	load <4 x float>* %0		; <<4 x float>>:966 [#uses=3]
+	load <4 x float>, <4 x float>* %0		; <<4 x float>>:966 [#uses=3]
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer )		; <i32>:967 [#uses=0]
 	br i1 false, label %972, label %969
 
@@ -1197,7 +1197,7 @@
 	call <4 x i32> @llvm.ppc.altivec.vsel( <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer )		; <<4 x i32>>:973 [#uses=0]
 	store <4 x float> zeroinitializer, <4 x float>* null
 	store <4 x float> zeroinitializer, <4 x float>* null
-	load <4 x float>* null		; <<4 x float>>:974 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:974 [#uses=0]
 	bitcast <4 x float> %966 to <4 x i32>		; <<4 x i32>>:975 [#uses=1]
 	call <4 x i32> @llvm.ppc.altivec.vsel( <4 x i32> zeroinitializer, <4 x i32> %975, <4 x i32> zeroinitializer )		; <<4 x i32>>:976 [#uses=1]
 	bitcast <4 x i32> %976 to <4 x float>		; <<4 x float>>:977 [#uses=1]
@@ -1209,7 +1209,7 @@
 	br label %xST.exit465
 
 xST.exit465:		; preds = %972, %969
-	load <4 x float>* %0		; <<4 x float>>:982 [#uses=3]
+	load <4 x float>, <4 x float>* %0		; <<4 x float>>:982 [#uses=3]
 	icmp eq i32 0, 0		; <i1>:983 [#uses=1]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 0		; <<4 x float>*>:984 [#uses=1]
 	br i1 %983, label %989, label %985
@@ -1226,7 +1226,7 @@
 	shufflevector <4 x i32> zeroinitializer, <4 x i32> undef, <4 x i32> zeroinitializer		; <<4 x i32>>:991 [#uses=0]
 	store <4 x float> zeroinitializer, <4 x float>* %984
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 1		; <<4 x float>*>:992 [#uses=0]
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:993 [#uses=0]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:993 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 2		; <<4 x float>*>:994 [#uses=0]
 	bitcast <4 x i32> zeroinitializer to <4 x float>		; <<4 x float>>:995 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 1, i32 3		; <<4 x float>*>:996 [#uses=0]
@@ -1238,16 +1238,16 @@
 	br label %xST.exit467
 
 xST.exit467:		; preds = %989, %985
-	load <4 x float>* %0		; <<4 x float>>:1002 [#uses=5]
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:1003 [#uses=2]
+	load <4 x float>, <4 x float>* %0		; <<4 x float>>:1002 [#uses=5]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:1003 [#uses=2]
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %1003, <4 x i32> zeroinitializer )		; <i32>:1004 [#uses=0]
 	br i1 false, label %1011, label %1005
 
 ; <label>:1005		; preds = %xST.exit467
-	load <4 x float>* null		; <<4 x float>>:1006 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:1006 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 1		; <<4 x float>*>:1007 [#uses=1]
-	load <4 x float>* %1007		; <<4 x float>>:1008 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:1009 [#uses=0]
+	load <4 x float>, <4 x float>* %1007		; <<4 x float>>:1008 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:1009 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3		; <<4 x float>*>:1010 [#uses=0]
 	br label %xST.exit469
 
@@ -1293,15 +1293,15 @@
 	%.17463 = phi <4 x float> [ %.27464, %1005 ], [ %.07462, %1027 ], [ %.07462, %1025 ]		; <<4 x float>> [#uses=1]
 	%.17468 = phi <4 x float> [ %.27469, %1005 ], [ %.07467, %1027 ], [ %.07467, %1025 ]		; <<4 x float>> [#uses=1]
 	%.07489 = phi <4 x float> [ %1002, %1005 ], [ %.17490, %1027 ], [ %.17490, %1025 ]		; <<4 x float>> [#uses=1]
-	load <4 x float>* null		; <<4 x float>>:1029 [#uses=0]
-	load <4 x float>* null		; <<4 x float>>:1030 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:1029 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:1030 [#uses=0]
 	fsub <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:1031 [#uses=1]
 	br i1 false, label %1037, label %1032
 
 ; <label>:1032		; preds = %xST.exit469
-	load <4 x float>* null		; <<4 x float>>:1033 [#uses=0]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:1033 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 2		; <<4 x float>*>:1034 [#uses=1]
-	load <4 x float>* %1034		; <<4 x float>>:1035 [#uses=0]
+	load <4 x float>, <4 x float>* %1034		; <<4 x float>>:1035 [#uses=0]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 3		; <<4 x float>*>:1036 [#uses=0]
 	br label %xST.exit472
 
@@ -1319,7 +1319,7 @@
 
 ; <label>:1042		; preds = %1040
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 4, i32 1		; <<4 x float>*>:1043 [#uses=1]
-	load <4 x float>* %1043		; <<4 x float>>:1044 [#uses=0]
+	load <4 x float>, <4 x float>* %1043		; <<4 x float>>:1044 [#uses=0]
 	br label %1045
 
 ; <label>:1045		; preds = %1042, %1040
@@ -1367,7 +1367,7 @@
 	br label %xST.exit474
 
 xST.exit474:		; preds = %1059, %1058, %1051
-	load <4 x float>* null		; <<4 x float>>:1060 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:1060 [#uses=1]
 	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:1061 [#uses=1]
 	fmul <4 x float> %1060, zeroinitializer		; <<4 x float>>:1062 [#uses=2]
 	br i1 false, label %1065, label %1063
@@ -1555,7 +1555,7 @@
 	br label %xST.exit492
 
 xST.exit492:		; preds = %1118, %1117, %1110
-	load <4 x float>* null		; <<4 x float>>:1119 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:1119 [#uses=1]
 	fmul <4 x float> %1119, zeroinitializer		; <<4 x float>>:1120 [#uses=1]
 	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:1121 [#uses=1]
 	br i1 false, label %1123, label %1122
@@ -1590,7 +1590,7 @@
 xST.exit495:		; preds = %1130, %1129, %1122
 	%.07582 = phi <4 x float> [ %1121, %1122 ], [ %.17583, %1130 ], [ %.17583, %1129 ]		; <<4 x float>> [#uses=1]
 	%.07590 = phi <4 x float> [ %1120, %1122 ], [ %.17591, %1130 ], [ %.17591, %1129 ]		; <<4 x float>> [#uses=1]
-	load <4 x float>* null		; <<4 x float>>:1131 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:1131 [#uses=1]
 	fadd <4 x float> %1131, zeroinitializer		; <<4 x float>>:1132 [#uses=1]
 	fadd <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:1133 [#uses=1]
 	br i1 false, label %1135, label %1134
@@ -1625,11 +1625,11 @@
 xST.exit498:		; preds = %1142, %1141, %1134
 	%.07617 = phi <4 x float> [ %1133, %1134 ], [ %.17618, %1142 ], [ %.17618, %1141 ]		; <<4 x float>> [#uses=1]
 	%.07621 = phi <4 x float> [ %1132, %1134 ], [ %.17622, %1142 ], [ %.17622, %1141 ]		; <<4 x float>> [#uses=1]
-	load <4 x float>* null		; <<4 x float>>:1143 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:1143 [#uses=1]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2		; <<4 x float>*>:1144 [#uses=1]
-	load <4 x float>* %1144		; <<4 x float>>:1145 [#uses=1]
+	load <4 x float>, <4 x float>* %1144		; <<4 x float>>:1145 [#uses=1]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3		; <<4 x float>*>:1146 [#uses=1]
-	load <4 x float>* %1146		; <<4 x float>>:1147 [#uses=1]
+	load <4 x float>, <4 x float>* %1146		; <<4 x float>>:1147 [#uses=1]
 	shufflevector <4 x float> %1143, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:1148 [#uses=1]
 	shufflevector <4 x float> %1145, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:1149 [#uses=1]
 	shufflevector <4 x float> %1147, <4 x float> undef, <4 x i32> zeroinitializer		; <<4 x float>>:1150 [#uses=1]
@@ -1671,11 +1671,11 @@
 	%.07656 = phi <4 x float> [ %1153, %1155 ], [ %.17657, %1163 ], [ %.17657, %1162 ]		; <<4 x float>> [#uses=1]
 	%.07660 = phi <4 x float> [ %1152, %1155 ], [ %.17661, %1163 ], [ %.17661, %1162 ]		; <<4 x float>> [#uses=1]
 	%.07664 = phi <4 x float> [ %1151, %1155 ], [ %.17665, %1163 ], [ %.17665, %1162 ]		; <<4 x float>> [#uses=1]
-	load <4 x float>* null		; <<4 x float>>:1164 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:1164 [#uses=1]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 2		; <<4 x float>*>:1165 [#uses=1]
-	load <4 x float>* %1165		; <<4 x float>>:1166 [#uses=1]
+	load <4 x float>, <4 x float>* %1165		; <<4 x float>>:1166 [#uses=1]
 	getelementptr [193 x [4 x <4 x float>]], [193 x [4 x <4 x float>]]* null, i32 0, i32 0, i32 3		; <<4 x float>*>:1167 [#uses=1]
-	load <4 x float>* %1167		; <<4 x float>>:1168 [#uses=1]
+	load <4 x float>, <4 x float>* %1167		; <<4 x float>>:1168 [#uses=1]
 	fadd <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:1169 [#uses=1]
 	fadd <4 x float> zeroinitializer, %1164		; <<4 x float>>:1170 [#uses=1]
 	fadd <4 x float> zeroinitializer, %1166		; <<4 x float>>:1171 [#uses=1]
@@ -1734,21 +1734,21 @@
 	br label %1188
 
 ; <label>:1188		; preds = %1187, %1186
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:1189 [#uses=1]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:1189 [#uses=1]
 	shufflevector <4 x i32> %1189, <4 x i32> undef, <4 x i32> < i32 2, i32 2, i32 2, i32 2 >		; <<4 x i32>>:1190 [#uses=1]
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %1190, <4 x i32> zeroinitializer )		; <i32>:1191 [#uses=1]
 	icmp eq i32 %1191, 0		; <i1>:1192 [#uses=1]
 	br i1 %1192, label %1196, label %1193
 
 ; <label>:1193		; preds = %1188
-	load <4 x float>* null		; <<4 x float>>:1194 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:1194 [#uses=1]
 	shufflevector <4 x float> zeroinitializer, <4 x float> %1194, <4 x i32> < i32 0, i32 1, i32 2, i32 7 >		; <<4 x float>>:1195 [#uses=1]
 	store <4 x float> %1195, <4 x float>* null
 	br label %1196
 
 ; <label>:1196		; preds = %1193, %1188
 	%.07742 = phi <4 x float> [ zeroinitializer, %1193 ], [ zeroinitializer, %1188 ]		; <<4 x float>> [#uses=0]
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:1197 [#uses=1]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:1197 [#uses=1]
 	shufflevector <4 x i32> %1197, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x i32>>:1198 [#uses=1]
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %1198, <4 x i32> zeroinitializer )		; <i32>:1199 [#uses=1]
 	icmp eq i32 %1199, 0		; <i1>:1200 [#uses=1]
@@ -1765,20 +1765,20 @@
 	br i1 %1203, label %1207, label %1204
 
 ; <label>:1204		; preds = %xST.exit507
-	load <4 x float>* null		; <<4 x float>>:1205 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:1205 [#uses=1]
 	shufflevector <4 x float> zeroinitializer, <4 x float> %1205, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:1206 [#uses=1]
 	store <4 x float> %1206, <4 x float>* null
 	br label %1207
 
 ; <label>:1207		; preds = %1204, %xST.exit507
-	load <4 x i32>* %.sub7896		; <<4 x i32>>:1208 [#uses=1]
+	load <4 x i32>, <4 x i32>* %.sub7896		; <<4 x i32>>:1208 [#uses=1]
 	shufflevector <4 x i32> %1208, <4 x i32> undef, <4 x i32> < i32 1, i32 1, i32 1, i32 1 >		; <<4 x i32>>:1209 [#uses=1]
 	call i32 @llvm.ppc.altivec.vcmpequw.p( i32 0, <4 x i32> %1209, <4 x i32> zeroinitializer )		; <i32>:1210 [#uses=1]
 	icmp eq i32 %1210, 0		; <i1>:1211 [#uses=1]
 	br i1 %1211, label %1215, label %1212
 
 ; <label>:1212		; preds = %1207
-	load <4 x float>* null		; <<4 x float>>:1213 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:1213 [#uses=1]
 	shufflevector <4 x float> zeroinitializer, <4 x float> %1213, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >		; <<4 x float>>:1214 [#uses=1]
 	store <4 x float> %1214, <4 x float>* null
 	br label %1215
diff --git a/llvm/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll b/llvm/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
index 53231b4..01777578 100644
--- a/llvm/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
+++ b/llvm/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
@@ -22,7 +22,7 @@
 entry:
 	%Y = alloca i32, align 4		; <i32*> [#uses=2]
 	%tmp4 = call i32 asm "subf${3:I}c $1,$4,$3\0A\09subfze $0,$2", "=r,=*&r,r,rI,r"( i32* %Y, i32 %A, i32 %B, i32 %C )		; <i32> [#uses=1]
-	%tmp5 = load i32* %Y		; <i32> [#uses=1]
+	%tmp5 = load i32, i32* %Y		; <i32> [#uses=1]
 	%tmp56 = zext i32 %tmp5 to i64		; <i64> [#uses=1]
 	%tmp7 = shl i64 %tmp56, 32		; <i64> [#uses=1]
 	%tmp89 = zext i32 %tmp4 to i64		; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll b/llvm/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
index 382ba1f..bba3707 100644
--- a/llvm/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
+++ b/llvm/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
@@ -15,7 +15,7 @@
 	%retval = alloca i32, align 4		; <i32*> [#uses=1]
 	store i32 %i, i32* %i_addr
 	store i32 %q, i32* %q_addr
-	%tmp = load i32* %i_addr		; <i32> [#uses=1]
+	%tmp = load i32, i32* %i_addr		; <i32> [#uses=1]
 	%tmp1 = icmp ne i32 %tmp, 0		; <i1> [#uses=1]
 	%tmp12 = zext i1 %tmp1 to i8		; <i8> [#uses=1]
 	%toBool = icmp ne i8 %tmp12, 0		; <i1> [#uses=1]
@@ -24,7 +24,7 @@
 cond_true:		; preds = %entry
 	%tmp3 = call i32 (...)* @bar( )		; <i32> [#uses=0]
 	%tmp4 = call i32 (...)* @baz( i32 5, i32 6 )		; <i32> [#uses=0]
-	%tmp7 = load i32* %q_addr		; <i32> [#uses=1]
+	%tmp7 = load i32, i32* %q_addr		; <i32> [#uses=1]
 	%tmp8 = icmp ne i32 %tmp7, 0		; <i1> [#uses=1]
 	%tmp89 = zext i1 %tmp8 to i8		; <i8> [#uses=1]
 	%toBool10 = icmp ne i8 %tmp89, 0		; <i1> [#uses=1]
@@ -33,7 +33,7 @@
 cond_false:		; preds = %entry
 	%tmp5 = call i32 (...)* @foo( )		; <i32> [#uses=0]
 	%tmp6 = call i32 (...)* @baz( i32 5, i32 6 )		; <i32> [#uses=0]
-	%tmp27 = load i32* %q_addr		; <i32> [#uses=1]
+	%tmp27 = load i32, i32* %q_addr		; <i32> [#uses=1]
 	%tmp28 = icmp ne i32 %tmp27, 0		; <i1> [#uses=1]
 	%tmp289 = zext i1 %tmp28 to i8		; <i8> [#uses=1]
 	%toBool210 = icmp ne i8 %tmp289, 0		; <i1> [#uses=1]
@@ -54,7 +54,7 @@
 	br label %return
 
 return:		; preds = %cond_next18
-	%retval20 = load i32* %retval		; <i32> [#uses=1]
+	%retval20 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval20
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll b/llvm/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
index 40f46fd..ee1e233 100644
--- a/llvm/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
+++ b/llvm/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
@@ -8,7 +8,7 @@
 entry:
         %ttype = alloca i32, align 4            ; <i32*> [#uses=1]
         %regs = alloca [1024 x %struct.__db_region], align 16           ; <[1024 x %struct.__db_region]*> [#uses=0]
-        %tmp = load i32* %ttype, align 4                ; <i32> [#uses=1]
+        %tmp = load i32, i32* %ttype, align 4                ; <i32> [#uses=1]
         %tmp1 = call i32 (...)* @bork( i32 %tmp )               ; <i32> [#uses=0]
         ret void
 
diff --git a/llvm/test/CodeGen/PowerPC/2007-09-08-unaligned.ll b/llvm/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
index 8897189..341b632 100644
--- a/llvm/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
+++ b/llvm/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
@@ -17,17 +17,17 @@
 	%retval = alloca i32, align 4		; <i32*> [#uses=1]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	%tmp = getelementptr %struct.anon, %struct.anon* @s, i32 0, i32 1		; <float*> [#uses=1]
-	%tmp1 = load float* %tmp, align 1		; <float> [#uses=1]
+	%tmp1 = load float, float* %tmp, align 1		; <float> [#uses=1]
 	%tmp2 = getelementptr %struct.anon, %struct.anon* @t, i32 0, i32 1		; <float*> [#uses=1]
 	store float %tmp1, float* %tmp2, align 1
 	%tmp3 = getelementptr <{ i8, double }>, <{ i8, double }>* @u, i32 0, i32 1		; <double*> [#uses=1]
-	%tmp4 = load double* %tmp3, align 1		; <double> [#uses=1]
+	%tmp4 = load double, double* %tmp3, align 1		; <double> [#uses=1]
 	%tmp5 = getelementptr <{ i8, double }>, <{ i8, double }>* @v, i32 0, i32 1		; <double*> [#uses=1]
 	store double %tmp4, double* %tmp5, align 1
 	br label %return
 
 return:		; preds = %entry
-	%retval6 = load i32* %retval		; <i32> [#uses=1]
+	%retval6 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval6
 }
 
@@ -37,16 +37,16 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	%tmp = call i32 @foo( )		; <i32> [#uses=0]
 	%tmp1 = getelementptr %struct.anon, %struct.anon* @t, i32 0, i32 1		; <float*> [#uses=1]
-	%tmp2 = load float* %tmp1, align 1		; <float> [#uses=1]
+	%tmp2 = load float, float* %tmp1, align 1		; <float> [#uses=1]
 	%tmp23 = fpext float %tmp2 to double		; <double> [#uses=1]
 	%tmp4 = getelementptr <{ i8, double }>, <{ i8, double }>* @v, i32 0, i32 1		; <double*> [#uses=1]
-	%tmp5 = load double* %tmp4, align 1		; <double> [#uses=1]
+	%tmp5 = load double, double* %tmp4, align 1		; <double> [#uses=1]
 	%tmp6 = getelementptr [8 x i8], [8 x i8]* @.str, i32 0, i32 0		; <i8*> [#uses=1]
 	%tmp7 = call i32 (i8*, ...)* @printf( i8* %tmp6, double %tmp23, double %tmp5 )		; <i32> [#uses=0]
 	br label %return
 
 return:		; preds = %entry
-	%retval8 = load i32* %retval		; <i32> [#uses=1]
+	%retval8 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval8
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll b/llvm/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll
index 84fadd1..07b1f8d 100644
--- a/llvm/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll
+++ b/llvm/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll
@@ -10,9 +10,9 @@
 cond_true:		; preds = %entry
 	%tmp89 = bitcast float* %res to <4 x i32>*		; <<4 x i32>*> [#uses=1]
 	%tmp1011 = bitcast float* %argA to <4 x i32>*		; <<4 x i32>*> [#uses=1]
-	%tmp14 = load <4 x i32>* %tmp1011, align 16		; <<4 x i32>> [#uses=1]
+	%tmp14 = load <4 x i32>, <4 x i32>* %tmp1011, align 16		; <<4 x i32>> [#uses=1]
 	%tmp1516 = bitcast float* %argB to <4 x i32>*		; <<4 x i32>*> [#uses=1]
-	%tmp18 = load <4 x i32>* %tmp1516, align 16		; <<4 x i32>> [#uses=1]
+	%tmp18 = load <4 x i32>, <4 x i32>* %tmp1516, align 16		; <<4 x i32>> [#uses=1]
 	%tmp19 = sdiv <4 x i32> %tmp14, %tmp18		; <<4 x i32>> [#uses=1]
 	store <4 x i32> %tmp19, <4 x i32>* %tmp89, align 16
 	ret void
diff --git a/llvm/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll b/llvm/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
index a60d11c..b136f21 100644
--- a/llvm/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
+++ b/llvm/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
@@ -18,8 +18,8 @@
 entry:
 	%storeCoordinator = alloca %struct.NSPersistentStoreCoordinator*		; <%struct.NSPersistentStoreCoordinator**> [#uses=0]
 	%tmp29 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* null( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_2" )		; <%struct.objc_object*> [#uses=0]
-	%tmp34 = load %struct.NSString** @NSXMLStoreType, align 8		; <%struct.NSString*> [#uses=1]
-	%tmp37 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_5", i32 0, i32 0), align 8		; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
+	%tmp34 = load %struct.NSString*, %struct.NSString** @NSXMLStoreType, align 8		; <%struct.NSString*> [#uses=1]
+	%tmp37 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*, %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_5", i32 0, i32 0), align 8		; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
 	%tmp42 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* null( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_4", i32 1 )		; <%struct.objc_object*> [#uses=1]
 	%tmp45 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* %tmp37( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_5", %struct.objc_object* %tmp42, %struct.NSString* null )		; <%struct.objc_object*> [#uses=1]
 	%tmp48 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* null( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_6", %struct.NSString* %tmp34, i8* null, %struct.NSURL* null, %struct.objc_object* %tmp45, %struct.NSError** null )		; <%struct.objc_object*> [#uses=0]
diff --git a/llvm/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll b/llvm/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
index 3d1a328..0399020 100644
--- a/llvm/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
+++ b/llvm/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
@@ -14,11 +14,11 @@
 
 define %struct.NSManagedObjectContext* @"+[ListGenerator(Private) managedObjectContextWithModelURL:storeURL:]"(%struct.objc_object* %self, %struct._message_ref_t* %_cmd, %struct.NSURL* %modelURL, %struct.NSURL* %storeURL) {
 entry:
-	%tmp27 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_2", i32 0, i32 0), align 8		; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
+	%tmp27 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*, %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_2", i32 0, i32 0), align 8		; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
 	%tmp29 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* %tmp27( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_2" )		; <%struct.objc_object*> [#uses=0]
-	%tmp33 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_6", i32 0, i32 0), align 8		; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
-	%tmp34 = load %struct.NSString** @NSXMLStoreType, align 8		; <%struct.NSString*> [#uses=1]
-	%tmp40 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_4", i32 0, i32 0), align 8		; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
+	%tmp33 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*, %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_6", i32 0, i32 0), align 8		; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
+	%tmp34 = load %struct.NSString*, %struct.NSString** @NSXMLStoreType, align 8		; <%struct.NSString*> [#uses=1]
+	%tmp40 = load %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*, %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)** getelementptr (%struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_4", i32 0, i32 0), align 8		; <%struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)*> [#uses=1]
 	%tmp42 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* %tmp40( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_4", i32 1 )		; <%struct.objc_object*> [#uses=0]
 	%tmp48 = call %struct.objc_object* (%struct.objc_object*, %struct._message_ref_t*, ...)* %tmp33( %struct.objc_object* null, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_6", %struct.NSString* %tmp34, i8* null, %struct.NSURL* null, %struct.objc_object* null, %struct.NSError** null )		; <%struct.objc_object*> [#uses=0]
 	unreachable
diff --git a/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll b/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
index 86c7b57..3412291 100644
--- a/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
+++ b/llvm/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
@@ -39,7 +39,7 @@
         resume { i8*, i32 } %exn
 
 invcont23:		; preds = %cond_true
-	%tmp27 = load i64* %tmp26, align 8		; <i64> [#uses=1]
+	%tmp27 = load i64, i64* %tmp26, align 8		; <i64> [#uses=1]
 	%tmp28 = sub i64 %range_addr.1.0, %tmp27		; <i64> [#uses=1]
 	br label %bb30
 
diff --git a/llvm/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll b/llvm/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
index d1f0285..4830ca6 100644
--- a/llvm/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
+++ b/llvm/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
@@ -6,7 +6,7 @@
 define void @execute_shader(<4 x float>* %OUT, <4 x float>* %IN, <4 x float>*
 %CONST) {
 entry:
-        %input2 = load <4 x float>* null, align 16               ; <<4 x float>>
+        %input2 = load <4 x float>, <4 x float>* null, align 16               ; <<4 x float>>
        	%shuffle7 = shufflevector <4 x float> %input2, <4 x float> < float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 2, i32 2, i32 2, i32 2 >		; <<4 x float>> [#uses=1]
 
         %mul1 = fmul <4 x float> %shuffle7, zeroinitializer              ; <<4 x
diff --git a/llvm/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll b/llvm/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
index e28a3e0..73a804b 100644
--- a/llvm/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
+++ b/llvm/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
@@ -2,7 +2,7 @@
 
 define i32 @bork(i64 %foo, i64 %bar) {
 entry:
-        %tmp = load i64* null, align 8          ; <i64> [#uses=2]
+        %tmp = load i64, i64* null, align 8          ; <i64> [#uses=2]
         %tmp2 = icmp ule i64 %tmp, 0            ; <i1> [#uses=1]
         %min = select i1 %tmp2, i64 %tmp, i64 0   ; <i64> [#uses=1]
         store i64 %min, i64* null, align 8
diff --git a/llvm/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll b/llvm/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
index d10291e1..863b025 100644
--- a/llvm/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
+++ b/llvm/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
@@ -6,7 +6,7 @@
 entry:
 	%tmp2 = tail call i8* @bar( i32 14 ) nounwind 		; <i8*> [#uses=0]
 	%tmp28 = bitcast i8* %pp to void ()**		; <void ()**> [#uses=1]
-	%tmp38 = load void ()** %tmp28, align 4		; <void ()*> [#uses=2]
+	%tmp38 = load void ()*, void ()** %tmp28, align 4		; <void ()*> [#uses=2]
 	br i1 false, label %bb34, label %bb25
 bb25:		; preds = %entry
 	%tmp30 = bitcast void ()* %tmp38 to void (i8*)*		; <void (i8*)*> [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll b/llvm/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
index 7eb382a..dc9734f 100644
--- a/llvm/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
+++ b/llvm/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
@@ -7,7 +7,7 @@
 
 define i32 @cpp_interpret_string(i32 %pfile, %struct.cpp_string* %from, i32 %wide) nounwind  {
 entry:
-	%tmp61 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp61 = load i32, i32* null, align 4		; <i32> [#uses=1]
 	%toBool = icmp eq i32 %wide, 0		; <i1> [#uses=2]
 	%iftmp.87.0 = select i1 %toBool, i32 %tmp61, i32 0		; <i32> [#uses=2]
 	%tmp69 = icmp ult i32 %iftmp.87.0, 33		; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll b/llvm/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll
index 7d056da..1191748 100644
--- a/llvm/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll
+++ b/llvm/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll
@@ -6,7 +6,7 @@
 	%tmp28 = and i64 %tmp2627, -16384		; <i64> [#uses=2]
 	%tmp2829 = inttoptr i64 %tmp28 to i8*		; <i8*> [#uses=1]
 	%tmp37 = getelementptr i8, i8* %tmp2829, i64 42		; <i8*> [#uses=1]
-	%tmp40 = load i8* %tmp37, align 1		; <i8> [#uses=1]
+	%tmp40 = load i8, i8* %tmp37, align 1		; <i8> [#uses=1]
 	%tmp4041 = zext i8 %tmp40 to i64		; <i64> [#uses=1]
 	%tmp42 = shl i64 %tmp4041, 8		; <i64> [#uses=1]
 	%tmp47 = add i64 %tmp42, 0		; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll b/llvm/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll
index 8e5bf56..908a2a8 100644
--- a/llvm/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll
+++ b/llvm/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll
@@ -2,7 +2,7 @@
 
 define i32 @t(i64 %byteStart, i32 %activeIndex) nounwind  {
 entry:
-	%tmp50 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp50 = load i32, i32* null, align 4		; <i32> [#uses=1]
 	%tmp5051 = zext i32 %tmp50 to i64		; <i64> [#uses=3]
 	%tmp53 = udiv i64 %byteStart, %tmp5051		; <i64> [#uses=1]
 	%tmp5354 = trunc i64 %tmp53 to i32		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll b/llvm/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
index 32c928e6..16f4853 100644
--- a/llvm/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
+++ b/llvm/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll
@@ -24,7 +24,7 @@
 
 bb31:		; preds = %_Z24unlock_then_erase_sectory.exit, %bb
 	%Pos.0.reg2mem.0 = phi i64 [ %tmp93, %_Z24unlock_then_erase_sectory.exit ], [ %Offset, %bb ]		; <i64> [#uses=3]
-	%tmp35 = load i16* @_ZL10DeviceCode, align 2		; <i16> [#uses=1]
+	%tmp35 = load i16, i16* @_ZL10DeviceCode, align 2		; <i16> [#uses=1]
 	%tmp3536 = zext i16 %tmp35 to i32		; <i32> [#uses=2]
 	%tmp37 = and i32 %tmp3536, 65520		; <i32> [#uses=1]
 	%tmp38 = icmp eq i32 %tmp37, 35008		; <i1> [#uses=1]
@@ -43,7 +43,7 @@
 	%tmp2021.i = trunc i64 %Pos.0.reg2mem.0 to i32		; <i32> [#uses=1]
 	%tmp202122.i = inttoptr i32 %tmp2021.i to i8*		; <i8*> [#uses=1]
 	tail call void @IODelay( i32 500 ) nounwind 
-	%tmp53.i = load volatile i16* null, align 2		; <i16> [#uses=2]
+	%tmp53.i = load volatile i16, i16* null, align 2		; <i16> [#uses=2]
 	%tmp5455.i = zext i16 %tmp53.i to i32		; <i32> [#uses=1]
 	br i1 false, label %bb.i, label %bb65.i
 
diff --git a/llvm/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll b/llvm/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll
index 862559b..8ce1708 100644
--- a/llvm/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll
+++ b/llvm/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll
@@ -4,7 +4,7 @@
 @h = external global ppc_fp128
 
 define void @f() {
-	%tmp = load ppc_fp128* @g
+	%tmp = load ppc_fp128, ppc_fp128* @g
 	store ppc_fp128 %tmp, ppc_fp128* @h
 	ret void
 }
diff --git a/llvm/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll b/llvm/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll
index 83c5511..db488ff 100644
--- a/llvm/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll
+++ b/llvm/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll
@@ -6,7 +6,7 @@
 	br i1 true, label %bb1, label %bb3
 
 bb1:
-	%tmp1 = load i8* null, align 1
+	%tmp1 = load i8, i8* null, align 1
 	%tmp2 = icmp eq i8 %tmp1, 0
 	br label %bb2
 
diff --git a/llvm/test/CodeGen/PowerPC/2008-07-15-Bswap.ll b/llvm/test/CodeGen/PowerPC/2008-07-15-Bswap.ll
index 08efe40..b271048 100644
--- a/llvm/test/CodeGen/PowerPC/2008-07-15-Bswap.ll
+++ b/llvm/test/CodeGen/PowerPC/2008-07-15-Bswap.ll
@@ -137,7 +137,7 @@
 	%numEdgesToTest.1770 = phi i32 [ 4, %bb16 ], [ %numEdgesToTest.2, %bb395 ]		; <i32> [#uses=1]
 	icmp eq i32 %idxEachField11.0773, 0		; <i1>:30 [#uses=0]
 	getelementptr %struct.BiPartSrcDescriptor*, %struct.BiPartSrcDescriptor** null, i32 %mbIndexLeft.2772		; <%struct.BiPartSrcDescriptor**>:31 [#uses=1]
-	load %struct.BiPartSrcDescriptor** %31, align 4		; <%struct.BiPartSrcDescriptor*>:32 [#uses=0]
+	load %struct.BiPartSrcDescriptor*, %struct.BiPartSrcDescriptor** %31, align 4		; <%struct.BiPartSrcDescriptor*>:32 [#uses=0]
 	%fMacroblockHasNonZeroBS.4 = select i1 %21, i32 1, i32 0		; <i32> [#uses=1]
 	%numEdgesToTest.2 = select i1 %21, i32 1, i32 %numEdgesToTest.1770		; <i32> [#uses=2]
 	store i8 32, i8* %boundaryStrengthsV.1771, align 1
@@ -181,31 +181,31 @@
 	mul i32 %51, 0		; <i32>:54 [#uses=1]
 	add i32 %46, %54		; <i32>:55 [#uses=1]
 	getelementptr %struct.BiPartSrcDescriptor*, %struct.BiPartSrcDescriptor** null, i32 %53		; <%struct.BiPartSrcDescriptor**>:56 [#uses=1]
-	load %struct.BiPartSrcDescriptor** %56, align 4		; <%struct.BiPartSrcDescriptor*>:57 [#uses=7]
+	load %struct.BiPartSrcDescriptor*, %struct.BiPartSrcDescriptor** %56, align 4		; <%struct.BiPartSrcDescriptor*>:57 [#uses=7]
 	getelementptr %struct.BiPartSrcDescriptor*, %struct.BiPartSrcDescriptor** null, i32 %55		; <%struct.BiPartSrcDescriptor**>:58 [#uses=1]
-	load %struct.BiPartSrcDescriptor** %58, align 4		; <%struct.BiPartSrcDescriptor*>:59 [#uses=5]
+	load %struct.BiPartSrcDescriptor*, %struct.BiPartSrcDescriptor** %58, align 4		; <%struct.BiPartSrcDescriptor*>:59 [#uses=5]
 	icmp slt i32 %159, 0		; <i1>:60 [#uses=0]
 	icmp eq %struct.BiPartSrcDescriptor* %57, %59		; <i1>:61 [#uses=0]
 	bitcast %struct.BiPartSrcDescriptor* %57 to i16*		; <i16*>:62 [#uses=5]
-	load i16* %62, align 2		; <i16>:63 [#uses=2]
+	load i16, i16* %62, align 2		; <i16>:63 [#uses=2]
 	getelementptr i16, i16* %62, i32 1		; <i16*>:64 [#uses=1]
-	load i16* %64, align 2		; <i16>:65 [#uses=2]
+	load i16, i16* %64, align 2		; <i16>:65 [#uses=2]
 	getelementptr i16, i16* %62, i32 2		; <i16*>:66 [#uses=1]
-	load i16* %66, align 2		; <i16>:67 [#uses=2]
+	load i16, i16* %66, align 2		; <i16>:67 [#uses=2]
 	getelementptr i16, i16* %62, i32 3		; <i16*>:68 [#uses=1]
-	load i16* %68, align 2		; <i16>:69 [#uses=2]
+	load i16, i16* %68, align 2		; <i16>:69 [#uses=2]
 	getelementptr i16, i16* %62, i32 6		; <i16*>:70 [#uses=1]
-	load i16* %70, align 2		; <i16>:71 [#uses=2]
+	load i16, i16* %70, align 2		; <i16>:71 [#uses=2]
 	bitcast %struct.BiPartSrcDescriptor* %59 to i16*		; <i16*>:72 [#uses=5]
-	load i16* %72, align 2		; <i16>:73 [#uses=2]
+	load i16, i16* %72, align 2		; <i16>:73 [#uses=2]
 	getelementptr i16, i16* %72, i32 1		; <i16*>:74 [#uses=1]
-	load i16* %74, align 2		; <i16>:75 [#uses=2]
+	load i16, i16* %74, align 2		; <i16>:75 [#uses=2]
 	getelementptr i16, i16* %72, i32 2		; <i16*>:76 [#uses=1]
-	load i16* %76, align 2		; <i16>:77 [#uses=2]
+	load i16, i16* %76, align 2		; <i16>:77 [#uses=2]
 	getelementptr i16, i16* %72, i32 3		; <i16*>:78 [#uses=1]
-	load i16* %78, align 2		; <i16>:79 [#uses=2]
+	load i16, i16* %78, align 2		; <i16>:79 [#uses=2]
 	getelementptr i16, i16* %72, i32 6		; <i16*>:80 [#uses=1]
-	load i16* %80, align 2		; <i16>:81 [#uses=2]
+	load i16, i16* %80, align 2		; <i16>:81 [#uses=2]
 	sub i16 %63, %73		; <i16>:82 [#uses=3]
 	sub i16 %65, %75		; <i16>:83 [#uses=3]
 	sub i16 %67, %77		; <i16>:84 [#uses=3]
@@ -227,22 +227,22 @@
 	icmp slt i16 %86, 0		; <i1>:96 [#uses=1]
 	%.663 = select i1 %96, i16 %95, i16 %86		; <i16> [#uses=1]
 	getelementptr %struct.BiPartSrcDescriptor, %struct.BiPartSrcDescriptor* %57, i32 0, i32 0, i32 0, i32 1, i32 0		; <i8*>:97 [#uses=1]
-	load i8* %97, align 1		; <i8>:98 [#uses=1]
+	load i8, i8* %97, align 1		; <i8>:98 [#uses=1]
 	zext i8 %98 to i32		; <i32>:99 [#uses=1]
 	getelementptr %struct.BiPartSrcDescriptor, %struct.BiPartSrcDescriptor* %57, i32 0, i32 0, i32 0, i32 1, i32 1		; <i8*>:100 [#uses=1]
-	load i8* %100, align 1		; <i8>:101 [#uses=1]
+	load i8, i8* %100, align 1		; <i8>:101 [#uses=1]
 	zext i8 %101 to i32		; <i32>:102 [#uses=1]
 	getelementptr %struct.BiPartSrcDescriptor, %struct.BiPartSrcDescriptor* %57, i32 0, i32 0, i32 0, i32 3, i32 0		; <i8*>:103 [#uses=1]
-	load i8* %103, align 1		; <i8>:104 [#uses=2]
+	load i8, i8* %103, align 1		; <i8>:104 [#uses=2]
 	zext i8 %104 to i32		; <i32>:105 [#uses=1]
 	getelementptr %struct.BiPartSrcDescriptor, %struct.BiPartSrcDescriptor* %59, i32 0, i32 0, i32 0, i32 3, i32 0		; <i8*>:106 [#uses=1]
-	load i8* %106, align 1		; <i8>:107 [#uses=2]
+	load i8, i8* %106, align 1		; <i8>:107 [#uses=2]
 	zext i8 %107 to i32		; <i32>:108 [#uses=1]
 	getelementptr %struct.BiPartSrcDescriptor, %struct.BiPartSrcDescriptor* %57, i32 0, i32 0, i32 0, i32 3, i32 1		; <i8*>:109 [#uses=1]
-	load i8* %109, align 1		; <i8>:110 [#uses=1]
+	load i8, i8* %109, align 1		; <i8>:110 [#uses=1]
 	zext i8 %110 to i32		; <i32>:111 [#uses=1]
 	getelementptr %struct.BiPartSrcDescriptor, %struct.BiPartSrcDescriptor* %59, i32 0, i32 0, i32 0, i32 3, i32 1		; <i8*>:112 [#uses=1]
-	load i8* %112, align 1		; <i8>:113 [#uses=1]
+	load i8, i8* %112, align 1		; <i8>:113 [#uses=1]
 	zext i8 %113 to i32		; <i32>:114 [#uses=1]
 	lshr i32 %99, 4		; <i32>:115 [#uses=1]
 	and i32 %115, 2		; <i32>:116 [#uses=1]
@@ -322,7 +322,7 @@
 	%bfNZ12.2 = phi i32 [ %159, %bb205 ], [ 0, %bb144 ], [ %159, %bb206 ]		; <i32> [#uses=1]
 	%boundaryStrengthsV.3 = phi i8* [ %158, %bb205 ], [ %boundaryStrengthsV.1771, %bb144 ], [ %158, %bb206 ]		; <i8*> [#uses=3]
 	or i32 %fMacroblockHasNonZeroBS.6, %fEdgeHasNonZeroBS.0		; <i32>:152 [#uses=2]
-	load i8* %boundaryStrengthsV.3, align 1		; <i8>:153 [#uses=1]
+	load i8, i8* %boundaryStrengthsV.3, align 1		; <i8>:153 [#uses=1]
 	trunc i32 %fEdgeHasNonZeroBS.0 to i8		; <i8>:154 [#uses=1]
 	shl i8 %154, 5		; <i8>:155 [#uses=1]
 	xor i8 %155, 32		; <i8>:156 [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll b/llvm/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll
index 21b0c61..53639e7 100644
--- a/llvm/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll
+++ b/llvm/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll
@@ -4,7 +4,7 @@
 
 define signext i16 @t(i16* %dct)  nounwind  {
 entry:
-         load i16* null, align 2         ; <i16>:0 [#uses=2]
+         load i16, i16* null, align 2         ; <i16>:0 [#uses=2]
          lshr i16 %0, 11         ; <i16>:1 [#uses=0]
          trunc i16 %0 to i8              ; <i8>:2 [#uses=1]
          sext i8 %2 to i16               ; <i16>:3 [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll b/llvm/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
index c70bc63..d98080b 100644
--- a/llvm/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
+++ b/llvm/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll
@@ -10,7 +10,7 @@
 
 define void @lb(%struct.CGLSI* %src, i32 %n, %struct.CGLDI* %dst) nounwind {
 entry:
-	%0 = load i32* null, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* null, align 4		; <i32> [#uses=1]
 	%1 = icmp sgt i32 %0, 0		; <i1> [#uses=1]
 	br i1 %1, label %bb.nph4945, label %return
 
@@ -26,9 +26,9 @@
 	%7 = fptosi float 0.000000e+00 to i32		; <i32> [#uses=1]
 	%8 = fptosi float 0.000000e+00 to i32		; <i32> [#uses=1]
 	%9 = getelementptr float, float* null, i32 2		; <float*> [#uses=1]
-	%10 = load float* %9, align 4		; <float> [#uses=1]
+	%10 = load float, float* %9, align 4		; <float> [#uses=1]
 	%11 = getelementptr float, float* null, i32 3		; <float*> [#uses=1]
-	%12 = load float* %11, align 4		; <float> [#uses=1]
+	%12 = load float, float* %11, align 4		; <float> [#uses=1]
 	%13 = fmul float %10, 6.553500e+04		; <float> [#uses=1]
 	%14 = fadd float %13, 5.000000e-01		; <float> [#uses=1]
 	%15 = fmul float %12, 6.553500e+04		; <float> [#uses=1]
@@ -65,9 +65,9 @@
 	store i64 %31, i64* %3, align 8
 	%35 = getelementptr i8, i8* null, i32 0		; <i8*> [#uses=1]
 	%36 = bitcast i8* %35 to float*		; <float*> [#uses=4]
-	%37 = load float* %36, align 4		; <float> [#uses=1]
+	%37 = load float, float* %36, align 4		; <float> [#uses=1]
 	%38 = getelementptr float, float* %36, i32 1		; <float*> [#uses=1]
-	%39 = load float* %38, align 4		; <float> [#uses=1]
+	%39 = load float, float* %38, align 4		; <float> [#uses=1]
 	%40 = fmul float %37, 6.553500e+04		; <float> [#uses=1]
 	%41 = fadd float %40, 5.000000e-01		; <float> [#uses=1]
 	%42 = fmul float %39, 6.553500e+04		; <float> [#uses=1]
@@ -85,9 +85,9 @@
 	%47 = fptosi float 0.000000e+00 to i32		; <i32> [#uses=1]
 	%48 = fptosi float %f1582.0 to i32		; <i32> [#uses=1]
 	%49 = getelementptr float, float* %36, i32 2		; <float*> [#uses=1]
-	%50 = load float* %49, align 4		; <float> [#uses=1]
+	%50 = load float, float* %49, align 4		; <float> [#uses=1]
 	%51 = getelementptr float, float* %36, i32 3		; <float*> [#uses=1]
-	%52 = load float* %51, align 4		; <float> [#uses=1]
+	%52 = load float, float* %51, align 4		; <float> [#uses=1]
 	%53 = fmul float %50, 6.553500e+04		; <float> [#uses=1]
 	%54 = fadd float %53, 5.000000e-01		; <float> [#uses=1]
 	%55 = fmul float %52, 6.553500e+04		; <float> [#uses=1]
@@ -108,9 +108,9 @@
 	%70 = or i64 %69, %67		; <i64> [#uses=2]
 	%71 = getelementptr i8, i8* null, i32 0		; <i8*> [#uses=1]
 	%72 = bitcast i8* %71 to float*		; <float*> [#uses=4]
-	%73 = load float* %72, align 4		; <float> [#uses=1]
+	%73 = load float, float* %72, align 4		; <float> [#uses=1]
 	%74 = getelementptr float, float* %72, i32 1		; <float*> [#uses=1]
-	%75 = load float* %74, align 4		; <float> [#uses=1]
+	%75 = load float, float* %74, align 4		; <float> [#uses=1]
 	%76 = fmul float %73, 6.553500e+04		; <float> [#uses=1]
 	%77 = fadd float %76, 5.000000e-01		; <float> [#uses=3]
 	%78 = fmul float %75, 6.553500e+04		; <float> [#uses=1]
@@ -131,9 +131,9 @@
 	%83 = fptosi float %f0569.0 to i32		; <i32> [#uses=1]
 	%84 = fptosi float 0.000000e+00 to i32		; <i32> [#uses=1]
 	%85 = getelementptr float, float* %72, i32 2		; <float*> [#uses=1]
-	%86 = load float* %85, align 4		; <float> [#uses=1]
+	%86 = load float, float* %85, align 4		; <float> [#uses=1]
 	%87 = getelementptr float, float* %72, i32 3		; <float*> [#uses=1]
-	%88 = load float* %87, align 4		; <float> [#uses=1]
+	%88 = load float, float* %87, align 4		; <float> [#uses=1]
 	%89 = fmul float %86, 6.553500e+04		; <float> [#uses=1]
 	%90 = fadd float %89, 5.000000e-01		; <float> [#uses=1]
 	%91 = fmul float %88, 6.553500e+04		; <float> [#uses=1]
@@ -168,46 +168,46 @@
 	br i1 %114, label %bb2318, label %bb2317
 
 bb2317:		; preds = %bb2315
-	%115 = load i64* %2, align 16		; <i64> [#uses=1]
+	%115 = load i64, i64* %2, align 16		; <i64> [#uses=1]
 	%116 = call i32 (...)* @_u16a_cm( i64 %115, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind		; <i32> [#uses=1]
 	%117 = sext i32 %116 to i64		; <i64> [#uses=1]
 	store i64 %117, i64* %2, align 16
-	%118 = load i64* %3, align 8		; <i64> [#uses=1]
+	%118 = load i64, i64* %3, align 8		; <i64> [#uses=1]
 	%119 = call i32 (...)* @_u16a_cm( i64 %118, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind		; <i32> [#uses=1]
 	%120 = sext i32 %119 to i64		; <i64> [#uses=1]
 	store i64 %120, i64* %3, align 8
-	%121 = load i64* %4, align 16		; <i64> [#uses=1]
+	%121 = load i64, i64* %4, align 16		; <i64> [#uses=1]
 	%122 = call i32 (...)* @_u16a_cm( i64 %121, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind		; <i32> [#uses=1]
 	%123 = sext i32 %122 to i64		; <i64> [#uses=1]
 	store i64 %123, i64* %4, align 16
-	%124 = load i64* %5, align 8		; <i64> [#uses=1]
+	%124 = load i64, i64* %5, align 8		; <i64> [#uses=1]
 	%125 = call i32 (...)* @_u16a_cm( i64 %124, %struct.xx_t* %159, double 0.000000e+00, double 1.047551e+06 ) nounwind		; <i32> [#uses=0]
 	unreachable
 
 bb2318:		; preds = %bb2315
 	%126 = getelementptr %struct.CGLSI, %struct.CGLSI* %src, i32 %indvar5021, i32 8		; <%struct.vv_t*> [#uses=1]
 	%127 = bitcast %struct.vv_t* %126 to i64*		; <i64*> [#uses=1]
-	%128 = load i64* %127, align 8		; <i64> [#uses=1]
+	%128 = load i64, i64* %127, align 8		; <i64> [#uses=1]
 	%129 = trunc i64 %128 to i32		; <i32> [#uses=4]
-	%130 = load i64* %2, align 16		; <i64> [#uses=1]
+	%130 = load i64, i64* %2, align 16		; <i64> [#uses=1]
 	%131 = call i32 (...)* @_u16_ff( i64 %130, i32 %129 ) nounwind		; <i32> [#uses=1]
 	%132 = sext i32 %131 to i64		; <i64> [#uses=1]
 	store i64 %132, i64* %2, align 16
-	%133 = load i64* %3, align 8		; <i64> [#uses=1]
+	%133 = load i64, i64* %3, align 8		; <i64> [#uses=1]
 	%134 = call i32 (...)* @_u16_ff( i64 %133, i32 %129 ) nounwind		; <i32> [#uses=1]
 	%135 = sext i32 %134 to i64		; <i64> [#uses=1]
 	store i64 %135, i64* %3, align 8
-	%136 = load i64* %4, align 16		; <i64> [#uses=1]
+	%136 = load i64, i64* %4, align 16		; <i64> [#uses=1]
 	%137 = call i32 (...)* @_u16_ff( i64 %136, i32 %129 ) nounwind		; <i32> [#uses=1]
 	%138 = sext i32 %137 to i64		; <i64> [#uses=1]
 	store i64 %138, i64* %4, align 16
-	%139 = load i64* %5, align 8		; <i64> [#uses=1]
+	%139 = load i64, i64* %5, align 8		; <i64> [#uses=1]
 	%140 = call i32 (...)* @_u16_ff( i64 %139, i32 %129 ) nounwind		; <i32> [#uses=0]
 	unreachable
 
 bb2319:		; preds = %bb2326
 	%141 = getelementptr %struct.CGLSI, %struct.CGLSI* %src, i32 %indvar5021, i32 2		; <i8**> [#uses=1]
-	%142 = load i8** %141, align 4		; <i8*> [#uses=4]
+	%142 = load i8*, i8** %141, align 4		; <i8*> [#uses=4]
 	%143 = getelementptr i8, i8* %142, i32 0		; <i8*> [#uses=1]
 	%144 = call i32 (...)* @_u16_sf32( double 0.000000e+00, double 6.553500e+04, double 5.000000e-01, i8* %143 ) nounwind		; <i32> [#uses=1]
 	%145 = sext i32 %144 to i64		; <i64> [#uses=2]
@@ -234,9 +234,9 @@
 	%indvar5021 = phi i32 [ 0, %bb.nph4945 ], [ %indvar.next5145, %bb2325 ]		; <i32> [#uses=6]
 	%157 = icmp slt i32 %indvar5021, %n		; <i1> [#uses=0]
 	%158 = getelementptr %struct.CGLSI, %struct.CGLSI* %src, i32 %indvar5021, i32 10		; <%struct.xx_t**> [#uses=1]
-	%159 = load %struct.xx_t** %158, align 4		; <%struct.xx_t*> [#uses=5]
+	%159 = load %struct.xx_t*, %struct.xx_t** %158, align 4		; <%struct.xx_t*> [#uses=5]
 	%160 = getelementptr %struct.CGLSI, %struct.CGLSI* %src, i32 %indvar5021, i32 1		; <i32*> [#uses=1]
-	%161 = load i32* %160, align 4		; <i32> [#uses=1]
+	%161 = load i32, i32* %160, align 4		; <i32> [#uses=1]
 	%162 = and i32 %161, 255		; <i32> [#uses=1]
 	switch i32 %162, label %bb2325 [
 		 i32 59, label %bb2217
diff --git a/llvm/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll b/llvm/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
index f474a6d..2372b2f 100644
--- a/llvm/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
+++ b/llvm/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll
@@ -2,7 +2,7 @@
 
 define void @__divtc3({ ppc_fp128, ppc_fp128 }* noalias sret %agg.result, ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c, ppc_fp128 %d) nounwind {
 entry:
-        %imag59 = load ppc_fp128* null, align 8         ; <ppc_fp128> [#uses=1]
+        %imag59 = load ppc_fp128, ppc_fp128* null, align 8         ; <ppc_fp128> [#uses=1]
         %0 = fmul ppc_fp128 0xM00000000000000000000000000000000, %imag59         ; <ppc_fp128> [#uses=1]
         %1 = fmul ppc_fp128 0xM00000000000000000000000000000000, 0xM00000000000000000000000000000000             ; <ppc_fp128> [#uses=1]
         %2 = fadd ppc_fp128 %0, %1               ; <ppc_fp128> [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll b/llvm/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll
index 8322a843..fbe1287 100644
--- a/llvm/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll
+++ b/llvm/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll
@@ -9,17 +9,17 @@
 
 define void @foo() nounwind {
 entry:
-	%0 = load ppc_fp128* @a, align 16		; <ppc_fp128> [#uses=1]
+	%0 = load ppc_fp128, ppc_fp128* @a, align 16		; <ppc_fp128> [#uses=1]
 	%1 = call ppc_fp128 @llvm.sqrt.ppcf128(ppc_fp128 %0)		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %1, ppc_fp128* @a, align 16
-	%2 = load ppc_fp128* @b, align 16		; <ppc_fp128> [#uses=1]
+	%2 = load ppc_fp128, ppc_fp128* @b, align 16		; <ppc_fp128> [#uses=1]
 	%3 = call ppc_fp128 @"\01_sinl$LDBL128"(ppc_fp128 %2) nounwind readonly		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %3, ppc_fp128* @b, align 16
-	%4 = load ppc_fp128* @c, align 16		; <ppc_fp128> [#uses=1]
+	%4 = load ppc_fp128, ppc_fp128* @c, align 16		; <ppc_fp128> [#uses=1]
 	%5 = call ppc_fp128 @"\01_cosl$LDBL128"(ppc_fp128 %4) nounwind readonly		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %5, ppc_fp128* @c, align 16
-	%6 = load ppc_fp128* @d, align 16		; <ppc_fp128> [#uses=1]
-	%7 = load ppc_fp128* @c, align 16		; <ppc_fp128> [#uses=1]
+	%6 = load ppc_fp128, ppc_fp128* @d, align 16		; <ppc_fp128> [#uses=1]
+	%7 = load ppc_fp128, ppc_fp128* @c, align 16		; <ppc_fp128> [#uses=1]
 	%8 = call ppc_fp128 @llvm.pow.ppcf128(ppc_fp128 %6, ppc_fp128 %7)		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %8, ppc_fp128* @d, align 16
 	br label %return
diff --git a/llvm/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll b/llvm/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
index e172de2..289e09b2 100644
--- a/llvm/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
+++ b/llvm/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll
@@ -15,7 +15,7 @@
   %y_addr = alloca i32                            ; <i32*> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   store i32 %y, i32* %y_addr
-  %0 = load i32* %y_addr, align 4                 ; <i32> [#uses=1]
+  %0 = load i32, i32* %y_addr, align 4                 ; <i32> [#uses=1]
   %1 = getelementptr inbounds [0 x i32], [0 x i32]* @x, i32 0, i32 %0 ; <i32*> [#uses=1]
   call void asm sideeffect "isync\0A\09eieio\0A\09stw $1, $0", "=*o,r,~{memory}"(i32* %1, i32 0) nounwind
   br label %return
diff --git a/llvm/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll b/llvm/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
index b95ac68..d4972a9 100644
--- a/llvm/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
+++ b/llvm/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll
@@ -11,7 +11,7 @@
 entry:
 ; CHECK: mtctr r12
 ; CHECK: bctrl
-  %0 = load void (...)** @p, align 4              ; <void (...)*> [#uses=1]
+  %0 = load void (...)*, void (...)** @p, align 4              ; <void (...)*> [#uses=1]
   call void (...)* %0() nounwind
   br label %return
 
diff --git a/llvm/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll b/llvm/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
index a25ce07..1f320a8 100644
--- a/llvm/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
+++ b/llvm/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll
@@ -12,11 +12,11 @@
   %0 = alloca i32
   %"alloca point" = bitcast i32 0 to i32
   store i32 0, i32* %0, align 4
-  %1 = load i32* %0, align 4
+  %1 = load i32, i32* %0, align 4
   store i32 %1, i32* %retval, align 4
   br label %return
 
 return:                                           ; preds = %entry
-  %retval1 = load i32* %retval
+  %retval1 = load i32, i32* %retval
   ret i32 %retval1
 }
diff --git a/llvm/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll b/llvm/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
index 0a8f774..70d2ff11 100644
--- a/llvm/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
+++ b/llvm/test/CodeGen/PowerPC/2011-12-05-NoSpillDupCR.ll
@@ -47,9 +47,9 @@
   %sext = shl i64 %sub5.us, 32
   %idxprom.us = ashr exact i64 %sext, 32
   %arrayidx.us = getelementptr inbounds [32000 x float], [32000 x float]* @b, i64 0, i64 %idxprom.us
-  %2 = load float* %arrayidx.us, align 4
+  %2 = load float, float* %arrayidx.us, align 4
   %arrayidx7.us = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv
-  %3 = load float* %arrayidx7.us, align 4
+  %3 = load float, float* %arrayidx7.us, align 4
   %add8.us = fadd float %3, %2
   store float %add8.us, float* %arrayidx7.us, align 4
   %indvars.iv.next = add i64 %indvars.iv, %1
@@ -82,9 +82,9 @@
   %sext23 = shl i64 %sub5.us.1, 32
   %idxprom.us.1 = ashr exact i64 %sext23, 32
   %arrayidx.us.1 = getelementptr inbounds [32000 x float], [32000 x float]* @b, i64 0, i64 %idxprom.us.1
-  %5 = load float* %arrayidx.us.1, align 4
+  %5 = load float, float* %arrayidx.us.1, align 4
   %arrayidx7.us.1 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv.1
-  %6 = load float* %arrayidx7.us.1, align 4
+  %6 = load float, float* %arrayidx7.us.1, align 4
   %add8.us.1 = fadd float %6, %5
   store float %add8.us.1, float* %arrayidx7.us.1, align 4
   %indvars.iv.next.1 = add i64 %indvars.iv.1, %1
@@ -104,9 +104,9 @@
   %sext24 = shl i64 %sub5.us.2, 32
   %idxprom.us.2 = ashr exact i64 %sext24, 32
   %arrayidx.us.2 = getelementptr inbounds [32000 x float], [32000 x float]* @b, i64 0, i64 %idxprom.us.2
-  %8 = load float* %arrayidx.us.2, align 4
+  %8 = load float, float* %arrayidx.us.2, align 4
   %arrayidx7.us.2 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv.2
-  %9 = load float* %arrayidx7.us.2, align 4
+  %9 = load float, float* %arrayidx7.us.2, align 4
   %add8.us.2 = fadd float %9, %8
   store float %add8.us.2, float* %arrayidx7.us.2, align 4
   %indvars.iv.next.2 = add i64 %indvars.iv.2, %1
@@ -126,9 +126,9 @@
   %sext25 = shl i64 %sub5.us.3, 32
   %idxprom.us.3 = ashr exact i64 %sext25, 32
   %arrayidx.us.3 = getelementptr inbounds [32000 x float], [32000 x float]* @b, i64 0, i64 %idxprom.us.3
-  %11 = load float* %arrayidx.us.3, align 4
+  %11 = load float, float* %arrayidx.us.3, align 4
   %arrayidx7.us.3 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv.3
-  %12 = load float* %arrayidx7.us.3, align 4
+  %12 = load float, float* %arrayidx7.us.3, align 4
   %add8.us.3 = fadd float %12, %11
   store float %add8.us.3, float* %arrayidx7.us.3, align 4
   %indvars.iv.next.3 = add i64 %indvars.iv.3, %1
@@ -148,9 +148,9 @@
   %sext26 = shl i64 %sub5.us.4, 32
   %idxprom.us.4 = ashr exact i64 %sext26, 32
   %arrayidx.us.4 = getelementptr inbounds [32000 x float], [32000 x float]* @b, i64 0, i64 %idxprom.us.4
-  %14 = load float* %arrayidx.us.4, align 4
+  %14 = load float, float* %arrayidx.us.4, align 4
   %arrayidx7.us.4 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv.4
-  %15 = load float* %arrayidx7.us.4, align 4
+  %15 = load float, float* %arrayidx7.us.4, align 4
   %add8.us.4 = fadd float %15, %14
   store float %add8.us.4, float* %arrayidx7.us.4, align 4
   %indvars.iv.next.4 = add i64 %indvars.iv.4, %1
diff --git a/llvm/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll b/llvm/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
index 7ec821b..ec31036 100644
--- a/llvm/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
+++ b/llvm/test/CodeGen/PowerPC/2011-12-06-SpillAndRestoreCR.ll
@@ -35,7 +35,7 @@
 
 for.body:                                         ; preds = %for.end17, %entry
   %nl.041 = phi i32 [ 0, %entry ], [ %inc22, %for.end17 ]
-  %0 = load float* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0, i64 0), align 16
+  %0 = load float, float* getelementptr inbounds ([256 x [256 x float]]* @aa, i64 0, i64 0, i64 0), align 16
   br label %for.cond5.preheader
 
 for.cond5.preheader:                              ; preds = %for.inc15, %for.body
@@ -51,7 +51,7 @@
   %xindex.234 = phi i32 [ %xindex.138, %for.cond5.preheader ], [ %xindex.3.15, %for.body7 ]
   %yindex.233 = phi i32 [ %yindex.137, %for.cond5.preheader ], [ %yindex.3.15, %for.body7 ]
   %arrayidx9 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv
-  %1 = load float* %arrayidx9, align 16
+  %1 = load float, float* %arrayidx9, align 16
   %cmp10 = fcmp ogt float %1, %max.235
   %2 = trunc i64 %indvars.iv to i32
   %yindex.3 = select i1 %cmp10, i32 %2, i32 %yindex.233
@@ -60,7 +60,7 @@
   %max.3 = select i1 %cmp10, float %1, float %max.235
   %indvars.iv.next45 = or i64 %indvars.iv, 1
   %arrayidx9.1 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next45
-  %4 = load float* %arrayidx9.1, align 4
+  %4 = load float, float* %arrayidx9.1, align 4
   %cmp10.1 = fcmp ogt float %4, %max.3
   %5 = trunc i64 %indvars.iv.next45 to i32
   %yindex.3.1 = select i1 %cmp10.1, i32 %5, i32 %yindex.3
@@ -68,7 +68,7 @@
   %max.3.1 = select i1 %cmp10.1, float %4, float %max.3
   %indvars.iv.next.146 = or i64 %indvars.iv, 2
   %arrayidx9.2 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.146
-  %6 = load float* %arrayidx9.2, align 8
+  %6 = load float, float* %arrayidx9.2, align 8
   %cmp10.2 = fcmp ogt float %6, %max.3.1
   %7 = trunc i64 %indvars.iv.next.146 to i32
   %yindex.3.2 = select i1 %cmp10.2, i32 %7, i32 %yindex.3.1
@@ -76,7 +76,7 @@
   %max.3.2 = select i1 %cmp10.2, float %6, float %max.3.1
   %indvars.iv.next.247 = or i64 %indvars.iv, 3
   %arrayidx9.3 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.247
-  %8 = load float* %arrayidx9.3, align 4
+  %8 = load float, float* %arrayidx9.3, align 4
   %cmp10.3 = fcmp ogt float %8, %max.3.2
   %9 = trunc i64 %indvars.iv.next.247 to i32
   %yindex.3.3 = select i1 %cmp10.3, i32 %9, i32 %yindex.3.2
@@ -84,7 +84,7 @@
   %max.3.3 = select i1 %cmp10.3, float %8, float %max.3.2
   %indvars.iv.next.348 = or i64 %indvars.iv, 4
   %arrayidx9.4 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.348
-  %10 = load float* %arrayidx9.4, align 16
+  %10 = load float, float* %arrayidx9.4, align 16
   %cmp10.4 = fcmp ogt float %10, %max.3.3
   %11 = trunc i64 %indvars.iv.next.348 to i32
   %yindex.3.4 = select i1 %cmp10.4, i32 %11, i32 %yindex.3.3
@@ -92,7 +92,7 @@
   %max.3.4 = select i1 %cmp10.4, float %10, float %max.3.3
   %indvars.iv.next.449 = or i64 %indvars.iv, 5
   %arrayidx9.5 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.449
-  %12 = load float* %arrayidx9.5, align 4
+  %12 = load float, float* %arrayidx9.5, align 4
   %cmp10.5 = fcmp ogt float %12, %max.3.4
   %13 = trunc i64 %indvars.iv.next.449 to i32
   %yindex.3.5 = select i1 %cmp10.5, i32 %13, i32 %yindex.3.4
@@ -100,7 +100,7 @@
   %max.3.5 = select i1 %cmp10.5, float %12, float %max.3.4
   %indvars.iv.next.550 = or i64 %indvars.iv, 6
   %arrayidx9.6 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.550
-  %14 = load float* %arrayidx9.6, align 8
+  %14 = load float, float* %arrayidx9.6, align 8
   %cmp10.6 = fcmp ogt float %14, %max.3.5
   %15 = trunc i64 %indvars.iv.next.550 to i32
   %yindex.3.6 = select i1 %cmp10.6, i32 %15, i32 %yindex.3.5
@@ -108,7 +108,7 @@
   %max.3.6 = select i1 %cmp10.6, float %14, float %max.3.5
   %indvars.iv.next.651 = or i64 %indvars.iv, 7
   %arrayidx9.7 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.651
-  %16 = load float* %arrayidx9.7, align 4
+  %16 = load float, float* %arrayidx9.7, align 4
   %cmp10.7 = fcmp ogt float %16, %max.3.6
   %17 = trunc i64 %indvars.iv.next.651 to i32
   %yindex.3.7 = select i1 %cmp10.7, i32 %17, i32 %yindex.3.6
@@ -116,7 +116,7 @@
   %max.3.7 = select i1 %cmp10.7, float %16, float %max.3.6
   %indvars.iv.next.752 = or i64 %indvars.iv, 8
   %arrayidx9.8 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.752
-  %18 = load float* %arrayidx9.8, align 16
+  %18 = load float, float* %arrayidx9.8, align 16
   %cmp10.8 = fcmp ogt float %18, %max.3.7
   %19 = trunc i64 %indvars.iv.next.752 to i32
   %yindex.3.8 = select i1 %cmp10.8, i32 %19, i32 %yindex.3.7
@@ -124,7 +124,7 @@
   %max.3.8 = select i1 %cmp10.8, float %18, float %max.3.7
   %indvars.iv.next.853 = or i64 %indvars.iv, 9
   %arrayidx9.9 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.853
-  %20 = load float* %arrayidx9.9, align 4
+  %20 = load float, float* %arrayidx9.9, align 4
   %cmp10.9 = fcmp ogt float %20, %max.3.8
   %21 = trunc i64 %indvars.iv.next.853 to i32
   %yindex.3.9 = select i1 %cmp10.9, i32 %21, i32 %yindex.3.8
@@ -132,7 +132,7 @@
   %max.3.9 = select i1 %cmp10.9, float %20, float %max.3.8
   %indvars.iv.next.954 = or i64 %indvars.iv, 10
   %arrayidx9.10 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.954
-  %22 = load float* %arrayidx9.10, align 8
+  %22 = load float, float* %arrayidx9.10, align 8
   %cmp10.10 = fcmp ogt float %22, %max.3.9
   %23 = trunc i64 %indvars.iv.next.954 to i32
   %yindex.3.10 = select i1 %cmp10.10, i32 %23, i32 %yindex.3.9
@@ -140,7 +140,7 @@
   %max.3.10 = select i1 %cmp10.10, float %22, float %max.3.9
   %indvars.iv.next.1055 = or i64 %indvars.iv, 11
   %arrayidx9.11 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1055
-  %24 = load float* %arrayidx9.11, align 4
+  %24 = load float, float* %arrayidx9.11, align 4
   %cmp10.11 = fcmp ogt float %24, %max.3.10
   %25 = trunc i64 %indvars.iv.next.1055 to i32
   %yindex.3.11 = select i1 %cmp10.11, i32 %25, i32 %yindex.3.10
@@ -148,7 +148,7 @@
   %max.3.11 = select i1 %cmp10.11, float %24, float %max.3.10
   %indvars.iv.next.1156 = or i64 %indvars.iv, 12
   %arrayidx9.12 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1156
-  %26 = load float* %arrayidx9.12, align 16
+  %26 = load float, float* %arrayidx9.12, align 16
   %cmp10.12 = fcmp ogt float %26, %max.3.11
   %27 = trunc i64 %indvars.iv.next.1156 to i32
   %yindex.3.12 = select i1 %cmp10.12, i32 %27, i32 %yindex.3.11
@@ -156,7 +156,7 @@
   %max.3.12 = select i1 %cmp10.12, float %26, float %max.3.11
   %indvars.iv.next.1257 = or i64 %indvars.iv, 13
   %arrayidx9.13 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1257
-  %28 = load float* %arrayidx9.13, align 4
+  %28 = load float, float* %arrayidx9.13, align 4
   %cmp10.13 = fcmp ogt float %28, %max.3.12
   %29 = trunc i64 %indvars.iv.next.1257 to i32
   %yindex.3.13 = select i1 %cmp10.13, i32 %29, i32 %yindex.3.12
@@ -164,7 +164,7 @@
   %max.3.13 = select i1 %cmp10.13, float %28, float %max.3.12
   %indvars.iv.next.1358 = or i64 %indvars.iv, 14
   %arrayidx9.14 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1358
-  %30 = load float* %arrayidx9.14, align 8
+  %30 = load float, float* %arrayidx9.14, align 8
   %cmp10.14 = fcmp ogt float %30, %max.3.13
   %31 = trunc i64 %indvars.iv.next.1358 to i32
   %yindex.3.14 = select i1 %cmp10.14, i32 %31, i32 %yindex.3.13
@@ -172,7 +172,7 @@
   %max.3.14 = select i1 %cmp10.14, float %30, float %max.3.13
   %indvars.iv.next.1459 = or i64 %indvars.iv, 15
   %arrayidx9.15 = getelementptr inbounds [256 x [256 x float]], [256 x [256 x float]]* @aa, i64 0, i64 %indvars.iv42, i64 %indvars.iv.next.1459
-  %32 = load float* %arrayidx9.15, align 4
+  %32 = load float, float* %arrayidx9.15, align 4
   %cmp10.15 = fcmp ogt float %32, %max.3.14
   %33 = trunc i64 %indvars.iv.next.1459 to i32
   %yindex.3.15 = select i1 %cmp10.15, i32 %33, i32 %yindex.3.14
diff --git a/llvm/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll b/llvm/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll
index b1cbb36..05390cf 100644
--- a/llvm/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll
+++ b/llvm/test/CodeGen/PowerPC/2011-12-08-DemandedBitsMiscompile.ll
@@ -2,7 +2,7 @@
 
 define void @test(i32* nocapture %x, i64* %xx, i32* %yp) nounwind uwtable ssp {
 entry:
-  %yy = load i32* %yp
+  %yy = load i32, i32* %yp
   %y = add i32 %yy, 1
   %z = zext i32 %y to i64
   %z2 = shl i64 %z, 32 
diff --git a/llvm/test/CodeGen/PowerPC/Atomics-64.ll b/llvm/test/CodeGen/PowerPC/Atomics-64.ll
index 122b54e..77066de 100644
--- a/llvm/test/CodeGen/PowerPC/Atomics-64.ll
+++ b/llvm/test/CodeGen/PowerPC/Atomics-64.ll
@@ -254,272 +254,272 @@
 
 define void @test_op_and_fetch() nounwind {
 entry:
-  %0 = load i8* @uc, align 1
+  %0 = load i8, i8* @uc, align 1
   %1 = atomicrmw add i8* @sc, i8 %0 monotonic
   %2 = add i8 %1, %0
   store i8 %2, i8* @sc, align 1
-  %3 = load i8* @uc, align 1
+  %3 = load i8, i8* @uc, align 1
   %4 = atomicrmw add i8* @uc, i8 %3 monotonic
   %5 = add i8 %4, %3
   store i8 %5, i8* @uc, align 1
-  %6 = load i8* @uc, align 1
+  %6 = load i8, i8* @uc, align 1
   %7 = zext i8 %6 to i16
   %8 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
   %9 = atomicrmw add i16* %8, i16 %7 monotonic
   %10 = add i16 %9, %7
   store i16 %10, i16* @ss, align 2
-  %11 = load i8* @uc, align 1
+  %11 = load i8, i8* @uc, align 1
   %12 = zext i8 %11 to i16
   %13 = bitcast i8* bitcast (i16* @us to i8*) to i16*
   %14 = atomicrmw add i16* %13, i16 %12 monotonic
   %15 = add i16 %14, %12
   store i16 %15, i16* @us, align 2
-  %16 = load i8* @uc, align 1
+  %16 = load i8, i8* @uc, align 1
   %17 = zext i8 %16 to i32
   %18 = bitcast i8* bitcast (i32* @si to i8*) to i32*
   %19 = atomicrmw add i32* %18, i32 %17 monotonic
   %20 = add i32 %19, %17
   store i32 %20, i32* @si, align 4
-  %21 = load i8* @uc, align 1
+  %21 = load i8, i8* @uc, align 1
   %22 = zext i8 %21 to i32
   %23 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
   %24 = atomicrmw add i32* %23, i32 %22 monotonic
   %25 = add i32 %24, %22
   store i32 %25, i32* @ui, align 4
-  %26 = load i8* @uc, align 1
+  %26 = load i8, i8* @uc, align 1
   %27 = zext i8 %26 to i64
   %28 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
   %29 = atomicrmw add i64* %28, i64 %27 monotonic
   %30 = add i64 %29, %27
   store i64 %30, i64* @sl, align 8
-  %31 = load i8* @uc, align 1
+  %31 = load i8, i8* @uc, align 1
   %32 = zext i8 %31 to i64
   %33 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
   %34 = atomicrmw add i64* %33, i64 %32 monotonic
   %35 = add i64 %34, %32
   store i64 %35, i64* @ul, align 8
-  %36 = load i8* @uc, align 1
+  %36 = load i8, i8* @uc, align 1
   %37 = atomicrmw sub i8* @sc, i8 %36 monotonic
   %38 = sub i8 %37, %36
   store i8 %38, i8* @sc, align 1
-  %39 = load i8* @uc, align 1
+  %39 = load i8, i8* @uc, align 1
   %40 = atomicrmw sub i8* @uc, i8 %39 monotonic
   %41 = sub i8 %40, %39
   store i8 %41, i8* @uc, align 1
-  %42 = load i8* @uc, align 1
+  %42 = load i8, i8* @uc, align 1
   %43 = zext i8 %42 to i16
   %44 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
   %45 = atomicrmw sub i16* %44, i16 %43 monotonic
   %46 = sub i16 %45, %43
   store i16 %46, i16* @ss, align 2
-  %47 = load i8* @uc, align 1
+  %47 = load i8, i8* @uc, align 1
   %48 = zext i8 %47 to i16
   %49 = bitcast i8* bitcast (i16* @us to i8*) to i16*
   %50 = atomicrmw sub i16* %49, i16 %48 monotonic
   %51 = sub i16 %50, %48
   store i16 %51, i16* @us, align 2
-  %52 = load i8* @uc, align 1
+  %52 = load i8, i8* @uc, align 1
   %53 = zext i8 %52 to i32
   %54 = bitcast i8* bitcast (i32* @si to i8*) to i32*
   %55 = atomicrmw sub i32* %54, i32 %53 monotonic
   %56 = sub i32 %55, %53
   store i32 %56, i32* @si, align 4
-  %57 = load i8* @uc, align 1
+  %57 = load i8, i8* @uc, align 1
   %58 = zext i8 %57 to i32
   %59 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
   %60 = atomicrmw sub i32* %59, i32 %58 monotonic
   %61 = sub i32 %60, %58
   store i32 %61, i32* @ui, align 4
-  %62 = load i8* @uc, align 1
+  %62 = load i8, i8* @uc, align 1
   %63 = zext i8 %62 to i64
   %64 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
   %65 = atomicrmw sub i64* %64, i64 %63 monotonic
   %66 = sub i64 %65, %63
   store i64 %66, i64* @sl, align 8
-  %67 = load i8* @uc, align 1
+  %67 = load i8, i8* @uc, align 1
   %68 = zext i8 %67 to i64
   %69 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
   %70 = atomicrmw sub i64* %69, i64 %68 monotonic
   %71 = sub i64 %70, %68
   store i64 %71, i64* @ul, align 8
-  %72 = load i8* @uc, align 1
+  %72 = load i8, i8* @uc, align 1
   %73 = atomicrmw or i8* @sc, i8 %72 monotonic
   %74 = or i8 %73, %72
   store i8 %74, i8* @sc, align 1
-  %75 = load i8* @uc, align 1
+  %75 = load i8, i8* @uc, align 1
   %76 = atomicrmw or i8* @uc, i8 %75 monotonic
   %77 = or i8 %76, %75
   store i8 %77, i8* @uc, align 1
-  %78 = load i8* @uc, align 1
+  %78 = load i8, i8* @uc, align 1
   %79 = zext i8 %78 to i16
   %80 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
   %81 = atomicrmw or i16* %80, i16 %79 monotonic
   %82 = or i16 %81, %79
   store i16 %82, i16* @ss, align 2
-  %83 = load i8* @uc, align 1
+  %83 = load i8, i8* @uc, align 1
   %84 = zext i8 %83 to i16
   %85 = bitcast i8* bitcast (i16* @us to i8*) to i16*
   %86 = atomicrmw or i16* %85, i16 %84 monotonic
   %87 = or i16 %86, %84
   store i16 %87, i16* @us, align 2
-  %88 = load i8* @uc, align 1
+  %88 = load i8, i8* @uc, align 1
   %89 = zext i8 %88 to i32
   %90 = bitcast i8* bitcast (i32* @si to i8*) to i32*
   %91 = atomicrmw or i32* %90, i32 %89 monotonic
   %92 = or i32 %91, %89
   store i32 %92, i32* @si, align 4
-  %93 = load i8* @uc, align 1
+  %93 = load i8, i8* @uc, align 1
   %94 = zext i8 %93 to i32
   %95 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
   %96 = atomicrmw or i32* %95, i32 %94 monotonic
   %97 = or i32 %96, %94
   store i32 %97, i32* @ui, align 4
-  %98 = load i8* @uc, align 1
+  %98 = load i8, i8* @uc, align 1
   %99 = zext i8 %98 to i64
   %100 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
   %101 = atomicrmw or i64* %100, i64 %99 monotonic
   %102 = or i64 %101, %99
   store i64 %102, i64* @sl, align 8
-  %103 = load i8* @uc, align 1
+  %103 = load i8, i8* @uc, align 1
   %104 = zext i8 %103 to i64
   %105 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
   %106 = atomicrmw or i64* %105, i64 %104 monotonic
   %107 = or i64 %106, %104
   store i64 %107, i64* @ul, align 8
-  %108 = load i8* @uc, align 1
+  %108 = load i8, i8* @uc, align 1
   %109 = atomicrmw xor i8* @sc, i8 %108 monotonic
   %110 = xor i8 %109, %108
   store i8 %110, i8* @sc, align 1
-  %111 = load i8* @uc, align 1
+  %111 = load i8, i8* @uc, align 1
   %112 = atomicrmw xor i8* @uc, i8 %111 monotonic
   %113 = xor i8 %112, %111
   store i8 %113, i8* @uc, align 1
-  %114 = load i8* @uc, align 1
+  %114 = load i8, i8* @uc, align 1
   %115 = zext i8 %114 to i16
   %116 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
   %117 = atomicrmw xor i16* %116, i16 %115 monotonic
   %118 = xor i16 %117, %115
   store i16 %118, i16* @ss, align 2
-  %119 = load i8* @uc, align 1
+  %119 = load i8, i8* @uc, align 1
   %120 = zext i8 %119 to i16
   %121 = bitcast i8* bitcast (i16* @us to i8*) to i16*
   %122 = atomicrmw xor i16* %121, i16 %120 monotonic
   %123 = xor i16 %122, %120
   store i16 %123, i16* @us, align 2
-  %124 = load i8* @uc, align 1
+  %124 = load i8, i8* @uc, align 1
   %125 = zext i8 %124 to i32
   %126 = bitcast i8* bitcast (i32* @si to i8*) to i32*
   %127 = atomicrmw xor i32* %126, i32 %125 monotonic
   %128 = xor i32 %127, %125
   store i32 %128, i32* @si, align 4
-  %129 = load i8* @uc, align 1
+  %129 = load i8, i8* @uc, align 1
   %130 = zext i8 %129 to i32
   %131 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
   %132 = atomicrmw xor i32* %131, i32 %130 monotonic
   %133 = xor i32 %132, %130
   store i32 %133, i32* @ui, align 4
-  %134 = load i8* @uc, align 1
+  %134 = load i8, i8* @uc, align 1
   %135 = zext i8 %134 to i64
   %136 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
   %137 = atomicrmw xor i64* %136, i64 %135 monotonic
   %138 = xor i64 %137, %135
   store i64 %138, i64* @sl, align 8
-  %139 = load i8* @uc, align 1
+  %139 = load i8, i8* @uc, align 1
   %140 = zext i8 %139 to i64
   %141 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
   %142 = atomicrmw xor i64* %141, i64 %140 monotonic
   %143 = xor i64 %142, %140
   store i64 %143, i64* @ul, align 8
-  %144 = load i8* @uc, align 1
+  %144 = load i8, i8* @uc, align 1
   %145 = atomicrmw and i8* @sc, i8 %144 monotonic
   %146 = and i8 %145, %144
   store i8 %146, i8* @sc, align 1
-  %147 = load i8* @uc, align 1
+  %147 = load i8, i8* @uc, align 1
   %148 = atomicrmw and i8* @uc, i8 %147 monotonic
   %149 = and i8 %148, %147
   store i8 %149, i8* @uc, align 1
-  %150 = load i8* @uc, align 1
+  %150 = load i8, i8* @uc, align 1
   %151 = zext i8 %150 to i16
   %152 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
   %153 = atomicrmw and i16* %152, i16 %151 monotonic
   %154 = and i16 %153, %151
   store i16 %154, i16* @ss, align 2
-  %155 = load i8* @uc, align 1
+  %155 = load i8, i8* @uc, align 1
   %156 = zext i8 %155 to i16
   %157 = bitcast i8* bitcast (i16* @us to i8*) to i16*
   %158 = atomicrmw and i16* %157, i16 %156 monotonic
   %159 = and i16 %158, %156
   store i16 %159, i16* @us, align 2
-  %160 = load i8* @uc, align 1
+  %160 = load i8, i8* @uc, align 1
   %161 = zext i8 %160 to i32
   %162 = bitcast i8* bitcast (i32* @si to i8*) to i32*
   %163 = atomicrmw and i32* %162, i32 %161 monotonic
   %164 = and i32 %163, %161
   store i32 %164, i32* @si, align 4
-  %165 = load i8* @uc, align 1
+  %165 = load i8, i8* @uc, align 1
   %166 = zext i8 %165 to i32
   %167 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
   %168 = atomicrmw and i32* %167, i32 %166 monotonic
   %169 = and i32 %168, %166
   store i32 %169, i32* @ui, align 4
-  %170 = load i8* @uc, align 1
+  %170 = load i8, i8* @uc, align 1
   %171 = zext i8 %170 to i64
   %172 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
   %173 = atomicrmw and i64* %172, i64 %171 monotonic
   %174 = and i64 %173, %171
   store i64 %174, i64* @sl, align 8
-  %175 = load i8* @uc, align 1
+  %175 = load i8, i8* @uc, align 1
   %176 = zext i8 %175 to i64
   %177 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
   %178 = atomicrmw and i64* %177, i64 %176 monotonic
   %179 = and i64 %178, %176
   store i64 %179, i64* @ul, align 8
-  %180 = load i8* @uc, align 1
+  %180 = load i8, i8* @uc, align 1
   %181 = atomicrmw nand i8* @sc, i8 %180 monotonic
   %182 = xor i8 %181, -1
   %183 = and i8 %182, %180
   store i8 %183, i8* @sc, align 1
-  %184 = load i8* @uc, align 1
+  %184 = load i8, i8* @uc, align 1
   %185 = atomicrmw nand i8* @uc, i8 %184 monotonic
   %186 = xor i8 %185, -1
   %187 = and i8 %186, %184
   store i8 %187, i8* @uc, align 1
-  %188 = load i8* @uc, align 1
+  %188 = load i8, i8* @uc, align 1
   %189 = zext i8 %188 to i16
   %190 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
   %191 = atomicrmw nand i16* %190, i16 %189 monotonic
   %192 = xor i16 %191, -1
   %193 = and i16 %192, %189
   store i16 %193, i16* @ss, align 2
-  %194 = load i8* @uc, align 1
+  %194 = load i8, i8* @uc, align 1
   %195 = zext i8 %194 to i16
   %196 = bitcast i8* bitcast (i16* @us to i8*) to i16*
   %197 = atomicrmw nand i16* %196, i16 %195 monotonic
   %198 = xor i16 %197, -1
   %199 = and i16 %198, %195
   store i16 %199, i16* @us, align 2
-  %200 = load i8* @uc, align 1
+  %200 = load i8, i8* @uc, align 1
   %201 = zext i8 %200 to i32
   %202 = bitcast i8* bitcast (i32* @si to i8*) to i32*
   %203 = atomicrmw nand i32* %202, i32 %201 monotonic
   %204 = xor i32 %203, -1
   %205 = and i32 %204, %201
   store i32 %205, i32* @si, align 4
-  %206 = load i8* @uc, align 1
+  %206 = load i8, i8* @uc, align 1
   %207 = zext i8 %206 to i32
   %208 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
   %209 = atomicrmw nand i32* %208, i32 %207 monotonic
   %210 = xor i32 %209, -1
   %211 = and i32 %210, %207
   store i32 %211, i32* @ui, align 4
-  %212 = load i8* @uc, align 1
+  %212 = load i8, i8* @uc, align 1
   %213 = zext i8 %212 to i64
   %214 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
   %215 = atomicrmw nand i64* %214, i64 %213 monotonic
   %216 = xor i64 %215, -1
   %217 = and i64 %216, %213
   store i64 %217, i64* @sl, align 8
-  %218 = load i8* @uc, align 1
+  %218 = load i8, i8* @uc, align 1
   %219 = zext i8 %218 to i64
   %220 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
   %221 = atomicrmw nand i64* %220, i64 %219 monotonic
@@ -534,73 +534,73 @@
 
 define void @test_compare_and_swap() nounwind {
 entry:
-  %0 = load i8* @uc, align 1
-  %1 = load i8* @sc, align 1
+  %0 = load i8, i8* @uc, align 1
+  %1 = load i8, i8* @sc, align 1
   %2 = cmpxchg i8* @sc, i8 %0, i8 %1 monotonic monotonic
   store i8 %2, i8* @sc, align 1
-  %3 = load i8* @uc, align 1
-  %4 = load i8* @sc, align 1
+  %3 = load i8, i8* @uc, align 1
+  %4 = load i8, i8* @sc, align 1
   %5 = cmpxchg i8* @uc, i8 %3, i8 %4 monotonic monotonic
   store i8 %5, i8* @uc, align 1
-  %6 = load i8* @uc, align 1
+  %6 = load i8, i8* @uc, align 1
   %7 = zext i8 %6 to i16
-  %8 = load i8* @sc, align 1
+  %8 = load i8, i8* @sc, align 1
   %9 = sext i8 %8 to i16
   %10 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
   %11 = cmpxchg i16* %10, i16 %7, i16 %9 monotonic monotonic
   store i16 %11, i16* @ss, align 2
-  %12 = load i8* @uc, align 1
+  %12 = load i8, i8* @uc, align 1
   %13 = zext i8 %12 to i16
-  %14 = load i8* @sc, align 1
+  %14 = load i8, i8* @sc, align 1
   %15 = sext i8 %14 to i16
   %16 = bitcast i8* bitcast (i16* @us to i8*) to i16*
   %17 = cmpxchg i16* %16, i16 %13, i16 %15 monotonic monotonic
   store i16 %17, i16* @us, align 2
-  %18 = load i8* @uc, align 1
+  %18 = load i8, i8* @uc, align 1
   %19 = zext i8 %18 to i32
-  %20 = load i8* @sc, align 1
+  %20 = load i8, i8* @sc, align 1
   %21 = sext i8 %20 to i32
   %22 = bitcast i8* bitcast (i32* @si to i8*) to i32*
   %23 = cmpxchg i32* %22, i32 %19, i32 %21 monotonic monotonic
   store i32 %23, i32* @si, align 4
-  %24 = load i8* @uc, align 1
+  %24 = load i8, i8* @uc, align 1
   %25 = zext i8 %24 to i32
-  %26 = load i8* @sc, align 1
+  %26 = load i8, i8* @sc, align 1
   %27 = sext i8 %26 to i32
   %28 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
   %29 = cmpxchg i32* %28, i32 %25, i32 %27 monotonic monotonic
   store i32 %29, i32* @ui, align 4
-  %30 = load i8* @uc, align 1
+  %30 = load i8, i8* @uc, align 1
   %31 = zext i8 %30 to i64
-  %32 = load i8* @sc, align 1
+  %32 = load i8, i8* @sc, align 1
   %33 = sext i8 %32 to i64
   %34 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
   %35 = cmpxchg i64* %34, i64 %31, i64 %33 monotonic monotonic
   store i64 %35, i64* @sl, align 8
-  %36 = load i8* @uc, align 1
+  %36 = load i8, i8* @uc, align 1
   %37 = zext i8 %36 to i64
-  %38 = load i8* @sc, align 1
+  %38 = load i8, i8* @sc, align 1
   %39 = sext i8 %38 to i64
   %40 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
   %41 = cmpxchg i64* %40, i64 %37, i64 %39 monotonic monotonic
   store i64 %41, i64* @ul, align 8
-  %42 = load i8* @uc, align 1
-  %43 = load i8* @sc, align 1
+  %42 = load i8, i8* @uc, align 1
+  %43 = load i8, i8* @sc, align 1
   %44 = cmpxchg i8* @sc, i8 %42, i8 %43 monotonic monotonic
   %45 = icmp eq i8 %44, %42
   %46 = zext i1 %45 to i8
   %47 = zext i8 %46 to i32
   store i32 %47, i32* @ui, align 4
-  %48 = load i8* @uc, align 1
-  %49 = load i8* @sc, align 1
+  %48 = load i8, i8* @uc, align 1
+  %49 = load i8, i8* @sc, align 1
   %50 = cmpxchg i8* @uc, i8 %48, i8 %49 monotonic monotonic
   %51 = icmp eq i8 %50, %48
   %52 = zext i1 %51 to i8
   %53 = zext i8 %52 to i32
   store i32 %53, i32* @ui, align 4
-  %54 = load i8* @uc, align 1
+  %54 = load i8, i8* @uc, align 1
   %55 = zext i8 %54 to i16
-  %56 = load i8* @sc, align 1
+  %56 = load i8, i8* @sc, align 1
   %57 = sext i8 %56 to i16
   %58 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
   %59 = cmpxchg i16* %58, i16 %55, i16 %57 monotonic monotonic
@@ -608,9 +608,9 @@
   %61 = zext i1 %60 to i8
   %62 = zext i8 %61 to i32
   store i32 %62, i32* @ui, align 4
-  %63 = load i8* @uc, align 1
+  %63 = load i8, i8* @uc, align 1
   %64 = zext i8 %63 to i16
-  %65 = load i8* @sc, align 1
+  %65 = load i8, i8* @sc, align 1
   %66 = sext i8 %65 to i16
   %67 = bitcast i8* bitcast (i16* @us to i8*) to i16*
   %68 = cmpxchg i16* %67, i16 %64, i16 %66 monotonic monotonic
@@ -618,9 +618,9 @@
   %70 = zext i1 %69 to i8
   %71 = zext i8 %70 to i32
   store i32 %71, i32* @ui, align 4
-  %72 = load i8* @uc, align 1
+  %72 = load i8, i8* @uc, align 1
   %73 = zext i8 %72 to i32
-  %74 = load i8* @sc, align 1
+  %74 = load i8, i8* @sc, align 1
   %75 = sext i8 %74 to i32
   %76 = bitcast i8* bitcast (i32* @si to i8*) to i32*
   %77 = cmpxchg i32* %76, i32 %73, i32 %75 monotonic monotonic
@@ -628,9 +628,9 @@
   %79 = zext i1 %78 to i8
   %80 = zext i8 %79 to i32
   store i32 %80, i32* @ui, align 4
-  %81 = load i8* @uc, align 1
+  %81 = load i8, i8* @uc, align 1
   %82 = zext i8 %81 to i32
-  %83 = load i8* @sc, align 1
+  %83 = load i8, i8* @sc, align 1
   %84 = sext i8 %83 to i32
   %85 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
   %86 = cmpxchg i32* %85, i32 %82, i32 %84 monotonic monotonic
@@ -638,9 +638,9 @@
   %88 = zext i1 %87 to i8
   %89 = zext i8 %88 to i32
   store i32 %89, i32* @ui, align 4
-  %90 = load i8* @uc, align 1
+  %90 = load i8, i8* @uc, align 1
   %91 = zext i8 %90 to i64
-  %92 = load i8* @sc, align 1
+  %92 = load i8, i8* @sc, align 1
   %93 = sext i8 %92 to i64
   %94 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
   %95 = cmpxchg i64* %94, i64 %91, i64 %93 monotonic monotonic
@@ -648,9 +648,9 @@
   %97 = zext i1 %96 to i8
   %98 = zext i8 %97 to i32
   store i32 %98, i32* @ui, align 4
-  %99 = load i8* @uc, align 1
+  %99 = load i8, i8* @uc, align 1
   %100 = zext i8 %99 to i64
-  %101 = load i8* @sc, align 1
+  %101 = load i8, i8* @sc, align 1
   %102 = sext i8 %101 to i64
   %103 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
   %104 = cmpxchg i64* %103, i64 %100, i64 %102 monotonic monotonic
diff --git a/llvm/test/CodeGen/PowerPC/a2-fp-basic.ll b/llvm/test/CodeGen/PowerPC/a2-fp-basic.ll
index 07a7d9e..0324e38 100644
--- a/llvm/test/CodeGen/PowerPC/a2-fp-basic.ll
+++ b/llvm/test/CodeGen/PowerPC/a2-fp-basic.ll
@@ -5,13 +5,13 @@
 define void @maybe_an_fma(%0* sret %agg.result, %0* byval %a, %0* byval %b, %0* byval %c) nounwind {
 entry:
   %a.realp = getelementptr inbounds %0, %0* %a, i32 0, i32 0
-  %a.real = load double* %a.realp
+  %a.real = load double, double* %a.realp
   %a.imagp = getelementptr inbounds %0, %0* %a, i32 0, i32 1
-  %a.imag = load double* %a.imagp
+  %a.imag = load double, double* %a.imagp
   %b.realp = getelementptr inbounds %0, %0* %b, i32 0, i32 0
-  %b.real = load double* %b.realp
+  %b.real = load double, double* %b.realp
   %b.imagp = getelementptr inbounds %0, %0* %b, i32 0, i32 1
-  %b.imag = load double* %b.imagp
+  %b.imag = load double, double* %b.imagp
   %mul.rl = fmul double %a.real, %b.real
   %mul.rr = fmul double %a.imag, %b.imag
   %mul.r = fsub double %mul.rl, %mul.rr
@@ -19,9 +19,9 @@
   %mul.ir = fmul double %a.real, %b.imag
   %mul.i = fadd double %mul.il, %mul.ir
   %c.realp = getelementptr inbounds %0, %0* %c, i32 0, i32 0
-  %c.real = load double* %c.realp
+  %c.real = load double, double* %c.realp
   %c.imagp = getelementptr inbounds %0, %0* %c, i32 0, i32 1
-  %c.imag = load double* %c.imagp
+  %c.imag = load double, double* %c.imagp
   %add.r = fadd double %mul.r, %c.real
   %add.i = fadd double %mul.i, %c.imag
   %real = getelementptr inbounds %0, %0* %agg.result, i32 0, i32 0
diff --git a/llvm/test/CodeGen/PowerPC/addi-licm.ll b/llvm/test/CodeGen/PowerPC/addi-licm.ll
index ff33cf0..b6cfeec0 100644
--- a/llvm/test/CodeGen/PowerPC/addi-licm.ll
+++ b/llvm/test/CodeGen/PowerPC/addi-licm.ll
@@ -34,10 +34,10 @@
   %accumulator.09.i = phi double [ %add.i, %for.body.i ], [ 0.000000e+00, %entry ]
   %i.08.i = phi i64 [ %inc.i, %for.body.i ], [ 0, %entry ]
   %arrayidx.i = getelementptr inbounds [2048 x float], [2048 x float]* %x, i64 0, i64 %i.08.i
-  %v14 = load float* %arrayidx.i, align 4
+  %v14 = load float, float* %arrayidx.i, align 4
   %conv.i = fpext float %v14 to double
   %arrayidx1.i = getelementptr inbounds [2048 x float], [2048 x float]* %y, i64 0, i64 %i.08.i
-  %v15 = load float* %arrayidx1.i, align 4
+  %v15 = load float, float* %arrayidx1.i, align 4
   %conv2.i = fpext float %v15 to double
   %mul.i = fmul double %conv.i, %conv2.i
   %add.i = fadd double %accumulator.09.i, %mul.i
diff --git a/llvm/test/CodeGen/PowerPC/addi-reassoc.ll b/llvm/test/CodeGen/PowerPC/addi-reassoc.ll
index 3a93b53..3624ce6 100644
--- a/llvm/test/CodeGen/PowerPC/addi-reassoc.ll
+++ b/llvm/test/CodeGen/PowerPC/addi-reassoc.ll
@@ -5,14 +5,14 @@
 define i32 @test1([4 x i32]* %P, i32 %i) {
         %tmp.2 = add i32 %i, 2          ; <i32> [#uses=1]
         %tmp.4 = getelementptr [4 x i32], [4 x i32]* %P, i32 %tmp.2, i32 1         ; <i32*> [#uses=1]
-        %tmp.5 = load i32* %tmp.4               ; <i32> [#uses=1]
+        %tmp.5 = load i32, i32* %tmp.4               ; <i32> [#uses=1]
         ret i32 %tmp.5
 }
 
 define i32 @test2(%struct.X* %P, i32 %i) {
         %tmp.2 = add i32 %i, 2          ; <i32> [#uses=1]
         %tmp.5 = getelementptr %struct.X, %struct.X* %P, i32 %tmp.2, i32 0, i32 1          ; <i8*> [#uses=1]
-        %tmp.6 = load i8* %tmp.5                ; <i8> [#uses=1]
+        %tmp.6 = load i8, i8* %tmp.5                ; <i8> [#uses=1]
         %tmp.7 = sext i8 %tmp.6 to i32          ; <i32> [#uses=1]
         ret i32 %tmp.7
 }
diff --git a/llvm/test/CodeGen/PowerPC/alias.ll b/llvm/test/CodeGen/PowerPC/alias.ll
index 86e4114..b1521d6 100644
--- a/llvm/test/CodeGen/PowerPC/alias.ll
+++ b/llvm/test/CodeGen/PowerPC/alias.ll
@@ -11,7 +11,7 @@
 define i32 @bar() {
 ; MEDIUM: addis 3, 2, fooa@toc@ha
 ; LARGE: addis 3, 2, .LC1@toc@ha
-  %a = load i32* @fooa
+  %a = load i32, i32* @fooa
   ret i32 %a
 }
 
@@ -20,7 +20,7 @@
 ; MEDIUM: addis 3, 2, foo2a@toc@ha
 ; MEDIUM: addi 3, 3, foo2a@toc@l
 ; LARGE: addis 3, 2, .LC3@toc@ha
-  %a = load i64* @foo2a
+  %a = load i64, i64* @foo2a
   ret i64 %a
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/and-elim.ll b/llvm/test/CodeGen/PowerPC/and-elim.ll
index a1ec29b..f1738b2 100644
--- a/llvm/test/CodeGen/PowerPC/and-elim.ll
+++ b/llvm/test/CodeGen/PowerPC/and-elim.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=ppc32 | not grep rlwin
 
 define void @test(i8* %P) {
-	%W = load i8* %P
+	%W = load i8, i8* %P
 	%X = shl i8 %W, 1
 	%Y = add i8 %X, 2
 	%Z = and i8 %Y, 254        ; dead and
diff --git a/llvm/test/CodeGen/PowerPC/anon_aggr.ll b/llvm/test/CodeGen/PowerPC/anon_aggr.ll
index 90497a2..1a25f4b 100644
--- a/llvm/test/CodeGen/PowerPC/anon_aggr.ll
+++ b/llvm/test/CodeGen/PowerPC/anon_aggr.ll
@@ -52,7 +52,7 @@
 entry:
   %array1_ptr = extractvalue {i64, i8* } %array1, 1
   %tmp = getelementptr inbounds %tarray, %tarray* %array2, i32 0, i32 1
-  %array2_ptr = load i8** %tmp
+  %array2_ptr = load i8*, i8** %tmp
   %cond = icmp eq i8* %array1_ptr, %array2_ptr
   br i1 %cond, label %equal, label %unequal
 equal:
@@ -94,9 +94,9 @@
 define i8* @func3({ i64, i8* }* byval %array1, %tarray* byval %array2) {
 entry:
   %tmp1 = getelementptr inbounds { i64, i8* }, { i64, i8* }* %array1, i32 0, i32 1
-  %array1_ptr = load i8** %tmp1
+  %array1_ptr = load i8*, i8** %tmp1
   %tmp2 = getelementptr inbounds %tarray, %tarray* %array2, i32 0, i32 1
-  %array2_ptr = load i8** %tmp2
+  %array2_ptr = load i8*, i8** %tmp2
   %cond = icmp eq i8* %array1_ptr, %array2_ptr
   br i1 %cond, label %equal, label %unequal
 equal:
@@ -141,7 +141,7 @@
 entry:
   %array1_ptr = extractvalue {i64, i8* } %array1, 1
   %tmp = getelementptr inbounds %tarray, %tarray* %array2, i32 0, i32 1
-  %array2_ptr = load i8** %tmp
+  %array2_ptr = load i8*, i8** %tmp
   %cond = icmp eq i8* %array1_ptr, %array2_ptr
   br i1 %cond, label %equal, label %unequal
 equal:
diff --git a/llvm/test/CodeGen/PowerPC/asm-constraints.ll b/llvm/test/CodeGen/PowerPC/asm-constraints.ll
index 9bf8b75..1cb00ca 100644
--- a/llvm/test/CodeGen/PowerPC/asm-constraints.ll
+++ b/llvm/test/CodeGen/PowerPC/asm-constraints.ll
@@ -23,7 +23,7 @@
   %addr.addr = alloca i8*, align 8
   store i32 %result, i32* %result.addr, align 4
   store i8* %addr, i8** %addr.addr, align 8
-  %0 = load i8** %addr.addr, align 8
+  %0 = load i8*, i8** %addr.addr, align 8
   %1 = call i32 asm sideeffect "ld${1:U}${1:X} $0,$1\0Acmpw $0,$0\0Abne- 1f\0A1: isync\0A", "=r,*m,~{memory},~{cr0}"(i8* %0) #1, !srcloc !1
   store i32 %1, i32* %result.addr, align 4
   ret void
diff --git a/llvm/test/CodeGen/PowerPC/atomic-2.ll b/llvm/test/CodeGen/PowerPC/atomic-2.ll
index 9cb0fa5..bc77ed7 100644
--- a/llvm/test/CodeGen/PowerPC/atomic-2.ll
+++ b/llvm/test/CodeGen/PowerPC/atomic-2.ll
@@ -39,7 +39,7 @@
 define i64 @atomic_load(i64* %mem) nounwind {
 entry:
 ; CHECK: @atomic_load
-  %tmp = load atomic i64* %mem acquire, align 64
+  %tmp = load atomic i64, i64* %mem acquire, align 64
 ; CHECK-NOT: ldarx
 ; CHECK: ld
 ; CHECK: sync 1
diff --git a/llvm/test/CodeGen/PowerPC/atomics-indexed.ll b/llvm/test/CodeGen/PowerPC/atomics-indexed.ll
index 910874e..b9ec3c6 100644
--- a/llvm/test/CodeGen/PowerPC/atomics-indexed.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-indexed.ll
@@ -13,7 +13,7 @@
 ; CHECK: lbzx
 ; CHECK: sync 1
   %ptr = getelementptr inbounds [100000 x i8], [100000 x i8]* %mem, i64 0, i64 90000
-  %val = load atomic i8* %ptr seq_cst, align 1
+  %val = load atomic i8, i8* %ptr seq_cst, align 1
   ret i8 %val
 }
 define i16 @load_x_i16_acquire([100000 x i16]* %mem) {
@@ -21,7 +21,7 @@
 ; CHECK: lhzx
 ; CHECK: sync 1
   %ptr = getelementptr inbounds [100000 x i16], [100000 x i16]* %mem, i64 0, i64 90000
-  %val = load atomic i16* %ptr acquire, align 2
+  %val = load atomic i16, i16* %ptr acquire, align 2
   ret i16 %val
 }
 define i32 @load_x_i32_monotonic([100000 x i32]* %mem) {
@@ -29,7 +29,7 @@
 ; CHECK: lwzx
 ; CHECK-NOT: sync
   %ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %mem, i64 0, i64 90000
-  %val = load atomic i32* %ptr monotonic, align 4
+  %val = load atomic i32, i32* %ptr monotonic, align 4
   ret i32 %val
 }
 define i64 @load_x_i64_unordered([100000 x i64]* %mem) {
@@ -39,7 +39,7 @@
 ; PPC64: ldx
 ; CHECK-NOT: sync
   %ptr = getelementptr inbounds [100000 x i64], [100000 x i64]* %mem, i64 0, i64 90000
-  %val = load atomic i64* %ptr unordered, align 8
+  %val = load atomic i64, i64* %ptr unordered, align 8
   ret i64 %val
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/atomics.ll b/llvm/test/CodeGen/PowerPC/atomics.ll
index 5f6a6a4..fe4791e 100644
--- a/llvm/test/CodeGen/PowerPC/atomics.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics.ll
@@ -13,20 +13,20 @@
 ; CHECK-LABEL: load_i8_unordered
 ; CHECK: lbz
 ; CHECK-NOT: sync
-  %val = load atomic i8* %mem unordered, align 1
+  %val = load atomic i8, i8* %mem unordered, align 1
   ret i8 %val
 }
 define i16 @load_i16_monotonic(i16* %mem) {
 ; CHECK-LABEL: load_i16_monotonic
 ; CHECK: lhz
 ; CHECK-NOT: sync
-  %val = load atomic i16* %mem monotonic, align 2
+  %val = load atomic i16, i16* %mem monotonic, align 2
   ret i16 %val
 }
 define i32 @load_i32_acquire(i32* %mem) {
 ; CHECK-LABEL: load_i32_acquire
 ; CHECK: lwz
-  %val = load atomic i32* %mem acquire, align 4
+  %val = load atomic i32, i32* %mem acquire, align 4
 ; CHECK: sync 1
   ret i32 %val
 }
@@ -36,7 +36,7 @@
 ; PPC32: __sync_
 ; PPC64-NOT: __sync_
 ; PPC64: ld
-  %val = load atomic i64* %mem seq_cst, align 8
+  %val = load atomic i64, i64* %mem seq_cst, align 8
 ; CHECK: sync 1
   ret i64 %val
 }
diff --git a/llvm/test/CodeGen/PowerPC/bdzlr.ll b/llvm/test/CodeGen/PowerPC/bdzlr.ll
index c54c07a..d650604 100644
--- a/llvm/test/CodeGen/PowerPC/bdzlr.ll
+++ b/llvm/test/CodeGen/PowerPC/bdzlr.ll
@@ -36,7 +36,7 @@
   %0 = phi %struct.lua_TValue.17.692* [ undef, %for.body.lr.ph ], [ %.pre, %for.body.for.body_crit_edge ]
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body.for.body_crit_edge ]
   %tt = getelementptr inbounds %struct.lua_TValue.17.692, %struct.lua_TValue.17.692* %0, i64 %indvars.iv, i32 1
-  %1 = load i32* %tt, align 4
+  %1 = load i32, i32* %tt, align 4
   store i32 %1, i32* undef, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -44,7 +44,7 @@
   br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge
 
 for.body.for.body_crit_edge:                      ; preds = %for.body
-  %.pre = load %struct.lua_TValue.17.692** undef, align 8
+  %.pre = load %struct.lua_TValue.17.692*, %struct.lua_TValue.17.692** undef, align 8
   br label %for.body
 
 for.end:                                          ; preds = %for.body, %if.end, %entry
diff --git a/llvm/test/CodeGen/PowerPC/bswap-load-store.ll b/llvm/test/CodeGen/PowerPC/bswap-load-store.ll
index bb219c2..cee1f0c 100644
--- a/llvm/test/CodeGen/PowerPC/bswap-load-store.ll
+++ b/llvm/test/CodeGen/PowerPC/bswap-load-store.ll
@@ -15,7 +15,7 @@
 define i32 @LWBRX(i8* %ptr, i32 %off) {
         %tmp1 = getelementptr i8, i8* %ptr, i32 %off                ; <i8*> [#uses=1]
         %tmp1.upgrd.2 = bitcast i8* %tmp1 to i32*               ; <i32*> [#uses=1]
-        %tmp = load i32* %tmp1.upgrd.2          ; <i32> [#uses=1]
+        %tmp = load i32, i32* %tmp1.upgrd.2          ; <i32> [#uses=1]
         %tmp14 = tail call i32 @llvm.bswap.i32( i32 %tmp )              ; <i32> [#uses=1]
         ret i32 %tmp14
 }
@@ -31,7 +31,7 @@
 define i16 @LHBRX(i8* %ptr, i32 %off) {
         %tmp1 = getelementptr i8, i8* %ptr, i32 %off                ; <i8*> [#uses=1]
         %tmp1.upgrd.4 = bitcast i8* %tmp1 to i16*               ; <i16*> [#uses=1]
-        %tmp = load i16* %tmp1.upgrd.4          ; <i16> [#uses=1]
+        %tmp = load i16, i16* %tmp1.upgrd.4          ; <i16> [#uses=1]
         %tmp6 = call i16 @llvm.bswap.i16( i16 %tmp )            ; <i16> [#uses=1]
         ret i16 %tmp6
 }
@@ -47,7 +47,7 @@
 define i64 @LDBRX(i8* %ptr, i64 %off) {
         %tmp1 = getelementptr i8, i8* %ptr, i64 %off                ; <i8*> [#uses=1]
         %tmp1.upgrd.2 = bitcast i8* %tmp1 to i64*               ; <i64*> [#uses=1]
-        %tmp = load i64* %tmp1.upgrd.2          ; <i64> [#uses=1]
+        %tmp = load i64, i64* %tmp1.upgrd.2          ; <i64> [#uses=1]
         %tmp14 = tail call i64 @llvm.bswap.i64( i64 %tmp )              ; <i64> [#uses=1]
         ret i64 %tmp14
 }
diff --git a/llvm/test/CodeGen/PowerPC/buildvec_canonicalize.ll b/llvm/test/CodeGen/PowerPC/buildvec_canonicalize.ll
index b70671b..6c59191 100644
--- a/llvm/test/CodeGen/PowerPC/buildvec_canonicalize.ll
+++ b/llvm/test/CodeGen/PowerPC/buildvec_canonicalize.ll
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -mattr=-vsx -march=ppc32 -mattr=+altivec --enable-unsafe-fp-math | FileCheck %s
 
 define void @VXOR(<4 x float>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
-        %tmp = load <4 x float>* %P3            ; <<4 x float>> [#uses=1]
-        %tmp3 = load <4 x float>* %P1           ; <<4 x float>> [#uses=1]
+        %tmp = load <4 x float>, <4 x float>* %P3            ; <<4 x float>> [#uses=1]
+        %tmp3 = load <4 x float>, <4 x float>* %P1           ; <<4 x float>> [#uses=1]
         %tmp4 = fmul <4 x float> %tmp, %tmp3             ; <<4 x float>> [#uses=1]
         store <4 x float> %tmp4, <4 x float>* %P3
         store <4 x float> zeroinitializer, <4 x float>* %P1
diff --git a/llvm/test/CodeGen/PowerPC/byval-aliased.ll b/llvm/test/CodeGen/PowerPC/byval-aliased.ll
index caaf0c9..8668e64 100644
--- a/llvm/test/CodeGen/PowerPC/byval-aliased.ll
+++ b/llvm/test/CodeGen/PowerPC/byval-aliased.ll
@@ -8,7 +8,7 @@
 define void @foo(%struct.sm* byval %s) #0 {
 entry:
   %a = getelementptr inbounds %struct.sm, %struct.sm* %s, i32 0, i32 0
-  %0 = load i8* %a, align 1
+  %0 = load i8, i8* %a, align 1
   %conv2 = zext i8 %0 to i32
   %add = add nuw nsw i32 %conv2, 3
   %conv1 = trunc i32 %add to i8
diff --git a/llvm/test/CodeGen/PowerPC/code-align.ll b/llvm/test/CodeGen/PowerPC/code-align.ll
index 6b743e9..19d1b23 100644
--- a/llvm/test/CodeGen/PowerPC/code-align.ll
+++ b/llvm/test/CodeGen/PowerPC/code-align.ll
@@ -56,8 +56,8 @@
   %induction45 = or i64 %index, 1
   %0 = getelementptr inbounds i32, i32* %a, i64 %index
   %1 = getelementptr inbounds i32, i32* %a, i64 %induction45
-  %2 = load i32* %0, align 4
-  %3 = load i32* %1, align 4
+  %2 = load i32, i32* %0, align 4
+  %3 = load i32, i32* %1, align 4
   %4 = add nsw i32 %2, 4
   %5 = add nsw i32 %3, 4
   %6 = mul nsw i32 %4, 3
@@ -93,7 +93,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, 4
   %mul = mul nsw i32 %add, 3
   store i32 %mul, i32* %arrayidx, align 4
diff --git a/llvm/test/CodeGen/PowerPC/complex-return.ll b/llvm/test/CodeGen/PowerPC/complex-return.ll
index bc201f9..e419f07 100644
--- a/llvm/test/CodeGen/PowerPC/complex-return.ll
+++ b/llvm/test/CodeGen/PowerPC/complex-return.ll
@@ -12,14 +12,14 @@
   store ppc_fp128 0xM400C0000000000000000000000000000, ppc_fp128* %real
   store ppc_fp128 0xMC00547AE147AE1483CA47AE147AE147A, ppc_fp128* %imag
   %x.realp = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 0
-  %x.real = load ppc_fp128* %x.realp
+  %x.real = load ppc_fp128, ppc_fp128* %x.realp
   %x.imagp = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %x, i32 0, i32 1
-  %x.imag = load ppc_fp128* %x.imagp
+  %x.imag = load ppc_fp128, ppc_fp128* %x.imagp
   %real1 = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %retval, i32 0, i32 0
   %imag2 = getelementptr inbounds { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %retval, i32 0, i32 1
   store ppc_fp128 %x.real, ppc_fp128* %real1
   store ppc_fp128 %x.imag, ppc_fp128* %imag2
-  %0 = load { ppc_fp128, ppc_fp128 }* %retval
+  %0 = load { ppc_fp128, ppc_fp128 }, { ppc_fp128, ppc_fp128 }* %retval
   ret { ppc_fp128, ppc_fp128 } %0
 }
 
@@ -38,14 +38,14 @@
   store float 3.500000e+00, float* %real
   store float 0xC00547AE20000000, float* %imag
   %x.realp = getelementptr inbounds { float, float }, { float, float }* %x, i32 0, i32 0
-  %x.real = load float* %x.realp
+  %x.real = load float, float* %x.realp
   %x.imagp = getelementptr inbounds { float, float }, { float, float }* %x, i32 0, i32 1
-  %x.imag = load float* %x.imagp
+  %x.imag = load float, float* %x.imagp
   %real1 = getelementptr inbounds { float, float }, { float, float }* %retval, i32 0, i32 0
   %imag2 = getelementptr inbounds { float, float }, { float, float }* %retval, i32 0, i32 1
   store float %x.real, float* %real1
   store float %x.imag, float* %imag2
-  %0 = load { float, float }* %retval
+  %0 = load { float, float }, { float, float }* %retval
   ret { float, float } %0
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/cr-spills.ll b/llvm/test/CodeGen/PowerPC/cr-spills.ll
index 14a48a25..e560554 100644
--- a/llvm/test/CodeGen/PowerPC/cr-spills.ll
+++ b/llvm/test/CodeGen/PowerPC/cr-spills.ll
@@ -31,7 +31,7 @@
 
 land.end:                                         ; preds = %land.rhs, %land.lhs.true, %entry
   %0 = phi i1 [ %tobool21, %land.rhs ], [ false, %land.lhs.true ], [ false, %entry ]
-  %cond = load i32** undef, align 8
+  %cond = load i32*, i32** undef, align 8
   br i1 undef, label %if.then95, label %for.body.lr.ph
 
 if.then95:                                        ; preds = %land.end
@@ -53,11 +53,11 @@
 
 for.cond290.preheader:                            ; preds = %for.end520, %for.cond286.preheader
   %srcptr.31595 = phi i16* [ getelementptr inbounds ([768 x i16]* @SetupFastFullPelSearch.orig_pels, i64 0, i64 0), %for.cond286.preheader ], [ null, %for.end520 ]
-  %1 = load i32* undef, align 4
-  %2 = load i32* @weight_luma, align 4
-  %3 = load i32* @wp_luma_round, align 4
-  %4 = load i32* @luma_log_weight_denom, align 4
-  %5 = load i32* @offset_luma, align 4
+  %1 = load i32, i32* undef, align 4
+  %2 = load i32, i32* @weight_luma, align 4
+  %3 = load i32, i32* @wp_luma_round, align 4
+  %4 = load i32, i32* @luma_log_weight_denom, align 4
+  %5 = load i32, i32* @offset_luma, align 4
   %incdec.ptr502.sum = add i64 undef, 16
   br label %for.body293
 
@@ -68,7 +68,7 @@
   %LineSadBlk1.01587 = phi i32 [ 0, %for.cond290.preheader ], [ %add402, %for.body293 ]
   %LineSadBlk3.01586 = phi i32 [ 0, %for.cond290.preheader ], [ %add514, %for.body293 ]
   %LineSadBlk2.01585 = phi i32 [ 0, %for.cond290.preheader ], [ %add458, %for.body293 ]
-  %6 = load i16* %refptr.11590, align 2
+  %6 = load i16, i16* %refptr.11590, align 2
   %conv294 = zext i16 %6 to i32
   %mul295 = mul nsw i32 %conv294, %2
   %add296 = add nsw i32 %mul295, %3
@@ -78,16 +78,16 @@
   %cond.i.i1514 = select i1 %cmp.i.i1513, i32 %add297, i32 0
   %cmp.i4.i1515 = icmp slt i32 %cond.i.i1514, %1
   %cond.i5.i1516 = select i1 %cmp.i4.i1515, i32 %cond.i.i1514, i32 %1
-  %7 = load i16* %srcptr.41591, align 2
+  %7 = load i16, i16* %srcptr.41591, align 2
   %conv300 = zext i16 %7 to i32
   %sub301 = sub nsw i32 %cond.i5.i1516, %conv300
   %idxprom302 = sext i32 %sub301 to i64
   %arrayidx303 = getelementptr inbounds i32, i32* %cond, i64 %idxprom302
-  %8 = load i32* %arrayidx303, align 4
+  %8 = load i32, i32* %arrayidx303, align 4
   %add304 = add nsw i32 %8, %LineSadBlk0.01588
-  %9 = load i32* undef, align 4
+  %9 = load i32, i32* undef, align 4
   %add318 = add nsw i32 %add304, %9
-  %10 = load i16* undef, align 2
+  %10 = load i16, i16* undef, align 2
   %conv321 = zext i16 %10 to i32
   %mul322 = mul nsw i32 %conv321, %2
   %add323 = add nsw i32 %mul322, %3
@@ -100,22 +100,22 @@
   %sub329 = sub nsw i32 %cond.i5.i1508, 0
   %idxprom330 = sext i32 %sub329 to i64
   %arrayidx331 = getelementptr inbounds i32, i32* %cond, i64 %idxprom330
-  %11 = load i32* %arrayidx331, align 4
+  %11 = load i32, i32* %arrayidx331, align 4
   %add332 = add nsw i32 %add318, %11
   %cmp.i.i1501 = icmp sgt i32 undef, 0
   %cond.i.i1502 = select i1 %cmp.i.i1501, i32 undef, i32 0
   %cmp.i4.i1503 = icmp slt i32 %cond.i.i1502, %1
   %cond.i5.i1504 = select i1 %cmp.i4.i1503, i32 %cond.i.i1502, i32 %1
   %incdec.ptr341 = getelementptr inbounds i16, i16* %srcptr.41591, i64 4
-  %12 = load i16* null, align 2
+  %12 = load i16, i16* null, align 2
   %conv342 = zext i16 %12 to i32
   %sub343 = sub nsw i32 %cond.i5.i1504, %conv342
   %idxprom344 = sext i32 %sub343 to i64
   %arrayidx345 = getelementptr inbounds i32, i32* %cond, i64 %idxprom344
-  %13 = load i32* %arrayidx345, align 4
+  %13 = load i32, i32* %arrayidx345, align 4
   %add346 = add nsw i32 %add332, %13
   %incdec.ptr348 = getelementptr inbounds i16, i16* %refptr.11590, i64 5
-  %14 = load i16* null, align 2
+  %14 = load i16, i16* null, align 2
   %conv349 = zext i16 %14 to i32
   %mul350 = mul nsw i32 %conv349, %2
   %add351 = add nsw i32 %mul350, %3
@@ -126,15 +126,15 @@
   %cmp.i4.i1499 = icmp slt i32 %cond.i.i1498, %1
   %cond.i5.i1500 = select i1 %cmp.i4.i1499, i32 %cond.i.i1498, i32 %1
   %incdec.ptr355 = getelementptr inbounds i16, i16* %srcptr.41591, i64 5
-  %15 = load i16* %incdec.ptr341, align 2
+  %15 = load i16, i16* %incdec.ptr341, align 2
   %conv356 = zext i16 %15 to i32
   %sub357 = sub nsw i32 %cond.i5.i1500, %conv356
   %idxprom358 = sext i32 %sub357 to i64
   %arrayidx359 = getelementptr inbounds i32, i32* %cond, i64 %idxprom358
-  %16 = load i32* %arrayidx359, align 4
+  %16 = load i32, i32* %arrayidx359, align 4
   %add360 = add nsw i32 %16, %LineSadBlk1.01587
   %incdec.ptr362 = getelementptr inbounds i16, i16* %refptr.11590, i64 6
-  %17 = load i16* %incdec.ptr348, align 2
+  %17 = load i16, i16* %incdec.ptr348, align 2
   %conv363 = zext i16 %17 to i32
   %mul364 = mul nsw i32 %conv363, %2
   %add365 = add nsw i32 %mul364, %3
@@ -145,15 +145,15 @@
   %cmp.i4.i1495 = icmp slt i32 %cond.i.i1494, %1
   %cond.i5.i1496 = select i1 %cmp.i4.i1495, i32 %cond.i.i1494, i32 %1
   %incdec.ptr369 = getelementptr inbounds i16, i16* %srcptr.41591, i64 6
-  %18 = load i16* %incdec.ptr355, align 2
+  %18 = load i16, i16* %incdec.ptr355, align 2
   %conv370 = zext i16 %18 to i32
   %sub371 = sub nsw i32 %cond.i5.i1496, %conv370
   %idxprom372 = sext i32 %sub371 to i64
   %arrayidx373 = getelementptr inbounds i32, i32* %cond, i64 %idxprom372
-  %19 = load i32* %arrayidx373, align 4
+  %19 = load i32, i32* %arrayidx373, align 4
   %add374 = add nsw i32 %add360, %19
   %incdec.ptr376 = getelementptr inbounds i16, i16* %refptr.11590, i64 7
-  %20 = load i16* %incdec.ptr362, align 2
+  %20 = load i16, i16* %incdec.ptr362, align 2
   %conv377 = zext i16 %20 to i32
   %mul378 = mul nsw i32 %conv377, %2
   %add379 = add nsw i32 %mul378, %3
@@ -164,14 +164,14 @@
   %cmp.i4.i1491 = icmp slt i32 %cond.i.i1490, %1
   %cond.i5.i1492 = select i1 %cmp.i4.i1491, i32 %cond.i.i1490, i32 %1
   %incdec.ptr383 = getelementptr inbounds i16, i16* %srcptr.41591, i64 7
-  %21 = load i16* %incdec.ptr369, align 2
+  %21 = load i16, i16* %incdec.ptr369, align 2
   %conv384 = zext i16 %21 to i32
   %sub385 = sub nsw i32 %cond.i5.i1492, %conv384
   %idxprom386 = sext i32 %sub385 to i64
   %arrayidx387 = getelementptr inbounds i32, i32* %cond, i64 %idxprom386
-  %22 = load i32* %arrayidx387, align 4
+  %22 = load i32, i32* %arrayidx387, align 4
   %add388 = add nsw i32 %add374, %22
-  %23 = load i16* %incdec.ptr376, align 2
+  %23 = load i16, i16* %incdec.ptr376, align 2
   %conv391 = zext i16 %23 to i32
   %mul392 = mul nsw i32 %conv391, %2
   %add395 = add nsw i32 0, %5
@@ -180,25 +180,25 @@
   %cmp.i4.i1487 = icmp slt i32 %cond.i.i1486, %1
   %cond.i5.i1488 = select i1 %cmp.i4.i1487, i32 %cond.i.i1486, i32 %1
   %incdec.ptr397 = getelementptr inbounds i16, i16* %srcptr.41591, i64 8
-  %24 = load i16* %incdec.ptr383, align 2
+  %24 = load i16, i16* %incdec.ptr383, align 2
   %conv398 = zext i16 %24 to i32
   %sub399 = sub nsw i32 %cond.i5.i1488, %conv398
   %idxprom400 = sext i32 %sub399 to i64
   %arrayidx401 = getelementptr inbounds i32, i32* %cond, i64 %idxprom400
-  %25 = load i32* %arrayidx401, align 4
+  %25 = load i32, i32* %arrayidx401, align 4
   %add402 = add nsw i32 %add388, %25
   %incdec.ptr404 = getelementptr inbounds i16, i16* %refptr.11590, i64 9
   %cmp.i4.i1483 = icmp slt i32 undef, %1
   %cond.i5.i1484 = select i1 %cmp.i4.i1483, i32 undef, i32 %1
-  %26 = load i16* %incdec.ptr397, align 2
+  %26 = load i16, i16* %incdec.ptr397, align 2
   %conv412 = zext i16 %26 to i32
   %sub413 = sub nsw i32 %cond.i5.i1484, %conv412
   %idxprom414 = sext i32 %sub413 to i64
   %arrayidx415 = getelementptr inbounds i32, i32* %cond, i64 %idxprom414
-  %27 = load i32* %arrayidx415, align 4
+  %27 = load i32, i32* %arrayidx415, align 4
   %add416 = add nsw i32 %27, %LineSadBlk2.01585
   %incdec.ptr418 = getelementptr inbounds i16, i16* %refptr.11590, i64 10
-  %28 = load i16* %incdec.ptr404, align 2
+  %28 = load i16, i16* %incdec.ptr404, align 2
   %conv419 = zext i16 %28 to i32
   %mul420 = mul nsw i32 %conv419, %2
   %add421 = add nsw i32 %mul420, %3
@@ -212,10 +212,10 @@
   %sub427 = sub nsw i32 %cond.i5.i1480, 0
   %idxprom428 = sext i32 %sub427 to i64
   %arrayidx429 = getelementptr inbounds i32, i32* %cond, i64 %idxprom428
-  %29 = load i32* %arrayidx429, align 4
+  %29 = load i32, i32* %arrayidx429, align 4
   %add430 = add nsw i32 %add416, %29
   %incdec.ptr432 = getelementptr inbounds i16, i16* %refptr.11590, i64 11
-  %30 = load i16* %incdec.ptr418, align 2
+  %30 = load i16, i16* %incdec.ptr418, align 2
   %conv433 = zext i16 %30 to i32
   %mul434 = mul nsw i32 %conv433, %2
   %add435 = add nsw i32 %mul434, %3
@@ -225,15 +225,15 @@
   %cond.i.i1474 = select i1 %cmp.i.i1473, i32 %add437, i32 0
   %cmp.i4.i1475 = icmp slt i32 %cond.i.i1474, %1
   %cond.i5.i1476 = select i1 %cmp.i4.i1475, i32 %cond.i.i1474, i32 %1
-  %31 = load i16* %incdec.ptr425, align 2
+  %31 = load i16, i16* %incdec.ptr425, align 2
   %conv440 = zext i16 %31 to i32
   %sub441 = sub nsw i32 %cond.i5.i1476, %conv440
   %idxprom442 = sext i32 %sub441 to i64
   %arrayidx443 = getelementptr inbounds i32, i32* %cond, i64 %idxprom442
-  %32 = load i32* %arrayidx443, align 4
+  %32 = load i32, i32* %arrayidx443, align 4
   %add444 = add nsw i32 %add430, %32
   %incdec.ptr446 = getelementptr inbounds i16, i16* %refptr.11590, i64 12
-  %33 = load i16* %incdec.ptr432, align 2
+  %33 = load i16, i16* %incdec.ptr432, align 2
   %conv447 = zext i16 %33 to i32
   %mul448 = mul nsw i32 %conv447, %2
   %add449 = add nsw i32 %mul448, %3
@@ -244,15 +244,15 @@
   %cmp.i4.i1471 = icmp slt i32 %cond.i.i1470, %1
   %cond.i5.i1472 = select i1 %cmp.i4.i1471, i32 %cond.i.i1470, i32 %1
   %incdec.ptr453 = getelementptr inbounds i16, i16* %srcptr.41591, i64 12
-  %34 = load i16* undef, align 2
+  %34 = load i16, i16* undef, align 2
   %conv454 = zext i16 %34 to i32
   %sub455 = sub nsw i32 %cond.i5.i1472, %conv454
   %idxprom456 = sext i32 %sub455 to i64
   %arrayidx457 = getelementptr inbounds i32, i32* %cond, i64 %idxprom456
-  %35 = load i32* %arrayidx457, align 4
+  %35 = load i32, i32* %arrayidx457, align 4
   %add458 = add nsw i32 %add444, %35
   %incdec.ptr460 = getelementptr inbounds i16, i16* %refptr.11590, i64 13
-  %36 = load i16* %incdec.ptr446, align 2
+  %36 = load i16, i16* %incdec.ptr446, align 2
   %conv461 = zext i16 %36 to i32
   %mul462 = mul nsw i32 %conv461, %2
   %add463 = add nsw i32 %mul462, %3
@@ -263,12 +263,12 @@
   %cmp.i4.i1467 = icmp slt i32 %cond.i.i1466, %1
   %cond.i5.i1468 = select i1 %cmp.i4.i1467, i32 %cond.i.i1466, i32 %1
   %incdec.ptr467 = getelementptr inbounds i16, i16* %srcptr.41591, i64 13
-  %37 = load i16* %incdec.ptr453, align 2
+  %37 = load i16, i16* %incdec.ptr453, align 2
   %conv468 = zext i16 %37 to i32
   %sub469 = sub nsw i32 %cond.i5.i1468, %conv468
   %idxprom470 = sext i32 %sub469 to i64
   %arrayidx471 = getelementptr inbounds i32, i32* %cond, i64 %idxprom470
-  %38 = load i32* %arrayidx471, align 4
+  %38 = load i32, i32* %arrayidx471, align 4
   %add472 = add nsw i32 %38, %LineSadBlk3.01586
   %incdec.ptr474 = getelementptr inbounds i16, i16* %refptr.11590, i64 14
   %add477 = add nsw i32 0, %3
@@ -279,15 +279,15 @@
   %cmp.i4.i1463 = icmp slt i32 %cond.i.i1462, %1
   %cond.i5.i1464 = select i1 %cmp.i4.i1463, i32 %cond.i.i1462, i32 %1
   %incdec.ptr481 = getelementptr inbounds i16, i16* %srcptr.41591, i64 14
-  %39 = load i16* %incdec.ptr467, align 2
+  %39 = load i16, i16* %incdec.ptr467, align 2
   %conv482 = zext i16 %39 to i32
   %sub483 = sub nsw i32 %cond.i5.i1464, %conv482
   %idxprom484 = sext i32 %sub483 to i64
   %arrayidx485 = getelementptr inbounds i32, i32* %cond, i64 %idxprom484
-  %40 = load i32* %arrayidx485, align 4
+  %40 = load i32, i32* %arrayidx485, align 4
   %add486 = add nsw i32 %add472, %40
   %incdec.ptr488 = getelementptr inbounds i16, i16* %refptr.11590, i64 15
-  %41 = load i16* %incdec.ptr474, align 2
+  %41 = load i16, i16* %incdec.ptr474, align 2
   %conv489 = zext i16 %41 to i32
   %mul490 = mul nsw i32 %conv489, %2
   %add491 = add nsw i32 %mul490, %3
@@ -298,14 +298,14 @@
   %cmp.i4.i1459 = icmp slt i32 %cond.i.i1458, %1
   %cond.i5.i1460 = select i1 %cmp.i4.i1459, i32 %cond.i.i1458, i32 %1
   %incdec.ptr495 = getelementptr inbounds i16, i16* %srcptr.41591, i64 15
-  %42 = load i16* %incdec.ptr481, align 2
+  %42 = load i16, i16* %incdec.ptr481, align 2
   %conv496 = zext i16 %42 to i32
   %sub497 = sub nsw i32 %cond.i5.i1460, %conv496
   %idxprom498 = sext i32 %sub497 to i64
   %arrayidx499 = getelementptr inbounds i32, i32* %cond, i64 %idxprom498
-  %43 = load i32* %arrayidx499, align 4
+  %43 = load i32, i32* %arrayidx499, align 4
   %add500 = add nsw i32 %add486, %43
-  %44 = load i16* %incdec.ptr488, align 2
+  %44 = load i16, i16* %incdec.ptr488, align 2
   %conv503 = zext i16 %44 to i32
   %mul504 = mul nsw i32 %conv503, %2
   %add505 = add nsw i32 %mul504, %3
@@ -315,12 +315,12 @@
   %cond.i.i1454 = select i1 %cmp.i.i1453, i32 %add507, i32 0
   %cmp.i4.i1455 = icmp slt i32 %cond.i.i1454, %1
   %cond.i5.i1456 = select i1 %cmp.i4.i1455, i32 %cond.i.i1454, i32 %1
-  %45 = load i16* %incdec.ptr495, align 2
+  %45 = load i16, i16* %incdec.ptr495, align 2
   %conv510 = zext i16 %45 to i32
   %sub511 = sub nsw i32 %cond.i5.i1456, %conv510
   %idxprom512 = sext i32 %sub511 to i64
   %arrayidx513 = getelementptr inbounds i32, i32* %cond, i64 %idxprom512
-  %46 = load i32* %arrayidx513, align 4
+  %46 = load i32, i32* %arrayidx513, align 4
   %add514 = add nsw i32 %add500, %46
   %add.ptr517 = getelementptr inbounds i16, i16* %refptr.11590, i64 %incdec.ptr502.sum
   %exitcond1692 = icmp eq i32 undef, 4
diff --git a/llvm/test/CodeGen/PowerPC/crbits.ll b/llvm/test/CodeGen/PowerPC/crbits.ll
index 06e9001..8873c1b 100644
--- a/llvm/test/CodeGen/PowerPC/crbits.ll
+++ b/llvm/test/CodeGen/PowerPC/crbits.ll
@@ -145,7 +145,7 @@
 
 define zeroext i32 @exttest8() #0 {
 entry:
-  %v0 = load i64* undef, align 8
+  %v0 = load i64, i64* undef, align 8
   %sub = sub i64 80, %v0
   %div = lshr i64 %sub, 1
   %conv13 = trunc i64 %div to i32
diff --git a/llvm/test/CodeGen/PowerPC/crsave.ll b/llvm/test/CodeGen/PowerPC/crsave.ll
index 602ba94..8121e1b 100644
--- a/llvm/test/CodeGen/PowerPC/crsave.ll
+++ b/llvm/test/CodeGen/PowerPC/crsave.ll
@@ -9,7 +9,7 @@
   %0 = call i32 asm sideeffect "\0A\09mtcr $4\0A\09cmpw 2,$2,$1\0A\09mfcr $0", "=r,r,r,r,r,~{cr2}"(i32 1, i32 2, i32 3, i32 0) nounwind
   store i32 %0, i32* %ret, align 4
   call void @foo()
-  %1 = load i32* %ret, align 4
+  %1 = load i32, i32* %ret, align 4
   ret i32 %1
 }
 
@@ -38,7 +38,7 @@
   %0 = call i32 asm sideeffect "\0A\09mtcr $4\0A\09cmpw 2,$2,$1\0A\09cmpw 3,$2,$2\0A\09cmpw 4,$2,$3\0A\09mfcr $0", "=r,r,r,r,r,~{cr2},~{cr3},~{cr4}"(i32 1, i32 2, i32 3, i32 0) nounwind
   store i32 %0, i32* %ret, align 4
   call void @foo()
-  %1 = load i32* %ret, align 4
+  %1 = load i32, i32* %ret, align 4
   ret i32 %1
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll b/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll
index 7bff8d3..fcfcf15 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-cpsgn.ll
@@ -11,7 +11,7 @@
   %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %x.05 = phi ppc_fp128 [ %d, %entry ], [ %conv, %for.body ]
   %arrayidx = getelementptr inbounds ppc_fp128, ppc_fp128* %n, i32 %i.06
-  %0 = load ppc_fp128* %arrayidx, align 8
+  %0 = load ppc_fp128, ppc_fp128* %arrayidx, align 8
   %conv = tail call ppc_fp128 @copysignl(ppc_fp128 %x.05, ppc_fp128 %d) nounwind readonly
   %inc = add nsw i32 %i.06, 1
   %exitcond = icmp eq i32 %inc, 2048
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-fp64.ll b/llvm/test/CodeGen/PowerPC/ctrloop-fp64.ll
index b5aeb72..6128d7c 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-fp64.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-fp64.ll
@@ -11,7 +11,7 @@
   %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %x.05 = phi i64 [ 0, %entry ], [ %conv1, %for.body ]
   %arrayidx = getelementptr inbounds double, double* %n, i32 %i.06
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %conv = sitofp i64 %x.05 to double
   %add = fadd double %conv, %0
   %conv1 = fptosi double %add to i64
@@ -31,7 +31,7 @@
 
 define i32 @main(i32 %argc, i8** nocapture %argv) {
 entry:
-  %0 = load double* @init_value, align 8
+  %0 = load double, double* @init_value, align 8
   %conv = fptosi double %0 to i64
   %broadcast.splatinsert.i = insertelement <2 x i64> undef, i64 %conv, i32 0
   %broadcast.splat.i = shufflevector <2 x i64> %broadcast.splatinsert.i, <2 x i64> undef, <2 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-i64.ll b/llvm/test/CodeGen/PowerPC/ctrloop-i64.ll
index 6aa3dba..5c66a68 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-i64.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-i64.ll
@@ -11,7 +11,7 @@
   %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %x.05 = phi i64 [ 0, %entry ], [ %conv1, %for.body ]
   %arrayidx = getelementptr inbounds i64, i64* %n, i32 %i.06
-  %0 = load i64* %arrayidx, align 8
+  %0 = load i64, i64* %arrayidx, align 8
   %conv = udiv i64 %x.05, %d
   %conv1 = add i64 %conv, %0
   %inc = add nsw i32 %i.06, 1
@@ -33,7 +33,7 @@
   %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %x.05 = phi i64 [ 0, %entry ], [ %conv1, %for.body ]
   %arrayidx = getelementptr inbounds i64, i64* %n, i32 %i.06
-  %0 = load i64* %arrayidx, align 8
+  %0 = load i64, i64* %arrayidx, align 8
   %conv = sdiv i64 %x.05, %d
   %conv1 = add i64 %conv, %0
   %inc = add nsw i32 %i.06, 1
@@ -55,7 +55,7 @@
   %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %x.05 = phi i64 [ 0, %entry ], [ %conv1, %for.body ]
   %arrayidx = getelementptr inbounds i64, i64* %n, i32 %i.06
-  %0 = load i64* %arrayidx, align 8
+  %0 = load i64, i64* %arrayidx, align 8
   %conv = urem i64 %x.05, %d
   %conv1 = add i64 %conv, %0
   %inc = add nsw i32 %i.06, 1
@@ -77,7 +77,7 @@
   %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %x.05 = phi i64 [ 0, %entry ], [ %conv1, %for.body ]
   %arrayidx = getelementptr inbounds i64, i64* %n, i32 %i.06
-  %0 = load i64* %arrayidx, align 8
+  %0 = load i64, i64* %arrayidx, align 8
   %conv = srem i64 %x.05, %d
   %conv1 = add i64 %conv, %0
   %inc = add nsw i32 %i.06, 1
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-le.ll b/llvm/test/CodeGen/PowerPC/ctrloop-le.ll
index e6b745d..bef04370 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-le.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-le.ll
@@ -19,7 +19,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 28395, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -48,7 +48,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 9073, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -77,7 +77,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 21956, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -106,7 +106,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 16782, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -135,7 +135,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 19097, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -164,7 +164,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -193,7 +193,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -222,7 +222,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -251,7 +251,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -280,7 +280,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -310,7 +310,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -340,7 +340,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -370,7 +370,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -400,7 +400,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -430,7 +430,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-lt.ll b/llvm/test/CodeGen/PowerPC/ctrloop-lt.ll
index d1ab376..fa910aa 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-lt.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-lt.ll
@@ -19,7 +19,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 8531, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -49,7 +49,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 9152, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -79,7 +79,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 18851, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -108,7 +108,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 25466, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -137,7 +137,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 9295, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -166,7 +166,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -195,7 +195,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -224,7 +224,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -253,7 +253,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -282,7 +282,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -311,7 +311,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -340,7 +340,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -369,7 +369,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -398,7 +398,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -427,7 +427,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-ne.ll b/llvm/test/CodeGen/PowerPC/ctrloop-ne.ll
index d51e877..13a9909 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-ne.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-ne.ll
@@ -16,7 +16,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 32623, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -46,7 +46,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 29554, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -76,7 +76,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 15692, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -106,7 +106,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 10449, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -136,7 +136,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ 32087, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -165,7 +165,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -195,7 +195,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -225,7 +225,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -255,7 +255,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -285,7 +285,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -314,7 +314,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -344,7 +344,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -374,7 +374,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -404,7 +404,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
@@ -434,7 +434,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %i.04 = phi i32 [ %a, %for.body.lr.ph ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %p, i32 %i.04
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %0 to i32
   %add = add nsw i32 %conv, 1
   %conv1 = trunc i32 %add to i8
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-s000.ll b/llvm/test/CodeGen/PowerPC/ctrloop-s000.ll
index bfb61ca..4b42d2e 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-s000.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-s000.ll
@@ -36,97 +36,97 @@
 for.body3:                                        ; preds = %for.body3, %for.cond1.preheader
   %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next.15, %for.body3 ]
   %arrayidx = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 32
+  %0 = load double, double* %arrayidx, align 32
   %add = fadd double %0, 1.000000e+00
   %arrayidx5 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv
   store double %add, double* %arrayidx5, align 32
   %indvars.iv.next11 = or i64 %indvars.iv, 1
   %arrayidx.1 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next11
-  %1 = load double* %arrayidx.1, align 8
+  %1 = load double, double* %arrayidx.1, align 8
   %add.1 = fadd double %1, 1.000000e+00
   %arrayidx5.1 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next11
   store double %add.1, double* %arrayidx5.1, align 8
   %indvars.iv.next.112 = or i64 %indvars.iv, 2
   %arrayidx.2 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.112
-  %2 = load double* %arrayidx.2, align 16
+  %2 = load double, double* %arrayidx.2, align 16
   %add.2 = fadd double %2, 1.000000e+00
   %arrayidx5.2 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.112
   store double %add.2, double* %arrayidx5.2, align 16
   %indvars.iv.next.213 = or i64 %indvars.iv, 3
   %arrayidx.3 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.213
-  %3 = load double* %arrayidx.3, align 8
+  %3 = load double, double* %arrayidx.3, align 8
   %add.3 = fadd double %3, 1.000000e+00
   %arrayidx5.3 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.213
   store double %add.3, double* %arrayidx5.3, align 8
   %indvars.iv.next.314 = or i64 %indvars.iv, 4
   %arrayidx.4 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.314
-  %4 = load double* %arrayidx.4, align 32
+  %4 = load double, double* %arrayidx.4, align 32
   %add.4 = fadd double %4, 1.000000e+00
   %arrayidx5.4 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.314
   store double %add.4, double* %arrayidx5.4, align 32
   %indvars.iv.next.415 = or i64 %indvars.iv, 5
   %arrayidx.5 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.415
-  %5 = load double* %arrayidx.5, align 8
+  %5 = load double, double* %arrayidx.5, align 8
   %add.5 = fadd double %5, 1.000000e+00
   %arrayidx5.5 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.415
   store double %add.5, double* %arrayidx5.5, align 8
   %indvars.iv.next.516 = or i64 %indvars.iv, 6
   %arrayidx.6 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.516
-  %6 = load double* %arrayidx.6, align 16
+  %6 = load double, double* %arrayidx.6, align 16
   %add.6 = fadd double %6, 1.000000e+00
   %arrayidx5.6 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.516
   store double %add.6, double* %arrayidx5.6, align 16
   %indvars.iv.next.617 = or i64 %indvars.iv, 7
   %arrayidx.7 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.617
-  %7 = load double* %arrayidx.7, align 8
+  %7 = load double, double* %arrayidx.7, align 8
   %add.7 = fadd double %7, 1.000000e+00
   %arrayidx5.7 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.617
   store double %add.7, double* %arrayidx5.7, align 8
   %indvars.iv.next.718 = or i64 %indvars.iv, 8
   %arrayidx.8 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.718
-  %8 = load double* %arrayidx.8, align 32
+  %8 = load double, double* %arrayidx.8, align 32
   %add.8 = fadd double %8, 1.000000e+00
   %arrayidx5.8 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.718
   store double %add.8, double* %arrayidx5.8, align 32
   %indvars.iv.next.819 = or i64 %indvars.iv, 9
   %arrayidx.9 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.819
-  %9 = load double* %arrayidx.9, align 8
+  %9 = load double, double* %arrayidx.9, align 8
   %add.9 = fadd double %9, 1.000000e+00
   %arrayidx5.9 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.819
   store double %add.9, double* %arrayidx5.9, align 8
   %indvars.iv.next.920 = or i64 %indvars.iv, 10
   %arrayidx.10 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.920
-  %10 = load double* %arrayidx.10, align 16
+  %10 = load double, double* %arrayidx.10, align 16
   %add.10 = fadd double %10, 1.000000e+00
   %arrayidx5.10 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.920
   store double %add.10, double* %arrayidx5.10, align 16
   %indvars.iv.next.1021 = or i64 %indvars.iv, 11
   %arrayidx.11 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1021
-  %11 = load double* %arrayidx.11, align 8
+  %11 = load double, double* %arrayidx.11, align 8
   %add.11 = fadd double %11, 1.000000e+00
   %arrayidx5.11 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1021
   store double %add.11, double* %arrayidx5.11, align 8
   %indvars.iv.next.1122 = or i64 %indvars.iv, 12
   %arrayidx.12 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1122
-  %12 = load double* %arrayidx.12, align 32
+  %12 = load double, double* %arrayidx.12, align 32
   %add.12 = fadd double %12, 1.000000e+00
   %arrayidx5.12 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1122
   store double %add.12, double* %arrayidx5.12, align 32
   %indvars.iv.next.1223 = or i64 %indvars.iv, 13
   %arrayidx.13 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1223
-  %13 = load double* %arrayidx.13, align 8
+  %13 = load double, double* %arrayidx.13, align 8
   %add.13 = fadd double %13, 1.000000e+00
   %arrayidx5.13 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1223
   store double %add.13, double* %arrayidx5.13, align 8
   %indvars.iv.next.1324 = or i64 %indvars.iv, 14
   %arrayidx.14 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1324
-  %14 = load double* %arrayidx.14, align 16
+  %14 = load double, double* %arrayidx.14, align 16
   %add.14 = fadd double %14, 1.000000e+00
   %arrayidx5.14 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1324
   store double %add.14, double* %arrayidx5.14, align 16
   %indvars.iv.next.1425 = or i64 %indvars.iv, 15
   %arrayidx.15 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1425
-  %15 = load double* %arrayidx.15, align 8
+  %15 = load double, double* %arrayidx.15, align 8
   %add.15 = fadd double %15, 1.000000e+00
   %arrayidx5.15 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1425
   store double %add.15, double* %arrayidx5.15, align 8
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll b/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll
index d8e6fc7..540f0d6 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll
@@ -9,8 +9,8 @@
 
 for.body:                                         ; preds = %for.body, %entry
   %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %0 = load i128* %b, align 16
-  %1 = load i128* %c, align 16
+  %0 = load i128, i128* %b, align 16
+  %1 = load i128, i128* %c, align 16
   %shl = shl i128 %0, %1
   store i128 %shl, i128* %a, align 16
   %inc = add nsw i32 %i.02, 1
@@ -31,8 +31,8 @@
 
 for.body:                                         ; preds = %for.body, %entry
   %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %0 = load i128* %b, align 16
-  %1 = load i128* %c, align 16
+  %0 = load i128, i128* %b, align 16
+  %1 = load i128, i128* %c, align 16
   %shl = ashr i128 %0, %1
   store i128 %shl, i128* %a, align 16
   %inc = add nsw i32 %i.02, 1
@@ -53,8 +53,8 @@
 
 for.body:                                         ; preds = %for.body, %entry
   %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %0 = load i128* %b, align 16
-  %1 = load i128* %c, align 16
+  %0 = load i128, i128* %b, align 16
+  %1 = load i128, i128* %c, align 16
   %shl = lshr i128 %0, %1
   store i128 %shl, i128* %a, align 16
   %inc = add nsw i32 %i.02, 1
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-sums.ll b/llvm/test/CodeGen/PowerPC/ctrloop-sums.ll
index 8b9d691..fa5e8d9 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-sums.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-sums.ll
@@ -24,7 +24,7 @@
   %indvars.iv = phi i64 [ 0, %for.body3.lr.ph.us ], [ %indvars.iv.next, %for.body3.us ]
   %Result.111.us = phi i32 [ %Result.014.us, %for.body3.lr.ph.us ], [ %add.us, %for.body3.us ]
   %arrayidx5.us = getelementptr inbounds [100 x i32], [100 x i32]* %Array, i64 %indvars.iv16, i64 %indvars.iv
-  %0 = load i32* %arrayidx5.us, align 4
+  %0 = load i32, i32* %arrayidx5.us, align 4
   %add.us = add nsw i32 %0, %Result.111.us
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -106,7 +106,7 @@
   %indvars.iv.i = phi i64 [ 0, %for.body3.lr.ph.us.i ], [ %indvars.iv.next.i, %for.body3.us.i ]
   %Result.111.us.i = phi i32 [ %Result.014.us.i, %for.body3.lr.ph.us.i ], [ %add.us.i, %for.body3.us.i ]
   %arrayidx5.us.i = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* %Array, i64 0, i64 %indvars.iv16.i, i64 %indvars.iv.i
-  %5 = load i32* %arrayidx5.us.i, align 4
+  %5 = load i32, i32* %arrayidx5.us.i, align 4
   %add.us.i = add nsw i32 %5, %Result.111.us.i
   %indvars.iv.next.i = add i64 %indvars.iv.i, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next.i to i32
diff --git a/llvm/test/CodeGen/PowerPC/ctrloops.ll b/llvm/test/CodeGen/PowerPC/ctrloops.ll
index ccab7cb..fff9e20 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloops.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloops.ll
@@ -10,7 +10,7 @@
 
 for.body:                                         ; preds = %for.body, %entry
   %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %0 = load volatile i32* @a, align 4
+  %0 = load volatile i32, i32* @a, align 4
   %add = add nsw i32 %0, %c
   store volatile i32 %add, i32* @a, align 4
   %inc = add nsw i32 %i.01, 1
@@ -34,7 +34,7 @@
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
-  %0 = load volatile i32* @a, align 4
+  %0 = load volatile i32, i32* @a, align 4
   %add = add nsw i32 %0, %c
   store volatile i32 %add, i32* @a, align 4
   %inc = add nsw i32 %i.02, 1
@@ -58,7 +58,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %mul = mul nsw i32 %i.02, %c
-  %0 = load volatile i32* @a, align 4
+  %0 = load volatile i32, i32* @a, align 4
   %add = add nsw i32 %0, %mul
   store volatile i32 %add, i32* @a, align 4
   %inc = add nsw i32 %i.02, 1
diff --git a/llvm/test/CodeGen/PowerPC/dcbt-sched.ll b/llvm/test/CodeGen/PowerPC/dcbt-sched.ll
index dfa1b75..51d58b4 100644
--- a/llvm/test/CodeGen/PowerPC/dcbt-sched.ll
+++ b/llvm/test/CodeGen/PowerPC/dcbt-sched.ll
@@ -4,9 +4,9 @@
 
 define i8 @test1(i8* noalias %a, i8* noalias %b, i8* noalias %c) nounwind {
 entry:
-  %q = load i8* %b
+  %q = load i8, i8* %b
   call void @llvm.prefetch(i8* %a, i32 0, i32 3, i32 1)
-  %r = load i8* %c
+  %r = load i8, i8* %c
   %s = add i8 %q, %r
   ret i8 %s
 }
diff --git a/llvm/test/CodeGen/PowerPC/delete-node.ll b/llvm/test/CodeGen/PowerPC/delete-node.ll
index 388c902..999af54 100644
--- a/llvm/test/CodeGen/PowerPC/delete-node.ll
+++ b/llvm/test/CodeGen/PowerPC/delete-node.ll
@@ -9,11 +9,11 @@
       	br label %bb1
 
 bb1:            ; preds = %bb1, %entry
-        %0 = load i16* null, align 2            ; <i16> [#uses=1]
+        %0 = load i16, i16* null, align 2            ; <i16> [#uses=1]
         %1 = ashr i16 %0, 4             ; <i16> [#uses=1]
         %2 = sext i16 %1 to i32         ; <i32> [#uses=1]
         %3 = getelementptr i8, i8* null, i32 %2             ; <i8*> [#uses=1]
-        %4 = load i8* %3, align 1               ; <i8> [#uses=1]
+        %4 = load i8, i8* %3, align 1               ; <i8> [#uses=1]
         %5 = zext i8 %4 to i32          ; <i32> [#uses=1]
         %6 = shl i32 %5, 24             ; <i32> [#uses=1]
         %7 = or i32 0, %6               ; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/dyn-alloca-aligned.ll b/llvm/test/CodeGen/PowerPC/dyn-alloca-aligned.ll
index e743997..98b0a17 100644
--- a/llvm/test/CodeGen/PowerPC/dyn-alloca-aligned.ll
+++ b/llvm/test/CodeGen/PowerPC/dyn-alloca-aligned.ll
@@ -12,10 +12,10 @@
   %vla = alloca i32, i64 %0, align 128
   %vla1 = alloca i32, i64 %0, align 128
   %a2 = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 0
-  %1 = load i32* %a2, align 4
+  %1 = load i32, i32* %a2, align 4
   store i32 %1, i32* %vla1, align 128
   %b = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 1
-  %2 = load i32* %b, align 4
+  %2 = load i32, i32* %b, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %vla1, i64 1
   store i32 %2, i32* %arrayidx3, align 4
   call void @bar(i32* %vla1, i32* %vla) #0
diff --git a/llvm/test/CodeGen/PowerPC/emptystruct.ll b/llvm/test/CodeGen/PowerPC/emptystruct.ll
index 47cfadd..66cada1 100644
--- a/llvm/test/CodeGen/PowerPC/emptystruct.ll
+++ b/llvm/test/CodeGen/PowerPC/emptystruct.ll
@@ -18,7 +18,7 @@
 entry:
   %a2.addr = alloca %struct.empty*, align 8
   store %struct.empty* %a2, %struct.empty** %a2.addr, align 8
-  %0 = load %struct.empty** %a2.addr, align 8
+  %0 = load %struct.empty*, %struct.empty** %a2.addr, align 8
   %1 = bitcast %struct.empty* %agg.result to i8*
   %2 = bitcast %struct.empty* %0 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 0, i32 1, i1 false)
diff --git a/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll b/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
index f99089b..f90eccb 100644
--- a/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
+++ b/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll
@@ -69,9 +69,9 @@
 }
 
 define void @VNOR(<4 x float>* %P, <4 x float>* %Q) nounwind {
-	%tmp = load <4 x float>* %P		; <<4 x float>> [#uses=1]
+	%tmp = load <4 x float>, <4 x float>* %P		; <<4 x float>> [#uses=1]
 	%tmp.upgrd.1 = bitcast <4 x float> %tmp to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%tmp2 = load <4 x float>* %Q		; <<4 x float>> [#uses=1]
+	%tmp2 = load <4 x float>, <4 x float>* %Q		; <<4 x float>> [#uses=1]
 	%tmp2.upgrd.2 = bitcast <4 x float> %tmp2 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp3 = or <4 x i32> %tmp.upgrd.1, %tmp2.upgrd.2		; <<4 x i32>> [#uses=1]
 	%tmp4 = xor <4 x i32> %tmp3, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
@@ -81,9 +81,9 @@
 }
 
 define void @VANDC(<4 x float>* %P, <4 x float>* %Q) nounwind {
-	%tmp = load <4 x float>* %P		; <<4 x float>> [#uses=1]
+	%tmp = load <4 x float>, <4 x float>* %P		; <<4 x float>> [#uses=1]
 	%tmp.upgrd.4 = bitcast <4 x float> %tmp to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%tmp2 = load <4 x float>* %Q		; <<4 x float>> [#uses=1]
+	%tmp2 = load <4 x float>, <4 x float>* %Q		; <<4 x float>> [#uses=1]
 	%tmp2.upgrd.5 = bitcast <4 x float> %tmp2 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp4 = xor <4 x i32> %tmp2.upgrd.5, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
 	%tmp3 = and <4 x i32> %tmp.upgrd.4, %tmp4		; <<4 x i32>> [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/fast-isel-GEP-coalesce.ll b/llvm/test/CodeGen/PowerPC/fast-isel-GEP-coalesce.ll
index 4b40930..92f986e 100644
--- a/llvm/test/CodeGen/PowerPC/fast-isel-GEP-coalesce.ll
+++ b/llvm/test/CodeGen/PowerPC/fast-isel-GEP-coalesce.ll
@@ -13,7 +13,7 @@
   %addr = alloca i32*, align 4
   store i32* getelementptr inbounds ([2 x [2 x [2 x [2 x [2 x i32]]]]]* @arr, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1), i32** %addr, align 4
 ; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 124
-  %0 = load i32** %addr, align 4
+  %0 = load i32*, i32** %addr, align 4
   ret i32* %0
 }
 
@@ -23,7 +23,7 @@
   %addr = alloca i32*, align 4
   store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4
 ; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 1148
-  %0 = load i32** %addr, align 4
+  %0 = load i32*, i32** %addr, align 4
   ret i32* %0
 }
 
@@ -33,7 +33,7 @@
   %addr = alloca i32*, align 4
   store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1), i32** %addr, align 4
 ; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 140
-  %0 = load i32** %addr, align 4
+  %0 = load i32*, i32** %addr, align 4
   ret i32* %0
 }
 
@@ -43,6 +43,6 @@
   %addr = alloca i32*, align 4
   store i32* getelementptr inbounds ([2 x [2 x [2 x %struct.B]]]* @B, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 3, i32 1, i32 2, i32 1), i32** %addr, align 4
 ; ELF64: addi {{[0-9]+}}, {{[0-9]+}}, 1284
-  %0 = load i32** %addr, align 4
+  %0 = load i32*, i32** %addr, align 4
   ret i32* %0
 }
diff --git a/llvm/test/CodeGen/PowerPC/fast-isel-call.ll b/llvm/test/CodeGen/PowerPC/fast-isel-call.ll
index dac71c5..64d8f6e 100644
--- a/llvm/test/CodeGen/PowerPC/fast-isel-call.ll
+++ b/llvm/test/CodeGen/PowerPC/fast-isel-call.ll
@@ -85,7 +85,7 @@
 ;define void @foo3() uwtable {
 ;  %fptr = alloca i32 (i32)*, align 8
 ;  store i32 (i32)* @bar0, i32 (i32)** %fptr, align 8
-;  %1 = load i32 (i32)** %fptr, align 8
+;  %1 = load i32 (i32)*, i32 (i32)** %fptr, align 8
 ;  %call = call i32 %1(i32 0)
 ;  ret void
 ;}
diff --git a/llvm/test/CodeGen/PowerPC/fast-isel-fold.ll b/llvm/test/CodeGen/PowerPC/fast-isel-fold.ll
index cdc4c0a..e56101a 100644
--- a/llvm/test/CodeGen/PowerPC/fast-isel-fold.ll
+++ b/llvm/test/CodeGen/PowerPC/fast-isel-fold.ll
@@ -6,7 +6,7 @@
 
 define void @t1() nounwind uwtable ssp {
 ; ELF64: t1
-  %1 = load i8* @a, align 1
+  %1 = load i8, i8* @a, align 1
   call void @foo1(i8 zeroext %1)
 ; ELF64: lbz
 ; ELF64-NOT: rldicl
@@ -16,7 +16,7 @@
 
 define void @t2() nounwind uwtable ssp {
 ; ELF64: t2
-  %1 = load i16* @b, align 2
+  %1 = load i16, i16* @b, align 2
   call void @foo2(i16 zeroext %1)
 ; ELF64: lhz
 ; ELF64-NOT: rldicl
@@ -26,7 +26,7 @@
 
 define void @t2a() nounwind uwtable ssp {
 ; ELF64: t2a
-  %1 = load i32* @c, align 4
+  %1 = load i32, i32* @c, align 4
   call void @foo3(i32 zeroext %1)
 ; ELF64: lwz
 ; ELF64-NOT: rldicl
@@ -40,7 +40,7 @@
 
 define i32 @t3() nounwind uwtable ssp {
 ; ELF64: t3
-  %1 = load i8* @a, align 1
+  %1 = load i8, i8* @a, align 1
   %2 = zext i8 %1 to i32
 ; ELF64: lbz
 ; ELF64-NOT: rlwinm
@@ -49,7 +49,7 @@
 
 define i32 @t4() nounwind uwtable ssp {
 ; ELF64: t4
-  %1 = load i16* @b, align 2
+  %1 = load i16, i16* @b, align 2
   %2 = zext i16 %1 to i32
 ; ELF64: lhz
 ; ELF64-NOT: rlwinm
@@ -58,7 +58,7 @@
 
 define i32 @t5() nounwind uwtable ssp {
 ; ELF64: t5
-  %1 = load i16* @b, align 2
+  %1 = load i16, i16* @b, align 2
   %2 = sext i16 %1 to i32
 ; ELF64: lha
 ; ELF64-NOT: rlwinm
@@ -67,7 +67,7 @@
 
 define i32 @t6() nounwind uwtable ssp {
 ; ELF64: t6
-  %1 = load i8* @a, align 2
+  %1 = load i8, i8* @a, align 2
   %2 = sext i8 %1 to i32
 ; ELF64: lbz
 ; ELF64-NOT: rlwinm
@@ -76,7 +76,7 @@
 
 define i64 @t7() nounwind uwtable ssp {
 ; ELF64: t7
-  %1 = load i8* @a, align 1
+  %1 = load i8, i8* @a, align 1
   %2 = zext i8 %1 to i64
 ; ELF64: lbz
 ; ELF64-NOT: rldicl
@@ -85,7 +85,7 @@
 
 define i64 @t8() nounwind uwtable ssp {
 ; ELF64: t8
-  %1 = load i16* @b, align 2
+  %1 = load i16, i16* @b, align 2
   %2 = zext i16 %1 to i64
 ; ELF64: lhz
 ; ELF64-NOT: rldicl
@@ -94,7 +94,7 @@
 
 define i64 @t9() nounwind uwtable ssp {
 ; ELF64: t9
-  %1 = load i16* @b, align 2
+  %1 = load i16, i16* @b, align 2
   %2 = sext i16 %1 to i64
 ; ELF64: lha
 ; ELF64-NOT: extsh
@@ -103,7 +103,7 @@
 
 define i64 @t10() nounwind uwtable ssp {
 ; ELF64: t10
-  %1 = load i8* @a, align 2
+  %1 = load i8, i8* @a, align 2
   %2 = sext i8 %1 to i64
 ; ELF64: lbz
 ; ELF64: extsb
@@ -112,7 +112,7 @@
 
 define i64 @t11() nounwind uwtable ssp {
 ; ELF64: t11
-  %1 = load i32* @c, align 4
+  %1 = load i32, i32* @c, align 4
   %2 = zext i32 %1 to i64
 ; ELF64: lwz
 ; ELF64-NOT: rldicl
@@ -121,7 +121,7 @@
 
 define i64 @t12() nounwind uwtable ssp {
 ; ELF64: t12
-  %1 = load i32* @c, align 4
+  %1 = load i32, i32* @c, align 4
   %2 = sext i32 %1 to i64
 ; ELF64: lwa
 ; ELF64-NOT: extsw
diff --git a/llvm/test/CodeGen/PowerPC/fast-isel-load-store.ll b/llvm/test/CodeGen/PowerPC/fast-isel-load-store.ll
index cf53d6b..ec4b0dc 100644
--- a/llvm/test/CodeGen/PowerPC/fast-isel-load-store.ll
+++ b/llvm/test/CodeGen/PowerPC/fast-isel-load-store.ll
@@ -26,7 +26,7 @@
 
 define i8 @t1() nounwind uwtable ssp {
 ; ELF64: t1
-  %1 = load i8* @a, align 1
+  %1 = load i8, i8* @a, align 1
 ; ELF64: lbz
   %2 = add nsw i8 %1, 1
 ; ELF64: addi
@@ -35,7 +35,7 @@
 
 define i16 @t2() nounwind uwtable ssp {
 ; ELF64: t2
-  %1 = load i16* @b, align 2
+  %1 = load i16, i16* @b, align 2
 ; ELF64: lhz
   %2 = add nsw i16 %1, 1
 ; ELF64: addi
@@ -44,7 +44,7 @@
 
 define i32 @t3() nounwind uwtable ssp {
 ; ELF64: t3
-  %1 = load i32* @c, align 4
+  %1 = load i32, i32* @c, align 4
 ; ELF64: lwz
   %2 = add nsw i32 %1, 1
 ; ELF64: addi
@@ -53,7 +53,7 @@
 
 define i64 @t4() nounwind uwtable ssp {
 ; ELF64: t4
-  %1 = load i64* @d, align 4
+  %1 = load i64, i64* @d, align 4
 ; ELF64: ld
   %2 = add nsw i64 %1, 1
 ; ELF64: addi
@@ -62,7 +62,7 @@
 
 define float @t5() nounwind uwtable ssp {
 ; ELF64: t5
-  %1 = load float* @e, align 4
+  %1 = load float, float* @e, align 4
 ; ELF64: lfs
   %2 = fadd float %1, 1.0
 ; ELF64: fadds
@@ -71,7 +71,7 @@
 
 define double @t6() nounwind uwtable ssp {
 ; ELF64: t6
-  %1 = load double* @f, align 8
+  %1 = load double, double* @f, align 8
 ; ELF64: lfd
   %2 = fadd double %1, 1.0
 ; ELF64: fadd
@@ -145,7 +145,7 @@
 ;; lwa requires an offset divisible by 4, so we need lwax here.
 define i64 @t13() nounwind uwtable ssp {
 ; ELF64: t13
-  %1 = load i32* getelementptr inbounds (%struct.s* @g, i32 0, i32 1), align 1
+  %1 = load i32, i32* getelementptr inbounds (%struct.s* @g, i32 0, i32 1), align 1
   %2 = sext i32 %1 to i64
 ; ELF64: li
 ; ELF64: lwax
@@ -157,7 +157,7 @@
 ;; ld requires an offset divisible by 4, so we need ldx here.
 define i64 @t14() nounwind uwtable ssp {
 ; ELF64: t14
-  %1 = load i64* getelementptr inbounds (%struct.t* @h, i32 0, i32 1), align 1
+  %1 = load i64, i64* getelementptr inbounds (%struct.t* @h, i32 0, i32 1), align 1
 ; ELF64: li
 ; ELF64: ldx
   %2 = add nsw i64 %1, 1
@@ -181,7 +181,7 @@
 ;; ld requires an offset that fits in 16 bits, so we need ldx here.
 define i64 @t16() nounwind uwtable ssp {
 ; ELF64: t16
-  %1 = load i64* getelementptr inbounds ([8192 x i64]* @i, i32 0, i64 5000), align 8
+  %1 = load i64, i64* getelementptr inbounds ([8192 x i64]* @i, i32 0, i64 5000), align 8
 ; ELF64: lis
 ; ELF64: ori
 ; ELF64: ldx
diff --git a/llvm/test/CodeGen/PowerPC/fast-isel-redefinition.ll b/llvm/test/CodeGen/PowerPC/fast-isel-redefinition.ll
index aa25786..60706a6 100644
--- a/llvm/test/CodeGen/PowerPC/fast-isel-redefinition.ll
+++ b/llvm/test/CodeGen/PowerPC/fast-isel-redefinition.ll
@@ -5,6 +5,6 @@
 
 define i32 @f(i32* %x) nounwind ssp {
   %y = getelementptr inbounds i32, i32* %x, i32 5000
-  %tmp103 = load i32* %y, align 4
+  %tmp103 = load i32, i32* %y, align 4
   ret i32 %tmp103
 }
diff --git a/llvm/test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll b/llvm/test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll
index fdedc5d..96cf67c 100644
--- a/llvm/test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll
+++ b/llvm/test/CodeGen/PowerPC/fastisel-gep-promote-before-add.ll
@@ -6,12 +6,12 @@
 entry:
   %ptr.addr = alloca i8*, align 8
   %add = add i8 64, 64 ; 0x40 + 0x40
-  %0 = load i8** %ptr.addr, align 8
+  %0 = load i8*, i8** %ptr.addr, align 8
 
   ; CHECK-LABEL: gep_promotion:
   ; CHECK: lbz {{[0-9]+}}, 0({{.*}})
   %arrayidx = getelementptr inbounds i8, i8* %0, i8 %add
 
-  %1 = load i8* %arrayidx, align 1
+  %1 = load i8, i8* %arrayidx, align 1
   ret i8 %1
 }
diff --git a/llvm/test/CodeGen/PowerPC/floatPSA.ll b/llvm/test/CodeGen/PowerPC/floatPSA.ll
index f14c736..cff95d5 100644
--- a/llvm/test/CodeGen/PowerPC/floatPSA.ll
+++ b/llvm/test/CodeGen/PowerPC/floatPSA.ll
@@ -37,7 +37,7 @@
   store float %l, float* %l.addr, align 4
   store float %m, float* %m.addr, align 4
   store float %n, float* %n.addr, align 4
-  %0 = load float* %n.addr, align 4
+  %0 = load float, float* %n.addr, align 4
   ret float %0
 }
 
@@ -73,20 +73,20 @@
   store float 1.200000e+01, float* %l, align 4
   store float 1.300000e+01, float* %m, align 4
   store float 1.400000e+01, float* %n, align 4
-  %0 = load float* %a, align 4
-  %1 = load float* %b, align 4
-  %2 = load float* %c, align 4
-  %3 = load float* %d, align 4
-  %4 = load float* %e, align 4
-  %5 = load float* %f, align 4
-  %6 = load float* %g, align 4
-  %7 = load float* %h, align 4
-  %8 = load float* %i, align 4
-  %9 = load float* %j, align 4
-  %10 = load float* %k, align 4
-  %11 = load float* %l, align 4
-  %12 = load float* %m, align 4
-  %13 = load float* %n, align 4
+  %0 = load float, float* %a, align 4
+  %1 = load float, float* %b, align 4
+  %2 = load float, float* %c, align 4
+  %3 = load float, float* %d, align 4
+  %4 = load float, float* %e, align 4
+  %5 = load float, float* %f, align 4
+  %6 = load float, float* %g, align 4
+  %7 = load float, float* %h, align 4
+  %8 = load float, float* %i, align 4
+  %9 = load float, float* %j, align 4
+  %10 = load float, float* %k, align 4
+  %11 = load float, float* %l, align 4
+  %12 = load float, float* %m, align 4
+  %13 = load float, float* %n, align 4
   %call = call float @bar(float %0, float %1, float %2, float %3, float %4, float %5, float %6, float %7, float %8, float %9, float %10, float %11, float %12, float %13)
   ret float %call
 }
diff --git a/llvm/test/CodeGen/PowerPC/flt-preinc.ll b/llvm/test/CodeGen/PowerPC/flt-preinc.ll
index a0ea01e..c0e3d3a 100644
--- a/llvm/test/CodeGen/PowerPC/flt-preinc.ll
+++ b/llvm/test/CodeGen/PowerPC/flt-preinc.ll
@@ -7,10 +7,10 @@
 entry:
   %idx.ext = sext i32 %o to i64
   %add.ptr = getelementptr inbounds float, float* %i, i64 %idx.ext
-  %0 = load float* %add.ptr, align 4
+  %0 = load float, float* %add.ptr, align 4
   %add.ptr.sum = add nsw i64 %idx.ext, 1
   %add.ptr3 = getelementptr inbounds float, float* %i, i64 %add.ptr.sum
-  %1 = load float* %add.ptr3, align 4
+  %1 = load float, float* %add.ptr3, align 4
   %add = fadd float %0, %1
   ret float %add
 
@@ -24,10 +24,10 @@
 entry:
   %idx.ext = sext i32 %o to i64
   %add.ptr = getelementptr inbounds double, double* %i, i64 %idx.ext
-  %0 = load double* %add.ptr, align 8
+  %0 = load double, double* %add.ptr, align 8
   %add.ptr.sum = add nsw i64 %idx.ext, 1
   %add.ptr3 = getelementptr inbounds double, double* %i, i64 %add.ptr.sum
-  %1 = load double* %add.ptr3, align 8
+  %1 = load double, double* %add.ptr3, align 8
   %add = fadd double %0, %1
   ret double %add
 
diff --git a/llvm/test/CodeGen/PowerPC/fp-to-int-ext.ll b/llvm/test/CodeGen/PowerPC/fp-to-int-ext.ll
index bfacd89..393fe04 100644
--- a/llvm/test/CodeGen/PowerPC/fp-to-int-ext.ll
+++ b/llvm/test/CodeGen/PowerPC/fp-to-int-ext.ll
@@ -5,7 +5,7 @@
 ; Function Attrs: nounwind
 define double @foo1(i32* %x) #0 {
 entry:
-  %0 = load i32* %x, align 4
+  %0 = load i32, i32* %x, align 4
   %conv = sext i32 %0 to i64
   %conv1 = sitofp i64 %conv to double
   ret double %conv1
@@ -18,7 +18,7 @@
 
 define double @foo2(i32* %x) #0 {
 entry:
-  %0 = load i32* %x, align 4
+  %0 = load i32, i32* %x, align 4
   %conv = zext i32 %0 to i64
   %conv1 = sitofp i64 %conv to double
   ret double %conv1
@@ -31,7 +31,7 @@
 
 define double @foo3(i32* %x) #0 {
 entry:
-  %0 = load i32* %x, align 4
+  %0 = load i32, i32* %x, align 4
   %1 = add i32 %0, 8
   %conv = zext i32 %1 to i64
   %conv1 = sitofp i64 %conv to double
@@ -49,7 +49,7 @@
 
 define double @foo4(i32* %x) #0 {
 entry:
-  %0 = load i32* %x, align 4
+  %0 = load i32, i32* %x, align 4
   %1 = add i32 %0, 8
   %conv = sext i32 %1 to i64
   %conv1 = sitofp i64 %conv to double
diff --git a/llvm/test/CodeGen/PowerPC/frounds.ll b/llvm/test/CodeGen/PowerPC/frounds.ll
index 8eeadc3..49f3465 100644
--- a/llvm/test/CodeGen/PowerPC/frounds.ll
+++ b/llvm/test/CodeGen/PowerPC/frounds.ll
@@ -7,12 +7,12 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	%tmp1 = call i32 @llvm.flt.rounds( )		; <i32> [#uses=1]
 	store i32 %tmp1, i32* %tmp, align 4
-	%tmp2 = load i32* %tmp, align 4		; <i32> [#uses=1]
+	%tmp2 = load i32, i32* %tmp, align 4		; <i32> [#uses=1]
 	store i32 %tmp2, i32* %retval, align 4
 	br label %return
 
 return:		; preds = %entry
-	%retval3 = load i32* %retval		; <i32> [#uses=1]
+	%retval3 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval3
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/glob-comp-aa-crash.ll b/llvm/test/CodeGen/PowerPC/glob-comp-aa-crash.ll
index 1f3fec8..66df6bb 100644
--- a/llvm/test/CodeGen/PowerPC/glob-comp-aa-crash.ll
+++ b/llvm/test/CodeGen/PowerPC/glob-comp-aa-crash.ll
@@ -65,12 +65,12 @@
   br label %ehcleanup
 
 if.end:                                           ; preds = %invoke.cont
-  %7 = load i8* %__owns_.i.i, align 8, !tbaa !6, !range !4
+  %7 = load i8, i8* %__owns_.i.i, align 8, !tbaa !6, !range !4
   %tobool.i.i = icmp eq i8 %7, 0
   br i1 %tobool.i.i, label %_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit, label %if.then.i.i
 
 if.then.i.i:                                      ; preds = %if.end
-  %8 = load %"class.std::__1::mutex"** %__m_.i.i, align 8, !tbaa !5
+  %8 = load %"class.std::__1::mutex"*, %"class.std::__1::mutex"** %__m_.i.i, align 8, !tbaa !5
   call void @_ZNSt3__15mutex6unlockEv(%"class.std::__1::mutex"* %8) #5
   br label %_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit
 
@@ -80,12 +80,12 @@
 ehcleanup:                                        ; preds = %lpad3, %lpad
   %exn.slot.0 = phi i8* [ %5, %lpad3 ], [ %2, %lpad ]
   %ehselector.slot.0 = phi i32 [ %6, %lpad3 ], [ %3, %lpad ]
-  %9 = load i8* %__owns_.i.i, align 8, !tbaa !6, !range !4
+  %9 = load i8, i8* %__owns_.i.i, align 8, !tbaa !6, !range !4
   %tobool.i.i9 = icmp eq i8 %9, 0
   br i1 %tobool.i.i9, label %_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit12, label %if.then.i.i11
 
 if.then.i.i11:                                    ; preds = %ehcleanup
-  %10 = load %"class.std::__1::mutex"** %__m_.i.i, align 8, !tbaa !5
+  %10 = load %"class.std::__1::mutex"*, %"class.std::__1::mutex"** %__m_.i.i, align 8, !tbaa !5
   call void @_ZNSt3__15mutex6unlockEv(%"class.std::__1::mutex"* %10) #5
   br label %_ZNSt3__111unique_lockINS_5mutexEED1Ev.exit12
 
diff --git a/llvm/test/CodeGen/PowerPC/hidden-vis-2.ll b/llvm/test/CodeGen/PowerPC/hidden-vis-2.ll
index e9e2c0a..3eb9dbd 100644
--- a/llvm/test/CodeGen/PowerPC/hidden-vis-2.ll
+++ b/llvm/test/CodeGen/PowerPC/hidden-vis-2.ll
@@ -5,8 +5,8 @@
 
 define i32 @t() nounwind readonly {
 entry:
-	%0 = load i32* @x, align 4		; <i32> [#uses=1]
-	%1 = load i32* @y, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* @x, align 4		; <i32> [#uses=1]
+	%1 = load i32, i32* @y, align 4		; <i32> [#uses=1]
 	%2 = add i32 %1, %0		; <i32> [#uses=1]
 	ret i32 %2
 }
diff --git a/llvm/test/CodeGen/PowerPC/hidden-vis.ll b/llvm/test/CodeGen/PowerPC/hidden-vis.ll
index b2cc143..bcb6723 100644
--- a/llvm/test/CodeGen/PowerPC/hidden-vis.ll
+++ b/llvm/test/CodeGen/PowerPC/hidden-vis.ll
@@ -4,6 +4,6 @@
 
 define i32 @t() nounwind readonly {
 entry:
-	%0 = load i32* @x, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* @x, align 4		; <i32> [#uses=1]
 	ret i32 %0
 }
diff --git a/llvm/test/CodeGen/PowerPC/ia-mem-r0.ll b/llvm/test/CodeGen/PowerPC/ia-mem-r0.ll
index 0ce6cc8..a007fc1 100644
--- a/llvm/test/CodeGen/PowerPC/ia-mem-r0.ll
+++ b/llvm/test/CodeGen/PowerPC/ia-mem-r0.ll
@@ -84,10 +84,10 @@
   %52 = bitcast i8* %51 to i64*
   call void asm sideeffect "std  31, $0", "=*m"(i64* %52)
   %53 = getelementptr { i8*, void (i8*, i8*)* }, { i8*, void (i8*, i8*)* }* %fn, i32 0, i32 1
-  %.funcptr = load void (i8*, i8*)** %53
+  %.funcptr = load void (i8*, i8*)*, void (i8*, i8*)** %53
   %54 = getelementptr { i8*, void (i8*, i8*)* }, { i8*, void (i8*, i8*)* }* %fn, i32 0, i32 0
-  %.ptr = load i8** %54
-  %55 = load i8** %sp
+  %.ptr = load i8*, i8** %54
+  %55 = load i8*, i8** %sp
   call void %.funcptr(i8* %.ptr, i8* %55)
   ret void
 }
diff --git a/llvm/test/CodeGen/PowerPC/indexed-load.ll b/llvm/test/CodeGen/PowerPC/indexed-load.ll
index 28d5724..ce386d7 100644
--- a/llvm/test/CodeGen/PowerPC/indexed-load.ll
+++ b/llvm/test/CodeGen/PowerPC/indexed-load.ll
@@ -15,7 +15,7 @@
 entry:
   %Subminor.i.i = getelementptr inbounds %class.test, %class.test* %this, i64 0, i32 1
   %0 = bitcast [5 x i8]* %Subminor.i.i to i40*
-  %bf.load2.i.i = load i40* %0, align 4
+  %bf.load2.i.i = load i40, i40* %0, align 4
   %bf.clear7.i.i = and i40 %bf.load2.i.i, -8589934592
   store i40 %bf.clear7.i.i, i40* %0, align 4
   ret void
diff --git a/llvm/test/CodeGen/PowerPC/indirectbr.ll b/llvm/test/CodeGen/PowerPC/indirectbr.ll
index cd34e08..d1e03ca 100644
--- a/llvm/test/CodeGen/PowerPC/indirectbr.ll
+++ b/llvm/test/CodeGen/PowerPC/indirectbr.ll
@@ -10,7 +10,7 @@
 ; STATIC-LABEL: foo:
 ; PPC64-LABEL: foo:
 entry:
-  %0 = load i8** @nextaddr, align 4               ; <i8*> [#uses=2]
+  %0 = load i8*, i8** @nextaddr, align 4               ; <i8*> [#uses=2]
   %1 = icmp eq i8* %0, null                       ; <i1> [#uses=1]
   br i1 %1, label %bb3, label %bb2
 
@@ -38,7 +38,7 @@
 
 bb3:                                              ; preds = %entry
   %2 = getelementptr inbounds [5 x i8*], [5 x i8*]* @C.0.2070, i32 0, i32 %i ; <i8**> [#uses=1]
-  %gotovar.4.0.pre = load i8** %2, align 4        ; <i8*> [#uses=1]
+  %gotovar.4.0.pre = load i8*, i8** %2, align 4        ; <i8*> [#uses=1]
   br label %bb2
 
 L5:                                               ; preds = %bb2
diff --git a/llvm/test/CodeGen/PowerPC/inlineasm-i64-reg.ll b/llvm/test/CodeGen/PowerPC/inlineasm-i64-reg.ll
index 4d8e704..05f2a19 100644
--- a/llvm/test/CodeGen/PowerPC/inlineasm-i64-reg.ll
+++ b/llvm/test/CodeGen/PowerPC/inlineasm-i64-reg.ll
@@ -19,18 +19,18 @@
   store %struct.BG_CoordinateMapping_t* %map, %struct.BG_CoordinateMapping_t** %map.addr, align 8
   store i64* %numentries, i64** %numentries.addr, align 8
   store i64 1055, i64* %r0, align 8
-  %0 = load i64* %mapsize.addr, align 8
+  %0 = load i64, i64* %mapsize.addr, align 8
   store i64 %0, i64* %r3, align 8
-  %1 = load %struct.BG_CoordinateMapping_t** %map.addr, align 8
+  %1 = load %struct.BG_CoordinateMapping_t*, %struct.BG_CoordinateMapping_t** %map.addr, align 8
   %2 = ptrtoint %struct.BG_CoordinateMapping_t* %1 to i64
   store i64 %2, i64* %r4, align 8
-  %3 = load i64** %numentries.addr, align 8
+  %3 = load i64*, i64** %numentries.addr, align 8
   %4 = ptrtoint i64* %3 to i64
   store i64 %4, i64* %r5, align 8
-  %5 = load i64* %r0, align 8
-  %6 = load i64* %r3, align 8
-  %7 = load i64* %r4, align 8
-  %8 = load i64* %r5, align 8
+  %5 = load i64, i64* %r0, align 8
+  %6 = load i64, i64* %r3, align 8
+  %7 = load i64, i64* %r4, align 8
+  %8 = load i64, i64* %r5, align 8
   %9 = call { i64, i64, i64, i64 } asm sideeffect "sc", "={r0},={r3},={r4},={r5},{r0},{r3},{r4},{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{cr0},~{memory}"(i64 %5, i64 %6, i64 %7, i64 %8) #1, !srcloc !0
 
 ; CHECK-LABEL: @Kernel_RanksToCoords
@@ -52,9 +52,9 @@
   store i64 %asmresult1, i64* %r3, align 8
   store i64 %asmresult2, i64* %r4, align 8
   store i64 %asmresult3, i64* %r5, align 8
-  %10 = load i64* %r3, align 8
+  %10 = load i64, i64* %r3, align 8
   store i64 %10, i64* %tmp
-  %11 = load i64* %tmp
+  %11 = load i64, i64* %tmp
   %conv = trunc i64 %11 to i32
   ret i32 %conv
 }
@@ -87,7 +87,7 @@
 
 if.then:                                          ; preds = %entry
   call void @mtrace()
-  %.pre = load i32* %argc.addr, align 4
+  %.pre = load i32, i32* %argc.addr, align 4
   br label %if.end
 
 if.end:                                           ; preds = %if.then, %entry
diff --git a/llvm/test/CodeGen/PowerPC/isel-rc-nox0.ll b/llvm/test/CodeGen/PowerPC/isel-rc-nox0.ll
index ac99aa4..df0116d 100644
--- a/llvm/test/CodeGen/PowerPC/isel-rc-nox0.ll
+++ b/llvm/test/CodeGen/PowerPC/isel-rc-nox0.ll
@@ -22,7 +22,7 @@
 
 for.cond1.preheader.i2961.i:                      ; preds = %for.inc44.i2977.i, %crc32_gentab.exit
   call void @llvm.memset.p0i8.i64(i8* bitcast ([1 x [9 x i32]]* @g_62 to i8*), i8 -1, i64 36, i32 4, i1 false) #1
-  %0 = load i32* %retval.0.i.i.i, align 4
+  %0 = load i32, i32* %retval.0.i.i.i, align 4
   %tobool.i2967.i = icmp eq i32 %0, 0
   br label %for.body21.i2968.i
 
diff --git a/llvm/test/CodeGen/PowerPC/lbz-from-ld-shift.ll b/llvm/test/CodeGen/PowerPC/lbz-from-ld-shift.ll
index 3eacd6a..7696b84 100644
--- a/llvm/test/CodeGen/PowerPC/lbz-from-ld-shift.ll
+++ b/llvm/test/CodeGen/PowerPC/lbz-from-ld-shift.ll
@@ -5,7 +5,7 @@
 ; Function Attrs: nounwind readonly
 define signext i32 @test(i32* nocapture readonly %P) #0 {
 entry:
-  %0 = load i32* %P, align 4
+  %0 = load i32, i32* %P, align 4
   %shr = lshr i32 %0, 24
   ret i32 %shr
 
diff --git a/llvm/test/CodeGen/PowerPC/lbzux.ll b/llvm/test/CodeGen/PowerPC/lbzux.ll
index 11231e7..4bd9cb6 100644
--- a/llvm/test/CodeGen/PowerPC/lbzux.ll
+++ b/llvm/test/CodeGen/PowerPC/lbzux.ll
@@ -4,7 +4,7 @@
 
 define fastcc void @allocateSpace(i1 %cond1, i1 %cond2) nounwind {
 entry:
-  %0 = load i8** undef, align 8
+  %0 = load i8*, i8** undef, align 8
   br i1 undef, label %return, label %lor.lhs.false
 
 lor.lhs.false:                                    ; preds = %entry
@@ -30,7 +30,7 @@
   %arrayidx49 = getelementptr inbounds i8, i8* %0, i64 %idxprom48139
   %1 = bitcast i8* %arrayidx49 to i16*
   %2 = bitcast i8* %arrayidx18 to i16*
-  %3 = load i16* %1, align 1
+  %3 = load i16, i16* %1, align 1
   store i16 %3, i16* %2, align 1
   br label %return
 
diff --git a/llvm/test/CodeGen/PowerPC/ld-st-upd.ll b/llvm/test/CodeGen/PowerPC/ld-st-upd.ll
index 30fae70..be0c94a 100644
--- a/llvm/test/CodeGen/PowerPC/ld-st-upd.ll
+++ b/llvm/test/CodeGen/PowerPC/ld-st-upd.ll
@@ -5,7 +5,7 @@
 ; Function Attrs: nounwind
 define i32* @test4(i32* readonly %X, i32* nocapture %dest) #0 {
   %Y = getelementptr i32, i32* %X, i64 4
-  %A = load i32* %Y, align 4
+  %A = load i32, i32* %Y, align 4
   store i32 %A, i32* %dest, align 4
   ret i32* %Y
 
diff --git a/llvm/test/CodeGen/PowerPC/ldtoc-inv.ll b/llvm/test/CodeGen/PowerPC/ldtoc-inv.ll
index 69a3dd2..07a1686 100644
--- a/llvm/test/CodeGen/PowerPC/ldtoc-inv.ll
+++ b/llvm/test/CodeGen/PowerPC/ldtoc-inv.ll
@@ -24,7 +24,7 @@
   %idxprom2 = sext i32 %shl1 to i64
   %arrayidx.sum = add nsw i64 %idxprom2, %idxprom
   %arrayidx3 = getelementptr inbounds [4096 x i32], [4096 x i32]* @phasor, i64 0, i64 %arrayidx.sum
-  %1 = load i32* %arrayidx3, align 4
+  %1 = load i32, i32* %arrayidx3, align 4
   %arrayidx5 = getelementptr inbounds i32, i32* %out, i64 %indvars.iv
   store i32 %1, i32* %arrayidx5, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
diff --git a/llvm/test/CodeGen/PowerPC/lha.ll b/llvm/test/CodeGen/PowerPC/lha.ll
index 3a100c1..c5c5f59 100644
--- a/llvm/test/CodeGen/PowerPC/lha.ll
+++ b/llvm/test/CodeGen/PowerPC/lha.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=ppc32 | grep lha
 
 define i32 @test(i16* %a) {
-        %tmp.1 = load i16* %a           ; <i16> [#uses=1]
+        %tmp.1 = load i16, i16* %a           ; <i16> [#uses=1]
         %tmp.2 = sext i16 %tmp.1 to i32         ; <i32> [#uses=1]
         ret i32 %tmp.2
 }
diff --git a/llvm/test/CodeGen/PowerPC/load-constant-addr.ll b/llvm/test/CodeGen/PowerPC/load-constant-addr.ll
index f1d061c..7834430 100644
--- a/llvm/test/CodeGen/PowerPC/load-constant-addr.ll
+++ b/llvm/test/CodeGen/PowerPC/load-constant-addr.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -march=ppc32 | not grep ori
 
 define float @test() {
-        %tmp.i = load float* inttoptr (i32 186018016 to float*)         ; <float> [#uses=1]
+        %tmp.i = load float, float* inttoptr (i32 186018016 to float*)         ; <float> [#uses=1]
         ret float %tmp.i
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/load-shift-combine.ll b/llvm/test/CodeGen/PowerPC/load-shift-combine.ll
index a5d1224..cf0b7fb 100644
--- a/llvm/test/CodeGen/PowerPC/load-shift-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/load-shift-combine.ll
@@ -16,17 +16,17 @@
 define void @test1847() nounwind {
 entry:
   %j = alloca i32, align 4
-  %0 = load i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 8), align 8
-  %1 = load i32* @fails, align 4
-  %bf.load1 = load i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
+  %0 = load i64, i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 8), align 8
+  %1 = load i32, i32* @fails, align 4
+  %bf.load1 = load i96, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
   %bf.clear2 = and i96 %bf.load1, 302231454903657293676543
   %bf.set3 = or i96 %bf.clear2, -38383394772764476296921088
   store i96 %bf.set3, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
-  %2 = load i32* %j, align 4
-  %3 = load i32* %j, align 4
+  %2 = load i32, i32* %j, align 4
+  %3 = load i32, i32* %j, align 4
   %inc11 = add nsw i32 %3, 1
   store i32 %inc11, i32* %j, align 4
-  %bf.load15 = load i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
+  %bf.load15 = load i96, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
   %bf.clear16 = and i96 %bf.load15, -18446744069414584321
   %bf.set17 = or i96 %bf.clear16, 18446743532543672320
   store i96 %bf.set17, i96* bitcast (%struct.S1847* getelementptr inbounds ([5 x %struct.S1847]* @a1847, i32 0, i64 2) to i96*), align 8
diff --git a/llvm/test/CodeGen/PowerPC/loop-data-prefetch.ll b/llvm/test/CodeGen/PowerPC/loop-data-prefetch.ll
index aa2512a..59b74e6 100644
--- a/llvm/test/CodeGen/PowerPC/loop-data-prefetch.ll
+++ b/llvm/test/CodeGen/PowerPC/loop-data-prefetch.ll
@@ -10,7 +10,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %add = fadd double %0, 1.000000e+00
   %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
   store double %add, double* %arrayidx2, align 8
diff --git a/llvm/test/CodeGen/PowerPC/lsa.ll b/llvm/test/CodeGen/PowerPC/lsa.ll
index 2811a0f..f4d61c0 100644
--- a/llvm/test/CodeGen/PowerPC/lsa.ll
+++ b/llvm/test/CodeGen/PowerPC/lsa.ll
@@ -17,9 +17,9 @@
   %arraydecay1 = getelementptr inbounds [8200 x i32], [8200 x i32]* %v, i64 0, i64 0
   %arraydecay2 = getelementptr inbounds [8200 x i32], [8200 x i32]* %w, i64 0, i64 0
   call void @bar(i32* %arraydecay, i32* %arraydecay1, i32* %arraydecay2) #0
-  %3 = load i32* %arraydecay2, align 4
+  %3 = load i32, i32* %arraydecay2, align 4
   %arrayidx3 = getelementptr inbounds [8200 x i32], [8200 x i32]* %w, i64 0, i64 1
-  %4 = load i32* %arrayidx3, align 4
+  %4 = load i32, i32* %arrayidx3, align 4
 
 ; CHECK: @foo
 ; CHECK-NOT: lwzx
diff --git a/llvm/test/CodeGen/PowerPC/lsr-postinc-pos.ll b/llvm/test/CodeGen/PowerPC/lsr-postinc-pos.ll
index ee16aa9..7831df1 100644
--- a/llvm/test/CodeGen/PowerPC/lsr-postinc-pos.ll
+++ b/llvm/test/CodeGen/PowerPC/lsr-postinc-pos.ll
@@ -3,7 +3,7 @@
 ; The icmp is a post-inc use, and the increment is in %bb11, but the
 ; scevgep needs to be inserted in %bb so that it is dominated by %t.
 
-; CHECK: %t = load i8** undef
+; CHECK: %t = load i8*, i8** undef
 ; CHECK: %scevgep = getelementptr i8, i8* %t, i32 %lsr.iv.next
 ; CHECK: %c1 = icmp ult i8* %scevgep, undef
 
@@ -21,7 +21,7 @@
   br i1 %c0, label %bb13, label %bb
 
 bb:
-  %t = load i8** undef, align 16                ; <i8*> [#uses=1]
+  %t = load i8*, i8** undef, align 16                ; <i8*> [#uses=1]
   %p = getelementptr i8, i8* %t, i32 %ii ; <i8*> [#uses=1]
   %c1 = icmp ult i8* %p, undef          ; <i1> [#uses=1]
   %i.next = add i32 %i, 1                        ; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/mask64.ll b/llvm/test/CodeGen/PowerPC/mask64.ll
index 139621a..600cecd 100644
--- a/llvm/test/CodeGen/PowerPC/mask64.ll
+++ b/llvm/test/CodeGen/PowerPC/mask64.ll
@@ -9,8 +9,8 @@
 	br i1 false, label %bb16, label %bb49
 
 bb16:		; preds = %entry
-	%tmp19 = load i8** null, align 1		; <i8*> [#uses=1]
-	%tmp21 = load i8* %tmp19, align 1		; <i8> [#uses=1]
+	%tmp19 = load i8*, i8** null, align 1		; <i8*> [#uses=1]
+	%tmp21 = load i8, i8* %tmp19, align 1		; <i8> [#uses=1]
 	switch i8 %tmp21, label %bb49 [
 		 i8 0, label %bb45
 		 i8 1, label %bb34
diff --git a/llvm/test/CodeGen/PowerPC/mcm-1.ll b/llvm/test/CodeGen/PowerPC/mcm-1.ll
index 4e31550..7ef4a8e 100644
--- a/llvm/test/CodeGen/PowerPC/mcm-1.ll
+++ b/llvm/test/CodeGen/PowerPC/mcm-1.ll
@@ -11,7 +11,7 @@
 
 define signext i32 @test_external() nounwind {
 entry:
-  %0 = load i32* @ei, align 4
+  %0 = load i32, i32* @ei, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* @ei, align 4
   ret i32 %0
diff --git a/llvm/test/CodeGen/PowerPC/mcm-10.ll b/llvm/test/CodeGen/PowerPC/mcm-10.ll
index 9565ebc7..affa249 100644
--- a/llvm/test/CodeGen/PowerPC/mcm-10.ll
+++ b/llvm/test/CodeGen/PowerPC/mcm-10.ll
@@ -10,7 +10,7 @@
 
 define signext i32 @test_fn_static() nounwind {
 entry:
-  %0 = load i32* @test_fn_static.si, align 4
+  %0 = load i32, i32* @test_fn_static.si, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* @test_fn_static.si, align 4
   ret i32 %0
diff --git a/llvm/test/CodeGen/PowerPC/mcm-11.ll b/llvm/test/CodeGen/PowerPC/mcm-11.ll
index 033045c..457c60a 100644
--- a/llvm/test/CodeGen/PowerPC/mcm-11.ll
+++ b/llvm/test/CodeGen/PowerPC/mcm-11.ll
@@ -10,7 +10,7 @@
 
 define signext i32 @test_file_static() nounwind {
 entry:
-  %0 = load i32* @gi, align 4
+  %0 = load i32, i32* @gi, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* @gi, align 4
   ret i32 %0
diff --git a/llvm/test/CodeGen/PowerPC/mcm-2.ll b/llvm/test/CodeGen/PowerPC/mcm-2.ll
index 811600e..03ce1c6 100644
--- a/llvm/test/CodeGen/PowerPC/mcm-2.ll
+++ b/llvm/test/CodeGen/PowerPC/mcm-2.ll
@@ -11,7 +11,7 @@
 
 define signext i32 @test_fn_static() nounwind {
 entry:
-  %0 = load i32* @test_fn_static.si, align 4
+  %0 = load i32, i32* @test_fn_static.si, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* @test_fn_static.si, align 4
   ret i32 %0
diff --git a/llvm/test/CodeGen/PowerPC/mcm-3.ll b/llvm/test/CodeGen/PowerPC/mcm-3.ll
index b6d681d..40188bc 100644
--- a/llvm/test/CodeGen/PowerPC/mcm-3.ll
+++ b/llvm/test/CodeGen/PowerPC/mcm-3.ll
@@ -11,7 +11,7 @@
 
 define signext i32 @test_file_static() nounwind {
 entry:
-  %0 = load i32* @gi, align 4
+  %0 = load i32, i32* @gi, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* @gi, align 4
   ret i32 %0
diff --git a/llvm/test/CodeGen/PowerPC/mcm-5.ll b/llvm/test/CodeGen/PowerPC/mcm-5.ll
index 92ddeca..0c25845 100644
--- a/llvm/test/CodeGen/PowerPC/mcm-5.ll
+++ b/llvm/test/CodeGen/PowerPC/mcm-5.ll
@@ -11,7 +11,7 @@
 entry:
   %i.addr = alloca i32, align 4
   store i32 %i, i32* %i.addr, align 4
-  %0 = load i32* %i.addr, align 4
+  %0 = load i32, i32* %i.addr, align 4
   switch i32 %0, label %sw.default [
     i32 3, label %sw.bb
     i32 4, label %sw.bb1
@@ -23,31 +23,31 @@
   br label %sw.epilog
 
 sw.bb:                                            ; preds = %entry
-  %1 = load i32* %i.addr, align 4
+  %1 = load i32, i32* %i.addr, align 4
   %mul = mul nsw i32 %1, 7
   store i32 %mul, i32* %i.addr, align 4
   br label %sw.bb1
 
 sw.bb1:                                           ; preds = %entry, %sw.bb
-  %2 = load i32* %i.addr, align 4
+  %2 = load i32, i32* %i.addr, align 4
   %dec = add nsw i32 %2, -1
   store i32 %dec, i32* %i.addr, align 4
   br label %sw.bb2
 
 sw.bb2:                                           ; preds = %entry, %sw.bb1
-  %3 = load i32* %i.addr, align 4
+  %3 = load i32, i32* %i.addr, align 4
   %add = add nsw i32 %3, 3
   store i32 %add, i32* %i.addr, align 4
   br label %sw.bb3
 
 sw.bb3:                                           ; preds = %entry, %sw.bb2
-  %4 = load i32* %i.addr, align 4
+  %4 = load i32, i32* %i.addr, align 4
   %shl = shl i32 %4, 1
   store i32 %shl, i32* %i.addr, align 4
   br label %sw.epilog
 
 sw.epilog:                                        ; preds = %sw.bb3, %sw.default
-  %5 = load i32* %i.addr, align 4
+  %5 = load i32, i32* %i.addr, align 4
   ret i32 %5
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/mcm-6.ll b/llvm/test/CodeGen/PowerPC/mcm-6.ll
index f7838b4..6a50d12 100644
--- a/llvm/test/CodeGen/PowerPC/mcm-6.ll
+++ b/llvm/test/CodeGen/PowerPC/mcm-6.ll
@@ -11,7 +11,7 @@
 
 define signext i32 @test_tentative() nounwind {
 entry:
-  %0 = load i32* @ti, align 4
+  %0 = load i32, i32* @ti, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* @ti, align 4
   ret i32 %0
diff --git a/llvm/test/CodeGen/PowerPC/mcm-7.ll b/llvm/test/CodeGen/PowerPC/mcm-7.ll
index 7caa13b..9c8158d 100644
--- a/llvm/test/CodeGen/PowerPC/mcm-7.ll
+++ b/llvm/test/CodeGen/PowerPC/mcm-7.ll
@@ -11,7 +11,7 @@
 entry:
   %func = alloca i32 (i32)*, align 8
   store i32 (i32)* @foo, i32 (i32)** %func, align 8
-  %0 = load i32 (i32)** %func, align 8
+  %0 = load i32 (i32)*, i32 (i32)** %func, align 8
   %1 = bitcast i32 (i32)* %0 to i8*
   ret i8* %1
 }
diff --git a/llvm/test/CodeGen/PowerPC/mcm-8.ll b/llvm/test/CodeGen/PowerPC/mcm-8.ll
index 8a388c6..b265c77 100644
--- a/llvm/test/CodeGen/PowerPC/mcm-8.ll
+++ b/llvm/test/CodeGen/PowerPC/mcm-8.ll
@@ -12,7 +12,7 @@
 define signext i8 @test_avext() nounwind {
 entry:
   %0 = getelementptr inbounds [13 x i8], [13 x i8]* @x, i32 0, i32 0
-  %1 = load i8* %0, align 1
+  %1 = load i8, i8* %0, align 1
   ret i8 %1
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/mcm-9.ll b/llvm/test/CodeGen/PowerPC/mcm-9.ll
index 7906b6a..45a4e69 100644
--- a/llvm/test/CodeGen/PowerPC/mcm-9.ll
+++ b/llvm/test/CodeGen/PowerPC/mcm-9.ll
@@ -11,7 +11,7 @@
 
 define signext i32 @test_external() nounwind {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* @a, align 4
   ret i32 %0
diff --git a/llvm/test/CodeGen/PowerPC/mcm-default.ll b/llvm/test/CodeGen/PowerPC/mcm-default.ll
index 8d4ff14..49e6513 100644
--- a/llvm/test/CodeGen/PowerPC/mcm-default.ll
+++ b/llvm/test/CodeGen/PowerPC/mcm-default.ll
@@ -10,7 +10,7 @@
 
 define signext i32 @test_external() nounwind {
 entry:
-  %0 = load i32* @ei, align 4
+  %0 = load i32, i32* @ei, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* @ei, align 4
   ret i32 %0
diff --git a/llvm/test/CodeGen/PowerPC/mcm-obj-2.ll b/llvm/test/CodeGen/PowerPC/mcm-obj-2.ll
index c42cf0c..f31d852 100644
--- a/llvm/test/CodeGen/PowerPC/mcm-obj-2.ll
+++ b/llvm/test/CodeGen/PowerPC/mcm-obj-2.ll
@@ -10,7 +10,7 @@
 
 define signext i32 @test_fn_static() nounwind {
 entry:
-  %0 = load i32* @test_fn_static.si, align 4
+  %0 = load i32, i32* @test_fn_static.si, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* @test_fn_static.si, align 4
   ret i32 %0
@@ -29,7 +29,7 @@
 
 define signext i32 @test_file_static() nounwind {
 entry:
-  %0 = load i32* @gi, align 4
+  %0 = load i32, i32* @gi, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* @gi, align 4
   ret i32 %0
diff --git a/llvm/test/CodeGen/PowerPC/mcm-obj.ll b/llvm/test/CodeGen/PowerPC/mcm-obj.ll
index d3d05eb..770ef35 100644
--- a/llvm/test/CodeGen/PowerPC/mcm-obj.ll
+++ b/llvm/test/CodeGen/PowerPC/mcm-obj.ll
@@ -12,7 +12,7 @@
 
 define signext i32 @test_external() nounwind {
 entry:
-  %0 = load i32* @ei, align 4
+  %0 = load i32, i32* @ei, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* @ei, align 4
   ret i32 %0
@@ -35,7 +35,7 @@
 
 define signext i32 @test_fn_static() nounwind {
 entry:
-  %0 = load i32* @test_fn_static.si, align 4
+  %0 = load i32, i32* @test_fn_static.si, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* @test_fn_static.si, align 4
   ret i32 %0
@@ -57,7 +57,7 @@
 
 define signext i32 @test_file_static() nounwind {
 entry:
-  %0 = load i32* @gi, align 4
+  %0 = load i32, i32* @gi, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* @gi, align 4
   ret i32 %0
@@ -96,7 +96,7 @@
 entry:
   %i.addr = alloca i32, align 4
   store i32 %i, i32* %i.addr, align 4
-  %0 = load i32* %i.addr, align 4
+  %0 = load i32, i32* %i.addr, align 4
   switch i32 %0, label %sw.default [
     i32 3, label %sw.bb
     i32 4, label %sw.bb1
@@ -108,31 +108,31 @@
   br label %sw.epilog
 
 sw.bb:                                            ; preds = %entry
-  %1 = load i32* %i.addr, align 4
+  %1 = load i32, i32* %i.addr, align 4
   %mul = mul nsw i32 %1, 7
   store i32 %mul, i32* %i.addr, align 4
   br label %sw.bb1
 
 sw.bb1:                                           ; preds = %entry, %sw.bb
-  %2 = load i32* %i.addr, align 4
+  %2 = load i32, i32* %i.addr, align 4
   %dec = add nsw i32 %2, -1
   store i32 %dec, i32* %i.addr, align 4
   br label %sw.bb2
 
 sw.bb2:                                           ; preds = %entry, %sw.bb1
-  %3 = load i32* %i.addr, align 4
+  %3 = load i32, i32* %i.addr, align 4
   %add = add nsw i32 %3, 3
   store i32 %add, i32* %i.addr, align 4
   br label %sw.bb3
 
 sw.bb3:                                           ; preds = %entry, %sw.bb2
-  %4 = load i32* %i.addr, align 4
+  %4 = load i32, i32* %i.addr, align 4
   %shl = shl i32 %4, 1
   store i32 %shl, i32* %i.addr, align 4
   br label %sw.epilog
 
 sw.epilog:                                        ; preds = %sw.bb3, %sw.default
-  %5 = load i32* %i.addr, align 4
+  %5 = load i32, i32* %i.addr, align 4
   ret i32 %5
 }
 
@@ -149,7 +149,7 @@
 
 define signext i32 @test_tentative() nounwind {
 entry:
-  %0 = load i32* @ti, align 4
+  %0 = load i32, i32* @ti, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* @ti, align 4
   ret i32 %0
@@ -168,7 +168,7 @@
 entry:
   %func = alloca i32 (i32)*, align 8
   store i32 (i32)* @foo, i32 (i32)** %func, align 8
-  %0 = load i32 (i32)** %func, align 8
+  %0 = load i32 (i32)*, i32 (i32)** %func, align 8
   %1 = bitcast i32 (i32)* %0 to i8*
   ret i8* %1
 }
diff --git a/llvm/test/CodeGen/PowerPC/mem-rr-addr-mode.ll b/llvm/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
index 57f6539..60a4bdb 100644
--- a/llvm/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
+++ b/llvm/test/CodeGen/PowerPC/mem-rr-addr-mode.ll
@@ -6,11 +6,11 @@
 
 define void @func(<4 x float>* %a, <4 x float>* %b) {
         %tmp1 = getelementptr <4 x float>, <4 x float>* %b, i32 1            ; <<4 x float>*> [#uses=1]
-        %tmp = load <4 x float>* %tmp1          ; <<4 x float>> [#uses=1]
+        %tmp = load <4 x float>, <4 x float>* %tmp1          ; <<4 x float>> [#uses=1]
         %tmp3 = getelementptr <4 x float>, <4 x float>* %a, i32 1            ; <<4 x float>*> [#uses=1]
-        %tmp4 = load <4 x float>* %tmp3         ; <<4 x float>> [#uses=1]
+        %tmp4 = load <4 x float>, <4 x float>* %tmp3         ; <<4 x float>> [#uses=1]
         %tmp5 = fmul <4 x float> %tmp, %tmp4             ; <<4 x float>> [#uses=1]
-        %tmp8 = load <4 x float>* %b            ; <<4 x float>> [#uses=1]
+        %tmp8 = load <4 x float>, <4 x float>* %b            ; <<4 x float>> [#uses=1]
         %tmp9 = fadd <4 x float> %tmp5, %tmp8            ; <<4 x float>> [#uses=1]
         store <4 x float> %tmp9, <4 x float>* %a
         ret void
diff --git a/llvm/test/CodeGen/PowerPC/mem_update.ll b/llvm/test/CodeGen/PowerPC/mem_update.ll
index 47316f9..2fa0140 100644
--- a/llvm/test/CodeGen/PowerPC/mem_update.ll
+++ b/llvm/test/CodeGen/PowerPC/mem_update.ll
@@ -7,21 +7,21 @@
 
 define i32* @test0(i32* %X, i32* %dest) nounwind {
 	%Y = getelementptr i32, i32* %X, i32 4
-	%A = load i32* %Y
+	%A = load i32, i32* %Y
 	store i32 %A, i32* %dest
 	ret i32* %Y
 }
 
 define i32* @test1(i32* %X, i32* %dest) nounwind {
 	%Y = getelementptr i32, i32* %X, i32 4
-	%A = load i32* %Y
+	%A = load i32, i32* %Y
 	store i32 %A, i32* %dest
 	ret i32* %Y
 }
 
 define i16* @test2(i16* %X, i32* %dest) nounwind {
 	%Y = getelementptr i16, i16* %X, i32 4
-	%A = load i16* %Y
+	%A = load i16, i16* %Y
 	%B = sext i16 %A to i32
 	store i32 %B, i32* %dest
 	ret i16* %Y
@@ -29,7 +29,7 @@
 
 define i16* @test3(i16* %X, i32* %dest) nounwind {
 	%Y = getelementptr i16, i16* %X, i32 4
-	%A = load i16* %Y
+	%A = load i16, i16* %Y
 	%B = zext i16 %A to i32
 	store i32 %B, i32* %dest
 	ret i16* %Y
@@ -37,7 +37,7 @@
 
 define i16* @test3a(i16* %X, i64* %dest) nounwind {
 	%Y = getelementptr i16, i16* %X, i32 4
-	%A = load i16* %Y
+	%A = load i16, i16* %Y
 	%B = sext i16 %A to i64
 	store i64 %B, i64* %dest
 	ret i16* %Y
@@ -45,7 +45,7 @@
 
 define i64* @test4(i64* %X, i64* %dest) nounwind {
 	%Y = getelementptr i64, i64* %X, i32 4
-	%A = load i64* %Y
+	%A = load i64, i64* %Y
 	store i64 %A, i64* %dest
 	ret i64* %Y
 }
diff --git a/llvm/test/CodeGen/PowerPC/misched-inorder-latency.ll b/llvm/test/CodeGen/PowerPC/misched-inorder-latency.ll
index 638409d..0f57e90 100644
--- a/llvm/test/CodeGen/PowerPC/misched-inorder-latency.ll
+++ b/llvm/test/CodeGen/PowerPC/misched-inorder-latency.ll
@@ -15,13 +15,13 @@
 define i32 @testload(i32 *%ptr, i32 %sumin) {
 entry:
   %sum1 = add i32 %sumin, 1
-  %val1 = load i32* %ptr
+  %val1 = load i32, i32* %ptr
   %p = icmp eq i32 %sumin, 0
   br i1 %p, label %true, label %end
 true:
   %sum2 = add i32 %sum1, 1
   %ptr2 = getelementptr i32, i32* %ptr, i32 1
-  %val = load i32* %ptr2
+  %val = load i32, i32* %ptr2
   %val2 = add i32 %val1, %val
   br label %end
 end:
diff --git a/llvm/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll b/llvm/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll
index 743cc62..ae7f0d1 100644
--- a/llvm/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll
+++ b/llvm/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll
@@ -33,10 +33,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -48,10 +48,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -63,7 +63,7 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
   ret void
@@ -120,10 +120,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind
@@ -137,10 +137,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind
@@ -165,7 +165,7 @@
 
 define void @multi_m() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind
   ret void
 }
@@ -190,10 +190,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -205,10 +205,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -220,7 +220,7 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
   ret void
@@ -277,10 +277,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind
@@ -294,10 +294,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind
diff --git a/llvm/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll b/llvm/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll
index 29a5786..271b063 100644
--- a/llvm/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll
+++ b/llvm/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll
@@ -33,10 +33,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -48,10 +48,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -63,7 +63,7 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
   ret void
@@ -120,10 +120,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind
@@ -137,10 +137,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind
@@ -165,7 +165,7 @@
 
 define void @multi_m() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind
   ret void
 }
@@ -190,10 +190,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -205,10 +205,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -220,7 +220,7 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
   ret void
@@ -277,10 +277,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind
@@ -294,10 +294,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind
diff --git a/llvm/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll b/llvm/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll
index 6beee25..26739bf 100644
--- a/llvm/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/no-extra-fp-conv-ldst.ll
@@ -5,7 +5,7 @@
 ; Function Attrs: nounwind readonly
 define double @test1(i64* nocapture readonly %x) #0 {
 entry:
-  %0 = load i64* %x, align 8
+  %0 = load i64, i64* %x, align 8
   %conv = sitofp i64 %0 to double
   ret double %conv
 
@@ -18,7 +18,7 @@
 ; Function Attrs: nounwind readonly
 define double @test2(i32* nocapture readonly %x) #0 {
 entry:
-  %0 = load i32* %x, align 4
+  %0 = load i32, i32* %x, align 4
   %conv = sitofp i32 %0 to double
   ret double %conv
 
diff --git a/llvm/test/CodeGen/PowerPC/novrsave.ll b/llvm/test/CodeGen/PowerPC/novrsave.ll
index a70576a..50be2a1 100644
--- a/llvm/test/CodeGen/PowerPC/novrsave.ll
+++ b/llvm/test/CodeGen/PowerPC/novrsave.ll
@@ -7,7 +7,7 @@
 entry:
   %v.addr = alloca <4 x float>, align 16
   store <4 x float> %v, <4 x float>* %v.addr, align 16
-  %0 = load <4 x float>* %v.addr, align 16
+  %0 = load <4 x float>, <4 x float>* %v.addr, align 16
   ret <4 x float> %0
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/or-addressing-mode.ll b/llvm/test/CodeGen/PowerPC/or-addressing-mode.ll
index e50374e..f98b34c 100644
--- a/llvm/test/CodeGen/PowerPC/or-addressing-mode.ll
+++ b/llvm/test/CodeGen/PowerPC/or-addressing-mode.ll
@@ -8,7 +8,7 @@
         %tmp.11.i = and i32 %tmp.10.i, 2040             ; <i32> [#uses=1]
         %tmp.13.i = or i32 %tmp.11.i, %tmp.4.i          ; <i32> [#uses=1]
         %tmp.14.i = inttoptr i32 %tmp.13.i to i32*              ; <i32*> [#uses=1]
-        %tmp.3 = load i32* %tmp.14.i            ; <i32> [#uses=1]
+        %tmp.3 = load i32, i32* %tmp.14.i            ; <i32> [#uses=1]
         ret i32 %tmp.3
 }
 
@@ -16,7 +16,7 @@
         %tmp.2 = shl i32 %P, 4          ; <i32> [#uses=1]
         %tmp.3 = or i32 %tmp.2, 2               ; <i32> [#uses=1]
         %tmp.4 = inttoptr i32 %tmp.3 to i32*            ; <i32*> [#uses=1]
-        %tmp.5 = load i32* %tmp.4               ; <i32> [#uses=1]
+        %tmp.5 = load i32, i32* %tmp.4               ; <i32> [#uses=1]
         ret i32 %tmp.5
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/post-ra-ec.ll b/llvm/test/CodeGen/PowerPC/post-ra-ec.ll
index 9357497..e32441b 100644
--- a/llvm/test/CodeGen/PowerPC/post-ra-ec.ll
+++ b/llvm/test/CodeGen/PowerPC/post-ra-ec.ll
@@ -16,9 +16,9 @@
   br i1 undef, label %if.end, label %if.then
 
 if.then:                                          ; preds = %entry
-  %0 = load i64* undef, align 8
+  %0 = load i64, i64* undef, align 8
   %conv.i = trunc i64 %0 to i32
-  %1 = load i32* null, align 4
+  %1 = load i32, i32* null, align 4
   %add = add i32 %1, %conv.i
   store i32 %add, i32* null, align 4
   %counter.i.i = getelementptr inbounds %struct.task_struct.4.16.124, %struct.task_struct.4.16.124* %call1.i, i64 0, i32 1, i32 0
diff --git a/llvm/test/CodeGen/PowerPC/ppc-prologue.ll b/llvm/test/CodeGen/PowerPC/ppc-prologue.ll
index 5538371..c84e6fb 100644
--- a/llvm/test/CodeGen/PowerPC/ppc-prologue.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc-prologue.ll
@@ -14,12 +14,12 @@
   store i32 %a, i32* %a_addr
   %1 = call i32 @_Z3barPi(i32* %a_addr)           ; <i32> [#uses=1]
   store i32 %1, i32* %0, align 4
-  %2 = load i32* %0, align 4                      ; <i32> [#uses=1]
+  %2 = load i32, i32* %0, align 4                      ; <i32> [#uses=1]
   store i32 %2, i32* %retval, align 4
   br label %return
 
 return:                                           ; preds = %entry
-  %retval1 = load i32* %retval                    ; <i32> [#uses=1]
+  %retval1 = load i32, i32* %retval                    ; <i32> [#uses=1]
   ret i32 %retval1
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/ppc32-lshrti3.ll b/llvm/test/CodeGen/PowerPC/ppc32-lshrti3.ll
index f773cce..a2a280f 100644
--- a/llvm/test/CodeGen/PowerPC/ppc32-lshrti3.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc32-lshrti3.ll
@@ -9,7 +9,7 @@
 ; Function Attrs: nounwind uwtable
 define i32 @fn1() #0 {
 entry:
-  %.promoted = load i72* inttoptr (i32 1 to i72*), align 4
+  %.promoted = load i72, i72* inttoptr (i32 1 to i72*), align 4
   br label %while.cond
 
 while.cond:                                       ; preds = %while.cond, %entry
diff --git a/llvm/test/CodeGen/PowerPC/ppc32-pic-large.ll b/llvm/test/CodeGen/PowerPC/ppc32-pic-large.ll
index bb906ec..4c85ab9 100644
--- a/llvm/test/CodeGen/PowerPC/ppc32-pic-large.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc32-pic-large.ll
@@ -5,7 +5,7 @@
 
 define i32 @foo() {
 entry:
-  %0 = load i32* @bar, align 4
+  %0 = load i32, i32* @bar, align 4
   %call = call i32 (i32, ...)* @call_foo(i32 %0, i32 0, i32 1, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64)
   ret i32 %0
 }
diff --git a/llvm/test/CodeGen/PowerPC/ppc32-pic.ll b/llvm/test/CodeGen/PowerPC/ppc32-pic.ll
index abc1367..74f9394 100644
--- a/llvm/test/CodeGen/PowerPC/ppc32-pic.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc32-pic.ll
@@ -5,7 +5,7 @@
 
 define i32 @foo() {
 entry:
-  %0 = load i32* @bar, align 4
+  %0 = load i32, i32* @bar, align 4
   %call = call i32 (i32, ...)* @call_foo(i32 %0, i32 0, i32 1, i32 2, i32 4, i32 8, i32 16, i32 32, i32 64)
   ret i32 0
 }
diff --git a/llvm/test/CodeGen/PowerPC/ppc440-fp-basic.ll b/llvm/test/CodeGen/PowerPC/ppc440-fp-basic.ll
index af0cef4..9577357 100644
--- a/llvm/test/CodeGen/PowerPC/ppc440-fp-basic.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc440-fp-basic.ll
@@ -5,13 +5,13 @@
 define void @maybe_an_fma(%0* sret %agg.result, %0* byval %a, %0* byval %b, %0* byval %c) nounwind {
 entry:
   %a.realp = getelementptr inbounds %0, %0* %a, i32 0, i32 0
-  %a.real = load double* %a.realp
+  %a.real = load double, double* %a.realp
   %a.imagp = getelementptr inbounds %0, %0* %a, i32 0, i32 1
-  %a.imag = load double* %a.imagp
+  %a.imag = load double, double* %a.imagp
   %b.realp = getelementptr inbounds %0, %0* %b, i32 0, i32 0
-  %b.real = load double* %b.realp
+  %b.real = load double, double* %b.realp
   %b.imagp = getelementptr inbounds %0, %0* %b, i32 0, i32 1
-  %b.imag = load double* %b.imagp
+  %b.imag = load double, double* %b.imagp
   %mul.rl = fmul double %a.real, %b.real
   %mul.rr = fmul double %a.imag, %b.imag
   %mul.r = fsub double %mul.rl, %mul.rr
@@ -19,9 +19,9 @@
   %mul.ir = fmul double %a.real, %b.imag
   %mul.i = fadd double %mul.il, %mul.ir
   %c.realp = getelementptr inbounds %0, %0* %c, i32 0, i32 0
-  %c.real = load double* %c.realp
+  %c.real = load double, double* %c.realp
   %c.imagp = getelementptr inbounds %0, %0* %c, i32 0, i32 1
-  %c.imag = load double* %c.imagp
+  %c.imag = load double, double* %c.imagp
   %add.r = fadd double %mul.r, %c.real
   %add.i = fadd double %mul.i, %c.imag
   %real = getelementptr inbounds %0, %0* %agg.result, i32 0, i32 0
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-abi-extend.ll b/llvm/test/CodeGen/PowerPC/ppc64-abi-extend.ll
index 8baf1c6..f8e72e3 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-abi-extend.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-abi-extend.ll
@@ -15,7 +15,7 @@
 
 define void @pass_arg_si() nounwind {
 entry:
-  %0 = load i32* @si, align 4
+  %0 = load i32, i32* @si, align 4
   tail call void @arg_si(i32 signext %0) nounwind
   ret void
 }
@@ -25,7 +25,7 @@
 
 define void @pass_arg_ui() nounwind {
 entry:
-  %0 = load i32* @ui, align 4
+  %0 = load i32, i32* @ui, align 4
   tail call void @arg_ui(i32 zeroext %0) nounwind
   ret void
 }
@@ -53,7 +53,7 @@
 
 define signext i32 @pass_ret_si() nounwind readonly {
 entry:
-  %0 = load i32* @si, align 4
+  %0 = load i32, i32* @si, align 4
   ret i32 %0
 }
 ; CHECK: @pass_ret_si
@@ -62,7 +62,7 @@
 
 define zeroext i32 @pass_ret_ui() nounwind readonly {
 entry:
-  %0 = load i32* @ui, align 4
+  %0 = load i32, i32* @ui, align 4
   ret i32 %0
 }
 ; CHECK: @pass_ret_ui
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-align-long-double.ll b/llvm/test/CodeGen/PowerPC/ppc64-align-long-double.ll
index db50451..b9fd670 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-align-long-double.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-align-long-double.ll
@@ -14,7 +14,7 @@
 define ppc_fp128 @test(%struct.S* byval %x) nounwind {
 entry:
   %b = getelementptr inbounds %struct.S, %struct.S* %x, i32 0, i32 1
-  %0 = load ppc_fp128* %b, align 16
+  %0 = load ppc_fp128, ppc_fp128* %b, align 16
   ret ppc_fp128 %0
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-byval-align.ll b/llvm/test/CodeGen/PowerPC/ppc64-byval-align.ll
index be1c502..7170f59 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-byval-align.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-byval-align.ll
@@ -31,7 +31,7 @@
 define i64 @callee2(%struct.pad* byval nocapture readnone %x, i32 signext %y, %struct.test* byval align 16 nocapture readonly %z) {
 entry:
   %x1 = getelementptr inbounds %struct.test, %struct.test* %z, i64 0, i32 0
-  %0 = load i64* %x1, align 16
+  %0 = load i64, i64* %x1, align 16
   ret i64 %0
 }
 ; CHECK-LABEL: @callee2
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-calls.ll b/llvm/test/CodeGen/PowerPC/ppc64-calls.ll
index 707ba95..23a14e6 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-calls.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-calls.ll
@@ -73,7 +73,7 @@
 @g = external global void ()*
 declare void @h(i64)
 define void @test_indir_toc_reload(i64 %x) {
-  %1 = load void ()** @g
+  %1 = load void ()*, void ()** @g
   call void %1()
   call void @h(i64 %x)
   ret void
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-gep-opt.ll b/llvm/test/CodeGen/PowerPC/ppc64-gep-opt.ll
index d252896..f238908 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-gep-opt.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-gep-opt.ll
@@ -15,13 +15,13 @@
; eliminate the common subexpression for the second use.
 define void @test_GEP_CSE([240 x %struct]* %string, i32* %adj, i32 %lib, i64 %idxprom) {
   %liberties = getelementptr [240 x %struct], [240 x %struct]* %string, i64 1, i64 %idxprom, i32 3
-  %1 = load i32* %liberties, align 4
+  %1 = load i32, i32* %liberties, align 4
   %cmp = icmp eq i32 %1, %lib
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
   %origin = getelementptr [240 x %struct], [240 x %struct]* %string, i64 1, i64 %idxprom, i32 2
-  %2 = load i32* %origin, align 4
+  %2 = load i32, i32* %origin, align 4
   store i32 %2, i32* %adj, align 4
   br label %if.end
 
@@ -60,9 +60,9 @@
 ; use.
 define void @test_GEP_across_BB(%class.my* %this, i64 %idx) {
   %1 = getelementptr %class.my, %class.my* %this, i64 0, i32 3, i64 %idx, i32 1
-  %2 = load i32* %1, align 4
+  %2 = load i32, i32* %1, align 4
   %3 = getelementptr %class.my, %class.my* %this, i64 0, i32 3, i64 %idx, i32 2
-  %4 = load i32* %3, align 4
+  %4 = load i32, i32* %3, align 4
   %5 = icmp eq i32 %2, %4
   br i1 %5, label %if.true, label %exit
 
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-patchpoint.ll b/llvm/test/CodeGen/PowerPC/ppc64-patchpoint.ll
index e2de70c..48ffb6a 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-patchpoint.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-patchpoint.ll
@@ -63,13 +63,13 @@
 entry:
   %tmp80 = add i64 %tmp79, -16
   %tmp81 = inttoptr i64 %tmp80 to i64*
-  %tmp82 = load i64* %tmp81, align 8
+  %tmp82 = load i64, i64* %tmp81, align 8
   tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 8, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
   tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 15, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
-  %tmp83 = load i64* %tmp33, align 8
+  %tmp83 = load i64, i64* %tmp33, align 8
   %tmp84 = add i64 %tmp83, -24
   %tmp85 = inttoptr i64 %tmp84 to i64*
-  %tmp86 = load i64* %tmp85, align 8
+  %tmp86 = load i64, i64* %tmp85, align 8
   tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 17, i32 8, i64 %arg, i64 %tmp10, i64 %tmp86)
   tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 18, i32 32, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
   ret i64 10
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-smallarg.ll b/llvm/test/CodeGen/PowerPC/ppc64-smallarg.ll
index 0d5b078..27aca10 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-smallarg.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-smallarg.ll
@@ -17,7 +17,7 @@
 entry:
   %0 = bitcast %struct.small_arg* %x to i32*
   %1 = bitcast %struct.small_arg* %agg.result to i32*
-  %2 = load i32* %0, align 2
+  %2 = load i32, i32* %0, align 2
   store i32 %2, i32* %1, align 2
   ret void
 }
@@ -47,7 +47,7 @@
 
 define void @caller2() {
 entry:
-  %0 = load float* @gf, align 4
+  %0 = load float, float* @gf, align 4
   %call = tail call float @test2(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %0)
   ret void
 }
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-toc.ll b/llvm/test/CodeGen/PowerPC/ppc64-toc.ll
index 5ea2a32..86bb9ce 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-toc.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-toc.ll
@@ -14,7 +14,7 @@
 ; CHECK-NEXT: .quad   .TOC.@tocbase
 ; CHECK-NEXT: .quad   0
 ; CHECK-NEXT: .text
-  %0 = load i64* @number64, align 8
+  %0 = load i64, i64* @number64, align 8
 ; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2)
   %cmp = icmp eq i64 %0, %a
   %conv1 = zext i1 %cmp to i64 
@@ -25,7 +25,7 @@
 entry:
 ; CHECK-LABEL: internal_static_var:
 ; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2)
-  %0 = load i64* @internal_static_var.x, align 8
+  %0 = load i64, i64* @internal_static_var.x, align 8
   %cmp = icmp eq i64 %0, %a
   %conv1 = zext i1 %cmp to i64 
   ret i64 %conv1 
@@ -46,7 +46,7 @@
 ; CHECK-LABEL: access_double_array:
   %idxprom = sext i32 %i to i64
   %arrayidx = getelementptr inbounds [32 x double], [32 x double]* @double_array, i64 0, i64 %idxprom
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
 ; CHECK: ld {{[0-9]+}}, .LC{{[0-9]+}}@toc(2)
   %cmp = fcmp oeq double %0, %a
   %conv = zext i1 %cmp to i32
diff --git a/llvm/test/CodeGen/PowerPC/ppc64le-aggregates.ll b/llvm/test/CodeGen/PowerPC/ppc64le-aggregates.ll
index 3fce36e..ac8d0c2 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64le-aggregates.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64le-aggregates.ll
@@ -257,9 +257,9 @@
 
 define void @caller2() {
 entry:
-  %0 = load [8 x float]* getelementptr inbounds (%struct.float8* @g8, i64 0, i32 0), align 4
-  %1 = load [5 x float]* getelementptr inbounds (%struct.float5* @g5, i64 0, i32 0), align 4
-  %2 = load [2 x float]* getelementptr inbounds (%struct.float2* @g2, i64 0, i32 0), align 4
+  %0 = load [8 x float], [8 x float]* getelementptr inbounds (%struct.float8* @g8, i64 0, i32 0), align 4
+  %1 = load [5 x float], [5 x float]* getelementptr inbounds (%struct.float5* @g5, i64 0, i32 0), align 4
+  %2 = load [2 x float], [2 x float]* getelementptr inbounds (%struct.float2* @g2, i64 0, i32 0), align 4
   tail call void @test2([8 x float] %0, [5 x float] %1, [2 x float] %2)
   ret void
 }
@@ -299,8 +299,8 @@
 
 define void @caller3(double %d) {
 entry:
-  %0 = load [8 x float]* getelementptr inbounds (%struct.float8* @g8, i64 0, i32 0), align 4
-  %1 = load [5 x float]* getelementptr inbounds (%struct.float5* @g5, i64 0, i32 0), align 4
+  %0 = load [8 x float], [8 x float]* getelementptr inbounds (%struct.float8* @g8, i64 0, i32 0), align 4
+  %1 = load [5 x float], [5 x float]* getelementptr inbounds (%struct.float5* @g5, i64 0, i32 0), align 4
   tail call void @test3([8 x float] %0, [5 x float] %1, double %d)
   ret void
 }
@@ -322,8 +322,8 @@
 
 define void @caller4(float %f) {
 entry:
-  %0 = load [8 x float]* getelementptr inbounds (%struct.float8* @g8, i64 0, i32 0), align 4
-  %1 = load [5 x float]* getelementptr inbounds (%struct.float5* @g5, i64 0, i32 0), align 4
+  %0 = load [8 x float], [8 x float]* getelementptr inbounds (%struct.float8* @g8, i64 0, i32 0), align 4
+  %1 = load [5 x float], [5 x float]* getelementptr inbounds (%struct.float5* @g5, i64 0, i32 0), align 4
   tail call void @test4([8 x float] %0, [5 x float] %1, float %f)
   ret void
 }
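
The ppc64le-aggregates.ll hunks above apply the same rewrite to first-class
aggregate loads, where the repeated type is an array such as [8 x float]. A
minimal sketch of the transformation's shape (illustration only, not the
script used to produce this patch; it assumes the pointee type contains no
comma and no addrspace qualifier, and leaves already-migrated lines
untouched):

import re

# Old form:  <res> = load [atomic] [volatile] <ty>* <ptr>...
# New form:  <res> = load [atomic] [volatile] <ty>, <ty>* <ptr>...
# The [^,]+? pointee pattern cannot cross a comma, so lines already in the
# two-type form (and struct-typed pointees) simply fail to match.
OLD = re.compile(
    r"^(\s*%\S+ = load (?:atomic )?(?:volatile )?)"
    r"([^,]+?)\*( *(?:%|@|getelementptr|inttoptr|bitcast|null|undef))")

def to_explicit_type(line):
    m = OLD.match(line)
    if m is None:
        return line  # already migrated, not a load, or a form the sketch skips
    prefix, pointee, rest = m.groups()
    # Repeat the pointee type before the pointer operand.
    return "%s%s, %s*%s%s" % (prefix, pointee, pointee, rest, line[m.end():])

assert (to_explicit_type("  %0 = load [8 x float]* getelementptr inbounds (%struct.float8* @g8, i64 0, i32 0), align 4")
        == "  %0 = load [8 x float], [8 x float]* getelementptr inbounds (%struct.float8* @g8, i64 0, i32 0), align 4")
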
diff --git a/llvm/test/CodeGen/PowerPC/ppc64le-localentry.ll b/llvm/test/CodeGen/PowerPC/ppc64le-localentry.ll
index d9995de..73697b2 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64le-localentry.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64le-localentry.ll
@@ -22,7 +22,7 @@
 ; CHECK-NEXT: .Ltmp[[TMP2:[0-9]+]]:
 ; CHECK-NEXT: .localentry use_toc, .Ltmp[[TMP2]]-.Ltmp[[TMP1]]
 ; CHECK-NEXT: %entry
-  %0 = load i64* @number64, align 8
+  %0 = load i64, i64* @number64, align 8
   %cmp = icmp eq i64 %0, %a
   %conv1 = zext i1 %cmp to i64
   ret i64 %conv1
diff --git a/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll b/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll
index 120c140..77d0663 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64le-smallarg.ll
@@ -17,7 +17,7 @@
 entry:
   %0 = bitcast %struct.small_arg* %x to i32*
   %1 = bitcast %struct.small_arg* %agg.result to i32*
-  %2 = load i32* %0, align 2
+  %2 = load i32, i32* %0, align 2
   store i32 %2, i32* %1, align 2
   ret void
 }
@@ -47,7 +47,7 @@
 
 define void @caller2() {
 entry:
-  %0 = load float* @gf, align 4
+  %0 = load float, float* @gf, align 4
   %call = tail call float @test2(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float %0)
   ret void
 }
diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-1.ll b/llvm/test/CodeGen/PowerPC/ppcf128-1.ll
index 2cec934..f0e58f6 100644
--- a/llvm/test/CodeGen/PowerPC/ppcf128-1.ll
+++ b/llvm/test/CodeGen/PowerPC/ppcf128-1.ll
@@ -12,16 +12,16 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store ppc_fp128 %x, ppc_fp128* %x_addr
 	store ppc_fp128 %y, ppc_fp128* %y_addr
-	%tmp1 = load ppc_fp128* %x_addr, align 16		; <ppc_fp128> [#uses=1]
-	%tmp2 = load ppc_fp128* %y_addr, align 16		; <ppc_fp128> [#uses=1]
+	%tmp1 = load ppc_fp128, ppc_fp128* %x_addr, align 16		; <ppc_fp128> [#uses=1]
+	%tmp2 = load ppc_fp128, ppc_fp128* %y_addr, align 16		; <ppc_fp128> [#uses=1]
 	%tmp3 = fadd ppc_fp128 %tmp1, %tmp2		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %tmp3, ppc_fp128* %tmp, align 16
-	%tmp4 = load ppc_fp128* %tmp, align 16		; <ppc_fp128> [#uses=1]
+	%tmp4 = load ppc_fp128, ppc_fp128* %tmp, align 16		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %tmp4, ppc_fp128* %retval, align 16
 	br label %return
 
 return:		; preds = %entry
-	%retval5 = load ppc_fp128* %retval		; <ppc_fp128> [#uses=1]
+	%retval5 = load ppc_fp128, ppc_fp128* %retval		; <ppc_fp128> [#uses=1]
 	ret ppc_fp128 %retval5
 }
 
@@ -34,16 +34,16 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store ppc_fp128 %x, ppc_fp128* %x_addr
 	store ppc_fp128 %y, ppc_fp128* %y_addr
-	%tmp1 = load ppc_fp128* %x_addr, align 16		; <ppc_fp128> [#uses=1]
-	%tmp2 = load ppc_fp128* %y_addr, align 16		; <ppc_fp128> [#uses=1]
+	%tmp1 = load ppc_fp128, ppc_fp128* %x_addr, align 16		; <ppc_fp128> [#uses=1]
+	%tmp2 = load ppc_fp128, ppc_fp128* %y_addr, align 16		; <ppc_fp128> [#uses=1]
 	%tmp3 = fsub ppc_fp128 %tmp1, %tmp2		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %tmp3, ppc_fp128* %tmp, align 16
-	%tmp4 = load ppc_fp128* %tmp, align 16		; <ppc_fp128> [#uses=1]
+	%tmp4 = load ppc_fp128, ppc_fp128* %tmp, align 16		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %tmp4, ppc_fp128* %retval, align 16
 	br label %return
 
 return:		; preds = %entry
-	%retval5 = load ppc_fp128* %retval		; <ppc_fp128> [#uses=1]
+	%retval5 = load ppc_fp128, ppc_fp128* %retval		; <ppc_fp128> [#uses=1]
 	ret ppc_fp128 %retval5
 }
 
@@ -56,16 +56,16 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store ppc_fp128 %x, ppc_fp128* %x_addr
 	store ppc_fp128 %y, ppc_fp128* %y_addr
-	%tmp1 = load ppc_fp128* %x_addr, align 16		; <ppc_fp128> [#uses=1]
-	%tmp2 = load ppc_fp128* %y_addr, align 16		; <ppc_fp128> [#uses=1]
+	%tmp1 = load ppc_fp128, ppc_fp128* %x_addr, align 16		; <ppc_fp128> [#uses=1]
+	%tmp2 = load ppc_fp128, ppc_fp128* %y_addr, align 16		; <ppc_fp128> [#uses=1]
 	%tmp3 = fmul ppc_fp128 %tmp1, %tmp2		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %tmp3, ppc_fp128* %tmp, align 16
-	%tmp4 = load ppc_fp128* %tmp, align 16		; <ppc_fp128> [#uses=1]
+	%tmp4 = load ppc_fp128, ppc_fp128* %tmp, align 16		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %tmp4, ppc_fp128* %retval, align 16
 	br label %return
 
 return:		; preds = %entry
-	%retval5 = load ppc_fp128* %retval		; <ppc_fp128> [#uses=1]
+	%retval5 = load ppc_fp128, ppc_fp128* %retval		; <ppc_fp128> [#uses=1]
 	ret ppc_fp128 %retval5
 }
 
@@ -78,15 +78,15 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store ppc_fp128 %x, ppc_fp128* %x_addr
 	store ppc_fp128 %y, ppc_fp128* %y_addr
-	%tmp1 = load ppc_fp128* %x_addr, align 16		; <ppc_fp128> [#uses=1]
-	%tmp2 = load ppc_fp128* %y_addr, align 16		; <ppc_fp128> [#uses=1]
+	%tmp1 = load ppc_fp128, ppc_fp128* %x_addr, align 16		; <ppc_fp128> [#uses=1]
+	%tmp2 = load ppc_fp128, ppc_fp128* %y_addr, align 16		; <ppc_fp128> [#uses=1]
 	%tmp3 = fdiv ppc_fp128 %tmp1, %tmp2		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %tmp3, ppc_fp128* %tmp, align 16
-	%tmp4 = load ppc_fp128* %tmp, align 16		; <ppc_fp128> [#uses=1]
+	%tmp4 = load ppc_fp128, ppc_fp128* %tmp, align 16		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %tmp4, ppc_fp128* %retval, align 16
 	br label %return
 
 return:		; preds = %entry
-	%retval5 = load ppc_fp128* %retval		; <ppc_fp128> [#uses=1]
+	%retval5 = load ppc_fp128, ppc_fp128* %retval		; <ppc_fp128> [#uses=1]
 	ret ppc_fp128 %retval5
 }
diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-endian.ll b/llvm/test/CodeGen/PowerPC/ppcf128-endian.ll
index 180fedf..ee314c1 100644
--- a/llvm/test/CodeGen/PowerPC/ppcf128-endian.ll
+++ b/llvm/test/CodeGen/PowerPC/ppcf128-endian.ll
@@ -9,7 +9,7 @@
 entry:
   %x.addr = alloca ppc_fp128, align 16
   store ppc_fp128 %x, ppc_fp128* %x.addr, align 16
-  %0 = load ppc_fp128* %x.addr, align 16
+  %0 = load ppc_fp128, ppc_fp128* %x.addr, align 16
   store ppc_fp128 %0, ppc_fp128* @g, align 16
   ret void
 }
@@ -21,7 +21,7 @@
 
 define void @caller() {
 entry:
-  %0 = load ppc_fp128* @g, align 16
+  %0 = load ppc_fp128, ppc_fp128* @g, align 16
   call void @test(ppc_fp128 %0)
   ret void
 }
@@ -51,7 +51,7 @@
 
 define ppc_fp128 @result() {
 entry:
-  %0 = load ppc_fp128* @g, align 16
+  %0 = load ppc_fp128, ppc_fp128* @g, align 16
   ret ppc_fp128 %0
 }
 ; CHECK: @result
diff --git a/llvm/test/CodeGen/PowerPC/pr13891.ll b/llvm/test/CodeGen/PowerPC/pr13891.ll
index 4be65dd..5b695eb 100644
--- a/llvm/test/CodeGen/PowerPC/pr13891.ll
+++ b/llvm/test/CodeGen/PowerPC/pr13891.ll
@@ -10,7 +10,7 @@
 ; CHECK: lha {{[0-9]+}}, {{[0-9]+}}(1)
 entry:
   %0 = bitcast %struct.foo* %f to i16*
-  %1 = load i16* %0, align 2
+  %1 = load i16, i16* %0, align 2
   %bf.val.sext = ashr i16 %1, 8
   %cmp = icmp eq i16 %bf.val.sext, %i
   br i1 %cmp, label %if.end, label %if.then
diff --git a/llvm/test/CodeGen/PowerPC/pr15031.ll b/llvm/test/CodeGen/PowerPC/pr15031.ll
index f22c2d1..d1b9932 100644
--- a/llvm/test/CodeGen/PowerPC/pr15031.ll
+++ b/llvm/test/CodeGen/PowerPC/pr15031.ll
@@ -300,7 +300,7 @@
 entry:
   %SubReg_TargetFlags.i = getelementptr inbounds %"class.llvm::MachineOperand", %"class.llvm::MachineOperand"* %this, i64 0, i32 1
   %0 = bitcast [3 x i8]* %SubReg_TargetFlags.i to i24*
-  %bf.load.i = load i24* %0, align 1
+  %bf.load.i = load i24, i24* %0, align 1
   %bf.lshr.i = lshr i24 %bf.load.i, 12
   %tobool = icmp eq i24 %bf.lshr.i, 0
   br i1 %tobool, label %if.end, label %if.then
@@ -309,7 +309,7 @@
   %bf.cast.i = zext i24 %bf.lshr.i to i32
   %add.ptr = getelementptr inbounds %"class.llvm::TargetRegisterInfo", %"class.llvm::TargetRegisterInfo"* %TRI, i64 0, i32 1
   %call3 = tail call zeroext i32 @_ZNK4llvm14MCRegisterInfo9getSubRegEjj(%"class.llvm::MCRegisterInfo"* %add.ptr, i32 zeroext %Reg, i32 zeroext %bf.cast.i)
-  %bf.load.i10 = load i24* %0, align 1
+  %bf.load.i10 = load i24, i24* %0, align 1
   %bf.clear.i = and i24 %bf.load.i10, 4095
   store i24 %bf.clear.i, i24* %0, align 1
   br label %if.end
@@ -317,31 +317,31 @@
 if.end:                                           ; preds = %entry, %if.then
   %Reg.addr.0 = phi i32 [ %call3, %if.then ], [ %Reg, %entry ]
   %RegNo.i.i = getelementptr inbounds %"class.llvm::MachineOperand", %"class.llvm::MachineOperand"* %this, i64 0, i32 2, i32 0
-  %1 = load i32* %RegNo.i.i, align 4
+  %1 = load i32, i32* %RegNo.i.i, align 4
   %cmp.i = icmp eq i32 %1, %Reg.addr.0
   br i1 %cmp.i, label %_ZN4llvm14MachineOperand6setRegEj.exit, label %if.end.i
 
 if.end.i:                                         ; preds = %if.end
   %ParentMI.i.i = getelementptr inbounds %"class.llvm::MachineOperand", %"class.llvm::MachineOperand"* %this, i64 0, i32 3
-  %2 = load %"class.llvm::MachineInstr"** %ParentMI.i.i, align 8
+  %2 = load %"class.llvm::MachineInstr"*, %"class.llvm::MachineInstr"** %ParentMI.i.i, align 8
   %tobool.i = icmp eq %"class.llvm::MachineInstr"* %2, null
   br i1 %tobool.i, label %if.end13.i, label %if.then3.i
 
 if.then3.i:                                       ; preds = %if.end.i
   %Parent.i.i = getelementptr inbounds %"class.llvm::MachineInstr", %"class.llvm::MachineInstr"* %2, i64 0, i32 2
-  %3 = load %"class.llvm::MachineBasicBlock"** %Parent.i.i, align 8
+  %3 = load %"class.llvm::MachineBasicBlock"*, %"class.llvm::MachineBasicBlock"** %Parent.i.i, align 8
   %tobool5.i = icmp eq %"class.llvm::MachineBasicBlock"* %3, null
   br i1 %tobool5.i, label %if.end13.i, label %if.then6.i
 
 if.then6.i:                                       ; preds = %if.then3.i
   %xParent.i.i = getelementptr inbounds %"class.llvm::MachineBasicBlock", %"class.llvm::MachineBasicBlock"* %3, i64 0, i32 4
-  %4 = load %"class.llvm::MachineFunction"** %xParent.i.i, align 8
+  %4 = load %"class.llvm::MachineFunction"*, %"class.llvm::MachineFunction"** %xParent.i.i, align 8
   %tobool8.i = icmp eq %"class.llvm::MachineFunction"* %4, null
   br i1 %tobool8.i, label %if.end13.i, label %if.then9.i
 
 if.then9.i:                                       ; preds = %if.then6.i
   %RegInfo.i.i = getelementptr inbounds %"class.llvm::MachineFunction", %"class.llvm::MachineFunction"* %4, i64 0, i32 5
-  %5 = load %"class.llvm::MachineRegisterInfo"** %RegInfo.i.i, align 8
+  %5 = load %"class.llvm::MachineRegisterInfo"*, %"class.llvm::MachineRegisterInfo"** %RegInfo.i.i, align 8
   tail call void @_ZN4llvm19MachineRegisterInfo27removeRegOperandFromUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"* %5, %"class.llvm::MachineOperand"* %this)
   store i32 %Reg.addr.0, i32* %RegNo.i.i, align 4
   tail call void @_ZN4llvm19MachineRegisterInfo22addRegOperandToUseListEPNS_14MachineOperandE(%"class.llvm::MachineRegisterInfo"* %5, %"class.llvm::MachineOperand"* %this)
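
In pr15031.ll above, the loaded values are themselves pointers, so the
explicit type before the comma is a pointer type and the operand's type
carries one extra level of indirection. A purely textual consistency check
over migrated lines (an illustrative sketch; the real check operates on
parsed types, and pointee types that themselves contain ", ", such as the
literal struct in pr16556-2.ll below, defeat this naive split):

import re

# In the new syntax, the operand type must be the explicit result type with
# exactly one more trailing '*'.
NEW = re.compile(r"load (?:atomic )?(?:volatile )?([^,]+), ([^,]+\*) ")

def consistent(line):
    m = NEW.search(line)
    return m is not None and m.group(2) == m.group(1) + "*"

assert consistent('%2 = load %"class.llvm::MachineInstr"*, '
                  '%"class.llvm::MachineInstr"** %ParentMI.i.i, align 8')
assert consistent("%1 = load i32, i32* %RegNo.i.i, align 4")
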
diff --git a/llvm/test/CodeGen/PowerPC/pr15630.ll b/llvm/test/CodeGen/PowerPC/pr15630.ll
index 3c1b604..54a1b36 100644
--- a/llvm/test/CodeGen/PowerPC/pr15630.ll
+++ b/llvm/test/CodeGen/PowerPC/pr15630.ll
@@ -8,7 +8,7 @@
   %newval = alloca i8
   %ordering = alloca i32, align 4
   store i8 %newval_arg, i8* %newval
-  %tmp = load i8* %newval
+  %tmp = load i8, i8* %newval
   store atomic volatile i8 %tmp, i8* %val_arg seq_cst, align 1
   ret void
 }
diff --git a/llvm/test/CodeGen/PowerPC/pr16556-2.ll b/llvm/test/CodeGen/PowerPC/pr16556-2.ll
index ee38b5e..82dec67 100644
--- a/llvm/test/CodeGen/PowerPC/pr16556-2.ll
+++ b/llvm/test/CodeGen/PowerPC/pr16556-2.ll
@@ -23,15 +23,15 @@
   br i1 %tmp, label %noassert, label %assert
 
 assert:                                           ; preds = %entry
-  %tmp1 = load { i32, i8* }* @.modulefilename
+  %tmp1 = load { i32, i8* }, { i32, i8* }* @.modulefilename
   %0 = call i8* @_d_assert_msg({ i32, i8* } { i32 9, i8* getelementptr inbounds ([10 x i8]* @.str83, i32 0, i32 0) }, { i32, i8* } %tmp1, i32 1586)
   unreachable
 
 noassert:                                         ; preds = %entry
   %tmp2 = getelementptr %core.time.TickDuration, %core.time.TickDuration* %.this_arg, i32 0, i32 0
-  %tmp3 = load i64* %tmp2
+  %tmp3 = load i64, i64* %tmp2
   %tmp4 = sitofp i64 %tmp3 to ppc_fp128
-  %tmp5 = load i64* @_D4core4time12TickDuration11ticksPerSecyl
+  %tmp5 = load i64, i64* @_D4core4time12TickDuration11ticksPerSecyl
   %tmp6 = sitofp i64 %tmp5 to ppc_fp128
   %tmp7 = fdiv ppc_fp128 %tmp6, 0xM80000000000000000000000000000000
   %tmp8 = fdiv ppc_fp128 %tmp4, %tmp7
diff --git a/llvm/test/CodeGen/PowerPC/pr17168.ll b/llvm/test/CodeGen/PowerPC/pr17168.ll
index 62a9ede..af50703 100644
--- a/llvm/test/CodeGen/PowerPC/pr17168.ll
+++ b/llvm/test/CodeGen/PowerPC/pr17168.ll
@@ -24,7 +24,7 @@
 
 for.end1042:                                      ; preds = %for.cond968.preheader, %for.cond964.preheader, %entry
   %0 = phi i32 [ undef, %for.cond964.preheader ], [ undef, %for.cond968.preheader ], [ undef, %entry ]
-  %1 = load i32* getelementptr inbounds ([3 x i32]* @grid_points, i64 0, i64 0), align 4, !dbg !443, !tbaa !444
+  %1 = load i32, i32* getelementptr inbounds ([3 x i32]* @grid_points, i64 0, i64 0), align 4, !dbg !443, !tbaa !444
   tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !119, metadata !{!"0x102"}), !dbg !448
   %sub10454270 = add nsw i32 %0, -1, !dbg !448
   %cmp10464271 = icmp sgt i32 %sub10454270, 1, !dbg !448
diff --git a/llvm/test/CodeGen/PowerPC/pr18663.ll b/llvm/test/CodeGen/PowerPC/pr18663.ll
index 1b85223..04bc392 100644
--- a/llvm/test/CodeGen/PowerPC/pr18663.ll
+++ b/llvm/test/CodeGen/PowerPC/pr18663.ll
@@ -61,21 +61,21 @@
 
 define void @_ZNK18TriaObjectAccessorILi3ELi3EE10barycenterEv(%class.Point.1* noalias nocapture sret %agg.result, %class.TriaObjectAccessor.57* %this) #0 align 2 {
 entry:
-  %0 = load double* null, align 8
-  %1 = load double* undef, align 8
+  %0 = load double, double* null, align 8
+  %1 = load double, double* undef, align 8
   %call18 = tail call dereferenceable(24) %class.Point.1* @_ZNK18TriaObjectAccessorILi3ELi3EE6vertexEj(%class.TriaObjectAccessor.57* %this, i32 zeroext 6)
-  %2 = load double* undef, align 8
+  %2 = load double, double* undef, align 8
   %call21 = tail call dereferenceable(24) %class.Point.1* @_ZNK18TriaObjectAccessorILi3ELi3EE6vertexEj(%class.TriaObjectAccessor.57* %this, i32 zeroext 7)
-  %3 = load double* undef, align 8
+  %3 = load double, double* undef, align 8
   %call33 = tail call dereferenceable(24) %class.Point.1* @_ZNK18TriaObjectAccessorILi3ELi3EE6vertexEj(%class.TriaObjectAccessor.57* %this, i32 zeroext 3)
-  %4 = load double* null, align 8
-  %5 = load double* undef, align 8
+  %4 = load double, double* null, align 8
+  %5 = load double, double* undef, align 8
   %call45 = tail call dereferenceable(24) %class.Point.1* @_ZNK18TriaObjectAccessorILi3ELi3EE6vertexEj(%class.TriaObjectAccessor.57* %this, i32 zeroext 7)
-  %6 = load double* undef, align 8
+  %6 = load double, double* undef, align 8
   %call48 = tail call dereferenceable(24) %class.Point.1* @_ZNK18TriaObjectAccessorILi3ELi3EE6vertexEj(%class.TriaObjectAccessor.57* %this, i32 zeroext 0)
-  %7 = load double* undef, align 8
+  %7 = load double, double* undef, align 8
   %call66 = tail call dereferenceable(24) %class.Point.1* @_ZNK18TriaObjectAccessorILi3ELi3EE6vertexEj(%class.TriaObjectAccessor.57* %this, i32 zeroext 6)
-  %8 = load double* undef, align 8
+  %8 = load double, double* undef, align 8
   %mul334 = fmul double undef, 2.000000e+00
   %mul579 = fmul double %2, %5
   %mul597 = fmul double undef, %mul579
diff --git a/llvm/test/CodeGen/PowerPC/pr20442.ll b/llvm/test/CodeGen/PowerPC/pr20442.ll
index 167ff3d..555c3da 100644
--- a/llvm/test/CodeGen/PowerPC/pr20442.ll
+++ b/llvm/test/CodeGen/PowerPC/pr20442.ll
@@ -20,15 +20,15 @@
 ; Function Attrs: nounwind readonly uwtable
 define i32 @fn1() #0 {
 entry:
-  %0 = load %struct.anon** @b, align 4
+  %0 = load %struct.anon*, %struct.anon** @b, align 4
   %1 = ptrtoint %struct.anon* %0 to i32
   %cmp = icmp sgt %struct.anon* %0, null
-  %2 = load %struct.anon.0** @a, align 4
+  %2 = load %struct.anon.0*, %struct.anon.0** @a, align 4
   br i1 %cmp, label %for.bodythread-pre-split, label %if.end8
 
 for.bodythread-pre-split:                         ; preds = %entry
   %aclass = getelementptr inbounds %struct.anon.0, %struct.anon.0* %2, i32 0, i32 0
-  %.pr = load i32* %aclass, align 4
+  %.pr = load i32, i32* %aclass, align 4
   br label %for.body
 
 for.body:                                         ; preds = %for.bodythread-pre-split, %for.body
@@ -52,9 +52,9 @@
 while.body:                                       ; preds = %while.body.lr.ph, %while.cond
   %j.110 = phi i32 [ %j.1.ph13, %while.body.lr.ph ], [ %inc7, %while.cond ]
   %aclass_index = getelementptr inbounds %struct.anon, %struct.anon* %0, i32 %j.110, i32 0
-  %3 = load i32* %aclass_index, align 4
+  %3 = load i32, i32* %aclass_index, align 4
   %aclass5 = getelementptr inbounds %struct.anon.0, %struct.anon.0* %2, i32 %3, i32 0
-  %4 = load i32* %aclass5, align 4
+  %4 = load i32, i32* %aclass5, align 4
   %tobool = icmp eq i32 %4, 0
   %inc7 = add nsw i32 %j.110, 1
   br i1 %tobool, label %while.cond, label %if.then6
diff --git a/llvm/test/CodeGen/PowerPC/preincprep-invoke.ll b/llvm/test/CodeGen/PowerPC/preincprep-invoke.ll
index fcfec40..0e09ff1 100644
--- a/llvm/test/CodeGen/PowerPC/preincprep-invoke.ll
+++ b/llvm/test/CodeGen/PowerPC/preincprep-invoke.ll
@@ -36,7 +36,7 @@
 for.cond.i.i30:                                   ; preds = %for.cond.i.i30, %invoke.cont4
   %indvars.iv.i.i26 = phi i64 [ %indvars.iv.next.i.i29, %for.cond.i.i30 ], [ 0, %invoke.cont4 ]
   %arrayidx.i.i27 = getelementptr inbounds i8, i8* %call7, i64 %indvars.iv.i.i26
-  %0 = load i8* %arrayidx.i.i27, align 1
+  %0 = load i8, i8* %arrayidx.i.i27, align 1
   %indvars.iv.next.i.i29 = add nuw nsw i64 %indvars.iv.i.i26, 1
   br label %for.cond.i.i30
 
diff --git a/llvm/test/CodeGen/PowerPC/private.ll b/llvm/test/CodeGen/PowerPC/private.ll
index 633fa65..4665fd2 100644
--- a/llvm/test/CodeGen/PowerPC/private.ll
+++ b/llvm/test/CodeGen/PowerPC/private.ll
@@ -19,7 +19,7 @@
 
 ; LINUX: lis{{.*}}.Lbaz
 ; OSX:  lis{{.*}}l_baz
-	%1 = load i32* @baz, align 4
+	%1 = load i32, i32* @baz, align 4
         ret i32 %1
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/pwr7-gt-nop.ll b/llvm/test/CodeGen/PowerPC/pwr7-gt-nop.ll
index 8c8545d6..70f6dad 100644
--- a/llvm/test/CodeGen/PowerPC/pwr7-gt-nop.ll
+++ b/llvm/test/CodeGen/PowerPC/pwr7-gt-nop.ll
@@ -8,11 +8,11 @@
 ; CHECK-LABEL: @foo
 
 entry:
-  %0 = load float* %b, align 4
+  %0 = load float, float* %b, align 4
   store float %0, float* %a, align 4
-  %1 = load float* %c, align 4
+  %1 = load float, float* %c, align 4
   store float %1, float* %b, align 4
-  %2 = load float* %a, align 4
+  %2 = load float, float* %a, align 4
   store float %2, float* %d, align 4
   ret void
 
diff --git a/llvm/test/CodeGen/PowerPC/qpx-load.ll b/llvm/test/CodeGen/PowerPC/qpx-load.ll
index bea3477..7637c43 100644
--- a/llvm/test/CodeGen/PowerPC/qpx-load.ll
+++ b/llvm/test/CodeGen/PowerPC/qpx-load.ll
@@ -3,7 +3,7 @@
 
 define <4 x double> @foo(<4 x double>* %p) {
 entry:
-  %v = load <4 x double>* %p, align 8
+  %v = load <4 x double>, <4 x double>* %p, align 8
   ret <4 x double> %v
 }
 
@@ -17,7 +17,7 @@
 
 define <4 x double> @bar(<4 x double>* %p) {
 entry:
-  %v = load <4 x double>* %p, align 32
+  %v = load <4 x double>, <4 x double>* %p, align 32
   ret <4 x double> %v
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/qpx-s-load.ll b/llvm/test/CodeGen/PowerPC/qpx-s-load.ll
index 1ca0ae6..db14712 100644
--- a/llvm/test/CodeGen/PowerPC/qpx-s-load.ll
+++ b/llvm/test/CodeGen/PowerPC/qpx-s-load.ll
@@ -3,7 +3,7 @@
 
 define <4 x float> @foo(<4 x float>* %p) {
 entry:
-  %v = load <4 x float>* %p, align 4
+  %v = load <4 x float>, <4 x float>* %p, align 4
   ret <4 x float> %v
 }
 
@@ -17,7 +17,7 @@
 
 define <4 x float> @bar(<4 x float>* %p) {
 entry:
-  %v = load <4 x float>* %p, align 16
+  %v = load <4 x float>, <4 x float>* %p, align 16
   ret <4 x float> %v
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/qpx-s-sel.ll b/llvm/test/CodeGen/PowerPC/qpx-s-sel.ll
index e3a2dd9..09a615c 100644
--- a/llvm/test/CodeGen/PowerPC/qpx-s-sel.ll
+++ b/llvm/test/CodeGen/PowerPC/qpx-s-sel.ll
@@ -46,7 +46,7 @@
 
 define <4 x i1> @test4(<4 x i1> %a) nounwind {
 entry:
-  %q = load <4 x i1>* @Q, align 16
+  %q = load <4 x i1>, <4 x i1>* @Q, align 16
   %v = and <4 x i1> %a, %q
   ret <4 x i1> %v
 
diff --git a/llvm/test/CodeGen/PowerPC/qpx-sel.ll b/llvm/test/CodeGen/PowerPC/qpx-sel.ll
index 6822735..a375e6e 100644
--- a/llvm/test/CodeGen/PowerPC/qpx-sel.ll
+++ b/llvm/test/CodeGen/PowerPC/qpx-sel.ll
@@ -50,7 +50,7 @@
 
 define <4 x i1> @test4(<4 x i1> %a) nounwind {
 entry:
-  %q = load <4 x i1>* @Q, align 16
+  %q = load <4 x i1>, <4 x i1>* @Q, align 16
   %v = and <4 x i1> %a, %q
   ret <4 x i1> %v
 
diff --git a/llvm/test/CodeGen/PowerPC/qpx-unalperm.ll b/llvm/test/CodeGen/PowerPC/qpx-unalperm.ll
index b074535..51b340c 100644
--- a/llvm/test/CodeGen/PowerPC/qpx-unalperm.ll
+++ b/llvm/test/CodeGen/PowerPC/qpx-unalperm.ll
@@ -4,7 +4,7 @@
 
 define <4 x double> @foo(<4 x double>* %a) {
 entry:
-  %r = load <4 x double>* %a, align 32
+  %r = load <4 x double>, <4 x double>* %a, align 32
   ret <4 x double> %r
 ; CHECK: qvlfdx
 ; CHECK: blr
@@ -12,9 +12,9 @@
 
 define <4 x double> @bar(<4 x double>* %a) {
 entry:
-  %r = load <4 x double>* %a, align 8
+  %r = load <4 x double>, <4 x double>* %a, align 8
   %b = getelementptr <4 x double>, <4 x double>* %a, i32 16
-  %s = load <4 x double>* %b, align 32
+  %s = load <4 x double>, <4 x double>* %b, align 32
   %t = fadd <4 x double> %r, %s
   ret <4 x double> %t
 ; CHECK: qvlpcldx
@@ -25,38 +25,38 @@
 
 define <4 x double> @bar1(<4 x double>* %a) {
 entry:
-  %r = load <4 x double>* %a, align 8
+  %r = load <4 x double>, <4 x double>* %a, align 8
   %b = getelementptr <4 x double>, <4 x double>* %a, i32 16
-  %s = load <4 x double>* %b, align 8
+  %s = load <4 x double>, <4 x double>* %b, align 8
   %t = fadd <4 x double> %r, %s
   ret <4 x double> %t
 }
 
 define <4 x double> @bar2(<4 x double>* %a) {
 entry:
-  %r = load <4 x double>* %a, align 8
+  %r = load <4 x double>, <4 x double>* %a, align 8
   %b = getelementptr <4 x double>, <4 x double>* %a, i32 1
-  %s = load <4 x double>* %b, align 32
+  %s = load <4 x double>, <4 x double>* %b, align 32
   %t = fadd <4 x double> %r, %s
   ret <4 x double> %t
 }
 
 define <4 x double> @bar3(<4 x double>* %a) {
 entry:
-  %r = load <4 x double>* %a, align 8
+  %r = load <4 x double>, <4 x double>* %a, align 8
   %b = getelementptr <4 x double>, <4 x double>* %a, i32 1
-  %s = load <4 x double>* %b, align 8
+  %s = load <4 x double>, <4 x double>* %b, align 8
   %t = fadd <4 x double> %r, %s
   ret <4 x double> %t
 }
 
 define <4 x double> @bar4(<4 x double>* %a) {
 entry:
-  %r = load <4 x double>* %a, align 8
+  %r = load <4 x double>, <4 x double>* %a, align 8
   %b = getelementptr <4 x double>, <4 x double>* %a, i32 1
-  %s = load <4 x double>* %b, align 8
+  %s = load <4 x double>, <4 x double>* %b, align 8
   %c = getelementptr <4 x double>, <4 x double>* %b, i32 1
-  %t = load <4 x double>* %c, align 8
+  %t = load <4 x double>, <4 x double>* %c, align 8
   %u = fadd <4 x double> %r, %s
   %v = fadd <4 x double> %u, %t
   ret <4 x double> %v
diff --git a/llvm/test/CodeGen/PowerPC/quadint-return.ll b/llvm/test/CodeGen/PowerPC/quadint-return.ll
index 0349991..0743ce4 100644
--- a/llvm/test/CodeGen/PowerPC/quadint-return.ll
+++ b/llvm/test/CodeGen/PowerPC/quadint-return.ll
@@ -8,7 +8,7 @@
 entry:
   %x = alloca i128, align 16
   store i128 27, i128* %x, align 16
-  %0 = load i128* %x, align 16
+  %0 = load i128, i128* %x, align 16
   ret i128 %0
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/reg-coalesce-simple.ll b/llvm/test/CodeGen/PowerPC/reg-coalesce-simple.ll
index 3d14498..3f9cb8a 100644
--- a/llvm/test/CodeGen/PowerPC/reg-coalesce-simple.ll
+++ b/llvm/test/CodeGen/PowerPC/reg-coalesce-simple.ll
@@ -4,7 +4,7 @@
 
 define i32 @test(%struct.foo* %X) nounwind {
         %tmp1 = getelementptr %struct.foo, %struct.foo* %X, i32 0, i32 2, i32 100            ; <i8*> [#uses=1]
-        %tmp = load i8* %tmp1           ; <i8> [#uses=1]
+        %tmp = load i8, i8* %tmp1           ; <i8> [#uses=1]
         %tmp2 = zext i8 %tmp to i32             ; <i32> [#uses=1]
         ret i32 %tmp2
 }
diff --git a/llvm/test/CodeGen/PowerPC/reloc-align.ll b/llvm/test/CodeGen/PowerPC/reloc-align.ll
index 13d6ada..754997b 100644
--- a/llvm/test/CodeGen/PowerPC/reloc-align.ll
+++ b/llvm/test/CodeGen/PowerPC/reloc-align.ll
@@ -24,7 +24,7 @@
 define internal fastcc signext i32 @func_90(%struct.S1* byval nocapture %p_91) #0 {
 entry:
   %0 = bitcast %struct.S1* %p_91 to i64*
-  %bf.load = load i64* %0, align 1
+  %bf.load = load i64, i64* %0, align 1
   %bf.shl = shl i64 %bf.load, 26
   %bf.ashr = ashr i64 %bf.shl, 54
   %bf.cast = trunc i64 %bf.ashr to i32
diff --git a/llvm/test/CodeGen/PowerPC/resolvefi-basereg.ll b/llvm/test/CodeGen/PowerPC/resolvefi-basereg.ll
index 83199ee..0b173e9 100644
--- a/llvm/test/CodeGen/PowerPC/resolvefi-basereg.ll
+++ b/llvm/test/CodeGen/PowerPC/resolvefi-basereg.ll
@@ -42,14 +42,14 @@
   store i64 16, i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 8), align 8
   store i64 16, i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 9), align 8
   store i64 16, i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 10), align 8
-  %0 = load i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 8), align 8
+  %0 = load i64, i64* getelementptr inbounds (%struct.Info* @info, i32 0, i32 8), align 8
   %sub = sub i64 %0, 1
   %and = and i64 ptrtoint (%struct.S1998* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 3) to i64), %sub
   %tobool = icmp ne i64 %and, 0
   br i1 %tobool, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
-  %1 = load i32* @fails, align 4
+  %1 = load i32, i32* @fails, align 4
   %inc = add nsw i32 %1, 1
   store i32 %inc, i32* @fails, align 4
   br label %if.end
@@ -57,276 +57,276 @@
 if.end:                                           ; preds = %if.then, %entry
   store i32 0, i32* %i, align 4
   store i32 0, i32* %j, align 4
-  %2 = load i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
   %idxprom = sext i32 %2 to i64
   %arrayidx = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom
   store i8* bitcast (i32** getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 0, i64 1) to i8*), i8** %arrayidx, align 8
-  %3 = load i32* %i, align 4
+  %3 = load i32, i32* %i, align 4
   %idxprom1 = sext i32 %3 to i64
   %arrayidx2 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom1
   store i64 8, i64* %arrayidx2, align 8
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %idxprom3 = sext i32 %4 to i64
   %arrayidx4 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom3
   store i64 8, i64* %arrayidx4, align 8
   store i32* getelementptr inbounds ([256 x i32]* @intarray, i32 0, i64 190), i32** getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 0, i64 1), align 8
   store i32* getelementptr inbounds ([256 x i32]* @intarray, i32 0, i64 241), i32** getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 0, i64 1), align 8
-  %5 = load i32* %i, align 4
+  %5 = load i32, i32* %i, align 4
   %inc5 = add nsw i32 %5, 1
   store i32 %inc5, i32* %i, align 4
-  %6 = load i32* %i, align 4
+  %6 = load i32, i32* %i, align 4
   %idxprom6 = sext i32 %6 to i64
   %arrayidx7 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom6
   store i8* bitcast (i64* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 1) to i8*), i8** %arrayidx7, align 8
-  %7 = load i32* %i, align 4
+  %7 = load i32, i32* %i, align 4
   %idxprom8 = sext i32 %7 to i64
   %arrayidx9 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom8
   store i64 8, i64* %arrayidx9, align 8
-  %8 = load i32* %i, align 4
+  %8 = load i32, i32* %i, align 4
   %idxprom10 = sext i32 %8 to i64
   %arrayidx11 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom10
   store i64 8, i64* %arrayidx11, align 8
   store i64 -3866974208859106459, i64* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 1), align 8
   store i64 -185376695371304091, i64* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 1), align 8
-  %9 = load i32* %i, align 4
+  %9 = load i32, i32* %i, align 4
   %inc12 = add nsw i32 %9, 1
   store i32 %inc12, i32* %i, align 4
-  %10 = load i32* %i, align 4
+  %10 = load i32, i32* %i, align 4
   %idxprom13 = sext i32 %10 to i64
   %arrayidx14 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom13
   store i8* bitcast (i64* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 2) to i8*), i8** %arrayidx14, align 8
-  %11 = load i32* %i, align 4
+  %11 = load i32, i32* %i, align 4
   %idxprom15 = sext i32 %11 to i64
   %arrayidx16 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom15
   store i64 8, i64* %arrayidx16, align 8
-  %12 = load i32* %i, align 4
+  %12 = load i32, i32* %i, align 4
   %idxprom17 = sext i32 %12 to i64
   %arrayidx18 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom17
   store i64 8, i64* %arrayidx18, align 8
   store i64 -963638028680427187, i64* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 2), align 8
   store i64 7510542175772455554, i64* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 2), align 8
-  %13 = load i32* %i, align 4
+  %13 = load i32, i32* %i, align 4
   %inc19 = add nsw i32 %13, 1
   store i32 %inc19, i32* %i, align 4
-  %14 = load i32* %i, align 4
+  %14 = load i32, i32* %i, align 4
   %idxprom20 = sext i32 %14 to i64
   %arrayidx21 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom20
   store i8* bitcast (double* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 3) to i8*), i8** %arrayidx21, align 8
-  %15 = load i32* %i, align 4
+  %15 = load i32, i32* %i, align 4
   %idxprom22 = sext i32 %15 to i64
   %arrayidx23 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom22
   store i64 8, i64* %arrayidx23, align 8
-  %16 = load i32* %i, align 4
+  %16 = load i32, i32* %i, align 4
   %idxprom24 = sext i32 %16 to i64
   %arrayidx25 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom24
   store i64 16, i64* %arrayidx25, align 8
   store double 0xC0F8783300000000, double* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 3), align 16
   store double 0xC10DF3CCC0000000, double* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 3), align 16
-  %17 = load i32* %i, align 4
+  %17 = load i32, i32* %i, align 4
   %inc26 = add nsw i32 %17, 1
   store i32 %inc26, i32* %i, align 4
-  %18 = load i32* %i, align 4
+  %18 = load i32, i32* %i, align 4
   %idxprom27 = sext i32 %18 to i64
   %arrayidx28 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom27
   store i8* bitcast (i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 4) to i8*), i8** %arrayidx28, align 8
-  %19 = load i32* %i, align 4
+  %19 = load i32, i32* %i, align 4
   %idxprom29 = sext i32 %19 to i64
   %arrayidx30 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom29
   store i64 2, i64* %arrayidx30, align 8
-  %20 = load i32* %i, align 4
+  %20 = load i32, i32* %i, align 4
   %idxprom31 = sext i32 %20 to i64
   %arrayidx32 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom31
   store i64 2, i64* %arrayidx32, align 8
   store i16 -15897, i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 4), align 2
   store i16 30935, i16* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 4), align 2
-  %21 = load i32* %i, align 4
+  %21 = load i32, i32* %i, align 4
   %inc33 = add nsw i32 %21, 1
   store i32 %inc33, i32* %i, align 4
   store i32 -419541644, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 5), align 4
   store i32 2125926812, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 5), align 4
-  %22 = load i32* %j, align 4
+  %22 = load i32, i32* %j, align 4
   %inc34 = add nsw i32 %22, 1
   store i32 %inc34, i32* %j, align 4
-  %23 = load i32* %i, align 4
+  %23 = load i32, i32* %i, align 4
   %idxprom35 = sext i32 %23 to i64
   %arrayidx36 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom35
   store i8* bitcast (double* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 0, i64 0) to i8*), i8** %arrayidx36, align 8
-  %24 = load i32* %i, align 4
+  %24 = load i32, i32* %i, align 4
   %idxprom37 = sext i32 %24 to i64
   %arrayidx38 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom37
   store i64 8, i64* %arrayidx38, align 8
-  %25 = load i32* %i, align 4
+  %25 = load i32, i32* %i, align 4
   %idxprom39 = sext i32 %25 to i64
   %arrayidx40 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom39
   store i64 8, i64* %arrayidx40, align 8
   store double 0xC0FC765780000000, double* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 0, i64 0), align 8
   store double 0xC1025CD7A0000000, double* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 0, i64 0), align 8
-  %26 = load i32* %i, align 4
+  %26 = load i32, i32* %i, align 4
   %inc41 = add nsw i32 %26, 1
   store i32 %inc41, i32* %i, align 4
-  %bf.load = load i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 1), align 8
+  %bf.load = load i32, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 1), align 8
   %bf.clear = and i32 %bf.load, 7
   %bf.set = or i32 %bf.clear, 16
   store i32 %bf.set, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 1), align 8
-  %bf.load42 = load i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 1), align 8
+  %bf.load42 = load i32, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 1), align 8
   %bf.clear43 = and i32 %bf.load42, 7
   %bf.set44 = or i32 %bf.clear43, 24
   store i32 %bf.set44, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 1), align 8
-  %27 = load i32* %j, align 4
+  %27 = load i32, i32* %j, align 4
   %inc45 = add nsw i32 %27, 1
   store i32 %inc45, i32* %j, align 4
-  %bf.load46 = load i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 2), align 4
+  %bf.load46 = load i16, i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 2), align 4
   %bf.clear47 = and i16 %bf.load46, 127
   store i16 %bf.clear47, i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 2), align 4
-  %bf.load48 = load i16* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 2), align 4
+  %bf.load48 = load i16, i16* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 2), align 4
   %bf.clear49 = and i16 %bf.load48, 127
   store i16 %bf.clear49, i16* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 2), align 4
-  %28 = load i32* %j, align 4
+  %28 = load i32, i32* %j, align 4
   %inc50 = add nsw i32 %28, 1
   store i32 %inc50, i32* %j, align 4
-  %bf.load51 = load i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 3), align 8
+  %bf.load51 = load i32, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 3), align 8
   %bf.clear52 = and i32 %bf.load51, 63
   store i32 %bf.clear52, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 3), align 8
-  %bf.load53 = load i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 3), align 8
+  %bf.load53 = load i32, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 3), align 8
   %bf.clear54 = and i32 %bf.load53, 63
   %bf.set55 = or i32 %bf.clear54, 64
   store i32 %bf.set55, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 3), align 8
-  %29 = load i32* %j, align 4
+  %29 = load i32, i32* %j, align 4
   %inc56 = add nsw i32 %29, 1
   store i32 %inc56, i32* %j, align 4
-  %bf.load57 = load i24* bitcast ([3 x i8]* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 4) to i24*), align 4
+  %bf.load57 = load i24, i24* bitcast ([3 x i8]* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 4) to i24*), align 4
   %bf.clear58 = and i24 %bf.load57, 63
   store i24 %bf.clear58, i24* bitcast ([3 x i8]* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 4) to i24*), align 4
-  %bf.load59 = load i24* bitcast ([3 x i8]* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 4) to i24*), align 4
+  %bf.load59 = load i24, i24* bitcast ([3 x i8]* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 4) to i24*), align 4
   %bf.clear60 = and i24 %bf.load59, 63
   store i24 %bf.clear60, i24* bitcast ([3 x i8]* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 4) to i24*), align 4
-  %30 = load i32* %j, align 4
+  %30 = load i32, i32* %j, align 4
   %inc61 = add nsw i32 %30, 1
   store i32 %inc61, i32* %j, align 4
-  %31 = load i32* %i, align 4
+  %31 = load i32, i32* %i, align 4
   %idxprom62 = sext i32 %31 to i64
   %arrayidx63 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom62
   store i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 5), i8** %arrayidx63, align 8
-  %32 = load i32* %i, align 4
+  %32 = load i32, i32* %i, align 4
   %idxprom64 = sext i32 %32 to i64
   %arrayidx65 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom64
   store i64 1, i64* %arrayidx65, align 8
-  %33 = load i32* %i, align 4
+  %33 = load i32, i32* %i, align 4
   %idxprom66 = sext i32 %33 to i64
   %arrayidx67 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom66
   store i64 1, i64* %arrayidx67, align 8
   store i8 -83, i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 5), align 1
   store i8 -67, i8* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 5, i64 5), align 1
-  %34 = load i32* %i, align 4
+  %34 = load i32, i32* %i, align 4
   %inc68 = add nsw i32 %34, 1
   store i32 %inc68, i32* %i, align 4
-  %35 = load i32* %i, align 4
+  %35 = load i32, i32* %i, align 4
   %idxprom69 = sext i32 %35 to i64
   %arrayidx70 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom69
   store i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 1), i8** %arrayidx70, align 8
-  %36 = load i32* %i, align 4
+  %36 = load i32, i32* %i, align 4
   %idxprom71 = sext i32 %36 to i64
   %arrayidx72 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom71
   store i64 1, i64* %arrayidx72, align 8
-  %37 = load i32* %i, align 4
+  %37 = load i32, i32* %i, align 4
   %idxprom73 = sext i32 %37 to i64
   %arrayidx74 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom73
   store i64 1, i64* %arrayidx74, align 8
   store i8 34, i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 5, i64 1), align 1
   store i8 64, i8* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 5, i64 1), align 1
-  %38 = load i32* %i, align 4
+  %38 = load i32, i32* %i, align 4
   %inc75 = add nsw i32 %38, 1
   store i32 %inc75, i32* %i, align 4
-  %39 = load i32* %i, align 4
+  %39 = load i32, i32* %i, align 4
   %idxprom76 = sext i32 %39 to i64
   %arrayidx77 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom76
   store i8* bitcast (i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 6, i64 3) to i8*), i8** %arrayidx77, align 8
-  %40 = load i32* %i, align 4
+  %40 = load i32, i32* %i, align 4
   %idxprom78 = sext i32 %40 to i64
   %arrayidx79 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom78
   store i64 4, i64* %arrayidx79, align 8
-  %41 = load i32* %i, align 4
+  %41 = load i32, i32* %i, align 4
   %idxprom80 = sext i32 %41 to i64
   %arrayidx81 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom80
   store i64 4, i64* %arrayidx81, align 8
   store i32 -3, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 6, i64 3), align 4
   store i32 -3, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 6, i64 3), align 4
-  %42 = load i32* %i, align 4
+  %42 = load i32, i32* %i, align 4
   %inc82 = add nsw i32 %42, 1
   store i32 %inc82, i32* %i, align 4
-  %43 = load i32* %i, align 4
+  %43 = load i32, i32* %i, align 4
   %idxprom83 = sext i32 %43 to i64
   %arrayidx84 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom83
   store i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 7), i8** %arrayidx84, align 8
-  %44 = load i32* %i, align 4
+  %44 = load i32, i32* %i, align 4
   %idxprom85 = sext i32 %44 to i64
   %arrayidx86 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom85
   store i64 1, i64* %arrayidx86, align 8
-  %45 = load i32* %i, align 4
+  %45 = load i32, i32* %i, align 4
   %idxprom87 = sext i32 %45 to i64
   %arrayidx88 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom87
   store i64 1, i64* %arrayidx88, align 8
   store i8 106, i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 6, i64 4, i32 7), align 1
   store i8 -102, i8* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 6, i64 4, i32 7), align 1
-  %46 = load i32* %i, align 4
+  %46 = load i32, i32* %i, align 4
   %inc89 = add nsw i32 %46, 1
   store i32 %inc89, i32* %i, align 4
-  %47 = load i32* %i, align 4
+  %47 = load i32, i32* %i, align 4
   %idxprom90 = sext i32 %47 to i64
   %arrayidx91 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom90
   store i8* bitcast (i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 7) to i8*), i8** %arrayidx91, align 8
-  %48 = load i32* %i, align 4
+  %48 = load i32, i32* %i, align 4
   %idxprom92 = sext i32 %48 to i64
   %arrayidx93 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom92
   store i64 2, i64* %arrayidx93, align 8
-  %49 = load i32* %i, align 4
+  %49 = load i32, i32* %i, align 4
   %idxprom94 = sext i32 %49 to i64
   %arrayidx95 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom94
   store i64 2, i64* %arrayidx95, align 8
   store i16 29665, i16* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 7), align 2
   store i16 7107, i16* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 7), align 2
-  %50 = load i32* %i, align 4
+  %50 = load i32, i32* %i, align 4
   %inc96 = add nsw i32 %50, 1
   store i32 %inc96, i32* %i, align 4
-  %51 = load i32* %i, align 4
+  %51 = load i32, i32* %i, align 4
   %idxprom97 = sext i32 %51 to i64
   %arrayidx98 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom97
   store i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 8), i8** %arrayidx98, align 8
-  %52 = load i32* %i, align 4
+  %52 = load i32, i32* %i, align 4
   %idxprom99 = sext i32 %52 to i64
   %arrayidx100 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom99
   store i64 1, i64* %arrayidx100, align 8
-  %53 = load i32* %i, align 4
+  %53 = load i32, i32* %i, align 4
   %idxprom101 = sext i32 %53 to i64
   %arrayidx102 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom101
   store i64 1, i64* %arrayidx102, align 8
   store i8 52, i8* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 8), align 1
   store i8 -86, i8* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 8), align 1
-  %54 = load i32* %i, align 4
+  %54 = load i32, i32* %i, align 4
   %inc103 = add nsw i32 %54, 1
   store i32 %inc103, i32* %i, align 4
-  %55 = load i32* %i, align 4
+  %55 = load i32, i32* %i, align 4
   %idxprom104 = sext i32 %55 to i64
   %arrayidx105 = getelementptr inbounds [32 x i8*], [32 x i8*]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 5), i32 0, i64 %idxprom104
   store i8* bitcast (i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 9) to i8*), i8** %arrayidx105, align 8
-  %56 = load i32* %i, align 4
+  %56 = load i32, i32* %i, align 4
   %idxprom106 = sext i32 %56 to i64
   %arrayidx107 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 7), i32 0, i64 %idxprom106
   store i64 4, i64* %arrayidx107, align 8
-  %57 = load i32* %i, align 4
+  %57 = load i32, i32* %i, align 4
   %idxprom108 = sext i32 %57 to i64
   %arrayidx109 = getelementptr inbounds [32 x i64], [32 x i64]* getelementptr inbounds (%struct.Info* @info, i32 0, i32 11), i32 0, i64 %idxprom108
   store i64 4, i64* %arrayidx109, align 8
   store i32 -54118453, i32* getelementptr inbounds (%struct.S1998* @s1998, i32 0, i32 9), align 4
   store i32 1668755823, i32* getelementptr inbounds ([5 x %struct.S1998]* @a1998, i32 0, i64 2, i32 9), align 4
-  %58 = load i32* %i, align 4
+  %58 = load i32, i32* %i, align 4
   %inc110 = add nsw i32 %58, 1
   store i32 %inc110, i32* %i, align 4
   store i32 %inc110, i32* %tmp
-  %59 = load i32* %tmp
-  %60 = load i32* %i, align 4
+  %59 = load i32, i32* %tmp
+  %60 = load i32, i32* %i, align 4
   store i32 %60, i32* getelementptr inbounds (%struct.Info* @info, i32 0, i32 0), align 4
-  %61 = load i32* %j, align 4
+  %61 = load i32, i32* %j, align 4
   store i32 %61, i32* getelementptr inbounds (%struct.Info* @info, i32 0, i32 1), align 4
   %62 = bitcast %struct.S1998* %agg.tmp111 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %62, i8* bitcast (%struct.S1998* @s1998 to i8*), i64 5168, i32 16, i1 false)
diff --git a/llvm/test/CodeGen/PowerPC/resolvefi-disp.ll b/llvm/test/CodeGen/PowerPC/resolvefi-disp.ll
index e91d720..62b70b0 100644
--- a/llvm/test/CodeGen/PowerPC/resolvefi-disp.ll
+++ b/llvm/test/CodeGen/PowerPC/resolvefi-disp.ll
@@ -43,19 +43,19 @@
   call void @llvm.memset.p0i8.i64(i8* %8, i8 0, i64 11104, i32 32, i1 false)
   %b = getelementptr inbounds %struct.S2760, %struct.S2760* %arg0, i32 0, i32 1
   %g = getelementptr inbounds %struct.anon, %struct.anon* %b, i32 0, i32 1
-  %9 = load i64* %g, align 8
-  %10 = load i64* getelementptr inbounds (%struct.S2760* @s2760, i32 0, i32 1, i32 1), align 8
+  %9 = load i64, i64* %g, align 8
+  %10 = load i64, i64* getelementptr inbounds (%struct.S2760* @s2760, i32 0, i32 1, i32 1), align 8
   %cmp = icmp ne i64 %9, %10
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
-  %11 = load i32* @fails, align 4
+  %11 = load i32, i32* @fails, align 4
   %inc = add nsw i32 %11, 1
   store i32 %inc, i32* @fails, align 4
   br label %if.end
 
 if.end:                                           ; preds = %if.then, %entry
-  %12 = load i64* getelementptr inbounds (%struct.S2760* @s2760, i32 0, i32 1, i32 1), align 8
+  %12 = load i64, i64* getelementptr inbounds (%struct.S2760* @s2760, i32 0, i32 1, i32 1), align 8
   %b3 = getelementptr inbounds %struct.S2760, %struct.S2760* %ret, i32 0, i32 1
   %g4 = getelementptr inbounds %struct.anon, %struct.anon* %b3, i32 0, i32 1
   store i64 %12, i64* %g4, align 8
diff --git a/llvm/test/CodeGen/PowerPC/return-val-i128.ll b/llvm/test/CodeGen/PowerPC/return-val-i128.ll
index e14a438..2f92409 100644
--- a/llvm/test/CodeGen/PowerPC/return-val-i128.ll
+++ b/llvm/test/CodeGen/PowerPC/return-val-i128.ll
@@ -7,29 +7,29 @@
 	%tmp = alloca i128, align 16		; <i128*> [#uses=3]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store float %a, float* %a_addr
-	%tmp1 = load float* %a_addr, align 4		; <float> [#uses=1]
+	%tmp1 = load float, float* %a_addr, align 4		; <float> [#uses=1]
 	%tmp2 = fcmp olt float %tmp1, 0.000000e+00		; <i1> [#uses=1]
 	%tmp23 = zext i1 %tmp2 to i8		; <i8> [#uses=1]
 	%toBool = icmp ne i8 %tmp23, 0		; <i1> [#uses=1]
 	br i1 %toBool, label %bb, label %bb8
 bb:		; preds = %entry
-	%tmp4 = load float* %a_addr, align 4		; <float> [#uses=1]
+	%tmp4 = load float, float* %a_addr, align 4		; <float> [#uses=1]
 	%tmp5 = fsub float -0.000000e+00, %tmp4		; <float> [#uses=1]
 	%tmp6 = call i128 @__fixunssfDI( float %tmp5 ) nounwind 		; <i128> [#uses=1]
 	%tmp7 = sub i128 0, %tmp6		; <i128> [#uses=1]
 	store i128 %tmp7, i128* %tmp, align 16
 	br label %bb11
 bb8:		; preds = %entry
-	%tmp9 = load float* %a_addr, align 4		; <float> [#uses=1]
+	%tmp9 = load float, float* %a_addr, align 4		; <float> [#uses=1]
 	%tmp10 = call i128 @__fixunssfDI( float %tmp9 ) nounwind 		; <i128> [#uses=1]
 	store i128 %tmp10, i128* %tmp, align 16
 	br label %bb11
 bb11:		; preds = %bb8, %bb
-	%tmp12 = load i128* %tmp, align 16		; <i128> [#uses=1]
+	%tmp12 = load i128, i128* %tmp, align 16		; <i128> [#uses=1]
 	store i128 %tmp12, i128* %retval, align 16
 	br label %return
 return:		; preds = %bb11
-	%retval13 = load i128* %retval		; <i128> [#uses=1]
+	%retval13 = load i128, i128* %retval		; <i128> [#uses=1]
 	ret i128 %retval13
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/rlwimi-and.ll b/llvm/test/CodeGen/PowerPC/rlwimi-and.ll
index 9433f8e..b9b0c91 100644
--- a/llvm/test/CodeGen/PowerPC/rlwimi-and.ll
+++ b/llvm/test/CodeGen/PowerPC/rlwimi-and.ll
@@ -16,11 +16,11 @@
   unreachable
 
 codeRepl17:                                       ; preds = %codeRepl4
-  %0 = load i8* undef, align 2
+  %0 = load i8, i8* undef, align 2
   %1 = and i8 %0, 1
   %not.tobool.i.i.i = icmp eq i8 %1, 0
   %2 = select i1 %not.tobool.i.i.i, i16 0, i16 256
-  %3 = load i8* undef, align 1
+  %3 = load i8, i8* undef, align 1
   %4 = and i8 %3, 1
   %not.tobool.i.1.i.i = icmp eq i8 %4, 0
   %rvml38.sroa.1.1.insert.ext = select i1 %not.tobool.i.1.i.i, i16 0, i16 1
diff --git a/llvm/test/CodeGen/PowerPC/rlwimi-commute.ll b/llvm/test/CodeGen/PowerPC/rlwimi-commute.ll
index 3f90008..cd0f49e 100644
--- a/llvm/test/CodeGen/PowerPC/rlwimi-commute.ll
+++ b/llvm/test/CodeGen/PowerPC/rlwimi-commute.ll
@@ -4,8 +4,8 @@
 ; Make sure there is no register-register copies here.
 
 define void @test1(i32* %A, i32* %B, i32* %D, i32* %E) {
-	%A.upgrd.1 = load i32* %A		; <i32> [#uses=2]
-	%B.upgrd.2 = load i32* %B		; <i32> [#uses=1]
+	%A.upgrd.1 = load i32, i32* %A		; <i32> [#uses=2]
+	%B.upgrd.2 = load i32, i32* %B		; <i32> [#uses=1]
 	%X = and i32 %A.upgrd.1, 15		; <i32> [#uses=1]
 	%Y = and i32 %B.upgrd.2, -16		; <i32> [#uses=1]
 	%Z = or i32 %X, %Y		; <i32> [#uses=1]
@@ -15,8 +15,8 @@
 }
 
 define void @test2(i32* %A, i32* %B, i32* %D, i32* %E) {
-	%A.upgrd.3 = load i32* %A		; <i32> [#uses=1]
-	%B.upgrd.4 = load i32* %B		; <i32> [#uses=2]
+	%A.upgrd.3 = load i32, i32* %A		; <i32> [#uses=1]
+	%B.upgrd.4 = load i32, i32* %B		; <i32> [#uses=2]
 	%X = and i32 %A.upgrd.3, 15		; <i32> [#uses=1]
 	%Y = and i32 %B.upgrd.4, -16		; <i32> [#uses=1]
 	%Z = or i32 %X, %Y		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/rlwimi-dyn-and.ll b/llvm/test/CodeGen/PowerPC/rlwimi-dyn-and.ll
index e02801fa..76f3da6 100644
--- a/llvm/test/CodeGen/PowerPC/rlwimi-dyn-and.ll
+++ b/llvm/test/CodeGen/PowerPC/rlwimi-dyn-and.ll
@@ -4,13 +4,13 @@
 
 define i32 @test1() #0 {
 entry:
-  %conv67.reload = load i32* undef
+  %conv67.reload = load i32, i32* undef
   %const = bitcast i32 65535 to i32
   br label %next
 
 next:
   %shl161 = shl nuw nsw i32 %conv67.reload, 15
-  %0 = load i8* undef, align 1
+  %0 = load i8, i8* undef, align 1
   %conv169 = zext i8 %0 to i32
   %shl170 = shl nuw nsw i32 %conv169, 7
   %const_mat = add i32 %const, -32767
@@ -25,13 +25,13 @@
 
 define i32 @test2() #0 {
 entry:
-  %conv67.reload = load i32* undef
+  %conv67.reload = load i32, i32* undef
   %const = bitcast i32 65535 to i32
   br label %next
 
 next:
   %shl161 = shl nuw nsw i32 %conv67.reload, 15
-  %0 = load i8* undef, align 1
+  %0 = load i8, i8* undef, align 1
   %conv169 = zext i8 %0 to i32
   %shl170 = shl nuw nsw i32 %conv169, 7
   %shl161.masked = and i32 %shl161, 32768
diff --git a/llvm/test/CodeGen/PowerPC/rm-zext.ll b/llvm/test/CodeGen/PowerPC/rm-zext.ll
index 33995e1..97c546c 100644
--- a/llvm/test/CodeGen/PowerPC/rm-zext.ll
+++ b/llvm/test/CodeGen/PowerPC/rm-zext.ll
@@ -45,7 +45,7 @@
 ; Function Attrs: nounwind readonly
 define zeroext i32 @bs32(i32* nocapture readonly %x) #1 {
 entry:
-  %0 = load i32* %x, align 4
+  %0 = load i32, i32* %x, align 4
   %1 = tail call i32 @llvm.bswap.i32(i32 %0)
   ret i32 %1
 
@@ -57,7 +57,7 @@
 ; Function Attrs: nounwind readonly
 define zeroext i16 @bs16(i16* nocapture readonly %x) #1 {
 entry:
-  %0 = load i16* %x, align 2
+  %0 = load i16, i16* %x, align 2
   %1 = tail call i16 @llvm.bswap.i16(i16 %0)
   ret i16 %1
 
diff --git a/llvm/test/CodeGen/PowerPC/rs-undef-use.ll b/llvm/test/CodeGen/PowerPC/rs-undef-use.ll
index 24dd5fd..007931e 100644
--- a/llvm/test/CodeGen/PowerPC/rs-undef-use.ll
+++ b/llvm/test/CodeGen/PowerPC/rs-undef-use.ll
@@ -15,7 +15,7 @@
   br label %CF82
 
 CF82:                                             ; preds = %CF82, %CF82.critedge
-  %L17 = load i8* %0
+  %L17 = load i8, i8* %0
   %E18 = extractelement <2 x i64> undef, i32 0
   %PC = bitcast <2 x i1>* %A3 to i64*
   br i1 undef, label %CF82, label %CF84.critedge
@@ -25,13 +25,13 @@
   br label %CF84
 
 CF84:                                             ; preds = %CF84, %CF84.critedge
-  %L40 = load i64* %PC
+  %L40 = load i64, i64* %PC
   store i64 -1, i64* %PC
   %Sl46 = select i1 undef, i1 undef, i1 false
   br i1 %Sl46, label %CF84, label %CF85
 
 CF85:                                             ; preds = %CF84
-  %L47 = load i64* %PC
+  %L47 = load i64, i64* %PC
   store i64 %E18, i64* %PC
   %PC52 = bitcast <8 x i32>* %A2 to ppc_fp128*
   store ppc_fp128 0xM4D436562A0416DE00000000000000000, ppc_fp128* %PC52
diff --git a/llvm/test/CodeGen/PowerPC/s000-alias-misched.ll b/llvm/test/CodeGen/PowerPC/s000-alias-misched.ll
index f67b9f2..8e29635 100644
--- a/llvm/test/CodeGen/PowerPC/s000-alias-misched.ll
+++ b/llvm/test/CodeGen/PowerPC/s000-alias-misched.ll
@@ -37,7 +37,7 @@
   %arrayidx = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv
   %arrayidx6 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv
   %0 = bitcast double* %arrayidx to <1 x double>*
-  %1 = load <1 x double>* %0, align 32
+  %1 = load <1 x double>, <1 x double>* %0, align 32
   %add = fadd <1 x double> %1, <double 1.000000e+00>
   %2 = bitcast double* %arrayidx6 to <1 x double>*
   store <1 x double> %add, <1 x double>* %2, align 32
@@ -45,7 +45,7 @@
   %arrayidx.4 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.322
   %arrayidx6.4 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.322
   %3 = bitcast double* %arrayidx.4 to <1 x double>*
-  %4 = load <1 x double>* %3, align 32
+  %4 = load <1 x double>, <1 x double>* %3, align 32
   %add.4 = fadd <1 x double> %4, <double 1.000000e+00>
   %5 = bitcast double* %arrayidx6.4 to <1 x double>*
   store <1 x double> %add.4, <1 x double>* %5, align 32
@@ -53,7 +53,7 @@
   %arrayidx.8 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.726
   %arrayidx6.8 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.726
   %6 = bitcast double* %arrayidx.8 to <1 x double>*
-  %7 = load <1 x double>* %6, align 32
+  %7 = load <1 x double>, <1 x double>* %6, align 32
   %add.8 = fadd <1 x double> %7, <double 1.000000e+00>
   %8 = bitcast double* %arrayidx6.8 to <1 x double>*
   store <1 x double> %add.8, <1 x double>* %8, align 32
@@ -61,7 +61,7 @@
   %arrayidx.12 = getelementptr inbounds [16000 x double], [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1130
   %arrayidx6.12 = getelementptr inbounds [16000 x double], [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1130
   %9 = bitcast double* %arrayidx.12 to <1 x double>*
-  %10 = load <1 x double>* %9, align 32
+  %10 = load <1 x double>, <1 x double>* %9, align 32
   %add.12 = fadd <1 x double> %10, <double 1.000000e+00>
   %11 = bitcast double* %arrayidx6.12 to <1 x double>*
   store <1 x double> %add.12, <1 x double>* %11, align 32
diff --git a/llvm/test/CodeGen/PowerPC/sjlj.ll b/llvm/test/CodeGen/PowerPC/sjlj.ll
index f9f887a..682cee4 100644
--- a/llvm/test/CodeGen/PowerPC/sjlj.ll
+++ b/llvm/test/CodeGen/PowerPC/sjlj.ll
@@ -55,7 +55,7 @@
   br label %return
 
 return:                                           ; preds = %if.end, %if.then
-  %3 = load i32* %retval
+  %3 = load i32, i32* %retval
   ret i32 %3
 
 ; FIXME: We should be saving VRSAVE on Darwin, but we're not!
@@ -128,7 +128,7 @@
   br label %return
 
 return:                                           ; preds = %if.end, %if.then
-  %3 = load i32* %retval
+  %3 = load i32, i32* %retval
   ret i32 %3
 
 ; CHECK: @main2
diff --git a/llvm/test/CodeGen/PowerPC/small-arguments.ll b/llvm/test/CodeGen/PowerPC/small-arguments.ll
index 19ca098..3cef817 100644
--- a/llvm/test/CodeGen/PowerPC/small-arguments.ll
+++ b/llvm/test/CodeGen/PowerPC/small-arguments.ll
@@ -26,14 +26,14 @@
 }
 
 define i32 @test4(i16* %P) {
-        %tmp.1 = load i16* %P
+        %tmp.1 = load i16, i16* %P
         %tmp.2 = zext i16 %tmp.1 to i32
         %tmp.3 = and i32 %tmp.2, 255
         ret i32 %tmp.3
 }
 
 define i32 @test5(i16* %P) {
-        %tmp.1 = load i16* %P
+        %tmp.1 = load i16, i16* %P
         %tmp.2 = bitcast i16 %tmp.1 to i16
         %tmp.3 = zext i16 %tmp.2 to i32
         %tmp.4 = and i32 %tmp.3, 255
@@ -41,7 +41,7 @@
 }
 
 define i32 @test6(i32* %P) {
-        %tmp.1 = load i32* %P
+        %tmp.1 = load i32, i32* %P
         %tmp.2 = and i32 %tmp.1, 255
         ret i32 %tmp.2
 }
diff --git a/llvm/test/CodeGen/PowerPC/split-index-tc.ll b/llvm/test/CodeGen/PowerPC/split-index-tc.ll
index 1b27be7..38be93f 100644
--- a/llvm/test/CodeGen/PowerPC/split-index-tc.ll
+++ b/llvm/test/CodeGen/PowerPC/split-index-tc.ll
@@ -13,7 +13,7 @@
 ; CHECK-NOT: lhzu
 
 entry:
-  %0 = load %"class.llvm::MachineOperand"** undef, align 8
+  %0 = load %"class.llvm::MachineOperand"*, %"class.llvm::MachineOperand"** undef, align 8
   br i1 undef, label %_ZNK4llvm14MachineOperand6getRegEv.exit, label %cond.false.i123
 
 cond.false.i123:                                  ; preds = %_ZN4llvm12MachineInstr10getOperandEj.exit
@@ -22,7 +22,7 @@
 _ZNK4llvm14MachineOperand6getRegEv.exit:          ; preds = %_ZN4llvm12MachineInstr10getOperandEj.exit
   %IsDef.i = getelementptr inbounds %"class.llvm::MachineOperand", %"class.llvm::MachineOperand"* %0, i64 undef, i32 1
   %1 = bitcast [3 x i8]* %IsDef.i to i24*
-  %bf.load.i = load i24* %1, align 1
+  %bf.load.i = load i24, i24* %1, align 1
   %2 = and i24 %bf.load.i, 128
   br i1 undef, label %for.cond.cleanup, label %for.body.lr.ph
 
@@ -61,7 +61,7 @@
   unreachable
 
 _ZNK4llvm14MachineOperand6isDeadEv.exit262:       ; preds = %if.end55
-  %bf.load.i259 = load i24* %1, align 1
+  %bf.load.i259 = load i24, i24* %1, align 1
   br i1 undef, label %if.then57, label %if.else59
 
 if.then57:                                        ; preds = %_ZNK4llvm14MachineOperand6isDeadEv.exit262
diff --git a/llvm/test/CodeGen/PowerPC/stack-protector.ll b/llvm/test/CodeGen/PowerPC/stack-protector.ll
index b81d941..bb59ac4 100644
--- a/llvm/test/CodeGen/PowerPC/stack-protector.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-protector.ll
@@ -11,7 +11,7 @@
   %"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store i8* %a, i8** %a_addr
 	%buf1 = bitcast [8 x i8]* %buf to i8*		; <i8*> [#uses=1]
-	%0 = load i8** %a_addr, align 4		; <i8*> [#uses=1]
+	%0 = load i8*, i8** %a_addr, align 4		; <i8*> [#uses=1]
 	%1 = call i8* @strcpy(i8* %buf1, i8* %0) nounwind		; <i8*> [#uses=0]
   %buf2 = bitcast [8 x i8]* %buf to i8*		; <i8*> [#uses=1]
 	%2 = call i32 (i8*, ...)* @printf(i8* getelementptr ([11 x i8]* @"\01LC", i32 0, i32 0), i8* %buf2) nounwind		; <i32> [#uses=0]
diff --git a/llvm/test/CodeGen/PowerPC/stack-realign.ll b/llvm/test/CodeGen/PowerPC/stack-realign.ll
index bc668a1..00aee56 100644
--- a/llvm/test/CodeGen/PowerPC/stack-realign.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-realign.ll
@@ -15,12 +15,12 @@
 entry:
   %x = alloca [2 x i32], align 32
   %a1 = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 0
-  %0 = load i32* %a1, align 4
+  %0 = load i32, i32* %a1, align 4
   %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 0
   store i32 %0, i32* %arrayidx, align 32
   %b = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 1
-  %1 = load i32* %b, align 4
-  %2 = load i32* @barbaz, align 4
+  %1 = load i32, i32* %b, align 4
+  %2 = load i32, i32* @barbaz, align 4
   %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 1
   store i32 %2, i32* %arrayidx2, align 4
   call void @bar(i32* %arrayidx)
@@ -99,11 +99,11 @@
 entry:
   %x = alloca [200000 x i32], align 32
   %a1 = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 0
-  %0 = load i32* %a1, align 4
+  %0 = load i32, i32* %a1, align 4
   %arrayidx = getelementptr inbounds [200000 x i32], [200000 x i32]* %x, i64 0, i64 0
   store i32 %0, i32* %arrayidx, align 32
   %b = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 1
-  %1 = load i32* %b, align 4
+  %1 = load i32, i32* %b, align 4
   %arrayidx2 = getelementptr inbounds [200000 x i32], [200000 x i32]* %x, i64 0, i64 1
   store i32 %1, i32* %arrayidx2, align 4
   call void @bar(i32* %arrayidx)
@@ -160,11 +160,11 @@
 entry:
   %x = alloca [2 x i32], align 32
   %a1 = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 0
-  %0 = load i32* %a1, align 4
+  %0 = load i32, i32* %a1, align 4
   %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 0
   store i32 %0, i32* %arrayidx, align 32
   %b = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 1
-  %1 = load i32* %b, align 4
+  %1 = load i32, i32* %b, align 4
   %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 1
   store i32 %1, i32* %arrayidx2, align 4
   call void @bar(i32* %arrayidx)
diff --git a/llvm/test/CodeGen/PowerPC/std-unal-fi.ll b/llvm/test/CodeGen/PowerPC/std-unal-fi.ll
index 8b9606e..74ea8cd 100644
--- a/llvm/test/CodeGen/PowerPC/std-unal-fi.ll
+++ b/llvm/test/CodeGen/PowerPC/std-unal-fi.ll
@@ -9,7 +9,7 @@
   br label %CF
 
 CF:                                               ; preds = %CF80, %CF, %BB
-  %L5 = load i64* undef
+  %L5 = load i64, i64* undef
   store i8 %0, i8* %A4
   %Shuff7 = shufflevector <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> %Shuff, <16 x i32> <i32 28, i32 30, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 undef, i32 20, i32 22, i32 24, i32 26>
   %PC10 = bitcast i8* %A4 to ppc_fp128*
@@ -19,13 +19,13 @@
   br i1 undef, label %CF77, label %CF82
 
 CF82:                                             ; preds = %CF82, %CF77
-  %L19 = load i64* undef
+  %L19 = load i64, i64* undef
   store <1 x ppc_fp128> zeroinitializer, <1 x ppc_fp128>* %A
   store i8 -65, i8* %A4
   br i1 undef, label %CF82, label %CF83
 
 CF83:                                             ; preds = %CF82
-  %L34 = load i64* undef
+  %L34 = load i64, i64* undef
   br i1 undef, label %CF77, label %CF81
 
 CF81:                                             ; preds = %CF83
@@ -54,7 +54,7 @@
 BB:
   %A4 = alloca <2 x i1>
   %A = alloca <16 x float>
-  %L = load i8* %0
+  %L = load i8, i8* %0
   %Sl = select i1 false, <16 x float>* %A, <16 x float>* %A
   %PC = bitcast <2 x i1>* %A4 to i64*
   %Sl27 = select i1 false, i8 undef, i8 %L
@@ -66,7 +66,7 @@
 
 CF77:                                             ; preds = %CF80, %CF77, %CF
   store <16 x float> zeroinitializer, <16 x float>* %Sl
-  %L58 = load i32* %PC33
+  %L58 = load i32, i32* %PC33
   store i8 0, i8* %0
   br i1 undef, label %CF77, label %CF80
 
@@ -90,7 +90,7 @@
   %A1 = alloca i1
   %I8 = insertelement <1 x i32> <i32 -1>, i32 454855, i32 0
   %Cmp = icmp ult <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>, undef
-  %L10 = load i64* %2
+  %L10 = load i64, i64* %2
   %E11 = extractelement <4 x i1> %Cmp, i32 2
   br label %CF72
 
@@ -103,7 +103,7 @@
 CF74:                                             ; preds = %CF72
   store i8 0, i8* %0
   %PC = bitcast i1* %A1 to i64*
-  %L31 = load i64* %PC
+  %L31 = load i64, i64* %PC
   store i64 477323, i64* %PC
   %Sl37 = select i1 false, i32* undef, i32* %1
   %Cmp38 = icmp ugt i1 undef, undef
diff --git a/llvm/test/CodeGen/PowerPC/store-load-fwd.ll b/llvm/test/CodeGen/PowerPC/store-load-fwd.ll
index 25663c1..62dd79e 100644
--- a/llvm/test/CodeGen/PowerPC/store-load-fwd.ll
+++ b/llvm/test/CodeGen/PowerPC/store-load-fwd.ll
@@ -2,7 +2,7 @@
 
 define i32 @test(i32* %P) {
         store i32 1, i32* %P
-        %V = load i32* %P               ; <i32> [#uses=1]
+        %V = load i32, i32* %P               ; <i32> [#uses=1]
         ret i32 %V
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/structsinmem.ll b/llvm/test/CodeGen/PowerPC/structsinmem.ll
index 5d94e5a..4b51e3a 100644
--- a/llvm/test/CodeGen/PowerPC/structsinmem.ll
+++ b/llvm/test/CodeGen/PowerPC/structsinmem.ll
@@ -89,27 +89,27 @@
   store i32 %z7, i32* %z7.addr, align 4
   store i32 %z8, i32* %z8.addr, align 4
   %a = getelementptr inbounds %struct.s1, %struct.s1* %v1, i32 0, i32 0
-  %0 = load i8* %a, align 1
+  %0 = load i8, i8* %a, align 1
   %conv = zext i8 %0 to i32
   %a1 = getelementptr inbounds %struct.s2, %struct.s2* %v2, i32 0, i32 0
-  %1 = load i16* %a1, align 2
+  %1 = load i16, i16* %a1, align 2
   %conv2 = sext i16 %1 to i32
   %add = add nsw i32 %conv, %conv2
   %a3 = getelementptr inbounds %struct.s3, %struct.s3* %v3, i32 0, i32 0
-  %2 = load i16* %a3, align 2
+  %2 = load i16, i16* %a3, align 2
   %conv4 = sext i16 %2 to i32
   %add5 = add nsw i32 %add, %conv4
   %a6 = getelementptr inbounds %struct.s4, %struct.s4* %v4, i32 0, i32 0
-  %3 = load i32* %a6, align 4
+  %3 = load i32, i32* %a6, align 4
   %add7 = add nsw i32 %add5, %3
   %a8 = getelementptr inbounds %struct.s5, %struct.s5* %v5, i32 0, i32 0
-  %4 = load i32* %a8, align 4
+  %4 = load i32, i32* %a8, align 4
   %add9 = add nsw i32 %add7, %4
   %a10 = getelementptr inbounds %struct.s6, %struct.s6* %v6, i32 0, i32 0
-  %5 = load i32* %a10, align 4
+  %5 = load i32, i32* %a10, align 4
   %add11 = add nsw i32 %add9, %5
   %a12 = getelementptr inbounds %struct.s7, %struct.s7* %v7, i32 0, i32 0
-  %6 = load i32* %a12, align 4
+  %6 = load i32, i32* %a12, align 4
   %add13 = add nsw i32 %add11, %6
   ret i32 %add13
 
@@ -181,27 +181,27 @@
   store i32 %z7, i32* %z7.addr, align 4
   store i32 %z8, i32* %z8.addr, align 4
   %a = getelementptr inbounds %struct.t1, %struct.t1* %v1, i32 0, i32 0
-  %0 = load i8* %a, align 1
+  %0 = load i8, i8* %a, align 1
   %conv = zext i8 %0 to i32
   %a1 = getelementptr inbounds %struct.t2, %struct.t2* %v2, i32 0, i32 0
-  %1 = load i16* %a1, align 1
+  %1 = load i16, i16* %a1, align 1
   %conv2 = sext i16 %1 to i32
   %add = add nsw i32 %conv, %conv2
   %a3 = getelementptr inbounds %struct.t3, %struct.t3* %v3, i32 0, i32 0
-  %2 = load i16* %a3, align 1
+  %2 = load i16, i16* %a3, align 1
   %conv4 = sext i16 %2 to i32
   %add5 = add nsw i32 %add, %conv4
   %a6 = getelementptr inbounds %struct.t4, %struct.t4* %v4, i32 0, i32 0
-  %3 = load i32* %a6, align 1
+  %3 = load i32, i32* %a6, align 1
   %add7 = add nsw i32 %add5, %3
   %a8 = getelementptr inbounds %struct.t5, %struct.t5* %v5, i32 0, i32 0
-  %4 = load i32* %a8, align 1
+  %4 = load i32, i32* %a8, align 1
   %add9 = add nsw i32 %add7, %4
   %a10 = getelementptr inbounds %struct.t6, %struct.t6* %v6, i32 0, i32 0
-  %5 = load i32* %a10, align 1
+  %5 = load i32, i32* %a10, align 1
   %add11 = add nsw i32 %add9, %5
   %a12 = getelementptr inbounds %struct.t7, %struct.t7* %v7, i32 0, i32 0
-  %6 = load i32* %a12, align 1
+  %6 = load i32, i32* %a12, align 1
   %add13 = add nsw i32 %add11, %6
   ret i32 %add13
 
diff --git a/llvm/test/CodeGen/PowerPC/structsinregs.ll b/llvm/test/CodeGen/PowerPC/structsinregs.ll
index 1eea311..a5a4b2a 100644
--- a/llvm/test/CodeGen/PowerPC/structsinregs.ll
+++ b/llvm/test/CodeGen/PowerPC/structsinregs.ll
@@ -73,27 +73,27 @@
 define internal i32 @callee1(%struct.s1* byval %v1, %struct.s2* byval %v2, %struct.s3* byval %v3, %struct.s4* byval %v4, %struct.s5* byval %v5, %struct.s6* byval %v6, %struct.s7* byval %v7) nounwind {
 entry:
   %a = getelementptr inbounds %struct.s1, %struct.s1* %v1, i32 0, i32 0
-  %0 = load i8* %a, align 1
+  %0 = load i8, i8* %a, align 1
   %conv = zext i8 %0 to i32
   %a1 = getelementptr inbounds %struct.s2, %struct.s2* %v2, i32 0, i32 0
-  %1 = load i16* %a1, align 2
+  %1 = load i16, i16* %a1, align 2
   %conv2 = sext i16 %1 to i32
   %add = add nsw i32 %conv, %conv2
   %a3 = getelementptr inbounds %struct.s3, %struct.s3* %v3, i32 0, i32 0
-  %2 = load i16* %a3, align 2
+  %2 = load i16, i16* %a3, align 2
   %conv4 = sext i16 %2 to i32
   %add5 = add nsw i32 %add, %conv4
   %a6 = getelementptr inbounds %struct.s4, %struct.s4* %v4, i32 0, i32 0
-  %3 = load i32* %a6, align 4
+  %3 = load i32, i32* %a6, align 4
   %add7 = add nsw i32 %add5, %3
   %a8 = getelementptr inbounds %struct.s5, %struct.s5* %v5, i32 0, i32 0
-  %4 = load i32* %a8, align 4
+  %4 = load i32, i32* %a8, align 4
   %add9 = add nsw i32 %add7, %4
   %a10 = getelementptr inbounds %struct.s6, %struct.s6* %v6, i32 0, i32 0
-  %5 = load i32* %a10, align 4
+  %5 = load i32, i32* %a10, align 4
   %add11 = add nsw i32 %add9, %5
   %a12 = getelementptr inbounds %struct.s7, %struct.s7* %v7, i32 0, i32 0
-  %6 = load i32* %a12, align 4
+  %6 = load i32, i32* %a12, align 4
   %add13 = add nsw i32 %add11, %6
   ret i32 %add13
 
@@ -160,27 +160,27 @@
 define internal i32 @callee2(%struct.t1* byval %v1, %struct.t2* byval %v2, %struct.t3* byval %v3, %struct.t4* byval %v4, %struct.t5* byval %v5, %struct.t6* byval %v6, %struct.t7* byval %v7) nounwind {
 entry:
   %a = getelementptr inbounds %struct.t1, %struct.t1* %v1, i32 0, i32 0
-  %0 = load i8* %a, align 1
+  %0 = load i8, i8* %a, align 1
   %conv = zext i8 %0 to i32
   %a1 = getelementptr inbounds %struct.t2, %struct.t2* %v2, i32 0, i32 0
-  %1 = load i16* %a1, align 1
+  %1 = load i16, i16* %a1, align 1
   %conv2 = sext i16 %1 to i32
   %add = add nsw i32 %conv, %conv2
   %a3 = getelementptr inbounds %struct.t3, %struct.t3* %v3, i32 0, i32 0
-  %2 = load i16* %a3, align 1
+  %2 = load i16, i16* %a3, align 1
   %conv4 = sext i16 %2 to i32
   %add5 = add nsw i32 %add, %conv4
   %a6 = getelementptr inbounds %struct.t4, %struct.t4* %v4, i32 0, i32 0
-  %3 = load i32* %a6, align 1
+  %3 = load i32, i32* %a6, align 1
   %add7 = add nsw i32 %add5, %3
   %a8 = getelementptr inbounds %struct.t5, %struct.t5* %v5, i32 0, i32 0
-  %4 = load i32* %a8, align 1
+  %4 = load i32, i32* %a8, align 1
   %add9 = add nsw i32 %add7, %4
   %a10 = getelementptr inbounds %struct.t6, %struct.t6* %v6, i32 0, i32 0
-  %5 = load i32* %a10, align 1
+  %5 = load i32, i32* %a10, align 1
   %add11 = add nsw i32 %add9, %5
   %a12 = getelementptr inbounds %struct.t7, %struct.t7* %v7, i32 0, i32 0
-  %6 = load i32* %a12, align 1
+  %6 = load i32, i32* %a12, align 1
   %add13 = add nsw i32 %add11, %6
   ret i32 %add13
 
diff --git a/llvm/test/CodeGen/PowerPC/subreg-postra-2.ll b/llvm/test/CodeGen/PowerPC/subreg-postra-2.ll
index 105a845..893e4b9 100644
--- a/llvm/test/CodeGen/PowerPC/subreg-postra-2.ll
+++ b/llvm/test/CodeGen/PowerPC/subreg-postra-2.ll
@@ -134,7 +134,7 @@
   br label %while.body392
 
 while.body392:                                    ; preds = %wait_on_buffer.exit1319, %while.body392.lr.ph
-  %0 = load i8** undef, align 8
+  %0 = load i8*, i8** undef, align 8
   %add.ptr399 = getelementptr inbounds i8, i8* %0, i64 -72
   %b_state.i.i1314 = bitcast i8* %add.ptr399 to i64*
   %tobool.i1316 = icmp eq i64 undef, 0
@@ -144,7 +144,7 @@
   unreachable
 
 wait_on_buffer.exit1319:                          ; preds = %while.body392
-  %1 = load volatile i64* %b_state.i.i1314, align 8
+  %1 = load volatile i64, i64* %b_state.i.i1314, align 8
   %conv.i.i1322 = and i64 %1, 1
   %lnot404 = icmp eq i64 %conv.i.i1322, 0
   %.err.4 = select i1 %lnot404, i32 -5, i32 undef
diff --git a/llvm/test/CodeGen/PowerPC/subreg-postra.ll b/llvm/test/CodeGen/PowerPC/subreg-postra.ll
index 9a97fb1..fa5fd7e 100644
--- a/llvm/test/CodeGen/PowerPC/subreg-postra.ll
+++ b/llvm/test/CodeGen/PowerPC/subreg-postra.ll
@@ -120,7 +120,7 @@
   br label %while.body392
 
 while.body392:                                    ; preds = %wait_on_buffer.exit1319, %while.body392.lr.ph
-  %0 = load i8** undef, align 8
+  %0 = load i8*, i8** undef, align 8
   %add.ptr399 = getelementptr inbounds i8, i8* %0, i64 -72
   %b_state.i.i1314 = bitcast i8* %add.ptr399 to i64*
   %tobool.i1316 = icmp eq i64 undef, 0
@@ -130,13 +130,13 @@
   unreachable
 
 wait_on_buffer.exit1319:                          ; preds = %while.body392
-  %1 = load volatile i64* %b_state.i.i1314, align 8
+  %1 = load volatile i64, i64* %b_state.i.i1314, align 8
   %conv.i.i1322 = and i64 %1, 1
   %lnot404 = icmp eq i64 %conv.i.i1322, 0
   %.err.4 = select i1 %lnot404, i32 -5, i32 undef
   %2 = call i64 asm sideeffect "1:.long 0x7c0000a8 $| ((($0) & 0x1f) << 21) $| (((0) & 0x1f) << 16) $| ((($3) & 0x1f) << 11) $| (((0) & 0x1) << 0) \0Aandc $0,$0,$2\0Astdcx. $0,0,$3\0Abne- 1b\0A", "=&r,=*m,r,r,*m,~{cc},~{memory}"(i64* %b_state.i.i1314, i64 262144, i64* %b_state.i.i1314, i64* %b_state.i.i1314) #1
   %prev.i.i.i1325 = getelementptr inbounds i8, i8* %0, i64 8
-  %3 = load i32** null, align 8
+  %3 = load i32*, i32** null, align 8
   store i32* %3, i32** undef, align 8
   call void @__brelse(i32* undef) #1
   br i1 undef, label %while.end418, label %while.body392
diff --git a/llvm/test/CodeGen/PowerPC/subsumes-pred-regs.ll b/llvm/test/CodeGen/PowerPC/subsumes-pred-regs.ll
index c510e36..5389c13 100644
--- a/llvm/test/CodeGen/PowerPC/subsumes-pred-regs.ll
+++ b/llvm/test/CodeGen/PowerPC/subsumes-pred-regs.ll
@@ -20,7 +20,7 @@
   br i1 undef, label %return, label %if.end.i24
 
 if.end.i24:                                       ; preds = %if.then
-  %0 = load i32* undef, align 4
+  %0 = load i32, i32* undef, align 4
   %lnot.i.i16.i23 = icmp eq i32 %0, 0
   br i1 %lnot.i.i16.i23, label %if.end7.i37, label %test.exit27.i34
 
diff --git a/llvm/test/CodeGen/PowerPC/tls-cse.ll b/llvm/test/CodeGen/PowerPC/tls-cse.ll
index aaa9591..43bf948 100644
--- a/llvm/test/CodeGen/PowerPC/tls-cse.ll
+++ b/llvm/test/CodeGen/PowerPC/tls-cse.ll
@@ -25,7 +25,7 @@
 entry:
   %0 = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry", %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 0
   store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @_ZTVN4llvm21PrettyStackTraceEntryE, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
-  %1 = load %"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead, align 8
+  %1 = load %"class.llvm::PrettyStackTraceEntry"*, %"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead, align 8
   %cmp.i = icmp eq %"class.llvm::PrettyStackTraceEntry"* %1, %this
   br i1 %cmp.i, label %_ZN4llvm21PrettyStackTraceEntryD2Ev.exit, label %cond.false.i
 
@@ -36,7 +36,7 @@
 _ZN4llvm21PrettyStackTraceEntryD2Ev.exit:         ; preds = %entry
   %NextEntry.i.i = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry", %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 1
   %2 = bitcast %"class.llvm::PrettyStackTraceEntry"** %NextEntry.i.i to i64*
-  %3 = load i64* %2, align 8
+  %3 = load i64, i64* %2, align 8
   store i64 %3, i64* bitcast (%"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead to i64*), align 8
   %4 = bitcast %"class.llvm::PrettyStackTraceEntry"* %this to i8*
   tail call void @_ZdlPv(i8* %4)
diff --git a/llvm/test/CodeGen/PowerPC/tls-pic.ll b/llvm/test/CodeGen/PowerPC/tls-pic.ll
index 6c671b0..b7d9298 100644
--- a/llvm/test/CodeGen/PowerPC/tls-pic.ll
+++ b/llvm/test/CodeGen/PowerPC/tls-pic.ll
@@ -13,7 +13,7 @@
 entry:
   %retval = alloca i32, align 4
   store i32 0, i32* %retval
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   ret i32 %0
 }
 
@@ -55,7 +55,7 @@
 entry:
   %retval = alloca i32, align 4
   store i32 0, i32* %retval
-  %0 = load i32* @a2, align 4
+  %0 = load i32, i32* @a2, align 4
   ret i32 %0
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/tls.ll b/llvm/test/CodeGen/PowerPC/tls.ll
index 59b4de7..c96e444 100644
--- a/llvm/test/CodeGen/PowerPC/tls.ll
+++ b/llvm/test/CodeGen/PowerPC/tls.ll
@@ -30,7 +30,7 @@
 entry:
   %retval = alloca i32, align 4
   store i32 0, i32* %retval
-  %0 = load i32* @a2, align 4
+  %0 = load i32, i32* @a2, align 4
   ret i32 %0
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/toc-load-sched-bug.ll b/llvm/test/CodeGen/PowerPC/toc-load-sched-bug.ll
index 2dd9b1c..2437fd4 100644
--- a/llvm/test/CodeGen/PowerPC/toc-load-sched-bug.ll
+++ b/llvm/test/CodeGen/PowerPC/toc-load-sched-bug.ll
@@ -177,48 +177,48 @@
   %ref.tmp = alloca %"class.llvm::SMDiagnostic", align 8
   %ref.tmp5 = alloca %"class.std::basic_string", align 8
   %_M_p.i.i.i = getelementptr inbounds %"class.std::basic_string", %"class.std::basic_string"* %Filename, i64 0, i32 0, i32 0
-  %0 = load i8** %_M_p.i.i.i, align 8, !tbaa !1
+  %0 = load i8*, i8** %_M_p.i.i.i, align 8, !tbaa !1
   %1 = ptrtoint i8* %0 to i64
   %arrayidx.i.i.i = getelementptr inbounds i8, i8* %0, i64 -24
   %_M_length.i.i = bitcast i8* %arrayidx.i.i.i to i64*
-  %2 = load i64* %_M_length.i.i, align 8, !tbaa !7
+  %2 = load i64, i64* %_M_length.i.i, align 8, !tbaa !7
   %.fca.0.insert18 = insertvalue [2 x i64] undef, i64 %1, 0
   %.fca.1.insert21 = insertvalue [2 x i64] %.fca.0.insert18, i64 %2, 1
   call void @_ZN4llvm12MemoryBuffer14getFileOrSTDINENS_9StringRefEl(%"class.llvm::ErrorOr"* sret %FileOrErr, [2 x i64] %.fca.1.insert21, i64 -1) #3
   %HasError.i24 = getelementptr inbounds %"class.llvm::ErrorOr", %"class.llvm::ErrorOr"* %FileOrErr, i64 0, i32 1
-  %bf.load.i25 = load i8* %HasError.i24, align 8
+  %bf.load.i25 = load i8, i8* %HasError.i24, align 8
   %3 = and i8 %bf.load.i25, 1
   %bf.cast.i26 = icmp eq i8 %3, 0
   br i1 %bf.cast.i26, label %_ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE3getEv.exit, label %_ZNK4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE8getErrorEv.exit
 
 _ZNK4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE8getErrorEv.exit: ; preds = %entry
   %retval.sroa.0.0..sroa_cast.i = bitcast %"class.llvm::ErrorOr"* %FileOrErr to i64*
-  %retval.sroa.0.0.copyload.i = load i64* %retval.sroa.0.0..sroa_cast.i, align 8
+  %retval.sroa.0.0.copyload.i = load i64, i64* %retval.sroa.0.0..sroa_cast.i, align 8
   %retval.sroa.3.0..sroa_idx.i = getelementptr inbounds %"class.llvm::ErrorOr", %"class.llvm::ErrorOr"* %FileOrErr, i64 0, i32 0, i32 0, i32 0, i32 0, i64 8
   %retval.sroa.3.0..sroa_cast.i = bitcast i8* %retval.sroa.3.0..sroa_idx.i to i64*
-  %retval.sroa.3.0.copyload.i = load i64* %retval.sroa.3.0..sroa_cast.i, align 8
+  %retval.sroa.3.0.copyload.i = load i64, i64* %retval.sroa.3.0..sroa_cast.i, align 8
   %phitmp = trunc i64 %retval.sroa.0.0.copyload.i to i32
   %cmp.i = icmp eq i32 %phitmp, 0
   br i1 %cmp.i, label %cond.false.i.i, label %if.then
 
 if.then:                                          ; preds = %_ZNK4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE8getErrorEv.exit
   %.c = inttoptr i64 %retval.sroa.3.0.copyload.i to %"class.std::error_category"*
-  %4 = load i8** %_M_p.i.i.i, align 8, !tbaa !1
+  %4 = load i8*, i8** %_M_p.i.i.i, align 8, !tbaa !1
   %arrayidx.i.i.i30 = getelementptr inbounds i8, i8* %4, i64 -24
   %_M_length.i.i31 = bitcast i8* %arrayidx.i.i.i30 to i64*
-  %5 = load i64* %_M_length.i.i31, align 8, !tbaa !7
+  %5 = load i64, i64* %_M_length.i.i31, align 8, !tbaa !7
   %6 = inttoptr i64 %retval.sroa.3.0.copyload.i to void (%"class.std::basic_string"*, %"class.std::error_category"*, i32)***
-  %vtable.i = load void (%"class.std::basic_string"*, %"class.std::error_category"*, i32)*** %6, align 8, !tbaa !11
+  %vtable.i = load void (%"class.std::basic_string"*, %"class.std::error_category"*, i32)**, void (%"class.std::basic_string"*, %"class.std::error_category"*, i32)*** %6, align 8, !tbaa !11
   %vfn.i = getelementptr inbounds void (%"class.std::basic_string"*, %"class.std::error_category"*, i32)*, void (%"class.std::basic_string"*, %"class.std::error_category"*, i32)** %vtable.i, i64 3
-  %7 = load void (%"class.std::basic_string"*, %"class.std::error_category"*, i32)** %vfn.i, align 8
+  %7 = load void (%"class.std::basic_string"*, %"class.std::error_category"*, i32)*, void (%"class.std::basic_string"*, %"class.std::error_category"*, i32)** %vfn.i, align 8
   call void %7(%"class.std::basic_string"* sret %ref.tmp5, %"class.std::error_category"* %.c, i32 signext %phitmp) #3
   %call2.i.i = call dereferenceable(8) %"class.std::basic_string"* @_ZNSs6insertEmPKcm(%"class.std::basic_string"* %ref.tmp5, i64 0, i8* getelementptr inbounds ([28 x i8]* @.str, i64 0, i64 0), i64 27) #3
   %_M_p2.i.i.i.i = getelementptr inbounds %"class.std::basic_string", %"class.std::basic_string"* %call2.i.i, i64 0, i32 0, i32 0
-  %8 = load i8** %_M_p2.i.i.i.i, align 8, !tbaa !13
+  %8 = load i8*, i8** %_M_p2.i.i.i.i, align 8, !tbaa !13
   store i8* bitcast (i64* getelementptr inbounds ([0 x i64]* @_ZNSs4_Rep20_S_empty_rep_storageE, i64 0, i64 3) to i8*), i8** %_M_p2.i.i.i.i, align 8, !tbaa !1
   %arrayidx.i.i.i36 = getelementptr inbounds i8, i8* %8, i64 -24
   %_M_length.i.i37 = bitcast i8* %arrayidx.i.i.i36 to i64*
-  %9 = load i64* %_M_length.i.i37, align 8, !tbaa !7
+  %9 = load i64, i64* %_M_length.i.i37, align 8, !tbaa !7
   %Filename.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 2
   %10 = getelementptr inbounds %"class.std::allocator", %"class.std::allocator"* %ref.tmp.i.i2.i, i64 0, i32 0
   %11 = bitcast %"class.llvm::SMDiagnostic"* %ref.tmp to i8*
@@ -289,21 +289,21 @@
   call void @_ZNSs4swapERSs(%"class.std::basic_string"* %LineContents.i, %"class.std::basic_string"* dereferenceable(8) %LineContents7.i) #3
   %Ranges.i41 = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %Err, i64 0, i32 8
   %_M_start.i7.i.i.i = getelementptr inbounds %"class.std::vector.79", %"class.std::vector.79"* %Ranges.i41, i64 0, i32 0, i32 0, i32 0
-  %18 = load %"struct.std::pair"** %_M_start.i7.i.i.i, align 8, !tbaa !27
+  %18 = load %"struct.std::pair"*, %"struct.std::pair"** %_M_start.i7.i.i.i, align 8, !tbaa !27
   %_M_finish.i9.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %Err, i64 0, i32 8, i32 0, i32 0, i32 1
   %_M_end_of_storage.i11.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %Err, i64 0, i32 8, i32 0, i32 0, i32 2
   %_M_start2.i.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 8, i32 0, i32 0, i32 0
   %19 = bitcast %"class.std::vector.79"* %Ranges.i41 to i8*
   call void @llvm.memset.p0i8.i64(i8* %19, i8 0, i64 16, i32 8, i1 false) #3
-  %20 = load %"struct.std::pair"** %_M_start2.i.i.i.i, align 8, !tbaa !27
+  %20 = load %"struct.std::pair"*, %"struct.std::pair"** %_M_start2.i.i.i.i, align 8, !tbaa !27
   store %"struct.std::pair"* %20, %"struct.std::pair"** %_M_start.i7.i.i.i, align 8, !tbaa !27
   store %"struct.std::pair"* null, %"struct.std::pair"** %_M_start2.i.i.i.i, align 8, !tbaa !27
   %_M_finish3.i.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 8, i32 0, i32 0, i32 1
-  %21 = load %"struct.std::pair"** %_M_finish3.i.i.i.i, align 8, !tbaa !27
+  %21 = load %"struct.std::pair"*, %"struct.std::pair"** %_M_finish3.i.i.i.i, align 8, !tbaa !27
   store %"struct.std::pair"* %21, %"struct.std::pair"** %_M_finish.i9.i.i.i, align 8, !tbaa !27
   store %"struct.std::pair"* null, %"struct.std::pair"** %_M_finish3.i.i.i.i, align 8, !tbaa !27
   %_M_end_of_storage4.i.i.i.i = getelementptr inbounds %"class.llvm::SMDiagnostic", %"class.llvm::SMDiagnostic"* %ref.tmp, i64 0, i32 8, i32 0, i32 0, i32 2
-  %22 = load %"struct.std::pair"** %_M_end_of_storage4.i.i.i.i, align 8, !tbaa !27
+  %22 = load %"struct.std::pair"*, %"struct.std::pair"** %_M_end_of_storage4.i.i.i.i, align 8, !tbaa !27
   store %"struct.std::pair"* %22, %"struct.std::pair"** %_M_end_of_storage.i11.i.i.i, align 8, !tbaa !27
   store %"struct.std::pair"* null, %"struct.std::pair"** %_M_end_of_storage4.i.i.i.i, align 8, !tbaa !27
   %tobool.i.i.i.i.i.i = icmp eq %"struct.std::pair"* %18, null
@@ -335,12 +335,12 @@
   call void @llvm.lifetime.start(i64 4, i8* %.atomicdst.i.i.i.i.i.0..atomicdst.i.i.i.i.0..atomicdst.i.i.i.0..atomicdst.i.i.0..atomicdst.i.0..sroa_cast)
   %29 = atomicrmw volatile add i32* %28, i32 -1 acq_rel
   store i32 %29, i32* %.atomicdst.i.i.i.i.i, align 4
-  %.atomicdst.i.i.i.i.i.0..atomicdst.i.i.i.i.0..atomicdst.i.i.i.0..atomicdst.i.i.0..atomicdst.i.0..atomicdst.0..atomicdst.0..i.i.i.i.i = load volatile i32* %.atomicdst.i.i.i.i.i, align 4
+  %.atomicdst.i.i.i.i.i.0..atomicdst.i.i.i.i.0..atomicdst.i.i.i.0..atomicdst.i.i.0..atomicdst.i.0..atomicdst.0..atomicdst.0..i.i.i.i.i = load volatile i32, i32* %.atomicdst.i.i.i.i.i, align 4
   call void @llvm.lifetime.end(i64 4, i8* %.atomicdst.i.i.i.i.i.0..atomicdst.i.i.i.i.0..atomicdst.i.i.i.0..atomicdst.i.i.0..atomicdst.i.0..sroa_cast)
   br label %_ZN9__gnu_cxxL27__exchange_and_add_dispatchEPii.exit.i.i.i
 
 if.else.i.i.i.i:                                  ; preds = %if.then.i.i.i45
-  %30 = load i32* %28, align 4, !tbaa !29
+  %30 = load i32, i32* %28, align 4, !tbaa !29
   %add.i.i.i.i.i = add nsw i32 %30, -1
   store i32 %add.i.i.i.i.i, i32* %28, align 4, !tbaa !29
   br label %_ZN9__gnu_cxxL27__exchange_and_add_dispatchEPii.exit.i.i.i
@@ -359,7 +359,7 @@
   %31 = getelementptr inbounds %"class.std::allocator", %"class.std::allocator"* %ref.tmp.i.i47, i64 0, i32 0
   call void @llvm.lifetime.start(i64 1, i8* %31) #3
   %_M_p.i.i.i.i48 = getelementptr inbounds %"class.std::basic_string", %"class.std::basic_string"* %ref.tmp5, i64 0, i32 0, i32 0
-  %32 = load i8** %_M_p.i.i.i.i48, align 8, !tbaa !1
+  %32 = load i8*, i8** %_M_p.i.i.i.i48, align 8, !tbaa !1
   %arrayidx.i.i.i49 = getelementptr inbounds i8, i8* %32, i64 -24
   %33 = bitcast i8* %arrayidx.i.i.i49 to %"struct.std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Rep"*
   %cmp.i.i.i50 = icmp eq i8* %arrayidx.i.i.i49, bitcast ([0 x i64]* @_ZNSs4_Rep20_S_empty_rep_storageE to i8*)
@@ -375,12 +375,12 @@
   call void @llvm.lifetime.start(i64 4, i8* %.atomicdst.i.i.i.i.i46.0..atomicdst.i.i.i.i.0..atomicdst.i.i.i.0..atomicdst.i.i.0..atomicdst.i.0..sroa_cast)
   %35 = atomicrmw volatile add i32* %34, i32 -1 acq_rel
   store i32 %35, i32* %.atomicdst.i.i.i.i.i46, align 4
-  %.atomicdst.i.i.i.i.i46.0..atomicdst.i.i.i.i.0..atomicdst.i.i.i.0..atomicdst.i.i.0..atomicdst.i.0..atomicdst.0..atomicdst.0..i.i.i.i.i54 = load volatile i32* %.atomicdst.i.i.i.i.i46, align 4
+  %.atomicdst.i.i.i.i.i46.0..atomicdst.i.i.i.i.0..atomicdst.i.i.i.0..atomicdst.i.i.0..atomicdst.i.0..atomicdst.0..atomicdst.0..i.i.i.i.i54 = load volatile i32, i32* %.atomicdst.i.i.i.i.i46, align 4
   call void @llvm.lifetime.end(i64 4, i8* %.atomicdst.i.i.i.i.i46.0..atomicdst.i.i.i.i.0..atomicdst.i.i.i.0..atomicdst.i.i.0..atomicdst.i.0..sroa_cast)
   br label %_ZN9__gnu_cxxL27__exchange_and_add_dispatchEPii.exit.i.i.i60
 
 if.else.i.i.i.i57:                                ; preds = %if.then.i.i.i52
-  %36 = load i32* %34, align 4, !tbaa !29
+  %36 = load i32, i32* %34, align 4, !tbaa !29
   %add.i.i.i.i.i56 = add nsw i32 %36, -1
   store i32 %add.i.i.i.i.i56, i32* %34, align 4, !tbaa !29
   br label %_ZN9__gnu_cxxL27__exchange_and_add_dispatchEPii.exit.i.i.i60
@@ -404,28 +404,28 @@
 
 _ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE3getEv.exit: ; preds = %entry
   %_M_head_impl.i.i.i.i.i = bitcast %"class.llvm::ErrorOr"* %FileOrErr to %"class.llvm::MemoryBuffer"**
-  %37 = load %"class.llvm::MemoryBuffer"** %_M_head_impl.i.i.i.i.i, align 8, !tbaa !27
+  %37 = load %"class.llvm::MemoryBuffer"*, %"class.llvm::MemoryBuffer"** %_M_head_impl.i.i.i.i.i, align 8, !tbaa !27
   %call9 = call %"class.llvm::Module"* @_ZN4llvm7ParseIREPNS_12MemoryBufferERNS_12SMDiagnosticERNS_11LLVMContextE(%"class.llvm::MemoryBuffer"* %37, %"class.llvm::SMDiagnostic"* dereferenceable(200) %Err, %"class.llvm::LLVMContext"* dereferenceable(8) %Context)
   br label %cleanup
 
 cleanup:                                          ; preds = %_ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE3getEv.exit, %_ZNSsD1Ev.exit62
   %retval.0 = phi %"class.llvm::Module"* [ null, %_ZNSsD1Ev.exit62 ], [ %call9, %_ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE3getEv.exit ]
-  %bf.load.i = load i8* %HasError.i24, align 8
+  %bf.load.i = load i8, i8* %HasError.i24, align 8
   %38 = and i8 %bf.load.i, 1
   %bf.cast.i = icmp eq i8 %38, 0
   br i1 %bf.cast.i, label %_ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE10getStorageEv.exit.i, label %_ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEED2Ev.exit
 
 _ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE10getStorageEv.exit.i: ; preds = %cleanup
   %_M_head_impl.i.i.i.i.i.i = bitcast %"class.llvm::ErrorOr"* %FileOrErr to %"class.llvm::MemoryBuffer"**
-  %39 = load %"class.llvm::MemoryBuffer"** %_M_head_impl.i.i.i.i.i.i, align 8, !tbaa !27
+  %39 = load %"class.llvm::MemoryBuffer"*, %"class.llvm::MemoryBuffer"** %_M_head_impl.i.i.i.i.i.i, align 8, !tbaa !27
   %cmp.i.i = icmp eq %"class.llvm::MemoryBuffer"* %39, null
   br i1 %cmp.i.i, label %_ZNSt10unique_ptrIN4llvm12MemoryBufferESt14default_deleteIS1_EED2Ev.exit.i, label %_ZNKSt14default_deleteIN4llvm12MemoryBufferEEclEPS1_.exit.i.i
 
 _ZNKSt14default_deleteIN4llvm12MemoryBufferEEclEPS1_.exit.i.i: ; preds = %_ZN4llvm7ErrorOrISt10unique_ptrINS_12MemoryBufferESt14default_deleteIS2_EEE10getStorageEv.exit.i
   %40 = bitcast %"class.llvm::MemoryBuffer"* %39 to void (%"class.llvm::MemoryBuffer"*)***
-  %vtable.i.i.i = load void (%"class.llvm::MemoryBuffer"*)*** %40, align 8, !tbaa !11
+  %vtable.i.i.i = load void (%"class.llvm::MemoryBuffer"*)**, void (%"class.llvm::MemoryBuffer"*)*** %40, align 8, !tbaa !11
   %vfn.i.i.i = getelementptr inbounds void (%"class.llvm::MemoryBuffer"*)*, void (%"class.llvm::MemoryBuffer"*)** %vtable.i.i.i, i64 1
-  %41 = load void (%"class.llvm::MemoryBuffer"*)** %vfn.i.i.i, align 8
+  %41 = load void (%"class.llvm::MemoryBuffer"*)*, void (%"class.llvm::MemoryBuffer"*)** %vfn.i.i.i, align 8
   call void %41(%"class.llvm::MemoryBuffer"* %39) #3
   br label %_ZNSt10unique_ptrIN4llvm12MemoryBufferESt14default_deleteIS1_EED2Ev.exit.i
 
diff --git a/llvm/test/CodeGen/PowerPC/trampoline.ll b/llvm/test/CodeGen/PowerPC/trampoline.ll
index fdc426b..f6269e1 100644
--- a/llvm/test/CodeGen/PowerPC/trampoline.ll
+++ b/llvm/test/CodeGen/PowerPC/trampoline.ll
@@ -63,7 +63,7 @@
 	store %struct.objc_selector* %_cmd, %struct.objc_selector** %_cmd_addr
 	store %struct.NSZone* %zone, %struct.NSZone** %zone_addr
 	%3 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]", %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7, i32 0, i32 0		; <%struct.NSBitmapImageRep**> [#uses=1]
-	%4 = load %struct.NSBitmapImageRep** %self_addr, align 4		; <%struct.NSBitmapImageRep*> [#uses=1]
+	%4 = load %struct.NSBitmapImageRep*, %struct.NSBitmapImageRep** %self_addr, align 4		; <%struct.NSBitmapImageRep*> [#uses=1]
 	store %struct.NSBitmapImageRep* %4, %struct.NSBitmapImageRep** %3, align 4
 	%TRAMP.91 = bitcast %struct.__builtin_trampoline* %TRAMP.9 to i8*		; <i8*> [#uses=1]
 	%FRAME.72 = bitcast %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7 to i8*		; <i8*> [#uses=1]
@@ -71,7 +71,7 @@
         %tramp = call i8* @llvm.adjust.trampoline(i8* %TRAMP.91)
 	store i8* %tramp, i8** %0, align 4
 	%5 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]", %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7, i32 0, i32 1		; <void (%struct.__block_1*, %struct.CGImage*)**> [#uses=1]
-	%6 = load i8** %0, align 4		; <i8*> [#uses=1]
+	%6 = load i8*, i8** %0, align 4		; <i8*> [#uses=1]
 	%7 = bitcast i8* %6 to void (%struct.__block_1*, %struct.CGImage*)*		; <void (%struct.__block_1*, %struct.CGImage*)*> [#uses=1]
 	store void (%struct.__block_1*, %struct.CGImage*)* %7, void (%struct.__block_1*, %struct.CGImage*)** %5, align 4
 	store %struct.NSBitmapImageRep* null, %struct.NSBitmapImageRep** %new, align 4
@@ -85,32 +85,32 @@
 	%13 = getelementptr %struct.__invoke_impl, %struct.__invoke_impl* %12, i32 0, i32 2		; <i32*> [#uses=1]
 	store i32 24, i32* %13, align 4
 	%14 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]", %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7, i32 0, i32 1		; <void (%struct.__block_1*, %struct.CGImage*)**> [#uses=1]
-	%15 = load void (%struct.__block_1*, %struct.CGImage*)** %14, align 4		; <void (%struct.__block_1*, %struct.CGImage*)*> [#uses=1]
+	%15 = load void (%struct.__block_1*, %struct.CGImage*)*, void (%struct.__block_1*, %struct.CGImage*)** %14, align 4		; <void (%struct.__block_1*, %struct.CGImage*)*> [#uses=1]
 	store void (%struct.__block_1*, %struct.CGImage*)* %15, void (%struct.__block_1*, %struct.CGImage*)** %1, align 4
 	%16 = getelementptr %struct.__block_1, %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 0		; <%struct.__invoke_impl*> [#uses=1]
 	%17 = getelementptr %struct.__invoke_impl, %struct.__invoke_impl* %16, i32 0, i32 3		; <i8**> [#uses=1]
-	%18 = load void (%struct.__block_1*, %struct.CGImage*)** %1, align 4		; <void (%struct.__block_1*, %struct.CGImage*)*> [#uses=1]
+	%18 = load void (%struct.__block_1*, %struct.CGImage*)*, void (%struct.__block_1*, %struct.CGImage*)** %1, align 4		; <void (%struct.__block_1*, %struct.CGImage*)*> [#uses=1]
 	%19 = bitcast void (%struct.__block_1*, %struct.CGImage*)* %18 to i8*		; <i8*> [#uses=1]
 	store i8* %19, i8** %17, align 4
 	%20 = getelementptr %struct.__block_1, %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 1		; <%struct.NSZone**> [#uses=1]
-	%21 = load %struct.NSZone** %zone_addr, align 4		; <%struct.NSZone*> [#uses=1]
+	%21 = load %struct.NSZone*, %struct.NSZone** %zone_addr, align 4		; <%struct.NSZone*> [#uses=1]
 	store %struct.NSZone* %21, %struct.NSZone** %20, align 4
 	%22 = getelementptr %struct.__block_1, %struct.__block_1* %__block_holder_tmp_1.0, i32 0, i32 2		; <%struct.NSBitmapImageRep***> [#uses=1]
 	store %struct.NSBitmapImageRep** %new, %struct.NSBitmapImageRep*** %22, align 4
 	%23 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]", %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %FRAME.7, i32 0, i32 0		; <%struct.NSBitmapImageRep**> [#uses=1]
-	%24 = load %struct.NSBitmapImageRep** %23, align 4		; <%struct.NSBitmapImageRep*> [#uses=1]
+	%24 = load %struct.NSBitmapImageRep*, %struct.NSBitmapImageRep** %23, align 4		; <%struct.NSBitmapImageRep*> [#uses=1]
 	store %struct.NSBitmapImageRep* %24, %struct.NSBitmapImageRep** %2, align 4
-	%25 = load %struct.NSBitmapImageRep** %2, align 4		; <%struct.NSBitmapImageRep*> [#uses=1]
+	%25 = load %struct.NSBitmapImageRep*, %struct.NSBitmapImageRep** %2, align 4		; <%struct.NSBitmapImageRep*> [#uses=1]
 	%26 = bitcast %struct.NSBitmapImageRep* %25 to %struct.objc_object*		; <%struct.objc_object*> [#uses=1]
 	store %struct.objc_object* %26, %struct.objc_object** %self.1, align 4
-	%27 = load %struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_1", align 4		; <%struct.objc_selector*> [#uses=1]
+	%27 = load %struct.objc_selector*, %struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_1", align 4		; <%struct.objc_selector*> [#uses=1]
 	%__block_holder_tmp_1.03 = bitcast %struct.__block_1* %__block_holder_tmp_1.0 to void (%struct.CGImage*)*		; <void (%struct.CGImage*)*> [#uses=1]
-	%28 = load %struct.objc_object** %self.1, align 4		; <%struct.objc_object*> [#uses=1]
+	%28 = load %struct.objc_object*, %struct.objc_object** %self.1, align 4		; <%struct.objc_object*> [#uses=1]
 	%29 = call %struct.objc_object* (%struct.objc_object*, %struct.objc_selector*, ...)* inttoptr (i64 4294901504 to %struct.objc_object* (%struct.objc_object*, %struct.objc_selector*, ...)*)(%struct.objc_object* %28, %struct.objc_selector* %27, void (%struct.CGImage*)* %__block_holder_tmp_1.03) nounwind		; <%struct.objc_object*> [#uses=0]
 	br label %return
 
 return:		; preds = %entry
-	%retval5 = load %struct.objc_object** %retval		; <%struct.objc_object*> [#uses=1]
+	%retval5 = load %struct.objc_object*, %struct.objc_object** %retval		; <%struct.objc_object*> [#uses=1]
 	ret %struct.objc_object* %retval5
 }
 
@@ -131,33 +131,33 @@
 	store %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %CHAIN.8, %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"** %CHAIN.8_addr
 	store %struct.__block_1* %_self, %struct.__block_1** %_self_addr
 	store %struct.CGImage* %cgImage, %struct.CGImage** %cgImage_addr
-	%1 = load %struct.__block_1** %_self_addr, align 4		; <%struct.__block_1*> [#uses=1]
+	%1 = load %struct.__block_1*, %struct.__block_1** %_self_addr, align 4		; <%struct.__block_1*> [#uses=1]
 	%2 = getelementptr %struct.__block_1, %struct.__block_1* %1, i32 0, i32 2		; <%struct.NSBitmapImageRep***> [#uses=1]
-	%3 = load %struct.NSBitmapImageRep*** %2, align 4		; <%struct.NSBitmapImageRep**> [#uses=1]
+	%3 = load %struct.NSBitmapImageRep**, %struct.NSBitmapImageRep*** %2, align 4		; <%struct.NSBitmapImageRep**> [#uses=1]
 	store %struct.NSBitmapImageRep** %3, %struct.NSBitmapImageRep*** %new, align 4
-	%4 = load %struct.__block_1** %_self_addr, align 4		; <%struct.__block_1*> [#uses=1]
+	%4 = load %struct.__block_1*, %struct.__block_1** %_self_addr, align 4		; <%struct.__block_1*> [#uses=1]
 	%5 = getelementptr %struct.__block_1, %struct.__block_1* %4, i32 0, i32 1		; <%struct.NSZone**> [#uses=1]
-	%6 = load %struct.NSZone** %5, align 4		; <%struct.NSZone*> [#uses=1]
+	%6 = load %struct.NSZone*, %struct.NSZone** %5, align 4		; <%struct.NSZone*> [#uses=1]
 	store %struct.NSZone* %6, %struct.NSZone** %zone, align 4
-	%7 = load %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"** %CHAIN.8_addr, align 4		; <%"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"*> [#uses=1]
+	%7 = load %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"*, %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"** %CHAIN.8_addr, align 4		; <%"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"*> [#uses=1]
 	%8 = getelementptr %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]", %"struct.FRAME.-[NSBitmapImageRep copyWithZone:]"* %7, i32 0, i32 0		; <%struct.NSBitmapImageRep**> [#uses=1]
-	%9 = load %struct.NSBitmapImageRep** %8, align 4		; <%struct.NSBitmapImageRep*> [#uses=1]
+	%9 = load %struct.NSBitmapImageRep*, %struct.NSBitmapImageRep** %8, align 4		; <%struct.NSBitmapImageRep*> [#uses=1]
 	store %struct.NSBitmapImageRep* %9, %struct.NSBitmapImageRep** %0, align 4
-	%10 = load %struct.NSBitmapImageRep** %0, align 4		; <%struct.NSBitmapImageRep*> [#uses=1]
+	%10 = load %struct.NSBitmapImageRep*, %struct.NSBitmapImageRep** %0, align 4		; <%struct.NSBitmapImageRep*> [#uses=1]
 	%11 = bitcast %struct.NSBitmapImageRep* %10 to %struct.objc_object*		; <%struct.objc_object*> [#uses=1]
 	%12 = getelementptr %struct._objc_super, %struct._objc_super* %objc_super, i32 0, i32 0		; <%struct.objc_object**> [#uses=1]
 	store %struct.objc_object* %11, %struct.objc_object** %12, align 4
-	%13 = load %struct._objc_class** getelementptr (%struct._objc_class* @"\01L_OBJC_CLASS_NSBitmapImageRep", i32 0, i32 1), align 4		; <%struct._objc_class*> [#uses=1]
+	%13 = load %struct._objc_class*, %struct._objc_class** getelementptr (%struct._objc_class* @"\01L_OBJC_CLASS_NSBitmapImageRep", i32 0, i32 1), align 4		; <%struct._objc_class*> [#uses=1]
 	%14 = getelementptr %struct._objc_super, %struct._objc_super* %objc_super, i32 0, i32 1		; <%struct._objc_class**> [#uses=1]
 	store %struct._objc_class* %13, %struct._objc_class** %14, align 4
 	%objc_super1 = bitcast %struct._objc_super* %objc_super to %struct.objc_super*		; <%struct.objc_super*> [#uses=1]
 	store %struct.objc_super* %objc_super1, %struct.objc_super** %objc_super.5, align 4
-	%15 = load %struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_0", align 4		; <%struct.objc_selector*> [#uses=1]
-	%16 = load %struct.objc_super** %objc_super.5, align 4		; <%struct.objc_super*> [#uses=1]
-	%17 = load %struct.NSZone** %zone, align 4		; <%struct.NSZone*> [#uses=1]
+	%15 = load %struct.objc_selector*, %struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_0", align 4		; <%struct.objc_selector*> [#uses=1]
+	%16 = load %struct.objc_super*, %struct.objc_super** %objc_super.5, align 4		; <%struct.objc_super*> [#uses=1]
+	%17 = load %struct.NSZone*, %struct.NSZone** %zone, align 4		; <%struct.NSZone*> [#uses=1]
 	%18 = call %struct.objc_object* (%struct.objc_super*, %struct.objc_selector*, ...)* @objc_msgSendSuper(%struct.objc_super* %16, %struct.objc_selector* %15, %struct.NSZone* %17) nounwind		; <%struct.objc_object*> [#uses=1]
 	%19 = bitcast %struct.objc_object* %18 to %struct.NSBitmapImageRep*		; <%struct.NSBitmapImageRep*> [#uses=1]
-	%20 = load %struct.NSBitmapImageRep*** %new, align 4		; <%struct.NSBitmapImageRep**> [#uses=1]
+	%20 = load %struct.NSBitmapImageRep**, %struct.NSBitmapImageRep*** %new, align 4		; <%struct.NSBitmapImageRep**> [#uses=1]
 	store %struct.NSBitmapImageRep* %19, %struct.NSBitmapImageRep** %20, align 4
 	br label %return
 
diff --git a/llvm/test/CodeGen/PowerPC/unal-altivec-wint.ll b/llvm/test/CodeGen/PowerPC/unal-altivec-wint.ll
index 7d5dd60..b71a98b 100644
--- a/llvm/test/CodeGen/PowerPC/unal-altivec-wint.ll
+++ b/llvm/test/CodeGen/PowerPC/unal-altivec-wint.ll
@@ -10,7 +10,7 @@
   %hv = bitcast <4 x i32>* %h1 to i8*
   %vl = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %hv)
 
-  %v0 = load <4 x i32>* %h, align 8
+  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
 
   %a = add <4 x i32> %v0, %vl
   ret <4 x i32> %a
@@ -31,7 +31,7 @@
   %hv = bitcast <4 x i32>* %h1 to i8*
   call void @llvm.ppc.altivec.stvx(<4 x i32> %d, i8* %hv)
 
-  %v0 = load <4 x i32>* %h, align 8
+  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
 
   ret <4 x i32> %v0
 
diff --git a/llvm/test/CodeGen/PowerPC/unal-altivec.ll b/llvm/test/CodeGen/PowerPC/unal-altivec.ll
index 70c1f53..02f7ab4 100644
--- a/llvm/test/CodeGen/PowerPC/unal-altivec.ll
+++ b/llvm/test/CodeGen/PowerPC/unal-altivec.ll
@@ -10,11 +10,11 @@
   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   %0 = getelementptr inbounds float, float* %b, i64 %index
   %1 = bitcast float* %0 to <4 x float>*
-  %wide.load = load <4 x float>* %1, align 4
+  %wide.load = load <4 x float>, <4 x float>* %1, align 4
   %.sum11 = or i64 %index, 4
   %2 = getelementptr float, float* %b, i64 %.sum11
   %3 = bitcast float* %2 to <4 x float>*
-  %wide.load8 = load <4 x float>* %3, align 4
+  %wide.load8 = load <4 x float>, <4 x float>* %3, align 4
   %4 = fadd <4 x float> %wide.load, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
   %5 = fadd <4 x float> %wide.load8, <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
   %6 = getelementptr inbounds float, float* %a, i64 %index
diff --git a/llvm/test/CodeGen/PowerPC/unal-altivec2.ll b/llvm/test/CodeGen/PowerPC/unal-altivec2.ll
index 5790096..0d15b97 100644
--- a/llvm/test/CodeGen/PowerPC/unal-altivec2.ll
+++ b/llvm/test/CodeGen/PowerPC/unal-altivec2.ll
@@ -14,7 +14,7 @@
   %index = phi i64 [ 0, %entry ], [ %index.next.15, %vector.body ]
   %0 = getelementptr inbounds float, float* %y, i64 %index
   %1 = bitcast float* %0 to <4 x float>*
-  %wide.load = load <4 x float>* %1, align 4
+  %wide.load = load <4 x float>, <4 x float>* %1, align 4
   %2 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load)
   %3 = getelementptr inbounds float, float* %x, i64 %index
   %4 = bitcast float* %3 to <4 x float>*
@@ -22,7 +22,7 @@
   %index.next = add i64 %index, 4
   %5 = getelementptr inbounds float, float* %y, i64 %index.next
   %6 = bitcast float* %5 to <4 x float>*
-  %wide.load.1 = load <4 x float>* %6, align 4
+  %wide.load.1 = load <4 x float>, <4 x float>* %6, align 4
   %7 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.1)
   %8 = getelementptr inbounds float, float* %x, i64 %index.next
   %9 = bitcast float* %8 to <4 x float>*
@@ -30,7 +30,7 @@
   %index.next.1 = add i64 %index.next, 4
   %10 = getelementptr inbounds float, float* %y, i64 %index.next.1
   %11 = bitcast float* %10 to <4 x float>*
-  %wide.load.2 = load <4 x float>* %11, align 4
+  %wide.load.2 = load <4 x float>, <4 x float>* %11, align 4
   %12 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.2)
   %13 = getelementptr inbounds float, float* %x, i64 %index.next.1
   %14 = bitcast float* %13 to <4 x float>*
@@ -38,7 +38,7 @@
   %index.next.2 = add i64 %index.next.1, 4
   %15 = getelementptr inbounds float, float* %y, i64 %index.next.2
   %16 = bitcast float* %15 to <4 x float>*
-  %wide.load.3 = load <4 x float>* %16, align 4
+  %wide.load.3 = load <4 x float>, <4 x float>* %16, align 4
   %17 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.3)
   %18 = getelementptr inbounds float, float* %x, i64 %index.next.2
   %19 = bitcast float* %18 to <4 x float>*
@@ -46,7 +46,7 @@
   %index.next.3 = add i64 %index.next.2, 4
   %20 = getelementptr inbounds float, float* %y, i64 %index.next.3
   %21 = bitcast float* %20 to <4 x float>*
-  %wide.load.4 = load <4 x float>* %21, align 4
+  %wide.load.4 = load <4 x float>, <4 x float>* %21, align 4
   %22 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.4)
   %23 = getelementptr inbounds float, float* %x, i64 %index.next.3
   %24 = bitcast float* %23 to <4 x float>*
@@ -54,7 +54,7 @@
   %index.next.4 = add i64 %index.next.3, 4
   %25 = getelementptr inbounds float, float* %y, i64 %index.next.4
   %26 = bitcast float* %25 to <4 x float>*
-  %wide.load.5 = load <4 x float>* %26, align 4
+  %wide.load.5 = load <4 x float>, <4 x float>* %26, align 4
   %27 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.5)
   %28 = getelementptr inbounds float, float* %x, i64 %index.next.4
   %29 = bitcast float* %28 to <4 x float>*
@@ -62,7 +62,7 @@
   %index.next.5 = add i64 %index.next.4, 4
   %30 = getelementptr inbounds float, float* %y, i64 %index.next.5
   %31 = bitcast float* %30 to <4 x float>*
-  %wide.load.6 = load <4 x float>* %31, align 4
+  %wide.load.6 = load <4 x float>, <4 x float>* %31, align 4
   %32 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.6)
   %33 = getelementptr inbounds float, float* %x, i64 %index.next.5
   %34 = bitcast float* %33 to <4 x float>*
@@ -70,7 +70,7 @@
   %index.next.6 = add i64 %index.next.5, 4
   %35 = getelementptr inbounds float, float* %y, i64 %index.next.6
   %36 = bitcast float* %35 to <4 x float>*
-  %wide.load.7 = load <4 x float>* %36, align 4
+  %wide.load.7 = load <4 x float>, <4 x float>* %36, align 4
   %37 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.7)
   %38 = getelementptr inbounds float, float* %x, i64 %index.next.6
   %39 = bitcast float* %38 to <4 x float>*
@@ -78,7 +78,7 @@
   %index.next.7 = add i64 %index.next.6, 4
   %40 = getelementptr inbounds float, float* %y, i64 %index.next.7
   %41 = bitcast float* %40 to <4 x float>*
-  %wide.load.8 = load <4 x float>* %41, align 4
+  %wide.load.8 = load <4 x float>, <4 x float>* %41, align 4
   %42 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.8)
   %43 = getelementptr inbounds float, float* %x, i64 %index.next.7
   %44 = bitcast float* %43 to <4 x float>*
@@ -86,7 +86,7 @@
   %index.next.8 = add i64 %index.next.7, 4
   %45 = getelementptr inbounds float, float* %y, i64 %index.next.8
   %46 = bitcast float* %45 to <4 x float>*
-  %wide.load.9 = load <4 x float>* %46, align 4
+  %wide.load.9 = load <4 x float>, <4 x float>* %46, align 4
   %47 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.9)
   %48 = getelementptr inbounds float, float* %x, i64 %index.next.8
   %49 = bitcast float* %48 to <4 x float>*
@@ -94,7 +94,7 @@
   %index.next.9 = add i64 %index.next.8, 4
   %50 = getelementptr inbounds float, float* %y, i64 %index.next.9
   %51 = bitcast float* %50 to <4 x float>*
-  %wide.load.10 = load <4 x float>* %51, align 4
+  %wide.load.10 = load <4 x float>, <4 x float>* %51, align 4
   %52 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.10)
   %53 = getelementptr inbounds float, float* %x, i64 %index.next.9
   %54 = bitcast float* %53 to <4 x float>*
@@ -102,7 +102,7 @@
   %index.next.10 = add i64 %index.next.9, 4
   %55 = getelementptr inbounds float, float* %y, i64 %index.next.10
   %56 = bitcast float* %55 to <4 x float>*
-  %wide.load.11 = load <4 x float>* %56, align 4
+  %wide.load.11 = load <4 x float>, <4 x float>* %56, align 4
   %57 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.11)
   %58 = getelementptr inbounds float, float* %x, i64 %index.next.10
   %59 = bitcast float* %58 to <4 x float>*
@@ -110,7 +110,7 @@
   %index.next.11 = add i64 %index.next.10, 4
   %60 = getelementptr inbounds float, float* %y, i64 %index.next.11
   %61 = bitcast float* %60 to <4 x float>*
-  %wide.load.12 = load <4 x float>* %61, align 4
+  %wide.load.12 = load <4 x float>, <4 x float>* %61, align 4
   %62 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.12)
   %63 = getelementptr inbounds float, float* %x, i64 %index.next.11
   %64 = bitcast float* %63 to <4 x float>*
@@ -118,7 +118,7 @@
   %index.next.12 = add i64 %index.next.11, 4
   %65 = getelementptr inbounds float, float* %y, i64 %index.next.12
   %66 = bitcast float* %65 to <4 x float>*
-  %wide.load.13 = load <4 x float>* %66, align 4
+  %wide.load.13 = load <4 x float>, <4 x float>* %66, align 4
   %67 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.13)
   %68 = getelementptr inbounds float, float* %x, i64 %index.next.12
   %69 = bitcast float* %68 to <4 x float>*
@@ -126,7 +126,7 @@
   %index.next.13 = add i64 %index.next.12, 4
   %70 = getelementptr inbounds float, float* %y, i64 %index.next.13
   %71 = bitcast float* %70 to <4 x float>*
-  %wide.load.14 = load <4 x float>* %71, align 4
+  %wide.load.14 = load <4 x float>, <4 x float>* %71, align 4
   %72 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.14)
   %73 = getelementptr inbounds float, float* %x, i64 %index.next.13
   %74 = bitcast float* %73 to <4 x float>*
@@ -134,7 +134,7 @@
   %index.next.14 = add i64 %index.next.13, 4
   %75 = getelementptr inbounds float, float* %y, i64 %index.next.14
   %76 = bitcast float* %75 to <4 x float>*
-  %wide.load.15 = load <4 x float>* %76, align 4
+  %wide.load.15 = load <4 x float>, <4 x float>* %76, align 4
   %77 = call <4 x float> @llvm_cos_v4f32(<4 x float> %wide.load.15)
   %78 = getelementptr inbounds float, float* %x, i64 %index.next.14
   %79 = bitcast float* %78 to <4 x float>*
@@ -153,7 +153,7 @@
 define <2 x double> @bar(double* %x) {
 entry:
   %p = bitcast double* %x to <2 x double>*
-  %r = load <2 x double>* %p, align 8
+  %r = load <2 x double>, <2 x double>* %p, align 8
 
 ; CHECK-LABEL: @bar
 ; CHECK-NOT: lvsl
diff --git a/llvm/test/CodeGen/PowerPC/unaligned.ll b/llvm/test/CodeGen/PowerPC/unaligned.ll
index 64c03cd..6b23b18 100644
--- a/llvm/test/CodeGen/PowerPC/unaligned.ll
+++ b/llvm/test/CodeGen/PowerPC/unaligned.ll
@@ -5,7 +5,7 @@
 
 define void @foo1(i16* %p, i16* %r) nounwind {
 entry:
-  %v = load i16* %p, align 1
+  %v = load i16, i16* %p, align 1
   store i16 %v, i16* %r, align 1
   ret void
 
@@ -20,7 +20,7 @@
 
 define void @foo2(i32* %p, i32* %r) nounwind {
 entry:
-  %v = load i32* %p, align 1
+  %v = load i32, i32* %p, align 1
   store i32 %v, i32* %r, align 1
   ret void
 
@@ -35,7 +35,7 @@
 
 define void @foo3(i64* %p, i64* %r) nounwind {
 entry:
-  %v = load i64* %p, align 1
+  %v = load i64, i64* %p, align 1
   store i64 %v, i64* %r, align 1
   ret void
 
@@ -50,7 +50,7 @@
 
 define void @foo4(float* %p, float* %r) nounwind {
 entry:
-  %v = load float* %p, align 1
+  %v = load float, float* %p, align 1
   store float %v, float* %r, align 1
   ret void
 
@@ -65,7 +65,7 @@
 
 define void @foo5(double* %p, double* %r) nounwind {
 entry:
-  %v = load double* %p, align 1
+  %v = load double, double* %p, align 1
   store double %v, double* %r, align 1
   ret void
 
@@ -80,7 +80,7 @@
 
 define void @foo6(<4 x float>* %p, <4 x float>* %r) nounwind {
 entry:
-  %v = load <4 x float>* %p, align 1
+  %v = load <4 x float>, <4 x float>* %p, align 1
   store <4 x float> %v, <4 x float>* %r, align 1
   ret void
 
diff --git a/llvm/test/CodeGen/PowerPC/vaddsplat.ll b/llvm/test/CodeGen/PowerPC/vaddsplat.ll
index 4236fab..70a7ea0 100644
--- a/llvm/test/CodeGen/PowerPC/vaddsplat.ll
+++ b/llvm/test/CodeGen/PowerPC/vaddsplat.ll
@@ -10,7 +10,7 @@
 %v16i8 = type <16 x i8>
 
 define void @test_v4i32_pos_even(%v4i32* %P, %v4i32* %S) {
-       %p = load %v4i32* %P
+       %p = load %v4i32, %v4i32* %P
        %r = add %v4i32 %p, < i32 18, i32 18, i32 18, i32 18 >
        store %v4i32 %r, %v4i32* %S
        ret void
@@ -21,7 +21,7 @@
 ; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]]
 
 define void @test_v4i32_neg_even(%v4i32* %P, %v4i32* %S) {
-       %p = load %v4i32* %P
+       %p = load %v4i32, %v4i32* %P
        %r = add %v4i32 %p, < i32 -28, i32 -28, i32 -28, i32 -28 >
        store %v4i32 %r, %v4i32* %S
        ret void
@@ -32,7 +32,7 @@
 ; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG1]]
 
 define void @test_v8i16_pos_even(%v8i16* %P, %v8i16* %S) {
-       %p = load %v8i16* %P
+       %p = load %v8i16, %v8i16* %P
        %r = add %v8i16 %p, < i16 30, i16 30, i16 30, i16 30, i16 30, i16 30, i16 30, i16 30 >
        store %v8i16 %r, %v8i16* %S
        ret void
@@ -43,7 +43,7 @@
 ; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]]
 
 define void @test_v8i16_neg_even(%v8i16* %P, %v8i16* %S) {
-       %p = load %v8i16* %P
+       %p = load %v8i16, %v8i16* %P
        %r = add %v8i16 %p, < i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32, i16 -32 >
        store %v8i16 %r, %v8i16* %S
        ret void
@@ -54,7 +54,7 @@
 ; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG1]]
 
 define void @test_v16i8_pos_even(%v16i8* %P, %v16i8* %S) {
-       %p = load %v16i8* %P
+       %p = load %v16i8, %v16i8* %P
        %r = add %v16i8 %p, < i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16 >
        store %v16i8 %r, %v16i8* %S
        ret void
@@ -65,7 +65,7 @@
 ; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]]
 
 define void @test_v16i8_neg_even(%v16i8* %P, %v16i8* %S) {
-       %p = load %v16i8* %P
+       %p = load %v16i8, %v16i8* %P
        %r = add %v16i8 %p, < i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18, i8 -18 >
        store %v16i8 %r, %v16i8* %S
        ret void
@@ -76,7 +76,7 @@
 ; CHECK: vaddubm {{[0-9]+}}, [[REG1]], [[REG1]]
 
 define void @test_v4i32_pos_odd(%v4i32* %P, %v4i32* %S) {
-       %p = load %v4i32* %P
+       %p = load %v4i32, %v4i32* %P
        %r = add %v4i32 %p, < i32 27, i32 27, i32 27, i32 27 >
        store %v4i32 %r, %v4i32* %S
        ret void
@@ -88,7 +88,7 @@
 ; CHECK: vsubuwm {{[0-9]+}}, [[REG1]], [[REG2]]
 
 define void @test_v4i32_neg_odd(%v4i32* %P, %v4i32* %S) {
-       %p = load %v4i32* %P
+       %p = load %v4i32, %v4i32* %P
        %r = add %v4i32 %p, < i32 -27, i32 -27, i32 -27, i32 -27 >
        store %v4i32 %r, %v4i32* %S
        ret void
@@ -100,7 +100,7 @@
 ; CHECK: vadduwm {{[0-9]+}}, [[REG1]], [[REG2]]
 
 define void @test_v8i16_pos_odd(%v8i16* %P, %v8i16* %S) {
-       %p = load %v8i16* %P
+       %p = load %v8i16, %v8i16* %P
        %r = add %v8i16 %p, < i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31 >
        store %v8i16 %r, %v8i16* %S
        ret void
@@ -112,7 +112,7 @@
 ; CHECK: vsubuhm {{[0-9]+}}, [[REG1]], [[REG2]]
 
 define void @test_v8i16_neg_odd(%v8i16* %P, %v8i16* %S) {
-       %p = load %v8i16* %P
+       %p = load %v8i16, %v8i16* %P
        %r = add %v8i16 %p, < i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31, i16 -31 >
        store %v8i16 %r, %v8i16* %S
        ret void
@@ -124,7 +124,7 @@
 ; CHECK: vadduhm {{[0-9]+}}, [[REG1]], [[REG2]]
 
 define void @test_v16i8_pos_odd(%v16i8* %P, %v16i8* %S) {
-       %p = load %v16i8* %P
+       %p = load %v16i8, %v16i8* %P
        %r = add %v16i8 %p, < i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17 >
        store %v16i8 %r, %v16i8* %S
        ret void
@@ -136,7 +136,7 @@
 ; CHECK: vsububm {{[0-9]+}}, [[REG1]], [[REG2]]
 
 define void @test_v16i8_neg_odd(%v16i8* %P, %v16i8* %S) {
-       %p = load %v16i8* %P
+       %p = load %v16i8, %v16i8* %P
        %r = add %v16i8 %p, < i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17 >
        store %v16i8 %r, %v16i8* %S
        ret void
diff --git a/llvm/test/CodeGen/PowerPC/varargs-struct-float.ll b/llvm/test/CodeGen/PowerPC/varargs-struct-float.ll
index 13bff73..dbdda05 100644
--- a/llvm/test/CodeGen/PowerPC/varargs-struct-float.ll
+++ b/llvm/test/CodeGen/PowerPC/varargs-struct-float.ll
@@ -11,7 +11,7 @@
   %coerce.dive = getelementptr %struct.Sf1, %struct.Sf1* %s, i32 0, i32 0
   store float %s.coerce, float* %coerce.dive, align 1
   %coerce.dive1 = getelementptr %struct.Sf1, %struct.Sf1* %s, i32 0, i32 0
-  %0 = load float* %coerce.dive1, align 1
+  %0 = load float, float* %coerce.dive1, align 1
   call void (i32, ...)* @testvaSf1(i32 1, float inreg %0)
   ret void
 }
diff --git a/llvm/test/CodeGen/PowerPC/vcmp-fold.ll b/llvm/test/CodeGen/PowerPC/vcmp-fold.ll
index 7a42c27..ee16708 100644
--- a/llvm/test/CodeGen/PowerPC/vcmp-fold.ll
+++ b/llvm/test/CodeGen/PowerPC/vcmp-fold.ll
@@ -5,11 +5,11 @@
 
 define void @test(<4 x float>* %x, <4 x float>* %y, i32* %P) {
 entry:
-	%tmp = load <4 x float>* %x		; <<4 x float>> [#uses=1]
-	%tmp2 = load <4 x float>* %y		; <<4 x float>> [#uses=1]
+	%tmp = load <4 x float>, <4 x float>* %x		; <<4 x float>> [#uses=1]
+	%tmp2 = load <4 x float>, <4 x float>* %y		; <<4 x float>> [#uses=1]
 	%tmp.upgrd.1 = call i32 @llvm.ppc.altivec.vcmpbfp.p( i32 1, <4 x float> %tmp, <4 x float> %tmp2 )		; <i32> [#uses=1]
-	%tmp4 = load <4 x float>* %x		; <<4 x float>> [#uses=1]
-	%tmp6 = load <4 x float>* %y		; <<4 x float>> [#uses=1]
+	%tmp4 = load <4 x float>, <4 x float>* %x		; <<4 x float>> [#uses=1]
+	%tmp6 = load <4 x float>, <4 x float>* %y		; <<4 x float>> [#uses=1]
 	%tmp.upgrd.2 = call <4 x i32> @llvm.ppc.altivec.vcmpbfp( <4 x float> %tmp4, <4 x float> %tmp6 )		; <<4 x i32>> [#uses=1]
 	%tmp7 = bitcast <4 x i32> %tmp.upgrd.2 to <4 x float>		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp7, <4 x float>* %x
diff --git a/llvm/test/CodeGen/PowerPC/vec-abi-align.ll b/llvm/test/CodeGen/PowerPC/vec-abi-align.ll
index 04f12e2..48f1adb 100644
--- a/llvm/test/CodeGen/PowerPC/vec-abi-align.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-abi-align.ll
@@ -27,10 +27,10 @@
 define void @test2(i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, %struct.s2* byval nocapture readonly %vs) #0 {
 entry:
   %m = getelementptr inbounds %struct.s2, %struct.s2* %vs, i64 0, i32 0
-  %0 = load i64* %m, align 8
+  %0 = load i64, i64* %m, align 8
   store i64 %0, i64* @n, align 8
   %v = getelementptr inbounds %struct.s2, %struct.s2* %vs, i64 0, i32 1
-  %1 = load <4 x float>* %v, align 16
+  %1 = load <4 x float>, <4 x float>* %v, align 16
   store <4 x float> %1, <4 x float>* @ve, align 16
   ret void
 
@@ -53,10 +53,10 @@
 define void @test3(i64 %d1, i64 %d2, i64 %d3, i64 %d4, i64 %d5, i64 %d6, i64 %d7, i64 %d8, i64 %d9, %struct.s2* byval nocapture readonly %vs) #0 {
 entry:
   %m = getelementptr inbounds %struct.s2, %struct.s2* %vs, i64 0, i32 0
-  %0 = load i64* %m, align 8
+  %0 = load i64, i64* %m, align 8
   store i64 %0, i64* @n, align 8
   %v = getelementptr inbounds %struct.s2, %struct.s2* %vs, i64 0, i32 1
-  %1 = load <4 x float>* %v, align 16
+  %1 = load <4 x float>, <4 x float>* %v, align 16
   store <4 x float> %1, <4 x float>* @ve, align 16
   ret void
 
diff --git a/llvm/test/CodeGen/PowerPC/vec_auto_constant.ll b/llvm/test/CodeGen/PowerPC/vec_auto_constant.ll
index 973f089..ba8ef53 100644
--- a/llvm/test/CodeGen/PowerPC/vec_auto_constant.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_auto_constant.ll
@@ -25,8 +25,8 @@
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   store <16 x i8> %x, <16 x i8>* %x_addr
   store <16 x i8> <i8 22, i8 21, i8 20, i8 3, i8 25, i8 24, i8 23, i8 3, i8 28, i8 27, i8 26, i8 3, i8 31, i8 30, i8 29, i8 3>, <16 x i8>* %temp, align 16
-  %0 = load <16 x i8>* %x_addr, align 16          ; <<16 x i8>> [#uses=1]
-  %1 = load <16 x i8>* %temp, align 16            ; <<16 x i8>> [#uses=1]
+  %0 = load <16 x i8>, <16 x i8>* %x_addr, align 16          ; <<16 x i8>> [#uses=1]
+  %1 = load <16 x i8>, <16 x i8>* %temp, align 16            ; <<16 x i8>> [#uses=1]
   %tmp = add <16 x i8> %0, %1                     ; <<16 x i8>> [#uses=1]
   store <16 x i8> %tmp, <16 x i8>* @baz, align 16
   br label %return
diff --git a/llvm/test/CodeGen/PowerPC/vec_br_cmp.ll b/llvm/test/CodeGen/PowerPC/vec_br_cmp.ll
index c34d850..14c9620 100644
--- a/llvm/test/CodeGen/PowerPC/vec_br_cmp.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_br_cmp.ll
@@ -5,8 +5,8 @@
 ; A predicate compare used immediately by a branch should not generate an mfcr.
 
 define void @test(<4 x float>* %A, <4 x float>* %B) {
-	%tmp = load <4 x float>* %A		; <<4 x float>> [#uses=1]
-	%tmp3 = load <4 x float>* %B		; <<4 x float>> [#uses=1]
+	%tmp = load <4 x float>, <4 x float>* %A		; <<4 x float>> [#uses=1]
+	%tmp3 = load <4 x float>, <4 x float>* %B		; <<4 x float>> [#uses=1]
 	%tmp.upgrd.1 = tail call i32 @llvm.ppc.altivec.vcmpeqfp.p( i32 1, <4 x float> %tmp, <4 x float> %tmp3 )		; <i32> [#uses=1]
 	%tmp.upgrd.2 = icmp eq i32 %tmp.upgrd.1, 0		; <i1> [#uses=1]
 	br i1 %tmp.upgrd.2, label %cond_true, label %UnifiedReturnBlock
diff --git a/llvm/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll b/llvm/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll
index 7e58ec0..3b85077 100644
--- a/llvm/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll
@@ -8,7 +8,7 @@
 ; CHECK: _foo:
 ; CHECK-NOT: stw
 entry:
-    %tmp0 = load <16 x i8>* @a, align 16
+    %tmp0 = load <16 x i8>, <16 x i8>* @a, align 16
   %tmp180.i = extractelement <16 x i8> %tmp0, i32 0 ; <i8> [#uses=1]
   %tmp181.i = insertelement <16 x i8> <i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp180.i, i32 2 ; <<16 x i8>> [#uses=1]
   %tmp182.i = extractelement <16 x i8> %tmp0, i32 1 ; <i8> [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/vec_constants.ll b/llvm/test/CodeGen/PowerPC/vec_constants.ll
index f16b9f5..45df814 100644
--- a/llvm/test/CodeGen/PowerPC/vec_constants.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_constants.ll
@@ -4,13 +4,13 @@
 target triple = "powerpc64-unknown-linux-gnu"
 
 define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind {
-	%tmp = load <4 x i32>* %P1		; <<4 x i32>> [#uses=1]
+	%tmp = load <4 x i32>, <4 x i32>* %P1		; <<4 x i32>> [#uses=1]
 	%tmp4 = and <4 x i32> %tmp, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 >		; <<4 x i32>> [#uses=1]
 	store <4 x i32> %tmp4, <4 x i32>* %P1
-	%tmp7 = load <4 x i32>* %P2		; <<4 x i32>> [#uses=1]
+	%tmp7 = load <4 x i32>, <4 x i32>* %P2		; <<4 x i32>> [#uses=1]
 	%tmp9 = and <4 x i32> %tmp7, < i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647 >		; <<4 x i32>> [#uses=1]
 	store <4 x i32> %tmp9, <4 x i32>* %P2
-	%tmp.upgrd.1 = load <4 x float>* %P3		; <<4 x float>> [#uses=1]
+	%tmp.upgrd.1 = load <4 x float>, <4 x float>* %P3		; <<4 x float>> [#uses=1]
 	%tmp11 = bitcast <4 x float> %tmp.upgrd.1 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp12 = and <4 x i32> %tmp11, < i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647 >		; <<4 x i32>> [#uses=1]
 	%tmp13 = bitcast <4 x i32> %tmp12 to <4 x float>		; <<4 x float>> [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/vec_conv.ll b/llvm/test/CodeGen/PowerPC/vec_conv.ll
index a39ae91..6e19f5a 100644
--- a/llvm/test/CodeGen/PowerPC/vec_conv.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_conv.ll
@@ -11,7 +11,7 @@
 
 define void @v4f32_to_v4i32(<4 x float> %x, <4 x i32>* nocapture %y) nounwind {
 entry:
-  %0 = load <4 x float>* @cte_float, align 16
+  %0 = load <4 x float>, <4 x float>* @cte_float, align 16
   %mul = fmul <4 x float> %0, %x
   %1 = fptosi <4 x float> %mul to <4 x i32>
   store <4 x i32> %1, <4 x i32>* %y, align 16
@@ -23,7 +23,7 @@
 
 define void @v4f32_to_v4u32(<4 x float> %x, <4 x i32>* nocapture %y) nounwind {
 entry:
-  %0 = load <4 x float>* @cte_float, align 16
+  %0 = load <4 x float>, <4 x float>* @cte_float, align 16
   %mul = fmul <4 x float> %0, %x
   %1 = fptoui <4 x float> %mul to <4 x i32>
   store <4 x i32> %1, <4 x i32>* %y, align 16
@@ -35,7 +35,7 @@
 
 define void @v4i32_to_v4f32(<4 x i32> %x, <4 x float>* nocapture %y) nounwind {
 entry:
-  %0 = load <4 x i32>* @cte_int, align 16
+  %0 = load <4 x i32>, <4 x i32>* @cte_int, align 16
   %mul = mul <4 x i32> %0, %x
   %1 = sitofp <4 x i32> %mul to <4 x float>
   store <4 x float> %1, <4 x float>* %y, align 16
@@ -47,7 +47,7 @@
 
 define void @v4u32_to_v4f32(<4 x i32> %x, <4 x float>* nocapture %y) nounwind {
 entry:
-  %0 = load <4 x i32>* @cte_int, align 16
+  %0 = load <4 x i32>, <4 x i32>* @cte_int, align 16
   %mul = mul <4 x i32> %0, %x
   %1 = uitofp <4 x i32> %mul to <4 x float>
   store <4 x float> %1, <4 x float>* %y, align 16
diff --git a/llvm/test/CodeGen/PowerPC/vec_fneg.ll b/llvm/test/CodeGen/PowerPC/vec_fneg.ll
index e01e659..d6f6def 100644
--- a/llvm/test/CodeGen/PowerPC/vec_fneg.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_fneg.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsubfp
 
 define void @t(<4 x float>* %A) {
-	%tmp2 = load <4 x float>* %A
+	%tmp2 = load <4 x float>, <4 x float>* %A
 	%tmp3 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp2
 	store <4 x float> %tmp3, <4 x float>* %A
 	ret void
diff --git a/llvm/test/CodeGen/PowerPC/vec_misaligned.ll b/llvm/test/CodeGen/PowerPC/vec_misaligned.ll
index 2a2f048..ac639d7 100644
--- a/llvm/test/CodeGen/PowerPC/vec_misaligned.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_misaligned.ll
@@ -19,18 +19,18 @@
 	store i32 %x, i32* %x_addr
 	%ap1 = bitcast i8** %ap to i8*		; <i8*> [#uses=1]
 	call void @llvm.va_start( i8* %ap1 )
-	%tmp = load i8** %ap, align 4		; <i8*> [#uses=1]
+	%tmp = load i8*, i8** %ap, align 4		; <i8*> [#uses=1]
 	store i8* %tmp, i8** %ap.0, align 4
-	%tmp2 = load i8** %ap.0, align 4		; <i8*> [#uses=1]
+	%tmp2 = load i8*, i8** %ap.0, align 4		; <i8*> [#uses=1]
 	%tmp3 = getelementptr i8, i8* %tmp2, i64 16		; <i8*> [#uses=1]
 	store i8* %tmp3, i8** %ap, align 4
-	%tmp4 = load i8** %ap.0, align 4		; <i8*> [#uses=1]
+	%tmp4 = load i8*, i8** %ap.0, align 4		; <i8*> [#uses=1]
 	%tmp45 = bitcast i8* %tmp4 to %struct.S2203*		; <%struct.S2203*> [#uses=1]
 	%tmp6 = getelementptr %struct.S2203, %struct.S2203* @s, i32 0, i32 0		; <%struct.u16qi*> [#uses=1]
 	%tmp7 = getelementptr %struct.S2203, %struct.S2203* %tmp45, i32 0, i32 0		; <%struct.u16qi*> [#uses=1]
 	%tmp8 = getelementptr %struct.u16qi, %struct.u16qi* %tmp6, i32 0, i32 0		; <<16 x i8>*> [#uses=1]
 	%tmp9 = getelementptr %struct.u16qi, %struct.u16qi* %tmp7, i32 0, i32 0		; <<16 x i8>*> [#uses=1]
-	%tmp10 = load <16 x i8>* %tmp9, align 4		; <<16 x i8>> [#uses=1]
+	%tmp10 = load <16 x i8>, <16 x i8>* %tmp9, align 4		; <<16 x i8>> [#uses=1]
 ; CHECK: lvsl
 ; CHECK: vperm
 ; CHECK-LE: lvsr
diff --git a/llvm/test/CodeGen/PowerPC/vec_mul.ll b/llvm/test/CodeGen/PowerPC/vec_mul.ll
index 86596d4..e815725 100644
--- a/llvm/test/CodeGen/PowerPC/vec_mul.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_mul.ll
@@ -5,8 +5,8 @@
 ; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -march=ppc64 -mattr=+altivec -mattr=+vsx -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-LE-VSX
 
 define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
-	%tmp = load <4 x i32>* %X		; <<4 x i32>> [#uses=1]
-	%tmp2 = load <4 x i32>* %Y		; <<4 x i32>> [#uses=1]
+	%tmp = load <4 x i32>, <4 x i32>* %X		; <<4 x i32>> [#uses=1]
+	%tmp2 = load <4 x i32>, <4 x i32>* %Y		; <<4 x i32>> [#uses=1]
 	%tmp3 = mul <4 x i32> %tmp, %tmp2		; <<4 x i32>> [#uses=1]
 	ret <4 x i32> %tmp3
 }
@@ -24,8 +24,8 @@
 ; CHECK-LE-VSX-NOT: mullw
 
 define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) {
-	%tmp = load <8 x i16>* %X		; <<8 x i16>> [#uses=1]
-	%tmp2 = load <8 x i16>* %Y		; <<8 x i16>> [#uses=1]
+	%tmp = load <8 x i16>, <8 x i16>* %X		; <<8 x i16>> [#uses=1]
+	%tmp2 = load <8 x i16>, <8 x i16>* %Y		; <<8 x i16>> [#uses=1]
 	%tmp3 = mul <8 x i16> %tmp, %tmp2		; <<8 x i16>> [#uses=1]
 	ret <8 x i16> %tmp3
 }
@@ -43,8 +43,8 @@
 ; CHECK-LE-VSX-NOT: mullw
 
 define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) {
-	%tmp = load <16 x i8>* %X		; <<16 x i8>> [#uses=1]
-	%tmp2 = load <16 x i8>* %Y		; <<16 x i8>> [#uses=1]
+	%tmp = load <16 x i8>, <16 x i8>* %X		; <<16 x i8>> [#uses=1]
+	%tmp2 = load <16 x i8>, <16 x i8>* %Y		; <<16 x i8>> [#uses=1]
 	%tmp3 = mul <16 x i8> %tmp, %tmp2		; <<16 x i8>> [#uses=1]
 	ret <16 x i8> %tmp3
 }
@@ -68,8 +68,8 @@
 ; CHECK-LE-VSX-NOT: mullw
 
 define <4 x float> @test_float(<4 x float>* %X, <4 x float>* %Y) {
-	%tmp = load <4 x float>* %X
-	%tmp2 = load <4 x float>* %Y
+	%tmp = load <4 x float>, <4 x float>* %X
+	%tmp2 = load <4 x float>, <4 x float>* %Y
 	%tmp3 = fmul <4 x float> %tmp, %tmp2
 	ret <4 x float> %tmp3
 }
diff --git a/llvm/test/CodeGen/PowerPC/vec_perf_shuffle.ll b/llvm/test/CodeGen/PowerPC/vec_perf_shuffle.ll
index 2c3594d..f8b37fa 100644
--- a/llvm/test/CodeGen/PowerPC/vec_perf_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_perf_shuffle.ll
@@ -1,36 +1,36 @@
 ; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep vperm
 
 define <4 x float> @test_uu72(<4 x float>* %P1, <4 x float>* %P2) {
-	%V1 = load <4 x float>* %P1		; <<4 x float>> [#uses=1]
-	%V2 = load <4 x float>* %P2		; <<4 x float>> [#uses=1]
+	%V1 = load <4 x float>, <4 x float>* %P1		; <<4 x float>> [#uses=1]
+	%V2 = load <4 x float>, <4 x float>* %P2		; <<4 x float>> [#uses=1]
 	%V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 undef, i32 undef, i32 7, i32 2 >		; <<4 x float>> [#uses=1]
 	ret <4 x float> %V3
 }
 
 define <4 x float> @test_30u5(<4 x float>* %P1, <4 x float>* %P2) {
-	%V1 = load <4 x float>* %P1		; <<4 x float>> [#uses=1]
-	%V2 = load <4 x float>* %P2		; <<4 x float>> [#uses=1]
+	%V1 = load <4 x float>, <4 x float>* %P1		; <<4 x float>> [#uses=1]
+	%V2 = load <4 x float>, <4 x float>* %P2		; <<4 x float>> [#uses=1]
 	%V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 3, i32 0, i32 undef, i32 5 >		; <<4 x float>> [#uses=1]
 	ret <4 x float> %V3
 }
 
 define <4 x float> @test_3u73(<4 x float>* %P1, <4 x float>* %P2) {
-	%V1 = load <4 x float>* %P1		; <<4 x float>> [#uses=1]
-	%V2 = load <4 x float>* %P2		; <<4 x float>> [#uses=1]
+	%V1 = load <4 x float>, <4 x float>* %P1		; <<4 x float>> [#uses=1]
+	%V2 = load <4 x float>, <4 x float>* %P2		; <<4 x float>> [#uses=1]
 	%V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 3, i32 undef, i32 7, i32 3 >		; <<4 x float>> [#uses=1]
 	ret <4 x float> %V3
 }
 
 define <4 x float> @test_3774(<4 x float>* %P1, <4 x float>* %P2) {
-	%V1 = load <4 x float>* %P1		; <<4 x float>> [#uses=1]
-	%V2 = load <4 x float>* %P2		; <<4 x float>> [#uses=1]
+	%V1 = load <4 x float>, <4 x float>* %P1		; <<4 x float>> [#uses=1]
+	%V2 = load <4 x float>, <4 x float>* %P2		; <<4 x float>> [#uses=1]
 	%V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 3, i32 7, i32 7, i32 4 >		; <<4 x float>> [#uses=1]
 	ret <4 x float> %V3
 }
 
 define <4 x float> @test_4450(<4 x float>* %P1, <4 x float>* %P2) {
-	%V1 = load <4 x float>* %P1		; <<4 x float>> [#uses=1]
-	%V2 = load <4 x float>* %P2		; <<4 x float>> [#uses=1]
+	%V1 = load <4 x float>, <4 x float>* %P1		; <<4 x float>> [#uses=1]
+	%V2 = load <4 x float>, <4 x float>* %P2		; <<4 x float>> [#uses=1]
 	%V3 = shufflevector <4 x float> %V1, <4 x float> %V2, <4 x i32> < i32 4, i32 4, i32 5, i32 0 >		; <<4 x float>> [#uses=1]
 	ret <4 x float> %V3
 }
diff --git a/llvm/test/CodeGen/PowerPC/vec_shuffle.ll b/llvm/test/CodeGen/PowerPC/vec_shuffle.ll
index 8270632..a942dd1 100644
--- a/llvm/test/CodeGen/PowerPC/vec_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_shuffle.ll
@@ -9,8 +9,8 @@
 
 define void @VSLDOI_xy(<8 x i16>* %A, <8 x i16>* %B) {
 entry:
-	%tmp = load <8 x i16>* %A		; <<8 x i16>> [#uses=1]
-	%tmp2 = load <8 x i16>* %B		; <<8 x i16>> [#uses=1]
+	%tmp = load <8 x i16>, <8 x i16>* %A		; <<8 x i16>> [#uses=1]
+	%tmp2 = load <8 x i16>, <8 x i16>* %B		; <<8 x i16>> [#uses=1]
 	%tmp.upgrd.1 = bitcast <8 x i16> %tmp to <16 x i8>		; <<16 x i8>> [#uses=11]
 	%tmp2.upgrd.2 = bitcast <8 x i16> %tmp2 to <16 x i8>		; <<16 x i8>> [#uses=5]
 	%tmp.upgrd.3 = extractelement <16 x i8> %tmp.upgrd.1, i32 5		; <i8> [#uses=1]
@@ -51,8 +51,8 @@
 }
 
 define void @VSLDOI_xx(<8 x i16>* %A, <8 x i16>* %B) {
-	%tmp = load <8 x i16>* %A		; <<8 x i16>> [#uses=1]
-	%tmp2 = load <8 x i16>* %A		; <<8 x i16>> [#uses=1]
+	%tmp = load <8 x i16>, <8 x i16>* %A		; <<8 x i16>> [#uses=1]
+	%tmp2 = load <8 x i16>, <8 x i16>* %A		; <<8 x i16>> [#uses=1]
 	%tmp.upgrd.5 = bitcast <8 x i16> %tmp to <16 x i8>		; <<16 x i8>> [#uses=11]
 	%tmp2.upgrd.6 = bitcast <8 x i16> %tmp2 to <16 x i8>		; <<16 x i8>> [#uses=5]
 	%tmp.upgrd.7 = extractelement <16 x i8> %tmp.upgrd.5, i32 5		; <i8> [#uses=1]
@@ -94,9 +94,9 @@
 
 define void @VPERM_promote(<8 x i16>* %A, <8 x i16>* %B) {
 entry:
-	%tmp = load <8 x i16>* %A		; <<8 x i16>> [#uses=1]
+	%tmp = load <8 x i16>, <8 x i16>* %A		; <<8 x i16>> [#uses=1]
 	%tmp.upgrd.9 = bitcast <8 x i16> %tmp to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%tmp2 = load <8 x i16>* %B		; <<8 x i16>> [#uses=1]
+	%tmp2 = load <8 x i16>, <8 x i16>* %B		; <<8 x i16>> [#uses=1]
 	%tmp2.upgrd.10 = bitcast <8 x i16> %tmp2 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp3 = call <4 x i32> @llvm.ppc.altivec.vperm( <4 x i32> %tmp.upgrd.9, <4 x i32> %tmp2.upgrd.10, <16 x i8> < i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14, i8 14 > )		; <<4 x i32>> [#uses=1]
 	%tmp3.upgrd.11 = bitcast <4 x i32> %tmp3 to <8 x i16>		; <<8 x i16>> [#uses=1]
@@ -108,8 +108,8 @@
 
 define void @tb_l(<16 x i8>* %A, <16 x i8>* %B) {
 entry:
-	%tmp = load <16 x i8>* %A		; <<16 x i8>> [#uses=8]
-	%tmp2 = load <16 x i8>* %B		; <<16 x i8>> [#uses=8]
+	%tmp = load <16 x i8>, <16 x i8>* %A		; <<16 x i8>> [#uses=8]
+	%tmp2 = load <16 x i8>, <16 x i8>* %B		; <<16 x i8>> [#uses=8]
 	%tmp.upgrd.12 = extractelement <16 x i8> %tmp, i32 8		; <i8> [#uses=1]
 	%tmp3 = extractelement <16 x i8> %tmp2, i32 8		; <i8> [#uses=1]
 	%tmp4 = extractelement <16 x i8> %tmp, i32 9		; <i8> [#uses=1]
@@ -148,8 +148,8 @@
 
 define void @th_l(<8 x i16>* %A, <8 x i16>* %B) {
 entry:
-	%tmp = load <8 x i16>* %A		; <<8 x i16>> [#uses=4]
-	%tmp2 = load <8 x i16>* %B		; <<8 x i16>> [#uses=4]
+	%tmp = load <8 x i16>, <8 x i16>* %A		; <<8 x i16>> [#uses=4]
+	%tmp2 = load <8 x i16>, <8 x i16>* %B		; <<8 x i16>> [#uses=4]
 	%tmp.upgrd.13 = extractelement <8 x i16> %tmp, i32 4		; <i16> [#uses=1]
 	%tmp3 = extractelement <8 x i16> %tmp2, i32 4		; <i16> [#uses=1]
 	%tmp4 = extractelement <8 x i16> %tmp, i32 5		; <i16> [#uses=1]
@@ -172,8 +172,8 @@
 
 define void @tw_l(<4 x i32>* %A, <4 x i32>* %B) {
 entry:
-	%tmp = load <4 x i32>* %A		; <<4 x i32>> [#uses=2]
-	%tmp2 = load <4 x i32>* %B		; <<4 x i32>> [#uses=2]
+	%tmp = load <4 x i32>, <4 x i32>* %A		; <<4 x i32>> [#uses=2]
+	%tmp2 = load <4 x i32>, <4 x i32>* %B		; <<4 x i32>> [#uses=2]
 	%tmp.upgrd.14 = extractelement <4 x i32> %tmp, i32 2		; <i32> [#uses=1]
 	%tmp3 = extractelement <4 x i32> %tmp2, i32 2		; <i32> [#uses=1]
 	%tmp4 = extractelement <4 x i32> %tmp, i32 3		; <i32> [#uses=1]
@@ -188,8 +188,8 @@
 
 define void @tb_h(<16 x i8>* %A, <16 x i8>* %B) {
 entry:
-	%tmp = load <16 x i8>* %A		; <<16 x i8>> [#uses=8]
-	%tmp2 = load <16 x i8>* %B		; <<16 x i8>> [#uses=8]
+	%tmp = load <16 x i8>, <16 x i8>* %A		; <<16 x i8>> [#uses=8]
+	%tmp2 = load <16 x i8>, <16 x i8>* %B		; <<16 x i8>> [#uses=8]
 	%tmp.upgrd.15 = extractelement <16 x i8> %tmp, i32 0		; <i8> [#uses=1]
 	%tmp3 = extractelement <16 x i8> %tmp2, i32 0		; <i8> [#uses=1]
 	%tmp4 = extractelement <16 x i8> %tmp, i32 1		; <i8> [#uses=1]
@@ -228,8 +228,8 @@
 
 define void @th_h(<8 x i16>* %A, <8 x i16>* %B) {
 entry:
-	%tmp = load <8 x i16>* %A		; <<8 x i16>> [#uses=4]
-	%tmp2 = load <8 x i16>* %B		; <<8 x i16>> [#uses=4]
+	%tmp = load <8 x i16>, <8 x i16>* %A		; <<8 x i16>> [#uses=4]
+	%tmp2 = load <8 x i16>, <8 x i16>* %B		; <<8 x i16>> [#uses=4]
 	%tmp.upgrd.16 = extractelement <8 x i16> %tmp, i32 0		; <i16> [#uses=1]
 	%tmp3 = extractelement <8 x i16> %tmp2, i32 0		; <i16> [#uses=1]
 	%tmp4 = extractelement <8 x i16> %tmp, i32 1		; <i16> [#uses=1]
@@ -252,8 +252,8 @@
 
 define void @tw_h(<4 x i32>* %A, <4 x i32>* %B) {
 entry:
-	%tmp = load <4 x i32>* %A		; <<4 x i32>> [#uses=2]
-	%tmp2 = load <4 x i32>* %B		; <<4 x i32>> [#uses=2]
+	%tmp = load <4 x i32>, <4 x i32>* %A		; <<4 x i32>> [#uses=2]
+	%tmp2 = load <4 x i32>, <4 x i32>* %B		; <<4 x i32>> [#uses=2]
 	%tmp.upgrd.17 = extractelement <4 x i32> %tmp2, i32 0		; <i32> [#uses=1]
 	%tmp3 = extractelement <4 x i32> %tmp, i32 0		; <i32> [#uses=1]
 	%tmp4 = extractelement <4 x i32> %tmp2, i32 1		; <i32> [#uses=1]
@@ -267,8 +267,8 @@
 }
 
 define void @tw_h_flop(<4 x i32>* %A, <4 x i32>* %B) {
-	%tmp = load <4 x i32>* %A		; <<4 x i32>> [#uses=2]
-	%tmp2 = load <4 x i32>* %B		; <<4 x i32>> [#uses=2]
+	%tmp = load <4 x i32>, <4 x i32>* %A		; <<4 x i32>> [#uses=2]
+	%tmp2 = load <4 x i32>, <4 x i32>* %B		; <<4 x i32>> [#uses=2]
 	%tmp.upgrd.18 = extractelement <4 x i32> %tmp, i32 0		; <i32> [#uses=1]
 	%tmp3 = extractelement <4 x i32> %tmp2, i32 0		; <i32> [#uses=1]
 	%tmp4 = extractelement <4 x i32> %tmp, i32 1		; <i32> [#uses=1]
@@ -283,7 +283,7 @@
 
 define void @VMRG_UNARY_tb_l(<16 x i8>* %A, <16 x i8>* %B) {
 entry:
-	%tmp = load <16 x i8>* %A		; <<16 x i8>> [#uses=16]
+	%tmp = load <16 x i8>, <16 x i8>* %A		; <<16 x i8>> [#uses=16]
 	%tmp.upgrd.19 = extractelement <16 x i8> %tmp, i32 8		; <i8> [#uses=1]
 	%tmp3 = extractelement <16 x i8> %tmp, i32 8		; <i8> [#uses=1]
 	%tmp4 = extractelement <16 x i8> %tmp, i32 9		; <i8> [#uses=1]
@@ -322,7 +322,7 @@
 
 define void @VMRG_UNARY_th_l(<8 x i16>* %A, <8 x i16>* %B) {
 entry:
-	%tmp = load <8 x i16>* %A		; <<8 x i16>> [#uses=8]
+	%tmp = load <8 x i16>, <8 x i16>* %A		; <<8 x i16>> [#uses=8]
 	%tmp.upgrd.20 = extractelement <8 x i16> %tmp, i32 4		; <i16> [#uses=1]
 	%tmp3 = extractelement <8 x i16> %tmp, i32 4		; <i16> [#uses=1]
 	%tmp4 = extractelement <8 x i16> %tmp, i32 5		; <i16> [#uses=1]
@@ -345,7 +345,7 @@
 
 define void @VMRG_UNARY_tw_l(<4 x i32>* %A, <4 x i32>* %B) {
 entry:
-	%tmp = load <4 x i32>* %A		; <<4 x i32>> [#uses=4]
+	%tmp = load <4 x i32>, <4 x i32>* %A		; <<4 x i32>> [#uses=4]
 	%tmp.upgrd.21 = extractelement <4 x i32> %tmp, i32 2		; <i32> [#uses=1]
 	%tmp3 = extractelement <4 x i32> %tmp, i32 2		; <i32> [#uses=1]
 	%tmp4 = extractelement <4 x i32> %tmp, i32 3		; <i32> [#uses=1]
@@ -360,7 +360,7 @@
 
 define void @VMRG_UNARY_tb_h(<16 x i8>* %A, <16 x i8>* %B) {
 entry:
-	%tmp = load <16 x i8>* %A		; <<16 x i8>> [#uses=16]
+	%tmp = load <16 x i8>, <16 x i8>* %A		; <<16 x i8>> [#uses=16]
 	%tmp.upgrd.22 = extractelement <16 x i8> %tmp, i32 0		; <i8> [#uses=1]
 	%tmp3 = extractelement <16 x i8> %tmp, i32 0		; <i8> [#uses=1]
 	%tmp4 = extractelement <16 x i8> %tmp, i32 1		; <i8> [#uses=1]
@@ -399,7 +399,7 @@
 
 define void @VMRG_UNARY_th_h(<8 x i16>* %A, <8 x i16>* %B) {
 entry:
-	%tmp = load <8 x i16>* %A		; <<8 x i16>> [#uses=8]
+	%tmp = load <8 x i16>, <8 x i16>* %A		; <<8 x i16>> [#uses=8]
 	%tmp.upgrd.23 = extractelement <8 x i16> %tmp, i32 0		; <i16> [#uses=1]
 	%tmp3 = extractelement <8 x i16> %tmp, i32 0		; <i16> [#uses=1]
 	%tmp4 = extractelement <8 x i16> %tmp, i32 1		; <i16> [#uses=1]
@@ -422,7 +422,7 @@
 
 define void @VMRG_UNARY_tw_h(<4 x i32>* %A, <4 x i32>* %B) {
 entry:
-	%tmp = load <4 x i32>* %A		; <<4 x i32>> [#uses=4]
+	%tmp = load <4 x i32>, <4 x i32>* %A		; <<4 x i32>> [#uses=4]
 	%tmp.upgrd.24 = extractelement <4 x i32> %tmp, i32 0		; <i32> [#uses=1]
 	%tmp3 = extractelement <4 x i32> %tmp, i32 0		; <i32> [#uses=1]
 	%tmp4 = extractelement <4 x i32> %tmp, i32 1		; <i32> [#uses=1]
@@ -437,7 +437,7 @@
 
 define void @VPCKUHUM_unary(<8 x i16>* %A, <8 x i16>* %B) {
 entry:
-	%tmp = load <8 x i16>* %A		; <<8 x i16>> [#uses=2]
+	%tmp = load <8 x i16>, <8 x i16>* %A		; <<8 x i16>> [#uses=2]
 	%tmp.upgrd.25 = bitcast <8 x i16> %tmp to <16 x i8>		; <<16 x i8>> [#uses=8]
 	%tmp3 = bitcast <8 x i16> %tmp to <16 x i8>		; <<16 x i8>> [#uses=8]
 	%tmp.upgrd.26 = extractelement <16 x i8> %tmp.upgrd.25, i32 1		; <i8> [#uses=1]
@@ -479,7 +479,7 @@
 
 define void @VPCKUWUM_unary(<4 x i32>* %A, <4 x i32>* %B) {
 entry:
-	%tmp = load <4 x i32>* %A		; <<4 x i32>> [#uses=2]
+	%tmp = load <4 x i32>, <4 x i32>* %A		; <<4 x i32>> [#uses=2]
 	%tmp.upgrd.28 = bitcast <4 x i32> %tmp to <8 x i16>		; <<8 x i16>> [#uses=4]
 	%tmp3 = bitcast <4 x i32> %tmp to <8 x i16>		; <<8 x i16>> [#uses=4]
 	%tmp.upgrd.29 = extractelement <8 x i16> %tmp.upgrd.28, i32 1		; <i16> [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/vec_shuffle_le.ll b/llvm/test/CodeGen/PowerPC/vec_shuffle_le.ll
index c7fc1c6..46d451f 100644
--- a/llvm/test/CodeGen/PowerPC/vec_shuffle_le.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_shuffle_le.ll
@@ -3,8 +3,8 @@
 define void @VPKUHUM_xy(<16 x i8>* %A, <16 x i8>* %B) {
 entry:
 ; CHECK: VPKUHUM_xy:
-        %tmp = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
 ; CHECK: lvx [[REG1:[0-9]+]]
 ; CHECK: lvx [[REG2:[0-9]+]]
@@ -16,7 +16,7 @@
 define void @VPKUHUM_xx(<16 x i8>* %A) {
 entry:
 ; CHECK: VPKUHUM_xx:
-        %tmp = load <16 x i8>* %A
+        %tmp = load <16 x i8>, <16 x i8>* %A
         %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 ; CHECK: vpkuhum
         store <16 x i8> %tmp2, <16 x i8>* %A
@@ -26,8 +26,8 @@
 define void @VPKUWUM_xy(<16 x i8>* %A, <16 x i8>* %B) {
 entry:
 ; CHECK: VPKUWUM_xy:
-        %tmp = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13, i32 16, i32 17, i32 20, i32 21, i32 24, i32 25, i32 28, i32 29>
 ; CHECK: lvx [[REG1:[0-9]+]]
 ; CHECK: lvx [[REG2:[0-9]+]]
@@ -39,7 +39,7 @@
 define void @VPKUWUM_xx(<16 x i8>* %A) {
 entry:
 ; CHECK: VPKUWUM_xx:
-        %tmp = load <16 x i8>* %A
+        %tmp = load <16 x i8>, <16 x i8>* %A
         %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13, i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
 ; CHECK: vpkuwum
         store <16 x i8> %tmp2, <16 x i8>* %A
@@ -49,8 +49,8 @@
 define void @VMRGLB_xy(<16 x i8>* %A, <16 x i8>* %B) {
 entry:
 ; CHECK: VMRGLB_xy:
-        %tmp = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
 ; CHECK: lvx [[REG1:[0-9]+]]
 ; CHECK: lvx [[REG2:[0-9]+]]
@@ -62,7 +62,7 @@
 define void @VMRGLB_xx(<16 x i8>* %A) {
 entry:
 ; CHECK: VMRGLB_xx:
-        %tmp = load <16 x i8>* %A
+        %tmp = load <16 x i8>, <16 x i8>* %A
         %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
 ; CHECK: vmrglb
         store <16 x i8> %tmp2, <16 x i8>* %A
@@ -72,8 +72,8 @@
 define void @VMRGHB_xy(<16 x i8>* %A, <16 x i8>* %B) {
 entry:
 ; CHECK: VMRGHB_xy:
-        %tmp = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
 ; CHECK: lvx [[REG1:[0-9]+]]
 ; CHECK: lvx [[REG2:[0-9]+]]
@@ -85,7 +85,7 @@
 define void @VMRGHB_xx(<16 x i8>* %A) {
 entry:
 ; CHECK: VMRGHB_xx:
-        %tmp = load <16 x i8>* %A
+        %tmp = load <16 x i8>, <16 x i8>* %A
         %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 8, i32 8, i32 9, i32 9, i32 10, i32 10, i32 11, i32 11, i32 12, i32 12, i32 13, i32 13, i32 14, i32 14, i32 15, i32 15>
 ; CHECK: vmrghb
         store <16 x i8> %tmp2, <16 x i8>* %A
@@ -95,8 +95,8 @@
 define void @VMRGLH_xy(<16 x i8>* %A, <16 x i8>* %B) {
 entry:
 ; CHECK: VMRGLH_xy:
-        %tmp = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 2, i32 3, i32 18, i32 19, i32 4, i32 5, i32 20, i32 21, i32 6, i32 7, i32 22, i32 23>
 ; CHECK: lvx [[REG1:[0-9]+]]
 ; CHECK: lvx [[REG2:[0-9]+]]
@@ -108,7 +108,7 @@
 define void @VMRGLH_xx(<16 x i8>* %A) {
 entry:
 ; CHECK: VMRGLH_xx:
-        %tmp = load <16 x i8>* %A
+        %tmp = load <16 x i8>, <16 x i8>* %A
         %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 4, i32 5, i32 4, i32 5, i32 6, i32 7, i32 6, i32 7>
 ; CHECK: vmrglh
         store <16 x i8> %tmp2, <16 x i8>* %A
@@ -118,8 +118,8 @@
 define void @VMRGHH_xy(<16 x i8>* %A, <16 x i8>* %B) {
 entry:
 ; CHECK: VMRGHH_xy:
-        %tmp = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 9, i32 24, i32 25, i32 10, i32 11, i32 26, i32 27, i32 12, i32 13, i32 28, i32 29, i32 14, i32 15, i32 30, i32 31>
 ; CHECK: lvx [[REG1:[0-9]+]]
 ; CHECK: lvx [[REG2:[0-9]+]]
@@ -131,7 +131,7 @@
 define void @VMRGHH_xx(<16 x i8>* %A) {
 entry:
 ; CHECK: VMRGHH_xx:
-        %tmp = load <16 x i8>* %A
+        %tmp = load <16 x i8>, <16 x i8>* %A
         %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 8, i32 9, i32 8, i32 9, i32 10, i32 11, i32 10, i32 11, i32 12, i32 13, i32 12, i32 13, i32 14, i32 15, i32 14, i32 15>
 ; CHECK: vmrghh
         store <16 x i8> %tmp2, <16 x i8>* %A
@@ -141,8 +141,8 @@
 define void @VMRGLW_xy(<16 x i8>* %A, <16 x i8>* %B) {
 entry:
 ; CHECK: VMRGLW_xy:
-        %tmp = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23>
 ; CHECK: lvx [[REG1:[0-9]+]]
 ; CHECK: lvx [[REG2:[0-9]+]]
@@ -154,7 +154,7 @@
 define void @VMRGLW_xx(<16 x i8>* %A) {
 entry:
 ; CHECK: VMRGLW_xx:
-        %tmp = load <16 x i8>* %A
+        %tmp = load <16 x i8>, <16 x i8>* %A
         %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
 ; CHECK: vmrglw
         store <16 x i8> %tmp2, <16 x i8>* %A
@@ -164,8 +164,8 @@
 define void @VMRGHW_xy(<16 x i8>* %A, <16 x i8>* %B) {
 entry:
 ; CHECK: VMRGHW_xy:
-        %tmp = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 12, i32 13, i32 14, i32 15, i32 28, i32 29, i32 30, i32 31>
 ; CHECK: lvx [[REG1:[0-9]+]]
 ; CHECK: lvx [[REG2:[0-9]+]]
@@ -177,7 +177,7 @@
 define void @VMRGHW_xx(<16 x i8>* %A) {
 entry:
 ; CHECK: VMRGHW_xx:
-        %tmp = load <16 x i8>* %A
+        %tmp = load <16 x i8>, <16 x i8>* %A
         %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
 ; CHECK: vmrghw
         store <16 x i8> %tmp2, <16 x i8>* %A
@@ -187,8 +187,8 @@
 define void @VSLDOI_xy(<16 x i8>* %A, <16 x i8>* %B) {
 entry:
 ; CHECK: VSLDOI_xy:
-        %tmp = load <16 x i8>* %A
-        %tmp2 = load <16 x i8>* %B
+        %tmp = load <16 x i8>, <16 x i8>* %A
+        %tmp2 = load <16 x i8>, <16 x i8>* %B
         %tmp3 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
 ; CHECK: lvx [[REG1:[0-9]+]]
 ; CHECK: lvx [[REG2:[0-9]+]]
@@ -200,7 +200,7 @@
 define void @VSLDOI_xx(<16 x i8>* %A) {
 entry:
 ; CHECK: VSLDOI_xx:
-        %tmp = load <16 x i8>* %A
+        %tmp = load <16 x i8>, <16 x i8>* %A
         %tmp2 = shufflevector <16 x i8> %tmp, <16 x i8> %tmp, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
 ; CHECK: vsldoi
         store <16 x i8> %tmp2, <16 x i8>* %A
diff --git a/llvm/test/CodeGen/PowerPC/vec_splat.ll b/llvm/test/CodeGen/PowerPC/vec_splat.ll
index 6123728..aeed94c 100644
--- a/llvm/test/CodeGen/PowerPC/vec_splat.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_splat.ll
@@ -14,7 +14,7 @@
         %tmp2 = insertelement %f4 %tmp, float %X, i32 1         ; <%f4> [#uses=1]
         %tmp4 = insertelement %f4 %tmp2, float %X, i32 2                ; <%f4> [#uses=1]
         %tmp6 = insertelement %f4 %tmp4, float %X, i32 3                ; <%f4> [#uses=1]
-        %q = load %f4* %Q               ; <%f4> [#uses=1]
+        %q = load %f4, %f4* %Q               ; <%f4> [#uses=1]
         %R = fadd %f4 %q, %tmp6          ; <%f4> [#uses=1]
         store %f4 %R, %f4* %P
         ret void
@@ -25,21 +25,21 @@
         %tmp2 = insertelement %i4 %tmp, i32 %X, i32 1           ; <%i4> [#uses=1]
         %tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2          ; <%i4> [#uses=1]
         %tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3          ; <%i4> [#uses=1]
-        %q = load %i4* %Q               ; <%i4> [#uses=1]
+        %q = load %i4, %i4* %Q               ; <%i4> [#uses=1]
         %R = add %i4 %q, %tmp6          ; <%i4> [#uses=1]
         store %i4 %R, %i4* %P
         ret void
 }
 
 define void @splat_imm_i32(%i4* %P, %i4* %Q, i32 %X) nounwind {
-        %q = load %i4* %Q               ; <%i4> [#uses=1]
+        %q = load %i4, %i4* %Q               ; <%i4> [#uses=1]
         %R = add %i4 %q, < i32 -1, i32 -1, i32 -1, i32 -1 >             ; <%i4> [#uses=1]
         store %i4 %R, %i4* %P
         ret void
 }
 
 define void @splat_imm_i16(%i4* %P, %i4* %Q, i32 %X) nounwind {
-        %q = load %i4* %Q               ; <%i4> [#uses=1]
+        %q = load %i4, %i4* %Q               ; <%i4> [#uses=1]
         %R = add %i4 %q, < i32 65537, i32 65537, i32 65537, i32 65537 >         ; <%i4> [#uses=1]
         store %i4 %R, %i4* %P
         ret void
@@ -60,7 +60,7 @@
 }
 
 define void @spltish(<16 x i8>* %A, <16 x i8>* %B) nounwind {
-        %tmp = load <16 x i8>* %B               ; <<16 x i8>> [#uses=1]
+        %tmp = load <16 x i8>, <16 x i8>* %B               ; <<16 x i8>> [#uses=1]
         %tmp.s = bitcast <16 x i8> %tmp to <16 x i8>            ; <<16 x i8>> [#uses=1]
         %tmp4 = sub <16 x i8> %tmp.s, bitcast (<8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16
  15, i16 15, i16 15 > to <16 x i8>)             ; <<16 x i8>> [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/vec_splat_constant.ll b/llvm/test/CodeGen/PowerPC/vec_splat_constant.ll
index b227794..53676fc 100644
--- a/llvm/test/CodeGen/PowerPC/vec_splat_constant.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_splat_constant.ll
@@ -12,8 +12,8 @@
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   store <16 x i8> %x, <16 x i8>* %x_addr
   store <16 x i8> <i8 0, i8 0, i8 0, i8 14, i8 0, i8 0, i8 0, i8 14, i8 0, i8 0, i8 0, i8 14, i8 0, i8 0, i8 0, i8 14>, <16 x i8>* %temp, align 16
-  %0 = load <16 x i8>* %x_addr, align 16          ; <<16 x i8>> [#uses=1]
-  %1 = load <16 x i8>* %temp, align 16            ; <<16 x i8>> [#uses=1]
+  %0 = load <16 x i8>, <16 x i8>* %x_addr, align 16          ; <<16 x i8>> [#uses=1]
+  %1 = load <16 x i8>, <16 x i8>* %temp, align 16            ; <<16 x i8>> [#uses=1]
   %tmp = add <16 x i8> %0, %1                     ; <<16 x i8>> [#uses=1]
   store <16 x i8> %tmp, <16 x i8>* @baz, align 16
   br label %return
diff --git a/llvm/test/CodeGen/PowerPC/vec_zero.ll b/llvm/test/CodeGen/PowerPC/vec_zero.ll
index f862b2c..aec61fbd 100644
--- a/llvm/test/CodeGen/PowerPC/vec_zero.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_zero.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vxor
 
 define void @foo(<4 x float>* %P) {
-        %T = load <4 x float>* %P               ; <<4 x float>> [#uses=1]
+        %T = load <4 x float>, <4 x float>* %P               ; <<4 x float>> [#uses=1]
         %S = fadd <4 x float> zeroinitializer, %T                ; <<4 x float>> [#uses=1]
         store <4 x float> %S, <4 x float>* %P
         ret void
diff --git a/llvm/test/CodeGen/PowerPC/vector-identity-shuffle.ll b/llvm/test/CodeGen/PowerPC/vector-identity-shuffle.ll
index dfa2e35..35979f6 100644
--- a/llvm/test/CodeGen/PowerPC/vector-identity-shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-identity-shuffle.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep vperm
 
 define void @test(<4 x float>* %tmp2.i) {
-        %tmp2.i.upgrd.1 = load <4 x float>* %tmp2.i             ; <<4 x float>> [#uses=4]
+        %tmp2.i.upgrd.1 = load <4 x float>, <4 x float>* %tmp2.i             ; <<4 x float>> [#uses=4]
         %xFloat0.48 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 0      ; <float> [#uses=1]
         %inFloat0.49 = insertelement <4 x float> undef, float %xFloat0.48, i32 0              ; <<4 x float>> [#uses=1]
         %xFloat1.50 = extractelement <4 x float> %tmp2.i.upgrd.1, i32 1      ; <float> [#uses=1]
diff --git a/llvm/test/CodeGen/PowerPC/vector.ll b/llvm/test/CodeGen/PowerPC/vector.ll
index 859a85a..723ca54 100644
--- a/llvm/test/CodeGen/PowerPC/vector.ll
+++ b/llvm/test/CodeGen/PowerPC/vector.ll
@@ -12,56 +12,56 @@
 ;;; TEST HANDLING OF VARIOUS VECTOR SIZES
 
 define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
-        %p = load %f1* %P               ; <%f1> [#uses=1]
-        %q = load %f1* %Q               ; <%f1> [#uses=1]
+        %p = load %f1, %f1* %P               ; <%f1> [#uses=1]
+        %q = load %f1, %f1* %Q               ; <%f1> [#uses=1]
         %R = fadd %f1 %p, %q             ; <%f1> [#uses=1]
         store %f1 %R, %f1* %S
         ret void
 }
 
 define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
-        %p = load %f2* %P               ; <%f2> [#uses=1]
-        %q = load %f2* %Q               ; <%f2> [#uses=1]
+        %p = load %f2, %f2* %P               ; <%f2> [#uses=1]
+        %q = load %f2, %f2* %Q               ; <%f2> [#uses=1]
         %R = fadd %f2 %p, %q             ; <%f2> [#uses=1]
         store %f2 %R, %f2* %S
         ret void
 }
 
 define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
-        %p = load %f4* %P               ; <%f4> [#uses=1]
-        %q = load %f4* %Q               ; <%f4> [#uses=1]
+        %p = load %f4, %f4* %P               ; <%f4> [#uses=1]
+        %q = load %f4, %f4* %Q               ; <%f4> [#uses=1]
         %R = fadd %f4 %p, %q             ; <%f4> [#uses=1]
         store %f4 %R, %f4* %S
         ret void
 }
 
 define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
-        %p = load %f8* %P               ; <%f8> [#uses=1]
-        %q = load %f8* %Q               ; <%f8> [#uses=1]
+        %p = load %f8, %f8* %P               ; <%f8> [#uses=1]
+        %q = load %f8, %f8* %Q               ; <%f8> [#uses=1]
         %R = fadd %f8 %p, %q             ; <%f8> [#uses=1]
         store %f8 %R, %f8* %S
         ret void
 }
 
 define void @test_fmul(%f8* %P, %f8* %Q, %f8* %S) {
-        %p = load %f8* %P               ; <%f8> [#uses=1]
-        %q = load %f8* %Q               ; <%f8> [#uses=1]
+        %p = load %f8, %f8* %P               ; <%f8> [#uses=1]
+        %q = load %f8, %f8* %Q               ; <%f8> [#uses=1]
         %R = fmul %f8 %p, %q             ; <%f8> [#uses=1]
         store %f8 %R, %f8* %S
         ret void
 }
 
 define void @test_div(%f8* %P, %f8* %Q, %f8* %S) {
-        %p = load %f8* %P               ; <%f8> [#uses=1]
-        %q = load %f8* %Q               ; <%f8> [#uses=1]
+        %p = load %f8, %f8* %P               ; <%f8> [#uses=1]
+        %q = load %f8, %f8* %Q               ; <%f8> [#uses=1]
         %R = fdiv %f8 %p, %q            ; <%f8> [#uses=1]
         store %f8 %R, %f8* %S
         ret void
 }
 
 define void @test_rem(%f8* %P, %f8* %Q, %f8* %S) {
-        %p = load %f8* %P               ; <%f8> [#uses=1]
-        %q = load %f8* %Q               ; <%f8> [#uses=1]
+        %p = load %f8, %f8* %P               ; <%f8> [#uses=1]
+        %q = load %f8, %f8* %Q               ; <%f8> [#uses=1]
         %R = frem %f8 %p, %q            ; <%f8> [#uses=1]
         store %f8 %R, %f8* %S
         ret void
@@ -70,7 +70,7 @@
 ;;; TEST VECTOR CONSTRUCTS
 
 define void @test_cst(%f4* %P, %f4* %S) {
-        %p = load %f4* %P               ; <%f4> [#uses=1]
+        %p = load %f4, %f4* %P               ; <%f4> [#uses=1]
         %R = fadd %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float
  2.000000e+00, float 4.500000e+00 >             ; <%f4> [#uses=1]
         store %f4 %R, %f4* %S
@@ -78,14 +78,14 @@
 }
 
 define void @test_zero(%f4* %P, %f4* %S) {
-        %p = load %f4* %P               ; <%f4> [#uses=1]
+        %p = load %f4, %f4* %P               ; <%f4> [#uses=1]
         %R = fadd %f4 %p, zeroinitializer                ; <%f4> [#uses=1]
         store %f4 %R, %f4* %S
         ret void
 }
 
 define void @test_undef(%f4* %P, %f4* %S) {
-        %p = load %f4* %P               ; <%f4> [#uses=1]
+        %p = load %f4, %f4* %P               ; <%f4> [#uses=1]
         %R = fadd %f4 %p, undef          ; <%f4> [#uses=1]
         store %f4 %R, %f4* %S
         ret void
@@ -111,19 +111,19 @@
 }
 
 define float @test_extract_elt(%f8* %P) {
-        %p = load %f8* %P               ; <%f8> [#uses=1]
+        %p = load %f8, %f8* %P               ; <%f8> [#uses=1]
         %R = extractelement %f8 %p, i32 3               ; <float> [#uses=1]
         ret float %R
 }
 
 define double @test_extract_elt2(%d8* %P) {
-        %p = load %d8* %P               ; <%d8> [#uses=1]
+        %p = load %d8, %d8* %P               ; <%d8> [#uses=1]
         %R = extractelement %d8 %p, i32 3               ; <double> [#uses=1]
         ret double %R
 }
 
 define void @test_cast_1(%f4* %b, %i4* %a) {
-        %tmp = load %f4* %b             ; <%f4> [#uses=1]
+        %tmp = load %f4, %f4* %b             ; <%f4> [#uses=1]
         %tmp2 = fadd %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float
 3.000000e+00, float 4.000000e+00 >              ; <%f4> [#uses=1]
         %tmp3 = bitcast %f4 %tmp2 to %i4                ; <%i4> [#uses=1]
@@ -133,7 +133,7 @@
 }
 
 define void @test_cast_2(%f8* %a, <8 x i32>* %b) {
-        %T = load %f8* %a               ; <%f8> [#uses=1]
+        %T = load %f8, %f8* %a               ; <%f8> [#uses=1]
         %T2 = bitcast %f8 %T to <8 x i32>               
         store <8 x i32> %T2, <8 x i32>* %b
         ret void
@@ -147,7 +147,7 @@
         %tmp2 = insertelement %f4 %tmp, float %X, i32 1       
         %tmp4 = insertelement %f4 %tmp2, float %X, i32 2    
         %tmp6 = insertelement %f4 %tmp4, float %X, i32 3   
-        %q = load %f4* %Q               ; <%f4> [#uses=1]
+        %q = load %f4, %f4* %Q               ; <%f4> [#uses=1]
         %R = fadd %f4 %q, %tmp6          ; <%f4> [#uses=1]
         store %f4 %R, %f4* %P
         ret void
@@ -158,7 +158,7 @@
         %tmp2 = insertelement %i4 %tmp, i32 %X, i32 1         
         %tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2       
         %tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3     
-        %q = load %i4* %Q               ; <%i4> [#uses=1]
+        %q = load %i4, %i4* %Q               ; <%i4> [#uses=1]
         %R = add %i4 %q, %tmp6          ; <%i4> [#uses=1]
         store %i4 %R, %i4* %P
         ret void
diff --git a/llvm/test/CodeGen/PowerPC/vsx-div.ll b/llvm/test/CodeGen/PowerPC/vsx-div.ll
index 8a9578e..0e83885 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-div.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-div.ll
@@ -7,7 +7,7 @@
 
 define void @test1() {
 entry:
-  %0 = load <4 x float>* @vf, align 16
+  %0 = load <4 x float>, <4 x float>* @vf, align 16
   %1 = tail call <4 x float> @llvm.ppc.vsx.xvdivsp(<4 x float> %0, <4 x float> %0)
   store <4 x float> %1, <4 x float>* @vf_res, align 16
   ret void
@@ -17,7 +17,7 @@
 
 define void @test2() {
 entry:
-  %0 = load <2 x double>* @vd, align 16
+  %0 = load <2 x double>, <2 x double>* @vd, align 16
   %1 = tail call <2 x double> @llvm.ppc.vsx.xvdivdp(<2 x double> %0, <2 x double> %0)
   store <2 x double> %1, <2 x double>* @vd_res, align 16
   ret void
diff --git a/llvm/test/CodeGen/PowerPC/vsx-infl-copy1.ll b/llvm/test/CodeGen/PowerPC/vsx-infl-copy1.ll
index 424e67e..531e3ad 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-infl-copy1.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-infl-copy1.ll
@@ -26,69 +26,69 @@
   %vec.phi28 = phi <4 x i32> [ zeroinitializer, %entry ], [ %51, %vector.body ]
   %vec.phi29 = phi <4 x i32> [ zeroinitializer, %entry ], [ %52, %vector.body ]
   %vec.phi30 = phi <4 x i32> [ zeroinitializer, %entry ], [ %53, %vector.body ]
-  %wide.load32 = load <4 x i32>* null, align 4
+  %wide.load32 = load <4 x i32>, <4 x i32>* null, align 4
   %.sum82 = add i64 %index, 24
   %0 = getelementptr [1024 x i32], [1024 x i32]* @ub, i64 0, i64 %.sum82
   %1 = bitcast i32* %0 to <4 x i32>*
-  %wide.load36 = load <4 x i32>* %1, align 4
-  %wide.load37 = load <4 x i32>* undef, align 4
+  %wide.load36 = load <4 x i32>, <4 x i32>* %1, align 4
+  %wide.load37 = load <4 x i32>, <4 x i32>* undef, align 4
   %.sum84 = add i64 %index, 32
   %2 = getelementptr [1024 x i32], [1024 x i32]* @ub, i64 0, i64 %.sum84
   %3 = bitcast i32* %2 to <4 x i32>*
-  %wide.load38 = load <4 x i32>* %3, align 4
+  %wide.load38 = load <4 x i32>, <4 x i32>* %3, align 4
   %.sum85 = add i64 %index, 36
   %4 = getelementptr [1024 x i32], [1024 x i32]* @ub, i64 0, i64 %.sum85
   %5 = bitcast i32* %4 to <4 x i32>*
-  %wide.load39 = load <4 x i32>* %5, align 4
+  %wide.load39 = load <4 x i32>, <4 x i32>* %5, align 4
   %6 = getelementptr [1024 x i32], [1024 x i32]* @ub, i64 0, i64 undef
   %7 = bitcast i32* %6 to <4 x i32>*
-  %wide.load40 = load <4 x i32>* %7, align 4
+  %wide.load40 = load <4 x i32>, <4 x i32>* %7, align 4
   %.sum87 = add i64 %index, 44
   %8 = getelementptr [1024 x i32], [1024 x i32]* @ub, i64 0, i64 %.sum87
   %9 = bitcast i32* %8 to <4 x i32>*
-  %wide.load41 = load <4 x i32>* %9, align 4
+  %wide.load41 = load <4 x i32>, <4 x i32>* %9, align 4
   %10 = getelementptr inbounds [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %index
   %11 = bitcast i32* %10 to <4 x i32>*
-  %wide.load42 = load <4 x i32>* %11, align 4
+  %wide.load42 = load <4 x i32>, <4 x i32>* %11, align 4
   %.sum8889 = or i64 %index, 4
   %12 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %.sum8889
   %13 = bitcast i32* %12 to <4 x i32>*
-  %wide.load43 = load <4 x i32>* %13, align 4
+  %wide.load43 = load <4 x i32>, <4 x i32>* %13, align 4
   %.sum9091 = or i64 %index, 8
   %14 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %.sum9091
   %15 = bitcast i32* %14 to <4 x i32>*
-  %wide.load44 = load <4 x i32>* %15, align 4
+  %wide.load44 = load <4 x i32>, <4 x i32>* %15, align 4
   %.sum94 = add i64 %index, 16
   %16 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %.sum94
   %17 = bitcast i32* %16 to <4 x i32>*
-  %wide.load46 = load <4 x i32>* %17, align 4
+  %wide.load46 = load <4 x i32>, <4 x i32>* %17, align 4
   %.sum95 = add i64 %index, 20
   %18 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %.sum95
   %19 = bitcast i32* %18 to <4 x i32>*
-  %wide.load47 = load <4 x i32>* %19, align 4
+  %wide.load47 = load <4 x i32>, <4 x i32>* %19, align 4
   %20 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 undef
   %21 = bitcast i32* %20 to <4 x i32>*
-  %wide.load48 = load <4 x i32>* %21, align 4
+  %wide.load48 = load <4 x i32>, <4 x i32>* %21, align 4
   %.sum97 = add i64 %index, 28
   %22 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %.sum97
   %23 = bitcast i32* %22 to <4 x i32>*
-  %wide.load49 = load <4 x i32>* %23, align 4
+  %wide.load49 = load <4 x i32>, <4 x i32>* %23, align 4
   %.sum98 = add i64 %index, 32
   %24 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %.sum98
   %25 = bitcast i32* %24 to <4 x i32>*
-  %wide.load50 = load <4 x i32>* %25, align 4
+  %wide.load50 = load <4 x i32>, <4 x i32>* %25, align 4
   %.sum99 = add i64 %index, 36
   %26 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %.sum99
   %27 = bitcast i32* %26 to <4 x i32>*
-  %wide.load51 = load <4 x i32>* %27, align 4
+  %wide.load51 = load <4 x i32>, <4 x i32>* %27, align 4
   %.sum100 = add i64 %index, 40
   %28 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %.sum100
   %29 = bitcast i32* %28 to <4 x i32>*
-  %wide.load52 = load <4 x i32>* %29, align 4
+  %wide.load52 = load <4 x i32>, <4 x i32>* %29, align 4
   %.sum101 = add i64 %index, 44
   %30 = getelementptr [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %.sum101
   %31 = bitcast i32* %30 to <4 x i32>*
-  %wide.load53 = load <4 x i32>* %31, align 4
+  %wide.load53 = load <4 x i32>, <4 x i32>* %31, align 4
   %32 = add <4 x i32> zeroinitializer, %vec.phi
   %33 = add <4 x i32> zeroinitializer, %vec.phi20
   %34 = add <4 x i32> %wide.load32, %vec.phi21
diff --git a/llvm/test/CodeGen/PowerPC/vsx-infl-copy2.ll b/llvm/test/CodeGen/PowerPC/vsx-infl-copy2.ll
index 3b86e34..32d6f1e 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-infl-copy2.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-infl-copy2.ll
@@ -29,39 +29,39 @@
   %vec.phi70 = phi <4 x i32> [ %41, %vector.body ], [ zeroinitializer, %loop_start.preheader ]
   %vec.phi71 = phi <4 x i32> [ %42, %vector.body ], [ zeroinitializer, %loop_start.preheader ]
   %.sum = add i64 0, 4
-  %wide.load72 = load <4 x i32>* null, align 4
+  %wide.load72 = load <4 x i32>, <4 x i32>* null, align 4
   %.sum109 = add i64 0, 8
   %0 = getelementptr i32, i32* %first, i64 %.sum109
   %1 = bitcast i32* %0 to <4 x i32>*
-  %wide.load73 = load <4 x i32>* %1, align 4
+  %wide.load73 = load <4 x i32>, <4 x i32>* %1, align 4
   %.sum110 = add i64 0, 12
   %2 = getelementptr i32, i32* %first, i64 %.sum110
   %3 = bitcast i32* %2 to <4 x i32>*
-  %wide.load74 = load <4 x i32>* %3, align 4
+  %wide.load74 = load <4 x i32>, <4 x i32>* %3, align 4
   %.sum112 = add i64 0, 20
   %4 = getelementptr i32, i32* %first, i64 %.sum112
   %5 = bitcast i32* %4 to <4 x i32>*
-  %wide.load76 = load <4 x i32>* %5, align 4
+  %wide.load76 = load <4 x i32>, <4 x i32>* %5, align 4
   %.sum114 = add i64 0, 28
   %6 = getelementptr i32, i32* %first, i64 %.sum114
   %7 = bitcast i32* %6 to <4 x i32>*
-  %wide.load78 = load <4 x i32>* %7, align 4
+  %wide.load78 = load <4 x i32>, <4 x i32>* %7, align 4
   %.sum115 = add i64 0, 32
   %8 = getelementptr i32, i32* %first, i64 %.sum115
   %9 = bitcast i32* %8 to <4 x i32>*
-  %wide.load79 = load <4 x i32>* %9, align 4
+  %wide.load79 = load <4 x i32>, <4 x i32>* %9, align 4
   %.sum116 = add i64 0, 36
   %10 = getelementptr i32, i32* %first, i64 %.sum116
   %11 = bitcast i32* %10 to <4 x i32>*
-  %wide.load80 = load <4 x i32>* %11, align 4
+  %wide.load80 = load <4 x i32>, <4 x i32>* %11, align 4
   %.sum117 = add i64 0, 40
   %12 = getelementptr i32, i32* %first, i64 %.sum117
   %13 = bitcast i32* %12 to <4 x i32>*
-  %wide.load81 = load <4 x i32>* %13, align 4
+  %wide.load81 = load <4 x i32>, <4 x i32>* %13, align 4
   %.sum118 = add i64 0, 44
   %14 = getelementptr i32, i32* %first, i64 %.sum118
   %15 = bitcast i32* %14 to <4 x i32>*
-  %wide.load82 = load <4 x i32>* %15, align 4
+  %wide.load82 = load <4 x i32>, <4 x i32>* %15, align 4
   %16 = mul <4 x i32> %wide.load72, <i32 269850533, i32 269850533, i32 269850533, i32 269850533>
   %17 = mul <4 x i32> %wide.load73, <i32 269850533, i32 269850533, i32 269850533, i32 269850533>
   %18 = mul <4 x i32> %wide.load74, <i32 269850533, i32 269850533, i32 269850533, i32 269850533>
diff --git a/llvm/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll b/llvm/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll
index c65a077..e6ddf64 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll
@@ -51,24 +51,24 @@
   %__b.addr.i = alloca <4 x i32>*, align 8
   store i32 0, i32* %__a.addr.i, align 4
   store <4 x i32>* @vsi, <4 x i32>** %__b.addr.i, align 8
-  %0 = load i32* %__a.addr.i, align 4
-  %1 = load <4 x i32>** %__b.addr.i, align 8
+  %0 = load i32, i32* %__a.addr.i, align 4
+  %1 = load <4 x i32>*, <4 x i32>** %__b.addr.i, align 8
   %2 = bitcast <4 x i32>* %1 to i8*
   %3 = getelementptr i8, i8* %2, i32 %0
   %4 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %3)
   store <4 x i32> %4, <4 x i32>* @res_vsi, align 16
   store i32 0, i32* %__a.addr.i31, align 4
   store <4 x i32>* @vui, <4 x i32>** %__b.addr.i32, align 8
-  %5 = load i32* %__a.addr.i31, align 4
-  %6 = load <4 x i32>** %__b.addr.i32, align 8
+  %5 = load i32, i32* %__a.addr.i31, align 4
+  %6 = load <4 x i32>*, <4 x i32>** %__b.addr.i32, align 8
   %7 = bitcast <4 x i32>* %6 to i8*
   %8 = getelementptr i8, i8* %7, i32 %5
   %9 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %8)
   store <4 x i32> %9, <4 x i32>* @res_vui, align 16
   store i32 0, i32* %__a.addr.i29, align 4
   store <4 x float>* @vf, <4 x float>** %__b.addr.i30, align 8
-  %10 = load i32* %__a.addr.i29, align 4
-  %11 = load <4 x float>** %__b.addr.i30, align 8
+  %10 = load i32, i32* %__a.addr.i29, align 4
+  %11 = load <4 x float>*, <4 x float>** %__b.addr.i30, align 8
   %12 = bitcast <4 x float>* %11 to i8*
   %13 = getelementptr i8, i8* %12, i32 %10
   %14 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %13)
@@ -76,8 +76,8 @@
   store <4 x float> %15, <4 x float>* @res_vf, align 16
   store i32 0, i32* %__a.addr.i27, align 4
   store <2 x i64>* @vsll, <2 x i64>** %__b.addr.i28, align 8
-  %16 = load i32* %__a.addr.i27, align 4
-  %17 = load <2 x i64>** %__b.addr.i28, align 8
+  %16 = load i32, i32* %__a.addr.i27, align 4
+  %17 = load <2 x i64>*, <2 x i64>** %__b.addr.i28, align 8
   %18 = bitcast <2 x i64>* %17 to i8*
   %19 = getelementptr i8, i8* %18, i32 %16
   %20 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %19)
@@ -85,8 +85,8 @@
   store <2 x i64> %21, <2 x i64>* @res_vsll, align 16
   store i32 0, i32* %__a.addr.i25, align 4
   store <2 x i64>* @vull, <2 x i64>** %__b.addr.i26, align 8
-  %22 = load i32* %__a.addr.i25, align 4
-  %23 = load <2 x i64>** %__b.addr.i26, align 8
+  %22 = load i32, i32* %__a.addr.i25, align 4
+  %23 = load <2 x i64>*, <2 x i64>** %__b.addr.i26, align 8
   %24 = bitcast <2 x i64>* %23 to i8*
   %25 = getelementptr i8, i8* %24, i32 %22
   %26 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %25)
@@ -94,72 +94,72 @@
   store <2 x i64> %27, <2 x i64>* @res_vull, align 16
   store i32 0, i32* %__a.addr.i23, align 4
   store <2 x double>* @vd, <2 x double>** %__b.addr.i24, align 8
-  %28 = load i32* %__a.addr.i23, align 4
-  %29 = load <2 x double>** %__b.addr.i24, align 8
+  %28 = load i32, i32* %__a.addr.i23, align 4
+  %29 = load <2 x double>*, <2 x double>** %__b.addr.i24, align 8
   %30 = bitcast <2 x double>* %29 to i8*
   %31 = getelementptr i8, i8* %30, i32 %28
   %32 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %31)
   store <2 x double> %32, <2 x double>* @res_vd, align 16
-  %33 = load <4 x i32>* @vsi, align 16
+  %33 = load <4 x i32>, <4 x i32>* @vsi, align 16
   store <4 x i32> %33, <4 x i32>* %__a.addr.i20, align 16
   store i32 0, i32* %__b.addr.i21, align 4
   store <4 x i32>* @res_vsi, <4 x i32>** %__c.addr.i22, align 8
-  %34 = load <4 x i32>* %__a.addr.i20, align 16
-  %35 = load i32* %__b.addr.i21, align 4
-  %36 = load <4 x i32>** %__c.addr.i22, align 8
+  %34 = load <4 x i32>, <4 x i32>* %__a.addr.i20, align 16
+  %35 = load i32, i32* %__b.addr.i21, align 4
+  %36 = load <4 x i32>*, <4 x i32>** %__c.addr.i22, align 8
   %37 = bitcast <4 x i32>* %36 to i8*
   %38 = getelementptr i8, i8* %37, i32 %35
   call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %34, i8* %38)
-  %39 = load <4 x i32>* @vui, align 16
+  %39 = load <4 x i32>, <4 x i32>* @vui, align 16
   store <4 x i32> %39, <4 x i32>* %__a.addr.i17, align 16
   store i32 0, i32* %__b.addr.i18, align 4
   store <4 x i32>* @res_vui, <4 x i32>** %__c.addr.i19, align 8
-  %40 = load <4 x i32>* %__a.addr.i17, align 16
-  %41 = load i32* %__b.addr.i18, align 4
-  %42 = load <4 x i32>** %__c.addr.i19, align 8
+  %40 = load <4 x i32>, <4 x i32>* %__a.addr.i17, align 16
+  %41 = load i32, i32* %__b.addr.i18, align 4
+  %42 = load <4 x i32>*, <4 x i32>** %__c.addr.i19, align 8
   %43 = bitcast <4 x i32>* %42 to i8*
   %44 = getelementptr i8, i8* %43, i32 %41
   call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %40, i8* %44)
-  %45 = load <4 x float>* @vf, align 16
+  %45 = load <4 x float>, <4 x float>* @vf, align 16
   store <4 x float> %45, <4 x float>* %__a.addr.i14, align 16
   store i32 0, i32* %__b.addr.i15, align 4
   store <4 x float>* @res_vf, <4 x float>** %__c.addr.i16, align 8
-  %46 = load <4 x float>* %__a.addr.i14, align 16
+  %46 = load <4 x float>, <4 x float>* %__a.addr.i14, align 16
   %47 = bitcast <4 x float> %46 to <4 x i32>
-  %48 = load i32* %__b.addr.i15, align 4
-  %49 = load <4 x float>** %__c.addr.i16, align 8
+  %48 = load i32, i32* %__b.addr.i15, align 4
+  %49 = load <4 x float>*, <4 x float>** %__c.addr.i16, align 8
   %50 = bitcast <4 x float>* %49 to i8*
   %51 = getelementptr i8, i8* %50, i32 %48
   call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %47, i8* %51) #1
-  %52 = load <2 x i64>* @vsll, align 16
+  %52 = load <2 x i64>, <2 x i64>* @vsll, align 16
   store <2 x i64> %52, <2 x i64>* %__a.addr.i11, align 16
   store i32 0, i32* %__b.addr.i12, align 4
   store <2 x i64>* @res_vsll, <2 x i64>** %__c.addr.i13, align 8
-  %53 = load <2 x i64>* %__a.addr.i11, align 16
+  %53 = load <2 x i64>, <2 x i64>* %__a.addr.i11, align 16
   %54 = bitcast <2 x i64> %53 to <2 x double>
-  %55 = load i32* %__b.addr.i12, align 4
-  %56 = load <2 x i64>** %__c.addr.i13, align 8
+  %55 = load i32, i32* %__b.addr.i12, align 4
+  %56 = load <2 x i64>*, <2 x i64>** %__c.addr.i13, align 8
   %57 = bitcast <2 x i64>* %56 to i8*
   %58 = getelementptr i8, i8* %57, i32 %55
   call void @llvm.ppc.vsx.stxvd2x(<2 x double> %54, i8* %58)
-  %59 = load <2 x i64>* @vull, align 16
+  %59 = load <2 x i64>, <2 x i64>* @vull, align 16
   store <2 x i64> %59, <2 x i64>* %__a.addr.i8, align 16
   store i32 0, i32* %__b.addr.i9, align 4
   store <2 x i64>* @res_vull, <2 x i64>** %__c.addr.i10, align 8
-  %60 = load <2 x i64>* %__a.addr.i8, align 16
+  %60 = load <2 x i64>, <2 x i64>* %__a.addr.i8, align 16
   %61 = bitcast <2 x i64> %60 to <2 x double>
-  %62 = load i32* %__b.addr.i9, align 4
-  %63 = load <2 x i64>** %__c.addr.i10, align 8
+  %62 = load i32, i32* %__b.addr.i9, align 4
+  %63 = load <2 x i64>*, <2 x i64>** %__c.addr.i10, align 8
   %64 = bitcast <2 x i64>* %63 to i8*
   %65 = getelementptr i8, i8* %64, i32 %62
   call void @llvm.ppc.vsx.stxvd2x(<2 x double> %61, i8* %65)
-  %66 = load <2 x double>* @vd, align 16
+  %66 = load <2 x double>, <2 x double>* @vd, align 16
   store <2 x double> %66, <2 x double>* %__a.addr.i6, align 16
   store i32 0, i32* %__b.addr.i7, align 4
   store <2 x double>* @res_vd, <2 x double>** %__c.addr.i, align 8
-  %67 = load <2 x double>* %__a.addr.i6, align 16
-  %68 = load i32* %__b.addr.i7, align 4
-  %69 = load <2 x double>** %__c.addr.i, align 8
+  %67 = load <2 x double>, <2 x double>* %__a.addr.i6, align 16
+  %68 = load i32, i32* %__b.addr.i7, align 4
+  %69 = load <2 x double>*, <2 x double>** %__c.addr.i, align 8
   %70 = bitcast <2 x double>* %69 to i8*
   %71 = getelementptr i8, i8* %70, i32 %68
   call void @llvm.ppc.vsx.stxvd2x(<2 x double> %67, i8* %71)
diff --git a/llvm/test/CodeGen/PowerPC/vsx-ldst.ll b/llvm/test/CodeGen/PowerPC/vsx-ldst.ll
index 688187d..4ed91bc 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-ldst.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-ldst.ll
@@ -30,12 +30,12 @@
 ; Function Attrs: nounwind
 define void @test1() {
 entry:
-  %0 = load <4 x i32>* @vsi, align 16
-  %1 = load <4 x i32>* @vui, align 16
-  %2 = load <4 x i32>* bitcast (<4 x float>* @vf to <4 x i32>*), align 16
-  %3 = load <2 x double>* bitcast (<2 x i64>* @vsll to <2 x double>*), align 16
-  %4 = load <2 x double>* bitcast (<2 x i64>* @vull to <2 x double>*), align 16
-  %5 = load <2 x double>* @vd, align 16
+  %0 = load <4 x i32>, <4 x i32>* @vsi, align 16
+  %1 = load <4 x i32>, <4 x i32>* @vui, align 16
+  %2 = load <4 x i32>, <4 x i32>* bitcast (<4 x float>* @vf to <4 x i32>*), align 16
+  %3 = load <2 x double>, <2 x double>* bitcast (<2 x i64>* @vsll to <2 x double>*), align 16
+  %4 = load <2 x double>, <2 x double>* bitcast (<2 x i64>* @vull to <2 x double>*), align 16
+  %5 = load <2 x double>, <2 x double>* @vd, align 16
   store <4 x i32> %0, <4 x i32>* @res_vsi, align 16
   store <4 x i32> %1, <4 x i32>* @res_vui, align 16
   store <4 x i32> %2, <4 x i32>* bitcast (<4 x float>* @res_vf to <4 x i32>*), align 16
diff --git a/llvm/test/CodeGen/PowerPC/vsx-minmax.ll b/llvm/test/CodeGen/PowerPC/vsx-minmax.ll
index 47f50ab..ad72cac 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-minmax.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-minmax.ll
@@ -18,35 +18,35 @@
 define void @test1() #0 {
 ; CHECK-LABEL: @test1
 entry:
-  %0 = load volatile <4 x float>* @vf, align 16
-  %1 = load volatile <4 x float>* @vf, align 16
+  %0 = load volatile <4 x float>, <4 x float>* @vf, align 16
+  %1 = load volatile <4 x float>, <4 x float>* @vf, align 16
   %2 = tail call <4 x float> @llvm.ppc.vsx.xvmaxsp(<4 x float> %0, <4 x float> %1)
 ; CHECK: xvmaxsp
   store <4 x float> %2, <4 x float>* @vf1, align 16
-  %3 = load <2 x double>* @vd, align 16
+  %3 = load <2 x double>, <2 x double>* @vd, align 16
   %4 = tail call <2 x double> @llvm.ppc.vsx.xvmaxdp(<2 x double> %3, <2 x double> %3)
 ; CHECK: xvmaxdp
   store <2 x double> %4, <2 x double>* @vd1, align 16
-  %5 = load volatile <4 x float>* @vf, align 16
-  %6 = load volatile <4 x float>* @vf, align 16
+  %5 = load volatile <4 x float>, <4 x float>* @vf, align 16
+  %6 = load volatile <4 x float>, <4 x float>* @vf, align 16
   %7 = tail call <4 x float> @llvm.ppc.vsx.xvmaxsp(<4 x float> %5, <4 x float> %6)
 ; CHECK: xvmaxsp
   store <4 x float> %7, <4 x float>* @vf2, align 16
-  %8 = load volatile <4 x float>* @vf, align 16
-  %9 = load volatile <4 x float>* @vf, align 16
+  %8 = load volatile <4 x float>, <4 x float>* @vf, align 16
+  %9 = load volatile <4 x float>, <4 x float>* @vf, align 16
   %10 = tail call <4 x float> @llvm.ppc.vsx.xvminsp(<4 x float> %8, <4 x float> %9)
 ; CHECK: xvminsp
   store <4 x float> %10, <4 x float>* @vf3, align 16
-  %11 = load <2 x double>* @vd, align 16
+  %11 = load <2 x double>, <2 x double>* @vd, align 16
   %12 = tail call <2 x double> @llvm.ppc.vsx.xvmindp(<2 x double> %11, <2 x double> %11)
 ; CHECK: xvmindp
   store <2 x double> %12, <2 x double>* @vd2, align 16
-  %13 = load volatile <4 x float>* @vf, align 16
-  %14 = load volatile <4 x float>* @vf, align 16
+  %13 = load volatile <4 x float>, <4 x float>* @vf, align 16
+  %14 = load volatile <4 x float>, <4 x float>* @vf, align 16
   %15 = tail call <4 x float> @llvm.ppc.vsx.xvminsp(<4 x float> %13, <4 x float> %14)
 ; CHECK: xvminsp
   store <4 x float> %15, <4 x float>* @vf4, align 16
-  %16 = load double* @d, align 8
+  %16 = load double, double* @d, align 8
   %17 = tail call double @llvm.ppc.vsx.xsmaxdp(double %16, double %16)
 ; CHECK: xsmaxdp
   store double %17, double* @d1, align 8
diff --git a/llvm/test/CodeGen/PowerPC/vsx-p8.ll b/llvm/test/CodeGen/PowerPC/vsx-p8.ll
index d5a1905..878714b 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-p8.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-p8.ll
@@ -8,7 +8,7 @@
 ; Unaligned loads/stores on P8 and later should use VSX where possible.
 
 define <2 x double> @test28u(<2 x double>* %a) {
-  %v = load <2 x double>* %a, align 8
+  %v = load <2 x double>, <2 x double>* %a, align 8
   ret <2 x double> %v
 
 ; CHECK-LABEL: @test28u
@@ -26,7 +26,7 @@
 }
 
 define <4 x float> @test32u(<4 x float>* %a) {
-  %v = load <4 x float>* %a, align 8
+  %v = load <4 x float>, <4 x float>* %a, align 8
   ret <4 x float> %v
 
 ; CHECK-REG-LABEL: @test32u
diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index f91ffdb..25cf3d4 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -501,7 +501,7 @@
 }
 
 define <2 x double> @test28(<2 x double>* %a) {
-  %v = load <2 x double>* %a, align 16
+  %v = load <2 x double>, <2 x double>* %a, align 16
   ret <2 x double> %v
 
 ; CHECK-LABEL: @test28
@@ -519,7 +519,7 @@
 }
 
 define <2 x double> @test28u(<2 x double>* %a) {
-  %v = load <2 x double>* %a, align 8
+  %v = load <2 x double>, <2 x double>* %a, align 8
   ret <2 x double> %v
 
 ; CHECK-LABEL: @test28u
@@ -537,7 +537,7 @@
 }
 
 define <2 x i64> @test30(<2 x i64>* %a) {
-  %v = load <2 x i64>* %a, align 16
+  %v = load <2 x i64>, <2 x i64>* %a, align 16
   ret <2 x i64> %v
 
 ; CHECK-REG-LABEL: @test30
@@ -562,7 +562,7 @@
 }
 
 define <4 x float> @test32(<4 x float>* %a) {
-  %v = load <4 x float>* %a, align 16
+  %v = load <4 x float>, <4 x float>* %a, align 16
   ret <4 x float> %v
 
 ; CHECK-REG-LABEL: @test32
@@ -590,7 +590,7 @@
 }
 
 define <4 x float> @test32u(<4 x float>* %a) {
-  %v = load <4 x float>* %a, align 8
+  %v = load <4 x float>, <4 x float>* %a, align 8
   ret <4 x float> %v
 
 ; CHECK-LABEL: @test32u
@@ -616,7 +616,7 @@
 }
 
 define <4 x i32> @test34(<4 x i32>* %a) {
-  %v = load <4 x i32>* %a, align 16
+  %v = load <4 x i32>, <4 x i32>* %a, align 16
   ret <4 x i32> %v
 
 ; CHECK-REG-LABEL: @test34
@@ -718,7 +718,7 @@
 }
 
 define <2 x double> @test50(double* %a) {
-  %v = load double* %a, align 8
+  %v = load double, double* %a, align 8
   %w = insertelement <2 x double> undef, double %v, i32 0
   %x = insertelement <2 x double> %w, double %v, i32 1
   ret <2 x double> %x
diff --git a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
index 0a9df37..e00cc4b 100644
--- a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -mcpu=pwr8 -mattr=+vsx -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
 
 define <2 x double> @testi0(<2 x double>* %p1, double* %p2) {
-  %v = load <2 x double>* %p1
-  %s = load double* %p2
+  %v = load <2 x double>, <2 x double>* %p1
+  %s = load double, double* %p2
   %r = insertelement <2 x double> %v, double %s, i32 0
   ret <2 x double> %r
 
@@ -15,8 +15,8 @@
 }
 
 define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
-  %v = load <2 x double>* %p1
-  %s = load double* %p2
+  %v = load <2 x double>, <2 x double>* %p1
+  %s = load double, double* %p2
   %r = insertelement <2 x double> %v, double %s, i32 1
   ret <2 x double> %r
 
@@ -29,7 +29,7 @@
 }
 
 define double @teste0(<2 x double>* %p1) {
-  %v = load <2 x double>* %p1
+  %v = load <2 x double>, <2 x double>* %p1
   %r = extractelement <2 x double> %v, i32 0
   ret double %r
 
@@ -42,7 +42,7 @@
 }
 
 define double @teste1(<2 x double>* %p1) {
-  %v = load <2 x double>* %p1
+  %v = load <2 x double>, <2 x double>* %p1
   %r = extractelement <2 x double> %v, i32 1
   ret double %r
 
diff --git a/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll b/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll
index 588cfda..6bfde93 100644
--- a/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -mcpu=pwr8 -mattr=+vsx -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
 
 define <2 x double> @test00(<2 x double>* %p1, <2 x double>* %p2) {
-  %v1 = load <2 x double>* %p1
-  %v2 = load <2 x double>* %p2
+  %v1 = load <2 x double>, <2 x double>* %p1
+  %v2 = load <2 x double>, <2 x double>* %p2
   %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 0, i32 0>
   ret <2 x double> %v3
 
@@ -13,8 +13,8 @@
 }
 
 define <2 x double> @test01(<2 x double>* %p1, <2 x double>* %p2) {
-  %v1 = load <2 x double>* %p1
-  %v2 = load <2 x double>* %p2
+  %v1 = load <2 x double>, <2 x double>* %p1
+  %v2 = load <2 x double>, <2 x double>* %p2
   %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 0, i32 1>
   ret <2 x double> %v3
 
@@ -24,8 +24,8 @@
 }
 
 define <2 x double> @test02(<2 x double>* %p1, <2 x double>* %p2) {
-  %v1 = load <2 x double>* %p1
-  %v2 = load <2 x double>* %p2
+  %v1 = load <2 x double>, <2 x double>* %p1
+  %v2 = load <2 x double>, <2 x double>* %p2
   %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 0, i32 2>
   ret <2 x double> %v3
 
@@ -38,8 +38,8 @@
 }
 
 define <2 x double> @test03(<2 x double>* %p1, <2 x double>* %p2) {
-  %v1 = load <2 x double>* %p1
-  %v2 = load <2 x double>* %p2
+  %v1 = load <2 x double>, <2 x double>* %p1
+  %v2 = load <2 x double>, <2 x double>* %p2
   %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 0, i32 3>
   ret <2 x double> %v3
 
@@ -52,8 +52,8 @@
 }
 
 define <2 x double> @test10(<2 x double>* %p1, <2 x double>* %p2) {
-  %v1 = load <2 x double>* %p1
-  %v2 = load <2 x double>* %p2
+  %v1 = load <2 x double>, <2 x double>* %p1
+  %v2 = load <2 x double>, <2 x double>* %p2
   %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 1, i32 0>
   ret <2 x double> %v3
 
@@ -64,8 +64,8 @@
 }
 
 define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) {
-  %v1 = load <2 x double>* %p1
-  %v2 = load <2 x double>* %p2
+  %v1 = load <2 x double>, <2 x double>* %p1
+  %v2 = load <2 x double>, <2 x double>* %p2
   %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 1, i32 1>
   ret <2 x double> %v3
 
@@ -76,8 +76,8 @@
 }
 
 define <2 x double> @test12(<2 x double>* %p1, <2 x double>* %p2) {
-  %v1 = load <2 x double>* %p1
-  %v2 = load <2 x double>* %p2
+  %v1 = load <2 x double>, <2 x double>* %p1
+  %v2 = load <2 x double>, <2 x double>* %p2
   %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 1, i32 2>
   ret <2 x double> %v3
 
@@ -90,8 +90,8 @@
 }
 
 define <2 x double> @test13(<2 x double>* %p1, <2 x double>* %p2) {
-  %v1 = load <2 x double>* %p1
-  %v2 = load <2 x double>* %p2
+  %v1 = load <2 x double>, <2 x double>* %p1
+  %v2 = load <2 x double>, <2 x double>* %p2
   %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 1, i32 3>
   ret <2 x double> %v3
 
@@ -104,8 +104,8 @@
 }
 
 define <2 x double> @test20(<2 x double>* %p1, <2 x double>* %p2) {
-  %v1 = load <2 x double>* %p1
-  %v2 = load <2 x double>* %p2
+  %v1 = load <2 x double>, <2 x double>* %p1
+  %v2 = load <2 x double>, <2 x double>* %p2
   %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 2, i32 0>
   ret <2 x double> %v3
 
@@ -118,8 +118,8 @@
 }
 
 define <2 x double> @test21(<2 x double>* %p1, <2 x double>* %p2) {
-  %v1 = load <2 x double>* %p1
-  %v2 = load <2 x double>* %p2
+  %v1 = load <2 x double>, <2 x double>* %p1
+  %v2 = load <2 x double>, <2 x double>* %p2
   %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 2, i32 1>
   ret <2 x double> %v3
 
@@ -132,8 +132,8 @@
 }
 
 define <2 x double> @test22(<2 x double>* %p1, <2 x double>* %p2) {
-  %v1 = load <2 x double>* %p1
-  %v2 = load <2 x double>* %p2
+  %v1 = load <2 x double>, <2 x double>* %p1
+  %v2 = load <2 x double>, <2 x double>* %p2
   %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 2, i32 2>
   ret <2 x double> %v3
 
@@ -144,8 +144,8 @@
 }
 
 define <2 x double> @test23(<2 x double>* %p1, <2 x double>* %p2) {
-  %v1 = load <2 x double>* %p1
-  %v2 = load <2 x double>* %p2
+  %v1 = load <2 x double>, <2 x double>* %p1
+  %v2 = load <2 x double>, <2 x double>* %p2
   %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 2, i32 3>
   ret <2 x double> %v3
 
@@ -155,8 +155,8 @@
 }
 
 define <2 x double> @test30(<2 x double>* %p1, <2 x double>* %p2) {
-  %v1 = load <2 x double>* %p1
-  %v2 = load <2 x double>* %p2
+  %v1 = load <2 x double>, <2 x double>* %p1
+  %v2 = load <2 x double>, <2 x double>* %p2
   %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 3, i32 0>
   ret <2 x double> %v3
 
@@ -169,8 +169,8 @@
 }
 
 define <2 x double> @test31(<2 x double>* %p1, <2 x double>* %p2) {
-  %v1 = load <2 x double>* %p1
-  %v2 = load <2 x double>* %p2
+  %v1 = load <2 x double>, <2 x double>* %p1
+  %v2 = load <2 x double>, <2 x double>* %p2
   %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 3, i32 1>
   ret <2 x double> %v3
 
@@ -183,8 +183,8 @@
 }
 
 define <2 x double> @test32(<2 x double>* %p1, <2 x double>* %p2) {
-  %v1 = load <2 x double>* %p1
-  %v2 = load <2 x double>* %p2
+  %v1 = load <2 x double>, <2 x double>* %p1
+  %v2 = load <2 x double>, <2 x double>* %p2
   %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 3, i32 2>
   ret <2 x double> %v3
 
@@ -195,8 +195,8 @@
 }
 
 define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) {
-  %v1 = load <2 x double>* %p1
-  %v2 = load <2 x double>* %p2
+  %v1 = load <2 x double>, <2 x double>* %p1
+  %v2 = load <2 x double>, <2 x double>* %p2
   %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> < i32 3, i32 3>
   ret <2 x double> %v3
 
diff --git a/llvm/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll b/llvm/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll
index e038b3f..0b87613 100644
--- a/llvm/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll
+++ b/llvm/test/CodeGen/PowerPC/weak_def_can_be_hidden.ll
@@ -11,7 +11,7 @@
 ; CHECK-D89: .weak_definition _v1
 
 define i32 @f1() {
-  %x = load i32 * @v1
+  %x = load i32 , i32 * @v1
   ret i32 %x
 }
 
@@ -45,6 +45,6 @@
 ; CHECK-D89: .weak_definition _v4
 
 define i32 @f4() {
-  %x = load i32 * @v4
+  %x = load i32 , i32 * @v4
   ret i32 %x
 }
diff --git a/llvm/test/CodeGen/PowerPC/zero-not-run.ll b/llvm/test/CodeGen/PowerPC/zero-not-run.ll
index 9df0d6e..b3b7634 100644
--- a/llvm/test/CodeGen/PowerPC/zero-not-run.ll
+++ b/llvm/test/CodeGen/PowerPC/zero-not-run.ll
@@ -8,7 +8,7 @@
   br i1 undef, label %for.body, label %for.end731
 
 for.body:                                         ; preds = %entry
-  %0 = load i32* undef, align 4
+  %0 = load i32, i32* undef, align 4
   %or31 = or i32 %0, 319143828
   store i32 %or31, i32* undef, align 4
   %cmp32 = icmp eq i32 319143828, %or31
diff --git a/llvm/test/CodeGen/PowerPC/zext-free.ll b/llvm/test/CodeGen/PowerPC/zext-free.ll
index 09651cf..ffbbb54 100644
--- a/llvm/test/CodeGen/PowerPC/zext-free.ll
+++ b/llvm/test/CodeGen/PowerPC/zext-free.ll
@@ -5,16 +5,16 @@
 ; Function Attrs: noreturn nounwind
 define signext i32 @_Z1fRPc(i8** nocapture dereferenceable(8) %p) #0 {
 entry:
-  %.pre = load i8** %p, align 8
+  %.pre = load i8*, i8** %p, align 8
   br label %loop
 
 loop:                                             ; preds = %loop.backedge, %entry
   %0 = phi i8* [ %.pre, %entry ], [ %.be, %loop.backedge ]
-  %1 = load i8* %0, align 1
+  %1 = load i8, i8* %0, align 1
   %tobool = icmp eq i8 %1, 0
   %incdec.ptr = getelementptr inbounds i8, i8* %0, i64 1
   store i8* %incdec.ptr, i8** %p, align 8
-  %2 = load i8* %incdec.ptr, align 1
+  %2 = load i8, i8* %incdec.ptr, align 1
   %tobool2 = icmp ne i8 %2, 0
   %or.cond = and i1 %tobool, %tobool2
   br i1 %or.cond, label %if.then3, label %loop.backedge
diff --git a/llvm/test/CodeGen/R600/32-bit-local-address-space.ll b/llvm/test/CodeGen/R600/32-bit-local-address-space.ll
index ee0c4f0..5a6ce2f5 100644
--- a/llvm/test/CodeGen/R600/32-bit-local-address-space.ll
+++ b/llvm/test/CodeGen/R600/32-bit-local-address-space.ll
@@ -15,7 +15,7 @@
 ; SI: ds_read_b32 v{{[0-9]+}}, [[PTR]]
 define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 entry:
-  %0 = load i32 addrspace(3)* %in
+  %0 = load i32, i32 addrspace(3)* %in
   store i32 %0, i32 addrspace(1)* %out
   ret void
 }
@@ -27,7 +27,7 @@
 define void @local_address_gep(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %offset) {
 entry:
   %0 = getelementptr i32, i32 addrspace(3)* %in, i32 %offset
-  %1 = load i32 addrspace(3)* %0
+  %1 = load i32, i32 addrspace(3)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -38,7 +38,7 @@
 define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 entry:
   %0 = getelementptr i32, i32 addrspace(3)* %in, i32 1
-  %1 = load i32 addrspace(3)* %0
+  %1 = load i32, i32 addrspace(3)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -51,7 +51,7 @@
 define void @local_address_gep_large_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 entry:
   %0 = getelementptr i32, i32 addrspace(3)* %in, i32 16385
-  %1 = load i32 addrspace(3)* %0
+  %1 = load i32, i32 addrspace(3)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -73,7 +73,7 @@
 ; SI: ds_read_b32
 define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %lds, i32 %tid) {
   %ptr = getelementptr [3 x float], [3 x float] addrspace(3)* %lds, i32 %tid, i32 0
-  %val = load float addrspace(3)* %ptr
+  %val = load float, float addrspace(3)* %ptr
   store float %val, float addrspace(1)* %out
   ret void
 }
@@ -84,7 +84,7 @@
 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
 ; SI: ds_read_b32 v{{[0-9]+}}, [[REG]]
 define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %tid) {
-  %val = load float addrspace(3)* @g_lds
+  %val = load float, float addrspace(3)* @g_lds
   store float %val, float addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/add-debug.ll b/llvm/test/CodeGen/R600/add-debug.ll
index a83c689..529905d 100644
--- a/llvm/test/CodeGen/R600/add-debug.ll
+++ b/llvm/test/CodeGen/R600/add-debug.ll
@@ -9,7 +9,7 @@
   br i1 %0, label %if, label %else
 
 if:
-  %1 = load i64 addrspace(1)* %in
+  %1 = load i64, i64 addrspace(1)* %in
   br label %endif
 
 else:
diff --git a/llvm/test/CodeGen/R600/add.ll b/llvm/test/CodeGen/R600/add.ll
index ca95af3..7027161 100644
--- a/llvm/test/CodeGen/R600/add.ll
+++ b/llvm/test/CodeGen/R600/add.ll
@@ -10,8 +10,8 @@
 ;SI: buffer_store_dword [[REG]],
 define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32 addrspace(1)* %in
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %in
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %result = add i32 %a, %b
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -26,8 +26,8 @@
 
 define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1)* %in
-  %b = load <2 x i32> addrspace(1)* %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
   %result = add <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -46,8 +46,8 @@
 
 define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1)* %in
-  %b = load <4 x i32> addrspace(1)* %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
   %result = add <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -136,7 +136,7 @@
 ; SI-NOT: v_addc_u32_e32 s
 define void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
 entry:
-  %0 = load i64 addrspace(1)* %in
+  %0 = load i64, i64 addrspace(1)* %in
   %1 = add i64 %a, %0
   store i64 %1, i64 addrspace(1)* %out
   ret void
@@ -152,7 +152,7 @@
   br i1 %0, label %if, label %else
 
 if:
-  %1 = load i64 addrspace(1)* %in
+  %1 = load i64, i64 addrspace(1)* %in
   br label %endif
 
 else:
diff --git a/llvm/test/CodeGen/R600/add_i64.ll b/llvm/test/CodeGen/R600/add_i64.ll
index 1e1065a..8346add 100644
--- a/llvm/test/CodeGen/R600/add_i64.ll
+++ b/llvm/test/CodeGen/R600/add_i64.ll
@@ -10,8 +10,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() readnone
   %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
   %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
-  %a = load i64 addrspace(1)* %a_ptr
-  %b = load i64 addrspace(1)* %b_ptr
+  %a = load i64, i64 addrspace(1)* %a_ptr
+  %b = load i64, i64 addrspace(1)* %b_ptr
   %result = add i64 %a, %b
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -22,7 +22,7 @@
 ; SI: v_add_i32
 ; SI: v_addc_u32
 define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {
-  %foo = load i64 addrspace(1)* %in, align 8
+  %foo = load i64, i64 addrspace(1)* %in, align 8
   %result = add i64 %foo, %a
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -35,7 +35,7 @@
 ; SI: v_add_i32
 ; SI: v_addc_u32
 define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {
-  %foo = load i64 addrspace(1)* %in, align 8
+  %foo = load i64, i64 addrspace(1)* %in, align 8
   %result = add i64 %a, %foo
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -62,8 +62,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() readnone
   %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
   %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
-  %a = load <2 x i64> addrspace(1)* %a_ptr
-  %b = load <2 x i64> addrspace(1)* %b_ptr
+  %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr
+  %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
   %result = add <2 x i64> %a, %b
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/address-space.ll b/llvm/test/CodeGen/R600/address-space.ll
index 97e3d93..4be8c58 100644
--- a/llvm/test/CodeGen/R600/address-space.ll
+++ b/llvm/test/CodeGen/R600/address-space.ll
@@ -21,8 +21,8 @@
   br label %bb32
 
 bb32:
-  %a = load float addrspace(3)* %x, align 4
-  %b = load float addrspace(3)* %y, align 4
+  %a = load float, float addrspace(3)* %x, align 4
+  %b = load float, float addrspace(3)* %y, align 4
   %cmp = fcmp one float %a, %b
   br i1 %cmp, label %bb34, label %bb33
 
diff --git a/llvm/test/CodeGen/R600/and.ll b/llvm/test/CodeGen/R600/and.ll
index 54aaba7..5672d47 100644
--- a/llvm/test/CodeGen/R600/and.ll
+++ b/llvm/test/CodeGen/R600/and.ll
@@ -11,8 +11,8 @@
 
 define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1) * %in
-  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
   %result = and <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -31,8 +31,8 @@
 
 define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
   %result = and <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -57,8 +57,8 @@
 ; FUNC-LABEL: {{^}}v_and_i32:
 ; SI: v_and_b32
 define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
-  %a = load i32 addrspace(1)* %aptr, align 4
-  %b = load i32 addrspace(1)* %bptr, align 4
+  %a = load i32, i32 addrspace(1)* %aptr, align 4
+  %b = load i32, i32 addrspace(1)* %bptr, align 4
   %and = and i32 %a, %b
   store i32 %and, i32 addrspace(1)* %out, align 4
   ret void
@@ -67,7 +67,7 @@
 ; FUNC-LABEL: {{^}}v_and_constant_i32
 ; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}}
 define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
-  %a = load i32 addrspace(1)* %aptr, align 4
+  %a = load i32, i32 addrspace(1)* %aptr, align 4
   %and = and i32 %a, 1234567
   store i32 %and, i32 addrspace(1)* %out, align 4
   ret void
@@ -76,7 +76,7 @@
 ; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32
 ; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}}
 define void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
-  %a = load i32 addrspace(1)* %aptr, align 4
+  %a = load i32, i32 addrspace(1)* %aptr, align 4
   %and = and i32 %a, 64
   store i32 %and, i32 addrspace(1)* %out, align 4
   ret void
@@ -85,7 +85,7 @@
 ; FUNC-LABEL: {{^}}v_and_inline_imm_neg_16_i32
 ; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}}
 define void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
-  %a = load i32 addrspace(1)* %aptr, align 4
+  %a = load i32, i32 addrspace(1)* %aptr, align 4
   %and = and i32 %a, -16
   store i32 %and, i32 addrspace(1)* %out, align 4
   ret void
@@ -120,8 +120,8 @@
 ; SI: v_and_b32
 ; SI: v_and_b32
 define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
-  %a = load i64 addrspace(1)* %aptr, align 8
-  %b = load i64 addrspace(1)* %bptr, align 8
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
+  %b = load i64, i64 addrspace(1)* %bptr, align 8
   %and = and i64 %a, %b
   store i64 %and, i64 addrspace(1)* %out, align 8
   ret void
@@ -136,8 +136,8 @@
   br i1 %tmp0, label %if, label %endif
 
 if:
-  %a = load i64 addrspace(1)* %aptr, align 8
-  %b = load i64 addrspace(1)* %bptr, align 8
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
+  %b = load i64, i64 addrspace(1)* %bptr, align 8
   %and = and i64 %a, %b
   br label %endif
 
@@ -151,7 +151,7 @@
 ; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 ; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
-  %a = load i64 addrspace(1)* %aptr, align 8
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
   %and = and i64 %a, 1234567
   store i64 %and, i64 addrspace(1)* %out, align 8
   ret void
@@ -162,7 +162,7 @@
 ; SI: v_and_b32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
 ; SI: v_and_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
 define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
-  %a = load i64 addrspace(1)* %aptr, align 8
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
   %and = and i64 %a, 64
   store i64 %and, i64 addrspace(1)* %out, align 8
   ret void
diff --git a/llvm/test/CodeGen/R600/array-ptr-calc-i32.ll b/llvm/test/CodeGen/R600/array-ptr-calc-i32.ll
index e588e29..8c2a079 100644
--- a/llvm/test/CodeGen/R600/array-ptr-calc-i32.ll
+++ b/llvm/test/CodeGen/R600/array-ptr-calc-i32.ll
@@ -29,14 +29,14 @@
   %tid = call i32 @llvm.SI.tid() readnone
   %a_ptr = getelementptr i32, i32 addrspace(1)* %inA, i32 %tid
   %b_ptr = getelementptr i32, i32 addrspace(1)* %inB, i32 %tid
-  %a = load i32 addrspace(1)* %a_ptr
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %a_ptr
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %result = add i32 %a, %b
   %alloca_ptr = getelementptr [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b
   store i32 %result, i32* %alloca_ptr, align 4
   ; Dummy call
   call void @llvm.AMDGPU.barrier.local() nounwind noduplicate
-  %reload = load i32* %alloca_ptr, align 4
+  %reload = load i32, i32* %alloca_ptr, align 4
   %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   store i32 %reload, i32 addrspace(1)* %out_ptr, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/array-ptr-calc-i64.ll b/llvm/test/CodeGen/R600/array-ptr-calc-i64.ll
index f3db9d7..eae095e 100644
--- a/llvm/test/CodeGen/R600/array-ptr-calc-i64.ll
+++ b/llvm/test/CodeGen/R600/array-ptr-calc-i64.ll
@@ -9,8 +9,8 @@
   %tid = call i32 @llvm.SI.tid() readnone
   %a_ptr = getelementptr [1025 x i32], [1025 x i32] addrspace(1)* %inA, i32 %tid, i32 0
   %b_ptr = getelementptr i32, i32 addrspace(1)* %inB, i32 %tid
-  %a = load i32 addrspace(1)* %a_ptr
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %a_ptr
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %result = add i32 %a, %b
   store i32 %result, i32 addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/big_alu.ll b/llvm/test/CodeGen/R600/big_alu.ll
index 28be216..8206f33 100644
--- a/llvm/test/CodeGen/R600/big_alu.ll
+++ b/llvm/test/CodeGen/R600/big_alu.ll
@@ -51,29 +51,29 @@
   %43 = extractelement <4 x float> %reg7, i32 1
   %44 = extractelement <4 x float> %reg7, i32 2
   %45 = extractelement <4 x float> %reg7, i32 3
-  %46 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %46 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
   %47 = extractelement <4 x float> %46, i32 0
-  %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
   %49 = extractelement <4 x float> %48, i32 1
-  %50 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %50 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
   %51 = extractelement <4 x float> %50, i32 2
-  %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+  %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
   %53 = extractelement <4 x float> %52, i32 0
-  %54 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %54 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
   %55 = extractelement <4 x float> %54, i32 0
-  %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
   %57 = extractelement <4 x float> %56, i32 1
-  %58 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %58 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
   %59 = extractelement <4 x float> %58, i32 2
-  %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
   %61 = extractelement <4 x float> %60, i32 3
-  %62 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %62 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
   %63 = extractelement <4 x float> %62, i32 0
-  %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
   %65 = extractelement <4 x float> %64, i32 1
-  %66 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %66 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
   %67 = extractelement <4 x float> %66, i32 2
-  %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
   %69 = extractelement <4 x float> %68, i32 0
   %70 = fcmp oge float %69, 3.500000e+00
   %71 = sext i1 %70 to i32
@@ -81,7 +81,7 @@
   %73 = bitcast float %72 to i32
   %74 = icmp ne i32 %73, 0
   %. = select i1 %74, float 0.000000e+00, float 0.000000e+00
-  %75 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %75 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
   %76 = extractelement <4 x float> %75, i32 0
   %77 = fcmp oge float %76, 2.000000e+00
   %78 = sext i1 %77 to i32
@@ -135,7 +135,7 @@
   %123 = insertelement <4 x float> %122, float 0.000000e+00, i32 3
   %124 = call float @llvm.AMDGPU.dp4(<4 x float> %119, <4 x float> %123)
   %125 = fdiv float 1.000000e+00, %124
-  %126 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %126 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
   %127 = extractelement <4 x float> %126, i32 0
   %128 = fmul float %127, %125
   %129 = fmul float %103, %128
@@ -347,15 +347,15 @@
   %329 = fmul float %314, %328
   %330 = fmul float %316, %328
   %331 = fmul float %318, %328
-  %332 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %332 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
   %333 = extractelement <4 x float> %332, i32 0
   %334 = fsub float -0.000000e+00, %333
   %335 = fadd float 1.000000e+00, %334
-  %336 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %336 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
   %337 = extractelement <4 x float> %336, i32 0
   %338 = fsub float -0.000000e+00, %337
   %339 = fadd float 1.000000e+00, %338
-  %340 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %340 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
   %341 = extractelement <4 x float> %340, i32 0
   %342 = fsub float -0.000000e+00, %341
   %343 = fadd float 1.000000e+00, %342
@@ -1018,7 +1018,7 @@
   %temp92.11 = phi float [ %877, %IF176 ], [ %temp92.10, %ENDIF172 ]
   %temp93.5 = phi float [ %878, %IF176 ], [ %temp93.4, %ENDIF172 ]
   %temp94.5 = phi float [ %879, %IF176 ], [ %temp94.4, %ENDIF172 ]
-  %880 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %880 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
   %881 = extractelement <4 x float> %880, i32 0
   %882 = fcmp olt float %881, %179
   %883 = sext i1 %882 to i32
@@ -1114,12 +1114,12 @@
   %960 = fmul float %temp87.6, %956
   %961 = fmul float %2, -2.000000e+00
   %962 = fadd float %961, 1.000000e+00
-  %963 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
+  %963 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
   %964 = extractelement <4 x float> %963, i32 2
   %965 = fsub float -0.000000e+00, %964
   %966 = fadd float %962, %965
   %967 = fdiv float 1.000000e+00, %966
-  %968 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 24)
+  %968 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 24)
   %969 = extractelement <4 x float> %968, i32 2
   %970 = fmul float %969, %967
   %971 = fsub float -0.000000e+00, %53
diff --git a/llvm/test/CodeGen/R600/bitcast.ll b/llvm/test/CodeGen/R600/bitcast.ll
index 1ba64af..fd56d95 100644
--- a/llvm/test/CodeGen/R600/bitcast.ll
+++ b/llvm/test/CodeGen/R600/bitcast.ll
@@ -9,7 +9,7 @@
 ; SI: s_endpgm
 define void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
 entry:
-  %1 = load <32 x i8> addrspace(2)* %0
+  %1 = load <32 x i8>, <32 x i8> addrspace(2)* %0
   %2 = bitcast <32 x i8> %1 to <8 x i32>
   %3 = extractelement <8 x i32> %2, i32 1
   %4 = icmp ne i32 %3, 0
@@ -23,34 +23,34 @@
 define void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
 entry:
   %0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
-  %1 = load <16 x i8> addrspace(1)* %0
+  %1 = load <16 x i8>, <16 x i8> addrspace(1)* %0
   store <16 x i8> %1, <16 x i8> addrspace(1)* %out
   ret void
 }
 
 define void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
-  %load = load float addrspace(1)* %in, align 4
+  %load = load float, float addrspace(1)* %in, align 4
   %bc = bitcast float %load to <2 x i16>
   store <2 x i16> %bc, <2 x i16> addrspace(1)* %out, align 4
   ret void
 }
 
 define void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
-  %load = load <2 x i16> addrspace(1)* %in, align 4
+  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
   %bc = bitcast <2 x i16> %load to float
   store float %bc, float addrspace(1)* %out, align 4
   ret void
 }
 
 define void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
-  %load = load <4 x i8> addrspace(1)* %in, align 4
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   %bc = bitcast <4 x i8> %load to i32
   store i32 %bc, i32 addrspace(1)* %out, align 4
   ret void
 }
 
 define void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %bc = bitcast i32 %load to <4 x i8>
   store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
   ret void
@@ -59,7 +59,7 @@
 ; FUNC-LABEL: {{^}}bitcast_v2i32_to_f64:
 ; SI: s_endpgm
 define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
-  %val = load <2 x i32> addrspace(1)* %in, align 8
+  %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
   %add = add <2 x i32> %val, <i32 4, i32 9>
   %bc = bitcast <2 x i32> %add to double
   store double %bc, double addrspace(1)* %out, align 8
@@ -69,7 +69,7 @@
 ; FUNC-LABEL: {{^}}bitcast_f64_to_v2i32:
 ; SI: s_endpgm
 define void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) {
-  %val = load double addrspace(1)* %in, align 8
+  %val = load double, double addrspace(1)* %in, align 8
   %add = fadd double %val, 4.0
   %bc = bitcast double %add to <2 x i32>
   store <2 x i32> %bc, <2 x i32> addrspace(1)* %out, align 8
diff --git a/llvm/test/CodeGen/R600/bswap.ll b/llvm/test/CodeGen/R600/bswap.ll
index e93543d..4cf8e4b 100644
--- a/llvm/test/CodeGen/R600/bswap.ll
+++ b/llvm/test/CodeGen/R600/bswap.ll
@@ -18,7 +18,7 @@
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
 define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %bswap = call i32 @llvm.bswap.i32(i32 %val) nounwind readnone
   store i32 %bswap, i32 addrspace(1)* %out, align 4
   ret void
@@ -33,7 +33,7 @@
 ; SI-DAG: v_bfi_b32
 ; SI: s_endpgm
 define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
-  %val = load <2 x i32> addrspace(1)* %in, align 8
+  %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
   %bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %val) nounwind readnone
   store <2 x i32> %bswap, <2 x i32> addrspace(1)* %out, align 8
   ret void
@@ -54,7 +54,7 @@
 ; SI-DAG: v_bfi_b32
 ; SI: s_endpgm
 define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) nounwind {
-  %val = load <4 x i32> addrspace(1)* %in, align 16
+  %val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
   %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) nounwind readnone
   store <4 x i32> %bswap, <4 x i32> addrspace(1)* %out, align 16
   ret void
@@ -87,28 +87,28 @@
 ; SI-DAG: v_bfi_b32
 ; SI: s_endpgm
 define void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) nounwind {
-  %val = load <8 x i32> addrspace(1)* %in, align 32
+  %val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32
   %bswap = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %val) nounwind readnone
   store <8 x i32> %bswap, <8 x i32> addrspace(1)* %out, align 32
   ret void
 }
 
 define void @test_bswap_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
-  %val = load i64 addrspace(1)* %in, align 8
+  %val = load i64, i64 addrspace(1)* %in, align 8
   %bswap = call i64 @llvm.bswap.i64(i64 %val) nounwind readnone
   store i64 %bswap, i64 addrspace(1)* %out, align 8
   ret void
 }
 
 define void @test_bswap_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) nounwind {
-  %val = load <2 x i64> addrspace(1)* %in, align 16
+  %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
   %bswap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val) nounwind readnone
   store <2 x i64> %bswap, <2 x i64> addrspace(1)* %out, align 16
   ret void
 }
 
 define void @test_bswap_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) nounwind {
-  %val = load <4 x i64> addrspace(1)* %in, align 32
+  %val = load <4 x i64>, <4 x i64> addrspace(1)* %in, align 32
   %bswap = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %val) nounwind readnone
   store <4 x i64> %bswap, <4 x i64> addrspace(1)* %out, align 32
   ret void
diff --git a/llvm/test/CodeGen/R600/call.ll b/llvm/test/CodeGen/R600/call.ll
index 4cc7501..eb71649 100644
--- a/llvm/test/CodeGen/R600/call.ll
+++ b/llvm/test/CodeGen/R600/call.ll
@@ -14,8 +14,8 @@
 
 define void @test_call(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32 addrspace(1)* %in
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %in
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %c = call i32 @defined_function(i32 %b) nounwind
   %result = add i32 %a, %c
   store i32 %result, i32 addrspace(1)* %out
@@ -24,8 +24,8 @@
 
 define void @test_call_external(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32 addrspace(1)* %in
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %in
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %c = call i32 @external_function(i32 %b) nounwind
   %result = add i32 %a, %c
   store i32 %result, i32 addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/combine_vloads.ll b/llvm/test/CodeGen/R600/combine_vloads.ll
index d8c9c04..01572af 100644
--- a/llvm/test/CodeGen/R600/combine_vloads.ll
+++ b/llvm/test/CodeGen/R600/combine_vloads.ll
@@ -23,7 +23,7 @@
   %i.01 = phi i32 [ 0, %entry ], [ %tmp19, %for.body ]
   %arrayidx_v4 = bitcast <8 x i8> addrspace(1)* %src to <32 x i8> addrspace(1)*
   %0 = bitcast <32 x i8> addrspace(1)* %arrayidx_v4 to <8 x i32> addrspace(1)*
-  %vecload2 = load <8 x i32> addrspace(1)* %0, align 32
+  %vecload2 = load <8 x i32>, <8 x i32> addrspace(1)* %0, align 32
   %1 = bitcast <8 x i32> %vecload2 to <32 x i8>
   %tmp5 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %tmp8 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
diff --git a/llvm/test/CodeGen/R600/commute_modifiers.ll b/llvm/test/CodeGen/R600/commute_modifiers.ll
index cccc08e..7fc36ea 100644
--- a/llvm/test/CodeGen/R600/commute_modifiers.ll
+++ b/llvm/test/CodeGen/R600/commute_modifiers.ll
@@ -11,7 +11,7 @@
 define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %x = load float addrspace(1)* %gep.0
+  %x = load float, float addrspace(1)* %gep.0
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
   %z = fadd float 2.0, %x.fabs
   store float %z, float addrspace(1)* %out
@@ -25,7 +25,7 @@
 define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %x = load float addrspace(1)* %gep.0
+  %x = load float, float addrspace(1)* %gep.0
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
   %x.fneg.fabs = fsub float -0.000000e+00, %x.fabs
   %z = fmul float 4.0, %x.fneg.fabs
@@ -40,7 +40,7 @@
 define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %x = load float addrspace(1)* %gep.0
+  %x = load float, float addrspace(1)* %gep.0
   %x.fneg = fsub float -0.000000e+00, %x
   %z = fmul float 4.0, %x.fneg
   store float %z, float addrspace(1)* %out
@@ -56,7 +56,7 @@
 define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %x = load float addrspace(1)* %gep.0
+  %x = load float, float addrspace(1)* %gep.0
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
   %z = fadd float 1024.0, %x.fabs
   store float %z, float addrspace(1)* %out
@@ -72,8 +72,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %x = load float addrspace(1)* %gep.0
-  %y = load float addrspace(1)* %gep.1
+  %x = load float, float addrspace(1)* %gep.0
+  %y = load float, float addrspace(1)* %gep.1
   %y.fabs = call float @llvm.fabs.f32(float %y) #1
   %z = fadd float %x, %y.fabs
   store float %z, float addrspace(1)* %out
@@ -89,8 +89,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %x = load float addrspace(1)* %gep.0
-  %y = load float addrspace(1)* %gep.1
+  %x = load float, float addrspace(1)* %gep.0
+  %y = load float, float addrspace(1)* %gep.1
   %y.fneg = fsub float -0.000000e+00, %y
   %z = fmul float %x, %y.fneg
   store float %z, float addrspace(1)* %out
@@ -106,8 +106,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %x = load float addrspace(1)* %gep.0
-  %y = load float addrspace(1)* %gep.1
+  %x = load float, float addrspace(1)* %gep.0
+  %y = load float, float addrspace(1)* %gep.1
   %y.fabs = call float @llvm.fabs.f32(float %y) #1
   %y.fabs.fneg = fsub float -0.000000e+00, %y.fabs
   %z = fmul float %x, %y.fabs.fneg
@@ -125,8 +125,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %x = load float addrspace(1)* %gep.0
-  %y = load float addrspace(1)* %gep.1
+  %x = load float, float addrspace(1)* %gep.0
+  %y = load float, float addrspace(1)* %gep.1
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
   %y.fabs = call float @llvm.fabs.f32(float %y) #1
   %z = fmul float %x.fabs, %y.fabs
@@ -143,8 +143,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %x = load float addrspace(1)* %gep.0
-  %y = load float addrspace(1)* %gep.1
+  %x = load float, float addrspace(1)* %gep.0
+  %y = load float, float addrspace(1)* %gep.1
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
   %y.fabs = call float @llvm.fabs.f32(float %y) #1
   %y.fabs.fneg = fsub float -0.000000e+00, %y.fabs
@@ -167,8 +167,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r1 = load float addrspace(1)* %gep.0
-  %r2 = load float addrspace(1)* %gep.1
+  %r1 = load float, float addrspace(1)* %gep.0
+  %r2 = load float, float addrspace(1)* %gep.1
 
   %r2.fabs = call float @llvm.fabs.f32(float %r2)
 
diff --git a/llvm/test/CodeGen/R600/copy-illegal-type.ll b/llvm/test/CodeGen/R600/copy-illegal-type.ll
index 56c43d2..8b39756 100644
--- a/llvm/test/CodeGen/R600/copy-illegal-type.ll
+++ b/llvm/test/CodeGen/R600/copy-illegal-type.ll
@@ -6,7 +6,7 @@
 ; SI: buffer_store_dword [[REG]]
 ; SI: s_endpgm
 define void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
-  %val = load <4 x i8> addrspace(1)* %in, align 4
+  %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
   ret void
 }
@@ -17,7 +17,7 @@
 ; SI: buffer_store_dword [[REG]]
 ; SI: s_endpgm
 define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
-  %val = load <4 x i8> addrspace(1)* %in, align 4
+  %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
   ret void
@@ -30,7 +30,7 @@
 ; SI: buffer_store_dword [[REG]]
 ; SI: s_endpgm
 define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
-  %val = load <4 x i8> addrspace(1)* %in, align 4
+  %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out2, align 4
@@ -45,7 +45,7 @@
 ; SI: buffer_store_dword [[REG]]
 ; SI: s_endpgm
 define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %out3, <4 x i8> addrspace(1)* %in) nounwind {
-  %val = load <4 x i8> addrspace(1)* %in, align 4
+  %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out2, align 4
@@ -82,7 +82,7 @@
 
 ; SI: s_endpgm
 define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
-  %val = load <4 x i8> addrspace(1)* %in, align 4
+  %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   %add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
   store <4 x i8> %add, <4 x i8> addrspace(1)* %out1, align 4
@@ -120,7 +120,7 @@
 
 ; SI: s_endpgm
 define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
-  %val = load <4 x i8> addrspace(1)* %in, align 4
+  %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   %add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
   store <4 x i8> %add, <4 x i8> addrspace(1)* %out1, align 4
@@ -133,7 +133,7 @@
 ; SI-NOT: bfi
 ; SI: s_endpgm
 define void @test_copy_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) nounwind {
-  %val = load <3 x i8> addrspace(1)* %in, align 4
+  %val = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 4
   store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 4
   ret void
 }
@@ -145,7 +145,7 @@
 ; SI: buffer_load_ubyte
 ; SI: s_endpgm
 define void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
-  %val = load volatile <4 x i8> addrspace(1)* %in, align 4
+  %val = load volatile <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
   ret void
 }
@@ -161,7 +161,7 @@
 ; SI: buffer_store_byte
 ; SI: s_endpgm
 define void @test_copy_v4i8_volatile_store(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
-  %val = load <4 x i8> addrspace(1)* %in, align 4
+  %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   store volatile <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/copy-to-reg.ll b/llvm/test/CodeGen/R600/copy-to-reg.ll
index 784d2d0..fc875f6 100644
--- a/llvm/test/CodeGen/R600/copy-to-reg.ll
+++ b/llvm/test/CodeGen/R600/copy-to-reg.ll
@@ -21,7 +21,7 @@
 
 done:
   %tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 0
-  %tmp1 = load i32* %tmp0
+  %tmp1 = load i32, i32* %tmp0
   store i32 %tmp1, i32 addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/ctlz_zero_undef.ll b/llvm/test/CodeGen/R600/ctlz_zero_undef.ll
index 1a4317b..bd26c30 100644
--- a/llvm/test/CodeGen/R600/ctlz_zero_undef.ll
+++ b/llvm/test/CodeGen/R600/ctlz_zero_undef.ll
@@ -28,7 +28,7 @@
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
 define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
-  %val = load i32 addrspace(1)* %valptr, align 4
+  %val = load i32, i32 addrspace(1)* %valptr, align 4
   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
   store i32 %ctlz, i32 addrspace(1)* %out, align 4
   ret void
@@ -44,7 +44,7 @@
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
 define void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
-  %val = load <2 x i32> addrspace(1)* %valptr, align 8
+  %val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr, align 8
   %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
   store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8
   ret void
@@ -64,7 +64,7 @@
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
 define void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
-  %val = load <4 x i32> addrspace(1)* %valptr, align 16
+  %val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr, align 16
   %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
   store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16
   ret void
diff --git a/llvm/test/CodeGen/R600/ctpop.ll b/llvm/test/CodeGen/R600/ctpop.ll
index c0e8e6d..0a031c5 100644
--- a/llvm/test/CodeGen/R600/ctpop.ll
+++ b/llvm/test/CodeGen/R600/ctpop.ll
@@ -31,7 +31,7 @@
 
 ; EG: BCNT_INT
 define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   store i32 %ctpop, i32 addrspace(1)* %out, align 4
   ret void
@@ -49,8 +49,8 @@
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
-  %val0 = load i32 addrspace(1)* %in0, align 4
-  %val1 = load i32 addrspace(1)* %in1, align 4
+  %val0 = load i32, i32 addrspace(1)* %in0, align 4
+  %val1 = load i32, i32 addrspace(1)* %in1, align 4
   %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
   %ctpop1 = call i32 @llvm.ctpop.i32(i32 %val1) nounwind readnone
   %add = add i32 %ctpop0, %ctpop1
@@ -65,7 +65,7 @@
 ; GCN-NEXT: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
 define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
-  %val0 = load i32 addrspace(1)* %in0, align 4
+  %val0 = load i32, i32 addrspace(1)* %in0, align 4
   %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
   %add = add i32 %ctpop0, %sval
   store i32 %add, i32 addrspace(1)* %out, align 4
@@ -80,7 +80,7 @@
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
-  %val = load <2 x i32> addrspace(1)* %in, align 8
+  %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
   %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone
   store <2 x i32> %ctpop, <2 x i32> addrspace(1)* %out, align 8
   ret void
@@ -98,7 +98,7 @@
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
-  %val = load <4 x i32> addrspace(1)* %in, align 16
+  %val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
   %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone
   store <4 x i32> %ctpop, <4 x i32> addrspace(1)* %out, align 16
   ret void
@@ -124,7 +124,7 @@
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
-  %val = load <8 x i32> addrspace(1)* %in, align 32
+  %val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32
   %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone
   store <8 x i32> %ctpop, <8 x i32> addrspace(1)* %out, align 32
   ret void
@@ -166,7 +166,7 @@
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
-  %val = load <16 x i32> addrspace(1)* %in, align 32
+  %val = load <16 x i32>, <16 x i32> addrspace(1)* %in, align 32
   %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone
   store <16 x i32> %ctpop, <16 x i32> addrspace(1)* %out, align 32
   ret void
@@ -180,7 +180,7 @@
 
 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   %add = add i32 %ctpop, 4
   store i32 %add, i32 addrspace(1)* %out, align 4
@@ -195,7 +195,7 @@
 
 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   %add = add i32 4, %ctpop
   store i32 %add, i32 addrspace(1)* %out, align 4
@@ -210,7 +210,7 @@
 ; GCN: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
 define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   %add = add i32 %ctpop, 99999
   store i32 %add, i32 addrspace(1)* %out, align 4
@@ -226,7 +226,7 @@
 
 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   %add = add i32 %ctpop, %const
   store i32 %add, i32 addrspace(1)* %out, align 4
@@ -242,7 +242,7 @@
 
 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   %add = add i32 %const, %ctpop
   store i32 %add, i32 addrspace(1)* %out, align 4
@@ -259,10 +259,10 @@
 
 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   %gep = getelementptr i32, i32 addrspace(1)* %constptr, i32 4
-  %const = load i32 addrspace(1)* %gep, align 4
+  %const = load i32, i32 addrspace(1)* %gep, align 4
   %add = add i32 %const, %ctpop
   store i32 %add, i32 addrspace(1)* %out, align 4
   ret void
@@ -290,7 +290,7 @@
 
 else:
   %tmp3 = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %tmp4 = load i32 addrspace(1)* %tmp3
+  %tmp4 = load i32, i32 addrspace(1)* %tmp3
   br label %endif
 
 endif:
diff --git a/llvm/test/CodeGen/R600/ctpop64.ll b/llvm/test/CodeGen/R600/ctpop64.ll
index 9841319..e1a0ee3 100644
--- a/llvm/test/CodeGen/R600/ctpop64.ll
+++ b/llvm/test/CodeGen/R600/ctpop64.ll
@@ -29,7 +29,7 @@
 ; GCN: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
 define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
-  %val = load i64 addrspace(1)* %in, align 8
+  %val = load i64, i64 addrspace(1)* %in, align 8
   %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
   %truncctpop = trunc i64 %ctpop to i32
   store i32 %truncctpop, i32 addrspace(1)* %out, align 4
@@ -67,7 +67,7 @@
 ; GCN: v_bcnt_u32_b32
 ; GCN: s_endpgm
 define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind {
-  %val = load <2 x i64> addrspace(1)* %in, align 16
+  %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
   %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
   %truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
   store <2 x i32> %truncctpop, <2 x i32> addrspace(1)* %out, align 8
@@ -85,7 +85,7 @@
 ; GCN: v_bcnt_u32_b32
 ; GCN: s_endpgm
 define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind {
-  %val = load <4 x i64> addrspace(1)* %in, align 32
+  %val = load <4 x i64>, <4 x i64> addrspace(1)* %in, align 32
   %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
   %truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
   store <4 x i32> %truncctpop, <4 x i32> addrspace(1)* %out, align 16
@@ -114,7 +114,7 @@
 
 else:
   %tmp3 = getelementptr i64, i64 addrspace(1)* %in, i32 1
-  %tmp4 = load i64 addrspace(1)* %tmp3
+  %tmp4 = load i64, i64 addrspace(1)* %tmp3
   br label %endif
 
 endif:
diff --git a/llvm/test/CodeGen/R600/cttz_zero_undef.ll b/llvm/test/CodeGen/R600/cttz_zero_undef.ll
index d9d284c..56fcb51 100644
--- a/llvm/test/CodeGen/R600/cttz_zero_undef.ll
+++ b/llvm/test/CodeGen/R600/cttz_zero_undef.ll
@@ -28,7 +28,7 @@
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
 define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
-  %val = load i32 addrspace(1)* %valptr, align 4
+  %val = load i32, i32 addrspace(1)* %valptr, align 4
   %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
   store i32 %cttz, i32 addrspace(1)* %out, align 4
   ret void
@@ -44,7 +44,7 @@
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
 define void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
-  %val = load <2 x i32> addrspace(1)* %valptr, align 8
+  %val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr, align 8
   %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
   store <2 x i32> %cttz, <2 x i32> addrspace(1)* %out, align 8
   ret void
@@ -64,7 +64,7 @@
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
 define void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
-  %val = load <4 x i32> addrspace(1)* %valptr, align 16
+  %val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr, align 16
   %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
   store <4 x i32> %cttz, <4 x i32> addrspace(1)* %out, align 16
   ret void
diff --git a/llvm/test/CodeGen/R600/cvt_f32_ubyte.ll b/llvm/test/CodeGen/R600/cvt_f32_ubyte.ll
index 4d4bf93..3399d9d 100644
--- a/llvm/test/CodeGen/R600/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/R600/cvt_f32_ubyte.ll
@@ -8,7 +8,7 @@
 ; SI: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
 ; SI: buffer_store_dword [[CONV]],
 define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
-  %load = load i8 addrspace(1)* %in, align 1
+  %load = load i8, i8 addrspace(1)* %in, align 1
   %cvt = uitofp i8 %load to float
   store float %cvt, float addrspace(1)* %out, align 4
   ret void
@@ -23,7 +23,7 @@
 ; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
 ; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
 define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
-  %load = load <2 x i8> addrspace(1)* %in, align 2
+  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in, align 2
   %cvt = uitofp <2 x i8> %load to <2 x float>
   store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
   ret void
@@ -37,7 +37,7 @@
 ; SI-DAG: v_cvt_f32_ubyte0_e32
 ; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
 define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
-  %load = load <3 x i8> addrspace(1)* %in, align 4
+  %load = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 4
   %cvt = uitofp <3 x i8> %load to <3 x float>
   store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16
   ret void
@@ -53,7 +53,7 @@
 ; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
 ; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
 define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
-  %load = load <4 x i8> addrspace(1)* %in, align 4
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   %cvt = uitofp <4 x i8> %load to <4 x float>
   store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
   ret void
@@ -77,7 +77,7 @@
 
 ; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
 define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
-  %load = load <4 x i8> addrspace(1)* %in, align 1
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 1
   %cvt = uitofp <4 x i8> %load to <4 x float>
   store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
   ret void
@@ -105,7 +105,7 @@
 ; XSI: v_cvt_f32_u32_e32
 ; SI: s_endpgm
 define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind {
-  %load = load <4 x i8> addrspace(1)* %in, align 4
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   %cvt = uitofp <4 x i8> %load to <4 x float>
   store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
   %add = add <4 x i8> %load, <i8 9, i8 9, i8 9, i8 9> ; Second use of %load
@@ -117,7 +117,7 @@
 ; SI-LABEL: {{^}}load_v7i8_to_v7f32:
 ; SI: s_endpgm
 define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
-  %load = load <7 x i8> addrspace(1)* %in, align 1
+  %load = load <7 x i8>, <7 x i8> addrspace(1)* %in, align 1
   %cvt = uitofp <7 x i8> %load to <7 x float>
   store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16
   ret void
@@ -146,7 +146,7 @@
 ; SI: buffer_store_dword
 ; SI: buffer_store_dword
 define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
-  %load = load <8 x i8> addrspace(1)* %in, align 8
+  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in, align 8
   %cvt = uitofp <8 x i8> %load to <8 x float>
   store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16
   ret void
@@ -158,7 +158,7 @@
 ; SI-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
 ; SI: buffer_store_dword [[CONV]],
 define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 2
   %inreg = and i32 %add, 255
   %cvt = uitofp i32 %inreg to float
@@ -168,7 +168,7 @@
 
 ; SI-LABEL: {{^}}i8_zext_inreg_hi1_to_f32:
 define void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %inreg = and i32 %load, 65280
   %shr = lshr i32 %inreg, 8
   %cvt = uitofp i32 %shr to float
@@ -180,7 +180,7 @@
 ; We don't get these ones because of the zext, but instcombine removes
 ; them so it shouldn't really matter.
 define void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
-  %load = load i8 addrspace(1)* %in, align 1
+  %load = load i8, i8 addrspace(1)* %in, align 1
   %ext = zext i8 %load to i32
   %cvt = uitofp i32 %ext to float
   store float %cvt, float addrspace(1)* %out, align 4
@@ -188,7 +188,7 @@
 }
 
 define void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
-  %load = load <4 x i8> addrspace(1)* %in, align 1
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 1
   %ext = zext <4 x i8> %load to <4 x i32>
   %cvt = uitofp <4 x i32> %ext to <4 x float>
   store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
diff --git a/llvm/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll b/llvm/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
index f51cdc1..fb43ff4 100644
--- a/llvm/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
+++ b/llvm/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
@@ -13,7 +13,7 @@
 define void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %sint = load i32 addrspace(1) * %in
+  %sint = load i32, i32 addrspace(1) * %in
   %conv = sitofp i32 %sint to float
   %0 = insertelement <4 x float> undef, float %conv, i32 0
   %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
@@ -27,7 +27,7 @@
 define void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %uint = load i32 addrspace(1) * %in
+  %uint = load i32, i32 addrspace(1) * %in
   %conv = uitofp i32 %uint to float
   %0 = insertelement <4 x float> undef, float %conv, i32 0
   %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/R600/dot4-folding.ll b/llvm/test/CodeGen/R600/dot4-folding.ll
index dca6a59..d8975f6 100644
--- a/llvm/test/CodeGen/R600/dot4-folding.ll
+++ b/llvm/test/CodeGen/R600/dot4-folding.ll
@@ -14,8 +14,8 @@
 
 define void @main(float addrspace(1)* %out) {
 main_body:
-  %0 = load <4 x float> addrspace(8)* null
-  %1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %0 = load <4 x float>, <4 x float> addrspace(8)* null
+  %1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %2 = call float @llvm.AMDGPU.dp4(<4 x float> %0,<4 x float> %1)
   %3 = insertelement <4 x float> undef, float %2, i32 0
   call void @llvm.R600.store.swizzle(<4 x float> %3, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll b/llvm/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
index 6ec87a8..c381fc4 100644
--- a/llvm/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
+++ b/llvm/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
@@ -34,19 +34,19 @@
   %k.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   tail call void @llvm.AMDGPU.barrier.local() #1
   %arrayidx = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %offset.02
-  %tmp = load float addrspace(3)* %arrayidx, align 4
+  %tmp = load float, float addrspace(3)* %arrayidx, align 4
   %add1 = add nsw i32 %offset.02, 1
   %arrayidx2 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add1
-  %tmp1 = load float addrspace(3)* %arrayidx2, align 4
+  %tmp1 = load float, float addrspace(3)* %arrayidx2, align 4
   %add3 = add nsw i32 %offset.02, 32
   %arrayidx4 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add3
-  %tmp2 = load float addrspace(3)* %arrayidx4, align 4
+  %tmp2 = load float, float addrspace(3)* %arrayidx4, align 4
   %add5 = add nsw i32 %offset.02, 33
   %arrayidx6 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add5
-  %tmp3 = load float addrspace(3)* %arrayidx6, align 4
+  %tmp3 = load float, float addrspace(3)* %arrayidx6, align 4
   %add7 = add nsw i32 %offset.02, 64
   %arrayidx8 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add7
-  %tmp4 = load float addrspace(3)* %arrayidx8, align 4
+  %tmp4 = load float, float addrspace(3)* %arrayidx8, align 4
   %add9 = fadd float %tmp, %tmp1
   %add10 = fadd float %add9, %tmp2
   %add11 = fadd float %add10, %tmp3
diff --git a/llvm/test/CodeGen/R600/ds_read2.ll b/llvm/test/CodeGen/R600/ds_read2.ll
index 5901e85..f53b6c0 100644
--- a/llvm/test/CodeGen/R600/ds_read2.ll
+++ b/llvm/test/CodeGen/R600/ds_read2.ll
@@ -15,10 +15,10 @@
 define void @simple_read2_f32(float addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -34,10 +34,10 @@
 define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 255
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -52,10 +52,10 @@
 define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 257
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -70,20 +70,20 @@
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %idx.0 = add nsw i32 %tid.x, 0
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
 
   %idx.1 = add nsw i32 %tid.x, 8
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum.0 = fadd float %val0, %val1
 
   %idx.2 = add nsw i32 %tid.x, 11
   %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
-  %val2 = load float addrspace(3)* %arrayidx2, align 4
+  %val2 = load float, float addrspace(3)* %arrayidx2, align 4
 
   %idx.3 = add nsw i32 %tid.x, 27
   %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
-  %val3 = load float addrspace(3)* %arrayidx3, align 4
+  %val3 = load float, float addrspace(3)* %arrayidx3, align 4
   %sum.1 = fadd float %val2, %val3
 
   %sum = fadd float %sum.0, %sum.1
@@ -102,22 +102,22 @@
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %idx.0 = add nsw i32 %tid.x, 0
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
 
   %idx.1 = add nsw i32 %tid.x, 8
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum.0 = fadd float %val0, %val1
 
   call void @llvm.AMDGPU.barrier.local() #2
 
   %idx.2 = add nsw i32 %tid.x, 11
   %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
-  %val2 = load float addrspace(3)* %arrayidx2, align 4
+  %val2 = load float, float addrspace(3)* %arrayidx2, align 4
 
   %idx.3 = add nsw i32 %tid.x, 27
   %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
-  %val3 = load float addrspace(3)* %arrayidx3, align 4
+  %val3 = load float, float addrspace(3)* %arrayidx3, align 4
   %sum.1 = fadd float %val2, %val3
 
   %sum = fadd float %sum.0, %sum.1
@@ -137,20 +137,20 @@
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
 
   %idx.1 = add nsw i32 %tid.x, 8
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum.0 = fadd float %val0, %val1
 
   %idx.2 = add nsw i32 %tid.x, 11
   %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
-  %val2 = load float addrspace(3)* %arrayidx2, align 4
+  %val2 = load float, float addrspace(3)* %arrayidx2, align 4
 
   %idx.3 = add nsw i32 %tid.x, 27
   %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
-  %val3 = load float addrspace(3)* %arrayidx3, align 4
+  %val3 = load float, float addrspace(3)* %arrayidx3, align 4
   %sum.1 = fadd float %val2, %val3
 
   %sum = fadd float %sum.0, %sum.1
@@ -177,8 +177,8 @@
   %gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
   %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
   %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
-  %val0 = load float addrspace(3)* %gep.0, align 4
-  %val1 = load float addrspace(3)* %gep.1, align 4
+  %val0 = load float, float addrspace(3)* %gep.0, align 4
+  %val1 = load float, float addrspace(3)* %gep.1, align 4
   %add.x = add nsw i32 %x.i, 8
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
@@ -207,8 +207,8 @@
   ; Apply an additional offset after the vector that will be more obviously folded.
   %gep.1.offset = getelementptr float, float addrspace(3)* %gep.1, i32 8
 
-  %val0 = load float addrspace(3)* %gep.0, align 4
-  %val1 = load float addrspace(3)* %gep.1.offset, align 4
+  %val0 = load float, float addrspace(3)* %gep.0, align 4
+  %val1 = load float, float addrspace(3)* %gep.1.offset, align 4
   %add.x = add nsw i32 %x.i, 8
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
@@ -231,8 +231,8 @@
   %gep = getelementptr inbounds [512 x float], <2 x [512 x float] addrspace(3)*> %ptr.1, <2 x i32> <i32 0, i32 0>, <2 x i32> %idx
   %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
   %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
-  %val0 = load float addrspace(3)* %gep.0, align 4
-  %val1 = load float addrspace(3)* %gep.1, align 4
+  %val0 = load float, float addrspace(3)* %gep.0, align 4
+  %val1 = load float, float addrspace(3)* %gep.1, align 4
   %add.x = add nsw i32 %x.i, 8
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
@@ -248,10 +248,10 @@
 define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
-  %val0 = load volatile float addrspace(3)* %arrayidx0, align 4
+  %val0 = load volatile float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -266,10 +266,10 @@
 define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
-  %val1 = load volatile float addrspace(3)* %arrayidx1, align 4
+  %val1 = load volatile float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -285,10 +285,10 @@
 define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
-  %val0 = load float addrspace(3)* %arrayidx0, align 1
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 1
   %add.x = add nsw i32 %x.i, 8
   %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x
-  %val1 = load float addrspace(3)* %arrayidx1, align 1
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 1
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -301,10 +301,10 @@
 define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
-  %val0 = load float addrspace(3)* %arrayidx0, align 2
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 2
   %add.x = add nsw i32 %x.i, 8
   %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x
-  %val1 = load float addrspace(3)* %arrayidx1, align 2
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 2
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -320,10 +320,10 @@
 define void @simple_read2_f64(double addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
-  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 8
   %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
-  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 8
@@ -336,10 +336,10 @@
 define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
-  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 255
   %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
-  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 8
@@ -354,10 +354,10 @@
 define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
-  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 257
   %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
-  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 8
@@ -372,10 +372,10 @@
 define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
-  %val0 = load double addrspace(3)* %arrayidx0, align 4
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 7
   %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
-  %val1 = load double addrspace(3)* %arrayidx1, align 4
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 4
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 4
@@ -388,8 +388,8 @@
 ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
 define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
-  %val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
-  %val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
+  %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+  %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
   %sum = add i32 %val0, %val1
   store i32 %sum, i32 addrspace(1)* %out, align 4
   ret void
@@ -399,8 +399,8 @@
 ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2
 define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
-  %val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
-  %val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
+  %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+  %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
   %sum = add i32 %val0, %val1
   store i32 %sum, i32 addrspace(1)* %out, align 4
   ret void
@@ -413,8 +413,8 @@
 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
 define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
-  %val0 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
-  %val1 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
+  %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
+  %val1 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
   %sum = add i64 %val0, %val1
   store i64 %sum, i64 addrspace(1)* %out, align 8
   ret void
@@ -429,8 +429,8 @@
 ; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
 ; SI: s_endpgm
 define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
-  %val0 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
-  %val1 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
+  %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
+  %val1 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
   %sum = add i64 %val0, %val1
   store i64 %sum, i64 addrspace(1)* %out, align 8
   ret void
@@ -443,33 +443,33 @@
   %x.i = tail call i32 @llvm.r600.read.tgid.x() #1
   %y.i = tail call i32 @llvm.r600.read.tidig.y() #1
   %arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
-  %tmp16 = load float addrspace(3)* %arrayidx44, align 4
+  %tmp16 = load float, float addrspace(3)* %arrayidx44, align 4
   %add47 = add nsw i32 %x.i, 1
   %arrayidx48 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
-  %tmp17 = load float addrspace(3)* %arrayidx48, align 4
+  %tmp17 = load float, float addrspace(3)* %arrayidx48, align 4
   %add51 = add nsw i32 %x.i, 16
   %arrayidx52 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
-  %tmp18 = load float addrspace(3)* %arrayidx52, align 4
+  %tmp18 = load float, float addrspace(3)* %arrayidx52, align 4
   %add55 = add nsw i32 %x.i, 17
   %arrayidx56 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
-  %tmp19 = load float addrspace(3)* %arrayidx56, align 4
+  %tmp19 = load float, float addrspace(3)* %arrayidx56, align 4
   %arrayidx60 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
-  %tmp20 = load float addrspace(3)* %arrayidx60, align 4
+  %tmp20 = load float, float addrspace(3)* %arrayidx60, align 4
   %add63 = add nsw i32 %y.i, 1
   %arrayidx64 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
-  %tmp21 = load float addrspace(3)* %arrayidx64, align 4
+  %tmp21 = load float, float addrspace(3)* %arrayidx64, align 4
   %add67 = add nsw i32 %y.i, 32
   %arrayidx68 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
-  %tmp22 = load float addrspace(3)* %arrayidx68, align 4
+  %tmp22 = load float, float addrspace(3)* %arrayidx68, align 4
   %add71 = add nsw i32 %y.i, 33
   %arrayidx72 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
-  %tmp23 = load float addrspace(3)* %arrayidx72, align 4
+  %tmp23 = load float, float addrspace(3)* %arrayidx72, align 4
   %add75 = add nsw i32 %y.i, 64
   %arrayidx76 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
-  %tmp24 = load float addrspace(3)* %arrayidx76, align 4
+  %tmp24 = load float, float addrspace(3)* %arrayidx76, align 4
   %add79 = add nsw i32 %y.i, 65
   %arrayidx80 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
-  %tmp25 = load float addrspace(3)* %arrayidx80, align 4
+  %tmp25 = load float, float addrspace(3)* %arrayidx80, align 4
   %sum.0 = fadd float %tmp16, %tmp17
   %sum.1 = fadd float %sum.0, %tmp18
   %sum.2 = fadd float %sum.1, %tmp19
@@ -484,13 +484,13 @@
 }
 
 define void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(3)* %in) #0 {
-  %load = load <2 x i32> addrspace(3)* %in, align 4
+  %load = load <2 x i32>, <2 x i32> addrspace(3)* %in, align 4
   store <2 x i32> %load, <2 x i32> addrspace(1)* %out, align 8
   ret void
 }
 
 define void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) #0 {
-  %load = load i64 addrspace(3)* %in, align 4
+  %load = load i64, i64 addrspace(3)* %in, align 4
   store i64 %load, i64 addrspace(1)* %out, align 8
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/ds_read2_offset_order.ll b/llvm/test/CodeGen/R600/ds_read2_offset_order.ll
index 107c861..9ea9a5a 100644
--- a/llvm/test/CodeGen/R600/ds_read2_offset_order.ll
+++ b/llvm/test/CodeGen/R600/ds_read2_offset_order.ll
@@ -15,30 +15,30 @@
 define void @offset_order(float addrspace(1)* %out) {
 entry:
   %ptr0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 0
-  %val0 = load float addrspace(3)* %ptr0
+  %val0 = load float, float addrspace(3)* %ptr0
 
   %ptr1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 256
-  %val1 = load float addrspace(3)* %ptr1
+  %val1 = load float, float addrspace(3)* %ptr1
   %add1 = fadd float %val0, %val1
 
   %ptr2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 3
-  %val2 = load float addrspace(3)* %ptr2
+  %val2 = load float, float addrspace(3)* %ptr2
   %add2 = fadd float %add1, %val2
 
   %ptr3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 2
-  %val3 = load float addrspace(3)* %ptr3
+  %val3 = load float, float addrspace(3)* %ptr3
   %add3 = fadd float %add2, %val3
 
   %ptr4 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 12
-  %val4 = load float addrspace(3)* %ptr4
+  %val4 = load float, float addrspace(3)* %ptr4
   %add4 = fadd float %add3, %val4
 
   %ptr5 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 14
-  %val5 = load float addrspace(3)* %ptr5
+  %val5 = load float, float addrspace(3)* %ptr5
   %add5 = fadd float %add4, %val5
 
   %ptr6 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 11
-  %val6 = load float addrspace(3)* %ptr6
+  %val6 = load float, float addrspace(3)* %ptr6
   %add6 = fadd float %add5, %val6
   store float %add6, float addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/ds_read2st64.ll b/llvm/test/CodeGen/R600/ds_read2st64.ll
index 163c687..482debb 100644
--- a/llvm/test/CodeGen/R600/ds_read2st64.ll
+++ b/llvm/test/CodeGen/R600/ds_read2st64.ll
@@ -13,10 +13,10 @@
 define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 64
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -33,10 +33,10 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x.1 = add nsw i32 %x.i, 128
   %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -53,10 +53,10 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x.1 = add nsw i32 %x.i, 16320
   %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -73,10 +73,10 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x.1 = add nsw i32 %x.i, 16384
   %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -89,10 +89,10 @@
 define void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 63
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -106,10 +106,10 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x.1 = add nsw i32 %x.i, 127
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.1
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -125,10 +125,10 @@
 define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
-  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 64
   %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
-  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 8
@@ -145,10 +145,10 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
-  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x.1 = add nsw i32 %x.i, 128
   %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
-  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 8
@@ -164,10 +164,10 @@
 define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
-  %val0 = load double addrspace(3)* %arrayidx0, align 4
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 64
   %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
-  %val1 = load double addrspace(3)* %arrayidx1, align 4
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 4
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 4
@@ -185,10 +185,10 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %add.x.0 = add nsw i32 %x.i, 256
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
-  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x.1 = add nsw i32 %x.i, 8128
   %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
-  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 8
@@ -205,10 +205,10 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
-  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x.1 = add nsw i32 %x.i, 8192
   %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
-  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 8
@@ -222,10 +222,10 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
-  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x.1 = add nsw i32 %x.i, 8129
   %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
-  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 8
@@ -242,10 +242,10 @@
 define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
-  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 8
   %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
-  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 4
diff --git a/llvm/test/CodeGen/R600/ds_write2.ll b/llvm/test/CodeGen/R600/ds_write2.ll
index aaa3f59..d06f780 100644
--- a/llvm/test/CodeGen/R600/ds_write2.ll
+++ b/llvm/test/CodeGen/R600/ds_write2.ll
@@ -12,7 +12,7 @@
 define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i
-  %val = load float addrspace(1)* %in.gep, align 4
+  %val = load float, float addrspace(1)* %in.gep, align 4
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   store float %val, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
@@ -31,8 +31,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
   %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
-  %val0 = load float addrspace(1)* %in.gep.0, align 4
-  %val1 = load float addrspace(1)* %in.gep.1, align 4
+  %val0 = load float, float addrspace(1)* %in.gep.0, align 4
+  %val1 = load float, float addrspace(1)* %in.gep.1, align 4
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   store float %val0, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
@@ -50,8 +50,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
   %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
-  %val0 = load float addrspace(1)* %in0.gep, align 4
-  %val1 = load float addrspace(1)* %in1.gep, align 4
+  %val0 = load float, float addrspace(1)* %in0.gep, align 4
+  %val1 = load float, float addrspace(1)* %in1.gep, align 4
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   store volatile float %val0, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
@@ -69,8 +69,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
   %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
-  %val0 = load float addrspace(1)* %in0.gep, align 4
-  %val1 = load float addrspace(1)* %in1.gep, align 4
+  %val0 = load float, float addrspace(1)* %in0.gep, align 4
+  %val1 = load float, float addrspace(1)* %in1.gep, align 4
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   store float %val0, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
@@ -90,8 +90,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i
   %in.gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in.gep.0, i32 1
-  %val0 = load <2 x float> addrspace(1)* %in.gep.0, align 8
-  %val1 = load <2 x float> addrspace(1)* %in.gep.1, align 8
+  %val0 = load <2 x float>, <2 x float> addrspace(1)* %in.gep.0, align 8
+  %val1 = load <2 x float>, <2 x float> addrspace(1)* %in.gep.1, align 8
   %val0.0 = extractelement <2 x float> %val0, i32 0
   %val1.1 = extractelement <2 x float> %val1, i32 1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -110,7 +110,7 @@
 define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i
-  %val = load <2 x float> addrspace(1)* %in.gep, align 8
+  %val = load <2 x float>, <2 x float> addrspace(1)* %in.gep, align 8
   %val0 = extractelement <2 x float> %val, i32 0
   %val1 = extractelement <2 x float> %val, i32 1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -129,7 +129,7 @@
 define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 %x.i
-  %val = load <4 x float> addrspace(1)* %in.gep, align 16
+  %val = load <4 x float>, <4 x float> addrspace(1)* %in.gep, align 16
   %val0 = extractelement <4 x float> %val, i32 0
   %val1 = extractelement <4 x float> %val, i32 3
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -150,8 +150,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
   %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
-  %val0 = load float addrspace(1)* %in.gep.0, align 4
-  %val1 = load float addrspace(1)* %in.gep.1, align 4
+  %val0 = load float, float addrspace(1)* %in.gep.0, align 4
+  %val1 = load float, float addrspace(1)* %in.gep.1, align 4
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   store float %val0, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 255
@@ -168,8 +168,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
   %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
-  %val0 = load float addrspace(1)* %in0.gep, align 4
-  %val1 = load float addrspace(1)* %in1.gep, align 4
+  %val0 = load float, float addrspace(1)* %in0.gep, align 4
+  %val1 = load float, float addrspace(1)* %in1.gep, align 4
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   store float %val0, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 257
@@ -186,8 +186,8 @@
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x
   %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x
-  %val0 = load float addrspace(1)* %in0.gep, align 4
-  %val1 = load float addrspace(1)* %in1.gep, align 4
+  %val0 = load float, float addrspace(1)* %in0.gep, align 4
+  %val1 = load float, float addrspace(1)* %in1.gep, align 4
 
   %idx.0 = add nsw i32 %tid.x, 0
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
@@ -216,8 +216,8 @@
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x
   %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x
-  %val0 = load float addrspace(1)* %in0.gep, align 4
-  %val1 = load float addrspace(1)* %in1.gep, align 4
+  %val0 = load float, float addrspace(1)* %in0.gep, align 4
+  %val1 = load float, float addrspace(1)* %in1.gep, align 4
 
   %idx.0 = add nsw i32 %tid.x, 3
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
@@ -247,8 +247,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
   %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
-  %val0 = load float addrspace(1)* %in0.gep, align 4
-  %val1 = load float addrspace(1)* %in1.gep, align 4
+  %val0 = load float, float addrspace(1)* %in0.gep, align 4
+  %val1 = load float, float addrspace(1)* %in1.gep, align 4
 
   %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
   %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
@@ -273,7 +273,7 @@
 define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
-  %val = load double addrspace(1)* %in.gep, align 8
+  %val = load double, double addrspace(1)* %in.gep, align 8
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
   store double %val, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 8
@@ -291,7 +291,7 @@
 define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
-  %val = load double addrspace(1)* %in.gep, align 8
+  %val = load double, double addrspace(1)* %in.gep, align 8
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
   store double %val, double addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 7
@@ -310,8 +310,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i
   %in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1
-  %val0 = load double addrspace(1)* %in.gep.0, align 8
-  %val1 = load double addrspace(1)* %in.gep.1, align 8
+  %val0 = load double, double addrspace(1)* %in.gep.0, align 8
+  %val1 = load double, double addrspace(1)* %in.gep.1, align 8
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
   store double %val0, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 8
@@ -373,7 +373,7 @@
 define void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, float addrspace(1)* %in) #0 {
   %x.i = tail call i32 @llvm.r600.read.tgid.x() #1
   %y.i = tail call i32 @llvm.r600.read.tidig.y() #1
-  %val = load float addrspace(1)* %in
+  %val = load float, float addrspace(1)* %in
   %arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
   store float %val, float addrspace(3)* %arrayidx44, align 4
   %add47 = add nsw i32 %x.i, 1
diff --git a/llvm/test/CodeGen/R600/ds_write2st64.ll b/llvm/test/CodeGen/R600/ds_write2st64.ll
index 3070771..2044df2 100644
--- a/llvm/test/CodeGen/R600/ds_write2st64.ll
+++ b/llvm/test/CodeGen/R600/ds_write2st64.ll
@@ -12,7 +12,7 @@
 define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i
-  %val = load float addrspace(1)* %in.gep, align 4
+  %val = load float, float addrspace(1)* %in.gep, align 4
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   store float %val, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 64
@@ -31,8 +31,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
   %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
-  %val0 = load float addrspace(1)* %in.gep.0, align 4
-  %val1 = load float addrspace(1)* %in.gep.1, align 4
+  %val0 = load float, float addrspace(1)* %in.gep.0, align 4
+  %val1 = load float, float addrspace(1)* %in.gep.1, align 4
   %add.x.0 = add nsw i32 %x.i, 128
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
   store float %val0, float addrspace(3)* %arrayidx0, align 4
@@ -52,8 +52,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
   %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
-  %val0 = load float addrspace(1)* %in.gep.0, align 4
-  %val1 = load float addrspace(1)* %in.gep.1, align 4
+  %val0 = load float, float addrspace(1)* %in.gep.0, align 4
+  %val1 = load float, float addrspace(1)* %in.gep.1, align 4
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
   store float %val0, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 16320
@@ -72,8 +72,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i
   %in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1
-  %val0 = load double addrspace(1)* %in.gep.0, align 8
-  %val1 = load double addrspace(1)* %in.gep.1, align 8
+  %val0 = load double, double addrspace(1)* %in.gep.0, align 8
+  %val1 = load double, double addrspace(1)* %in.gep.1, align 8
   %add.x.0 = add nsw i32 %x.i, 256
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
   store double %val0, double addrspace(3)* %arrayidx0, align 8
@@ -90,7 +90,7 @@
 define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
-  %val = load double addrspace(1)* %in.gep, align 8
+  %val = load double, double addrspace(1)* %in.gep, align 8
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
   store double %val, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 8
diff --git a/llvm/test/CodeGen/R600/extload-private.ll b/llvm/test/CodeGen/R600/extload-private.ll
index fec8682..294c3a9 100644
--- a/llvm/test/CodeGen/R600/extload-private.ll
+++ b/llvm/test/CodeGen/R600/extload-private.ll
@@ -6,7 +6,7 @@
 define void @load_i8_sext_private(i32 addrspace(1)* %out) {
 entry:
   %tmp0 = alloca i8
-  %tmp1 = load i8* %tmp0
+  %tmp1 = load i8, i8* %tmp0
   %tmp2 = sext i8 %tmp1 to i32
   store i32 %tmp2, i32 addrspace(1)* %out
   ret void
@@ -17,7 +17,7 @@
 define void @load_i8_zext_private(i32 addrspace(1)* %out) {
 entry:
   %tmp0 = alloca i8
-  %tmp1 = load i8* %tmp0
+  %tmp1 = load i8, i8* %tmp0
   %tmp2 = zext i8 %tmp1 to i32
   store i32 %tmp2, i32 addrspace(1)* %out
   ret void
@@ -28,7 +28,7 @@
 define void @load_i16_sext_private(i32 addrspace(1)* %out) {
 entry:
   %tmp0 = alloca i16
-  %tmp1 = load i16* %tmp0
+  %tmp1 = load i16, i16* %tmp0
   %tmp2 = sext i16 %tmp1 to i32
   store i32 %tmp2, i32 addrspace(1)* %out
   ret void
@@ -39,7 +39,7 @@
 define void @load_i16_zext_private(i32 addrspace(1)* %out) {
 entry:
   %tmp0 = alloca i16
-  %tmp1 = load i16* %tmp0
+  %tmp1 = load i16, i16* %tmp0
   %tmp2 = zext i16 %tmp1 to i32
   store i32 %tmp2, i32 addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/extload.ll b/llvm/test/CodeGen/R600/extload.ll
index 77e5dc3..662eb7a 100644
--- a/llvm/test/CodeGen/R600/extload.ll
+++ b/llvm/test/CodeGen/R600/extload.ll
@@ -8,7 +8,7 @@
 
 define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
   %cast = bitcast i8 addrspace(1)* %src to i32 addrspace(1)*
-  %load = load i32 addrspace(1)* %cast, align 1
+  %load = load i32, i32 addrspace(1)* %cast, align 1
   %x = bitcast i32 %load to <4 x i8>
   %castOut = bitcast i8 addrspace(1)* %out to <4 x i8> addrspace(1)*
   store <4 x i8> %x, <4 x i8> addrspace(1)* %castOut, align 1
@@ -21,7 +21,7 @@
 
 define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrspace(1)* nocapture noalias %src) nounwind {
   %cast = bitcast i16 addrspace(1)* %src to i32 addrspace(1)*
-  %load = load i32 addrspace(1)* %cast, align 1
+  %load = load i32, i32 addrspace(1)* %cast, align 1
   %x = bitcast i32 %load to <2 x i16>
   %castOut = bitcast i16 addrspace(1)* %out to <2 x i16> addrspace(1)*
   store <2 x i16> %x, <2 x i16> addrspace(1)* %castOut, align 1
@@ -33,7 +33,7 @@
 ; EG: LDS_WRITE * [[VAL]]
 define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
   %cast = bitcast i8 addrspace(3)* %src to i32 addrspace(3)*
-  %load = load i32 addrspace(3)* %cast, align 1
+  %load = load i32, i32 addrspace(3)* %cast, align 1
   %x = bitcast i32 %load to <4 x i8>
   %castOut = bitcast i8 addrspace(3)* %out to <4 x i8> addrspace(3)*
   store <4 x i8> %x, <4 x i8> addrspace(3)* %castOut, align 1
@@ -45,7 +45,7 @@
 ; EG: LDS_WRITE * [[VAL]]
 define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 addrspace(3)* nocapture noalias %src) nounwind {
   %cast = bitcast i16 addrspace(3)* %src to i32 addrspace(3)*
-  %load = load i32 addrspace(3)* %cast, align 1
+  %load = load i32, i32 addrspace(3)* %cast, align 1
   %x = bitcast i32 %load to <2 x i16>
   %castOut = bitcast i16 addrspace(3)* %out to <2 x i16> addrspace(3)*
   store <2 x i16> %x, <2 x i16> addrspace(3)* %castOut, align 1
diff --git a/llvm/test/CodeGen/R600/fabs.f64.ll b/llvm/test/CodeGen/R600/fabs.f64.ll
index 30c8952..3c6136c 100644
--- a/llvm/test/CodeGen/R600/fabs.f64.ll
+++ b/llvm/test/CodeGen/R600/fabs.f64.ll
@@ -14,7 +14,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %tidext = sext i32 %tid to i64
   %gep = getelementptr double, double addrspace(1)* %in, i64 %tidext
-  %val = load double addrspace(1)* %gep, align 8
+  %val = load double, double addrspace(1)* %gep, align 8
   %fabs = call double @llvm.fabs.f64(double %val)
   store double %fabs, double addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/fadd.ll b/llvm/test/CodeGen/R600/fadd.ll
index 647eee4..5fac328 100644
--- a/llvm/test/CodeGen/R600/fadd.ll
+++ b/llvm/test/CodeGen/R600/fadd.ll
@@ -33,8 +33,8 @@
 ; SI: v_add_f32
 define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float> addrspace(1)* %in, align 16
-  %b = load <4 x float> addrspace(1)* %b_ptr, align 16
+  %a = load <4 x float>, <4 x float> addrspace(1)* %in, align 16
+  %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr, align 16
   %result = fadd <4 x float> %a, %b
   store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
   ret void
diff --git a/llvm/test/CodeGen/R600/fadd64.ll b/llvm/test/CodeGen/R600/fadd64.ll
index f1f6fef..485c558 100644
--- a/llvm/test/CodeGen/R600/fadd64.ll
+++ b/llvm/test/CodeGen/R600/fadd64.ll
@@ -6,8 +6,8 @@
 
 define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                       double addrspace(1)* %in2) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
    %r2 = fadd double %r0, %r1
    store double %r2, double addrspace(1)* %out
    ret void
diff --git a/llvm/test/CodeGen/R600/fcmp-cnd.ll b/llvm/test/CodeGen/R600/fcmp-cnd.ll
index 1d4e323..530274f 100644
--- a/llvm/test/CodeGen/R600/fcmp-cnd.ll
+++ b/llvm/test/CodeGen/R600/fcmp-cnd.ll
@@ -6,7 +6,7 @@
 
 define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
 entry:
-  %0 = load float addrspace(1)* %in
+  %0 = load float, float addrspace(1)* %in
   %cmp = fcmp oeq float %0, 0.000000e+00
   %value = select i1 %cmp, i32 2, i32 3 
   store i32 %value, i32 addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/fcmp-cnde-int-args.ll b/llvm/test/CodeGen/R600/fcmp-cnde-int-args.ll
index 55aba0d..c402805 100644
--- a/llvm/test/CodeGen/R600/fcmp-cnde-int-args.ll
+++ b/llvm/test/CodeGen/R600/fcmp-cnde-int-args.ll
@@ -8,7 +8,7 @@
 
 define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
 entry:
-  %0 = load float addrspace(1)* %in
+  %0 = load float, float addrspace(1)* %in
   %cmp = fcmp oeq float %0, 0.000000e+00
   %value = select i1 %cmp, i32 -1, i32 0
   store i32 %value, i32 addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/fcmp.ll b/llvm/test/CodeGen/R600/fcmp.ll
index 718f3e8..5207ab5 100644
--- a/llvm/test/CodeGen/R600/fcmp.ll
+++ b/llvm/test/CodeGen/R600/fcmp.ll
@@ -5,9 +5,9 @@
 
 define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) {
 entry:
-  %0 = load float addrspace(1)* %in
+  %0 = load float, float addrspace(1)* %in
   %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %in, i32 1
-  %1 = load float addrspace(1)* %arrayidx1
+  %1 = load float, float addrspace(1)* %arrayidx1
   %cmp = fcmp oeq float %0, %1
   %sext = sext i1 %cmp to i32
   store i32 %sext, i32 addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/fcmp64.ll b/llvm/test/CodeGen/R600/fcmp64.ll
index 9dc8b50..053ab0e 100644
--- a/llvm/test/CodeGen/R600/fcmp64.ll
+++ b/llvm/test/CodeGen/R600/fcmp64.ll
@@ -5,8 +5,8 @@
 ; CHECK: v_cmp_nge_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 define void @flt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
    %r2 = fcmp ult double %r0, %r1
    %r3 = zext i1 %r2 to i32
    store i32 %r3, i32 addrspace(1)* %out
@@ -17,8 +17,8 @@
 ; CHECK: v_cmp_ngt_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 define void @fle_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
    %r2 = fcmp ule double %r0, %r1
    %r3 = zext i1 %r2 to i32
    store i32 %r3, i32 addrspace(1)* %out
@@ -29,8 +29,8 @@
 ; CHECK: v_cmp_nle_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 define void @fgt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
    %r2 = fcmp ugt double %r0, %r1
    %r3 = zext i1 %r2 to i32
    store i32 %r3, i32 addrspace(1)* %out
@@ -41,8 +41,8 @@
 ; CHECK: v_cmp_nlt_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 define void @fge_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
    %r2 = fcmp uge double %r0, %r1
    %r3 = zext i1 %r2 to i32
    store i32 %r3, i32 addrspace(1)* %out
@@ -53,8 +53,8 @@
 ; CHECK: v_cmp_neq_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
    %r2 = fcmp une double %r0, %r1
    %r3 = select i1 %r2, double %r0, double %r1
    store double %r3, double addrspace(1)* %out
@@ -65,8 +65,8 @@
 ; CHECK: v_cmp_nlg_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 define void @feq_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
    %r2 = fcmp ueq double %r0, %r1
    %r3 = select i1 %r2, double %r0, double %r1
    store double %r3, double addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/fconst64.ll b/llvm/test/CodeGen/R600/fconst64.ll
index 28e0c90..89af375 100644
--- a/llvm/test/CodeGen/R600/fconst64.ll
+++ b/llvm/test/CodeGen/R600/fconst64.ll
@@ -6,7 +6,7 @@
 ; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0
 
 define void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
-   %r1 = load double addrspace(1)* %in
+   %r1 = load double, double addrspace(1)* %in
    %r2 = fadd double %r1, 5.000000e+00
    store double %r2, double addrspace(1)* %out
    ret void
diff --git a/llvm/test/CodeGen/R600/fdiv.f64.ll b/llvm/test/CodeGen/R600/fdiv.f64.ll
index c96f141b..7c022e3 100644
--- a/llvm/test/CodeGen/R600/fdiv.f64.ll
+++ b/llvm/test/CodeGen/R600/fdiv.f64.ll
@@ -31,8 +31,8 @@
 ; COMMON: s_endpgm
 define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in) nounwind {
   %gep.1 = getelementptr double, double addrspace(1)* %in, i32 1
-  %num = load double addrspace(1)* %in
-  %den = load double addrspace(1)* %gep.1
+  %num = load double, double addrspace(1)* %in
+  %den = load double, double addrspace(1)* %gep.1
   %result = fdiv double %num, %den
   store double %result, double addrspace(1)* %out
   ret void
@@ -40,7 +40,7 @@
 
 ; COMMON-LABEL: {{^}}fdiv_f64_s_v:
 define void @fdiv_f64_s_v(double addrspace(1)* %out, double addrspace(1)* %in, double %num) nounwind {
-  %den = load double addrspace(1)* %in
+  %den = load double, double addrspace(1)* %in
   %result = fdiv double %num, %den
   store double %result, double addrspace(1)* %out
   ret void
@@ -48,7 +48,7 @@
 
 ; COMMON-LABEL: {{^}}fdiv_f64_v_s:
 define void @fdiv_f64_v_s(double addrspace(1)* %out, double addrspace(1)* %in, double %den) nounwind {
-  %num = load double addrspace(1)* %in
+  %num = load double, double addrspace(1)* %in
   %result = fdiv double %num, %den
   store double %result, double addrspace(1)* %out
   ret void
@@ -64,8 +64,8 @@
 ; COMMON-LABEL: {{^}}v_fdiv_v2f64:
 define void @v_fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in) nounwind {
   %gep.1 = getelementptr <2 x double>, <2 x double> addrspace(1)* %in, i32 1
-  %num = load <2 x double> addrspace(1)* %in
-  %den = load <2 x double> addrspace(1)* %gep.1
+  %num = load <2 x double>, <2 x double> addrspace(1)* %in
+  %den = load <2 x double>, <2 x double> addrspace(1)* %gep.1
   %result = fdiv <2 x double> %num, %den
   store <2 x double> %result, <2 x double> addrspace(1)* %out
   ret void
@@ -81,8 +81,8 @@
 ; COMMON-LABEL: {{^}}v_fdiv_v4f64:
 define void @v_fdiv_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) nounwind {
   %gep.1 = getelementptr <4 x double>, <4 x double> addrspace(1)* %in, i32 1
-  %num = load <4 x double> addrspace(1)* %in
-  %den = load <4 x double> addrspace(1)* %gep.1
+  %num = load <4 x double>, <4 x double> addrspace(1)* %in
+  %den = load <4 x double>, <4 x double> addrspace(1)* %gep.1
   %result = fdiv <4 x double> %num, %den
   store <4 x double> %result, <4 x double> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/fdiv.ll b/llvm/test/CodeGen/R600/fdiv.ll
index 43ead29..7cbf873 100644
--- a/llvm/test/CodeGen/R600/fdiv.ll
+++ b/llvm/test/CodeGen/R600/fdiv.ll
@@ -60,8 +60,8 @@
 ; SI-DAG: v_mul_f32
 define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float> addrspace(1) * %in
-  %b = load <4 x float> addrspace(1) * %b_ptr
+  %a = load <4 x float>, <4 x float> addrspace(1) * %in
+  %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
   %result = fdiv <4 x float> %a, %b
   store <4 x float> %result, <4 x float> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/fetch-limits.r600.ll b/llvm/test/CodeGen/R600/fetch-limits.r600.ll
index d35573e..821760c 100644
--- a/llvm/test/CodeGen/R600/fetch-limits.r600.ll
+++ b/llvm/test/CodeGen/R600/fetch-limits.r600.ll
@@ -9,15 +9,15 @@
 
 define void @fetch_limits_r600() #0 {
 entry:
-  %0 = load <4 x float> addrspace(8)* null
-  %1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
-  %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
-  %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
-  %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
-  %5 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
-  %6 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
-  %7 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
-  %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %0 = load <4 x float>, <4 x float> addrspace(8)* null
+  %1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %6 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
   %res0 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %0, i32 0, i32 0, i32 1)
   %res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %1, i32 0, i32 0, i32 1)
   %res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %2, i32 0, i32 0, i32 1)
diff --git a/llvm/test/CodeGen/R600/fetch-limits.r700+.ll b/llvm/test/CodeGen/R600/fetch-limits.r700+.ll
index 17760a0..2e1916e 100644
--- a/llvm/test/CodeGen/R600/fetch-limits.r700+.ll
+++ b/llvm/test/CodeGen/R600/fetch-limits.r700+.ll
@@ -18,23 +18,23 @@
 
 define void @fetch_limits_r700() #0 {
 entry:
-  %0 = load <4 x float> addrspace(8)* null
-  %1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
-  %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
-  %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
-  %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
-  %5 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
-  %6 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
-  %7 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
-  %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
-  %9 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
-  %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
-  %11 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
-  %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
-  %13 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
-  %14 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
-  %15 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
-  %16 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %0 = load <4 x float>, <4 x float> addrspace(8)* null
+  %1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %6 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %9 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %11 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+  %13 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
+  %14 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %15 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
+  %16 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
   %res0 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %0, i32 0, i32 0, i32 1)
   %res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %1, i32 0, i32 0, i32 1)
   %res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %2, i32 0, i32 0, i32 1)
diff --git a/llvm/test/CodeGen/R600/flat-address-space.ll b/llvm/test/CodeGen/R600/flat-address-space.ll
index 22ad576..425d67d 100644
--- a/llvm/test/CodeGen/R600/flat-address-space.ll
+++ b/llvm/test/CodeGen/R600/flat-address-space.ll
@@ -26,7 +26,7 @@
 end:
   %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
   store i32 %x, i32 addrspace(4)* %fptr, align 4
-;  %val = load i32 addrspace(4)* %fptr, align 4
+;  %val = load i32, i32 addrspace(4)* %fptr, align 4
 ;  store i32 %val, i32 addrspace(1)* %out, align 4
   ret void
 }
@@ -87,7 +87,7 @@
 ; CHECK: flat_load_dword
 define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
   %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
-  %fload = load i32 addrspace(4)* %fptr, align 4
+  %fload = load i32, i32 addrspace(4)* %fptr, align 4
   store i32 %fload, i32 addrspace(1)* %out, align 4
   ret void
 }
@@ -96,7 +96,7 @@
 ; CHECK: flat_load_dwordx2
 define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
   %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
-  %fload = load i64 addrspace(4)* %fptr, align 4
+  %fload = load i64, i64 addrspace(4)* %fptr, align 4
   store i64 %fload, i64 addrspace(1)* %out, align 8
   ret void
 }
@@ -105,7 +105,7 @@
 ; CHECK: flat_load_dwordx4
 define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
   %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
-  %fload = load <4 x i32> addrspace(4)* %fptr, align 4
+  %fload = load <4 x i32>, <4 x i32> addrspace(4)* %fptr, align 4
   store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8
   ret void
 }
@@ -114,7 +114,7 @@
 ; CHECK: flat_load_sbyte
 define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
   %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
-  %fload = load i8 addrspace(4)* %fptr, align 4
+  %fload = load i8, i8 addrspace(4)* %fptr, align 4
   %ext = sext i8 %fload to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
@@ -124,7 +124,7 @@
 ; CHECK: flat_load_ubyte
 define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
   %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
-  %fload = load i8 addrspace(4)* %fptr, align 4
+  %fload = load i8, i8 addrspace(4)* %fptr, align 4
   %ext = zext i8 %fload to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
@@ -134,7 +134,7 @@
 ; CHECK: flat_load_sshort
 define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
   %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
-  %fload = load i16 addrspace(4)* %fptr, align 4
+  %fload = load i16, i16 addrspace(4)* %fptr, align 4
   %ext = sext i16 %fload to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
@@ -144,7 +144,7 @@
 ; CHECK: flat_load_ushort
 define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
   %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
-  %fload = load i16 addrspace(4)* %fptr, align 4
+  %fload = load i16, i16 addrspace(4)* %fptr, align 4
   %ext = zext i16 %fload to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
@@ -171,7 +171,7 @@
   store i32 %x, i32 addrspace(4)* %fptr
   ; Dummy call
   call void @llvm.AMDGPU.barrier.local() #1
-  %reload = load i32 addrspace(4)* %fptr, align 4
+  %reload = load i32, i32 addrspace(4)* %fptr, align 4
   store i32 %reload, i32 addrspace(1)* %out, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/fma-combine.ll b/llvm/test/CodeGen/R600/fma-combine.ll
index 9c77c15..bd574b8 100644
--- a/llvm/test/CodeGen/R600/fma-combine.ll
+++ b/llvm/test/CodeGen/R600/fma-combine.ll
@@ -20,9 +20,9 @@
   %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
 
   %mul = fmul double %a, %b
   %fma = fadd double %mul, %c
@@ -50,10 +50,10 @@
   %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
-  %d = load double addrspace(1)* %gep.3
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
+  %d = load double, double addrspace(1)* %gep.3
 
   %mul = fmul double %a, %b
   %fma0 = fadd double %mul, %c
@@ -77,9 +77,9 @@
   %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
 
   %mul = fmul double %a, %b
   %fma = fadd double %c, %mul
@@ -101,9 +101,9 @@
   %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
 
   %mul = fmul double %a, %b
   %fma = fsub double %mul, %c
@@ -131,10 +131,10 @@
   %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
-  %d = load double addrspace(1)* %gep.3
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
+  %d = load double, double addrspace(1)* %gep.3
 
   %mul = fmul double %a, %b
   %fma0 = fsub double %mul, %c
@@ -158,9 +158,9 @@
   %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
 
   %mul = fmul double %a, %b
   %fma = fsub double %c, %mul
@@ -188,10 +188,10 @@
   %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
-  %d = load double addrspace(1)* %gep.3
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
+  %d = load double, double addrspace(1)* %gep.3
 
   %mul = fmul double %a, %b
   %fma0 = fsub double %c, %mul
@@ -215,9 +215,9 @@
   %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
 
   %mul = fmul double %a, %b
   %mul.neg = fsub double -0.0, %mul
@@ -246,10 +246,10 @@
   %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
-  %d = load double addrspace(1)* %gep.3
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
+  %d = load double, double addrspace(1)* %gep.3
 
   %mul = fmul double %a, %b
   %mul.neg = fsub double -0.0, %mul
@@ -280,10 +280,10 @@
   %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
-  %d = load double addrspace(1)* %gep.3
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
+  %d = load double, double addrspace(1)* %gep.3
 
   %mul = fmul double %a, %b
   %mul.neg = fsub double -0.0, %mul
@@ -315,11 +315,11 @@
   %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
   %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
 
-  %x = load double addrspace(1)* %gep.0
-  %y = load double addrspace(1)* %gep.1
-  %z = load double addrspace(1)* %gep.2
-  %u = load double addrspace(1)* %gep.3
-  %v = load double addrspace(1)* %gep.4
+  %x = load double, double addrspace(1)* %gep.0
+  %y = load double, double addrspace(1)* %gep.1
+  %z = load double, double addrspace(1)* %gep.2
+  %u = load double, double addrspace(1)* %gep.3
+  %v = load double, double addrspace(1)* %gep.4
 
   %tmp0 = fmul double %u, %v
   %tmp1 = call double @llvm.fma.f64(double %x, double %y, double %tmp0) #0
@@ -350,11 +350,11 @@
   %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
   %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
 
-  %x = load double addrspace(1)* %gep.0
-  %y = load double addrspace(1)* %gep.1
-  %z = load double addrspace(1)* %gep.2
-  %u = load double addrspace(1)* %gep.3
-  %v = load double addrspace(1)* %gep.4
+  %x = load double, double addrspace(1)* %gep.0
+  %y = load double, double addrspace(1)* %gep.1
+  %z = load double, double addrspace(1)* %gep.2
+  %u = load double, double addrspace(1)* %gep.3
+  %v = load double, double addrspace(1)* %gep.4
 
   %tmp0 = fmul double %u, %v
   %tmp1 = call double @llvm.fma.f64(double %y, double %z, double %tmp0) #0
diff --git a/llvm/test/CodeGen/R600/fma.f64.ll b/llvm/test/CodeGen/R600/fma.f64.ll
index bca312b..0a55ef7 100644
--- a/llvm/test/CodeGen/R600/fma.f64.ll
+++ b/llvm/test/CodeGen/R600/fma.f64.ll
@@ -10,9 +10,9 @@
 ; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2, double addrspace(1)* %in3) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
-   %r2 = load double addrspace(1)* %in3
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
+   %r2 = load double, double addrspace(1)* %in3
    %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)
    store double %r3, double addrspace(1)* %out
    ret void
@@ -23,9 +23,9 @@
 ; SI: v_fma_f64
 define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
                        <2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
-   %r0 = load <2 x double> addrspace(1)* %in1
-   %r1 = load <2 x double> addrspace(1)* %in2
-   %r2 = load <2 x double> addrspace(1)* %in3
+   %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1
+   %r1 = load <2 x double>, <2 x double> addrspace(1)* %in2
+   %r2 = load <2 x double>, <2 x double> addrspace(1)* %in3
    %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
    store <2 x double> %r3, <2 x double> addrspace(1)* %out
    ret void
@@ -38,9 +38,9 @@
 ; SI: v_fma_f64
 define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
                        <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
-   %r0 = load <4 x double> addrspace(1)* %in1
-   %r1 = load <4 x double> addrspace(1)* %in2
-   %r2 = load <4 x double> addrspace(1)* %in3
+   %r0 = load <4 x double>, <4 x double> addrspace(1)* %in1
+   %r1 = load <4 x double>, <4 x double> addrspace(1)* %in2
+   %r2 = load <4 x double>, <4 x double> addrspace(1)* %in3
    %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
    store <4 x double> %r3, <4 x double> addrspace(1)* %out
    ret void
diff --git a/llvm/test/CodeGen/R600/fma.ll b/llvm/test/CodeGen/R600/fma.ll
index 3c874b2..d6024aa 100644
--- a/llvm/test/CodeGen/R600/fma.ll
+++ b/llvm/test/CodeGen/R600/fma.ll
@@ -14,9 +14,9 @@
 ; EG: FMA {{\*? *}}[[RES]]
 define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
                      float addrspace(1)* %in2, float addrspace(1)* %in3) {
-  %r0 = load float addrspace(1)* %in1
-  %r1 = load float addrspace(1)* %in2
-  %r2 = load float addrspace(1)* %in3
+  %r0 = load float, float addrspace(1)* %in1
+  %r1 = load float, float addrspace(1)* %in2
+  %r2 = load float, float addrspace(1)* %in3
   %r3 = tail call float @llvm.fma.f32(float %r0, float %r1, float %r2)
   store float %r3, float addrspace(1)* %out
   ret void
@@ -31,9 +31,9 @@
 ; EG-DAG: FMA {{\*? *}}[[RES]].[[CHHI]]
 define void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1,
                        <2 x float> addrspace(1)* %in2, <2 x float> addrspace(1)* %in3) {
-  %r0 = load <2 x float> addrspace(1)* %in1
-  %r1 = load <2 x float> addrspace(1)* %in2
-  %r2 = load <2 x float> addrspace(1)* %in3
+  %r0 = load <2 x float>, <2 x float> addrspace(1)* %in1
+  %r1 = load <2 x float>, <2 x float> addrspace(1)* %in2
+  %r2 = load <2 x float>, <2 x float> addrspace(1)* %in3
   %r3 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2)
   store <2 x float> %r3, <2 x float> addrspace(1)* %out
   ret void
@@ -52,9 +52,9 @@
 ; EG-DAG: FMA {{\*? *}}[[RES]].W
 define void @fma_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1,
                        <4 x float> addrspace(1)* %in2, <4 x float> addrspace(1)* %in3) {
-  %r0 = load <4 x float> addrspace(1)* %in1
-  %r1 = load <4 x float> addrspace(1)* %in2
-  %r2 = load <4 x float> addrspace(1)* %in3
+  %r0 = load <4 x float>, <4 x float> addrspace(1)* %in1
+  %r1 = load <4 x float>, <4 x float> addrspace(1)* %in2
+  %r2 = load <4 x float>, <4 x float> addrspace(1)* %in3
   %r3 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %r0, <4 x float> %r1, <4 x float> %r2)
   store <4 x float> %r3, <4 x float> addrspace(1)* %out
   ret void
@@ -68,8 +68,8 @@
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %in.a.gep, align 4
-  %b = load float addrspace(1)* %in.b.gep, align 4
+  %a = load float, float addrspace(1)* %in.a.gep, align 4
+  %b = load float, float addrspace(1)* %in.b.gep, align 4
 
   %fma = call float @llvm.fma.f32(float %a, float 2.0, float %b)
   store float %fma, float addrspace(1)* %out.gep, align 4
@@ -83,8 +83,8 @@
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %in.a.gep, align 4
-  %c = load float addrspace(1)* %in.b.gep, align 4
+  %a = load float, float addrspace(1)* %in.a.gep, align 4
+  %c = load float, float addrspace(1)* %in.b.gep, align 4
 
   %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
   store float %fma, float addrspace(1)* %out.gep, align 4
diff --git a/llvm/test/CodeGen/R600/fmax3.ll b/llvm/test/CodeGen/R600/fmax3.ll
index 629c032..c3028a6 100644
--- a/llvm/test/CodeGen/R600/fmax3.ll
+++ b/llvm/test/CodeGen/R600/fmax3.ll
@@ -11,9 +11,9 @@
 ; SI: buffer_store_dword [[RESULT]],
 ; SI: s_endpgm
 define void @test_fmax3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
-  %a = load float addrspace(1)* %aptr, align 4
-  %b = load float addrspace(1)* %bptr, align 4
-  %c = load float addrspace(1)* %cptr, align 4
+  %a = load float, float addrspace(1)* %aptr, align 4
+  %b = load float, float addrspace(1)* %bptr, align 4
+  %c = load float, float addrspace(1)* %cptr, align 4
   %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
   %f1 = call float @llvm.maxnum.f32(float %f0, float %c) nounwind readnone
   store float %f1, float addrspace(1)* %out, align 4
@@ -29,9 +29,9 @@
 ; SI: buffer_store_dword [[RESULT]],
 ; SI: s_endpgm
 define void @test_fmax3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
-  %a = load float addrspace(1)* %aptr, align 4
-  %b = load float addrspace(1)* %bptr, align 4
-  %c = load float addrspace(1)* %cptr, align 4
+  %a = load float, float addrspace(1)* %aptr, align 4
+  %b = load float, float addrspace(1)* %bptr, align 4
+  %c = load float, float addrspace(1)* %cptr, align 4
   %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
   %f1 = call float @llvm.maxnum.f32(float %c, float %f0) nounwind readnone
   store float %f1, float addrspace(1)* %out, align 4
diff --git a/llvm/test/CodeGen/R600/fmax_legacy.f64.ll b/llvm/test/CodeGen/R600/fmax_legacy.f64.ll
index 0168900..8282438 100644
--- a/llvm/test/CodeGen/R600/fmax_legacy.f64.ll
+++ b/llvm/test/CodeGen/R600/fmax_legacy.f64.ll
@@ -9,8 +9,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %cmp = fcmp uge double %a, %b
   %val = select i1 %cmp, double %a, double %b
@@ -24,8 +24,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %cmp = fcmp oge double %a, %b
   %val = select i1 %cmp, double %a, double %b
@@ -39,8 +39,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %cmp = fcmp ugt double %a, %b
   %val = select i1 %cmp, double %a, double %b
@@ -54,8 +54,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %cmp = fcmp ogt double %a, %b
   %val = select i1 %cmp, double %a, double %b
diff --git a/llvm/test/CodeGen/R600/fmax_legacy.ll b/llvm/test/CodeGen/R600/fmax_legacy.ll
index 310aff8..413957d 100644
--- a/llvm/test/CodeGen/R600/fmax_legacy.ll
+++ b/llvm/test/CodeGen/R600/fmax_legacy.ll
@@ -18,8 +18,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp uge float %a, %b
   %val = select i1 %cmp, float %a, float %b
@@ -38,8 +38,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp oge float %a, %b
   %val = select i1 %cmp, float %a, float %b
@@ -58,8 +58,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp ugt float %a, %b
   %val = select i1 %cmp, float %a, float %b
@@ -78,8 +78,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp ogt float %a, %b
   %val = select i1 %cmp, float %a, float %b
@@ -102,8 +102,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp ogt float %a, %b
   %val = select i1 %cmp, float %a, float %b
diff --git a/llvm/test/CodeGen/R600/fmin3.ll b/llvm/test/CodeGen/R600/fmin3.ll
index e3acb31..0a76699 100644
--- a/llvm/test/CodeGen/R600/fmin3.ll
+++ b/llvm/test/CodeGen/R600/fmin3.ll
@@ -12,9 +12,9 @@
 ; SI: buffer_store_dword [[RESULT]],
 ; SI: s_endpgm
 define void @test_fmin3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
-  %a = load float addrspace(1)* %aptr, align 4
-  %b = load float addrspace(1)* %bptr, align 4
-  %c = load float addrspace(1)* %cptr, align 4
+  %a = load float, float addrspace(1)* %aptr, align 4
+  %b = load float, float addrspace(1)* %bptr, align 4
+  %c = load float, float addrspace(1)* %cptr, align 4
   %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
   %f1 = call float @llvm.minnum.f32(float %f0, float %c) nounwind readnone
   store float %f1, float addrspace(1)* %out, align 4
@@ -30,9 +30,9 @@
 ; SI: buffer_store_dword [[RESULT]],
 ; SI: s_endpgm
 define void @test_fmin3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
-  %a = load float addrspace(1)* %aptr, align 4
-  %b = load float addrspace(1)* %bptr, align 4
-  %c = load float addrspace(1)* %cptr, align 4
+  %a = load float, float addrspace(1)* %aptr, align 4
+  %b = load float, float addrspace(1)* %bptr, align 4
+  %c = load float, float addrspace(1)* %cptr, align 4
   %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
   %f1 = call float @llvm.minnum.f32(float %c, float %f0) nounwind readnone
   store float %f1, float addrspace(1)* %out, align 4
diff --git a/llvm/test/CodeGen/R600/fmin_legacy.f64.ll b/llvm/test/CodeGen/R600/fmin_legacy.f64.ll
index 395d927..e19a48f 100644
--- a/llvm/test/CodeGen/R600/fmin_legacy.f64.ll
+++ b/llvm/test/CodeGen/R600/fmin_legacy.f64.ll
@@ -19,8 +19,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %cmp = fcmp ule double %a, %b
   %val = select i1 %cmp, double %a, double %b
@@ -34,8 +34,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %cmp = fcmp ole double %a, %b
   %val = select i1 %cmp, double %a, double %b
@@ -49,8 +49,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %cmp = fcmp olt double %a, %b
   %val = select i1 %cmp, double %a, double %b
@@ -64,8 +64,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %cmp = fcmp ult double %a, %b
   %val = select i1 %cmp, double %a, double %b
diff --git a/llvm/test/CodeGen/R600/fmin_legacy.ll b/llvm/test/CodeGen/R600/fmin_legacy.ll
index dc24383..6a625c2 100644
--- a/llvm/test/CodeGen/R600/fmin_legacy.ll
+++ b/llvm/test/CodeGen/R600/fmin_legacy.ll
@@ -30,8 +30,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp ule float %a, %b
   %val = select i1 %cmp, float %a, float %b
@@ -49,8 +49,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp ole float %a, %b
   %val = select i1 %cmp, float %a, float %b
@@ -68,8 +68,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp olt float %a, %b
   %val = select i1 %cmp, float %a, float %b
@@ -87,8 +87,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp ult float %a, %b
   %val = select i1 %cmp, float %a, float %b
@@ -109,8 +109,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp ole float %a, %b
   %val0 = select i1 %cmp, float %a, float %b
diff --git a/llvm/test/CodeGen/R600/fmul.ll b/llvm/test/CodeGen/R600/fmul.ll
index 29b4c48..68ebc4d 100644
--- a/llvm/test/CodeGen/R600/fmul.ll
+++ b/llvm/test/CodeGen/R600/fmul.ll
@@ -43,8 +43,8 @@
 ; SI: v_mul_f32
 define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float> addrspace(1) * %in
-  %b = load <4 x float> addrspace(1) * %b_ptr
+  %a = load <4 x float>, <4 x float> addrspace(1) * %in
+  %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
   %result = fmul <4 x float> %a, %b
   store <4 x float> %result, <4 x float> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/fmul64.ll b/llvm/test/CodeGen/R600/fmul64.ll
index 9d7787c..3c222ea 100644
--- a/llvm/test/CodeGen/R600/fmul64.ll
+++ b/llvm/test/CodeGen/R600/fmul64.ll
@@ -5,8 +5,8 @@
 ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                       double addrspace(1)* %in2) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
    %r2 = fmul double %r0, %r1
    store double %r2, double addrspace(1)* %out
    ret void
@@ -17,8 +17,8 @@
 ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 define void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
                         <2 x double> addrspace(1)* %in2) {
-   %r0 = load <2 x double> addrspace(1)* %in1
-   %r1 = load <2 x double> addrspace(1)* %in2
+   %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1
+   %r1 = load <2 x double>, <2 x double> addrspace(1)* %in2
    %r2 = fmul <2 x double> %r0, %r1
    store <2 x double> %r2, <2 x double> addrspace(1)* %out
    ret void
@@ -31,8 +31,8 @@
 ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 define void @fmul_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
                         <4 x double> addrspace(1)* %in2) {
-   %r0 = load <4 x double> addrspace(1)* %in1
-   %r1 = load <4 x double> addrspace(1)* %in2
+   %r0 = load <4 x double>, <4 x double> addrspace(1)* %in1
+   %r1 = load <4 x double>, <4 x double> addrspace(1)* %in2
    %r2 = fmul <4 x double> %r0, %r1
    store <4 x double> %r2, <4 x double> addrspace(1)* %out
    ret void
diff --git a/llvm/test/CodeGen/R600/fmuladd.ll b/llvm/test/CodeGen/R600/fmuladd.ll
index 7297b27..ae84d84 100644
--- a/llvm/test/CodeGen/R600/fmuladd.ll
+++ b/llvm/test/CodeGen/R600/fmuladd.ll
@@ -10,9 +10,9 @@
 
 define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
                          float addrspace(1)* %in2, float addrspace(1)* %in3) {
-   %r0 = load float addrspace(1)* %in1
-   %r1 = load float addrspace(1)* %in2
-   %r2 = load float addrspace(1)* %in3
+   %r0 = load float, float addrspace(1)* %in1
+   %r1 = load float, float addrspace(1)* %in2
+   %r2 = load float, float addrspace(1)* %in3
    %r3 = tail call float @llvm.fmuladd.f32(float %r0, float %r1, float %r2)
    store float %r3, float addrspace(1)* %out
    ret void
@@ -23,9 +23,9 @@
 
 define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                          double addrspace(1)* %in2, double addrspace(1)* %in3) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
-   %r2 = load double addrspace(1)* %in3
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
+   %r2 = load double, double addrspace(1)* %in3
    %r3 = tail call double @llvm.fmuladd.f64(double %r0, double %r1, double %r2)
    store double %r3, double addrspace(1)* %out
    ret void
@@ -42,8 +42,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r1 = load float addrspace(1)* %gep.0
-  %r2 = load float addrspace(1)* %gep.1
+  %r1 = load float, float addrspace(1)* %gep.0
+  %r2 = load float, float addrspace(1)* %gep.1
 
   %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2)
   store float %r3, float addrspace(1)* %gep.out
@@ -61,8 +61,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r1 = load float addrspace(1)* %gep.0
-  %r2 = load float addrspace(1)* %gep.1
+  %r1 = load float, float addrspace(1)* %gep.0
+  %r2 = load float, float addrspace(1)* %gep.1
 
   %r3 = tail call float @llvm.fmuladd.f32(float %r1, float 2.0, float %r2)
   store float %r3, float addrspace(1)* %gep.out
@@ -82,8 +82,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r0 = load float addrspace(1)* %gep.0
-  %r1 = load float addrspace(1)* %gep.1
+  %r0 = load float, float addrspace(1)* %gep.0
+  %r1 = load float, float addrspace(1)* %gep.1
 
   %add.0 = fadd float %r0, %r0
   %add.1 = fadd float %add.0, %r1
@@ -104,8 +104,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r0 = load float addrspace(1)* %gep.0
-  %r1 = load float addrspace(1)* %gep.1
+  %r0 = load float, float addrspace(1)* %gep.0
+  %r1 = load float, float addrspace(1)* %gep.1
 
   %add.0 = fadd float %r0, %r0
   %add.1 = fadd float %r1, %add.0
@@ -124,8 +124,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r1 = load float addrspace(1)* %gep.0
-  %r2 = load float addrspace(1)* %gep.1
+  %r1 = load float, float addrspace(1)* %gep.0
+  %r2 = load float, float addrspace(1)* %gep.1
 
   %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1, float %r2)
   store float %r3, float addrspace(1)* %gep.out
@@ -144,8 +144,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r1 = load float addrspace(1)* %gep.0
-  %r2 = load float addrspace(1)* %gep.1
+  %r1 = load float, float addrspace(1)* %gep.0
+  %r2 = load float, float addrspace(1)* %gep.1
 
   %r1.fneg = fsub float -0.000000e+00, %r1
 
@@ -166,8 +166,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r1 = load float addrspace(1)* %gep.0
-  %r2 = load float addrspace(1)* %gep.1
+  %r1 = load float, float addrspace(1)* %gep.0
+  %r2 = load float, float addrspace(1)* %gep.1
 
   %r1.fneg = fsub float -0.000000e+00, %r1
 
@@ -188,8 +188,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r1 = load float addrspace(1)* %gep.0
-  %r2 = load float addrspace(1)* %gep.1
+  %r1 = load float, float addrspace(1)* %gep.0
+  %r2 = load float, float addrspace(1)* %gep.1
 
   %r2.fneg = fsub float -0.000000e+00, %r2
 
diff --git a/llvm/test/CodeGen/R600/fneg-fabs.f64.ll b/llvm/test/CodeGen/R600/fneg-fabs.f64.ll
index 7e6ede6..8830e82 100644
--- a/llvm/test/CodeGen/R600/fneg-fabs.f64.ll
+++ b/llvm/test/CodeGen/R600/fneg-fabs.f64.ll
@@ -15,8 +15,8 @@
 }
 
 define void @v_fneg_fabs_fadd_f64(double addrspace(1)* %out, double addrspace(1)* %xptr, double addrspace(1)* %yptr) {
-  %x = load double addrspace(1)* %xptr, align 8
-  %y = load double addrspace(1)* %xptr, align 8
+  %x = load double, double addrspace(1)* %xptr, align 8
+  %y = load double, double addrspace(1)* %xptr, align 8
   %fabs = call double @llvm.fabs.f64(double %x)
   %fsub = fsub double -0.000000e+00, %fabs
   %fadd = fadd double %y, %fsub
diff --git a/llvm/test/CodeGen/R600/fneg-fabs.ll b/llvm/test/CodeGen/R600/fneg-fabs.ll
index 4fde048..3b4930d 100644
--- a/llvm/test/CodeGen/R600/fneg-fabs.ll
+++ b/llvm/test/CodeGen/R600/fneg-fabs.ll
@@ -72,7 +72,7 @@
 ; FUNC-LABEL: {{^}}v_fneg_fabs_f32:
 ; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
 define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
-  %val = load float addrspace(1)* %in, align 4
+  %val = load float, float addrspace(1)* %in, align 4
   %fabs = call float @llvm.fabs.f32(float %val)
   %fsub = fsub float -0.000000e+00, %fabs
   store float %fsub, float addrspace(1)* %out, align 4
diff --git a/llvm/test/CodeGen/R600/fp16_to_fp.ll b/llvm/test/CodeGen/R600/fp16_to_fp.ll
index da78f61..5a79ca8 100644
--- a/llvm/test/CodeGen/R600/fp16_to_fp.ll
+++ b/llvm/test/CodeGen/R600/fp16_to_fp.ll
@@ -9,7 +9,7 @@
 ; SI: v_cvt_f32_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
 ; SI: buffer_store_dword [[RESULT]]
 define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
-  %val = load i16 addrspace(1)* %in, align 2
+  %val = load i16, i16 addrspace(1)* %in, align 2
   %cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
   store float %cvt, float addrspace(1)* %out, align 4
   ret void
@@ -22,7 +22,7 @@
 ; SI: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
 define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
-  %val = load i16 addrspace(1)* %in, align 2
+  %val = load i16, i16 addrspace(1)* %in, align 2
   %cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone
   store double %cvt, double addrspace(1)* %out, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/fp32_to_fp16.ll b/llvm/test/CodeGen/R600/fp32_to_fp16.ll
index c3c65ae..67925eb 100644
--- a/llvm/test/CodeGen/R600/fp32_to_fp16.ll
+++ b/llvm/test/CodeGen/R600/fp32_to_fp16.ll
@@ -8,7 +8,7 @@
 ; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[VAL]]
 ; SI: buffer_store_short [[RESULT]]
 define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
-  %val = load float addrspace(1)* %in, align 4
+  %val = load float, float addrspace(1)* %in, align 4
   %cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone
   store i16 %cvt, i16 addrspace(1)* %out, align 2
   ret void
diff --git a/llvm/test/CodeGen/R600/fp_to_sint.f64.ll b/llvm/test/CodeGen/R600/fp_to_sint.f64.ll
index 93fc847..12df660 100644
--- a/llvm/test/CodeGen/R600/fp_to_sint.f64.ll
+++ b/llvm/test/CodeGen/R600/fp_to_sint.f64.ll
@@ -49,7 +49,7 @@
 define void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %val = load double addrspace(1)* %gep, align 8
+  %val = load double, double addrspace(1)* %gep, align 8
   %cast = fptosi double %val to i64
   store i64 %cast, i64 addrspace(1)* %out, align 8
   ret void
diff --git a/llvm/test/CodeGen/R600/fp_to_sint.ll b/llvm/test/CodeGen/R600/fp_to_sint.ll
index 16549c3..301a94b 100644
--- a/llvm/test/CodeGen/R600/fp_to_sint.ll
+++ b/llvm/test/CodeGen/R600/fp_to_sint.ll
@@ -44,7 +44,7 @@
 ; SI: v_cvt_i32_f32_e32
 ; SI: v_cvt_i32_f32_e32
 define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-  %value = load <4 x float> addrspace(1) * %in
+  %value = load <4 x float>, <4 x float> addrspace(1) * %in
   %result = fptosi <4 x float> %value to <4 x i32>
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/fp_to_uint.f64.ll b/llvm/test/CodeGen/R600/fp_to_uint.f64.ll
index 472c378..41bc2a7 100644
--- a/llvm/test/CodeGen/R600/fp_to_uint.f64.ll
+++ b/llvm/test/CodeGen/R600/fp_to_uint.f64.ll
@@ -49,7 +49,7 @@
 define void @fp_to_uint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %val = load double addrspace(1)* %gep, align 8
+  %val = load double, double addrspace(1)* %gep, align 8
   %cast = fptoui double %val to i64
   store i64 %cast, i64 addrspace(1)* %out, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/fp_to_uint.ll b/llvm/test/CodeGen/R600/fp_to_uint.ll
index 804d90f..b7b6ccc 100644
--- a/llvm/test/CodeGen/R600/fp_to_uint.ll
+++ b/llvm/test/CodeGen/R600/fp_to_uint.ll
@@ -36,7 +36,7 @@
 ; SI: v_cvt_u32_f32_e32
 
 define void @fp_to_uint_v4f32_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-  %value = load <4 x float> addrspace(1) * %in
+  %value = load <4 x float>, <4 x float> addrspace(1) * %in
   %result = fptoui <4 x float> %value to <4 x i32>
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/frem.ll b/llvm/test/CodeGen/R600/frem.ll
index 9bc0542..f245ef0 100644
--- a/llvm/test/CodeGen/R600/frem.ll
+++ b/llvm/test/CodeGen/R600/frem.ll
@@ -16,8 +16,8 @@
 define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
                       float addrspace(1)* %in2) #0 {
    %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4
-   %r0 = load float addrspace(1)* %in1, align 4
-   %r1 = load float addrspace(1)* %gep2, align 4
+   %r0 = load float, float addrspace(1)* %in1, align 4
+   %r1 = load float, float addrspace(1)* %gep2, align 4
    %r2 = frem float %r0, %r1
    store float %r2, float addrspace(1)* %out, align 4
    ret void
@@ -35,8 +35,8 @@
 define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
                              float addrspace(1)* %in2) #1 {
    %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4
-   %r0 = load float addrspace(1)* %in1, align 4
-   %r1 = load float addrspace(1)* %gep2, align 4
+   %r0 = load float, float addrspace(1)* %in1, align 4
+   %r1 = load float, float addrspace(1)* %gep2, align 4
    %r2 = frem float %r0, %r1
    store float %r2, float addrspace(1)* %out, align 4
    ret void
@@ -55,8 +55,8 @@
 ; GCN: s_endpgm
 define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                       double addrspace(1)* %in2) #0 {
-   %r0 = load double addrspace(1)* %in1, align 8
-   %r1 = load double addrspace(1)* %in2, align 8
+   %r0 = load double, double addrspace(1)* %in1, align 8
+   %r1 = load double, double addrspace(1)* %in2, align 8
    %r2 = frem double %r0, %r1
    store double %r2, double addrspace(1)* %out, align 8
    ret void
@@ -71,8 +71,8 @@
 ; GCN: s_endpgm
 define void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                              double addrspace(1)* %in2) #1 {
-   %r0 = load double addrspace(1)* %in1, align 8
-   %r1 = load double addrspace(1)* %in2, align 8
+   %r0 = load double, double addrspace(1)* %in1, align 8
+   %r1 = load double, double addrspace(1)* %in2, align 8
    %r2 = frem double %r0, %r1
    store double %r2, double addrspace(1)* %out, align 8
    ret void
@@ -81,8 +81,8 @@
 define void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1,
                         <2 x float> addrspace(1)* %in2) #0 {
    %gep2 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in2, i32 4
-   %r0 = load <2 x float> addrspace(1)* %in1, align 8
-   %r1 = load <2 x float> addrspace(1)* %gep2, align 8
+   %r0 = load <2 x float>, <2 x float> addrspace(1)* %in1, align 8
+   %r1 = load <2 x float>, <2 x float> addrspace(1)* %gep2, align 8
    %r2 = frem <2 x float> %r0, %r1
    store <2 x float> %r2, <2 x float> addrspace(1)* %out, align 8
    ret void
@@ -91,8 +91,8 @@
 define void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1,
                         <4 x float> addrspace(1)* %in2) #0 {
    %gep2 = getelementptr <4 x float>, <4 x float> addrspace(1)* %in2, i32 4
-   %r0 = load <4 x float> addrspace(1)* %in1, align 16
-   %r1 = load <4 x float> addrspace(1)* %gep2, align 16
+   %r0 = load <4 x float>, <4 x float> addrspace(1)* %in1, align 16
+   %r1 = load <4 x float>, <4 x float> addrspace(1)* %gep2, align 16
    %r2 = frem <4 x float> %r0, %r1
    store <4 x float> %r2, <4 x float> addrspace(1)* %out, align 16
    ret void
@@ -101,8 +101,8 @@
 define void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
                         <2 x double> addrspace(1)* %in2) #0 {
    %gep2 = getelementptr <2 x double>, <2 x double> addrspace(1)* %in2, i32 4
-   %r0 = load <2 x double> addrspace(1)* %in1, align 16
-   %r1 = load <2 x double> addrspace(1)* %gep2, align 16
+   %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1, align 16
+   %r1 = load <2 x double>, <2 x double> addrspace(1)* %gep2, align 16
    %r2 = frem <2 x double> %r0, %r1
    store <2 x double> %r2, <2 x double> addrspace(1)* %out, align 16
    ret void
diff --git a/llvm/test/CodeGen/R600/fsqrt.ll b/llvm/test/CodeGen/R600/fsqrt.ll
index 1fdf3e4..0410134 100644
--- a/llvm/test/CodeGen/R600/fsqrt.ll
+++ b/llvm/test/CodeGen/R600/fsqrt.ll
@@ -9,7 +9,7 @@
 ; CHECK: v_sqrt_f32_e32 {{v[0-9]+, v[0-9]+}}
 
 define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
-   %r0 = load float addrspace(1)* %in
+   %r0 = load float, float addrspace(1)* %in
    %r1 = call float @llvm.sqrt.f32(float %r0)
    store float %r1, float addrspace(1)* %out
    ret void
@@ -19,7 +19,7 @@
 ; CHECK: v_sqrt_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 
 define void @fsqrt_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
-   %r0 = load double addrspace(1)* %in
+   %r0 = load double, double addrspace(1)* %in
    %r1 = call double @llvm.sqrt.f64(double %r0)
    store double %r1, double addrspace(1)* %out
    ret void
diff --git a/llvm/test/CodeGen/R600/fsub.ll b/llvm/test/CodeGen/R600/fsub.ll
index 59866a0..dfe41cb 100644
--- a/llvm/test/CodeGen/R600/fsub.ll
+++ b/llvm/test/CodeGen/R600/fsub.ll
@@ -7,8 +7,8 @@
 ; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
 define void @v_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
   %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
-  %a = load float addrspace(1)* %in, align 4
-  %b = load float addrspace(1)* %b_ptr, align 4
+  %a = load float, float addrspace(1)* %in, align 4
+  %b = load float, float addrspace(1)* %b_ptr, align 4
   %result = fsub float %a, %b
   store float %result, float addrspace(1)* %out, align 4
   ret void
@@ -53,8 +53,8 @@
 ; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
 define void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float> addrspace(1)* %in, align 16
-  %b = load <4 x float> addrspace(1)* %b_ptr, align 16
+  %a = load <4 x float>, <4 x float> addrspace(1)* %in, align 16
+  %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr, align 16
   %result = fsub <4 x float> %a, %b
   store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
   ret void
diff --git a/llvm/test/CodeGen/R600/fsub64.ll b/llvm/test/CodeGen/R600/fsub64.ll
index d18863f..f34a48e 100644
--- a/llvm/test/CodeGen/R600/fsub64.ll
+++ b/llvm/test/CodeGen/R600/fsub64.ll
@@ -7,8 +7,8 @@
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
 define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                       double addrspace(1)* %in2) {
-  %r0 = load double addrspace(1)* %in1
-  %r1 = load double addrspace(1)* %in2
+  %r0 = load double, double addrspace(1)* %in1
+  %r1 = load double, double addrspace(1)* %in2
   %r2 = fsub double %r0, %r1
   store double %r2, double addrspace(1)* %out
   ret void
@@ -18,8 +18,8 @@
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|}}
 define void @fsub_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                            double addrspace(1)* %in2) {
-  %r0 = load double addrspace(1)* %in1
-  %r1 = load double addrspace(1)* %in2
+  %r0 = load double, double addrspace(1)* %in1
+  %r1 = load double, double addrspace(1)* %in2
   %r1.fabs = call double @llvm.fabs.f64(double %r1) #0
   %r2 = fsub double %r0, %r1.fabs
   store double %r2, double addrspace(1)* %out
@@ -30,8 +30,8 @@
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], |v\[[0-9]+:[0-9]+\]|, -v\[[0-9]+:[0-9]+\]}}
 define void @fsub_fabs_inv_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                                double addrspace(1)* %in2) {
-  %r0 = load double addrspace(1)* %in1
-  %r1 = load double addrspace(1)* %in2
+  %r0 = load double, double addrspace(1)* %in1
+  %r1 = load double, double addrspace(1)* %in2
   %r0.fabs = call double @llvm.fabs.f64(double %r0) #0
   %r2 = fsub double %r0.fabs, %r1
   store double %r2, double addrspace(1)* %out
@@ -86,8 +86,8 @@
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
 define void @fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x double>, <4 x double> addrspace(1)* %in, i32 1
-  %a = load <4 x double> addrspace(1)* %in
-  %b = load <4 x double> addrspace(1)* %b_ptr
+  %a = load <4 x double>, <4 x double> addrspace(1)* %in
+  %b = load <4 x double>, <4 x double> addrspace(1)* %b_ptr
   %result = fsub <4 x double> %a, %b
   store <4 x double> %result, <4 x double> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/ftrunc.f64.ll b/llvm/test/CodeGen/R600/ftrunc.f64.ll
index 21399a8..dd51f64 100644
--- a/llvm/test/CodeGen/R600/ftrunc.f64.ll
+++ b/llvm/test/CodeGen/R600/ftrunc.f64.ll
@@ -14,7 +14,7 @@
 ; SI: v_bfe_u32 {{v[0-9]+}}, {{v[0-9]+}}, 20, 11
 ; SI: s_endpgm
 define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
-  %x = load double addrspace(1)* %in, align 8
+  %x = load double, double addrspace(1)* %in, align 8
   %y = call double @llvm.trunc.f64(double %x) nounwind readnone
   store double %y, double addrspace(1)* %out, align 8
   ret void
diff --git a/llvm/test/CodeGen/R600/global-directive.ll b/llvm/test/CodeGen/R600/global-directive.ll
index 67ba4b6..be775cf 100644
--- a/llvm/test/CodeGen/R600/global-directive.ll
+++ b/llvm/test/CodeGen/R600/global-directive.ll
@@ -7,8 +7,8 @@
 ; SI: {{^}}foo:
 define void @foo(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32 addrspace(1)* %in
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %in
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %result = add i32 %a, %b
   store i32 %result, i32 addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/global-extload-i1.ll b/llvm/test/CodeGen/R600/global-extload-i1.ll
index 67d36ce..bd9557d 100644
--- a/llvm/test/CodeGen/R600/global-extload-i1.ll
+++ b/llvm/test/CodeGen/R600/global-extload-i1.ll
@@ -8,7 +8,7 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @zextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %a = load i1 addrspace(1)* %in
+  %a = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %a to i32
   store i32 %ext, i32 addrspace(1)* %out
   ret void
@@ -20,7 +20,7 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @sextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %a = load i1 addrspace(1)* %in
+  %a = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %a to i32
   store i32 %ext, i32 addrspace(1)* %out
   ret void
@@ -29,7 +29,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i32:
 ; SI: s_endpgm
 define void @zextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i1> addrspace(1)* %in
+  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
   %ext = zext <1 x i1> %load to <1 x i32>
   store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
   ret void
@@ -38,7 +38,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i32:
 ; SI: s_endpgm
 define void @sextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i1> addrspace(1)* %in
+  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
   %ext = sext <1 x i1> %load to <1 x i32>
   store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
   ret void
@@ -47,7 +47,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i32:
 ; SI: s_endpgm
 define void @zextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i1> addrspace(1)* %in
+  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
   %ext = zext <2 x i1> %load to <2 x i32>
   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
   ret void
@@ -56,7 +56,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i32:
 ; SI: s_endpgm
 define void @sextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i1> addrspace(1)* %in
+  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
   %ext = sext <2 x i1> %load to <2 x i32>
   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
   ret void
@@ -65,7 +65,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i32:
 ; SI: s_endpgm
 define void @zextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i1> addrspace(1)* %in
+  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
   %ext = zext <4 x i1> %load to <4 x i32>
   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
   ret void
@@ -74,7 +74,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i32:
 ; SI: s_endpgm
 define void @sextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i1> addrspace(1)* %in
+  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
   %ext = sext <4 x i1> %load to <4 x i32>
   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
   ret void
@@ -83,7 +83,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i32:
 ; SI: s_endpgm
 define void @zextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i1> addrspace(1)* %in
+  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
   %ext = zext <8 x i1> %load to <8 x i32>
   store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
   ret void
@@ -92,7 +92,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i32:
 ; SI: s_endpgm
 define void @sextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i1> addrspace(1)* %in
+  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
   %ext = sext <8 x i1> %load to <8 x i32>
   store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
   ret void
@@ -101,7 +101,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i32:
 ; SI: s_endpgm
 define void @zextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i1> addrspace(1)* %in
+  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
   %ext = zext <16 x i1> %load to <16 x i32>
   store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
   ret void
@@ -110,7 +110,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i32:
 ; SI: s_endpgm
 define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i1> addrspace(1)* %in
+  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
   %ext = sext <16 x i1> %load to <16 x i32>
   store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
   ret void
@@ -119,7 +119,7 @@
 ; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i32:
 ; XSI: s_endpgm
 ; define void @zextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i1> addrspace(1)* %in
+;   %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
 ;   %ext = zext <32 x i1> %load to <32 x i32>
 ;   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
 ;   ret void
@@ -128,7 +128,7 @@
 ; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i32:
 ; XSI: s_endpgm
 ; define void @sextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i1> addrspace(1)* %in
+;   %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
 ;   %ext = sext <32 x i1> %load to <32 x i32>
 ;   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
 ;   ret void
@@ -137,7 +137,7 @@
 ; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i32:
 ; XSI: s_endpgm
 ; define void @zextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i1> addrspace(1)* %in
+;   %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
 ;   %ext = zext <64 x i1> %load to <64 x i32>
 ;   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
 ;   ret void
@@ -146,7 +146,7 @@
 ; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i32:
 ; XSI: s_endpgm
 ; define void @sextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i1> addrspace(1)* %in
+;   %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
 ;   %ext = sext <64 x i1> %load to <64 x i32>
 ;   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
 ;   ret void
@@ -157,7 +157,7 @@
 ; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
 ; SI: buffer_store_dwordx2
 define void @zextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %a = load i1 addrspace(1)* %in
+  %a = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %a to i64
   store i64 %ext, i64 addrspace(1)* %out
   ret void
@@ -169,7 +169,7 @@
 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
 ; SI: buffer_store_dwordx2
 define void @sextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %a = load i1 addrspace(1)* %in
+  %a = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %a to i64
   store i64 %ext, i64 addrspace(1)* %out
   ret void
@@ -178,7 +178,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i64:
 ; SI: s_endpgm
 define void @zextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i1> addrspace(1)* %in
+  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
   %ext = zext <1 x i1> %load to <1 x i64>
   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
   ret void
@@ -187,7 +187,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i64:
 ; SI: s_endpgm
 define void @sextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i1> addrspace(1)* %in
+  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
   %ext = sext <1 x i1> %load to <1 x i64>
   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
   ret void
@@ -196,7 +196,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i64:
 ; SI: s_endpgm
 define void @zextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i1> addrspace(1)* %in
+  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
   %ext = zext <2 x i1> %load to <2 x i64>
   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
   ret void
@@ -205,7 +205,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i64:
 ; SI: s_endpgm
 define void @sextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i1> addrspace(1)* %in
+  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
   %ext = sext <2 x i1> %load to <2 x i64>
   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
   ret void
@@ -214,7 +214,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i64:
 ; SI: s_endpgm
 define void @zextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i1> addrspace(1)* %in
+  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
   %ext = zext <4 x i1> %load to <4 x i64>
   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
   ret void
@@ -223,7 +223,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i64:
 ; SI: s_endpgm
 define void @sextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i1> addrspace(1)* %in
+  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
   %ext = sext <4 x i1> %load to <4 x i64>
   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
   ret void
@@ -232,7 +232,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i64:
 ; SI: s_endpgm
 define void @zextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i1> addrspace(1)* %in
+  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
   %ext = zext <8 x i1> %load to <8 x i64>
   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
   ret void
@@ -241,7 +241,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i64:
 ; SI: s_endpgm
 define void @sextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i1> addrspace(1)* %in
+  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
   %ext = sext <8 x i1> %load to <8 x i64>
   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
   ret void
@@ -250,7 +250,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i64:
 ; SI: s_endpgm
 define void @zextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i1> addrspace(1)* %in
+  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
   %ext = zext <16 x i1> %load to <16 x i64>
   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
   ret void
@@ -259,7 +259,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i64:
 ; SI: s_endpgm
 define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i1> addrspace(1)* %in
+  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
   %ext = sext <16 x i1> %load to <16 x i64>
   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
   ret void
@@ -268,7 +268,7 @@
 ; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i64:
 ; XSI: s_endpgm
 ; define void @zextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i1> addrspace(1)* %in
+;   %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
 ;   %ext = zext <32 x i1> %load to <32 x i64>
 ;   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
 ;   ret void
@@ -277,7 +277,7 @@
 ; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i64:
 ; XSI: s_endpgm
 ; define void @sextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i1> addrspace(1)* %in
+;   %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
 ;   %ext = sext <32 x i1> %load to <32 x i64>
 ;   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
 ;   ret void
@@ -286,7 +286,7 @@
 ; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i64:
 ; XSI: s_endpgm
 ; define void @zextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i1> addrspace(1)* %in
+;   %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
 ;   %ext = zext <64 x i1> %load to <64 x i64>
 ;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
 ;   ret void
@@ -295,7 +295,7 @@
 ; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i64:
 ; XSI: s_endpgm
 ; define void @sextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i1> addrspace(1)* %in
+;   %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
 ;   %ext = sext <64 x i1> %load to <64 x i64>
 ;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
 ;   ret void
diff --git a/llvm/test/CodeGen/R600/global-extload-i16.ll b/llvm/test/CodeGen/R600/global-extload-i16.ll
index f3e3312..103a40d 100644
--- a/llvm/test/CodeGen/R600/global-extload-i16.ll
+++ b/llvm/test/CodeGen/R600/global-extload-i16.ll
@@ -8,7 +8,7 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @zextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
-  %a = load i16 addrspace(1)* %in
+  %a = load i16, i16 addrspace(1)* %in
   %ext = zext i16 %a to i32
   store i32 %ext, i32 addrspace(1)* %out
   ret void
@@ -19,7 +19,7 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @sextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
-  %a = load i16 addrspace(1)* %in
+  %a = load i16, i16 addrspace(1)* %in
   %ext = sext i16 %a to i32
   store i32 %ext, i32 addrspace(1)* %out
   ret void
@@ -29,7 +29,7 @@
 ; SI: buffer_load_ushort
 ; SI: s_endpgm
 define void @zextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i16> addrspace(1)* %in
+  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
   %ext = zext <1 x i16> %load to <1 x i32>
   store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
   ret void
@@ -39,7 +39,7 @@
 ; SI: buffer_load_sshort
 ; SI: s_endpgm
 define void @sextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i16> addrspace(1)* %in
+  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
   %ext = sext <1 x i16> %load to <1 x i32>
   store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
   ret void
@@ -48,7 +48,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i32:
 ; SI: s_endpgm
 define void @zextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i16> addrspace(1)* %in
+  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
   %ext = zext <2 x i16> %load to <2 x i32>
   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
   ret void
@@ -57,7 +57,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i32:
 ; SI: s_endpgm
 define void @sextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i16> addrspace(1)* %in
+  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
   %ext = sext <2 x i16> %load to <2 x i32>
   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
   ret void
@@ -66,7 +66,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i32:
 ; SI: s_endpgm
 define void @zextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i16> addrspace(1)* %in
+  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
   %ext = zext <4 x i16> %load to <4 x i32>
   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
   ret void
@@ -75,7 +75,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i32:
 ; SI: s_endpgm
 define void @sextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i16> addrspace(1)* %in
+  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
   %ext = sext <4 x i16> %load to <4 x i32>
   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
   ret void
@@ -84,7 +84,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i32:
 ; SI: s_endpgm
 define void @zextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i16> addrspace(1)* %in
+  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
   %ext = zext <8 x i16> %load to <8 x i32>
   store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
   ret void
@@ -93,7 +93,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i32:
 ; SI: s_endpgm
 define void @sextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i16> addrspace(1)* %in
+  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
   %ext = sext <8 x i16> %load to <8 x i32>
   store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
   ret void
@@ -102,7 +102,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i32:
 ; SI: s_endpgm
 define void @zextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i16> addrspace(1)* %in
+  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
   %ext = zext <16 x i16> %load to <16 x i32>
   store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
   ret void
@@ -111,7 +111,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i32:
 ; SI: s_endpgm
 define void @sextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i16> addrspace(1)* %in
+  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
   %ext = sext <16 x i16> %load to <16 x i32>
   store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
   ret void
@@ -120,7 +120,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i32:
 ; SI: s_endpgm
 define void @zextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i16> addrspace(1)* %in
+  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
   %ext = zext <32 x i16> %load to <32 x i32>
   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
   ret void
@@ -129,7 +129,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i32:
 ; SI: s_endpgm
 define void @sextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i16> addrspace(1)* %in
+  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
   %ext = sext <32 x i16> %load to <32 x i32>
   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
   ret void
@@ -138,7 +138,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i32:
 ; SI: s_endpgm
 define void @zextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <64 x i16> addrspace(1)* %in
+  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
   %ext = zext <64 x i16> %load to <64 x i32>
   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
   ret void
@@ -147,7 +147,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i32:
 ; SI: s_endpgm
 define void @sextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <64 x i16> addrspace(1)* %in
+  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
   %ext = sext <64 x i16> %load to <64 x i32>
   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
   ret void
@@ -158,7 +158,7 @@
 ; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
 ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
 define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
-  %a = load i16 addrspace(1)* %in
+  %a = load i16, i16 addrspace(1)* %in
   %ext = zext i16 %a to i64
   store i64 %ext, i64 addrspace(1)* %out
   ret void
@@ -169,7 +169,7 @@
 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
 ; SI: buffer_store_dwordx2
 define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
-  %a = load i16 addrspace(1)* %in
+  %a = load i16, i16 addrspace(1)* %in
   %ext = sext i16 %a to i64
   store i64 %ext, i64 addrspace(1)* %out
   ret void
@@ -178,7 +178,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i64:
 ; SI: s_endpgm
 define void @zextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i16> addrspace(1)* %in
+  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
   %ext = zext <1 x i16> %load to <1 x i64>
   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
   ret void
@@ -187,7 +187,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i64:
 ; SI: s_endpgm
 define void @sextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i16> addrspace(1)* %in
+  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
   %ext = sext <1 x i16> %load to <1 x i64>
   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
   ret void
@@ -196,7 +196,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i64:
 ; SI: s_endpgm
 define void @zextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i16> addrspace(1)* %in
+  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
   %ext = zext <2 x i16> %load to <2 x i64>
   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
   ret void
@@ -205,7 +205,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i64:
 ; SI: s_endpgm
 define void @sextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i16> addrspace(1)* %in
+  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
   %ext = sext <2 x i16> %load to <2 x i64>
   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
   ret void
@@ -214,7 +214,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i64:
 ; SI: s_endpgm
 define void @zextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i16> addrspace(1)* %in
+  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
   %ext = zext <4 x i16> %load to <4 x i64>
   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
   ret void
@@ -223,7 +223,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i64:
 ; SI: s_endpgm
 define void @sextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i16> addrspace(1)* %in
+  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
   %ext = sext <4 x i16> %load to <4 x i64>
   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
   ret void
@@ -232,7 +232,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i64:
 ; SI: s_endpgm
 define void @zextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i16> addrspace(1)* %in
+  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
   %ext = zext <8 x i16> %load to <8 x i64>
   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
   ret void
@@ -241,7 +241,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i64:
 ; SI: s_endpgm
 define void @sextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i16> addrspace(1)* %in
+  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
   %ext = sext <8 x i16> %load to <8 x i64>
   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
   ret void
@@ -250,7 +250,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i64:
 ; SI: s_endpgm
 define void @zextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i16> addrspace(1)* %in
+  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
   %ext = zext <16 x i16> %load to <16 x i64>
   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
   ret void
@@ -259,7 +259,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i64:
 ; SI: s_endpgm
 define void @sextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i16> addrspace(1)* %in
+  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
   %ext = sext <16 x i16> %load to <16 x i64>
   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
   ret void
@@ -268,7 +268,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i64:
 ; SI: s_endpgm
 define void @zextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i16> addrspace(1)* %in
+  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
   %ext = zext <32 x i16> %load to <32 x i64>
   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
   ret void
@@ -277,7 +277,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i64:
 ; SI: s_endpgm
 define void @sextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i16> addrspace(1)* %in
+  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
   %ext = sext <32 x i16> %load to <32 x i64>
   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
   ret void
@@ -286,7 +286,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i64:
 ; SI: s_endpgm
 define void @zextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <64 x i16> addrspace(1)* %in
+  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
   %ext = zext <64 x i16> %load to <64 x i64>
   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
   ret void
@@ -295,7 +295,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i64:
 ; SI: s_endpgm
 define void @sextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <64 x i16> addrspace(1)* %in
+  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
   %ext = sext <64 x i16> %load to <64 x i64>
   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/global-extload-i32.ll b/llvm/test/CodeGen/R600/global-extload-i32.ll
index b3d5438..79b8345 100644
--- a/llvm/test/CodeGen/R600/global-extload-i32.ll
+++ b/llvm/test/CodeGen/R600/global-extload-i32.ll
@@ -7,7 +7,7 @@
 ; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
 ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
 define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %a = load i32 addrspace(1)* %in
+  %a = load i32, i32 addrspace(1)* %in
   %ext = zext i32 %a to i64
   store i64 %ext, i64 addrspace(1)* %out
   ret void
@@ -18,7 +18,7 @@
 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
 ; SI: buffer_store_dwordx2
 define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %a = load i32 addrspace(1)* %in
+  %a = load i32, i32 addrspace(1)* %in
   %ext = sext i32 %a to i64
   store i64 %ext, i64 addrspace(1)* %out
   ret void
@@ -29,7 +29,7 @@
 ; SI: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i32> addrspace(1)* %in
+  %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
   %ext = zext <1 x i32> %load to <1 x i64>
   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
   ret void
@@ -41,7 +41,7 @@
 ; SI: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i32> addrspace(1)* %in
+  %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
   %ext = sext <1 x i32> %load to <1 x i64>
   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
   ret void
@@ -53,7 +53,7 @@
 ; SI: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i32> addrspace(1)* %in
+  %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
   %ext = zext <2 x i32> %load to <2 x i64>
   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
   ret void
@@ -67,7 +67,7 @@
 ; SI-DAG: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i32> addrspace(1)* %in
+  %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
   %ext = sext <2 x i32> %load to <2 x i64>
   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
   ret void
@@ -81,7 +81,7 @@
 ; SI: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i32> addrspace(1)* %in
+  %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
   %ext = zext <4 x i32> %load to <4 x i64>
   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
   ret void
@@ -99,7 +99,7 @@
 ; SI-DAG: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i32> addrspace(1)* %in
+  %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
   %ext = sext <4 x i32> %load to <4 x i64>
   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
   ret void
@@ -124,7 +124,7 @@
 ; SI-DAG: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i32> addrspace(1)* %in
+  %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
   %ext = zext <8 x i32> %load to <8 x i64>
   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
   ret void
@@ -159,7 +159,7 @@
 
 ; SI: s_endpgm
 define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i32> addrspace(1)* %in
+  %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
   %ext = sext <8 x i32> %load to <8 x i64>
   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
   ret void
@@ -212,7 +212,7 @@
 ; SI-DAG: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i32> addrspace(1)* %in
+  %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
   %ext = sext <16 x i32> %load to <16 x i64>
   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
   ret void
@@ -255,7 +255,7 @@
 
 ; SI: s_endpgm
 define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i32> addrspace(1)* %in
+  %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
   %ext = zext <16 x i32> %load to <16 x i64>
   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
   ret void
@@ -369,7 +369,7 @@
 
 ; SI: s_endpgm
 define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i32> addrspace(1)* %in
+  %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
   %ext = sext <32 x i32> %load to <32 x i64>
   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
   ret void
@@ -450,7 +450,7 @@
 
 ; SI: s_endpgm
 define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i32> addrspace(1)* %in
+  %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
   %ext = zext <32 x i32> %load to <32 x i64>
   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/global-extload-i8.ll b/llvm/test/CodeGen/R600/global-extload-i8.ll
index 4c37f3f..b31d5361 100644
--- a/llvm/test/CodeGen/R600/global-extload-i8.ll
+++ b/llvm/test/CodeGen/R600/global-extload-i8.ll
@@ -7,7 +7,7 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @zextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
-  %a = load i8 addrspace(1)* %in
+  %a = load i8, i8 addrspace(1)* %in
   %ext = zext i8 %a to i32
   store i32 %ext, i32 addrspace(1)* %out
   ret void
@@ -18,7 +18,7 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @sextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
-  %a = load i8 addrspace(1)* %in
+  %a = load i8, i8 addrspace(1)* %in
   %ext = sext i8 %a to i32
   store i32 %ext, i32 addrspace(1)* %out
   ret void
@@ -27,7 +27,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i32:
 ; SI: s_endpgm
 define void @zextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i8> addrspace(1)* %in
+  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
   %ext = zext <1 x i8> %load to <1 x i32>
   store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
   ret void
@@ -36,7 +36,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i32:
 ; SI: s_endpgm
 define void @sextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i8> addrspace(1)* %in
+  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
   %ext = sext <1 x i8> %load to <1 x i32>
   store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
   ret void
@@ -45,7 +45,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i32:
 ; SI: s_endpgm
 define void @zextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i8> addrspace(1)* %in
+  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
   %ext = zext <2 x i8> %load to <2 x i32>
   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
   ret void
@@ -54,7 +54,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i32:
 ; SI: s_endpgm
 define void @sextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i8> addrspace(1)* %in
+  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
   %ext = sext <2 x i8> %load to <2 x i32>
   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
   ret void
@@ -63,7 +63,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i32:
 ; SI: s_endpgm
 define void @zextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i8> addrspace(1)* %in
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
   %ext = zext <4 x i8> %load to <4 x i32>
   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
   ret void
@@ -72,7 +72,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i32:
 ; SI: s_endpgm
 define void @sextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i8> addrspace(1)* %in
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
   %ext = sext <4 x i8> %load to <4 x i32>
   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
   ret void
@@ -81,7 +81,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i32:
 ; SI: s_endpgm
 define void @zextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i8> addrspace(1)* %in
+  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
   %ext = zext <8 x i8> %load to <8 x i32>
   store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
   ret void
@@ -90,7 +90,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i32:
 ; SI: s_endpgm
 define void @sextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i8> addrspace(1)* %in
+  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
   %ext = sext <8 x i8> %load to <8 x i32>
   store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
   ret void
@@ -99,7 +99,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i32:
 ; SI: s_endpgm
 define void @zextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i8> addrspace(1)* %in
+  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
   %ext = zext <16 x i8> %load to <16 x i32>
   store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
   ret void
@@ -108,7 +108,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i32:
 ; SI: s_endpgm
 define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i8> addrspace(1)* %in
+  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
   %ext = sext <16 x i8> %load to <16 x i32>
   store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
   ret void
@@ -117,7 +117,7 @@
 ; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i32:
 ; XSI: s_endpgm
 ; define void @zextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i8> addrspace(1)* %in
+;   %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
 ;   %ext = zext <32 x i8> %load to <32 x i32>
 ;   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
 ;   ret void
@@ -126,7 +126,7 @@
 ; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i32:
 ; XSI: s_endpgm
 ; define void @sextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i8> addrspace(1)* %in
+;   %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
 ;   %ext = sext <32 x i8> %load to <32 x i32>
 ;   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
 ;   ret void
@@ -135,7 +135,7 @@
 ; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i32:
 ; XSI: s_endpgm
 ; define void @zextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i8> addrspace(1)* %in
+;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
 ;   %ext = zext <64 x i8> %load to <64 x i32>
 ;   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
 ;   ret void
@@ -144,7 +144,7 @@
 ; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i32:
 ; XSI: s_endpgm
 ; define void @sextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i8> addrspace(1)* %in
+;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
 ;   %ext = sext <64 x i8> %load to <64 x i32>
 ;   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
 ;   ret void
@@ -155,7 +155,7 @@
 ; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
 ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
 define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
-  %a = load i8 addrspace(1)* %in
+  %a = load i8, i8 addrspace(1)* %in
   %ext = zext i8 %a to i64
   store i64 %ext, i64 addrspace(1)* %out
   ret void
@@ -166,7 +166,7 @@
 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
 ; SI: buffer_store_dwordx2
 define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
-  %a = load i8 addrspace(1)* %in
+  %a = load i8, i8 addrspace(1)* %in
   %ext = sext i8 %a to i64
   store i64 %ext, i64 addrspace(1)* %out
   ret void
@@ -175,7 +175,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i64:
 ; SI: s_endpgm
 define void @zextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i8> addrspace(1)* %in
+  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
   %ext = zext <1 x i8> %load to <1 x i64>
   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
   ret void
@@ -184,7 +184,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i64:
 ; SI: s_endpgm
 define void @sextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i8> addrspace(1)* %in
+  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
   %ext = sext <1 x i8> %load to <1 x i64>
   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
   ret void
@@ -193,7 +193,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i64:
 ; SI: s_endpgm
 define void @zextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i8> addrspace(1)* %in
+  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
   %ext = zext <2 x i8> %load to <2 x i64>
   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
   ret void
@@ -202,7 +202,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i64:
 ; SI: s_endpgm
 define void @sextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i8> addrspace(1)* %in
+  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
   %ext = sext <2 x i8> %load to <2 x i64>
   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
   ret void
@@ -211,7 +211,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i64:
 ; SI: s_endpgm
 define void @zextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i8> addrspace(1)* %in
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
   %ext = zext <4 x i8> %load to <4 x i64>
   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
   ret void
@@ -220,7 +220,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i64:
 ; SI: s_endpgm
 define void @sextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i8> addrspace(1)* %in
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
   %ext = sext <4 x i8> %load to <4 x i64>
   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
   ret void
@@ -229,7 +229,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i64:
 ; SI: s_endpgm
 define void @zextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i8> addrspace(1)* %in
+  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
   %ext = zext <8 x i8> %load to <8 x i64>
   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
   ret void
@@ -238,7 +238,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i64:
 ; SI: s_endpgm
 define void @sextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i8> addrspace(1)* %in
+  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
   %ext = sext <8 x i8> %load to <8 x i64>
   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
   ret void
@@ -247,7 +247,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i64:
 ; SI: s_endpgm
 define void @zextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i8> addrspace(1)* %in
+  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
   %ext = zext <16 x i8> %load to <16 x i64>
   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
   ret void
@@ -256,7 +256,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i64:
 ; SI: s_endpgm
 define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i8> addrspace(1)* %in
+  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
   %ext = sext <16 x i8> %load to <16 x i64>
   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
   ret void
@@ -265,7 +265,7 @@
 ; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i64:
 ; XSI: s_endpgm
 ; define void @zextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i8> addrspace(1)* %in
+;   %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
 ;   %ext = zext <32 x i8> %load to <32 x i64>
 ;   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
 ;   ret void
@@ -274,7 +274,7 @@
 ; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i64:
 ; XSI: s_endpgm
 ; define void @sextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i8> addrspace(1)* %in
+;   %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
 ;   %ext = sext <32 x i8> %load to <32 x i64>
 ;   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
 ;   ret void
@@ -283,7 +283,7 @@
 ; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i64:
 ; XSI: s_endpgm
 ; define void @zextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i8> addrspace(1)* %in
+;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
 ;   %ext = zext <64 x i8> %load to <64 x i64>
 ;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
 ;   ret void
@@ -292,7 +292,7 @@
 ; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i64:
 ; XSI: s_endpgm
 ; define void @sextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i8> addrspace(1)* %in
+;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
 ;   %ext = sext <64 x i8> %load to <64 x i64>
 ;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
 ;   ret void
diff --git a/llvm/test/CodeGen/R600/global-zero-initializer.ll b/llvm/test/CodeGen/R600/global-zero-initializer.ll
index 0a54248..45aa8bf 100644
--- a/llvm/test/CodeGen/R600/global-zero-initializer.ll
+++ b/llvm/test/CodeGen/R600/global-zero-initializer.ll
@@ -7,7 +7,7 @@
 
 define void @load_init_global_global(i32 addrspace(1)* %out, i1 %p) {
  %gep = getelementptr [256 x i32], [256 x i32] addrspace(1)* @lds, i32 0, i32 10
-  %ld = load i32 addrspace(1)* %gep
+  %ld = load i32, i32 addrspace(1)* %gep
   store i32 %ld, i32 addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/gv-const-addrspace-fail.ll b/llvm/test/CodeGen/R600/gv-const-addrspace-fail.ll
index 18062f0..014b0a5 100644
--- a/llvm/test/CodeGen/R600/gv-const-addrspace-fail.ll
+++ b/llvm/test/CodeGen/R600/gv-const-addrspace-fail.ll
@@ -10,7 +10,7 @@
 ; SI: s_endpgm
 define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 {
   %arrayidx = getelementptr inbounds [1 x i8], [1 x i8] addrspace(2)* @a, i32 0, i32 %s
-  %1 = load i8 addrspace(2)* %arrayidx, align 1
+  %1 = load i8, i8 addrspace(2)* %arrayidx, align 1
   store i8 %1, i8 addrspace(1)* %out
   ret void
 }
@@ -23,7 +23,7 @@
 ; SI: s_endpgm
 define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 {
   %arrayidx = getelementptr inbounds [1 x i16], [1 x i16] addrspace(2)* @b, i32 0, i32 %s
-  %1 = load i16 addrspace(2)* %arrayidx, align 2
+  %1 = load i16, i16 addrspace(2)* %arrayidx, align 2
   store i16 %1, i16 addrspace(1)* %out
   ret void
 }
@@ -36,7 +36,7 @@
 ; FUNC-LABEL: {{^}}struct_bar_gv_load:
 define void @struct_bar_gv_load(i8 addrspace(1)* %out, i32 %index) {
   %gep = getelementptr inbounds [1 x %struct.bar], [1 x %struct.bar] addrspace(2)* @struct_bar_gv, i32 0, i32 0, i32 1, i32 %index
-  %load = load i8 addrspace(2)* %gep, align 1
+  %load = load i8, i8 addrspace(2)* %gep, align 1
   store i8 %load, i8 addrspace(1)* %out, align 1
   ret void
 }
@@ -51,7 +51,7 @@
 ; FUNC-LABEL: {{^}}array_vector_gv_load:
 define void @array_vector_gv_load(<4 x i32> addrspace(1)* %out, i32 %index) {
   %gep = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>] addrspace(2)* @array_vector_gv, i32 0, i32 %index
-  %load = load <4 x i32> addrspace(2)* %gep, align 16
+  %load = load <4 x i32>, <4 x i32> addrspace(2)* %gep, align 16
   store <4 x i32> %load, <4 x i32> addrspace(1)* %out, align 16
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/gv-const-addrspace.ll b/llvm/test/CodeGen/R600/gv-const-addrspace.ll
index 2fb6672..3c1fc6c 100644
--- a/llvm/test/CodeGen/R600/gv-const-addrspace.ll
+++ b/llvm/test/CodeGen/R600/gv-const-addrspace.ll
@@ -22,7 +22,7 @@
 define void @float(float addrspace(1)* %out, i32 %index) {
 entry:
   %0 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
-  %1 = load float addrspace(2)* %0
+  %1 = load float, float addrspace(2)* %0
   store float %1, float addrspace(1)* %out
   ret void
 }
@@ -45,7 +45,7 @@
 define void @i32(i32 addrspace(1)* %out, i32 %index) {
 entry:
   %0 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(2)* @i32_gv, i32 0, i32 %index
-  %1 = load i32 addrspace(2)* %0
+  %1 = load i32, i32 addrspace(2)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -60,7 +60,7 @@
 
 define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
   %gep = getelementptr inbounds [1 x %struct.foo], [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
-  %load = load i32 addrspace(2)* %gep, align 4
+  %load = load i32, i32 addrspace(2)* %gep, align 4
   store i32 %load, i32 addrspace(1)* %out, align 4
   ret void
 }
@@ -76,7 +76,7 @@
 ; VI: s_load_dword
 define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
   %gep = getelementptr inbounds [4 x <1 x i32>], [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
-  %load = load <1 x i32> addrspace(2)* %gep, align 4
+  %load = load <1 x i32>, <1 x i32> addrspace(2)* %gep, align 4
   store <1 x i32> %load, <1 x i32> addrspace(1)* %out, align 4
   ret void
 }
@@ -88,7 +88,7 @@
 
 if:
   %1 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
-  %2 = load float addrspace(2)* %1
+  %2 = load float, float addrspace(2)* %1
   store float %2, float addrspace(1)* %out
   br label %endif
 
diff --git a/llvm/test/CodeGen/R600/half.ll b/llvm/test/CodeGen/R600/half.ll
index 35a41c5..42ee788 100644
--- a/llvm/test/CodeGen/R600/half.ll
+++ b/llvm/test/CodeGen/R600/half.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL: {{^}}test_load_store:
 ; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
 ; CHECK: buffer_store_short [[TMP]]
-  %val = load half addrspace(1)* %in
+  %val = load half, half addrspace(1)* %in
   store half %val, half addrspace(1) * %out
   ret void
 }
@@ -14,7 +14,7 @@
 ; CHECK-LABEL: {{^}}test_bitcast_from_half:
 ; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
 ; CHECK: buffer_store_short [[TMP]]
-  %val = load half addrspace(1) * %in
+  %val = load half, half addrspace(1) * %in
   %val_int = bitcast half %val to i16
   store i16 %val_int, i16 addrspace(1)* %out
   ret void
@@ -24,7 +24,7 @@
 ; CHECK-LABEL: {{^}}test_bitcast_to_half:
 ; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
 ; CHECK: buffer_store_short [[TMP]]
-  %val = load i16 addrspace(1)* %in
+  %val = load i16, i16 addrspace(1)* %in
   %val_fp = bitcast i16 %val to half
   store half %val_fp, half addrspace(1)* %out
   ret void
@@ -34,7 +34,7 @@
 ; CHECK-LABEL: {{^}}test_extend32:
 ; CHECK: v_cvt_f32_f16_e32
 
-  %val16 = load half addrspace(1)* %in
+  %val16 = load half, half addrspace(1)* %in
   %val32 = fpext half %val16 to float
   store float %val32, float addrspace(1)* %out
   ret void
@@ -45,7 +45,7 @@
 ; CHECK: v_cvt_f32_f16_e32
 ; CHECK: v_cvt_f64_f32_e32
 
-  %val16 = load half addrspace(1)* %in
+  %val16 = load half, half addrspace(1)* %in
   %val64 = fpext half %val16 to double
   store double %val64, double addrspace(1)* %out
   ret void
@@ -55,7 +55,7 @@
 ; CHECK-LABEL: {{^}}test_trunc32:
 ; CHECK: v_cvt_f16_f32_e32
 
-  %val32 = load float addrspace(1)* %in
+  %val32 = load float, float addrspace(1)* %in
   %val16 = fptrunc float %val32 to half
   store half %val16, half addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/i8-to-double-to-float.ll b/llvm/test/CodeGen/R600/i8-to-double-to-float.ll
index 6047466..c218e19 100644
--- a/llvm/test/CodeGen/R600/i8-to-double-to-float.ll
+++ b/llvm/test/CodeGen/R600/i8-to-double-to-float.ll
@@ -3,7 +3,7 @@
 ;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) {
-  %1 = load i8 addrspace(1)* %in
+  %1 = load i8, i8 addrspace(1)* %in
   %2 = uitofp i8 %1 to double
   %3 = fptrunc double %2 to float
   store float %3, float addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/icmp-select-sete-reverse-args.ll b/llvm/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
index 0cde06c..60e59a5 100644
--- a/llvm/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
+++ b/llvm/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
@@ -8,9 +8,9 @@
 
 define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
-  %0 = load i32 addrspace(1)* %in
+  %0 = load i32, i32 addrspace(1)* %in
   %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
-  %1 = load i32 addrspace(1)* %arrayidx1
+  %1 = load i32, i32 addrspace(1)* %arrayidx1
   %cmp = icmp eq i32 %0, %1
   %value = select i1 %cmp, i32 0, i32 -1
   store i32 %value, i32 addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/imm.ll b/llvm/test/CodeGen/R600/imm.ll
index 9b95fd6..8917cd6 100644
--- a/llvm/test/CodeGen/R600/imm.ll
+++ b/llvm/test/CodeGen/R600/imm.ll
@@ -225,7 +225,7 @@
 ; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
 ; CHECK: buffer_store_dword [[REG]]
 define void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
-  %x = load float addrspace(1)* %in
+  %x = load float, float addrspace(1)* %in
   %y = fadd float %x, 0.5
   store float %y, float addrspace(1)* %out
   ret void
@@ -236,7 +236,7 @@
 ; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0x44800000, [[VAL]]
 ; CHECK: buffer_store_dword [[REG]]
 define void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
-  %x = load float addrspace(1)* %in
+  %x = load float, float addrspace(1)* %in
   %y = fadd float %x, 1024.0
   store float %y, float addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/indirect-private-64.ll b/llvm/test/CodeGen/R600/indirect-private-64.ll
index 378db4d..d63e1b6 100644
--- a/llvm/test/CodeGen/R600/indirect-private-64.ll
+++ b/llvm/test/CodeGen/R600/indirect-private-64.ll
@@ -14,12 +14,12 @@
 ; SI-PROMOTE: ds_write_b64
 ; SI-PROMOTE: ds_read_b64
 define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind {
-  %val = load double addrspace(1)* %in, align 8
+  %val = load double, double addrspace(1)* %in, align 8
   %array = alloca double, i32 16, align 8
   %ptr = getelementptr double, double* %array, i32 %b
   store double %val, double* %ptr, align 8
   call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
-  %result = load double* %ptr, align 8
+  %result = load double, double* %ptr, align 8
   store double %result, double addrspace(1)* %out, align 8
   ret void
 }
@@ -38,12 +38,12 @@
 ; SI-PROMOTE: ds_read_b32
 ; SI-PROMOTE: ds_read_b32
 define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind {
-  %val = load <2 x double> addrspace(1)* %in, align 16
+  %val = load <2 x double>, <2 x double> addrspace(1)* %in, align 16
   %array = alloca <2 x double>, i32 16, align 16
   %ptr = getelementptr <2 x double>, <2 x double>* %array, i32 %b
   store <2 x double> %val, <2 x double>* %ptr, align 16
   call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
-  %result = load <2 x double>* %ptr, align 16
+  %result = load <2 x double>, <2 x double>* %ptr, align 16
   store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16
   ret void
 }
@@ -56,12 +56,12 @@
 ; SI-PROMOTE: ds_write_b64
 ; SI-PROMOTE: ds_read_b64
 define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind {
-  %val = load i64 addrspace(1)* %in, align 8
+  %val = load i64, i64 addrspace(1)* %in, align 8
   %array = alloca i64, i32 16, align 8
   %ptr = getelementptr i64, i64* %array, i32 %b
   store i64 %val, i64* %ptr, align 8
   call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
-  %result = load i64* %ptr, align 8
+  %result = load i64, i64* %ptr, align 8
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
@@ -80,12 +80,12 @@
 ; SI-PROMOTE: ds_read_b32
 ; SI-PROMOTE: ds_read_b32
 define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind {
-  %val = load <2 x i64> addrspace(1)* %in, align 16
+  %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
   %array = alloca <2 x i64>, i32 16, align 16
   %ptr = getelementptr <2 x i64>, <2 x i64>* %array, i32 %b
   store <2 x i64> %val, <2 x i64>* %ptr, align 16
   call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
-  %result = load <2 x i64>* %ptr, align 16
+  %result = load <2 x i64>, <2 x i64>* %ptr, align 16
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out, align 16
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/insert_vector_elt.ll b/llvm/test/CodeGen/R600/insert_vector_elt.ll
index 67b0a7a..6de3d40 100644
--- a/llvm/test/CodeGen/R600/insert_vector_elt.ll
+++ b/llvm/test/CodeGen/R600/insert_vector_elt.ll
@@ -185,13 +185,13 @@
   br i1 %1, label %if, label %else
 
 if:
-  %2 = load i32 addrspace(1)* %in
+  %2 = load i32, i32 addrspace(1)* %in
   %3 = insertelement <2 x i32> %0, i32 %2, i32 1
   br label %endif
 
 else:
   %4 = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %5 = load i32 addrspace(1)* %4
+  %5 = load i32, i32 addrspace(1)* %4
   %6 = insertelement <2 x i32> %0, i32 %5, i32 1
   br label %endif
 
diff --git a/llvm/test/CodeGen/R600/jump-address.ll b/llvm/test/CodeGen/R600/jump-address.ll
index a1cd388..9dbc21c 100644
--- a/llvm/test/CodeGen/R600/jump-address.ll
+++ b/llvm/test/CodeGen/R600/jump-address.ll
@@ -6,7 +6,7 @@
 
 define void @main() #0 {
 main_body:
-  %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %1 = extractelement <4 x float> %0, i32 0
   %2 = bitcast float %1 to i32
   %3 = icmp eq i32 %2, 0
@@ -17,7 +17,7 @@
   br i1 %7, label %ENDIF, label %ELSE
 
 ELSE:                                             ; preds = %main_body
-  %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %9 = extractelement <4 x float> %8, i32 0
   %10 = bitcast float %9 to i32
   %11 = icmp eq i32 %10, 1
@@ -40,7 +40,7 @@
   ret void
 
 IF13:                                             ; preds = %ELSE
-  %20 = load <4 x float> addrspace(8)* null
+  %20 = load <4 x float>, <4 x float> addrspace(8)* null
   %21 = extractelement <4 x float> %20, i32 0
   %22 = fsub float -0.000000e+00, %21
   %23 = fadd float 0xFFF8000000000000, %22
diff --git a/llvm/test/CodeGen/R600/kcache-fold.ll b/llvm/test/CodeGen/R600/kcache-fold.ll
index 27840b2..6c405fa 100644
--- a/llvm/test/CodeGen/R600/kcache-fold.ll
+++ b/llvm/test/CodeGen/R600/kcache-fold.ll
@@ -4,35 +4,35 @@
 ; CHECK: MOV * T{{[0-9]+\.[XYZW], KC0}}
 define void @main1() {
 main_body:
-  %0 = load <4 x float> addrspace(8)* null
+  %0 = load <4 x float>, <4 x float> addrspace(8)* null
   %1 = extractelement <4 x float> %0, i32 0
-  %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %3 = extractelement <4 x float> %2, i32 0
-  %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %5 = extractelement <4 x float> %4, i32 0
   %6 = fcmp ogt float %1, 0.000000e+00
   %7 = select i1 %6, float %3, float %5
-  %8 = load <4 x float> addrspace(8)* null
+  %8 = load <4 x float>, <4 x float> addrspace(8)* null
   %9 = extractelement <4 x float> %8, i32 1
-  %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %11 = extractelement <4 x float> %10, i32 1
-  %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %13 = extractelement <4 x float> %12, i32 1
   %14 = fcmp ogt float %9, 0.000000e+00
   %15 = select i1 %14, float %11, float %13
-  %16 = load <4 x float> addrspace(8)* null
+  %16 = load <4 x float>, <4 x float> addrspace(8)* null
   %17 = extractelement <4 x float> %16, i32 2
-  %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %18 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %19 = extractelement <4 x float> %18, i32 2
-  %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %21 = extractelement <4 x float> %20, i32 2
   %22 = fcmp ogt float %17, 0.000000e+00
   %23 = select i1 %22, float %19, float %21
-  %24 = load <4 x float> addrspace(8)* null
+  %24 = load <4 x float>, <4 x float> addrspace(8)* null
   %25 = extractelement <4 x float> %24, i32 3
-  %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %26 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %27 = extractelement <4 x float> %26, i32 3
-  %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %29 = extractelement <4 x float> %28, i32 3
   %30 = fcmp ogt float %25, 0.000000e+00
   %31 = select i1 %30, float %27, float %29
@@ -52,35 +52,35 @@
 ; CHECK-NOT: MOV
 define void @main2() {
 main_body:
-  %0 = load <4 x float> addrspace(8)* null
+  %0 = load <4 x float>, <4 x float> addrspace(8)* null
   %1 = extractelement <4 x float> %0, i32 0
-  %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %3 = extractelement <4 x float> %2, i32 0
-  %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %5 = extractelement <4 x float> %4, i32 1
   %6 = fcmp ogt float %1, 0.000000e+00
   %7 = select i1 %6, float %3, float %5
-  %8 = load <4 x float> addrspace(8)* null
+  %8 = load <4 x float>, <4 x float> addrspace(8)* null
   %9 = extractelement <4 x float> %8, i32 1
-  %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %11 = extractelement <4 x float> %10, i32 0
-  %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %13 = extractelement <4 x float> %12, i32 1
   %14 = fcmp ogt float %9, 0.000000e+00
   %15 = select i1 %14, float %11, float %13
-  %16 = load <4 x float> addrspace(8)* null
+  %16 = load <4 x float>, <4 x float> addrspace(8)* null
   %17 = extractelement <4 x float> %16, i32 2
-  %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %18 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %19 = extractelement <4 x float> %18, i32 3
-  %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %21 = extractelement <4 x float> %20, i32 2
   %22 = fcmp ogt float %17, 0.000000e+00
   %23 = select i1 %22, float %19, float %21
-  %24 = load <4 x float> addrspace(8)* null
+  %24 = load <4 x float>, <4 x float> addrspace(8)* null
   %25 = extractelement <4 x float> %24, i32 3
-  %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %26 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %27 = extractelement <4 x float> %26, i32 3
-  %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %29 = extractelement <4 x float> %28, i32 2
   %30 = fcmp ogt float %25, 0.000000e+00
   %31 = select i1 %30, float %27, float %29
diff --git a/llvm/test/CodeGen/R600/large-alloca.ll b/llvm/test/CodeGen/R600/large-alloca.ll
index 26ae217..671833d 100644
--- a/llvm/test/CodeGen/R600/large-alloca.ll
+++ b/llvm/test/CodeGen/R600/large-alloca.ll
@@ -8,7 +8,7 @@
   %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
   store i32 %x, i32* %gep
   %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y
-  %0 = load i32* %gep1
+  %0 = load i32, i32* %gep1
   store i32 %0, i32 addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/large-constant-initializer.ll b/llvm/test/CodeGen/R600/large-constant-initializer.ll
index c8671ef..81c09ae 100644
--- a/llvm/test/CodeGen/R600/large-constant-initializer.ll
+++ b/llvm/test/CodeGen/R600/large-constant-initializer.ll
@@ -5,7 +5,7 @@
 @gv = external unnamed_addr addrspace(2) constant [239 x i32], align 4
 
 define void @opencv_cvtfloat_crash(i32 addrspace(1)* %out, i32 %x) nounwind {
-  %val = load i32 addrspace(2)* getelementptr ([239 x i32] addrspace(2)* @gv, i64 0, i64 239), align 4
+  %val = load i32, i32 addrspace(2)* getelementptr ([239 x i32] addrspace(2)* @gv, i64 0, i64 239), align 4
   %mul12 = mul nsw i32 %val, 7
   br i1 undef, label %exit, label %bb
 
diff --git a/llvm/test/CodeGen/R600/lds-initializer.ll b/llvm/test/CodeGen/R600/lds-initializer.ll
index 1a80a57..bf8df63 100644
--- a/llvm/test/CodeGen/R600/lds-initializer.ll
+++ b/llvm/test/CodeGen/R600/lds-initializer.ll
@@ -7,7 +7,7 @@
 
 define void @load_init_lds_global(i32 addrspace(1)* %out, i1 %p) {
   %gep = getelementptr [8 x i32], [8 x i32] addrspace(3)* @lds, i32 0, i32 10
-  %ld = load i32 addrspace(3)* %gep
+  %ld = load i32, i32 addrspace(3)* %gep
   store i32 %ld, i32 addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/lds-oqap-crash.ll b/llvm/test/CodeGen/R600/lds-oqap-crash.ll
index fbcd778..6ff6fc3 100644
--- a/llvm/test/CodeGen/R600/lds-oqap-crash.ll
+++ b/llvm/test/CodeGen/R600/lds-oqap-crash.ll
@@ -12,7 +12,7 @@
 ; CHECK: {{^}}lds_crash:
 define void @lds_crash(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %a, i32 %b, i32 %c) {
 entry:
-  %0 = load i32 addrspace(3)* %in
+  %0 = load i32, i32 addrspace(3)* %in
   ; This block needs to be > 115 ISA instructions to hit the bug,
   ; so we'll use udiv instructions.
   %div0 = udiv i32 %0, %b
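
(For context on the comment above: a hypothetical sketch, not from the test -- each i32 udiv lowers to a long magic-number/long-division sequence on R600, so a short chain of them is enough to push a block past the ~115-ISA-instruction threshold:)

define i32 @inflate_block(i32 %x, i32 %b) {
entry:
  ; every udiv below expands to many ISA instructions after lowering
  %d0 = udiv i32 %x, %b
  %d1 = udiv i32 %d0, %b
  %d2 = udiv i32 %d1, %b
  ret i32 %d2
}
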
diff --git a/llvm/test/CodeGen/R600/lds-output-queue.ll b/llvm/test/CodeGen/R600/lds-output-queue.ll
index f34cad4..44ffc36 100644
--- a/llvm/test/CodeGen/R600/lds-output-queue.ll
+++ b/llvm/test/CodeGen/R600/lds-output-queue.ll
@@ -13,11 +13,11 @@
 define void @lds_input_queue(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %index) {
 entry:
   %0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
-  %1 = load i32 addrspace(3)* %0
+  %1 = load i32, i32 addrspace(3)* %0
   call void @llvm.AMDGPU.barrier.local()
 
   ; This will start a new clause for the vertex fetch
-  %2 = load i32 addrspace(1)* %in
+  %2 = load i32, i32 addrspace(1)* %in
   %3 = add i32 %1, %2
   store i32 %3, i32 addrspace(1)* %out
   ret void
@@ -41,8 +41,8 @@
 ; has been declared in the local memory space:
 ;
 ;  %0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
-;  %1 = load i32 addrspace(3)* %0
-;  %2 = load i32 addrspace(1)* %in
+;  %1 = load i32, i32 addrspace(3)* %0
+;  %2 = load i32, i32 addrspace(1)* %in
 ;
 ; The instruction selection phase will generate ISA that looks like this:
 ; %OQAP = LDS_READ_RET
@@ -91,8 +91,8 @@
 define void @local_global_alias(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 0
-  %1 = load i32 addrspace(3)* %0
-  %2 = load i32 addrspace(1)* %in
+  %1 = load i32, i32 addrspace(3)* %0
+  %2 = load i32, i32 addrspace(1)* %in
   %3 = add i32 %2, %1
   store i32 %3, i32 addrspace(1)* %out
   ret void
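
(Aside: the explicit result type composes the same way with the volatile and atomic load forms accepted by the parser; a minimal sketch with hypothetical names, not taken from these tests:)

define i32 @explicit_load_forms(i32* %p) {
  %plain = load i32, i32* %p
  %vol   = load volatile i32, i32* %p
  %atom  = load atomic i32, i32* %p seq_cst, align 4
  ret i32 %plain
}
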
diff --git a/llvm/test/CodeGen/R600/lds-zero-initializer.ll b/llvm/test/CodeGen/R600/lds-zero-initializer.ll
index f18462e..fb51bc0 100644
--- a/llvm/test/CodeGen/R600/lds-zero-initializer.ll
+++ b/llvm/test/CodeGen/R600/lds-zero-initializer.ll
@@ -7,7 +7,7 @@
 
 define void @load_zeroinit_lds_global(i32 addrspace(1)* %out, i1 %p) {
   %gep = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds, i32 0, i32 10
-  %ld = load i32 addrspace(3)* %gep
+  %ld = load i32, i32 addrspace(3)* %gep
   store i32 %ld, i32 addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.abs.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.abs.ll
index 8bc2583..8bf094b8 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.abs.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.abs.ll
@@ -28,7 +28,7 @@
 ; EG: SUB_INT
 ; EG: MAX_INT
 define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
-  %val = load i32 addrspace(1)* %src, align 4
+  %val = load i32, i32 addrspace(1)* %src, align 4
   %abs = call i32 @llvm.AMDGPU.abs(i32 %val) nounwind readnone
   store i32 %abs, i32 addrspace(1)* %out, align 4
   ret void
@@ -42,7 +42,7 @@
 ; EG: SUB_INT
 ; EG: MAX_INT
 define void @abs_i32_legacy_amdil(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
-  %val = load i32 addrspace(1)* %src, align 4
+  %val = load i32, i32 addrspace(1)* %src, align 4
   %abs = call i32 @llvm.AMDIL.abs.i32(i32 %val) nounwind readnone
   store i32 %abs, i32 addrspace(1)* %out, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
index 0268e5b..db88397 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
@@ -17,7 +17,7 @@
   %3 = sub i32 %2, 1
   %4 = sub i32 %3, %0
   %5 = getelementptr i32, i32 addrspace(1)* %out, i32 %4
-  %6 = load i32 addrspace(1)* %5
+  %6 = load i32, i32 addrspace(1)* %5
   store i32 %6, i32 addrspace(1)* %1
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
index 3ca9f3e..48fb2e0 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
@@ -18,7 +18,7 @@
   %3 = sub i32 %2, 1
   %4 = sub i32 %3, %0
   %5 = getelementptr i32, i32 addrspace(1)* %out, i32 %4
-  %6 = load i32 addrspace(1)* %5
+  %6 = load i32, i32 addrspace(1)* %5
   store i32 %6, i32 addrspace(1)* %1
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
index 2ec2546..ffd3d6c 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
@@ -44,7 +44,7 @@
 ; FUNC-LABEL: {{^}}v_bfe_print_arg:
 ; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
 define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind {
-  %load = load i32 addrspace(1)* %src0, align 4
+  %load = load i32, i32 addrspace(1)* %src0, align 4
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 2, i32 8) nounwind readnone
   store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
   ret void
@@ -75,7 +75,7 @@
 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
 ; SI: s_endpgm
 define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 1, i32 31)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -89,7 +89,7 @@
 ; SI: buffer_store_dword [[VREG]],
 ; SI: s_endpgm
 define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 0, i32 31)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -102,7 +102,7 @@
 ; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
 ; SI: s_endpgm
 define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -115,7 +115,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -127,7 +127,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -139,7 +139,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -151,7 +151,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -162,7 +162,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = ashr i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -173,7 +173,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = lshr i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -418,7 +418,7 @@
 ; XSI-NOT: SHR
 ; XSI: buffer_store_dword [[BFE]],
 define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24)
   %shl = shl i32 %bfe, 8
   %ashr = ashr i32 %shl, 8
@@ -434,7 +434,7 @@
 ; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
 ; SI: buffer_store_dword [[TMP2]]
 define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %src = load i32 addrspace(1)* %in, align 4
+  %src = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %src, i32 1, i32 16) nounwind readnone
   %div = sdiv i32 %bfe, 2
   store i32 %div, i32 addrspace(1)* %out, align 4
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
index 6cd0108..83bdb15 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
@@ -65,7 +65,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
-  %load = load i8 addrspace(1)* %in
+  %load = load i8, i8 addrspace(1)* %in
   %ext = zext i8 %load to i32
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -79,7 +79,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 1
   %ext = and i32 %add, 255
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
@@ -94,7 +94,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 1
   %ext = and i32 %add, 65535
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 16)
@@ -108,7 +108,7 @@
 ; SI: bfe
 ; SI: s_endpgm
 define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 1
   %ext = and i32 %add, 255
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 1, i32 8)
@@ -123,7 +123,7 @@
 ; SI-NEXT: bfe
 ; SI: s_endpgm
 define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 1
   %ext = and i32 %add, 255
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 3, i32 8)
@@ -138,7 +138,7 @@
 ; SI-NEXT: bfe
 ; SI: s_endpgm
 define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 1
   %ext = and i32 %add, 255
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 7, i32 8)
@@ -152,7 +152,7 @@
 ; SI-NEXT: bfe
 ; SI: s_endpgm
 define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 1
   %ext = and i32 %add, 65535
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 8, i32 8)
@@ -166,14 +166,14 @@
 ; SI: s_endpgm
 ; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1,
 define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 0, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
 }
 
 define void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 8)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -181,7 +181,7 @@
 }
 
 define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -196,7 +196,7 @@
 ; SI: buffer_store_dword [[VREG]],
 ; SI: s_endpgm
 define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %shr = lshr i32 %shl, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 31, i32 1)
@@ -211,7 +211,7 @@
 ; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
 ; SI: s_endpgm
 define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %shr = ashr i32 %shl, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 0, i32 1)
@@ -224,7 +224,7 @@
 ; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
 ; SI: s_endpgm
 define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 1, i32 31)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -236,7 +236,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 31)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -249,7 +249,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -262,7 +262,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -274,7 +274,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -286,7 +286,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -298,7 +298,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -309,7 +309,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = ashr i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -320,7 +320,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = lshr i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -568,7 +568,7 @@
 define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
                                             i32 addrspace(1)* %out1,
                                             i32 addrspace(1)* %in) nounwind {
-  %src = load i32 addrspace(1)* %in, align 4
+  %src = load i32, i32 addrspace(1)* %in, align 4
   %and = and i32 %src, 63
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %and, i32 2, i32 2) nounwind readnone
   store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
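
(The folds checked above follow from the usual bfe semantics: assuming bfe.u32(x, off, w) computes (x >> off) & ((1 << w) - 1) for w < 32, the (31, 1) cases reduce to a single shift. An illustrative expansion, hypothetical function name:)

define i32 @bfe_u32_expand(i32 %x, i32 %off, i32 %w) {
  ; (x >> off) & ((1 << w) - 1); only meaningful for w < 32
  %shifted = lshr i32 %x, %off
  %bit  = shl i32 1, %w
  %mask = sub i32 %bit, 1
  %res  = and i32 %shifted, %mask
  ret i32 %res
}
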
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.brev.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.brev.ll
index 3973f53..301de4b 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.brev.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.brev.ll
@@ -21,7 +21,7 @@
 ; SI: buffer_store_dword [[RESULT]],
 ; SI: s_endpgm
 define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
-  %val = load i32 addrspace(1)* %valptr, align 4
+  %val = load i32, i32 addrspace(1)* %valptr, align 4
   %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone
   store i32 %ctlz, i32 addrspace(1)* %out, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.class.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.class.ll
index 5f31289..805a88b 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.class.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.class.ll
@@ -136,7 +136,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float addrspace(1)* %gep.in
+  %a = load float, float addrspace(1)* %gep.in
 
   %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 511) #1
   %sext = sext i1 %result to i32
@@ -154,7 +154,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %b = load i32 addrspace(1)* %gep.in
+  %b = load i32, i32 addrspace(1)* %gep.in
 
   %result = call i1 @llvm.AMDGPU.class.f32(float 1.0, i32 %b) #1
   %sext = sext i1 %result to i32
@@ -174,7 +174,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %b = load i32 addrspace(1)* %gep.in
+  %b = load i32, i32 addrspace(1)* %gep.in
 
   %result = call i1 @llvm.AMDGPU.class.f32(float 1024.0, i32 %b) #1
   %sext = sext i1 %result to i32
@@ -292,7 +292,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load double addrspace(1)* %in
+  %a = load double, double addrspace(1)* %in
 
   %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 511) #1
   %sext = sext i1 %result to i32
@@ -308,7 +308,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %b = load i32 addrspace(1)* %gep.in
+  %b = load i32, i32 addrspace(1)* %gep.in
 
   %result = call i1 @llvm.AMDGPU.class.f64(double 1.0, i32 %b) #1
   %sext = sext i1 %result to i32
@@ -323,7 +323,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %b = load i32 addrspace(1)* %gep.in
+  %b = load i32, i32 addrspace(1)* %gep.in
 
   %result = call i1 @llvm.AMDGPU.class.f64(double 1024.0, i32 %b) #1
   %sext = sext i1 %result to i32
@@ -340,7 +340,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float addrspace(1)* %gep.in
+  %a = load float, float addrspace(1)* %gep.in
 
   %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
   %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 3) #1
@@ -360,7 +360,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float addrspace(1)* %gep.in
+  %a = load float, float addrspace(1)* %gep.in
 
   %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
   %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1
@@ -383,7 +383,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float addrspace(1)* %gep.in
+  %a = load float, float addrspace(1)* %gep.in
 
   %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
   %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1
@@ -418,7 +418,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float addrspace(1)* %gep.in
+  %a = load float, float addrspace(1)* %gep.in
 
   %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
   %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 8) #1
@@ -438,7 +438,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float addrspace(1)* %gep.in
+  %a = load float, float addrspace(1)* %gep.in
 
   %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1
   %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1
@@ -458,7 +458,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float addrspace(1)* %gep.in
+  %a = load float, float addrspace(1)* %gep.in
 
   %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
   %class1 = call i1 @llvm.AMDGPU.class.f32(float %b, i32 8) #1
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.cube.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.cube.ll
index aa07afd..be3e0a4 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.cube.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.cube.ll
@@ -8,15 +8,15 @@
 ; CHECK: CUBE * T{{[0-9]}}.W
 define void @cube() #0 {
 main_body:
-  %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
   %1 = extractelement <4 x float> %0, i32 3
-  %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
   %3 = extractelement <4 x float> %2, i32 0
   %4 = fdiv float %3, %1
-  %5 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
   %6 = extractelement <4 x float> %5, i32 1
   %7 = fdiv float %6, %1
-  %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
   %9 = extractelement <4 x float> %8, i32 2
   %10 = fdiv float %9, %1
   %11 = insertelement <4 x float> undef, float %4, i32 0
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
index 799817e..8b32f69 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
@@ -9,7 +9,7 @@
 ; SI-LABEL: {{^}}test_unpack_byte0_to_float:
 ; SI: v_cvt_f32_ubyte0
 define void @test_unpack_byte0_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte0(i32 %val) nounwind readnone
   store float %cvt, float addrspace(1)* %out, align 4
   ret void
@@ -18,7 +18,7 @@
 ; SI-LABEL: {{^}}test_unpack_byte1_to_float:
 ; SI: v_cvt_f32_ubyte1
 define void @test_unpack_byte1_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte1(i32 %val) nounwind readnone
   store float %cvt, float addrspace(1)* %out, align 4
   ret void
@@ -27,7 +27,7 @@
 ; SI-LABEL: {{^}}test_unpack_byte2_to_float:
 ; SI: v_cvt_f32_ubyte2
 define void @test_unpack_byte2_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte2(i32 %val) nounwind readnone
   store float %cvt, float addrspace(1)* %out, align 4
   ret void
@@ -36,7 +36,7 @@
 ; SI-LABEL: {{^}}test_unpack_byte3_to_float:
 ; SI: v_cvt_f32_ubyte3
 define void @test_unpack_byte3_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte3(i32 %val) nounwind readnone
   store float %cvt, float addrspace(1)* %out, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
index f93d5f0..48a4af1 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
@@ -122,9 +122,9 @@
   %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 2
 
-  %a = load float addrspace(1)* %gep.a
-  %b = load float addrspace(1)* %gep.b
-  %c = load float addrspace(1)* %gep.c
+  %a = load float, float addrspace(1)* %gep.a
+  %b = load float, float addrspace(1)* %gep.b
+  %c = load float, float addrspace(1)* %gep.c
 
   %cmp0 = icmp eq i32 %tid, 0
   %cmp1 = icmp ne i32 %d, 0
@@ -159,15 +159,15 @@
   %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1
   %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2
 
-  %a = load float addrspace(1)* %gep.a
-  %b = load float addrspace(1)* %gep.b
-  %c = load float addrspace(1)* %gep.c
+  %a = load float, float addrspace(1)* %gep.a
+  %b = load float, float addrspace(1)* %gep.b
+  %c = load float, float addrspace(1)* %gep.c
 
   %cmp0 = icmp eq i32 %tid, 0
   br i1 %cmp0, label %bb, label %exit
 
 bb:
-  %val = load i32 addrspace(1)* %dummy
+  %val = load i32, i32 addrspace(1)* %dummy
   %cmp1 = icmp ne i32 %val, 0
   br label %exit
 
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
index 7be97c2..de830de 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
@@ -16,8 +16,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -36,8 +36,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -56,8 +56,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
@@ -76,8 +76,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
@@ -95,7 +95,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
 
-  %b = load float addrspace(1)* %gep, align 4
+  %b = load float, float addrspace(1)* %gep, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -113,7 +113,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
 
-  %b = load float addrspace(1)* %gep, align 4
+  %b = load float, float addrspace(1)* %gep, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -131,7 +131,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
 
-  %a = load float addrspace(1)* %gep, align 4
+  %a = load float, float addrspace(1)* %gep, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -149,7 +149,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
 
-  %a = load float addrspace(1)* %gep, align 4
+  %a = load float, float addrspace(1)* %gep, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -167,7 +167,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
 
-  %b = load double addrspace(1)* %gep, align 8
+  %b = load double, double addrspace(1)* %gep, align 8
 
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
@@ -185,7 +185,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
 
-  %b = load double addrspace(1)* %gep, align 8
+  %b = load double, double addrspace(1)* %gep, align 8
 
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
@@ -203,7 +203,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
 
-  %a = load double addrspace(1)* %gep, align 8
+  %a = load double, double addrspace(1)* %gep, align 8
 
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
@@ -221,7 +221,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
 
-  %a = load double addrspace(1)* %gep, align 8
+  %a = load double, double addrspace(1)* %gep, align 8
 
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
@@ -295,7 +295,7 @@
 define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %a = load float addrspace(1)* %gep.0, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -311,7 +311,7 @@
 define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %a = load float addrspace(1)* %gep.0, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -330,8 +330,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
 
@@ -352,8 +352,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
 
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.fract.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.fract.ll
index 7d15300..f4cf7fc 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.fract.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.fract.ll
@@ -11,7 +11,7 @@
 ; SI: v_fract_f32
 ; EG: FRACT
 define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
-  %val = load float addrspace(1)* %src, align 4
+  %val = load float, float addrspace(1)* %src, align 4
   %fract = call float @llvm.AMDGPU.fract.f32(float %val) nounwind readnone
   store float %fract, float addrspace(1)* %out, align 4
   ret void
@@ -21,7 +21,7 @@
 ; SI: v_fract_f32
 ; EG: FRACT
 define void @fract_f32_legacy_amdil(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
-  %val = load float addrspace(1)* %src, align 4
+  %val = load float, float addrspace(1)* %src, align 4
   %fract = call float @llvm.AMDIL.fraction.f32(float %val) nounwind readnone
   store float %fract, float addrspace(1)* %out, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.imax.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.imax.ll
index ce7fca0..46662f9 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.imax.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.imax.ll
@@ -5,7 +5,7 @@
 ; SI: v_max_i32_e32
 define void @vector_imax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
 main_body:
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %max = call i32 @llvm.AMDGPU.imax(i32 %p0, i32 %load)
   %bc = bitcast i32 %max to float
   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.imin.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.imin.ll
index 15cd38b..34b454e 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.imin.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.imin.ll
@@ -5,7 +5,7 @@
 ; SI: v_min_i32_e32
 define void @vector_imin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
 main_body:
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %min = call i32 @llvm.AMDGPU.imin(i32 %p0, i32 %load)
   %bc = bitcast i32 %min to float
   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.tex.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.tex.ll
index aac014b..1020660 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.tex.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.tex.ll
@@ -18,7 +18,7 @@
 ;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
 
 define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-   %addr = load <4 x float> addrspace(1)* %in
+   %addr = load <4 x float>, <4 x float> addrspace(1)* %in
    %res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %addr, i32 0, i32 0, i32 1)
    %res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res1, i32 0, i32 0, i32 2)
    %res3 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res2, i32 0, i32 0, i32 3)
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
index 5829f73..6b546a7 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
@@ -10,8 +10,8 @@
 ; SI: buffer_store_dwordx2 [[RESULT]],
 ; SI: s_endpgm
 define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
-  %a = load double addrspace(1)* %aptr, align 8
-  %b = load i32 addrspace(1)* %bptr, align 4
+  %a = load double, double addrspace(1)* %aptr, align 8
+  %b = load i32, i32 addrspace(1)* %bptr, align 4
   %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 %b) nounwind readnone
   store double %result, double addrspace(1)* %out, align 8
   ret void
@@ -23,7 +23,7 @@
 ; SI: buffer_store_dwordx2 [[RESULT]],
 ; SI: s_endpgm
 define void @test_trig_preop_f64_imm_segment(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
-  %a = load double addrspace(1)* %aptr, align 8
+  %a = load double, double addrspace(1)* %aptr, align 8
   %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 7) nounwind readnone
   store double %result, double addrspace(1)* %out, align 8
   ret void
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
index ea02d3f..77a073b 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
@@ -29,8 +29,8 @@
   %src0.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %src2.gep = getelementptr i32, i32 addrspace(1)* %src0.gep, i32 1
 
-  %src0 = load i32 addrspace(1)* %src0.gep, align 4
-  %src2 = load i32 addrspace(1)* %src2.gep, align 4
+  %src0 = load i32, i32 addrspace(1)* %src0.gep, align 4
+  %src2 = load i32, i32 addrspace(1)* %src2.gep, align 4
   %mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 4, i32 %src2) nounwind readnone
   store i32 %mad, i32 addrspace(1)* %out.gep, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.umax.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.umax.ll
index 4320dfe..a97d103 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.umax.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.umax.ll
@@ -5,7 +5,7 @@
 ; SI: v_max_u32_e32
 define void @vector_umax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
 main_body:
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %max = call i32 @llvm.AMDGPU.umax(i32 %p0, i32 %load)
   %bc = bitcast i32 %max to float
   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
@@ -28,7 +28,7 @@
 ; SI-NOT: and
 ; SI: buffer_store_short [[RESULT]],
 define void @trunc_zext_umax(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
-  %tmp5 = load i8 addrspace(1)* %src, align 1
+  %tmp5 = load i8, i8 addrspace(1)* %src, align 1
   %tmp2 = zext i8 %tmp5 to i32
   %tmp3 = tail call i32 @llvm.AMDGPU.umax(i32 %tmp2, i32 0) nounwind readnone
   %tmp4 = trunc i32 %tmp3 to i8
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.umin.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.umin.ll
index e4cac33..2acd10e 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.umin.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.umin.ll
@@ -5,7 +5,7 @@
 ; SI: v_min_u32_e32
 define void @vector_umin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
 main_body:
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %min = call i32 @llvm.AMDGPU.umin(i32 %p0, i32 %load)
   %bc = bitcast i32 %min to float
   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
@@ -28,7 +28,7 @@
 ; SI-NOT: and
 ; SI: buffer_store_short [[RESULT]],
 define void @trunc_zext_umin(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
-  %tmp5 = load i8 addrspace(1)* %src, align 1
+  %tmp5 = load i8, i8 addrspace(1)* %src, align 1
   %tmp2 = zext i8 %tmp5 to i32
   %tmp3 = tail call i32 @llvm.AMDGPU.umin(i32 %tmp2, i32 0) nounwind readnone
   %tmp4 = trunc i32 %tmp3 to i8
diff --git a/llvm/test/CodeGen/R600/llvm.SI.imageload.ll b/llvm/test/CodeGen/R600/llvm.SI.imageload.ll
index 14db226..b67716c 100644
--- a/llvm/test/CodeGen/R600/llvm.SI.imageload.ll
+++ b/llvm/test/CodeGen/R600/llvm.SI.imageload.ll
@@ -89,15 +89,15 @@
 define void @vgpr_coords(float addrspace(2)* addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr float addrspace(2)*, float addrspace(2)* addrspace(2)* %0, i32 0
-  %21 = load float addrspace(2)* addrspace(2)* %20, !tbaa !2
+  %21 = load float addrspace(2)*, float addrspace(2)* addrspace(2)* %20, !tbaa !2
   %22 = getelementptr float, float addrspace(2)* %21, i32 0
-  %23 = load float addrspace(2)* %22, !tbaa !2, !invariant.load !1
+  %23 = load float, float addrspace(2)* %22, !tbaa !2, !invariant.load !1
   %24 = getelementptr float, float addrspace(2)* %21, i32 1
-  %25 = load float addrspace(2)* %24, !tbaa !2, !invariant.load !1
+  %25 = load float, float addrspace(2)* %24, !tbaa !2, !invariant.load !1
   %26 = getelementptr float, float addrspace(2)* %21, i32 4
-  %27 = load float addrspace(2)* %26, !tbaa !2, !invariant.load !1
+  %27 = load float, float addrspace(2)* %26, !tbaa !2, !invariant.load !1
   %28 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %2, i32 0
-  %29 = load <32 x i8> addrspace(2)* %28, !tbaa !2
+  %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, !tbaa !2
   %30 = bitcast float %27 to i32
   %31 = bitcast float %23 to i32
   %32 = bitcast float %25 to i32
diff --git a/llvm/test/CodeGen/R600/llvm.SI.load.dword.ll b/llvm/test/CodeGen/R600/llvm.SI.load.dword.ll
index f8f4520..f6c2585 100644
--- a/llvm/test/CodeGen/R600/llvm.SI.load.dword.ll
+++ b/llvm/test/CodeGen/R600/llvm.SI.load.dword.ll
@@ -17,7 +17,7 @@
 define void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, [2 x <16 x i8>] addrspace(2)* byval %arg3, [17 x <16 x i8>] addrspace(2)* inreg %arg4, [17 x <16 x i8>] addrspace(2)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) #0 {
 main_body:
   %tmp = getelementptr [2 x <16 x i8>], [2 x <16 x i8>] addrspace(2)* %arg3, i64 0, i32 1
-  %tmp10 = load <16 x i8> addrspace(2)* %tmp, !tbaa !0
+  %tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
   %tmp11 = shl i32 %arg6, 2
   %tmp12 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
   %tmp13 = bitcast i32 %tmp12 to float
diff --git a/llvm/test/CodeGen/R600/llvm.amdgpu.dp4.ll b/llvm/test/CodeGen/R600/llvm.amdgpu.dp4.ll
index 812b6a4..036cd2c 100644
--- a/llvm/test/CodeGen/R600/llvm.amdgpu.dp4.ll
+++ b/llvm/test/CodeGen/R600/llvm.amdgpu.dp4.ll
@@ -3,8 +3,8 @@
 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) nounwind readnone
 
 define void @test_dp4(float addrspace(1)* %out, <4 x float> addrspace(1)* %a, <4 x float> addrspace(1)* %b) nounwind {
-  %src0 = load <4 x float> addrspace(1)* %a, align 16
-  %src1 = load <4 x float> addrspace(1)* %b, align 16
+  %src0 = load <4 x float>, <4 x float> addrspace(1)* %a, align 16
+  %src1 = load <4 x float>, <4 x float> addrspace(1)* %b, align 16
   %dp4 = call float @llvm.AMDGPU.dp4(<4 x float> %src0, <4 x float> %src1) nounwind readnone
   store float %dp4, float addrspace(1)* %out, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/llvm.round.f64.ll b/llvm/test/CodeGen/R600/llvm.round.f64.ll
index 0d39a45..7d082a2 100644
--- a/llvm/test/CodeGen/R600/llvm.round.f64.ll
+++ b/llvm/test/CodeGen/R600/llvm.round.f64.ll
@@ -33,7 +33,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
-  %x = load double addrspace(1)* %gep
+  %x = load double, double addrspace(1)* %gep
   %result = call double @llvm.round.f64(double %x) #1
   store double %result, double addrspace(1)* %out.gep
   ret void
diff --git a/llvm/test/CodeGen/R600/load-i1.ll b/llvm/test/CodeGen/R600/load-i1.ll
index 315c0a3..0ca49fd 100644
--- a/llvm/test/CodeGen/R600/load-i1.ll
+++ b/llvm/test/CodeGen/R600/load-i1.ll
@@ -11,7 +11,7 @@
 ; EG: VTX_READ_8
 ; EG: AND_INT
 define void @global_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   store i1 %load, i1 addrspace(1)* %out, align 1
   ret void
 }
@@ -26,7 +26,7 @@
 ; EG: AND_INT
 ; EG: LDS_BYTE_WRITE
 define void @local_copy_i1_to_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) nounwind {
-  %load = load i1 addrspace(3)* %in
+  %load = load i1, i1 addrspace(3)* %in
   store i1 %load, i1 addrspace(3)* %out, align 1
   ret void
 }
@@ -40,7 +40,7 @@
 ; EG: VTX_READ_8
 ; EG: AND_INT
 define void @constant_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(2)* %in) nounwind {
-  %load = load i1 addrspace(2)* %in
+  %load = load i1, i1 addrspace(2)* %in
   store i1 %load, i1 addrspace(1)* %out, align 1
   ret void
 }
@@ -54,7 +54,7 @@
 ; EG: VTX_READ_8
 ; EG: BFE_INT
 define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %load to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
@@ -66,7 +66,7 @@
 ; SI: s_endpgm
 
 define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %load to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
@@ -78,7 +78,7 @@
 ; SI: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %load to i64
   store i64 %ext, i64 addrspace(1)* %out, align 4
   ret void
@@ -90,7 +90,7 @@
 ; SI: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %load to i64
   store i64 %ext, i64 addrspace(1)* %out, align 4
   ret void
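
(Worked example for the extension cases above, hypothetical function name: for a loaded i1 value of 1, sext produces all ones while zext produces 1:)

define void @i1_ext_demo(i32 addrspace(1)* %out, i1 addrspace(1)* %in) {
  %load = load i1, i1 addrspace(1)* %in
  %s = sext i1 %load to i32   ; i1 1 -> i32 -1 (0xffffffff)
  %z = zext i1 %load to i32   ; i1 1 -> i32 1
  %sum = add i32 %s, %z
  store i32 %sum, i32 addrspace(1)* %out
  ret void
}
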
diff --git a/llvm/test/CodeGen/R600/load-input-fold.ll b/llvm/test/CodeGen/R600/load-input-fold.ll
index 265fa9b..e45fb78 100644
--- a/llvm/test/CodeGen/R600/load-input-fold.ll
+++ b/llvm/test/CodeGen/R600/load-input-fold.ll
@@ -14,71 +14,71 @@
   %9 = extractelement <4 x float> %reg3, i32 1
   %10 = extractelement <4 x float> %reg3, i32 2
   %11 = extractelement <4 x float> %reg3, i32 3
-  %12 = load <4 x float> addrspace(8)* null
+  %12 = load <4 x float>, <4 x float> addrspace(8)* null
   %13 = extractelement <4 x float> %12, i32 0
   %14 = fmul float %0, %13
-  %15 = load <4 x float> addrspace(8)* null
+  %15 = load <4 x float>, <4 x float> addrspace(8)* null
   %16 = extractelement <4 x float> %15, i32 1
   %17 = fmul float %0, %16
-  %18 = load <4 x float> addrspace(8)* null
+  %18 = load <4 x float>, <4 x float> addrspace(8)* null
   %19 = extractelement <4 x float> %18, i32 2
   %20 = fmul float %0, %19
-  %21 = load <4 x float> addrspace(8)* null
+  %21 = load <4 x float>, <4 x float> addrspace(8)* null
   %22 = extractelement <4 x float> %21, i32 3
   %23 = fmul float %0, %22
-  %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %24 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %25 = extractelement <4 x float> %24, i32 0
   %26 = fmul float %1, %25
   %27 = fadd float %26, %14
-  %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %29 = extractelement <4 x float> %28, i32 1
   %30 = fmul float %1, %29
   %31 = fadd float %30, %17
-  %32 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %32 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %33 = extractelement <4 x float> %32, i32 2
   %34 = fmul float %1, %33
   %35 = fadd float %34, %20
-  %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %36 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %37 = extractelement <4 x float> %36, i32 3
   %38 = fmul float %1, %37
   %39 = fadd float %38, %23
-  %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %40 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %41 = extractelement <4 x float> %40, i32 0
   %42 = fmul float %2, %41
   %43 = fadd float %42, %27
-  %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %44 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %45 = extractelement <4 x float> %44, i32 1
   %46 = fmul float %2, %45
   %47 = fadd float %46, %31
-  %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %49 = extractelement <4 x float> %48, i32 2
   %50 = fmul float %2, %49
   %51 = fadd float %50, %35
-  %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %53 = extractelement <4 x float> %52, i32 3
   %54 = fmul float %2, %53
   %55 = fadd float %54, %39
-  %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %57 = extractelement <4 x float> %56, i32 0
   %58 = fmul float %3, %57
   %59 = fadd float %58, %43
-  %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %61 = extractelement <4 x float> %60, i32 1
   %62 = fmul float %3, %61
   %63 = fadd float %62, %47
-  %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %65 = extractelement <4 x float> %64, i32 2
   %66 = fmul float %3, %65
   %67 = fadd float %66, %51
-  %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %69 = extractelement <4 x float> %68, i32 3
   %70 = fmul float %3, %69
   %71 = fadd float %70, %55
-  %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %73 = extractelement <4 x float> %72, i32 0
-  %74 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %74 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %75 = extractelement <4 x float> %74, i32 1
-  %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %77 = extractelement <4 x float> %76, i32 2
   %78 = insertelement <4 x float> undef, float %4, i32 0
   %79 = insertelement <4 x float> %78, float %5, i32 1
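For reference, the migrated form in general is, roughly:

  <result> = load [atomic] [volatile] <ty>, <ty>* <pointer>[, align <n>]

i.e. the value type is now spelled out before the pointer operand instead of being inferred from the operand's pointee type. An illustrative function (a sketch, not taken from the patch) in the constant-address style seen above:

define <4 x float> @example() {
  %v = load <4 x float>, <4 x float> addrspace(8)* null
  ret <4 x float> %v
}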
diff --git a/llvm/test/CodeGen/R600/load.ll b/llvm/test/CodeGen/R600/load.ll
index 8145ee4..e285831 100644
--- a/llvm/test/CodeGen/R600/load.ll
+++ b/llvm/test/CodeGen/R600/load.ll
@@ -13,7 +13,7 @@
 
 ; SI: buffer_load_ubyte v{{[0-9]+}},
 define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
-  %1 = load i8 addrspace(1)* %in
+  %1 = load i8, i8 addrspace(1)* %in
   %2 = zext i8 %1 to i32
   store i32 %2, i32 addrspace(1)* %out
   ret void
@@ -28,7 +28,7 @@
 ; SI: buffer_load_sbyte
 define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
 entry:
-  %0 = load i8 addrspace(1)* %in
+  %0 = load i8, i8 addrspace(1)* %in
   %1 = sext i8 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
@@ -41,7 +41,7 @@
 ; SI: buffer_load_ubyte
 define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
 entry:
-  %0 = load <2 x i8> addrspace(1)* %in
+  %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
   %1 = zext <2 x i8> %0 to <2 x i32>
   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
   ret void
@@ -62,7 +62,7 @@
 ; SI: buffer_load_sbyte
 define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
 entry:
-  %0 = load <2 x i8> addrspace(1)* %in
+  %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
   %1 = sext <2 x i8> %0 to <2 x i32>
   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
   ret void
@@ -79,7 +79,7 @@
 ; SI: buffer_load_ubyte
 define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
 entry:
-  %0 = load <4 x i8> addrspace(1)* %in
+  %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
   %1 = zext <4 x i8> %0 to <4 x i32>
   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
   ret void
@@ -112,7 +112,7 @@
 ; SI: buffer_load_sbyte
 define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
 entry:
-  %0 = load <4 x i8> addrspace(1)* %in
+  %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
   %1 = sext <4 x i8> %0 to <4 x i32>
   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
   ret void
@@ -124,7 +124,7 @@
 ; SI: buffer_load_ushort
 define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
 entry:
-  %0 = load i16	 addrspace(1)* %in
+  %0 = load i16, i16 addrspace(1)* %in
   %1 = zext i16 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
@@ -139,7 +139,7 @@
 ; SI: buffer_load_sshort
 define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
 entry:
-  %0 = load i16 addrspace(1)* %in
+  %0 = load i16, i16 addrspace(1)* %in
   %1 = sext i16 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
@@ -152,7 +152,7 @@
 ; SI: buffer_load_ushort
 define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
 entry:
-  %0 = load <2 x i16> addrspace(1)* %in
+  %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
   %1 = zext <2 x i16> %0 to <2 x i32>
   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
   ret void
@@ -173,7 +173,7 @@
 ; SI: buffer_load_sshort
 define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
 entry:
-  %0 = load <2 x i16> addrspace(1)* %in
+  %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
   %1 = sext <2 x i16> %0 to <2 x i32>
   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
   ret void
@@ -190,7 +190,7 @@
 ; SI: buffer_load_ushort
 define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
 entry:
-  %0 = load <4 x i16> addrspace(1)* %in
+  %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
   %1 = zext <4 x i16> %0 to <4 x i32>
   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
   ret void
@@ -223,7 +223,7 @@
 ; SI: buffer_load_sshort
 define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
 entry:
-  %0 = load <4 x i16> addrspace(1)* %in
+  %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
   %1 = sext <4 x i16> %0 to <4 x i32>
   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
   ret void
@@ -236,7 +236,7 @@
 ; SI: buffer_load_dword v{{[0-9]+}}
 define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
-  %0 = load i32 addrspace(1)* %in
+  %0 = load i32, i32 addrspace(1)* %in
   store i32 %0, i32 addrspace(1)* %out
   ret void
 }
@@ -248,7 +248,7 @@
 ; SI: buffer_load_dword v{{[0-9]+}}
 define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
 entry:
-  %0 = load float addrspace(1)* %in
+  %0 = load float, float addrspace(1)* %in
   store float %0, float addrspace(1)* %out
   ret void
 }
@@ -260,7 +260,7 @@
 ; SI: buffer_load_dwordx2
 define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
 entry:
-  %0 = load <2 x float> addrspace(1)* %in
+  %0 = load <2 x float>, <2 x float> addrspace(1)* %in
   store <2 x float> %0, <2 x float> addrspace(1)* %out
   ret void
 }
@@ -270,7 +270,7 @@
 ; SI: buffer_load_dwordx2
 define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
 entry:
-  %0 = load i64 addrspace(1)* %in
+  %0 = load i64, i64 addrspace(1)* %in
   store i64 %0, i64 addrspace(1)* %out
   ret void
 }
@@ -284,7 +284,7 @@
 
 define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
-  %0 = load i32 addrspace(1)* %in
+  %0 = load i32, i32 addrspace(1)* %in
   %1 = sext i32 %0 to i64
   store i64 %1, i64 addrspace(1)* %out
   ret void
@@ -295,7 +295,7 @@
 ; R600: MEM_RAT
 define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
-  %0 = load i32 addrspace(1)* %in
+  %0 = load i32, i32 addrspace(1)* %in
   %1 = zext i32 %0 to i64
   store i64 %1, i64 addrspace(1)* %out
   ret void
@@ -315,7 +315,7 @@
 ; SI: buffer_load_dword
 define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
 entry:
-  %0 = load <8 x i32> addrspace(1)* %in
+  %0 = load <8 x i32>, <8 x i32> addrspace(1)* %in
   store <8 x i32> %0, <8 x i32> addrspace(1)* %out
   ret void
 }
@@ -344,7 +344,7 @@
 ; SI: buffer_load_dword
 define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
 entry:
-  %0 = load <16 x i32> addrspace(1)* %in
+  %0 = load <16 x i32>, <16 x i32> addrspace(1)* %in
   store <16 x i32> %0, <16 x i32> addrspace(1)* %out
   ret void
 }
@@ -363,7 +363,7 @@
 ; SI: buffer_load_sbyte v{{[0-9]+}},
 define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
 entry:
-  %0 = load i8 addrspace(2)* %in
+  %0 = load i8, i8 addrspace(2)* %in
   %1 = sext i8 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
@@ -375,7 +375,7 @@
 ; SI: buffer_load_ubyte v{{[0-9]+}},
 define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
 entry:
-  %0 = load i8 addrspace(2)* %in
+  %0 = load i8, i8 addrspace(2)* %in
   %1 = zext i8 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
@@ -388,7 +388,7 @@
 define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
 entry:
   %0 = getelementptr i8, i8 addrspace(2)* %in, i32 1
-  %1 = load i8 addrspace(2)* %0
+  %1 = load i8, i8 addrspace(2)* %0
   %2 = zext i8 %1 to i32
   store i32 %2, i32 addrspace(1)* %out
   ret void
@@ -404,7 +404,7 @@
 ; SI: buffer_load_sshort
 define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
 entry:
-  %0 = load i16 addrspace(2)* %in
+  %0 = load i16, i16 addrspace(2)* %in
   %1 = sext i16 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
@@ -416,7 +416,7 @@
 ; SI: buffer_load_ushort
 define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
 entry:
-  %0 = load i16 addrspace(2)* %in
+  %0 = load i16, i16 addrspace(2)* %in
   %1 = zext i16 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
@@ -429,7 +429,7 @@
 define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
 entry:
   %0 = getelementptr i16, i16 addrspace(2)* %in, i32 1
-  %1 = load i16 addrspace(2)* %0
+  %1 = load i16, i16 addrspace(2)* %0
   %2 = zext i16 %1 to i32
   store i32 %2, i32 addrspace(1)* %out
   ret void
@@ -442,7 +442,7 @@
 ; SI: s_load_dword s{{[0-9]+}}
 define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
 entry:
-  %0 = load i32 addrspace(2)* %in
+  %0 = load i32, i32 addrspace(2)* %in
   store i32 %0, i32 addrspace(1)* %out
   ret void
 }
@@ -453,7 +453,7 @@
 
 ; SI: s_load_dword s{{[0-9]+}}
 define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
-  %1 = load float addrspace(2)* %in
+  %1 = load float, float addrspace(2)* %in
   store float %1, float addrspace(1)* %out
   ret void
 }
@@ -469,7 +469,7 @@
 ; SI: s_mov_b32 m0
 ; SI: ds_read_u8
 define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
-  %1 = load i8 addrspace(3)* %in
+  %1 = load i8, i8 addrspace(3)* %in
   %2 = zext i8 %1 to i32
   store i32 %2, i32 addrspace(1)* %out
   ret void
@@ -483,7 +483,7 @@
 ; SI: ds_read_i8
 define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
 entry:
-  %0 = load i8 addrspace(3)* %in
+  %0 = load i8, i8 addrspace(3)* %in
   %1 = sext i8 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
@@ -498,7 +498,7 @@
 ; SI: ds_read_u8
 define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
 entry:
-  %0 = load <2 x i8> addrspace(3)* %in
+  %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
   %1 = zext <2 x i8> %0 to <2 x i32>
   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
   ret void
@@ -515,7 +515,7 @@
 ; SI: ds_read_i8
 define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
 entry:
-  %0 = load <2 x i8> addrspace(3)* %in
+  %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
   %1 = sext <2 x i8> %0 to <2 x i32>
   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
   ret void
@@ -534,7 +534,7 @@
 ; SI: ds_read_u8
 define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
 entry:
-  %0 = load <4 x i8> addrspace(3)* %in
+  %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
   %1 = zext <4 x i8> %0 to <4 x i32>
   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
   ret void
@@ -557,7 +557,7 @@
 ; SI: ds_read_i8
 define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
 entry:
-  %0 = load <4 x i8> addrspace(3)* %in
+  %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
   %1 = sext <4 x i8> %0 to <4 x i32>
   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
   ret void
@@ -571,7 +571,7 @@
 ; SI: ds_read_u16
 define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
 entry:
-  %0 = load i16	 addrspace(3)* %in
+  %0 = load i16, i16 addrspace(3)* %in
   %1 = zext i16 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
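The pointer-of-pointer load in missing-store.ll above is where the explicit type pays off most: the loaded value's type no longer has to be recovered by stripping one level of indirection from the operand. A minimal sketch in the same style (the @p global is hypothetical, not from the patch):

@p = addrspace(3) global i32 addrspace(2)* null

define void @load_ptr_example(i32 addrspace(2)* addrspace(1)* %out) {
  ; loads an i32 addrspace(2)* value from an addrspace(3) pointer-to-pointer
  %q = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @p
  store i32 addrspace(2)* %q, i32 addrspace(2)* addrspace(1)* %out
  ret void
}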
@@ -585,7 +585,7 @@
 ; SI: ds_read_i16
 define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
 entry:
-  %0 = load i16 addrspace(3)* %in
+  %0 = load i16, i16 addrspace(3)* %in
   %1 = sext i16 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
@@ -600,7 +600,7 @@
 ; SI: ds_read_u16
 define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
 entry:
-  %0 = load <2 x i16> addrspace(3)* %in
+  %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
   %1 = zext <2 x i16> %0 to <2 x i32>
   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
   ret void
@@ -617,7 +617,7 @@
 ; SI: ds_read_i16
 define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
 entry:
-  %0 = load <2 x i16> addrspace(3)* %in
+  %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
   %1 = sext <2 x i16> %0 to <2 x i32>
   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
   ret void
@@ -636,7 +636,7 @@
 ; SI: ds_read_u16
 define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
 entry:
-  %0 = load <4 x i16> addrspace(3)* %in
+  %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
   %1 = zext <4 x i16> %0 to <4 x i32>
   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
   ret void
@@ -659,7 +659,7 @@
 ; SI: ds_read_i16
 define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
 entry:
-  %0 = load <4 x i16> addrspace(3)* %in
+  %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
   %1 = sext <4 x i16> %0 to <4 x i32>
   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
   ret void
@@ -673,7 +673,7 @@
 ; SI: ds_read_b32
 define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 entry:
-  %0 = load i32 addrspace(3)* %in
+  %0 = load i32, i32 addrspace(3)* %in
   store i32 %0, i32 addrspace(1)* %out
   ret void
 }
@@ -685,7 +685,7 @@
 ; SI: ds_read_b32
 define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
 entry:
-  %0 = load float addrspace(3)* %in
+  %0 = load float, float addrspace(3)* %in
   store float %0, float addrspace(1)* %out
   ret void
 }
@@ -698,7 +698,7 @@
 ; SI: ds_read_b64
 define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
 entry:
-  %0 = load <2 x float> addrspace(3)* %in
+  %0 = load <2 x float>, <2 x float> addrspace(3)* %in
   store <2 x float> %0, <2 x float> addrspace(1)* %out
   ret void
 }
@@ -711,10 +711,10 @@
 ; SI-DAG: ds_read_b32
 ; SI-DAG: ds_read2_b32
 define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
-  %scalar = load i32 addrspace(3)* %in
+  %scalar = load i32, i32 addrspace(3)* %in
   %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
   %vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2
-  %vec0 = load <2 x i32> addrspace(3)* %vec_ptr, align 4
+  %vec0 = load <2 x i32>, <2 x i32> addrspace(3)* %vec_ptr, align 4
   %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
   %vec = add <2 x i32> %vec0, %vec1
   store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
@@ -733,7 +733,7 @@
 define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 entry:
   %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
-  %tmp1 = load i32 addrspace(3)* %tmp0
+  %tmp1 = load i32, i32 addrspace(3)* %tmp0
   %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1
   store i32 %tmp1, i32 addrspace(1)* %tmp2
   ret void
diff --git a/llvm/test/CodeGen/R600/load.vec.ll b/llvm/test/CodeGen/R600/load.vec.ll
index 346d8dc..02f883c 100644
--- a/llvm/test/CodeGen/R600/load.vec.ll
+++ b/llvm/test/CodeGen/R600/load.vec.ll
@@ -8,7 +8,7 @@
 ; SI: {{^}}load_v2i32:
 ; SI: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
 define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
-  %a = load <2 x i32> addrspace(1) * %in
+  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
   store <2 x i32> %a, <2 x i32> addrspace(1)* %out
   ret void
 }
@@ -19,7 +19,7 @@
 ; SI: {{^}}load_v4i32:
 ; SI: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}]
 define void @load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %a = load <4 x i32> addrspace(1) * %in
+  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
   store <4 x i32> %a, <4 x i32> addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/load64.ll b/llvm/test/CodeGen/R600/load64.ll
index cb3d654..74beabd 100644
--- a/llvm/test/CodeGen/R600/load64.ll
+++ b/llvm/test/CodeGen/R600/load64.ll
@@ -6,7 +6,7 @@
 ; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
 ; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
 define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
-  %1 = load double addrspace(1)* %in
+  %1 = load double, double addrspace(1)* %in
   store double %1, double addrspace(1)* %out
   ret void
 }
@@ -15,7 +15,7 @@
 ; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
 ; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
 define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
-  %tmp = load i64 addrspace(1)* %in
+  %tmp = load i64, i64 addrspace(1)* %in
   store i64 %tmp, i64 addrspace(1)* %out, align 8
   ret void
 }
@@ -25,7 +25,7 @@
 ; CHECK: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}]
 ; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
 define void @load_const_addrspace_f64(double addrspace(1)* %out, double addrspace(2)* %in) {
-  %1 = load double addrspace(2)* %in
+  %1 = load double, double addrspace(2)* %in
   store double %1, double addrspace(1)* %out
   ret void
 }
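Note that the ; CHECK/; SI lines in these codegen tests match the emitted machine instructions, not the IR, so only the function bodies needed migrating. A hypothetical minimal test in the new syntax (assumed RUN line, modeled on the tests above):

; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
; CHECK: buffer_load_dword
define void @copy_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
  %v = load i32, i32 addrspace(1)* %in
  store i32 %v, i32 addrspace(1)* %out
  ret void
}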
diff --git a/llvm/test/CodeGen/R600/local-64.ll b/llvm/test/CodeGen/R600/local-64.ll
index 768b038..33f3159 100644
--- a/llvm/test/CodeGen/R600/local-64.ll
+++ b/llvm/test/CodeGen/R600/local-64.ll
@@ -7,7 +7,7 @@
 ; BOTH: buffer_store_dword [[REG]],
 define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
   %gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
-  %val = load i32 addrspace(3)* %gep, align 4
+  %val = load i32, i32 addrspace(3)* %gep, align 4
   store i32 %val, i32 addrspace(1)* %out, align 4
   ret void
 }
@@ -16,7 +16,7 @@
 ; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}}
 ; BOTH: buffer_store_dword [[REG]],
 define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
-  %val = load i32 addrspace(3)* %in, align 4
+  %val = load i32, i32 addrspace(3)* %in, align 4
   store i32 %val, i32 addrspace(1)* %out, align 4
   ret void
 }
@@ -27,7 +27,7 @@
 ; BOTH: buffer_store_byte [[REG]],
 define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
   %gep = getelementptr i8, i8 addrspace(3)* %in, i32 65535
-  %val = load i8 addrspace(3)* %gep, align 4
+  %val = load i8, i8 addrspace(3)* %gep, align 4
   store i8 %val, i8 addrspace(1)* %out, align 4
   ret void
 }
@@ -42,7 +42,7 @@
 ; BOTH: buffer_store_byte [[REG]],
 define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
   %gep = getelementptr i8, i8 addrspace(3)* %in, i32 65536
-  %val = load i8 addrspace(3)* %gep, align 4
+  %val = load i8, i8 addrspace(3)* %gep, align 4
   store i8 %val, i8 addrspace(1)* %out, align 4
   ret void
 }
@@ -53,7 +53,7 @@
 ; BOTH: buffer_store_dwordx2 [[REG]],
 define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
   %gep = getelementptr i64, i64 addrspace(3)* %in, i32 7
-  %val = load i64 addrspace(3)* %gep, align 8
+  %val = load i64, i64 addrspace(3)* %gep, align 8
   store i64 %val, i64 addrspace(1)* %out, align 8
   ret void
 }
@@ -62,7 +62,7 @@
 ; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
 ; BOTH: buffer_store_dwordx2 [[REG]],
 define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
-  %val = load i64 addrspace(3)* %in, align 8
+  %val = load i64, i64 addrspace(3)* %in, align 8
   store i64 %val, i64 addrspace(1)* %out, align 8
   ret void
 }
@@ -73,7 +73,7 @@
 ; BOTH: buffer_store_dwordx2 [[REG]],
 define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
   %gep = getelementptr double, double addrspace(3)* %in, i32 7
-  %val = load double addrspace(3)* %gep, align 8
+  %val = load double, double addrspace(3)* %gep, align 8
   store double %val, double addrspace(1)* %out, align 8
   ret void
 }
@@ -82,7 +82,7 @@
 ; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
 ; BOTH: buffer_store_dwordx2 [[REG]],
 define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
-  %val = load double addrspace(3)* %in, align 8
+  %val = load double, double addrspace(3)* %in, align 8
   store double %val, double addrspace(1)* %out, align 8
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/local-memory-two-objects.ll b/llvm/test/CodeGen/R600/local-memory-two-objects.ll
index 1d38570..75f8d54 100644
--- a/llvm/test/CodeGen/R600/local-memory-two-objects.ll
+++ b/llvm/test/CodeGen/R600/local-memory-two-objects.ll
@@ -45,11 +45,11 @@
   %sub = sub nsw i32 3, %x.i
   call void @llvm.AMDGPU.barrier.local()
   %arrayidx2 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %sub
-  %0 = load i32 addrspace(3)* %arrayidx2, align 4
+  %0 = load i32, i32 addrspace(3)* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %x.i
   store i32 %0, i32 addrspace(1)* %arrayidx3, align 4
   %arrayidx4 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %sub
-  %1 = load i32 addrspace(3)* %arrayidx4, align 4
+  %1 = load i32, i32 addrspace(3)* %arrayidx4, align 4
   %add = add nsw i32 %x.i, 4
   %arrayidx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %add
   store i32 %1, i32 addrspace(1)* %arrayidx5, align 4
diff --git a/llvm/test/CodeGen/R600/local-memory.ll b/llvm/test/CodeGen/R600/local-memory.ll
index 2c082da..4ec0418 100644
--- a/llvm/test/CodeGen/R600/local-memory.ll
+++ b/llvm/test/CodeGen/R600/local-memory.ll
@@ -36,7 +36,7 @@
   %.add = select i1 %cmp, i32 0, i32 %add
   call void @llvm.AMDGPU.barrier.local()
   %arrayidx1 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %.add
-  %0 = load i32 addrspace(3)* %arrayidx1, align 4
+  %0 = load i32, i32 addrspace(3)* %arrayidx1, align 4
   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %y.i
   store i32 %0, i32 addrspace(1)* %arrayidx2, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/loop-idiom.ll b/llvm/test/CodeGen/R600/loop-idiom.ll
index 4c30b69..810b34f 100644
--- a/llvm/test/CodeGen/R600/loop-idiom.ll
+++ b/llvm/test/CodeGen/R600/loop-idiom.ll
@@ -22,7 +22,7 @@
   %0 = phi i32 [0, %entry], [%4, %for.body]
   %1 = getelementptr i8, i8 addrspace(3)* %in, i32 %0
   %2 = getelementptr i8, i8* %dest, i32 %0
-  %3 = load i8 addrspace(3)* %1
+  %3 = load i8, i8 addrspace(3)* %1
   store i8 %3, i8* %2
   %4 = add i32 %0, 1
   %5 = icmp eq i32 %4, %size
diff --git a/llvm/test/CodeGen/R600/m0-spill.ll b/llvm/test/CodeGen/R600/m0-spill.ll
index 37bc10a..1dddc85 100644
--- a/llvm/test/CodeGen/R600/m0-spill.ll
+++ b/llvm/test/CodeGen/R600/m0-spill.ll
@@ -13,7 +13,7 @@
 
 if:
   %lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
-  %lds_data = load float addrspace(3)* %lds_ptr
+  %lds_data = load float, float addrspace(3)* %lds_ptr
   br label %endif
 
 else:
diff --git a/llvm/test/CodeGen/R600/mad-combine.ll b/llvm/test/CodeGen/R600/mad-combine.ll
index 435efe0..bc07162 100644
--- a/llvm/test/CodeGen/R600/mad-combine.ll
+++ b/llvm/test/CodeGen/R600/mad-combine.ll
@@ -37,9 +37,9 @@
   %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
 
   %mul = fmul float %a, %b
   %fma = fadd float %mul, %c
@@ -76,10 +76,10 @@
   %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
-  %d = load float addrspace(1)* %gep.3
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
+  %d = load float, float addrspace(1)* %gep.3
 
   %mul = fmul float %a, %b
   %fma0 = fadd float %mul, %c
@@ -110,9 +110,9 @@
   %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
 
   %mul = fmul float %a, %b
   %fma = fadd float %c, %mul
@@ -140,9 +140,9 @@
   %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
 
   %mul = fmul float %a, %b
   %fma = fsub float %mul, %c
@@ -179,10 +179,10 @@
   %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
-  %d = load float addrspace(1)* %gep.3
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
+  %d = load float, float addrspace(1)* %gep.3
 
   %mul = fmul float %a, %b
   %fma0 = fsub float %mul, %c
@@ -212,9 +212,9 @@
   %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
 
   %mul = fmul float %a, %b
   %fma = fsub float %c, %mul
@@ -250,10 +250,10 @@
   %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
-  %d = load float addrspace(1)* %gep.3
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
+  %d = load float, float addrspace(1)* %gep.3
 
   %mul = fmul float %a, %b
   %fma0 = fsub float %c, %mul
@@ -284,9 +284,9 @@
   %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
 
   %mul = fmul float %a, %b
   %mul.neg = fsub float -0.0, %mul
@@ -324,10 +324,10 @@
   %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
-  %d = load float addrspace(1)* %gep.3
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
+  %d = load float, float addrspace(1)* %gep.3
 
   %mul = fmul float %a, %b
   %mul.neg = fsub float -0.0, %mul
@@ -367,10 +367,10 @@
   %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
-  %d = load float addrspace(1)* %gep.3
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
+  %d = load float, float addrspace(1)* %gep.3
 
   %mul = fmul float %a, %b
   %mul.neg = fsub float -0.0, %mul
@@ -412,11 +412,11 @@
   %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %x = load float addrspace(1)* %gep.0
-  %y = load float addrspace(1)* %gep.1
-  %z = load float addrspace(1)* %gep.2
-  %u = load float addrspace(1)* %gep.3
-  %v = load float addrspace(1)* %gep.4
+  %x = load float, float addrspace(1)* %gep.0
+  %y = load float, float addrspace(1)* %gep.1
+  %z = load float, float addrspace(1)* %gep.2
+  %u = load float, float addrspace(1)* %gep.3
+  %v = load float, float addrspace(1)* %gep.4
 
   %tmp0 = fmul float %u, %v
   %tmp1 = call float @llvm.fma.f32(float %x, float %y, float %tmp0) #0
@@ -458,11 +458,11 @@
   %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %x = load float addrspace(1)* %gep.0
-  %y = load float addrspace(1)* %gep.1
-  %z = load float addrspace(1)* %gep.2
-  %u = load float addrspace(1)* %gep.3
-  %v = load float addrspace(1)* %gep.4
+  %x = load float, float addrspace(1)* %gep.0
+  %y = load float, float addrspace(1)* %gep.1
+  %z = load float, float addrspace(1)* %gep.2
+  %u = load float, float addrspace(1)* %gep.3
+  %v = load float, float addrspace(1)* %gep.4
 
   %tmp0 = fmul float %u, %v
   %tmp1 = call float @llvm.fma.f32(float %y, float %z, float %tmp0) #0
@@ -503,11 +503,11 @@
   %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %x = load float addrspace(1)* %gep.0
-  %y = load float addrspace(1)* %gep.1
-  %z = load float addrspace(1)* %gep.2
-  %u = load float addrspace(1)* %gep.3
-  %v = load float addrspace(1)* %gep.4
+  %x = load float, float addrspace(1)* %gep.0
+  %y = load float, float addrspace(1)* %gep.1
+  %z = load float, float addrspace(1)* %gep.2
+  %u = load float, float addrspace(1)* %gep.3
+  %v = load float, float addrspace(1)* %gep.4
 
   %tmp0 = fmul float %u, %v
   %tmp1 = call float @llvm.fmuladd.f32(float %x, float %y, float %tmp0) #0
@@ -549,11 +549,11 @@
   %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %x = load float addrspace(1)* %gep.0
-  %y = load float addrspace(1)* %gep.1
-  %z = load float addrspace(1)* %gep.2
-  %u = load float addrspace(1)* %gep.3
-  %v = load float addrspace(1)* %gep.4
+  %x = load float, float addrspace(1)* %gep.0
+  %y = load float, float addrspace(1)* %gep.1
+  %z = load float, float addrspace(1)* %gep.2
+  %u = load float, float addrspace(1)* %gep.3
+  %v = load float, float addrspace(1)* %gep.4
 
   %tmp0 = fmul float %u, %v
   %tmp1 = call float @llvm.fmuladd.f32(float %y, float %z, float %tmp0) #0
diff --git a/llvm/test/CodeGen/R600/mad-sub.ll b/llvm/test/CodeGen/R600/mad-sub.ll
index 6ea6771..aa4194f 100644
--- a/llvm/test/CodeGen/R600/mad-sub.ll
+++ b/llvm/test/CodeGen/R600/mad-sub.ll
@@ -18,9 +18,9 @@
   %add2 = add i64 %tid.ext, 2
   %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
   %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load float addrspace(1)* %gep0, align 4
-  %b = load float addrspace(1)* %gep1, align 4
-  %c = load float addrspace(1)* %gep2, align 4
+  %a = load float, float addrspace(1)* %gep0, align 4
+  %b = load float, float addrspace(1)* %gep1, align 4
+  %c = load float, float addrspace(1)* %gep2, align 4
   %mul = fmul float %a, %b
   %sub = fsub float %mul, %c
   store float %sub, float addrspace(1)* %outgep, align 4
@@ -42,9 +42,9 @@
   %add2 = add i64 %tid.ext, 2
   %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
   %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load float addrspace(1)* %gep0, align 4
-  %b = load float addrspace(1)* %gep1, align 4
-  %c = load float addrspace(1)* %gep2, align 4
+  %a = load float, float addrspace(1)* %gep0, align 4
+  %b = load float, float addrspace(1)* %gep1, align 4
+  %c = load float, float addrspace(1)* %gep2, align 4
   %mul = fmul float %a, %b
   %sub = fsub float %c, %mul
   store float %sub, float addrspace(1)* %outgep, align 4
@@ -63,9 +63,9 @@
   %add2 = add i64 %tid.ext, 2
   %gep2 = getelementptr double, double addrspace(1)* %ptr, i64 %add2
   %outgep = getelementptr double, double addrspace(1)* %out, i64 %tid.ext
-  %a = load double addrspace(1)* %gep0, align 8
-  %b = load double addrspace(1)* %gep1, align 8
-  %c = load double addrspace(1)* %gep2, align 8
+  %a = load double, double addrspace(1)* %gep0, align 8
+  %b = load double, double addrspace(1)* %gep1, align 8
+  %c = load double, double addrspace(1)* %gep2, align 8
   %mul = fmul double %a, %b
   %sub = fsub double %mul, %c
   store double %sub, double addrspace(1)* %outgep, align 8
@@ -87,9 +87,9 @@
   %add2 = add i64 %tid.ext, 2
   %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
   %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load float addrspace(1)* %gep0, align 4
-  %b = load float addrspace(1)* %gep1, align 4
-  %c = load float addrspace(1)* %gep2, align 4
+  %a = load float, float addrspace(1)* %gep0, align 4
+  %b = load float, float addrspace(1)* %gep1, align 4
+  %c = load float, float addrspace(1)* %gep2, align 4
   %c.abs = call float @llvm.fabs.f32(float %c) #0
   %mul = fmul float %a, %b
   %sub = fsub float %mul, %c.abs
@@ -112,9 +112,9 @@
   %add2 = add i64 %tid.ext, 2
   %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
   %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load float addrspace(1)* %gep0, align 4
-  %b = load float addrspace(1)* %gep1, align 4
-  %c = load float addrspace(1)* %gep2, align 4
+  %a = load float, float addrspace(1)* %gep0, align 4
+  %b = load float, float addrspace(1)* %gep1, align 4
+  %c = load float, float addrspace(1)* %gep2, align 4
   %c.abs = call float @llvm.fabs.f32(float %c) #0
   %mul = fmul float %a, %b
   %sub = fsub float %c.abs, %mul
@@ -133,9 +133,9 @@
   %add2 = add i64 %tid.ext, 2
   %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
   %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load float addrspace(1)* %gep0, align 4
-  %b = load float addrspace(1)* %gep1, align 4
-  %c = load float addrspace(1)* %gep2, align 4
+  %a = load float, float addrspace(1)* %gep0, align 4
+  %b = load float, float addrspace(1)* %gep1, align 4
+  %c = load float, float addrspace(1)* %gep2, align 4
   %nega = fsub float -0.000000e+00, %a
   %negb = fsub float -0.000000e+00, %b
   %mul = fmul float %nega, %negb
@@ -159,9 +159,9 @@
   %add2 = add i64 %tid.ext, 2
   %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
   %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load float addrspace(1)* %gep0, align 4
-  %b = load float addrspace(1)* %gep1, align 4
-  %c = load float addrspace(1)* %gep2, align 4
+  %a = load float, float addrspace(1)* %gep0, align 4
+  %b = load float, float addrspace(1)* %gep1, align 4
+  %c = load float, float addrspace(1)* %gep2, align 4
   %b.abs = call float @llvm.fabs.f32(float %b) #0
   %mul = fmul float %a, %b.abs
   %sub = fsub float %mul, %c
@@ -180,8 +180,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r1 = load float addrspace(1)* %gep.0
-  %r2 = load float addrspace(1)* %gep.1
+  %r1 = load float, float addrspace(1)* %gep.0
+  %r2 = load float, float addrspace(1)* %gep.1
 
   %add = fadd float %r1, %r1
   %r3 = fsub float %r2, %add
@@ -201,8 +201,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r1 = load float addrspace(1)* %gep.0
-  %r2 = load float addrspace(1)* %gep.1
+  %r1 = load float, float addrspace(1)* %gep.0
+  %r2 = load float, float addrspace(1)* %gep.1
 
   %add = fadd float %r1, %r1
   %r3 = fsub float %add, %r2
diff --git a/llvm/test/CodeGen/R600/madak.ll b/llvm/test/CodeGen/R600/madak.ll
index f958783..cc3e91e 100644
--- a/llvm/test/CodeGen/R600/madak.ll
+++ b/llvm/test/CodeGen/R600/madak.ll
@@ -16,8 +16,8 @@
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %in.a.gep, align 4
-  %b = load float addrspace(1)* %in.b.gep, align 4
+  %a = load float, float addrspace(1)* %in.a.gep, align 4
+  %b = load float, float addrspace(1)* %in.b.gep, align 4
 
   %mul = fmul float %a, %b
   %madak = fadd float %mul, 10.0
@@ -47,9 +47,9 @@
   %out.gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
   %out.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
 
-  %a = load float addrspace(1)* %in.gep.0, align 4
-  %b = load float addrspace(1)* %in.gep.1, align 4
-  %c = load float addrspace(1)* %in.gep.2, align 4
+  %a = load float, float addrspace(1)* %in.gep.0, align 4
+  %b = load float, float addrspace(1)* %in.gep.1, align 4
+  %c = load float, float addrspace(1)* %in.gep.2, align 4
 
   %mul0 = fmul float %a, %b
   %mul1 = fmul float %a, %c
@@ -69,7 +69,7 @@
   %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %in.a.gep, align 4
+  %a = load float, float addrspace(1)* %in.a.gep, align 4
 
   %mul = fmul float 4.0, %a
   %madak = fadd float %mul, 10.0
@@ -90,8 +90,8 @@
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %in.a.gep, align 4
-  %b = load float addrspace(1)* %in.b.gep, align 4
+  %a = load float, float addrspace(1)* %in.a.gep, align 4
+  %b = load float, float addrspace(1)* %in.b.gep, align 4
 
   %mul = fmul float %a, %b
   %madak = fadd float %mul, 4.0
@@ -111,7 +111,7 @@
   %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %in.a.gep, align 4
+  %a = load float, float addrspace(1)* %in.a.gep, align 4
 
   %mul = fmul float %a, %b
   %madak = fadd float %mul, 10.0
@@ -130,7 +130,7 @@
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %b = load float addrspace(1)* %in.b.gep, align 4
+  %b = load float, float addrspace(1)* %in.b.gep, align 4
 
   %mul = fmul float %a, %b
   %madak = fadd float %mul, 10.0
@@ -159,8 +159,8 @@
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %in.a.gep, align 4
-  %b = load float addrspace(1)* %in.b.gep, align 4
+  %a = load float, float addrspace(1)* %in.a.gep, align 4
+  %b = load float, float addrspace(1)* %in.b.gep, align 4
 
   %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
 
@@ -181,8 +181,8 @@
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %in.a.gep, align 4
-  %b = load float addrspace(1)* %in.b.gep, align 4
+  %a = load float, float addrspace(1)* %in.a.gep, align 4
+  %b = load float, float addrspace(1)* %in.b.gep, align 4
 
   %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
 
diff --git a/llvm/test/CodeGen/R600/madmk.ll b/llvm/test/CodeGen/R600/madmk.ll
index ffd5a94..17d1b8a 100644
--- a/llvm/test/CodeGen/R600/madmk.ll
+++ b/llvm/test/CodeGen/R600/madmk.ll
@@ -14,8 +14,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %mul = fmul float %a, 10.0
   %madmk = fadd float %mul, %b
@@ -41,9 +41,9 @@
   %out.gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
   %out.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
 
-  %a = load float addrspace(1)* %in.gep.0, align 4
-  %b = load float addrspace(1)* %in.gep.1, align 4
-  %c = load float addrspace(1)* %in.gep.2, align 4
+  %a = load float, float addrspace(1)* %in.gep.0, align 4
+  %b = load float, float addrspace(1)* %in.gep.1, align 4
+  %c = load float, float addrspace(1)* %in.gep.2, align 4
 
   %mul0 = fmul float %a, 10.0
   %mul1 = fmul float %a, 10.0
@@ -66,8 +66,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %mul = fmul float %a, 4.0
   %madmk = fadd float %mul, %b
@@ -97,7 +97,7 @@
   %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float addrspace(1)* %gep.0, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
 
   %mul = fmul float %a, 10.0
   %madmk = fadd float %mul, %b
@@ -113,7 +113,7 @@
   %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %b = load float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.0, align 4
 
   %mul = fmul float %a, 10.0
   %madmk = fadd float %mul, %b
@@ -131,8 +131,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
 
@@ -152,8 +152,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
 
@@ -172,7 +172,7 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
 
   %mul = fmul float %a, 10.0
   %madmk = fadd float %mul, 2.0
diff --git a/llvm/test/CodeGen/R600/max.ll b/llvm/test/CodeGen/R600/max.ll
index 90931c2..e6ab96c 100644
--- a/llvm/test/CodeGen/R600/max.ll
+++ b/llvm/test/CodeGen/R600/max.ll
@@ -9,8 +9,8 @@
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
   %cmp = icmp sge i32 %a, %b
   %val = select i1 %cmp, i32 %a, i32 %b
   store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -33,8 +33,8 @@
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
   %cmp = icmp sgt i32 %a, %b
   %val = select i1 %cmp, i32 %a, i32 %b
   store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -57,8 +57,8 @@
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
   %cmp = icmp uge i32 %a, %b
   %val = select i1 %cmp, i32 %a, i32 %b
   store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -81,8 +81,8 @@
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
   %cmp = icmp ugt i32 %a, %b
   %val = select i1 %cmp, i32 %a, i32 %b
   store i32 %val, i32 addrspace(1)* %outgep, align 4
diff --git a/llvm/test/CodeGen/R600/max3.ll b/llvm/test/CodeGen/R600/max3.ll
index 8b53584..cfb94b2 100644
--- a/llvm/test/CodeGen/R600/max3.ll
+++ b/llvm/test/CodeGen/R600/max3.ll
@@ -10,9 +10,9 @@
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
-  %c = load i32 addrspace(1)* %gep2, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
+  %c = load i32, i32 addrspace(1)* %gep2, align 4
   %icmp0 = icmp sgt i32 %a, %b
   %i0 = select i1 %icmp0, i32 %a, i32 %b
   %icmp1 = icmp sgt i32 %i0, %c
@@ -29,9 +29,9 @@
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
-  %c = load i32 addrspace(1)* %gep2, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
+  %c = load i32, i32 addrspace(1)* %gep2, align 4
   %icmp0 = icmp ugt i32 %a, %b
   %i0 = select i1 %icmp0, i32 %a, i32 %b
   %icmp1 = icmp ugt i32 %i0, %c
diff --git a/llvm/test/CodeGen/R600/min.ll b/llvm/test/CodeGen/R600/min.ll
index 9f85356..d1febf5 100644
--- a/llvm/test/CodeGen/R600/min.ll
+++ b/llvm/test/CodeGen/R600/min.ll
@@ -9,8 +9,8 @@
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
   %cmp = icmp sle i32 %a, %b
   %val = select i1 %cmp, i32 %a, i32 %b
   store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -33,8 +33,8 @@
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
   %cmp = icmp slt i32 %a, %b
   %val = select i1 %cmp, i32 %a, i32 %b
   store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -57,8 +57,8 @@
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
   %cmp = icmp ule i32 %a, %b
   %val = select i1 %cmp, i32 %a, i32 %b
   store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -81,8 +81,8 @@
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
   %cmp = icmp ult i32 %a, %b
   %val = select i1 %cmp, i32 %a, i32 %b
   store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -110,8 +110,8 @@
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %outgep0 = getelementptr i32, i32 addrspace(1)* %out0, i32 %tid
   %outgep1 = getelementptr i1, i1 addrspace(1)* %out1, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
   %cmp = icmp ult i32 %a, %b
   %val = select i1 %cmp, i32 %a, i32 %b
   store i32 %val, i32 addrspace(1)* %outgep0, align 4
diff --git a/llvm/test/CodeGen/R600/min3.ll b/llvm/test/CodeGen/R600/min3.ll
index f14e28c..38ef46d 100644
--- a/llvm/test/CodeGen/R600/min3.ll
+++ b/llvm/test/CodeGen/R600/min3.ll
@@ -10,9 +10,9 @@
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
-  %c = load i32 addrspace(1)* %gep2, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
+  %c = load i32, i32 addrspace(1)* %gep2, align 4
   %icmp0 = icmp slt i32 %a, %b
   %i0 = select i1 %icmp0, i32 %a, i32 %b
   %icmp1 = icmp slt i32 %i0, %c
@@ -29,9 +29,9 @@
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
-  %c = load i32 addrspace(1)* %gep2, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
+  %c = load i32, i32 addrspace(1)* %gep2, align 4
   %icmp0 = icmp ult i32 %a, %b
   %i0 = select i1 %icmp0, i32 %a, i32 %b
   %icmp1 = icmp ult i32 %i0, %c
@@ -57,10 +57,10 @@
   %outgep0 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %outgep1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid2
 
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
-  %c = load i32 addrspace(1)* %gep2, align 4
-  %d = load i32 addrspace(1)* %gep3, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
+  %c = load i32, i32 addrspace(1)* %gep2, align 4
+  %d = load i32, i32 addrspace(1)* %gep3, align 4
 
   %icmp0 = icmp slt i32 %a, %b
   %i0 = select i1 %icmp0, i32 %a, i32 %b
@@ -91,10 +91,10 @@
   %outgep0 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %outgep1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid2
 
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
-  %c = load i32 addrspace(1)* %gep2, align 4
-  %d = load i32 addrspace(1)* %gep3, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
+  %c = load i32, i32 addrspace(1)* %gep2, align 4
+  %d = load i32, i32 addrspace(1)* %gep3, align 4
 
   %icmp0 = icmp slt i32 %a, %b
   %i0 = select i1 %icmp0, i32 %a, i32 %b
diff --git a/llvm/test/CodeGen/R600/missing-store.ll b/llvm/test/CodeGen/R600/missing-store.ll
index 162fe97..4af9cdf 100644
--- a/llvm/test/CodeGen/R600/missing-store.ll
+++ b/llvm/test/CodeGen/R600/missing-store.ll
@@ -12,11 +12,11 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @missing_store_reduced(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
-  %ptr0 = load i32 addrspace(2)* addrspace(3)* @ptr_load, align 8
+  %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @ptr_load, align 8
   %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
 
   store i32 99, i32 addrspace(1)* %gptr, align 4
-  %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
 
   store i32 %tmp2, i32 addrspace(1)* %out, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/mubuf.ll b/llvm/test/CodeGen/R600/mubuf.ll
index ee4b80d..b19163f 100644
--- a/llvm/test/CodeGen/R600/mubuf.ll
+++ b/llvm/test/CodeGen/R600/mubuf.ll
@@ -12,7 +12,7 @@
 define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1
-  %1 = load i32 addrspace(1)* %0
+  %1 = load i32, i32 addrspace(1)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -23,7 +23,7 @@
 define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i8, i8 addrspace(1)* %in, i64 4095
-  %1 = load i8 addrspace(1)* %0
+  %1 = load i8, i8 addrspace(1)* %0
   store i8 %1, i8 addrspace(1)* %out
   ret void
 }
@@ -35,7 +35,7 @@
 define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1024
-  %1 = load i32 addrspace(1)* %0
+  %1 = load i32, i32 addrspace(1)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -48,7 +48,7 @@
 entry:
   %0 = getelementptr i32, i32 addrspace(1)* %in, i64 %offset
   %1 = getelementptr i32, i32 addrspace(1)* %0, i64 1
-  %2 = load i32 addrspace(1)* %1
+  %2 = load i32, i32 addrspace(1)* %1
   store i32 %2, i32 addrspace(1)* %out
   ret void
 }
@@ -58,7 +58,7 @@
 define void @soffset_max_imm([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
 main_body:
   %tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
-  %tmp1 = load <16 x i8> addrspace(2)* %tmp0
+  %tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
   %tmp2 = shl i32 %6, 2
   %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
   %tmp4 = add i32 %6, 16
@@ -77,7 +77,7 @@
 define void @soffset_no_fold([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
 main_body:
   %tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
-  %tmp1 = load <16 x i8> addrspace(2)* %tmp0
+  %tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
   %tmp2 = shl i32 %6, 2
   %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
   %tmp4 = add i32 %6, 16
diff --git a/llvm/test/CodeGen/R600/mul.ll b/llvm/test/CodeGen/R600/mul.ll
index 119a4c0..94e0f96 100644
--- a/llvm/test/CodeGen/R600/mul.ll
+++ b/llvm/test/CodeGen/R600/mul.ll
@@ -13,8 +13,8 @@
 
 define void @test_mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1) * %in
-  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
   %result = mul <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -33,8 +33,8 @@
 
 define void @v_mul_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
   %result = mul <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -58,8 +58,8 @@
 ; SI: v_mul_lo_i32
 ; SI: buffer_store_dword
 define void @v_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
-  %a = load i64 addrspace(1)* %aptr, align 8
-  %b = load i64 addrspace(1)* %bptr, align 8
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
+  %b = load i64, i64 addrspace(1)* %bptr, align 8
   %mul = mul i64 %b, %a
   %trunc = trunc i64 %mul to i32
   store i32 %trunc, i32 addrspace(1)* %out, align 8
@@ -88,7 +88,7 @@
 ; SI-DAG: v_mul_hi_i32
 ; SI: s_endpgm
 define void @v_mul64_sext_c(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %ext = sext i32 %val to i64
   %mul = mul i64 %ext, 80
   store i64 %mul, i64 addrspace(1)* %out, align 8
@@ -100,7 +100,7 @@
 ; SI-DAG: v_mul_hi_i32 v{{[0-9]+}}, 9, v{{[0-9]+}}
 ; SI: s_endpgm
 define void @v_mul64_sext_inline_imm(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %ext = sext i32 %val to i64
   %mul = mul i64 %ext, 9
   store i64 %mul, i64 addrspace(1)* %out, align 8
@@ -124,8 +124,8 @@
 ; SI: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define void @v_mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32 addrspace(1)* %in
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %in
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %result = mul i32 %a, %b
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -148,8 +148,8 @@
 ; FUNC-LABEL: {{^}}v_mul_i64:
 ; SI: v_mul_lo_i32
 define void @v_mul_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
-  %a = load i64 addrspace(1)* %aptr, align 8
-  %b = load i64 addrspace(1)* %bptr, align 8
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
+  %b = load i64, i64 addrspace(1)* %bptr, align 8
   %mul = mul i64 %a, %b
   store i64 %mul, i64 addrspace(1)* %out, align 8
   ret void
@@ -163,7 +163,7 @@
   br i1 %0, label %if, label %else
 
 if:
-  %1 = load i32 addrspace(1)* %in
+  %1 = load i32, i32 addrspace(1)* %in
   br label %endif
 
 else:
@@ -186,7 +186,7 @@
   br i1 %0, label %if, label %else
 
 if:
-  %1 = load i64 addrspace(1)* %in
+  %1 = load i64, i64 addrspace(1)* %in
   br label %endif
 
 else:
diff --git a/llvm/test/CodeGen/R600/no-initializer-constant-addrspace.ll b/llvm/test/CodeGen/R600/no-initializer-constant-addrspace.ll
index 532edf0..ef0cb0c 100644
--- a/llvm/test/CodeGen/R600/no-initializer-constant-addrspace.ll
+++ b/llvm/test/CodeGen/R600/no-initializer-constant-addrspace.ll
@@ -6,7 +6,7 @@
 
 ; FUNC-LABEL: {{^}}load_extern_const_init:
 define void @load_extern_const_init(i32 addrspace(1)* %out) nounwind {
-  %val = load i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @extern_const_addrspace, i64 0, i64 3), align 4
+  %val = load i32, i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @extern_const_addrspace, i64 0, i64 3), align 4
   store i32 %val, i32 addrspace(1)* %out, align 4
   ret void
 }
@@ -15,7 +15,7 @@
 
 ; FUNC-LABEL: {{^}}load_undef_const_init:
 define void @load_undef_const_init(i32 addrspace(1)* %out) nounwind {
-  %val = load i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @undef_const_addrspace, i64 0, i64 3), align 4
+  %val = load i32, i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @undef_const_addrspace, i64 0, i64 3), align 4
   store i32 %val, i32 addrspace(1)* %out, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/no-shrink-extloads.ll b/llvm/test/CodeGen/R600/no-shrink-extloads.ll
index 2bd1a86..e4328ec 100644
--- a/llvm/test/CodeGen/R600/no-shrink-extloads.ll
+++ b/llvm/test/CodeGen/R600/no-shrink-extloads.ll
@@ -25,7 +25,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
-  %load = load i32 addrspace(1)* %gep.in
+  %load = load i32, i32 addrspace(1)* %gep.in
   %trunc = trunc i32 %load to i16
   store i16 %trunc, i16 addrspace(1)* %gep.out
   ret void
@@ -47,7 +47,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
-  %load = load i32 addrspace(1)* %gep.in
+  %load = load i32, i32 addrspace(1)* %gep.in
   %trunc = trunc i32 %load to i8
   store i8 %trunc, i8 addrspace(1)* %gep.out
   ret void
@@ -69,7 +69,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i1, i1 addrspace(1)* %out, i32 %tid
-  %load = load i32 addrspace(1)* %gep.in
+  %load = load i32, i32 addrspace(1)* %gep.in
   %trunc = trunc i32 %load to i1
   store i1 %trunc, i1 addrspace(1)* %gep.out
   ret void
@@ -91,7 +91,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %load = load i64 addrspace(1)* %gep.in
+  %load = load i64, i64 addrspace(1)* %gep.in
   %trunc = trunc i64 %load to i32
   store i32 %trunc, i32 addrspace(1)* %gep.out
   ret void
@@ -114,7 +114,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %load = load i64 addrspace(1)* %gep.in
+  %load = load i64, i64 addrspace(1)* %gep.in
   %srl = lshr i64 %load, 32
   %trunc = trunc i64 %srl to i32
   store i32 %trunc, i32 addrspace(1)* %gep.out
@@ -138,7 +138,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.in = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
-  %load = load i16 addrspace(1)* %gep.in
+  %load = load i16, i16 addrspace(1)* %gep.in
   %trunc = trunc i16 %load to i8
   store i8 %trunc, i8 addrspace(1)* %gep.out
   ret void
@@ -161,7 +161,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
-  %load = load i64 addrspace(1)* %gep.in
+  %load = load i64, i64 addrspace(1)* %gep.in
   %srl = lshr i64 %load, 32
   %trunc = trunc i64 %srl to i8
   store i8 %trunc, i8 addrspace(1)* %gep.out
@@ -184,7 +184,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
-  %load = load i64 addrspace(1)* %gep.in
+  %load = load i64, i64 addrspace(1)* %gep.in
   %trunc = trunc i64 %load to i8
   store i8 %trunc, i8 addrspace(1)* %gep.out
   ret void
diff --git a/llvm/test/CodeGen/R600/or.ll b/llvm/test/CodeGen/R600/or.ll
index c62ef37..1b1cb9a 100644
--- a/llvm/test/CodeGen/R600/or.ll
+++ b/llvm/test/CodeGen/R600/or.ll
@@ -11,8 +11,8 @@
 ; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1) * %in
-  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
   %result = or <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -30,8 +30,8 @@
 ; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
   %result = or <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -48,7 +48,7 @@
 ; FUNC-LABEL: {{^}}vector_or_i32:
 ; SI: v_or_b32_e32 v{{[0-9]}}
 define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) {
-  %loada = load i32 addrspace(1)* %a
+  %loada = load i32, i32 addrspace(1)* %a
   %or = or i32 %loada, %b
   store i32 %or, i32 addrspace(1)* %out
   ret void
@@ -65,7 +65,7 @@
 ; FUNC-LABEL: {{^}}vector_or_literal_i32:
 ; SI: v_or_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
 define void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
-  %loada = load i32 addrspace(1)* %a, align 4
+  %loada = load i32, i32 addrspace(1)* %a, align 4
   %or = or i32 %loada, 65535
   store i32 %or, i32 addrspace(1)* %out, align 4
   ret void
@@ -74,7 +74,7 @@
 ; FUNC-LABEL: {{^}}vector_or_inline_immediate_i32:
 ; SI: v_or_b32_e32 v{{[0-9]+}}, 4, v{{[0-9]+}}
 define void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
-  %loada = load i32 addrspace(1)* %a, align 4
+  %loada = load i32, i32 addrspace(1)* %a, align 4
   %or = or i32 %loada, 4
   store i32 %or, i32 addrspace(1)* %out, align 4
   ret void
@@ -95,8 +95,8 @@
 ; SI: v_or_b32_e32 v{{[0-9]}}
 ; SI: v_or_b32_e32 v{{[0-9]}}
 define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 8
-  %loadb = load i64 addrspace(1)* %a, align 8
+  %loada = load i64, i64 addrspace(1)* %a, align 8
+  %loadb = load i64, i64 addrspace(1)* %a, align 8
   %or = or i64 %loada, %loadb
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -106,7 +106,7 @@
 ; SI: v_or_b32_e32 v{{[0-9]}}
 ; SI: v_or_b32_e32 v{{[0-9]}}
 define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
-  %loada = load i64 addrspace(1)* %a
+  %loada = load i64, i64 addrspace(1)* %a
   %or = or i64 %loada, %b
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -120,7 +120,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 8
+  %loada = load i64, i64 addrspace(1)* %a, align 8
   %or = or i64 %loada, 22470723082367
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -133,7 +133,7 @@
 ; SI: v_or_b32_e32 {{v[0-9]+}}, 0, {{.*}}
 ; SI: s_endpgm
 define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 8
+  %loada = load i64, i64 addrspace(1)* %a, align 8
   %or = or i64 %loada, 8
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -157,8 +157,8 @@
 
 ; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
 define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
-  %a = load float addrspace(1)* %in0
-  %b = load float addrspace(1)* %in1
+  %a = load float, float addrspace(1)* %in0
+  %b = load float, float addrspace(1)* %in1
   %acmp = fcmp oge float %a, 0.000000e+00
   %bcmp = fcmp oge float %b, 0.000000e+00
   %or = or i1 %acmp, %bcmp
diff --git a/llvm/test/CodeGen/R600/parallelandifcollapse.ll b/llvm/test/CodeGen/R600/parallelandifcollapse.ll
index 82b1150..f32b044 100644
--- a/llvm/test/CodeGen/R600/parallelandifcollapse.ll
+++ b/llvm/test/CodeGen/R600/parallelandifcollapse.ll
@@ -23,14 +23,14 @@
   %c1 = alloca i32, align 4
   %d1 = alloca i32, align 4
   %data = alloca i32, align 4
-  %0 = load i32* %a0, align 4
-  %1 = load i32* %b0, align 4
+  %0 = load i32, i32* %a0, align 4
+  %1 = load i32, i32* %b0, align 4
   %cmp = icmp ne i32 %0, %1
   br i1 %cmp, label %land.lhs.true, label %if.end
 
 land.lhs.true:                                    ; preds = %entry
-  %2 = load i32* %c0, align 4
-  %3 = load i32* %d0, align 4
+  %2 = load i32, i32* %c0, align 4
+  %3 = load i32, i32* %d0, align 4
   %cmp1 = icmp ne i32 %2, %3
   br i1 %cmp1, label %if.then, label %if.end
 
@@ -39,14 +39,14 @@
   br label %if.end
 
 if.end:                                           ; preds = %if.then, %land.lhs.true, %entry
-  %4 = load i32* %a1, align 4
-  %5 = load i32* %b1, align 4
+  %4 = load i32, i32* %a1, align 4
+  %5 = load i32, i32* %b1, align 4
   %cmp2 = icmp ne i32 %4, %5
   br i1 %cmp2, label %land.lhs.true3, label %if.end6
 
 land.lhs.true3:                                   ; preds = %if.end
-  %6 = load i32* %c1, align 4
-  %7 = load i32* %d1, align 4
+  %6 = load i32, i32* %c1, align 4
+  %7 = load i32, i32* %d1, align 4
   %cmp4 = icmp ne i32 %6, %7
   br i1 %cmp4, label %if.then5, label %if.end6
 
diff --git a/llvm/test/CodeGen/R600/parallelorifcollapse.ll b/llvm/test/CodeGen/R600/parallelorifcollapse.ll
index feca688..1da1e91b8 100644
--- a/llvm/test/CodeGen/R600/parallelorifcollapse.ll
+++ b/llvm/test/CodeGen/R600/parallelorifcollapse.ll
@@ -23,14 +23,14 @@
   %c1 = alloca i32, align 4
   %d1 = alloca i32, align 4
   %data = alloca i32, align 4
-  %0 = load i32* %a0, align 4
-  %1 = load i32* %b0, align 4
+  %0 = load i32, i32* %a0, align 4
+  %1 = load i32, i32* %b0, align 4
   %cmp = icmp ne i32 %0, %1
   br i1 %cmp, label %land.lhs.true, label %if.else
 
 land.lhs.true:                                    ; preds = %entry
-  %2 = load i32* %c0, align 4
-  %3 = load i32* %d0, align 4
+  %2 = load i32, i32* %c0, align 4
+  %3 = load i32, i32* %d0, align 4
   %cmp1 = icmp ne i32 %2, %3
   br i1 %cmp1, label %if.then, label %if.else
 
@@ -42,14 +42,14 @@
   br label %if.end
 
 if.end:                                           ; preds = %if.else, %if.then
-  %4 = load i32* %a1, align 4
-  %5 = load i32* %b1, align 4
+  %4 = load i32, i32* %a1, align 4
+  %5 = load i32, i32* %b1, align 4
   %cmp2 = icmp ne i32 %4, %5
   br i1 %cmp2, label %land.lhs.true3, label %if.else6
 
 land.lhs.true3:                                   ; preds = %if.end
-  %6 = load i32* %c1, align 4
-  %7 = load i32* %d1, align 4
+  %6 = load i32, i32* %c1, align 4
+  %7 = load i32, i32* %d1, align 4
   %cmp4 = icmp ne i32 %6, %7
   br i1 %cmp4, label %if.then5, label %if.else6
 
diff --git a/llvm/test/CodeGen/R600/private-memory.ll b/llvm/test/CodeGen/R600/private-memory.ll
index 881baf3..1c56297 100644
--- a/llvm/test/CodeGen/R600/private-memory.ll
+++ b/llvm/test/CodeGen/R600/private-memory.ll
@@ -23,18 +23,18 @@
 define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
 entry:
   %stack = alloca [5 x i32], align 4
-  %0 = load i32 addrspace(1)* %in, align 4
+  %0 = load i32, i32 addrspace(1)* %in, align 4
   %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
   store i32 4, i32* %arrayidx1, align 4
   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
-  %1 = load i32 addrspace(1)* %arrayidx2, align 4
+  %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
   store i32 5, i32* %arrayidx3, align 4
   %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
-  %2 = load i32* %arrayidx10, align 4
+  %2 = load i32, i32* %arrayidx10, align 4
   store i32 %2, i32 addrspace(1)* %out, align 4
   %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
-  %3 = load i32* %arrayidx12
+  %3 = load i32, i32* %arrayidx12
   %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
   store i32 %3, i32 addrspace(1)* %arrayidx13
   ret void
@@ -67,8 +67,8 @@
   store i32 3, i32* %b.y.ptr
   %a.indirect.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 0
   %b.indirect.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 0
-  %a.indirect = load i32* %a.indirect.ptr
-  %b.indirect = load i32* %b.indirect.ptr
+  %a.indirect = load i32, i32* %a.indirect.ptr
+  %b.indirect = load i32, i32* %b.indirect.ptr
   %0 = add i32 %a.indirect, %b.indirect
   store i32 %0, i32 addrspace(1)* %out
   ret void
@@ -86,9 +86,9 @@
 entry:
   %prv_array_const = alloca [2 x i32]
   %prv_array = alloca [2 x i32]
-  %a = load i32 addrspace(1)* %in
+  %a = load i32, i32 addrspace(1)* %in
   %b_src_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %b = load i32 addrspace(1)* %b_src_ptr
+  %b = load i32, i32 addrspace(1)* %b_src_ptr
   %a_dst_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
   store i32 %a, i32* %a_dst_ptr
   %b_dst_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 1
@@ -98,9 +98,9 @@
 for.body:
   %inc = phi i32 [0, %entry], [%count, %for.body]
   %x_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
-  %x = load i32* %x_ptr
+  %x = load i32, i32* %x_ptr
   %y_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
-  %y = load i32* %y_ptr
+  %y = load i32, i32* %y_ptr
   %xy = add i32 %x, %y
   store i32 %xy, i32* %y_ptr
   %count = add i32 %inc, 1
@@ -109,7 +109,7 @@
 
 for.end:
   %value_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
-  %value = load i32* %value_ptr
+  %value = load i32, i32* %value_ptr
   store i32 %value, i32 addrspace(1)* %out
   ret void
 }
@@ -129,7 +129,7 @@
   store i16 0, i16* %1
   store i16 1, i16* %2
   %3 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 %index
-  %4 = load i16* %3
+  %4 = load i16, i16* %3
   %5 = sext i16 %4 to i32
   store i32 %5, i32 addrspace(1)* %out
   ret void
@@ -149,7 +149,7 @@
   store i8 0, i8* %1
   store i8 1, i8* %2
   %3 = getelementptr [2 x i8], [2 x i8]* %0, i32 0, i32 %index
-  %4 = load i8* %3
+  %4 = load i8, i8* %3
   %5 = sext i8 %4 to i32
   store i32 %5, i32 addrspace(1)* %out
   ret void
@@ -172,7 +172,7 @@
   store i32 0, i32* %1
   store i32 1, i32* %2
   %3 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 %in
-  %4 = load i32* %3
+  %4 = load i32, i32* %3
   %5 = call i32 @llvm.r600.read.tidig.x()
   %6 = add i32 %4, %5
   store i32 %6, i32 addrspace(1)* %out
@@ -202,8 +202,8 @@
   store i8 0, i8* %6
   %7 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 %in
   %8 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 %in
-  %9 = load i8* %7
-  %10 = load i8* %8
+  %9 = load i8, i8* %7
+  %10 = load i8, i8* %8
   %11 = add i8 %9, %10
   %12 = sext i8 %11 to i32
   store i32 %12, i32 addrspace(1)* %out
@@ -218,7 +218,7 @@
   store i8 0, i8* %gep0
   store i8 1, i8* %gep1
   %gep2 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 %index
-  %load = load i8* %gep2
+  %load = load i8, i8* %gep2
   %sext = sext i8 %load to i32
   store i32 %sext, i32 addrspace(1)* %out
   ret void
@@ -232,7 +232,7 @@
   store i32 0, i32* %gep0
   store i32 1, i32* %gep1
   %gep2 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
-  %load = load i32* %gep2
+  %load = load i32, i32* %gep2
   store i32 %load, i32 addrspace(1)* %out
   ret void
 }
@@ -245,7 +245,7 @@
   store i64 0, i64* %gep0
   store i64 1, i64* %gep1
   %gep2 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 %index
-  %load = load i64* %gep2
+  %load = load i64, i64* %gep2
   store i64 %load, i64 addrspace(1)* %out
   ret void
 }
@@ -260,7 +260,7 @@
   store i32 0, i32* %gep0
   store i32 1, i32* %gep1
   %gep2 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 %index, i32 0
-  %load = load i32* %gep2
+  %load = load i32, i32* %gep2
   store i32 %load, i32 addrspace(1)* %out
   ret void
 }
@@ -273,7 +273,7 @@
   store i32 0, i32* %gep0
   store i32 1, i32* %gep1
   %gep2 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 %index, i32 0
-  %load = load i32* %gep2
+  %load = load i32, i32* %gep2
   store i32 %load, i32 addrspace(1)* %out
   ret void
 }
@@ -287,7 +287,7 @@
   store i32 1, i32* %tmp2
   %cmp = icmp eq i32 %in, 0
   %sel = select i1 %cmp, i32* %tmp1, i32* %tmp2
-  %load = load i32* %sel
+  %load = load i32, i32* %sel
   store i32 %load, i32 addrspace(1)* %out
   ret void
 }
@@ -307,7 +307,7 @@
   %tmp2 = add i32 %tmp1, 5
   %tmp3 = inttoptr i32 %tmp2 to i32*
   %tmp4 = getelementptr i32, i32* %tmp3, i32 %b
-  %tmp5 = load i32* %tmp4
+  %tmp5 = load i32, i32* %tmp4
   store i32 %tmp5, i32 addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/pv-packing.ll b/llvm/test/CodeGen/R600/pv-packing.ll
index e5615b9..445c0bf 100644
--- a/llvm/test/CodeGen/R600/pv-packing.ll
+++ b/llvm/test/CodeGen/R600/pv-packing.ll
@@ -14,8 +14,8 @@
   %6 = extractelement <4 x float> %reg3, i32 0
   %7 = extractelement <4 x float> %reg3, i32 1
   %8 = extractelement <4 x float> %reg3, i32 2
-  %9 = load <4 x float> addrspace(8)* null
-  %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %9 = load <4 x float>, <4 x float> addrspace(8)* null
+  %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %11 = call float @llvm.AMDGPU.dp4(<4 x float> %9, <4 x float> %9)
   %12 = fmul float %0, %3
   %13 = fadd float %12, %6
diff --git a/llvm/test/CodeGen/R600/pv.ll b/llvm/test/CodeGen/R600/pv.ll
index 1908f15..26bfa0d 100644
--- a/llvm/test/CodeGen/R600/pv.ll
+++ b/llvm/test/CodeGen/R600/pv.ll
@@ -33,63 +33,63 @@
   %25 = extractelement <4 x float> %reg7, i32 1
   %26 = extractelement <4 x float> %reg7, i32 2
   %27 = extractelement <4 x float> %reg7, i32 3
-  %28 = load <4 x float> addrspace(8)* null
+  %28 = load <4 x float>, <4 x float> addrspace(8)* null
   %29 = extractelement <4 x float> %28, i32 0
   %30 = fmul float %0, %29
-  %31 = load <4 x float> addrspace(8)* null
+  %31 = load <4 x float>, <4 x float> addrspace(8)* null
   %32 = extractelement <4 x float> %31, i32 1
   %33 = fmul float %0, %32
-  %34 = load <4 x float> addrspace(8)* null
+  %34 = load <4 x float>, <4 x float> addrspace(8)* null
   %35 = extractelement <4 x float> %34, i32 2
   %36 = fmul float %0, %35
-  %37 = load <4 x float> addrspace(8)* null
+  %37 = load <4 x float>, <4 x float> addrspace(8)* null
   %38 = extractelement <4 x float> %37, i32 3
   %39 = fmul float %0, %38
-  %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %40 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %41 = extractelement <4 x float> %40, i32 0
   %42 = fmul float %1, %41
   %43 = fadd float %42, %30
-  %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %44 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %45 = extractelement <4 x float> %44, i32 1
   %46 = fmul float %1, %45
   %47 = fadd float %46, %33
-  %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %49 = extractelement <4 x float> %48, i32 2
   %50 = fmul float %1, %49
   %51 = fadd float %50, %36
-  %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %53 = extractelement <4 x float> %52, i32 3
   %54 = fmul float %1, %53
   %55 = fadd float %54, %39
-  %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %57 = extractelement <4 x float> %56, i32 0
   %58 = fmul float %2, %57
   %59 = fadd float %58, %43
-  %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %61 = extractelement <4 x float> %60, i32 1
   %62 = fmul float %2, %61
   %63 = fadd float %62, %47
-  %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %65 = extractelement <4 x float> %64, i32 2
   %66 = fmul float %2, %65
   %67 = fadd float %66, %51
-  %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %69 = extractelement <4 x float> %68, i32 3
   %70 = fmul float %2, %69
   %71 = fadd float %70, %55
-  %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %73 = extractelement <4 x float> %72, i32 0
   %74 = fmul float %3, %73
   %75 = fadd float %74, %59
-  %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %77 = extractelement <4 x float> %76, i32 1
   %78 = fmul float %3, %77
   %79 = fadd float %78, %63
-  %80 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %80 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %81 = extractelement <4 x float> %80, i32 2
   %82 = fmul float %3, %81
   %83 = fadd float %82, %67
-  %84 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %84 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %85 = extractelement <4 x float> %84, i32 3
   %86 = fmul float %3, %85
   %87 = fadd float %86, %71
@@ -107,15 +107,15 @@
   %99 = fmul float %4, %98
   %100 = fmul float %5, %98
   %101 = fmul float %6, %98
-  %102 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %102 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %103 = extractelement <4 x float> %102, i32 0
   %104 = fmul float %103, %8
   %105 = fadd float %104, %20
-  %106 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %106 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %107 = extractelement <4 x float> %106, i32 1
   %108 = fmul float %107, %9
   %109 = fadd float %108, %21
-  %110 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %110 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %111 = extractelement <4 x float> %110, i32 2
   %112 = fmul float %111, %10
   %113 = fadd float %112, %22
@@ -123,11 +123,11 @@
   %115 = call float @llvm.AMDIL.clamp.(float %109, float 0.000000e+00, float 1.000000e+00)
   %116 = call float @llvm.AMDIL.clamp.(float %113, float 0.000000e+00, float 1.000000e+00)
   %117 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
-  %118 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %118 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
   %119 = extractelement <4 x float> %118, i32 0
-  %120 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %120 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
   %121 = extractelement <4 x float> %120, i32 1
-  %122 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %122 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
   %123 = extractelement <4 x float> %122, i32 2
   %124 = insertelement <4 x float> undef, float %99, i32 0
   %125 = insertelement <4 x float> %124, float %100, i32 1
@@ -138,11 +138,11 @@
   %130 = insertelement <4 x float> %129, float %123, i32 2
   %131 = insertelement <4 x float> %130, float 0.000000e+00, i32 3
   %132 = call float @llvm.AMDGPU.dp4(<4 x float> %127, <4 x float> %131)
-  %133 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %133 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
   %134 = extractelement <4 x float> %133, i32 0
-  %135 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %135 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
   %136 = extractelement <4 x float> %135, i32 1
-  %137 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %137 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
   %138 = extractelement <4 x float> %137, i32 2
   %139 = insertelement <4 x float> undef, float %99, i32 0
   %140 = insertelement <4 x float> %139, float %100, i32 1
@@ -153,31 +153,31 @@
   %145 = insertelement <4 x float> %144, float %138, i32 2
   %146 = insertelement <4 x float> %145, float 0.000000e+00, i32 3
   %147 = call float @llvm.AMDGPU.dp4(<4 x float> %142, <4 x float> %146)
-  %148 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %148 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
   %149 = extractelement <4 x float> %148, i32 0
   %150 = fmul float %149, %8
-  %151 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %151 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
   %152 = extractelement <4 x float> %151, i32 1
   %153 = fmul float %152, %9
-  %154 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %154 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
   %155 = extractelement <4 x float> %154, i32 2
   %156 = fmul float %155, %10
-  %157 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %157 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
   %158 = extractelement <4 x float> %157, i32 0
   %159 = fmul float %158, %12
-  %160 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %160 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
   %161 = extractelement <4 x float> %160, i32 1
   %162 = fmul float %161, %13
-  %163 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %163 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
   %164 = extractelement <4 x float> %163, i32 2
   %165 = fmul float %164, %14
-  %166 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %166 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
   %167 = extractelement <4 x float> %166, i32 0
   %168 = fmul float %167, %16
-  %169 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %169 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
   %170 = extractelement <4 x float> %169, i32 1
   %171 = fmul float %170, %17
-  %172 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %172 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
   %173 = extractelement <4 x float> %172, i32 2
   %174 = fmul float %173, %18
   %175 = fcmp uge float %132, 0.000000e+00
diff --git a/llvm/test/CodeGen/R600/r600-export-fix.ll b/llvm/test/CodeGen/R600/r600-export-fix.ll
index 7d72856..fd789b0 100644
--- a/llvm/test/CodeGen/R600/r600-export-fix.ll
+++ b/llvm/test/CodeGen/R600/r600-export-fix.ll
@@ -16,83 +16,83 @@
   %1 = extractelement <4 x float> %reg1, i32 1
   %2 = extractelement <4 x float> %reg1, i32 2
   %3 = extractelement <4 x float> %reg1, i32 3
-  %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %5 = extractelement <4 x float> %4, i32 0
   %6 = fmul float %5, %0
-  %7 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %8 = extractelement <4 x float> %7, i32 1
   %9 = fmul float %8, %0
-  %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %11 = extractelement <4 x float> %10, i32 2
   %12 = fmul float %11, %0
-  %13 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %13 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %14 = extractelement <4 x float> %13, i32 3
   %15 = fmul float %14, %0
-  %16 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %16 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
   %17 = extractelement <4 x float> %16, i32 0
   %18 = fmul float %17, %1
   %19 = fadd float %18, %6
-  %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
   %21 = extractelement <4 x float> %20, i32 1
   %22 = fmul float %21, %1
   %23 = fadd float %22, %9
-  %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %24 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
   %25 = extractelement <4 x float> %24, i32 2
   %26 = fmul float %25, %1
   %27 = fadd float %26, %12
-  %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
   %29 = extractelement <4 x float> %28, i32 3
   %30 = fmul float %29, %1
   %31 = fadd float %30, %15
-  %32 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %32 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
   %33 = extractelement <4 x float> %32, i32 0
   %34 = fmul float %33, %2
   %35 = fadd float %34, %19
-  %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %36 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
   %37 = extractelement <4 x float> %36, i32 1
   %38 = fmul float %37, %2
   %39 = fadd float %38, %23
-  %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %40 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
   %41 = extractelement <4 x float> %40, i32 2
   %42 = fmul float %41, %2
   %43 = fadd float %42, %27
-  %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %44 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
   %45 = extractelement <4 x float> %44, i32 3
   %46 = fmul float %45, %2
   %47 = fadd float %46, %31
-  %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
   %49 = extractelement <4 x float> %48, i32 0
   %50 = fmul float %49, %3
   %51 = fadd float %50, %35
-  %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
   %53 = extractelement <4 x float> %52, i32 1
   %54 = fmul float %53, %3
   %55 = fadd float %54, %39
-  %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
   %57 = extractelement <4 x float> %56, i32 2
   %58 = fmul float %57, %3
   %59 = fadd float %58, %43
-  %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
   %61 = extractelement <4 x float> %60, i32 3
   %62 = fmul float %61, %3
   %63 = fadd float %62, %47
-  %64 = load <4 x float> addrspace(8)* null
+  %64 = load <4 x float>, <4 x float> addrspace(8)* null
   %65 = extractelement <4 x float> %64, i32 0
-  %66 = load <4 x float> addrspace(8)* null
+  %66 = load <4 x float>, <4 x float> addrspace(8)* null
   %67 = extractelement <4 x float> %66, i32 1
-  %68 = load <4 x float> addrspace(8)* null
+  %68 = load <4 x float>, <4 x float> addrspace(8)* null
   %69 = extractelement <4 x float> %68, i32 2
-  %70 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %70 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %71 = extractelement <4 x float> %70, i32 0
-  %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %73 = extractelement <4 x float> %72, i32 1
-  %74 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %74 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %75 = extractelement <4 x float> %74, i32 2
-  %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %77 = extractelement <4 x float> %76, i32 0
-  %78 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %78 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %79 = extractelement <4 x float> %78, i32 1
-  %80 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %80 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %81 = extractelement <4 x float> %80, i32 2
   %82 = insertelement <4 x float> undef, float %51, i32 0
   %83 = insertelement <4 x float> %82, float %55, i32 1
diff --git a/llvm/test/CodeGen/R600/r600cfg.ll b/llvm/test/CodeGen/R600/r600cfg.ll
index dddc9de..c7b9d65 100644
--- a/llvm/test/CodeGen/R600/r600cfg.ll
+++ b/llvm/test/CodeGen/R600/r600cfg.ll
@@ -83,7 +83,7 @@
 ENDIF43:                                          ; preds = %ELSE45, %IF44
   %.sink = phi i32 [ %49, %IF44 ], [ %51, %ELSE45 ]
   %52 = bitcast i32 %.sink to float
-  %53 = load <4 x float> addrspace(8)* null
+  %53 = load <4 x float>, <4 x float> addrspace(8)* null
   %54 = extractelement <4 x float> %53, i32 0
   %55 = bitcast float %54 to i32
   br label %LOOP47
diff --git a/llvm/test/CodeGen/R600/register-count-comments.ll b/llvm/test/CodeGen/R600/register-count-comments.ll
index 7f36fd2..de6bfb3 100644
--- a/llvm/test/CodeGen/R600/register-count-comments.ll
+++ b/llvm/test/CodeGen/R600/register-count-comments.ll
@@ -12,8 +12,8 @@
   %aptr = getelementptr i32, i32 addrspace(1)* %abase, i32 %tid
   %bptr = getelementptr i32, i32 addrspace(1)* %bbase, i32 %tid
   %outptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %aptr, align 4
-  %b = load i32 addrspace(1)* %bptr, align 4
+  %a = load i32, i32 addrspace(1)* %aptr, align 4
+  %b = load i32, i32 addrspace(1)* %bptr, align 4
   %result = add i32 %a, %b
   store i32 %result, i32 addrspace(1)* %outptr, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/reorder-stores.ll b/llvm/test/CodeGen/R600/reorder-stores.ll
index ea50d5e..187650f 100644
--- a/llvm/test/CodeGen/R600/reorder-stores.ll
+++ b/llvm/test/CodeGen/R600/reorder-stores.ll
@@ -12,8 +12,8 @@
 ; SI: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind {
-  %tmp1 = load <2 x double> addrspace(1)* %x, align 16
-  %tmp4 = load <2 x double> addrspace(1)* %y, align 16
+  %tmp1 = load <2 x double>, <2 x double> addrspace(1)* %x, align 16
+  %tmp4 = load <2 x double>, <2 x double> addrspace(1)* %y, align 16
   store <2 x double> %tmp4, <2 x double> addrspace(1)* %x, align 16
   store <2 x double> %tmp1, <2 x double> addrspace(1)* %y, align 16
   ret void
@@ -26,8 +26,8 @@
 ; SI: ds_write_b64
 ; SI: s_endpgm
 define void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace(3)* nocapture %x, <2 x double> addrspace(3)* nocapture %y) nounwind {
-  %tmp1 = load <2 x double> addrspace(3)* %x, align 16
-  %tmp4 = load <2 x double> addrspace(3)* %y, align 16
+  %tmp1 = load <2 x double>, <2 x double> addrspace(3)* %x, align 16
+  %tmp4 = load <2 x double>, <2 x double> addrspace(3)* %y, align 16
   store <2 x double> %tmp4, <2 x double> addrspace(3)* %x, align 16
   store <2 x double> %tmp1, <2 x double> addrspace(3)* %y, align 16
   ret void
@@ -76,8 +76,8 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind {
-  %tmp1 = load <8 x i32> addrspace(1)* %x, align 32
-  %tmp4 = load <8 x i32> addrspace(1)* %y, align 32
+  %tmp1 = load <8 x i32>, <8 x i32> addrspace(1)* %x, align 32
+  %tmp4 = load <8 x i32>, <8 x i32> addrspace(1)* %y, align 32
   store <8 x i32> %tmp4, <8 x i32> addrspace(1)* %x, align 32
   store <8 x i32> %tmp1, <8 x i32> addrspace(1)* %y, align 32
   ret void
@@ -91,8 +91,8 @@
 ; SI: ds_write_b64
 ; SI: s_endpgm
 define void @no_reorder_extload_64(<2 x i32> addrspace(3)* nocapture %x, <2 x i32> addrspace(3)* nocapture %y) nounwind {
-  %tmp1 = load <2 x i32> addrspace(3)* %x, align 8
-  %tmp4 = load <2 x i32> addrspace(3)* %y, align 8
+  %tmp1 = load <2 x i32>, <2 x i32> addrspace(3)* %x, align 8
+  %tmp4 = load <2 x i32>, <2 x i32> addrspace(3)* %y, align 8
   %tmp1ext = zext <2 x i32> %tmp1 to <2 x i64>
   %tmp4ext = zext <2 x i32> %tmp4 to <2 x i64>
   %tmp7 = add <2 x i64> %tmp1ext, <i64 1, i64 1>
diff --git a/llvm/test/CodeGen/R600/rotl.i64.ll b/llvm/test/CodeGen/R600/rotl.i64.ll
index 6da17a4..3f4ceb7 100644
--- a/llvm/test/CodeGen/R600/rotl.i64.ll
+++ b/llvm/test/CodeGen/R600/rotl.i64.ll
@@ -28,8 +28,8 @@
 ; BOTH: s_endpgm
 define void @v_rotl_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
 entry:
-  %x = load i64 addrspace(1)* %xptr, align 8
-  %y = load i64 addrspace(1)* %yptr, align 8
+  %x = load i64, i64 addrspace(1)* %xptr, align 8
+  %y = load i64, i64 addrspace(1)* %yptr, align 8
   %tmp0 = shl i64 %x, %y
   %tmp1 = sub i64 64, %y
   %tmp2 = lshr i64 %x, %tmp1
diff --git a/llvm/test/CodeGen/R600/rotr.i64.ll b/llvm/test/CodeGen/R600/rotr.i64.ll
index f1d1d26..586de44 100644
--- a/llvm/test/CodeGen/R600/rotr.i64.ll
+++ b/llvm/test/CodeGen/R600/rotr.i64.ll
@@ -26,8 +26,8 @@
 ; BOTH: v_or_b32
 define void @v_rotr_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
 entry:
-  %x = load i64 addrspace(1)* %xptr, align 8
-  %y = load i64 addrspace(1)* %yptr, align 8
+  %x = load i64, i64 addrspace(1)* %xptr, align 8
+  %y = load i64, i64 addrspace(1)* %yptr, align 8
   %tmp0 = sub i64 64, %y
   %tmp1 = shl i64 %x, %tmp0
   %tmp2 = lshr i64 %x, %y
@@ -50,8 +50,8 @@
 ; BOTH-LABEL: {{^}}v_rotr_v2i64:
 define void @v_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> addrspace(1)* %xptr, <2 x i64> addrspace(1)* %yptr) {
 entry:
-  %x = load <2 x i64> addrspace(1)* %xptr, align 8
-  %y = load <2 x i64> addrspace(1)* %yptr, align 8
+  %x = load <2 x i64>, <2 x i64> addrspace(1)* %xptr, align 8
+  %y = load <2 x i64>, <2 x i64> addrspace(1)* %yptr, align 8
   %tmp0 = sub <2 x i64> <i64 64, i64 64>, %y
   %tmp1 = shl <2 x i64> %x, %tmp0
   %tmp2 = lshr <2 x i64> %x, %y
diff --git a/llvm/test/CodeGen/R600/rsq.ll b/llvm/test/CodeGen/R600/rsq.ll
index 183c717..b67b800 100644
--- a/llvm/test/CodeGen/R600/rsq.ll
+++ b/llvm/test/CodeGen/R600/rsq.ll
@@ -9,7 +9,7 @@
 ; SI: v_rsq_f32_e32
 ; SI: s_endpgm
 define void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
-  %val = load float addrspace(1)* %in, align 4
+  %val = load float, float addrspace(1)* %in, align 4
   %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
   %div = fdiv float 1.0, %sqrt
   store float %div, float addrspace(1)* %out, align 4
@@ -21,7 +21,7 @@
 ; SI-SAFE: v_sqrt_f64_e32
 ; SI: s_endpgm
 define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
-  %val = load double addrspace(1)* %in, align 4
+  %val = load double, double addrspace(1)* %in, align 4
   %sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone
   %div = fdiv double 1.0, %sqrt
   store double %div, double addrspace(1)* %out, align 4
@@ -62,9 +62,9 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
 
   %x = call float @llvm.sqrt.f32(float %a)
   %y = fmul float %x, %b
diff --git a/llvm/test/CodeGen/R600/s_movk_i32.ll b/llvm/test/CodeGen/R600/s_movk_i32.ll
index 8be2d1d..6b1a36c 100644
--- a/llvm/test/CodeGen/R600/s_movk_i32.ll
+++ b/llvm/test/CodeGen/R600/s_movk_i32.ll
@@ -9,7 +9,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 4295032831 ; ((1 << 16) - 1) | (1 << 32)
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -23,7 +23,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 4295000063 ; ((1 << 15) - 1) | (1 << 32)
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -37,7 +37,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 274877939711 ; ((1 << 15) - 1) | (64 << 32)
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -51,7 +51,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 4295000064 ; (1 << 15) | (1 << 32)
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -65,7 +65,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 4295098368 ; (1 << 17) | (1 << 32)
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -79,7 +79,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 18374967954648334319 ; -17 & 0xff00ffffffffffff
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -93,7 +93,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 270582939713 ; 65 | (63 << 32)
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -107,7 +107,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 70368744185856 ; ((1 << 13)) | ((1 << 14) << 32)
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -122,7 +122,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 1229782942255906816 ; 0x11111111ffff8000
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -136,7 +136,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 1229782942255906817 ; 0x11111111ffff8001
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -150,7 +150,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 1229782942255909000 ; 0x11111111ffff8888
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -164,7 +164,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 1229782942255910911 ; 0x11111111ffff8fff
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -178,7 +178,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k12(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 1229782942255902721 ; 0x11111111ffff7001
   store i64 %or, i64 addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/saddo.ll b/llvm/test/CodeGen/R600/saddo.ll
index 8e625c1..f8ced79 100644
--- a/llvm/test/CodeGen/R600/saddo.ll
+++ b/llvm/test/CodeGen/R600/saddo.ll
@@ -28,8 +28,8 @@
 
 ; FUNC-LABEL: {{^}}v_saddo_i32:
 define void @v_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
-  %a = load i32 addrspace(1)* %aptr, align 4
-  %b = load i32 addrspace(1)* %bptr, align 4
+  %a = load i32, i32 addrspace(1)* %aptr, align 4
+  %b = load i32, i32 addrspace(1)* %bptr, align 4
   %sadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind
   %val = extractvalue { i32, i1 } %sadd, 0
   %carry = extractvalue { i32, i1 } %sadd, 1
@@ -52,8 +52,8 @@
 ; SI: v_add_i32
 ; SI: v_addc_u32
 define void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
-  %a = load i64 addrspace(1)* %aptr, align 4
-  %b = load i64 addrspace(1)* %bptr, align 4
+  %a = load i64, i64 addrspace(1)* %aptr, align 4
+  %b = load i64, i64 addrspace(1)* %bptr, align 4
   %sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
   %val = extractvalue { i64, i1 } %sadd, 0
   %carry = extractvalue { i64, i1 } %sadd, 1
diff --git a/llvm/test/CodeGen/R600/salu-to-valu.ll b/llvm/test/CodeGen/R600/salu-to-valu.ll
index e9c4228..0b964957 100644
--- a/llvm/test/CodeGen/R600/salu-to-valu.ll
+++ b/llvm/test/CodeGen/R600/salu-to-valu.ll
@@ -28,10 +28,10 @@
   %4 = phi i64 [0, %entry], [%5, %loop]
   %5 = add i64 %2, %4
   %6 = getelementptr i8, i8 addrspace(1)* %in, i64 %5
-  %7 = load i8 addrspace(1)* %6, align 1
+  %7 = load i8, i8 addrspace(1)* %6, align 1
   %8 = or i64 %5, 1
   %9 = getelementptr i8, i8 addrspace(1)* %in, i64 %8
-  %10 = load i8 addrspace(1)* %9, align 1
+  %10 = load i8, i8 addrspace(1)* %9, align 1
   %11 = add i8 %7, %10
   %12 = sext i8 %11 to i32
   store i32 %12, i32 addrspace(1)* %out
@@ -59,18 +59,18 @@
   br i1 %0, label %if, label %else
 
 if:
-  %1 = load i32 addrspace(2)* addrspace(1)* %in
+  %1 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
   br label %endif
 
 else:
   %2 = getelementptr i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
-  %3 = load i32 addrspace(2)* addrspace(1)* %2
+  %3 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %2
   br label %endif
 
 endif:
   %4 = phi i32 addrspace(2)*  [%1, %if], [%3, %else]
   %5 = getelementptr i32, i32 addrspace(2)* %4, i32 3000
-  %6 = load i32 addrspace(2)* %5
+  %6 = load i32, i32 addrspace(2)* %5
   store i32 %6, i32 addrspace(1)* %out
   ret void
 }
@@ -84,7 +84,7 @@
   %0 = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %1 = add i32 %0, 4
   %2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %0, i32 4
-  %3 = load i32 addrspace(2)* %2
+  %3 = load i32, i32 addrspace(2)* %2
   store i32 %3, i32 addrspace(1)* %out
   ret void
 }
@@ -97,7 +97,7 @@
   %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
   %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
   %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
-  %tmp3 = load <8 x i32> addrspace(2)* %tmp2, align 4
+  %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4
   store <8 x i32> %tmp3, <8 x i32> addrspace(1)* %out, align 32
   ret void
 }
@@ -112,7 +112,7 @@
   %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
   %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
   %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
-  %tmp3 = load <16 x i32> addrspace(2)* %tmp2, align 4
+  %tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4
   store <16 x i32> %tmp3, <16 x i32> addrspace(1)* %out, align 32
   ret void
 }
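
(Editorial note: salu-to-valu.ll above shows the least readable case — the loaded
value is itself a pointer, so two pointer types sit side by side. A sketch with
made-up names, following the pattern in the hunks above:)

; Hypothetical: loading an addrspace(2) pointer out of addrspace(1) memory.
define i32 addrspace(2)* @load_ptr_example(i32 addrspace(2)* addrspace(1)* %pp) {
  ; The type before the comma is the result type (i32 addrspace(2)*);
  ; the type after it is the operand's full pointer type.
  %p = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %pp
  ret i32 addrspace(2)* %p
}
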
diff --git a/llvm/test/CodeGen/R600/scalar_to_vector.ll b/llvm/test/CodeGen/R600/scalar_to_vector.ll
index b82e552..0970e5d 100644
--- a/llvm/test/CodeGen/R600/scalar_to_vector.ll
+++ b/llvm/test/CodeGen/R600/scalar_to_vector.ll
@@ -11,7 +11,7 @@
 ; SI: buffer_store_short [[RESULT]]
 ; SI: s_endpgm
 define void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %tmp1 = load i32 addrspace(1)* %in, align 4
+  %tmp1 = load i32, i32 addrspace(1)* %in, align 4
   %bc = bitcast i32 %tmp1 to <2 x i16>
   %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8
@@ -27,7 +27,7 @@
 ; SI: buffer_store_short [[RESULT]]
 ; SI: s_endpgm
 define void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
-  %tmp1 = load float addrspace(1)* %in, align 4
+  %tmp1 = load float, float addrspace(1)* %in, align 4
   %bc = bitcast float %tmp1 to <2 x i16>
   %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8
@@ -39,7 +39,7 @@
 
 
 ; define void @scalar_to_vector_test2(<8 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-;   %tmp1 = load i32 addrspace(1)* %in, align 4
+;   %tmp1 = load i32, i32 addrspace(1)* %in, align 4
 ;   %bc = bitcast i32 %tmp1 to <4 x i8>
 
 ;   %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
diff --git a/llvm/test/CodeGen/R600/schedule-fs-loop-nested.ll b/llvm/test/CodeGen/R600/schedule-fs-loop-nested.ll
index b917ec6..759197c 100644
--- a/llvm/test/CodeGen/R600/schedule-fs-loop-nested.ll
+++ b/llvm/test/CodeGen/R600/schedule-fs-loop-nested.ll
@@ -3,7 +3,7 @@
 
 define void @main() {
 main_body:
-  %0 = load <4 x float> addrspace(9)* null
+  %0 = load <4 x float>, <4 x float> addrspace(9)* null
   %1 = extractelement <4 x float> %0, i32 3
   %2 = fptosi float %1 to i32
   %3 = bitcast i32 %2 to float
@@ -20,11 +20,11 @@
   %14 = bitcast float %12 to i32
   %15 = add i32 %13, %14
   %16 = bitcast i32 %15 to float
-  %17 = load <4 x float> addrspace(9)* null
+  %17 = load <4 x float>, <4 x float> addrspace(9)* null
   %18 = extractelement <4 x float> %17, i32 0
-  %19 = load <4 x float> addrspace(9)* null
+  %19 = load <4 x float>, <4 x float> addrspace(9)* null
   %20 = extractelement <4 x float> %19, i32 1
-  %21 = load <4 x float> addrspace(9)* null
+  %21 = load <4 x float>, <4 x float> addrspace(9)* null
   %22 = extractelement <4 x float> %21, i32 2
   br label %LOOP
 
diff --git a/llvm/test/CodeGen/R600/schedule-fs-loop.ll b/llvm/test/CodeGen/R600/schedule-fs-loop.ll
index d6c194b..28cc08a 100644
--- a/llvm/test/CodeGen/R600/schedule-fs-loop.ll
+++ b/llvm/test/CodeGen/R600/schedule-fs-loop.ll
@@ -3,15 +3,15 @@
 
 define void @main() {
 main_body:
-  %0 = load <4 x float> addrspace(9)* null
+  %0 = load <4 x float>, <4 x float> addrspace(9)* null
   %1 = extractelement <4 x float> %0, i32 3
   %2 = fptosi float %1 to i32
   %3 = bitcast i32 %2 to float
-  %4 = load <4 x float> addrspace(9)* null
+  %4 = load <4 x float>, <4 x float> addrspace(9)* null
   %5 = extractelement <4 x float> %4, i32 0
-  %6 = load <4 x float> addrspace(9)* null
+  %6 = load <4 x float>, <4 x float> addrspace(9)* null
   %7 = extractelement <4 x float> %6, i32 1
-  %8 = load <4 x float> addrspace(9)* null
+  %8 = load <4 x float>, <4 x float> addrspace(9)* null
   %9 = extractelement <4 x float> %8, i32 2
   br label %LOOP
 
diff --git a/llvm/test/CodeGen/R600/schedule-global-loads.ll b/llvm/test/CodeGen/R600/schedule-global-loads.ll
index 3763237..3f728fd 100644
--- a/llvm/test/CodeGen/R600/schedule-global-loads.ll
+++ b/llvm/test/CodeGen/R600/schedule-global-loads.ll
@@ -14,9 +14,9 @@
 ; SI: buffer_store_dword [[REG0]]
 ; SI: buffer_store_dword [[REG1]]
 define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr) #0 {
-  %load0 = load i32 addrspace(1)* %ptr, align 4
+  %load0 = load i32, i32 addrspace(1)* %ptr, align 4
   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 1
-  %load1 = load i32 addrspace(1)* %gep, align 4
+  %load1 = load i32, i32 addrspace(1)* %gep, align 4
   store i32 %load0, i32 addrspace(1)* %out0, align 4
   store i32 %load1, i32 addrspace(1)* %out1, align 4
   ret void
@@ -30,8 +30,8 @@
 define void @same_base_ptr_crash(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
 entry:
   %out1 = getelementptr i32, i32 addrspace(1)* %out, i32 %offset
-  %tmp0 = load i32 addrspace(1)* %out
-  %tmp1 = load i32 addrspace(1)* %out1
+  %tmp0 = load i32, i32 addrspace(1)* %out
+  %tmp1 = load i32, i32 addrspace(1)* %out1
   %tmp2 = add i32 %tmp0, %tmp1
   store i32 %tmp2, i32 addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/schedule-if-2.ll b/llvm/test/CodeGen/R600/schedule-if-2.ll
index 38aad18..b1a8879 100644
--- a/llvm/test/CodeGen/R600/schedule-if-2.ll
+++ b/llvm/test/CodeGen/R600/schedule-if-2.ll
@@ -3,10 +3,10 @@
 
 define void @main() {
 main_body:
-  %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %1 = extractelement <4 x float> %0, i32 0
   %2 = fadd float 1.000000e+03, %1
-  %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %4 = extractelement <4 x float> %3, i32 0
   %5 = bitcast float %4 to i32
   %6 = icmp eq i32 %5, 0
@@ -47,7 +47,7 @@
   br label %ENDIF
 
 ELSE:                                             ; preds = %main_body
-  %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %36 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %37 = extractelement <4 x float> %36, i32 0
   %38 = bitcast float %37 to i32
   %39 = icmp eq i32 %38, 1
@@ -80,7 +80,7 @@
   %.28 = select i1 %54, float 0x36A0000000000000, float 0.000000e+00
   %55 = bitcast float %.28 to i32
   %56 = sitofp i32 %55 to float
-  %57 = load <4 x float> addrspace(8)* null
+  %57 = load <4 x float>, <4 x float> addrspace(8)* null
   %58 = extractelement <4 x float> %57, i32 0
   %59 = fsub float -0.000000e+00, %58
   %60 = fadd float %2, %59
diff --git a/llvm/test/CodeGen/R600/schedule-if.ll b/llvm/test/CodeGen/R600/schedule-if.ll
index f960c93..4fcb040 100644
--- a/llvm/test/CodeGen/R600/schedule-if.ll
+++ b/llvm/test/CodeGen/R600/schedule-if.ll
@@ -3,7 +3,7 @@
 
 define void @main() {
 main_body:
-  %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %1 = extractelement <4 x float> %0, i32 0
   %2 = bitcast float %1 to i32
   %3 = icmp eq i32 %2, 0
@@ -14,7 +14,7 @@
   br i1 %7, label %ENDIF, label %ELSE
 
 ELSE:                                             ; preds = %main_body
-  %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %9 = extractelement <4 x float> %8, i32 0
   %10 = bitcast float %9 to i32
   %11 = icmp eq i32 %10, 1
@@ -36,7 +36,7 @@
   ret void
 
 IF13:                                             ; preds = %ELSE
-  %20 = load <4 x float> addrspace(8)* null
+  %20 = load <4 x float>, <4 x float> addrspace(8)* null
   %21 = extractelement <4 x float> %20, i32 0
   %22 = fsub float -0.000000e+00, %21
   %23 = fadd float 1.000000e+03, %22
diff --git a/llvm/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll b/llvm/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
index 76b655d..9eb9c13 100644
--- a/llvm/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
+++ b/llvm/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
@@ -39,63 +39,63 @@
   %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %101, %Flow2 ]
   %15 = extractelement <4 x float> %reg1, i32 1
   %16 = extractelement <4 x float> %reg1, i32 3
-  %17 = load <4 x float> addrspace(9)* null
+  %17 = load <4 x float>, <4 x float> addrspace(9)* null
   %18 = extractelement <4 x float> %17, i32 0
   %19 = fmul float %18, %0
-  %20 = load <4 x float> addrspace(9)* null
+  %20 = load <4 x float>, <4 x float> addrspace(9)* null
   %21 = extractelement <4 x float> %20, i32 1
   %22 = fmul float %21, %0
-  %23 = load <4 x float> addrspace(9)* null
+  %23 = load <4 x float>, <4 x float> addrspace(9)* null
   %24 = extractelement <4 x float> %23, i32 2
   %25 = fmul float %24, %0
-  %26 = load <4 x float> addrspace(9)* null
+  %26 = load <4 x float>, <4 x float> addrspace(9)* null
   %27 = extractelement <4 x float> %26, i32 3
   %28 = fmul float %27, %0
-  %29 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %29 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
   %30 = extractelement <4 x float> %29, i32 0
   %31 = fmul float %30, %15
   %32 = fadd float %31, %19
-  %33 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %33 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
   %34 = extractelement <4 x float> %33, i32 1
   %35 = fmul float %34, %15
   %36 = fadd float %35, %22
-  %37 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %37 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
   %38 = extractelement <4 x float> %37, i32 2
   %39 = fmul float %38, %15
   %40 = fadd float %39, %25
-  %41 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %41 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
   %42 = extractelement <4 x float> %41, i32 3
   %43 = fmul float %42, %15
   %44 = fadd float %43, %28
-  %45 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %45 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
   %46 = extractelement <4 x float> %45, i32 0
   %47 = fmul float %46, %1
   %48 = fadd float %47, %32
-  %49 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %49 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
   %50 = extractelement <4 x float> %49, i32 1
   %51 = fmul float %50, %1
   %52 = fadd float %51, %36
-  %53 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %53 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
   %54 = extractelement <4 x float> %53, i32 2
   %55 = fmul float %54, %1
   %56 = fadd float %55, %40
-  %57 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %57 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
   %58 = extractelement <4 x float> %57, i32 3
   %59 = fmul float %58, %1
   %60 = fadd float %59, %44
-  %61 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+  %61 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
   %62 = extractelement <4 x float> %61, i32 0
   %63 = fmul float %62, %16
   %64 = fadd float %63, %48
-  %65 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+  %65 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
   %66 = extractelement <4 x float> %65, i32 1
   %67 = fmul float %66, %16
   %68 = fadd float %67, %52
-  %69 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+  %69 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
   %70 = extractelement <4 x float> %69, i32 2
   %71 = fmul float %70, %16
   %72 = fadd float %71, %56
-  %73 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+  %73 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
   %74 = extractelement <4 x float> %73, i32 3
   %75 = fmul float %74, %16
   %76 = fadd float %75, %60
diff --git a/llvm/test/CodeGen/R600/schedule-vs-if-nested-loop.ll b/llvm/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
index 33b20d3..bcecb15 100644
--- a/llvm/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
+++ b/llvm/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
@@ -21,63 +21,63 @@
   %temp1.0 = phi float [ 1.000000e+00, %main_body ], [ %temp1.1, %LOOP ], [ %temp1.1, %ENDIF16 ]
   %temp2.0 = phi float [ 0.000000e+00, %main_body ], [ %temp2.1, %LOOP ], [ %temp2.1, %ENDIF16 ]
   %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %temp3.1, %LOOP ], [ %temp3.1, %ENDIF16 ]
-  %11 = load <4 x float> addrspace(9)* null
+  %11 = load <4 x float>, <4 x float> addrspace(9)* null
   %12 = extractelement <4 x float> %11, i32 0
   %13 = fmul float %12, %0
-  %14 = load <4 x float> addrspace(9)* null
+  %14 = load <4 x float>, <4 x float> addrspace(9)* null
   %15 = extractelement <4 x float> %14, i32 1
   %16 = fmul float %15, %0
-  %17 = load <4 x float> addrspace(9)* null
+  %17 = load <4 x float>, <4 x float> addrspace(9)* null
   %18 = extractelement <4 x float> %17, i32 2
   %19 = fmul float %18, %0
-  %20 = load <4 x float> addrspace(9)* null
+  %20 = load <4 x float>, <4 x float> addrspace(9)* null
   %21 = extractelement <4 x float> %20, i32 3
   %22 = fmul float %21, %0
-  %23 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %23 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
   %24 = extractelement <4 x float> %23, i32 0
   %25 = fmul float %24, %1
   %26 = fadd float %25, %13
-  %27 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %27 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
   %28 = extractelement <4 x float> %27, i32 1
   %29 = fmul float %28, %1
   %30 = fadd float %29, %16
-  %31 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %31 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
   %32 = extractelement <4 x float> %31, i32 2
   %33 = fmul float %32, %1
   %34 = fadd float %33, %19
-  %35 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %35 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
   %36 = extractelement <4 x float> %35, i32 3
   %37 = fmul float %36, %1
   %38 = fadd float %37, %22
-  %39 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %39 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
   %40 = extractelement <4 x float> %39, i32 0
   %41 = fmul float %40, %2
   %42 = fadd float %41, %26
-  %43 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %43 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
   %44 = extractelement <4 x float> %43, i32 1
   %45 = fmul float %44, %2
   %46 = fadd float %45, %30
-  %47 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %47 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
   %48 = extractelement <4 x float> %47, i32 2
   %49 = fmul float %48, %2
   %50 = fadd float %49, %34
-  %51 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %51 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
   %52 = extractelement <4 x float> %51, i32 3
   %53 = fmul float %52, %2
   %54 = fadd float %53, %38
-  %55 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+  %55 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
   %56 = extractelement <4 x float> %55, i32 0
   %57 = fmul float %56, %3
   %58 = fadd float %57, %42
-  %59 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+  %59 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
   %60 = extractelement <4 x float> %59, i32 1
   %61 = fmul float %60, %3
   %62 = fadd float %61, %46
-  %63 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+  %63 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
   %64 = extractelement <4 x float> %63, i32 2
   %65 = fmul float %64, %3
   %66 = fadd float %65, %50
-  %67 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+  %67 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
   %68 = extractelement <4 x float> %67, i32 3
   %69 = fmul float %68, %3
   %70 = fadd float %69, %54
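
(Editorial note: the schedule-vs-if tests above load through constant-expression
getelementptr operands. Only the load gains the leading type; the constant
getelementptr keeps the form it has at this revision. A sketch, hypothetical
function name, constant operand copied from the pattern in the tests:)

; Hypothetical: the pointer operand is a constant-expression GEP off null
; in addrspace(9), as in the R600 tests; only the load syntax changes here.
define float @load_constexpr_example() {
  %vec = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
  %elt = extractelement <4 x float> %vec, i32 0
  ret float %elt
}
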
diff --git a/llvm/test/CodeGen/R600/scratch-buffer.ll b/llvm/test/CodeGen/R600/scratch-buffer.ll
index 838a7f9..5608871 100644
--- a/llvm/test/CodeGen/R600/scratch-buffer.ll
+++ b/llvm/test/CodeGen/R600/scratch-buffer.ll
@@ -30,12 +30,12 @@
 
 if:
   %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset
-  %if_value = load i32* %if_ptr
+  %if_value = load i32, i32* %if_ptr
   br label %done
 
 else:
   %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset
-  %else_value = load i32* %else_ptr
+  %else_value = load i32, i32* %else_ptr
   br label %done
 
 done:
@@ -57,12 +57,12 @@
   %scratch0 = alloca [8192 x i32]
   %scratch1 = alloca [8192 x i32]
 
-  %offset0 = load i32 addrspace(1)* %offsets
+  %offset0 = load i32, i32 addrspace(1)* %offsets
   %scratchptr0 = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %offset0
   store i32 %offset0, i32* %scratchptr0
 
   %offsetptr1 = getelementptr i32, i32 addrspace(1)* %offsets, i32 1
-  %offset1 = load i32 addrspace(1)* %offsetptr1
+  %offset1 = load i32, i32 addrspace(1)* %offsetptr1
   %scratchptr1 = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %offset1
   store i32 %offset1, i32* %scratchptr1
 
@@ -71,12 +71,12 @@
 
 if:
   %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset
-  %if_value = load i32* %if_ptr
+  %if_value = load i32, i32* %if_ptr
   br label %done
 
 else:
   %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset
-  %else_value = load i32* %else_ptr
+  %else_value = load i32, i32* %else_ptr
   br label %done
 
 done:
diff --git a/llvm/test/CodeGen/R600/sdiv.ll b/llvm/test/CodeGen/R600/sdiv.ll
index 0805ca6..de64535 100644
--- a/llvm/test/CodeGen/R600/sdiv.ll
+++ b/llvm/test/CodeGen/R600/sdiv.ll
@@ -15,8 +15,8 @@
 ; EG: CF_END
 define void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in
-  %den = load i32 addrspace(1) * %den_ptr
+  %num = load i32, i32 addrspace(1) * %in
+  %den = load i32, i32 addrspace(1) * %den_ptr
   %result = sdiv i32 %num, %den
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -24,7 +24,7 @@
 
 ; FUNC-LABEL: {{^}}sdiv_i32_4:
 define void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %num = load i32 addrspace(1) * %in
+  %num = load i32, i32 addrspace(1) * %in
   %result = sdiv i32 %num, 4
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -44,7 +44,7 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %num = load i32 addrspace(1) * %in
+  %num = load i32, i32 addrspace(1) * %in
   %result = sdiv i32 %num, 3435
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -52,15 +52,15 @@
 
 define void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %den_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %num = load <2 x i32> addrspace(1) * %in
-  %den = load <2 x i32> addrspace(1) * %den_ptr
+  %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %den = load <2 x i32>, <2 x i32> addrspace(1) * %den_ptr
   %result = sdiv <2 x i32> %num, %den
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
 }
 
 define void @sdiv_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
-  %num = load <2 x i32> addrspace(1) * %in
+  %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
   %result = sdiv <2 x i32> %num, <i32 4, i32 4>
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -68,15 +68,15 @@
 
 define void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %den_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %num = load <4 x i32> addrspace(1) * %in
-  %den = load <4 x i32> addrspace(1) * %den_ptr
+  %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %den = load <4 x i32>, <4 x i32> addrspace(1) * %den_ptr
   %result = sdiv <4 x i32> %num, %den
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
 }
 
 define void @sdiv_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %num = load <4 x i32> addrspace(1) * %in
+  %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
   %result = sdiv <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4>
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/sdivrem24.ll b/llvm/test/CodeGen/R600/sdivrem24.ll
index 56c15e3..ad5df39 100644
--- a/llvm/test/CodeGen/R600/sdivrem24.ll
+++ b/llvm/test/CodeGen/R600/sdivrem24.ll
@@ -14,8 +14,8 @@
 ; EG: FLT_TO_INT
 define void @sdiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
   %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
-  %num = load i8 addrspace(1) * %in
-  %den = load i8 addrspace(1) * %den_ptr
+  %num = load i8, i8 addrspace(1) * %in
+  %den = load i8, i8 addrspace(1) * %den_ptr
   %result = sdiv i8 %num, %den
   store i8 %result, i8 addrspace(1)* %out
   ret void
@@ -33,8 +33,8 @@
 ; EG: FLT_TO_INT
 define void @sdiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
   %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
-  %num = load i16 addrspace(1) * %in, align 2
-  %den = load i16 addrspace(1) * %den_ptr, align 2
+  %num = load i16, i16 addrspace(1) * %in, align 2
+  %den = load i16, i16 addrspace(1) * %den_ptr, align 2
   %result = sdiv i16 %num, %den
   store i16 %result, i16 addrspace(1)* %out, align 2
   ret void
@@ -52,8 +52,8 @@
 ; EG: FLT_TO_INT
 define void @sdiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 8
   %den.i24.0 = shl i32 %den, 8
   %num.i24 = ashr i32 %num.i24.0, 8
@@ -71,8 +71,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @sdiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 7
   %den.i24.0 = shl i32 %den, 7
   %num.i24 = ashr i32 %num.i24.0, 7
@@ -90,8 +90,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @test_no_sdiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 8
   %den.i24.0 = shl i32 %den, 7
   %num.i24 = ashr i32 %num.i24.0, 8
@@ -109,8 +109,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @test_no_sdiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 7
   %den.i24.0 = shl i32 %den, 8
   %num.i24 = ashr i32 %num.i24.0, 7
@@ -132,8 +132,8 @@
 ; EG: FLT_TO_INT
 define void @srem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
   %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
-  %num = load i8 addrspace(1) * %in
-  %den = load i8 addrspace(1) * %den_ptr
+  %num = load i8, i8 addrspace(1) * %in
+  %den = load i8, i8 addrspace(1) * %den_ptr
   %result = srem i8 %num, %den
   store i8 %result, i8 addrspace(1)* %out
   ret void
@@ -151,8 +151,8 @@
 ; EG: FLT_TO_INT
 define void @srem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
   %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
-  %num = load i16 addrspace(1) * %in, align 2
-  %den = load i16 addrspace(1) * %den_ptr, align 2
+  %num = load i16, i16 addrspace(1) * %in, align 2
+  %den = load i16, i16 addrspace(1) * %den_ptr, align 2
   %result = srem i16 %num, %den
   store i16 %result, i16 addrspace(1)* %out, align 2
   ret void
@@ -170,8 +170,8 @@
 ; EG: FLT_TO_INT
 define void @srem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 8
   %den.i24.0 = shl i32 %den, 8
   %num.i24 = ashr i32 %num.i24.0, 8
@@ -189,8 +189,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @srem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 7
   %den.i24.0 = shl i32 %den, 7
   %num.i24 = ashr i32 %num.i24.0, 7
@@ -208,8 +208,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @test_no_srem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 8
   %den.i24.0 = shl i32 %den, 7
   %num.i24 = ashr i32 %num.i24.0, 8
@@ -227,8 +227,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @test_no_srem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 7
   %den.i24.0 = shl i32 %den, 8
   %num.i24 = ashr i32 %num.i24.0, 7
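
(Editorial note: several of the sdiv/sdivrem tests above write a space before the
'*', as in "i32 addrspace(1) * %in". The parser is whitespace-insensitive there,
which is why the migration script leaves that spacing alone. A sketch showing
both spellings assemble to the same instruction; hypothetical names:)

; Hypothetical: whitespace before '*' in the operand type is insignificant.
define void @spacing_example(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
  %a = load i32, i32 addrspace(1) * %in, align 4
  %b = load i32, i32 addrspace(1)* %in, align 4
  %s = add i32 %a, %b
  store i32 %s, i32 addrspace(1)* %out
  ret void
}
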
diff --git a/llvm/test/CodeGen/R600/select64.ll b/llvm/test/CodeGen/R600/select64.ll
index 0245dae..5cebb30 100644
--- a/llvm/test/CodeGen/R600/select64.ll
+++ b/llvm/test/CodeGen/R600/select64.ll
@@ -42,8 +42,8 @@
 ; CHECK-NOT: v_cndmask_b32
 define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
   %cmp = icmp ugt i32 %cond, 5
-  %a = load i64 addrspace(1)* %aptr, align 8
-  %b = load i64 addrspace(1)* %bptr, align 8
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
+  %b = load i64, i64 addrspace(1)* %bptr, align 8
   %sel = select i1 %cmp, i64 %a, i64 %b
   %trunc = trunc i64 %sel to i32
   store i32 %trunc, i32 addrspace(1)* %out, align 4
@@ -60,8 +60,8 @@
 ; CHECK: s_endpgm
 define void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
   %cmp = icmp ugt i32 %cond, 5
-  %a = load i64 addrspace(1)* %aptr, align 8
-  %b = load i64 addrspace(1)* %bptr, align 8
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
+  %b = load i64, i64 addrspace(1)* %bptr, align 8
   %sel = select i1 %cmp, i64 %a, i64 270582939648 ; 63 << 32
   store i64 %sel, i64 addrspace(1)* %out, align 8
   ret void
diff --git a/llvm/test/CodeGen/R600/selectcc-cnd.ll b/llvm/test/CodeGen/R600/selectcc-cnd.ll
index 0bfca69..94d0ace 100644
--- a/llvm/test/CodeGen/R600/selectcc-cnd.ll
+++ b/llvm/test/CodeGen/R600/selectcc-cnd.ll
@@ -4,7 +4,7 @@
 ;CHECK: CNDE {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x,
 ;CHECK: 1073741824
 define void @test(float addrspace(1)* %out, float addrspace(1)* %in) {
-  %1 = load float addrspace(1)* %in
+  %1 = load float, float addrspace(1)* %in
   %2 = fcmp oeq float %1, 0.0
   %3 = select i1 %2, float 1.0, float 2.0
   store float %3, float addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/selectcc-cnde-int.ll b/llvm/test/CodeGen/R600/selectcc-cnde-int.ll
index d568888..58a4ee7 100644
--- a/llvm/test/CodeGen/R600/selectcc-cnde-int.ll
+++ b/llvm/test/CodeGen/R600/selectcc-cnde-int.ll
@@ -4,7 +4,7 @@
 ;CHECK: CNDE_INT {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x,
 ;CHECK-NEXT: 2
 define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %1 = load i32 addrspace(1)* %in
+  %1 = load i32, i32 addrspace(1)* %in
   %2 = icmp eq i32 %1, 0
   %3 = select i1 %2, i32 1, i32 2
   store i32 %3, i32 addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/selectcc-icmp-select-float.ll b/llvm/test/CodeGen/R600/selectcc-icmp-select-float.ll
index 6743800..e870ee8 100644
--- a/llvm/test/CodeGen/R600/selectcc-icmp-select-float.ll
+++ b/llvm/test/CodeGen/R600/selectcc-icmp-select-float.ll
@@ -8,7 +8,7 @@
 
 define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
-  %0 = load i32 addrspace(1)* %in
+  %0 = load i32, i32 addrspace(1)* %in
   %1 = icmp sge i32 %0, 0
   %2 = select i1 %1, float 1.0, float 0.0
   store float %2, float addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/setcc-opt.ll b/llvm/test/CodeGen/R600/setcc-opt.ll
index 93860f5..0219cdb 100644
--- a/llvm/test/CodeGen/R600/setcc-opt.ll
+++ b/llvm/test/CodeGen/R600/setcc-opt.ll
@@ -162,7 +162,7 @@
 ; GCN-NEXT: buffer_store_byte [[RESULT]]
 ; GCN: s_endpgm
 define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
-  %b = load i8 addrspace(1)* %b.ptr
+  %b = load i8, i8 addrspace(1)* %b.ptr
   %b.ext = sext i8 %b to i32
   %icmp0 = icmp ne i32 %b.ext, -1
   store i1 %icmp0, i1 addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/setcc.ll b/llvm/test/CodeGen/R600/setcc.ll
index 0867e83..f33a82d 100644
--- a/llvm/test/CodeGen/R600/setcc.ll
+++ b/llvm/test/CodeGen/R600/setcc.ll
@@ -22,8 +22,8 @@
 
 define void @setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
   %result = icmp eq <4 x i32> %a, %b
   %sext = sext <4 x i1> %result to <4 x i32>
   store <4 x i32> %sext, <4 x i32> addrspace(1)* %out
@@ -347,8 +347,8 @@
   %gep.a = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptra, i32 %tid
   %gep.b = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptrb, i32 %tid
   %gep.out = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
-  %a = load <3 x i32> addrspace(1)* %gep.a
-  %b = load <3 x i32> addrspace(1)* %gep.b
+  %a = load <3 x i32>, <3 x i32> addrspace(1)* %gep.a
+  %b = load <3 x i32>, <3 x i32> addrspace(1)* %gep.b
   %cmp = icmp eq <3 x i32> %a, %b
   %ext = sext <3 x i1> %cmp to <3 x i32>
   store <3 x i32> %ext, <3 x i32> addrspace(1)* %gep.out
@@ -368,8 +368,8 @@
   %gep.a = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptra, i32 %tid
   %gep.b = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptrb, i32 %tid
   %gep.out = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %out, i32 %tid
-  %a = load <3 x i8> addrspace(1)* %gep.a
-  %b = load <3 x i8> addrspace(1)* %gep.b
+  %a = load <3 x i8>, <3 x i8> addrspace(1)* %gep.a
+  %b = load <3 x i8>, <3 x i8> addrspace(1)* %gep.b
   %cmp = icmp eq <3 x i8> %a, %b
   %ext = sext <3 x i1> %cmp to <3 x i8>
   store <3 x i8> %ext, <3 x i8> addrspace(1)* %gep.out
diff --git a/llvm/test/CodeGen/R600/sext-in-reg.ll b/llvm/test/CodeGen/R600/sext-in-reg.ll
index 0668e1e..e8d1428 100644
--- a/llvm/test/CodeGen/R600/sext-in-reg.ll
+++ b/llvm/test/CodeGen/R600/sext-in-reg.ll
@@ -190,8 +190,8 @@
   %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
-  %a = load i64 addrspace(1)* %a.gep, align 8
-  %b = load i64 addrspace(1)* %b.gep, align 8
+  %a = load i64, i64 addrspace(1)* %a.gep, align 8
+  %b = load i64, i64 addrspace(1)* %b.gep, align 8
 
   %c = shl i64 %a, %b
   %shl = shl i64 %c, 63
@@ -211,8 +211,8 @@
   %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
-  %a = load i64 addrspace(1)* %a.gep, align 8
-  %b = load i64 addrspace(1)* %b.gep, align 8
+  %a = load i64, i64 addrspace(1)* %a.gep, align 8
+  %b = load i64, i64 addrspace(1)* %b.gep, align 8
 
   %c = shl i64 %a, %b
   %shl = shl i64 %c, 56
@@ -232,8 +232,8 @@
   %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
-  %a = load i64 addrspace(1)* %a.gep, align 8
-  %b = load i64 addrspace(1)* %b.gep, align 8
+  %a = load i64, i64 addrspace(1)* %a.gep, align 8
+  %b = load i64, i64 addrspace(1)* %b.gep, align 8
 
   %c = shl i64 %a, %b
   %shl = shl i64 %c, 48
@@ -252,8 +252,8 @@
   %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
-  %a = load i64 addrspace(1)* %a.gep, align 8
-  %b = load i64 addrspace(1)* %b.gep, align 8
+  %a = load i64, i64 addrspace(1)* %a.gep, align 8
+  %b = load i64, i64 addrspace(1)* %b.gep, align 8
 
   %c = shl i64 %a, %b
   %shl = shl i64 %c, 32
@@ -428,8 +428,8 @@
 ; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
 ; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
 define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
-  %loada = load <4 x i32> addrspace(1)* %a, align 16
-  %loadb = load <4 x i32> addrspace(1)* %b, align 16
+  %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
+  %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
   %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
   %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
   %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
@@ -441,8 +441,8 @@
 ; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
 ; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
 define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
-  %loada = load <4 x i32> addrspace(1)* %a, align 16
-  %loadb = load <4 x i32> addrspace(1)* %b, align 16
+  %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
+  %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
   %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
   %shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
   %ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
@@ -459,7 +459,7 @@
 ; SI: v_bfe_i32
 ; SI: buffer_store_short
 define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
-  %tmp5 = load i8 addrspace(1)* %src, align 1
+  %tmp5 = load i8, i8 addrspace(1)* %src, align 1
   %tmp2 = sext i8 %tmp5 to i32
   %tmp3 = tail call i32 @llvm.AMDGPU.imax(i32 %tmp2, i32 0) nounwind readnone
   %tmp4 = trunc i32 %tmp3 to i8
@@ -474,7 +474,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
-  %load = load i32 addrspace(1)* %ptr, align 4
+  %load = load i32, i32 addrspace(1)* %ptr, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -485,7 +485,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
-  %load = load i32 addrspace(1)* %ptr, align 4
+  %load = load i32, i32 addrspace(1)* %ptr, align 4
   %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
   %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
   store i32 %bfe1, i32 addrspace(1)* %out, align 4
@@ -496,7 +496,7 @@
 ; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
 ; SI: s_endpgm
 define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
-  %load = load i32 addrspace(1)* %ptr, align 4
+  %load = load i32, i32 addrspace(1)* %ptr, align 4
   %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
   %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone
   store i32 %bfe1, i32 addrspace(1)* %out, align 4
@@ -509,7 +509,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
-  %load = load i32 addrspace(1)* %ptr, align 4
+  %load = load i32, i32 addrspace(1)* %ptr, align 4
   %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
   %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
   store i32 %bfe1, i32 addrspace(1)* %out, align 4
@@ -545,7 +545,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
-  %load = load i8 addrspace(1)* %ptr, align 1
+  %load = load i8, i8 addrspace(1)* %ptr, align 1
   %sext = sext i8 %load to i32
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone
   %shl = shl i32 %bfe, 24
@@ -559,7 +559,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
-  %load = load i8 addrspace(1)* %ptr, align 1
+  %load = load i8, i8 addrspace(1)* %ptr, align 1
   %sext = sext i8 %load to i32
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone
   %shl = shl i32 %bfe, 24
@@ -574,7 +574,7 @@
 ; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
 ; SI: s_endpgm
 define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %shr = ashr i32 %shl, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1)
@@ -589,7 +589,7 @@
 ; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
 ; SI: s_endpgm
 define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 30
   %shr = ashr i32 %shl, 30
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1)
@@ -604,7 +604,7 @@
 ; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
 ; SI: s_endpgm
 define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 30
   %shr = ashr i32 %shl, 30
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2)
diff --git a/llvm/test/CodeGen/R600/sgpr-control-flow.ll b/llvm/test/CodeGen/R600/sgpr-control-flow.ll
index ba479d5..fae7cd2 100644
--- a/llvm/test/CodeGen/R600/sgpr-control-flow.ll
+++ b/llvm/test/CodeGen/R600/sgpr-control-flow.ll
@@ -83,13 +83,13 @@
 
 if:
   %gep.if = getelementptr i32, i32 addrspace(1)* %a, i32 %tid
-  %a.val = load i32 addrspace(1)* %gep.if
+  %a.val = load i32, i32 addrspace(1)* %gep.if
   %cmp.if = icmp eq i32 %a.val, 0
   br label %endif
 
 else:
   %gep.else = getelementptr i32, i32 addrspace(1)* %b, i32 %tid
-  %b.val = load i32 addrspace(1)* %gep.else
+  %b.val = load i32, i32 addrspace(1)* %gep.else
   %cmp.else = icmp slt i32 %b.val, 0
   br label %endif
 
diff --git a/llvm/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll b/llvm/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
index 893f5a3..df67fcc 100644
--- a/llvm/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
+++ b/llvm/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
@@ -7,7 +7,7 @@
 ; SI-LABEL: {{^}}test_dup_operands:
 ; SI: v_add_i32_e32
 define void @test_dup_operands(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) {
-  %a = load <2 x i32> addrspace(1)* %in
+  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
   %lo = extractelement <2 x i32> %a, i32 0
   %hi = extractelement <2 x i32> %a, i32 1
   %add = add i32 %lo, %lo
diff --git a/llvm/test/CodeGen/R600/sgpr-copy.ll b/llvm/test/CodeGen/R600/sgpr-copy.ll
index eb57b1a..b849c40 100644
--- a/llvm/test/CodeGen/R600/sgpr-copy.ll
+++ b/llvm/test/CodeGen/R600/sgpr-copy.ll
@@ -10,7 +10,7 @@
 define void @phi1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
-  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+  %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
   %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
   %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
   %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
@@ -34,7 +34,7 @@
 define void @phi2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
-  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+  %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
   %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
   %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
   %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
@@ -51,9 +51,9 @@
   %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
   %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
   %37 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %2, i32 0
-  %38 = load <32 x i8> addrspace(2)* %37, !tbaa !1
+  %38 = load <32 x i8>, <32 x i8> addrspace(2)* %37, !tbaa !1
   %39 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %1, i32 0
-  %40 = load <16 x i8> addrspace(2)* %39, !tbaa !1
+  %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, !tbaa !1
   %41 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
   %42 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
   %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
@@ -155,7 +155,7 @@
 define void @loop(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
-  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+  %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
   %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
   %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 4)
   %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 8)
@@ -237,12 +237,12 @@
 
 entry:
   %21 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
-  %22 = load <16 x i8> addrspace(2)* %21, !tbaa !2
+  %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, !tbaa !2
   %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 16)
   %24 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
-  %25 = load <32 x i8> addrspace(2)* %24, !tbaa !2
+  %25 = load <32 x i8>, <32 x i8> addrspace(2)* %24, !tbaa !2
   %26 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
-  %27 = load <16 x i8> addrspace(2)* %26, !tbaa !2
+  %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, !tbaa !2
   %28 = fcmp oeq float %23, 0.0
   br i1 %28, label %if, label %else
 
@@ -276,7 +276,7 @@
 ; CHECK: s_endpgm
 define void @copy1(float addrspace(1)* %out, float addrspace(1)* %in0) {
 entry:
-  %0 = load float addrspace(1)* %in0
+  %0 = load float, float addrspace(1)* %in0
   %1 = fcmp oeq float %0, 0.0
   br i1 %1, label %if0, label %endif
 
@@ -335,12 +335,12 @@
 define void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
 bb:
   %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0
-  %tmp22 = load <16 x i8> addrspace(2)* %tmp, !tbaa !0
+  %tmp22 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
   %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp22, i32 16)
   %tmp25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %arg3, i32 0, i32 0
-  %tmp26 = load <8 x i32> addrspace(2)* %tmp25, !tbaa !0
+  %tmp26 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp25, !tbaa !0
   %tmp27 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %arg2, i32 0, i32 0
-  %tmp28 = load <4 x i32> addrspace(2)* %tmp27, !tbaa !0
+  %tmp28 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp27, !tbaa !0
   %tmp29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg5, <2 x i32> %arg7)
   %tmp30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %arg5, <2 x i32> %arg7)
   %tmp31 = bitcast float %tmp23 to i32
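
(Editorial note: the sgpr-copy tests above carry !tbaa attachments. Metadata is
unaffected by the migration; it still trails the operand, after the newly
explicit result type. A sketch — the function name and the shape of the !0 node
are assumptions, not taken from this patch:)

; Hypothetical: metadata attachments keep their trailing position.
define <16 x i8> @load_tbaa_example(<16 x i8> addrspace(2)* %desc) {
  %v = load <16 x i8>, <16 x i8> addrspace(2)* %desc, !tbaa !0
  ret <16 x i8> %v
}

; Assumed tbaa node shape for illustration only.
!0 = !{!"const", null}
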
diff --git a/llvm/test/CodeGen/R600/shl.ll b/llvm/test/CodeGen/R600/shl.ll
index 18293a8..53b63dc 100644
--- a/llvm/test/CodeGen/R600/shl.ll
+++ b/llvm/test/CodeGen/R600/shl.ll
@@ -16,8 +16,8 @@
 
 define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1) * %in
-  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
   %result = shl <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -43,8 +43,8 @@
 
 define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
   %result = shl <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -70,8 +70,8 @@
 
 define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
   %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
-  %a = load i64 addrspace(1) * %in
-  %b = load i64 addrspace(1) * %b_ptr
+  %a = load i64, i64 addrspace(1) * %in
+  %b = load i64, i64 addrspace(1) * %b_ptr
   %result = shl i64 %a, %b
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -109,8 +109,8 @@
 
 define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
-  %a = load <2 x i64> addrspace(1) * %in
-  %b = load <2 x i64> addrspace(1) * %b_ptr
+  %a = load <2 x i64>, <2 x i64> addrspace(1) * %in
+  %b = load <2 x i64>, <2 x i64> addrspace(1) * %b_ptr
   %result = shl <2 x i64> %a, %b
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out
   ret void
@@ -172,8 +172,8 @@
 
 define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
-  %a = load <4 x i64> addrspace(1) * %in
-  %b = load <4 x i64> addrspace(1) * %b_ptr
+  %a = load <4 x i64>, <4 x i64> addrspace(1) * %in
+  %b = load <4 x i64>, <4 x i64> addrspace(1) * %b_ptr
   %result = shl <4 x i64> %a, %b
   store <4 x i64> %result, <4 x i64> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/shl_add_constant.ll b/llvm/test/CodeGen/R600/shl_add_constant.ll
index a62b6c9..b1485bf 100644
--- a/llvm/test/CodeGen/R600/shl_add_constant.ll
+++ b/llvm/test/CodeGen/R600/shl_add_constant.ll
@@ -12,7 +12,7 @@
 define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
-  %val = load i32 addrspace(1)* %ptr, align 4
+  %val = load i32, i32 addrspace(1)* %ptr, align 4
   %add = add i32 %val, 9
   %result = shl i32 %add, 2
   store i32 %result, i32 addrspace(1)* %out, align 4
@@ -28,7 +28,7 @@
 define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
-  %val = load i32 addrspace(1)* %ptr, align 4
+  %val = load i32, i32 addrspace(1)* %ptr, align 4
   %add = add i32 %val, 9
   %result = shl i32 %add, 2
   store i32 %result, i32 addrspace(1)* %out0, align 4
@@ -46,7 +46,7 @@
 define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
-  %val = load i32 addrspace(1)* %ptr, align 4
+  %val = load i32, i32 addrspace(1)* %ptr, align 4
   %shl = add i32 %val, 999
   %result = shl i32 %shl, 2
   store i32 %result, i32 addrspace(1)* %out, align 4
diff --git a/llvm/test/CodeGen/R600/shl_add_ptr.ll b/llvm/test/CodeGen/R600/shl_add_ptr.ll
index 15565fc..066dafb 100644
--- a/llvm/test/CodeGen/R600/shl_add_ptr.ll
+++ b/llvm/test/CodeGen/R600/shl_add_ptr.ll
@@ -23,7 +23,7 @@
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
   store float %val0, float addrspace(1)* %out
   ret void
@@ -43,7 +43,7 @@
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %shl_add_use = shl i32 %idx.0, 2
   store i32 %shl_add_use, i32 addrspace(1)* %add_use, align 4
   store float %val0, float addrspace(1)* %out
@@ -59,7 +59,7 @@
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %idx.0 = add nsw i32 %tid.x, 65535
   %arrayidx0 = getelementptr inbounds [65536 x i8], [65536 x i8] addrspace(3)* @maxlds, i32 0, i32 %idx.0
-  %val0 = load i8 addrspace(3)* %arrayidx0
+  %val0 = load i8, i8 addrspace(3)* %arrayidx0
   store i32 %idx.0, i32 addrspace(1)* %add_use
   store i8 %val0, i8 addrspace(1)* %out
   ret void
@@ -77,9 +77,9 @@
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %idx.0 = add nsw i32 %tid.x, 64
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   store float %sum, float addrspace(1)* %out, align 4
   ret void
@@ -108,7 +108,7 @@
 ;   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
 ;   %idx.0 = add nsw i32 %tid.x, 2
 ;   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
-;   %val = load atomic i32 addrspace(3)* %arrayidx0 seq_cst, align 4
+;   %val = load atomic i32, i32 addrspace(3)* %arrayidx0 seq_cst, align 4
 ;   store i32 %val, i32 addrspace(1)* %out, align 4
 ;   store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
 ;   ret void
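
A note on the commented-out atomic hunk above: the explicit type goes after the load atomic (and, where present, volatile) keywords and before the pointer operand; the ordering and alignment still trail the operand. A minimal sketch, with illustrative names not taken from any test:

define i32 @atomic_example(i32 addrspace(3)* %p) {
  ; explicit result type first, then the pointer operand, then ordering/align
  %val = load atomic i32, i32 addrspace(3)* %p seq_cst, align 4
  ret i32 %val
}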
diff --git a/llvm/test/CodeGen/R600/si-lod-bias.ll b/llvm/test/CodeGen/R600/si-lod-bias.ll
index cdcc119..944499a 100644
--- a/llvm/test/CodeGen/R600/si-lod-bias.ll
+++ b/llvm/test/CodeGen/R600/si-lod-bias.ll
@@ -10,12 +10,12 @@
 define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
-  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+  %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
   %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
   %23 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %2, i32 0
-  %24 = load <32 x i8> addrspace(2)* %23, !tbaa !1
+  %24 = load <32 x i8>, <32 x i8> addrspace(2)* %23, !tbaa !1
   %25 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %1, i32 0
-  %26 = load <16 x i8> addrspace(2)* %25, !tbaa !1
+  %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, !tbaa !1
   %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
   %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
   %29 = bitcast float %22 to i32
diff --git a/llvm/test/CodeGen/R600/si-sgpr-spill.ll b/llvm/test/CodeGen/R600/si-sgpr-spill.ll
index 781be58..8465270 100644
--- a/llvm/test/CodeGen/R600/si-sgpr-spill.ll
+++ b/llvm/test/CodeGen/R600/si-sgpr-spill.ll
@@ -14,7 +14,7 @@
 define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %21 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
-  %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0
+  %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, !tbaa !0
   %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 96)
   %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 100)
   %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 104)
@@ -54,37 +54,37 @@
   %59 = call float @llvm.SI.load.const(<16 x i8> %22, i32 376)
   %60 = call float @llvm.SI.load.const(<16 x i8> %22, i32 384)
   %61 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
-  %62 = load <32 x i8> addrspace(2)* %61, !tbaa !0
+  %62 = load <32 x i8>, <32 x i8> addrspace(2)* %61, !tbaa !0
   %63 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
-  %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
+  %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, !tbaa !0
   %65 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1
-  %66 = load <32 x i8> addrspace(2)* %65, !tbaa !0
+  %66 = load <32 x i8>, <32 x i8> addrspace(2)* %65, !tbaa !0
   %67 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1
-  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
+  %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, !tbaa !0
   %69 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2
-  %70 = load <32 x i8> addrspace(2)* %69, !tbaa !0
+  %70 = load <32 x i8>, <32 x i8> addrspace(2)* %69, !tbaa !0
   %71 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2
-  %72 = load <16 x i8> addrspace(2)* %71, !tbaa !0
+  %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, !tbaa !0
   %73 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3
-  %74 = load <32 x i8> addrspace(2)* %73, !tbaa !0
+  %74 = load <32 x i8>, <32 x i8> addrspace(2)* %73, !tbaa !0
   %75 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3
-  %76 = load <16 x i8> addrspace(2)* %75, !tbaa !0
+  %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, !tbaa !0
   %77 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4
-  %78 = load <32 x i8> addrspace(2)* %77, !tbaa !0
+  %78 = load <32 x i8>, <32 x i8> addrspace(2)* %77, !tbaa !0
   %79 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4
-  %80 = load <16 x i8> addrspace(2)* %79, !tbaa !0
+  %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, !tbaa !0
   %81 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5
-  %82 = load <32 x i8> addrspace(2)* %81, !tbaa !0
+  %82 = load <32 x i8>, <32 x i8> addrspace(2)* %81, !tbaa !0
   %83 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5
-  %84 = load <16 x i8> addrspace(2)* %83, !tbaa !0
+  %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, !tbaa !0
   %85 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6
-  %86 = load <32 x i8> addrspace(2)* %85, !tbaa !0
+  %86 = load <32 x i8>, <32 x i8> addrspace(2)* %85, !tbaa !0
   %87 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6
-  %88 = load <16 x i8> addrspace(2)* %87, !tbaa !0
+  %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, !tbaa !0
   %89 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7
-  %90 = load <32 x i8> addrspace(2)* %89, !tbaa !0
+  %90 = load <32 x i8>, <32 x i8> addrspace(2)* %89, !tbaa !0
   %91 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7
-  %92 = load <16 x i8> addrspace(2)* %91, !tbaa !0
+  %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, !tbaa !0
   %93 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %4, <2 x i32> %6)
   %94 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %4, <2 x i32> %6)
   %95 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %4, <2 x i32> %6)
@@ -116,16 +116,16 @@
   %119 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %118
   %120 = bitcast float %93 to i32
   store i32 %120, i32 addrspace(3)* %115
-  %121 = load i32 addrspace(3)* %117
+  %121 = load i32, i32 addrspace(3)* %117
   %122 = bitcast i32 %121 to float
-  %123 = load i32 addrspace(3)* %119
+  %123 = load i32, i32 addrspace(3)* %119
   %124 = bitcast i32 %123 to float
   %125 = fsub float %124, %122
   %126 = bitcast float %94 to i32
   store i32 %126, i32 addrspace(3)* %115
-  %127 = load i32 addrspace(3)* %117
+  %127 = load i32, i32 addrspace(3)* %117
   %128 = bitcast i32 %127 to float
-  %129 = load i32 addrspace(3)* %119
+  %129 = load i32, i32 addrspace(3)* %119
   %130 = bitcast i32 %129 to float
   %131 = fsub float %130, %128
   %132 = insertelement <4 x float> undef, float %125, i32 0
@@ -156,30 +156,30 @@
   %153 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %152
   %154 = bitcast float %138 to i32
   store i32 %154, i32 addrspace(3)* %149
-  %155 = load i32 addrspace(3)* %151
+  %155 = load i32, i32 addrspace(3)* %151
   %156 = bitcast i32 %155 to float
-  %157 = load i32 addrspace(3)* %153
+  %157 = load i32, i32 addrspace(3)* %153
   %158 = bitcast i32 %157 to float
   %159 = fsub float %158, %156
   %160 = bitcast float %139 to i32
   store i32 %160, i32 addrspace(3)* %149
-  %161 = load i32 addrspace(3)* %151
+  %161 = load i32, i32 addrspace(3)* %151
   %162 = bitcast i32 %161 to float
-  %163 = load i32 addrspace(3)* %153
+  %163 = load i32, i32 addrspace(3)* %153
   %164 = bitcast i32 %163 to float
   %165 = fsub float %164, %162
   %166 = bitcast float %140 to i32
   store i32 %166, i32 addrspace(3)* %149
-  %167 = load i32 addrspace(3)* %151
+  %167 = load i32, i32 addrspace(3)* %151
   %168 = bitcast i32 %167 to float
-  %169 = load i32 addrspace(3)* %153
+  %169 = load i32, i32 addrspace(3)* %153
   %170 = bitcast i32 %169 to float
   %171 = fsub float %170, %168
   %172 = bitcast float %141 to i32
   store i32 %172, i32 addrspace(3)* %149
-  %173 = load i32 addrspace(3)* %151
+  %173 = load i32, i32 addrspace(3)* %151
   %174 = bitcast i32 %173 to float
-  %175 = load i32 addrspace(3)* %153
+  %175 = load i32, i32 addrspace(3)* %153
   %176 = bitcast i32 %175 to float
   %177 = fsub float %176, %174
   %178 = insertelement <4 x float> undef, float %159, i32 0
@@ -695,7 +695,7 @@
 define void @main1([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %21 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
-  %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0
+  %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, !tbaa !0
   %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 0)
   %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 4)
   %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 8)
@@ -800,41 +800,41 @@
   %124 = call float @llvm.SI.load.const(<16 x i8> %22, i32 864)
   %125 = call float @llvm.SI.load.const(<16 x i8> %22, i32 868)
   %126 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
-  %127 = load <32 x i8> addrspace(2)* %126, !tbaa !0
+  %127 = load <32 x i8>, <32 x i8> addrspace(2)* %126, !tbaa !0
   %128 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
-  %129 = load <16 x i8> addrspace(2)* %128, !tbaa !0
+  %129 = load <16 x i8>, <16 x i8> addrspace(2)* %128, !tbaa !0
   %130 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1
-  %131 = load <32 x i8> addrspace(2)* %130, !tbaa !0
+  %131 = load <32 x i8>, <32 x i8> addrspace(2)* %130, !tbaa !0
   %132 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1
-  %133 = load <16 x i8> addrspace(2)* %132, !tbaa !0
+  %133 = load <16 x i8>, <16 x i8> addrspace(2)* %132, !tbaa !0
   %134 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2
-  %135 = load <32 x i8> addrspace(2)* %134, !tbaa !0
+  %135 = load <32 x i8>, <32 x i8> addrspace(2)* %134, !tbaa !0
   %136 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2
-  %137 = load <16 x i8> addrspace(2)* %136, !tbaa !0
+  %137 = load <16 x i8>, <16 x i8> addrspace(2)* %136, !tbaa !0
   %138 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3
-  %139 = load <32 x i8> addrspace(2)* %138, !tbaa !0
+  %139 = load <32 x i8>, <32 x i8> addrspace(2)* %138, !tbaa !0
   %140 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3
-  %141 = load <16 x i8> addrspace(2)* %140, !tbaa !0
+  %141 = load <16 x i8>, <16 x i8> addrspace(2)* %140, !tbaa !0
   %142 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4
-  %143 = load <32 x i8> addrspace(2)* %142, !tbaa !0
+  %143 = load <32 x i8>, <32 x i8> addrspace(2)* %142, !tbaa !0
   %144 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4
-  %145 = load <16 x i8> addrspace(2)* %144, !tbaa !0
+  %145 = load <16 x i8>, <16 x i8> addrspace(2)* %144, !tbaa !0
   %146 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5
-  %147 = load <32 x i8> addrspace(2)* %146, !tbaa !0
+  %147 = load <32 x i8>, <32 x i8> addrspace(2)* %146, !tbaa !0
   %148 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5
-  %149 = load <16 x i8> addrspace(2)* %148, !tbaa !0
+  %149 = load <16 x i8>, <16 x i8> addrspace(2)* %148, !tbaa !0
   %150 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6
-  %151 = load <32 x i8> addrspace(2)* %150, !tbaa !0
+  %151 = load <32 x i8>, <32 x i8> addrspace(2)* %150, !tbaa !0
   %152 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6
-  %153 = load <16 x i8> addrspace(2)* %152, !tbaa !0
+  %153 = load <16 x i8>, <16 x i8> addrspace(2)* %152, !tbaa !0
   %154 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7
-  %155 = load <32 x i8> addrspace(2)* %154, !tbaa !0
+  %155 = load <32 x i8>, <32 x i8> addrspace(2)* %154, !tbaa !0
   %156 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7
-  %157 = load <16 x i8> addrspace(2)* %156, !tbaa !0
+  %157 = load <16 x i8>, <16 x i8> addrspace(2)* %156, !tbaa !0
   %158 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 8
-  %159 = load <32 x i8> addrspace(2)* %158, !tbaa !0
+  %159 = load <32 x i8>, <32 x i8> addrspace(2)* %158, !tbaa !0
   %160 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 8
-  %161 = load <16 x i8> addrspace(2)* %160, !tbaa !0
+  %161 = load <16 x i8>, <16 x i8> addrspace(2)* %160, !tbaa !0
   %162 = fcmp ugt float %17, 0.000000e+00
   %163 = select i1 %162, float 1.000000e+00, float 0.000000e+00
   %164 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %4, <2 x i32> %6)
diff --git a/llvm/test/CodeGen/R600/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
index bb49a5b..5a6129a 100644
--- a/llvm/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
+++ b/llvm/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
@@ -14,14 +14,14 @@
 ; CI-NEXT: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
 ; CI: buffer_store_dword
 define void @reorder_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
-  %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+  %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
 
   %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
   %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
 
-  %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+  %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
   store i32 99, i32 addrspace(1)* %gptr, align 4
-  %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
 
   %add = add nsw i32 %tmp1, %tmp2
 
@@ -34,14 +34,14 @@
 ; CI: buffer_store_dword
 ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
 define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
-  %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+  %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
 
   %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
   %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
 
-  %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+  %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
   store volatile i32 99, i32 addrspace(1)* %gptr, align 4
-  %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
 
   %add = add nsw i32 %tmp1, %tmp2
 
@@ -54,15 +54,15 @@
 ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
 ; CI: buffer_store_dword
 define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
-  %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+  %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
 
   %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
   %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
 
-  %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+  %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
   store i32 99, i32 addrspace(1)* %gptr, align 4
   call void @llvm.AMDGPU.barrier.local() #2
-  %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
 
   %add = add nsw i32 %tmp1, %tmp2
 
@@ -79,14 +79,14 @@
 ; CI: buffer_load_dword
 ; CI: buffer_store_dword
 define void @no_reorder_constant_load_global_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
-  %ptr0 = load i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
+  %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
 
   %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
   %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
 
-  %tmp1 = load i32 addrspace(2)* %ptr1, align 4
+  %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
   store i32 99, i32 addrspace(1)* %gptr, align 4
-  %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
 
   %add = add nsw i32 %tmp1, %tmp2
 
@@ -100,14 +100,14 @@
 ; CI: ds_write_b32
 ; CI: buffer_store_dword
 define void @reorder_constant_load_local_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr) #0 {
-  %ptr0 = load i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
+  %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
 
   %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
   %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
 
-  %tmp1 = load i32 addrspace(2)* %ptr1, align 4
+  %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
   store i32 99, i32 addrspace(3)* %lptr, align 4
-  %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
 
   %add = add nsw i32 %tmp1, %tmp2
 
@@ -125,9 +125,9 @@
   %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
   %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
 
-  %tmp1 = load i32 addrspace(2)* %ptr1, align 4
+  %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
   store i32 99, i32 addrspace(3)* %lptr, align 4
-  %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
 
   %add = add nsw i32 %tmp1, %tmp2
 
@@ -144,9 +144,9 @@
   %ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i64 1
   %ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i64 2
 
-  %tmp1 = load i32 addrspace(1)* %ptr1, align 4
+  %tmp1 = load i32, i32 addrspace(1)* %ptr1, align 4
   store i32 99, i32 addrspace(3)* %lptr, align 4
-  %tmp2 = load i32 addrspace(1)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(1)* %ptr2, align 4
 
   %add = add nsw i32 %tmp1, %tmp2
 
@@ -168,10 +168,10 @@
   %ptr3 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 101
 
   store i32 123, i32 addrspace(3)* %ptr1, align 4
-  %tmp1 = load i32 addrspace(3)* %ptr2, align 4
-  %tmp2 = load i32 addrspace(3)* %ptr3, align 4
+  %tmp1 = load i32, i32 addrspace(3)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(3)* %ptr3, align 4
   store i32 123, i32 addrspace(3)* %ptr2, align 4
-  %tmp3 = load i32 addrspace(3)* %ptr1, align 4
+  %tmp3 = load i32, i32 addrspace(3)* %ptr1, align 4
   store i32 789, i32 addrspace(3)* %ptr3, align 4
 
   %add.0 = add nsw i32 %tmp2, %tmp1
@@ -194,10 +194,10 @@
   %ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 101
 
   store i32 123, i32 addrspace(1)* %ptr1, align 4
-  %tmp1 = load i32 addrspace(1)* %ptr2, align 4
-  %tmp2 = load i32 addrspace(1)* %ptr3, align 4
+  %tmp1 = load i32, i32 addrspace(1)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(1)* %ptr3, align 4
   store i32 123, i32 addrspace(1)* %ptr2, align 4
-  %tmp3 = load i32 addrspace(1)* %ptr1, align 4
+  %tmp3 = load i32, i32 addrspace(1)* %ptr1, align 4
   store i32 789, i32 addrspace(1)* %ptr3, align 4
 
   %add.0 = add nsw i32 %tmp2, %tmp1
@@ -211,19 +211,19 @@
 ; XCI: TBUFFER_STORE_FORMAT
 ; XCI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}}, 0x8
 ; define void @reorder_local_load_tbuffer_store_local_load(i32 addrspace(1)* %out, i32 %a1, i32 %vaddr) #1 {
-;   %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+;   %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
 
 ;   %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
 ;   %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
 
-;   %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+;   %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
 
 ;   %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
 ;   call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
 ;         i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1,
 ;         i32 1, i32 0)
 
-;   %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+;   %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
 
 ;   %add = add nsw i32 %tmp1, %tmp2
 
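
The file above exercises the trickiest case for the new syntax: loads whose result is itself a pointer. The explicit type parameter is the full pointee type, so the operand's type carries one more level of indirection than the result. A minimal sketch modeled on the stored_lds_ptr pattern above (the global and function names here are illustrative):

@lds_ptr = addrspace(3) global i32 addrspace(3)* undef, align 4

define i32 addrspace(3)* @ptr_load_example() {
  ; result type:  i32 addrspace(3)*                (a pointer)
  ; operand type: i32 addrspace(3)* addrspace(3)*  (pointer to that pointer)
  %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @lds_ptr, align 4
  ret i32 addrspace(3)* %ptr0
}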
diff --git a/llvm/test/CodeGen/R600/si-vector-hang.ll b/llvm/test/CodeGen/R600/si-vector-hang.ll
index a26f973..94c47fe 100644
--- a/llvm/test/CodeGen/R600/si-vector-hang.ll
+++ b/llvm/test/CodeGen/R600/si-vector-hang.ll
@@ -17,52 +17,52 @@
 ; Function Attrs: nounwind
 define void @test_8_min_char(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture readonly %in0, i8 addrspace(1)* nocapture readonly %in1) #0 {
 entry:
-  %0 = load i8 addrspace(1)* %in0, align 1
+  %0 = load i8, i8 addrspace(1)* %in0, align 1
   %1 = insertelement <8 x i8> undef, i8 %0, i32 0
   %arrayidx2.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 1
-  %2 = load i8 addrspace(1)* %arrayidx2.i.i, align 1
+  %2 = load i8, i8 addrspace(1)* %arrayidx2.i.i, align 1
   %3 = insertelement <8 x i8> %1, i8 %2, i32 1
   %arrayidx6.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 2
-  %4 = load i8 addrspace(1)* %arrayidx6.i.i, align 1
+  %4 = load i8, i8 addrspace(1)* %arrayidx6.i.i, align 1
   %5 = insertelement <8 x i8> %3, i8 %4, i32 2
   %arrayidx10.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 3
-  %6 = load i8 addrspace(1)* %arrayidx10.i.i, align 1
+  %6 = load i8, i8 addrspace(1)* %arrayidx10.i.i, align 1
   %7 = insertelement <8 x i8> %5, i8 %6, i32 3
   %arrayidx.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 4
-  %8 = load i8 addrspace(1)* %arrayidx.i.i, align 1
+  %8 = load i8, i8 addrspace(1)* %arrayidx.i.i, align 1
   %9 = insertelement <8 x i8> undef, i8 %8, i32 0
   %arrayidx2.i9.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 5
-  %10 = load i8 addrspace(1)* %arrayidx2.i9.i, align 1
+  %10 = load i8, i8 addrspace(1)* %arrayidx2.i9.i, align 1
   %11 = insertelement <8 x i8> %9, i8 %10, i32 1
   %arrayidx6.i11.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 6
-  %12 = load i8 addrspace(1)* %arrayidx6.i11.i, align 1
+  %12 = load i8, i8 addrspace(1)* %arrayidx6.i11.i, align 1
   %13 = insertelement <8 x i8> %11, i8 %12, i32 2
   %arrayidx10.i13.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 7
-  %14 = load i8 addrspace(1)* %arrayidx10.i13.i, align 1
+  %14 = load i8, i8 addrspace(1)* %arrayidx10.i13.i, align 1
   %15 = insertelement <8 x i8> %13, i8 %14, i32 3
   %vecinit5.i = shufflevector <8 x i8> %7, <8 x i8> %15, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-  %16 = load i8 addrspace(1)* %in1, align 1
+  %16 = load i8, i8 addrspace(1)* %in1, align 1
   %17 = insertelement <8 x i8> undef, i8 %16, i32 0
   %arrayidx2.i.i4 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 1
-  %18 = load i8 addrspace(1)* %arrayidx2.i.i4, align 1
+  %18 = load i8, i8 addrspace(1)* %arrayidx2.i.i4, align 1
   %19 = insertelement <8 x i8> %17, i8 %18, i32 1
   %arrayidx6.i.i5 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 2
-  %20 = load i8 addrspace(1)* %arrayidx6.i.i5, align 1
+  %20 = load i8, i8 addrspace(1)* %arrayidx6.i.i5, align 1
   %21 = insertelement <8 x i8> %19, i8 %20, i32 2
   %arrayidx10.i.i6 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 3
-  %22 = load i8 addrspace(1)* %arrayidx10.i.i6, align 1
+  %22 = load i8, i8 addrspace(1)* %arrayidx10.i.i6, align 1
   %23 = insertelement <8 x i8> %21, i8 %22, i32 3
   %arrayidx.i.i7 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 4
-  %24 = load i8 addrspace(1)* %arrayidx.i.i7, align 1
+  %24 = load i8, i8 addrspace(1)* %arrayidx.i.i7, align 1
   %25 = insertelement <8 x i8> undef, i8 %24, i32 0
   %arrayidx2.i9.i8 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 5
-  %26 = load i8 addrspace(1)* %arrayidx2.i9.i8, align 1
+  %26 = load i8, i8 addrspace(1)* %arrayidx2.i9.i8, align 1
   %27 = insertelement <8 x i8> %25, i8 %26, i32 1
   %arrayidx6.i11.i9 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 6
-  %28 = load i8 addrspace(1)* %arrayidx6.i11.i9, align 1
+  %28 = load i8, i8 addrspace(1)* %arrayidx6.i11.i9, align 1
   %29 = insertelement <8 x i8> %27, i8 %28, i32 2
   %arrayidx10.i13.i10 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 7
-  %30 = load i8 addrspace(1)* %arrayidx10.i13.i10, align 1
+  %30 = load i8, i8 addrspace(1)* %arrayidx10.i13.i10, align 1
   %31 = insertelement <8 x i8> %29, i8 %30, i32 3
   %vecinit5.i11 = shufflevector <8 x i8> %23, <8 x i8> %31, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
   %cmp.i = icmp slt <8 x i8> %vecinit5.i, %vecinit5.i11
diff --git a/llvm/test/CodeGen/R600/sign_extend.ll b/llvm/test/CodeGen/R600/sign_extend.ll
index f194759..06bee11 100644
--- a/llvm/test/CodeGen/R600/sign_extend.ll
+++ b/llvm/test/CodeGen/R600/sign_extend.ll
@@ -48,7 +48,7 @@
 ; SI: v_ashr
 ; SI: s_endpgm
 define void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %sext = sext i32 %val to i64
   store i64 %sext, i64 addrspace(1)* %out, align 8
   ret void
diff --git a/llvm/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll b/llvm/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
index c75b846..dffee70 100644
--- a/llvm/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
+++ b/llvm/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
@@ -31,7 +31,7 @@
   store i64 3935, i64* %gep2, align 8
   store i64 9342, i64* %gep3, align 8
   %gep = getelementptr i64, i64* %alloca, i32 %idx
-  %load = load i64* %gep, align 8
+  %load = load i64, i64* %gep, align 8
   %mask = and i64 %load, 4294967296
   %add = add i64 %mask, -1
   store i64 %add, i64 addrspace(1)* %out, align 4
diff --git a/llvm/test/CodeGen/R600/sint_to_fp.f64.ll b/llvm/test/CodeGen/R600/sint_to_fp.f64.ll
index f6f1e13..da4e91d 100644
--- a/llvm/test/CodeGen/R600/sint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/R600/sint_to_fp.f64.ll
@@ -54,7 +54,7 @@
 define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
-  %val = load i64 addrspace(1)* %gep, align 8
+  %val = load i64, i64 addrspace(1)* %gep, align 8
   %result = sitofp i64 %val to double
   store double %result, double addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/sint_to_fp.ll b/llvm/test/CodeGen/R600/sint_to_fp.ll
index 6a291cf..8506441 100644
--- a/llvm/test/CodeGen/R600/sint_to_fp.ll
+++ b/llvm/test/CodeGen/R600/sint_to_fp.ll
@@ -35,7 +35,7 @@
 ; SI: v_cvt_f32_i32_e32
 ; SI: v_cvt_f32_i32_e32
 define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %value = load <4 x i32> addrspace(1) * %in
+  %value = load <4 x i32>, <4 x i32> addrspace(1) * %in
   %result = sitofp <4 x i32> %value to <4 x float>
   store <4 x float> %result, <4 x float> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/smrd.ll b/llvm/test/CodeGen/R600/smrd.ll
index 46cbe1b..b0c18ca 100644
--- a/llvm/test/CodeGen/R600/smrd.ll
+++ b/llvm/test/CodeGen/R600/smrd.ll
@@ -8,7 +8,7 @@
 define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
   %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
-  %1 = load i32 addrspace(2)* %0
+  %1 = load i32, i32 addrspace(2)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -20,7 +20,7 @@
 define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
   %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
-  %1 = load i32 addrspace(2)* %0
+  %1 = load i32, i32 addrspace(2)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -34,7 +34,7 @@
 define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
   %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
-  %1 = load i32 addrspace(2)* %0
+  %1 = load i32, i32 addrspace(2)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -55,7 +55,7 @@
 define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
   %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
-  %1 = load i32 addrspace(2)* %0
+  %1 = load i32, i32 addrspace(2)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -67,7 +67,7 @@
 define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
-  %21 = load <16 x i8> addrspace(2)* %20
+  %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
   %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
   ret void
@@ -81,7 +81,7 @@
 define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
-  %21 = load <16 x i8> addrspace(2)* %20
+  %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
   %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1020)
   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
   ret void
@@ -96,7 +96,7 @@
 define void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
-  %21 = load <16 x i8> addrspace(2)* %20
+  %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
   %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1024)
   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
   ret void
diff --git a/llvm/test/CodeGen/R600/split-scalar-i64-add.ll b/llvm/test/CodeGen/R600/split-scalar-i64-add.ll
index 7826dd1..46409cd 100644
--- a/llvm/test/CodeGen/R600/split-scalar-i64-add.ll
+++ b/llvm/test/CodeGen/R600/split-scalar-i64-add.ll
@@ -38,7 +38,7 @@
 define void @imp_def_vcc_split_i64_add_2(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
   %tid = call i32 @llvm.r600.read.tidig.x() readnone
   %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
-  %load = load i32 addrspace(1)* %gep
+  %load = load i32, i32 addrspace(1)* %gep
   %vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
   %vec.1 = insertelement <2 x i32> %vec.0, i32 %load, i32 1
   %bc = bitcast <2 x i32> %vec.1 to i64
diff --git a/llvm/test/CodeGen/R600/sra.ll b/llvm/test/CodeGen/R600/sra.ll
index a64544e..bcbc32f 100644
--- a/llvm/test/CodeGen/R600/sra.ll
+++ b/llvm/test/CodeGen/R600/sra.ll
@@ -16,8 +16,8 @@
 
 define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1) * %in
-  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
   %result = ashr <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -43,8 +43,8 @@
 
 define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
   %result = ashr <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -90,8 +90,8 @@
 define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
 entry:
   %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
-  %a = load i64 addrspace(1) * %in
-  %b = load i64 addrspace(1) * %b_ptr
+  %a = load i64, i64 addrspace(1) * %in
+  %b = load i64, i64 addrspace(1) * %b_ptr
   %result = ashr i64 %a, %b
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -133,8 +133,8 @@
 
 define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
-  %a = load <2 x i64> addrspace(1) * %in
-  %b = load <2 x i64> addrspace(1) * %b_ptr
+  %a = load <2 x i64>, <2 x i64> addrspace(1) * %in
+  %b = load <2 x i64>, <2 x i64> addrspace(1) * %b_ptr
   %result = ashr <2 x i64> %a, %b
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out
   ret void
@@ -204,8 +204,8 @@
 
 define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
-  %a = load <4 x i64> addrspace(1) * %in
-  %b = load <4 x i64> addrspace(1) * %b_ptr
+  %a = load <4 x i64>, <4 x i64> addrspace(1) * %in
+  %b = load <4 x i64>, <4 x i64> addrspace(1) * %b_ptr
   %result = ashr <4 x i64> %a, %b
   store <4 x i64> %result, <4 x i64> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/srem.ll b/llvm/test/CodeGen/R600/srem.ll
index e0f627a..c78fd54 100644
--- a/llvm/test/CodeGen/R600/srem.ll
+++ b/llvm/test/CodeGen/R600/srem.ll
@@ -4,15 +4,15 @@
 
 define void @srem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in
-  %den = load i32 addrspace(1) * %den_ptr
+  %num = load i32, i32 addrspace(1) * %in
+  %den = load i32, i32 addrspace(1) * %den_ptr
   %result = srem i32 %num, %den
   store i32 %result, i32 addrspace(1)* %out
   ret void
 }
 
 define void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %num = load i32 addrspace(1) * %in
+  %num = load i32, i32 addrspace(1) * %in
   %result = srem i32 %num, 4
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -25,7 +25,7 @@
 ; SI: v_sub_i32
 ; SI: s_endpgm
 define void @srem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %num = load i32 addrspace(1) * %in
+  %num = load i32, i32 addrspace(1) * %in
   %result = srem i32 %num, 7
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -33,15 +33,15 @@
 
 define void @srem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %den_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %num = load <2 x i32> addrspace(1) * %in
-  %den = load <2 x i32> addrspace(1) * %den_ptr
+  %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %den = load <2 x i32>, <2 x i32> addrspace(1) * %den_ptr
   %result = srem <2 x i32> %num, %den
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
 }
 
 define void @srem_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
-  %num = load <2 x i32> addrspace(1) * %in
+  %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
   %result = srem <2 x i32> %num, <i32 4, i32 4>
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -49,15 +49,15 @@
 
 define void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %den_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %num = load <4 x i32> addrspace(1) * %in
-  %den = load <4 x i32> addrspace(1) * %den_ptr
+  %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %den = load <4 x i32>, <4 x i32> addrspace(1) * %den_ptr
   %result = srem <4 x i32> %num, %den
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
 }
 
 define void @srem_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %num = load <4 x i32> addrspace(1) * %in
+  %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
   %result = srem <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4>
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -65,15 +65,15 @@
 
 define void @srem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
   %den_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
-  %num = load i64 addrspace(1) * %in
-  %den = load i64 addrspace(1) * %den_ptr
+  %num = load i64, i64 addrspace(1) * %in
+  %den = load i64, i64 addrspace(1) * %den_ptr
   %result = srem i64 %num, %den
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
 define void @srem_i64_4(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
-  %num = load i64 addrspace(1) * %in
+  %num = load i64, i64 addrspace(1) * %in
   %result = srem i64 %num, 4
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -81,15 +81,15 @@
 
 define void @srem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
   %den_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
-  %num = load <2 x i64> addrspace(1) * %in
-  %den = load <2 x i64> addrspace(1) * %den_ptr
+  %num = load <2 x i64>, <2 x i64> addrspace(1) * %in
+  %den = load <2 x i64>, <2 x i64> addrspace(1) * %den_ptr
   %result = srem <2 x i64> %num, %den
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out
   ret void
 }
 
 define void @srem_v2i64_4(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
-  %num = load <2 x i64> addrspace(1) * %in
+  %num = load <2 x i64>, <2 x i64> addrspace(1) * %in
   %result = srem <2 x i64> %num, <i64 4, i64 4>
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out
   ret void
@@ -97,15 +97,15 @@
 
 define void @srem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
   %den_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
-  %num = load <4 x i64> addrspace(1) * %in
-  %den = load <4 x i64> addrspace(1) * %den_ptr
+  %num = load <4 x i64>, <4 x i64> addrspace(1) * %in
+  %den = load <4 x i64>, <4 x i64> addrspace(1) * %den_ptr
   %result = srem <4 x i64> %num, %den
   store <4 x i64> %result, <4 x i64> addrspace(1)* %out
   ret void
 }
 
 define void @srem_v4i64_4(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
-  %num = load <4 x i64> addrspace(1) * %in
+  %num = load <4 x i64>, <4 x i64> addrspace(1) * %in
   %result = srem <4 x i64> %num, <i64 4, i64 4, i64 4, i64 4>
   store <4 x i64> %result, <4 x i64> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/srl.ll b/llvm/test/CodeGen/R600/srl.ll
index 5594161..4904d7f 100644
--- a/llvm/test/CodeGen/R600/srl.ll
+++ b/llvm/test/CodeGen/R600/srl.ll
@@ -8,8 +8,8 @@
 ; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32 addrspace(1)* %in
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %in
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %result = lshr i32 %a, %b
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -26,8 +26,8 @@
 ; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1)* %in
-  %b = load <2 x i32> addrspace(1)* %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
   %result = lshr <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -50,8 +50,8 @@
 ; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1)* %in
-  %b = load <4 x i32> addrspace(1)* %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
   %result = lshr <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -74,8 +74,8 @@
 ; EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
 define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
   %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
-  %a = load i64 addrspace(1)* %in
-  %b = load i64 addrspace(1)* %b_ptr
+  %a = load i64, i64 addrspace(1)* %in
+  %b = load i64, i64 addrspace(1)* %b_ptr
   %result = lshr i64 %a, %b
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -112,8 +112,8 @@
 ; EG-DAG: CNDE_INT
 define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
-  %a = load <2 x i64> addrspace(1)* %in
-  %b = load <2 x i64> addrspace(1)* %b_ptr
+  %a = load <2 x i64>, <2 x i64> addrspace(1)* %in
+  %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
   %result = lshr <2 x i64> %a, %b
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out
   ret void
@@ -178,8 +178,8 @@
 ; EG-DAG: CNDE_INT
 define void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
-  %a = load <4 x i64> addrspace(1)* %in
-  %b = load <4 x i64> addrspace(1)* %b_ptr
+  %a = load <4 x i64>, <4 x i64> addrspace(1)* %in
+  %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
   %result = lshr <4 x i64> %a, %b
   store <4 x i64> %result, <4 x i64> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/ssubo.ll b/llvm/test/CodeGen/R600/ssubo.ll
index 09d3959..26884a1 100644
--- a/llvm/test/CodeGen/R600/ssubo.ll
+++ b/llvm/test/CodeGen/R600/ssubo.ll
@@ -28,8 +28,8 @@
 
 ; FUNC-LABEL: {{^}}v_ssubo_i32:
 define void @v_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
-  %a = load i32 addrspace(1)* %aptr, align 4
-  %b = load i32 addrspace(1)* %bptr, align 4
+  %a = load i32, i32 addrspace(1)* %aptr, align 4
+  %b = load i32, i32 addrspace(1)* %bptr, align 4
   %ssub = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind
   %val = extractvalue { i32, i1 } %ssub, 0
   %carry = extractvalue { i32, i1 } %ssub, 1
@@ -54,8 +54,8 @@
 ; SI: v_sub_i32_e32
 ; SI: v_subb_u32_e32
 define void @v_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
-  %a = load i64 addrspace(1)* %aptr, align 4
-  %b = load i64 addrspace(1)* %bptr, align 4
+  %a = load i64, i64 addrspace(1)* %aptr, align 4
+  %b = load i64, i64 addrspace(1)* %bptr, align 4
   %ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
   %val = extractvalue { i64, i1 } %ssub, 0
   %carry = extractvalue { i64, i1 } %ssub, 1
diff --git a/llvm/test/CodeGen/R600/store-barrier.ll b/llvm/test/CodeGen/R600/store-barrier.ll
index c14383b..4a72b4d0 100644
--- a/llvm/test/CodeGen/R600/store-barrier.ll
+++ b/llvm/test/CodeGen/R600/store-barrier.ll
@@ -15,22 +15,22 @@
 define void @test(<2 x i8> addrspace(3)* nocapture %arg, <2 x i8> addrspace(1)* nocapture readonly %arg1, i32 addrspace(1)* nocapture readonly %arg2, <2 x i8> addrspace(1)* nocapture %arg3, i32 %arg4, i64 %tmp9) {
 bb:
   %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp9
-  %tmp13 = load i32 addrspace(1)* %tmp10, align 2
+  %tmp13 = load i32, i32 addrspace(1)* %tmp10, align 2
   %tmp14 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp13
-  %tmp15 = load <2 x i8> addrspace(3)* %tmp14, align 2
+  %tmp15 = load <2 x i8>, <2 x i8> addrspace(3)* %tmp14, align 2
   %tmp16 = add i32 %tmp13, 1
   %tmp17 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp16
   store <2 x i8> %tmp15, <2 x i8> addrspace(3)* %tmp17, align 2
   tail call void @llvm.AMDGPU.barrier.local() #2
-  %tmp25 = load i32 addrspace(1)* %tmp10, align 4
+  %tmp25 = load i32, i32 addrspace(1)* %tmp10, align 4
   %tmp26 = sext i32 %tmp25 to i64
   %tmp27 = sext i32 %arg4 to i64
   %tmp28 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 %arg4
-  %tmp29 = load i8 addrspace(3)* %tmp28, align 1
+  %tmp29 = load i8, i8 addrspace(3)* %tmp28, align 1
   %tmp30 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 %tmp27
   store i8 %tmp29, i8 addrspace(1)* %tmp30, align 1
   %tmp32 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 0
-  %tmp33 = load i8 addrspace(3)* %tmp32, align 1
+  %tmp33 = load i8, i8 addrspace(3)* %tmp32, align 1
   %tmp35 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 0
   store i8 %tmp33, i8 addrspace(1)* %tmp35, align 1
   ret void
diff --git a/llvm/test/CodeGen/R600/store.ll b/llvm/test/CodeGen/R600/store.ll
index fdce4f3..b5d68d2 100644
--- a/llvm/test/CodeGen/R600/store.ll
+++ b/llvm/test/CodeGen/R600/store.ll
@@ -334,9 +334,9 @@
 ; SI: buffer_store_dwordx2
 define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
 entry:
-  %0 = load i32 addrspace(2)* %mem, align 4
+  %0 = load i32, i32 addrspace(2)* %mem, align 4
   %arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
-  %1 = load i32 addrspace(2)* %arrayidx1.i, align 4
+  %1 = load i32, i32 addrspace(2)* %arrayidx1.i, align 4
   store i32 %0, i32 addrspace(1)* %out, align 4
   %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
   store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
diff --git a/llvm/test/CodeGen/R600/store.r600.ll b/llvm/test/CodeGen/R600/store.r600.ll
index 2197260..696fb03 100644
--- a/llvm/test/CodeGen/R600/store.r600.ll
+++ b/llvm/test/CodeGen/R600/store.r600.ll
@@ -7,7 +7,7 @@
 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
 
 define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %1 = load <4 x i32> addrspace(1) * %in
+  %1 = load <4 x i32>, <4 x i32> addrspace(1) * %in
   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
   ret void
 }
@@ -16,7 +16,7 @@
 ; EG: {{^}}store_v4f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
 define void @store_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-  %1 = load <4 x float> addrspace(1) * %in
+  %1 = load <4 x float>, <4 x float> addrspace(1) * %in
   store <4 x float> %1, <4 x float> addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/sub.ll b/llvm/test/CodeGen/R600/sub.ll
index b8ef279..03303f5 100644
--- a/llvm/test/CodeGen/R600/sub.ll
+++ b/llvm/test/CodeGen/R600/sub.ll
@@ -10,8 +10,8 @@
 ; SI: v_subrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32 addrspace(1)* %in
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %in
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %result = sub i32 %a, %b
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -27,8 +27,8 @@
 
 define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1) * %in
-  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
   %result = sub <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -47,8 +47,8 @@
 
 define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
   %result = sub <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -82,8 +82,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() readnone
   %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
   %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
-  %a = load i64 addrspace(1)* %a_ptr
-  %b = load i64 addrspace(1)* %b_ptr
+  %a = load i64, i64 addrspace(1)* %a_ptr
+  %b = load i64, i64 addrspace(1)* %b_ptr
   %result = sub i64 %a, %b
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
@@ -98,8 +98,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() readnone
   %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
   %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
-  %a = load <2 x i64> addrspace(1)* %a_ptr
-  %b = load <2 x i64> addrspace(1)* %b_ptr
+  %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr
+  %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
   %result = sub <2 x i64> %a, %b
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out
   ret void
@@ -118,8 +118,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() readnone
   %a_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inA, i32 %tid
   %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inB, i32 %tid
-  %a = load <4 x i64> addrspace(1)* %a_ptr
-  %b = load <4 x i64> addrspace(1)* %b_ptr
+  %a = load <4 x i64>, <4 x i64> addrspace(1)* %a_ptr
+  %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
   %result = sub <4 x i64> %a, %b
   store <4 x i64> %result, <4 x i64> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/swizzle-export.ll b/llvm/test/CodeGen/R600/swizzle-export.ll
index 5eaca76..7010e93 100644
--- a/llvm/test/CodeGen/R600/swizzle-export.ll
+++ b/llvm/test/CodeGen/R600/swizzle-export.ll
@@ -12,56 +12,56 @@
   %1 = extractelement <4 x float> %reg1, i32 1
   %2 = extractelement <4 x float> %reg1, i32 2
   %3 = extractelement <4 x float> %reg1, i32 3
-  %4 = load <4 x float> addrspace(8)* null
+  %4 = load <4 x float>, <4 x float> addrspace(8)* null
   %5 = extractelement <4 x float> %4, i32 1
-  %6 = load <4 x float> addrspace(8)* null
+  %6 = load <4 x float>, <4 x float> addrspace(8)* null
   %7 = extractelement <4 x float> %6, i32 2
-  %8 = load <4 x float> addrspace(8)* null
+  %8 = load <4 x float>, <4 x float> addrspace(8)* null
   %9 = extractelement <4 x float> %8, i32 0
   %10 = fmul float 0.000000e+00, %9
-  %11 = load <4 x float> addrspace(8)* null
+  %11 = load <4 x float>, <4 x float> addrspace(8)* null
   %12 = extractelement <4 x float> %11, i32 0
   %13 = fmul float %5, %12
-  %14 = load <4 x float> addrspace(8)* null
+  %14 = load <4 x float>, <4 x float> addrspace(8)* null
   %15 = extractelement <4 x float> %14, i32 0
   %16 = fmul float 0.000000e+00, %15
-  %17 = load <4 x float> addrspace(8)* null
+  %17 = load <4 x float>, <4 x float> addrspace(8)* null
   %18 = extractelement <4 x float> %17, i32 0
   %19 = fmul float 0.000000e+00, %18
-  %20 = load <4 x float> addrspace(8)* null
+  %20 = load <4 x float>, <4 x float> addrspace(8)* null
   %21 = extractelement <4 x float> %20, i32 0
   %22 = fmul float %7, %21
-  %23 = load <4 x float> addrspace(8)* null
+  %23 = load <4 x float>, <4 x float> addrspace(8)* null
   %24 = extractelement <4 x float> %23, i32 0
   %25 = fmul float 0.000000e+00, %24
-  %26 = load <4 x float> addrspace(8)* null
+  %26 = load <4 x float>, <4 x float> addrspace(8)* null
   %27 = extractelement <4 x float> %26, i32 0
   %28 = fmul float 0.000000e+00, %27
-  %29 = load <4 x float> addrspace(8)* null
+  %29 = load <4 x float>, <4 x float> addrspace(8)* null
   %30 = extractelement <4 x float> %29, i32 0
   %31 = fmul float 0.000000e+00, %30
-  %32 = load <4 x float> addrspace(8)* null
+  %32 = load <4 x float>, <4 x float> addrspace(8)* null
   %33 = extractelement <4 x float> %32, i32 0
   %34 = fmul float 0.000000e+00, %33
-  %35 = load <4 x float> addrspace(8)* null
+  %35 = load <4 x float>, <4 x float> addrspace(8)* null
   %36 = extractelement <4 x float> %35, i32 0
   %37 = fmul float 0.000000e+00, %36
-  %38 = load <4 x float> addrspace(8)* null
+  %38 = load <4 x float>, <4 x float> addrspace(8)* null
   %39 = extractelement <4 x float> %38, i32 0
   %40 = fmul float 1.000000e+00, %39
-  %41 = load <4 x float> addrspace(8)* null
+  %41 = load <4 x float>, <4 x float> addrspace(8)* null
   %42 = extractelement <4 x float> %41, i32 0
   %43 = fmul float 0.000000e+00, %42
-  %44 = load <4 x float> addrspace(8)* null
+  %44 = load <4 x float>, <4 x float> addrspace(8)* null
   %45 = extractelement <4 x float> %44, i32 0
   %46 = fmul float 0.000000e+00, %45
-  %47 = load <4 x float> addrspace(8)* null
+  %47 = load <4 x float>, <4 x float> addrspace(8)* null
   %48 = extractelement <4 x float> %47, i32 0
   %49 = fmul float 0.000000e+00, %48
-  %50 = load <4 x float> addrspace(8)* null
+  %50 = load <4 x float>, <4 x float> addrspace(8)* null
   %51 = extractelement <4 x float> %50, i32 0
   %52 = fmul float 0.000000e+00, %51
-  %53 = load <4 x float> addrspace(8)* null
+  %53 = load <4 x float>, <4 x float> addrspace(8)* null
   %54 = extractelement <4 x float> %53, i32 0
   %55 = fmul float 1.000000e+00, %54
   %56 = insertelement <4 x float> undef, float %0, i32 0
@@ -102,12 +102,12 @@
   %1 = extractelement <4 x float> %reg1, i32 1
   %2 = fadd float %0, 2.5
   %3 = fmul float %1, 3.5
-  %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %5 = extractelement <4 x float> %4, i32 0
   %6 = call float @llvm.cos.f32(float %5)
-  %7 = load <4 x float> addrspace(8)* null
+  %7 = load <4 x float>, <4 x float> addrspace(8)* null
   %8 = extractelement <4 x float> %7, i32 0
-  %9 = load <4 x float> addrspace(8)* null
+  %9 = load <4 x float>, <4 x float> addrspace(8)* null
   %10 = extractelement <4 x float> %9, i32 1
   %11 = insertelement <4 x float> undef, float %2, i32 0
   %12 = insertelement <4 x float> %11, float %3, i32 1
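
The swizzle-export hunks above cover loads whose pointer operand is a constant: both a plain null and a getelementptr constant expression. The inserted type still describes the load's result; the getelementptr constant expression itself keeps its old single-type form, since this change only touches the load syntax. For reference, the two updated shapes from the hunks above:

%7 = load <4 x float>, <4 x float> addrspace(8)* null
%4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)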
diff --git a/llvm/test/CodeGen/R600/trunc-cmp-constant.ll b/llvm/test/CodeGen/R600/trunc-cmp-constant.ll
index a097ab0..21dfade 100644
--- a/llvm/test/CodeGen/R600/trunc-cmp-constant.ll
+++ b/llvm/test/CodeGen/R600/trunc-cmp-constant.ll
@@ -9,7 +9,7 @@
 ; SI: v_cndmask_b32_e64
 ; SI: buffer_store_byte
 define void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %load to i32
   %cmp = icmp eq i32 %ext, 0
   store i1 %cmp, i1 addrspace(1)* %out
@@ -25,7 +25,7 @@
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
 ; SI-NEXT: buffer_store_byte [[RESULT]]
 define void @zextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %load to i32
   %cmp = icmp eq i32 %ext, 0
   store i1 %cmp, i1 addrspace(1)* %out
@@ -36,7 +36,7 @@
 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
 ; SI: buffer_store_byte [[RESULT]]
 define void @sextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %load to i32
   %cmp = icmp eq i32 %ext, 1
   store i1 %cmp, i1 addrspace(1)* %out
@@ -48,7 +48,7 @@
 ; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
 ; SI-NEXT: buffer_store_byte [[RESULT]]
 define void @zextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %load to i32
   %cmp = icmp eq i32 %ext, 1
   store i1 %cmp, i1 addrspace(1)* %out
@@ -60,7 +60,7 @@
 ; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
 ; SI-NEXT: buffer_store_byte [[RESULT]]
 define void @sextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %load to i32
   %cmp = icmp eq i32 %ext, -1
   store i1 %cmp, i1 addrspace(1)* %out
@@ -71,7 +71,7 @@
 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
 ; SI: buffer_store_byte [[RESULT]]
 define void @zextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %load to i32
   %cmp = icmp eq i32 %ext, -1
   store i1 %cmp, i1 addrspace(1)* %out
@@ -84,7 +84,7 @@
 ; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
 ; SI-NEXT: buffer_store_byte [[RESULT]]
 define void @sextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %load to i32
   %cmp = icmp ne i32 %ext, 0
   store i1 %cmp, i1 addrspace(1)* %out
@@ -96,7 +96,7 @@
 ; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
 ; SI-NEXT: buffer_store_byte [[RESULT]]
 define void @zextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %load to i32
   %cmp = icmp ne i32 %ext, 0
   store i1 %cmp, i1 addrspace(1)* %out
@@ -107,7 +107,7 @@
 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
 ; SI: buffer_store_byte [[RESULT]]
 define void @sextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %load to i32
   %cmp = icmp ne i32 %ext, 1
   store i1 %cmp, i1 addrspace(1)* %out
@@ -122,7 +122,7 @@
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
 ; SI-NEXT: buffer_store_byte [[RESULT]]
 define void @zextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %load to i32
   %cmp = icmp ne i32 %ext, 1
   store i1 %cmp, i1 addrspace(1)* %out
@@ -137,7 +137,7 @@
 ; XSI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP0]]
 ; XSI-NEXT: buffer_store_byte [[RESULT]]
 define void @sextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %load to i32
   %cmp = icmp ne i32 %ext, -1
   store i1 %cmp, i1 addrspace(1)* %out
@@ -148,7 +148,7 @@
 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
 ; SI: buffer_store_byte [[RESULT]]
 define void @zextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %load to i32
   %cmp = icmp ne i32 %ext, -1
   store i1 %cmp, i1 addrspace(1)* %out
@@ -161,7 +161,7 @@
 ; SI-NEXT: v_cndmask_b32_e64
 ; SI-NEXT: buffer_store_byte
 define void @masked_load_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
-  %load = load i8 addrspace(1)* %in
+  %load = load i8, i8 addrspace(1)* %in
   %masked = and i8 %load, 255
   %ext = sext i8 %masked to i32
   %cmp = icmp ne i32 %ext, -1
diff --git a/llvm/test/CodeGen/R600/trunc.ll b/llvm/test/CodeGen/R600/trunc.ll
index 5d557ab..5580bd3 100644
--- a/llvm/test/CodeGen/R600/trunc.ll
+++ b/llvm/test/CodeGen/R600/trunc.ll
@@ -53,7 +53,7 @@
 ; SI: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
 ; SI: v_cmp_eq_i32
 define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) {
-  %a = load i32 addrspace(1)* %ptr, align 4
+  %a = load i32, i32 addrspace(1)* %ptr, align 4
   %trunc = trunc i32 %a to i1
   %result = select i1 %trunc, i32 1, i32 0
   store i32 %result, i32 addrspace(1)* %out, align 4
@@ -91,7 +91,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %x = load i64 addrspace(1)* %gep
+  %x = load i64, i64 addrspace(1)* %gep
 
   %trunc = trunc i64 %x to i1
   %sel = select i1 %trunc, i32 63, i32 -12
diff --git a/llvm/test/CodeGen/R600/uaddo.ll b/llvm/test/CodeGen/R600/uaddo.ll
index 57d7835..9f38365 100644
--- a/llvm/test/CodeGen/R600/uaddo.ll
+++ b/llvm/test/CodeGen/R600/uaddo.ll
@@ -33,8 +33,8 @@
 ; FUNC-LABEL: {{^}}v_uaddo_i32:
 ; SI: v_add_i32
 define void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
-  %a = load i32 addrspace(1)* %aptr, align 4
-  %b = load i32 addrspace(1)* %bptr, align 4
+  %a = load i32, i32 addrspace(1)* %aptr, align 4
+  %b = load i32, i32 addrspace(1)* %bptr, align 4
   %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) nounwind
   %val = extractvalue { i32, i1 } %uadd, 0
   %carry = extractvalue { i32, i1 } %uadd, 1
@@ -59,8 +59,8 @@
 ; SI: v_add_i32
 ; SI: v_addc_u32
 define void @v_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
-  %a = load i64 addrspace(1)* %aptr, align 4
-  %b = load i64 addrspace(1)* %bptr, align 4
+  %a = load i64, i64 addrspace(1)* %aptr, align 4
+  %b = load i64, i64 addrspace(1)* %bptr, align 4
   %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind
   %val = extractvalue { i64, i1 } %uadd, 0
   %carry = extractvalue { i64, i1 } %uadd, 1
diff --git a/llvm/test/CodeGen/R600/udiv.ll b/llvm/test/CodeGen/R600/udiv.ll
index e350ecb..de22a22 100644
--- a/llvm/test/CodeGen/R600/udiv.ll
+++ b/llvm/test/CodeGen/R600/udiv.ll
@@ -8,8 +8,8 @@
 
 define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32 addrspace(1) * %in
-  %b = load i32 addrspace(1) * %b_ptr
+  %a = load i32, i32 addrspace(1) * %in
+  %b = load i32, i32 addrspace(1) * %b_ptr
   %result = udiv i32 %a, %b
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -26,8 +26,8 @@
 
 define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1) * %in
-  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
   %result = udiv <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -40,8 +40,8 @@
 
 define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
   %result = udiv <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/udivrem24.ll b/llvm/test/CodeGen/R600/udivrem24.ll
index bbb0108..4de881b 100644
--- a/llvm/test/CodeGen/R600/udivrem24.ll
+++ b/llvm/test/CodeGen/R600/udivrem24.ll
@@ -14,8 +14,8 @@
 ; EG: FLT_TO_UINT
 define void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
   %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
-  %num = load i8 addrspace(1) * %in
-  %den = load i8 addrspace(1) * %den_ptr
+  %num = load i8, i8 addrspace(1) * %in
+  %den = load i8, i8 addrspace(1) * %den_ptr
   %result = udiv i8 %num, %den
   store i8 %result, i8 addrspace(1)* %out
   ret void
@@ -33,8 +33,8 @@
 ; EG: FLT_TO_UINT
 define void @udiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
   %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
-  %num = load i16 addrspace(1) * %in, align 2
-  %den = load i16 addrspace(1) * %den_ptr, align 2
+  %num = load i16, i16 addrspace(1) * %in, align 2
+  %den = load i16, i16 addrspace(1) * %den_ptr, align 2
   %result = udiv i16 %num, %den
   store i16 %result, i16 addrspace(1)* %out, align 2
   ret void
@@ -52,8 +52,8 @@
 ; EG: FLT_TO_UINT
 define void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 8
   %den.i24.0 = shl i32 %den, 8
   %num.i24 = lshr i32 %num.i24.0, 8
@@ -72,8 +72,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @udiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 7
   %den.i24.0 = shl i32 %den, 7
   %num.i24 = lshr i32 %num.i24.0, 7
@@ -92,8 +92,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @test_no_udiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 8
   %den.i24.0 = shl i32 %den, 7
   %num.i24 = lshr i32 %num.i24.0, 8
@@ -112,8 +112,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 7
   %den.i24.0 = shl i32 %den, 8
   %num.i24 = lshr i32 %num.i24.0, 7
@@ -135,8 +135,8 @@
 ; EG: FLT_TO_UINT
 define void @urem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
   %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
-  %num = load i8 addrspace(1) * %in
-  %den = load i8 addrspace(1) * %den_ptr
+  %num = load i8, i8 addrspace(1) * %in
+  %den = load i8, i8 addrspace(1) * %den_ptr
   %result = urem i8 %num, %den
   store i8 %result, i8 addrspace(1)* %out
   ret void
@@ -154,8 +154,8 @@
 ; EG: FLT_TO_UINT
 define void @urem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
   %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
-  %num = load i16 addrspace(1) * %in, align 2
-  %den = load i16 addrspace(1) * %den_ptr, align 2
+  %num = load i16, i16 addrspace(1) * %in, align 2
+  %den = load i16, i16 addrspace(1) * %den_ptr, align 2
   %result = urem i16 %num, %den
   store i16 %result, i16 addrspace(1)* %out, align 2
   ret void
@@ -173,8 +173,8 @@
 ; EG: FLT_TO_UINT
 define void @urem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 8
   %den.i24.0 = shl i32 %den, 8
   %num.i24 = lshr i32 %num.i24.0, 8
@@ -193,8 +193,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @urem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 7
   %den.i24.0 = shl i32 %den, 7
   %num.i24 = lshr i32 %num.i24.0, 7
@@ -213,8 +213,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @test_no_urem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 8
   %den.i24.0 = shl i32 %den, 7
   %num.i24 = lshr i32 %num.i24.0, 8
@@ -233,8 +233,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @test_no_urem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 7
   %den.i24.0 = shl i32 %den, 8
   %num.i24 = lshr i32 %num.i24.0, 7
diff --git a/llvm/test/CodeGen/R600/uint_to_fp.f64.ll b/llvm/test/CodeGen/R600/uint_to_fp.f64.ll
index e79bdd5..dfec8eb 100644
--- a/llvm/test/CodeGen/R600/uint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/R600/uint_to_fp.f64.ll
@@ -12,7 +12,7 @@
 define void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
-  %val = load i64 addrspace(1)* %gep, align 8
+  %val = load i64, i64 addrspace(1)* %gep, align 8
   %result = uitofp i64 %val to double
   store double %result, double addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/uint_to_fp.ll b/llvm/test/CodeGen/R600/uint_to_fp.ll
index 1c8a175..d3f0281 100644
--- a/llvm/test/CodeGen/R600/uint_to_fp.ll
+++ b/llvm/test/CodeGen/R600/uint_to_fp.ll
@@ -38,7 +38,7 @@
 ; SI: v_cvt_f32_u32_e32
 ; SI: s_endpgm
 define void @uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %value = load <4 x i32> addrspace(1) * %in
+  %value = load <4 x i32>, <4 x i32> addrspace(1) * %in
   %result = uitofp <4 x i32> %value to <4 x float>
   store <4 x float> %result, <4 x float> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/unaligned-load-store.ll b/llvm/test/CodeGen/R600/unaligned-load-store.ll
index 79ff5c9..efb1de2 100644
--- a/llvm/test/CodeGen/R600/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/R600/unaligned-load-store.ll
@@ -8,7 +8,7 @@
 ; SI: ds_write_b8
 ; SI: s_endpgm
 define void @unaligned_load_store_i16_local(i16 addrspace(3)* %p, i16 addrspace(3)* %r) nounwind {
-  %v = load i16 addrspace(3)* %p, align 1
+  %v = load i16, i16 addrspace(3)* %p, align 1
   store i16 %v, i16 addrspace(3)* %r, align 1
   ret void
 }
@@ -20,7 +20,7 @@
 ; SI: buffer_store_byte
 ; SI: s_endpgm
 define void @unaligned_load_store_i16_global(i16 addrspace(1)* %p, i16 addrspace(1)* %r) nounwind {
-  %v = load i16 addrspace(1)* %p, align 1
+  %v = load i16, i16 addrspace(1)* %p, align 1
   store i16 %v, i16 addrspace(1)* %r, align 1
   ret void
 }
@@ -36,7 +36,7 @@
 ; SI: ds_write_b8
 ; SI: s_endpgm
 define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
-  %v = load i32 addrspace(3)* %p, align 1
+  %v = load i32, i32 addrspace(3)* %p, align 1
   store i32 %v, i32 addrspace(3)* %r, align 1
   ret void
 }
@@ -51,7 +51,7 @@
 ; SI: buffer_store_byte
 ; SI: buffer_store_byte
 define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind {
-  %v = load i32 addrspace(1)* %p, align 1
+  %v = load i32, i32 addrspace(1)* %p, align 1
   store i32 %v, i32 addrspace(1)* %r, align 1
   ret void
 }
@@ -75,7 +75,7 @@
 ; SI: ds_write_b8
 ; SI: s_endpgm
 define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
-  %v = load i64 addrspace(3)* %p, align 1
+  %v = load i64, i64 addrspace(3)* %p, align 1
   store i64 %v, i64 addrspace(3)* %r, align 1
   ret void
 }
@@ -98,7 +98,7 @@
 ; SI: buffer_store_byte
 ; SI: buffer_store_byte
 define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
-  %v = load i64 addrspace(1)* %p, align 1
+  %v = load i64, i64 addrspace(1)* %p, align 1
   store i64 %v, i64 addrspace(1)* %r, align 1
   ret void
 }
@@ -145,7 +145,7 @@
 ; SI: ds_write_b8
 ; SI: s_endpgm
 define void @unaligned_load_store_v4i32_local(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
-  %v = load <4 x i32> addrspace(3)* %p, align 1
+  %v = load <4 x i32>, <4 x i32> addrspace(3)* %p, align 1
   store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
   ret void
 }
@@ -169,7 +169,7 @@
 ; FIXME-SI: buffer_load_ubyte
 ; FIXME-SI: buffer_load_ubyte
 define void @unaligned_load_store_v4i32_global(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) nounwind {
-  %v = load <4 x i32> addrspace(1)* %p, align 1
+  %v = load <4 x i32>, <4 x i32> addrspace(1)* %p, align 1
   store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1
   ret void
 }
@@ -178,7 +178,7 @@
 ; SI: ds_read2_b32
 ; SI: s_endpgm
 define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
-  %val = load i64 addrspace(3)* %in, align 4
+  %val = load i64, i64 addrspace(3)* %in, align 4
   store i64 %val, i64 addrspace(1)* %out, align 8
   ret void
 }
@@ -188,7 +188,7 @@
 ; SI: s_endpgm
 define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
   %ptr = getelementptr i64, i64 addrspace(3)* %in, i32 4
-  %val = load i64 addrspace(3)* %ptr, align 4
+  %val = load i64, i64 addrspace(3)* %ptr, align 4
   store i64 %val, i64 addrspace(1)* %out, align 8
   ret void
 }
@@ -201,7 +201,7 @@
   %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
   %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
   %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
-  %val = load i64 addrspace(3)* %ptri64, align 4
+  %val = load i64, i64 addrspace(3)* %ptri64, align 4
   store i64 %val, i64 addrspace(1)* %out, align 8
   ret void
 }
@@ -219,7 +219,7 @@
 ; SI: s_endpgm
 
 define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
-  %val = load i64 addrspace(3)* %in, align 1
+  %val = load i64, i64 addrspace(3)* %in, align 1
   store i64 %val, i64 addrspace(1)* %out, align 8
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/unhandled-loop-condition-assertion.ll b/llvm/test/CodeGen/R600/unhandled-loop-condition-assertion.ll
index 5c6d7ff..036a7e9 100644
--- a/llvm/test/CodeGen/R600/unhandled-loop-condition-assertion.ll
+++ b/llvm/test/CodeGen/R600/unhandled-loop-condition-assertion.ll
@@ -20,19 +20,19 @@
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
   %main.addr.011 = phi i8 addrspace(1)* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
   %0 = bitcast i8 addrspace(1)* %main.addr.011 to i32 addrspace(1)*
-  %1 = load i32 addrspace(1)* %0, align 4
+  %1 = load i32, i32 addrspace(1)* %0, align 4
   %add.ptr = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %main_stride
   %2 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
-  %3 = load i32 addrspace(1)* %2, align 4
+  %3 = load i32, i32 addrspace(1)* %2, align 4
   %add.ptr1 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
   %4 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
-  %5 = load i32 addrspace(1)* %4, align 4
+  %5 = load i32, i32 addrspace(1)* %4, align 4
   %add.ptr2 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
   %6 = bitcast i8 addrspace(1)* %add.ptr2 to i32 addrspace(1)*
-  %7 = load i32 addrspace(1)* %6, align 4
+  %7 = load i32, i32 addrspace(1)* %6, align 4
   %add.ptr3 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
   %8 = bitcast i8 addrspace(1)* %add.ptr3 to i32 addrspace(1)*
-  %9 = load i32 addrspace(1)* %8, align 4
+  %9 = load i32, i32 addrspace(1)* %8, align 4
   %add.ptr6 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 undef
   br i1 undef, label %for.end, label %for.body
 
@@ -56,19 +56,19 @@
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
   %main.addr.011 = phi i8 addrspace(1)* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
   %0 = bitcast i8 addrspace(1)* %main.addr.011 to i32 addrspace(1)*
-  %1 = load i32 addrspace(1)* %0, align 4
+  %1 = load i32, i32 addrspace(1)* %0, align 4
   %add.ptr = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %main_stride
   %2 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
-  %3 = load i32 addrspace(1)* %2, align 4
+  %3 = load i32, i32 addrspace(1)* %2, align 4
   %add.ptr1 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
   %4 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
-  %5 = load i32 addrspace(1)* %4, align 4
+  %5 = load i32, i32 addrspace(1)* %4, align 4
   %add.ptr2 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
   %6 = bitcast i8 addrspace(1)* %add.ptr2 to i32 addrspace(1)*
-  %7 = load i32 addrspace(1)* %6, align 4
+  %7 = load i32, i32 addrspace(1)* %6, align 4
   %add.ptr3 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
   %8 = bitcast i8 addrspace(1)* %add.ptr3 to i32 addrspace(1)*
-  %9 = load i32 addrspace(1)* %8, align 4
+  %9 = load i32, i32 addrspace(1)* %8, align 4
   %add.ptr6 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 undef
   br i1 undef, label %for.end, label %for.body
 
@@ -92,19 +92,19 @@
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
   %main.addr.011 = phi i8 addrspace(1)* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
   %0 = bitcast i8 addrspace(1)* %main.addr.011 to i32 addrspace(1)*
-  %1 = load i32 addrspace(1)* %0, align 4
+  %1 = load i32, i32 addrspace(1)* %0, align 4
   %add.ptr = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %main_stride
   %2 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
-  %3 = load i32 addrspace(1)* %2, align 4
+  %3 = load i32, i32 addrspace(1)* %2, align 4
   %add.ptr1 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
   %4 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
-  %5 = load i32 addrspace(1)* %4, align 4
+  %5 = load i32, i32 addrspace(1)* %4, align 4
   %add.ptr2 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
   %6 = bitcast i8 addrspace(1)* %add.ptr2 to i32 addrspace(1)*
-  %7 = load i32 addrspace(1)* %6, align 4
+  %7 = load i32, i32 addrspace(1)* %6, align 4
   %add.ptr3 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
   %8 = bitcast i8 addrspace(1)* %add.ptr3 to i32 addrspace(1)*
-  %9 = load i32 addrspace(1)* %8, align 4
+  %9 = load i32, i32 addrspace(1)* %8, align 4
   %add.ptr6 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 undef
   br i1 undef, label %for.end, label %for.body
 
diff --git a/llvm/test/CodeGen/R600/unroll.ll b/llvm/test/CodeGen/R600/unroll.ll
index 23ff71c..ca8d822 100644
--- a/llvm/test/CodeGen/R600/unroll.ll
+++ b/llvm/test/CodeGen/R600/unroll.ll
@@ -31,7 +31,7 @@
 
 exit:
   %2 = getelementptr [32 x i32], [32 x i32]* %0, i32 0, i32 5
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   store i32 %3, i32 addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/urem.ll b/llvm/test/CodeGen/R600/urem.ll
index ab5ba93..62841ec 100644
--- a/llvm/test/CodeGen/R600/urem.ll
+++ b/llvm/test/CodeGen/R600/urem.ll
@@ -11,8 +11,8 @@
 ; EG: CF_END
 define void @test_urem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32 addrspace(1)* %in
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %in
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %result = urem i32 %a, %b
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -27,7 +27,7 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @test_urem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %num = load i32 addrspace(1) * %in
+  %num = load i32, i32 addrspace(1) * %in
   %result = urem i32 %num, 7
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -38,8 +38,8 @@
 ; EG: CF_END
 define void @test_urem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1)* %in
-  %b = load <2 x i32> addrspace(1)* %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
   %result = urem <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -50,8 +50,8 @@
 ; EG: CF_END
 define void @test_urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1)* %in
-  %b = load <4 x i32> addrspace(1)* %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
   %result = urem <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -62,8 +62,8 @@
 ; EG: CF_END
 define void @test_urem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
   %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
-  %a = load i64 addrspace(1)* %in
-  %b = load i64 addrspace(1)* %b_ptr
+  %a = load i64, i64 addrspace(1)* %in
+  %b = load i64, i64 addrspace(1)* %b_ptr
   %result = urem i64 %a, %b
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -74,8 +74,8 @@
 ; EG: CF_END
 define void @test_urem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
-  %a = load <2 x i64> addrspace(1)* %in
-  %b = load <2 x i64> addrspace(1)* %b_ptr
+  %a = load <2 x i64>, <2 x i64> addrspace(1)* %in
+  %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
   %result = urem <2 x i64> %a, %b
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out
   ret void
@@ -86,8 +86,8 @@
 ; EG: CF_END
 define void @test_urem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
-  %a = load <4 x i64> addrspace(1)* %in
-  %b = load <4 x i64> addrspace(1)* %b_ptr
+  %a = load <4 x i64>, <4 x i64> addrspace(1)* %in
+  %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
   %result = urem <4 x i64> %a, %b
   store <4 x i64> %result, <4 x i64> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/usubo.ll b/llvm/test/CodeGen/R600/usubo.ll
index be1e666..a753ca4 100644
--- a/llvm/test/CodeGen/R600/usubo.ll
+++ b/llvm/test/CodeGen/R600/usubo.ll
@@ -30,8 +30,8 @@
 ; FUNC-LABEL: {{^}}v_usubo_i32:
 ; SI: v_subrev_i32_e32
 define void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
-  %a = load i32 addrspace(1)* %aptr, align 4
-  %b = load i32 addrspace(1)* %bptr, align 4
+  %a = load i32, i32 addrspace(1)* %aptr, align 4
+  %b = load i32, i32 addrspace(1)* %bptr, align 4
   %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) nounwind
   %val = extractvalue { i32, i1 } %usub, 0
   %carry = extractvalue { i32, i1 } %usub, 1
@@ -56,8 +56,8 @@
 ; SI: v_sub_i32
 ; SI: v_subb_u32
 define void @v_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
-  %a = load i64 addrspace(1)* %aptr, align 4
-  %b = load i64 addrspace(1)* %bptr, align 4
+  %a = load i64, i64 addrspace(1)* %aptr, align 4
+  %b = load i64, i64 addrspace(1)* %bptr, align 4
   %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind
   %val = extractvalue { i64, i1 } %usub, 0
   %carry = extractvalue { i64, i1 } %usub, 1
diff --git a/llvm/test/CodeGen/R600/v_cndmask.ll b/llvm/test/CodeGen/R600/v_cndmask.ll
index 9bd96c2..c368c5a 100644
--- a/llvm/test/CodeGen/R600/v_cndmask.ll
+++ b/llvm/test/CodeGen/R600/v_cndmask.ll
@@ -11,7 +11,7 @@
 define void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 {
   %idx = call i32 @llvm.r600.read.tidig.x() #1
   %f.gep = getelementptr float, float addrspace(1)* %fptr, i32 %idx
-  %f = load float addrspace(1)* %fptr
+  %f = load float, float addrspace(1)* %fptr
   %setcc = icmp ne i32 %c, 0
   %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
   store float %select, float addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/valu-i1.ll b/llvm/test/CodeGen/R600/valu-i1.ll
index 8e30972..ef4f3ef 100644
--- a/llvm/test/CodeGen/R600/valu-i1.ll
+++ b/llvm/test/CodeGen/R600/valu-i1.ll
@@ -95,7 +95,7 @@
   %i = phi i32 [%tid, %entry], [%i.inc, %loop]
   %gep.src = getelementptr i32, i32 addrspace(1)* %src, i32 %i
   %gep.dst = getelementptr i32, i32 addrspace(1)* %dst, i32 %i
-  %load = load i32 addrspace(1)* %src
+  %load = load i32, i32 addrspace(1)* %src
   store i32 %load, i32 addrspace(1)* %gep.dst
   %i.inc = add nsw i32 %i, 1
   %cmp = icmp eq i32 %limit, %i.inc
@@ -155,7 +155,7 @@
   %tmp = tail call i32 @llvm.r600.read.tidig.x() #0
   %tmp4 = sext i32 %tmp to i64
   %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg3, i64 %tmp4
-  %tmp6 = load i32 addrspace(1)* %tmp5, align 4
+  %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
   %tmp7 = icmp sgt i32 %tmp6, 0
   %tmp8 = sext i32 %tmp6 to i64
   br i1 %tmp7, label %bb10, label %bb26
@@ -164,9 +164,9 @@
   %tmp11 = phi i64 [ %tmp23, %bb20 ], [ 0, %bb ]
   %tmp12 = add nsw i64 %tmp11, %tmp4
   %tmp13 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp12
-  %tmp14 = load i32 addrspace(1)* %tmp13, align 4
+  %tmp14 = load i32, i32 addrspace(1)* %tmp13, align 4
   %tmp15 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp12
-  %tmp16 = load i32 addrspace(1)* %tmp15, align 4
+  %tmp16 = load i32, i32 addrspace(1)* %tmp15, align 4
   %tmp17 = icmp ne i32 %tmp14, -1
   %tmp18 = icmp ne i32 %tmp16, -1
   %tmp19 = and i1 %tmp17, %tmp18
diff --git a/llvm/test/CodeGen/R600/vector-alloca.ll b/llvm/test/CodeGen/R600/vector-alloca.ll
index 81441ee..6f3b484 100644
--- a/llvm/test/CodeGen/R600/vector-alloca.ll
+++ b/llvm/test/CodeGen/R600/vector-alloca.ll
@@ -22,7 +22,7 @@
   store i32 2, i32* %z
   store i32 3, i32* %w
   %1 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 %index
-  %2 = load i32* %1
+  %2 = load i32, i32* %1
   store i32 %2, i32 addrspace(1)* %out
   ret void
 }
@@ -48,7 +48,7 @@
   %1 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 %w_index
   store i32 1, i32* %1
   %2 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 %r_index
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   store i32 %3, i32 addrspace(1)* %out
   ret void
 }
@@ -71,7 +71,7 @@
   %1 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 1
   %2 = bitcast i32* %1 to [4 x i32]*
   %3 = getelementptr [4 x i32], [4 x i32]* %2, i32 0, i32 0
-  %4 = load i32* %3
+  %4 = load i32, i32* %3
   store i32 %4, i32 addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/vertex-fetch-encoding.ll b/llvm/test/CodeGen/R600/vertex-fetch-encoding.ll
index e4d117f..fb6a17e 100644
--- a/llvm/test/CodeGen/R600/vertex-fetch-encoding.ll
+++ b/llvm/test/CodeGen/R600/vertex-fetch-encoding.ll
@@ -8,7 +8,7 @@
 
 define void @vtx_fetch32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
-  %0 = load i32 addrspace(1)* %in
+  %0 = load i32, i32 addrspace(1)* %in
   store i32 %0, i32 addrspace(1)* %out
   ret void
 }
@@ -19,7 +19,7 @@
 
 define void @vtx_fetch128(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
 entry:
-  %0 = load <4 x i32> addrspace(1)* %in
+  %0 = load <4 x i32>, <4 x i32> addrspace(1)* %in
   store <4 x i32> %0, <4 x i32> addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/vselect.ll b/llvm/test/CodeGen/R600/vselect.ll
index a6152f7..a3014b0 100644
--- a/llvm/test/CodeGen/R600/vselect.ll
+++ b/llvm/test/CodeGen/R600/vselect.ll
@@ -12,8 +12,8 @@
 
 define void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
 entry:
-  %0 = load <2 x i32> addrspace(1)* %in0
-  %1 = load <2 x i32> addrspace(1)* %in1
+  %0 = load <2 x i32>, <2 x i32> addrspace(1)* %in0
+  %1 = load <2 x i32>, <2 x i32> addrspace(1)* %in1
   %cmp = icmp ne <2 x i32> %0, %1
   %result = select <2 x i1> %cmp, <2 x i32> %0, <2 x i32> %1
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
@@ -30,8 +30,8 @@
 
 define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in0, <2 x float> addrspace(1)* %in1) {
 entry:
-  %0 = load <2 x float> addrspace(1)* %in0
-  %1 = load <2 x float> addrspace(1)* %in1
+  %0 = load <2 x float>, <2 x float> addrspace(1)* %in0
+  %1 = load <2 x float>, <2 x float> addrspace(1)* %in1
   %cmp = fcmp une <2 x float> %0, %1
   %result = select <2 x i1> %cmp, <2 x float> %0, <2 x float> %1
   store <2 x float> %result, <2 x float> addrspace(1)* %out
@@ -52,8 +52,8 @@
 
 define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
 entry:
-  %0 = load <4 x i32> addrspace(1)* %in0
-  %1 = load <4 x i32> addrspace(1)* %in1
+  %0 = load <4 x i32>, <4 x i32> addrspace(1)* %in0
+  %1 = load <4 x i32>, <4 x i32> addrspace(1)* %in1
   %cmp = icmp ne <4 x i32> %0, %1
   %result = select <4 x i1> %cmp, <4 x i32> %0, <4 x i32> %1
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
@@ -68,8 +68,8 @@
 
 define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in0, <4 x float> addrspace(1)* %in1) {
 entry:
-  %0 = load <4 x float> addrspace(1)* %in0
-  %1 = load <4 x float> addrspace(1)* %in1
+  %0 = load <4 x float>, <4 x float> addrspace(1)* %in0
+  %1 = load <4 x float>, <4 x float> addrspace(1)* %in1
   %cmp = fcmp une <4 x float> %0, %1
   %result = select <4 x i1> %cmp, <4 x float> %0, <4 x float> %1
   store <4 x float> %result, <4 x float> addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/vtx-fetch-branch.ll b/llvm/test/CodeGen/R600/vtx-fetch-branch.ll
index bcbe34e..4584d6e 100644
--- a/llvm/test/CodeGen/R600/vtx-fetch-branch.ll
+++ b/llvm/test/CodeGen/R600/vtx-fetch-branch.ll
@@ -16,7 +16,7 @@
   br i1 %0, label %endif, label %if
 
 if:
-  %1 = load i32 addrspace(1)* %in
+  %1 = load i32, i32 addrspace(1)* %in
   br label %endif
 
 endif:
diff --git a/llvm/test/CodeGen/R600/vtx-schedule.ll b/llvm/test/CodeGen/R600/vtx-schedule.ll
index 8254c99..912e258 100644
--- a/llvm/test/CodeGen/R600/vtx-schedule.ll
+++ b/llvm/test/CodeGen/R600/vtx-schedule.ll
@@ -11,8 +11,8 @@
 ; CHECK: VTX_READ_32 [[IN1:T[0-9]+\.X]], [[IN1]], 0
 define void @test(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* addrspace(1)* nocapture %in0) {
 entry:
-  %0 = load i32 addrspace(1)* addrspace(1)* %in0
-  %1 = load i32 addrspace(1)* %0
+  %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in0
+  %1 = load i32, i32 addrspace(1)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/wait.ll b/llvm/test/CodeGen/R600/wait.ll
index 36b96a2..5cc7577 100644
--- a/llvm/test/CodeGen/R600/wait.ll
+++ b/llvm/test/CodeGen/R600/wait.ll
@@ -9,16 +9,16 @@
 define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 {
 main_body:
   %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 0
-  %tmp10 = load <16 x i8> addrspace(2)* %tmp, !tbaa !0
+  %tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
   %tmp11 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp10, i32 0, i32 %arg6)
   %tmp12 = extractelement <4 x float> %tmp11, i32 0
   %tmp13 = extractelement <4 x float> %tmp11, i32 1
   call void @llvm.AMDGPU.barrier.global() #1
   %tmp14 = extractelement <4 x float> %tmp11, i32 2
 ;  %tmp15 = extractelement <4 x float> %tmp11, i32 3
-  %tmp15 = load float addrspace(2)* %constptr, align 4 ; Force waiting for expcnt and lgkmcnt
+  %tmp15 = load float, float addrspace(2)* %constptr, align 4 ; Force waiting for expcnt and lgkmcnt
   %tmp16 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 1
-  %tmp17 = load <16 x i8> addrspace(2)* %tmp16, !tbaa !0
+  %tmp17 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp16, !tbaa !0
   %tmp18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp17, i32 0, i32 %arg6)
   %tmp19 = extractelement <4 x float> %tmp18, i32 0
   %tmp20 = extractelement <4 x float> %tmp18, i32 1
diff --git a/llvm/test/CodeGen/R600/xor.ll b/llvm/test/CodeGen/R600/xor.ll
index 1526e28..ea78cca 100644
--- a/llvm/test/CodeGen/R600/xor.ll
+++ b/llvm/test/CodeGen/R600/xor.ll
@@ -11,8 +11,8 @@
 ; SI: v_xor_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @xor_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
-  %a = load <2 x i32> addrspace(1) * %in0
-  %b = load <2 x i32> addrspace(1) * %in1
+  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in0
+  %b = load <2 x i32>, <2 x i32> addrspace(1) * %in1
   %result = xor <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -30,8 +30,8 @@
 ; SI: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
-  %a = load <4 x i32> addrspace(1) * %in0
-  %b = load <4 x i32> addrspace(1) * %in1
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in0
+  %b = load <4 x i32>, <4 x i32> addrspace(1) * %in1
   %result = xor <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -47,8 +47,8 @@
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
 define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
-  %a = load float addrspace(1) * %in0
-  %b = load float addrspace(1) * %in1
+  %a = load float, float addrspace(1) * %in0
+  %b = load float, float addrspace(1) * %in1
   %acmp = fcmp oge float %a, 0.000000e+00
   %bcmp = fcmp oge float %b, 1.000000e+00
   %xor = xor i1 %acmp, %bcmp
@@ -64,8 +64,8 @@
 ; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[XOR]]
 ; SI: buffer_store_byte [[RESULT]]
 define void @v_xor_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in0, i1 addrspace(1)* %in1) {
-  %a = load i1 addrspace(1)* %in0
-  %b = load i1 addrspace(1)* %in1
+  %a = load i1, i1 addrspace(1)* %in0
+  %b = load i1, i1 addrspace(1)* %in1
   %xor = xor i1 %a, %b
   store i1 %xor, i1 addrspace(1)* %out
   ret void
@@ -74,8 +74,8 @@
 ; FUNC-LABEL: {{^}}vector_xor_i32:
 ; SI: v_xor_b32_e32
 define void @vector_xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
-  %a = load i32 addrspace(1)* %in0
-  %b = load i32 addrspace(1)* %in1
+  %a = load i32, i32 addrspace(1)* %in0
+  %b = load i32, i32 addrspace(1)* %in1
   %result = xor i32 %a, %b
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -100,8 +100,8 @@
 ; FUNC-LABEL: {{^}}vector_not_i32:
 ; SI: v_not_b32
 define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
-  %a = load i32 addrspace(1)* %in0
-  %b = load i32 addrspace(1)* %in1
+  %a = load i32, i32 addrspace(1)* %in0
+  %b = load i32, i32 addrspace(1)* %in1
   %result = xor i32 %a, -1
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -112,8 +112,8 @@
 ; SI: v_xor_b32_e32
 ; SI: s_endpgm
 define void @vector_xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
-  %a = load i64 addrspace(1)* %in0
-  %b = load i64 addrspace(1)* %in1
+  %a = load i64, i64 addrspace(1)* %in0
+  %b = load i64, i64 addrspace(1)* %in1
   %result = xor i64 %a, %b
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -140,8 +140,8 @@
 ; SI: v_not_b32
 ; SI: v_not_b32
 define void @vector_not_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
-  %a = load i64 addrspace(1)* %in0
-  %b = load i64 addrspace(1)* %in1
+  %a = load i64, i64 addrspace(1)* %in0
+  %b = load i64, i64 addrspace(1)* %in1
   %result = xor i64 %a, -1
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -163,7 +163,7 @@
   br label %endif
 
 else:
-  %2 = load i64 addrspace(1)* %in
+  %2 = load i64, i64 addrspace(1)* %in
   br label %endif
 
 endif:
diff --git a/llvm/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll b/llvm/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
index 373a196..07e250b 100644
--- a/llvm/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
+++ b/llvm/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll
@@ -8,7 +8,7 @@
 entry:
 	%fsr = alloca i32		; <i32*> [#uses=4]
 	call void asm "st %fsr, $0", "=*m"(i32* %fsr) nounwind
-	%0 = load i32* %fsr, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* %fsr, align 4		; <i32> [#uses=1]
 	%1 = or i32 %0, 4194304		; <i32> [#uses=1]
 	store i32 %1, i32* %fsr, align 4
 	call void asm sideeffect "ld $0, %fsr", "*m"(i32* %fsr) nounwind
diff --git a/llvm/test/CodeGen/SPARC/2009-08-28-PIC.ll b/llvm/test/CodeGen/SPARC/2009-08-28-PIC.ll
index b004b11..baad2ae 100644
--- a/llvm/test/CodeGen/SPARC/2009-08-28-PIC.ll
+++ b/llvm/test/CodeGen/SPARC/2009-08-28-PIC.ll
@@ -14,7 +14,7 @@
 
 define i32 @func(i32 %a) nounwind readonly {
 entry:
-  %0 = load i32* @foo, align 4                    ; <i32> [#uses=1]
+  %0 = load i32, i32* @foo, align 4                    ; <i32> [#uses=1]
   ret i32 %0
 }
 
@@ -36,7 +36,7 @@
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:
-  %ret =  load i32* @foo, align 4
+  %ret =  load i32, i32* @foo, align 4
   ret i32 %ret
 
 if.end:
diff --git a/llvm/test/CodeGen/SPARC/2011-01-11-CC.ll b/llvm/test/CodeGen/SPARC/2011-01-11-CC.ll
index 50f3a65..6ea78dd 100644
--- a/llvm/test/CodeGen/SPARC/2011-01-11-CC.ll
+++ b/llvm/test/CodeGen/SPARC/2011-01-11-CC.ll
@@ -177,13 +177,13 @@
 entry:
    %0 = bitcast i8* %a to i128*
    %1 = bitcast i8* %b to i128*
-   %2 = load i128* %0
-   %3 = load i128* %1
+   %2 = load i128, i128* %0
+   %3 = load i128, i128* %1
    %4 = add i128 %2, %3
    %5 = bitcast i8* %sum to i128*
    store i128 %4, i128* %5
    tail call void asm sideeffect "", "=*m,*m"(i128 *%0, i128* %5) nounwind
-   %6 = load i128* %0
+   %6 = load i128, i128* %0
    %7 = sub i128 %2, %6
    %8 = bitcast i8* %diff to i128*
    store i128 %7, i128* %8
diff --git a/llvm/test/CodeGen/SPARC/2011-01-22-SRet.ll b/llvm/test/CodeGen/SPARC/2011-01-22-SRet.ll
index d9ebf3a..ae9764e 100644
--- a/llvm/test/CodeGen/SPARC/2011-01-22-SRet.ll
+++ b/llvm/test/CodeGen/SPARC/2011-01-22-SRet.ll
@@ -25,11 +25,11 @@
   %f = alloca %struct.foo_t, align 8
   call void @make_foo(%struct.foo_t* noalias sret %f, i32 10, i32 20, i32 30) nounwind
   %0 = getelementptr inbounds %struct.foo_t, %struct.foo_t* %f, i32 0, i32 0
-  %1 = load i32* %0, align 8
+  %1 = load i32, i32* %0, align 8
   %2 = getelementptr inbounds %struct.foo_t, %struct.foo_t* %f, i32 0, i32 1
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = getelementptr inbounds %struct.foo_t, %struct.foo_t* %f, i32 0, i32 2
-  %5 = load i32* %4, align 8
+  %5 = load i32, i32* %4, align 8
   %6 = add nsw i32 %3, %1
   %7 = add nsw i32 %6, %5
   ret i32 %7
diff --git a/llvm/test/CodeGen/SPARC/64abi.ll b/llvm/test/CodeGen/SPARC/64abi.ll
index d31fc4f..a7e482c 100644
--- a/llvm/test/CodeGen/SPARC/64abi.ll
+++ b/llvm/test/CodeGen/SPARC/64abi.ll
@@ -240,10 +240,10 @@
 ; CHECK: ldx [%i2], %i0
 ; CHECK: ldx [%i3], %i1
 define { i64, i64 } @ret_i64_pair(i32 %a0, i32 %a1, i64* %p, i64* %q) {
-  %r1 = load i64* %p
+  %r1 = load i64, i64* %p
   %rv1 = insertvalue { i64, i64 } undef, i64 %r1, 0
   store i64 0, i64* %p
-  %r2 = load i64* %q
+  %r2 = load i64, i64* %q
   %rv2 = insertvalue { i64, i64 } %rv1, i64 %r2, 1
   ret { i64, i64 } %rv2
 }
@@ -268,10 +268,10 @@
 ; CHECK: ld [%i3], %f2
 define { i32, float } @ret_i32_float_pair(i32 %a0, i32 %a1,
                                           i32* %p, float* %q) {
-  %r1 = load i32* %p
+  %r1 = load i32, i32* %p
   %rv1 = insertvalue { i32, float } undef, i32 %r1, 0
   store i32 0, i32* %p
-  %r2 = load float* %q
+  %r2 = load float, float* %q
   %rv2 = insertvalue { i32, float } %rv1, float %r2, 1
   ret { i32, float } %rv2
 }
@@ -297,10 +297,10 @@
 ; CHECK: ld [%i3], %f1
 define inreg { i32, float } @ret_i32_float_packed(i32 %a0, i32 %a1,
                                                   i32* %p, float* %q) {
-  %r1 = load i32* %p
+  %r1 = load i32, i32* %p
   %rv1 = insertvalue { i32, float } undef, i32 %r1, 0
   store i32 0, i32* %p
-  %r2 = load float* %q
+  %r2 = load float, float* %q
   %rv2 = insertvalue { i32, float } %rv1, float %r2, 1
   ret { i32, float } %rv2
 }
@@ -329,10 +329,10 @@
 ; CHECK: or [[R3]], [[R1]], %i0
 define inreg { i32, i32 } @ret_i32_packed(i32 %a0, i32 %a1,
                                           i32* %p, i32* %q) {
-  %r1 = load i32* %p
+  %r1 = load i32, i32* %p
   %rv1 = insertvalue { i32, i32 } undef, i32 %r1, 1
   store i32 0, i32* %p
-  %r2 = load i32* %q
+  %r2 = load i32, i32* %q
   %rv2 = insertvalue { i32, i32 } %rv1, i32 %r2, 0
   ret { i32, i32 } %rv2
 }
diff --git a/llvm/test/CodeGen/SPARC/64bit.ll b/llvm/test/CodeGen/SPARC/64bit.ll
index 57e1fd7..274fa32 100644
--- a/llvm/test/CodeGen/SPARC/64bit.ll
+++ b/llvm/test/CodeGen/SPARC/64bit.ll
@@ -140,17 +140,17 @@
 ; CHECK: ldsh [%i3]
 ; CHECK: sth %
 define i64 @loads(i64* %p, i32* %q, i32* %r, i16* %s) {
-  %a = load i64* %p
+  %a = load i64, i64* %p
   %ai = add i64 1, %a
   store i64 %ai, i64* %p
-  %b = load i32* %q
+  %b = load i32, i32* %q
   %b2 = zext i32 %b to i64
   %bi = trunc i64 %ai to i32
   store i32 %bi, i32* %q
-  %c = load i32* %r
+  %c = load i32, i32* %r
   %c2 = sext i32 %c to i64
   store i64 %ai, i64* %p
-  %d = load i16* %s
+  %d = load i16, i16* %s
   %d2 = sext i16 %d to i64
   %di = trunc i64 %ai to i16
   store i16 %di, i16* %s
@@ -164,7 +164,7 @@
 ; CHECK: load_bool
 ; CHECK: ldub [%i0], %i0
 define i64 @load_bool(i1* %p) {
-  %a = load i1* %p
+  %a = load i1, i1* %p
   %b = zext i1 %a to i64
   ret i64 %b
 }
@@ -178,7 +178,7 @@
 define void @stores(i64* %p, i32* %q, i16* %r, i8* %s) {
   %p1 = getelementptr i64, i64* %p, i64 1
   %p2 = getelementptr i64, i64* %p, i64 2
-  %pv = load i64* %p1
+  %pv = load i64, i64* %p1
   store i64 %pv, i64* %p2
 
   %q2 = getelementptr i32, i32* %q, i32 -2
@@ -200,8 +200,8 @@
 ; CHECK: ldub [%i0], [[R:%[goli][0-7]]]
 ; CHECK: sll [[R]], [[R]], %i0
 define i8 @promote_shifts(i8* %p) {
-  %L24 = load i8* %p
-  %L32 = load i8* %p
+  %L24 = load i8, i8* %p
+  %L32 = load i8, i8* %p
   %B36 = shl i8 %L24, %L32
   ret i8 %B36
 }
diff --git a/llvm/test/CodeGen/SPARC/atomics.ll b/llvm/test/CodeGen/SPARC/atomics.ll
index ee6c1f8..bea9a33 100644
--- a/llvm/test/CodeGen/SPARC/atomics.ll
+++ b/llvm/test/CodeGen/SPARC/atomics.ll
@@ -9,8 +9,8 @@
 ; CHECK:       st {{.+}}, [%o2]
 define i32 @test_atomic_i32(i32* %ptr1, i32* %ptr2, i32* %ptr3) {
 entry:
-  %0 = load atomic i32* %ptr1 acquire, align 8
-  %1 = load atomic i32* %ptr2 acquire, align 8
+  %0 = load atomic i32, i32* %ptr1 acquire, align 8
+  %1 = load atomic i32, i32* %ptr2 acquire, align 8
   %2 = add i32 %0, %1
   store atomic i32 %2, i32* %ptr3 release, align 8
   ret i32 %2
@@ -25,8 +25,8 @@
 ; CHECK:       stx {{.+}}, [%o2]
 define i64 @test_atomic_i64(i64* %ptr1, i64* %ptr2, i64* %ptr3) {
 entry:
-  %0 = load atomic i64* %ptr1 acquire, align 8
-  %1 = load atomic i64* %ptr2 acquire, align 8
+  %0 = load atomic i64, i64* %ptr1 acquire, align 8
+  %1 = load atomic i64, i64* %ptr2 acquire, align 8
   %2 = add i64 %0, %1
   store atomic i64 %2, i64* %ptr3 release, align 8
   ret i64 %2
diff --git a/llvm/test/CodeGen/SPARC/fp128.ll b/llvm/test/CodeGen/SPARC/fp128.ll
index a06112a..c864cb7 100644
--- a/llvm/test/CodeGen/SPARC/fp128.ll
+++ b/llvm/test/CodeGen/SPARC/fp128.ll
@@ -28,10 +28,10 @@
 
 define void @f128_ops(fp128* noalias sret %scalar.result, fp128* byval %a, fp128* byval %b, fp128* byval %c, fp128* byval %d) {
 entry:
-  %0 = load fp128* %a, align 8
-  %1 = load fp128* %b, align 8
-  %2 = load fp128* %c, align 8
-  %3 = load fp128* %d, align 8
+  %0 = load fp128, fp128* %a, align 8
+  %1 = load fp128, fp128* %b, align 8
+  %2 = load fp128, fp128* %c, align 8
+  %3 = load fp128, fp128* %d, align 8
   %4 = fadd fp128 %0, %1
   %5 = fsub fp128 %4, %2
   %6 = fmul fp128 %5, %3
@@ -56,7 +56,7 @@
 
 define void @f128_spill(fp128* noalias sret %scalar.result, fp128* byval %a) {
 entry:
-  %0 = load fp128* %a, align 8
+  %0 = load fp128, fp128* %a, align 8
   call void asm sideeffect "", "~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"()
   store fp128 %0, fp128* %scalar.result, align 8
   ret void
@@ -71,8 +71,8 @@
 
 define i32 @f128_compare(fp128* byval %f0, fp128* byval %f1, i32 %a, i32 %b) {
 entry:
-   %0 = load fp128* %f0, align 8
-   %1 = load fp128* %f1, align 8
+   %0 = load fp128, fp128* %f0, align 8
+   %1 = load fp128, fp128* %f1, align 8
    %cond = fcmp ult fp128 %0, %1
    %ret = select i1 %cond, i32 %a, i32 %b
    ret i32 %ret
@@ -107,7 +107,7 @@
 
 define void @f128_abs(fp128* noalias sret %scalar.result, fp128* byval %a) {
 entry:
-  %0 = load fp128* %a, align 8
+  %0 = load fp128, fp128* %a, align 8
   %1 = tail call fp128 @llvm.fabs.f128(fp128 %0)
   store fp128 %1, fp128* %scalar.result, align 8
   ret void
@@ -142,8 +142,8 @@
 
 define void @fp128_unaligned(fp128* %a, fp128* %b, fp128* %c) {
 entry:
-  %0 = load fp128* %a, align 1
-  %1 = load fp128* %b, align 1
+  %0 = load fp128, fp128* %a, align 1
+  %1 = load fp128, fp128* %b, align 1
   %2 = fadd fp128 %0, %1
   store fp128 %2, fp128* %c, align 1
   ret void
@@ -173,8 +173,8 @@
 
 define i32 @f128_to_i32(fp128* %a, fp128* %b) {
 entry:
-  %0 = load fp128* %a, align 8
-  %1 = load fp128* %b, align 8
+  %0 = load fp128, fp128* %a, align 8
+  %1 = load fp128, fp128* %b, align 8
   %2 = fptoui fp128 %0 to i32
   %3 = fptosi fp128 %1 to i32
   %4 = add i32 %2, %3
@@ -197,12 +197,12 @@
 entry:
   %0 = sitofp i64 %a to fp128
   store  fp128 %0, fp128* %ptr1, align 8
-  %cval = load fp128* %c, align 8
+  %cval = load fp128, fp128* %c, align 8
   %1 = fptosi fp128 %cval to i64
   store  i64 %1, i64* %ptr0, align 8
   %2 = sitofp i32 %b to fp128
   store  fp128 %2, fp128* %ptr1, align 8
-  %dval = load fp128* %d, align 8
+  %dval = load fp128, fp128* %d, align 8
   %3 = fptosi fp128 %dval to i32
   %4 = bitcast i64* %ptr0 to i32*
   store  i32 %3, i32* %4, align 8
@@ -225,12 +225,12 @@
 entry:
   %0 = uitofp i64 %a to fp128
   store  fp128 %0, fp128* %ptr1, align 8
-  %cval = load fp128* %c, align 8
+  %cval = load fp128, fp128* %c, align 8
   %1 = fptoui fp128 %cval to i64
   store  i64 %1, i64* %ptr0, align 8
   %2 = uitofp i32 %b to fp128
   store  fp128 %2, fp128* %ptr1, align 8
-  %dval = load fp128* %d, align 8
+  %dval = load fp128, fp128* %d, align 8
   %3 = fptoui fp128 %dval to i32
   %4 = bitcast i64* %ptr0 to i32*
   store  i32 %3, i32* %4, align 8
@@ -242,7 +242,7 @@
 
 define void @f128_neg(fp128* noalias sret %scalar.result, fp128* byval %a) {
 entry:
-  %0 = load fp128* %a, align 8
+  %0 = load fp128, fp128* %a, align 8
   %1 = fsub fp128 0xL00000000000000008000000000000000, %0
   store fp128 %1, fp128* %scalar.result, align 8
   ret void
diff --git a/llvm/test/CodeGen/SPARC/globals.ll b/llvm/test/CodeGen/SPARC/globals.ll
index 3d3eba2..3ef135f 100644
--- a/llvm/test/CodeGen/SPARC/globals.ll
+++ b/llvm/test/CodeGen/SPARC/globals.ll
@@ -8,7 +8,7 @@
 @G = external global i8
 
 define zeroext i8 @loadG() {
-  %tmp = load i8* @G
+  %tmp = load i8, i8* @G
   ret i8 %tmp
 }
 
diff --git a/llvm/test/CodeGen/SPARC/leafproc.ll b/llvm/test/CodeGen/SPARC/leafproc.ll
index e6a77dc..fd74e5c 100644
--- a/llvm/test/CodeGen/SPARC/leafproc.ll
+++ b/llvm/test/CodeGen/SPARC/leafproc.ll
@@ -75,6 +75,6 @@
   %2 = getelementptr inbounds [2 x i32], [2 x i32]* %array, i32 0, i32 1
   store i32 2, i32* %2, align 4
   %3 = getelementptr inbounds [2 x i32], [2 x i32]* %array, i32 0, i32 %a
-  %4 = load i32* %3, align 4
+  %4 = load i32, i32* %3, align 4
   ret i32 %4
 }
diff --git a/llvm/test/CodeGen/SPARC/mult-alt-generic-sparc.ll b/llvm/test/CodeGen/SPARC/mult-alt-generic-sparc.ll
index 6a67616..57864c2 100644
--- a/llvm/test/CodeGen/SPARC/mult-alt-generic-sparc.ll
+++ b/llvm/test/CodeGen/SPARC/mult-alt-generic-sparc.ll
@@ -33,10 +33,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,<r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,r<"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -48,10 +48,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,>r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,r>"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -63,7 +63,7 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
   ret void
@@ -120,10 +120,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,imr"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r,imr"(i32 1) nounwind
@@ -137,10 +137,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,X"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r,X"(i32 1) nounwind
@@ -166,7 +166,7 @@
 
 define void @multi_m() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   call void asm "foo $1,$0", "=*m|r,m|r"(i32* @mout0, i32 %tmp) nounwind
   ret void
 }
@@ -191,10 +191,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|<r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|r<"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -206,10 +206,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|>r"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|r>"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -221,7 +221,7 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|m"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
   ret void
@@ -278,10 +278,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r|r,r|imr"(i32 1) nounwind
@@ -295,10 +295,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r|r,r|X"(i32 1) nounwind
diff --git a/llvm/test/CodeGen/SPARC/obj-relocs.ll b/llvm/test/CodeGen/SPARC/obj-relocs.ll
index 6d57598..115263a 100644
--- a/llvm/test/CodeGen/SPARC/obj-relocs.ll
+++ b/llvm/test/CodeGen/SPARC/obj-relocs.ll
@@ -21,7 +21,7 @@
 
 define i64 @foo(i64 %a) {
 entry:
-  %0 = load i64* @AGlobalVar, align 4
+  %0 = load i64, i64* @AGlobalVar, align 4
   %1 = add i64 %a, %0
   %2 = call i64 @bar(i64 %1)
   ret i64 %2
diff --git a/llvm/test/CodeGen/SPARC/private.ll b/llvm/test/CodeGen/SPARC/private.ll
index 38cea4c..400d907 100644
--- a/llvm/test/CodeGen/SPARC/private.ll
+++ b/llvm/test/CodeGen/SPARC/private.ll
@@ -11,7 +11,7 @@
 
 define i32 @bar() {
         call void @foo()
-	%1 = load i32* @baz, align 4
+	%1 = load i32, i32* @baz, align 4
         ret i32 %1
 }
 
diff --git a/llvm/test/CodeGen/SPARC/setjmp.ll b/llvm/test/CodeGen/SPARC/setjmp.ll
index 0f9e546..b43b880 100644
--- a/llvm/test/CodeGen/SPARC/setjmp.ll
+++ b/llvm/test/CodeGen/SPARC/setjmp.ll
@@ -35,7 +35,7 @@
   %4 = getelementptr inbounds %struct.jmpbuf_env, %struct.jmpbuf_env* %inbuf, i32 0, i32 3
   store i32 %3, i32* %4, align 4, !tbaa !4
   store %struct.jmpbuf_env* %inbuf, %struct.jmpbuf_env** @jenv, align 4, !tbaa !3
-  %5 = load i32* %1, align 4, !tbaa !4
+  %5 = load i32, i32* %1, align 4, !tbaa !4
   %6 = icmp eq i32 %5, 1
   %7 = icmp eq i32 %3, 0
   %or.cond = and i1 %6, %7
@@ -46,7 +46,7 @@
   unreachable
 
 bar.exit:                                         ; preds = %entry
-  %8 = load i32* %0, align 4, !tbaa !4
+  %8 = load i32, i32* %0, align 4, !tbaa !4
   %9 = call i32 (i8*, ...)* @printf(i8* noalias getelementptr inbounds ([30 x i8]* @.cst, i32 0, i32 0), i32 %8) #0
   ret i32 0
 }
diff --git a/llvm/test/CodeGen/SPARC/spillsize.ll b/llvm/test/CodeGen/SPARC/spillsize.ll
index 2fcab54..a82e509 100644
--- a/llvm/test/CodeGen/SPARC/spillsize.ll
+++ b/llvm/test/CodeGen/SPARC/spillsize.ll
@@ -11,13 +11,13 @@
 ; CHECK: ldx [%fp+
 define void @spill4(i64* nocapture %p) {
 entry:
-  %val0 = load i64* %p
+  %val0 = load i64, i64* %p
   %cmp0 = icmp ult i64 %val0, 385672958347594845
   %cm80 = zext i1 %cmp0 to i64
   store i64 %cm80, i64* %p, align 8
   tail call void asm sideeffect "", "~{i0},~{i1},~{i2},~{i3},~{i4},~{i5},~{g2},~{g3},~{g4},~{g5},~{l0},~{l1},~{l2},~{l3},~{l4},~{l5},~{l6},~{l7},~{o0},~{o1},~{o2},~{o3},~{o4},~{o5},~{o7}"()
   %arrayidx1 = getelementptr inbounds i64, i64* %p, i64 1
-  %val = load i64* %arrayidx1
+  %val = load i64, i64* %arrayidx1
   %cmp = icmp ult i64 %val, 385672958347594845
   %cm8 = select i1 %cmp, i64 10, i64 20
   store i64 %cm8, i64* %arrayidx1, align 8
diff --git a/llvm/test/CodeGen/SPARC/tls.ll b/llvm/test/CodeGen/SPARC/tls.ll
index ce3e005..d54cf60 100644
--- a/llvm/test/CodeGen/SPARC/tls.ll
+++ b/llvm/test/CodeGen/SPARC/tls.ll
@@ -34,7 +34,7 @@
 
 define i32 @test_tls_local() {
 entry:
-  %0 = load i32* @local_symbol, align 4
+  %0 = load i32, i32* @local_symbol, align 4
   %1 = add i32 %0, 1
   store i32 %1, i32* @local_symbol, align 4
   ret i32 %1
@@ -68,7 +68,7 @@
 
 define i32 @test_tls_extern() {
 entry:
-  %0 = load i32* @extern_symbol, align 4
+  %0 = load i32, i32* @extern_symbol, align 4
   %1 = add i32 %0, 1
   store i32 %1, i32* @extern_symbol, align 4
   ret i32 %1
diff --git a/llvm/test/CodeGen/SPARC/varargs.ll b/llvm/test/CodeGen/SPARC/varargs.ll
index dea512a..9f18644 100644
--- a/llvm/test/CodeGen/SPARC/varargs.ll
+++ b/llvm/test/CodeGen/SPARC/varargs.ll
@@ -25,7 +25,7 @@
   %fmt.addr.0 = phi i8* [ %fmt, %entry ], [ %incdec.ptr, %for.cond.backedge ]
   %sum.addr.0 = phi double [ %sum, %entry ], [ %sum.addr.0.be, %for.cond.backedge ]
   %incdec.ptr = getelementptr inbounds i8, i8* %fmt.addr.0, i64 1
-  %0 = load i8* %fmt.addr.0, align 1
+  %0 = load i8, i8* %fmt.addr.0, align 1
   %conv = sext i8 %0 to i32
   switch i32 %conv, label %sw.default [
     i32 105, label %sw.bb
diff --git a/llvm/test/CodeGen/SystemZ/addr-01.ll b/llvm/test/CodeGen/SystemZ/addr-01.ll
index d0960cd..736efe8 100644
--- a/llvm/test/CodeGen/SystemZ/addr-01.ll
+++ b/llvm/test/CodeGen/SystemZ/addr-01.ll
@@ -10,7 +10,7 @@
 ; CHECK: br %r14
   %add = add i64 %addr, %index
   %ptr = inttoptr i64 %add to i8 *
-  %a = load volatile i8 *%ptr
+  %a = load volatile i8 , i8 *%ptr
   ret void
 }
 
@@ -22,7 +22,7 @@
   %add1 = add i64 %addr, %index
   %add2 = add i64 %add1, 100
   %ptr = inttoptr i64 %add2 to i8 *
-  %a = load volatile i8 *%ptr
+  %a = load volatile i8 , i8 *%ptr
   ret void
 }
 
@@ -34,7 +34,7 @@
   %add1 = add i64 %addr, 100
   %add2 = add i64 %add1, %index
   %ptr = inttoptr i64 %add2 to i8 *
-  %a = load volatile i8 *%ptr
+  %a = load volatile i8 , i8 *%ptr
   ret void
 }
 
@@ -46,7 +46,7 @@
   %add1 = add i64 %addr, %index
   %add2 = sub i64 %add1, 100
   %ptr = inttoptr i64 %add2 to i8 *
-  %a = load volatile i8 *%ptr
+  %a = load volatile i8 , i8 *%ptr
   ret void
 }
 
@@ -58,7 +58,7 @@
   %add1 = sub i64 %addr, 100
   %add2 = add i64 %add1, %index
   %ptr = inttoptr i64 %add2 to i8 *
-  %a = load volatile i8 *%ptr
+  %a = load volatile i8 , i8 *%ptr
   ret void
 }
 
@@ -72,7 +72,7 @@
   %or = or i64 %aligned, 6
   %add = add i64 %or, %index
   %ptr = inttoptr i64 %add to i8 *
-  %a = load volatile i8 *%ptr
+  %a = load volatile i8 , i8 *%ptr
   ret void
 }
 
@@ -85,7 +85,7 @@
   %or = or i64 %addr, 6
   %add = add i64 %or, %index
   %ptr = inttoptr i64 %add to i8 *
-  %a = load volatile i8 *%ptr
+  %a = load volatile i8 , i8 *%ptr
   ret void
 }
 
@@ -102,6 +102,6 @@
   %add = add i64 %aligned, %index
   %or = or i64 %add, 6
   %ptr = inttoptr i64 %or to i8 *
-  %a = load volatile i8 *%ptr
+  %a = load volatile i8 , i8 *%ptr
   ret void
 }
diff --git a/llvm/test/CodeGen/SystemZ/addr-02.ll b/llvm/test/CodeGen/SystemZ/addr-02.ll
index 56c4879..7e9b2f1 100644
--- a/llvm/test/CodeGen/SystemZ/addr-02.ll
+++ b/llvm/test/CodeGen/SystemZ/addr-02.ll
@@ -11,7 +11,7 @@
 ; CHECK: br %r14
   %add = add i64 %addr, %index
   %ptr = inttoptr i64 %add to i8 *
-  %a = load volatile i8 *%ptr
+  %a = load volatile i8 , i8 *%ptr
   store volatile i8 *%ptr, i8 **%dst
   ret void
 }
@@ -24,7 +24,7 @@
   %add1 = add i64 %addr, %index
   %add2 = add i64 %add1, 100
   %ptr = inttoptr i64 %add2 to i8 *
-  %a = load volatile i8 *%ptr
+  %a = load volatile i8 , i8 *%ptr
   store volatile i8 *%ptr, i8 **%dst
   ret void
 }
@@ -37,7 +37,7 @@
   %add1 = add i64 %addr, 100
   %add2 = add i64 %add1, %index
   %ptr = inttoptr i64 %add2 to i8 *
-  %a = load volatile i8 *%ptr
+  %a = load volatile i8 , i8 *%ptr
   store volatile i8 *%ptr, i8 **%dst
   ret void
 }
@@ -50,7 +50,7 @@
   %add1 = add i64 %addr, %index
   %add2 = sub i64 %add1, 100
   %ptr = inttoptr i64 %add2 to i8 *
-  %a = load volatile i8 *%ptr
+  %a = load volatile i8 , i8 *%ptr
   store volatile i8 *%ptr, i8 **%dst
   ret void
 }
@@ -63,7 +63,7 @@
   %add1 = sub i64 %addr, 100
   %add2 = add i64 %add1, %index
   %ptr = inttoptr i64 %add2 to i8 *
-  %a = load volatile i8 *%ptr
+  %a = load volatile i8 , i8 *%ptr
   store volatile i8 *%ptr, i8 **%dst
   ret void
 }
@@ -78,7 +78,7 @@
   %or = or i64 %aligned, 6
   %add = add i64 %or, %index
   %ptr = inttoptr i64 %add to i8 *
-  %a = load volatile i8 *%ptr
+  %a = load volatile i8 , i8 *%ptr
   store volatile i8 *%ptr, i8 **%dst
   ret void
 }
@@ -92,7 +92,7 @@
   %or = or i64 %addr, 6
   %add = add i64 %or, %index
   %ptr = inttoptr i64 %add to i8 *
-  %a = load volatile i8 *%ptr
+  %a = load volatile i8 , i8 *%ptr
   store volatile i8 *%ptr, i8 **%dst
   ret void
 }
@@ -110,7 +110,7 @@
   %add = add i64 %aligned, %index
   %or = or i64 %add, 6
   %ptr = inttoptr i64 %or to i8 *
-  %a = load volatile i8 *%ptr
+  %a = load volatile i8 , i8 *%ptr
   store volatile i8 *%ptr, i8 **%dst
   ret void
 }
diff --git a/llvm/test/CodeGen/SystemZ/addr-03.ll b/llvm/test/CodeGen/SystemZ/addr-03.ll
index 1146926..b2fd400 100644
--- a/llvm/test/CodeGen/SystemZ/addr-03.ll
+++ b/llvm/test/CodeGen/SystemZ/addr-03.ll
@@ -7,7 +7,7 @@
 ; CHECK: lb %r0, 0
 ; CHECK: br %r14
   %ptr = inttoptr i64 0 to i8 *
-  %val = load volatile i8 *%ptr
+  %val = load volatile i8 , i8 *%ptr
   ret void
 }
 
@@ -16,7 +16,7 @@
 ; CHECK: lb %r0, -524288
 ; CHECK: br %r14
   %ptr = inttoptr i64 -524288 to i8 *
-  %val = load volatile i8 *%ptr
+  %val = load volatile i8 , i8 *%ptr
   ret void
 }
 
@@ -25,7 +25,7 @@
 ; CHECK-NOT: lb %r0, -524289
 ; CHECK: br %r14
   %ptr = inttoptr i64 -524289 to i8 *
-  %val = load volatile i8 *%ptr
+  %val = load volatile i8 , i8 *%ptr
   ret void
 }
 
@@ -34,7 +34,7 @@
 ; CHECK: lb %r0, 524287
 ; CHECK: br %r14
   %ptr = inttoptr i64 524287 to i8 *
-  %val = load volatile i8 *%ptr
+  %val = load volatile i8 , i8 *%ptr
   ret void
 }
 
@@ -43,6 +43,6 @@
 ; CHECK-NOT: lb %r0, 524288
 ; CHECK: br %r14
   %ptr = inttoptr i64 524288 to i8 *
-  %val = load volatile i8 *%ptr
+  %val = load volatile i8 , i8 *%ptr
   ret void
 }
diff --git a/llvm/test/CodeGen/SystemZ/alias-01.ll b/llvm/test/CodeGen/SystemZ/alias-01.ll
index 89a7318..852d18e 100644
--- a/llvm/test/CodeGen/SystemZ/alias-01.ll
+++ b/llvm/test/CodeGen/SystemZ/alias-01.ll
@@ -7,7 +7,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK-NOT: %r15
 ; CHECK: br %r14
-  %val = load <16 x i32> *%src1, !tbaa !1
+  %val = load <16 x i32> , <16 x i32> *%src1, !tbaa !1
   %add = add <16 x i32> %val, %val
   %res = bitcast <16 x i32> %add to <16 x float>
   store <16 x float> %res, <16 x float> *%dest, !tbaa !2
diff --git a/llvm/test/CodeGen/SystemZ/and-01.ll b/llvm/test/CodeGen/SystemZ/and-01.ll
index 469928f..56fe279 100644
--- a/llvm/test/CodeGen/SystemZ/and-01.ll
+++ b/llvm/test/CodeGen/SystemZ/and-01.ll
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: n %r2, 0(%r3)
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %and = and i32 %a, %b
   ret i32 %and
 }
@@ -30,7 +30,7 @@
 ; CHECK: n %r2, 4092(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 1023
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %and = and i32 %a, %b
   ret i32 %and
 }
@@ -41,7 +41,7 @@
 ; CHECK: ny %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 1024
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %and = and i32 %a, %b
   ret i32 %and
 }
@@ -52,7 +52,7 @@
 ; CHECK: ny %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131071
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %and = and i32 %a, %b
   ret i32 %and
 }
@@ -65,7 +65,7 @@
 ; CHECK: n %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %and = and i32 %a, %b
   ret i32 %and
 }
@@ -76,7 +76,7 @@
 ; CHECK: ny %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -1
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %and = and i32 %a, %b
   ret i32 %and
 }
@@ -87,7 +87,7 @@
 ; CHECK: ny %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %and = and i32 %a, %b
   ret i32 %and
 }
@@ -100,7 +100,7 @@
 ; CHECK: n %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131073
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %and = and i32 %a, %b
   ret i32 %and
 }
@@ -113,7 +113,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4092
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %and = and i32 %a, %b
   ret i32 %and
 }
@@ -126,7 +126,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %and = and i32 %a, %b
   ret i32 %and
 }
@@ -147,16 +147,16 @@
   %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
   %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
 
-  %val0 = load i32 *%ptr0
-  %val1 = load i32 *%ptr1
-  %val2 = load i32 *%ptr2
-  %val3 = load i32 *%ptr3
-  %val4 = load i32 *%ptr4
-  %val5 = load i32 *%ptr5
-  %val6 = load i32 *%ptr6
-  %val7 = load i32 *%ptr7
-  %val8 = load i32 *%ptr8
-  %val9 = load i32 *%ptr9
+  %val0 = load i32 , i32 *%ptr0
+  %val1 = load i32 , i32 *%ptr1
+  %val2 = load i32 , i32 *%ptr2
+  %val3 = load i32 , i32 *%ptr3
+  %val4 = load i32 , i32 *%ptr4
+  %val5 = load i32 , i32 *%ptr5
+  %val6 = load i32 , i32 *%ptr6
+  %val7 = load i32 , i32 *%ptr7
+  %val8 = load i32 , i32 *%ptr8
+  %val9 = load i32 , i32 *%ptr9
 
   %ret = call i32 @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/and-03.ll b/llvm/test/CodeGen/SystemZ/and-03.ll
index b12390e..5c15d24 100644
--- a/llvm/test/CodeGen/SystemZ/and-03.ll
+++ b/llvm/test/CodeGen/SystemZ/and-03.ll
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: ng %r2, 0(%r3)
 ; CHECK: br %r14
-  %b = load i64 *%src
+  %b = load i64 , i64 *%src
   %and = and i64 %a, %b
   ret i64 %and
 }
@@ -30,7 +30,7 @@
 ; CHECK: ng %r2, 524280(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65535
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %and = and i64 %a, %b
   ret i64 %and
 }
@@ -43,7 +43,7 @@
 ; CHECK: ng %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65536
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %and = and i64 %a, %b
   ret i64 %and
 }
@@ -54,7 +54,7 @@
 ; CHECK: ng %r2, -8(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -1
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %and = and i64 %a, %b
   ret i64 %and
 }
@@ -65,7 +65,7 @@
 ; CHECK: ng %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65536
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %and = and i64 %a, %b
   ret i64 %and
 }
@@ -78,7 +78,7 @@
 ; CHECK: ng %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65537
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %and = and i64 %a, %b
   ret i64 %and
 }
@@ -91,7 +91,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524280
   %ptr = inttoptr i64 %add2 to i64 *
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %and = and i64 %a, %b
   ret i64 %and
 }
@@ -112,16 +112,16 @@
   %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
   %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
 
-  %val0 = load i64 *%ptr0
-  %val1 = load i64 *%ptr1
-  %val2 = load i64 *%ptr2
-  %val3 = load i64 *%ptr3
-  %val4 = load i64 *%ptr4
-  %val5 = load i64 *%ptr5
-  %val6 = load i64 *%ptr6
-  %val7 = load i64 *%ptr7
-  %val8 = load i64 *%ptr8
-  %val9 = load i64 *%ptr9
+  %val0 = load i64 , i64 *%ptr0
+  %val1 = load i64 , i64 *%ptr1
+  %val2 = load i64 , i64 *%ptr2
+  %val3 = load i64 , i64 *%ptr3
+  %val4 = load i64 , i64 *%ptr4
+  %val5 = load i64 , i64 *%ptr5
+  %val6 = load i64 , i64 *%ptr6
+  %val7 = load i64 , i64 *%ptr7
+  %val8 = load i64 , i64 *%ptr8
+  %val9 = load i64 , i64 *%ptr9
 
   %ret = call i64 @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/and-05.ll b/llvm/test/CodeGen/SystemZ/and-05.ll
index 1b0d0bb..488ec5b 100644
--- a/llvm/test/CodeGen/SystemZ/and-05.ll
+++ b/llvm/test/CodeGen/SystemZ/and-05.ll
@@ -7,7 +7,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: ni 0(%r2), 1
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %and = and i8 %val, -255
   store i8 %and, i8 *%ptr
   ret void
@@ -18,7 +18,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %and = and i8 %val, -2
   store i8 %and, i8 *%ptr
   ret void
@@ -29,7 +29,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK: ni 0(%r2), 1
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %and = and i8 %val, 1
   store i8 %and, i8 *%ptr
   ret void
@@ -40,7 +40,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %and = and i8 %val, 254
   store i8 %and, i8 *%ptr
   ret void
@@ -52,7 +52,7 @@
 ; CHECK: ni 4095(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 4095
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %and = and i8 %val, 127
   store i8 %and, i8 *%ptr
   ret void
@@ -64,7 +64,7 @@
 ; CHECK: niy 4096(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 4096
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %and = and i8 %val, 127
   store i8 %and, i8 *%ptr
   ret void
@@ -76,7 +76,7 @@
 ; CHECK: niy 524287(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524287
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %and = and i8 %val, 127
   store i8 %and, i8 *%ptr
   ret void
@@ -90,7 +90,7 @@
 ; CHECK: ni 0(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524288
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %and = and i8 %val, 127
   store i8 %and, i8 *%ptr
   ret void
@@ -102,7 +102,7 @@
 ; CHECK: niy -1(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -1
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %and = and i8 %val, 127
   store i8 %and, i8 *%ptr
   ret void
@@ -114,7 +114,7 @@
 ; CHECK: niy -524288(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524288
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %and = and i8 %val, 127
   store i8 %and, i8 *%ptr
   ret void
@@ -128,7 +128,7 @@
 ; CHECK: ni 0(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524289
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %and = and i8 %val, 127
   store i8 %and, i8 *%ptr
   ret void
@@ -143,7 +143,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4095
   %ptr = inttoptr i64 %add2 to i8 *
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %and = and i8 %val, 127
   store i8 %and, i8 *%ptr
   ret void
@@ -158,7 +158,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i8 *
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %and = and i8 %val, 127
   store i8 %and, i8 *%ptr
   ret void
diff --git a/llvm/test/CodeGen/SystemZ/and-06.ll b/llvm/test/CodeGen/SystemZ/and-06.ll
index f796618..537ee10 100644
--- a/llvm/test/CodeGen/SystemZ/and-06.ll
+++ b/llvm/test/CodeGen/SystemZ/and-06.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %and = and i32 %ext, -2
   %trunc = trunc i32 %and to i8
@@ -21,7 +21,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %and = and i64 %ext, -2
   %trunc = trunc i64 %and to i8
@@ -34,7 +34,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %and = and i32 %ext, 254
   %trunc = trunc i32 %and to i8
@@ -47,7 +47,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %and = and i64 %ext, 254
   %trunc = trunc i64 %and to i8
@@ -60,7 +60,7 @@
 ; CHECK-LABEL: f5:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %and = and i32 %ext, -2
   %trunc = trunc i32 %and to i8
@@ -73,7 +73,7 @@
 ; CHECK-LABEL: f6:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %and = and i64 %ext, -2
   %trunc = trunc i64 %and to i8
@@ -86,7 +86,7 @@
 ; CHECK-LABEL: f7:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %and = and i32 %ext, 254
   %trunc = trunc i32 %and to i8
@@ -99,7 +99,7 @@
 ; CHECK-LABEL: f8:
 ; CHECK: ni 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %and = and i64 %ext, 254
   %trunc = trunc i64 %and to i8
diff --git a/llvm/test/CodeGen/SystemZ/and-08.ll b/llvm/test/CodeGen/SystemZ/and-08.ll
index ec3f6ec..0622950 100644
--- a/llvm/test/CodeGen/SystemZ/and-08.ll
+++ b/llvm/test/CodeGen/SystemZ/and-08.ll
@@ -13,8 +13,8 @@
 ; CHECK: nc 1(1,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
-  %val = load i8 *%ptr1
-  %old = load i8 *%ptr2
+  %val = load i8 , i8 *%ptr1
+  %old = load i8 , i8 *%ptr2
   %and = and i8 %val, %old
   store i8 %and, i8 *%ptr2
   ret void
@@ -26,8 +26,8 @@
 ; CHECK: nc 1(1,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
-  %val = load i8 *%ptr1
-  %old = load i8 *%ptr2
+  %val = load i8 , i8 *%ptr1
+  %old = load i8 , i8 *%ptr2
   %and = and i8 %old, %val
   store i8 %and, i8 *%ptr2
   ret void
@@ -40,9 +40,9 @@
 ; CHECK: nc 1(1,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
-  %val = load i8 *%ptr1
+  %val = load i8 , i8 *%ptr1
   %extval = zext i8 %val to i32
-  %old = load i8 *%ptr2
+  %old = load i8 , i8 *%ptr2
   %extold = sext i8 %old to i32
   %and = and i32 %extval, %extold
   %trunc = trunc i32 %and to i8
@@ -56,9 +56,9 @@
 ; CHECK: nc 1(1,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
-  %val = load i8 *%ptr1
+  %val = load i8 , i8 *%ptr1
   %extval = sext i8 %val to i32
-  %old = load i8 *%ptr2
+  %old = load i8 , i8 *%ptr2
   %extold = zext i8 %old to i32
   %and = and i32 %extval, %extold
   %trunc = trunc i32 %and to i8
@@ -72,9 +72,9 @@
 ; CHECK: nc 1(1,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
-  %val = load i8 *%ptr1
+  %val = load i8 , i8 *%ptr1
   %extval = sext i8 %val to i32
-  %old = load i8 *%ptr2
+  %old = load i8 , i8 *%ptr2
   %extold = sext i8 %old to i32
   %and = and i32 %extval, %extold
   %trunc = trunc i32 %and to i8
@@ -88,9 +88,9 @@
 ; CHECK: nc 1(1,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
-  %val = load i8 *%ptr1
+  %val = load i8 , i8 *%ptr1
   %extval = zext i8 %val to i32
-  %old = load i8 *%ptr2
+  %old = load i8 , i8 *%ptr2
   %extold = zext i8 %old to i32
   %and = and i32 %extval, %extold
   %trunc = trunc i32 %and to i8
@@ -105,9 +105,9 @@
 ; CHECK: nc 1(1,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
-  %val = load i8 *%ptr1
+  %val = load i8 , i8 *%ptr1
   %extval = sext i8 %val to i64
-  %old = load i8 *%ptr2
+  %old = load i8 , i8 *%ptr2
   %extold = zext i8 %old to i64
   %and = and i64 %extval, %extold
   %trunc = trunc i64 %and to i8
@@ -121,8 +121,8 @@
 ; CHECK: nc 2(2,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
-  %val = load i16 *%ptr1
-  %old = load i16 *%ptr2
+  %val = load i16 , i16 *%ptr1
+  %old = load i16 , i16 *%ptr2
   %and = and i16 %val, %old
   store i16 %and, i16 *%ptr2
   ret void
@@ -134,9 +134,9 @@
 ; CHECK: nc 2(2,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
-  %val = load i16 *%ptr1
+  %val = load i16 , i16 *%ptr1
   %extval = zext i16 %val to i32
-  %old = load i16 *%ptr2
+  %old = load i16 , i16 *%ptr2
   %extold = sext i16 %old to i32
   %and = and i32 %extval, %extold
   %trunc = trunc i32 %and to i16
@@ -150,9 +150,9 @@
 ; CHECK: nc 2(2,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
-  %val = load i16 *%ptr1
+  %val = load i16 , i16 *%ptr1
   %extval = sext i16 %val to i64
-  %old = load i16 *%ptr2
+  %old = load i16 , i16 *%ptr2
   %extold = zext i16 %old to i64
   %and = and i64 %extval, %extold
   %trunc = trunc i64 %and to i16
@@ -166,8 +166,8 @@
 ; CHECK: nc 4(4,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i32, i32 *%ptr1, i64 1
-  %val = load i32 *%ptr1
-  %old = load i32 *%ptr2
+  %val = load i32 , i32 *%ptr1
+  %old = load i32 , i32 *%ptr2
   %and = and i32 %old, %val
   store i32 %and, i32 *%ptr2
   ret void
@@ -179,9 +179,9 @@
 ; CHECK: nc 4(4,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i32, i32 *%ptr1, i64 1
-  %val = load i32 *%ptr1
+  %val = load i32 , i32 *%ptr1
   %extval = sext i32 %val to i64
-  %old = load i32 *%ptr2
+  %old = load i32 , i32 *%ptr2
   %extold = zext i32 %old to i64
   %and = and i64 %extval, %extold
   %trunc = trunc i64 %and to i32
@@ -195,8 +195,8 @@
 ; CHECK: nc 8(8,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
-  %val = load i64 *%ptr1
-  %old = load i64 *%ptr2
+  %val = load i64 , i64 *%ptr1
+  %old = load i64 , i64 *%ptr2
   %and = and i64 %old, %val
   store i64 %and, i64 *%ptr2
   ret void
@@ -208,8 +208,8 @@
 ; CHECK-NOT: nc
 ; CHECK: br %r14
   %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
-  %val = load volatile i64 *%ptr1
-  %old = load i64 *%ptr2
+  %val = load volatile i64 , i64 *%ptr1
+  %old = load i64 , i64 *%ptr2
   %and = and i64 %old, %val
   store i64 %and, i64 *%ptr2
   ret void
@@ -221,8 +221,8 @@
 ; CHECK-NOT: nc
 ; CHECK: br %r14
   %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
-  %val = load i64 *%ptr1
-  %old = load volatile i64 *%ptr2
+  %val = load i64 , i64 *%ptr1
+  %old = load volatile i64 , i64 *%ptr2
   %and = and i64 %old, %val
   store i64 %and, i64 *%ptr2
   ret void
@@ -234,8 +234,8 @@
 ; CHECK-NOT: nc
 ; CHECK: br %r14
   %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
-  %val = load i64 *%ptr1
-  %old = load i64 *%ptr2
+  %val = load i64 , i64 *%ptr1
+  %old = load i64 , i64 *%ptr2
   %and = and i64 %old, %val
   store volatile i64 %and, i64 *%ptr2
   ret void
@@ -248,8 +248,8 @@
 ; CHECK-LABEL: f17:
 ; CHECK-NOT: nc
 ; CHECK: br %r14
-  %val = load i64 *%ptr1
-  %old = load i64 *%ptr2
+  %val = load i64 , i64 *%ptr1
+  %old = load i64 , i64 *%ptr2
   %and = and i64 %old, %val
   store i64 %and, i64 *%ptr2
   ret void
@@ -260,8 +260,8 @@
 ; CHECK-LABEL: f18:
 ; CHECK-NOT: nc
 ; CHECK: br %r14
-  %val = load i64 *%ptr1, align 2
-  %old = load i64 *%ptr2
+  %val = load i64 , i64 *%ptr1, align 2
+  %old = load i64 , i64 *%ptr2
   %and = and i64 %old, %val
   store i64 %and, i64 *%ptr2
   ret void
@@ -272,8 +272,8 @@
 ; CHECK-LABEL: f19:
 ; CHECK-NOT: nc
 ; CHECK: br %r14
-  %val = load i64 *%ptr1, align 2
-  %old = load i64 *%ptr2
+  %val = load i64 , i64 *%ptr1, align 2
+  %old = load i64 , i64 *%ptr2
   %and = and i64 %val, %old
   store i64 %and, i64 *%ptr2
   ret void
@@ -284,8 +284,8 @@
 ; CHECK-LABEL: f20:
 ; CHECK-NOT: nc
 ; CHECK: br %r14
-  %val = load i64 *%ptr1
-  %old = load i64 *%ptr2, align 2
+  %val = load i64 , i64 *%ptr1
+  %old = load i64 , i64 *%ptr2, align 2
   %and = and i64 %val, %old
   store i64 %and, i64 *%ptr2, align 2
   ret void
@@ -299,8 +299,8 @@
   %add = add i64 %base, 1
   %ptr1 = inttoptr i64 %base to i64 *
   %ptr2 = inttoptr i64 %add to i64 *
-  %val = load i64 *%ptr1
-  %old = load i64 *%ptr2, align 1
+  %val = load i64 , i64 *%ptr1
+  %old = load i64 , i64 *%ptr2, align 1
   %and = and i64 %old, %val
   store i64 %and, i64 *%ptr2, align 1
   ret void
@@ -313,8 +313,8 @@
 ; CHECK-DAG: larl [[DST:%r[0-5]]], g1dst
 ; CHECK: nc 0(1,[[DST]]), 0([[SRC]])
 ; CHECK: br %r14
-  %val = load i8 *@g1src
-  %old = load i8 *@g1dst
+  %val = load i8 , i8 *@g1src
+  %old = load i8 , i8 *@g1dst
   %and = and i8 %val, %old
   store i8 %and, i8 *@g1dst
   ret void
@@ -327,8 +327,8 @@
 ; CHECK-DAG: larl [[DST:%r[0-5]]], g2dst
 ; CHECK: nc 0(2,[[DST]]), 0([[SRC]])
 ; CHECK: br %r14
-  %val = load i16 *@g2src
-  %old = load i16 *@g2dst
+  %val = load i16 , i16 *@g2src
+  %old = load i16 , i16 *@g2dst
   %and = and i16 %val, %old
   store i16 %and, i16 *@g2dst
   ret void
@@ -340,8 +340,8 @@
 ; CHECK: nc 8(8,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
-  %val = load i64 *%ptr1, align 1
-  %old = load i64 *%ptr2, align 1
+  %val = load i64 , i64 *%ptr1, align 1
+  %old = load i64 , i64 *%ptr2, align 1
   %and = and i64 %old, %val
   store i64 %and, i64 *%ptr2, align 1
   ret void
@@ -352,8 +352,8 @@
 ; CHECK-LABEL: f25:
 ; CHECK: nc 0(8,%r3), 0(%r2)
 ; CHECK: br %r14
-  %val = load i64 *%ptr1, align 2, !tbaa !3
-  %old = load i64 *%ptr2, align 2, !tbaa !4
+  %val = load i64 , i64 *%ptr1, align 2, !tbaa !3
+  %old = load i64 , i64 *%ptr2, align 2, !tbaa !4
   %and = and i64 %old, %val
   store i64 %and, i64 *%ptr2, align 2, !tbaa !4
   ret void
@@ -364,8 +364,8 @@
 ; CHECK-LABEL: f26:
 ; CHECK-NOT: nc
 ; CHECK: br %r14
-  %val = load i64 *%ptr1, align 2, !tbaa !3
-  %old = load i64 *%ptr2, align 2, !tbaa !3
+  %val = load i64 , i64 *%ptr1, align 2, !tbaa !3
+  %old = load i64 , i64 *%ptr2, align 2, !tbaa !3
   %and = and i64 %old, %val
   store i64 %and, i64 *%ptr2, align 2, !tbaa !3
   ret void
diff --git a/llvm/test/CodeGen/SystemZ/asm-18.ll b/llvm/test/CodeGen/SystemZ/asm-18.ll
index 72454c5e..999984b 100644
--- a/llvm/test/CodeGen/SystemZ/asm-18.ll
+++ b/llvm/test/CodeGen/SystemZ/asm-18.ll
@@ -18,10 +18,10 @@
 ; CHECK: br %r14
   %ptr3 = getelementptr i32, i32 *%ptr1, i64 1024
   %ptr4 = getelementptr i32, i32 *%ptr2, i64 131071
-  %old1 = load i32 *%ptr1
-  %old2 = load i32 *%ptr2
-  %old3 = load i32 *%ptr3
-  %old4 = load i32 *%ptr4
+  %old1 = load i32 , i32 *%ptr1
+  %old2 = load i32 , i32 *%ptr2
+  %old3 = load i32 , i32 *%ptr3
+  %old4 = load i32 , i32 *%ptr4
   %res = call { i32, i32, i32, i32 } asm "blah $0, $1, $2, $3",
               "=h,=r,=h,=r,0,1,2,3"(i32 %old1, i32 %old2, i32 %old3, i32 %old4)
   %new1 = extractvalue { i32, i32, i32, i32 } %res, 0
@@ -62,10 +62,10 @@
 ; CHECK: br %r14
   %ptr3 = getelementptr i8, i8 *%ptr1, i64 4096
   %ptr4 = getelementptr i8, i8 *%ptr2, i64 524287
-  %val1 = load i8 *%ptr1
-  %val2 = load i8 *%ptr2
-  %val3 = load i8 *%ptr3
-  %val4 = load i8 *%ptr4
+  %val1 = load i8 , i8 *%ptr1
+  %val2 = load i8 , i8 *%ptr2
+  %val3 = load i8 , i8 *%ptr3
+  %val4 = load i8 , i8 *%ptr4
   %ext1 = sext i8 %val1 to i32
   %ext2 = sext i8 %val2 to i32
   %ext3 = sext i8 %val3 to i32
@@ -86,10 +86,10 @@
 ; CHECK: br %r14
   %ptr3 = getelementptr i16, i16 *%ptr1, i64 2048
   %ptr4 = getelementptr i16, i16 *%ptr2, i64 262143
-  %val1 = load i16 *%ptr1
-  %val2 = load i16 *%ptr2
-  %val3 = load i16 *%ptr3
-  %val4 = load i16 *%ptr4
+  %val1 = load i16 , i16 *%ptr1
+  %val2 = load i16 , i16 *%ptr2
+  %val3 = load i16 , i16 *%ptr3
+  %val4 = load i16 , i16 *%ptr4
   %ext1 = sext i16 %val1 to i32
   %ext2 = sext i16 %val2 to i32
   %ext3 = sext i16 %val3 to i32
@@ -110,10 +110,10 @@
 ; CHECK: br %r14
   %ptr3 = getelementptr i8, i8 *%ptr1, i64 4096
   %ptr4 = getelementptr i8, i8 *%ptr2, i64 524287
-  %val1 = load i8 *%ptr1
-  %val2 = load i8 *%ptr2
-  %val3 = load i8 *%ptr3
-  %val4 = load i8 *%ptr4
+  %val1 = load i8 , i8 *%ptr1
+  %val2 = load i8 , i8 *%ptr2
+  %val3 = load i8 , i8 *%ptr3
+  %val4 = load i8 , i8 *%ptr4
   %ext1 = zext i8 %val1 to i32
   %ext2 = zext i8 %val2 to i32
   %ext3 = zext i8 %val3 to i32
@@ -134,10 +134,10 @@
 ; CHECK: br %r14
   %ptr3 = getelementptr i16, i16 *%ptr1, i64 2048
   %ptr4 = getelementptr i16, i16 *%ptr2, i64 262143
-  %val1 = load i16 *%ptr1
-  %val2 = load i16 *%ptr2
-  %val3 = load i16 *%ptr3
-  %val4 = load i16 *%ptr4
+  %val1 = load i16 , i16 *%ptr1
+  %val2 = load i16 , i16 *%ptr2
+  %val3 = load i16 , i16 *%ptr3
+  %val4 = load i16 , i16 *%ptr4
   %ext1 = zext i16 %val1 to i32
   %ext2 = zext i16 %val2 to i32
   %ext3 = zext i16 %val3 to i32
@@ -713,11 +713,11 @@
 ; CHECK: clhf [[REG2]], 0(%r3)
 ; CHECK: br %r14
   %res1 = call i32 asm "stepa $0", "=h"()
-  %load1 = load i32 *%ptr1
+  %load1 = load i32 , i32 *%ptr1
   %cmp1 = icmp sle i32 %res1, %load1
   %sel1 = select i1 %cmp1, i32 0, i32 1
   %res2 = call i32 asm "stepb $0, $1", "=h,r"(i32 %sel1)
-  %load2 = load i32 *%ptr2
+  %load2 = load i32 , i32 *%ptr2
   %cmp2 = icmp ule i32 %res2, %load2
   %sel2 = select i1 %cmp2, i32 0, i32 1
   store i32 %sel2, i32 *%ptr1
@@ -733,11 +733,11 @@
 ; CHECK: cl [[REG2]], 0(%r3)
 ; CHECK: br %r14
   %res1 = call i32 asm "stepa $0", "=r"()
-  %load1 = load i32 *%ptr1
+  %load1 = load i32 , i32 *%ptr1
   %cmp1 = icmp sle i32 %res1, %load1
   %sel1 = select i1 %cmp1, i32 0, i32 1
   %res2 = call i32 asm "stepb $0, $1", "=r,r"(i32 %sel1)
-  %load2 = load i32 *%ptr2
+  %load2 = load i32 , i32 *%ptr2
   %cmp2 = icmp ule i32 %res2, %load2
   %sel2 = select i1 %cmp2, i32 0, i32 1
   store i32 %sel2, i32 *%ptr1
diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-01.ll b/llvm/test/CodeGen/SystemZ/atomic-load-01.ll
index f3acd60..b2f4ebe 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-load-01.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-load-01.ll
@@ -7,6 +7,6 @@
 ; CHECK: bcr 1{{[45]}}, %r0
 ; CHECK: lb %r2, 0(%r2)
 ; CHECK: br %r14
-  %val = load atomic i8 *%src seq_cst, align 1
+  %val = load atomic i8 , i8 *%src seq_cst, align 1
   ret i8 %val
 }
diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-02.ll b/llvm/test/CodeGen/SystemZ/atomic-load-02.ll
index d9bec60..b2b60f3 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-load-02.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-load-02.ll
@@ -7,6 +7,6 @@
 ; CHECK: bcr 1{{[45]}}, %r0
 ; CHECK: lh %r2, 0(%r2)
 ; CHECK: br %r14
-  %val = load atomic i16 *%src seq_cst, align 2
+  %val = load atomic i16 , i16 *%src seq_cst, align 2
   ret i16 %val
 }
diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-03.ll b/llvm/test/CodeGen/SystemZ/atomic-load-03.ll
index 7e5eb92..d83c430 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-load-03.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-load-03.ll
@@ -7,6 +7,6 @@
 ; CHECK: bcr 1{{[45]}}, %r0
 ; CHECK: l %r2, 0(%r2)
 ; CHECK: br %r14
-  %val = load atomic i32 *%src seq_cst, align 4
+  %val = load atomic i32 , i32 *%src seq_cst, align 4
   ret i32 %val
 }
diff --git a/llvm/test/CodeGen/SystemZ/atomic-load-04.ll b/llvm/test/CodeGen/SystemZ/atomic-load-04.ll
index c7a9a98..dc6b271 100644
--- a/llvm/test/CodeGen/SystemZ/atomic-load-04.ll
+++ b/llvm/test/CodeGen/SystemZ/atomic-load-04.ll
@@ -7,6 +7,6 @@
 ; CHECK: bcr 1{{[45]}}, %r0
 ; CHECK: lg %r2, 0(%r2)
 ; CHECK: br %r14
-  %val = load atomic i64 *%src seq_cst, align 8
+  %val = load atomic i64 , i64 *%src seq_cst, align 8
   ret i64 %val
 }
diff --git a/llvm/test/CodeGen/SystemZ/branch-02.ll b/llvm/test/CodeGen/SystemZ/branch-02.ll
index 38b5d27..5a30cad 100644
--- a/llvm/test/CodeGen/SystemZ/branch-02.ll
+++ b/llvm/test/CodeGen/SystemZ/branch-02.ll
@@ -12,7 +12,7 @@
 ; CHECK-NEXT: je .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile i32 *%src
+  %val = load volatile i32 , i32 *%src
   %cond = icmp eq i32 %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -27,7 +27,7 @@
 ; CHECK-NEXT: jlh .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile i32 *%src
+  %val = load volatile i32 , i32 *%src
   %cond = icmp ne i32 %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -42,7 +42,7 @@
 ; CHECK-NEXT: jle .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile i32 *%src
+  %val = load volatile i32 , i32 *%src
   %cond = icmp sle i32 %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -57,7 +57,7 @@
 ; CHECK-NEXT: jl .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile i32 *%src
+  %val = load volatile i32 , i32 *%src
   %cond = icmp slt i32 %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -72,7 +72,7 @@
 ; CHECK-NEXT: jh .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile i32 *%src
+  %val = load volatile i32 , i32 *%src
   %cond = icmp sgt i32 %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -87,7 +87,7 @@
 ; CHECK-NEXT: jhe .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile i32 *%src
+  %val = load volatile i32 , i32 *%src
   %cond = icmp sge i32 %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
diff --git a/llvm/test/CodeGen/SystemZ/branch-03.ll b/llvm/test/CodeGen/SystemZ/branch-03.ll
index ef31a9c..a258e85 100644
--- a/llvm/test/CodeGen/SystemZ/branch-03.ll
+++ b/llvm/test/CodeGen/SystemZ/branch-03.ll
@@ -10,7 +10,7 @@
 ; CHECK-NEXT: jle .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile i32 *%src
+  %val = load volatile i32 , i32 *%src
   %cond = icmp ule i32 %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -25,7 +25,7 @@
 ; CHECK-NEXT: jl .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile i32 *%src
+  %val = load volatile i32 , i32 *%src
   %cond = icmp ult i32 %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -40,7 +40,7 @@
 ; CHECK-NEXT: jh .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile i32 *%src
+  %val = load volatile i32 , i32 *%src
   %cond = icmp ugt i32 %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -55,7 +55,7 @@
 ; CHECK-NEXT: jhe .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile i32 *%src
+  %val = load volatile i32 , i32 *%src
   %cond = icmp uge i32 %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
diff --git a/llvm/test/CodeGen/SystemZ/branch-04.ll b/llvm/test/CodeGen/SystemZ/branch-04.ll
index fafb234..8df2ca0 100644
--- a/llvm/test/CodeGen/SystemZ/branch-04.ll
+++ b/llvm/test/CodeGen/SystemZ/branch-04.ll
@@ -11,7 +11,7 @@
 ; CHECK-NEXT: je .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile float *%src
+  %val = load volatile float , float *%src
   %cond = fcmp oeq float %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -26,7 +26,7 @@
 ; CHECK-NEXT: jlh .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile float *%src
+  %val = load volatile float , float *%src
   %cond = fcmp one float %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -41,7 +41,7 @@
 ; CHECK-NEXT: jle .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile float *%src
+  %val = load volatile float , float *%src
   %cond = fcmp ole float %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -56,7 +56,7 @@
 ; CHECK-NEXT: jl .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile float *%src
+  %val = load volatile float , float *%src
   %cond = fcmp olt float %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -71,7 +71,7 @@
 ; CHECK-NEXT: jh .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile float *%src
+  %val = load volatile float , float *%src
   %cond = fcmp ogt float %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -86,7 +86,7 @@
 ; CHECK-NEXT: jhe .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile float *%src
+  %val = load volatile float , float *%src
   %cond = fcmp oge float %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -101,7 +101,7 @@
 ; CHECK-NEXT: jnlh .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile float *%src
+  %val = load volatile float , float *%src
   %cond = fcmp ueq float %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -116,7 +116,7 @@
 ; CHECK-NEXT: jne .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile float *%src
+  %val = load volatile float , float *%src
   %cond = fcmp une float %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -131,7 +131,7 @@
 ; CHECK-NEXT: jnh .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile float *%src
+  %val = load volatile float , float *%src
   %cond = fcmp ule float %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -146,7 +146,7 @@
 ; CHECK-NEXT: jnhe .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile float *%src
+  %val = load volatile float , float *%src
   %cond = fcmp ult float %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -161,7 +161,7 @@
 ; CHECK-NEXT: jnle .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile float *%src
+  %val = load volatile float , float *%src
   %cond = fcmp ugt float %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -176,7 +176,7 @@
 ; CHECK-NEXT: jnl .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile float *%src
+  %val = load volatile float , float *%src
   %cond = fcmp uge float %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -193,7 +193,7 @@
 ; CHECK-NEXT: jno .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile float *%src
+  %val = load volatile float , float *%src
   %cond = fcmp ord float %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
@@ -210,7 +210,7 @@
 ; CHECK-NEXT: jo .L[[LABEL]]
   br label %loop
 loop:
-  %val = load volatile float *%src
+  %val = load volatile float , float *%src
   %cond = fcmp uno float %target, %val
   br i1 %cond, label %loop, label %exit
 exit:
diff --git a/llvm/test/CodeGen/SystemZ/branch-06.ll b/llvm/test/CodeGen/SystemZ/branch-06.ll
index 528d6c6..4549b12 100644
--- a/llvm/test/CodeGen/SystemZ/branch-06.ll
+++ b/llvm/test/CodeGen/SystemZ/branch-06.ll
@@ -100,7 +100,7 @@
   br label %loop
 loop:
   %val = call i32 @foo()
-  %byte = load i8 *%targetptr
+  %byte = load i8 , i8 *%targetptr
   %target = zext i8 %byte to i32
   %cond = icmp eq i32 %val, %target
   br i1 %cond, label %loop, label %exit
@@ -118,7 +118,7 @@
   br label %loop
 loop:
   %val = call i32 @foo()
-  %half = load i16 *%targetptr
+  %half = load i16 , i16 *%targetptr
   %target = zext i16 %half to i32
   %cond = icmp eq i32 %val, %target
   br i1 %cond, label %loop, label %exit
@@ -136,7 +136,7 @@
   br label %loop
 loop:
   %val = call i32 @foo()
-  %half = load i16 *@g1
+  %half = load i16 , i16 *@g1
   %target = zext i16 %half to i32
   %cond = icmp eq i32 %val, %target
   br i1 %cond, label %loop, label %exit
@@ -157,8 +157,8 @@
 loop:
   %val = call i32 @foo()
   %targetptr2 = getelementptr i8, i8 *%targetptr1, i64 1
-  %byte1 = load i8 *%targetptr1
-  %byte2 = load i8 *%targetptr2
+  %byte1 = load i8 , i8 *%targetptr1
+  %byte2 = load i8 , i8 *%targetptr2
   %ext1 = zext i8 %byte1 to i32
   %ext2 = zext i8 %byte2 to i32
   %cond = icmp ult i32 %ext1, %ext2
@@ -179,8 +179,8 @@
 loop:
   %val = call i32 @foo()
   %targetptr2 = getelementptr i16, i16 *%targetptr1, i64 1
-  %half1 = load i16 *%targetptr1
-  %half2 = load i16 *%targetptr2
+  %half1 = load i16 , i16 *%targetptr1
+  %half2 = load i16 , i16 *%targetptr2
   %ext1 = zext i16 %half1 to i32
   %ext2 = zext i16 %half2 to i32
   %cond = icmp ult i32 %ext1, %ext2
diff --git a/llvm/test/CodeGen/SystemZ/branch-08.ll b/llvm/test/CodeGen/SystemZ/branch-08.ll
index 6741d29..0aa4806 100644
--- a/llvm/test/CodeGen/SystemZ/branch-08.ll
+++ b/llvm/test/CodeGen/SystemZ/branch-08.ll
@@ -14,7 +14,7 @@
 ; CHECK: .L[[LABEL]]:
 ; CHECK: brasl %r14, foo@PLT
 entry:
-  %b = load i32 *%bptr
+  %b = load i32 , i32 *%bptr
   %cmp = icmp ult i32 %a, %b
   br i1 %cmp, label %callit, label %return
 
diff --git a/llvm/test/CodeGen/SystemZ/bswap-02.ll b/llvm/test/CodeGen/SystemZ/bswap-02.ll
index 698f1de3..9c964569 100644
--- a/llvm/test/CodeGen/SystemZ/bswap-02.ll
+++ b/llvm/test/CodeGen/SystemZ/bswap-02.ll
@@ -9,7 +9,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: lrv %r2, 0(%r2)
 ; CHECK: br %r14
-  %a = load i32 *%src
+  %a = load i32 , i32 *%src
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   ret i32 %swapped
 }
@@ -20,7 +20,7 @@
 ; CHECK: lrv %r2, 524284(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131071
-  %a = load i32 *%ptr
+  %a = load i32 , i32 *%ptr
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   ret i32 %swapped
 }
@@ -33,7 +33,7 @@
 ; CHECK: lrv %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131072
-  %a = load i32 *%ptr
+  %a = load i32 , i32 *%ptr
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   ret i32 %swapped
 }
@@ -44,7 +44,7 @@
 ; CHECK: lrv %r2, -4(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -1
-  %a = load i32 *%ptr
+  %a = load i32 , i32 *%ptr
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   ret i32 %swapped
 }
@@ -55,7 +55,7 @@
 ; CHECK: lrv %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131072
-  %a = load i32 *%ptr
+  %a = load i32 , i32 *%ptr
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   ret i32 %swapped
 }
@@ -68,7 +68,7 @@
 ; CHECK: lrv %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131073
-  %a = load i32 *%ptr
+  %a = load i32 , i32 *%ptr
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   ret i32 %swapped
 }
@@ -81,7 +81,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to i32 *
-  %a = load i32 *%ptr
+  %a = load i32 , i32 *%ptr
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   ret i32 %swapped
 }
@@ -93,7 +93,7 @@
 ; CHECK: l [[REG:%r[0-5]]], 0(%r2)
 ; CHECK: lrvr %r2, [[REG]]
 ; CHECK: br %r14
-  %a = load volatile i32 *%src
+  %a = load volatile i32 , i32 *%src
   %swapped = call i32 @llvm.bswap.i32(i32 %a)
   ret i32 %swapped
 }
@@ -104,22 +104,22 @@
 ; CHECK-LABEL: f9:
 ; CHECK: lrv {{%r[0-9]+}}, 16{{[04]}}(%r15)
 ; CHECK: br %r14
-  %val0 = load volatile i32 *%ptr
-  %val1 = load volatile i32 *%ptr
-  %val2 = load volatile i32 *%ptr
-  %val3 = load volatile i32 *%ptr
-  %val4 = load volatile i32 *%ptr
-  %val5 = load volatile i32 *%ptr
-  %val6 = load volatile i32 *%ptr
-  %val7 = load volatile i32 *%ptr
-  %val8 = load volatile i32 *%ptr
-  %val9 = load volatile i32 *%ptr
-  %val10 = load volatile i32 *%ptr
-  %val11 = load volatile i32 *%ptr
-  %val12 = load volatile i32 *%ptr
-  %val13 = load volatile i32 *%ptr
-  %val14 = load volatile i32 *%ptr
-  %val15 = load volatile i32 *%ptr
+  %val0 = load volatile i32 , i32 *%ptr
+  %val1 = load volatile i32 , i32 *%ptr
+  %val2 = load volatile i32 , i32 *%ptr
+  %val3 = load volatile i32 , i32 *%ptr
+  %val4 = load volatile i32 , i32 *%ptr
+  %val5 = load volatile i32 , i32 *%ptr
+  %val6 = load volatile i32 , i32 *%ptr
+  %val7 = load volatile i32 , i32 *%ptr
+  %val8 = load volatile i32 , i32 *%ptr
+  %val9 = load volatile i32 , i32 *%ptr
+  %val10 = load volatile i32 , i32 *%ptr
+  %val11 = load volatile i32 , i32 *%ptr
+  %val12 = load volatile i32 , i32 *%ptr
+  %val13 = load volatile i32 , i32 *%ptr
+  %val14 = load volatile i32 , i32 *%ptr
+  %val15 = load volatile i32 , i32 *%ptr
 
   %swapped0 = call i32 @llvm.bswap.i32(i32 %val0)
   %swapped1 = call i32 @llvm.bswap.i32(i32 %val1)
diff --git a/llvm/test/CodeGen/SystemZ/bswap-03.ll b/llvm/test/CodeGen/SystemZ/bswap-03.ll
index 60f6259..ea62c4f 100644
--- a/llvm/test/CodeGen/SystemZ/bswap-03.ll
+++ b/llvm/test/CodeGen/SystemZ/bswap-03.ll
@@ -9,7 +9,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: lrvg %r2, 0(%r2)
 ; CHECK: br %r14
-  %a = load i64 *%src
+  %a = load i64 , i64 *%src
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   ret i64 %swapped
 }
@@ -20,7 +20,7 @@
 ; CHECK: lrvg %r2, 524280(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65535
-  %a = load i64 *%ptr
+  %a = load i64 , i64 *%ptr
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   ret i64 %swapped
 }
@@ -33,7 +33,7 @@
 ; CHECK: lrvg %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65536
-  %a = load i64 *%ptr
+  %a = load i64 , i64 *%ptr
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   ret i64 %swapped
 }
@@ -44,7 +44,7 @@
 ; CHECK: lrvg %r2, -8(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -1
-  %a = load i64 *%ptr
+  %a = load i64 , i64 *%ptr
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   ret i64 %swapped
 }
@@ -55,7 +55,7 @@
 ; CHECK: lrvg %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65536
-  %a = load i64 *%ptr
+  %a = load i64 , i64 *%ptr
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   ret i64 %swapped
 }
@@ -68,7 +68,7 @@
 ; CHECK: lrvg %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65537
-  %a = load i64 *%ptr
+  %a = load i64 , i64 *%ptr
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   ret i64 %swapped
 }
@@ -81,7 +81,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to i64 *
-  %a = load i64 *%ptr
+  %a = load i64 , i64 *%ptr
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   ret i64 %swapped
 }
@@ -93,7 +93,7 @@
 ; CHECK: lg [[REG:%r[0-5]]], 0(%r2)
 ; CHECK: lrvgr %r2, [[REG]]
 ; CHECK: br %r14
-  %a = load volatile i64 *%src
+  %a = load volatile i64 , i64 *%src
   %swapped = call i64 @llvm.bswap.i64(i64 %a)
   ret i64 %swapped
 }
@@ -104,22 +104,22 @@
 ; CHECK-LABEL: f9:
 ; CHECK: lrvg {{%r[0-9]+}}, 160(%r15)
 ; CHECK: br %r14
-  %val0 = load volatile i64 *%ptr
-  %val1 = load volatile i64 *%ptr
-  %val2 = load volatile i64 *%ptr
-  %val3 = load volatile i64 *%ptr
-  %val4 = load volatile i64 *%ptr
-  %val5 = load volatile i64 *%ptr
-  %val6 = load volatile i64 *%ptr
-  %val7 = load volatile i64 *%ptr
-  %val8 = load volatile i64 *%ptr
-  %val9 = load volatile i64 *%ptr
-  %val10 = load volatile i64 *%ptr
-  %val11 = load volatile i64 *%ptr
-  %val12 = load volatile i64 *%ptr
-  %val13 = load volatile i64 *%ptr
-  %val14 = load volatile i64 *%ptr
-  %val15 = load volatile i64 *%ptr
+  %val0 = load volatile i64 , i64 *%ptr
+  %val1 = load volatile i64 , i64 *%ptr
+  %val2 = load volatile i64 , i64 *%ptr
+  %val3 = load volatile i64 , i64 *%ptr
+  %val4 = load volatile i64 , i64 *%ptr
+  %val5 = load volatile i64 , i64 *%ptr
+  %val6 = load volatile i64 , i64 *%ptr
+  %val7 = load volatile i64 , i64 *%ptr
+  %val8 = load volatile i64 , i64 *%ptr
+  %val9 = load volatile i64 , i64 *%ptr
+  %val10 = load volatile i64 , i64 *%ptr
+  %val11 = load volatile i64 , i64 *%ptr
+  %val12 = load volatile i64 , i64 *%ptr
+  %val13 = load volatile i64 , i64 *%ptr
+  %val14 = load volatile i64 , i64 *%ptr
+  %val15 = load volatile i64 , i64 *%ptr
 
   %swapped0 = call i64 @llvm.bswap.i64(i64 %val0)
   %swapped1 = call i64 @llvm.bswap.i64(i64 %val1)
diff --git a/llvm/test/CodeGen/SystemZ/cond-load-01.ll b/llvm/test/CodeGen/SystemZ/cond-load-01.ll
index f8a18fc..97d4027 100644
--- a/llvm/test/CodeGen/SystemZ/cond-load-01.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-load-01.ll
@@ -11,7 +11,7 @@
 ; CHECK: loche %r2, 0(%r3)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 42
-  %other = load i32 *%ptr
+  %other = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %easy, i32 %other
   ret i32 %res
 }
@@ -23,7 +23,7 @@
 ; CHECK: locl %r2, 0(%r3)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 42
-  %other = load i32 *%ptr
+  %other = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %other, i32 %easy
   ret i32 %res
 }
@@ -36,7 +36,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 131071
   %cond = icmp ult i32 %limit, 42
-  %other = load i32 *%ptr
+  %other = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %easy, i32 %other
   ret i32 %res
 }
@@ -50,7 +50,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 131072
   %cond = icmp ult i32 %limit, 42
-  %other = load i32 *%ptr
+  %other = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %easy, i32 %other
   ret i32 %res
 }
@@ -63,7 +63,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -131072
   %cond = icmp ult i32 %limit, 42
-  %other = load i32 *%ptr
+  %other = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %easy, i32 %other
   ret i32 %res
 }
@@ -77,7 +77,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -131073
   %cond = icmp ult i32 %limit, 42
-  %other = load i32 *%ptr
+  %other = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %easy, i32 %other
   ret i32 %res
 }
@@ -91,7 +91,7 @@
   %ptr = alloca i32
   %easy = call i32 @foo(i32 *%ptr)
   %cond = icmp ult i32 %limit, 42
-  %other = load i32 *%ptr
+  %other = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %easy, i32 %other
   ret i32 %res
 }
@@ -105,7 +105,7 @@
   %add = add i64 %base, %index
   %ptr = inttoptr i64 %add to i32 *
   %cond = icmp ult i32 %limit, 42
-  %other = load i32 *%ptr
+  %other = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %easy, i32 %other
   ret i32 %res
 }
@@ -121,7 +121,7 @@
   br i1 %cmp, label %load, label %exit
 
 load:
-  %other = load i32 *%ptr
+  %other = load i32 , i32 *%ptr
   br label %exit
 
 exit:
diff --git a/llvm/test/CodeGen/SystemZ/cond-load-02.ll b/llvm/test/CodeGen/SystemZ/cond-load-02.ll
index 6b39fec..d0fe65e 100644
--- a/llvm/test/CodeGen/SystemZ/cond-load-02.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-load-02.ll
@@ -11,7 +11,7 @@
 ; CHECK: locghe %r2, 0(%r3)
 ; CHECK: br %r14
   %cond = icmp ult i64 %limit, 42
-  %other = load i64 *%ptr
+  %other = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %easy, i64 %other
   ret i64 %res
 }
@@ -23,7 +23,7 @@
 ; CHECK: locgl %r2, 0(%r3)
 ; CHECK: br %r14
   %cond = icmp ult i64 %limit, 42
-  %other = load i64 *%ptr
+  %other = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %other, i64 %easy
   ret i64 %res
 }
@@ -36,7 +36,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 65535
   %cond = icmp ult i64 %limit, 42
-  %other = load i64 *%ptr
+  %other = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %easy, i64 %other
   ret i64 %res
 }
@@ -50,7 +50,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 65536
   %cond = icmp ult i64 %limit, 42
-  %other = load i64 *%ptr
+  %other = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %easy, i64 %other
   ret i64 %res
 }
@@ -63,7 +63,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 -65536
   %cond = icmp ult i64 %limit, 42
-  %other = load i64 *%ptr
+  %other = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %easy, i64 %other
   ret i64 %res
 }
@@ -77,7 +77,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 -65537
   %cond = icmp ult i64 %limit, 42
-  %other = load i64 *%ptr
+  %other = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %easy, i64 %other
   ret i64 %res
 }
@@ -91,7 +91,7 @@
   %ptr = alloca i64
   %easy = call i64 @foo(i64 *%ptr)
   %cond = icmp ult i64 %limit, 42
-  %other = load i64 *%ptr
+  %other = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %easy, i64 %other
   ret i64 %res
 }
@@ -105,7 +105,7 @@
   %add = add i64 %base, %index
   %ptr = inttoptr i64 %add to i64 *
   %cond = icmp ult i64 %limit, 42
-  %other = load i64 *%ptr
+  %other = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %easy, i64 %other
   ret i64 %res
 }
@@ -121,7 +121,7 @@
   br i1 %cmp, label %load, label %exit
 
 load:
-  %other = load i64 *%ptr
+  %other = load i64 , i64 *%ptr
   br label %exit
 
 exit:
diff --git a/llvm/test/CodeGen/SystemZ/cond-store-01.ll b/llvm/test/CodeGen/SystemZ/cond-store-01.ll
index f9eefeb..ec7fc4a 100644
--- a/llvm/test/CodeGen/SystemZ/cond-store-01.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-store-01.ll
@@ -15,7 +15,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %res = select i1 %cond, i8 %orig, i8 %alt
   store i8 %res, i8 *%ptr
   ret void
@@ -31,7 +31,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %res = select i1 %cond, i8 %alt, i8 %orig
   store i8 %res, i8 *%ptr
   ret void
@@ -48,7 +48,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %ext = sext i8 %orig to i32
   %res = select i1 %cond, i32 %ext, i32 %alt
   %trunc = trunc i32 %res to i8
@@ -66,7 +66,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %ext = sext i8 %orig to i32
   %res = select i1 %cond, i32 %alt, i32 %ext
   %trunc = trunc i32 %res to i8
@@ -85,7 +85,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %ext = zext i8 %orig to i32
   %res = select i1 %cond, i32 %ext, i32 %alt
   %trunc = trunc i32 %res to i8
@@ -103,7 +103,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %ext = zext i8 %orig to i32
   %res = select i1 %cond, i32 %alt, i32 %ext
   %trunc = trunc i32 %res to i8
@@ -122,7 +122,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %ext = sext i8 %orig to i64
   %res = select i1 %cond, i64 %ext, i64 %alt
   %trunc = trunc i64 %res to i8
@@ -140,7 +140,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %ext = sext i8 %orig to i64
   %res = select i1 %cond, i64 %alt, i64 %ext
   %trunc = trunc i64 %res to i8
@@ -159,7 +159,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %ext = zext i8 %orig to i64
   %res = select i1 %cond, i64 %ext, i64 %alt
   %trunc = trunc i64 %res to i8
@@ -177,7 +177,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %ext = zext i8 %orig to i64
   %res = select i1 %cond, i64 %alt, i64 %ext
   %trunc = trunc i64 %res to i8
@@ -196,7 +196,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%base, i64 4095
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %res = select i1 %cond, i8 %orig, i8 %alt
   store i8 %res, i8 *%ptr
   ret void
@@ -213,7 +213,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%base, i64 4096
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %res = select i1 %cond, i8 %orig, i8 %alt
   store i8 %res, i8 *%ptr
   ret void
@@ -230,7 +230,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%base, i64 524287
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %res = select i1 %cond, i8 %orig, i8 %alt
   store i8 %res, i8 *%ptr
   ret void
@@ -249,7 +249,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%base, i64 524288
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %res = select i1 %cond, i8 %orig, i8 %alt
   store i8 %res, i8 *%ptr
   ret void
@@ -266,7 +266,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%base, i64 -524288
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %res = select i1 %cond, i8 %orig, i8 %alt
   store i8 %res, i8 *%ptr
   ret void
@@ -285,7 +285,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%base, i64 -524289
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %res = select i1 %cond, i8 %orig, i8 %alt
   store i8 %res, i8 *%ptr
   ret void
@@ -304,7 +304,7 @@
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i8 *
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %res = select i1 %cond, i8 %orig, i8 %alt
   store i8 %res, i8 *%ptr
   ret void
@@ -319,7 +319,7 @@
 ; CHECK: stc {{%r[0-5]}}, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load volatile i8 *%ptr
+  %orig = load volatile i8 , i8 *%ptr
   %res = select i1 %cond, i8 %orig, i8 %alt
   store i8 %res, i8 *%ptr
   ret void
@@ -334,7 +334,7 @@
 ; CHECK: stc %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %res = select i1 %cond, i8 %orig, i8 %alt
   store volatile i8 %res, i8 *%ptr
   ret void
@@ -353,7 +353,7 @@
 ; CHECK: stc {{%r[0-9]+}}, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load atomic i8 *%ptr unordered, align 1
+  %orig = load atomic i8 , i8 *%ptr unordered, align 1
   %res = select i1 %cond, i8 %orig, i8 %alt
   store i8 %res, i8 *%ptr
   ret void
@@ -369,7 +369,7 @@
 ; CHECK: stc %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %res = select i1 %cond, i8 %orig, i8 %alt
   store atomic i8 %res, i8 *%ptr unordered, align 1
   ret void
@@ -389,7 +389,7 @@
   %ptr = alloca i8
   call void @foo(i8 *%ptr)
   %cond = icmp ult i32 %limit, 420
-  %orig = load i8 *%ptr
+  %orig = load i8 , i8 *%ptr
   %res = select i1 %cond, i8 %orig, i8 %alt
   store i8 %res, i8 *%ptr
   call void @foo(i8 *%ptr)
diff --git a/llvm/test/CodeGen/SystemZ/cond-store-02.ll b/llvm/test/CodeGen/SystemZ/cond-store-02.ll
index 6319835..22bdfa3 100644
--- a/llvm/test/CodeGen/SystemZ/cond-store-02.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-store-02.ll
@@ -15,7 +15,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %res = select i1 %cond, i16 %orig, i16 %alt
   store i16 %res, i16 *%ptr
   ret void
@@ -31,7 +31,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %res = select i1 %cond, i16 %alt, i16 %orig
   store i16 %res, i16 *%ptr
   ret void
@@ -48,7 +48,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %ext = sext i16 %orig to i32
   %res = select i1 %cond, i32 %ext, i32 %alt
   %trunc = trunc i32 %res to i16
@@ -66,7 +66,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %ext = sext i16 %orig to i32
   %res = select i1 %cond, i32 %alt, i32 %ext
   %trunc = trunc i32 %res to i16
@@ -85,7 +85,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %ext = zext i16 %orig to i32
   %res = select i1 %cond, i32 %ext, i32 %alt
   %trunc = trunc i32 %res to i16
@@ -103,7 +103,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %ext = zext i16 %orig to i32
   %res = select i1 %cond, i32 %alt, i32 %ext
   %trunc = trunc i32 %res to i16
@@ -122,7 +122,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %ext = sext i16 %orig to i64
   %res = select i1 %cond, i64 %ext, i64 %alt
   %trunc = trunc i64 %res to i16
@@ -140,7 +140,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %ext = sext i16 %orig to i64
   %res = select i1 %cond, i64 %alt, i64 %ext
   %trunc = trunc i64 %res to i16
@@ -159,7 +159,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %ext = zext i16 %orig to i64
   %res = select i1 %cond, i64 %ext, i64 %alt
   %trunc = trunc i64 %res to i16
@@ -177,7 +177,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %ext = zext i16 %orig to i64
   %res = select i1 %cond, i64 %alt, i64 %ext
   %trunc = trunc i64 %res to i16
@@ -196,7 +196,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%base, i64 2047
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %res = select i1 %cond, i16 %orig, i16 %alt
   store i16 %res, i16 *%ptr
   ret void
@@ -213,7 +213,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%base, i64 2048
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %res = select i1 %cond, i16 %orig, i16 %alt
   store i16 %res, i16 *%ptr
   ret void
@@ -230,7 +230,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%base, i64 262143
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %res = select i1 %cond, i16 %orig, i16 %alt
   store i16 %res, i16 *%ptr
   ret void
@@ -249,7 +249,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%base, i64 262144
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %res = select i1 %cond, i16 %orig, i16 %alt
   store i16 %res, i16 *%ptr
   ret void
@@ -266,7 +266,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%base, i64 -262144
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %res = select i1 %cond, i16 %orig, i16 %alt
   store i16 %res, i16 *%ptr
   ret void
@@ -285,7 +285,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%base, i64 -262145
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %res = select i1 %cond, i16 %orig, i16 %alt
   store i16 %res, i16 *%ptr
   ret void
@@ -304,7 +304,7 @@
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i16 *
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %res = select i1 %cond, i16 %orig, i16 %alt
   store i16 %res, i16 *%ptr
   ret void
@@ -319,7 +319,7 @@
 ; CHECK: sth {{%r[0-5]}}, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load volatile i16 *%ptr
+  %orig = load volatile i16 , i16 *%ptr
   %res = select i1 %cond, i16 %orig, i16 %alt
   store i16 %res, i16 *%ptr
   ret void
@@ -334,7 +334,7 @@
 ; CHECK: sth %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %res = select i1 %cond, i16 %orig, i16 %alt
   store volatile i16 %res, i16 *%ptr
   ret void
@@ -353,7 +353,7 @@
 ; CHECK: sth {{%r[0-9]+}}, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load atomic i16 *%ptr unordered, align 2
+  %orig = load atomic i16 , i16 *%ptr unordered, align 2
   %res = select i1 %cond, i16 %orig, i16 %alt
   store i16 %res, i16 *%ptr
   ret void
@@ -369,7 +369,7 @@
 ; CHECK: sth %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %res = select i1 %cond, i16 %orig, i16 %alt
   store atomic i16 %res, i16 *%ptr unordered, align 2
   ret void
@@ -389,7 +389,7 @@
   %ptr = alloca i16
   call void @foo(i16 *%ptr)
   %cond = icmp ult i32 %limit, 420
-  %orig = load i16 *%ptr
+  %orig = load i16 , i16 *%ptr
   %res = select i1 %cond, i16 %orig, i16 %alt
   store i16 %res, i16 *%ptr
   call void @foo(i16 *%ptr)
diff --git a/llvm/test/CodeGen/SystemZ/cond-store-03.ll b/llvm/test/CodeGen/SystemZ/cond-store-03.ll
index 05adfa6..7207164 100644
--- a/llvm/test/CodeGen/SystemZ/cond-store-03.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-store-03.ll
@@ -14,7 +14,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %orig, i32 %alt
   store i32 %res, i32 *%ptr
   ret void
@@ -30,7 +30,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %alt, i32 %orig
   store i32 %res, i32 *%ptr
   ret void
@@ -47,7 +47,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %ext = sext i32 %orig to i64
   %res = select i1 %cond, i64 %ext, i64 %alt
   %trunc = trunc i64 %res to i32
@@ -65,7 +65,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %ext = sext i32 %orig to i64
   %res = select i1 %cond, i64 %alt, i64 %ext
   %trunc = trunc i64 %res to i32
@@ -84,7 +84,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %ext = zext i32 %orig to i64
   %res = select i1 %cond, i64 %ext, i64 %alt
   %trunc = trunc i64 %res to i32
@@ -102,7 +102,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %ext = zext i32 %orig to i64
   %res = select i1 %cond, i64 %alt, i64 %ext
   %trunc = trunc i64 %res to i32
@@ -121,7 +121,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 1023
   %cond = icmp ult i32 %limit, 420
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %orig, i32 %alt
   store i32 %res, i32 *%ptr
   ret void
@@ -138,7 +138,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 1024
   %cond = icmp ult i32 %limit, 420
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %orig, i32 %alt
   store i32 %res, i32 *%ptr
   ret void
@@ -155,7 +155,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 131071
   %cond = icmp ult i32 %limit, 420
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %orig, i32 %alt
   store i32 %res, i32 *%ptr
   ret void
@@ -174,7 +174,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 131072
   %cond = icmp ult i32 %limit, 420
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %orig, i32 %alt
   store i32 %res, i32 *%ptr
   ret void
@@ -191,7 +191,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -131072
   %cond = icmp ult i32 %limit, 420
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %orig, i32 %alt
   store i32 %res, i32 *%ptr
   ret void
@@ -210,7 +210,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -131073
   %cond = icmp ult i32 %limit, 420
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %orig, i32 %alt
   store i32 %res, i32 *%ptr
   ret void
@@ -229,7 +229,7 @@
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i32 *
   %cond = icmp ult i32 %limit, 420
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %orig, i32 %alt
   store i32 %res, i32 *%ptr
   ret void
@@ -244,7 +244,7 @@
 ; CHECK: st {{%r[0-5]}}, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load volatile i32 *%ptr
+  %orig = load volatile i32 , i32 *%ptr
   %res = select i1 %cond, i32 %orig, i32 %alt
   store i32 %res, i32 *%ptr
   ret void
@@ -259,7 +259,7 @@
 ; CHECK: st %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %orig, i32 %alt
   store volatile i32 %res, i32 *%ptr
   ret void
@@ -278,7 +278,7 @@
 ; CHECK: st {{%r[0-5]}}, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load atomic i32 *%ptr unordered, align 4
+  %orig = load atomic i32 , i32 *%ptr unordered, align 4
   %res = select i1 %cond, i32 %orig, i32 %alt
   store i32 %res, i32 *%ptr
   ret void
@@ -294,7 +294,7 @@
 ; CHECK: st %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %orig, i32 %alt
   store atomic i32 %res, i32 *%ptr unordered, align 4
   ret void
@@ -314,7 +314,7 @@
   %ptr = alloca i32
   call void @foo(i32 *%ptr)
   %cond = icmp ult i32 %limit, 420
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %orig, i32 %alt
   store i32 %res, i32 *%ptr
   call void @foo(i32 *%ptr)
diff --git a/llvm/test/CodeGen/SystemZ/cond-store-04.ll b/llvm/test/CodeGen/SystemZ/cond-store-04.ll
index f59a16a..7e25bb5 100644
--- a/llvm/test/CodeGen/SystemZ/cond-store-04.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-store-04.ll
@@ -14,7 +14,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i64 *%ptr
+  %orig = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %orig, i64 %alt
   store i64 %res, i64 *%ptr
   ret void
@@ -30,7 +30,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i64 *%ptr
+  %orig = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %alt, i64 %orig
   store i64 %res, i64 *%ptr
   ret void
@@ -47,7 +47,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 65535
   %cond = icmp ult i32 %limit, 420
-  %orig = load i64 *%ptr
+  %orig = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %orig, i64 %alt
   store i64 %res, i64 *%ptr
   ret void
@@ -66,7 +66,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 65536
   %cond = icmp ult i32 %limit, 420
-  %orig = load i64 *%ptr
+  %orig = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %orig, i64 %alt
   store i64 %res, i64 *%ptr
   ret void
@@ -83,7 +83,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 -65536
   %cond = icmp ult i32 %limit, 420
-  %orig = load i64 *%ptr
+  %orig = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %orig, i64 %alt
   store i64 %res, i64 *%ptr
   ret void
@@ -102,7 +102,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 -65537
   %cond = icmp ult i32 %limit, 420
-  %orig = load i64 *%ptr
+  %orig = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %orig, i64 %alt
   store i64 %res, i64 *%ptr
   ret void
@@ -121,7 +121,7 @@
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to i64 *
   %cond = icmp ult i32 %limit, 420
-  %orig = load i64 *%ptr
+  %orig = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %orig, i64 %alt
   store i64 %res, i64 *%ptr
   ret void
@@ -136,7 +136,7 @@
 ; CHECK: stg {{%r[0-5]}}, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load volatile i64 *%ptr
+  %orig = load volatile i64 , i64 *%ptr
   %res = select i1 %cond, i64 %orig, i64 %alt
   store i64 %res, i64 *%ptr
   ret void
@@ -151,7 +151,7 @@
 ; CHECK: stg %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i64 *%ptr
+  %orig = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %orig, i64 %alt
   store volatile i64 %res, i64 *%ptr
   ret void
@@ -170,7 +170,7 @@
 ; CHECK: stg {{%r[0-5]}}, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load atomic i64 *%ptr unordered, align 8
+  %orig = load atomic i64 , i64 *%ptr unordered, align 8
   %res = select i1 %cond, i64 %orig, i64 %alt
   store i64 %res, i64 *%ptr
   ret void
@@ -186,7 +186,7 @@
 ; CHECK: stg %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load i64 *%ptr
+  %orig = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %orig, i64 %alt
   store atomic i64 %res, i64 *%ptr unordered, align 8
   ret void
@@ -206,7 +206,7 @@
   %ptr = alloca i64
   call void @foo(i64 *%ptr)
   %cond = icmp ult i32 %limit, 420
-  %orig = load i64 *%ptr
+  %orig = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %orig, i64 %alt
   store i64 %res, i64 *%ptr
   call void @foo(i64 *%ptr)
diff --git a/llvm/test/CodeGen/SystemZ/cond-store-05.ll b/llvm/test/CodeGen/SystemZ/cond-store-05.ll
index 1300190..0cc0683 100644
--- a/llvm/test/CodeGen/SystemZ/cond-store-05.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-store-05.ll
@@ -14,7 +14,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load float *%ptr
+  %orig = load float , float *%ptr
   %res = select i1 %cond, float %orig, float %alt
   store float %res, float *%ptr
   ret void
@@ -30,7 +30,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load float *%ptr
+  %orig = load float , float *%ptr
   %res = select i1 %cond, float %alt, float %orig
   store float %res, float *%ptr
   ret void
@@ -47,7 +47,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1023
   %cond = icmp ult i32 %limit, 420
-  %orig = load float *%ptr
+  %orig = load float , float *%ptr
   %res = select i1 %cond, float %orig, float %alt
   store float %res, float *%ptr
   ret void
@@ -64,7 +64,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1024
   %cond = icmp ult i32 %limit, 420
-  %orig = load float *%ptr
+  %orig = load float , float *%ptr
   %res = select i1 %cond, float %orig, float %alt
   store float %res, float *%ptr
   ret void
@@ -81,7 +81,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 131071
   %cond = icmp ult i32 %limit, 420
-  %orig = load float *%ptr
+  %orig = load float , float *%ptr
   %res = select i1 %cond, float %orig, float %alt
   store float %res, float *%ptr
   ret void
@@ -100,7 +100,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 131072
   %cond = icmp ult i32 %limit, 420
-  %orig = load float *%ptr
+  %orig = load float , float *%ptr
   %res = select i1 %cond, float %orig, float %alt
   store float %res, float *%ptr
   ret void
@@ -117,7 +117,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 -131072
   %cond = icmp ult i32 %limit, 420
-  %orig = load float *%ptr
+  %orig = load float , float *%ptr
   %res = select i1 %cond, float %orig, float %alt
   store float %res, float *%ptr
   ret void
@@ -136,7 +136,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 -131073
   %cond = icmp ult i32 %limit, 420
-  %orig = load float *%ptr
+  %orig = load float , float *%ptr
   %res = select i1 %cond, float %orig, float %alt
   store float %res, float *%ptr
   ret void
@@ -155,7 +155,7 @@
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to float *
   %cond = icmp ult i32 %limit, 420
-  %orig = load float *%ptr
+  %orig = load float , float *%ptr
   %res = select i1 %cond, float %orig, float %alt
   store float %res, float *%ptr
   ret void
@@ -170,7 +170,7 @@
 ; CHECK: ste {{%f[0-5]}}, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load volatile float *%ptr
+  %orig = load volatile float , float *%ptr
   %res = select i1 %cond, float %orig, float %alt
   store float %res, float *%ptr
   ret void
@@ -185,7 +185,7 @@
 ; CHECK: ste %f0, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load float *%ptr
+  %orig = load float , float *%ptr
   %res = select i1 %cond, float %orig, float %alt
   store volatile float %res, float *%ptr
   ret void
@@ -205,7 +205,7 @@
   %ptr = alloca float
   call void @foo(float *%ptr)
   %cond = icmp ult i32 %limit, 420
-  %orig = load float *%ptr
+  %orig = load float , float *%ptr
   %res = select i1 %cond, float %orig, float %alt
   store float %res, float *%ptr
   call void @foo(float *%ptr)
diff --git a/llvm/test/CodeGen/SystemZ/cond-store-06.ll b/llvm/test/CodeGen/SystemZ/cond-store-06.ll
index 6f6635d..01948b8 100644
--- a/llvm/test/CodeGen/SystemZ/cond-store-06.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-store-06.ll
@@ -14,7 +14,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load double *%ptr
+  %orig = load double , double *%ptr
   %res = select i1 %cond, double %orig, double %alt
   store double %res, double *%ptr
   ret void
@@ -30,7 +30,7 @@
 ; CHECK: [[LABEL]]:
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load double *%ptr
+  %orig = load double , double *%ptr
   %res = select i1 %cond, double %alt, double %orig
   store double %res, double *%ptr
   ret void
@@ -47,7 +47,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 511
   %cond = icmp ult i32 %limit, 420
-  %orig = load double *%ptr
+  %orig = load double , double *%ptr
   %res = select i1 %cond, double %orig, double %alt
   store double %res, double *%ptr
   ret void
@@ -64,7 +64,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 512
   %cond = icmp ult i32 %limit, 420
-  %orig = load double *%ptr
+  %orig = load double , double *%ptr
   %res = select i1 %cond, double %orig, double %alt
   store double %res, double *%ptr
   ret void
@@ -81,7 +81,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 65535
   %cond = icmp ult i32 %limit, 420
-  %orig = load double *%ptr
+  %orig = load double , double *%ptr
   %res = select i1 %cond, double %orig, double %alt
   store double %res, double *%ptr
   ret void
@@ -100,7 +100,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 65536
   %cond = icmp ult i32 %limit, 420
-  %orig = load double *%ptr
+  %orig = load double , double *%ptr
   %res = select i1 %cond, double %orig, double %alt
   store double %res, double *%ptr
   ret void
@@ -117,7 +117,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 -65536
   %cond = icmp ult i32 %limit, 420
-  %orig = load double *%ptr
+  %orig = load double , double *%ptr
   %res = select i1 %cond, double %orig, double %alt
   store double %res, double *%ptr
   ret void
@@ -136,7 +136,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 -65537
   %cond = icmp ult i32 %limit, 420
-  %orig = load double *%ptr
+  %orig = load double , double *%ptr
   %res = select i1 %cond, double %orig, double %alt
   store double %res, double *%ptr
   ret void
@@ -155,7 +155,7 @@
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to double *
   %cond = icmp ult i32 %limit, 420
-  %orig = load double *%ptr
+  %orig = load double , double *%ptr
   %res = select i1 %cond, double %orig, double %alt
   store double %res, double *%ptr
   ret void
@@ -170,7 +170,7 @@
 ; CHECK: std {{%f[0-5]}}, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load volatile double *%ptr
+  %orig = load volatile double , double *%ptr
   %res = select i1 %cond, double %orig, double %alt
   store double %res, double *%ptr
   ret void
@@ -185,7 +185,7 @@
 ; CHECK: std %f0, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 420
-  %orig = load double *%ptr
+  %orig = load double , double *%ptr
   %res = select i1 %cond, double %orig, double %alt
   store volatile double %res, double *%ptr
   ret void
@@ -205,7 +205,7 @@
   %ptr = alloca double
   call void @foo(double *%ptr)
   %cond = icmp ult i32 %limit, 420
-  %orig = load double *%ptr
+  %orig = load double , double *%ptr
   %res = select i1 %cond, double %orig, double %alt
   store double %res, double *%ptr
   call void @foo(double *%ptr)
diff --git a/llvm/test/CodeGen/SystemZ/cond-store-07.ll b/llvm/test/CodeGen/SystemZ/cond-store-07.ll
index e008fd7e..35b1303 100644
--- a/llvm/test/CodeGen/SystemZ/cond-store-07.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-store-07.ll
@@ -11,7 +11,7 @@
 ; CHECK: stoche %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 42
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %orig, i32 %alt
   store i32 %res, i32 *%ptr
   ret void
@@ -24,7 +24,7 @@
 ; CHECK: stocl %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 42
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %alt, i32 %orig
   store i32 %res, i32 *%ptr
   ret void
@@ -38,7 +38,7 @@
 ; CHECK: stoche %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 42
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %ext = sext i32 %orig to i64
   %res = select i1 %cond, i64 %ext, i64 %alt
   %trunc = trunc i64 %res to i32
@@ -53,7 +53,7 @@
 ; CHECK: stocl %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 42
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %ext = sext i32 %orig to i64
   %res = select i1 %cond, i64 %alt, i64 %ext
   %trunc = trunc i64 %res to i32
@@ -69,7 +69,7 @@
 ; CHECK: stoche %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 42
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %ext = zext i32 %orig to i64
   %res = select i1 %cond, i64 %ext, i64 %alt
   %trunc = trunc i64 %res to i32
@@ -84,7 +84,7 @@
 ; CHECK: stocl %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 42
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %ext = zext i32 %orig to i64
   %res = select i1 %cond, i64 %alt, i64 %ext
   %trunc = trunc i64 %res to i32
@@ -100,7 +100,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 131071
   %cond = icmp ult i32 %limit, 42
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %orig, i32 %alt
   store i32 %res, i32 *%ptr
   ret void
@@ -115,7 +115,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 131072
   %cond = icmp ult i32 %limit, 42
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %orig, i32 %alt
   store i32 %res, i32 *%ptr
   ret void
@@ -129,7 +129,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -131072
   %cond = icmp ult i32 %limit, 42
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %orig, i32 %alt
   store i32 %res, i32 *%ptr
   ret void
@@ -144,7 +144,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -131073
   %cond = icmp ult i32 %limit, 42
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %orig, i32 %alt
   store i32 %res, i32 *%ptr
   ret void
@@ -160,7 +160,7 @@
   %ptr = alloca i32
   call void @foo(i32 *%ptr)
   %cond = icmp ult i32 %limit, 42
-  %orig = load i32 *%ptr
+  %orig = load i32 , i32 *%ptr
   %res = select i1 %cond, i32 %orig, i32 %alt
   store i32 %res, i32 *%ptr
   call void @foo(i32 *%ptr)
diff --git a/llvm/test/CodeGen/SystemZ/cond-store-08.ll b/llvm/test/CodeGen/SystemZ/cond-store-08.ll
index 14e638d..4c2b005 100644
--- a/llvm/test/CodeGen/SystemZ/cond-store-08.ll
+++ b/llvm/test/CodeGen/SystemZ/cond-store-08.ll
@@ -11,7 +11,7 @@
 ; CHECK: stocghe %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 42
-  %orig = load i64 *%ptr
+  %orig = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %orig, i64 %alt
   store i64 %res, i64 *%ptr
   ret void
@@ -24,7 +24,7 @@
 ; CHECK: stocgl %r3, 0(%r2)
 ; CHECK: br %r14
   %cond = icmp ult i32 %limit, 42
-  %orig = load i64 *%ptr
+  %orig = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %alt, i64 %orig
   store i64 %res, i64 *%ptr
   ret void
@@ -38,7 +38,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 65535
   %cond = icmp ult i32 %limit, 42
-  %orig = load i64 *%ptr
+  %orig = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %orig, i64 %alt
   store i64 %res, i64 *%ptr
   ret void
@@ -53,7 +53,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 65536
   %cond = icmp ult i32 %limit, 42
-  %orig = load i64 *%ptr
+  %orig = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %orig, i64 %alt
   store i64 %res, i64 *%ptr
   ret void
@@ -67,7 +67,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 -65536
   %cond = icmp ult i32 %limit, 42
-  %orig = load i64 *%ptr
+  %orig = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %orig, i64 %alt
   store i64 %res, i64 *%ptr
   ret void
@@ -82,7 +82,7 @@
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 -65537
   %cond = icmp ult i32 %limit, 42
-  %orig = load i64 *%ptr
+  %orig = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %orig, i64 %alt
   store i64 %res, i64 *%ptr
   ret void
@@ -98,7 +98,7 @@
   %ptr = alloca i64
   call void @foo(i64 *%ptr)
   %cond = icmp ult i32 %limit, 42
-  %orig = load i64 *%ptr
+  %orig = load i64 , i64 *%ptr
   %res = select i1 %cond, i64 %orig, i64 %alt
   store i64 %res, i64 *%ptr
   call void @foo(i64 *%ptr)
diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-01.ll b/llvm/test/CodeGen/SystemZ/fp-abs-01.ll
index 0b4067d..d14a92a 100644
--- a/llvm/test/CodeGen/SystemZ/fp-abs-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-abs-01.ll
@@ -31,9 +31,9 @@
 ; CHECK: lpxbr
 ; CHECK: dxbr
 ; CHECK: br %r14
-  %orig = load fp128 *%ptr
+  %orig = load fp128 , fp128 *%ptr
   %abs = call fp128 @llvm.fabs.f128(fp128 %orig)
-  %op2 = load fp128 *%ptr2
+  %op2 = load fp128 , fp128 *%ptr2
   %res = fdiv fp128 %abs, %op2
   store fp128 %res, fp128 *%ptr
   ret void
diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-02.ll b/llvm/test/CodeGen/SystemZ/fp-abs-02.ll
index 909c48a..deec8c3 100644
--- a/llvm/test/CodeGen/SystemZ/fp-abs-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-abs-02.ll
@@ -33,10 +33,10 @@
 ; CHECK: lnxbr
 ; CHECK: dxbr
 ; CHECK: br %r14
-  %orig = load fp128 *%ptr
+  %orig = load fp128 , fp128 *%ptr
   %abs = call fp128 @llvm.fabs.f128(fp128 %orig)
   %negabs = fsub fp128 0xL00000000000000008000000000000000, %abs
-  %op2 = load fp128 *%ptr2
+  %op2 = load fp128 , fp128 *%ptr2
   %res = fdiv fp128 %negabs, %op2
   store fp128 %res, fp128 *%ptr
   ret void
diff --git a/llvm/test/CodeGen/SystemZ/fp-add-01.ll b/llvm/test/CodeGen/SystemZ/fp-add-01.ll
index c2cf1bf..5b0ed05 100644
--- a/llvm/test/CodeGen/SystemZ/fp-add-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-add-01.ll
@@ -18,7 +18,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: aeb %f0, 0(%r2)
 ; CHECK: br %r14
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = fadd float %f1, %f2
   ret float %res
 }
@@ -29,7 +29,7 @@
 ; CHECK: aeb %f0, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1023
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = fadd float %f1, %f2
   ret float %res
 }
@@ -42,7 +42,7 @@
 ; CHECK: aeb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1024
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = fadd float %f1, %f2
   ret float %res
 }
@@ -54,7 +54,7 @@
 ; CHECK: aeb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 -1
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = fadd float %f1, %f2
   ret float %res
 }
@@ -67,7 +67,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr float, float *%base, i64 %index
   %ptr2 = getelementptr float, float *%ptr1, i64 100
-  %f2 = load float *%ptr2
+  %f2 = load float , float *%ptr2
   %res = fadd float %f1, %f2
   ret float %res
 }
@@ -89,17 +89,17 @@
   %ptr9 = getelementptr float, float *%ptr0, i64 18
   %ptr10 = getelementptr float, float *%ptr0, i64 20
 
-  %val0 = load float *%ptr0
-  %val1 = load float *%ptr1
-  %val2 = load float *%ptr2
-  %val3 = load float *%ptr3
-  %val4 = load float *%ptr4
-  %val5 = load float *%ptr5
-  %val6 = load float *%ptr6
-  %val7 = load float *%ptr7
-  %val8 = load float *%ptr8
-  %val9 = load float *%ptr9
-  %val10 = load float *%ptr10
+  %val0 = load float , float *%ptr0
+  %val1 = load float , float *%ptr1
+  %val2 = load float , float *%ptr2
+  %val3 = load float , float *%ptr3
+  %val4 = load float , float *%ptr4
+  %val5 = load float , float *%ptr5
+  %val6 = load float , float *%ptr6
+  %val7 = load float , float *%ptr7
+  %val8 = load float , float *%ptr8
+  %val9 = load float , float *%ptr9
+  %val10 = load float , float *%ptr10
 
   %ret = call float @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/fp-add-02.ll b/llvm/test/CodeGen/SystemZ/fp-add-02.ll
index 1718f7d..07c7462 100644
--- a/llvm/test/CodeGen/SystemZ/fp-add-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-add-02.ll
@@ -18,7 +18,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: adb %f0, 0(%r2)
 ; CHECK: br %r14
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = fadd double %f1, %f2
   ret double %res
 }
@@ -29,7 +29,7 @@
 ; CHECK: adb %f0, 4088(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 511
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = fadd double %f1, %f2
   ret double %res
 }
@@ -42,7 +42,7 @@
 ; CHECK: adb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 512
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = fadd double %f1, %f2
   ret double %res
 }
@@ -54,7 +54,7 @@
 ; CHECK: adb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 -1
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = fadd double %f1, %f2
   ret double %res
 }
@@ -67,7 +67,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%base, i64 %index
   %ptr2 = getelementptr double, double *%ptr1, i64 100
-  %f2 = load double *%ptr2
+  %f2 = load double , double *%ptr2
   %res = fadd double %f1, %f2
   ret double %res
 }
@@ -89,17 +89,17 @@
   %ptr9 = getelementptr double, double *%ptr0, i64 18
   %ptr10 = getelementptr double, double *%ptr0, i64 20
 
-  %val0 = load double *%ptr0
-  %val1 = load double *%ptr1
-  %val2 = load double *%ptr2
-  %val3 = load double *%ptr3
-  %val4 = load double *%ptr4
-  %val5 = load double *%ptr5
-  %val6 = load double *%ptr6
-  %val7 = load double *%ptr7
-  %val8 = load double *%ptr8
-  %val9 = load double *%ptr9
-  %val10 = load double *%ptr10
+  %val0 = load double , double *%ptr0
+  %val1 = load double , double *%ptr1
+  %val2 = load double , double *%ptr2
+  %val3 = load double , double *%ptr3
+  %val4 = load double , double *%ptr4
+  %val5 = load double , double *%ptr5
+  %val6 = load double , double *%ptr6
+  %val7 = load double , double *%ptr7
+  %val8 = load double , double *%ptr8
+  %val9 = load double , double *%ptr9
+  %val10 = load double , double *%ptr10
 
   %ret = call double @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/fp-add-03.ll b/llvm/test/CodeGen/SystemZ/fp-add-03.ll
index cb4042e..53342e1 100644
--- a/llvm/test/CodeGen/SystemZ/fp-add-03.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-add-03.ll
@@ -12,7 +12,7 @@
 ; CHECK: std %f1, 0(%r2)
 ; CHECK: std %f3, 8(%r2)
 ; CHECK: br %r14
-  %f1 = load fp128 *%ptr
+  %f1 = load fp128 , fp128 *%ptr
   %f2x = fpext float %f2 to fp128
   %sum = fadd fp128 %f1, %f2x
   store fp128 %sum, fp128 *%ptr
diff --git a/llvm/test/CodeGen/SystemZ/fp-cmp-01.ll b/llvm/test/CodeGen/SystemZ/fp-cmp-01.ll
index 09e531f..ed58103 100644
--- a/llvm/test/CodeGen/SystemZ/fp-cmp-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-cmp-01.ll
@@ -24,7 +24,7 @@
 ; CHECK-NEXT: je
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %cond = fcmp oeq float %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -38,7 +38,7 @@
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1023
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %cond = fcmp oeq float %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -54,7 +54,7 @@
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1024
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %cond = fcmp oeq float %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -69,7 +69,7 @@
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 -1
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %cond = fcmp oeq float %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -85,7 +85,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr float, float *%base, i64 %index
   %ptr2 = getelementptr float, float *%ptr1, i64 100
-  %f2 = load float *%ptr2
+  %f2 = load float , float *%ptr2
   %cond = fcmp oeq float %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -108,17 +108,17 @@
   %ptr9 = getelementptr float, float *%ptr0, i64 18
   %ptr10 = getelementptr float, float *%ptr0, i64 20
 
-  %val0 = load float *%ptr0
-  %val1 = load float *%ptr1
-  %val2 = load float *%ptr2
-  %val3 = load float *%ptr3
-  %val4 = load float *%ptr4
-  %val5 = load float *%ptr5
-  %val6 = load float *%ptr6
-  %val7 = load float *%ptr7
-  %val8 = load float *%ptr8
-  %val9 = load float *%ptr9
-  %val10 = load float *%ptr10
+  %val0 = load float , float *%ptr0
+  %val1 = load float , float *%ptr1
+  %val2 = load float , float *%ptr2
+  %val3 = load float , float *%ptr3
+  %val4 = load float , float *%ptr4
+  %val5 = load float , float *%ptr5
+  %val6 = load float , float *%ptr6
+  %val7 = load float , float *%ptr7
+  %val8 = load float , float *%ptr8
+  %val9 = load float , float *%ptr9
+  %val10 = load float , float *%ptr10
 
   %ret = call float @foo()
 
@@ -169,7 +169,7 @@
 ; CHECK-NEXT: je {{\.L.*}}
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
-  %f1 = load float *%ptr
+  %f1 = load float , float *%ptr
   %cond = fcmp oeq float %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -182,7 +182,7 @@
 ; CHECK-NEXT: jlh {{\.L.*}}
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
-  %f1 = load float *%ptr
+  %f1 = load float , float *%ptr
   %cond = fcmp one float %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -195,7 +195,7 @@
 ; CHECK-NEXT: jh {{\.L.*}}
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
-  %f1 = load float *%ptr
+  %f1 = load float , float *%ptr
   %cond = fcmp olt float %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -208,7 +208,7 @@
 ; CHECK-NEXT: jhe {{\.L.*}}
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
-  %f1 = load float *%ptr
+  %f1 = load float , float *%ptr
   %cond = fcmp ole float %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -221,7 +221,7 @@
 ; CHECK-NEXT: jle {{\.L.*}}
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
-  %f1 = load float *%ptr
+  %f1 = load float , float *%ptr
   %cond = fcmp oge float %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -234,7 +234,7 @@
 ; CHECK-NEXT: jl {{\.L.*}}
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
-  %f1 = load float *%ptr
+  %f1 = load float , float *%ptr
   %cond = fcmp ogt float %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -247,7 +247,7 @@
 ; CHECK-NEXT: jnlh {{\.L.*}}
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
-  %f1 = load float *%ptr
+  %f1 = load float , float *%ptr
   %cond = fcmp ueq float %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -260,7 +260,7 @@
 ; CHECK-NEXT: jne {{\.L.*}}
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
-  %f1 = load float *%ptr
+  %f1 = load float , float *%ptr
   %cond = fcmp une float %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -273,7 +273,7 @@
 ; CHECK-NEXT: jnle {{\.L.*}}
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
-  %f1 = load float *%ptr
+  %f1 = load float , float *%ptr
   %cond = fcmp ult float %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -286,7 +286,7 @@
 ; CHECK-NEXT: jnl {{\.L.*}}
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
-  %f1 = load float *%ptr
+  %f1 = load float , float *%ptr
   %cond = fcmp ule float %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -299,7 +299,7 @@
 ; CHECK-NEXT: jnh {{\.L.*}}
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
-  %f1 = load float *%ptr
+  %f1 = load float , float *%ptr
   %cond = fcmp uge float %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -312,7 +312,7 @@
 ; CHECK-NEXT: jnhe {{\.L.*}}
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
-  %f1 = load float *%ptr
+  %f1 = load float , float *%ptr
   %cond = fcmp ugt float %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
diff --git a/llvm/test/CodeGen/SystemZ/fp-cmp-02.ll b/llvm/test/CodeGen/SystemZ/fp-cmp-02.ll
index 2a177c9..95af309 100644
--- a/llvm/test/CodeGen/SystemZ/fp-cmp-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-cmp-02.ll
@@ -24,7 +24,7 @@
 ; CHECK-NEXT: je
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %cond = fcmp oeq double %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -38,7 +38,7 @@
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 511
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %cond = fcmp oeq double %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -54,7 +54,7 @@
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 512
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %cond = fcmp oeq double %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -69,7 +69,7 @@
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 -1
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %cond = fcmp oeq double %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -85,7 +85,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%base, i64 %index
   %ptr2 = getelementptr double, double *%ptr1, i64 100
-  %f2 = load double *%ptr2
+  %f2 = load double , double *%ptr2
   %cond = fcmp oeq double %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -108,17 +108,17 @@
   %ptr9 = getelementptr double, double *%ptr0, i64 18
   %ptr10 = getelementptr double, double *%ptr0, i64 20
 
-  %val0 = load double *%ptr0
-  %val1 = load double *%ptr1
-  %val2 = load double *%ptr2
-  %val3 = load double *%ptr3
-  %val4 = load double *%ptr4
-  %val5 = load double *%ptr5
-  %val6 = load double *%ptr6
-  %val7 = load double *%ptr7
-  %val8 = load double *%ptr8
-  %val9 = load double *%ptr9
-  %val10 = load double *%ptr10
+  %val0 = load double , double *%ptr0
+  %val1 = load double , double *%ptr1
+  %val2 = load double , double *%ptr2
+  %val3 = load double , double *%ptr3
+  %val4 = load double , double *%ptr4
+  %val5 = load double , double *%ptr5
+  %val6 = load double , double *%ptr6
+  %val7 = load double , double *%ptr7
+  %val8 = load double , double *%ptr8
+  %val9 = load double , double *%ptr9
+  %val10 = load double , double *%ptr10
 
   %ret = call double @foo()
 
@@ -168,7 +168,7 @@
 ; CHECK-NEXT: jl {{\.L.*}}
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
-  %f1 = load double *%ptr
+  %f1 = load double , double *%ptr
   %cond = fcmp ogt double %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
diff --git a/llvm/test/CodeGen/SystemZ/fp-cmp-03.ll b/llvm/test/CodeGen/SystemZ/fp-cmp-03.ll
index e777d00..862c5e9 100644
--- a/llvm/test/CodeGen/SystemZ/fp-cmp-03.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-cmp-03.ll
@@ -14,7 +14,7 @@
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
   %f2x = fpext float %f2 to fp128
-  %f1 = load fp128 *%ptr
+  %f1 = load fp128 , fp128 *%ptr
   %cond = fcmp oeq fp128 %f1, %f2x
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
@@ -29,7 +29,7 @@
 ; CHECK-NEXT: je
 ; CHECK: lgr %r2, %r3
 ; CHECK: br %r14
-  %f = load fp128 *%ptr
+  %f = load fp128 , fp128 *%ptr
   %cond = fcmp oeq fp128 %f, 0xL00000000000000000000000000000000
   %res = select i1 %cond, i64 %a, i64 %b
   ret i64 %res
diff --git a/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll b/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
index 1637ccb..05c6dfe 100644
--- a/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
@@ -88,7 +88,7 @@
 ; CHECK-NEXT: jnhe .L{{.*}}
 ; CHECK: br %r14
 entry:
-  %cur = load float *%dest
+  %cur = load float , float *%dest
   %res = fsub float %a, %cur
   %cmp = fcmp ult float %res, 0.0
   br i1 %cmp, label %exit, label %store
@@ -284,8 +284,8 @@
 ; CHECK-NEXT: jl .L{{.*}}
 ; CHECK: br %r14
 entry:
-  %val1 = load fp128 *%ptr1
-  %val2 = load fp128 *%ptr2
+  %val1 = load fp128 , fp128 *%ptr1
+  %val2 = load fp128 , fp128 *%ptr2
   %div = fdiv fp128 %val1, %val2
   store fp128 %div, fp128 *%ptr1
   %mul = fmul fp128 %val1, %val2
diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-01.ll b/llvm/test/CodeGen/SystemZ/fp-conv-01.ll
index 49ed43b..ebc174a 100644
--- a/llvm/test/CodeGen/SystemZ/fp-conv-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-conv-01.ll
@@ -16,7 +16,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: lexbr %f0, %f0
 ; CHECK: br %r14
-  %val = load fp128 *%ptr
+  %val = load fp128 , fp128 *%ptr
   %res = fptrunc fp128 %val to float
   ret float %res
 }
@@ -29,7 +29,7 @@
 ; CHECK: aebr %f1, %f2
 ; CHECK: ste %f1, 0(%r2)
 ; CHECK: br %r14
-  %val = load fp128 *%ptr
+  %val = load fp128 , fp128 *%ptr
   %conv = fptrunc fp128 %val to float
   %res = fadd float %conv, %d2
   store float %res, float *%dst
@@ -41,7 +41,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK: ldxbr %f0, %f0
 ; CHECK: br %r14
-  %val = load fp128 *%ptr
+  %val = load fp128 , fp128 *%ptr
   %res = fptrunc fp128 %val to double
   ret double %res
 }
@@ -53,7 +53,7 @@
 ; CHECK: adbr %f1, %f2
 ; CHECK: std %f1, 0(%r2)
 ; CHECK: br %r14
-  %val = load fp128 *%ptr
+  %val = load fp128 , fp128 *%ptr
   %conv = fptrunc fp128 %val to double
   %res = fadd double %conv, %d2
   store double %res, double *%dst
diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-02.ll b/llvm/test/CodeGen/SystemZ/fp-conv-02.ll
index 30db5c7..e9376ba 100644
--- a/llvm/test/CodeGen/SystemZ/fp-conv-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-conv-02.ll
@@ -16,7 +16,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: ldeb %f0, 0(%r2)
 ; CHECK: br %r14
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   %res = fpext float %val to double
   ret double %res
 }
@@ -27,7 +27,7 @@
 ; CHECK: ldeb %f0, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1023
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   %res = fpext float %val to double
   ret double %res
 }
@@ -40,7 +40,7 @@
 ; CHECK: ldeb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1024
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   %res = fpext float %val to double
   ret double %res
 }
@@ -52,7 +52,7 @@
 ; CHECK: ldeb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 -1
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   %res = fpext float %val to double
   ret double %res
 }
@@ -65,7 +65,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr float, float *%base, i64 %index
   %ptr2 = getelementptr float, float *%ptr1, i64 100
-  %val = load float *%ptr2
+  %val = load float , float *%ptr2
   %res = fpext float %val to double
   ret double %res
 }
@@ -76,23 +76,23 @@
 ; CHECK-LABEL: f7:
 ; CHECK: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
 ; CHECK: br %r14
-  %val0 = load volatile float *%ptr2
-  %val1 = load volatile float *%ptr2
-  %val2 = load volatile float *%ptr2
-  %val3 = load volatile float *%ptr2
-  %val4 = load volatile float *%ptr2
-  %val5 = load volatile float *%ptr2
-  %val6 = load volatile float *%ptr2
-  %val7 = load volatile float *%ptr2
-  %val8 = load volatile float *%ptr2
-  %val9 = load volatile float *%ptr2
-  %val10 = load volatile float *%ptr2
-  %val11 = load volatile float *%ptr2
-  %val12 = load volatile float *%ptr2
-  %val13 = load volatile float *%ptr2
-  %val14 = load volatile float *%ptr2
-  %val15 = load volatile float *%ptr2
-  %val16 = load volatile float *%ptr2
+  %val0 = load volatile float , float *%ptr2
+  %val1 = load volatile float , float *%ptr2
+  %val2 = load volatile float , float *%ptr2
+  %val3 = load volatile float , float *%ptr2
+  %val4 = load volatile float , float *%ptr2
+  %val5 = load volatile float , float *%ptr2
+  %val6 = load volatile float , float *%ptr2
+  %val7 = load volatile float , float *%ptr2
+  %val8 = load volatile float , float *%ptr2
+  %val9 = load volatile float , float *%ptr2
+  %val10 = load volatile float , float *%ptr2
+  %val11 = load volatile float , float *%ptr2
+  %val12 = load volatile float , float *%ptr2
+  %val13 = load volatile float , float *%ptr2
+  %val14 = load volatile float , float *%ptr2
+  %val15 = load volatile float , float *%ptr2
+  %val16 = load volatile float , float *%ptr2
 
   %ext0 = fpext float %val0 to double
   %ext1 = fpext float %val1 to double
diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-03.ll b/llvm/test/CodeGen/SystemZ/fp-conv-03.ll
index 88a8abd..bb14e61 100644
--- a/llvm/test/CodeGen/SystemZ/fp-conv-03.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-conv-03.ll
@@ -21,7 +21,7 @@
 ; CHECK: std %f0, 0(%r2)
 ; CHECK: std %f2, 8(%r2)
 ; CHECK: br %r14
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   %res = fpext float %val to fp128
   store fp128 %res, fp128 *%dst
   ret void
@@ -35,7 +35,7 @@
 ; CHECK: std %f2, 8(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1023
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   %res = fpext float %val to fp128
   store fp128 %res, fp128 *%dst
   ret void
@@ -51,7 +51,7 @@
 ; CHECK: std %f2, 8(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1024
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   %res = fpext float %val to fp128
   store fp128 %res, fp128 *%dst
   ret void
@@ -66,7 +66,7 @@
 ; CHECK: std %f2, 8(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 -1
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   %res = fpext float %val to fp128
   store fp128 %res, fp128 *%dst
   ret void
@@ -82,7 +82,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr float, float *%base, i64 %index
   %ptr2 = getelementptr float, float *%ptr1, i64 100
-  %val = load float *%ptr2
+  %val = load float , float *%ptr2
   %res = fpext float %val to fp128
   store fp128 %res, fp128 *%dst
   ret void
@@ -94,23 +94,23 @@
 ; CHECK-LABEL: f7:
 ; CHECK: lxeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
 ; CHECK: br %r14
-  %val0 = load volatile float *%ptr2
-  %val1 = load volatile float *%ptr2
-  %val2 = load volatile float *%ptr2
-  %val3 = load volatile float *%ptr2
-  %val4 = load volatile float *%ptr2
-  %val5 = load volatile float *%ptr2
-  %val6 = load volatile float *%ptr2
-  %val7 = load volatile float *%ptr2
-  %val8 = load volatile float *%ptr2
-  %val9 = load volatile float *%ptr2
-  %val10 = load volatile float *%ptr2
-  %val11 = load volatile float *%ptr2
-  %val12 = load volatile float *%ptr2
-  %val13 = load volatile float *%ptr2
-  %val14 = load volatile float *%ptr2
-  %val15 = load volatile float *%ptr2
-  %val16 = load volatile float *%ptr2
+  %val0 = load volatile float , float *%ptr2
+  %val1 = load volatile float , float *%ptr2
+  %val2 = load volatile float , float *%ptr2
+  %val3 = load volatile float , float *%ptr2
+  %val4 = load volatile float , float *%ptr2
+  %val5 = load volatile float , float *%ptr2
+  %val6 = load volatile float , float *%ptr2
+  %val7 = load volatile float , float *%ptr2
+  %val8 = load volatile float , float *%ptr2
+  %val9 = load volatile float , float *%ptr2
+  %val10 = load volatile float , float *%ptr2
+  %val11 = load volatile float , float *%ptr2
+  %val12 = load volatile float , float *%ptr2
+  %val13 = load volatile float , float *%ptr2
+  %val14 = load volatile float , float *%ptr2
+  %val15 = load volatile float , float *%ptr2
+  %val16 = load volatile float , float *%ptr2
 
   %ext0 = fpext float %val0 to fp128
   %ext1 = fpext float %val1 to fp128
diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-04.ll b/llvm/test/CodeGen/SystemZ/fp-conv-04.ll
index ab1134c..cfcb98a 100644
--- a/llvm/test/CodeGen/SystemZ/fp-conv-04.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-conv-04.ll
@@ -21,7 +21,7 @@
 ; CHECK: std %f0, 0(%r2)
 ; CHECK: std %f2, 8(%r2)
 ; CHECK: br %r14
-  %val = load double *%ptr
+  %val = load double , double *%ptr
   %res = fpext double %val to fp128
   store fp128 %res, fp128 *%dst
   ret void
@@ -35,7 +35,7 @@
 ; CHECK: std %f2, 8(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 511
-  %val = load double *%ptr
+  %val = load double , double *%ptr
   %res = fpext double %val to fp128
   store fp128 %res, fp128 *%dst
   ret void
@@ -51,7 +51,7 @@
 ; CHECK: std %f2, 8(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 512
-  %val = load double *%ptr
+  %val = load double , double *%ptr
   %res = fpext double %val to fp128
   store fp128 %res, fp128 *%dst
   ret void
@@ -66,7 +66,7 @@
 ; CHECK: std %f2, 8(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 -1
-  %val = load double *%ptr
+  %val = load double , double *%ptr
   %res = fpext double %val to fp128
   store fp128 %res, fp128 *%dst
   ret void
@@ -82,7 +82,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%base, i64 %index
   %ptr2 = getelementptr double, double *%ptr1, i64 100
-  %val = load double *%ptr2
+  %val = load double , double *%ptr2
   %res = fpext double %val to fp128
   store fp128 %res, fp128 *%dst
   ret void
@@ -94,23 +94,23 @@
 ; CHECK-LABEL: f7:
 ; CHECK: lxdb {{%f[0-9]+}}, 160(%r15)
 ; CHECK: br %r14
-  %val0 = load volatile double *%ptr2
-  %val1 = load volatile double *%ptr2
-  %val2 = load volatile double *%ptr2
-  %val3 = load volatile double *%ptr2
-  %val4 = load volatile double *%ptr2
-  %val5 = load volatile double *%ptr2
-  %val6 = load volatile double *%ptr2
-  %val7 = load volatile double *%ptr2
-  %val8 = load volatile double *%ptr2
-  %val9 = load volatile double *%ptr2
-  %val10 = load volatile double *%ptr2
-  %val11 = load volatile double *%ptr2
-  %val12 = load volatile double *%ptr2
-  %val13 = load volatile double *%ptr2
-  %val14 = load volatile double *%ptr2
-  %val15 = load volatile double *%ptr2
-  %val16 = load volatile double *%ptr2
+  %val0 = load volatile double , double *%ptr2
+  %val1 = load volatile double , double *%ptr2
+  %val2 = load volatile double , double *%ptr2
+  %val3 = load volatile double , double *%ptr2
+  %val4 = load volatile double , double *%ptr2
+  %val5 = load volatile double , double *%ptr2
+  %val6 = load volatile double , double *%ptr2
+  %val7 = load volatile double , double *%ptr2
+  %val8 = load volatile double , double *%ptr2
+  %val9 = load volatile double , double *%ptr2
+  %val10 = load volatile double , double *%ptr2
+  %val11 = load volatile double , double *%ptr2
+  %val12 = load volatile double , double *%ptr2
+  %val13 = load volatile double , double *%ptr2
+  %val14 = load volatile double , double *%ptr2
+  %val15 = load volatile double , double *%ptr2
+  %val16 = load volatile double , double *%ptr2
 
   %ext0 = fpext double %val0 to fp128
   %ext1 = fpext double %val1 to fp128
diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-09.ll b/llvm/test/CodeGen/SystemZ/fp-conv-09.ll
index 6aee7364..21b809d 100644
--- a/llvm/test/CodeGen/SystemZ/fp-conv-09.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-conv-09.ll
@@ -27,7 +27,7 @@
 ; CHECK: ld %f2, 8(%r2)
 ; CHECK: cfxbr %r2, 5, %f0
 ; CHECK: br %r14
-  %f = load fp128 *%src
+  %f = load fp128 , fp128 *%src
   %conv = fptosi fp128 %f to i32
   ret i32 %conv
 }
diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-10.ll b/llvm/test/CodeGen/SystemZ/fp-conv-10.ll
index b8155ed..cfbe0b9 100644
--- a/llvm/test/CodeGen/SystemZ/fp-conv-10.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-conv-10.ll
@@ -39,7 +39,7 @@
 ; CHECK: cfxbr
 ; CHECK: xilf
 ; CHECK: br %r14
-  %f = load fp128 *%src
+  %f = load fp128 , fp128 *%src
   %conv = fptoui fp128 %f to i32
   ret i32 %conv
 }
diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-11.ll b/llvm/test/CodeGen/SystemZ/fp-conv-11.ll
index 46f4cb3..eb996cb 100644
--- a/llvm/test/CodeGen/SystemZ/fp-conv-11.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-conv-11.ll
@@ -27,7 +27,7 @@
 ; CHECK: ld %f2, 8(%r2)
 ; CHECK: cgxbr %r2, 5, %f0
 ; CHECK: br %r14
-  %f = load fp128 *%src
+  %f = load fp128 , fp128 *%src
   %conv = fptosi fp128 %f to i64
   ret i64 %conv
 }
diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-12.ll b/llvm/test/CodeGen/SystemZ/fp-conv-12.ll
index 770c940..28a5216 100644
--- a/llvm/test/CodeGen/SystemZ/fp-conv-12.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-conv-12.ll
@@ -38,7 +38,7 @@
 ; CHECK: cgxbr
 ; CHECK: xihf
 ; CHECK: br %r14
-  %f = load fp128 *%src
+  %f = load fp128 , fp128 *%src
   %conv = fptoui fp128 %f to i64
   ret i64 %conv
 }
diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-14.ll b/llvm/test/CodeGen/SystemZ/fp-conv-14.ll
index e926e9b..e4f0a27 100644
--- a/llvm/test/CodeGen/SystemZ/fp-conv-14.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-conv-14.ll
@@ -27,7 +27,7 @@
 ; CHECK-DAG: ld %f2, 8(%r2)
 ; CHECK: clfxbr %r2, 5, %f0, 0
 ; CHECK: br %r14
-  %f = load fp128 *%src
+  %f = load fp128 , fp128 *%src
   %conv = fptoui fp128 %f to i32
   ret i32 %conv
 }
@@ -57,7 +57,7 @@
 ; CHECK-DAG: ld %f2, 8(%r2)
 ; CHECK: clgxbr %r2, 5, %f0, 0
 ; CHECK: br %r14
-  %f = load fp128 *%src
+  %f = load fp128 , fp128 *%src
   %conv = fptoui fp128 %f to i64
   ret i64 %conv
 }
diff --git a/llvm/test/CodeGen/SystemZ/fp-copysign-01.ll b/llvm/test/CodeGen/SystemZ/fp-copysign-01.ll
index 50177e5..57ad76f 100644
--- a/llvm/test/CodeGen/SystemZ/fp-copysign-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-copysign-01.ll
@@ -35,7 +35,7 @@
 ; CHECK: ld [[BLOW:%f[0-7]]], 8(%r2)
 ; CHECK: cpsdr %f0, %f0, [[BHIGH]]
 ; CHECK: br %r14
-  %bl = load volatile fp128 *%bptr
+  %bl = load volatile fp128 , fp128 *%bptr
   %b = fptrunc fp128 %bl to float
   %res = call float @copysignf(float %a, float %b) readnone
   ret float %res
@@ -69,7 +69,7 @@
 ; CHECK: ld [[BLOW:%f[0-7]]], 8(%r2)
 ; CHECK: cpsdr %f0, %f0, [[BHIGH]]
 ; CHECK: br %r14
-  %bl = load volatile fp128 *%bptr
+  %bl = load volatile fp128 , fp128 *%bptr
   %b = fptrunc fp128 %bl to double
   %res = call double @copysign(double %a, double %b) readnone
   ret double %res
@@ -86,7 +86,7 @@
 ; CHECK: std [[AHIGH]], 0(%r2)
 ; CHECK: std [[ALOW]], 8(%r2)
 ; CHECK: br %r14
-  %a = load volatile fp128 *%aptr
+  %a = load volatile fp128 , fp128 *%aptr
   %b = fpext float %bf to fp128
   %c = call fp128 @copysignl(fp128 %a, fp128 %b) readnone
   store fp128 %c, fp128 *%cptr
@@ -102,7 +102,7 @@
 ; CHECK: std [[AHIGH]], 0(%r2)
 ; CHECK: std [[ALOW]], 8(%r2)
 ; CHECK: br %r14
-  %a = load volatile fp128 *%aptr
+  %a = load volatile fp128 , fp128 *%aptr
   %b = fpext double %bd to fp128
   %c = call fp128 @copysignl(fp128 %a, fp128 %b) readnone
   store fp128 %c, fp128 *%cptr
@@ -120,8 +120,8 @@
 ; CHECK: std [[AHIGH]], 0(%r2)
 ; CHECK: std [[ALOW]], 8(%r2)
 ; CHECK: br %r14
-  %a = load volatile fp128 *%aptr
-  %b = load volatile fp128 *%bptr
+  %a = load volatile fp128 , fp128 *%aptr
+  %b = load volatile fp128 , fp128 *%bptr
   %c = call fp128 @copysignl(fp128 %a, fp128 %b) readnone
   store fp128 %c, fp128 *%cptr
   ret void
diff --git a/llvm/test/CodeGen/SystemZ/fp-div-01.ll b/llvm/test/CodeGen/SystemZ/fp-div-01.ll
index 269cfd2..0791e8d 100644
--- a/llvm/test/CodeGen/SystemZ/fp-div-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-div-01.ll
@@ -18,7 +18,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: deb %f0, 0(%r2)
 ; CHECK: br %r14
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = fdiv float %f1, %f2
   ret float %res
 }
@@ -29,7 +29,7 @@
 ; CHECK: deb %f0, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1023
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = fdiv float %f1, %f2
   ret float %res
 }
@@ -42,7 +42,7 @@
 ; CHECK: deb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1024
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = fdiv float %f1, %f2
   ret float %res
 }
@@ -54,7 +54,7 @@
 ; CHECK: deb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 -1
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = fdiv float %f1, %f2
   ret float %res
 }
@@ -67,7 +67,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr float, float *%base, i64 %index
   %ptr2 = getelementptr float, float *%ptr1, i64 100
-  %f2 = load float *%ptr2
+  %f2 = load float , float *%ptr2
   %res = fdiv float %f1, %f2
   ret float %res
 }
@@ -89,17 +89,17 @@
   %ptr9 = getelementptr float, float *%ptr0, i64 18
   %ptr10 = getelementptr float, float *%ptr0, i64 20
 
-  %val0 = load float *%ptr0
-  %val1 = load float *%ptr1
-  %val2 = load float *%ptr2
-  %val3 = load float *%ptr3
-  %val4 = load float *%ptr4
-  %val5 = load float *%ptr5
-  %val6 = load float *%ptr6
-  %val7 = load float *%ptr7
-  %val8 = load float *%ptr8
-  %val9 = load float *%ptr9
-  %val10 = load float *%ptr10
+  %val0 = load float , float *%ptr0
+  %val1 = load float , float *%ptr1
+  %val2 = load float , float *%ptr2
+  %val3 = load float , float *%ptr3
+  %val4 = load float , float *%ptr4
+  %val5 = load float , float *%ptr5
+  %val6 = load float , float *%ptr6
+  %val7 = load float , float *%ptr7
+  %val8 = load float , float *%ptr8
+  %val9 = load float , float *%ptr9
+  %val10 = load float , float *%ptr10
 
   %ret = call float @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/fp-div-02.ll b/llvm/test/CodeGen/SystemZ/fp-div-02.ll
index 2d1358f..82eeb480 100644
--- a/llvm/test/CodeGen/SystemZ/fp-div-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-div-02.ll
@@ -18,7 +18,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: ddb %f0, 0(%r2)
 ; CHECK: br %r14
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = fdiv double %f1, %f2
   ret double %res
 }
@@ -29,7 +29,7 @@
 ; CHECK: ddb %f0, 4088(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 511
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = fdiv double %f1, %f2
   ret double %res
 }
@@ -42,7 +42,7 @@
 ; CHECK: ddb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 512
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = fdiv double %f1, %f2
   ret double %res
 }
@@ -54,7 +54,7 @@
 ; CHECK: ddb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 -1
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = fdiv double %f1, %f2
   ret double %res
 }
@@ -67,7 +67,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%base, i64 %index
   %ptr2 = getelementptr double, double *%ptr1, i64 100
-  %f2 = load double *%ptr2
+  %f2 = load double , double *%ptr2
   %res = fdiv double %f1, %f2
   ret double %res
 }
@@ -89,17 +89,17 @@
   %ptr9 = getelementptr double, double *%ptr0, i64 18
   %ptr10 = getelementptr double, double *%ptr0, i64 20
 
-  %val0 = load double *%ptr0
-  %val1 = load double *%ptr1
-  %val2 = load double *%ptr2
-  %val3 = load double *%ptr3
-  %val4 = load double *%ptr4
-  %val5 = load double *%ptr5
-  %val6 = load double *%ptr6
-  %val7 = load double *%ptr7
-  %val8 = load double *%ptr8
-  %val9 = load double *%ptr9
-  %val10 = load double *%ptr10
+  %val0 = load double , double *%ptr0
+  %val1 = load double , double *%ptr1
+  %val2 = load double , double *%ptr2
+  %val3 = load double , double *%ptr3
+  %val4 = load double , double *%ptr4
+  %val5 = load double , double *%ptr5
+  %val6 = load double , double *%ptr6
+  %val7 = load double , double *%ptr7
+  %val8 = load double , double *%ptr8
+  %val9 = load double , double *%ptr9
+  %val10 = load double , double *%ptr10
 
   %ret = call double @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/fp-div-03.ll b/llvm/test/CodeGen/SystemZ/fp-div-03.ll
index 079b349..f052635 100644
--- a/llvm/test/CodeGen/SystemZ/fp-div-03.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-div-03.ll
@@ -12,7 +12,7 @@
 ; CHECK: std %f1, 0(%r2)
 ; CHECK: std %f3, 8(%r2)
 ; CHECK: br %r14
-  %f1 = load fp128 *%ptr
+  %f1 = load fp128 , fp128 *%ptr
   %f2x = fpext float %f2 to fp128
   %sum = fdiv fp128 %f1, %f2x
   store fp128 %sum, fp128 *%ptr
diff --git a/llvm/test/CodeGen/SystemZ/fp-move-01.ll b/llvm/test/CodeGen/SystemZ/fp-move-01.ll
index d16502f..31a8fc5 100644
--- a/llvm/test/CodeGen/SystemZ/fp-move-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-move-01.ll
@@ -22,7 +22,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK: lxr
 ; CHECK: axbr
-  %val = load volatile fp128 *%x
+  %val = load volatile fp128 , fp128 *%x
   %sum = fadd fp128 %val, %val
   store volatile fp128 %sum, fp128 *%x
   store volatile fp128 %val, fp128 *%x
diff --git a/llvm/test/CodeGen/SystemZ/fp-move-02.ll b/llvm/test/CodeGen/SystemZ/fp-move-02.ll
index 505ee8d..2bd63f4 100644
--- a/llvm/test/CodeGen/SystemZ/fp-move-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-move-02.ll
@@ -71,7 +71,7 @@
 ; CHECK: stg
 ; CHECK: stg
 ; CHECK: br %r14
-  %val = load i128 *%b
+  %val = load i128 , i128 *%b
   %res = bitcast i128 %val to fp128
   store fp128 %res, fp128 *%a
   ret void
@@ -102,7 +102,7 @@
 ; CHECK: ld
 ; CHECK: std
 ; CHECK: std
-  %val = load fp128 *%a
+  %val = load fp128 , fp128 *%a
   %res = bitcast fp128 %val to i128
   store i128 %res, i128 *%b
   ret void
@@ -119,34 +119,34 @@
 ; CHECK: %exit
 ; CHECK: br %r14
 entry:
-  %double0 = load volatile double *@dptr
+  %double0 = load volatile double , double *@dptr
   %biased0 = fadd double %double0, %extra
   %int0 = bitcast double %biased0 to i64
-  %double1 = load volatile double *@dptr
+  %double1 = load volatile double , double *@dptr
   %biased1 = fadd double %double1, %extra
   %int1 = bitcast double %biased1 to i64
-  %double2 = load volatile double *@dptr
+  %double2 = load volatile double , double *@dptr
   %biased2 = fadd double %double2, %extra
   %int2 = bitcast double %biased2 to i64
-  %double3 = load volatile double *@dptr
+  %double3 = load volatile double , double *@dptr
   %biased3 = fadd double %double3, %extra
   %int3 = bitcast double %biased3 to i64
-  %double4 = load volatile double *@dptr
+  %double4 = load volatile double , double *@dptr
   %biased4 = fadd double %double4, %extra
   %int4 = bitcast double %biased4 to i64
-  %double5 = load volatile double *@dptr
+  %double5 = load volatile double , double *@dptr
   %biased5 = fadd double %double5, %extra
   %int5 = bitcast double %biased5 to i64
-  %double6 = load volatile double *@dptr
+  %double6 = load volatile double , double *@dptr
   %biased6 = fadd double %double6, %extra
   %int6 = bitcast double %biased6 to i64
-  %double7 = load volatile double *@dptr
+  %double7 = load volatile double , double *@dptr
   %biased7 = fadd double %double7, %extra
   %int7 = bitcast double %biased7 to i64
-  %double8 = load volatile double *@dptr
+  %double8 = load volatile double , double *@dptr
   %biased8 = fadd double %double8, %extra
   %int8 = bitcast double %biased8 to i64
-  %double9 = load volatile double *@dptr
+  %double9 = load volatile double , double *@dptr
   %biased9 = fadd double %double9, %extra
   %int9 = bitcast double %biased9 to i64
   br label %loop
@@ -181,34 +181,34 @@
 ; CHECK: %exit
 ; CHECK: br %r14
 entry:
-  %int0 = load volatile i64 *@iptr
+  %int0 = load volatile i64 , i64 *@iptr
   %masked0 = and i64 %int0, %mask
   %double0 = bitcast i64 %masked0 to double
-  %int1 = load volatile i64 *@iptr
+  %int1 = load volatile i64 , i64 *@iptr
   %masked1 = and i64 %int1, %mask
   %double1 = bitcast i64 %masked1 to double
-  %int2 = load volatile i64 *@iptr
+  %int2 = load volatile i64 , i64 *@iptr
   %masked2 = and i64 %int2, %mask
   %double2 = bitcast i64 %masked2 to double
-  %int3 = load volatile i64 *@iptr
+  %int3 = load volatile i64 , i64 *@iptr
   %masked3 = and i64 %int3, %mask
   %double3 = bitcast i64 %masked3 to double
-  %int4 = load volatile i64 *@iptr
+  %int4 = load volatile i64 , i64 *@iptr
   %masked4 = and i64 %int4, %mask
   %double4 = bitcast i64 %masked4 to double
-  %int5 = load volatile i64 *@iptr
+  %int5 = load volatile i64 , i64 *@iptr
   %masked5 = and i64 %int5, %mask
   %double5 = bitcast i64 %masked5 to double
-  %int6 = load volatile i64 *@iptr
+  %int6 = load volatile i64 , i64 *@iptr
   %masked6 = and i64 %int6, %mask
   %double6 = bitcast i64 %masked6 to double
-  %int7 = load volatile i64 *@iptr
+  %int7 = load volatile i64 , i64 *@iptr
   %masked7 = and i64 %int7, %mask
   %double7 = bitcast i64 %masked7 to double
-  %int8 = load volatile i64 *@iptr
+  %int8 = load volatile i64 , i64 *@iptr
   %masked8 = and i64 %int8, %mask
   %double8 = bitcast i64 %masked8 to double
-  %int9 = load volatile i64 *@iptr
+  %int9 = load volatile i64 , i64 *@iptr
   %masked9 = and i64 %int9, %mask
   %double9 = bitcast i64 %masked9 to double
   br label %loop
@@ -275,7 +275,7 @@
 
 exit:
   %unused1 = call i64 @foo()
-  %factor = load volatile double *@dptr
+  %factor = load volatile double , double *@dptr
 
   %conv0 = bitcast i64 %add0 to double
   %mul0 = fmul double %conv0, %factor
@@ -354,7 +354,7 @@
 
 exit:
   %unused1 = call i64 @foo()
-  %bias = load volatile i64 *@iptr
+  %bias = load volatile i64 , i64 *@iptr
 
   %conv0 = bitcast double %mul0 to i64
   %add0 = add i64 %conv0, %bias
diff --git a/llvm/test/CodeGen/SystemZ/fp-move-03.ll b/llvm/test/CodeGen/SystemZ/fp-move-03.ll
index 9d49aca..f50e097 100644
--- a/llvm/test/CodeGen/SystemZ/fp-move-03.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-move-03.ll
@@ -7,7 +7,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: le %f0, 0(%r2)
 ; CHECK: br %r14
-  %val = load float *%src
+  %val = load float , float *%src
   ret float %val
 }
 
@@ -17,7 +17,7 @@
 ; CHECK: le %f0, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%src, i64 1023
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   ret float %val
 }
 
@@ -27,7 +27,7 @@
 ; CHECK: ley %f0, 4096(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%src, i64 1024
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   ret float %val
 }
 
@@ -37,7 +37,7 @@
 ; CHECK: ley %f0, 524284(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%src, i64 131071
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   ret float %val
 }
 
@@ -49,7 +49,7 @@
 ; CHECK: le %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%src, i64 131072
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   ret float %val
 }
 
@@ -59,7 +59,7 @@
 ; CHECK: ley %f0, -4(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%src, i64 -1
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   ret float %val
 }
 
@@ -69,7 +69,7 @@
 ; CHECK: ley %f0, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%src, i64 -131072
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   ret float %val
 }
 
@@ -81,7 +81,7 @@
 ; CHECK: le %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%src, i64 -131073
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   ret float %val
 }
 
@@ -93,7 +93,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4092
   %ptr = inttoptr i64 %add2 to float *
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   ret float %val
 }
 
@@ -105,6 +105,6 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to float *
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   ret float %val
 }
diff --git a/llvm/test/CodeGen/SystemZ/fp-move-04.ll b/llvm/test/CodeGen/SystemZ/fp-move-04.ll
index 83a64a9..d3728d0 100644
--- a/llvm/test/CodeGen/SystemZ/fp-move-04.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-move-04.ll
@@ -7,7 +7,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: ld %f0, 0(%r2)
 ; CHECK: br %r14
-  %val = load double *%src
+  %val = load double , double *%src
   ret double %val
 }
 
@@ -17,7 +17,7 @@
 ; CHECK: ld %f0, 4088(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%src, i64 511
-  %val = load double *%ptr
+  %val = load double , double *%ptr
   ret double %val
 }
 
@@ -27,7 +27,7 @@
 ; CHECK: ldy %f0, 4096(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%src, i64 512
-  %val = load double *%ptr
+  %val = load double , double *%ptr
   ret double %val
 }
 
@@ -37,7 +37,7 @@
 ; CHECK: ldy %f0, 524280(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%src, i64 65535
-  %val = load double *%ptr
+  %val = load double , double *%ptr
   ret double %val
 }
 
@@ -49,7 +49,7 @@
 ; CHECK: ld %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%src, i64 65536
-  %val = load double *%ptr
+  %val = load double , double *%ptr
   ret double %val
 }
 
@@ -59,7 +59,7 @@
 ; CHECK: ldy %f0, -8(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%src, i64 -1
-  %val = load double *%ptr
+  %val = load double , double *%ptr
   ret double %val
 }
 
@@ -69,7 +69,7 @@
 ; CHECK: ldy %f0, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%src, i64 -65536
-  %val = load double *%ptr
+  %val = load double , double *%ptr
   ret double %val
 }
 
@@ -81,7 +81,7 @@
 ; CHECK: ld %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%src, i64 -65537
-  %val = load double *%ptr
+  %val = load double , double *%ptr
   ret double %val
 }
 
@@ -93,7 +93,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4095
   %ptr = inttoptr i64 %add2 to double *
-  %val = load double *%ptr
+  %val = load double , double *%ptr
   ret double %val
 }
 
@@ -105,6 +105,6 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to double *
-  %val = load double *%ptr
+  %val = load double , double *%ptr
   ret double %val
 }
diff --git a/llvm/test/CodeGen/SystemZ/fp-move-05.ll b/llvm/test/CodeGen/SystemZ/fp-move-05.ll
index d302a0f..da12af6 100644
--- a/llvm/test/CodeGen/SystemZ/fp-move-05.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-move-05.ll
@@ -9,7 +9,7 @@
 ; CHECK: ld %f2, 8(%r2)
 ; CHECK: br %r14
   %ptr = inttoptr i64 %src to fp128 *
-  %val = load fp128 *%ptr
+  %val = load fp128 , fp128 *%ptr
   %trunc = fptrunc fp128 %val to double
   ret double %trunc
 }
@@ -22,7 +22,7 @@
 ; CHECK: br %r14
   %add = add i64 %src, 4080
   %ptr = inttoptr i64 %add to fp128 *
-  %val = load fp128 *%ptr
+  %val = load fp128 , fp128 *%ptr
   %trunc = fptrunc fp128 %val to double
   ret double %trunc
 }
@@ -35,7 +35,7 @@
 ; CHECK: br %r14
   %add = add i64 %src, 4088
   %ptr = inttoptr i64 %add to fp128 *
-  %val = load fp128 *%ptr
+  %val = load fp128 , fp128 *%ptr
   %trunc = fptrunc fp128 %val to double
   ret double %trunc
 }
@@ -48,7 +48,7 @@
 ; CHECK: br %r14
   %add = add i64 %src, 4096
   %ptr = inttoptr i64 %add to fp128 *
-  %val = load fp128 *%ptr
+  %val = load fp128 , fp128 *%ptr
   %trunc = fptrunc fp128 %val to double
   ret double %trunc
 }
@@ -61,7 +61,7 @@
 ; CHECK: br %r14
   %add = add i64 %src, 524272
   %ptr = inttoptr i64 %add to fp128 *
-  %val = load fp128 *%ptr
+  %val = load fp128 , fp128 *%ptr
   %trunc = fptrunc fp128 %val to double
   ret double %trunc
 }
@@ -76,7 +76,7 @@
 ; CHECK: br %r14
   %add = add i64 %src, 524280
   %ptr = inttoptr i64 %add to fp128 *
-  %val = load fp128 *%ptr
+  %val = load fp128 , fp128 *%ptr
   %trunc = fptrunc fp128 %val to double
   ret double %trunc
 }
@@ -90,7 +90,7 @@
 ; CHECK: br %r14
   %add = add i64 %src, -8
   %ptr = inttoptr i64 %add to fp128 *
-  %val = load fp128 *%ptr
+  %val = load fp128 , fp128 *%ptr
   %trunc = fptrunc fp128 %val to double
   ret double %trunc
 }
@@ -103,7 +103,7 @@
 ; CHECK: br %r14
   %add = add i64 %src, -16
   %ptr = inttoptr i64 %add to fp128 *
-  %val = load fp128 *%ptr
+  %val = load fp128 , fp128 *%ptr
   %trunc = fptrunc fp128 %val to double
   ret double %trunc
 }
@@ -116,7 +116,7 @@
 ; CHECK: br %r14
   %add = add i64 %src, -524288
   %ptr = inttoptr i64 %add to fp128 *
-  %val = load fp128 *%ptr
+  %val = load fp128 , fp128 *%ptr
   %trunc = fptrunc fp128 %val to double
   ret double %trunc
 }
@@ -131,7 +131,7 @@
 ; CHECK: br %r14
   %add = add i64 %src, -524296
   %ptr = inttoptr i64 %add to fp128 *
-  %val = load fp128 *%ptr
+  %val = load fp128 , fp128 *%ptr
   %trunc = fptrunc fp128 %val to double
   ret double %trunc
 }
@@ -145,7 +145,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4088
   %ptr = inttoptr i64 %add2 to fp128 *
-  %val = load fp128 *%ptr
+  %val = load fp128 , fp128 *%ptr
   %trunc = fptrunc fp128 %val to double
   ret double %trunc
 }
diff --git a/llvm/test/CodeGen/SystemZ/fp-move-09.ll b/llvm/test/CodeGen/SystemZ/fp-move-09.ll
index 52b2ee2..e4a3e92 100644
--- a/llvm/test/CodeGen/SystemZ/fp-move-09.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-move-09.ll
@@ -9,7 +9,7 @@
 ; CHECK: oihh [[REG]], 16256
 ; CHECK: ldgr %f0, [[REG]]
 ; CHECK: br %r14
-  %base = load i16 *%ptr
+  %base = load i16 , i16 *%ptr
   %ext = zext i16 %base to i32
   %full = or i32 %ext, 1065353216
   %res = bitcast i32 %full to float
@@ -38,7 +38,7 @@
 ; CHECK: br %r14
   %int = bitcast float %val to i32
   %trunc = trunc i32 %int to i8
-  %old = load i8 *%ptr
+  %old = load i8 , i8 *%ptr
   %cmp = icmp eq i32 %which, 0
   %res = select i1 %cmp, i8 %trunc, i8 %old
   store i8 %res, i8 *%ptr
@@ -54,7 +54,7 @@
 ; CHECK: br %r14
   %int = bitcast float %val to i32
   %trunc = trunc i32 %int to i16
-  %old = load i16 *%ptr
+  %old = load i16 , i16 *%ptr
   %cmp = icmp eq i32 %which, 0
   %res = select i1 %cmp, i16 %trunc, i16 %old
   store i16 %res, i16 *%ptr
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-01.ll b/llvm/test/CodeGen/SystemZ/fp-mul-01.ll
index 813765e..3b72d25 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-01.ll
@@ -18,7 +18,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: meeb %f0, 0(%r2)
 ; CHECK: br %r14
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = fmul float %f1, %f2
   ret float %res
 }
@@ -29,7 +29,7 @@
 ; CHECK: meeb %f0, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1023
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = fmul float %f1, %f2
   ret float %res
 }
@@ -42,7 +42,7 @@
 ; CHECK: meeb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1024
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = fmul float %f1, %f2
   ret float %res
 }
@@ -54,7 +54,7 @@
 ; CHECK: meeb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 -1
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = fmul float %f1, %f2
   ret float %res
 }
@@ -67,7 +67,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr float, float *%base, i64 %index
   %ptr2 = getelementptr float, float *%ptr1, i64 100
-  %f2 = load float *%ptr2
+  %f2 = load float , float *%ptr2
   %res = fmul float %f1, %f2
   ret float %res
 }
@@ -89,17 +89,17 @@
   %ptr9 = getelementptr float, float *%ptr0, i64 18
   %ptr10 = getelementptr float, float *%ptr0, i64 20
 
-  %val0 = load float *%ptr0
-  %val1 = load float *%ptr1
-  %val2 = load float *%ptr2
-  %val3 = load float *%ptr3
-  %val4 = load float *%ptr4
-  %val5 = load float *%ptr5
-  %val6 = load float *%ptr6
-  %val7 = load float *%ptr7
-  %val8 = load float *%ptr8
-  %val9 = load float *%ptr9
-  %val10 = load float *%ptr10
+  %val0 = load float , float *%ptr0
+  %val1 = load float , float *%ptr1
+  %val2 = load float , float *%ptr2
+  %val3 = load float , float *%ptr3
+  %val4 = load float , float *%ptr4
+  %val5 = load float , float *%ptr5
+  %val6 = load float , float *%ptr6
+  %val7 = load float , float *%ptr7
+  %val8 = load float , float *%ptr8
+  %val9 = load float , float *%ptr9
+  %val10 = load float , float *%ptr10
 
   %ret = call float @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-02.ll b/llvm/test/CodeGen/SystemZ/fp-mul-02.ll
index 151d5b1..8435c3f 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-02.ll
@@ -20,7 +20,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: mdeb %f0, 0(%r2)
 ; CHECK: br %r14
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %f1x = fpext float %f1 to double
   %f2x = fpext float %f2 to double
   %res = fmul double %f1x, %f2x
@@ -33,7 +33,7 @@
 ; CHECK: mdeb %f0, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1023
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %f1x = fpext float %f1 to double
   %f2x = fpext float %f2 to double
   %res = fmul double %f1x, %f2x
@@ -48,7 +48,7 @@
 ; CHECK: mdeb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1024
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %f1x = fpext float %f1 to double
   %f2x = fpext float %f2 to double
   %res = fmul double %f1x, %f2x
@@ -62,7 +62,7 @@
 ; CHECK: mdeb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 -1
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %f1x = fpext float %f1 to double
   %f2x = fpext float %f2 to double
   %res = fmul double %f1x, %f2x
@@ -77,7 +77,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr float, float *%base, i64 %index
   %ptr2 = getelementptr float, float *%ptr1, i64 100
-  %f2 = load float *%ptr2
+  %f2 = load float , float *%ptr2
   %f1x = fpext float %f1 to double
   %f2x = fpext float %f2 to double
   %res = fmul double %f1x, %f2x
@@ -101,17 +101,17 @@
   %ptr9 = getelementptr float, float *%ptr0, i64 18
   %ptr10 = getelementptr float, float *%ptr0, i64 20
 
-  %val0 = load float *%ptr0
-  %val1 = load float *%ptr1
-  %val2 = load float *%ptr2
-  %val3 = load float *%ptr3
-  %val4 = load float *%ptr4
-  %val5 = load float *%ptr5
-  %val6 = load float *%ptr6
-  %val7 = load float *%ptr7
-  %val8 = load float *%ptr8
-  %val9 = load float *%ptr9
-  %val10 = load float *%ptr10
+  %val0 = load float , float *%ptr0
+  %val1 = load float , float *%ptr1
+  %val2 = load float , float *%ptr2
+  %val3 = load float , float *%ptr3
+  %val4 = load float , float *%ptr4
+  %val5 = load float , float *%ptr5
+  %val6 = load float , float *%ptr6
+  %val7 = load float , float *%ptr7
+  %val8 = load float , float *%ptr8
+  %val9 = load float , float *%ptr9
+  %val10 = load float , float *%ptr10
 
   %frob0 = fadd float %val0, %val0
   %frob1 = fadd float %val1, %val1
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-03.ll b/llvm/test/CodeGen/SystemZ/fp-mul-03.ll
index a9eaba1..701304e 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-03.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-03.ll
@@ -18,7 +18,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: mdb %f0, 0(%r2)
 ; CHECK: br %r14
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = fmul double %f1, %f2
   ret double %res
 }
@@ -29,7 +29,7 @@
 ; CHECK: mdb %f0, 4088(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 511
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = fmul double %f1, %f2
   ret double %res
 }
@@ -42,7 +42,7 @@
 ; CHECK: mdb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 512
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = fmul double %f1, %f2
   ret double %res
 }
@@ -54,7 +54,7 @@
 ; CHECK: mdb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 -1
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = fmul double %f1, %f2
   ret double %res
 }
@@ -67,7 +67,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%base, i64 %index
   %ptr2 = getelementptr double, double *%ptr1, i64 100
-  %f2 = load double *%ptr2
+  %f2 = load double , double *%ptr2
   %res = fmul double %f1, %f2
   ret double %res
 }
@@ -89,17 +89,17 @@
   %ptr9 = getelementptr double, double *%ptr0, i64 18
   %ptr10 = getelementptr double, double *%ptr0, i64 20
 
-  %val0 = load double *%ptr0
-  %val1 = load double *%ptr1
-  %val2 = load double *%ptr2
-  %val3 = load double *%ptr3
-  %val4 = load double *%ptr4
-  %val5 = load double *%ptr5
-  %val6 = load double *%ptr6
-  %val7 = load double *%ptr7
-  %val8 = load double *%ptr8
-  %val9 = load double *%ptr9
-  %val10 = load double *%ptr10
+  %val0 = load double , double *%ptr0
+  %val1 = load double , double *%ptr1
+  %val2 = load double , double *%ptr2
+  %val3 = load double , double *%ptr3
+  %val4 = load double , double *%ptr4
+  %val5 = load double , double *%ptr5
+  %val6 = load double , double *%ptr6
+  %val7 = load double , double *%ptr7
+  %val8 = load double , double *%ptr8
+  %val9 = load double , double *%ptr9
+  %val10 = load double , double *%ptr10
 
   %ret = call double @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-04.ll b/llvm/test/CodeGen/SystemZ/fp-mul-04.ll
index f5dc0d8..4226a3f 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-04.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-04.ll
@@ -27,7 +27,7 @@
 ; CHECK: std %f0, 0(%r3)
 ; CHECK: std %f2, 8(%r3)
 ; CHECK: br %r14
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %f1x = fpext double %f1 to fp128
   %f2x = fpext double %f2 to fp128
   %res = fmul fp128 %f1x, %f2x
@@ -43,7 +43,7 @@
 ; CHECK: std %f2, 8(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 511
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %f1x = fpext double %f1 to fp128
   %f2x = fpext double %f2 to fp128
   %res = fmul fp128 %f1x, %f2x
@@ -61,7 +61,7 @@
 ; CHECK: std %f2, 8(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 512
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %f1x = fpext double %f1 to fp128
   %f2x = fpext double %f2 to fp128
   %res = fmul fp128 %f1x, %f2x
@@ -78,7 +78,7 @@
 ; CHECK: std %f2, 8(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 -1
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %f1x = fpext double %f1 to fp128
   %f2x = fpext double %f2 to fp128
   %res = fmul fp128 %f1x, %f2x
@@ -96,7 +96,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%base, i64 %index
   %ptr2 = getelementptr double, double *%ptr1, i64 100
-  %f2 = load double *%ptr2
+  %f2 = load double , double *%ptr2
   %f1x = fpext double %f1 to fp128
   %f2x = fpext double %f2 to fp128
   %res = fmul fp128 %f1x, %f2x
@@ -121,17 +121,17 @@
   %ptr9 = getelementptr double, double *%ptr0, i64 18
   %ptr10 = getelementptr double, double *%ptr0, i64 20
 
-  %val0 = load double *%ptr0
-  %val1 = load double *%ptr1
-  %val2 = load double *%ptr2
-  %val3 = load double *%ptr3
-  %val4 = load double *%ptr4
-  %val5 = load double *%ptr5
-  %val6 = load double *%ptr6
-  %val7 = load double *%ptr7
-  %val8 = load double *%ptr8
-  %val9 = load double *%ptr9
-  %val10 = load double *%ptr10
+  %val0 = load double , double *%ptr0
+  %val1 = load double , double *%ptr1
+  %val2 = load double , double *%ptr2
+  %val3 = load double , double *%ptr3
+  %val4 = load double , double *%ptr4
+  %val5 = load double , double *%ptr5
+  %val6 = load double , double *%ptr6
+  %val7 = load double , double *%ptr7
+  %val8 = load double , double *%ptr8
+  %val9 = load double , double *%ptr9
+  %val10 = load double , double *%ptr10
 
   %frob0 = fadd double %val0, %val0
   %frob1 = fadd double %val1, %val1
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-05.ll b/llvm/test/CodeGen/SystemZ/fp-mul-05.ll
index 0be1fe8..48d0dcd 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-05.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-05.ll
@@ -12,7 +12,7 @@
 ; CHECK: std %f1, 0(%r2)
 ; CHECK: std %f3, 8(%r2)
 ; CHECK: br %r14
-  %f1 = load fp128 *%ptr
+  %f1 = load fp128 , fp128 *%ptr
   %f2x = fpext float %f2 to fp128
   %diff = fmul fp128 %f1, %f2x
   store fp128 %diff, fp128 *%ptr
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-06.ll b/llvm/test/CodeGen/SystemZ/fp-mul-06.ll
index 2271b96..896fafe 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-06.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-06.ll
@@ -16,7 +16,7 @@
 ; CHECK: maeb %f2, %f0, 0(%r2)
 ; CHECK: ler %f0, %f2
 ; CHECK: br %r14
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
   ret float %res
 }
@@ -27,7 +27,7 @@
 ; CHECK: ler %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1023
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
   ret float %res
 }
@@ -42,7 +42,7 @@
 ; CHECK: ler %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1024
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
   ret float %res
 }
@@ -57,7 +57,7 @@
 ; CHECK: ler %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 -1
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
   ret float %res
 }
@@ -69,7 +69,7 @@
 ; CHECK: ler %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 %index
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
   ret float %res
 }
@@ -82,7 +82,7 @@
 ; CHECK: br %r14
   %index2 = add i64 %index, 1023
   %ptr = getelementptr float, float *%base, i64 %index2
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
   ret float %res
 }
@@ -96,7 +96,7 @@
 ; CHECK: br %r14
   %index2 = add i64 %index, 1024
   %ptr = getelementptr float, float *%base, i64 %index2
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = call float @llvm.fma.f32 (float %f1, float %f2, float %acc)
   ret float %res
 }
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-07.ll b/llvm/test/CodeGen/SystemZ/fp-mul-07.ll
index e8a0410..b1d0ae3 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-07.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-07.ll
@@ -16,7 +16,7 @@
 ; CHECK: madb %f2, %f0, 0(%r2)
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
   ret double %res
 }
@@ -27,7 +27,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 511
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
   ret double %res
 }
@@ -42,7 +42,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 512
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
   ret double %res
 }
@@ -57,7 +57,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 -1
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
   ret double %res
 }
@@ -69,7 +69,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 %index
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
   ret double %res
 }
@@ -82,7 +82,7 @@
 ; CHECK: br %r14
   %index2 = add i64 %index, 511
   %ptr = getelementptr double, double *%base, i64 %index2
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
   ret double %res
 }
@@ -96,7 +96,7 @@
 ; CHECK: br %r14
   %index2 = add i64 %index, 512
   %ptr = getelementptr double, double *%base, i64 %index2
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
   ret double %res
 }
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-08.ll b/llvm/test/CodeGen/SystemZ/fp-mul-08.ll
index ddd6c72..5e5538b 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-08.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-08.ll
@@ -17,7 +17,7 @@
 ; CHECK: mseb %f2, %f0, 0(%r2)
 ; CHECK: ler %f0, %f2
 ; CHECK: br %r14
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %negacc = fsub float -0.0, %acc
   %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
   ret float %res
@@ -29,7 +29,7 @@
 ; CHECK: ler %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1023
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %negacc = fsub float -0.0, %acc
   %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
   ret float %res
@@ -45,7 +45,7 @@
 ; CHECK: ler %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1024
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %negacc = fsub float -0.0, %acc
   %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
   ret float %res
@@ -61,7 +61,7 @@
 ; CHECK: ler %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 -1
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %negacc = fsub float -0.0, %acc
   %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
   ret float %res
@@ -74,7 +74,7 @@
 ; CHECK: ler %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 %index
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %negacc = fsub float -0.0, %acc
   %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
   ret float %res
@@ -88,7 +88,7 @@
 ; CHECK: br %r14
   %index2 = add i64 %index, 1023
   %ptr = getelementptr float, float *%base, i64 %index2
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %negacc = fsub float -0.0, %acc
   %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
   ret float %res
@@ -103,7 +103,7 @@
 ; CHECK: br %r14
   %index2 = add i64 %index, 1024
   %ptr = getelementptr float, float *%base, i64 %index2
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %negacc = fsub float -0.0, %acc
   %res = call float @llvm.fma.f32 (float %f1, float %f2, float %negacc)
   ret float %res
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-09.ll b/llvm/test/CodeGen/SystemZ/fp-mul-09.ll
index e8c71ef..f2eadf5 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-09.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-09.ll
@@ -17,7 +17,7 @@
 ; CHECK: msdb %f2, %f0, 0(%r2)
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %negacc = fsub double -0.0, %acc
   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
   ret double %res
@@ -29,7 +29,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 511
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %negacc = fsub double -0.0, %acc
   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
   ret double %res
@@ -45,7 +45,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 512
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %negacc = fsub double -0.0, %acc
   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
   ret double %res
@@ -61,7 +61,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 -1
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %negacc = fsub double -0.0, %acc
   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
   ret double %res
@@ -74,7 +74,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 %index
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %negacc = fsub double -0.0, %acc
   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
   ret double %res
@@ -88,7 +88,7 @@
 ; CHECK: br %r14
   %index2 = add i64 %index, 511
   %ptr = getelementptr double, double *%base, i64 %index2
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %negacc = fsub double -0.0, %acc
   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
   ret double %res
@@ -103,7 +103,7 @@
 ; CHECK: br %r14
   %index2 = add i64 %index, 512
   %ptr = getelementptr double, double *%base, i64 %index2
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %negacc = fsub double -0.0, %acc
   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
   ret double %res
diff --git a/llvm/test/CodeGen/SystemZ/fp-neg-01.ll b/llvm/test/CodeGen/SystemZ/fp-neg-01.ll
index 1cc6d81..927bcd4 100644
--- a/llvm/test/CodeGen/SystemZ/fp-neg-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-neg-01.ll
@@ -28,10 +28,10 @@
 ; CHECK: lcxbr
 ; CHECK: dxbr
 ; CHECK: br %r14
-  %orig = load fp128 *%ptr
+  %orig = load fp128 , fp128 *%ptr
   %negzero = fpext float -0.0 to fp128
   %neg = fsub fp128 0xL00000000000000008000000000000000, %orig
-  %op2 = load fp128 *%ptr2
+  %op2 = load fp128 , fp128 *%ptr2
   %res = fdiv fp128 %neg, %op2
   store fp128 %res, fp128 *%ptr
   ret void
diff --git a/llvm/test/CodeGen/SystemZ/fp-round-01.ll b/llvm/test/CodeGen/SystemZ/fp-round-01.ll
index 565db5a..bdec02f 100644
--- a/llvm/test/CodeGen/SystemZ/fp-round-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-round-01.ll
@@ -28,7 +28,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK: fixbr %f0, 0, %f0
 ; CHECK: br %r14
-  %src = load fp128 *%ptr
+  %src = load fp128 , fp128 *%ptr
   %res = call fp128 @llvm.rint.f128(fp128 %src)
   store fp128 %res, fp128 *%ptr
   ret void
diff --git a/llvm/test/CodeGen/SystemZ/fp-round-02.ll b/llvm/test/CodeGen/SystemZ/fp-round-02.ll
index d79c9c4..bd5419d 100644
--- a/llvm/test/CodeGen/SystemZ/fp-round-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-round-02.ll
@@ -28,7 +28,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK: fixbr %f0, 0, %f0
 ; CHECK: br %r14
-  %src = load fp128 *%ptr
+  %src = load fp128 , fp128 *%ptr
   %res = call fp128 @llvm.rint.f128(fp128 %src)
   store fp128 %res, fp128 *%ptr
   ret void
@@ -60,7 +60,7 @@
 ; CHECK-LABEL: f6:
 ; CHECK: fixbra %f0, 0, %f0, 4
 ; CHECK: br %r14
-  %src = load fp128 *%ptr
+  %src = load fp128 , fp128 *%ptr
   %res = call fp128 @llvm.nearbyint.f128(fp128 %src)
   store fp128 %res, fp128 *%ptr
   ret void
@@ -92,7 +92,7 @@
 ; CHECK-LABEL: f9:
 ; CHECK: fixbra %f0, 7, %f0, 4
 ; CHECK: br %r14
-  %src = load fp128 *%ptr
+  %src = load fp128 , fp128 *%ptr
   %res = call fp128 @llvm.floor.f128(fp128 %src)
   store fp128 %res, fp128 *%ptr
   ret void
@@ -124,7 +124,7 @@
 ; CHECK-LABEL: f12:
 ; CHECK: fixbra %f0, 6, %f0, 4
 ; CHECK: br %r14
-  %src = load fp128 *%ptr
+  %src = load fp128 , fp128 *%ptr
   %res = call fp128 @llvm.ceil.f128(fp128 %src)
   store fp128 %res, fp128 *%ptr
   ret void
@@ -156,7 +156,7 @@
 ; CHECK-LABEL: f15:
 ; CHECK: fixbra %f0, 5, %f0, 4
 ; CHECK: br %r14
-  %src = load fp128 *%ptr
+  %src = load fp128 , fp128 *%ptr
   %res = call fp128 @llvm.trunc.f128(fp128 %src)
   store fp128 %res, fp128 *%ptr
   ret void
@@ -188,7 +188,7 @@
 ; CHECK-LABEL: f18:
 ; CHECK: fixbra %f0, 1, %f0, 4
 ; CHECK: br %r14
-  %src = load fp128 *%ptr
+  %src = load fp128 , fp128 *%ptr
   %res = call fp128 @llvm.round.f128(fp128 %src)
   store fp128 %res, fp128 *%ptr
   ret void
diff --git a/llvm/test/CodeGen/SystemZ/fp-sqrt-01.ll b/llvm/test/CodeGen/SystemZ/fp-sqrt-01.ll
index 32623ee..e8bf65b 100644
--- a/llvm/test/CodeGen/SystemZ/fp-sqrt-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-sqrt-01.ll
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: sqeb %f0, 0(%r2)
 ; CHECK: br %r14
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   %res = call float @llvm.sqrt.f32(float %val)
   ret float %res
 }
@@ -30,7 +30,7 @@
 ; CHECK: sqeb %f0, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1023
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   %res = call float @llvm.sqrt.f32(float %val)
   ret float %res
 }
@@ -43,7 +43,7 @@
 ; CHECK: sqeb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1024
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   %res = call float @llvm.sqrt.f32(float %val)
   ret float %res
 }
@@ -55,7 +55,7 @@
 ; CHECK: sqeb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 -1
-  %val = load float *%ptr
+  %val = load float , float *%ptr
   %res = call float @llvm.sqrt.f32(float %val)
   ret float %res
 }
@@ -68,7 +68,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr float, float *%base, i64 %index
   %ptr2 = getelementptr float, float *%ptr1, i64 100
-  %val = load float *%ptr2
+  %val = load float , float *%ptr2
   %res = call float @llvm.sqrt.f32(float %val)
   ret float %res
 }
@@ -79,23 +79,23 @@
 ; CHECK-LABEL: f7:
 ; CHECK: sqeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
 ; CHECK: br %r14
-  %val0 = load volatile float *%ptr
-  %val1 = load volatile float *%ptr
-  %val2 = load volatile float *%ptr
-  %val3 = load volatile float *%ptr
-  %val4 = load volatile float *%ptr
-  %val5 = load volatile float *%ptr
-  %val6 = load volatile float *%ptr
-  %val7 = load volatile float *%ptr
-  %val8 = load volatile float *%ptr
-  %val9 = load volatile float *%ptr
-  %val10 = load volatile float *%ptr
-  %val11 = load volatile float *%ptr
-  %val12 = load volatile float *%ptr
-  %val13 = load volatile float *%ptr
-  %val14 = load volatile float *%ptr
-  %val15 = load volatile float *%ptr
-  %val16 = load volatile float *%ptr
+  %val0 = load volatile float , float *%ptr
+  %val1 = load volatile float , float *%ptr
+  %val2 = load volatile float , float *%ptr
+  %val3 = load volatile float , float *%ptr
+  %val4 = load volatile float , float *%ptr
+  %val5 = load volatile float , float *%ptr
+  %val6 = load volatile float , float *%ptr
+  %val7 = load volatile float , float *%ptr
+  %val8 = load volatile float , float *%ptr
+  %val9 = load volatile float , float *%ptr
+  %val10 = load volatile float , float *%ptr
+  %val11 = load volatile float , float *%ptr
+  %val12 = load volatile float , float *%ptr
+  %val13 = load volatile float , float *%ptr
+  %val14 = load volatile float , float *%ptr
+  %val15 = load volatile float , float *%ptr
+  %val16 = load volatile float , float *%ptr
 
   %sqrt0 = call float @llvm.sqrt.f32(float %val0)
   %sqrt1 = call float @llvm.sqrt.f32(float %val1)
diff --git a/llvm/test/CodeGen/SystemZ/fp-sqrt-02.ll b/llvm/test/CodeGen/SystemZ/fp-sqrt-02.ll
index 021a886..a6d987b 100644
--- a/llvm/test/CodeGen/SystemZ/fp-sqrt-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-sqrt-02.ll
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: sqdb %f0, 0(%r2)
 ; CHECK: br %r14
-  %val = load double *%ptr
+  %val = load double , double *%ptr
   %res = call double @llvm.sqrt.f64(double %val)
   ret double %res
 }
@@ -30,7 +30,7 @@
 ; CHECK: sqdb %f0, 4088(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 511
-  %val = load double *%ptr
+  %val = load double , double *%ptr
   %res = call double @llvm.sqrt.f64(double %val)
   ret double %res
 }
@@ -43,7 +43,7 @@
 ; CHECK: sqdb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 512
-  %val = load double *%ptr
+  %val = load double , double *%ptr
   %res = call double @llvm.sqrt.f64(double %val)
   ret double %res
 }
@@ -55,7 +55,7 @@
 ; CHECK: sqdb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 -1
-  %val = load double *%ptr
+  %val = load double , double *%ptr
   %res = call double @llvm.sqrt.f64(double %val)
   ret double %res
 }
@@ -68,7 +68,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%base, i64 %index
   %ptr2 = getelementptr double, double *%ptr1, i64 100
-  %val = load double *%ptr2
+  %val = load double , double *%ptr2
   %res = call double @llvm.sqrt.f64(double %val)
   ret double %res
 }
@@ -79,23 +79,23 @@
 ; CHECK-LABEL: f7:
 ; CHECK: sqdb {{%f[0-9]+}}, 160(%r15)
 ; CHECK: br %r14
-  %val0 = load volatile double *%ptr
-  %val1 = load volatile double *%ptr
-  %val2 = load volatile double *%ptr
-  %val3 = load volatile double *%ptr
-  %val4 = load volatile double *%ptr
-  %val5 = load volatile double *%ptr
-  %val6 = load volatile double *%ptr
-  %val7 = load volatile double *%ptr
-  %val8 = load volatile double *%ptr
-  %val9 = load volatile double *%ptr
-  %val10 = load volatile double *%ptr
-  %val11 = load volatile double *%ptr
-  %val12 = load volatile double *%ptr
-  %val13 = load volatile double *%ptr
-  %val14 = load volatile double *%ptr
-  %val15 = load volatile double *%ptr
-  %val16 = load volatile double *%ptr
+  %val0 = load volatile double , double *%ptr
+  %val1 = load volatile double , double *%ptr
+  %val2 = load volatile double , double *%ptr
+  %val3 = load volatile double , double *%ptr
+  %val4 = load volatile double , double *%ptr
+  %val5 = load volatile double , double *%ptr
+  %val6 = load volatile double , double *%ptr
+  %val7 = load volatile double , double *%ptr
+  %val8 = load volatile double , double *%ptr
+  %val9 = load volatile double , double *%ptr
+  %val10 = load volatile double , double *%ptr
+  %val11 = load volatile double , double *%ptr
+  %val12 = load volatile double , double *%ptr
+  %val13 = load volatile double , double *%ptr
+  %val14 = load volatile double , double *%ptr
+  %val15 = load volatile double , double *%ptr
+  %val16 = load volatile double , double *%ptr
 
   %sqrt0 = call double @llvm.sqrt.f64(double %val0)
   %sqrt1 = call double @llvm.sqrt.f64(double %val1)
diff --git a/llvm/test/CodeGen/SystemZ/fp-sqrt-03.ll b/llvm/test/CodeGen/SystemZ/fp-sqrt-03.ll
index 7142644..4bc05f1 100644
--- a/llvm/test/CodeGen/SystemZ/fp-sqrt-03.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-sqrt-03.ll
@@ -13,7 +13,7 @@
 ; CHECK: std %f0, 0(%r2)
 ; CHECK: std %f2, 8(%r2)
 ; CHECK: br %r14
-  %orig = load fp128 *%ptr
+  %orig = load fp128 , fp128 *%ptr
   %sqrt = call fp128 @llvm.sqrt.f128(fp128 %orig)
   store fp128 %sqrt, fp128 *%ptr
   ret void
diff --git a/llvm/test/CodeGen/SystemZ/fp-sub-01.ll b/llvm/test/CodeGen/SystemZ/fp-sub-01.ll
index b12629b..f4185ca 100644
--- a/llvm/test/CodeGen/SystemZ/fp-sub-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-sub-01.ll
@@ -18,7 +18,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: seb %f0, 0(%r2)
 ; CHECK: br %r14
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = fsub float %f1, %f2
   ret float %res
 }
@@ -29,7 +29,7 @@
 ; CHECK: seb %f0, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1023
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = fsub float %f1, %f2
   ret float %res
 }
@@ -42,7 +42,7 @@
 ; CHECK: seb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 1024
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = fsub float %f1, %f2
   ret float %res
 }
@@ -54,7 +54,7 @@
 ; CHECK: seb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr float, float *%base, i64 -1
-  %f2 = load float *%ptr
+  %f2 = load float , float *%ptr
   %res = fsub float %f1, %f2
   ret float %res
 }
@@ -67,7 +67,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr float, float *%base, i64 %index
   %ptr2 = getelementptr float, float *%ptr1, i64 100
-  %f2 = load float *%ptr2
+  %f2 = load float , float *%ptr2
   %res = fsub float %f1, %f2
   ret float %res
 }
@@ -89,17 +89,17 @@
   %ptr9 = getelementptr float, float *%ptr0, i64 18
   %ptr10 = getelementptr float, float *%ptr0, i64 20
 
-  %val0 = load float *%ptr0
-  %val1 = load float *%ptr1
-  %val2 = load float *%ptr2
-  %val3 = load float *%ptr3
-  %val4 = load float *%ptr4
-  %val5 = load float *%ptr5
-  %val6 = load float *%ptr6
-  %val7 = load float *%ptr7
-  %val8 = load float *%ptr8
-  %val9 = load float *%ptr9
-  %val10 = load float *%ptr10
+  %val0 = load float , float *%ptr0
+  %val1 = load float , float *%ptr1
+  %val2 = load float , float *%ptr2
+  %val3 = load float , float *%ptr3
+  %val4 = load float , float *%ptr4
+  %val5 = load float , float *%ptr5
+  %val6 = load float , float *%ptr6
+  %val7 = load float , float *%ptr7
+  %val8 = load float , float *%ptr8
+  %val9 = load float , float *%ptr9
+  %val10 = load float , float *%ptr10
 
   %ret = call float @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/fp-sub-02.ll b/llvm/test/CodeGen/SystemZ/fp-sub-02.ll
index 9b4a214..f59ec0a 100644
--- a/llvm/test/CodeGen/SystemZ/fp-sub-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-sub-02.ll
@@ -18,7 +18,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: sdb %f0, 0(%r2)
 ; CHECK: br %r14
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = fsub double %f1, %f2
   ret double %res
 }
@@ -29,7 +29,7 @@
 ; CHECK: sdb %f0, 4088(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 511
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = fsub double %f1, %f2
   ret double %res
 }
@@ -42,7 +42,7 @@
 ; CHECK: sdb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 512
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = fsub double %f1, %f2
   ret double %res
 }
@@ -54,7 +54,7 @@
 ; CHECK: sdb %f0, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 -1
-  %f2 = load double *%ptr
+  %f2 = load double , double *%ptr
   %res = fsub double %f1, %f2
   ret double %res
 }
@@ -67,7 +67,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%base, i64 %index
   %ptr2 = getelementptr double, double *%ptr1, i64 100
-  %f2 = load double *%ptr2
+  %f2 = load double , double *%ptr2
   %res = fsub double %f1, %f2
   ret double %res
 }
@@ -89,17 +89,17 @@
   %ptr9 = getelementptr double, double *%ptr0, i64 18
   %ptr10 = getelementptr double, double *%ptr0, i64 20
 
-  %val0 = load double *%ptr0
-  %val1 = load double *%ptr1
-  %val2 = load double *%ptr2
-  %val3 = load double *%ptr3
-  %val4 = load double *%ptr4
-  %val5 = load double *%ptr5
-  %val6 = load double *%ptr6
-  %val7 = load double *%ptr7
-  %val8 = load double *%ptr8
-  %val9 = load double *%ptr9
-  %val10 = load double *%ptr10
+  %val0 = load double , double *%ptr0
+  %val1 = load double , double *%ptr1
+  %val2 = load double , double *%ptr2
+  %val3 = load double , double *%ptr3
+  %val4 = load double , double *%ptr4
+  %val5 = load double , double *%ptr5
+  %val6 = load double , double *%ptr6
+  %val7 = load double , double *%ptr7
+  %val8 = load double , double *%ptr8
+  %val9 = load double , double *%ptr9
+  %val10 = load double , double *%ptr10
 
   %ret = call double @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/fp-sub-03.ll b/llvm/test/CodeGen/SystemZ/fp-sub-03.ll
index a1404c4..86faafe 100644
--- a/llvm/test/CodeGen/SystemZ/fp-sub-03.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-sub-03.ll
@@ -12,7 +12,7 @@
 ; CHECK: std %f1, 0(%r2)
 ; CHECK: std %f3, 8(%r2)
 ; CHECK: br %r14
-  %f1 = load fp128 *%ptr
+  %f1 = load fp128 , fp128 *%ptr
   %f2x = fpext float %f2 to fp128
   %sum = fsub fp128 %f1, %f2x
   store fp128 %sum, fp128 *%ptr
diff --git a/llvm/test/CodeGen/SystemZ/frame-02.ll b/llvm/test/CodeGen/SystemZ/frame-02.ll
index 9a7f8ea..a41db77 100644
--- a/llvm/test/CodeGen/SystemZ/frame-02.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-02.ll
@@ -37,22 +37,22 @@
 ; CHECK: ld %f15, 160(%r15)
 ; CHECK: aghi %r15, 224
 ; CHECK: br %r14
-  %l0 = load volatile float *%ptr
-  %l1 = load volatile float *%ptr
-  %l2 = load volatile float *%ptr
-  %l3 = load volatile float *%ptr
-  %l4 = load volatile float *%ptr
-  %l5 = load volatile float *%ptr
-  %l6 = load volatile float *%ptr
-  %l7 = load volatile float *%ptr
-  %l8 = load volatile float *%ptr
-  %l9 = load volatile float *%ptr
-  %l10 = load volatile float *%ptr
-  %l11 = load volatile float *%ptr
-  %l12 = load volatile float *%ptr
-  %l13 = load volatile float *%ptr
-  %l14 = load volatile float *%ptr
-  %l15 = load volatile float *%ptr
+  %l0 = load volatile float , float *%ptr
+  %l1 = load volatile float , float *%ptr
+  %l2 = load volatile float , float *%ptr
+  %l3 = load volatile float , float *%ptr
+  %l4 = load volatile float , float *%ptr
+  %l5 = load volatile float , float *%ptr
+  %l6 = load volatile float , float *%ptr
+  %l7 = load volatile float , float *%ptr
+  %l8 = load volatile float , float *%ptr
+  %l9 = load volatile float , float *%ptr
+  %l10 = load volatile float , float *%ptr
+  %l11 = load volatile float , float *%ptr
+  %l12 = load volatile float , float *%ptr
+  %l13 = load volatile float , float *%ptr
+  %l14 = load volatile float , float *%ptr
+  %l15 = load volatile float , float *%ptr
   %add0 = fadd float %l0, %l0
   %add1 = fadd float %l1, %add0
   %add2 = fadd float %l2, %add1
@@ -119,21 +119,21 @@
 ; CHECK: ld %f14, 160(%r15)
 ; CHECK: aghi %r15, 216
 ; CHECK: br %r14
-  %l0 = load volatile float *%ptr
-  %l1 = load volatile float *%ptr
-  %l2 = load volatile float *%ptr
-  %l3 = load volatile float *%ptr
-  %l4 = load volatile float *%ptr
-  %l5 = load volatile float *%ptr
-  %l6 = load volatile float *%ptr
-  %l7 = load volatile float *%ptr
-  %l8 = load volatile float *%ptr
-  %l9 = load volatile float *%ptr
-  %l10 = load volatile float *%ptr
-  %l11 = load volatile float *%ptr
-  %l12 = load volatile float *%ptr
-  %l13 = load volatile float *%ptr
-  %l14 = load volatile float *%ptr
+  %l0 = load volatile float , float *%ptr
+  %l1 = load volatile float , float *%ptr
+  %l2 = load volatile float , float *%ptr
+  %l3 = load volatile float , float *%ptr
+  %l4 = load volatile float , float *%ptr
+  %l5 = load volatile float , float *%ptr
+  %l6 = load volatile float , float *%ptr
+  %l7 = load volatile float , float *%ptr
+  %l8 = load volatile float , float *%ptr
+  %l9 = load volatile float , float *%ptr
+  %l10 = load volatile float , float *%ptr
+  %l11 = load volatile float , float *%ptr
+  %l12 = load volatile float , float *%ptr
+  %l13 = load volatile float , float *%ptr
+  %l14 = load volatile float , float *%ptr
   %add0 = fadd float %l0, %l0
   %add1 = fadd float %l1, %add0
   %add2 = fadd float %l2, %add1
@@ -185,15 +185,15 @@
 ; CHECK: ld %f8, 160(%r15)
 ; CHECK: aghi %r15, 168
 ; CHECK: br %r14
-  %l0 = load volatile float *%ptr
-  %l1 = load volatile float *%ptr
-  %l2 = load volatile float *%ptr
-  %l3 = load volatile float *%ptr
-  %l4 = load volatile float *%ptr
-  %l5 = load volatile float *%ptr
-  %l6 = load volatile float *%ptr
-  %l7 = load volatile float *%ptr
-  %l8 = load volatile float *%ptr
+  %l0 = load volatile float , float *%ptr
+  %l1 = load volatile float , float *%ptr
+  %l2 = load volatile float , float *%ptr
+  %l3 = load volatile float , float *%ptr
+  %l4 = load volatile float , float *%ptr
+  %l5 = load volatile float , float *%ptr
+  %l6 = load volatile float , float *%ptr
+  %l7 = load volatile float , float *%ptr
+  %l8 = load volatile float , float *%ptr
   %add0 = fadd float %l0, %l0
   %add1 = fadd float %l1, %add0
   %add2 = fadd float %l2, %add1
@@ -229,14 +229,14 @@
 ; CHECK-NOT: %f14
 ; CHECK-NOT: %f15
 ; CHECK: br %r14
-  %l0 = load volatile float *%ptr
-  %l1 = load volatile float *%ptr
-  %l2 = load volatile float *%ptr
-  %l3 = load volatile float *%ptr
-  %l4 = load volatile float *%ptr
-  %l5 = load volatile float *%ptr
-  %l6 = load volatile float *%ptr
-  %l7 = load volatile float *%ptr
+  %l0 = load volatile float , float *%ptr
+  %l1 = load volatile float , float *%ptr
+  %l2 = load volatile float , float *%ptr
+  %l3 = load volatile float , float *%ptr
+  %l4 = load volatile float , float *%ptr
+  %l5 = load volatile float , float *%ptr
+  %l6 = load volatile float , float *%ptr
+  %l7 = load volatile float , float *%ptr
   %add0 = fadd float %l0, %l0
   %add1 = fadd float %l1, %add0
   %add2 = fadd float %l2, %add1
diff --git a/llvm/test/CodeGen/SystemZ/frame-03.ll b/llvm/test/CodeGen/SystemZ/frame-03.ll
index db146c7..029c6d6 100644
--- a/llvm/test/CodeGen/SystemZ/frame-03.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-03.ll
@@ -39,22 +39,22 @@
 ; CHECK: ld %f15, 160(%r15)
 ; CHECK: aghi %r15, 224
 ; CHECK: br %r14
-  %l0 = load volatile double *%ptr
-  %l1 = load volatile double *%ptr
-  %l2 = load volatile double *%ptr
-  %l3 = load volatile double *%ptr
-  %l4 = load volatile double *%ptr
-  %l5 = load volatile double *%ptr
-  %l6 = load volatile double *%ptr
-  %l7 = load volatile double *%ptr
-  %l8 = load volatile double *%ptr
-  %l9 = load volatile double *%ptr
-  %l10 = load volatile double *%ptr
-  %l11 = load volatile double *%ptr
-  %l12 = load volatile double *%ptr
-  %l13 = load volatile double *%ptr
-  %l14 = load volatile double *%ptr
-  %l15 = load volatile double *%ptr
+  %l0 = load volatile double , double *%ptr
+  %l1 = load volatile double , double *%ptr
+  %l2 = load volatile double , double *%ptr
+  %l3 = load volatile double , double *%ptr
+  %l4 = load volatile double , double *%ptr
+  %l5 = load volatile double , double *%ptr
+  %l6 = load volatile double , double *%ptr
+  %l7 = load volatile double , double *%ptr
+  %l8 = load volatile double , double *%ptr
+  %l9 = load volatile double , double *%ptr
+  %l10 = load volatile double , double *%ptr
+  %l11 = load volatile double , double *%ptr
+  %l12 = load volatile double , double *%ptr
+  %l13 = load volatile double , double *%ptr
+  %l14 = load volatile double , double *%ptr
+  %l15 = load volatile double , double *%ptr
   %add0 = fadd double %l0, %l0
   %add1 = fadd double %l1, %add0
   %add2 = fadd double %l2, %add1
@@ -121,21 +121,21 @@
 ; CHECK: ld %f14, 160(%r15)
 ; CHECK: aghi %r15, 216
 ; CHECK: br %r14
-  %l0 = load volatile double *%ptr
-  %l1 = load volatile double *%ptr
-  %l2 = load volatile double *%ptr
-  %l3 = load volatile double *%ptr
-  %l4 = load volatile double *%ptr
-  %l5 = load volatile double *%ptr
-  %l6 = load volatile double *%ptr
-  %l7 = load volatile double *%ptr
-  %l8 = load volatile double *%ptr
-  %l9 = load volatile double *%ptr
-  %l10 = load volatile double *%ptr
-  %l11 = load volatile double *%ptr
-  %l12 = load volatile double *%ptr
-  %l13 = load volatile double *%ptr
-  %l14 = load volatile double *%ptr
+  %l0 = load volatile double , double *%ptr
+  %l1 = load volatile double , double *%ptr
+  %l2 = load volatile double , double *%ptr
+  %l3 = load volatile double , double *%ptr
+  %l4 = load volatile double , double *%ptr
+  %l5 = load volatile double , double *%ptr
+  %l6 = load volatile double , double *%ptr
+  %l7 = load volatile double , double *%ptr
+  %l8 = load volatile double , double *%ptr
+  %l9 = load volatile double , double *%ptr
+  %l10 = load volatile double , double *%ptr
+  %l11 = load volatile double , double *%ptr
+  %l12 = load volatile double , double *%ptr
+  %l13 = load volatile double , double *%ptr
+  %l14 = load volatile double , double *%ptr
   %add0 = fadd double %l0, %l0
   %add1 = fadd double %l1, %add0
   %add2 = fadd double %l2, %add1
@@ -187,15 +187,15 @@
 ; CHECK: ld %f8, 160(%r15)
 ; CHECK: aghi %r15, 168
 ; CHECK: br %r14
-  %l0 = load volatile double *%ptr
-  %l1 = load volatile double *%ptr
-  %l2 = load volatile double *%ptr
-  %l3 = load volatile double *%ptr
-  %l4 = load volatile double *%ptr
-  %l5 = load volatile double *%ptr
-  %l6 = load volatile double *%ptr
-  %l7 = load volatile double *%ptr
-  %l8 = load volatile double *%ptr
+  %l0 = load volatile double , double *%ptr
+  %l1 = load volatile double , double *%ptr
+  %l2 = load volatile double , double *%ptr
+  %l3 = load volatile double , double *%ptr
+  %l4 = load volatile double , double *%ptr
+  %l5 = load volatile double , double *%ptr
+  %l6 = load volatile double , double *%ptr
+  %l7 = load volatile double , double *%ptr
+  %l8 = load volatile double , double *%ptr
   %add0 = fadd double %l0, %l0
   %add1 = fadd double %l1, %add0
   %add2 = fadd double %l2, %add1
@@ -231,14 +231,14 @@
 ; CHECK-NOT: %f14
 ; CHECK-NOT: %f15
 ; CHECK: br %r14
-  %l0 = load volatile double *%ptr
-  %l1 = load volatile double *%ptr
-  %l2 = load volatile double *%ptr
-  %l3 = load volatile double *%ptr
-  %l4 = load volatile double *%ptr
-  %l5 = load volatile double *%ptr
-  %l6 = load volatile double *%ptr
-  %l7 = load volatile double *%ptr
+  %l0 = load volatile double , double *%ptr
+  %l1 = load volatile double , double *%ptr
+  %l2 = load volatile double , double *%ptr
+  %l3 = load volatile double , double *%ptr
+  %l4 = load volatile double , double *%ptr
+  %l5 = load volatile double , double *%ptr
+  %l6 = load volatile double , double *%ptr
+  %l7 = load volatile double , double *%ptr
   %add0 = fadd double %l0, %l0
   %add1 = fadd double %l1, %add0
   %add2 = fadd double %l2, %add1
diff --git a/llvm/test/CodeGen/SystemZ/frame-04.ll b/llvm/test/CodeGen/SystemZ/frame-04.ll
index 93c59a3..6020509 100644
--- a/llvm/test/CodeGen/SystemZ/frame-04.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-04.ll
@@ -38,14 +38,14 @@
 ; CHECK: ld %f15, 160(%r15)
 ; CHECK: aghi %r15, 224
 ; CHECK: br %r14
-  %l0 = load volatile fp128 *%ptr
-  %l1 = load volatile fp128 *%ptr
-  %l4 = load volatile fp128 *%ptr
-  %l5 = load volatile fp128 *%ptr
-  %l8 = load volatile fp128 *%ptr
-  %l9 = load volatile fp128 *%ptr
-  %l12 = load volatile fp128 *%ptr
-  %l13 = load volatile fp128 *%ptr
+  %l0 = load volatile fp128 , fp128 *%ptr
+  %l1 = load volatile fp128 , fp128 *%ptr
+  %l4 = load volatile fp128 , fp128 *%ptr
+  %l5 = load volatile fp128 , fp128 *%ptr
+  %l8 = load volatile fp128 , fp128 *%ptr
+  %l9 = load volatile fp128 , fp128 *%ptr
+  %l12 = load volatile fp128 , fp128 *%ptr
+  %l13 = load volatile fp128 , fp128 *%ptr
   %add0 = fadd fp128 %l0, %l0
   %add1 = fadd fp128 %l1, %add0
   %add4 = fadd fp128 %l4, %add1
@@ -94,13 +94,13 @@
 ; CHECK: ld %f14, 160(%r15)
 ; CHECK: aghi %r15, 208
 ; CHECK: br %r14
-  %l0 = load volatile fp128 *%ptr
-  %l1 = load volatile fp128 *%ptr
-  %l4 = load volatile fp128 *%ptr
-  %l5 = load volatile fp128 *%ptr
-  %l8 = load volatile fp128 *%ptr
-  %l9 = load volatile fp128 *%ptr
-  %l12 = load volatile fp128 *%ptr
+  %l0 = load volatile fp128 , fp128 *%ptr
+  %l1 = load volatile fp128 , fp128 *%ptr
+  %l4 = load volatile fp128 , fp128 *%ptr
+  %l5 = load volatile fp128 , fp128 *%ptr
+  %l8 = load volatile fp128 , fp128 *%ptr
+  %l9 = load volatile fp128 , fp128 *%ptr
+  %l12 = load volatile fp128 , fp128 *%ptr
   %add0 = fadd fp128 %l0, %l0
   %add1 = fadd fp128 %l1, %add0
   %add4 = fadd fp128 %l4, %add1
@@ -139,11 +139,11 @@
 ; CHECK: ld %f10, 160(%r15)
 ; CHECK: aghi %r15, 176
 ; CHECK: br %r14
-  %l0 = load volatile fp128 *%ptr
-  %l1 = load volatile fp128 *%ptr
-  %l4 = load volatile fp128 *%ptr
-  %l5 = load volatile fp128 *%ptr
-  %l8 = load volatile fp128 *%ptr
+  %l0 = load volatile fp128 , fp128 *%ptr
+  %l1 = load volatile fp128 , fp128 *%ptr
+  %l4 = load volatile fp128 , fp128 *%ptr
+  %l5 = load volatile fp128 , fp128 *%ptr
+  %l8 = load volatile fp128 , fp128 *%ptr
   %add0 = fadd fp128 %l0, %l0
   %add1 = fadd fp128 %l1, %add0
   %add4 = fadd fp128 %l4, %add1
@@ -171,10 +171,10 @@
 ; CHECK-NOT: %f14
 ; CHECK-NOT: %f15
 ; CHECK: br %r14
-  %l0 = load volatile fp128 *%ptr
-  %l1 = load volatile fp128 *%ptr
-  %l4 = load volatile fp128 *%ptr
-  %l5 = load volatile fp128 *%ptr
+  %l0 = load volatile fp128 , fp128 *%ptr
+  %l1 = load volatile fp128 , fp128 *%ptr
+  %l4 = load volatile fp128 , fp128 *%ptr
+  %l5 = load volatile fp128 , fp128 *%ptr
   %add0 = fadd fp128 %l0, %l0
   %add1 = fadd fp128 %l1, %add0
   %add4 = fadd fp128 %l4, %add1
diff --git a/llvm/test/CodeGen/SystemZ/frame-05.ll b/llvm/test/CodeGen/SystemZ/frame-05.ll
index cfe69b2..93130dc 100644
--- a/llvm/test/CodeGen/SystemZ/frame-05.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-05.ll
@@ -32,20 +32,20 @@
 ; CHECK: st {{.*}}, 4(%r2)
 ; CHECK: lmg %r6, %r15, 48(%r15)
 ; CHECK: br %r14
-  %l0 = load volatile i32 *%ptr
-  %l1 = load volatile i32 *%ptr
-  %l3 = load volatile i32 *%ptr
-  %l4 = load volatile i32 *%ptr
-  %l5 = load volatile i32 *%ptr
-  %l6 = load volatile i32 *%ptr
-  %l7 = load volatile i32 *%ptr
-  %l8 = load volatile i32 *%ptr
-  %l9 = load volatile i32 *%ptr
-  %l10 = load volatile i32 *%ptr
-  %l11 = load volatile i32 *%ptr
-  %l12 = load volatile i32 *%ptr
-  %l13 = load volatile i32 *%ptr
-  %l14 = load volatile i32 *%ptr
+  %l0 = load volatile i32 , i32 *%ptr
+  %l1 = load volatile i32 , i32 *%ptr
+  %l3 = load volatile i32 , i32 *%ptr
+  %l4 = load volatile i32 , i32 *%ptr
+  %l5 = load volatile i32 , i32 *%ptr
+  %l6 = load volatile i32 , i32 *%ptr
+  %l7 = load volatile i32 , i32 *%ptr
+  %l8 = load volatile i32 , i32 *%ptr
+  %l9 = load volatile i32 , i32 *%ptr
+  %l10 = load volatile i32 , i32 *%ptr
+  %l11 = load volatile i32 , i32 *%ptr
+  %l12 = load volatile i32 , i32 *%ptr
+  %l13 = load volatile i32 , i32 *%ptr
+  %l14 = load volatile i32 , i32 *%ptr
   %add0 = add i32 %l0, %l0
   %add1 = add i32 %l1, %add0
   %add3 = add i32 %l3, %add1
@@ -100,19 +100,19 @@
 ; CHECK: st {{.*}}, 4(%r2)
 ; CHECK: lmg %r7, %r15, 56(%r15)
 ; CHECK: br %r14
-  %l0 = load volatile i32 *%ptr
-  %l1 = load volatile i32 *%ptr
-  %l3 = load volatile i32 *%ptr
-  %l4 = load volatile i32 *%ptr
-  %l5 = load volatile i32 *%ptr
-  %l7 = load volatile i32 *%ptr
-  %l8 = load volatile i32 *%ptr
-  %l9 = load volatile i32 *%ptr
-  %l10 = load volatile i32 *%ptr
-  %l11 = load volatile i32 *%ptr
-  %l12 = load volatile i32 *%ptr
-  %l13 = load volatile i32 *%ptr
-  %l14 = load volatile i32 *%ptr
+  %l0 = load volatile i32 , i32 *%ptr
+  %l1 = load volatile i32 , i32 *%ptr
+  %l3 = load volatile i32 , i32 *%ptr
+  %l4 = load volatile i32 , i32 *%ptr
+  %l5 = load volatile i32 , i32 *%ptr
+  %l7 = load volatile i32 , i32 *%ptr
+  %l8 = load volatile i32 , i32 *%ptr
+  %l9 = load volatile i32 , i32 *%ptr
+  %l10 = load volatile i32 , i32 *%ptr
+  %l11 = load volatile i32 , i32 *%ptr
+  %l12 = load volatile i32 , i32 *%ptr
+  %l13 = load volatile i32 , i32 *%ptr
+  %l14 = load volatile i32 , i32 *%ptr
   %add0 = add i32 %l0, %l0
   %add1 = add i32 %l1, %add0
   %add3 = add i32 %l3, %add1
@@ -163,12 +163,12 @@
 ; CHECK: st {{.*}}, 4(%r2)
 ; CHECK: lmg %r14, %r15, 112(%r15)
 ; CHECK: br %r14
-  %l0 = load volatile i32 *%ptr
-  %l1 = load volatile i32 *%ptr
-  %l3 = load volatile i32 *%ptr
-  %l4 = load volatile i32 *%ptr
-  %l5 = load volatile i32 *%ptr
-  %l14 = load volatile i32 *%ptr
+  %l0 = load volatile i32 , i32 *%ptr
+  %l1 = load volatile i32 , i32 *%ptr
+  %l3 = load volatile i32 , i32 *%ptr
+  %l4 = load volatile i32 , i32 *%ptr
+  %l5 = load volatile i32 , i32 *%ptr
+  %l14 = load volatile i32 , i32 *%ptr
   %add0 = add i32 %l0, %l0
   %add1 = add i32 %l1, %add0
   %add3 = add i32 %l3, %add1
@@ -199,11 +199,11 @@
 ; CHECK-NOT: %r12
 ; CHECK-NOT: %r13
 ; CHECK: br %r14
-  %l0 = load volatile i32 *%ptr
-  %l1 = load volatile i32 *%ptr
-  %l3 = load volatile i32 *%ptr
-  %l4 = load volatile i32 *%ptr
-  %l5 = load volatile i32 *%ptr
+  %l0 = load volatile i32 , i32 *%ptr
+  %l1 = load volatile i32 , i32 *%ptr
+  %l3 = load volatile i32 , i32 *%ptr
+  %l4 = load volatile i32 , i32 *%ptr
+  %l5 = load volatile i32 , i32 *%ptr
   %add0 = add i32 %l0, %l0
   %add1 = add i32 %l1, %add0
   %add3 = add i32 %l3, %add1
diff --git a/llvm/test/CodeGen/SystemZ/frame-06.ll b/llvm/test/CodeGen/SystemZ/frame-06.ll
index f649286..c2aa8af 100644
--- a/llvm/test/CodeGen/SystemZ/frame-06.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-06.ll
@@ -29,20 +29,20 @@
 ; CHECK: stg {{.*}}, 8(%r2)
 ; CHECK: lmg %r6, %r15, 48(%r15)
 ; CHECK: br %r14
-  %l0 = load volatile i64 *%ptr
-  %l1 = load volatile i64 *%ptr
-  %l3 = load volatile i64 *%ptr
-  %l4 = load volatile i64 *%ptr
-  %l5 = load volatile i64 *%ptr
-  %l6 = load volatile i64 *%ptr
-  %l7 = load volatile i64 *%ptr
-  %l8 = load volatile i64 *%ptr
-  %l9 = load volatile i64 *%ptr
-  %l10 = load volatile i64 *%ptr
-  %l11 = load volatile i64 *%ptr
-  %l12 = load volatile i64 *%ptr
-  %l13 = load volatile i64 *%ptr
-  %l14 = load volatile i64 *%ptr
+  %l0 = load volatile i64 , i64 *%ptr
+  %l1 = load volatile i64 , i64 *%ptr
+  %l3 = load volatile i64 , i64 *%ptr
+  %l4 = load volatile i64 , i64 *%ptr
+  %l5 = load volatile i64 , i64 *%ptr
+  %l6 = load volatile i64 , i64 *%ptr
+  %l7 = load volatile i64 , i64 *%ptr
+  %l8 = load volatile i64 , i64 *%ptr
+  %l9 = load volatile i64 , i64 *%ptr
+  %l10 = load volatile i64 , i64 *%ptr
+  %l11 = load volatile i64 , i64 *%ptr
+  %l12 = load volatile i64 , i64 *%ptr
+  %l13 = load volatile i64 , i64 *%ptr
+  %l14 = load volatile i64 , i64 *%ptr
   %add0 = add i64 %l0, %l0
   %add1 = add i64 %l1, %add0
   %add3 = add i64 %l3, %add1
@@ -97,19 +97,19 @@
 ; CHECK: stg {{.*}}, 8(%r2)
 ; CHECK: lmg %r7, %r15, 56(%r15)
 ; CHECK: br %r14
-  %l0 = load volatile i64 *%ptr
-  %l1 = load volatile i64 *%ptr
-  %l3 = load volatile i64 *%ptr
-  %l4 = load volatile i64 *%ptr
-  %l5 = load volatile i64 *%ptr
-  %l7 = load volatile i64 *%ptr
-  %l8 = load volatile i64 *%ptr
-  %l9 = load volatile i64 *%ptr
-  %l10 = load volatile i64 *%ptr
-  %l11 = load volatile i64 *%ptr
-  %l12 = load volatile i64 *%ptr
-  %l13 = load volatile i64 *%ptr
-  %l14 = load volatile i64 *%ptr
+  %l0 = load volatile i64 , i64 *%ptr
+  %l1 = load volatile i64 , i64 *%ptr
+  %l3 = load volatile i64 , i64 *%ptr
+  %l4 = load volatile i64 , i64 *%ptr
+  %l5 = load volatile i64 , i64 *%ptr
+  %l7 = load volatile i64 , i64 *%ptr
+  %l8 = load volatile i64 , i64 *%ptr
+  %l9 = load volatile i64 , i64 *%ptr
+  %l10 = load volatile i64 , i64 *%ptr
+  %l11 = load volatile i64 , i64 *%ptr
+  %l12 = load volatile i64 , i64 *%ptr
+  %l13 = load volatile i64 , i64 *%ptr
+  %l14 = load volatile i64 , i64 *%ptr
   %add0 = add i64 %l0, %l0
   %add1 = add i64 %l1, %add0
   %add3 = add i64 %l3, %add1
@@ -160,12 +160,12 @@
 ; CHECK: stg {{.*}}, 8(%r2)
 ; CHECK: lmg %r14, %r15, 112(%r15)
 ; CHECK: br %r14
-  %l0 = load volatile i64 *%ptr
-  %l1 = load volatile i64 *%ptr
-  %l3 = load volatile i64 *%ptr
-  %l4 = load volatile i64 *%ptr
-  %l5 = load volatile i64 *%ptr
-  %l14 = load volatile i64 *%ptr
+  %l0 = load volatile i64 , i64 *%ptr
+  %l1 = load volatile i64 , i64 *%ptr
+  %l3 = load volatile i64 , i64 *%ptr
+  %l4 = load volatile i64 , i64 *%ptr
+  %l5 = load volatile i64 , i64 *%ptr
+  %l14 = load volatile i64 , i64 *%ptr
   %add0 = add i64 %l0, %l0
   %add1 = add i64 %l1, %add0
   %add3 = add i64 %l3, %add1
@@ -196,11 +196,11 @@
 ; CHECK-NOT: %r12
 ; CHECK-NOT: %r13
 ; CHECK: br %r14
-  %l0 = load volatile i64 *%ptr
-  %l1 = load volatile i64 *%ptr
-  %l3 = load volatile i64 *%ptr
-  %l4 = load volatile i64 *%ptr
-  %l5 = load volatile i64 *%ptr
+  %l0 = load volatile i64 , i64 *%ptr
+  %l1 = load volatile i64 , i64 *%ptr
+  %l3 = load volatile i64 , i64 *%ptr
+  %l4 = load volatile i64 , i64 *%ptr
+  %l5 = load volatile i64 , i64 *%ptr
   %add0 = add i64 %l0, %l0
   %add1 = add i64 %l1, %add0
   %add3 = add i64 %l3, %add1
diff --git a/llvm/test/CodeGen/SystemZ/frame-07.ll b/llvm/test/CodeGen/SystemZ/frame-07.ll
index 1faed4c..253bbc2 100644
--- a/llvm/test/CodeGen/SystemZ/frame-07.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-07.ll
@@ -68,22 +68,22 @@
   %y = alloca [486 x i64], align 8
   %elem = getelementptr inbounds [486 x i64], [486 x i64]* %y, i64 0, i64 0
   store volatile i64 %x, i64* %elem
-  %l0 = load volatile double *%ptr
-  %l1 = load volatile double *%ptr
-  %l2 = load volatile double *%ptr
-  %l3 = load volatile double *%ptr
-  %l4 = load volatile double *%ptr
-  %l5 = load volatile double *%ptr
-  %l6 = load volatile double *%ptr
-  %l7 = load volatile double *%ptr
-  %l8 = load volatile double *%ptr
-  %l9 = load volatile double *%ptr
-  %l10 = load volatile double *%ptr
-  %l11 = load volatile double *%ptr
-  %l12 = load volatile double *%ptr
-  %l13 = load volatile double *%ptr
-  %l14 = load volatile double *%ptr
-  %l15 = load volatile double *%ptr
+  %l0 = load volatile double , double *%ptr
+  %l1 = load volatile double , double *%ptr
+  %l2 = load volatile double , double *%ptr
+  %l3 = load volatile double , double *%ptr
+  %l4 = load volatile double , double *%ptr
+  %l5 = load volatile double , double *%ptr
+  %l6 = load volatile double , double *%ptr
+  %l7 = load volatile double , double *%ptr
+  %l8 = load volatile double , double *%ptr
+  %l9 = load volatile double , double *%ptr
+  %l10 = load volatile double , double *%ptr
+  %l11 = load volatile double , double *%ptr
+  %l12 = load volatile double , double *%ptr
+  %l13 = load volatile double , double *%ptr
+  %l14 = load volatile double , double *%ptr
+  %l15 = load volatile double , double *%ptr
   %add0 = fadd double %l0, %l0
   %add1 = fadd double %l1, %add0
   %add2 = fadd double %l2, %add1
@@ -197,22 +197,22 @@
   %y = alloca [65510 x i64], align 8
   %elem = getelementptr inbounds [65510 x i64], [65510 x i64]* %y, i64 0, i64 0
   store volatile i64 %x, i64* %elem
-  %l0 = load volatile double *%ptr
-  %l1 = load volatile double *%ptr
-  %l2 = load volatile double *%ptr
-  %l3 = load volatile double *%ptr
-  %l4 = load volatile double *%ptr
-  %l5 = load volatile double *%ptr
-  %l6 = load volatile double *%ptr
-  %l7 = load volatile double *%ptr
-  %l8 = load volatile double *%ptr
-  %l9 = load volatile double *%ptr
-  %l10 = load volatile double *%ptr
-  %l11 = load volatile double *%ptr
-  %l12 = load volatile double *%ptr
-  %l13 = load volatile double *%ptr
-  %l14 = load volatile double *%ptr
-  %l15 = load volatile double *%ptr
+  %l0 = load volatile double , double *%ptr
+  %l1 = load volatile double , double *%ptr
+  %l2 = load volatile double , double *%ptr
+  %l3 = load volatile double , double *%ptr
+  %l4 = load volatile double , double *%ptr
+  %l5 = load volatile double , double *%ptr
+  %l6 = load volatile double , double *%ptr
+  %l7 = load volatile double , double *%ptr
+  %l8 = load volatile double , double *%ptr
+  %l9 = load volatile double , double *%ptr
+  %l10 = load volatile double , double *%ptr
+  %l11 = load volatile double , double *%ptr
+  %l12 = load volatile double , double *%ptr
+  %l13 = load volatile double , double *%ptr
+  %l14 = load volatile double , double *%ptr
+  %l15 = load volatile double , double *%ptr
   %add0 = fadd double %l0, %l0
   %add1 = fadd double %l1, %add0
   %add2 = fadd double %l2, %add1
diff --git a/llvm/test/CodeGen/SystemZ/frame-08.ll b/llvm/test/CodeGen/SystemZ/frame-08.ll
index 0d4fb2e..99e6410 100644
--- a/llvm/test/CodeGen/SystemZ/frame-08.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-08.ll
@@ -25,19 +25,19 @@
 ; CHECK-NOT: ag
 ; CHECK: lmg %r6, %r15, 524280(%r15)
 ; CHECK: br %r14
-  %l0 = load volatile i32 *%ptr
-  %l1 = load volatile i32 *%ptr
-  %l4 = load volatile i32 *%ptr
-  %l5 = load volatile i32 *%ptr
-  %l6 = load volatile i32 *%ptr
-  %l7 = load volatile i32 *%ptr
-  %l8 = load volatile i32 *%ptr
-  %l9 = load volatile i32 *%ptr
-  %l10 = load volatile i32 *%ptr
-  %l11 = load volatile i32 *%ptr
-  %l12 = load volatile i32 *%ptr
-  %l13 = load volatile i32 *%ptr
-  %l14 = load volatile i32 *%ptr
+  %l0 = load volatile i32 , i32 *%ptr
+  %l1 = load volatile i32 , i32 *%ptr
+  %l4 = load volatile i32 , i32 *%ptr
+  %l5 = load volatile i32 , i32 *%ptr
+  %l6 = load volatile i32 , i32 *%ptr
+  %l7 = load volatile i32 , i32 *%ptr
+  %l8 = load volatile i32 , i32 *%ptr
+  %l9 = load volatile i32 , i32 *%ptr
+  %l10 = load volatile i32 , i32 *%ptr
+  %l11 = load volatile i32 , i32 *%ptr
+  %l12 = load volatile i32 , i32 *%ptr
+  %l13 = load volatile i32 , i32 *%ptr
+  %l14 = load volatile i32 , i32 *%ptr
   %add0 = add i32 %l0, %l0
   %add1 = add i32 %l1, %add0
   %add4 = add i32 %l4, %add1
@@ -85,11 +85,11 @@
 ; CHECK-NOT: ag
 ; CHECK: lmg %r14, %r15, 524280(%r15)
 ; CHECK: br %r14
-  %l0 = load volatile i32 *%ptr
-  %l1 = load volatile i32 *%ptr
-  %l4 = load volatile i32 *%ptr
-  %l5 = load volatile i32 *%ptr
-  %l14 = load volatile i32 *%ptr
+  %l0 = load volatile i32 , i32 *%ptr
+  %l1 = load volatile i32 , i32 *%ptr
+  %l4 = load volatile i32 , i32 *%ptr
+  %l5 = load volatile i32 , i32 *%ptr
+  %l14 = load volatile i32 , i32 *%ptr
   %add0 = add i32 %l0, %l0
   %add1 = add i32 %l1, %add0
   %add4 = add i32 %l4, %add1
@@ -128,19 +128,19 @@
 ; CHECK: aghi %r15, 8
 ; CHECK: lmg %r6, %r15, 524280(%r15)
 ; CHECK: br %r14
-  %l0 = load volatile i32 *%ptr
-  %l1 = load volatile i32 *%ptr
-  %l4 = load volatile i32 *%ptr
-  %l5 = load volatile i32 *%ptr
-  %l6 = load volatile i32 *%ptr
-  %l7 = load volatile i32 *%ptr
-  %l8 = load volatile i32 *%ptr
-  %l9 = load volatile i32 *%ptr
-  %l10 = load volatile i32 *%ptr
-  %l11 = load volatile i32 *%ptr
-  %l12 = load volatile i32 *%ptr
-  %l13 = load volatile i32 *%ptr
-  %l14 = load volatile i32 *%ptr
+  %l0 = load volatile i32 , i32 *%ptr
+  %l1 = load volatile i32 , i32 *%ptr
+  %l4 = load volatile i32 , i32 *%ptr
+  %l5 = load volatile i32 , i32 *%ptr
+  %l6 = load volatile i32 , i32 *%ptr
+  %l7 = load volatile i32 , i32 *%ptr
+  %l8 = load volatile i32 , i32 *%ptr
+  %l9 = load volatile i32 , i32 *%ptr
+  %l10 = load volatile i32 , i32 *%ptr
+  %l11 = load volatile i32 , i32 *%ptr
+  %l12 = load volatile i32 , i32 *%ptr
+  %l13 = load volatile i32 , i32 *%ptr
+  %l14 = load volatile i32 , i32 *%ptr
   %add0 = add i32 %l0, %l0
   %add1 = add i32 %l1, %add0
   %add4 = add i32 %l4, %add1
@@ -187,11 +187,11 @@
 ; CHECK: aghi %r15, 8
 ; CHECK: lmg %r14, %r15, 524280(%r15)
 ; CHECK: br %r14
-  %l0 = load volatile i32 *%ptr
-  %l1 = load volatile i32 *%ptr
-  %l4 = load volatile i32 *%ptr
-  %l5 = load volatile i32 *%ptr
-  %l14 = load volatile i32 *%ptr
+  %l0 = load volatile i32 , i32 *%ptr
+  %l1 = load volatile i32 , i32 *%ptr
+  %l4 = load volatile i32 , i32 *%ptr
+  %l5 = load volatile i32 , i32 *%ptr
+  %l14 = load volatile i32 , i32 *%ptr
   %add0 = add i32 %l0, %l0
   %add1 = add i32 %l1, %add0
   %add4 = add i32 %l4, %add1
@@ -221,11 +221,11 @@
 ; CHECK: aghi %r15, 32760
 ; CHECK: lmg %r14, %r15, 524280(%r15)
 ; CHECK: br %r14
-  %l0 = load volatile i32 *%ptr
-  %l1 = load volatile i32 *%ptr
-  %l4 = load volatile i32 *%ptr
-  %l5 = load volatile i32 *%ptr
-  %l14 = load volatile i32 *%ptr
+  %l0 = load volatile i32 , i32 *%ptr
+  %l1 = load volatile i32 , i32 *%ptr
+  %l4 = load volatile i32 , i32 *%ptr
+  %l5 = load volatile i32 , i32 *%ptr
+  %l14 = load volatile i32 , i32 *%ptr
   %add0 = add i32 %l0, %l0
   %add1 = add i32 %l1, %add0
   %add4 = add i32 %l4, %add1
@@ -255,11 +255,11 @@
 ; CHECK: agfi %r15, 32768
 ; CHECK: lmg %r14, %r15, 524280(%r15)
 ; CHECK: br %r14
-  %l0 = load volatile i32 *%ptr
-  %l1 = load volatile i32 *%ptr
-  %l4 = load volatile i32 *%ptr
-  %l5 = load volatile i32 *%ptr
-  %l14 = load volatile i32 *%ptr
+  %l0 = load volatile i32 , i32 *%ptr
+  %l1 = load volatile i32 , i32 *%ptr
+  %l4 = load volatile i32 , i32 *%ptr
+  %l5 = load volatile i32 , i32 *%ptr
+  %l14 = load volatile i32 , i32 *%ptr
   %add0 = add i32 %l0, %l0
   %add1 = add i32 %l1, %add0
   %add4 = add i32 %l4, %add1
diff --git a/llvm/test/CodeGen/SystemZ/frame-09.ll b/llvm/test/CodeGen/SystemZ/frame-09.ll
index a7cb6bd..ead944e 100644
--- a/llvm/test/CodeGen/SystemZ/frame-09.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-09.ll
@@ -64,19 +64,19 @@
 ; CHECK: st {{.*}}, 4(%r2)
 ; CHECK: lmg %r6, %r15, 48(%r11)
 ; CHECK: br %r14
-  %l0 = load volatile i32 *%ptr
-  %l1 = load volatile i32 *%ptr
-  %l3 = load volatile i32 *%ptr
-  %l4 = load volatile i32 *%ptr
-  %l5 = load volatile i32 *%ptr
-  %l6 = load volatile i32 *%ptr
-  %l7 = load volatile i32 *%ptr
-  %l8 = load volatile i32 *%ptr
-  %l9 = load volatile i32 *%ptr
-  %l10 = load volatile i32 *%ptr
-  %l12 = load volatile i32 *%ptr
-  %l13 = load volatile i32 *%ptr
-  %l14 = load volatile i32 *%ptr
+  %l0 = load volatile i32 , i32 *%ptr
+  %l1 = load volatile i32 , i32 *%ptr
+  %l3 = load volatile i32 , i32 *%ptr
+  %l4 = load volatile i32 , i32 *%ptr
+  %l5 = load volatile i32 , i32 *%ptr
+  %l6 = load volatile i32 , i32 *%ptr
+  %l7 = load volatile i32 , i32 *%ptr
+  %l8 = load volatile i32 , i32 *%ptr
+  %l9 = load volatile i32 , i32 *%ptr
+  %l10 = load volatile i32 , i32 *%ptr
+  %l12 = load volatile i32 , i32 *%ptr
+  %l13 = load volatile i32 , i32 *%ptr
+  %l14 = load volatile i32 , i32 *%ptr
   %add0 = add i32 %l0, %l0
   %add1 = add i32 %l1, %add0
   %add3 = add i32 %l3, %add1
diff --git a/llvm/test/CodeGen/SystemZ/frame-13.ll b/llvm/test/CodeGen/SystemZ/frame-13.ll
index 71a5cc9..2afe6d7 100644
--- a/llvm/test/CodeGen/SystemZ/frame-13.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-13.ll
@@ -222,11 +222,11 @@
 ; CHECK-FP: mvhi 0([[REGISTER]]), 42
 ; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
 ; CHECK-FP: br %r14
-  %i0 = load volatile i32 *%vptr
-  %i1 = load volatile i32 *%vptr
-  %i3 = load volatile i32 *%vptr
-  %i4 = load volatile i32 *%vptr
-  %i5 = load volatile i32 *%vptr
+  %i0 = load volatile i32 , i32 *%vptr
+  %i1 = load volatile i32 , i32 *%vptr
+  %i3 = load volatile i32 , i32 *%vptr
+  %i4 = load volatile i32 , i32 *%vptr
+  %i5 = load volatile i32 , i32 *%vptr
   %region1 = alloca [978 x i32], align 8
   %region2 = alloca [978 x i32], align 8
   %ptr1 = getelementptr inbounds [978 x i32], [978 x i32]* %region1, i64 0, i64 2
@@ -254,20 +254,20 @@
 ; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
 ; CHECK-NOFP: lmg %r6, %r15,
 ; CHECK-NOFP: br %r14
-  %i0 = load volatile i32 *%vptr
-  %i1 = load volatile i32 *%vptr
-  %i3 = load volatile i32 *%vptr
-  %i4 = load volatile i32 *%vptr
-  %i5 = load volatile i32 *%vptr
-  %i6 = load volatile i32 *%vptr
-  %i7 = load volatile i32 *%vptr
-  %i8 = load volatile i32 *%vptr
-  %i9 = load volatile i32 *%vptr
-  %i10 = load volatile i32 *%vptr
-  %i11 = load volatile i32 *%vptr
-  %i12 = load volatile i32 *%vptr
-  %i13 = load volatile i32 *%vptr
-  %i14 = load volatile i32 *%vptr
+  %i0 = load volatile i32 , i32 *%vptr
+  %i1 = load volatile i32 , i32 *%vptr
+  %i3 = load volatile i32 , i32 *%vptr
+  %i4 = load volatile i32 , i32 *%vptr
+  %i5 = load volatile i32 , i32 *%vptr
+  %i6 = load volatile i32 , i32 *%vptr
+  %i7 = load volatile i32 , i32 *%vptr
+  %i8 = load volatile i32 , i32 *%vptr
+  %i9 = load volatile i32 , i32 *%vptr
+  %i10 = load volatile i32 , i32 *%vptr
+  %i11 = load volatile i32 , i32 *%vptr
+  %i12 = load volatile i32 , i32 *%vptr
+  %i13 = load volatile i32 , i32 *%vptr
+  %i14 = load volatile i32 , i32 *%vptr
   %region1 = alloca [978 x i32], align 8
   %region2 = alloca [978 x i32], align 8
   %ptr1 = getelementptr inbounds [978 x i32], [978 x i32]* %region1, i64 0, i64 2
diff --git a/llvm/test/CodeGen/SystemZ/frame-14.ll b/llvm/test/CodeGen/SystemZ/frame-14.ll
index 21ef40a..3c080a4 100644
--- a/llvm/test/CodeGen/SystemZ/frame-14.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-14.ll
@@ -245,11 +245,11 @@
 ; CHECK-FP: mvi 0([[REGISTER]]), 42
 ; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
 ; CHECK-FP: br %r14
-  %i0 = load volatile i32 *%vptr
-  %i1 = load volatile i32 *%vptr
-  %i3 = load volatile i32 *%vptr
-  %i4 = load volatile i32 *%vptr
-  %i5 = load volatile i32 *%vptr
+  %i0 = load volatile i32 , i32 *%vptr
+  %i1 = load volatile i32 , i32 *%vptr
+  %i3 = load volatile i32 , i32 *%vptr
+  %i4 = load volatile i32 , i32 *%vptr
+  %i5 = load volatile i32 , i32 *%vptr
   %region1 = alloca [524104 x i8], align 8
   %region2 = alloca [524104 x i8], align 8
   %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 8
@@ -278,20 +278,20 @@
 ; CHECK-NOFP: lg [[REGISTER]], [[OFFSET]](%r15)
 ; CHECK-NOFP: lmg %r6, %r15,
 ; CHECK-NOFP: br %r14
-  %i0 = load volatile i32 *%vptr
-  %i1 = load volatile i32 *%vptr
-  %i3 = load volatile i32 *%vptr
-  %i4 = load volatile i32 *%vptr
-  %i5 = load volatile i32 *%vptr
-  %i6 = load volatile i32 *%vptr
-  %i7 = load volatile i32 *%vptr
-  %i8 = load volatile i32 *%vptr
-  %i9 = load volatile i32 *%vptr
-  %i10 = load volatile i32 *%vptr
-  %i11 = load volatile i32 *%vptr
-  %i12 = load volatile i32 *%vptr
-  %i13 = load volatile i32 *%vptr
-  %i14 = load volatile i32 *%vptr
+  %i0 = load volatile i32 , i32 *%vptr
+  %i1 = load volatile i32 , i32 *%vptr
+  %i3 = load volatile i32 , i32 *%vptr
+  %i4 = load volatile i32 , i32 *%vptr
+  %i5 = load volatile i32 , i32 *%vptr
+  %i6 = load volatile i32 , i32 *%vptr
+  %i7 = load volatile i32 , i32 *%vptr
+  %i8 = load volatile i32 , i32 *%vptr
+  %i9 = load volatile i32 , i32 *%vptr
+  %i10 = load volatile i32 , i32 *%vptr
+  %i11 = load volatile i32 , i32 *%vptr
+  %i12 = load volatile i32 , i32 *%vptr
+  %i13 = load volatile i32 , i32 *%vptr
+  %i14 = load volatile i32 , i32 *%vptr
   %region1 = alloca [524104 x i8], align 8
   %region2 = alloca [524104 x i8], align 8
   %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 8
diff --git a/llvm/test/CodeGen/SystemZ/frame-15.ll b/llvm/test/CodeGen/SystemZ/frame-15.ll
index a88ca33..f81c9dc 100644
--- a/llvm/test/CodeGen/SystemZ/frame-15.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-15.ll
@@ -41,8 +41,8 @@
   call void @foo(float *%start1, float *%start2)
   %ptr1 = getelementptr inbounds [978 x float], [978 x float]* %region1, i64 0, i64 1
   %ptr2 = getelementptr inbounds [978 x float], [978 x float]* %region2, i64 0, i64 1
-  %float1 = load float *%ptr1
-  %float2 = load float *%ptr2
+  %float1 = load float , float *%ptr1
+  %float2 = load float , float *%ptr2
   %double1 = fpext float %float1 to double
   %double2 = fpext float %float2 to double
   store volatile double %double1, double *%dst
@@ -68,8 +68,8 @@
   call void @foo(float *%start1, float *%start2)
   %ptr1 = getelementptr inbounds [978 x float], [978 x float]* %region1, i64 0, i64 2
   %ptr2 = getelementptr inbounds [978 x float], [978 x float]* %region2, i64 0, i64 2
-  %float1 = load float *%ptr1
-  %float2 = load float *%ptr2
+  %float1 = load float , float *%ptr1
+  %float2 = load float , float *%ptr2
   %double1 = fpext float %float1 to double
   %double2 = fpext float %float2 to double
   store volatile double %double1, double *%dst
@@ -95,8 +95,8 @@
   call void @foo(float *%start1, float *%start2)
   %ptr1 = getelementptr inbounds [978 x float], [978 x float]* %region1, i64 0, i64 3
   %ptr2 = getelementptr inbounds [978 x float], [978 x float]* %region2, i64 0, i64 3
-  %float1 = load float *%ptr1
-  %float2 = load float *%ptr2
+  %float1 = load float , float *%ptr1
+  %float2 = load float , float *%ptr2
   %double1 = fpext float %float1 to double
   %double2 = fpext float %float2 to double
   store volatile double %double1, double *%dst
@@ -122,8 +122,8 @@
   call void @foo(float *%start1, float *%start2)
   %ptr1 = getelementptr inbounds [2002 x float], [2002 x float]* %region1, i64 0, i64 1
   %ptr2 = getelementptr inbounds [2002 x float], [2002 x float]* %region2, i64 0, i64 1
-  %float1 = load float *%ptr1
-  %float2 = load float *%ptr2
+  %float1 = load float , float *%ptr1
+  %float2 = load float , float *%ptr2
   %double1 = fpext float %float1 to double
   %double2 = fpext float %float2 to double
   store volatile double %double1, double *%dst
@@ -149,8 +149,8 @@
   call void @foo(float *%start1, float *%start2)
   %ptr1 = getelementptr inbounds [2002 x float], [2002 x float]* %region1, i64 0, i64 2
   %ptr2 = getelementptr inbounds [2002 x float], [2002 x float]* %region2, i64 0, i64 2
-  %float1 = load float *%ptr1
-  %float2 = load float *%ptr2
+  %float1 = load float , float *%ptr1
+  %float2 = load float , float *%ptr2
   %double1 = fpext float %float1 to double
   %double2 = fpext float %float2 to double
   store volatile double %double1, double *%dst
@@ -176,8 +176,8 @@
   call void @foo(float *%start1, float *%start2)
   %ptr1 = getelementptr inbounds [2002 x float], [2002 x float]* %region1, i64 0, i64 3
   %ptr2 = getelementptr inbounds [2002 x float], [2002 x float]* %region2, i64 0, i64 3
-  %float1 = load float *%ptr1
-  %float2 = load float *%ptr2
+  %float1 = load float , float *%ptr1
+  %float2 = load float , float *%ptr2
   %double1 = fpext float %float1 to double
   %double2 = fpext float %float2 to double
   store volatile double %double1, double *%dst
@@ -205,8 +205,8 @@
   call void @foo(float *%start1, float *%start2)
   %ptr1 = getelementptr inbounds [2004 x float], [2004 x float]* %region1, i64 0, i64 1023
   %ptr2 = getelementptr inbounds [2004 x float], [2004 x float]* %region2, i64 0, i64 1023
-  %float1 = load float *%ptr1
-  %float2 = load float *%ptr2
+  %float1 = load float , float *%ptr1
+  %float2 = load float , float *%ptr2
   %double1 = fpext float %float1 to double
   %double2 = fpext float %float2 to double
   store volatile double %double1, double *%dst
@@ -233,8 +233,8 @@
   call void @foo(float *%start1, float *%start2)
   %ptr1 = getelementptr inbounds [2006 x float], [2006 x float]* %region1, i64 0, i64 1023
   %ptr2 = getelementptr inbounds [2006 x float], [2006 x float]* %region2, i64 0, i64 1023
-  %float1 = load float *%ptr1
-  %float2 = load float *%ptr2
+  %float1 = load float , float *%ptr1
+  %float2 = load float , float *%ptr2
   %double1 = fpext float %float1 to double
   %double2 = fpext float %float2 to double
   store volatile double %double1, double *%dst
@@ -262,8 +262,8 @@
   call void @foo(float *%start1, float *%start2)
   %ptr1 = getelementptr inbounds [2006 x float], [2006 x float]* %region1, i64 0, i64 1024
   %ptr2 = getelementptr inbounds [2006 x float], [2006 x float]* %region2, i64 0, i64 1024
-  %float1 = load float *%ptr1
-  %float2 = load float *%ptr2
+  %float1 = load float , float *%ptr1
+  %float2 = load float , float *%ptr2
   %double1 = fpext float %float1 to double
   %double2 = fpext float %float2 to double
   store volatile double %double1, double *%dst
@@ -296,15 +296,15 @@
   call void @foo(float *%start1, float *%start2)
   %ptr1 = getelementptr inbounds [978 x float], [978 x float]* %region1, i64 0, i64 2
   %ptr2 = getelementptr inbounds [978 x float], [978 x float]* %region2, i64 0, i64 2
-  %i0 = load volatile i32 *%vptr
-  %i1 = load volatile i32 *%vptr
-  %i2 = load volatile i32 *%vptr
-  %i3 = load volatile i32 *%vptr
-  %i4 = load volatile i32 *%vptr
-  %i5 = load volatile i32 *%vptr
-  %i14 = load volatile i32 *%vptr
-  %float1 = load float *%ptr1
-  %float2 = load float *%ptr2
+  %i0 = load volatile i32 , i32 *%vptr
+  %i1 = load volatile i32 , i32 *%vptr
+  %i2 = load volatile i32 , i32 *%vptr
+  %i3 = load volatile i32 , i32 *%vptr
+  %i4 = load volatile i32 , i32 *%vptr
+  %i5 = load volatile i32 , i32 *%vptr
+  %i14 = load volatile i32 , i32 *%vptr
+  %float1 = load float , float *%ptr1
+  %float2 = load float , float *%ptr2
   %double1 = fpext float %float1 to double
   %double2 = fpext float %float2 to double
   store volatile double %double1, double *%dst
@@ -345,8 +345,8 @@
   %addr2 = add i64 %base2, %index
   %ptr1 = inttoptr i64 %addr1 to float *
   %ptr2 = inttoptr i64 %addr2 to float *
-  %float1 = load float *%ptr1
-  %float2 = load float *%ptr2
+  %float1 = load float , float *%ptr1
+  %float2 = load float , float *%ptr2
   %double1 = fpext float %float1 to double
   %double2 = fpext float %float2 to double
   store volatile double %double1, double *%dst
diff --git a/llvm/test/CodeGen/SystemZ/frame-16.ll b/llvm/test/CodeGen/SystemZ/frame-16.ll
index c6f011f..75da044 100644
--- a/llvm/test/CodeGen/SystemZ/frame-16.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-16.ll
@@ -233,10 +233,10 @@
 ; CHECK-FP: stc %r3, 0([[REGISTER]],%r11)
 ; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
 ; CHECK-FP: br %r14
-  %i0 = load volatile i32 *%vptr
-  %i1 = load volatile i32 *%vptr
-  %i4 = load volatile i32 *%vptr
-  %i5 = load volatile i32 *%vptr
+  %i0 = load volatile i32 , i32 *%vptr
+  %i1 = load volatile i32 , i32 *%vptr
+  %i4 = load volatile i32 , i32 *%vptr
+  %i5 = load volatile i32 , i32 *%vptr
   %region1 = alloca [524104 x i8], align 8
   %region2 = alloca [524104 x i8], align 8
   %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 8
@@ -272,19 +272,19 @@
 ; CHECK-FP: lg [[REGISTER]], [[OFFSET]](%r11)
 ; CHECK-FP: lmg %r6, %r15,
 ; CHECK-FP: br %r14
-  %i0 = load volatile i32 *%vptr
-  %i1 = load volatile i32 *%vptr
-  %i4 = load volatile i32 *%vptr
-  %i5 = load volatile i32 *%vptr
-  %i6 = load volatile i32 *%vptr
-  %i7 = load volatile i32 *%vptr
-  %i8 = load volatile i32 *%vptr
-  %i9 = load volatile i32 *%vptr
-  %i10 = load volatile i32 *%vptr
-  %i11 = load volatile i32 *%vptr
-  %i12 = load volatile i32 *%vptr
-  %i13 = load volatile i32 *%vptr
-  %i14 = load volatile i32 *%vptr
+  %i0 = load volatile i32 , i32 *%vptr
+  %i1 = load volatile i32 , i32 *%vptr
+  %i4 = load volatile i32 , i32 *%vptr
+  %i5 = load volatile i32 , i32 *%vptr
+  %i6 = load volatile i32 , i32 *%vptr
+  %i7 = load volatile i32 , i32 *%vptr
+  %i8 = load volatile i32 , i32 *%vptr
+  %i9 = load volatile i32 , i32 *%vptr
+  %i10 = load volatile i32 , i32 *%vptr
+  %i11 = load volatile i32 , i32 *%vptr
+  %i12 = load volatile i32 , i32 *%vptr
+  %i13 = load volatile i32 , i32 *%vptr
+  %i14 = load volatile i32 , i32 *%vptr
   %region1 = alloca [524104 x i8], align 8
   %region2 = alloca [524104 x i8], align 8
   %ptr1 = getelementptr inbounds [524104 x i8], [524104 x i8]* %region1, i64 0, i64 8
diff --git a/llvm/test/CodeGen/SystemZ/frame-17.ll b/llvm/test/CodeGen/SystemZ/frame-17.ll
index 97cf83d..485297a 100644
--- a/llvm/test/CodeGen/SystemZ/frame-17.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-17.ll
@@ -31,23 +31,23 @@
 ; CHECK: ld %f15, 168(%r15)
 ; CHECK: aghi %r15, 232
 ; CHECK: br %r14
-  %l0 = load volatile float *%ptr
-  %l1 = load volatile float *%ptr
-  %l2 = load volatile float *%ptr
-  %l3 = load volatile float *%ptr
-  %l4 = load volatile float *%ptr
-  %l5 = load volatile float *%ptr
-  %l6 = load volatile float *%ptr
-  %l7 = load volatile float *%ptr
-  %l8 = load volatile float *%ptr
-  %l9 = load volatile float *%ptr
-  %l10 = load volatile float *%ptr
-  %l11 = load volatile float *%ptr
-  %l12 = load volatile float *%ptr
-  %l13 = load volatile float *%ptr
-  %l14 = load volatile float *%ptr
-  %l15 = load volatile float *%ptr
-  %lx = load volatile float *%ptr
+  %l0 = load volatile float , float *%ptr
+  %l1 = load volatile float , float *%ptr
+  %l2 = load volatile float , float *%ptr
+  %l3 = load volatile float , float *%ptr
+  %l4 = load volatile float , float *%ptr
+  %l5 = load volatile float , float *%ptr
+  %l6 = load volatile float , float *%ptr
+  %l7 = load volatile float , float *%ptr
+  %l8 = load volatile float , float *%ptr
+  %l9 = load volatile float , float *%ptr
+  %l10 = load volatile float , float *%ptr
+  %l11 = load volatile float , float *%ptr
+  %l12 = load volatile float , float *%ptr
+  %l13 = load volatile float , float *%ptr
+  %l14 = load volatile float , float *%ptr
+  %l15 = load volatile float , float *%ptr
+  %lx = load volatile float , float *%ptr
   store volatile float %lx, float *%ptr
   store volatile float %l15, float *%ptr
   store volatile float %l14, float *%ptr
@@ -92,23 +92,23 @@
 ; CHECK: ld %f15, 168(%r15)
 ; CHECK: aghi %r15, 232
 ; CHECK: br %r14
-  %l0 = load volatile double *%ptr
-  %l1 = load volatile double *%ptr
-  %l2 = load volatile double *%ptr
-  %l3 = load volatile double *%ptr
-  %l4 = load volatile double *%ptr
-  %l5 = load volatile double *%ptr
-  %l6 = load volatile double *%ptr
-  %l7 = load volatile double *%ptr
-  %l8 = load volatile double *%ptr
-  %l9 = load volatile double *%ptr
-  %l10 = load volatile double *%ptr
-  %l11 = load volatile double *%ptr
-  %l12 = load volatile double *%ptr
-  %l13 = load volatile double *%ptr
-  %l14 = load volatile double *%ptr
-  %l15 = load volatile double *%ptr
-  %lx = load volatile double *%ptr
+  %l0 = load volatile double , double *%ptr
+  %l1 = load volatile double , double *%ptr
+  %l2 = load volatile double , double *%ptr
+  %l3 = load volatile double , double *%ptr
+  %l4 = load volatile double , double *%ptr
+  %l5 = load volatile double , double *%ptr
+  %l6 = load volatile double , double *%ptr
+  %l7 = load volatile double , double *%ptr
+  %l8 = load volatile double , double *%ptr
+  %l9 = load volatile double , double *%ptr
+  %l10 = load volatile double , double *%ptr
+  %l11 = load volatile double , double *%ptr
+  %l12 = load volatile double , double *%ptr
+  %l13 = load volatile double , double *%ptr
+  %l14 = load volatile double , double *%ptr
+  %l15 = load volatile double , double *%ptr
+  %lx = load volatile double , double *%ptr
   store volatile double %lx, double *%ptr
   store volatile double %l15, double *%ptr
   store volatile double %l14, double *%ptr
@@ -155,15 +155,15 @@
 ; CHECK: ld %f15, 176(%r15)
 ; CHECK: aghi %r15, 240
 ; CHECK: br %r14
-  %l0 = load volatile fp128 *%ptr
-  %l1 = load volatile fp128 *%ptr
-  %l4 = load volatile fp128 *%ptr
-  %l5 = load volatile fp128 *%ptr
-  %l8 = load volatile fp128 *%ptr
-  %l9 = load volatile fp128 *%ptr
-  %l12 = load volatile fp128 *%ptr
-  %l13 = load volatile fp128 *%ptr
-  %lx = load volatile fp128 *%ptr
+  %l0 = load volatile fp128 , fp128 *%ptr
+  %l1 = load volatile fp128 , fp128 *%ptr
+  %l4 = load volatile fp128 , fp128 *%ptr
+  %l5 = load volatile fp128 , fp128 *%ptr
+  %l8 = load volatile fp128 , fp128 *%ptr
+  %l9 = load volatile fp128 , fp128 *%ptr
+  %l12 = load volatile fp128 , fp128 *%ptr
+  %l13 = load volatile fp128 , fp128 *%ptr
+  %lx = load volatile fp128 , fp128 *%ptr
   store volatile fp128 %lx, fp128 *%ptr
   store volatile fp128 %l13, fp128 *%ptr
   store volatile fp128 %l12, fp128 *%ptr
diff --git a/llvm/test/CodeGen/SystemZ/frame-18.ll b/llvm/test/CodeGen/SystemZ/frame-18.ll
index 21dfc12..0f58e43 100644
--- a/llvm/test/CodeGen/SystemZ/frame-18.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-18.ll
@@ -16,21 +16,21 @@
 ; CHECK-NOT: 160(%r15)
 ; CHECK: lmg %r6, %r15, 216(%r15)
 ; CHECK: br %r14
-  %l0 = load volatile i32 *%ptr
-  %l1 = load volatile i32 *%ptr
-  %l3 = load volatile i32 *%ptr
-  %l4 = load volatile i32 *%ptr
-  %l5 = load volatile i32 *%ptr
-  %l6 = load volatile i32 *%ptr
-  %l7 = load volatile i32 *%ptr
-  %l8 = load volatile i32 *%ptr
-  %l9 = load volatile i32 *%ptr
-  %l10 = load volatile i32 *%ptr
-  %l11 = load volatile i32 *%ptr
-  %l12 = load volatile i32 *%ptr
-  %l13 = load volatile i32 *%ptr
-  %l14 = load volatile i32 *%ptr
-  %lx = load volatile i32 *%ptr
+  %l0 = load volatile i32 , i32 *%ptr
+  %l1 = load volatile i32 , i32 *%ptr
+  %l3 = load volatile i32 , i32 *%ptr
+  %l4 = load volatile i32 , i32 *%ptr
+  %l5 = load volatile i32 , i32 *%ptr
+  %l6 = load volatile i32 , i32 *%ptr
+  %l7 = load volatile i32 , i32 *%ptr
+  %l8 = load volatile i32 , i32 *%ptr
+  %l9 = load volatile i32 , i32 *%ptr
+  %l10 = load volatile i32 , i32 *%ptr
+  %l11 = load volatile i32 , i32 *%ptr
+  %l12 = load volatile i32 , i32 *%ptr
+  %l13 = load volatile i32 , i32 *%ptr
+  %l14 = load volatile i32 , i32 *%ptr
+  %lx = load volatile i32 , i32 *%ptr
   store volatile i32 %lx, i32 *%ptr
   store volatile i32 %l14, i32 *%ptr
   store volatile i32 %l13, i32 *%ptr
@@ -58,21 +58,21 @@
 ; CHECK: lg [[REGISTER]], 160(%r15)
 ; CHECK: lmg %r6, %r15, 216(%r15)
 ; CHECK: br %r14
-  %l0 = load volatile i64 *%ptr
-  %l1 = load volatile i64 *%ptr
-  %l3 = load volatile i64 *%ptr
-  %l4 = load volatile i64 *%ptr
-  %l5 = load volatile i64 *%ptr
-  %l6 = load volatile i64 *%ptr
-  %l7 = load volatile i64 *%ptr
-  %l8 = load volatile i64 *%ptr
-  %l9 = load volatile i64 *%ptr
-  %l10 = load volatile i64 *%ptr
-  %l11 = load volatile i64 *%ptr
-  %l12 = load volatile i64 *%ptr
-  %l13 = load volatile i64 *%ptr
-  %l14 = load volatile i64 *%ptr
-  %lx = load volatile i64 *%ptr
+  %l0 = load volatile i64 , i64 *%ptr
+  %l1 = load volatile i64 , i64 *%ptr
+  %l3 = load volatile i64 , i64 *%ptr
+  %l4 = load volatile i64 , i64 *%ptr
+  %l5 = load volatile i64 , i64 *%ptr
+  %l6 = load volatile i64 , i64 *%ptr
+  %l7 = load volatile i64 , i64 *%ptr
+  %l8 = load volatile i64 , i64 *%ptr
+  %l9 = load volatile i64 , i64 *%ptr
+  %l10 = load volatile i64 , i64 *%ptr
+  %l11 = load volatile i64 , i64 *%ptr
+  %l12 = load volatile i64 , i64 *%ptr
+  %l13 = load volatile i64 , i64 *%ptr
+  %l14 = load volatile i64 , i64 *%ptr
+  %lx = load volatile i64 , i64 *%ptr
   store volatile i64 %lx, i64 *%ptr
   store volatile i64 %l14, i64 *%ptr
   store volatile i64 %l13, i64 *%ptr
diff --git a/llvm/test/CodeGen/SystemZ/insert-01.ll b/llvm/test/CodeGen/SystemZ/insert-01.ll
index a542cf8..eb39552 100644
--- a/llvm/test/CodeGen/SystemZ/insert-01.ll
+++ b/llvm/test/CodeGen/SystemZ/insert-01.ll
@@ -9,7 +9,7 @@
 ; CHECK-NOT: ni
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ptr2 = zext i8 %val to i32
   %ptr1 = and i32 %orig, -256
   %or = or i32 %ptr1, %ptr2
@@ -22,7 +22,7 @@
 ; CHECK-NOT: ni
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ptr2 = zext i8 %val to i32
   %ptr1 = and i32 %orig, -256
   %or = or i32 %ptr2, %ptr1
@@ -36,7 +36,7 @@
 ; CHECK: nill %r2, 65024
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ptr2 = zext i8 %val to i32
   %ptr1 = and i32 %orig, -512
   %or = or i32 %ptr1, %ptr2
@@ -49,7 +49,7 @@
 ; CHECK: nill %r2, 65024
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ptr2 = zext i8 %val to i32
   %ptr1 = and i32 %orig, -512
   %or = or i32 %ptr2, %ptr1
@@ -62,7 +62,7 @@
 ; CHECK: sll %r2, 8
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ptr2 = zext i8 %val to i32
   %ptr1 = shl i32 %orig, 8
   %or = or i32 %ptr1, %ptr2
@@ -75,7 +75,7 @@
 ; CHECK: sll %r2, 8
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ptr2 = zext i8 %val to i32
   %ptr1 = shl i32 %orig, 8
   %or = or i32 %ptr2, %ptr1
@@ -88,7 +88,7 @@
 ; CHECK: lhi %r2, 256
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ptr2 = zext i8 %val to i32
   %or = or i32 %ptr2, 256
   ret i32 %or
@@ -100,7 +100,7 @@
 ; CHECK: lhi %r2, 256
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ptr2 = zext i8 %val to i32
   %or = or i32 256, %ptr2
   ret i32 %or
@@ -112,7 +112,7 @@
 ; CHECK: ic %r2, 4095(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 4095
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %src2 = zext i8 %val to i32
   %src1 = and i32 %orig, -256
   %or = or i32 %src2, %src1
@@ -125,7 +125,7 @@
 ; CHECK: icy %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 4096
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %src2 = zext i8 %val to i32
   %src1 = and i32 %orig, -256
   %or = or i32 %src2, %src1
@@ -138,7 +138,7 @@
 ; CHECK: icy %r2, 524287(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524287
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %src2 = zext i8 %val to i32
   %src1 = and i32 %orig, -256
   %or = or i32 %src2, %src1
@@ -153,7 +153,7 @@
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524288
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %src2 = zext i8 %val to i32
   %src1 = and i32 %orig, -256
   %or = or i32 %src2, %src1
@@ -166,7 +166,7 @@
 ; CHECK: icy %r2, -1(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -1
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %src2 = zext i8 %val to i32
   %src1 = and i32 %orig, -256
   %or = or i32 %src2, %src1
@@ -179,7 +179,7 @@
 ; CHECK: icy %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524288
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %src2 = zext i8 %val to i32
   %src1 = and i32 %orig, -256
   %or = or i32 %src2, %src1
@@ -194,7 +194,7 @@
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524289
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %src2 = zext i8 %val to i32
   %src1 = and i32 %orig, -256
   %or = or i32 %src2, %src1
@@ -208,7 +208,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr i8, i8 *%src, i64 %index
   %ptr2 = getelementptr i8, i8 *%ptr1, i64 4095
-  %val = load i8 *%ptr2
+  %val = load i8 , i8 *%ptr2
   %src2 = zext i8 %val to i32
   %src1 = and i32 %orig, -256
   %or = or i32 %src2, %src1
@@ -222,7 +222,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr i8, i8 *%src, i64 %index
   %ptr2 = getelementptr i8, i8 *%ptr1, i64 4096
-  %val = load i8 *%ptr2
+  %val = load i8 , i8 *%ptr2
   %src2 = zext i8 %val to i32
   %src1 = and i32 %orig, -256
   %or = or i32 %src2, %src1
diff --git a/llvm/test/CodeGen/SystemZ/insert-02.ll b/llvm/test/CodeGen/SystemZ/insert-02.ll
index f8e8c97..8ecfd1f 100644
--- a/llvm/test/CodeGen/SystemZ/insert-02.ll
+++ b/llvm/test/CodeGen/SystemZ/insert-02.ll
@@ -9,7 +9,7 @@
 ; CHECK-NOT: ni
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ptr2 = zext i8 %val to i64
   %ptr1 = and i64 %orig, -256
   %or = or i64 %ptr1, %ptr2
@@ -22,7 +22,7 @@
 ; CHECK-NOT: ni
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ptr2 = zext i8 %val to i64
   %ptr1 = and i64 %orig, -256
   %or = or i64 %ptr2, %ptr1
@@ -36,7 +36,7 @@
 ; CHECK: nill %r2, 65024
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ptr2 = zext i8 %val to i64
   %ptr1 = and i64 %orig, -512
   %or = or i64 %ptr1, %ptr2
@@ -49,7 +49,7 @@
 ; CHECK: nill %r2, 65024
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ptr2 = zext i8 %val to i64
   %ptr1 = and i64 %orig, -512
   %or = or i64 %ptr2, %ptr1
@@ -62,7 +62,7 @@
 ; CHECK: sllg %r2, %r2, 8
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ptr2 = zext i8 %val to i64
   %ptr1 = shl i64 %orig, 8
   %or = or i64 %ptr1, %ptr2
@@ -75,7 +75,7 @@
 ; CHECK: sllg %r2, %r2, 8
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ptr2 = zext i8 %val to i64
   %ptr1 = shl i64 %orig, 8
   %or = or i64 %ptr2, %ptr1
@@ -88,7 +88,7 @@
 ; CHECK: lghi %r2, 256
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ptr2 = zext i8 %val to i64
   %or = or i64 %ptr2, 256
   ret i64 %or
@@ -100,7 +100,7 @@
 ; CHECK: lghi %r2, 256
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ptr2 = zext i8 %val to i64
   %or = or i64 256, %ptr2
   ret i64 %or
@@ -112,7 +112,7 @@
 ; CHECK: ic %r2, 4095(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 4095
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %src2 = zext i8 %val to i64
   %src1 = and i64 %orig, -256
   %or = or i64 %src2, %src1
@@ -125,7 +125,7 @@
 ; CHECK: icy %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 4096
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %src2 = zext i8 %val to i64
   %src1 = and i64 %orig, -256
   %or = or i64 %src2, %src1
@@ -138,7 +138,7 @@
 ; CHECK: icy %r2, 524287(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524287
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %src2 = zext i8 %val to i64
   %src1 = and i64 %orig, -256
   %or = or i64 %src2, %src1
@@ -153,7 +153,7 @@
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524288
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %src2 = zext i8 %val to i64
   %src1 = and i64 %orig, -256
   %or = or i64 %src2, %src1
@@ -166,7 +166,7 @@
 ; CHECK: icy %r2, -1(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -1
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %src2 = zext i8 %val to i64
   %src1 = and i64 %orig, -256
   %or = or i64 %src2, %src1
@@ -179,7 +179,7 @@
 ; CHECK: icy %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524288
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %src2 = zext i8 %val to i64
   %src1 = and i64 %orig, -256
   %or = or i64 %src2, %src1
@@ -194,7 +194,7 @@
 ; CHECK: ic %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524289
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %src2 = zext i8 %val to i64
   %src1 = and i64 %orig, -256
   %or = or i64 %src2, %src1
@@ -208,7 +208,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr i8, i8 *%src, i64 %index
   %ptr2 = getelementptr i8, i8 *%ptr1, i64 4095
-  %val = load i8 *%ptr2
+  %val = load i8 , i8 *%ptr2
   %src2 = zext i8 %val to i64
   %src1 = and i64 %orig, -256
   %or = or i64 %src2, %src1
@@ -222,7 +222,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr i8, i8 *%src, i64 %index
   %ptr2 = getelementptr i8, i8 *%ptr1, i64 4096
-  %val = load i8 *%ptr2
+  %val = load i8 , i8 *%ptr2
   %src2 = zext i8 %val to i64
   %src1 = and i64 %orig, -256
   %or = or i64 %src2, %src1
diff --git a/llvm/test/CodeGen/SystemZ/insert-06.ll b/llvm/test/CodeGen/SystemZ/insert-06.ll
index 81a9c87..3243d9f 100644
--- a/llvm/test/CodeGen/SystemZ/insert-06.ll
+++ b/llvm/test/CodeGen/SystemZ/insert-06.ll
@@ -85,7 +85,7 @@
 ; CHECK-NOT: {{%r[23]}}
 ; CHECK: l %r2, 0(%r3)
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %low = zext i32 %b to i64
   %high = and i64 %a, -4294967296
   %res = or i64 %high, %low
@@ -98,7 +98,7 @@
 ; CHECK-NOT: {{%r[23]}}
 ; CHECK: lb %r2, 0(%r3)
 ; CHECK: br %r14
-  %byte = load i8 *%src
+  %byte = load i8 , i8 *%src
   %b = sext i8 %byte to i32
   %low = zext i32 %b to i64
   %high = and i64 %a, -4294967296
@@ -185,7 +185,7 @@
 ; CHECK-NOT: {{%r[23]}}
 ; CHECK: lb %r2, 0(%r3)
 ; CHECK: br %r14
-  %byte = load i8 *%src
+  %byte = load i8 , i8 *%src
   %b = sext i8 %byte to i64
   %low = and i64 %b, 4294967295
   %high = and i64 %a, -4294967296
diff --git a/llvm/test/CodeGen/SystemZ/int-add-01.ll b/llvm/test/CodeGen/SystemZ/int-add-01.ll
index 6f0d8fa..f7a3a26 100644
--- a/llvm/test/CodeGen/SystemZ/int-add-01.ll
+++ b/llvm/test/CodeGen/SystemZ/int-add-01.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: ah %r2, 0(%r3)
 ; CHECK: br %r14
-  %half = load i16 *%src
+  %half = load i16 , i16 *%src
   %rhs = sext i16 %half to i32
   %res = add i32 %lhs, %rhs
   ret i32 %res
@@ -20,7 +20,7 @@
 ; CHECK: ah %r2, 4094(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 2047
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = add i32 %lhs, %rhs
   ret i32 %res
@@ -32,7 +32,7 @@
 ; CHECK: ahy %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 2048
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = add i32 %lhs, %rhs
   ret i32 %res
@@ -44,7 +44,7 @@
 ; CHECK: ahy %r2, 524286(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 262143
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = add i32 %lhs, %rhs
   ret i32 %res
@@ -58,7 +58,7 @@
 ; CHECK: ah %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 262144
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = add i32 %lhs, %rhs
   ret i32 %res
@@ -70,7 +70,7 @@
 ; CHECK: ahy %r2, -2(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -1
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = add i32 %lhs, %rhs
   ret i32 %res
@@ -82,7 +82,7 @@
 ; CHECK: ahy %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -262144
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = add i32 %lhs, %rhs
   ret i32 %res
@@ -96,7 +96,7 @@
 ; CHECK: ah %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -262145
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = add i32 %lhs, %rhs
   ret i32 %res
@@ -110,7 +110,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4094
   %ptr = inttoptr i64 %add2 to i16 *
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = add i32 %lhs, %rhs
   ret i32 %res
@@ -124,7 +124,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i16 *
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = add i32 %lhs, %rhs
   ret i32 %res
diff --git a/llvm/test/CodeGen/SystemZ/int-add-02.ll b/llvm/test/CodeGen/SystemZ/int-add-02.ll
index 760b4e4..01e77de 100644
--- a/llvm/test/CodeGen/SystemZ/int-add-02.ll
+++ b/llvm/test/CodeGen/SystemZ/int-add-02.ll
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: a %r2, 0(%r3)
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %add = add i32 %a, %b
   ret i32 %add
 }
@@ -30,7 +30,7 @@
 ; CHECK: a %r2, 4092(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 1023
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %add = add i32 %a, %b
   ret i32 %add
 }
@@ -41,7 +41,7 @@
 ; CHECK: ay %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 1024
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %add = add i32 %a, %b
   ret i32 %add
 }
@@ -52,7 +52,7 @@
 ; CHECK: ay %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131071
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %add = add i32 %a, %b
   ret i32 %add
 }
@@ -65,7 +65,7 @@
 ; CHECK: a %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %add = add i32 %a, %b
   ret i32 %add
 }
@@ -76,7 +76,7 @@
 ; CHECK: ay %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -1
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %add = add i32 %a, %b
   ret i32 %add
 }
@@ -87,7 +87,7 @@
 ; CHECK: ay %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %add = add i32 %a, %b
   ret i32 %add
 }
@@ -100,7 +100,7 @@
 ; CHECK: a %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131073
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %add = add i32 %a, %b
   ret i32 %add
 }
@@ -113,7 +113,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4092
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %add = add i32 %a, %b
   ret i32 %add
 }
@@ -126,7 +126,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %add = add i32 %a, %b
   ret i32 %add
 }
@@ -147,16 +147,16 @@
   %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
   %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
 
-  %val0 = load i32 *%ptr0
-  %val1 = load i32 *%ptr1
-  %val2 = load i32 *%ptr2
-  %val3 = load i32 *%ptr3
-  %val4 = load i32 *%ptr4
-  %val5 = load i32 *%ptr5
-  %val6 = load i32 *%ptr6
-  %val7 = load i32 *%ptr7
-  %val8 = load i32 *%ptr8
-  %val9 = load i32 *%ptr9
+  %val0 = load i32 , i32 *%ptr0
+  %val1 = load i32 , i32 *%ptr1
+  %val2 = load i32 , i32 *%ptr2
+  %val3 = load i32 , i32 *%ptr3
+  %val4 = load i32 , i32 *%ptr4
+  %val5 = load i32 , i32 *%ptr5
+  %val6 = load i32 , i32 *%ptr6
+  %val7 = load i32 , i32 *%ptr7
+  %val8 = load i32 , i32 *%ptr8
+  %val9 = load i32 , i32 *%ptr9
 
   %ret = call i32 @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/int-add-03.ll b/llvm/test/CodeGen/SystemZ/int-add-03.ll
index ef38124..92e3c16 100644
--- a/llvm/test/CodeGen/SystemZ/int-add-03.ll
+++ b/llvm/test/CodeGen/SystemZ/int-add-03.ll
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: agf %r2, 0(%r3)
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %bext = sext i32 %b to i64
   %add = add i64 %a, %bext
   ret i64 %add
@@ -31,7 +31,7 @@
 ; CHECK: agf %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131071
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %add = add i64 %a, %bext
   ret i64 %add
@@ -45,7 +45,7 @@
 ; CHECK: agf %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %add = add i64 %a, %bext
   ret i64 %add
@@ -57,7 +57,7 @@
 ; CHECK: agf %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -1
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %add = add i64 %a, %bext
   ret i64 %add
@@ -69,7 +69,7 @@
 ; CHECK: agf %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %add = add i64 %a, %bext
   ret i64 %add
@@ -83,7 +83,7 @@
 ; CHECK: agf %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131073
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %add = add i64 %a, %bext
   ret i64 %add
@@ -97,7 +97,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524284
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %add = add i64 %a, %bext
   ret i64 %add
@@ -119,16 +119,16 @@
   %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
   %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
 
-  %val0 = load i32 *%ptr0
-  %val1 = load i32 *%ptr1
-  %val2 = load i32 *%ptr2
-  %val3 = load i32 *%ptr3
-  %val4 = load i32 *%ptr4
-  %val5 = load i32 *%ptr5
-  %val6 = load i32 *%ptr6
-  %val7 = load i32 *%ptr7
-  %val8 = load i32 *%ptr8
-  %val9 = load i32 *%ptr9
+  %val0 = load i32 , i32 *%ptr0
+  %val1 = load i32 , i32 *%ptr1
+  %val2 = load i32 , i32 *%ptr2
+  %val3 = load i32 , i32 *%ptr3
+  %val4 = load i32 , i32 *%ptr4
+  %val5 = load i32 , i32 *%ptr5
+  %val6 = load i32 , i32 *%ptr6
+  %val7 = load i32 , i32 *%ptr7
+  %val8 = load i32 , i32 *%ptr8
+  %val9 = load i32 , i32 *%ptr9
 
   %frob0 = add i32 %val0, 100
   %frob1 = add i32 %val1, 100
diff --git a/llvm/test/CodeGen/SystemZ/int-add-04.ll b/llvm/test/CodeGen/SystemZ/int-add-04.ll
index 0434de6..6828b61 100644
--- a/llvm/test/CodeGen/SystemZ/int-add-04.ll
+++ b/llvm/test/CodeGen/SystemZ/int-add-04.ll
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: algf %r2, 0(%r3)
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %bext = zext i32 %b to i64
   %add = add i64 %a, %bext
   ret i64 %add
@@ -31,7 +31,7 @@
 ; CHECK: algf %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131071
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i64
   %add = add i64 %a, %bext
   ret i64 %add
@@ -45,7 +45,7 @@
 ; CHECK: algf %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i64
   %add = add i64 %a, %bext
   ret i64 %add
@@ -57,7 +57,7 @@
 ; CHECK: algf %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -1
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i64
   %add = add i64 %a, %bext
   ret i64 %add
@@ -69,7 +69,7 @@
 ; CHECK: algf %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i64
   %add = add i64 %a, %bext
   ret i64 %add
@@ -83,7 +83,7 @@
 ; CHECK: algf %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131073
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i64
   %add = add i64 %a, %bext
   ret i64 %add
@@ -97,7 +97,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524284
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i64
   %add = add i64 %a, %bext
   ret i64 %add
@@ -119,16 +119,16 @@
   %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
   %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
 
-  %val0 = load i32 *%ptr0
-  %val1 = load i32 *%ptr1
-  %val2 = load i32 *%ptr2
-  %val3 = load i32 *%ptr3
-  %val4 = load i32 *%ptr4
-  %val5 = load i32 *%ptr5
-  %val6 = load i32 *%ptr6
-  %val7 = load i32 *%ptr7
-  %val8 = load i32 *%ptr8
-  %val9 = load i32 *%ptr9
+  %val0 = load i32 , i32 *%ptr0
+  %val1 = load i32 , i32 *%ptr1
+  %val2 = load i32 , i32 *%ptr2
+  %val3 = load i32 , i32 *%ptr3
+  %val4 = load i32 , i32 *%ptr4
+  %val5 = load i32 , i32 *%ptr5
+  %val6 = load i32 , i32 *%ptr6
+  %val7 = load i32 , i32 *%ptr7
+  %val8 = load i32 , i32 *%ptr8
+  %val9 = load i32 , i32 *%ptr9
 
   %frob0 = add i32 %val0, 100
   %frob1 = add i32 %val1, 100
diff --git a/llvm/test/CodeGen/SystemZ/int-add-05.ll b/llvm/test/CodeGen/SystemZ/int-add-05.ll
index 9477a57..f28e305 100644
--- a/llvm/test/CodeGen/SystemZ/int-add-05.ll
+++ b/llvm/test/CodeGen/SystemZ/int-add-05.ll
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: ag %r2, 0(%r3)
 ; CHECK: br %r14
-  %b = load i64 *%src
+  %b = load i64 , i64 *%src
   %add = add i64 %a, %b
   ret i64 %add
 }
@@ -30,7 +30,7 @@
 ; CHECK: ag %r2, 524280(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65535
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %add = add i64 %a, %b
   ret i64 %add
 }
@@ -43,7 +43,7 @@
 ; CHECK: ag %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65536
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %add = add i64 %a, %b
   ret i64 %add
 }
@@ -54,7 +54,7 @@
 ; CHECK: ag %r2, -8(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -1
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %add = add i64 %a, %b
   ret i64 %add
 }
@@ -65,7 +65,7 @@
 ; CHECK: ag %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65536
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %add = add i64 %a, %b
   ret i64 %add
 }
@@ -78,7 +78,7 @@
 ; CHECK: ag %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65537
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %add = add i64 %a, %b
   ret i64 %add
 }
@@ -91,7 +91,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524280
   %ptr = inttoptr i64 %add2 to i64 *
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %add = add i64 %a, %b
   ret i64 %add
 }
@@ -112,16 +112,16 @@
   %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
   %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
 
-  %val0 = load i64 *%ptr0
-  %val1 = load i64 *%ptr1
-  %val2 = load i64 *%ptr2
-  %val3 = load i64 *%ptr3
-  %val4 = load i64 *%ptr4
-  %val5 = load i64 *%ptr5
-  %val6 = load i64 *%ptr6
-  %val7 = load i64 *%ptr7
-  %val8 = load i64 *%ptr8
-  %val9 = load i64 *%ptr9
+  %val0 = load i64 , i64 *%ptr0
+  %val1 = load i64 , i64 *%ptr1
+  %val2 = load i64 , i64 *%ptr2
+  %val3 = load i64 , i64 *%ptr3
+  %val4 = load i64 , i64 *%ptr4
+  %val5 = load i64 , i64 *%ptr5
+  %val6 = load i64 , i64 *%ptr6
+  %val7 = load i64 , i64 *%ptr7
+  %val8 = load i64 , i64 *%ptr8
+  %val9 = load i64 , i64 *%ptr9
 
   %ret = call i64 @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/int-add-08.ll b/llvm/test/CodeGen/SystemZ/int-add-08.ll
index f6415f5..75b85d0 100644
--- a/llvm/test/CodeGen/SystemZ/int-add-08.ll
+++ b/llvm/test/CodeGen/SystemZ/int-add-08.ll
@@ -11,7 +11,7 @@
 ; CHECK: algr
 ; CHECK: alcgr
 ; CHECK: br %r14
-  %value = load i128 *%ptr
+  %value = load i128 , i128 *%ptr
   %add = add i128 %value, %value
   store i128 %add, i128 *%ptr
   ret void
@@ -25,8 +25,8 @@
 ; CHECK: alcg {{%r[0-5]}}, 0(%r3)
 ; CHECK: br %r14
   %bptr = inttoptr i64 %addr to i128 *
-  %a = load volatile i128 *%aptr
-  %b = load i128 *%bptr
+  %a = load volatile i128 , i128 *%aptr
+  %b = load i128 , i128 *%bptr
   %add = add i128 %a, %b
   store i128 %add, i128 *%aptr
   ret void
@@ -40,8 +40,8 @@
 ; CHECK: br %r14
   %addr = add i64 %base, 524272
   %bptr = inttoptr i64 %addr to i128 *
-  %a = load volatile i128 *%aptr
-  %b = load i128 *%bptr
+  %a = load volatile i128 , i128 *%aptr
+  %b = load i128 , i128 *%bptr
   %add = add i128 %a, %b
   store i128 %add, i128 *%aptr
   ret void
@@ -57,8 +57,8 @@
 ; CHECK: br %r14
   %addr = add i64 %base, 524280
   %bptr = inttoptr i64 %addr to i128 *
-  %a = load volatile i128 *%aptr
-  %b = load i128 *%bptr
+  %a = load volatile i128 , i128 *%aptr
+  %b = load i128 , i128 *%bptr
   %add = add i128 %a, %b
   store i128 %add, i128 *%aptr
   ret void
@@ -74,8 +74,8 @@
 ; CHECK: br %r14
   %addr = add i64 %base, 524288
   %bptr = inttoptr i64 %addr to i128 *
-  %a = load volatile i128 *%aptr
-  %b = load i128 *%bptr
+  %a = load volatile i128 , i128 *%aptr
+  %b = load i128 , i128 *%bptr
   %add = add i128 %a, %b
   store i128 %add, i128 *%aptr
   ret void
@@ -89,8 +89,8 @@
 ; CHECK: br %r14
   %addr = add i64 %base, -524288
   %bptr = inttoptr i64 %addr to i128 *
-  %a = load volatile i128 *%aptr
-  %b = load i128 *%bptr
+  %a = load volatile i128 , i128 *%aptr
+  %b = load i128 , i128 *%bptr
   %add = add i128 %a, %b
   store i128 %add, i128 *%aptr
   ret void
@@ -104,8 +104,8 @@
 ; CHECK: br %r14
   %addr = add i64 %base, -524296
   %bptr = inttoptr i64 %addr to i128 *
-  %a = load volatile i128 *%aptr
-  %b = load i128 *%bptr
+  %a = load volatile i128 , i128 *%aptr
+  %b = load i128 , i128 *%bptr
   %add = add i128 %a, %b
   store i128 %add, i128 *%aptr
   ret void
@@ -124,15 +124,15 @@
   %ptr3 = getelementptr i128, i128 *%ptr0, i128 6
   %ptr4 = getelementptr i128, i128 *%ptr0, i128 8
 
-  %val0 = load i128 *%ptr0
-  %val1 = load i128 *%ptr1
-  %val2 = load i128 *%ptr2
-  %val3 = load i128 *%ptr3
-  %val4 = load i128 *%ptr4
+  %val0 = load i128 , i128 *%ptr0
+  %val1 = load i128 , i128 *%ptr1
+  %val2 = load i128 , i128 *%ptr2
+  %val3 = load i128 , i128 *%ptr3
+  %val4 = load i128 , i128 *%ptr4
 
   %retptr = call i128 *@foo()
 
-  %ret = load i128 *%retptr
+  %ret = load i128 , i128 *%retptr
   %add0 = add i128 %ret, %val0
   %add1 = add i128 %add0, %val1
   %add2 = add i128 %add1, %val2
diff --git a/llvm/test/CodeGen/SystemZ/int-add-09.ll b/llvm/test/CodeGen/SystemZ/int-add-09.ll
index fd151a7..b7bcdc8 100644
--- a/llvm/test/CodeGen/SystemZ/int-add-09.ll
+++ b/llvm/test/CodeGen/SystemZ/int-add-09.ll
@@ -9,7 +9,7 @@
 ; CHECK: algfi {{%r[0-5]}}, 1
 ; CHECK: alcg
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 128
   %add = add i128 %xor, 1
   store i128 %add, i128 *%aptr
@@ -22,7 +22,7 @@
 ; CHECK: algfi {{%r[0-5]}}, 4294967295
 ; CHECK: alcg
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 128
   %add = add i128 %xor, 4294967295
   store i128 %add, i128 *%aptr
@@ -35,7 +35,7 @@
 ; CHECK: algr
 ; CHECK: alcg
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 128
   %add = add i128 %xor, 4294967296
   store i128 %add, i128 *%aptr
@@ -48,7 +48,7 @@
 ; CHECK: algr
 ; CHECK: alcg
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 128
   %add = add i128 %xor, -1
   store i128 %add, i128 *%aptr
diff --git a/llvm/test/CodeGen/SystemZ/int-add-10.ll b/llvm/test/CodeGen/SystemZ/int-add-10.ll
index 87c6296..f55788d 100644
--- a/llvm/test/CodeGen/SystemZ/int-add-10.ll
+++ b/llvm/test/CodeGen/SystemZ/int-add-10.ll
@@ -9,7 +9,7 @@
 ; CHECK: algfr {{%r[0-5]}}, %r3
 ; CHECK: alcg
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
   %bext = zext i32 %b to i128
   %add = add i128 %xor, %bext
@@ -23,7 +23,7 @@
 ; CHECK: algfr {{%r[0-5]}}, %r3
 ; CHECK: alcg
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
   %trunc = trunc i64 %b to i32
   %bext = zext i32 %trunc to i128
@@ -39,7 +39,7 @@
 ; CHECK: algfr {{%r[0-5]}}, %r3
 ; CHECK: alcg
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
   %bext = zext i64 %b to i128
   %and = and i128 %bext, 4294967295
@@ -54,9 +54,9 @@
 ; CHECK: algf {{%r[0-5]}}, 0(%r3)
 ; CHECK: alcg
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
-  %b = load i32 *%bsrc
+  %b = load i32 , i32 *%bsrc
   %bext = zext i32 %b to i128
   %add = add i128 %xor, %bext
   store i128 %add, i128 *%aptr
@@ -69,10 +69,10 @@
 ; CHECK: algf {{%r[0-5]}}, 524284(%r3)
 ; CHECK: alcg
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
   %ptr = getelementptr i32, i32 *%bsrc, i64 131071
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i128
   %add = add i128 %xor, %bext
   store i128 %add, i128 *%aptr
@@ -87,10 +87,10 @@
 ; CHECK: algf {{%r[0-5]}}, 0(%r3)
 ; CHECK: alcg
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
   %ptr = getelementptr i32, i32 *%bsrc, i64 131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i128
   %add = add i128 %xor, %bext
   store i128 %add, i128 *%aptr
@@ -103,10 +103,10 @@
 ; CHECK: algf {{%r[0-5]}}, -4(%r3)
 ; CHECK: alcg
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
   %ptr = getelementptr i32, i32 *%bsrc, i128 -1
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i128
   %add = add i128 %xor, %bext
   store i128 %add, i128 *%aptr
@@ -119,10 +119,10 @@
 ; CHECK: algf {{%r[0-5]}}, -524288(%r3)
 ; CHECK: alcg
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
   %ptr = getelementptr i32, i32 *%bsrc, i128 -131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i128
   %add = add i128 %xor, %bext
   store i128 %add, i128 *%aptr
@@ -137,10 +137,10 @@
 ; CHECK: algf {{%r[0-5]}}, 0(%r3)
 ; CHECK: alcg
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
   %ptr = getelementptr i32, i32 *%bsrc, i128 -131073
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i128
   %add = add i128 %xor, %bext
   store i128 %add, i128 *%aptr
@@ -152,12 +152,12 @@
 ; CHECK-LABEL: f10:
 ; CHECK: algf {{%r[0-5]}}, 524284({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524284
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i128
   %add = add i128 %xor, %bext
   store i128 %add, i128 *%aptr
diff --git a/llvm/test/CodeGen/SystemZ/int-add-11.ll b/llvm/test/CodeGen/SystemZ/int-add-11.ll
index 215bfa5..b93555f 100644
--- a/llvm/test/CodeGen/SystemZ/int-add-11.ll
+++ b/llvm/test/CodeGen/SystemZ/int-add-11.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: asi 0(%r2), 1
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %add = add i32 %val, 127
   store i32 %add, i32 *%ptr
   ret void
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: asi 0(%r2), 127
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %add = add i32 %val, 127
   store i32 %add, i32 *%ptr
   ret void
@@ -32,7 +32,7 @@
 ; CHECK-NOT: asi
 ; CHECK: st %r0, 0(%r2)
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %add = add i32 %val, 128
   store i32 %add, i32 *%ptr
   ret void
@@ -43,7 +43,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK: asi 0(%r2), -128
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %add = add i32 %val, -128
   store i32 %add, i32 *%ptr
   ret void
@@ -55,7 +55,7 @@
 ; CHECK-NOT: asi
 ; CHECK: st %r0, 0(%r2)
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %add = add i32 %val, -129
   store i32 %add, i32 *%ptr
   ret void
@@ -67,7 +67,7 @@
 ; CHECK: asi 524284(%r2), 1
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 131071
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %add = add i32 %val, 1
   store i32 %add, i32 *%ptr
   ret void
@@ -81,7 +81,7 @@
 ; CHECK: asi 0(%r2), 1
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 131072
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %add = add i32 %val, 1
   store i32 %add, i32 *%ptr
   ret void
@@ -93,7 +93,7 @@
 ; CHECK: asi -524288(%r2), 1
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -131072
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %add = add i32 %val, 1
   store i32 %add, i32 *%ptr
   ret void
@@ -107,7 +107,7 @@
 ; CHECK: asi 0(%r2), 1
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -131073
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %add = add i32 %val, 1
   store i32 %add, i32 *%ptr
   ret void
@@ -122,7 +122,7 @@
   %add1 = add i64 %base, %index
   %add2 = add i64 %add1, 4
   %ptr = inttoptr i64 %add2 to i32 *
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %add = add i32 %val, 1
   store i32 %add, i32 *%ptr
   ret void
@@ -134,22 +134,22 @@
 ; CHECK: asi {{[0-9]+}}(%r15), 127
 ; CHECK: br %r14
 entry:
-  %val0 = load volatile i32 *%ptr
-  %val1 = load volatile i32 *%ptr
-  %val2 = load volatile i32 *%ptr
-  %val3 = load volatile i32 *%ptr
-  %val4 = load volatile i32 *%ptr
-  %val5 = load volatile i32 *%ptr
-  %val6 = load volatile i32 *%ptr
-  %val7 = load volatile i32 *%ptr
-  %val8 = load volatile i32 *%ptr
-  %val9 = load volatile i32 *%ptr
-  %val10 = load volatile i32 *%ptr
-  %val11 = load volatile i32 *%ptr
-  %val12 = load volatile i32 *%ptr
-  %val13 = load volatile i32 *%ptr
-  %val14 = load volatile i32 *%ptr
-  %val15 = load volatile i32 *%ptr
+  %val0 = load volatile i32 , i32 *%ptr
+  %val1 = load volatile i32 , i32 *%ptr
+  %val2 = load volatile i32 , i32 *%ptr
+  %val3 = load volatile i32 , i32 *%ptr
+  %val4 = load volatile i32 , i32 *%ptr
+  %val5 = load volatile i32 , i32 *%ptr
+  %val6 = load volatile i32 , i32 *%ptr
+  %val7 = load volatile i32 , i32 *%ptr
+  %val8 = load volatile i32 , i32 *%ptr
+  %val9 = load volatile i32 , i32 *%ptr
+  %val10 = load volatile i32 , i32 *%ptr
+  %val11 = load volatile i32 , i32 *%ptr
+  %val12 = load volatile i32 , i32 *%ptr
+  %val13 = load volatile i32 , i32 *%ptr
+  %val14 = load volatile i32 , i32 *%ptr
+  %val15 = load volatile i32 , i32 *%ptr
 
   %test = icmp ne i32 %sel, 0
   br i1 %test, label %add, label %store
@@ -217,22 +217,22 @@
 ; CHECK: asi {{[0-9]+}}(%r15), -128
 ; CHECK: br %r14
 entry:
-  %val0 = load volatile i32 *%ptr
-  %val1 = load volatile i32 *%ptr
-  %val2 = load volatile i32 *%ptr
-  %val3 = load volatile i32 *%ptr
-  %val4 = load volatile i32 *%ptr
-  %val5 = load volatile i32 *%ptr
-  %val6 = load volatile i32 *%ptr
-  %val7 = load volatile i32 *%ptr
-  %val8 = load volatile i32 *%ptr
-  %val9 = load volatile i32 *%ptr
-  %val10 = load volatile i32 *%ptr
-  %val11 = load volatile i32 *%ptr
-  %val12 = load volatile i32 *%ptr
-  %val13 = load volatile i32 *%ptr
-  %val14 = load volatile i32 *%ptr
-  %val15 = load volatile i32 *%ptr
+  %val0 = load volatile i32 , i32 *%ptr
+  %val1 = load volatile i32 , i32 *%ptr
+  %val2 = load volatile i32 , i32 *%ptr
+  %val3 = load volatile i32 , i32 *%ptr
+  %val4 = load volatile i32 , i32 *%ptr
+  %val5 = load volatile i32 , i32 *%ptr
+  %val6 = load volatile i32 , i32 *%ptr
+  %val7 = load volatile i32 , i32 *%ptr
+  %val8 = load volatile i32 , i32 *%ptr
+  %val9 = load volatile i32 , i32 *%ptr
+  %val10 = load volatile i32 , i32 *%ptr
+  %val11 = load volatile i32 , i32 *%ptr
+  %val12 = load volatile i32 , i32 *%ptr
+  %val13 = load volatile i32 , i32 *%ptr
+  %val14 = load volatile i32 , i32 *%ptr
+  %val15 = load volatile i32 , i32 *%ptr
 
   %test = icmp ne i32 %sel, 0
   br i1 %test, label %add, label %store
diff --git a/llvm/test/CodeGen/SystemZ/int-add-12.ll b/llvm/test/CodeGen/SystemZ/int-add-12.ll
index bc0223d..496650f4 100644
--- a/llvm/test/CodeGen/SystemZ/int-add-12.ll
+++ b/llvm/test/CodeGen/SystemZ/int-add-12.ll
@@ -7,7 +7,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: agsi 0(%r2), 1
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %add = add i64 %val, 127
   store i64 %add, i64 *%ptr
   ret void
@@ -18,7 +18,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: agsi 0(%r2), 127
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %add = add i64 %val, 127
   store i64 %add, i64 *%ptr
   ret void
@@ -31,7 +31,7 @@
 ; CHECK-NOT: agsi
 ; CHECK: stg %r0, 0(%r2)
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %add = add i64 %val, 128
   store i64 %add, i64 *%ptr
   ret void
@@ -42,7 +42,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK: agsi 0(%r2), -128
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %add = add i64 %val, -128
   store i64 %add, i64 *%ptr
   ret void
@@ -54,7 +54,7 @@
 ; CHECK-NOT: agsi
 ; CHECK: stg %r0, 0(%r2)
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %add = add i64 %val, -129
   store i64 %add, i64 *%ptr
   ret void
@@ -66,7 +66,7 @@
 ; CHECK: agsi 524280(%r2), 1
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 65535
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %add = add i64 %val, 1
   store i64 %add, i64 *%ptr
   ret void
@@ -80,7 +80,7 @@
 ; CHECK: agsi 0(%r2), 1
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 65536
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %add = add i64 %val, 1
   store i64 %add, i64 *%ptr
   ret void
@@ -92,7 +92,7 @@
 ; CHECK: agsi -524288(%r2), 1
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 -65536
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %add = add i64 %val, 1
   store i64 %add, i64 *%ptr
   ret void
@@ -106,7 +106,7 @@
 ; CHECK: agsi 0(%r2), 1
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 -65537
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %add = add i64 %val, 1
   store i64 %add, i64 *%ptr
   ret void
@@ -121,7 +121,7 @@
   %add1 = add i64 %base, %index
   %add2 = add i64 %add1, 8
   %ptr = inttoptr i64 %add2 to i64 *
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %add = add i64 %val, 1
   store i64 %add, i64 *%ptr
   ret void
@@ -133,22 +133,22 @@
 ; CHECK: agsi {{[0-9]+}}(%r15), 127
 ; CHECK: br %r14
 entry:
-  %val0 = load volatile i64 *%ptr
-  %val1 = load volatile i64 *%ptr
-  %val2 = load volatile i64 *%ptr
-  %val3 = load volatile i64 *%ptr
-  %val4 = load volatile i64 *%ptr
-  %val5 = load volatile i64 *%ptr
-  %val6 = load volatile i64 *%ptr
-  %val7 = load volatile i64 *%ptr
-  %val8 = load volatile i64 *%ptr
-  %val9 = load volatile i64 *%ptr
-  %val10 = load volatile i64 *%ptr
-  %val11 = load volatile i64 *%ptr
-  %val12 = load volatile i64 *%ptr
-  %val13 = load volatile i64 *%ptr
-  %val14 = load volatile i64 *%ptr
-  %val15 = load volatile i64 *%ptr
+  %val0 = load volatile i64 , i64 *%ptr
+  %val1 = load volatile i64 , i64 *%ptr
+  %val2 = load volatile i64 , i64 *%ptr
+  %val3 = load volatile i64 , i64 *%ptr
+  %val4 = load volatile i64 , i64 *%ptr
+  %val5 = load volatile i64 , i64 *%ptr
+  %val6 = load volatile i64 , i64 *%ptr
+  %val7 = load volatile i64 , i64 *%ptr
+  %val8 = load volatile i64 , i64 *%ptr
+  %val9 = load volatile i64 , i64 *%ptr
+  %val10 = load volatile i64 , i64 *%ptr
+  %val11 = load volatile i64 , i64 *%ptr
+  %val12 = load volatile i64 , i64 *%ptr
+  %val13 = load volatile i64 , i64 *%ptr
+  %val14 = load volatile i64 , i64 *%ptr
+  %val15 = load volatile i64 , i64 *%ptr
 
   %test = icmp ne i32 %sel, 0
   br i1 %test, label %add, label %store
@@ -216,22 +216,22 @@
 ; CHECK: agsi {{[0-9]+}}(%r15), -128
 ; CHECK: br %r14
 entry:
-  %val0 = load volatile i64 *%ptr
-  %val1 = load volatile i64 *%ptr
-  %val2 = load volatile i64 *%ptr
-  %val3 = load volatile i64 *%ptr
-  %val4 = load volatile i64 *%ptr
-  %val5 = load volatile i64 *%ptr
-  %val6 = load volatile i64 *%ptr
-  %val7 = load volatile i64 *%ptr
-  %val8 = load volatile i64 *%ptr
-  %val9 = load volatile i64 *%ptr
-  %val10 = load volatile i64 *%ptr
-  %val11 = load volatile i64 *%ptr
-  %val12 = load volatile i64 *%ptr
-  %val13 = load volatile i64 *%ptr
-  %val14 = load volatile i64 *%ptr
-  %val15 = load volatile i64 *%ptr
+  %val0 = load volatile i64 , i64 *%ptr
+  %val1 = load volatile i64 , i64 *%ptr
+  %val2 = load volatile i64 , i64 *%ptr
+  %val3 = load volatile i64 , i64 *%ptr
+  %val4 = load volatile i64 , i64 *%ptr
+  %val5 = load volatile i64 , i64 *%ptr
+  %val6 = load volatile i64 , i64 *%ptr
+  %val7 = load volatile i64 , i64 *%ptr
+  %val8 = load volatile i64 , i64 *%ptr
+  %val9 = load volatile i64 , i64 *%ptr
+  %val10 = load volatile i64 , i64 *%ptr
+  %val11 = load volatile i64 , i64 *%ptr
+  %val12 = load volatile i64 , i64 *%ptr
+  %val13 = load volatile i64 , i64 *%ptr
+  %val14 = load volatile i64 , i64 *%ptr
+  %val15 = load volatile i64 , i64 *%ptr
 
   %test = icmp ne i32 %sel, 0
   br i1 %test, label %add, label %store
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-01.ll b/llvm/test/CodeGen/SystemZ/int-cmp-01.ll
index 8fb98f6..97b697d 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-01.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-01.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: ch %r2, 0(%r3)
 ; CHECK: br %r14
-  %half = load i16 *%src
+  %half = load i16 , i16 *%src
   %rhs = sext i16 %half to i32
   %cond = icmp slt i32 %lhs, %rhs
   %res = select i1 %cond, i32 100, i32 200
@@ -22,7 +22,7 @@
 ; CHECK: ch %r2, 4094(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 2047
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %cond = icmp slt i32 %lhs, %rhs
   %res = select i1 %cond, i32 100, i32 200
@@ -36,7 +36,7 @@
 ; CHECK: chy %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 2048
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %cond = icmp slt i32 %lhs, %rhs
   %res = select i1 %cond, i32 100, i32 200
@@ -50,7 +50,7 @@
 ; CHECK: chy %r2, 524286(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 262143
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %cond = icmp slt i32 %lhs, %rhs
   %res = select i1 %cond, i32 100, i32 200
@@ -66,7 +66,7 @@
 ; CHECK: ch %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 262144
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %cond = icmp slt i32 %lhs, %rhs
   %res = select i1 %cond, i32 100, i32 200
@@ -80,7 +80,7 @@
 ; CHECK: chy %r2, -2(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -1
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %cond = icmp slt i32 %lhs, %rhs
   %res = select i1 %cond, i32 100, i32 200
@@ -94,7 +94,7 @@
 ; CHECK: chy %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -262144
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %cond = icmp slt i32 %lhs, %rhs
   %res = select i1 %cond, i32 100, i32 200
@@ -110,7 +110,7 @@
 ; CHECK: ch %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -262145
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %cond = icmp slt i32 %lhs, %rhs
   %res = select i1 %cond, i32 100, i32 200
@@ -126,7 +126,7 @@
   %add1 = add i64 %base, %index
   %add2 = add i64 %add1, 4094
   %ptr = inttoptr i64 %add2 to i16 *
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %cond = icmp slt i32 %lhs, %rhs
   %res = select i1 %cond, i32 100, i32 200
@@ -142,7 +142,7 @@
   %add1 = add i64 %base, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i16 *
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %cond = icmp slt i32 %lhs, %rhs
   %res = select i1 %cond, i32 100, i32 200
@@ -157,7 +157,7 @@
 ; CHECK-NEXT: jh {{\.L.*}}
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %half = load i16 *%src
+  %half = load i16 , i16 *%src
   %lhs = sext i16 %half to i32
   %cond = icmp slt i32 %lhs, %rhs
   %res = select i1 %cond, double %a, double %b
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-02.ll b/llvm/test/CodeGen/SystemZ/int-cmp-02.ll
index 83e49a8..d5aef0f 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-02.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-02.ll
@@ -22,7 +22,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp slt i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -36,7 +36,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 1023
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp slt i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -50,7 +50,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 1024
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp slt i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -64,7 +64,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 131071
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp slt i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -80,7 +80,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 131072
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp slt i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -94,7 +94,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -1
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp slt i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -108,7 +108,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -131072
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp slt i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -124,7 +124,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -131073
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp slt i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -140,7 +140,7 @@
   %add1 = add i64 %base, %index
   %add2 = add i64 %add1, 4092
   %ptr = inttoptr i64 %add2 to i32 *
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp slt i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -156,7 +156,7 @@
   %add1 = add i64 %base, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i32 *
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp slt i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -189,7 +189,7 @@
 ; CHECK-NEXT: jh {{\.L.*}}
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %i1 = load i32 *%ptr
+  %i1 = load i32 , i32 *%ptr
   %cond = icmp slt i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-03.ll b/llvm/test/CodeGen/SystemZ/int-cmp-03.ll
index 6399cdc..0246666 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-03.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-03.ll
@@ -20,7 +20,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp ult i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -34,7 +34,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 1023
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp ult i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -48,7 +48,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 1024
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp ult i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -62,7 +62,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 131071
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp ult i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -78,7 +78,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 131072
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp ult i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -92,7 +92,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -1
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp ult i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -106,7 +106,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -131072
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp ult i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -122,7 +122,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -131073
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp ult i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -138,7 +138,7 @@
   %add1 = add i64 %base, %index
   %add2 = add i64 %add1, 4092
   %ptr = inttoptr i64 %add2 to i32 *
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp ult i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -154,7 +154,7 @@
   %add1 = add i64 %base, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i32 *
-  %i2 = load i32 *%ptr
+  %i2 = load i32 , i32 *%ptr
   %cond = icmp ult i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -167,7 +167,7 @@
 ; CHECK-NEXT: jh {{\.L.*}}
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %i1 = load i32 *%ptr
+  %i1 = load i32 , i32 *%ptr
   %cond = icmp ult i32 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-04.ll b/llvm/test/CodeGen/SystemZ/int-cmp-04.ll
index 8f114f8..90f05ea 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-04.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-04.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: cgh %r2, 0(%r3)
 ; CHECK: br %r14
-  %half = load i16 *%src
+  %half = load i16 , i16 *%src
   %rhs = sext i16 %half to i64
   %cond = icmp slt i64 %lhs, %rhs
   %res = select i1 %cond, i64 100, i64 200
@@ -22,7 +22,7 @@
 ; CHECK: cgh %r2, 524286(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 262143
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i64
   %cond = icmp slt i64 %lhs, %rhs
   %res = select i1 %cond, i64 100, i64 200
@@ -38,7 +38,7 @@
 ; CHECK: cgh %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 262144
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i64
   %cond = icmp slt i64 %lhs, %rhs
   %res = select i1 %cond, i64 100, i64 200
@@ -52,7 +52,7 @@
 ; CHECK: cgh %r2, -2(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -1
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i64
   %cond = icmp slt i64 %lhs, %rhs
   %res = select i1 %cond, i64 100, i64 200
@@ -66,7 +66,7 @@
 ; CHECK: cgh %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -262144
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i64
   %cond = icmp slt i64 %lhs, %rhs
   %res = select i1 %cond, i64 100, i64 200
@@ -82,7 +82,7 @@
 ; CHECK: cgh %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -262145
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i64
   %cond = icmp slt i64 %lhs, %rhs
   %res = select i1 %cond, i64 100, i64 200
@@ -98,7 +98,7 @@
   %add1 = add i64 %base, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i16 *
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i64
   %cond = icmp slt i64 %lhs, %rhs
   %res = select i1 %cond, i64 100, i64 200
@@ -113,7 +113,7 @@
 ; CHECK-NEXT: jh {{\.L.*}}
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %half = load i16 *%src
+  %half = load i16 , i16 *%src
   %lhs = sext i16 %half to i64
   %cond = icmp slt i64 %lhs, %rhs
   %res = select i1 %cond, double %a, double %b
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-05.ll b/llvm/test/CodeGen/SystemZ/int-cmp-05.ll
index d9373cf..70640b6 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-05.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-05.ll
@@ -61,7 +61,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = sext i32 %unext to i64
   %cond = icmp slt i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -73,7 +73,7 @@
 ; CHECK-LABEL: f6:
 ; CHECK-NOT: cgf
 ; CHECK: br %r14
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = sext i32 %unext to i64
   %cond = icmp ult i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -87,7 +87,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = sext i32 %unext to i64
   %cond = icmp eq i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -101,7 +101,7 @@
 ; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = sext i32 %unext to i64
   %cond = icmp ne i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -116,7 +116,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 131071
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = sext i32 %unext to i64
   %cond = icmp slt i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -133,7 +133,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 131072
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = sext i32 %unext to i64
   %cond = icmp slt i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -148,7 +148,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -1
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = sext i32 %unext to i64
   %cond = icmp slt i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -163,7 +163,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -131072
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = sext i32 %unext to i64
   %cond = icmp slt i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -180,7 +180,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -131073
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = sext i32 %unext to i64
   %cond = icmp slt i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -197,7 +197,7 @@
   %add1 = add i64 %base, %index
   %add2 = add i64 %add1, 524284
   %ptr = inttoptr i64 %add2 to i32 *
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = sext i32 %unext to i64
   %cond = icmp slt i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -220,16 +220,16 @@
   %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
   %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
 
-  %val0 = load i32 *%ptr0
-  %val1 = load i32 *%ptr1
-  %val2 = load i32 *%ptr2
-  %val3 = load i32 *%ptr3
-  %val4 = load i32 *%ptr4
-  %val5 = load i32 *%ptr5
-  %val6 = load i32 *%ptr6
-  %val7 = load i32 *%ptr7
-  %val8 = load i32 *%ptr8
-  %val9 = load i32 *%ptr9
+  %val0 = load i32 , i32 *%ptr0
+  %val1 = load i32 , i32 *%ptr1
+  %val2 = load i32 , i32 *%ptr2
+  %val3 = load i32 , i32 *%ptr3
+  %val4 = load i32 , i32 *%ptr4
+  %val5 = load i32 , i32 *%ptr5
+  %val6 = load i32 , i32 *%ptr6
+  %val7 = load i32 , i32 *%ptr7
+  %val8 = load i32 , i32 *%ptr8
+  %val9 = load i32 , i32 *%ptr9
 
   %frob0 = add i32 %val0, 100
   %frob1 = add i32 %val1, 100
@@ -311,7 +311,7 @@
 ; CHECK-NEXT: jh {{\.L.*}}
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i1 = sext i32 %unext to i64
   %cond = icmp slt i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-06.ll b/llvm/test/CodeGen/SystemZ/int-cmp-06.ll
index 5ac6659..16c2ade 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-06.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-06.ll
@@ -111,7 +111,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = zext i32 %unext to i64
   %cond = icmp ult i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -123,7 +123,7 @@
 ; CHECK-LABEL: f10:
 ; CHECK-NOT: clgf
 ; CHECK: br %r14
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = zext i32 %unext to i64
   %cond = icmp slt i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -137,7 +137,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = zext i32 %unext to i64
   %cond = icmp eq i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -151,7 +151,7 @@
 ; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = zext i32 %unext to i64
   %cond = icmp ne i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -166,7 +166,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 131071
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = zext i32 %unext to i64
   %cond = icmp ult i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -183,7 +183,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 131072
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = zext i32 %unext to i64
   %cond = icmp ult i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -198,7 +198,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -1
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = zext i32 %unext to i64
   %cond = icmp ult i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -213,7 +213,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -131072
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = zext i32 %unext to i64
   %cond = icmp ult i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -230,7 +230,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -131073
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = zext i32 %unext to i64
   %cond = icmp ult i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -247,7 +247,7 @@
   %add1 = add i64 %base, %index
   %add2 = add i64 %add1, 524284
   %ptr = inttoptr i64 %add2 to i32 *
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i2 = zext i32 %unext to i64
   %cond = icmp ult i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
@@ -270,16 +270,16 @@
   %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
   %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
 
-  %val0 = load i32 *%ptr0
-  %val1 = load i32 *%ptr1
-  %val2 = load i32 *%ptr2
-  %val3 = load i32 *%ptr3
-  %val4 = load i32 *%ptr4
-  %val5 = load i32 *%ptr5
-  %val6 = load i32 *%ptr6
-  %val7 = load i32 *%ptr7
-  %val8 = load i32 *%ptr8
-  %val9 = load i32 *%ptr9
+  %val0 = load i32 , i32 *%ptr0
+  %val1 = load i32 , i32 *%ptr1
+  %val2 = load i32 , i32 *%ptr2
+  %val3 = load i32 , i32 *%ptr3
+  %val4 = load i32 , i32 *%ptr4
+  %val5 = load i32 , i32 *%ptr5
+  %val6 = load i32 , i32 *%ptr6
+  %val7 = load i32 , i32 *%ptr7
+  %val8 = load i32 , i32 *%ptr8
+  %val9 = load i32 , i32 *%ptr9
 
   %frob0 = add i32 %val0, 100
   %frob1 = add i32 %val1, 100
@@ -374,7 +374,7 @@
 ; CHECK-NEXT: jh {{\.L.*}}
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %unext = load i32 *%ptr
+  %unext = load i32 , i32 *%ptr
   %i1 = zext i32 %unext to i64
   %cond = icmp ult i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-07.ll b/llvm/test/CodeGen/SystemZ/int-cmp-07.ll
index 6d1e9df..0a787c9 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-07.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-07.ll
@@ -20,7 +20,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %i2 = load i64 *%ptr
+  %i2 = load i64 , i64 *%ptr
   %cond = icmp slt i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -34,7 +34,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 65535
-  %i2 = load i64 *%ptr
+  %i2 = load i64 , i64 *%ptr
   %cond = icmp slt i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -50,7 +50,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 65536
-  %i2 = load i64 *%ptr
+  %i2 = load i64 , i64 *%ptr
   %cond = icmp slt i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -64,7 +64,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 -1
-  %i2 = load i64 *%ptr
+  %i2 = load i64 , i64 *%ptr
   %cond = icmp slt i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -78,7 +78,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 -65536
-  %i2 = load i64 *%ptr
+  %i2 = load i64 , i64 *%ptr
   %cond = icmp slt i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -94,7 +94,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 -65537
-  %i2 = load i64 *%ptr
+  %i2 = load i64 , i64 *%ptr
   %cond = icmp slt i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -110,7 +110,7 @@
   %add1 = add i64 %base, %index
   %add2 = add i64 %add1, 524280
   %ptr = inttoptr i64 %add2 to i64 *
-  %i2 = load i64 *%ptr
+  %i2 = load i64 , i64 *%ptr
   %cond = icmp slt i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -123,7 +123,7 @@
 ; CHECK-NEXT: jh {{\.L.*}}
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %i1 = load i64 *%ptr
+  %i1 = load i64 , i64 *%ptr
   %cond = icmp slt i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-08.ll b/llvm/test/CodeGen/SystemZ/int-cmp-08.ll
index 8ba3c83..384b41b 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-08.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-08.ll
@@ -20,7 +20,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %i2 = load i64 *%ptr
+  %i2 = load i64 , i64 *%ptr
   %cond = icmp ult i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -34,7 +34,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 65535
-  %i2 = load i64 *%ptr
+  %i2 = load i64 , i64 *%ptr
   %cond = icmp ult i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -50,7 +50,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 65536
-  %i2 = load i64 *%ptr
+  %i2 = load i64 , i64 *%ptr
   %cond = icmp ult i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -64,7 +64,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 -1
-  %i2 = load i64 *%ptr
+  %i2 = load i64 , i64 *%ptr
   %cond = icmp ult i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -78,7 +78,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 -65536
-  %i2 = load i64 *%ptr
+  %i2 = load i64 , i64 *%ptr
   %cond = icmp ult i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -94,7 +94,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 -65537
-  %i2 = load i64 *%ptr
+  %i2 = load i64 , i64 *%ptr
   %cond = icmp ult i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -110,7 +110,7 @@
   %add1 = add i64 %base, %index
   %add2 = add i64 %add1, 524280
   %ptr = inttoptr i64 %add2 to i64 *
-  %i2 = load i64 *%ptr
+  %i2 = load i64 , i64 *%ptr
   %cond = icmp ult i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -123,7 +123,7 @@
 ; CHECK-NEXT: jh {{\.L.*}}
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %i1 = load i64 *%ptr
+  %i1 = load i64 , i64 *%ptr
   %cond = icmp ult i64 %i1, %i2
   %res = select i1 %cond, double %a, double %b
   ret double %res
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-15.ll b/llvm/test/CodeGen/SystemZ/int-cmp-15.ll
index 1d6b3e9..3c1e052 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-15.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-15.ll
@@ -8,7 +8,7 @@
 ; CHECK: cli 0(%r2), 1
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %cond = icmp ugt i8 %val, 1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -20,7 +20,7 @@
 ; CHECK: cli 0(%r2), 254
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %cond = icmp ult i8 %val, 254
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -32,7 +32,7 @@
 ; CHECK: cli 0(%r2), 127
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %cond = icmp slt i8 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -44,7 +44,7 @@
 ; CHECK: cli 0(%r2), 127
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %cond = icmp sle i8 %val, -1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -56,7 +56,7 @@
 ; CHECK: cli 0(%r2), 128
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %cond = icmp sge i8 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -68,7 +68,7 @@
 ; CHECK: cli 0(%r2), 128
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %cond = icmp sgt i8 %val, -1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -80,7 +80,7 @@
 ; CHECK: cli 0(%r2), 128
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %cond = icmp eq i8 %val, -128
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -92,7 +92,7 @@
 ; CHECK: cli 0(%r2), 0
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %cond = icmp eq i8 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -104,7 +104,7 @@
 ; CHECK: cli 0(%r2), 127
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %cond = icmp eq i8 %val, 127
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -116,7 +116,7 @@
 ; CHECK: cli 0(%r2), 255
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %cond = icmp eq i8 %val, 255
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -128,7 +128,7 @@
 ; CHECK: cli 4095(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 4095
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %cond = icmp ult i8 %val, 127
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -140,7 +140,7 @@
 ; CHECK: cliy 4096(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 4096
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %cond = icmp ult i8 %val, 127
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -152,7 +152,7 @@
 ; CHECK: cliy 524287(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524287
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %cond = icmp ult i8 %val, 127
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -166,7 +166,7 @@
 ; CHECK: cli 0(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524288
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %cond = icmp ult i8 %val, 127
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -178,7 +178,7 @@
 ; CHECK: cliy -1(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -1
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %cond = icmp ult i8 %val, 127
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -190,7 +190,7 @@
 ; CHECK: cliy -524288(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524288
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %cond = icmp ult i8 %val, 127
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -204,7 +204,7 @@
 ; CHECK: cli 0(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524289
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %cond = icmp ult i8 %val, 127
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -219,7 +219,7 @@
   %add1 = add i64 %base, %index
   %add2 = add i64 %add1, 4095
   %ptr = inttoptr i64 %add2 to i8 *
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %cond = icmp ult i8 %val, 127
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -234,7 +234,7 @@
   %add1 = add i64 %base, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i8 *
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %cond = icmp ult i8 %val, 127
   %res = select i1 %cond, double %a, double %b
   ret double %res
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-16.ll b/llvm/test/CodeGen/SystemZ/int-cmp-16.ll
index be206d9..37508b5 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-16.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-16.ll
@@ -9,7 +9,7 @@
 ; CHECK: cli 0(%r2), 0
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %cond = icmp eq i32 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -22,7 +22,7 @@
 ; CHECK: cli 0(%r2), 255
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %cond = icmp eq i32 %ext, 255
   %res = select i1 %cond, double %a, double %b
@@ -34,7 +34,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %cond = icmp eq i32 %ext, 256
   %res = select i1 %cond, double %a, double %b
@@ -47,7 +47,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %cond = icmp eq i32 %ext, -1
   %res = select i1 %cond, double %a, double %b
@@ -60,7 +60,7 @@
 ; CHECK: cli 0(%r2), 0
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp eq i32 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -73,7 +73,7 @@
 ; CHECK: cli 0(%r2), 127
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp eq i32 %ext, 127
   %res = select i1 %cond, double %a, double %b
@@ -86,7 +86,7 @@
 ; CHECK-LABEL: f7:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp eq i32 %ext, 128
   %res = select i1 %cond, double %a, double %b
@@ -99,7 +99,7 @@
 ; CHECK: cli 0(%r2), 255
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp eq i32 %ext, -1
   %res = select i1 %cond, double %a, double %b
@@ -112,7 +112,7 @@
 ; CHECK: cli 0(%r2), 128
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp eq i32 %ext, -128
   %res = select i1 %cond, double %a, double %b
@@ -125,7 +125,7 @@
 ; CHECK-LABEL: f10:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp eq i32 %ext, -129
   %res = select i1 %cond, double %a, double %b
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-17.ll b/llvm/test/CodeGen/SystemZ/int-cmp-17.ll
index 3df4ecc..a22fb60 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-17.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-17.ll
@@ -9,7 +9,7 @@
 ; CHECK: cli 0(%r2), 0
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %cond = icmp ne i32 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -22,7 +22,7 @@
 ; CHECK: cli 0(%r2), 255
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %cond = icmp ne i32 %ext, 255
   %res = select i1 %cond, double %a, double %b
@@ -34,7 +34,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %cond = icmp ne i32 %ext, 256
   %res = select i1 %cond, double %a, double %b
@@ -47,7 +47,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %cond = icmp ne i32 %ext, -1
   %res = select i1 %cond, double %a, double %b
@@ -60,7 +60,7 @@
 ; CHECK: cli 0(%r2), 0
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp ne i32 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -73,7 +73,7 @@
 ; CHECK: cli 0(%r2), 127
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp ne i32 %ext, 127
   %res = select i1 %cond, double %a, double %b
@@ -86,7 +86,7 @@
 ; CHECK-LABEL: f7:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp ne i32 %ext, 128
   %res = select i1 %cond, double %a, double %b
@@ -99,7 +99,7 @@
 ; CHECK: cli 0(%r2), 255
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp ne i32 %ext, -1
   %res = select i1 %cond, double %a, double %b
@@ -112,7 +112,7 @@
 ; CHECK: cli 0(%r2), 128
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp ne i32 %ext, -128
   %res = select i1 %cond, double %a, double %b
@@ -125,7 +125,7 @@
 ; CHECK-LABEL: f10:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp ne i32 %ext, -129
   %res = select i1 %cond, double %a, double %b
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-18.ll b/llvm/test/CodeGen/SystemZ/int-cmp-18.ll
index d03d6ac..f4bc5c0e 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-18.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-18.ll
@@ -9,7 +9,7 @@
 ; CHECK: cli 0(%r2), 0
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %cond = icmp eq i64 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -22,7 +22,7 @@
 ; CHECK: cli 0(%r2), 255
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %cond = icmp eq i64 %ext, 255
   %res = select i1 %cond, double %a, double %b
@@ -34,7 +34,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %cond = icmp eq i64 %ext, 256
   %res = select i1 %cond, double %a, double %b
@@ -47,7 +47,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %cond = icmp eq i64 %ext, -1
   %res = select i1 %cond, double %a, double %b
@@ -60,7 +60,7 @@
 ; CHECK: cli 0(%r2), 0
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp eq i64 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -73,7 +73,7 @@
 ; CHECK: cli 0(%r2), 127
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp eq i64 %ext, 127
   %res = select i1 %cond, double %a, double %b
@@ -86,7 +86,7 @@
 ; CHECK-LABEL: f7:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp eq i64 %ext, 128
   %res = select i1 %cond, double %a, double %b
@@ -99,7 +99,7 @@
 ; CHECK: cli 0(%r2), 255
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp eq i64 %ext, -1
   %res = select i1 %cond, double %a, double %b
@@ -112,7 +112,7 @@
 ; CHECK: cli 0(%r2), 128
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp eq i64 %ext, -128
   %res = select i1 %cond, double %a, double %b
@@ -125,7 +125,7 @@
 ; CHECK-LABEL: f10:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp eq i64 %ext, -129
   %res = select i1 %cond, double %a, double %b
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-19.ll b/llvm/test/CodeGen/SystemZ/int-cmp-19.ll
index b5f0856..0a23f06 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-19.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-19.ll
@@ -9,7 +9,7 @@
 ; CHECK: cli 0(%r2), 0
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %cond = icmp ne i64 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -22,7 +22,7 @@
 ; CHECK: cli 0(%r2), 255
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %cond = icmp ne i64 %ext, 255
   %res = select i1 %cond, double %a, double %b
@@ -34,7 +34,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %cond = icmp ne i64 %ext, 256
   %res = select i1 %cond, double %a, double %b
@@ -47,7 +47,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %cond = icmp ne i64 %ext, -1
   %res = select i1 %cond, double %a, double %b
@@ -60,7 +60,7 @@
 ; CHECK: cli 0(%r2), 0
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp ne i64 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -73,7 +73,7 @@
 ; CHECK: cli 0(%r2), 127
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp ne i64 %ext, 127
   %res = select i1 %cond, double %a, double %b
@@ -86,7 +86,7 @@
 ; CHECK-LABEL: f7:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp ne i64 %ext, 128
   %res = select i1 %cond, double %a, double %b
@@ -99,7 +99,7 @@
 ; CHECK: cli 0(%r2), 255
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp ne i64 %ext, -1
   %res = select i1 %cond, double %a, double %b
@@ -112,7 +112,7 @@
 ; CHECK: cli 0(%r2), 128
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp ne i64 %ext, -128
   %res = select i1 %cond, double %a, double %b
@@ -125,7 +125,7 @@
 ; CHECK-LABEL: f10:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp ne i64 %ext, -129
   %res = select i1 %cond, double %a, double %b
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-20.ll b/llvm/test/CodeGen/SystemZ/int-cmp-20.ll
index 98c41cd..2acff55 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-20.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-20.ll
@@ -10,7 +10,7 @@
 ; CHECK: cli 0(%r2), 1
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %cond = icmp ugt i32 %ext, 1
   %res = select i1 %cond, double %a, double %b
@@ -24,7 +24,7 @@
 ; CHECK: cli 0(%r2), 1
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp ugt i32 %ext, 1
   %res = select i1 %cond, double %a, double %b
@@ -38,7 +38,7 @@
 ; CHECK: cli 0(%r2), 254
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %cond = icmp ult i32 %ext, 254
   %res = select i1 %cond, double %a, double %b
@@ -52,7 +52,7 @@
 ; CHECK: cli 0(%r2), 254
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp ult i32 %ext, -2
   %res = select i1 %cond, double %a, double %b
@@ -65,7 +65,7 @@
 ; CHECK-LABEL: f5:
 ; CHECK-NOT: cli {{.*}}
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %cond = icmp ult i32 %ext, 256
   %res = select i1 %cond, double %a, double %b
@@ -81,7 +81,7 @@
 ; CHECK-LABEL: f6:
 ; CHECK-NOT: cli {{.*}}
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp ult i32 %ext, 128
   %res = select i1 %cond, double %a, double %b
@@ -93,7 +93,7 @@
 ; CHECK-LABEL: f7:
 ; CHECK-NOT: cli {{.*}}
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp ult i32 %ext, -129
   %res = select i1 %cond, double %a, double %b
@@ -107,7 +107,7 @@
 ; CHECK: cli 0(%r2), 1
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %cond = icmp sgt i32 %ext, 1
   %res = select i1 %cond, double %a, double %b
@@ -120,7 +120,7 @@
 ; CHECK-LABEL: f9:
 ; CHECK-NOT: cli {{.*}}
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp sgt i32 %ext, 1
   %res = select i1 %cond, double %a, double %b
@@ -134,7 +134,7 @@
 ; CHECK: cli 0(%r2), 254
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %cond = icmp slt i32 %ext, 254
   %res = select i1 %cond, double %a, double %b
@@ -147,7 +147,7 @@
 ; CHECK-LABEL: f11:
 ; CHECK-NOT: cli {{.*}}
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp slt i32 %ext, -2
   %res = select i1 %cond, double %a, double %b
@@ -160,7 +160,7 @@
 ; CHECK-LABEL: f12:
 ; CHECK-NOT: cli {{.*}}
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %cond = icmp slt i32 %ext, 256
   %res = select i1 %cond, double %a, double %b
@@ -173,7 +173,7 @@
 ; CHECK: cli 0(%r2), 128
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp sge i32 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -186,7 +186,7 @@
 ; CHECK: cli 0(%r2), 128
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp sgt i32 %ext, -1
   %res = select i1 %cond, double %a, double %b
@@ -199,7 +199,7 @@
 ; CHECK: cli 0(%r2), 127
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp slt i32 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -212,7 +212,7 @@
 ; CHECK: cli 0(%r2), 127
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %cond = icmp sle i32 %ext, -1
   %res = select i1 %cond, double %a, double %b
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-21.ll b/llvm/test/CodeGen/SystemZ/int-cmp-21.ll
index ca9225d..5be9732 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-21.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-21.ll
@@ -10,7 +10,7 @@
 ; CHECK: cli 0(%r2), 1
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %cond = icmp ugt i64 %ext, 1
   %res = select i1 %cond, double %a, double %b
@@ -24,7 +24,7 @@
 ; CHECK: cli 0(%r2), 1
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp ugt i64 %ext, 1
   %res = select i1 %cond, double %a, double %b
@@ -38,7 +38,7 @@
 ; CHECK: cli 0(%r2), 254
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %cond = icmp ult i64 %ext, 254
   %res = select i1 %cond, double %a, double %b
@@ -52,7 +52,7 @@
 ; CHECK: cli 0(%r2), 254
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp ult i64 %ext, -2
   %res = select i1 %cond, double %a, double %b
@@ -65,7 +65,7 @@
 ; CHECK-LABEL: f5:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %cond = icmp ult i64 %ext, 256
   %res = select i1 %cond, double %a, double %b
@@ -81,7 +81,7 @@
 ; CHECK-LABEL: f6:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp ult i64 %ext, 128
   %res = select i1 %cond, double %a, double %b
@@ -93,7 +93,7 @@
 ; CHECK-LABEL: f7:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp ult i64 %ext, -129
   %res = select i1 %cond, double %a, double %b
@@ -107,7 +107,7 @@
 ; CHECK: cli 0(%r2), 1
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %cond = icmp sgt i64 %ext, 1
   %res = select i1 %cond, double %a, double %b
@@ -120,7 +120,7 @@
 ; CHECK-LABEL: f9:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp sgt i64 %ext, 1
   %res = select i1 %cond, double %a, double %b
@@ -134,7 +134,7 @@
 ; CHECK: cli 0(%r2), 254
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %cond = icmp slt i64 %ext, 254
   %res = select i1 %cond, double %a, double %b
@@ -147,7 +147,7 @@
 ; CHECK-LABEL: f11:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp slt i64 %ext, -2
   %res = select i1 %cond, double %a, double %b
@@ -160,7 +160,7 @@
 ; CHECK-LABEL: f12:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %cond = icmp slt i64 %ext, 256
   %res = select i1 %cond, double %a, double %b
@@ -173,7 +173,7 @@
 ; CHECK: cli 0(%r2), 128
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp sge i64 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -186,7 +186,7 @@
 ; CHECK: cli 0(%r2), 128
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp sgt i64 %ext, -1
   %res = select i1 %cond, double %a, double %b
@@ -199,7 +199,7 @@
 ; CHECK: cli 0(%r2), 127
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp slt i64 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -212,7 +212,7 @@
 ; CHECK: cli 0(%r2), 127
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %cond = icmp sle i64 %ext, -1
   %res = select i1 %cond, double %a, double %b
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-22.ll b/llvm/test/CodeGen/SystemZ/int-cmp-22.ll
index a8d027f..f29023c 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-22.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-22.ll
@@ -9,7 +9,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp slt i16 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -22,7 +22,7 @@
 ; CHECK-NEXT: jle
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp slt i16 %val, 1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -35,7 +35,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp slt i16 %val, 32766
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -48,7 +48,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp slt i16 %val, -1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -61,7 +61,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp slt i16 %val, -32766
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -75,7 +75,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%base, i64 2047
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp slt i16 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -90,7 +90,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%base, i64 2048
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp slt i16 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -105,7 +105,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%base, i64 -1
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp slt i16 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -121,7 +121,7 @@
 ; CHECK: br %r14
   %add = add i64 %base, %index
   %ptr = inttoptr i64 %add to i16 *
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp slt i16 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-23.ll b/llvm/test/CodeGen/SystemZ/int-cmp-23.ll
index b3c8e52..df6b626 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-23.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-23.ll
@@ -9,7 +9,7 @@
 ; CHECK-NEXT: jh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp ugt i16 %val, 1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -22,7 +22,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp ult i16 %val, 65534
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -36,7 +36,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%base, i64 2047
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp ugt i16 %val, 1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -51,7 +51,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%base, i64 2048
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp ugt i16 %val, 1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -66,7 +66,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%base, i64 -1
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp ugt i16 %val, 1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -82,7 +82,7 @@
 ; CHECK: br %r14
   %add = add i64 %base, %index
   %ptr = inttoptr i64 %add to i16 *
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp ugt i16 %val, 1
   %res = select i1 %cond, double %a, double %b
   ret double %res
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-24.ll b/llvm/test/CodeGen/SystemZ/int-cmp-24.ll
index 1a8e587..e1141a7 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-24.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-24.ll
@@ -9,7 +9,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp eq i16 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -22,7 +22,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp eq i16 %val, 65535
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -35,7 +35,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp eq i16 %val, -32768
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -48,7 +48,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp eq i16 %val, 32767
   %res = select i1 %cond, double %a, double %b
   ret double %res
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-25.ll b/llvm/test/CodeGen/SystemZ/int-cmp-25.ll
index 50803df..2685303 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-25.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-25.ll
@@ -9,7 +9,7 @@
 ; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp ne i16 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -22,7 +22,7 @@
 ; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp ne i16 %val, 65535
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -35,7 +35,7 @@
 ; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp ne i16 %val, -32768
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -48,7 +48,7 @@
 ; CHECK-NEXT: jlh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %cond = icmp ne i16 %val, 32767
   %res = select i1 %cond, double %a, double %b
   ret double %res
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-26.ll b/llvm/test/CodeGen/SystemZ/int-cmp-26.ll
index 6077865..ba93f08 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-26.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-26.ll
@@ -9,7 +9,7 @@
 ; CHECK: clhhsi 0(%r2), 0
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i32
   %cond = icmp eq i32 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -22,7 +22,7 @@
 ; CHECK: clhhsi 0(%r2), 65535
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i32
   %cond = icmp eq i32 %ext, 65535
   %res = select i1 %cond, double %a, double %b
@@ -34,7 +34,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i32
   %cond = icmp eq i32 %ext, 65536
   %res = select i1 %cond, double %a, double %b
@@ -47,7 +47,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i32
   %cond = icmp eq i32 %ext, -1
   %res = select i1 %cond, double %a, double %b
@@ -60,7 +60,7 @@
 ; CHECK: clhhsi 0(%r2), 0
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp eq i32 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -73,7 +73,7 @@
 ; CHECK: clhhsi 0(%r2), 32767
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp eq i32 %ext, 32767
   %res = select i1 %cond, double %a, double %b
@@ -86,7 +86,7 @@
 ; CHECK-LABEL: f7:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp eq i32 %ext, 32768
   %res = select i1 %cond, double %a, double %b
@@ -99,7 +99,7 @@
 ; CHECK: clhhsi 0(%r2), 65535
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp eq i32 %ext, -1
   %res = select i1 %cond, double %a, double %b
@@ -112,7 +112,7 @@
 ; CHECK: clhhsi 0(%r2), 32768
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp eq i32 %ext, -32768
   %res = select i1 %cond, double %a, double %b
@@ -125,7 +125,7 @@
 ; CHECK-LABEL: f10:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp eq i32 %ext, -32769
   %res = select i1 %cond, double %a, double %b
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-27.ll b/llvm/test/CodeGen/SystemZ/int-cmp-27.ll
index 3102f5c..9a503c9 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-27.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-27.ll
@@ -9,7 +9,7 @@
 ; CHECK: clhhsi 0(%r2), 0
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i32
   %cond = icmp ne i32 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -22,7 +22,7 @@
 ; CHECK: clhhsi 0(%r2), 65535
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i32
   %cond = icmp ne i32 %ext, 65535
   %res = select i1 %cond, double %a, double %b
@@ -34,7 +34,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i32
   %cond = icmp ne i32 %ext, 65536
   %res = select i1 %cond, double %a, double %b
@@ -47,7 +47,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i32
   %cond = icmp ne i32 %ext, -1
   %res = select i1 %cond, double %a, double %b
@@ -60,7 +60,7 @@
 ; CHECK: clhhsi 0(%r2), 0
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp ne i32 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -73,7 +73,7 @@
 ; CHECK: clhhsi 0(%r2), 32767
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp ne i32 %ext, 32767
   %res = select i1 %cond, double %a, double %b
@@ -86,7 +86,7 @@
 ; CHECK-LABEL: f7:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp ne i32 %ext, 32768
   %res = select i1 %cond, double %a, double %b
@@ -99,7 +99,7 @@
 ; CHECK: clhhsi 0(%r2), 65535
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp ne i32 %ext, -1
   %res = select i1 %cond, double %a, double %b
@@ -112,7 +112,7 @@
 ; CHECK: clhhsi 0(%r2), 32768
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp ne i32 %ext, -32768
   %res = select i1 %cond, double %a, double %b
@@ -125,7 +125,7 @@
 ; CHECK-LABEL: f10:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp ne i32 %ext, -32769
   %res = select i1 %cond, double %a, double %b
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-28.ll b/llvm/test/CodeGen/SystemZ/int-cmp-28.ll
index c3b9059..68f1cd2 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-28.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-28.ll
@@ -9,7 +9,7 @@
 ; CHECK: clhhsi 0(%r2), 0
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i64
   %cond = icmp eq i64 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -22,7 +22,7 @@
 ; CHECK: clhhsi 0(%r2), 65535
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i64
   %cond = icmp eq i64 %ext, 65535
   %res = select i1 %cond, double %a, double %b
@@ -34,7 +34,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i64
   %cond = icmp eq i64 %ext, 65536
   %res = select i1 %cond, double %a, double %b
@@ -47,7 +47,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i64
   %cond = icmp eq i64 %ext, -1
   %res = select i1 %cond, double %a, double %b
@@ -60,7 +60,7 @@
 ; CHECK: clhhsi 0(%r2), 0
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp eq i64 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -73,7 +73,7 @@
 ; CHECK: clhhsi 0(%r2), 32767
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp eq i64 %ext, 32767
   %res = select i1 %cond, double %a, double %b
@@ -86,7 +86,7 @@
 ; CHECK-LABEL: f7:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp eq i64 %ext, 32768
   %res = select i1 %cond, double %a, double %b
@@ -99,7 +99,7 @@
 ; CHECK: clhhsi 0(%r2), 65535
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp eq i64 %ext, -1
   %res = select i1 %cond, double %a, double %b
@@ -112,7 +112,7 @@
 ; CHECK: clhhsi 0(%r2), 32768
 ; CHECK-NEXT: je
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp eq i64 %ext, -32768
   %res = select i1 %cond, double %a, double %b
@@ -125,7 +125,7 @@
 ; CHECK-LABEL: f10:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp eq i64 %ext, -32769
   %res = select i1 %cond, double %a, double %b
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-29.ll b/llvm/test/CodeGen/SystemZ/int-cmp-29.ll
index 1b40d8c..4fb2e85 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-29.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-29.ll
@@ -9,7 +9,7 @@
 ; CHECK: clhhsi 0(%r2), 0
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i64
   %cond = icmp ne i64 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -22,7 +22,7 @@
 ; CHECK: clhhsi 0(%r2), 65535
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i64
   %cond = icmp ne i64 %ext, 65535
   %res = select i1 %cond, double %a, double %b
@@ -34,7 +34,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i64
   %cond = icmp ne i64 %ext, 65536
   %res = select i1 %cond, double %a, double %b
@@ -47,7 +47,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i64
   %cond = icmp ne i64 %ext, -1
   %res = select i1 %cond, double %a, double %b
@@ -60,7 +60,7 @@
 ; CHECK: clhhsi 0(%r2), 0
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp ne i64 %ext, 0
   %res = select i1 %cond, double %a, double %b
@@ -73,7 +73,7 @@
 ; CHECK: clhhsi 0(%r2), 32767
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp ne i64 %ext, 32767
   %res = select i1 %cond, double %a, double %b
@@ -86,7 +86,7 @@
 ; CHECK-LABEL: f7:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp ne i64 %ext, 32768
   %res = select i1 %cond, double %a, double %b
@@ -99,7 +99,7 @@
 ; CHECK: clhhsi 0(%r2), 65535
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp ne i64 %ext, -1
   %res = select i1 %cond, double %a, double %b
@@ -112,7 +112,7 @@
 ; CHECK: clhhsi 0(%r2), 32768
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp ne i64 %ext, -32768
   %res = select i1 %cond, double %a, double %b
@@ -125,7 +125,7 @@
 ; CHECK-LABEL: f10:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp ne i64 %ext, -32769
   %res = select i1 %cond, double %a, double %b
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-30.ll b/llvm/test/CodeGen/SystemZ/int-cmp-30.ll
index 6c9498c..043ff48 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-30.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-30.ll
@@ -10,7 +10,7 @@
 ; CHECK: clhhsi 0(%r2), 1
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i32
   %cond = icmp ugt i32 %ext, 1
   %res = select i1 %cond, double %a, double %b
@@ -24,7 +24,7 @@
 ; CHECK: clhhsi 0(%r2), 1
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp ugt i32 %ext, 1
   %res = select i1 %cond, double %a, double %b
@@ -38,7 +38,7 @@
 ; CHECK: clhhsi 0(%r2), 65534
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i32
   %cond = icmp ult i32 %ext, 65534
   %res = select i1 %cond, double %a, double %b
@@ -52,7 +52,7 @@
 ; CHECK: clhhsi 0(%r2), 65534
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp ult i32 %ext, -2
   %res = select i1 %cond, double %a, double %b
@@ -65,7 +65,7 @@
 ; CHECK-LABEL: f5:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i32
   %cond = icmp ult i32 %ext, 65536
   %res = select i1 %cond, double %a, double %b
@@ -82,7 +82,7 @@
 ; CHECK-LABEL: f6:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp ult i32 %ext, 32768
   %res = select i1 %cond, double %a, double %b
@@ -94,7 +94,7 @@
 ; CHECK-LABEL: f7:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp ult i32 %ext, -32769
   %res = select i1 %cond, double %a, double %b
@@ -108,7 +108,7 @@
 ; CHECK: clhhsi 0(%r2), 1
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i32
   %cond = icmp sgt i32 %ext, 1
   %res = select i1 %cond, double %a, double %b
@@ -122,7 +122,7 @@
 ; CHECK: chhsi 0(%r2), 1
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp sgt i32 %ext, 1
   %res = select i1 %cond, double %a, double %b
@@ -136,7 +136,7 @@
 ; CHECK: clhhsi 0(%r2), 65534
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i32
   %cond = icmp slt i32 %ext, 65534
   %res = select i1 %cond, double %a, double %b
@@ -150,7 +150,7 @@
 ; CHECK: chhsi 0(%r2), -2
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp slt i32 %ext, -2
   %res = select i1 %cond, double %a, double %b
@@ -163,7 +163,7 @@
 ; CHECK-LABEL: f12:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i32
   %cond = icmp slt i32 %ext, 65536
   %res = select i1 %cond, double %a, double %b
@@ -177,7 +177,7 @@
 ; CHECK: chhsi 0(%r2), 32766
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp slt i32 %ext, 32766
   %res = select i1 %cond, double %a, double %b
@@ -190,7 +190,7 @@
 ; CHECK-LABEL: f14:
 ; CHECK-NOT: chhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp slt i32 %ext, 32768
   %res = select i1 %cond, double %a, double %b
@@ -204,7 +204,7 @@
 ; CHECK: chhsi 0(%r2), -32767
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp sgt i32 %ext, -32767
   %res = select i1 %cond, double %a, double %b
@@ -217,7 +217,7 @@
 ; CHECK-LABEL: f16:
 ; CHECK-NOT: chhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i32
   %cond = icmp sgt i32 %ext, -32769
   %res = select i1 %cond, double %a, double %b
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-31.ll b/llvm/test/CodeGen/SystemZ/int-cmp-31.ll
index 21539f2..298b446 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-31.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-31.ll
@@ -10,7 +10,7 @@
 ; CHECK: clhhsi 0(%r2), 1
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i64
   %cond = icmp ugt i64 %ext, 1
   %res = select i1 %cond, double %a, double %b
@@ -24,7 +24,7 @@
 ; CHECK: clhhsi 0(%r2), 1
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp ugt i64 %ext, 1
   %res = select i1 %cond, double %a, double %b
@@ -38,7 +38,7 @@
 ; CHECK: clhhsi 0(%r2), 65534
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i64
   %cond = icmp ult i64 %ext, 65534
   %res = select i1 %cond, double %a, double %b
@@ -52,7 +52,7 @@
 ; CHECK: clhhsi 0(%r2), 65534
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp ult i64 %ext, -2
   %res = select i1 %cond, double %a, double %b
@@ -65,7 +65,7 @@
 ; CHECK-LABEL: f5:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i64
   %cond = icmp ult i64 %ext, 65536
   %res = select i1 %cond, double %a, double %b
@@ -82,7 +82,7 @@
 ; CHECK-LABEL: f6:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp ult i64 %ext, 32768
   %res = select i1 %cond, double %a, double %b
@@ -94,7 +94,7 @@
 ; CHECK-LABEL: f7:
 ; CHECK-NOT: clhhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp ult i64 %ext, -32769
   %res = select i1 %cond, double %a, double %b
@@ -108,7 +108,7 @@
 ; CHECK: clhhsi 0(%r2), 1
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i64
   %cond = icmp sgt i64 %ext, 1
   %res = select i1 %cond, double %a, double %b
@@ -122,7 +122,7 @@
 ; CHECK: chhsi 0(%r2), 1
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp sgt i64 %ext, 1
   %res = select i1 %cond, double %a, double %b
@@ -136,7 +136,7 @@
 ; CHECK: clhhsi 0(%r2), 65534
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i64
   %cond = icmp slt i64 %ext, 65534
   %res = select i1 %cond, double %a, double %b
@@ -150,7 +150,7 @@
 ; CHECK: chhsi 0(%r2), -2
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp slt i64 %ext, -2
   %res = select i1 %cond, double %a, double %b
@@ -163,7 +163,7 @@
 ; CHECK-LABEL: f12:
 ; CHECK-NOT: cli
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = zext i16 %val to i64
   %cond = icmp slt i64 %ext, 65536
   %res = select i1 %cond, double %a, double %b
@@ -177,7 +177,7 @@
 ; CHECK: chhsi 0(%r2), 32766
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp slt i64 %ext, 32766
   %res = select i1 %cond, double %a, double %b
@@ -190,7 +190,7 @@
 ; CHECK-LABEL: f14:
 ; CHECK-NOT: chhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp slt i64 %ext, 32768
   %res = select i1 %cond, double %a, double %b
@@ -204,7 +204,7 @@
 ; CHECK: chhsi 0(%r2), -32767
 ; CHECK-NEXT: jh
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp sgt i64 %ext, -32767
   %res = select i1 %cond, double %a, double %b
@@ -217,7 +217,7 @@
 ; CHECK-LABEL: f16:
 ; CHECK-NOT: chhsi
 ; CHECK: br %r14
-  %val = load i16 *%ptr
+  %val = load i16 , i16 *%ptr
   %ext = sext i16 %val to i64
   %cond = icmp sgt i64 %ext, -32769
   %res = select i1 %cond, double %a, double %b
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-32.ll b/llvm/test/CodeGen/SystemZ/int-cmp-32.ll
index d2be62d..da0e2d7 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-32.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-32.ll
@@ -9,7 +9,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp slt i32 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -22,7 +22,7 @@
 ; CHECK-NEXT: jle
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp slt i32 %val, 1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -35,7 +35,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp slt i32 %val, 32767
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -46,7 +46,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK-NOT: chsi
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp slt i32 %val, 32768
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -59,7 +59,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp slt i32 %val, -1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -72,7 +72,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp slt i32 %val, -32768
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -83,7 +83,7 @@
 ; CHECK-LABEL: f7:
 ; CHECK-NOT: chsi
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp slt i32 %val, -32769
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -96,7 +96,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp eq i32 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -109,7 +109,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp eq i32 %val, 1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -122,7 +122,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp eq i32 %val, 32767
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -133,7 +133,7 @@
 ; CHECK-LABEL: f11:
 ; CHECK-NOT: chsi
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp eq i32 %val, 32768
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -146,7 +146,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp eq i32 %val, -1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -159,7 +159,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp eq i32 %val, -32768
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -170,7 +170,7 @@
 ; CHECK-LABEL: f14:
 ; CHECK-NOT: chsi
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp eq i32 %val, -32769
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -184,7 +184,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 1023
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp slt i32 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -199,7 +199,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 1024
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp slt i32 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -214,7 +214,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -1
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp slt i32 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -230,7 +230,7 @@
 ; CHECK: br %r14
   %add = add i64 %base, %index
   %ptr = inttoptr i64 %add to i32 *
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp slt i32 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-33.ll b/llvm/test/CodeGen/SystemZ/int-cmp-33.ll
index e248e04..94f3e70 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-33.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-33.ll
@@ -10,7 +10,7 @@
 ; CHECK-NEXT: jh
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp ugt i32 %val, 1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -23,7 +23,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp ult i32 %val, 65535
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -34,7 +34,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK-NOT: clfhsi
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp ult i32 %val, 65536
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -48,7 +48,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp eq i32 %val, 32768
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -61,7 +61,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp eq i32 %val, 65535
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -72,7 +72,7 @@
 ; CHECK-LABEL: f6:
 ; CHECK-NOT: clfhsi
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp eq i32 %val, 65536
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -86,7 +86,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 1023
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp ugt i32 %val, 1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -101,7 +101,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 1024
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp ugt i32 %val, 1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -116,7 +116,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%base, i64 -1
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp ugt i32 %val, 1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -132,7 +132,7 @@
 ; CHECK: br %r14
   %add = add i64 %base, %index
   %ptr = inttoptr i64 %add to i32 *
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %cond = icmp ugt i32 %val, 1
   %res = select i1 %cond, double %a, double %b
   ret double %res
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-34.ll b/llvm/test/CodeGen/SystemZ/int-cmp-34.ll
index 5942e3a..114b694 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-34.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-34.ll
@@ -9,7 +9,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp slt i64 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -22,7 +22,7 @@
 ; CHECK-NEXT: jle
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp slt i64 %val, 1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -35,7 +35,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp slt i64 %val, 32767
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -46,7 +46,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK-NOT: cghsi
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp slt i64 %val, 32768
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -59,7 +59,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp slt i64 %val, -1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -72,7 +72,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp slt i64 %val, -32768
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -83,7 +83,7 @@
 ; CHECK-LABEL: f7:
 ; CHECK-NOT: cghsi
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp slt i64 %val, -32769
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -96,7 +96,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp eq i64 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -109,7 +109,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp eq i64 %val, 1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -122,7 +122,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp eq i64 %val, 32767
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -133,7 +133,7 @@
 ; CHECK-LABEL: f11:
 ; CHECK-NOT: cghsi
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp eq i64 %val, 32768
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -146,7 +146,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp eq i64 %val, -1
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -159,7 +159,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp eq i64 %val, -32768
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -170,7 +170,7 @@
 ; CHECK-LABEL: f14:
 ; CHECK-NOT: cghsi
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp eq i64 %val, -32769
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -184,7 +184,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 511
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp slt i64 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -199,7 +199,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 512
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp slt i64 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -214,7 +214,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 -1
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp slt i64 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -230,7 +230,7 @@
 ; CHECK: br %r14
   %add = add i64 %base, %index
   %ptr = inttoptr i64 %add to i64 *
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp slt i64 %val, 0
   %res = select i1 %cond, double %a, double %b
   ret double %res
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-35.ll b/llvm/test/CodeGen/SystemZ/int-cmp-35.ll
index 3c48f00..0eaf4fa 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-35.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-35.ll
@@ -10,7 +10,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp ult i64 %val, 2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -23,7 +23,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp ult i64 %val, 65535
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -34,7 +34,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK-NOT: clghsi
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp ult i64 %val, 65536
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -48,7 +48,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp eq i64 %val, 32768
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -61,7 +61,7 @@
 ; CHECK-NEXT: je
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp eq i64 %val, 65535
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -72,7 +72,7 @@
 ; CHECK-LABEL: f6:
 ; CHECK-NOT: clghsi
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp eq i64 %val, 65536
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -86,7 +86,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 511
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp ult i64 %val, 2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -101,7 +101,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 512
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp ult i64 %val, 2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -116,7 +116,7 @@
 ; CHECK: ldr %f0, %f2
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%base, i64 -1
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp ult i64 %val, 2
   %res = select i1 %cond, double %a, double %b
   ret double %res
@@ -132,7 +132,7 @@
 ; CHECK: br %r14
   %add = add i64 %base, %index
   %ptr = inttoptr i64 %add to i64 *
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   %cond = icmp ult i64 %val, 2
   %res = select i1 %cond, double %a, double %b
   ret double %res
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-36.ll b/llvm/test/CodeGen/SystemZ/int-cmp-36.ll
index fa2d4bf..113d2c1 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-36.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-36.ll
@@ -13,7 +13,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src2 = sext i16 %val to i32
   %cond = icmp slt i32 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -31,7 +31,7 @@
 ; CHECK-NOT: chrl
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src2 = sext i16 %val to i32
   %cond = icmp ult i32 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -50,7 +50,7 @@
 ; CHECK-NEXT: je
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src2 = sext i16 %val to i32
   %cond = icmp eq i32 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -69,7 +69,7 @@
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src2 = sext i16 %val to i32
   %cond = icmp ne i32 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -89,7 +89,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@h, align 1
+  %val = load i16 , i16 *@h, align 1
   %src2 = sext i16 %val to i32
   %cond = icmp slt i32 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -108,7 +108,7 @@
 ; CHECK-NEXT: jh {{\.L.*}}
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src1 = sext i16 %val to i32
   %cond = icmp slt i32 %src1, %src2
   br i1 %cond, label %exit, label %mulb
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-37.ll b/llvm/test/CodeGen/SystemZ/int-cmp-37.ll
index 8095ed1..ac5d39f9 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-37.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-37.ll
@@ -13,7 +13,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src2 = zext i16 %val to i32
   %cond = icmp ult i32 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -31,7 +31,7 @@
 ; CHECK-NOT: clhrl
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src2 = zext i16 %val to i32
   %cond = icmp slt i32 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -50,7 +50,7 @@
 ; CHECK-NEXT: je
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src2 = zext i16 %val to i32
   %cond = icmp eq i32 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -69,7 +69,7 @@
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src2 = zext i16 %val to i32
   %cond = icmp ne i32 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -89,7 +89,7 @@
 ; CHECK: clrjl %r2, [[VAL]],
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@h, align 1
+  %val = load i16 , i16 *@h, align 1
   %src2 = zext i16 %val to i32
   %cond = icmp ult i32 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -108,7 +108,7 @@
 ; CHECK-NEXT: jh {{\.L.*}}
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src1 = zext i16 %val to i32
   %cond = icmp ult i32 %src1, %src2
   br i1 %cond, label %exit, label %mulb
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-38.ll b/llvm/test/CodeGen/SystemZ/int-cmp-38.ll
index 9017583..0d8913b0 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-38.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-38.ll
@@ -13,7 +13,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
-  %src2 = load i32 *@g
+  %src2 = load i32 , i32 *@g
   %cond = icmp slt i32 %src1, %src2
   br i1 %cond, label %exit, label %mulb
 mulb:
@@ -31,7 +31,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
-  %src2 = load i32 *@g
+  %src2 = load i32 , i32 *@g
   %cond = icmp ult i32 %src1, %src2
   br i1 %cond, label %exit, label %mulb
 mulb:
@@ -49,7 +49,7 @@
 ; CHECK-NEXT: je
 ; CHECK: br %r14
 entry:
-  %src2 = load i32 *@g
+  %src2 = load i32 , i32 *@g
   %cond = icmp eq i32 %src1, %src2
   br i1 %cond, label %exit, label %mulb
 mulb:
@@ -67,7 +67,7 @@
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
 entry:
-  %src2 = load i32 *@g
+  %src2 = load i32 , i32 *@g
   %cond = icmp ne i32 %src1, %src2
   br i1 %cond, label %exit, label %mulb
 mulb:
@@ -86,7 +86,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
-  %src2 = load i32 *@h, align 2
+  %src2 = load i32 , i32 *@h, align 2
   %cond = icmp slt i32 %src1, %src2
   br i1 %cond, label %exit, label %mulb
 mulb:
@@ -105,7 +105,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
-  %src2 = load i32 *@h, align 2
+  %src2 = load i32 , i32 *@h, align 2
   %cond = icmp ult i32 %src1, %src2
   br i1 %cond, label %exit, label %mulb
 mulb:
@@ -123,7 +123,7 @@
 ; CHECK-NEXT: jh {{\.L.*}}
 ; CHECK: br %r14
 entry:
-  %src1 = load i32 *@g
+  %src1 = load i32 , i32 *@g
   %cond = icmp slt i32 %src1, %src2
   br i1 %cond, label %exit, label %mulb
 mulb:
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-39.ll b/llvm/test/CodeGen/SystemZ/int-cmp-39.ll
index fc9547d..5e3abce 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-39.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-39.ll
@@ -13,7 +13,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src2 = sext i16 %val to i64
   %cond = icmp slt i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -31,7 +31,7 @@
 ; CHECK-NOT: cghrl
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src2 = sext i16 %val to i64
   %cond = icmp ult i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -50,7 +50,7 @@
 ; CHECK-NEXT: je
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src2 = sext i16 %val to i64
   %cond = icmp eq i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -69,7 +69,7 @@
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src2 = sext i16 %val to i64
   %cond = icmp ne i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -89,7 +89,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@h, align 1
+  %val = load i16 , i16 *@h, align 1
   %src2 = sext i16 %val to i64
   %cond = icmp slt i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -108,7 +108,7 @@
 ; CHECK-NEXT: jh {{\.L.*}}
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src1 = sext i16 %val to i64
   %cond = icmp slt i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-40.ll b/llvm/test/CodeGen/SystemZ/int-cmp-40.ll
index 9c532f1..92696d7 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-40.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-40.ll
@@ -13,7 +13,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src2 = zext i16 %val to i64
   %cond = icmp ult i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -31,7 +31,7 @@
 ; CHECK-NOT: clghrl
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src2 = zext i16 %val to i64
   %cond = icmp slt i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -50,7 +50,7 @@
 ; CHECK-NEXT: je
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src2 = zext i16 %val to i64
   %cond = icmp eq i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -69,7 +69,7 @@
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src2 = zext i16 %val to i64
   %cond = icmp ne i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -89,7 +89,7 @@
 ; CHECK: clgrjl %r2, [[VAL]],
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@h, align 1
+  %val = load i16 , i16 *@h, align 1
   %src2 = zext i16 %val to i64
   %cond = icmp ult i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -108,7 +108,7 @@
 ; CHECK-NEXT: jh {{\.L.*}}
 ; CHECK: br %r14
 entry:
-  %val = load i16 *@g
+  %val = load i16 , i16 *@g
   %src1 = zext i16 %val to i64
   %cond = icmp ult i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-41.ll b/llvm/test/CodeGen/SystemZ/int-cmp-41.ll
index 77f6e7d..f4f5b4a 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-41.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-41.ll
@@ -13,7 +13,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
-  %val = load i32 *@g
+  %val = load i32 , i32 *@g
   %src2 = sext i32 %val to i64
   %cond = icmp slt i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -31,7 +31,7 @@
 ; CHECK-NOT: cgfrl
 ; CHECK: br %r14
 entry:
-  %val = load i32 *@g
+  %val = load i32 , i32 *@g
   %src2 = sext i32 %val to i64
   %cond = icmp ult i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -50,7 +50,7 @@
 ; CHECK-NEXT: je
 ; CHECK: br %r14
 entry:
-  %val = load i32 *@g
+  %val = load i32 , i32 *@g
   %src2 = sext i32 %val to i64
   %cond = icmp eq i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -69,7 +69,7 @@
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
 entry:
-  %val = load i32 *@g
+  %val = load i32 , i32 *@g
   %src2 = sext i32 %val to i64
   %cond = icmp ne i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -89,7 +89,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
-  %val = load i32 *@h, align 2
+  %val = load i32 , i32 *@h, align 2
   %src2 = sext i32 %val to i64
   %cond = icmp slt i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -108,7 +108,7 @@
 ; CHECK-NEXT: jh {{\.L.*}}
 ; CHECK: br %r14
 entry:
-  %val = load i32 *@g
+  %val = load i32 , i32 *@g
   %src1 = sext i32 %val to i64
   %cond = icmp slt i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-42.ll b/llvm/test/CodeGen/SystemZ/int-cmp-42.ll
index 94ef008..ca87b86 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-42.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-42.ll
@@ -13,7 +13,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
-  %val = load i32 *@g
+  %val = load i32 , i32 *@g
   %src2 = zext i32 %val to i64
   %cond = icmp ult i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -31,7 +31,7 @@
 ; CHECK-NOT: clgfrl
 ; CHECK: br %r14
 entry:
-  %val = load i32 *@g
+  %val = load i32 , i32 *@g
   %src2 = zext i32 %val to i64
   %cond = icmp slt i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -50,7 +50,7 @@
 ; CHECK-NEXT: je
 ; CHECK: br %r14
 entry:
-  %val = load i32 *@g
+  %val = load i32 , i32 *@g
   %src2 = zext i32 %val to i64
   %cond = icmp eq i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -69,7 +69,7 @@
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
 entry:
-  %val = load i32 *@g
+  %val = load i32 , i32 *@g
   %src2 = zext i32 %val to i64
   %cond = icmp ne i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -89,7 +89,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
-  %val = load i32 *@h, align 2
+  %val = load i32 , i32 *@h, align 2
   %src2 = zext i32 %val to i64
   %cond = icmp ult i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
@@ -108,7 +108,7 @@
 ; CHECK-NEXT: jh {{\.L.*}}
 ; CHECK: br %r14
 entry:
-  %val = load i32 *@g
+  %val = load i32 , i32 *@g
   %src1 = zext i32 %val to i64
   %cond = icmp ult i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-43.ll b/llvm/test/CodeGen/SystemZ/int-cmp-43.ll
index 1a62588..108b041 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-43.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-43.ll
@@ -13,7 +13,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
-  %src2 = load i64 *@g
+  %src2 = load i64 , i64 *@g
   %cond = icmp slt i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
 mulb:
@@ -31,7 +31,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
-  %src2 = load i64 *@g
+  %src2 = load i64 , i64 *@g
   %cond = icmp ult i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
 mulb:
@@ -49,7 +49,7 @@
 ; CHECK-NEXT: je
 ; CHECK: br %r14
 entry:
-  %src2 = load i64 *@g
+  %src2 = load i64 , i64 *@g
   %cond = icmp eq i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
 mulb:
@@ -67,7 +67,7 @@
 ; CHECK-NEXT: jlh
 ; CHECK: br %r14
 entry:
-  %src2 = load i64 *@g
+  %src2 = load i64 , i64 *@g
   %cond = icmp ne i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
 mulb:
@@ -86,7 +86,7 @@
 ; CHECK-NEXT: jl
 ; CHECK: br %r14
 entry:
-  %src2 = load i64 *@h, align 4
+  %src2 = load i64 , i64 *@h, align 4
   %cond = icmp slt i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
 mulb:
@@ -104,7 +104,7 @@
 ; CHECK-NEXT: jh {{\.L.*}}
 ; CHECK: br %r14
 entry:
-  %src1 = load i64 *@g
+  %src1 = load i64 , i64 *@g
   %cond = icmp slt i64 %src1, %src2
   br i1 %cond, label %exit, label %mulb
 mulb:
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-44.ll b/llvm/test/CodeGen/SystemZ/int-cmp-44.ll
index 30c1c4f..97d4852 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-44.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-44.ll
@@ -127,7 +127,7 @@
 ; CHECK-NEXT: jne .L{{.*}}
 ; CHECK: br %r14
 entry:
-  %cur = load i32 *%dest
+  %cur = load i32 , i32 *%dest
   %res = sub i32 %a, %cur
   %cmp = icmp ne i32 %res, 0
   br i1 %cmp, label %exit, label %store
@@ -147,7 +147,7 @@
 ; CHECK-NEXT: cijl %r2, 0, .L{{.*}}
 ; CHECK: br %r14
 entry:
-  %cur = load i32 *%dest
+  %cur = load i32 , i32 *%dest
   %res = sub i32 %a, %cur
   %cmp = icmp slt i32 %res, 0
   br i1 %cmp, label %exit, label %store
@@ -468,7 +468,7 @@
 ; CHECK-NEXT: cijlh [[REG]], 0, .L{{.*}}
 ; CHECK: br %r14
 entry:
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %xor = xor i32 %val, 1
   %add = add i32 %xor, 1000000
   call void @foo()
@@ -561,7 +561,7 @@
 ; CHECK: br %r14
 entry:
   %ptr = inttoptr i64 %a to i8 *
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %xor = xor i8 %val, 15
   store i8 %xor, i8 *%ptr
   %cmp = icmp eq i64 %a, 0
@@ -584,7 +584,7 @@
 entry:
   %add = add i64 %base, %index
   %ptr = inttoptr i64 %add to i32 *
-  %res = load i32 *%ptr
+  %res = load i32 , i32 *%ptr
   %cmp = icmp sle i32 %res, 0
   br i1 %cmp, label %exit, label %store
 
@@ -606,7 +606,7 @@
   %add1 = add i64 %base, %index
   %add2 = add i64 %add1, 100000
   %ptr = inttoptr i64 %add2 to i32 *
-  %res = load i32 *%ptr
+  %res = load i32 , i32 *%ptr
   %cmp = icmp sle i32 %res, 0
   br i1 %cmp, label %exit, label %store
 
@@ -627,7 +627,7 @@
 entry:
   %add = add i64 %base, %index
   %ptr = inttoptr i64 %add to i64 *
-  %res = load i64 *%ptr
+  %res = load i64 , i64 *%ptr
   %cmp = icmp sge i64 %res, 0
   br i1 %cmp, label %exit, label %store
 
@@ -648,7 +648,7 @@
 entry:
   %add = add i64 %base, %index
   %ptr = inttoptr i64 %add to i32 *
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %res = sext i32 %val to i64
   %cmp = icmp sgt i64 %res, 0
   br i1 %cmp, label %exit, label %store
@@ -853,7 +853,7 @@
 ; CHECK-NEXT: jne .L{{.*}}
 ; CHECK: br %r14
 entry:
-  %cur = load i32 *%dest
+  %cur = load i32 , i32 *%dest
   %res = sub i32 %a, %cur
   %cmp = icmp ne i32 %a, %cur
   br i1 %cmp, label %exit, label %store
@@ -875,7 +875,7 @@
 entry:
   %add = add i64 %base, %index
   %ptr = inttoptr i64 %add to i32 *
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   %res = sext i32 %val to i64
   %cmp = icmp sgt i32 %val, 0
   br i1 %cmp, label %exit, label %store
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-45.ll b/llvm/test/CodeGen/SystemZ/int-cmp-45.ll
index c9affa6..e5474fa 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-45.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-45.ll
@@ -12,7 +12,7 @@
 ; CHECK: br %r14
   %add = add i32 %a, 1000000
   %cmp = icmp eq i32 %add, 0
-  %c = load i32 *%cptr
+  %c = load i32 , i32 *%cptr
   %arg = select i1 %cmp, i32 %c, i32 %b
   call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
   ret i32 %add
@@ -26,7 +26,7 @@
 ; CHECK: br %r14
   %add = add i32 %a, 1000000
   %cmp = icmp eq i32 %add, 0
-  %c = load i32 *%cptr
+  %c = load i32 , i32 *%cptr
   %newval = select i1 %cmp, i32 %b, i32 %c
   store i32 %newval, i32 *%cptr
   ret i32 %add
@@ -53,7 +53,7 @@
 ; CHECK: br %r14
   %add = add i32 %a, 1000000
   %cmp = icmp eq i32 %add, 0
-  %c = load i32 *%cptr
+  %c = load i32 , i32 *%cptr
   %arg = select i1 %cmp, i32 %b, i32 %c
   call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
   ret i32 %add
@@ -67,7 +67,7 @@
 ; CHECK: br %r14
   %add = add i32 %a, 1000000
   %cmp = icmp eq i32 %add, 0
-  %c = load i32 *%cptr
+  %c = load i32 , i32 *%cptr
   %newval = select i1 %cmp, i32 %c, i32 %b
   store i32 %newval, i32 *%cptr
   ret i32 %add
@@ -94,7 +94,7 @@
 ; CHECK: br %r14
   %add = add i32 %a, 1000000
   %cmp = icmp ne i32 %add, 0
-  %c = load i32 *%cptr
+  %c = load i32 , i32 *%cptr
   %arg = select i1 %cmp, i32 %b, i32 %c
   call void asm sideeffect "blah $0", "{r3}"(i32 %arg)
   ret i32 %add
@@ -108,7 +108,7 @@
 ; CHECK: br %r14
   %add = add i32 %a, 1000000
   %cmp = icmp ne i32 %add, 0
-  %c = load i32 *%cptr
+  %c = load i32 , i32 *%cptr
   %newval = select i1 %cmp, i32 %c, i32 %b
   store i32 %newval, i32 *%cptr
   ret i32 %add
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-48.ll b/llvm/test/CodeGen/SystemZ/int-cmp-48.ll
index 85cfb03..e266947 100644
--- a/llvm/test/CodeGen/SystemZ/int-cmp-48.ll
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-48.ll
@@ -11,7 +11,7 @@
 ; CHECK: je {{\.L.*}}
 ; CHECK: br %r14
 entry:
-  %byte = load i8 *%src
+  %byte = load i8 , i8 *%src
   %and = and i8 %byte, 1
   %cmp = icmp eq i8 %and, 0
   br i1 %cmp, label %exit, label %store
@@ -34,7 +34,7 @@
 ; CHECK: je {{\.L.*}}
 ; CHECK: br %r14
 entry:
-  %byte = load i8 *%src
+  %byte = load i8 , i8 *%src
   store i8 0, i8 *%src
   %and = and i8 %byte, 1
   %cmp = icmp eq i8 %and, 0
@@ -54,7 +54,7 @@
 ; CHECK: tm 0(%r2), 1
 ; CHECK: je {{\.L.*}}
 ; CHECK: br %r14
-  %byte = load i8 *%src
+  %byte = load i8 , i8 *%src
   %and = and i8 %byte, 1
   %cmp = icmp eq i8 %and, 0
   %res = select i1 %cmp, double %b, double %a
@@ -68,7 +68,7 @@
 ; CHECK: je {{\.L.*}}
 ; CHECK: mvi 0(%r2), 0
 ; CHECK: br %r14
-  %byte = load i8 *%src
+  %byte = load i8 , i8 *%src
   %and = and i8 %byte, 1
   %cmp = icmp eq i8 %and, 0
   %res = select i1 %cmp, double %b, double %a
@@ -82,7 +82,7 @@
 ; CHECK: tm 0(%r2), 1
 ; CHECK: jne {{\.L.*}}
 ; CHECK: br %r14
-  %byte = load i8 *%src
+  %byte = load i8 , i8 *%src
   %and = and i8 %byte, 1
   %cmp = icmp ne i8 %and, 0
   %res = select i1 %cmp, double %b, double %a
@@ -95,7 +95,7 @@
 ; CHECK: tm 0(%r2), 254
 ; CHECK: jo {{\.L.*}}
 ; CHECK: br %r14
-  %byte = load i8 *%src
+  %byte = load i8 , i8 *%src
   %and = and i8 %byte, 254
   %cmp = icmp eq i8 %and, 254
   %res = select i1 %cmp, double %b, double %a
@@ -108,7 +108,7 @@
 ; CHECK: tm 0(%r2), 254
 ; CHECK: jno {{\.L.*}}
 ; CHECK: br %r14
-  %byte = load i8 *%src
+  %byte = load i8 , i8 *%src
   %and = and i8 %byte, 254
   %cmp = icmp ne i8 %and, 254
   %res = select i1 %cmp, double %b, double %a
@@ -123,7 +123,7 @@
 ; CHECK: tmll [[REG]], 3
 ; CHECK: jh {{\.L.*}}
 ; CHECK: br %r14
-  %byte = load i8 *%src
+  %byte = load i8 , i8 *%src
   %and = and i8 %byte, 3
   %cmp = icmp eq i8 %and, 2
   %res = select i1 %cmp, double %b, double %a
@@ -137,7 +137,7 @@
 ; CHECK: tmll [[REG]], 3
 ; CHECK: jl {{\.L.*}}
 ; CHECK: br %r14
-  %byte = load i8 *%src
+  %byte = load i8 , i8 *%src
   %and = and i8 %byte, 3
   %cmp = icmp eq i8 %and, 1
   %res = select i1 %cmp, double %b, double %a
@@ -151,7 +151,7 @@
 ; CHECK: je {{\.L.*}}
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 4095
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %and = and i8 %byte, 1
   %cmp = icmp eq i8 %and, 0
   %res = select i1 %cmp, double %b, double %a
@@ -165,7 +165,7 @@
 ; CHECK: je {{\.L.*}}
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 4096
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %and = and i8 %byte, 1
   %cmp = icmp eq i8 %and, 0
   %res = select i1 %cmp, double %b, double %a
@@ -179,7 +179,7 @@
 ; CHECK: je {{\.L.*}}
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524287
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %and = and i8 %byte, 1
   %cmp = icmp eq i8 %and, 0
   %res = select i1 %cmp, double %b, double %a
@@ -194,7 +194,7 @@
 ; CHECK: je {{\.L.*}}
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524288
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %and = and i8 %byte, 1
   %cmp = icmp eq i8 %and, 0
   %res = select i1 %cmp, double %b, double %a
@@ -208,7 +208,7 @@
 ; CHECK: je {{\.L.*}}
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524288
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %and = and i8 %byte, 1
   %cmp = icmp eq i8 %and, 0
   %res = select i1 %cmp, double %b, double %a
@@ -223,7 +223,7 @@
 ; CHECK: je {{\.L.*}}
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524289
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %and = and i8 %byte, 1
   %cmp = icmp eq i8 %and, 0
   %res = select i1 %cmp, double %b, double %a
@@ -237,7 +237,7 @@
 ; CHECK: je {{\.L.*}}
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 %index
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %and = and i8 %byte, 1
   %cmp = icmp eq i8 %and, 0
   %res = select i1 %cmp, double %b, double %a
diff --git a/llvm/test/CodeGen/SystemZ/int-conv-01.ll b/llvm/test/CodeGen/SystemZ/int-conv-01.ll
index 8b37d29..70ef78a 100644
--- a/llvm/test/CodeGen/SystemZ/int-conv-01.ll
+++ b/llvm/test/CodeGen/SystemZ/int-conv-01.ll
@@ -27,7 +27,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK: lb %r2, 0(%r2)
 ; CHECK: br %r14
-  %byte = load i8 *%src
+  %byte = load i8 , i8 *%src
   %ext = sext i8 %byte to i32
   ret i32 %ext
 }
@@ -38,7 +38,7 @@
 ; CHECK: lb %r2, 524287(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524287
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = sext i8 %byte to i32
   ret i32 %ext
 }
@@ -51,7 +51,7 @@
 ; CHECK: lb %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524288
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = sext i8 %byte to i32
   ret i32 %ext
 }
@@ -62,7 +62,7 @@
 ; CHECK: lb %r2, -1(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -1
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = sext i8 %byte to i32
   ret i32 %ext
 }
@@ -73,7 +73,7 @@
 ; CHECK: lb %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524288
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = sext i8 %byte to i32
   ret i32 %ext
 }
@@ -86,7 +86,7 @@
 ; CHECK: lb %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524289
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = sext i8 %byte to i32
   ret i32 %ext
 }
@@ -99,7 +99,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to i8 *
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = sext i8 %byte to i32
   ret i32 %ext
 }
@@ -110,22 +110,22 @@
 ; CHECK-LABEL: f10:
 ; CHECK: lb {{%r[0-9]+}}, 16{{[37]}}(%r15)
 ; CHECK: br %r14
-  %val0 = load volatile i32 *%ptr
-  %val1 = load volatile i32 *%ptr
-  %val2 = load volatile i32 *%ptr
-  %val3 = load volatile i32 *%ptr
-  %val4 = load volatile i32 *%ptr
-  %val5 = load volatile i32 *%ptr
-  %val6 = load volatile i32 *%ptr
-  %val7 = load volatile i32 *%ptr
-  %val8 = load volatile i32 *%ptr
-  %val9 = load volatile i32 *%ptr
-  %val10 = load volatile i32 *%ptr
-  %val11 = load volatile i32 *%ptr
-  %val12 = load volatile i32 *%ptr
-  %val13 = load volatile i32 *%ptr
-  %val14 = load volatile i32 *%ptr
-  %val15 = load volatile i32 *%ptr
+  %val0 = load volatile i32 , i32 *%ptr
+  %val1 = load volatile i32 , i32 *%ptr
+  %val2 = load volatile i32 , i32 *%ptr
+  %val3 = load volatile i32 , i32 *%ptr
+  %val4 = load volatile i32 , i32 *%ptr
+  %val5 = load volatile i32 , i32 *%ptr
+  %val6 = load volatile i32 , i32 *%ptr
+  %val7 = load volatile i32 , i32 *%ptr
+  %val8 = load volatile i32 , i32 *%ptr
+  %val9 = load volatile i32 , i32 *%ptr
+  %val10 = load volatile i32 , i32 *%ptr
+  %val11 = load volatile i32 , i32 *%ptr
+  %val12 = load volatile i32 , i32 *%ptr
+  %val13 = load volatile i32 , i32 *%ptr
+  %val14 = load volatile i32 , i32 *%ptr
+  %val15 = load volatile i32 , i32 *%ptr
 
   %trunc0 = trunc i32 %val0 to i8
   %trunc1 = trunc i32 %val1 to i8
diff --git a/llvm/test/CodeGen/SystemZ/int-conv-02.ll b/llvm/test/CodeGen/SystemZ/int-conv-02.ll
index 6b3682a..5b248cc 100644
--- a/llvm/test/CodeGen/SystemZ/int-conv-02.ll
+++ b/llvm/test/CodeGen/SystemZ/int-conv-02.ll
@@ -37,7 +37,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK: llc %r2, 0(%r2)
 ; CHECK: br %r14
-  %byte = load i8 *%src
+  %byte = load i8 , i8 *%src
   %ext = zext i8 %byte to i32
   ret i32 %ext
 }
@@ -48,7 +48,7 @@
 ; CHECK: llc %r2, 524287(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524287
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = zext i8 %byte to i32
   ret i32 %ext
 }
@@ -61,7 +61,7 @@
 ; CHECK: llc %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524288
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = zext i8 %byte to i32
   ret i32 %ext
 }
@@ -72,7 +72,7 @@
 ; CHECK: llc %r2, -1(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -1
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = zext i8 %byte to i32
   ret i32 %ext
 }
@@ -83,7 +83,7 @@
 ; CHECK: llc %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524288
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = zext i8 %byte to i32
   ret i32 %ext
 }
@@ -96,7 +96,7 @@
 ; CHECK: llc %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524289
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = zext i8 %byte to i32
   ret i32 %ext
 }
@@ -109,7 +109,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to i8 *
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = zext i8 %byte to i32
   ret i32 %ext
 }
@@ -120,22 +120,22 @@
 ; CHECK-LABEL: f11:
 ; CHECK: llc {{%r[0-9]+}}, 16{{[37]}}(%r15)
 ; CHECK: br %r14
-  %val0 = load volatile i32 *%ptr
-  %val1 = load volatile i32 *%ptr
-  %val2 = load volatile i32 *%ptr
-  %val3 = load volatile i32 *%ptr
-  %val4 = load volatile i32 *%ptr
-  %val5 = load volatile i32 *%ptr
-  %val6 = load volatile i32 *%ptr
-  %val7 = load volatile i32 *%ptr
-  %val8 = load volatile i32 *%ptr
-  %val9 = load volatile i32 *%ptr
-  %val10 = load volatile i32 *%ptr
-  %val11 = load volatile i32 *%ptr
-  %val12 = load volatile i32 *%ptr
-  %val13 = load volatile i32 *%ptr
-  %val14 = load volatile i32 *%ptr
-  %val15 = load volatile i32 *%ptr
+  %val0 = load volatile i32 , i32 *%ptr
+  %val1 = load volatile i32 , i32 *%ptr
+  %val2 = load volatile i32 , i32 *%ptr
+  %val3 = load volatile i32 , i32 *%ptr
+  %val4 = load volatile i32 , i32 *%ptr
+  %val5 = load volatile i32 , i32 *%ptr
+  %val6 = load volatile i32 , i32 *%ptr
+  %val7 = load volatile i32 , i32 *%ptr
+  %val8 = load volatile i32 , i32 *%ptr
+  %val9 = load volatile i32 , i32 *%ptr
+  %val10 = load volatile i32 , i32 *%ptr
+  %val11 = load volatile i32 , i32 *%ptr
+  %val12 = load volatile i32 , i32 *%ptr
+  %val13 = load volatile i32 , i32 *%ptr
+  %val14 = load volatile i32 , i32 *%ptr
+  %val15 = load volatile i32 , i32 *%ptr
 
   %trunc0 = trunc i32 %val0 to i8
   %trunc1 = trunc i32 %val1 to i8
diff --git a/llvm/test/CodeGen/SystemZ/int-conv-03.ll b/llvm/test/CodeGen/SystemZ/int-conv-03.ll
index 0db3319..e621bcd6 100644
--- a/llvm/test/CodeGen/SystemZ/int-conv-03.ll
+++ b/llvm/test/CodeGen/SystemZ/int-conv-03.ll
@@ -27,7 +27,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK: lgb %r2, 0(%r2)
 ; CHECK: br %r14
-  %byte = load i8 *%src
+  %byte = load i8 , i8 *%src
   %ext = sext i8 %byte to i64
   ret i64 %ext
 }
@@ -38,7 +38,7 @@
 ; CHECK: lgb %r2, 524287(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524287
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = sext i8 %byte to i64
   ret i64 %ext
 }
@@ -51,7 +51,7 @@
 ; CHECK: lgb %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524288
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = sext i8 %byte to i64
   ret i64 %ext
 }
@@ -62,7 +62,7 @@
 ; CHECK: lgb %r2, -1(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -1
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = sext i8 %byte to i64
   ret i64 %ext
 }
@@ -73,7 +73,7 @@
 ; CHECK: lgb %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524288
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = sext i8 %byte to i64
   ret i64 %ext
 }
@@ -86,7 +86,7 @@
 ; CHECK: lgb %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524289
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = sext i8 %byte to i64
   ret i64 %ext
 }
@@ -99,7 +99,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to i8 *
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = sext i8 %byte to i64
   ret i64 %ext
 }
@@ -110,22 +110,22 @@
 ; CHECK-LABEL: f10:
 ; CHECK: lgb {{%r[0-9]+}}, 167(%r15)
 ; CHECK: br %r14
-  %val0 = load volatile i64 *%ptr
-  %val1 = load volatile i64 *%ptr
-  %val2 = load volatile i64 *%ptr
-  %val3 = load volatile i64 *%ptr
-  %val4 = load volatile i64 *%ptr
-  %val5 = load volatile i64 *%ptr
-  %val6 = load volatile i64 *%ptr
-  %val7 = load volatile i64 *%ptr
-  %val8 = load volatile i64 *%ptr
-  %val9 = load volatile i64 *%ptr
-  %val10 = load volatile i64 *%ptr
-  %val11 = load volatile i64 *%ptr
-  %val12 = load volatile i64 *%ptr
-  %val13 = load volatile i64 *%ptr
-  %val14 = load volatile i64 *%ptr
-  %val15 = load volatile i64 *%ptr
+  %val0 = load volatile i64 , i64 *%ptr
+  %val1 = load volatile i64 , i64 *%ptr
+  %val2 = load volatile i64 , i64 *%ptr
+  %val3 = load volatile i64 , i64 *%ptr
+  %val4 = load volatile i64 , i64 *%ptr
+  %val5 = load volatile i64 , i64 *%ptr
+  %val6 = load volatile i64 , i64 *%ptr
+  %val7 = load volatile i64 , i64 *%ptr
+  %val8 = load volatile i64 , i64 *%ptr
+  %val9 = load volatile i64 , i64 *%ptr
+  %val10 = load volatile i64 , i64 *%ptr
+  %val11 = load volatile i64 , i64 *%ptr
+  %val12 = load volatile i64 , i64 *%ptr
+  %val13 = load volatile i64 , i64 *%ptr
+  %val14 = load volatile i64 , i64 *%ptr
+  %val15 = load volatile i64 , i64 *%ptr
 
   %trunc0 = trunc i64 %val0 to i8
   %trunc1 = trunc i64 %val1 to i8
diff --git a/llvm/test/CodeGen/SystemZ/int-conv-04.ll b/llvm/test/CodeGen/SystemZ/int-conv-04.ll
index c726607..a0f5d63 100644
--- a/llvm/test/CodeGen/SystemZ/int-conv-04.ll
+++ b/llvm/test/CodeGen/SystemZ/int-conv-04.ll
@@ -36,7 +36,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK: llgc %r2, 0(%r2)
 ; CHECK: br %r14
-  %byte = load i8 *%src
+  %byte = load i8 , i8 *%src
   %ext = zext i8 %byte to i64
   ret i64 %ext
 }
@@ -47,7 +47,7 @@
 ; CHECK: llgc %r2, 524287(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524287
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = zext i8 %byte to i64
   ret i64 %ext
 }
@@ -60,7 +60,7 @@
 ; CHECK: llgc %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524288
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = zext i8 %byte to i64
   ret i64 %ext
 }
@@ -71,7 +71,7 @@
 ; CHECK: llgc %r2, -1(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -1
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = zext i8 %byte to i64
   ret i64 %ext
 }
@@ -82,7 +82,7 @@
 ; CHECK: llgc %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524288
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = zext i8 %byte to i64
   ret i64 %ext
 }
@@ -95,7 +95,7 @@
 ; CHECK: llgc %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524289
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = zext i8 %byte to i64
   ret i64 %ext
 }
@@ -108,7 +108,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to i8 *
-  %byte = load i8 *%ptr
+  %byte = load i8 , i8 *%ptr
   %ext = zext i8 %byte to i64
   ret i64 %ext
 }
@@ -119,22 +119,22 @@
 ; CHECK-LABEL: f11:
 ; CHECK: llgc {{%r[0-9]+}}, 167(%r15)
 ; CHECK: br %r14
-  %val0 = load volatile i64 *%ptr
-  %val1 = load volatile i64 *%ptr
-  %val2 = load volatile i64 *%ptr
-  %val3 = load volatile i64 *%ptr
-  %val4 = load volatile i64 *%ptr
-  %val5 = load volatile i64 *%ptr
-  %val6 = load volatile i64 *%ptr
-  %val7 = load volatile i64 *%ptr
-  %val8 = load volatile i64 *%ptr
-  %val9 = load volatile i64 *%ptr
-  %val10 = load volatile i64 *%ptr
-  %val11 = load volatile i64 *%ptr
-  %val12 = load volatile i64 *%ptr
-  %val13 = load volatile i64 *%ptr
-  %val14 = load volatile i64 *%ptr
-  %val15 = load volatile i64 *%ptr
+  %val0 = load volatile i64 , i64 *%ptr
+  %val1 = load volatile i64 , i64 *%ptr
+  %val2 = load volatile i64 , i64 *%ptr
+  %val3 = load volatile i64 , i64 *%ptr
+  %val4 = load volatile i64 , i64 *%ptr
+  %val5 = load volatile i64 , i64 *%ptr
+  %val6 = load volatile i64 , i64 *%ptr
+  %val7 = load volatile i64 , i64 *%ptr
+  %val8 = load volatile i64 , i64 *%ptr
+  %val9 = load volatile i64 , i64 *%ptr
+  %val10 = load volatile i64 , i64 *%ptr
+  %val11 = load volatile i64 , i64 *%ptr
+  %val12 = load volatile i64 , i64 *%ptr
+  %val13 = load volatile i64 , i64 *%ptr
+  %val14 = load volatile i64 , i64 *%ptr
+  %val15 = load volatile i64 , i64 *%ptr
 
   %trunc0 = trunc i64 %val0 to i8
   %trunc1 = trunc i64 %val1 to i8
diff --git a/llvm/test/CodeGen/SystemZ/int-conv-05.ll b/llvm/test/CodeGen/SystemZ/int-conv-05.ll
index 14ef708..5529952 100644
--- a/llvm/test/CodeGen/SystemZ/int-conv-05.ll
+++ b/llvm/test/CodeGen/SystemZ/int-conv-05.ll
@@ -27,7 +27,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK: lh %r2, 0(%r2)
 ; CHECK: br %r14
-  %half = load i16 *%src
+  %half = load i16 , i16 *%src
   %ext = sext i16 %half to i32
   ret i32 %ext
 }
@@ -38,7 +38,7 @@
 ; CHECK: lh %r2, 4094(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 2047
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = sext i16 %half to i32
   ret i32 %ext
 }
@@ -49,7 +49,7 @@
 ; CHECK: lhy %r2, 4096(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 2048
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = sext i16 %half to i32
   ret i32 %ext
 }
@@ -60,7 +60,7 @@
 ; CHECK: lhy %r2, 524286(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 262143
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = sext i16 %half to i32
   ret i32 %ext
 }
@@ -73,7 +73,7 @@
 ; CHECK: lh %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 262144
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = sext i16 %half to i32
   ret i32 %ext
 }
@@ -84,7 +84,7 @@
 ; CHECK: lhy %r2, -2(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -1
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = sext i16 %half to i32
   ret i32 %ext
 }
@@ -95,7 +95,7 @@
 ; CHECK: lhy %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -262144
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = sext i16 %half to i32
   ret i32 %ext
 }
@@ -108,7 +108,7 @@
 ; CHECK: lh %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -262145
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = sext i16 %half to i32
   ret i32 %ext
 }
@@ -121,7 +121,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4094
   %ptr = inttoptr i64 %add2 to i16 *
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = sext i16 %half to i32
   ret i32 %ext
 }
@@ -134,7 +134,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i16 *
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = sext i16 %half to i32
   ret i32 %ext
 }
@@ -145,22 +145,22 @@
 ; CHECK-LABEL: f13:
 ; CHECK: lh {{%r[0-9]+}}, 16{{[26]}}(%r15)
 ; CHECK: br %r14
-  %val0 = load volatile i32 *%ptr
-  %val1 = load volatile i32 *%ptr
-  %val2 = load volatile i32 *%ptr
-  %val3 = load volatile i32 *%ptr
-  %val4 = load volatile i32 *%ptr
-  %val5 = load volatile i32 *%ptr
-  %val6 = load volatile i32 *%ptr
-  %val7 = load volatile i32 *%ptr
-  %val8 = load volatile i32 *%ptr
-  %val9 = load volatile i32 *%ptr
-  %val10 = load volatile i32 *%ptr
-  %val11 = load volatile i32 *%ptr
-  %val12 = load volatile i32 *%ptr
-  %val13 = load volatile i32 *%ptr
-  %val14 = load volatile i32 *%ptr
-  %val15 = load volatile i32 *%ptr
+  %val0 = load volatile i32 , i32 *%ptr
+  %val1 = load volatile i32 , i32 *%ptr
+  %val2 = load volatile i32 , i32 *%ptr
+  %val3 = load volatile i32 , i32 *%ptr
+  %val4 = load volatile i32 , i32 *%ptr
+  %val5 = load volatile i32 , i32 *%ptr
+  %val6 = load volatile i32 , i32 *%ptr
+  %val7 = load volatile i32 , i32 *%ptr
+  %val8 = load volatile i32 , i32 *%ptr
+  %val9 = load volatile i32 , i32 *%ptr
+  %val10 = load volatile i32 , i32 *%ptr
+  %val11 = load volatile i32 , i32 *%ptr
+  %val12 = load volatile i32 , i32 *%ptr
+  %val13 = load volatile i32 , i32 *%ptr
+  %val14 = load volatile i32 , i32 *%ptr
+  %val15 = load volatile i32 , i32 *%ptr
 
   %trunc0 = trunc i32 %val0 to i16
   %trunc1 = trunc i32 %val1 to i16
diff --git a/llvm/test/CodeGen/SystemZ/int-conv-06.ll b/llvm/test/CodeGen/SystemZ/int-conv-06.ll
index c4bfe3a..99ff84e 100644
--- a/llvm/test/CodeGen/SystemZ/int-conv-06.ll
+++ b/llvm/test/CodeGen/SystemZ/int-conv-06.ll
@@ -37,7 +37,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK: llh %r2, 0(%r2)
 ; CHECK: br %r14
-  %half = load i16 *%src
+  %half = load i16 , i16 *%src
   %ext = zext i16 %half to i32
   ret i32 %ext
 }
@@ -48,7 +48,7 @@
 ; CHECK: llh %r2, 524286(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 262143
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = zext i16 %half to i32
   ret i32 %ext
 }
@@ -61,7 +61,7 @@
 ; CHECK: llh %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 262144
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = zext i16 %half to i32
   ret i32 %ext
 }
@@ -72,7 +72,7 @@
 ; CHECK: llh %r2, -2(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -1
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = zext i16 %half to i32
   ret i32 %ext
 }
@@ -83,7 +83,7 @@
 ; CHECK: llh %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -262144
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = zext i16 %half to i32
   ret i32 %ext
 }
@@ -96,7 +96,7 @@
 ; CHECK: llh %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -262145
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = zext i16 %half to i32
   ret i32 %ext
 }
@@ -109,7 +109,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to i16 *
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = zext i16 %half to i32
   ret i32 %ext
 }
@@ -120,22 +120,22 @@
 ; CHECK-LABEL: f11:
 ; CHECK: llh {{%r[0-9]+}}, 16{{[26]}}(%r15)
 ; CHECK: br %r14
-  %val0 = load volatile i32 *%ptr
-  %val1 = load volatile i32 *%ptr
-  %val2 = load volatile i32 *%ptr
-  %val3 = load volatile i32 *%ptr
-  %val4 = load volatile i32 *%ptr
-  %val5 = load volatile i32 *%ptr
-  %val6 = load volatile i32 *%ptr
-  %val7 = load volatile i32 *%ptr
-  %val8 = load volatile i32 *%ptr
-  %val9 = load volatile i32 *%ptr
-  %val10 = load volatile i32 *%ptr
-  %val11 = load volatile i32 *%ptr
-  %val12 = load volatile i32 *%ptr
-  %val13 = load volatile i32 *%ptr
-  %val14 = load volatile i32 *%ptr
-  %val15 = load volatile i32 *%ptr
+  %val0 = load volatile i32 , i32 *%ptr
+  %val1 = load volatile i32 , i32 *%ptr
+  %val2 = load volatile i32 , i32 *%ptr
+  %val3 = load volatile i32 , i32 *%ptr
+  %val4 = load volatile i32 , i32 *%ptr
+  %val5 = load volatile i32 , i32 *%ptr
+  %val6 = load volatile i32 , i32 *%ptr
+  %val7 = load volatile i32 , i32 *%ptr
+  %val8 = load volatile i32 , i32 *%ptr
+  %val9 = load volatile i32 , i32 *%ptr
+  %val10 = load volatile i32 , i32 *%ptr
+  %val11 = load volatile i32 , i32 *%ptr
+  %val12 = load volatile i32 , i32 *%ptr
+  %val13 = load volatile i32 , i32 *%ptr
+  %val14 = load volatile i32 , i32 *%ptr
+  %val15 = load volatile i32 , i32 *%ptr
 
   %trunc0 = trunc i32 %val0 to i16
   %trunc1 = trunc i32 %val1 to i16
diff --git a/llvm/test/CodeGen/SystemZ/int-conv-07.ll b/llvm/test/CodeGen/SystemZ/int-conv-07.ll
index ed7c08b..5e989e4 100644
--- a/llvm/test/CodeGen/SystemZ/int-conv-07.ll
+++ b/llvm/test/CodeGen/SystemZ/int-conv-07.ll
@@ -27,7 +27,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK: lgh %r2, 0(%r2)
 ; CHECK: br %r14
-  %half = load i16 *%src
+  %half = load i16 , i16 *%src
   %ext = sext i16 %half to i64
   ret i64 %ext
 }
@@ -38,7 +38,7 @@
 ; CHECK: lgh %r2, 524286(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 262143
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = sext i16 %half to i64
   ret i64 %ext
 }
@@ -51,7 +51,7 @@
 ; CHECK: lgh %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 262144
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = sext i16 %half to i64
   ret i64 %ext
 }
@@ -62,7 +62,7 @@
 ; CHECK: lgh %r2, -2(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -1
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = sext i16 %half to i64
   ret i64 %ext
 }
@@ -73,7 +73,7 @@
 ; CHECK: lgh %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -262144
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = sext i16 %half to i64
   ret i64 %ext
 }
@@ -86,7 +86,7 @@
 ; CHECK: lgh %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -262145
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = sext i16 %half to i64
   ret i64 %ext
 }
@@ -99,7 +99,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to i16 *
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = sext i16 %half to i64
   ret i64 %ext
 }
@@ -110,22 +110,22 @@
 ; CHECK-LABEL: f10:
 ; CHECK: lgh {{%r[0-9]+}}, 166(%r15)
 ; CHECK: br %r14
-  %val0 = load volatile i64 *%ptr
-  %val1 = load volatile i64 *%ptr
-  %val2 = load volatile i64 *%ptr
-  %val3 = load volatile i64 *%ptr
-  %val4 = load volatile i64 *%ptr
-  %val5 = load volatile i64 *%ptr
-  %val6 = load volatile i64 *%ptr
-  %val7 = load volatile i64 *%ptr
-  %val8 = load volatile i64 *%ptr
-  %val9 = load volatile i64 *%ptr
-  %val10 = load volatile i64 *%ptr
-  %val11 = load volatile i64 *%ptr
-  %val12 = load volatile i64 *%ptr
-  %val13 = load volatile i64 *%ptr
-  %val14 = load volatile i64 *%ptr
-  %val15 = load volatile i64 *%ptr
+  %val0 = load volatile i64 , i64 *%ptr
+  %val1 = load volatile i64 , i64 *%ptr
+  %val2 = load volatile i64 , i64 *%ptr
+  %val3 = load volatile i64 , i64 *%ptr
+  %val4 = load volatile i64 , i64 *%ptr
+  %val5 = load volatile i64 , i64 *%ptr
+  %val6 = load volatile i64 , i64 *%ptr
+  %val7 = load volatile i64 , i64 *%ptr
+  %val8 = load volatile i64 , i64 *%ptr
+  %val9 = load volatile i64 , i64 *%ptr
+  %val10 = load volatile i64 , i64 *%ptr
+  %val11 = load volatile i64 , i64 *%ptr
+  %val12 = load volatile i64 , i64 *%ptr
+  %val13 = load volatile i64 , i64 *%ptr
+  %val14 = load volatile i64 , i64 *%ptr
+  %val15 = load volatile i64 , i64 *%ptr
 
   %trunc0 = trunc i64 %val0 to i16
   %trunc1 = trunc i64 %val1 to i16
diff --git a/llvm/test/CodeGen/SystemZ/int-conv-08.ll b/llvm/test/CodeGen/SystemZ/int-conv-08.ll
index 8c14fb8..8524dfe 100644
--- a/llvm/test/CodeGen/SystemZ/int-conv-08.ll
+++ b/llvm/test/CodeGen/SystemZ/int-conv-08.ll
@@ -36,7 +36,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK: llgh %r2, 0(%r2)
 ; CHECK: br %r14
-  %half = load i16 *%src
+  %half = load i16 , i16 *%src
   %ext = zext i16 %half to i64
   ret i64 %ext
 }
@@ -47,7 +47,7 @@
 ; CHECK: llgh %r2, 524286(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 262143
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = zext i16 %half to i64
   ret i64 %ext
 }
@@ -60,7 +60,7 @@
 ; CHECK: llgh %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 262144
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = zext i16 %half to i64
   ret i64 %ext
 }
@@ -71,7 +71,7 @@
 ; CHECK: llgh %r2, -2(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -1
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = zext i16 %half to i64
   ret i64 %ext
 }
@@ -82,7 +82,7 @@
 ; CHECK: llgh %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -262144
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = zext i16 %half to i64
   ret i64 %ext
 }
@@ -95,7 +95,7 @@
 ; CHECK: llgh %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -262145
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = zext i16 %half to i64
   ret i64 %ext
 }
@@ -108,7 +108,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to i16 *
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %ext = zext i16 %half to i64
   ret i64 %ext
 }
@@ -119,22 +119,22 @@
 ; CHECK-LABEL: f11:
 ; CHECK: llgh {{%r[0-9]+}}, 166(%r15)
 ; CHECK: br %r14
-  %val0 = load volatile i64 *%ptr
-  %val1 = load volatile i64 *%ptr
-  %val2 = load volatile i64 *%ptr
-  %val3 = load volatile i64 *%ptr
-  %val4 = load volatile i64 *%ptr
-  %val5 = load volatile i64 *%ptr
-  %val6 = load volatile i64 *%ptr
-  %val7 = load volatile i64 *%ptr
-  %val8 = load volatile i64 *%ptr
-  %val9 = load volatile i64 *%ptr
-  %val10 = load volatile i64 *%ptr
-  %val11 = load volatile i64 *%ptr
-  %val12 = load volatile i64 *%ptr
-  %val13 = load volatile i64 *%ptr
-  %val14 = load volatile i64 *%ptr
-  %val15 = load volatile i64 *%ptr
+  %val0 = load volatile i64 , i64 *%ptr
+  %val1 = load volatile i64 , i64 *%ptr
+  %val2 = load volatile i64 , i64 *%ptr
+  %val3 = load volatile i64 , i64 *%ptr
+  %val4 = load volatile i64 , i64 *%ptr
+  %val5 = load volatile i64 , i64 *%ptr
+  %val6 = load volatile i64 , i64 *%ptr
+  %val7 = load volatile i64 , i64 *%ptr
+  %val8 = load volatile i64 , i64 *%ptr
+  %val9 = load volatile i64 , i64 *%ptr
+  %val10 = load volatile i64 , i64 *%ptr
+  %val11 = load volatile i64 , i64 *%ptr
+  %val12 = load volatile i64 , i64 *%ptr
+  %val13 = load volatile i64 , i64 *%ptr
+  %val14 = load volatile i64 , i64 *%ptr
+  %val15 = load volatile i64 , i64 *%ptr
 
   %trunc0 = trunc i64 %val0 to i16
   %trunc1 = trunc i64 %val1 to i16
diff --git a/llvm/test/CodeGen/SystemZ/int-conv-09.ll b/llvm/test/CodeGen/SystemZ/int-conv-09.ll
index 1e50eb8..ffd2049 100644
--- a/llvm/test/CodeGen/SystemZ/int-conv-09.ll
+++ b/llvm/test/CodeGen/SystemZ/int-conv-09.ll
@@ -26,7 +26,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK: lgf %r2, 0(%r2)
 ; CHECK: br %r14
-  %word = load i32 *%src
+  %word = load i32 , i32 *%src
   %ext = sext i32 %word to i64
   ret i64 %ext
 }
@@ -37,7 +37,7 @@
 ; CHECK: lgf %r2, 524284(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131071
-  %word = load i32 *%ptr
+  %word = load i32 , i32 *%ptr
   %ext = sext i32 %word to i64
   ret i64 %ext
 }
@@ -50,7 +50,7 @@
 ; CHECK: lgf %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131072
-  %word = load i32 *%ptr
+  %word = load i32 , i32 *%ptr
   %ext = sext i32 %word to i64
   ret i64 %ext
 }
@@ -61,7 +61,7 @@
 ; CHECK: lgf %r2, -4(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -1
-  %word = load i32 *%ptr
+  %word = load i32 , i32 *%ptr
   %ext = sext i32 %word to i64
   ret i64 %ext
 }
@@ -72,7 +72,7 @@
 ; CHECK: lgf %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131072
-  %word = load i32 *%ptr
+  %word = load i32 , i32 *%ptr
   %ext = sext i32 %word to i64
   ret i64 %ext
 }
@@ -85,7 +85,7 @@
 ; CHECK: lgf %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131073
-  %word = load i32 *%ptr
+  %word = load i32 , i32 *%ptr
   %ext = sext i32 %word to i64
   ret i64 %ext
 }
@@ -98,7 +98,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to i32 *
-  %word = load i32 *%ptr
+  %word = load i32 , i32 *%ptr
   %ext = sext i32 %word to i64
   ret i64 %ext
 }
diff --git a/llvm/test/CodeGen/SystemZ/int-conv-10.ll b/llvm/test/CodeGen/SystemZ/int-conv-10.ll
index 5666a16..a5e7406 100644
--- a/llvm/test/CodeGen/SystemZ/int-conv-10.ll
+++ b/llvm/test/CodeGen/SystemZ/int-conv-10.ll
@@ -35,7 +35,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK: llgf %r2, 0(%r2)
 ; CHECK: br %r14
-  %word = load i32 *%src
+  %word = load i32 , i32 *%src
   %ext = zext i32 %word to i64
   ret i64 %ext
 }
@@ -46,7 +46,7 @@
 ; CHECK: llgf %r2, 524284(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131071
-  %word = load i32 *%ptr
+  %word = load i32 , i32 *%ptr
   %ext = zext i32 %word to i64
   ret i64 %ext
 }
@@ -59,7 +59,7 @@
 ; CHECK: llgf %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131072
-  %word = load i32 *%ptr
+  %word = load i32 , i32 *%ptr
   %ext = zext i32 %word to i64
   ret i64 %ext
 }
@@ -70,7 +70,7 @@
 ; CHECK: llgf %r2, -4(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -1
-  %word = load i32 *%ptr
+  %word = load i32 , i32 *%ptr
   %ext = zext i32 %word to i64
   ret i64 %ext
 }
@@ -81,7 +81,7 @@
 ; CHECK: llgf %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131072
-  %word = load i32 *%ptr
+  %word = load i32 , i32 *%ptr
   %ext = zext i32 %word to i64
   ret i64 %ext
 }
@@ -94,7 +94,7 @@
 ; CHECK: llgf %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131073
-  %word = load i32 *%ptr
+  %word = load i32 , i32 *%ptr
   %ext = zext i32 %word to i64
   ret i64 %ext
 }
@@ -107,7 +107,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to i32 *
-  %word = load i32 *%ptr
+  %word = load i32 , i32 *%ptr
   %ext = zext i32 %word to i64
   ret i64 %ext
 }
diff --git a/llvm/test/CodeGen/SystemZ/int-conv-11.ll b/llvm/test/CodeGen/SystemZ/int-conv-11.ll
index 3076962..cfa0870 100644
--- a/llvm/test/CodeGen/SystemZ/int-conv-11.ll
+++ b/llvm/test/CodeGen/SystemZ/int-conv-11.ll
@@ -8,38 +8,38 @@
 ; CHECK-LABEL: f1:
 ; CHECK: llc{{h?}} {{%r[0-9]+}}, 16{{[37]}}(%r15)
 ; CHECK: br %r14
-  %val0 = load volatile i32 *%ptr
-  %val1 = load volatile i32 *%ptr
-  %val2 = load volatile i32 *%ptr
-  %val3 = load volatile i32 *%ptr
-  %val4 = load volatile i32 *%ptr
-  %val5 = load volatile i32 *%ptr
-  %val6 = load volatile i32 *%ptr
-  %val7 = load volatile i32 *%ptr
-  %val8 = load volatile i32 *%ptr
-  %val9 = load volatile i32 *%ptr
-  %val10 = load volatile i32 *%ptr
-  %val11 = load volatile i32 *%ptr
-  %val12 = load volatile i32 *%ptr
-  %val13 = load volatile i32 *%ptr
-  %val14 = load volatile i32 *%ptr
-  %val15 = load volatile i32 *%ptr
-  %val16 = load volatile i32 *%ptr
-  %val17 = load volatile i32 *%ptr
-  %val18 = load volatile i32 *%ptr
-  %val19 = load volatile i32 *%ptr
-  %val20 = load volatile i32 *%ptr
-  %val21 = load volatile i32 *%ptr
-  %val22 = load volatile i32 *%ptr
-  %val23 = load volatile i32 *%ptr
-  %val24 = load volatile i32 *%ptr
-  %val25 = load volatile i32 *%ptr
-  %val26 = load volatile i32 *%ptr
-  %val27 = load volatile i32 *%ptr
-  %val28 = load volatile i32 *%ptr
-  %val29 = load volatile i32 *%ptr
-  %val30 = load volatile i32 *%ptr
-  %val31 = load volatile i32 *%ptr
+  %val0 = load volatile i32 , i32 *%ptr
+  %val1 = load volatile i32 , i32 *%ptr
+  %val2 = load volatile i32 , i32 *%ptr
+  %val3 = load volatile i32 , i32 *%ptr
+  %val4 = load volatile i32 , i32 *%ptr
+  %val5 = load volatile i32 , i32 *%ptr
+  %val6 = load volatile i32 , i32 *%ptr
+  %val7 = load volatile i32 , i32 *%ptr
+  %val8 = load volatile i32 , i32 *%ptr
+  %val9 = load volatile i32 , i32 *%ptr
+  %val10 = load volatile i32 , i32 *%ptr
+  %val11 = load volatile i32 , i32 *%ptr
+  %val12 = load volatile i32 , i32 *%ptr
+  %val13 = load volatile i32 , i32 *%ptr
+  %val14 = load volatile i32 , i32 *%ptr
+  %val15 = load volatile i32 , i32 *%ptr
+  %val16 = load volatile i32 , i32 *%ptr
+  %val17 = load volatile i32 , i32 *%ptr
+  %val18 = load volatile i32 , i32 *%ptr
+  %val19 = load volatile i32 , i32 *%ptr
+  %val20 = load volatile i32 , i32 *%ptr
+  %val21 = load volatile i32 , i32 *%ptr
+  %val22 = load volatile i32 , i32 *%ptr
+  %val23 = load volatile i32 , i32 *%ptr
+  %val24 = load volatile i32 , i32 *%ptr
+  %val25 = load volatile i32 , i32 *%ptr
+  %val26 = load volatile i32 , i32 *%ptr
+  %val27 = load volatile i32 , i32 *%ptr
+  %val28 = load volatile i32 , i32 *%ptr
+  %val29 = load volatile i32 , i32 *%ptr
+  %val30 = load volatile i32 , i32 *%ptr
+  %val31 = load volatile i32 , i32 *%ptr
 
   %trunc0 = trunc i32 %val0 to i8
   %trunc1 = trunc i32 %val1 to i8
@@ -181,38 +181,38 @@
 ; CHECK-LABEL: f2:
 ; CHECK: llh{{h?}} {{%r[0-9]+}}, 16{{[26]}}(%r15)
 ; CHECK: br %r14
-  %val0 = load volatile i32 *%ptr
-  %val1 = load volatile i32 *%ptr
-  %val2 = load volatile i32 *%ptr
-  %val3 = load volatile i32 *%ptr
-  %val4 = load volatile i32 *%ptr
-  %val5 = load volatile i32 *%ptr
-  %val6 = load volatile i32 *%ptr
-  %val7 = load volatile i32 *%ptr
-  %val8 = load volatile i32 *%ptr
-  %val9 = load volatile i32 *%ptr
-  %val10 = load volatile i32 *%ptr
-  %val11 = load volatile i32 *%ptr
-  %val12 = load volatile i32 *%ptr
-  %val13 = load volatile i32 *%ptr
-  %val14 = load volatile i32 *%ptr
-  %val15 = load volatile i32 *%ptr
-  %val16 = load volatile i32 *%ptr
-  %val17 = load volatile i32 *%ptr
-  %val18 = load volatile i32 *%ptr
-  %val19 = load volatile i32 *%ptr
-  %val20 = load volatile i32 *%ptr
-  %val21 = load volatile i32 *%ptr
-  %val22 = load volatile i32 *%ptr
-  %val23 = load volatile i32 *%ptr
-  %val24 = load volatile i32 *%ptr
-  %val25 = load volatile i32 *%ptr
-  %val26 = load volatile i32 *%ptr
-  %val27 = load volatile i32 *%ptr
-  %val28 = load volatile i32 *%ptr
-  %val29 = load volatile i32 *%ptr
-  %val30 = load volatile i32 *%ptr
-  %val31 = load volatile i32 *%ptr
+  %val0 = load volatile i32 , i32 *%ptr
+  %val1 = load volatile i32 , i32 *%ptr
+  %val2 = load volatile i32 , i32 *%ptr
+  %val3 = load volatile i32 , i32 *%ptr
+  %val4 = load volatile i32 , i32 *%ptr
+  %val5 = load volatile i32 , i32 *%ptr
+  %val6 = load volatile i32 , i32 *%ptr
+  %val7 = load volatile i32 , i32 *%ptr
+  %val8 = load volatile i32 , i32 *%ptr
+  %val9 = load volatile i32 , i32 *%ptr
+  %val10 = load volatile i32 , i32 *%ptr
+  %val11 = load volatile i32 , i32 *%ptr
+  %val12 = load volatile i32 , i32 *%ptr
+  %val13 = load volatile i32 , i32 *%ptr
+  %val14 = load volatile i32 , i32 *%ptr
+  %val15 = load volatile i32 , i32 *%ptr
+  %val16 = load volatile i32 , i32 *%ptr
+  %val17 = load volatile i32 , i32 *%ptr
+  %val18 = load volatile i32 , i32 *%ptr
+  %val19 = load volatile i32 , i32 *%ptr
+  %val20 = load volatile i32 , i32 *%ptr
+  %val21 = load volatile i32 , i32 *%ptr
+  %val22 = load volatile i32 , i32 *%ptr
+  %val23 = load volatile i32 , i32 *%ptr
+  %val24 = load volatile i32 , i32 *%ptr
+  %val25 = load volatile i32 , i32 *%ptr
+  %val26 = load volatile i32 , i32 *%ptr
+  %val27 = load volatile i32 , i32 *%ptr
+  %val28 = load volatile i32 , i32 *%ptr
+  %val29 = load volatile i32 , i32 *%ptr
+  %val30 = load volatile i32 , i32 *%ptr
+  %val31 = load volatile i32 , i32 *%ptr
 
   %trunc0 = trunc i32 %val0 to i16
   %trunc1 = trunc i32 %val1 to i16
diff --git a/llvm/test/CodeGen/SystemZ/int-div-01.ll b/llvm/test/CodeGen/SystemZ/int-div-01.ll
index 5c739ec..1442109 100644
--- a/llvm/test/CodeGen/SystemZ/int-div-01.ll
+++ b/llvm/test/CodeGen/SystemZ/int-div-01.ll
@@ -69,7 +69,7 @@
 ; CHECK-NOT: dsgfr
 ; CHECK: or %r2, %r3
 ; CHECK: br %r14
-  %a = load i32 *%src
+  %a = load i32 , i32 *%src
   %div = sdiv i32 %a, %b
   %rem = srem i32 %a, %b
   %or = or i32 %rem, %div
@@ -83,7 +83,7 @@
 ; CHECK: dsgf %r0, 0(%r4)
 ; CHECK: st %r1, 0(%r2)
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %div = sdiv i32 %a, %b
   store i32 %div, i32 *%dest
   ret void
@@ -96,7 +96,7 @@
 ; CHECK: dsgf %r0, 0(%r4)
 ; CHECK: st %r0, 0(%r2)
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %rem = srem i32 %a, %b
   store i32 %rem, i32 *%dest
   ret void
@@ -112,7 +112,7 @@
 ; CHECK-NOT: {{dsgf|dsgfr}}
 ; CHECK: or %r2, %r3
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %div = sdiv i32 %a, %b
   %rem = srem i32 %a, %b
   %or = or i32 %rem, %div
@@ -125,7 +125,7 @@
 ; CHECK: dsgf %r2, 524284(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131071
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %rem = srem i32 %a, %b
   ret i32 %rem
 }
@@ -138,7 +138,7 @@
 ; CHECK: dsgf %r2, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %rem = srem i32 %a, %b
   ret i32 %rem
 }
@@ -149,7 +149,7 @@
 ; CHECK: dsgf %r2, -4(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -1
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %rem = srem i32 %a, %b
   ret i32 %rem
 }
@@ -160,7 +160,7 @@
 ; CHECK: dsgf %r2, -524288(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %rem = srem i32 %a, %b
   ret i32 %rem
 }
@@ -173,7 +173,7 @@
 ; CHECK: dsgf %r2, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131073
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %rem = srem i32 %a, %b
   ret i32 %rem
 }
@@ -186,7 +186,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %rem = srem i32 %a, %b
   ret i32 %rem
 }
@@ -200,7 +200,7 @@
 ; CHECK: lgfr %r1, %r2
 ; CHECK: dsgfr %r0, [[B]]
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %a = call i32 @foo()
   %div = sdiv i32 %a, %b
   store i32 %div, i32 *%dest
@@ -223,16 +223,16 @@
   %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
   %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
 
-  %val0 = load i32 *%ptr0
-  %val1 = load i32 *%ptr1
-  %val2 = load i32 *%ptr2
-  %val3 = load i32 *%ptr3
-  %val4 = load i32 *%ptr4
-  %val5 = load i32 *%ptr5
-  %val6 = load i32 *%ptr6
-  %val7 = load i32 *%ptr7
-  %val8 = load i32 *%ptr8
-  %val9 = load i32 *%ptr9
+  %val0 = load i32 , i32 *%ptr0
+  %val1 = load i32 , i32 *%ptr1
+  %val2 = load i32 , i32 *%ptr2
+  %val3 = load i32 , i32 *%ptr3
+  %val4 = load i32 , i32 *%ptr4
+  %val5 = load i32 , i32 *%ptr5
+  %val6 = load i32 , i32 *%ptr6
+  %val7 = load i32 , i32 *%ptr7
+  %val8 = load i32 , i32 *%ptr8
+  %val9 = load i32 , i32 *%ptr9
 
   %ret = call i32 @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/int-div-02.ll b/llvm/test/CodeGen/SystemZ/int-div-02.ll
index dfbaa60..1a4b4d9 100644
--- a/llvm/test/CodeGen/SystemZ/int-div-02.ll
+++ b/llvm/test/CodeGen/SystemZ/int-div-02.ll
@@ -57,7 +57,7 @@
 ; CHECK: dl %r2, 0(%r4)
 ; CHECK: st %r3, 0(%r5)
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %div = udiv i32 %a, %b
   store i32 %div, i32 *%dest
   ret void
@@ -72,7 +72,7 @@
 ; CHECK: dl %r2, 0(%r4)
 ; CHECK: st %r2, 0(%r5)
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %rem = urem i32 %a, %b
   store i32 %rem, i32 *%dest
   ret void
@@ -88,7 +88,7 @@
 ; CHECK-NOT: {{dl|dlr}}
 ; CHECK: or %r2, %r3
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %div = udiv i32 %a, %b
   %rem = urem i32 %a, %b
   %or = or i32 %rem, %div
@@ -101,7 +101,7 @@
 ; CHECK: dl %r2, 524284(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131071
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %rem = urem i32 %a, %b
   ret i32 %rem
 }
@@ -114,7 +114,7 @@
 ; CHECK: dl %r2, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %rem = urem i32 %a, %b
   ret i32 %rem
 }
@@ -125,7 +125,7 @@
 ; CHECK: dl %r2, -4(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -1
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %rem = urem i32 %a, %b
   ret i32 %rem
 }
@@ -136,7 +136,7 @@
 ; CHECK: dl %r2, -524288(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %rem = urem i32 %a, %b
   ret i32 %rem
 }
@@ -149,7 +149,7 @@
 ; CHECK: dl %r2, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131073
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %rem = urem i32 %a, %b
   ret i32 %rem
 }
@@ -162,7 +162,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %rem = urem i32 %a, %b
   ret i32 %rem
 }
@@ -183,16 +183,16 @@
   %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
   %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
 
-  %val0 = load i32 *%ptr0
-  %val1 = load i32 *%ptr1
-  %val2 = load i32 *%ptr2
-  %val3 = load i32 *%ptr3
-  %val4 = load i32 *%ptr4
-  %val5 = load i32 *%ptr5
-  %val6 = load i32 *%ptr6
-  %val7 = load i32 *%ptr7
-  %val8 = load i32 *%ptr8
-  %val9 = load i32 *%ptr9
+  %val0 = load i32 , i32 *%ptr0
+  %val1 = load i32 , i32 *%ptr1
+  %val2 = load i32 , i32 *%ptr2
+  %val3 = load i32 , i32 *%ptr3
+  %val4 = load i32 , i32 *%ptr4
+  %val5 = load i32 , i32 *%ptr5
+  %val6 = load i32 , i32 *%ptr6
+  %val7 = load i32 , i32 *%ptr7
+  %val8 = load i32 , i32 *%ptr8
+  %val9 = load i32 , i32 *%ptr9
 
   %ret = call i32 @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/int-div-03.ll b/llvm/test/CodeGen/SystemZ/int-div-03.ll
index f529ce8..37a7c4f 100644
--- a/llvm/test/CodeGen/SystemZ/int-div-03.ll
+++ b/llvm/test/CodeGen/SystemZ/int-div-03.ll
@@ -75,7 +75,7 @@
 ; CHECK: dsgf %r2, 0(%r4)
 ; CHECK: stg %r3, 0(%r5)
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %bext = sext i32 %b to i64
   %div = sdiv i64 %a, %bext
   store i64 %div, i64 *%dest
@@ -89,7 +89,7 @@
 ; CHECK: dsgf %r2, 0(%r4)
 ; CHECK: stg %r2, 0(%r5)
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %bext = sext i32 %b to i64
   %rem = srem i64 %a, %bext
   store i64 %rem, i64 *%dest
@@ -104,7 +104,7 @@
 ; CHECK-NOT: {{dsgf|dsgfr}}
 ; CHECK: ogr %r2, %r3
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %bext = sext i32 %b to i64
   %div = sdiv i64 %a, %bext
   %rem = srem i64 %a, %bext
@@ -118,7 +118,7 @@
 ; CHECK: dsgf %r2, 524284(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131071
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %rem = srem i64 %a, %bext
   ret i64 %rem
@@ -132,7 +132,7 @@
 ; CHECK: dsgf %r2, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %rem = srem i64 %a, %bext
   ret i64 %rem
@@ -144,7 +144,7 @@
 ; CHECK: dsgf %r2, -4(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -1
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %rem = srem i64 %a, %bext
   ret i64 %rem
@@ -156,7 +156,7 @@
 ; CHECK: dsgf %r2, -524288(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %rem = srem i64 %a, %bext
   ret i64 %rem
@@ -170,7 +170,7 @@
 ; CHECK: dsgf %r2, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131073
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %rem = srem i64 %a, %bext
   ret i64 %rem
@@ -184,7 +184,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %rem = srem i64 %a, %bext
   ret i64 %rem
@@ -199,7 +199,7 @@
 ; CHECK: lgr %r1, %r2
 ; CHECK: dsgfr %r0, [[B]]
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %a = call i64 @foo()
   %ext = sext i32 %b to i64
   %div = sdiv i64 %a, %ext
diff --git a/llvm/test/CodeGen/SystemZ/int-div-04.ll b/llvm/test/CodeGen/SystemZ/int-div-04.ll
index 2b2259e..e8c6f3e 100644
--- a/llvm/test/CodeGen/SystemZ/int-div-04.ll
+++ b/llvm/test/CodeGen/SystemZ/int-div-04.ll
@@ -49,7 +49,7 @@
 ; CHECK: dsg %r2, 0(%r4)
 ; CHECK: stg %r3, 0(%r5)
 ; CHECK: br %r14
-  %b = load i64 *%src
+  %b = load i64 , i64 *%src
   %div = sdiv i64 %a, %b
   store i64 %div, i64 *%dest
   ret void
@@ -62,7 +62,7 @@
 ; CHECK: dsg %r2, 0(%r4)
 ; CHECK: stg %r2, 0(%r5)
 ; CHECK: br %r14
-  %b = load i64 *%src
+  %b = load i64 , i64 *%src
   %rem = srem i64 %a, %b
   store i64 %rem, i64 *%dest
   ret void
@@ -76,7 +76,7 @@
 ; CHECK-NOT: {{dsg|dsgr}}
 ; CHECK: ogr %r2, %r3
 ; CHECK: br %r14
-  %b = load i64 *%src
+  %b = load i64 , i64 *%src
   %div = sdiv i64 %a, %b
   %rem = srem i64 %a, %b
   %or = or i64 %rem, %div
@@ -89,7 +89,7 @@
 ; CHECK: dsg %r2, 524280(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65535
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %rem = srem i64 %a, %b
   ret i64 %rem
 }
@@ -102,7 +102,7 @@
 ; CHECK: dsg %r2, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65536
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %rem = srem i64 %a, %b
   ret i64 %rem
 }
@@ -113,7 +113,7 @@
 ; CHECK: dsg %r2, -8(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -1
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %rem = srem i64 %a, %b
   ret i64 %rem
 }
@@ -124,7 +124,7 @@
 ; CHECK: dsg %r2, -524288(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65536
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %rem = srem i64 %a, %b
   ret i64 %rem
 }
@@ -137,7 +137,7 @@
 ; CHECK: dsg %r2, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65537
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %rem = srem i64 %a, %b
   ret i64 %rem
 }
@@ -150,7 +150,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to i64 *
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %rem = srem i64 %a, %b
   ret i64 %rem
 }
@@ -172,17 +172,17 @@
   %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
   %ptr10 = getelementptr i64, i64 *%ptr0, i64 20
 
-  %val0 = load i64 *%ptr0
-  %val1 = load i64 *%ptr1
-  %val2 = load i64 *%ptr2
-  %val3 = load i64 *%ptr3
-  %val4 = load i64 *%ptr4
-  %val5 = load i64 *%ptr5
-  %val6 = load i64 *%ptr6
-  %val7 = load i64 *%ptr7
-  %val8 = load i64 *%ptr8
-  %val9 = load i64 *%ptr9
-  %val10 = load i64 *%ptr10
+  %val0 = load i64 , i64 *%ptr0
+  %val1 = load i64 , i64 *%ptr1
+  %val2 = load i64 , i64 *%ptr2
+  %val3 = load i64 , i64 *%ptr3
+  %val4 = load i64 , i64 *%ptr4
+  %val5 = load i64 , i64 *%ptr5
+  %val6 = load i64 , i64 *%ptr6
+  %val7 = load i64 , i64 *%ptr7
+  %val8 = load i64 , i64 *%ptr8
+  %val9 = load i64 , i64 *%ptr9
+  %val10 = load i64 , i64 *%ptr10
 
   %ret = call i64 @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/int-div-05.ll b/llvm/test/CodeGen/SystemZ/int-div-05.ll
index 0cface9..f80a139 100644
--- a/llvm/test/CodeGen/SystemZ/int-div-05.ll
+++ b/llvm/test/CodeGen/SystemZ/int-div-05.ll
@@ -57,7 +57,7 @@
 ; CHECK: dlg %r2, 0(%r4)
 ; CHECK: stg %r3, 0(%r5)
 ; CHECK: br %r14
-  %b = load i64 *%src
+  %b = load i64 , i64 *%src
   %div = udiv i64 %a, %b
   store i64 %div, i64 *%dest
   ret void
@@ -72,7 +72,7 @@
 ; CHECK: dlg %r2, 0(%r4)
 ; CHECK: stg %r2, 0(%r5)
 ; CHECK: br %r14
-  %b = load i64 *%src
+  %b = load i64 , i64 *%src
   %rem = urem i64 %a, %b
   store i64 %rem, i64 *%dest
   ret void
@@ -88,7 +88,7 @@
 ; CHECK-NOT: {{dlg|dlgr}}
 ; CHECK: ogr %r2, %r3
 ; CHECK: br %r14
-  %b = load i64 *%src
+  %b = load i64 , i64 *%src
   %div = udiv i64 %a, %b
   %rem = urem i64 %a, %b
   %or = or i64 %rem, %div
@@ -101,7 +101,7 @@
 ; CHECK: dlg %r2, 524280(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65535
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %rem = urem i64 %a, %b
   ret i64 %rem
 }
@@ -114,7 +114,7 @@
 ; CHECK: dlg %r2, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65536
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %rem = urem i64 %a, %b
   ret i64 %rem
 }
@@ -125,7 +125,7 @@
 ; CHECK: dlg %r2, -8(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -1
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %rem = urem i64 %a, %b
   ret i64 %rem
 }
@@ -136,7 +136,7 @@
 ; CHECK: dlg %r2, -524288(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65536
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %rem = urem i64 %a, %b
   ret i64 %rem
 }
@@ -149,7 +149,7 @@
 ; CHECK: dlg %r2, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65537
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %rem = urem i64 %a, %b
   ret i64 %rem
 }
@@ -162,7 +162,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to i64 *
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %rem = urem i64 %a, %b
   ret i64 %rem
 }
@@ -184,17 +184,17 @@
   %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
   %ptr10 = getelementptr i64, i64 *%ptr0, i64 20
 
-  %val0 = load i64 *%ptr0
-  %val1 = load i64 *%ptr1
-  %val2 = load i64 *%ptr2
-  %val3 = load i64 *%ptr3
-  %val4 = load i64 *%ptr4
-  %val5 = load i64 *%ptr5
-  %val6 = load i64 *%ptr6
-  %val7 = load i64 *%ptr7
-  %val8 = load i64 *%ptr8
-  %val9 = load i64 *%ptr9
-  %val10 = load i64 *%ptr10
+  %val0 = load i64 , i64 *%ptr0
+  %val1 = load i64 , i64 *%ptr1
+  %val2 = load i64 , i64 *%ptr2
+  %val3 = load i64 , i64 *%ptr3
+  %val4 = load i64 , i64 *%ptr4
+  %val5 = load i64 , i64 *%ptr5
+  %val6 = load i64 , i64 *%ptr6
+  %val7 = load i64 , i64 *%ptr7
+  %val8 = load i64 , i64 *%ptr8
+  %val9 = load i64 , i64 *%ptr9
+  %val10 = load i64 , i64 *%ptr10
 
   %ret = call i64 @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/int-move-02.ll b/llvm/test/CodeGen/SystemZ/int-move-02.ll
index 5d6f5d7..7ec0f41 100644
--- a/llvm/test/CodeGen/SystemZ/int-move-02.ll
+++ b/llvm/test/CodeGen/SystemZ/int-move-02.ll
@@ -7,7 +7,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: l %r2, 0(%r2)
 ; CHECK: br %r14
-  %val = load i32 *%src
+  %val = load i32 , i32 *%src
   ret i32 %val
 }
 
@@ -17,7 +17,7 @@
 ; CHECK: l %r2, 4092(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 1023
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   ret i32 %val
 }
 
@@ -27,7 +27,7 @@
 ; CHECK: ly %r2, 4096(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 1024
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   ret i32 %val
 }
 
@@ -37,7 +37,7 @@
 ; CHECK: ly %r2, 524284(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131071
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   ret i32 %val
 }
 
@@ -49,7 +49,7 @@
 ; CHECK: l %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131072
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   ret i32 %val
 }
 
@@ -59,7 +59,7 @@
 ; CHECK: ly %r2, -4(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -1
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   ret i32 %val
 }
 
@@ -69,7 +69,7 @@
 ; CHECK: ly %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131072
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   ret i32 %val
 }
 
@@ -81,7 +81,7 @@
 ; CHECK: l %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131073
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   ret i32 %val
 }
 
@@ -93,7 +93,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4095
   %ptr = inttoptr i64 %add2 to i32 *
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   ret i32 %val
 }
 
@@ -105,6 +105,6 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i32 *
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   ret i32 %val
 }
diff --git a/llvm/test/CodeGen/SystemZ/int-move-03.ll b/llvm/test/CodeGen/SystemZ/int-move-03.ll
index 93e9e66e..60eb004 100644
--- a/llvm/test/CodeGen/SystemZ/int-move-03.ll
+++ b/llvm/test/CodeGen/SystemZ/int-move-03.ll
@@ -7,7 +7,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: lg %r2, 0(%r2)
 ; CHECK: br %r14
-  %val = load i64 *%src
+  %val = load i64 , i64 *%src
   ret i64 %val
 }
 
@@ -17,7 +17,7 @@
 ; CHECK: lg %r2, 524280(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65535
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   ret i64 %val
 }
 
@@ -29,7 +29,7 @@
 ; CHECK: lg %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65536
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   ret i64 %val
 }
 
@@ -39,7 +39,7 @@
 ; CHECK: lg %r2, -8(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -1
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   ret i64 %val
 }
 
@@ -49,7 +49,7 @@
 ; CHECK: lg %r2, -524288(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65536
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   ret i64 %val
 }
 
@@ -61,7 +61,7 @@
 ; CHECK: lg %r2, 0(%r2)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65537
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   ret i64 %val
 }
 
@@ -73,6 +73,6 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to i64 *
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   ret i64 %val
 }
diff --git a/llvm/test/CodeGen/SystemZ/int-move-08.ll b/llvm/test/CodeGen/SystemZ/int-move-08.ll
index 3b35fa0..d28d298 100644
--- a/llvm/test/CodeGen/SystemZ/int-move-08.ll
+++ b/llvm/test/CodeGen/SystemZ/int-move-08.ll
@@ -18,7 +18,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: lhrl %r2, gsrc16
 ; CHECK: br %r14
-  %val = load i16 *@gsrc16
+  %val = load i16 , i16 *@gsrc16
   %ext = sext i16 %val to i32
   ret i32 %ext
 }
@@ -28,7 +28,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: llhrl %r2, gsrc16
 ; CHECK: br %r14
-  %val = load i16 *@gsrc16
+  %val = load i16 , i16 *@gsrc16
   %ext = zext i16 %val to i32
   ret i32 %ext
 }
@@ -49,7 +49,7 @@
 ; CHECK: lrl %r0, gsrc32
 ; CHECK: strl %r0, gdst32
 ; CHECK: br %r14
-  %val = load i32 *@gsrc32
+  %val = load i32 , i32 *@gsrc32
   store i32 %val, i32 *@gdst32
   ret void
 }
@@ -60,7 +60,7 @@
 ; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u
 ; CHECK: lh %r2, 0([[REG]])
 ; CHECK: br %r14
-  %val = load i16 *@gsrc16u, align 1
+  %val = load i16 , i16 *@gsrc16u, align 1
   %ext = sext i16 %val to i32
   ret i32 %ext
 }
@@ -71,7 +71,7 @@
 ; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u
 ; CHECK: llh %r2, 0([[REG]])
 ; CHECK: br %r14
-  %val = load i16 *@gsrc16u, align 1
+  %val = load i16 , i16 *@gsrc16u, align 1
   %ext = zext i16 %val to i32
   ret i32 %ext
 }
@@ -95,7 +95,7 @@
 ; CHECK: larl [[REG:%r[0-5]]], gdst32u
 ; CHECK: st [[VAL]], 0([[REG]])
 ; CHECK: br %r14
-  %val = load i32 *@gsrc32u, align 2
+  %val = load i32 , i32 *@gsrc32u, align 2
   store i32 %val, i32 *@gdst32u, align 2
   ret void
 }
@@ -111,7 +111,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr [2 x i8], [2 x i8] *@garray8, i64 0, i64 0
   %ptr2 = getelementptr [2 x i8], [2 x i8] *@garray8, i64 0, i64 1
-  %val = load i8 *%ptr1
+  %val = load i8 , i8 *%ptr1
   %shr = lshr i8 %val, 1
   store i8 %shr, i8 *%ptr2
   ret void
@@ -127,7 +127,7 @@
 ; CHECK: br %r14
   %ptr1 = getelementptr [2 x i16], [2 x i16] *@garray16, i64 0, i64 0
   %ptr2 = getelementptr [2 x i16], [2 x i16] *@garray16, i64 0, i64 1
-  %val = load i16 *%ptr1
+  %val = load i16 , i16 *%ptr1
   %shr = lshr i16 %val, 1
   store i16 %shr, i16 *%ptr2
   ret void
diff --git a/llvm/test/CodeGen/SystemZ/int-move-09.ll b/llvm/test/CodeGen/SystemZ/int-move-09.ll
index b5c9cb1..6476842 100644
--- a/llvm/test/CodeGen/SystemZ/int-move-09.ll
+++ b/llvm/test/CodeGen/SystemZ/int-move-09.ll
@@ -20,7 +20,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: lghrl %r2, gsrc16
 ; CHECK: br %r14
-  %val = load i16 *@gsrc16
+  %val = load i16 , i16 *@gsrc16
   %ext = sext i16 %val to i64
   ret i64 %ext
 }
@@ -30,7 +30,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: llghrl %r2, gsrc16
 ; CHECK: br %r14
-  %val = load i16 *@gsrc16
+  %val = load i16 , i16 *@gsrc16
   %ext = zext i16 %val to i64
   ret i64 %ext
 }
@@ -40,7 +40,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK: lgfrl %r2, gsrc32
 ; CHECK: br %r14
-  %val = load i32 *@gsrc32
+  %val = load i32 , i32 *@gsrc32
   %ext = sext i32 %val to i64
   ret i64 %ext
 }
@@ -50,7 +50,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK: llgfrl %r2, gsrc32
 ; CHECK: br %r14
-  %val = load i32 *@gsrc32
+  %val = load i32 , i32 *@gsrc32
   %ext = zext i32 %val to i64
   ret i64 %ext
 }
@@ -81,7 +81,7 @@
 ; CHECK: lgrl %r0, gsrc64
 ; CHECK: stgrl %r0, gdst64
 ; CHECK: br %r14
-  %val = load i64 *@gsrc64
+  %val = load i64 , i64 *@gsrc64
   store i64 %val, i64 *@gdst64
   ret void
 }
@@ -92,7 +92,7 @@
 ; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u@GOT
 ; CHECK: lgh %r2, 0([[REG]])
 ; CHECK: br %r14
-  %val = load i16 *@gsrc16u, align 1
+  %val = load i16 , i16 *@gsrc16u, align 1
   %ext = sext i16 %val to i64
   ret i64 %ext
 }
@@ -103,7 +103,7 @@
 ; CHECK: lgrl [[REG:%r[0-5]]], gsrc16u@GOT
 ; CHECK: llgh %r2, 0([[REG]])
 ; CHECK: br %r14
-  %val = load i16 *@gsrc16u, align 1
+  %val = load i16 , i16 *@gsrc16u, align 1
   %ext = zext i16 %val to i64
   ret i64 %ext
 }
@@ -114,7 +114,7 @@
 ; CHECK: larl [[REG:%r[0-5]]], gsrc32u
 ; CHECK: lgf %r2, 0([[REG]])
 ; CHECK: br %r14
-  %val = load i32 *@gsrc32u, align 2
+  %val = load i32 , i32 *@gsrc32u, align 2
   %ext = sext i32 %val to i64
   ret i64 %ext
 }
@@ -125,7 +125,7 @@
 ; CHECK: larl [[REG:%r[0-5]]], gsrc32u
 ; CHECK: llgf %r2, 0([[REG]])
 ; CHECK: br %r14
-  %val = load i32 *@gsrc32u, align 2
+  %val = load i32 , i32 *@gsrc32u, align 2
   %ext = zext i32 %val to i64
   ret i64 %ext
 }
@@ -160,7 +160,7 @@
 ; CHECK: larl [[REG:%r[0-5]]], gdst64u
 ; CHECK: stg [[VAL]], 0([[REG]])
 ; CHECK: br %r14
-  %val = load i64 *@gsrc64u, align 4
+  %val = load i64 , i64 *@gsrc64u, align 4
   store i64 %val, i64 *@gdst64u, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/SystemZ/int-mul-01.ll b/llvm/test/CodeGen/SystemZ/int-mul-01.ll
index 667fc1c..b0adc18 100644
--- a/llvm/test/CodeGen/SystemZ/int-mul-01.ll
+++ b/llvm/test/CodeGen/SystemZ/int-mul-01.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: mh %r2, 0(%r3)
 ; CHECK: br %r14
-  %half = load i16 *%src
+  %half = load i16 , i16 *%src
   %rhs = sext i16 %half to i32
   %res = mul i32 %lhs, %rhs
   ret i32 %res
@@ -20,7 +20,7 @@
 ; CHECK: mh %r2, 4094(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 2047
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = mul i32 %lhs, %rhs
   ret i32 %res
@@ -32,7 +32,7 @@
 ; CHECK: mhy %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 2048
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = mul i32 %lhs, %rhs
   ret i32 %res
@@ -44,7 +44,7 @@
 ; CHECK: mhy %r2, 524286(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 262143
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = mul i32 %lhs, %rhs
   ret i32 %res
@@ -58,7 +58,7 @@
 ; CHECK: mh %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 262144
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = mul i32 %lhs, %rhs
   ret i32 %res
@@ -70,7 +70,7 @@
 ; CHECK: mhy %r2, -2(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -1
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = mul i32 %lhs, %rhs
   ret i32 %res
@@ -82,7 +82,7 @@
 ; CHECK: mhy %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -262144
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = mul i32 %lhs, %rhs
   ret i32 %res
@@ -96,7 +96,7 @@
 ; CHECK: mh %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -262145
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = mul i32 %lhs, %rhs
   ret i32 %res
@@ -110,7 +110,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4094
   %ptr = inttoptr i64 %add2 to i16 *
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = mul i32 %lhs, %rhs
   ret i32 %res
@@ -124,7 +124,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i16 *
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = mul i32 %lhs, %rhs
   ret i32 %res
diff --git a/llvm/test/CodeGen/SystemZ/int-mul-02.ll b/llvm/test/CodeGen/SystemZ/int-mul-02.ll
index 50b3698..265674f 100644
--- a/llvm/test/CodeGen/SystemZ/int-mul-02.ll
+++ b/llvm/test/CodeGen/SystemZ/int-mul-02.ll
@@ -18,7 +18,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: ms %r2, 0(%r3)
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %mul = mul i32 %a, %b
   ret i32 %mul
 }
@@ -29,7 +29,7 @@
 ; CHECK: ms %r2, 4092(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 1023
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %mul = mul i32 %a, %b
   ret i32 %mul
 }
@@ -40,7 +40,7 @@
 ; CHECK: msy %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 1024
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %mul = mul i32 %a, %b
   ret i32 %mul
 }
@@ -51,7 +51,7 @@
 ; CHECK: msy %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131071
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %mul = mul i32 %a, %b
   ret i32 %mul
 }
@@ -64,7 +64,7 @@
 ; CHECK: ms %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %mul = mul i32 %a, %b
   ret i32 %mul
 }
@@ -75,7 +75,7 @@
 ; CHECK: msy %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -1
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %mul = mul i32 %a, %b
   ret i32 %mul
 }
@@ -86,7 +86,7 @@
 ; CHECK: msy %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %mul = mul i32 %a, %b
   ret i32 %mul
 }
@@ -99,7 +99,7 @@
 ; CHECK: ms %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131073
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %mul = mul i32 %a, %b
   ret i32 %mul
 }
@@ -112,7 +112,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4092
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %mul = mul i32 %a, %b
   ret i32 %mul
 }
@@ -125,7 +125,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %mul = mul i32 %a, %b
   ret i32 %mul
 }
@@ -146,16 +146,16 @@
   %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
   %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
 
-  %val0 = load i32 *%ptr0
-  %val1 = load i32 *%ptr1
-  %val2 = load i32 *%ptr2
-  %val3 = load i32 *%ptr3
-  %val4 = load i32 *%ptr4
-  %val5 = load i32 *%ptr5
-  %val6 = load i32 *%ptr6
-  %val7 = load i32 *%ptr7
-  %val8 = load i32 *%ptr8
-  %val9 = load i32 *%ptr9
+  %val0 = load i32 , i32 *%ptr0
+  %val1 = load i32 , i32 *%ptr1
+  %val2 = load i32 , i32 *%ptr2
+  %val3 = load i32 , i32 *%ptr3
+  %val4 = load i32 , i32 *%ptr4
+  %val5 = load i32 , i32 *%ptr5
+  %val6 = load i32 , i32 *%ptr6
+  %val7 = load i32 , i32 *%ptr7
+  %val8 = load i32 , i32 *%ptr8
+  %val9 = load i32 , i32 *%ptr9
 
   %ret = call i32 @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/int-mul-03.ll b/llvm/test/CodeGen/SystemZ/int-mul-03.ll
index 3d273f1..c4d16ce 100644
--- a/llvm/test/CodeGen/SystemZ/int-mul-03.ll
+++ b/llvm/test/CodeGen/SystemZ/int-mul-03.ll
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: msgf %r2, 0(%r3)
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %bext = sext i32 %b to i64
   %mul = mul i64 %a, %bext
   ret i64 %mul
@@ -31,7 +31,7 @@
 ; CHECK: msgf %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131071
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %mul = mul i64 %a, %bext
   ret i64 %mul
@@ -45,7 +45,7 @@
 ; CHECK: msgf %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %mul = mul i64 %a, %bext
   ret i64 %mul
@@ -57,7 +57,7 @@
 ; CHECK: msgf %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -1
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %mul = mul i64 %a, %bext
   ret i64 %mul
@@ -69,7 +69,7 @@
 ; CHECK: msgf %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %mul = mul i64 %a, %bext
   ret i64 %mul
@@ -83,7 +83,7 @@
 ; CHECK: msgf %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131073
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %mul = mul i64 %a, %bext
   ret i64 %mul
@@ -97,7 +97,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524284
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %mul = mul i64 %a, %bext
   ret i64 %mul
@@ -119,16 +119,16 @@
   %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
   %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
 
-  %val0 = load i32 *%ptr0
-  %val1 = load i32 *%ptr1
-  %val2 = load i32 *%ptr2
-  %val3 = load i32 *%ptr3
-  %val4 = load i32 *%ptr4
-  %val5 = load i32 *%ptr5
-  %val6 = load i32 *%ptr6
-  %val7 = load i32 *%ptr7
-  %val8 = load i32 *%ptr8
-  %val9 = load i32 *%ptr9
+  %val0 = load i32 , i32 *%ptr0
+  %val1 = load i32 , i32 *%ptr1
+  %val2 = load i32 , i32 *%ptr2
+  %val3 = load i32 , i32 *%ptr3
+  %val4 = load i32 , i32 *%ptr4
+  %val5 = load i32 , i32 *%ptr5
+  %val6 = load i32 , i32 *%ptr6
+  %val7 = load i32 , i32 *%ptr7
+  %val8 = load i32 , i32 *%ptr8
+  %val9 = load i32 , i32 *%ptr9
 
   %frob0 = add i32 %val0, 100
   %frob1 = add i32 %val1, 100
diff --git a/llvm/test/CodeGen/SystemZ/int-mul-04.ll b/llvm/test/CodeGen/SystemZ/int-mul-04.ll
index 34d9ad5..1ec4661 100644
--- a/llvm/test/CodeGen/SystemZ/int-mul-04.ll
+++ b/llvm/test/CodeGen/SystemZ/int-mul-04.ll
@@ -18,7 +18,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: msg %r2, 0(%r3)
 ; CHECK: br %r14
-  %b = load i64 *%src
+  %b = load i64 , i64 *%src
   %mul = mul i64 %a, %b
   ret i64 %mul
 }
@@ -29,7 +29,7 @@
 ; CHECK: msg %r2, 524280(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65535
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %mul = mul i64 %a, %b
   ret i64 %mul
 }
@@ -42,7 +42,7 @@
 ; CHECK: msg %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65536
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %mul = mul i64 %a, %b
   ret i64 %mul
 }
@@ -53,7 +53,7 @@
 ; CHECK: msg %r2, -8(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -1
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %mul = mul i64 %a, %b
   ret i64 %mul
 }
@@ -64,7 +64,7 @@
 ; CHECK: msg %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65536
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %mul = mul i64 %a, %b
   ret i64 %mul
 }
@@ -77,7 +77,7 @@
 ; CHECK: msg %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65537
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %mul = mul i64 %a, %b
   ret i64 %mul
 }
@@ -90,7 +90,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524280
   %ptr = inttoptr i64 %add2 to i64 *
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %mul = mul i64 %a, %b
   ret i64 %mul
 }
@@ -111,16 +111,16 @@
   %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
   %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
 
-  %val0 = load i64 *%ptr0
-  %val1 = load i64 *%ptr1
-  %val2 = load i64 *%ptr2
-  %val3 = load i64 *%ptr3
-  %val4 = load i64 *%ptr4
-  %val5 = load i64 *%ptr5
-  %val6 = load i64 *%ptr6
-  %val7 = load i64 *%ptr7
-  %val8 = load i64 *%ptr8
-  %val9 = load i64 *%ptr9
+  %val0 = load i64 , i64 *%ptr0
+  %val1 = load i64 , i64 *%ptr1
+  %val2 = load i64 , i64 *%ptr2
+  %val3 = load i64 , i64 *%ptr3
+  %val4 = load i64 , i64 *%ptr4
+  %val5 = load i64 , i64 *%ptr5
+  %val6 = load i64 , i64 *%ptr6
+  %val7 = load i64 , i64 *%ptr7
+  %val8 = load i64 , i64 *%ptr8
+  %val9 = load i64 , i64 *%ptr9
 
   %ret = call i64 @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/int-mul-08.ll b/llvm/test/CodeGen/SystemZ/int-mul-08.ll
index 919c8a6..c430896 100644
--- a/llvm/test/CodeGen/SystemZ/int-mul-08.ll
+++ b/llvm/test/CodeGen/SystemZ/int-mul-08.ll
@@ -88,7 +88,7 @@
 ; CHECK-NOT: {{%r[234]}}
 ; CHECK: mlg %r2, 0(%r4)
 ; CHECK: br %r14
-  %b = load i64 *%src
+  %b = load i64 , i64 *%src
   %ax = zext i64 %a to i128
   %bx = zext i64 %b to i128
   %mulx = mul i128 %ax, %bx
@@ -103,7 +103,7 @@
 ; CHECK: mlg %r2, 524280(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65535
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %ax = zext i64 %a to i128
   %bx = zext i64 %b to i128
   %mulx = mul i128 %ax, %bx
@@ -120,7 +120,7 @@
 ; CHECK: mlg %r2, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65536
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %ax = zext i64 %a to i128
   %bx = zext i64 %b to i128
   %mulx = mul i128 %ax, %bx
@@ -135,7 +135,7 @@
 ; CHECK: mlg %r2, -8(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -1
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %ax = zext i64 %a to i128
   %bx = zext i64 %b to i128
   %mulx = mul i128 %ax, %bx
@@ -150,7 +150,7 @@
 ; CHECK: mlg %r2, -524288(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65536
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %ax = zext i64 %a to i128
   %bx = zext i64 %b to i128
   %mulx = mul i128 %ax, %bx
@@ -167,7 +167,7 @@
 ; CHECK: mlg %r2, 0(%r4)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65537
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %ax = zext i64 %a to i128
   %bx = zext i64 %b to i128
   %mulx = mul i128 %ax, %bx
@@ -184,7 +184,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524287
   %ptr = inttoptr i64 %add2 to i64 *
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %ax = zext i64 %a to i128
   %bx = zext i64 %b to i128
   %mulx = mul i128 %ax, %bx
@@ -209,16 +209,16 @@
   %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
   %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
 
-  %val0 = load i64 *%ptr0
-  %val1 = load i64 *%ptr1
-  %val2 = load i64 *%ptr2
-  %val3 = load i64 *%ptr3
-  %val4 = load i64 *%ptr4
-  %val5 = load i64 *%ptr5
-  %val6 = load i64 *%ptr6
-  %val7 = load i64 *%ptr7
-  %val8 = load i64 *%ptr8
-  %val9 = load i64 *%ptr9
+  %val0 = load i64 , i64 *%ptr0
+  %val1 = load i64 , i64 *%ptr1
+  %val2 = load i64 , i64 *%ptr2
+  %val3 = load i64 , i64 *%ptr3
+  %val4 = load i64 , i64 *%ptr4
+  %val5 = load i64 , i64 *%ptr5
+  %val6 = load i64 , i64 *%ptr6
+  %val7 = load i64 , i64 *%ptr7
+  %val8 = load i64 , i64 *%ptr8
+  %val9 = load i64 , i64 *%ptr9
 
   %ret = call i64 @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/int-sub-01.ll b/llvm/test/CodeGen/SystemZ/int-sub-01.ll
index 7793143..c04a619 100644
--- a/llvm/test/CodeGen/SystemZ/int-sub-01.ll
+++ b/llvm/test/CodeGen/SystemZ/int-sub-01.ll
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: s %r2, 0(%r3)
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %sub = sub i32 %a, %b
   ret i32 %sub
 }
@@ -30,7 +30,7 @@
 ; CHECK: s %r2, 4092(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 1023
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %sub = sub i32 %a, %b
   ret i32 %sub
 }
@@ -41,7 +41,7 @@
 ; CHECK: sy %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 1024
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %sub = sub i32 %a, %b
   ret i32 %sub
 }
@@ -52,7 +52,7 @@
 ; CHECK: sy %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131071
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %sub = sub i32 %a, %b
   ret i32 %sub
 }
@@ -65,7 +65,7 @@
 ; CHECK: s %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %sub = sub i32 %a, %b
   ret i32 %sub
 }
@@ -76,7 +76,7 @@
 ; CHECK: sy %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -1
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %sub = sub i32 %a, %b
   ret i32 %sub
 }
@@ -87,7 +87,7 @@
 ; CHECK: sy %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %sub = sub i32 %a, %b
   ret i32 %sub
 }
@@ -100,7 +100,7 @@
 ; CHECK: s %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131073
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %sub = sub i32 %a, %b
   ret i32 %sub
 }
@@ -113,7 +113,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4092
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %sub = sub i32 %a, %b
   ret i32 %sub
 }
@@ -126,7 +126,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %sub = sub i32 %a, %b
   ret i32 %sub
 }
@@ -147,16 +147,16 @@
   %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
   %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
 
-  %val0 = load i32 *%ptr0
-  %val1 = load i32 *%ptr1
-  %val2 = load i32 *%ptr2
-  %val3 = load i32 *%ptr3
-  %val4 = load i32 *%ptr4
-  %val5 = load i32 *%ptr5
-  %val6 = load i32 *%ptr6
-  %val7 = load i32 *%ptr7
-  %val8 = load i32 *%ptr8
-  %val9 = load i32 *%ptr9
+  %val0 = load i32 , i32 *%ptr0
+  %val1 = load i32 , i32 *%ptr1
+  %val2 = load i32 , i32 *%ptr2
+  %val3 = load i32 , i32 *%ptr3
+  %val4 = load i32 , i32 *%ptr4
+  %val5 = load i32 , i32 *%ptr5
+  %val6 = load i32 , i32 *%ptr6
+  %val7 = load i32 , i32 *%ptr7
+  %val8 = load i32 , i32 *%ptr8
+  %val9 = load i32 , i32 *%ptr9
 
   %ret = call i32 @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/int-sub-02.ll b/llvm/test/CodeGen/SystemZ/int-sub-02.ll
index a9c0702..23be240 100644
--- a/llvm/test/CodeGen/SystemZ/int-sub-02.ll
+++ b/llvm/test/CodeGen/SystemZ/int-sub-02.ll
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: sgf %r2, 0(%r3)
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %bext = sext i32 %b to i64
   %sub = sub i64 %a, %bext
   ret i64 %sub
@@ -31,7 +31,7 @@
 ; CHECK: sgf %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131071
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %sub = sub i64 %a, %bext
   ret i64 %sub
@@ -45,7 +45,7 @@
 ; CHECK: sgf %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %sub = sub i64 %a, %bext
   ret i64 %sub
@@ -57,7 +57,7 @@
 ; CHECK: sgf %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -1
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %sub = sub i64 %a, %bext
   ret i64 %sub
@@ -69,7 +69,7 @@
 ; CHECK: sgf %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %sub = sub i64 %a, %bext
   ret i64 %sub
@@ -83,7 +83,7 @@
 ; CHECK: sgf %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131073
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %sub = sub i64 %a, %bext
   ret i64 %sub
@@ -97,7 +97,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524284
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = sext i32 %b to i64
   %sub = sub i64 %a, %bext
   ret i64 %sub
@@ -119,16 +119,16 @@
   %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
   %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
 
-  %val0 = load i32 *%ptr0
-  %val1 = load i32 *%ptr1
-  %val2 = load i32 *%ptr2
-  %val3 = load i32 *%ptr3
-  %val4 = load i32 *%ptr4
-  %val5 = load i32 *%ptr5
-  %val6 = load i32 *%ptr6
-  %val7 = load i32 *%ptr7
-  %val8 = load i32 *%ptr8
-  %val9 = load i32 *%ptr9
+  %val0 = load i32 , i32 *%ptr0
+  %val1 = load i32 , i32 *%ptr1
+  %val2 = load i32 , i32 *%ptr2
+  %val3 = load i32 , i32 *%ptr3
+  %val4 = load i32 , i32 *%ptr4
+  %val5 = load i32 , i32 *%ptr5
+  %val6 = load i32 , i32 *%ptr6
+  %val7 = load i32 , i32 *%ptr7
+  %val8 = load i32 , i32 *%ptr8
+  %val9 = load i32 , i32 *%ptr9
 
   %frob0 = add i32 %val0, 100
   %frob1 = add i32 %val1, 100
diff --git a/llvm/test/CodeGen/SystemZ/int-sub-03.ll b/llvm/test/CodeGen/SystemZ/int-sub-03.ll
index 3bb3265..9d51006 100644
--- a/llvm/test/CodeGen/SystemZ/int-sub-03.ll
+++ b/llvm/test/CodeGen/SystemZ/int-sub-03.ll
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: slgf %r2, 0(%r3)
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %bext = zext i32 %b to i64
   %sub = sub i64 %a, %bext
   ret i64 %sub
@@ -31,7 +31,7 @@
 ; CHECK: slgf %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131071
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i64
   %sub = sub i64 %a, %bext
   ret i64 %sub
@@ -45,7 +45,7 @@
 ; CHECK: slgf %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i64
   %sub = sub i64 %a, %bext
   ret i64 %sub
@@ -57,7 +57,7 @@
 ; CHECK: slgf %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -1
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i64
   %sub = sub i64 %a, %bext
   ret i64 %sub
@@ -69,7 +69,7 @@
 ; CHECK: slgf %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i64
   %sub = sub i64 %a, %bext
   ret i64 %sub
@@ -83,7 +83,7 @@
 ; CHECK: slgf %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131073
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i64
   %sub = sub i64 %a, %bext
   ret i64 %sub
@@ -97,7 +97,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524284
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i64
   %sub = sub i64 %a, %bext
   ret i64 %sub
@@ -119,16 +119,16 @@
   %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
   %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
 
-  %val0 = load i32 *%ptr0
-  %val1 = load i32 *%ptr1
-  %val2 = load i32 *%ptr2
-  %val3 = load i32 *%ptr3
-  %val4 = load i32 *%ptr4
-  %val5 = load i32 *%ptr5
-  %val6 = load i32 *%ptr6
-  %val7 = load i32 *%ptr7
-  %val8 = load i32 *%ptr8
-  %val9 = load i32 *%ptr9
+  %val0 = load i32 , i32 *%ptr0
+  %val1 = load i32 , i32 *%ptr1
+  %val2 = load i32 , i32 *%ptr2
+  %val3 = load i32 , i32 *%ptr3
+  %val4 = load i32 , i32 *%ptr4
+  %val5 = load i32 , i32 *%ptr5
+  %val6 = load i32 , i32 *%ptr6
+  %val7 = load i32 , i32 *%ptr7
+  %val8 = load i32 , i32 *%ptr8
+  %val9 = load i32 , i32 *%ptr9
 
   %frob0 = add i32 %val0, 100
   %frob1 = add i32 %val1, 100
diff --git a/llvm/test/CodeGen/SystemZ/int-sub-04.ll b/llvm/test/CodeGen/SystemZ/int-sub-04.ll
index 0545024..ec2944d 100644
--- a/llvm/test/CodeGen/SystemZ/int-sub-04.ll
+++ b/llvm/test/CodeGen/SystemZ/int-sub-04.ll
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: sg %r2, 0(%r3)
 ; CHECK: br %r14
-  %b = load i64 *%src
+  %b = load i64 , i64 *%src
   %sub = sub i64 %a, %b
   ret i64 %sub
 }
@@ -30,7 +30,7 @@
 ; CHECK: sg %r2, 524280(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65535
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %sub = sub i64 %a, %b
   ret i64 %sub
 }
@@ -43,7 +43,7 @@
 ; CHECK: sg %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65536
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %sub = sub i64 %a, %b
   ret i64 %sub
 }
@@ -54,7 +54,7 @@
 ; CHECK: sg %r2, -8(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -1
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %sub = sub i64 %a, %b
   ret i64 %sub
 }
@@ -65,7 +65,7 @@
 ; CHECK: sg %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65536
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %sub = sub i64 %a, %b
   ret i64 %sub
 }
@@ -78,7 +78,7 @@
 ; CHECK: sg %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65537
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %sub = sub i64 %a, %b
   ret i64 %sub
 }
@@ -91,7 +91,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524280
   %ptr = inttoptr i64 %add2 to i64 *
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %sub = sub i64 %a, %b
   ret i64 %sub
 }
@@ -112,16 +112,16 @@
   %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
   %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
 
-  %val0 = load i64 *%ptr0
-  %val1 = load i64 *%ptr1
-  %val2 = load i64 *%ptr2
-  %val3 = load i64 *%ptr3
-  %val4 = load i64 *%ptr4
-  %val5 = load i64 *%ptr5
-  %val6 = load i64 *%ptr6
-  %val7 = load i64 *%ptr7
-  %val8 = load i64 *%ptr8
-  %val9 = load i64 *%ptr9
+  %val0 = load i64 , i64 *%ptr0
+  %val1 = load i64 , i64 *%ptr1
+  %val2 = load i64 , i64 *%ptr2
+  %val3 = load i64 , i64 *%ptr3
+  %val4 = load i64 , i64 *%ptr4
+  %val5 = load i64 , i64 *%ptr5
+  %val6 = load i64 , i64 *%ptr6
+  %val7 = load i64 , i64 *%ptr7
+  %val8 = load i64 , i64 *%ptr8
+  %val9 = load i64 , i64 *%ptr9
 
   %ret = call i64 @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/int-sub-05.ll b/llvm/test/CodeGen/SystemZ/int-sub-05.ll
index 37ab2c3b..9775298 100644
--- a/llvm/test/CodeGen/SystemZ/int-sub-05.ll
+++ b/llvm/test/CodeGen/SystemZ/int-sub-05.ll
@@ -11,7 +11,7 @@
 ; CHECK: slgr {{%r[0-5]}}, %r4
 ; CHECK: slbgr {{%r[0-5]}}, %r3
 ; CHECK: br %r14
-  %a = load i128 *%ptr
+  %a = load i128 , i128 *%ptr
   %highx = zext i64 %high to i128
   %lowx = zext i64 %low to i128
   %bhigh = shl i128 %highx, 64
@@ -29,8 +29,8 @@
 ; CHECK: br %r14
   %bptr = inttoptr i64 %addr to i128 *
   %aptr = getelementptr i128, i128 *%bptr, i64 -8
-  %a = load i128 *%aptr
-  %b = load i128 *%bptr
+  %a = load i128 , i128 *%aptr
+  %b = load i128 , i128 *%bptr
   %sub = sub i128 %a, %b
   store i128 %sub, i128 *%aptr
   ret void
@@ -45,8 +45,8 @@
   %addr = add i64 %base, 524272
   %bptr = inttoptr i64 %addr to i128 *
   %aptr = getelementptr i128, i128 *%bptr, i64 -8
-  %a = load i128 *%aptr
-  %b = load i128 *%bptr
+  %a = load i128 , i128 *%aptr
+  %b = load i128 , i128 *%bptr
   %sub = sub i128 %a, %b
   store i128 %sub, i128 *%aptr
   ret void
@@ -63,8 +63,8 @@
   %addr = add i64 %base, 524280
   %bptr = inttoptr i64 %addr to i128 *
   %aptr = getelementptr i128, i128 *%bptr, i64 -8
-  %a = load i128 *%aptr
-  %b = load i128 *%bptr
+  %a = load i128 , i128 *%aptr
+  %b = load i128 , i128 *%bptr
   %sub = sub i128 %a, %b
   store i128 %sub, i128 *%aptr
   ret void
@@ -81,8 +81,8 @@
   %addr = add i64 %base, 524288
   %bptr = inttoptr i64 %addr to i128 *
   %aptr = getelementptr i128, i128 *%bptr, i64 -8
-  %a = load i128 *%aptr
-  %b = load i128 *%bptr
+  %a = load i128 , i128 *%aptr
+  %b = load i128 , i128 *%bptr
   %sub = sub i128 %a, %b
   store i128 %sub, i128 *%aptr
   ret void
@@ -97,8 +97,8 @@
   %addr = add i64 %base, -524288
   %bptr = inttoptr i64 %addr to i128 *
   %aptr = getelementptr i128, i128 *%bptr, i64 -8
-  %a = load i128 *%aptr
-  %b = load i128 *%bptr
+  %a = load i128 , i128 *%aptr
+  %b = load i128 , i128 *%bptr
   %sub = sub i128 %a, %b
   store i128 %sub, i128 *%aptr
   ret void
@@ -113,8 +113,8 @@
   %addr = add i64 %base, -524296
   %bptr = inttoptr i64 %addr to i128 *
   %aptr = getelementptr i128, i128 *%bptr, i64 -8
-  %a = load i128 *%aptr
-  %b = load i128 *%bptr
+  %a = load i128 , i128 *%aptr
+  %b = load i128 , i128 *%bptr
   %sub = sub i128 %a, %b
   store i128 %sub, i128 *%aptr
   ret void
@@ -133,15 +133,15 @@
   %ptr3 = getelementptr i128, i128 *%ptr0, i128 6
   %ptr4 = getelementptr i128, i128 *%ptr0, i128 8
 
-  %val0 = load i128 *%ptr0
-  %val1 = load i128 *%ptr1
-  %val2 = load i128 *%ptr2
-  %val3 = load i128 *%ptr3
-  %val4 = load i128 *%ptr4
+  %val0 = load i128 , i128 *%ptr0
+  %val1 = load i128 , i128 *%ptr1
+  %val2 = load i128 , i128 *%ptr2
+  %val3 = load i128 , i128 *%ptr3
+  %val4 = load i128 , i128 *%ptr4
 
   %retptr = call i128 *@foo()
 
-  %ret = load i128 *%retptr
+  %ret = load i128 , i128 *%retptr
   %sub0 = sub i128 %ret, %val0
   %sub1 = sub i128 %sub0, %val1
   %sub2 = sub i128 %sub1, %val2
diff --git a/llvm/test/CodeGen/SystemZ/int-sub-06.ll b/llvm/test/CodeGen/SystemZ/int-sub-06.ll
index ae7954e..c26383e 100644
--- a/llvm/test/CodeGen/SystemZ/int-sub-06.ll
+++ b/llvm/test/CodeGen/SystemZ/int-sub-06.ll
@@ -9,7 +9,7 @@
 ; CHECK: slgfr {{%r[0-5]}}, %r3
 ; CHECK: slbgr
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
   %bext = zext i32 %b to i128
   %sub = sub i128 %xor, %bext
@@ -23,7 +23,7 @@
 ; CHECK: slgfr {{%r[0-5]}}, %r3
 ; CHECK: slbgr
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
   %trunc = trunc i64 %b to i32
   %bext = zext i32 %trunc to i128
@@ -39,7 +39,7 @@
 ; CHECK: slgfr {{%r[0-5]}}, %r3
 ; CHECK: slbgr
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
   %bext = zext i64 %b to i128
   %and = and i128 %bext, 4294967295
@@ -54,9 +54,9 @@
 ; CHECK: slgf {{%r[0-5]}}, 0(%r3)
 ; CHECK: slbgr
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
-  %b = load i32 *%bsrc
+  %b = load i32 , i32 *%bsrc
   %bext = zext i32 %b to i128
   %sub = sub i128 %xor, %bext
   store i128 %sub, i128 *%aptr
@@ -69,10 +69,10 @@
 ; CHECK: slgf {{%r[0-5]}}, 524284(%r3)
 ; CHECK: slbgr
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
   %ptr = getelementptr i32, i32 *%bsrc, i64 131071
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i128
   %sub = sub i128 %xor, %bext
   store i128 %sub, i128 *%aptr
@@ -87,10 +87,10 @@
 ; CHECK: slgf {{%r[0-5]}}, 0(%r3)
 ; CHECK: slbgr
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
   %ptr = getelementptr i32, i32 *%bsrc, i64 131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i128
   %sub = sub i128 %xor, %bext
   store i128 %sub, i128 *%aptr
@@ -103,10 +103,10 @@
 ; CHECK: slgf {{%r[0-5]}}, -4(%r3)
 ; CHECK: slbgr
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
   %ptr = getelementptr i32, i32 *%bsrc, i128 -1
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i128
   %sub = sub i128 %xor, %bext
   store i128 %sub, i128 *%aptr
@@ -119,10 +119,10 @@
 ; CHECK: slgf {{%r[0-5]}}, -524288(%r3)
 ; CHECK: slbgr
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
   %ptr = getelementptr i32, i32 *%bsrc, i128 -131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i128
   %sub = sub i128 %xor, %bext
   store i128 %sub, i128 *%aptr
@@ -137,10 +137,10 @@
 ; CHECK: slgf {{%r[0-5]}}, 0(%r3)
 ; CHECK: slbgr
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
   %ptr = getelementptr i32, i32 *%bsrc, i128 -131073
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i128
   %sub = sub i128 %xor, %bext
   store i128 %sub, i128 *%aptr
@@ -152,12 +152,12 @@
 ; CHECK-LABEL: f10:
 ; CHECK: slgf {{%r[0-5]}}, 524284({{%r4,%r3|%r3,%r4}})
 ; CHECK: br %r14
-  %a = load i128 *%aptr
+  %a = load i128 , i128 *%aptr
   %xor = xor i128 %a, 127
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524284
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %bext = zext i32 %b to i128
   %sub = sub i128 %xor, %bext
   store i128 %sub, i128 *%aptr
diff --git a/llvm/test/CodeGen/SystemZ/int-sub-07.ll b/llvm/test/CodeGen/SystemZ/int-sub-07.ll
index 7f0f59a..1d54fd6 100644
--- a/llvm/test/CodeGen/SystemZ/int-sub-07.ll
+++ b/llvm/test/CodeGen/SystemZ/int-sub-07.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: sh %r2, 0(%r3)
 ; CHECK: br %r14
-  %half = load i16 *%src
+  %half = load i16 , i16 *%src
   %rhs = sext i16 %half to i32
   %res = sub i32 %lhs, %rhs
   ret i32 %res
@@ -20,7 +20,7 @@
 ; CHECK: sh %r2, 4094(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 2047
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = sub i32 %lhs, %rhs
   ret i32 %res
@@ -32,7 +32,7 @@
 ; CHECK: shy %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 2048
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = sub i32 %lhs, %rhs
   ret i32 %res
@@ -44,7 +44,7 @@
 ; CHECK: shy %r2, 524286(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 262143
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = sub i32 %lhs, %rhs
   ret i32 %res
@@ -58,7 +58,7 @@
 ; CHECK: sh %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 262144
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = sub i32 %lhs, %rhs
   ret i32 %res
@@ -70,7 +70,7 @@
 ; CHECK: shy %r2, -2(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -1
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = sub i32 %lhs, %rhs
   ret i32 %res
@@ -82,7 +82,7 @@
 ; CHECK: shy %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -262144
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = sub i32 %lhs, %rhs
   ret i32 %res
@@ -96,7 +96,7 @@
 ; CHECK: sh %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i16, i16 *%src, i64 -262145
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = sub i32 %lhs, %rhs
   ret i32 %res
@@ -110,7 +110,7 @@
   %sub1 = add i64 %src, %index
   %sub2 = add i64 %sub1, 4094
   %ptr = inttoptr i64 %sub2 to i16 *
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = sub i32 %lhs, %rhs
   ret i32 %res
@@ -124,7 +124,7 @@
   %sub1 = add i64 %src, %index
   %sub2 = add i64 %sub1, 4096
   %ptr = inttoptr i64 %sub2 to i16 *
-  %half = load i16 *%ptr
+  %half = load i16 , i16 *%ptr
   %rhs = sext i16 %half to i32
   %res = sub i32 %lhs, %rhs
   ret i32 %res
diff --git a/llvm/test/CodeGen/SystemZ/loop-01.ll b/llvm/test/CodeGen/SystemZ/loop-01.ll
index 32d9add..b51c96d 100644
--- a/llvm/test/CodeGen/SystemZ/loop-01.ll
+++ b/llvm/test/CodeGen/SystemZ/loop-01.ll
@@ -37,7 +37,7 @@
 loop:
   %count = phi i32 [ 0, %entry ], [ %next, %loop.next ]
   %next = add i32 %count, 1
-  %val = load volatile i32 *%src
+  %val = load volatile i32 , i32 *%src
   %cmp = icmp eq i32 %val, 0
   br i1 %cmp, label %loop.next, label %loop.store
 
@@ -67,7 +67,7 @@
 loop:
   %count = phi i64 [ 0, %entry ], [ %next, %loop.next ]
   %next = add i64 %count, 1
-  %val = load volatile i64 *%src
+  %val = load volatile i64 , i64 *%src
   %cmp = icmp eq i64 %val, 0
   br i1 %cmp, label %loop.next, label %loop.store
 
@@ -100,7 +100,7 @@
 loop:
   %left = phi i64 [ %count, %entry ], [ %next, %loop.next ]
   store volatile i64 %left, i64 *%dest2
-  %val = load volatile i32 *%src
+  %val = load volatile i32 , i32 *%src
   %cmp = icmp eq i32 %val, 0
   br i1 %cmp, label %loop.next, label %loop.store
 
diff --git a/llvm/test/CodeGen/SystemZ/memchr-02.ll b/llvm/test/CodeGen/SystemZ/memchr-02.ll
index 8986627..71b2cf0 100644
--- a/llvm/test/CodeGen/SystemZ/memchr-02.ll
+++ b/llvm/test/CodeGen/SystemZ/memchr-02.ll
@@ -29,7 +29,7 @@
 ; CHECK-NOT: %r0
 ; CHECK: srst %r2, [[RES1]]
 ; CHECK: br %r14
-  %char = load volatile i8 *%charptr
+  %char = load volatile i8 , i8 *%charptr
   %charext = zext i8 %char to i32
   %res1 = call i8 *@memchr(i8 *%src, i32 %charext, i64 %len)
   %res2 = call i8 *@memchr(i8 *%res1, i32 %charext, i64 %len)
@@ -48,7 +48,7 @@
 ; CHECK: lr %r0, [[CHAR]]
 ; CHECK: srst %r2, [[RES1]]
 ; CHECK: br %r14
-  %char = load volatile i8 *%charptr
+  %char = load volatile i8 , i8 *%charptr
   %charext = zext i8 %char to i32
   %res1 = call i8 *@memchr(i8 *%src, i32 %charext, i64 %len)
   call void asm sideeffect "blah $0", "{r0}" (i32 0)
diff --git a/llvm/test/CodeGen/SystemZ/memcpy-02.ll b/llvm/test/CodeGen/SystemZ/memcpy-02.ll
index 19858f2..df44502 100644
--- a/llvm/test/CodeGen/SystemZ/memcpy-02.ll
+++ b/llvm/test/CodeGen/SystemZ/memcpy-02.ll
@@ -17,7 +17,7 @@
 ; CHECK: mvc 1(1,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
-  %val = load i8 *%ptr1
+  %val = load i8 , i8 *%ptr1
   store i8 %val, i8 *%ptr2
   ret void
 }
@@ -28,7 +28,7 @@
 ; CHECK: mvc 1(1,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
-  %val = load i8 *%ptr1
+  %val = load i8 , i8 *%ptr1
   %ext = zext i8 %val to i32
   %trunc = trunc i32 %ext to i8
   store i8 %trunc, i8 *%ptr2
@@ -41,7 +41,7 @@
 ; CHECK: mvc 1(1,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
-  %val = load i8 *%ptr1
+  %val = load i8 , i8 *%ptr1
   %ext = zext i8 %val to i64
   %trunc = trunc i64 %ext to i8
   store i8 %trunc, i8 *%ptr2
@@ -54,7 +54,7 @@
 ; CHECK: mvc 1(1,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
-  %val = load i8 *%ptr1
+  %val = load i8 , i8 *%ptr1
   %ext = sext i8 %val to i32
   %trunc = trunc i32 %ext to i8
   store i8 %trunc, i8 *%ptr2
@@ -67,7 +67,7 @@
 ; CHECK: mvc 1(1,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
-  %val = load i8 *%ptr1
+  %val = load i8 , i8 *%ptr1
   %ext = sext i8 %val to i64
   %trunc = trunc i64 %ext to i8
   store i8 %trunc, i8 *%ptr2
@@ -80,7 +80,7 @@
 ; CHECK: mvc 2(2,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
-  %val = load i16 *%ptr1
+  %val = load i16 , i16 *%ptr1
   store i16 %val, i16 *%ptr2
   ret void
 }
@@ -91,7 +91,7 @@
 ; CHECK: mvc 2(2,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
-  %val = load i16 *%ptr1
+  %val = load i16 , i16 *%ptr1
   %ext = zext i16 %val to i32
   %trunc = trunc i32 %ext to i16
   store i16 %trunc, i16 *%ptr2
@@ -104,7 +104,7 @@
 ; CHECK: mvc 2(2,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
-  %val = load i16 *%ptr1
+  %val = load i16 , i16 *%ptr1
   %ext = zext i16 %val to i64
   %trunc = trunc i64 %ext to i16
   store i16 %trunc, i16 *%ptr2
@@ -117,7 +117,7 @@
 ; CHECK: mvc 2(2,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
-  %val = load i16 *%ptr1
+  %val = load i16 , i16 *%ptr1
   %ext = sext i16 %val to i32
   %trunc = trunc i32 %ext to i16
   store i16 %trunc, i16 *%ptr2
@@ -130,7 +130,7 @@
 ; CHECK: mvc 2(2,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
-  %val = load i16 *%ptr1
+  %val = load i16 , i16 *%ptr1
   %ext = sext i16 %val to i64
   %trunc = trunc i64 %ext to i16
   store i16 %trunc, i16 *%ptr2
@@ -143,7 +143,7 @@
 ; CHECK: mvc 4(4,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i32, i32 *%ptr1, i64 1
-  %val = load i32 *%ptr1
+  %val = load i32 , i32 *%ptr1
   store i32 %val, i32 *%ptr2
   ret void
 }
@@ -154,7 +154,7 @@
 ; CHECK: mvc 4(4,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i32, i32 *%ptr1, i64 1
-  %val = load i32 *%ptr1
+  %val = load i32 , i32 *%ptr1
   %ext = zext i32 %val to i64
   %trunc = trunc i64 %ext to i32
   store i32 %trunc, i32 *%ptr2
@@ -167,7 +167,7 @@
 ; CHECK: mvc 4(4,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i32, i32 *%ptr1, i64 1
-  %val = load i32 *%ptr1
+  %val = load i32 , i32 *%ptr1
   %ext = sext i32 %val to i64
   %trunc = trunc i64 %ext to i32
   store i32 %trunc, i32 *%ptr2
@@ -180,7 +180,7 @@
 ; CHECK: mvc 8(8,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
-  %val = load i64 *%ptr1
+  %val = load i64 , i64 *%ptr1
   store i64 %val, i64 *%ptr2
   ret void
 }
@@ -191,7 +191,7 @@
 ; CHECK: mvc 4(4,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr float, float *%ptr1, i64 1
-  %val = load float *%ptr1
+  %val = load float , float *%ptr1
   store float %val, float *%ptr2
   ret void
 }
@@ -202,7 +202,7 @@
 ; CHECK: mvc 8(8,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr double, double *%ptr1, i64 1
-  %val = load double *%ptr1
+  %val = load double , double *%ptr1
   store double %val, double *%ptr2
   ret void
 }
@@ -213,7 +213,7 @@
 ; CHECK: mvc 16(16,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr fp128, fp128 *%ptr1, i64 1
-  %val = load fp128 *%ptr1
+  %val = load fp128 , fp128 *%ptr1
   store fp128 %val, fp128 *%ptr2
   ret void
 }
@@ -224,7 +224,7 @@
 ; CHECK-NOT: mvc
 ; CHECK: br %r14
   %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
-  %val = load volatile i64 *%ptr1
+  %val = load volatile i64 , i64 *%ptr1
   store i64 %val, i64 *%ptr2
   ret void
 }
@@ -235,7 +235,7 @@
 ; CHECK-NOT: mvc
 ; CHECK: br %r14
   %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
-  %val = load i64 *%ptr1
+  %val = load i64 , i64 *%ptr1
   store volatile i64 %val, i64 *%ptr2
   ret void
 }
@@ -247,7 +247,7 @@
 ; CHECK-LABEL: f20:
 ; CHECK-NOT: mvc
 ; CHECK: br %r14
-  %val = load i64 *%ptr1
+  %val = load i64 , i64 *%ptr1
   store i64 %val, i64 *%ptr2
   ret void
 }
@@ -257,7 +257,7 @@
 ; CHECK-LABEL: f21:
 ; CHECK-NOT: mvc
 ; CHECK: br %r14
-  %val = load i64 *%ptr1, align 2
+  %val = load i64 , i64 *%ptr1, align 2
   store i64 %val, i64 *%ptr2, align 2
   ret void
 }
@@ -270,7 +270,7 @@
   %add = add i64 %base, 1
   %ptr1 = inttoptr i64 %base to i64 *
   %ptr2 = inttoptr i64 %add to i64 *
-  %val = load i64 *%ptr1, align 1
+  %val = load i64 , i64 *%ptr1, align 1
   store i64 %val, i64 *%ptr2, align 1
   ret void
 }
@@ -282,7 +282,7 @@
 ; CHECK-DAG: larl [[DST:%r[0-5]]], g1dst
 ; CHECK: mvc 0(1,[[DST]]), 0([[SRC]])
 ; CHECK: br %r14
-  %val = load i8 *@g1src
+  %val = load i8 , i8 *@g1src
   store i8 %val, i8 *@g1dst
   ret void
 }
@@ -293,7 +293,7 @@
 ; CHECK: lhrl [[REG:%r[0-5]]], g2src
 ; CHECK: sthrl [[REG]], g2dst
 ; CHECK: br %r14
-  %val = load i16 *@g2src
+  %val = load i16 , i16 *@g2src
   store i16 %val, i16 *@g2dst
   ret void
 }
@@ -304,7 +304,7 @@
 ; CHECK: lrl [[REG:%r[0-5]]], g3
 ; CHECK: st [[REG]], 0(%r2)
 ; CHECK: br %r14
-  %val = load i32 *@g3
+  %val = load i32 , i32 *@g3
   store i32 %val, i32 *%ptr
   ret void
 }
@@ -315,7 +315,7 @@
 ; CHECK: l [[REG:%r[0-5]]], 0(%r2)
 ; CHECK: strl [[REG]], g3
 ; CHECK: br %r14
-  %val = load i32 *%ptr
+  %val = load i32 , i32 *%ptr
   store i32 %val, i32 *@g3
   ret void
 }
@@ -326,7 +326,7 @@
 ; CHECK: lgrl [[REG:%r[0-5]]], g4
 ; CHECK: stg [[REG]], 0(%r2)
 ; CHECK: br %r14
-  %val = load i64 *@g4
+  %val = load i64 , i64 *@g4
   store i64 %val, i64 *%ptr
   ret void
 }
@@ -337,7 +337,7 @@
 ; CHECK: lg [[REG:%r[0-5]]], 0(%r2)
 ; CHECK: stgrl [[REG]], g4
 ; CHECK: br %r14
-  %val = load i64 *%ptr
+  %val = load i64 , i64 *%ptr
   store i64 %val, i64 *@g4
   ret void
 }
@@ -349,7 +349,7 @@
 ; CHECK-DAG: larl [[DST:%r[0-5]]], g5dst
 ; CHECK: mvc 0(16,[[DST]]), 0([[SRC]])
 ; CHECK: br %r14
-  %val = load fp128 *@g5src, align 16
+  %val = load fp128 , fp128 *@g5src, align 16
   store fp128 %val, fp128 *@g5dst, align 16
   ret void
 }
@@ -360,7 +360,7 @@
 ; CHECK: mvc 8(8,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
-  %val = load i64 *%ptr1, align 1
+  %val = load i64 , i64 *%ptr1, align 1
   store i64 %val, i64 *%ptr2, align 1
   ret void
 }
@@ -370,7 +370,7 @@
 ; CHECK-LABEL: f31:
 ; CHECK: mvc 0(8,%r3), 0(%r2)
 ; CHECK: br %r14
-  %val = load i64 *%ptr1, align 2, !tbaa !1
+  %val = load i64 , i64 *%ptr1, align 2, !tbaa !1
   store i64 %val, i64 *%ptr2, align 2, !tbaa !2
   ret void
 }
@@ -380,7 +380,7 @@
 ; CHECK-LABEL: f32:
 ; CHECK-NOT: mvc
 ; CHECK: br %r14
-  %val = load i64 *%ptr1, align 2, !tbaa !1
+  %val = load i64 , i64 *%ptr1, align 2, !tbaa !1
   store i64 %val, i64 *%ptr2, align 2, !tbaa !1
   ret void
 }
diff --git a/llvm/test/CodeGen/SystemZ/or-01.ll b/llvm/test/CodeGen/SystemZ/or-01.ll
index 59b3fe3..ce556ef 100644
--- a/llvm/test/CodeGen/SystemZ/or-01.ll
+++ b/llvm/test/CodeGen/SystemZ/or-01.ll
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: o %r2, 0(%r3)
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %or = or i32 %a, %b
   ret i32 %or
 }
@@ -30,7 +30,7 @@
 ; CHECK: o %r2, 4092(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 1023
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %or = or i32 %a, %b
   ret i32 %or
 }
@@ -41,7 +41,7 @@
 ; CHECK: oy %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 1024
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %or = or i32 %a, %b
   ret i32 %or
 }
@@ -52,7 +52,7 @@
 ; CHECK: oy %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131071
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %or = or i32 %a, %b
   ret i32 %or
 }
@@ -65,7 +65,7 @@
 ; CHECK: o %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %or = or i32 %a, %b
   ret i32 %or
 }
@@ -76,7 +76,7 @@
 ; CHECK: oy %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -1
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %or = or i32 %a, %b
   ret i32 %or
 }
@@ -87,7 +87,7 @@
 ; CHECK: oy %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %or = or i32 %a, %b
   ret i32 %or
 }
@@ -100,7 +100,7 @@
 ; CHECK: o %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131073
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %or = or i32 %a, %b
   ret i32 %or
 }
@@ -113,7 +113,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4092
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %or = or i32 %a, %b
   ret i32 %or
 }
@@ -126,7 +126,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %or = or i32 %a, %b
   ret i32 %or
 }
@@ -147,16 +147,16 @@
   %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
   %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
 
-  %val0 = load i32 *%ptr0
-  %val1 = load i32 *%ptr1
-  %val2 = load i32 *%ptr2
-  %val3 = load i32 *%ptr3
-  %val4 = load i32 *%ptr4
-  %val5 = load i32 *%ptr5
-  %val6 = load i32 *%ptr6
-  %val7 = load i32 *%ptr7
-  %val8 = load i32 *%ptr8
-  %val9 = load i32 *%ptr9
+  %val0 = load i32 , i32 *%ptr0
+  %val1 = load i32 , i32 *%ptr1
+  %val2 = load i32 , i32 *%ptr2
+  %val3 = load i32 , i32 *%ptr3
+  %val4 = load i32 , i32 *%ptr4
+  %val5 = load i32 , i32 *%ptr5
+  %val6 = load i32 , i32 *%ptr6
+  %val7 = load i32 , i32 *%ptr7
+  %val8 = load i32 , i32 *%ptr8
+  %val9 = load i32 , i32 *%ptr9
 
   %ret = call i32 @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/or-03.ll b/llvm/test/CodeGen/SystemZ/or-03.ll
index 364caf3..f299537 100644
--- a/llvm/test/CodeGen/SystemZ/or-03.ll
+++ b/llvm/test/CodeGen/SystemZ/or-03.ll
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: og %r2, 0(%r3)
 ; CHECK: br %r14
-  %b = load i64 *%src
+  %b = load i64 , i64 *%src
   %or = or i64 %a, %b
   ret i64 %or
 }
@@ -30,7 +30,7 @@
 ; CHECK: og %r2, 524280(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65535
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %or = or i64 %a, %b
   ret i64 %or
 }
@@ -43,7 +43,7 @@
 ; CHECK: og %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65536
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %or = or i64 %a, %b
   ret i64 %or
 }
@@ -54,7 +54,7 @@
 ; CHECK: og %r2, -8(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -1
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %or = or i64 %a, %b
   ret i64 %or
 }
@@ -65,7 +65,7 @@
 ; CHECK: og %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65536
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %or = or i64 %a, %b
   ret i64 %or
 }
@@ -78,7 +78,7 @@
 ; CHECK: og %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65537
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %or = or i64 %a, %b
   ret i64 %or
 }
@@ -91,7 +91,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524280
   %ptr = inttoptr i64 %add2 to i64 *
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %or = or i64 %a, %b
   ret i64 %or
 }
@@ -112,16 +112,16 @@
   %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
   %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
 
-  %val0 = load i64 *%ptr0
-  %val1 = load i64 *%ptr1
-  %val2 = load i64 *%ptr2
-  %val3 = load i64 *%ptr3
-  %val4 = load i64 *%ptr4
-  %val5 = load i64 *%ptr5
-  %val6 = load i64 *%ptr6
-  %val7 = load i64 *%ptr7
-  %val8 = load i64 *%ptr8
-  %val9 = load i64 *%ptr9
+  %val0 = load i64 , i64 *%ptr0
+  %val1 = load i64 , i64 *%ptr1
+  %val2 = load i64 , i64 *%ptr2
+  %val3 = load i64 , i64 *%ptr3
+  %val4 = load i64 , i64 *%ptr4
+  %val5 = load i64 , i64 *%ptr5
+  %val6 = load i64 , i64 *%ptr6
+  %val7 = load i64 , i64 *%ptr7
+  %val8 = load i64 , i64 *%ptr8
+  %val9 = load i64 , i64 *%ptr9
 
   %ret = call i64 @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/or-05.ll b/llvm/test/CodeGen/SystemZ/or-05.ll
index 42cc781..3fb70d9 100644
--- a/llvm/test/CodeGen/SystemZ/or-05.ll
+++ b/llvm/test/CodeGen/SystemZ/or-05.ll
@@ -7,7 +7,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: oi 0(%r2), 1
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %or = or i8 %val, -255
   store i8 %or, i8 *%ptr
   ret void
@@ -18,7 +18,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %or = or i8 %val, -2
   store i8 %or, i8 *%ptr
   ret void
@@ -29,7 +29,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK: oi 0(%r2), 1
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %or = or i8 %val, 1
   store i8 %or, i8 *%ptr
   ret void
@@ -40,7 +40,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %or = or i8 %val, 254
   store i8 %or, i8 *%ptr
   ret void
@@ -52,7 +52,7 @@
 ; CHECK: oi 4095(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 4095
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %or = or i8 %val, 127
   store i8 %or, i8 *%ptr
   ret void
@@ -64,7 +64,7 @@
 ; CHECK: oiy 4096(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 4096
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %or = or i8 %val, 127
   store i8 %or, i8 *%ptr
   ret void
@@ -76,7 +76,7 @@
 ; CHECK: oiy 524287(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524287
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %or = or i8 %val, 127
   store i8 %or, i8 *%ptr
   ret void
@@ -90,7 +90,7 @@
 ; CHECK: oi 0(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524288
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %or = or i8 %val, 127
   store i8 %or, i8 *%ptr
   ret void
@@ -102,7 +102,7 @@
 ; CHECK: oiy -1(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -1
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %or = or i8 %val, 127
   store i8 %or, i8 *%ptr
   ret void
@@ -114,7 +114,7 @@
 ; CHECK: oiy -524288(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524288
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %or = or i8 %val, 127
   store i8 %or, i8 *%ptr
   ret void
@@ -128,7 +128,7 @@
 ; CHECK: oi 0(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524289
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %or = or i8 %val, 127
   store i8 %or, i8 *%ptr
   ret void
@@ -143,7 +143,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4095
   %ptr = inttoptr i64 %add2 to i8 *
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %or = or i8 %val, 127
   store i8 %or, i8 *%ptr
   ret void
@@ -158,7 +158,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i8 *
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %or = or i8 %val, 127
   store i8 %or, i8 *%ptr
   ret void
diff --git a/llvm/test/CodeGen/SystemZ/or-06.ll b/llvm/test/CodeGen/SystemZ/or-06.ll
index 0a865d3..6f441f4 100644
--- a/llvm/test/CodeGen/SystemZ/or-06.ll
+++ b/llvm/test/CodeGen/SystemZ/or-06.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %or = or i32 %ext, -2
   %trunc = trunc i32 %or to i8
@@ -21,7 +21,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %or = or i64 %ext, -2
   %trunc = trunc i64 %or to i8
@@ -34,7 +34,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %or = or i32 %ext, 254
   %trunc = trunc i32 %or to i8
@@ -47,7 +47,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %or = or i64 %ext, 254
   %trunc = trunc i64 %or to i8
@@ -60,7 +60,7 @@
 ; CHECK-LABEL: f5:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %or = or i32 %ext, -2
   %trunc = trunc i32 %or to i8
@@ -73,7 +73,7 @@
 ; CHECK-LABEL: f6:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %or = or i64 %ext, -2
   %trunc = trunc i64 %or to i8
@@ -86,7 +86,7 @@
 ; CHECK-LABEL: f7:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %or = or i32 %ext, 254
   %trunc = trunc i32 %or to i8
@@ -99,7 +99,7 @@
 ; CHECK-LABEL: f8:
 ; CHECK: oi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %or = or i64 %ext, 254
   %trunc = trunc i64 %or to i8
diff --git a/llvm/test/CodeGen/SystemZ/or-08.ll b/llvm/test/CodeGen/SystemZ/or-08.ll
index 17a1174..a9921b1 100644
--- a/llvm/test/CodeGen/SystemZ/or-08.ll
+++ b/llvm/test/CodeGen/SystemZ/or-08.ll
@@ -8,8 +8,8 @@
 ; CHECK: oc 1(1,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
-  %val = load i8 *%ptr1
-  %old = load i8 *%ptr2
+  %val = load i8 , i8 *%ptr1
+  %old = load i8 , i8 *%ptr2
   %or = or i8 %val, %old
   store i8 %or, i8 *%ptr2
   ret void
@@ -21,8 +21,8 @@
 ; CHECK: oc 2(2,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
-  %val = load i16 *%ptr1
-  %old = load i16 *%ptr2
+  %val = load i16 , i16 *%ptr1
+  %old = load i16 , i16 *%ptr2
   %or = or i16 %val, %old
   store i16 %or, i16 *%ptr2
   ret void
@@ -34,8 +34,8 @@
 ; CHECK: oc 4(4,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i32, i32 *%ptr1, i64 1
-  %val = load i32 *%ptr1
-  %old = load i32 *%ptr2
+  %val = load i32 , i32 *%ptr1
+  %old = load i32 , i32 *%ptr2
   %or = or i32 %old, %val
   store i32 %or, i32 *%ptr2
   ret void
@@ -47,8 +47,8 @@
 ; CHECK: oc 8(8,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
-  %val = load i64 *%ptr1
-  %old = load i64 *%ptr2
+  %val = load i64 , i64 *%ptr1
+  %old = load i64 , i64 *%ptr2
   %or = or i64 %old, %val
   store i64 %or, i64 *%ptr2
   ret void
diff --git a/llvm/test/CodeGen/SystemZ/serialize-01.ll b/llvm/test/CodeGen/SystemZ/serialize-01.ll
index 7801fac..4a24517 100644
--- a/llvm/test/CodeGen/SystemZ/serialize-01.ll
+++ b/llvm/test/CodeGen/SystemZ/serialize-01.ll
@@ -16,6 +16,6 @@
 ; CHECK-FAST: bcr 14, %r0
 ; CHECK-FAST: l %r2, 0(%r2)
 ; CHECK-FAST: br %r14
-  %val = load volatile i32 *%src
+  %val = load volatile i32 , i32 *%src
   ret i32 %val
 }
diff --git a/llvm/test/CodeGen/SystemZ/shift-01.ll b/llvm/test/CodeGen/SystemZ/shift-01.ll
index 5dab36b..3e838f5 100644
--- a/llvm/test/CodeGen/SystemZ/shift-01.ll
+++ b/llvm/test/CodeGen/SystemZ/shift-01.ll
@@ -108,7 +108,7 @@
 ; CHECK: l %r1, 0(%r3)
 ; CHECK: sll %r2, 0(%r1)
 ; CHECK: br %r14
-  %amt = load i32 *%ptr
+  %amt = load i32 , i32 *%ptr
   %shift = shl i32 %a, %amt
   ret i32 %shift
 }
diff --git a/llvm/test/CodeGen/SystemZ/shift-02.ll b/llvm/test/CodeGen/SystemZ/shift-02.ll
index 27e73cd3..43576db 100644
--- a/llvm/test/CodeGen/SystemZ/shift-02.ll
+++ b/llvm/test/CodeGen/SystemZ/shift-02.ll
@@ -108,7 +108,7 @@
 ; CHECK: l %r1, 0(%r3)
 ; CHECK: srl %r2, 0(%r1)
 ; CHECK: br %r14
-  %amt = load i32 *%ptr
+  %amt = load i32 , i32 *%ptr
   %shift = lshr i32 %a, %amt
   ret i32 %shift
 }
diff --git a/llvm/test/CodeGen/SystemZ/shift-03.ll b/llvm/test/CodeGen/SystemZ/shift-03.ll
index c45ae48..6803ff5a 100644
--- a/llvm/test/CodeGen/SystemZ/shift-03.ll
+++ b/llvm/test/CodeGen/SystemZ/shift-03.ll
@@ -108,7 +108,7 @@
 ; CHECK: l %r1, 0(%r3)
 ; CHECK: sra %r2, 0(%r1)
 ; CHECK: br %r14
-  %amt = load i32 *%ptr
+  %amt = load i32 , i32 *%ptr
   %shift = ashr i32 %a, %amt
   ret i32 %shift
 }
diff --git a/llvm/test/CodeGen/SystemZ/shift-04.ll b/llvm/test/CodeGen/SystemZ/shift-04.ll
index de2d74f..2a32872 100644
--- a/llvm/test/CodeGen/SystemZ/shift-04.ll
+++ b/llvm/test/CodeGen/SystemZ/shift-04.ll
@@ -180,7 +180,7 @@
 ; CHECK: l %r1, 0(%r3)
 ; CHECK: rll %r2, %r2, 0(%r1)
 ; CHECK: br %r14
-  %amt = load i32 *%ptr
+  %amt = load i32 , i32 *%ptr
   %amtb = sub i32 32, %amt
   %parta = shl i32 %a, %amt
   %partb = lshr i32 %a, %amtb
diff --git a/llvm/test/CodeGen/SystemZ/shift-05.ll b/llvm/test/CodeGen/SystemZ/shift-05.ll
index 833b2fb..240be3f 100644
--- a/llvm/test/CodeGen/SystemZ/shift-05.ll
+++ b/llvm/test/CodeGen/SystemZ/shift-05.ll
@@ -143,7 +143,7 @@
 ; CHECK: l %r1, 4(%r3)
 ; CHECK: sllg %r2, %r2, 0(%r1)
 ; CHECK: br %r14
-  %amt = load i64 *%ptr
+  %amt = load i64 , i64 *%ptr
   %shift = shl i64 %a, %amt
   ret i64 %shift
 }
diff --git a/llvm/test/CodeGen/SystemZ/shift-06.ll b/llvm/test/CodeGen/SystemZ/shift-06.ll
index 74cae12..d9b9f47 100644
--- a/llvm/test/CodeGen/SystemZ/shift-06.ll
+++ b/llvm/test/CodeGen/SystemZ/shift-06.ll
@@ -143,7 +143,7 @@
 ; CHECK: l %r1, 4(%r3)
 ; CHECK: srlg %r2, %r2, 0(%r1)
 ; CHECK: br %r14
-  %amt = load i64 *%ptr
+  %amt = load i64 , i64 *%ptr
   %shift = lshr i64 %a, %amt
   ret i64 %shift
 }
diff --git a/llvm/test/CodeGen/SystemZ/shift-07.ll b/llvm/test/CodeGen/SystemZ/shift-07.ll
index 712849d..1616288 100644
--- a/llvm/test/CodeGen/SystemZ/shift-07.ll
+++ b/llvm/test/CodeGen/SystemZ/shift-07.ll
@@ -143,7 +143,7 @@
 ; CHECK: l %r1, 4(%r3)
 ; CHECK: srag %r2, %r2, 0(%r1)
 ; CHECK: br %r14
-  %amt = load i64 *%ptr
+  %amt = load i64 , i64 *%ptr
   %shift = ashr i64 %a, %amt
   ret i64 %shift
 }
diff --git a/llvm/test/CodeGen/SystemZ/shift-08.ll b/llvm/test/CodeGen/SystemZ/shift-08.ll
index 47283b5..0db53c9 100644
--- a/llvm/test/CodeGen/SystemZ/shift-08.ll
+++ b/llvm/test/CodeGen/SystemZ/shift-08.ll
@@ -181,7 +181,7 @@
 ; CHECK: l %r1, 4(%r3)
 ; CHECK: rllg %r2, %r2, 0(%r1)
 ; CHECK: br %r14
-  %amt = load i64 *%ptr
+  %amt = load i64 , i64 *%ptr
   %amtb = sub i64 64, %amt
   %parta = shl i64 %a, %amt
   %partb = lshr i64 %a, %amtb
diff --git a/llvm/test/CodeGen/SystemZ/spill-01.ll b/llvm/test/CodeGen/SystemZ/spill-01.ll
index 88eb374..a59c06f 100644
--- a/llvm/test/CodeGen/SystemZ/spill-01.ll
+++ b/llvm/test/CodeGen/SystemZ/spill-01.ll
@@ -44,13 +44,13 @@
   %ptr5 = getelementptr i32, i32 *%ptr0, i32 10
   %ptr6 = getelementptr i32, i32 *%ptr0, i32 12
 
-  %val0 = load i32 *%ptr0
-  %val1 = load i32 *%ptr1
-  %val2 = load i32 *%ptr2
-  %val3 = load i32 *%ptr3
-  %val4 = load i32 *%ptr4
-  %val5 = load i32 *%ptr5
-  %val6 = load i32 *%ptr6
+  %val0 = load i32 , i32 *%ptr0
+  %val1 = load i32 , i32 *%ptr1
+  %val2 = load i32 , i32 *%ptr2
+  %val3 = load i32 , i32 *%ptr3
+  %val4 = load i32 , i32 *%ptr4
+  %val5 = load i32 , i32 *%ptr5
+  %val6 = load i32 , i32 *%ptr6
 
   call void @foo()
 
@@ -82,15 +82,15 @@
   %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
   %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
 
-  %val0 = load i32 *%ptr0
-  %val1 = load i32 *%ptr1
-  %val2 = load i32 *%ptr2
-  %val3 = load i32 *%ptr3
-  %val4 = load i32 *%ptr4
-  %val5 = load i32 *%ptr5
-  %val6 = load i32 *%ptr6
-  %val7 = load i32 *%ptr7
-  %val8 = load i32 *%ptr8
+  %val0 = load i32 , i32 *%ptr0
+  %val1 = load i32 , i32 *%ptr1
+  %val2 = load i32 , i32 *%ptr2
+  %val3 = load i32 , i32 *%ptr3
+  %val4 = load i32 , i32 *%ptr4
+  %val5 = load i32 , i32 *%ptr5
+  %val6 = load i32 , i32 *%ptr6
+  %val7 = load i32 , i32 *%ptr7
+  %val8 = load i32 , i32 *%ptr8
 
   call void @foo()
 
@@ -124,15 +124,15 @@
   %ptr7 = getelementptr i64, i64 *%ptr0, i64 14
   %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
 
-  %val0 = load i64 *%ptr0
-  %val1 = load i64 *%ptr1
-  %val2 = load i64 *%ptr2
-  %val3 = load i64 *%ptr3
-  %val4 = load i64 *%ptr4
-  %val5 = load i64 *%ptr5
-  %val6 = load i64 *%ptr6
-  %val7 = load i64 *%ptr7
-  %val8 = load i64 *%ptr8
+  %val0 = load i64 , i64 *%ptr0
+  %val1 = load i64 , i64 *%ptr1
+  %val2 = load i64 , i64 *%ptr2
+  %val3 = load i64 , i64 *%ptr3
+  %val4 = load i64 , i64 *%ptr4
+  %val5 = load i64 , i64 *%ptr5
+  %val6 = load i64 , i64 *%ptr6
+  %val7 = load i64 , i64 *%ptr7
+  %val8 = load i64 , i64 *%ptr8
 
   call void @foo()
 
@@ -170,16 +170,16 @@
   %ptr8 = getelementptr float, float *%ptr0, i64 16
   %ptr9 = getelementptr float, float *%ptr0, i64 18
 
-  %val0 = load float *%ptr0
-  %val1 = load float *%ptr1
-  %val2 = load float *%ptr2
-  %val3 = load float *%ptr3
-  %val4 = load float *%ptr4
-  %val5 = load float *%ptr5
-  %val6 = load float *%ptr6
-  %val7 = load float *%ptr7
-  %val8 = load float *%ptr8
-  %val9 = load float *%ptr9
+  %val0 = load float , float *%ptr0
+  %val1 = load float , float *%ptr1
+  %val2 = load float , float *%ptr2
+  %val3 = load float , float *%ptr3
+  %val4 = load float , float *%ptr4
+  %val5 = load float , float *%ptr5
+  %val6 = load float , float *%ptr6
+  %val7 = load float , float *%ptr7
+  %val8 = load float , float *%ptr8
+  %val9 = load float , float *%ptr9
 
   call void @foo()
 
@@ -214,16 +214,16 @@
   %ptr8 = getelementptr double, double *%ptr0, i64 16
   %ptr9 = getelementptr double, double *%ptr0, i64 18
 
-  %val0 = load double *%ptr0
-  %val1 = load double *%ptr1
-  %val2 = load double *%ptr2
-  %val3 = load double *%ptr3
-  %val4 = load double *%ptr4
-  %val5 = load double *%ptr5
-  %val6 = load double *%ptr6
-  %val7 = load double *%ptr7
-  %val8 = load double *%ptr8
-  %val9 = load double *%ptr9
+  %val0 = load double , double *%ptr0
+  %val1 = load double , double *%ptr1
+  %val2 = load double , double *%ptr2
+  %val3 = load double , double *%ptr3
+  %val4 = load double , double *%ptr4
+  %val5 = load double , double *%ptr5
+  %val6 = load double , double *%ptr6
+  %val7 = load double , double *%ptr7
+  %val8 = load double , double *%ptr8
+  %val9 = load double , double *%ptr9
 
   call void @foo()
 
@@ -255,15 +255,15 @@
   %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
   %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
 
-  %val0 = load atomic i32 *%ptr0 unordered, align 4
-  %val1 = load atomic i32 *%ptr1 unordered, align 4
-  %val2 = load atomic i32 *%ptr2 unordered, align 4
-  %val3 = load atomic i32 *%ptr3 unordered, align 4
-  %val4 = load atomic i32 *%ptr4 unordered, align 4
-  %val5 = load atomic i32 *%ptr5 unordered, align 4
-  %val6 = load atomic i32 *%ptr6 unordered, align 4
-  %val7 = load atomic i32 *%ptr7 unordered, align 4
-  %val8 = load atomic i32 *%ptr8 unordered, align 4
+  %val0 = load atomic i32 , i32 *%ptr0 unordered, align 4
+  %val1 = load atomic i32 , i32 *%ptr1 unordered, align 4
+  %val2 = load atomic i32 , i32 *%ptr2 unordered, align 4
+  %val3 = load atomic i32 , i32 *%ptr3 unordered, align 4
+  %val4 = load atomic i32 , i32 *%ptr4 unordered, align 4
+  %val5 = load atomic i32 , i32 *%ptr5 unordered, align 4
+  %val6 = load atomic i32 , i32 *%ptr6 unordered, align 4
+  %val7 = load atomic i32 , i32 *%ptr7 unordered, align 4
+  %val8 = load atomic i32 , i32 *%ptr8 unordered, align 4
 
   call void @foo()
 
@@ -294,15 +294,15 @@
   %ptr7 = getelementptr i32, i32 *%ptr0, i64 14
   %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
 
-  %val0 = load volatile i32 *%ptr0
-  %val1 = load volatile i32 *%ptr1
-  %val2 = load volatile i32 *%ptr2
-  %val3 = load volatile i32 *%ptr3
-  %val4 = load volatile i32 *%ptr4
-  %val5 = load volatile i32 *%ptr5
-  %val6 = load volatile i32 *%ptr6
-  %val7 = load volatile i32 *%ptr7
-  %val8 = load volatile i32 *%ptr8
+  %val0 = load volatile i32 , i32 *%ptr0
+  %val1 = load volatile i32 , i32 *%ptr1
+  %val2 = load volatile i32 , i32 *%ptr2
+  %val3 = load volatile i32 , i32 *%ptr3
+  %val4 = load volatile i32 , i32 *%ptr4
+  %val5 = load volatile i32 , i32 *%ptr5
+  %val6 = load volatile i32 , i32 *%ptr6
+  %val7 = load volatile i32 , i32 *%ptr7
+  %val8 = load volatile i32 , i32 *%ptr8
 
   call void @foo()
 
@@ -324,16 +324,16 @@
 ; CHECK-LABEL: f8:
 ; CHECK-NOT: mvc
 ; CHECK: br %r14
-  %val0 = load i32 *@g0
-  %val1 = load i32 *@g1
-  %val2 = load i32 *@g2
-  %val3 = load i32 *@g3
-  %val4 = load i32 *@g4
-  %val5 = load i32 *@g5
-  %val6 = load i32 *@g6
-  %val7 = load i32 *@g7
-  %val8 = load i32 *@g8
-  %val9 = load i32 *@g9
+  %val0 = load i32 , i32 *@g0
+  %val1 = load i32 , i32 *@g1
+  %val2 = load i32 , i32 *@g2
+  %val3 = load i32 , i32 *@g3
+  %val4 = load i32 , i32 *@g4
+  %val5 = load i32 , i32 *@g5
+  %val6 = load i32 , i32 *@g6
+  %val7 = load i32 , i32 *@g7
+  %val8 = load i32 , i32 *@g8
+  %val9 = load i32 , i32 *@g9
 
   call void @foo()
 
@@ -356,16 +356,16 @@
 ; CHECK-LABEL: f9:
 ; CHECK-NOT: mvc
 ; CHECK: br %r14
-  %val0 = load i64 *@h0
-  %val1 = load i64 *@h1
-  %val2 = load i64 *@h2
-  %val3 = load i64 *@h3
-  %val4 = load i64 *@h4
-  %val5 = load i64 *@h5
-  %val6 = load i64 *@h6
-  %val7 = load i64 *@h7
-  %val8 = load i64 *@h8
-  %val9 = load i64 *@h9
+  %val0 = load i64 , i64 *@h0
+  %val1 = load i64 , i64 *@h1
+  %val2 = load i64 , i64 *@h2
+  %val3 = load i64 , i64 *@h3
+  %val4 = load i64 , i64 *@h4
+  %val5 = load i64 , i64 *@h5
+  %val6 = load i64 , i64 *@h6
+  %val7 = load i64 , i64 *@h7
+  %val8 = load i64 , i64 *@h8
+  %val9 = load i64 , i64 *@h9
 
   call void @foo()
 
@@ -400,16 +400,16 @@
 ; CHECK: stgrl [[REG]], h8
 ; CHECK: br %r14
 entry:
-  %val8 = load volatile i64 *@h8
-  %val0 = load volatile i64 *@h0
-  %val1 = load volatile i64 *@h1
-  %val2 = load volatile i64 *@h2
-  %val3 = load volatile i64 *@h3
-  %val4 = load volatile i64 *@h4
-  %val5 = load volatile i64 *@h5
-  %val6 = load volatile i64 *@h6
-  %val7 = load volatile i64 *@h7
-  %val9 = load volatile i64 *@h9
+  %val8 = load volatile i64 , i64 *@h8
+  %val0 = load volatile i64 , i64 *@h0
+  %val1 = load volatile i64 , i64 *@h1
+  %val2 = load volatile i64 , i64 *@h2
+  %val3 = load volatile i64 , i64 *@h3
+  %val4 = load volatile i64 , i64 *@h4
+  %val5 = load volatile i64 , i64 *@h5
+  %val6 = load volatile i64 , i64 *@h6
+  %val7 = load volatile i64 , i64 *@h7
+  %val9 = load volatile i64 , i64 *@h9
 
   call void @foo()
 
@@ -422,7 +422,7 @@
   store volatile i64 %val6, i64 *@h6
   store volatile i64 %val7, i64 *@h7
 
-  %check = load volatile i64 *@h0
+  %check = load volatile i64 , i64 *@h0
   %cond = icmp eq i64 %check, 0
   br i1 %cond, label %skip, label %fallthru
 
@@ -464,17 +464,17 @@
 ; CHECK-NOT: mvc [[OFFSET:[0-9]+]](8,%r15), [[OFFSET]](%r15)
 ; CHECK: br %r14
 entry:
-  %val0 = load volatile i64 *@h0
-  %val1 = load volatile i64 *@h1
-  %val2 = load volatile i64 *@h2
-  %val3 = load volatile i64 *@h3
-  %val4 = load volatile i64 *@h4
-  %val5 = load volatile i64 *@h5
-  %val6 = load volatile i64 *@h6
-  %val7 = load volatile i64 *@h7
+  %val0 = load volatile i64 , i64 *@h0
+  %val1 = load volatile i64 , i64 *@h1
+  %val2 = load volatile i64 , i64 *@h2
+  %val3 = load volatile i64 , i64 *@h3
+  %val4 = load volatile i64 , i64 *@h4
+  %val5 = load volatile i64 , i64 *@h5
+  %val6 = load volatile i64 , i64 *@h6
+  %val7 = load volatile i64 , i64 *@h7
 
-  %altval0 = load volatile i64 *@h0
-  %altval1 = load volatile i64 *@h1
+  %altval0 = load volatile i64 , i64 *@h0
+  %altval1 = load volatile i64 , i64 *@h1
 
   call void @foo()
 
@@ -487,7 +487,7 @@
   store volatile i64 %val6, i64 *@h6
   store volatile i64 %val7, i64 *@h7
 
-  %check = load volatile i64 *@h0
+  %check = load volatile i64 , i64 *@h0
   %cond = icmp eq i64 %check, 0
   br i1 %cond, label %a1, label %b1
 
diff --git a/llvm/test/CodeGen/SystemZ/strcpy-01.ll b/llvm/test/CodeGen/SystemZ/strcpy-01.ll
index 29bab62..d6d0edf4 100644
--- a/llvm/test/CodeGen/SystemZ/strcpy-01.ll
+++ b/llvm/test/CodeGen/SystemZ/strcpy-01.ll
@@ -43,7 +43,7 @@
 ; CHECK-NEXT: jo [[LABEL]]
 ; CHECK: mvhi 0(%r6), 0
 ; CHECK: br %r14
-  %res = load i32 *%resptr
+  %res = load i32 , i32 *%resptr
   %unused = call i8 *@strcpy(i8 *%dest, i8 *%src)
   store i32 0, i32 *%storeptr
   ret i32 %res
diff --git a/llvm/test/CodeGen/SystemZ/tls-05.ll b/llvm/test/CodeGen/SystemZ/tls-05.ll
index 385208d..502d6d4 100644
--- a/llvm/test/CodeGen/SystemZ/tls-05.ll
+++ b/llvm/test/CodeGen/SystemZ/tls-05.ll
@@ -8,7 +8,7 @@
 @x = thread_local global i32 0
 
 define i32 @foo() {
-  %val = load i32* @x
+  %val = load i32, i32* @x
   %inc = add nsw i32 %val, 1
   store i32 %inc, i32* @x
   ret i32 %val
diff --git a/llvm/test/CodeGen/SystemZ/tls-06.ll b/llvm/test/CodeGen/SystemZ/tls-06.ll
index fcd8614..8f1796d 100644
--- a/llvm/test/CodeGen/SystemZ/tls-06.ll
+++ b/llvm/test/CodeGen/SystemZ/tls-06.ll
@@ -10,8 +10,8 @@
 @y = thread_local global i32 0
 
 define i32 @foo() {
-  %valx = load i32* @x
-  %valy = load i32* @y
+  %valx = load i32, i32* @x
+  %valy = load i32, i32* @y
   %add = add nsw i32 %valx, %valy
   ret i32 %add
 }
diff --git a/llvm/test/CodeGen/SystemZ/tls-07.ll b/llvm/test/CodeGen/SystemZ/tls-07.ll
index 6547515..be66c09 100644
--- a/llvm/test/CodeGen/SystemZ/tls-07.ll
+++ b/llvm/test/CodeGen/SystemZ/tls-07.ll
@@ -9,8 +9,8 @@
 @y = thread_local(localdynamic) global i32 0
 
 define i32 @foo() {
-  %valx = load i32* @x
-  %valy = load i32* @y
+  %valx = load i32, i32* @x
+  %valy = load i32, i32* @y
   %add = add nsw i32 %valx, %valy
   ret i32 %add
 }
diff --git a/llvm/test/CodeGen/SystemZ/unaligned-01.ll b/llvm/test/CodeGen/SystemZ/unaligned-01.ll
index c9f6f36..94cad0e 100644
--- a/llvm/test/CodeGen/SystemZ/unaligned-01.ll
+++ b/llvm/test/CodeGen/SystemZ/unaligned-01.ll
@@ -28,7 +28,7 @@
 ; CHECK: lh %r2, 0(%r2)
 ; CHECK: sth %r2, 0(%r3)
 ; CHECK: br %r14
-  %val = load i16 *%src, align 1
+  %val = load i16 , i16 *%src, align 1
   store i16 %val, i16 *%dst, align 1
   ret i16 %val
 }
@@ -40,8 +40,8 @@
 ; CHECK: s %r2, 0(%r3)
 ; CHECK: st %r2, 0(%r4)
 ; CHECK: br %r14
-  %val1 = load i32 *%src1, align 1
-  %val2 = load i32 *%src2, align 2
+  %val1 = load i32 , i32 *%src1, align 1
+  %val2 = load i32 , i32 *%src2, align 2
   %sub = sub i32 %val1, %val2
   store i32 %sub, i32 *%dst, align 1
   ret i32 %sub
@@ -54,8 +54,8 @@
 ; CHECK: sg %r2, 0(%r3)
 ; CHECK: stg %r2, 0(%r4)
 ; CHECK: br %r14
-  %val1 = load i64 *%src1, align 1
-  %val2 = load i64 *%src2, align 2
+  %val1 = load i64 , i64 *%src1, align 1
+  %val2 = load i64 , i64 *%src2, align 2
   %sub = sub i64 %val1, %val2
   store i64 %sub, i64 *%dst, align 4
   ret i64 %sub
diff --git a/llvm/test/CodeGen/SystemZ/xor-01.ll b/llvm/test/CodeGen/SystemZ/xor-01.ll
index ee98cb5..e0aaffb 100644
--- a/llvm/test/CodeGen/SystemZ/xor-01.ll
+++ b/llvm/test/CodeGen/SystemZ/xor-01.ll
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: x %r2, 0(%r3)
 ; CHECK: br %r14
-  %b = load i32 *%src
+  %b = load i32 , i32 *%src
   %xor = xor i32 %a, %b
   ret i32 %xor
 }
@@ -30,7 +30,7 @@
 ; CHECK: x %r2, 4092(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 1023
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %xor = xor i32 %a, %b
   ret i32 %xor
 }
@@ -41,7 +41,7 @@
 ; CHECK: xy %r2, 4096(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 1024
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %xor = xor i32 %a, %b
   ret i32 %xor
 }
@@ -52,7 +52,7 @@
 ; CHECK: xy %r2, 524284(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131071
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %xor = xor i32 %a, %b
   ret i32 %xor
 }
@@ -65,7 +65,7 @@
 ; CHECK: x %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %xor = xor i32 %a, %b
   ret i32 %xor
 }
@@ -76,7 +76,7 @@
 ; CHECK: xy %r2, -4(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -1
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %xor = xor i32 %a, %b
   ret i32 %xor
 }
@@ -87,7 +87,7 @@
 ; CHECK: xy %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131072
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %xor = xor i32 %a, %b
   ret i32 %xor
 }
@@ -100,7 +100,7 @@
 ; CHECK: x %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i32, i32 *%src, i64 -131073
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %xor = xor i32 %a, %b
   ret i32 %xor
 }
@@ -113,7 +113,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4092
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %xor = xor i32 %a, %b
   ret i32 %xor
 }
@@ -126,7 +126,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i32 *
-  %b = load i32 *%ptr
+  %b = load i32 , i32 *%ptr
   %xor = xor i32 %a, %b
   ret i32 %xor
 }
@@ -147,16 +147,16 @@
   %ptr8 = getelementptr i32, i32 *%ptr0, i64 16
   %ptr9 = getelementptr i32, i32 *%ptr0, i64 18
 
-  %val0 = load i32 *%ptr0
-  %val1 = load i32 *%ptr1
-  %val2 = load i32 *%ptr2
-  %val3 = load i32 *%ptr3
-  %val4 = load i32 *%ptr4
-  %val5 = load i32 *%ptr5
-  %val6 = load i32 *%ptr6
-  %val7 = load i32 *%ptr7
-  %val8 = load i32 *%ptr8
-  %val9 = load i32 *%ptr9
+  %val0 = load i32 , i32 *%ptr0
+  %val1 = load i32 , i32 *%ptr1
+  %val2 = load i32 , i32 *%ptr2
+  %val3 = load i32 , i32 *%ptr3
+  %val4 = load i32 , i32 *%ptr4
+  %val5 = load i32 , i32 *%ptr5
+  %val6 = load i32 , i32 *%ptr6
+  %val7 = load i32 , i32 *%ptr7
+  %val8 = load i32 , i32 *%ptr8
+  %val9 = load i32 , i32 *%ptr9
 
   %ret = call i32 @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/xor-03.ll b/llvm/test/CodeGen/SystemZ/xor-03.ll
index edc9233..36fb1df 100644
--- a/llvm/test/CodeGen/SystemZ/xor-03.ll
+++ b/llvm/test/CodeGen/SystemZ/xor-03.ll
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: xg %r2, 0(%r3)
 ; CHECK: br %r14
-  %b = load i64 *%src
+  %b = load i64 , i64 *%src
   %xor = xor i64 %a, %b
   ret i64 %xor
 }
@@ -30,7 +30,7 @@
 ; CHECK: xg %r2, 524280(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65535
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %xor = xor i64 %a, %b
   ret i64 %xor
 }
@@ -43,7 +43,7 @@
 ; CHECK: xg %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 65536
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %xor = xor i64 %a, %b
   ret i64 %xor
 }
@@ -54,7 +54,7 @@
 ; CHECK: xg %r2, -8(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -1
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %xor = xor i64 %a, %b
   ret i64 %xor
 }
@@ -65,7 +65,7 @@
 ; CHECK: xg %r2, -524288(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65536
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %xor = xor i64 %a, %b
   ret i64 %xor
 }
@@ -78,7 +78,7 @@
 ; CHECK: xg %r2, 0(%r3)
 ; CHECK: br %r14
   %ptr = getelementptr i64, i64 *%src, i64 -65537
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %xor = xor i64 %a, %b
   ret i64 %xor
 }
@@ -91,7 +91,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 524280
   %ptr = inttoptr i64 %add2 to i64 *
-  %b = load i64 *%ptr
+  %b = load i64 , i64 *%ptr
   %xor = xor i64 %a, %b
   ret i64 %xor
 }
@@ -112,16 +112,16 @@
   %ptr8 = getelementptr i64, i64 *%ptr0, i64 16
   %ptr9 = getelementptr i64, i64 *%ptr0, i64 18
 
-  %val0 = load i64 *%ptr0
-  %val1 = load i64 *%ptr1
-  %val2 = load i64 *%ptr2
-  %val3 = load i64 *%ptr3
-  %val4 = load i64 *%ptr4
-  %val5 = load i64 *%ptr5
-  %val6 = load i64 *%ptr6
-  %val7 = load i64 *%ptr7
-  %val8 = load i64 *%ptr8
-  %val9 = load i64 *%ptr9
+  %val0 = load i64 , i64 *%ptr0
+  %val1 = load i64 , i64 *%ptr1
+  %val2 = load i64 , i64 *%ptr2
+  %val3 = load i64 , i64 *%ptr3
+  %val4 = load i64 , i64 *%ptr4
+  %val5 = load i64 , i64 *%ptr5
+  %val6 = load i64 , i64 *%ptr6
+  %val7 = load i64 , i64 *%ptr7
+  %val8 = load i64 , i64 *%ptr8
+  %val9 = load i64 , i64 *%ptr9
 
   %ret = call i64 @foo()
 
diff --git a/llvm/test/CodeGen/SystemZ/xor-05.ll b/llvm/test/CodeGen/SystemZ/xor-05.ll
index 1fad255..7b79c7f 100644
--- a/llvm/test/CodeGen/SystemZ/xor-05.ll
+++ b/llvm/test/CodeGen/SystemZ/xor-05.ll
@@ -7,7 +7,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: xi 0(%r2), 1
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %xor = xor i8 %val, -255
   store i8 %xor, i8 *%ptr
   ret void
@@ -18,7 +18,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %xor = xor i8 %val, -2
   store i8 %xor, i8 *%ptr
   ret void
@@ -29,7 +29,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK: xi 0(%r2), 1
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %xor = xor i8 %val, 1
   store i8 %xor, i8 *%ptr
   ret void
@@ -40,7 +40,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %xor = xor i8 %val, 254
   store i8 %xor, i8 *%ptr
   ret void
@@ -52,7 +52,7 @@
 ; CHECK: xi 4095(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 4095
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %xor = xor i8 %val, 127
   store i8 %xor, i8 *%ptr
   ret void
@@ -64,7 +64,7 @@
 ; CHECK: xiy 4096(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 4096
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %xor = xor i8 %val, 127
   store i8 %xor, i8 *%ptr
   ret void
@@ -76,7 +76,7 @@
 ; CHECK: xiy 524287(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524287
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %xor = xor i8 %val, 127
   store i8 %xor, i8 *%ptr
   ret void
@@ -90,7 +90,7 @@
 ; CHECK: xi 0(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 524288
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %xor = xor i8 %val, 127
   store i8 %xor, i8 *%ptr
   ret void
@@ -102,7 +102,7 @@
 ; CHECK: xiy -1(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -1
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %xor = xor i8 %val, 127
   store i8 %xor, i8 *%ptr
   ret void
@@ -114,7 +114,7 @@
 ; CHECK: xiy -524288(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524288
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %xor = xor i8 %val, 127
   store i8 %xor, i8 *%ptr
   ret void
@@ -128,7 +128,7 @@
 ; CHECK: xi 0(%r2), 127
 ; CHECK: br %r14
   %ptr = getelementptr i8, i8 *%src, i64 -524289
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %xor = xor i8 %val, 127
   store i8 %xor, i8 *%ptr
   ret void
@@ -143,7 +143,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4095
   %ptr = inttoptr i64 %add2 to i8 *
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %xor = xor i8 %val, 127
   store i8 %xor, i8 *%ptr
   ret void
@@ -158,7 +158,7 @@
   %add1 = add i64 %src, %index
   %add2 = add i64 %add1, 4096
   %ptr = inttoptr i64 %add2 to i8 *
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %xor = xor i8 %val, 127
   store i8 %xor, i8 *%ptr
   ret void
diff --git a/llvm/test/CodeGen/SystemZ/xor-06.ll b/llvm/test/CodeGen/SystemZ/xor-06.ll
index f39c0fe..40db3cb 100644
--- a/llvm/test/CodeGen/SystemZ/xor-06.ll
+++ b/llvm/test/CodeGen/SystemZ/xor-06.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL: f1:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %xor = xor i32 %ext, -2
   %trunc = trunc i32 %xor to i8
@@ -21,7 +21,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %xor = xor i64 %ext, -2
   %trunc = trunc i64 %xor to i8
@@ -34,7 +34,7 @@
 ; CHECK-LABEL: f3:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i32
   %xor = xor i32 %ext, 254
   %trunc = trunc i32 %xor to i8
@@ -47,7 +47,7 @@
 ; CHECK-LABEL: f4:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = zext i8 %val to i64
   %xor = xor i64 %ext, 254
   %trunc = trunc i64 %xor to i8
@@ -60,7 +60,7 @@
 ; CHECK-LABEL: f5:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %xor = xor i32 %ext, -2
   %trunc = trunc i32 %xor to i8
@@ -73,7 +73,7 @@
 ; CHECK-LABEL: f6:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %xor = xor i64 %ext, -2
   %trunc = trunc i64 %xor to i8
@@ -86,7 +86,7 @@
 ; CHECK-LABEL: f7:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i32
   %xor = xor i32 %ext, 254
   %trunc = trunc i32 %xor to i8
@@ -99,7 +99,7 @@
 ; CHECK-LABEL: f8:
 ; CHECK: xi 0(%r2), 254
 ; CHECK: br %r14
-  %val = load i8 *%ptr
+  %val = load i8 , i8 *%ptr
   %ext = sext i8 %val to i64
   %xor = xor i64 %ext, 254
   %trunc = trunc i64 %xor to i8
diff --git a/llvm/test/CodeGen/SystemZ/xor-08.ll b/llvm/test/CodeGen/SystemZ/xor-08.ll
index 31b32ec..9988a4c 100644
--- a/llvm/test/CodeGen/SystemZ/xor-08.ll
+++ b/llvm/test/CodeGen/SystemZ/xor-08.ll
@@ -8,8 +8,8 @@
 ; CHECK: xc 1(1,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i8, i8 *%ptr1, i64 1
-  %val = load i8 *%ptr1
-  %old = load i8 *%ptr2
+  %val = load i8 , i8 *%ptr1
+  %old = load i8 , i8 *%ptr2
   %xor = xor i8 %val, %old
   store i8 %xor, i8 *%ptr2
   ret void
@@ -21,8 +21,8 @@
 ; CHECK: xc 2(2,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i16, i16 *%ptr1, i64 1
-  %val = load i16 *%ptr1
-  %old = load i16 *%ptr2
+  %val = load i16 , i16 *%ptr1
+  %old = load i16 , i16 *%ptr2
   %xor = xor i16 %val, %old
   store i16 %xor, i16 *%ptr2
   ret void
@@ -34,8 +34,8 @@
 ; CHECK: xc 4(4,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i32, i32 *%ptr1, i64 1
-  %val = load i32 *%ptr1
-  %old = load i32 *%ptr2
+  %val = load i32 , i32 *%ptr1
+  %old = load i32 , i32 *%ptr2
   %xor = xor i32 %old, %val
   store i32 %xor, i32 *%ptr2
   ret void
@@ -47,8 +47,8 @@
 ; CHECK: xc 8(8,%r2), 0(%r2)
 ; CHECK: br %r14
   %ptr2 = getelementptr i64, i64 *%ptr1, i64 1
-  %val = load i64 *%ptr1
-  %old = load i64 *%ptr2
+  %val = load i64 , i64 *%ptr1
+  %old = load i64 , i64 *%ptr2
   %xor = xor i64 %old, %val
   store i64 %xor, i64 *%ptr2
   ret void
diff --git a/llvm/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll b/llvm/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
index 1e61b23..37bcc36 100644
--- a/llvm/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
+++ b/llvm/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
@@ -6,7 +6,7 @@
 define void @f1() {
 	%D = alloca %struct.rtx_def, align 1
 	%tmp1 = bitcast %struct.rtx_def* %D to i32*
-	%tmp7 = load i32* %tmp1
+	%tmp7 = load i32, i32* %tmp1
 	%tmp14 = lshr i32 %tmp7, 1
 	%tmp1415 = and i32 %tmp14, 1
 	call void (i32, ...)* @printf( i32 undef, i32 0, i32 %tmp1415 )
diff --git a/llvm/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll b/llvm/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
index 929c472..9e4ecf4 100644
--- a/llvm/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
+++ b/llvm/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
@@ -9,7 +9,7 @@
 
 define i8* @f(i8* %a) {
 entry:
-	%tmp1 = load i32* @i.1882		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* @i.1882		; <i32> [#uses=1]
 	%tmp2 = add i32 %tmp1, 1		; <i32> [#uses=2]
 	store i32 %tmp2, i32* @i.1882
 	%tmp34 = inttoptr i32 %tmp2 to i8*		; <i8*> [#uses=1]
@@ -21,15 +21,15 @@
 	%t = alloca i32, align 4		; <i32*> [#uses=4]
 	%ret = alloca i32, align 4		; <i32*> [#uses=3]
 	%tmp1 = call i32 @pthread_create( i32* %t, %struct.pthread_attr_t* null, i8* (i8*)* @f, i8* null )		; <i32> [#uses=0]
-	%tmp2 = load i32* %t		; <i32> [#uses=1]
+	%tmp2 = load i32, i32* %t		; <i32> [#uses=1]
 	%ret3 = bitcast i32* %ret to i8**		; <i8**> [#uses=2]
 	%tmp4 = call i32 @pthread_join( i32 %tmp2, i8** %ret3 )		; <i32> [#uses=0]
-	%tmp5 = load i32* %ret		; <i32> [#uses=1]
+	%tmp5 = load i32, i32* %ret		; <i32> [#uses=1]
 	%tmp7 = call i32 (i8*, ...)* @printf( i8* getelementptr ([14 x i8]* @.str, i32 0, i32 0), i32 %tmp5 )		; <i32> [#uses=0]
 	%tmp8 = call i32 @pthread_create( i32* %t, %struct.pthread_attr_t* null, i8* (i8*)* @f, i8* null )		; <i32> [#uses=0]
-	%tmp9 = load i32* %t		; <i32> [#uses=1]
+	%tmp9 = load i32, i32* %t		; <i32> [#uses=1]
 	%tmp11 = call i32 @pthread_join( i32 %tmp9, i8** %ret3 )		; <i32> [#uses=0]
-	%tmp12 = load i32* %ret		; <i32> [#uses=1]
+	%tmp12 = load i32, i32* %ret		; <i32> [#uses=1]
 	%tmp14 = call i32 (i8*, ...)* @printf( i8* getelementptr ([14 x i8]* @.str1, i32 0, i32 0), i32 %tmp12 )		; <i32> [#uses=0]
 	ret i32 0
 }
diff --git a/llvm/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll b/llvm/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll
index d4651a1..fd30032 100644
--- a/llvm/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll
+++ b/llvm/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll
@@ -4,7 +4,7 @@
 
 define i64 @millisecs() nounwind {
 entry:
-	%0 = load i64* @Time.2535, align 4		; <i64> [#uses=2]
+	%0 = load i64, i64* @Time.2535, align 4		; <i64> [#uses=2]
 	%1 = add i64 %0, 1		; <i64> [#uses=1]
 	store i64 %1, i64* @Time.2535, align 4
 	ret i64 %0
diff --git a/llvm/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll b/llvm/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll
index 09072f5..7036dd1 100644
--- a/llvm/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll
+++ b/llvm/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll
@@ -5,334 +5,334 @@
 define void @BF_encrypt(i32* nocapture %data, %struct.BF_KEY* nocapture %key, i32 %encrypt) nounwind {
 entry:
 	%0 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 0; <i32*> [#uses=2]
-	%1 = load i32* %data, align 4             ; <i32> [#uses=2]
-	%2 = load i32* undef, align 4             ; <i32> [#uses=2]
+	%1 = load i32, i32* %data, align 4             ; <i32> [#uses=2]
+	%2 = load i32, i32* undef, align 4             ; <i32> [#uses=2]
 	br i1 undef, label %bb1, label %bb
 
 bb:                                               ; preds = %entry
-	%3 = load i32* %0, align 4                ; <i32> [#uses=1]
+	%3 = load i32, i32* %0, align 4                ; <i32> [#uses=1]
 	%4 = xor i32 %3, %1                       ; <i32> [#uses=4]
-	%5 = load i32* null, align 4              ; <i32> [#uses=1]
+	%5 = load i32, i32* null, align 4              ; <i32> [#uses=1]
 	%6 = lshr i32 %4, 24                      ; <i32> [#uses=1]
 	%7 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %6; <i32*> [#uses=1]
-	%8 = load i32* %7, align 4                ; <i32> [#uses=1]
+	%8 = load i32, i32* %7, align 4                ; <i32> [#uses=1]
 	%9 = lshr i32 %4, 16                      ; <i32> [#uses=1]
 	%10 = or i32 %9, 256                      ; <i32> [#uses=1]
 	%11 = and i32 %10, 511                    ; <i32> [#uses=1]
 	%12 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %11; <i32*> [#uses=1]
-	%13 = load i32* %12, align 4              ; <i32> [#uses=1]
+	%13 = load i32, i32* %12, align 4              ; <i32> [#uses=1]
 	%14 = add i32 %13, %8                     ; <i32> [#uses=1]
 	%15 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 undef; <i32*> [#uses=1]
-	%16 = load i32* %15, align 4              ; <i32> [#uses=1]
+	%16 = load i32, i32* %15, align 4              ; <i32> [#uses=1]
 	%17 = xor i32 %14, %16                    ; <i32> [#uses=1]
 	%18 = or i32 %4, 768                      ; <i32> [#uses=1]
 	%19 = and i32 %18, 1023                   ; <i32> [#uses=1]
 	%20 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %19; <i32*> [#uses=1]
-	%21 = load i32* %20, align 4              ; <i32> [#uses=1]
+	%21 = load i32, i32* %20, align 4              ; <i32> [#uses=1]
 	%22 = add i32 %17, %21                    ; <i32> [#uses=1]
 	%23 = xor i32 %5, %2                      ; <i32> [#uses=1]
 	%24 = xor i32 %23, %22                    ; <i32> [#uses=5]
 	%25 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 2; <i32*> [#uses=1]
-	%26 = load i32* %25, align 4              ; <i32> [#uses=1]
+	%26 = load i32, i32* %25, align 4              ; <i32> [#uses=1]
 	%27 = lshr i32 %24, 24                    ; <i32> [#uses=1]
 	%28 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %27; <i32*> [#uses=1]
-	%29 = load i32* %28, align 4              ; <i32> [#uses=1]
+	%29 = load i32, i32* %28, align 4              ; <i32> [#uses=1]
 	%30 = lshr i32 %24, 16                    ; <i32> [#uses=1]
 	%31 = or i32 %30, 256                     ; <i32> [#uses=1]
 	%32 = and i32 %31, 511                    ; <i32> [#uses=1]
 	%33 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %32; <i32*> [#uses=1]
-	%34 = load i32* %33, align 4              ; <i32> [#uses=1]
+	%34 = load i32, i32* %33, align 4              ; <i32> [#uses=1]
 	%35 = add i32 %34, %29                    ; <i32> [#uses=1]
 	%36 = lshr i32 %24, 8                     ; <i32> [#uses=1]
 	%37 = or i32 %36, 512                     ; <i32> [#uses=1]
 	%38 = and i32 %37, 767                    ; <i32> [#uses=1]
 	%39 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %38; <i32*> [#uses=1]
-	%40 = load i32* %39, align 4              ; <i32> [#uses=1]
+	%40 = load i32, i32* %39, align 4              ; <i32> [#uses=1]
 	%41 = xor i32 %35, %40                    ; <i32> [#uses=1]
 	%42 = or i32 %24, 768                     ; <i32> [#uses=1]
 	%43 = and i32 %42, 1023                   ; <i32> [#uses=1]
 	%44 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %43; <i32*> [#uses=1]
-	%45 = load i32* %44, align 4              ; <i32> [#uses=1]
+	%45 = load i32, i32* %44, align 4              ; <i32> [#uses=1]
 	%46 = add i32 %41, %45                    ; <i32> [#uses=1]
 	%47 = xor i32 %26, %4                     ; <i32> [#uses=1]
 	%48 = xor i32 %47, %46                    ; <i32> [#uses=5]
 	%49 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 3; <i32*> [#uses=1]
-	%50 = load i32* %49, align 4              ; <i32> [#uses=1]
+	%50 = load i32, i32* %49, align 4              ; <i32> [#uses=1]
 	%51 = lshr i32 %48, 24                    ; <i32> [#uses=1]
 	%52 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %51; <i32*> [#uses=1]
-	%53 = load i32* %52, align 4              ; <i32> [#uses=1]
+	%53 = load i32, i32* %52, align 4              ; <i32> [#uses=1]
 	%54 = lshr i32 %48, 16                    ; <i32> [#uses=1]
 	%55 = or i32 %54, 256                     ; <i32> [#uses=1]
 	%56 = and i32 %55, 511                    ; <i32> [#uses=1]
 	%57 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %56; <i32*> [#uses=1]
-	%58 = load i32* %57, align 4              ; <i32> [#uses=1]
+	%58 = load i32, i32* %57, align 4              ; <i32> [#uses=1]
 	%59 = add i32 %58, %53                    ; <i32> [#uses=1]
 	%60 = lshr i32 %48, 8                     ; <i32> [#uses=1]
 	%61 = or i32 %60, 512                     ; <i32> [#uses=1]
 	%62 = and i32 %61, 767                    ; <i32> [#uses=1]
 	%63 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %62; <i32*> [#uses=1]
-	%64 = load i32* %63, align 4              ; <i32> [#uses=1]
+	%64 = load i32, i32* %63, align 4              ; <i32> [#uses=1]
 	%65 = xor i32 %59, %64                    ; <i32> [#uses=1]
 	%66 = or i32 %48, 768                     ; <i32> [#uses=1]
 	%67 = and i32 %66, 1023                   ; <i32> [#uses=1]
 	%68 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %67; <i32*> [#uses=1]
-	%69 = load i32* %68, align 4              ; <i32> [#uses=1]
+	%69 = load i32, i32* %68, align 4              ; <i32> [#uses=1]
 	%70 = add i32 %65, %69                    ; <i32> [#uses=1]
 	%71 = xor i32 %50, %24                    ; <i32> [#uses=1]
 	%72 = xor i32 %71, %70                    ; <i32> [#uses=5]
-	%73 = load i32* null, align 4             ; <i32> [#uses=1]
+	%73 = load i32, i32* null, align 4             ; <i32> [#uses=1]
 	%74 = lshr i32 %72, 24                    ; <i32> [#uses=1]
 	%75 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %74; <i32*> [#uses=1]
-	%76 = load i32* %75, align 4              ; <i32> [#uses=1]
+	%76 = load i32, i32* %75, align 4              ; <i32> [#uses=1]
 	%77 = lshr i32 %72, 16                    ; <i32> [#uses=1]
 	%78 = or i32 %77, 256                     ; <i32> [#uses=1]
 	%79 = and i32 %78, 511                    ; <i32> [#uses=1]
 	%80 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %79; <i32*> [#uses=1]
-	%81 = load i32* %80, align 4              ; <i32> [#uses=1]
+	%81 = load i32, i32* %80, align 4              ; <i32> [#uses=1]
 	%82 = add i32 %81, %76                    ; <i32> [#uses=1]
 	%83 = lshr i32 %72, 8                     ; <i32> [#uses=1]
 	%84 = or i32 %83, 512                     ; <i32> [#uses=1]
 	%85 = and i32 %84, 767                    ; <i32> [#uses=1]
 	%86 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %85; <i32*> [#uses=1]
-	%87 = load i32* %86, align 4              ; <i32> [#uses=1]
+	%87 = load i32, i32* %86, align 4              ; <i32> [#uses=1]
 	%88 = xor i32 %82, %87                    ; <i32> [#uses=1]
 	%89 = or i32 %72, 768                     ; <i32> [#uses=1]
 	%90 = and i32 %89, 1023                   ; <i32> [#uses=1]
 	%91 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %90; <i32*> [#uses=1]
-	%92 = load i32* %91, align 4              ; <i32> [#uses=1]
+	%92 = load i32, i32* %91, align 4              ; <i32> [#uses=1]
 	%93 = add i32 %88, %92                    ; <i32> [#uses=1]
 	%94 = xor i32 %73, %48                    ; <i32> [#uses=1]
 	%95 = xor i32 %94, %93                    ; <i32> [#uses=5]
-	%96 = load i32* undef, align 4            ; <i32> [#uses=1]
+	%96 = load i32, i32* undef, align 4            ; <i32> [#uses=1]
 	%97 = lshr i32 %95, 24                    ; <i32> [#uses=1]
 	%98 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %97; <i32*> [#uses=1]
-	%99 = load i32* %98, align 4              ; <i32> [#uses=1]
+	%99 = load i32, i32* %98, align 4              ; <i32> [#uses=1]
 	%100 = lshr i32 %95, 16                   ; <i32> [#uses=1]
 	%101 = or i32 %100, 256                   ; <i32> [#uses=1]
 	%102 = and i32 %101, 511                  ; <i32> [#uses=1]
 	%103 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %102; <i32*> [#uses=1]
-	%104 = load i32* %103, align 4            ; <i32> [#uses=1]
+	%104 = load i32, i32* %103, align 4            ; <i32> [#uses=1]
 	%105 = add i32 %104, %99                  ; <i32> [#uses=1]
 	%106 = lshr i32 %95, 8                    ; <i32> [#uses=1]
 	%107 = or i32 %106, 512                   ; <i32> [#uses=1]
 	%108 = and i32 %107, 767                  ; <i32> [#uses=1]
 	%109 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %108; <i32*> [#uses=1]
-	%110 = load i32* %109, align 4            ; <i32> [#uses=1]
+	%110 = load i32, i32* %109, align 4            ; <i32> [#uses=1]
 	%111 = xor i32 %105, %110                 ; <i32> [#uses=1]
 	%112 = or i32 %95, 768                    ; <i32> [#uses=1]
 	%113 = and i32 %112, 1023                 ; <i32> [#uses=1]
 	%114 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %113; <i32*> [#uses=1]
-	%115 = load i32* %114, align 4            ; <i32> [#uses=1]
+	%115 = load i32, i32* %114, align 4            ; <i32> [#uses=1]
 	%116 = add i32 %111, %115                 ; <i32> [#uses=1]
 	%117 = xor i32 %96, %72                   ; <i32> [#uses=1]
 	%118 = xor i32 %117, %116                 ; <i32> [#uses=5]
 	%119 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 6; <i32*> [#uses=1]
-	%120 = load i32* %119, align 4            ; <i32> [#uses=1]
+	%120 = load i32, i32* %119, align 4            ; <i32> [#uses=1]
 	%121 = lshr i32 %118, 24                  ; <i32> [#uses=1]
 	%122 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %121; <i32*> [#uses=1]
-	%123 = load i32* %122, align 4            ; <i32> [#uses=1]
+	%123 = load i32, i32* %122, align 4            ; <i32> [#uses=1]
 	%124 = lshr i32 %118, 16                  ; <i32> [#uses=1]
 	%125 = or i32 %124, 256                   ; <i32> [#uses=1]
 	%126 = and i32 %125, 511                  ; <i32> [#uses=1]
 	%127 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %126; <i32*> [#uses=1]
-	%128 = load i32* %127, align 4            ; <i32> [#uses=1]
+	%128 = load i32, i32* %127, align 4            ; <i32> [#uses=1]
 	%129 = add i32 %128, %123                 ; <i32> [#uses=1]
 	%130 = lshr i32 %118, 8                   ; <i32> [#uses=1]
 	%131 = or i32 %130, 512                   ; <i32> [#uses=1]
 	%132 = and i32 %131, 767                  ; <i32> [#uses=1]
 	%133 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %132; <i32*> [#uses=1]
-	%134 = load i32* %133, align 4            ; <i32> [#uses=1]
+	%134 = load i32, i32* %133, align 4            ; <i32> [#uses=1]
 	%135 = xor i32 %129, %134                 ; <i32> [#uses=1]
 	%136 = or i32 %118, 768                   ; <i32> [#uses=1]
 	%137 = and i32 %136, 1023                 ; <i32> [#uses=1]
 	%138 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %137; <i32*> [#uses=1]
-	%139 = load i32* %138, align 4            ; <i32> [#uses=1]
+	%139 = load i32, i32* %138, align 4            ; <i32> [#uses=1]
 	%140 = add i32 %135, %139                 ; <i32> [#uses=1]
 	%141 = xor i32 %120, %95                  ; <i32> [#uses=1]
 	%142 = xor i32 %141, %140                 ; <i32> [#uses=5]
 	%143 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 7; <i32*> [#uses=1]
-	%144 = load i32* %143, align 4            ; <i32> [#uses=1]
+	%144 = load i32, i32* %143, align 4            ; <i32> [#uses=1]
 	%145 = lshr i32 %142, 24                  ; <i32> [#uses=1]
 	%146 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %145; <i32*> [#uses=1]
-	%147 = load i32* %146, align 4            ; <i32> [#uses=1]
+	%147 = load i32, i32* %146, align 4            ; <i32> [#uses=1]
 	%148 = lshr i32 %142, 16                  ; <i32> [#uses=1]
 	%149 = or i32 %148, 256                   ; <i32> [#uses=1]
 	%150 = and i32 %149, 511                  ; <i32> [#uses=1]
 	%151 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %150; <i32*> [#uses=1]
-	%152 = load i32* %151, align 4            ; <i32> [#uses=1]
+	%152 = load i32, i32* %151, align 4            ; <i32> [#uses=1]
 	%153 = add i32 %152, %147                 ; <i32> [#uses=1]
 	%154 = lshr i32 %142, 8                   ; <i32> [#uses=1]
 	%155 = or i32 %154, 512                   ; <i32> [#uses=1]
 	%156 = and i32 %155, 767                  ; <i32> [#uses=1]
 	%157 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %156; <i32*> [#uses=1]
-	%158 = load i32* %157, align 4            ; <i32> [#uses=1]
+	%158 = load i32, i32* %157, align 4            ; <i32> [#uses=1]
 	%159 = xor i32 %153, %158                 ; <i32> [#uses=1]
 	%160 = or i32 %142, 768                   ; <i32> [#uses=1]
 	%161 = and i32 %160, 1023                 ; <i32> [#uses=1]
 	%162 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %161; <i32*> [#uses=1]
-	%163 = load i32* %162, align 4            ; <i32> [#uses=1]
+	%163 = load i32, i32* %162, align 4            ; <i32> [#uses=1]
 	%164 = add i32 %159, %163                 ; <i32> [#uses=1]
 	%165 = xor i32 %144, %118                 ; <i32> [#uses=1]
 	%166 = xor i32 %165, %164                 ; <i32> [#uses=5]
-	%167 = load i32* undef, align 4           ; <i32> [#uses=1]
+	%167 = load i32, i32* undef, align 4           ; <i32> [#uses=1]
 	%168 = lshr i32 %166, 24                  ; <i32> [#uses=1]
 	%169 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %168; <i32*> [#uses=1]
-	%170 = load i32* %169, align 4            ; <i32> [#uses=1]
+	%170 = load i32, i32* %169, align 4            ; <i32> [#uses=1]
 	%171 = lshr i32 %166, 16                  ; <i32> [#uses=1]
 	%172 = or i32 %171, 256                   ; <i32> [#uses=1]
 	%173 = and i32 %172, 511                  ; <i32> [#uses=1]
 	%174 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %173; <i32*> [#uses=1]
-	%175 = load i32* %174, align 4            ; <i32> [#uses=1]
+	%175 = load i32, i32* %174, align 4            ; <i32> [#uses=1]
 	%176 = add i32 %175, %170                 ; <i32> [#uses=1]
 	%177 = lshr i32 %166, 8                   ; <i32> [#uses=1]
 	%178 = or i32 %177, 512                   ; <i32> [#uses=1]
 	%179 = and i32 %178, 767                  ; <i32> [#uses=1]
 	%180 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %179; <i32*> [#uses=1]
-	%181 = load i32* %180, align 4            ; <i32> [#uses=1]
+	%181 = load i32, i32* %180, align 4            ; <i32> [#uses=1]
 	%182 = xor i32 %176, %181                 ; <i32> [#uses=1]
 	%183 = or i32 %166, 768                   ; <i32> [#uses=1]
 	%184 = and i32 %183, 1023                 ; <i32> [#uses=1]
 	%185 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %184; <i32*> [#uses=1]
-	%186 = load i32* %185, align 4            ; <i32> [#uses=1]
+	%186 = load i32, i32* %185, align 4            ; <i32> [#uses=1]
 	%187 = add i32 %182, %186                 ; <i32> [#uses=1]
 	%188 = xor i32 %167, %142                 ; <i32> [#uses=1]
 	%189 = xor i32 %188, %187                 ; <i32> [#uses=5]
 	%190 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 9; <i32*> [#uses=1]
-	%191 = load i32* %190, align 4            ; <i32> [#uses=1]
+	%191 = load i32, i32* %190, align 4            ; <i32> [#uses=1]
 	%192 = lshr i32 %189, 24                  ; <i32> [#uses=1]
 	%193 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %192; <i32*> [#uses=1]
-	%194 = load i32* %193, align 4            ; <i32> [#uses=1]
+	%194 = load i32, i32* %193, align 4            ; <i32> [#uses=1]
 	%195 = lshr i32 %189, 16                  ; <i32> [#uses=1]
 	%196 = or i32 %195, 256                   ; <i32> [#uses=1]
 	%197 = and i32 %196, 511                  ; <i32> [#uses=1]
 	%198 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %197; <i32*> [#uses=1]
-	%199 = load i32* %198, align 4            ; <i32> [#uses=1]
+	%199 = load i32, i32* %198, align 4            ; <i32> [#uses=1]
 	%200 = add i32 %199, %194                 ; <i32> [#uses=1]
 	%201 = lshr i32 %189, 8                   ; <i32> [#uses=1]
 	%202 = or i32 %201, 512                   ; <i32> [#uses=1]
 	%203 = and i32 %202, 767                  ; <i32> [#uses=1]
 	%204 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %203; <i32*> [#uses=1]
-	%205 = load i32* %204, align 4            ; <i32> [#uses=1]
+	%205 = load i32, i32* %204, align 4            ; <i32> [#uses=1]
 	%206 = xor i32 %200, %205                 ; <i32> [#uses=1]
 	%207 = or i32 %189, 768                   ; <i32> [#uses=1]
 	%208 = and i32 %207, 1023                 ; <i32> [#uses=1]
 	%209 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %208; <i32*> [#uses=1]
-	%210 = load i32* %209, align 4            ; <i32> [#uses=1]
+	%210 = load i32, i32* %209, align 4            ; <i32> [#uses=1]
 	%211 = add i32 %206, %210                 ; <i32> [#uses=1]
 	%212 = xor i32 %191, %166                 ; <i32> [#uses=1]
 	%213 = xor i32 %212, %211                 ; <i32> [#uses=5]
 	%214 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 10; <i32*> [#uses=1]
-	%215 = load i32* %214, align 4            ; <i32> [#uses=1]
+	%215 = load i32, i32* %214, align 4            ; <i32> [#uses=1]
 	%216 = lshr i32 %213, 24                  ; <i32> [#uses=1]
 	%217 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %216; <i32*> [#uses=1]
-	%218 = load i32* %217, align 4            ; <i32> [#uses=1]
+	%218 = load i32, i32* %217, align 4            ; <i32> [#uses=1]
 	%219 = lshr i32 %213, 16                  ; <i32> [#uses=1]
 	%220 = or i32 %219, 256                   ; <i32> [#uses=1]
 	%221 = and i32 %220, 511                  ; <i32> [#uses=1]
 	%222 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %221; <i32*> [#uses=1]
-	%223 = load i32* %222, align 4            ; <i32> [#uses=1]
+	%223 = load i32, i32* %222, align 4            ; <i32> [#uses=1]
 	%224 = add i32 %223, %218                 ; <i32> [#uses=1]
 	%225 = lshr i32 %213, 8                   ; <i32> [#uses=1]
 	%226 = or i32 %225, 512                   ; <i32> [#uses=1]
 	%227 = and i32 %226, 767                  ; <i32> [#uses=1]
 	%228 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %227; <i32*> [#uses=1]
-	%229 = load i32* %228, align 4            ; <i32> [#uses=1]
+	%229 = load i32, i32* %228, align 4            ; <i32> [#uses=1]
 	%230 = xor i32 %224, %229                 ; <i32> [#uses=1]
 	%231 = or i32 %213, 768                   ; <i32> [#uses=1]
 	%232 = and i32 %231, 1023                 ; <i32> [#uses=1]
 	%233 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %232; <i32*> [#uses=1]
-	%234 = load i32* %233, align 4            ; <i32> [#uses=1]
+	%234 = load i32, i32* %233, align 4            ; <i32> [#uses=1]
 	%235 = add i32 %230, %234                 ; <i32> [#uses=1]
 	%236 = xor i32 %215, %189                 ; <i32> [#uses=1]
 	%237 = xor i32 %236, %235                 ; <i32> [#uses=5]
 	%238 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 11; <i32*> [#uses=1]
-	%239 = load i32* %238, align 4            ; <i32> [#uses=1]
+	%239 = load i32, i32* %238, align 4            ; <i32> [#uses=1]
 	%240 = lshr i32 %237, 24                  ; <i32> [#uses=1]
 	%241 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %240; <i32*> [#uses=1]
-	%242 = load i32* %241, align 4            ; <i32> [#uses=1]
+	%242 = load i32, i32* %241, align 4            ; <i32> [#uses=1]
 	%243 = lshr i32 %237, 16                  ; <i32> [#uses=1]
 	%244 = or i32 %243, 256                   ; <i32> [#uses=1]
 	%245 = and i32 %244, 511                  ; <i32> [#uses=1]
 	%246 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %245; <i32*> [#uses=1]
-	%247 = load i32* %246, align 4            ; <i32> [#uses=1]
+	%247 = load i32, i32* %246, align 4            ; <i32> [#uses=1]
 	%248 = add i32 %247, %242                 ; <i32> [#uses=1]
 	%249 = lshr i32 %237, 8                   ; <i32> [#uses=1]
 	%250 = or i32 %249, 512                   ; <i32> [#uses=1]
 	%251 = and i32 %250, 767                  ; <i32> [#uses=1]
 	%252 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %251; <i32*> [#uses=1]
-	%253 = load i32* %252, align 4            ; <i32> [#uses=1]
+	%253 = load i32, i32* %252, align 4            ; <i32> [#uses=1]
 	%254 = xor i32 %248, %253                 ; <i32> [#uses=1]
 	%255 = or i32 %237, 768                   ; <i32> [#uses=1]
 	%256 = and i32 %255, 1023                 ; <i32> [#uses=1]
 	%257 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %256; <i32*> [#uses=1]
-	%258 = load i32* %257, align 4            ; <i32> [#uses=1]
+	%258 = load i32, i32* %257, align 4            ; <i32> [#uses=1]
 	%259 = add i32 %254, %258                 ; <i32> [#uses=1]
 	%260 = xor i32 %239, %213                 ; <i32> [#uses=1]
 	%261 = xor i32 %260, %259                 ; <i32> [#uses=5]
-	%262 = load i32* undef, align 4           ; <i32> [#uses=1]
+	%262 = load i32, i32* undef, align 4           ; <i32> [#uses=1]
 	%263 = lshr i32 %261, 24                  ; <i32> [#uses=1]
 	%264 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %263; <i32*> [#uses=1]
-	%265 = load i32* %264, align 4            ; <i32> [#uses=1]
+	%265 = load i32, i32* %264, align 4            ; <i32> [#uses=1]
 	%266 = lshr i32 %261, 16                  ; <i32> [#uses=1]
 	%267 = or i32 %266, 256                   ; <i32> [#uses=1]
 	%268 = and i32 %267, 511                  ; <i32> [#uses=1]
 	%269 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %268; <i32*> [#uses=1]
-	%270 = load i32* %269, align 4            ; <i32> [#uses=1]
+	%270 = load i32, i32* %269, align 4            ; <i32> [#uses=1]
 	%271 = add i32 %270, %265                 ; <i32> [#uses=1]
 	%272 = lshr i32 %261, 8                   ; <i32> [#uses=1]
 	%273 = or i32 %272, 512                   ; <i32> [#uses=1]
 	%274 = and i32 %273, 767                  ; <i32> [#uses=1]
 	%275 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %274; <i32*> [#uses=1]
-	%276 = load i32* %275, align 4            ; <i32> [#uses=1]
+	%276 = load i32, i32* %275, align 4            ; <i32> [#uses=1]
 	%277 = xor i32 %271, %276                 ; <i32> [#uses=1]
 	%278 = or i32 %261, 768                   ; <i32> [#uses=1]
 	%279 = and i32 %278, 1023                 ; <i32> [#uses=1]
 	%280 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %279; <i32*> [#uses=1]
-	%281 = load i32* %280, align 4            ; <i32> [#uses=1]
+	%281 = load i32, i32* %280, align 4            ; <i32> [#uses=1]
 	%282 = add i32 %277, %281                 ; <i32> [#uses=1]
 	%283 = xor i32 %262, %237                 ; <i32> [#uses=1]
 	%284 = xor i32 %283, %282                 ; <i32> [#uses=4]
-	%285 = load i32* null, align 4            ; <i32> [#uses=1]
+	%285 = load i32, i32* null, align 4            ; <i32> [#uses=1]
 	%286 = lshr i32 %284, 24                  ; <i32> [#uses=1]
 	%287 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %286; <i32*> [#uses=1]
-	%288 = load i32* %287, align 4            ; <i32> [#uses=1]
+	%288 = load i32, i32* %287, align 4            ; <i32> [#uses=1]
 	%289 = lshr i32 %284, 16                  ; <i32> [#uses=1]
 	%290 = or i32 %289, 256                   ; <i32> [#uses=1]
 	%291 = and i32 %290, 511                  ; <i32> [#uses=1]
 	%292 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %291; <i32*> [#uses=1]
-	%293 = load i32* %292, align 4            ; <i32> [#uses=1]
+	%293 = load i32, i32* %292, align 4            ; <i32> [#uses=1]
 	%294 = add i32 %293, %288                 ; <i32> [#uses=1]
 	%295 = lshr i32 %284, 8                   ; <i32> [#uses=1]
 	%296 = or i32 %295, 512                   ; <i32> [#uses=1]
 	%297 = and i32 %296, 767                  ; <i32> [#uses=1]
 	%298 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %297; <i32*> [#uses=1]
-	%299 = load i32* %298, align 4            ; <i32> [#uses=1]
+	%299 = load i32, i32* %298, align 4            ; <i32> [#uses=1]
 	%300 = xor i32 %294, %299                 ; <i32> [#uses=1]
 	%301 = or i32 %284, 768                   ; <i32> [#uses=1]
 	%302 = and i32 %301, 1023                 ; <i32> [#uses=1]
 	%303 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %302; <i32*> [#uses=1]
-	%304 = load i32* %303, align 4            ; <i32> [#uses=1]
+	%304 = load i32, i32* %303, align 4            ; <i32> [#uses=1]
 	%305 = add i32 %300, %304                 ; <i32> [#uses=1]
 	%306 = xor i32 %285, %261                 ; <i32> [#uses=1]
 	%307 = xor i32 %306, %305                 ; <i32> [#uses=1]
 	%308 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 15; <i32*> [#uses=1]
-	%309 = load i32* %308, align 4            ; <i32> [#uses=1]
+	%309 = load i32, i32* %308, align 4            ; <i32> [#uses=1]
 	%310 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 0; <i32*> [#uses=1]
-	%311 = load i32* %310, align 4            ; <i32> [#uses=1]
+	%311 = load i32, i32* %310, align 4            ; <i32> [#uses=1]
 	%312 = or i32 0, 256                      ; <i32> [#uses=1]
 	%313 = and i32 %312, 511                  ; <i32> [#uses=1]
 	%314 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %313; <i32*> [#uses=1]
-	%315 = load i32* %314, align 4            ; <i32> [#uses=1]
+	%315 = load i32, i32* %314, align 4            ; <i32> [#uses=1]
 	%316 = add i32 %315, %311                 ; <i32> [#uses=1]
 	%317 = or i32 0, 512                      ; <i32> [#uses=1]
 	%318 = and i32 %317, 767                  ; <i32> [#uses=1]
 	%319 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %318; <i32*> [#uses=1]
-	%320 = load i32* %319, align 4            ; <i32> [#uses=1]
+	%320 = load i32, i32* %319, align 4            ; <i32> [#uses=1]
 	%321 = xor i32 %316, %320                 ; <i32> [#uses=1]
 	%322 = or i32 0, 768                      ; <i32> [#uses=1]
 	%323 = and i32 %322, 1023                 ; <i32> [#uses=1]
 	%324 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %323; <i32*> [#uses=1]
-	%325 = load i32* %324, align 4            ; <i32> [#uses=1]
+	%325 = load i32, i32* %324, align 4            ; <i32> [#uses=1]
 	%326 = add i32 %321, %325                 ; <i32> [#uses=1]
 	%327 = xor i32 %309, %307                 ; <i32> [#uses=1]
 	%328 = xor i32 %327, %326                 ; <i32> [#uses=5]
@@ -340,357 +340,357 @@
 	br label %bb2
 
 bb1:                                              ; preds = %entry
-	%330 = load i32* null, align 4            ; <i32> [#uses=1]
+	%330 = load i32, i32* null, align 4            ; <i32> [#uses=1]
 	%331 = xor i32 %330, %1                   ; <i32> [#uses=4]
-	%332 = load i32* null, align 4            ; <i32> [#uses=1]
+	%332 = load i32, i32* null, align 4            ; <i32> [#uses=1]
 	%333 = lshr i32 %331, 24                  ; <i32> [#uses=1]
 	%334 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %333; <i32*> [#uses=1]
-	%335 = load i32* %334, align 4            ; <i32> [#uses=1]
-	%336 = load i32* null, align 4            ; <i32> [#uses=1]
+	%335 = load i32, i32* %334, align 4            ; <i32> [#uses=1]
+	%336 = load i32, i32* null, align 4            ; <i32> [#uses=1]
 	%337 = add i32 %336, %335                 ; <i32> [#uses=1]
 	%338 = lshr i32 %331, 8                   ; <i32> [#uses=1]
 	%339 = or i32 %338, 512                   ; <i32> [#uses=1]
 	%340 = and i32 %339, 767                  ; <i32> [#uses=1]
 	%341 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %340; <i32*> [#uses=1]
-	%342 = load i32* %341, align 4            ; <i32> [#uses=1]
+	%342 = load i32, i32* %341, align 4            ; <i32> [#uses=1]
 	%343 = xor i32 %337, %342                 ; <i32> [#uses=1]
 	%344 = or i32 %331, 768                   ; <i32> [#uses=1]
 	%345 = and i32 %344, 1023                 ; <i32> [#uses=1]
 	%346 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %345; <i32*> [#uses=1]
-	%347 = load i32* %346, align 4            ; <i32> [#uses=1]
+	%347 = load i32, i32* %346, align 4            ; <i32> [#uses=1]
 	%348 = add i32 %343, %347                 ; <i32> [#uses=1]
 	%349 = xor i32 %332, %2                   ; <i32> [#uses=1]
 	%350 = xor i32 %349, %348                 ; <i32> [#uses=5]
 	%351 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 15; <i32*> [#uses=1]
-	%352 = load i32* %351, align 4            ; <i32> [#uses=1]
+	%352 = load i32, i32* %351, align 4            ; <i32> [#uses=1]
 	%353 = lshr i32 %350, 24                  ; <i32> [#uses=1]
 	%354 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %353; <i32*> [#uses=1]
-	%355 = load i32* %354, align 4            ; <i32> [#uses=1]
+	%355 = load i32, i32* %354, align 4            ; <i32> [#uses=1]
 	%356 = lshr i32 %350, 16                  ; <i32> [#uses=1]
 	%357 = or i32 %356, 256                   ; <i32> [#uses=1]
 	%358 = and i32 %357, 511                  ; <i32> [#uses=1]
 	%359 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %358; <i32*> [#uses=1]
-	%360 = load i32* %359, align 4            ; <i32> [#uses=1]
+	%360 = load i32, i32* %359, align 4            ; <i32> [#uses=1]
 	%361 = add i32 %360, %355                 ; <i32> [#uses=1]
 	%362 = lshr i32 %350, 8                   ; <i32> [#uses=1]
 	%363 = or i32 %362, 512                   ; <i32> [#uses=1]
 	%364 = and i32 %363, 767                  ; <i32> [#uses=1]
 	%365 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %364; <i32*> [#uses=1]
-	%366 = load i32* %365, align 4            ; <i32> [#uses=1]
+	%366 = load i32, i32* %365, align 4            ; <i32> [#uses=1]
 	%367 = xor i32 %361, %366                 ; <i32> [#uses=1]
 	%368 = or i32 %350, 768                   ; <i32> [#uses=1]
 	%369 = and i32 %368, 1023                 ; <i32> [#uses=1]
 	%370 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %369; <i32*> [#uses=1]
-	%371 = load i32* %370, align 4            ; <i32> [#uses=1]
+	%371 = load i32, i32* %370, align 4            ; <i32> [#uses=1]
 	%372 = add i32 %367, %371                 ; <i32> [#uses=1]
 	%373 = xor i32 %352, %331                 ; <i32> [#uses=1]
 	%374 = xor i32 %373, %372                 ; <i32> [#uses=5]
 	%375 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 14; <i32*> [#uses=1]
-	%376 = load i32* %375, align 4            ; <i32> [#uses=1]
+	%376 = load i32, i32* %375, align 4            ; <i32> [#uses=1]
 	%377 = lshr i32 %374, 24                  ; <i32> [#uses=1]
 	%378 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %377; <i32*> [#uses=1]
-	%379 = load i32* %378, align 4            ; <i32> [#uses=1]
+	%379 = load i32, i32* %378, align 4            ; <i32> [#uses=1]
 	%380 = lshr i32 %374, 16                  ; <i32> [#uses=1]
 	%381 = or i32 %380, 256                   ; <i32> [#uses=1]
 	%382 = and i32 %381, 511                  ; <i32> [#uses=1]
 	%383 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %382; <i32*> [#uses=1]
-	%384 = load i32* %383, align 4            ; <i32> [#uses=1]
+	%384 = load i32, i32* %383, align 4            ; <i32> [#uses=1]
 	%385 = add i32 %384, %379                 ; <i32> [#uses=1]
 	%386 = lshr i32 %374, 8                   ; <i32> [#uses=1]
 	%387 = or i32 %386, 512                   ; <i32> [#uses=1]
 	%388 = and i32 %387, 767                  ; <i32> [#uses=1]
 	%389 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %388; <i32*> [#uses=1]
-	%390 = load i32* %389, align 4            ; <i32> [#uses=1]
+	%390 = load i32, i32* %389, align 4            ; <i32> [#uses=1]
 	%391 = xor i32 %385, %390                 ; <i32> [#uses=1]
 	%392 = or i32 %374, 768                   ; <i32> [#uses=1]
 	%393 = and i32 %392, 1023                 ; <i32> [#uses=1]
 	%394 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %393; <i32*> [#uses=1]
-	%395 = load i32* %394, align 4            ; <i32> [#uses=1]
+	%395 = load i32, i32* %394, align 4            ; <i32> [#uses=1]
 	%396 = add i32 %391, %395                 ; <i32> [#uses=1]
 	%397 = xor i32 %376, %350                 ; <i32> [#uses=1]
 	%398 = xor i32 %397, %396                 ; <i32> [#uses=5]
 	%399 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 13; <i32*> [#uses=1]
-	%400 = load i32* %399, align 4            ; <i32> [#uses=1]
+	%400 = load i32, i32* %399, align 4            ; <i32> [#uses=1]
 	%401 = lshr i32 %398, 24                  ; <i32> [#uses=1]
 	%402 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %401; <i32*> [#uses=1]
-	%403 = load i32* %402, align 4            ; <i32> [#uses=1]
+	%403 = load i32, i32* %402, align 4            ; <i32> [#uses=1]
 	%404 = lshr i32 %398, 16                  ; <i32> [#uses=1]
 	%405 = or i32 %404, 256                   ; <i32> [#uses=1]
 	%406 = and i32 %405, 511                  ; <i32> [#uses=1]
 	%407 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %406; <i32*> [#uses=1]
-	%408 = load i32* %407, align 4            ; <i32> [#uses=1]
+	%408 = load i32, i32* %407, align 4            ; <i32> [#uses=1]
 	%409 = add i32 %408, %403                 ; <i32> [#uses=1]
 	%410 = lshr i32 %398, 8                   ; <i32> [#uses=1]
 	%411 = or i32 %410, 512                   ; <i32> [#uses=1]
 	%412 = and i32 %411, 767                  ; <i32> [#uses=1]
 	%413 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %412; <i32*> [#uses=1]
-	%414 = load i32* %413, align 4            ; <i32> [#uses=1]
+	%414 = load i32, i32* %413, align 4            ; <i32> [#uses=1]
 	%415 = xor i32 %409, %414                 ; <i32> [#uses=1]
 	%416 = or i32 %398, 768                   ; <i32> [#uses=1]
 	%417 = and i32 %416, 1023                 ; <i32> [#uses=1]
 	%418 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %417; <i32*> [#uses=1]
-	%419 = load i32* %418, align 4            ; <i32> [#uses=1]
+	%419 = load i32, i32* %418, align 4            ; <i32> [#uses=1]
 	%420 = add i32 %415, %419                 ; <i32> [#uses=1]
 	%421 = xor i32 %400, %374                 ; <i32> [#uses=1]
 	%422 = xor i32 %421, %420                 ; <i32> [#uses=5]
 	%423 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 12; <i32*> [#uses=1]
-	%424 = load i32* %423, align 4            ; <i32> [#uses=1]
+	%424 = load i32, i32* %423, align 4            ; <i32> [#uses=1]
 	%425 = lshr i32 %422, 24                  ; <i32> [#uses=1]
 	%426 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %425; <i32*> [#uses=1]
-	%427 = load i32* %426, align 4            ; <i32> [#uses=1]
+	%427 = load i32, i32* %426, align 4            ; <i32> [#uses=1]
 	%428 = lshr i32 %422, 16                  ; <i32> [#uses=1]
 	%429 = or i32 %428, 256                   ; <i32> [#uses=1]
 	%430 = and i32 %429, 511                  ; <i32> [#uses=1]
 	%431 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %430; <i32*> [#uses=1]
-	%432 = load i32* %431, align 4            ; <i32> [#uses=1]
+	%432 = load i32, i32* %431, align 4            ; <i32> [#uses=1]
 	%433 = add i32 %432, %427                 ; <i32> [#uses=1]
 	%434 = lshr i32 %422, 8                   ; <i32> [#uses=1]
 	%435 = or i32 %434, 512                   ; <i32> [#uses=1]
 	%436 = and i32 %435, 767                  ; <i32> [#uses=1]
 	%437 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %436; <i32*> [#uses=1]
-	%438 = load i32* %437, align 4            ; <i32> [#uses=1]
+	%438 = load i32, i32* %437, align 4            ; <i32> [#uses=1]
 	%439 = xor i32 %433, %438                 ; <i32> [#uses=1]
 	%440 = or i32 %422, 768                   ; <i32> [#uses=1]
 	%441 = and i32 %440, 1023                 ; <i32> [#uses=1]
 	%442 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %441; <i32*> [#uses=1]
-	%443 = load i32* %442, align 4            ; <i32> [#uses=1]
+	%443 = load i32, i32* %442, align 4            ; <i32> [#uses=1]
 	%444 = add i32 %439, %443                 ; <i32> [#uses=1]
 	%445 = xor i32 %424, %398                 ; <i32> [#uses=1]
 	%446 = xor i32 %445, %444                 ; <i32> [#uses=5]
-	%447 = load i32* undef, align 4           ; <i32> [#uses=1]
+	%447 = load i32, i32* undef, align 4           ; <i32> [#uses=1]
 	%448 = lshr i32 %446, 24                  ; <i32> [#uses=1]
 	%449 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %448; <i32*> [#uses=1]
-	%450 = load i32* %449, align 4            ; <i32> [#uses=1]
+	%450 = load i32, i32* %449, align 4            ; <i32> [#uses=1]
 	%451 = lshr i32 %446, 16                  ; <i32> [#uses=1]
 	%452 = or i32 %451, 256                   ; <i32> [#uses=1]
 	%453 = and i32 %452, 511                  ; <i32> [#uses=1]
 	%454 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %453; <i32*> [#uses=1]
-	%455 = load i32* %454, align 4            ; <i32> [#uses=1]
+	%455 = load i32, i32* %454, align 4            ; <i32> [#uses=1]
 	%456 = add i32 %455, %450                 ; <i32> [#uses=1]
 	%457 = lshr i32 %446, 8                   ; <i32> [#uses=1]
 	%458 = or i32 %457, 512                   ; <i32> [#uses=1]
 	%459 = and i32 %458, 767                  ; <i32> [#uses=1]
 	%460 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %459; <i32*> [#uses=1]
-	%461 = load i32* %460, align 4            ; <i32> [#uses=1]
+	%461 = load i32, i32* %460, align 4            ; <i32> [#uses=1]
 	%462 = xor i32 %456, %461                 ; <i32> [#uses=1]
 	%463 = or i32 %446, 768                   ; <i32> [#uses=1]
 	%464 = and i32 %463, 1023                 ; <i32> [#uses=1]
 	%465 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %464; <i32*> [#uses=1]
-	%466 = load i32* %465, align 4            ; <i32> [#uses=1]
+	%466 = load i32, i32* %465, align 4            ; <i32> [#uses=1]
 	%467 = add i32 %462, %466                 ; <i32> [#uses=1]
 	%468 = xor i32 %447, %422                 ; <i32> [#uses=1]
 	%469 = xor i32 %468, %467                 ; <i32> [#uses=5]
 	%470 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 10; <i32*> [#uses=1]
-	%471 = load i32* %470, align 4            ; <i32> [#uses=1]
+	%471 = load i32, i32* %470, align 4            ; <i32> [#uses=1]
 	%472 = lshr i32 %469, 24                  ; <i32> [#uses=1]
 	%473 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %472; <i32*> [#uses=1]
-	%474 = load i32* %473, align 4            ; <i32> [#uses=1]
+	%474 = load i32, i32* %473, align 4            ; <i32> [#uses=1]
 	%475 = lshr i32 %469, 16                  ; <i32> [#uses=1]
 	%476 = or i32 %475, 256                   ; <i32> [#uses=1]
 	%477 = and i32 %476, 511                  ; <i32> [#uses=1]
 	%478 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %477; <i32*> [#uses=1]
-	%479 = load i32* %478, align 4            ; <i32> [#uses=1]
+	%479 = load i32, i32* %478, align 4            ; <i32> [#uses=1]
 	%480 = add i32 %479, %474                 ; <i32> [#uses=1]
 	%481 = lshr i32 %469, 8                   ; <i32> [#uses=1]
 	%482 = or i32 %481, 512                   ; <i32> [#uses=1]
 	%483 = and i32 %482, 767                  ; <i32> [#uses=1]
 	%484 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %483; <i32*> [#uses=1]
-	%485 = load i32* %484, align 4            ; <i32> [#uses=1]
+	%485 = load i32, i32* %484, align 4            ; <i32> [#uses=1]
 	%486 = xor i32 %480, %485                 ; <i32> [#uses=1]
 	%487 = or i32 %469, 768                   ; <i32> [#uses=1]
 	%488 = and i32 %487, 1023                 ; <i32> [#uses=1]
 	%489 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %488; <i32*> [#uses=1]
-	%490 = load i32* %489, align 4            ; <i32> [#uses=1]
+	%490 = load i32, i32* %489, align 4            ; <i32> [#uses=1]
 	%491 = add i32 %486, %490                 ; <i32> [#uses=1]
 	%492 = xor i32 %471, %446                 ; <i32> [#uses=1]
 	%493 = xor i32 %492, %491                 ; <i32> [#uses=5]
 	%494 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 9; <i32*> [#uses=1]
-	%495 = load i32* %494, align 4            ; <i32> [#uses=1]
+	%495 = load i32, i32* %494, align 4            ; <i32> [#uses=1]
 	%496 = lshr i32 %493, 24                  ; <i32> [#uses=1]
 	%497 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %496; <i32*> [#uses=1]
-	%498 = load i32* %497, align 4            ; <i32> [#uses=1]
+	%498 = load i32, i32* %497, align 4            ; <i32> [#uses=1]
 	%499 = lshr i32 %493, 16                  ; <i32> [#uses=1]
 	%500 = or i32 %499, 256                   ; <i32> [#uses=1]
 	%501 = and i32 %500, 511                  ; <i32> [#uses=1]
 	%502 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %501; <i32*> [#uses=1]
-	%503 = load i32* %502, align 4            ; <i32> [#uses=1]
+	%503 = load i32, i32* %502, align 4            ; <i32> [#uses=1]
 	%504 = add i32 %503, %498                 ; <i32> [#uses=1]
 	%505 = lshr i32 %493, 8                   ; <i32> [#uses=1]
 	%506 = or i32 %505, 512                   ; <i32> [#uses=1]
 	%507 = and i32 %506, 767                  ; <i32> [#uses=1]
 	%508 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %507; <i32*> [#uses=1]
-	%509 = load i32* %508, align 4            ; <i32> [#uses=1]
+	%509 = load i32, i32* %508, align 4            ; <i32> [#uses=1]
 	%510 = xor i32 %504, %509                 ; <i32> [#uses=1]
 	%511 = or i32 %493, 768                   ; <i32> [#uses=1]
 	%512 = and i32 %511, 1023                 ; <i32> [#uses=1]
 	%513 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %512; <i32*> [#uses=1]
-	%514 = load i32* %513, align 4            ; <i32> [#uses=1]
+	%514 = load i32, i32* %513, align 4            ; <i32> [#uses=1]
 	%515 = add i32 %510, %514                 ; <i32> [#uses=1]
 	%516 = xor i32 %495, %469                 ; <i32> [#uses=1]
 	%517 = xor i32 %516, %515                 ; <i32> [#uses=5]
 	%518 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 8; <i32*> [#uses=1]
-	%519 = load i32* %518, align 4            ; <i32> [#uses=1]
+	%519 = load i32, i32* %518, align 4            ; <i32> [#uses=1]
 	%520 = lshr i32 %517, 24                  ; <i32> [#uses=1]
 	%521 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %520; <i32*> [#uses=1]
-	%522 = load i32* %521, align 4            ; <i32> [#uses=1]
+	%522 = load i32, i32* %521, align 4            ; <i32> [#uses=1]
 	%523 = lshr i32 %517, 16                  ; <i32> [#uses=1]
 	%524 = or i32 %523, 256                   ; <i32> [#uses=1]
 	%525 = and i32 %524, 511                  ; <i32> [#uses=1]
 	%526 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %525; <i32*> [#uses=1]
-	%527 = load i32* %526, align 4            ; <i32> [#uses=1]
+	%527 = load i32, i32* %526, align 4            ; <i32> [#uses=1]
 	%528 = add i32 %527, %522                 ; <i32> [#uses=1]
 	%529 = lshr i32 %517, 8                   ; <i32> [#uses=1]
 	%530 = or i32 %529, 512                   ; <i32> [#uses=1]
 	%531 = and i32 %530, 767                  ; <i32> [#uses=1]
 	%532 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %531; <i32*> [#uses=1]
-	%533 = load i32* %532, align 4            ; <i32> [#uses=1]
+	%533 = load i32, i32* %532, align 4            ; <i32> [#uses=1]
 	%534 = xor i32 %528, %533                 ; <i32> [#uses=1]
 	%535 = or i32 %517, 768                   ; <i32> [#uses=1]
 	%536 = and i32 %535, 1023                 ; <i32> [#uses=1]
 	%537 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %536; <i32*> [#uses=1]
-	%538 = load i32* %537, align 4            ; <i32> [#uses=1]
+	%538 = load i32, i32* %537, align 4            ; <i32> [#uses=1]
 	%539 = add i32 %534, %538                 ; <i32> [#uses=1]
 	%540 = xor i32 %519, %493                 ; <i32> [#uses=1]
 	%541 = xor i32 %540, %539                 ; <i32> [#uses=5]
-	%542 = load i32* undef, align 4           ; <i32> [#uses=1]
+	%542 = load i32, i32* undef, align 4           ; <i32> [#uses=1]
 	%543 = lshr i32 %541, 24                  ; <i32> [#uses=1]
 	%544 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %543; <i32*> [#uses=1]
-	%545 = load i32* %544, align 4            ; <i32> [#uses=1]
+	%545 = load i32, i32* %544, align 4            ; <i32> [#uses=1]
 	%546 = lshr i32 %541, 16                  ; <i32> [#uses=1]
 	%547 = or i32 %546, 256                   ; <i32> [#uses=1]
 	%548 = and i32 %547, 511                  ; <i32> [#uses=1]
 	%549 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %548; <i32*> [#uses=1]
-	%550 = load i32* %549, align 4            ; <i32> [#uses=1]
+	%550 = load i32, i32* %549, align 4            ; <i32> [#uses=1]
 	%551 = add i32 %550, %545                 ; <i32> [#uses=1]
 	%552 = lshr i32 %541, 8                   ; <i32> [#uses=1]
 	%553 = or i32 %552, 512                   ; <i32> [#uses=1]
 	%554 = and i32 %553, 767                  ; <i32> [#uses=1]
 	%555 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %554; <i32*> [#uses=1]
-	%556 = load i32* %555, align 4            ; <i32> [#uses=1]
+	%556 = load i32, i32* %555, align 4            ; <i32> [#uses=1]
 	%557 = xor i32 %551, %556                 ; <i32> [#uses=1]
 	%558 = or i32 %541, 768                   ; <i32> [#uses=1]
 	%559 = and i32 %558, 1023                 ; <i32> [#uses=1]
 	%560 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %559; <i32*> [#uses=1]
-	%561 = load i32* %560, align 4            ; <i32> [#uses=1]
+	%561 = load i32, i32* %560, align 4            ; <i32> [#uses=1]
 	%562 = add i32 %557, %561                 ; <i32> [#uses=1]
 	%563 = xor i32 %542, %517                 ; <i32> [#uses=1]
 	%564 = xor i32 %563, %562                 ; <i32> [#uses=5]
 	%565 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 6; <i32*> [#uses=1]
-	%566 = load i32* %565, align 4            ; <i32> [#uses=1]
+	%566 = load i32, i32* %565, align 4            ; <i32> [#uses=1]
 	%567 = lshr i32 %564, 24                  ; <i32> [#uses=1]
 	%568 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %567; <i32*> [#uses=1]
-	%569 = load i32* %568, align 4            ; <i32> [#uses=1]
+	%569 = load i32, i32* %568, align 4            ; <i32> [#uses=1]
 	%570 = lshr i32 %564, 16                  ; <i32> [#uses=1]
 	%571 = or i32 %570, 256                   ; <i32> [#uses=1]
 	%572 = and i32 %571, 511                  ; <i32> [#uses=1]
 	%573 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %572; <i32*> [#uses=1]
-	%574 = load i32* %573, align 4            ; <i32> [#uses=1]
+	%574 = load i32, i32* %573, align 4            ; <i32> [#uses=1]
 	%575 = add i32 %574, %569                 ; <i32> [#uses=1]
 	%576 = lshr i32 %564, 8                   ; <i32> [#uses=1]
 	%577 = or i32 %576, 512                   ; <i32> [#uses=1]
 	%578 = and i32 %577, 767                  ; <i32> [#uses=1]
 	%579 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %578; <i32*> [#uses=1]
-	%580 = load i32* %579, align 4            ; <i32> [#uses=1]
+	%580 = load i32, i32* %579, align 4            ; <i32> [#uses=1]
 	%581 = xor i32 %575, %580                 ; <i32> [#uses=1]
 	%582 = or i32 %564, 768                   ; <i32> [#uses=1]
 	%583 = and i32 %582, 1023                 ; <i32> [#uses=1]
 	%584 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %583; <i32*> [#uses=1]
-	%585 = load i32* %584, align 4            ; <i32> [#uses=1]
+	%585 = load i32, i32* %584, align 4            ; <i32> [#uses=1]
 	%586 = add i32 %581, %585                 ; <i32> [#uses=1]
 	%587 = xor i32 %566, %541                 ; <i32> [#uses=1]
 	%588 = xor i32 %587, %586                 ; <i32> [#uses=5]
 	%589 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 5; <i32*> [#uses=1]
-	%590 = load i32* %589, align 4            ; <i32> [#uses=1]
+	%590 = load i32, i32* %589, align 4            ; <i32> [#uses=1]
 	%591 = lshr i32 %588, 24                  ; <i32> [#uses=1]
 	%592 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %591; <i32*> [#uses=1]
-	%593 = load i32* %592, align 4            ; <i32> [#uses=1]
+	%593 = load i32, i32* %592, align 4            ; <i32> [#uses=1]
 	%594 = lshr i32 %588, 16                  ; <i32> [#uses=1]
 	%595 = or i32 %594, 256                   ; <i32> [#uses=1]
 	%596 = and i32 %595, 511                  ; <i32> [#uses=1]
 	%597 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %596; <i32*> [#uses=1]
-	%598 = load i32* %597, align 4            ; <i32> [#uses=1]
+	%598 = load i32, i32* %597, align 4            ; <i32> [#uses=1]
 	%599 = add i32 %598, %593                 ; <i32> [#uses=1]
 	%600 = lshr i32 %588, 8                   ; <i32> [#uses=1]
 	%601 = or i32 %600, 512                   ; <i32> [#uses=1]
 	%602 = and i32 %601, 767                  ; <i32> [#uses=1]
 	%603 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %602; <i32*> [#uses=1]
-	%604 = load i32* %603, align 4            ; <i32> [#uses=1]
+	%604 = load i32, i32* %603, align 4            ; <i32> [#uses=1]
 	%605 = xor i32 %599, %604                 ; <i32> [#uses=1]
 	%606 = or i32 %588, 768                   ; <i32> [#uses=1]
 	%607 = and i32 %606, 1023                 ; <i32> [#uses=1]
 	%608 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %607; <i32*> [#uses=1]
-	%609 = load i32* %608, align 4            ; <i32> [#uses=1]
+	%609 = load i32, i32* %608, align 4            ; <i32> [#uses=1]
 	%610 = add i32 %605, %609                 ; <i32> [#uses=1]
 	%611 = xor i32 %590, %564                 ; <i32> [#uses=1]
 	%612 = xor i32 %611, %610                 ; <i32> [#uses=5]
 	%613 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 4; <i32*> [#uses=1]
-	%614 = load i32* %613, align 4            ; <i32> [#uses=1]
+	%614 = load i32, i32* %613, align 4            ; <i32> [#uses=1]
 	%615 = lshr i32 %612, 24                  ; <i32> [#uses=1]
 	%616 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %615; <i32*> [#uses=1]
-	%617 = load i32* %616, align 4            ; <i32> [#uses=1]
+	%617 = load i32, i32* %616, align 4            ; <i32> [#uses=1]
 	%618 = lshr i32 %612, 16                  ; <i32> [#uses=1]
 	%619 = or i32 %618, 256                   ; <i32> [#uses=1]
 	%620 = and i32 %619, 511                  ; <i32> [#uses=1]
 	%621 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %620; <i32*> [#uses=1]
-	%622 = load i32* %621, align 4            ; <i32> [#uses=1]
+	%622 = load i32, i32* %621, align 4            ; <i32> [#uses=1]
 	%623 = add i32 %622, %617                 ; <i32> [#uses=1]
 	%624 = lshr i32 %612, 8                   ; <i32> [#uses=1]
 	%625 = or i32 %624, 512                   ; <i32> [#uses=1]
 	%626 = and i32 %625, 767                  ; <i32> [#uses=1]
 	%627 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %626; <i32*> [#uses=1]
-	%628 = load i32* %627, align 4            ; <i32> [#uses=1]
+	%628 = load i32, i32* %627, align 4            ; <i32> [#uses=1]
 	%629 = xor i32 %623, %628                 ; <i32> [#uses=1]
 	%630 = or i32 %612, 768                   ; <i32> [#uses=1]
 	%631 = and i32 %630, 1023                 ; <i32> [#uses=1]
 	%632 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %631; <i32*> [#uses=1]
-	%633 = load i32* %632, align 4            ; <i32> [#uses=1]
+	%633 = load i32, i32* %632, align 4            ; <i32> [#uses=1]
 	%634 = add i32 %629, %633                 ; <i32> [#uses=1]
 	%635 = xor i32 %614, %588                 ; <i32> [#uses=1]
 	%636 = xor i32 %635, %634                 ; <i32> [#uses=5]
 	%637 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 0, i32 3; <i32*> [#uses=1]
-	%638 = load i32* %637, align 4            ; <i32> [#uses=1]
+	%638 = load i32, i32* %637, align 4            ; <i32> [#uses=1]
 	%639 = lshr i32 %636, 24                  ; <i32> [#uses=1]
 	%640 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %639; <i32*> [#uses=1]
-	%641 = load i32* %640, align 4            ; <i32> [#uses=1]
+	%641 = load i32, i32* %640, align 4            ; <i32> [#uses=1]
 	%642 = lshr i32 %636, 16                  ; <i32> [#uses=1]
 	%643 = or i32 %642, 256                   ; <i32> [#uses=1]
 	%644 = and i32 %643, 511                  ; <i32> [#uses=1]
 	%645 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %644; <i32*> [#uses=1]
-	%646 = load i32* %645, align 4            ; <i32> [#uses=1]
+	%646 = load i32, i32* %645, align 4            ; <i32> [#uses=1]
 	%647 = add i32 %646, %641                 ; <i32> [#uses=1]
 	%648 = lshr i32 %636, 8                   ; <i32> [#uses=1]
 	%649 = or i32 %648, 512                   ; <i32> [#uses=1]
 	%650 = and i32 %649, 767                  ; <i32> [#uses=1]
 	%651 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %650; <i32*> [#uses=1]
-	%652 = load i32* %651, align 4            ; <i32> [#uses=1]
+	%652 = load i32, i32* %651, align 4            ; <i32> [#uses=1]
 	%653 = xor i32 %647, %652                 ; <i32> [#uses=1]
 	%654 = or i32 %636, 768                   ; <i32> [#uses=1]
 	%655 = and i32 %654, 1023                 ; <i32> [#uses=1]
 	%656 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %655; <i32*> [#uses=1]
-	%657 = load i32* %656, align 4            ; <i32> [#uses=1]
+	%657 = load i32, i32* %656, align 4            ; <i32> [#uses=1]
 	%658 = add i32 %653, %657                 ; <i32> [#uses=1]
 	%659 = xor i32 %638, %612                 ; <i32> [#uses=1]
 	%660 = xor i32 %659, %658                 ; <i32> [#uses=5]
-	%661 = load i32* undef, align 4           ; <i32> [#uses=1]
+	%661 = load i32, i32* undef, align 4           ; <i32> [#uses=1]
 	%662 = lshr i32 %660, 24                  ; <i32> [#uses=1]
 	%663 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %662; <i32*> [#uses=1]
-	%664 = load i32* %663, align 4            ; <i32> [#uses=1]
+	%664 = load i32, i32* %663, align 4            ; <i32> [#uses=1]
 	%665 = lshr i32 %660, 16                  ; <i32> [#uses=1]
 	%666 = or i32 %665, 256                   ; <i32> [#uses=1]
 	%667 = and i32 %666, 511                  ; <i32> [#uses=1]
 	%668 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %667; <i32*> [#uses=1]
-	%669 = load i32* %668, align 4            ; <i32> [#uses=1]
+	%669 = load i32, i32* %668, align 4            ; <i32> [#uses=1]
 	%670 = add i32 %669, %664                 ; <i32> [#uses=1]
 	%671 = lshr i32 %660, 8                   ; <i32> [#uses=1]
 	%672 = or i32 %671, 512                   ; <i32> [#uses=1]
 	%673 = and i32 %672, 767                  ; <i32> [#uses=1]
 	%674 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %673; <i32*> [#uses=1]
-	%675 = load i32* %674, align 4            ; <i32> [#uses=1]
+	%675 = load i32, i32* %674, align 4            ; <i32> [#uses=1]
 	%676 = xor i32 %670, %675                 ; <i32> [#uses=1]
 	%677 = or i32 %660, 768                   ; <i32> [#uses=1]
 	%678 = and i32 %677, 1023                 ; <i32> [#uses=1]
 	%679 = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %678; <i32*> [#uses=1]
-	%680 = load i32* %679, align 4            ; <i32> [#uses=1]
+	%680 = load i32, i32* %679, align 4            ; <i32> [#uses=1]
 	%681 = add i32 %676, %680                 ; <i32> [#uses=1]
 	%682 = xor i32 %661, %636                 ; <i32> [#uses=1]
 	%683 = xor i32 %682, %681                 ; <i32> [#uses=5]
@@ -716,17 +716,17 @@
 	%.pn12.in = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn14; <i32*> [#uses=1]
 	%.pn13 = and i32 %.pn13.in, 767           ; <i32> [#uses=1]
 	%.pn10.in = or i32 %.pn10.in.in, 768      ; <i32> [#uses=1]
-	%.pn11 = load i32* %.pn11.in              ; <i32> [#uses=1]
-	%.pn12 = load i32* %.pn12.in              ; <i32> [#uses=1]
+	%.pn11 = load i32, i32* %.pn11.in              ; <i32> [#uses=1]
+	%.pn12 = load i32, i32* %.pn12.in              ; <i32> [#uses=1]
 	%.pn9.in = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn13; <i32*> [#uses=1]
 	%.pn10 = and i32 %.pn10.in, 1023          ; <i32> [#uses=1]
 	%.pn8 = add i32 %.pn12, %.pn11            ; <i32> [#uses=1]
-	%.pn9 = load i32* %.pn9.in                ; <i32> [#uses=1]
+	%.pn9 = load i32, i32* %.pn9.in                ; <i32> [#uses=1]
 	%.pn7.in = getelementptr %struct.BF_KEY, %struct.BF_KEY* %key, i32 0, i32 1, i32 %.pn10; <i32*> [#uses=1]
 	%.pn6 = xor i32 %.pn8, %.pn9              ; <i32> [#uses=1]
-	%.pn7 = load i32* %.pn7.in                ; <i32> [#uses=1]
-	%.pn4 = load i32* %.pn4.in                ; <i32> [#uses=1]
-	%.pn2 = load i32* %.pn2.in                ; <i32> [#uses=1]
+	%.pn7 = load i32, i32* %.pn7.in                ; <i32> [#uses=1]
+	%.pn4 = load i32, i32* %.pn4.in                ; <i32> [#uses=1]
+	%.pn2 = load i32, i32* %.pn2.in                ; <i32> [#uses=1]
 	%.pn = add i32 %.pn6, %.pn7               ; <i32> [#uses=1]
 	%r.0 = xor i32 %.pn2, %.pn3               ; <i32> [#uses=1]
 	%.pn1 = xor i32 %.pn, %.pn5               ; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll b/llvm/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll
index ba353fb0..8f23f06 100644
--- a/llvm/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll
+++ b/llvm/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll
@@ -16,14 +16,14 @@
 	br label %bb11
 
 bb6:                                              ; preds = %bb11
-	%4 = load i8*** %3, align 4               ; <i8**> [#uses=1]
+	%4 = load i8**, i8*** %3, align 4               ; <i8**> [#uses=1]
 	%scevgep = getelementptr i8*, i8** %4, i32 %8  ; <i8**> [#uses=1]
-	%5 = load i8** %scevgep, align 4          ; <i8*> [#uses=1]
+	%5 = load i8*, i8** %scevgep, align 4          ; <i8*> [#uses=1]
 	br label %bb3.i
 
 bb3.i:                                            ; preds = %bb3.i, %bb6
 	%scevgep7.i = getelementptr i8, i8* %5, i32 0 ; <i8*> [#uses=1]
-	%6 = load i8* %scevgep7.i, align 1        ; <i8> [#uses=0]
+	%6 = load i8, i8* %scevgep7.i, align 1        ; <i8> [#uses=0]
 	br i1 undef, label %bb3.i, label %bb10
 
 bb10:                                             ; preds = %bb3.i
diff --git a/llvm/test/CodeGen/Thumb/2009-08-20-ISelBug.ll b/llvm/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
index 6aac7ff..86d7023 100644
--- a/llvm/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
+++ b/llvm/test/CodeGen/Thumb/2009-08-20-ISelBug.ll
@@ -19,14 +19,14 @@
 
 bb1:                                              ; preds = %entry
   %1 = getelementptr inbounds %struct.asl_file_t, %struct.asl_file_t* %s, i32 0, i32 11 ; <%struct.FILE**> [#uses=2]
-  %2 = load %struct.FILE** %1, align 4            ; <%struct.FILE*> [#uses=2]
+  %2 = load %struct.FILE*, %struct.FILE** %1, align 4            ; <%struct.FILE*> [#uses=2]
   %3 = icmp eq %struct.FILE* %2, null             ; <i1> [#uses=1]
   br i1 %3, label %bb13, label %bb3
 
 bb3:                                              ; preds = %bb1
   %4 = add nsw i64 %off, 8                        ; <i64> [#uses=1]
   %5 = getelementptr inbounds %struct.asl_file_t, %struct.asl_file_t* %s, i32 0, i32 10 ; <i32*> [#uses=1]
-  %6 = load i32* %5, align 4                      ; <i32> [#uses=1]
+  %6 = load i32, i32* %5, align 4                      ; <i32> [#uses=1]
   %7 = zext i32 %6 to i64                         ; <i64> [#uses=1]
   %8 = icmp sgt i64 %4, %7                        ; <i1> [#uses=1]
   br i1 %8, label %bb13, label %bb5
@@ -38,7 +38,7 @@
 
 bb7:                                              ; preds = %bb5
   store i64 0, i64* %val, align 4
-  %11 = load %struct.FILE** %1, align 4           ; <%struct.FILE*> [#uses=1]
+  %11 = load %struct.FILE*, %struct.FILE** %1, align 4           ; <%struct.FILE*> [#uses=1]
   %val8 = bitcast i64* %val to i8*                ; <i8*> [#uses=1]
   %12 = call  i32 @fread(i8* noalias %val8, i32 8, i32 1, %struct.FILE* noalias %11) nounwind ; <i32> [#uses=1]
   %13 = icmp eq i32 %12, 1                        ; <i1> [#uses=1]
@@ -49,7 +49,7 @@
   br i1 %14, label %bb13, label %bb11
 
 bb11:                                             ; preds = %bb10
-  %15 = load i64* %val, align 4                   ; <i64> [#uses=1]
+  %15 = load i64, i64* %val, align 4                   ; <i64> [#uses=1]
   %16 = call  i64 @asl_core_ntohq(i64 %15) nounwind ; <i64> [#uses=1]
   store i64 %16, i64* %out, align 4
   ret i32 0
diff --git a/llvm/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll b/llvm/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll
index 89232bf..e768417 100644
--- a/llvm/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll
+++ b/llvm/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll
@@ -12,7 +12,7 @@
 
 define i32 @interpret_threaded(i8* nocapture %opcodes) nounwind readonly optsize {
 entry:
-  %0 = load i8* %opcodes, align 1                 ; <i8> [#uses=1]
+  %0 = load i8, i8* %opcodes, align 1                 ; <i8> [#uses=1]
   %1 = zext i8 %0 to i32                          ; <i32> [#uses=1]
   %2 = getelementptr inbounds [5 x i8*], [5 x i8*]* @codetable.2928, i32 0, i32 %1 ; <i8**> [#uses=1]
   br label %bb
@@ -22,7 +22,7 @@
   %gotovar.22.0.in = phi i8** [ %gotovar.22.0.in.be, %bb.backedge ], [ %2, %entry ] ; <i8**> [#uses=1]
   %result.0 = phi i32 [ %result.0.be, %bb.backedge ], [ 0, %entry ] ; <i32> [#uses=6]
   %opcodes_addr.0 = getelementptr i8, i8* %opcodes, i32 %indvar ; <i8*> [#uses=4]
-  %gotovar.22.0 = load i8** %gotovar.22.0.in, align 4 ; <i8*> [#uses=1]
+  %gotovar.22.0 = load i8*, i8** %gotovar.22.0.in, align 4 ; <i8*> [#uses=1]
   indirectbr i8* %gotovar.22.0, [label %RETURN, label %INCREMENT, label %DECREMENT, label %DOUBLE, label %SWAPWORD]
 
 RETURN:                                           ; preds = %bb
@@ -30,7 +30,7 @@
 
 INCREMENT:                                        ; preds = %bb
   %3 = add nsw i32 %result.0, 1                   ; <i32> [#uses=1]
-  %4 = load i8* %opcodes_addr.0, align 1          ; <i8> [#uses=1]
+  %4 = load i8, i8* %opcodes_addr.0, align 1          ; <i8> [#uses=1]
   %5 = zext i8 %4 to i32                          ; <i32> [#uses=1]
   %6 = getelementptr inbounds [5 x i8*], [5 x i8*]* @codetable.2928, i32 0, i32 %5 ; <i8**> [#uses=1]
   br label %bb.backedge
@@ -43,14 +43,14 @@
 
 DECREMENT:                                        ; preds = %bb
   %7 = add i32 %result.0, -1                      ; <i32> [#uses=1]
-  %8 = load i8* %opcodes_addr.0, align 1          ; <i8> [#uses=1]
+  %8 = load i8, i8* %opcodes_addr.0, align 1          ; <i8> [#uses=1]
   %9 = zext i8 %8 to i32                          ; <i32> [#uses=1]
   %10 = getelementptr inbounds [5 x i8*], [5 x i8*]* @codetable.2928, i32 0, i32 %9 ; <i8**> [#uses=1]
   br label %bb.backedge
 
 DOUBLE:                                           ; preds = %bb
   %11 = shl i32 %result.0, 1                      ; <i32> [#uses=1]
-  %12 = load i8* %opcodes_addr.0, align 1         ; <i8> [#uses=1]
+  %12 = load i8, i8* %opcodes_addr.0, align 1         ; <i8> [#uses=1]
   %13 = zext i8 %12 to i32                        ; <i32> [#uses=1]
   %14 = getelementptr inbounds [5 x i8*], [5 x i8*]* @codetable.2928, i32 0, i32 %13 ; <i8**> [#uses=1]
   br label %bb.backedge
@@ -59,7 +59,7 @@
   %15 = shl i32 %result.0, 16                     ; <i32> [#uses=1]
   %16 = ashr i32 %result.0, 16                    ; <i32> [#uses=1]
   %17 = or i32 %15, %16                           ; <i32> [#uses=1]
-  %18 = load i8* %opcodes_addr.0, align 1         ; <i8> [#uses=1]
+  %18 = load i8, i8* %opcodes_addr.0, align 1         ; <i8> [#uses=1]
   %19 = zext i8 %18 to i32                        ; <i32> [#uses=1]
   %20 = getelementptr inbounds [5 x i8*], [5 x i8*]* @codetable.2928, i32 0, i32 %19 ; <i8**> [#uses=1]
   br label %bb.backedge
diff --git a/llvm/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll b/llvm/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
index ce20ef4..d736528 100644
--- a/llvm/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
+++ b/llvm/test/CodeGen/Thumb/2011-05-11-DAGLegalizer.ll
@@ -23,26 +23,26 @@
   br label %do.body
 
 do.body:                                          ; preds = %entry
-  %tmp = load i8** @kkkkkk, align 4
-  %tmp1 = load %struct.MMMMMMMMMMMM** %aidData.addr
+  %tmp = load i8*, i8** @kkkkkk, align 4
+  %tmp1 = load %struct.MMMMMMMMMMMM*, %struct.MMMMMMMMMMMM** %aidData.addr
   %eph = getelementptr inbounds %struct.MMMMMMMMMMMM, %struct.MMMMMMMMMMMM* %tmp1, i32 0, i32 0
   %arrayidx = getelementptr inbounds [4 x %struct.RRRRRRRR], [4 x %struct.RRRRRRRR]* %eph, i32 0, i32 0
   %tmp2 = bitcast %struct.RRRRRRRR* %agg.tmp to i8*
   %tmp3 = bitcast %struct.RRRRRRRR* %arrayidx to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 312, i32 4, i1 false)
-  %tmp5 = load %struct.MMMMMMMMMMMM** %aidData.addr
+  %tmp5 = load %struct.MMMMMMMMMMMM*, %struct.MMMMMMMMMMMM** %aidData.addr
   %eph6 = getelementptr inbounds %struct.MMMMMMMMMMMM, %struct.MMMMMMMMMMMM* %tmp5, i32 0, i32 0
   %arrayidx7 = getelementptr inbounds [4 x %struct.RRRRRRRR], [4 x %struct.RRRRRRRR]* %eph6, i32 0, i32 1
   %tmp8 = bitcast %struct.RRRRRRRR* %agg.tmp4 to i8*
   %tmp9 = bitcast %struct.RRRRRRRR* %arrayidx7 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp8, i8* %tmp9, i32 312, i32 4, i1 false)
-  %tmp11 = load %struct.MMMMMMMMMMMM** %aidData.addr
+  %tmp11 = load %struct.MMMMMMMMMMMM*, %struct.MMMMMMMMMMMM** %aidData.addr
   %eph12 = getelementptr inbounds %struct.MMMMMMMMMMMM, %struct.MMMMMMMMMMMM* %tmp11, i32 0, i32 0
   %arrayidx13 = getelementptr inbounds [4 x %struct.RRRRRRRR], [4 x %struct.RRRRRRRR]* %eph12, i32 0, i32 2
   %tmp14 = bitcast %struct.RRRRRRRR* %agg.tmp10 to i8*
   %tmp15 = bitcast %struct.RRRRRRRR* %arrayidx13 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp14, i8* %tmp15, i32 312, i32 4, i1 false)
-  %tmp17 = load %struct.MMMMMMMMMMMM** %aidData.addr
+  %tmp17 = load %struct.MMMMMMMMMMMM*, %struct.MMMMMMMMMMMM** %aidData.addr
   %eph18 = getelementptr inbounds %struct.MMMMMMMMMMMM, %struct.MMMMMMMMMMMM* %tmp17, i32 0, i32 0
   %arrayidx19 = getelementptr inbounds [4 x %struct.RRRRRRRR], [4 x %struct.RRRRRRRR]* %eph18, i32 0, i32 3
   %tmp20 = bitcast %struct.RRRRRRRR* %agg.tmp16 to i8*
diff --git a/llvm/test/CodeGen/Thumb/2011-EpilogueBug.ll b/llvm/test/CodeGen/Thumb/2011-EpilogueBug.ll
index 16789e6..9657dc7 100644
--- a/llvm/test/CodeGen/Thumb/2011-EpilogueBug.ll
+++ b/llvm/test/CodeGen/Thumb/2011-EpilogueBug.ll
@@ -6,7 +6,7 @@
 
 define void @t1(%struct.state* %v) {
 ; CHECK: push {r4
-  %tmp6 = load i32* null
+  %tmp6 = load i32, i32* null
   %tmp8 = alloca float, i32 %tmp6
   store i32 1, i32* null
   br label %return
diff --git a/llvm/test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll b/llvm/test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll
index 3ed3561..d5c8db7 100644
--- a/llvm/test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll
+++ b/llvm/test/CodeGen/Thumb/2014-06-10-thumb1-ldst-opt-bug.ll
@@ -7,9 +7,9 @@
 ; CHECK: ldm
 ; CHECK-NEXT: subs
 ; CHECK-NEXT: bl
-  %0 = load i32* %A, align 4
+  %0 = load i32, i32* %A, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i32 1
-  %1 = load i32* %arrayidx1, align 4
+  %1 = load i32, i32* %arrayidx1, align 4
   tail call void @bar(i32* %A, i32 %0, i32 %1) #2
   ret void
 }
diff --git a/llvm/test/CodeGen/Thumb/asmprinter-bug.ll b/llvm/test/CodeGen/Thumb/asmprinter-bug.ll
index 478f5f8..f1d4c22 100644
--- a/llvm/test/CodeGen/Thumb/asmprinter-bug.ll
+++ b/llvm/test/CodeGen/Thumb/asmprinter-bug.ll
@@ -16,13 +16,13 @@
 define void @adpcm_coder(i16* nocapture %indata, i8* nocapture %outdata, i32 %len, %struct.adpcm_state* nocapture %state) nounwind {
 entry:
 	%0 = getelementptr %struct.adpcm_state, %struct.adpcm_state* %state, i32 0, i32 0		; <i16*> [#uses=2]
-	%1 = load i16* %0, align 2		; <i16> [#uses=1]
+	%1 = load i16, i16* %0, align 2		; <i16> [#uses=1]
 	%2 = sext i16 %1 to i32		; <i32> [#uses=2]
 	%3 = getelementptr %struct.adpcm_state, %struct.adpcm_state* %state, i32 0, i32 1		; <i8*> [#uses=2]
-	%4 = load i8* %3, align 2		; <i8> [#uses=1]
+	%4 = load i8, i8* %3, align 2		; <i8> [#uses=1]
 	%5 = sext i8 %4 to i32		; <i32> [#uses=3]
 	%6 = getelementptr [89 x i32], [89 x i32]* @stepsizeTable, i32 0, i32 %5		; <i32*> [#uses=1]
-	%7 = load i32* %6, align 4		; <i32> [#uses=1]
+	%7 = load i32, i32* %6, align 4		; <i32> [#uses=1]
 	%8 = icmp sgt i32 %len, 0		; <i1> [#uses=1]
 	br i1 %8, label %bb, label %bb27
 
@@ -35,7 +35,7 @@
 	%valpred.132 = phi i32 [ %2, %entry ], [ %valpred.2, %bb25 ]		; <i32> [#uses=2]
 	%step.031 = phi i32 [ %7, %entry ], [ %36, %bb25 ]		; <i32> [#uses=5]
 	%inp.038 = getelementptr i16, i16* %indata, i32 %indvar		; <i16*> [#uses=1]
-	%9 = load i16* %inp.038, align 2		; <i16> [#uses=1]
+	%9 = load i16, i16* %inp.038, align 2		; <i16> [#uses=1]
 	%10 = sext i16 %9 to i32		; <i32> [#uses=1]
 	%11 = sub i32 %10, %valpred.132		; <i32> [#uses=3]
 	%12 = icmp slt i32 %11, 0		; <i1> [#uses=1]
@@ -80,14 +80,14 @@
 	%delta.2 = or i32 %delta.1, %25		; <i32> [#uses=1]
 	%29 = xor i32 %delta.2, 1		; <i32> [#uses=3]
 	%30 = getelementptr [16 x i32], [16 x i32]* @indexTable, i32 0, i32 %29		; <i32*> [#uses=1]
-	%31 = load i32* %30, align 4		; <i32> [#uses=1]
+	%31 = load i32, i32* %30, align 4		; <i32> [#uses=1]
 	%32 = add i32 %31, %index.033		; <i32> [#uses=2]
 	%33 = icmp slt i32 %32, 0		; <i1> [#uses=1]
 	%index.1 = select i1 %33, i32 0, i32 %32		; <i32> [#uses=2]
 	%34 = icmp sgt i32 %index.1, 88		; <i1> [#uses=1]
 	%index.2 = select i1 %34, i32 88, i32 %index.1		; <i32> [#uses=3]
 	%35 = getelementptr [89 x i32], [89 x i32]* @stepsizeTable, i32 0, i32 %index.2		; <i32*> [#uses=1]
-	%36 = load i32* %35, align 4		; <i32> [#uses=1]
+	%36 = load i32, i32* %35, align 4		; <i32> [#uses=1]
 	%37 = icmp eq i32 %bufferstep.035, 0		; <i1> [#uses=1]
 	br i1 %37, label %bb24, label %bb23
 
@@ -141,13 +141,13 @@
 define void @adpcm_decoder(i8* nocapture %indata, i16* nocapture %outdata, i32 %len, %struct.adpcm_state* nocapture %state) nounwind {
 entry:
 	%0 = getelementptr %struct.adpcm_state, %struct.adpcm_state* %state, i32 0, i32 0		; <i16*> [#uses=2]
-	%1 = load i16* %0, align 2		; <i16> [#uses=1]
+	%1 = load i16, i16* %0, align 2		; <i16> [#uses=1]
 	%2 = sext i16 %1 to i32		; <i32> [#uses=2]
 	%3 = getelementptr %struct.adpcm_state, %struct.adpcm_state* %state, i32 0, i32 1		; <i8*> [#uses=2]
-	%4 = load i8* %3, align 2		; <i8> [#uses=1]
+	%4 = load i8, i8* %3, align 2		; <i8> [#uses=1]
 	%5 = sext i8 %4 to i32		; <i32> [#uses=3]
 	%6 = getelementptr [89 x i32], [89 x i32]* @stepsizeTable, i32 0, i32 %5		; <i32*> [#uses=1]
-	%7 = load i32* %6, align 4		; <i32> [#uses=1]
+	%7 = load i32, i32* %6, align 4		; <i32> [#uses=1]
 	%8 = icmp sgt i32 %len, 0		; <i1> [#uses=1]
 	br i1 %8, label %bb, label %bb22
 
@@ -164,7 +164,7 @@
 	br i1 %9, label %bb2, label %bb3
 
 bb2:		; preds = %bb
-	%10 = load i8* %inp.131, align 1		; <i8> [#uses=1]
+	%10 = load i8, i8* %inp.131, align 1		; <i8> [#uses=1]
 	%11 = sext i8 %10 to i32		; <i32> [#uses=2]
 	%12 = getelementptr i8, i8* %inp.131, i32 1		; <i8*> [#uses=1]
 	%13 = ashr i32 %11, 4		; <i32> [#uses=1]
@@ -177,7 +177,7 @@
 	%delta.0 = and i32 %delta.0.in, 15		; <i32> [#uses=1]
 	%tmp = xor i32 %bufferstep.028, 1		; <i32> [#uses=1]
 	%14 = getelementptr [16 x i32], [16 x i32]* @indexTable, i32 0, i32 %delta.0		; <i32*> [#uses=1]
-	%15 = load i32* %14, align 4		; <i32> [#uses=1]
+	%15 = load i32, i32* %14, align 4		; <i32> [#uses=1]
 	%16 = add i32 %15, %index.026		; <i32> [#uses=2]
 	%17 = icmp slt i32 %16, 0		; <i1> [#uses=1]
 	%index.1 = select i1 %17, i32 0, i32 %16		; <i32> [#uses=2]
@@ -228,7 +228,7 @@
 bb20:		; preds = %bb19, %bb18, %bb13
 	%valpred.2 = phi i32 [ -32768, %bb19 ], [ 32767, %bb13 ], [ %valpred.0, %bb18 ]		; <i32> [#uses=3]
 	%34 = getelementptr [89 x i32], [89 x i32]* @stepsizeTable, i32 0, i32 %index.2		; <i32*> [#uses=1]
-	%35 = load i32* %34, align 4		; <i32> [#uses=1]
+	%35 = load i32, i32* %34, align 4		; <i32> [#uses=1]
 	%36 = trunc i32 %valpred.2 to i16		; <i16> [#uses=1]
 	store i16 %36, i16* %outp.030, align 2
 	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
@@ -270,10 +270,10 @@
 	br label %bb
 
 bb4:		; preds = %bb2
-	%6 = load %struct.FILE** @__stderrp, align 4		; <%struct.FILE*> [#uses=1]
-	%7 = load i16* getelementptr (%struct.adpcm_state* @state, i32 0, i32 0), align 4		; <i16> [#uses=1]
+	%6 = load %struct.FILE*, %struct.FILE** @__stderrp, align 4		; <%struct.FILE*> [#uses=1]
+	%7 = load i16, i16* getelementptr (%struct.adpcm_state* @state, i32 0, i32 0), align 4		; <i16> [#uses=1]
 	%8 = sext i16 %7 to i32		; <i32> [#uses=1]
-	%9 = load i8* getelementptr (%struct.adpcm_state* @state, i32 0, i32 1), align 2		; <i8> [#uses=1]
+	%9 = load i8, i8* getelementptr (%struct.adpcm_state* @state, i32 0, i32 1), align 2		; <i8> [#uses=1]
 	%10 = sext i8 %9 to i32		; <i32> [#uses=1]
 	%11 = tail call  i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %6, i8* getelementptr ([28 x i8]* @.str1, i32 0, i32 0), i32 %8, i32 %10) nounwind		; <i32> [#uses=0]
 	ret i32 0
diff --git a/llvm/test/CodeGen/Thumb/cortex-m0-unaligned-access.ll b/llvm/test/CodeGen/Thumb/cortex-m0-unaligned-access.ll
index c4403fe..cba1ca6 100644
--- a/llvm/test/CodeGen/Thumb/cortex-m0-unaligned-access.ll
+++ b/llvm/test/CodeGen/Thumb/cortex-m0-unaligned-access.ll
@@ -8,6 +8,6 @@
 ; V7M-LABEL: split_load
 ; V7M-NOT: ldrh
 ; V7M: bx lr
-  %val = load i32* %p, align 2
+  %val = load i32, i32* %p, align 2
   ret i32 %val
 }
diff --git a/llvm/test/CodeGen/Thumb/dyn-stackalloc.ll b/llvm/test/CodeGen/Thumb/dyn-stackalloc.ll
index 6b74faa..ba83053 100644
--- a/llvm/test/CodeGen/Thumb/dyn-stackalloc.ll
+++ b/llvm/test/CodeGen/Thumb/dyn-stackalloc.ll
@@ -12,7 +12,7 @@
 ; CHECK: mov r[[R1:[0-9]+]], sp
 ; CHECK: subs r[[R2:[0-9]+]], r[[R1]], r[[R0]]
 ; CHECK: mov sp, r[[R2]]
-	%tmp6 = load i32* null
+	%tmp6 = load i32, i32* null
 	%tmp8 = alloca float, i32 %tmp6
 	store i32 1, i32* null
 	br i1 false, label %bb123.preheader, label %return
@@ -22,7 +22,7 @@
 
 bb43:
 	call fastcc void @f1( float* %tmp8, float* null, i32 0 )
-	%tmp70 = load i32* null
+	%tmp70 = load i32, i32* null
 	%tmp85 = getelementptr float, float* %tmp8, i32 0
 	call fastcc void @f2( float* null, float* null, float* %tmp85, i32 %tmp70 )
 	ret void
diff --git a/llvm/test/CodeGen/Thumb/large-stack.ll b/llvm/test/CodeGen/Thumb/large-stack.ll
index 269bdd9..0d53458 100644
--- a/llvm/test/CodeGen/Thumb/large-stack.ll
+++ b/llvm/test/CodeGen/Thumb/large-stack.ll
@@ -56,7 +56,7 @@
     %tmp = alloca i32, align 4
     %a = alloca [805306369 x i8], align 16
     store i32 0, i32* %tmp
-    %tmp1 = load i32* %tmp
+    %tmp1 = load i32, i32* %tmp
     ret i32 %tmp1
 }
 
diff --git a/llvm/test/CodeGen/Thumb/ldm-merge-call.ll b/llvm/test/CodeGen/Thumb/ldm-merge-call.ll
index bd1f8a9..eb78529 100644
--- a/llvm/test/CodeGen/Thumb/ldm-merge-call.ll
+++ b/llvm/test/CodeGen/Thumb/ldm-merge-call.ll
@@ -8,9 +8,9 @@
 ; CHECK-LABEL: foo:
 ; CHECK: ldm r[[BASE:[0-9]]]!,
 ; CHECK-NEXT: mov r[[BASE]],
-  %0 = load i32* %A, align 4
+  %0 = load i32, i32* %A, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i32 1
-  %1 = load i32* %arrayidx1, align 4
+  %1 = load i32, i32* %arrayidx1, align 4
   %call = tail call i32 @bar(i32 %0, i32 %1, i32 %0, i32 %1) #2
   %call2 = tail call i32 @bar(i32 %0, i32 %1, i32 %0, i32 %1) #2
   ret void
diff --git a/llvm/test/CodeGen/Thumb/ldm-merge-struct.ll b/llvm/test/CodeGen/Thumb/ldm-merge-struct.ll
index 2f732e0..9136f05 100644
--- a/llvm/test/CodeGen/Thumb/ldm-merge-struct.ll
+++ b/llvm/test/CodeGen/Thumb/ldm-merge-struct.ll
@@ -11,8 +11,8 @@
 ; CHECK-LABEL: f:
 ; CHECK: ldm r[[BASE:[0-9]]],
 ; CHECK-NEXT-NOT: subs r[[BASE]]
-  %0 = load i32* getelementptr inbounds (%struct.S* @s, i32 0, i32 0), align 4
-  %1 = load i32* getelementptr inbounds (%struct.S* @s, i32 0, i32 1), align 4
+  %0 = load i32, i32* getelementptr inbounds (%struct.S* @s, i32 0, i32 0), align 4
+  %1 = load i32, i32* getelementptr inbounds (%struct.S* @s, i32 0, i32 1), align 4
   %cmp = icmp sgt i32 %0, %1
   %2 = sub i32 0, %1
   %cond.p = select i1 %cmp, i32 %1, i32 %2
diff --git a/llvm/test/CodeGen/Thumb/ldm-stm-base-materialization.ll b/llvm/test/CodeGen/Thumb/ldm-stm-base-materialization.ll
index e458d74..916e5ea 100644
--- a/llvm/test/CodeGen/Thumb/ldm-stm-base-materialization.ll
+++ b/llvm/test/CodeGen/Thumb/ldm-stm-base-materialization.ll
@@ -15,10 +15,10 @@
 ; CHECK-NEXT: ldm r[[NLB]],
 ; CHECK: adds r[[NSB:[0-9]]], r[[SB]], #4
 ; CHECK-NEXT: stm r[[NSB]]
-  %0 = load i32** @a, align 4
+  %0 = load i32*, i32** @a, align 4
   %arrayidx = getelementptr inbounds i32, i32* %0, i32 1
   %1 = bitcast i32* %arrayidx to i8*
-  %2 = load i32** @b, align 4
+  %2 = load i32*, i32** @b, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %2, i32 1
   %3 = bitcast i32* %arrayidx1 to i8*
   tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* %3, i32 24, i32 4, i1 false)
diff --git a/llvm/test/CodeGen/Thumb/ldr_ext.ll b/llvm/test/CodeGen/Thumb/ldr_ext.ll
index 2d25af3..90194ae 100644
--- a/llvm/test/CodeGen/Thumb/ldr_ext.ll
+++ b/llvm/test/CodeGen/Thumb/ldr_ext.ll
@@ -7,7 +7,7 @@
 ; V5: ldrb
 
 ; V6: ldrb
-    %tmp.u = load i8* %t1
+    %tmp.u = load i8, i8* %t1
     %tmp1.s = zext i8 %tmp.u to i32
     ret i32 %tmp1.s
 }
@@ -16,7 +16,7 @@
 ; V5: ldrh
 
 ; V6: ldrh
-    %tmp.u = load i16* %t1
+    %tmp.u = load i16, i16* %t1
     %tmp1.s = zext i16 %tmp.u to i32
     ret i32 %tmp1.s
 }
@@ -28,7 +28,7 @@
 
 ; V6: ldrb
 ; V6: sxtb
-    %tmp.s = load i8* %t0
+    %tmp.s = load i8, i8* %t0
     %tmp1.s = sext i8 %tmp.s to i32
     ret i32 %tmp1.s
 }
@@ -40,7 +40,7 @@
 
 ; V6: ldrh
 ; V6: sxth
-    %tmp.s = load i16* %t0
+    %tmp.s = load i16, i16* %t0
     %tmp1.s = sext i16 %tmp.s to i32
     ret i32 %tmp1.s
 }
@@ -51,7 +51,7 @@
 
 ; V6: movs r0, #0
 ; V6: ldrsh
-    %tmp.s = load i16* null
+    %tmp.s = load i16, i16* null
     %tmp1.s = sext i16 %tmp.s to i32
     ret i32 %tmp1.s
 }
diff --git a/llvm/test/CodeGen/Thumb/ldr_frame.ll b/llvm/test/CodeGen/Thumb/ldr_frame.ll
index 5be5972..fdcf3b7 100644
--- a/llvm/test/CodeGen/Thumb/ldr_frame.ll
+++ b/llvm/test/CodeGen/Thumb/ldr_frame.ll
@@ -5,7 +5,7 @@
 ; CHECK: ldr r0
 	%buf = alloca [32 x i32], align 4
 	%tmp = getelementptr [32 x i32], [32 x i32]* %buf, i32 0, i32 0
-	%tmp1 = load i32* %tmp
+	%tmp1 = load i32, i32* %tmp
 	ret i32 %tmp1
 }
 
@@ -15,7 +15,7 @@
 ; CHECK: ldrb
 	%buf = alloca [32 x i8], align 4
 	%tmp = getelementptr [32 x i8], [32 x i8]* %buf, i32 0, i32 0
-	%tmp1 = load i8* %tmp
+	%tmp1 = load i8, i8* %tmp
         %tmp2 = zext i8 %tmp1 to i32
 	ret i32 %tmp2
 }
@@ -25,7 +25,7 @@
 ; CHECK: ldr r0
 	%buf = alloca [32 x i32], align 4
 	%tmp = getelementptr [32 x i32], [32 x i32]* %buf, i32 0, i32 32
-	%tmp1 = load i32* %tmp
+	%tmp1 = load i32, i32* %tmp
 	ret i32 %tmp1
 }
 
@@ -35,7 +35,7 @@
 ; CHECK: ldrb
 	%buf = alloca [32 x i8], align 4
 	%tmp = getelementptr [32 x i8], [32 x i8]* %buf, i32 0, i32 2
-	%tmp1 = load i8* %tmp
+	%tmp1 = load i8, i8* %tmp
         %tmp2 = zext i8 %tmp1 to i32
 	ret i32 %tmp2
 }
diff --git a/llvm/test/CodeGen/Thumb/long.ll b/llvm/test/CodeGen/Thumb/long.ll
index 2449e5a..33f6389 100644
--- a/llvm/test/CodeGen/Thumb/long.ll
+++ b/llvm/test/CodeGen/Thumb/long.ll
@@ -65,7 +65,7 @@
 define i64 @f10() {
 entry:
         %a = alloca i64, align 8                ; <i64*> [#uses=1]
-        %retval = load i64* %a          ; <i64> [#uses=1]
+        %retval = load i64, i64* %a          ; <i64> [#uses=1]
         ret i64 %retval
 }
 
diff --git a/llvm/test/CodeGen/Thumb/segmented-stacks.ll b/llvm/test/CodeGen/Thumb/segmented-stacks.ll
index d6e25c7..09f5db8 100644
--- a/llvm/test/CodeGen/Thumb/segmented-stacks.ll
+++ b/llvm/test/CodeGen/Thumb/segmented-stacks.ll
@@ -55,7 +55,7 @@
 }
 
 define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
-       %addend = load i32 * %closure
+       %addend = load i32 , i32 * %closure
        %result = add i32 %other, %addend
        %mem = alloca i32, i32 10
        call void @dummy_use (i32* %mem, i32 10)
diff --git a/llvm/test/CodeGen/Thumb/stack-access.ll b/llvm/test/CodeGen/Thumb/stack-access.ll
index bcffda2..7845ed3 100644
--- a/llvm/test/CodeGen/Thumb/stack-access.ll
+++ b/llvm/test/CodeGen/Thumb/stack-access.ll
@@ -36,9 +36,9 @@
   %x = alloca i8, align 1
   %y = alloca i8, align 1
 ; CHECK: ldr r0, [sp]
-  %1 = load i8* %x, align 1
+  %1 = load i8, i8* %x, align 1
 ; CHECK: ldr r1, [sp, #4]
-  %2 = load i8* %y, align 1
+  %2 = load i8, i8* %y, align 1
   %3 = add nsw i8 %1, %2
   %4 = zext i8 %3 to i32
   ret i32 %4
@@ -48,9 +48,9 @@
   %x = alloca i16, align 2
   %y = alloca i16, align 2
 ; CHECK: ldr r0, [sp]
-  %1 = load i16* %x, align 2
+  %1 = load i16, i16* %x, align 2
 ; CHECK: ldr r1, [sp, #4]
-  %2 = load i16* %y, align 2
+  %2 = load i16, i16* %y, align 2
   %3 = add nsw i16 %1, %2
   %4 = zext i16 %3 to i32
   ret i32 %4
@@ -61,7 +61,7 @@
   %x = alloca i8, align 1
 ; CHECK: mov r0, sp
 ; CHECK: ldrb r0, [r0]
-  %1 = load i8* %x, align 1
+  %1 = load i8, i8* %x, align 1
   ret i8 %1
 }
 
@@ -69,6 +69,6 @@
   %x = alloca i16, align 2
 ; CHECK: mov r0, sp
 ; CHECK: ldrh r0, [r0]
-  %1 = load i16* %x, align 2
+  %1 = load i16, i16* %x, align 2
   ret i16 %1
 }
diff --git a/llvm/test/CodeGen/Thumb/stm-merge.ll b/llvm/test/CodeGen/Thumb/stm-merge.ll
index d4b4cd2..5492ad8 100644
--- a/llvm/test/CodeGen/Thumb/stm-merge.ll
+++ b/llvm/test/CodeGen/Thumb/stm-merge.ll
@@ -18,7 +18,7 @@
   store i32 %x, i32* %i, align 4
   store i32 %y, i32* %h, align 4
   store i32 %z, i32* %g, align 4
-  %.pr = load i32* @d, align 4
+  %.pr = load i32, i32* @d, align 4
   %cmp11 = icmp slt i32 %.pr, 1
   br i1 %cmp11, label %for.inc.lr.ph, label %for.body5
 
diff --git a/llvm/test/CodeGen/Thumb/thumb-ldm.ll b/llvm/test/CodeGen/Thumb/thumb-ldm.ll
index 7e9560e..18772d8 100644
--- a/llvm/test/CodeGen/Thumb/thumb-ldm.ll
+++ b/llvm/test/CodeGen/Thumb/thumb-ldm.ll
@@ -7,8 +7,8 @@
 ; CHECK: push {r7, lr}
 ; CHECK: ldm
 ; CHECK: pop {r7, pc}
-        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0)            ; <i32> [#uses=1]
-        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)           ; <i32> [#uses=1]
+        %tmp = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 0)            ; <i32> [#uses=1]
+        %tmp3 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)           ; <i32> [#uses=1]
         %tmp4 = call i32 @f1( i32 %tmp, i32 %tmp3 )                ; <i32> [#uses=1]
         ret i32 %tmp4
 }
@@ -18,9 +18,9 @@
 ; CHECK: push {r7, lr}
 ; CHECK: ldm
 ; CHECK: pop {r7, pc}
-        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)            ; <i32> [#uses=1]
-        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
-        %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4)           ; <i32> [#uses=1]
+        %tmp = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)            ; <i32> [#uses=1]
+        %tmp3 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
+        %tmp5 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 4)           ; <i32> [#uses=1]
         %tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 )             ; <i32> [#uses=1]
         ret i32 %tmp6
 }
@@ -30,9 +30,9 @@
 ; CHECK: push {r7, lr}
 ; CHECK: ldm
 ; CHECK: pop {r7, pc}
-        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)            ; <i32> [#uses=1]
-        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)           ; <i32> [#uses=1]
-        %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
+        %tmp = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)            ; <i32> [#uses=1]
+        %tmp3 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)           ; <i32> [#uses=1]
+        %tmp5 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
         %tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 )             ; <i32> [#uses=1]
         ret i32 %tmp6
 }
diff --git a/llvm/test/CodeGen/Thumb/vargs.ll b/llvm/test/CodeGen/Thumb/vargs.ll
index 3e8e333..9bc2212 100644
--- a/llvm/test/CodeGen/Thumb/vargs.ll
+++ b/llvm/test/CodeGen/Thumb/vargs.ll
@@ -17,7 +17,7 @@
 
 bb:             ; preds = %bb, %entry
         %a_addr.0 = phi i32 [ %a, %entry ], [ %tmp5, %bb ]              ; <i32> [#uses=2]
-        %tmp = load volatile i8** %va           ; <i8*> [#uses=2]
+        %tmp = load volatile i8*, i8** %va           ; <i8*> [#uses=2]
         %tmp2 = getelementptr i8, i8* %tmp, i32 4           ; <i8*> [#uses=1]
         store volatile i8* %tmp2, i8** %va
         %tmp5 = add i32 %a_addr.0, -1           ; <i32> [#uses=1]
@@ -26,7 +26,7 @@
 
 bb7:            ; preds = %bb
         %tmp3 = bitcast i8* %tmp to i32*                ; <i32*> [#uses=1]
-        %tmp.upgrd.3 = load i32* %tmp3          ; <i32> [#uses=1]
+        %tmp.upgrd.3 = load i32, i32* %tmp3          ; <i32> [#uses=1]
         %tmp10 = call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @str, i32 0, i64 0), i32 %tmp.upgrd.3 )                ; <i32> [#uses=0]
         %va.upgrd.4 = bitcast i8** %va to i8*           ; <i8*> [#uses=1]
         call void @llvm.va_end( i8* %va.upgrd.4 )
diff --git a/llvm/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll b/llvm/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll
index 6a83131..f76c8ff 100644
--- a/llvm/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll
+++ b/llvm/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll
@@ -18,7 +18,7 @@
 	%indvar.i = phi i32 [ 0, %bb1 ], [ %2, %bb.i ]		; <i32> [#uses=3]
 	%tmp39 = add i32 %indvar.i, %tmp38		; <i32> [#uses=1]
 	%p_addr.0.i = getelementptr i8, i8* undef, i32 %tmp39		; <i8*> [#uses=1]
-	%0 = load i8* %p_addr.0.i, align 1		; <i8> [#uses=1]
+	%0 = load i8, i8* %p_addr.0.i, align 1		; <i8> [#uses=1]
 	%1 = icmp slt i8 %0, 0		; <i1> [#uses=1]
 	%2 = add i32 %indvar.i, 1		; <i32> [#uses=1]
 	br i1 %1, label %bb.i, label %read_uleb128.exit
diff --git a/llvm/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll b/llvm/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
index 7509205..7b10fe3 100644
--- a/llvm/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
+++ b/llvm/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll
@@ -7,20 +7,20 @@
 entry:
 ; CHECK-LABEL: t:
 ; CHECK: add r7, sp, #12
-	%1 = load i8** undef, align 4		; <i8*> [#uses=3]
+	%1 = load i8*, i8** undef, align 4		; <i8*> [#uses=3]
 	%2 = getelementptr i8, i8* %1, i32 4		; <i8*> [#uses=1]
 	%3 = getelementptr i8, i8* %1, i32 8		; <i8*> [#uses=1]
 	%4 = bitcast i8* %2 to i32*		; <i32*> [#uses=1]
-	%5 = load i32* %4, align 4		; <i32> [#uses=1]
+	%5 = load i32, i32* %4, align 4		; <i32> [#uses=1]
 	%6 = trunc i32 %5 to i8		; <i8> [#uses=1]
 	%7 = getelementptr i8, i8* %1, i32 12		; <i8*> [#uses=1]
 	%8 = bitcast i8* %3 to i32*		; <i32*> [#uses=1]
-	%9 = load i32* %8, align 4		; <i32> [#uses=1]
+	%9 = load i32, i32* %8, align 4		; <i32> [#uses=1]
 	%10 = trunc i32 %9 to i16		; <i16> [#uses=1]
 	%11 = bitcast i8* %7 to i32*		; <i32*> [#uses=1]
-	%12 = load i32* %11, align 4		; <i32> [#uses=1]
+	%12 = load i32, i32* %11, align 4		; <i32> [#uses=1]
 	%13 = trunc i32 %12 to i16		; <i16> [#uses=1]
-	%14 = load i32* undef, align 4		; <i32> [#uses=2]
+	%14 = load i32, i32* undef, align 4		; <i32> [#uses=2]
 	%15 = sext i8 %6 to i32		; <i32> [#uses=2]
 	%16 = sext i16 %10 to i32		; <i32> [#uses=2]
 	%17 = sext i16 %13 to i32		; <i32> [#uses=2]
diff --git a/llvm/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll b/llvm/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll
index 4b6da58..77d2991 100644
--- a/llvm/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll
+++ b/llvm/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll
@@ -31,7 +31,7 @@
 define void @jpeg_idct_float(%struct.jpeg_decompress_struct* nocapture %cinfo, %struct.jpeg_component_info* nocapture %compptr, i16* nocapture %coef_block, i8** nocapture %output_buf, i32 %output_col) nounwind {
 entry:
 	%workspace = alloca [64 x float], align 4		; <[64 x float]*> [#uses=11]
-	%0 = load i8** undef, align 4		; <i8*> [#uses=5]
+	%0 = load i8*, i8** undef, align 4		; <i8*> [#uses=5]
 	br label %bb
 
 bb:		; preds = %bb, %entry
@@ -55,11 +55,11 @@
 	%quantptr.118 = bitcast i8* %scevgep76 to float*		; <float*> [#uses=1]
 	%scevgep79 = getelementptr i16, i16* %coef_block, i32 %tmp41		; <i16*> [#uses=0]
 	%inptr.117 = getelementptr i16, i16* %coef_block, i32 %indvar		; <i16*> [#uses=1]
-	%1 = load i16* null, align 2		; <i16> [#uses=1]
-	%2 = load i16* undef, align 2		; <i16> [#uses=1]
-	%3 = load i16* %inptr.117, align 2		; <i16> [#uses=1]
+	%1 = load i16, i16* null, align 2		; <i16> [#uses=1]
+	%2 = load i16, i16* undef, align 2		; <i16> [#uses=1]
+	%3 = load i16, i16* %inptr.117, align 2		; <i16> [#uses=1]
 	%4 = sitofp i16 %3 to float		; <float> [#uses=1]
-	%5 = load float* %quantptr.118, align 4		; <float> [#uses=1]
+	%5 = load float, float* %quantptr.118, align 4		; <float> [#uses=1]
 	%6 = fmul float %4, %5		; <float> [#uses=1]
 	%7 = fsub float %6, undef		; <float> [#uses=2]
 	%8 = fmul float undef, 0x3FF6A09E60000000		; <float> [#uses=1]
@@ -70,7 +70,7 @@
 	%13 = sitofp i16 %1 to float		; <float> [#uses=1]
 	%14 = fmul float %13, undef		; <float> [#uses=2]
 	%15 = sitofp i16 %2 to float		; <float> [#uses=1]
-	%16 = load float* undef, align 4		; <float> [#uses=1]
+	%16 = load float, float* undef, align 4		; <float> [#uses=1]
 	%17 = fmul float %15, %16		; <float> [#uses=1]
 	%18 = fadd float %14, undef		; <float> [#uses=2]
 	%19 = fsub float %14, undef		; <float> [#uses=2]
@@ -124,12 +124,12 @@
 	%scevgep28 = getelementptr [64 x float], [64 x float]* %workspace, i32 0, i32 %tmp2790		; <float*> [#uses=1]
 	%tmp3586 = or i32 %tmp, 7		; <i32> [#uses=0]
 	%wsptr.215 = getelementptr [64 x float], [64 x float]* %workspace, i32 0, i32 %tmp		; <float*> [#uses=1]
-	%40 = load i8** %scevgep, align 4		; <i8*> [#uses=4]
-	%41 = load float* %wsptr.215, align 4		; <float> [#uses=1]
-	%42 = load float* %scevgep24, align 4		; <float> [#uses=1]
+	%40 = load i8*, i8** %scevgep, align 4		; <i8*> [#uses=4]
+	%41 = load float, float* %wsptr.215, align 4		; <float> [#uses=1]
+	%42 = load float, float* %scevgep24, align 4		; <float> [#uses=1]
 	%43 = fadd float %41, %42		; <float> [#uses=1]
-	%44 = load float* %scevgep26, align 4		; <float> [#uses=1]
-	%45 = load float* %scevgep28, align 4		; <float> [#uses=1]
+	%44 = load float, float* %scevgep26, align 4		; <float> [#uses=1]
+	%45 = load float, float* %scevgep28, align 4		; <float> [#uses=1]
 	%46 = fadd float %44, %45		; <float> [#uses=1]
 	%47 = fsub float %43, %46		; <float> [#uses=2]
 	%48 = fsub float undef, 0.000000e+00		; <float> [#uses=1]
@@ -140,11 +140,11 @@
 	%53 = and i32 %52, 1023		; <i32> [#uses=1]
 	%.sum14 = add i32 %53, 128		; <i32> [#uses=1]
 	%54 = getelementptr i8, i8* %0, i32 %.sum14		; <i8*> [#uses=1]
-	%55 = load i8* %54, align 1		; <i8> [#uses=1]
+	%55 = load i8, i8* %54, align 1		; <i8> [#uses=1]
 	store i8 %55, i8* null, align 1
 	%56 = getelementptr i8, i8* %40, i32 %.sum10		; <i8*> [#uses=1]
 	store i8 0, i8* %56, align 1
-	%57 = load i8* null, align 1		; <i8> [#uses=1]
+	%57 = load i8, i8* null, align 1		; <i8> [#uses=1]
 	%58 = getelementptr i8, i8* %40, i32 %.sum8		; <i8*> [#uses=1]
 	store i8 %57, i8* %58, align 1
 	%59 = fadd float undef, %48		; <float> [#uses=1]
@@ -154,7 +154,7 @@
 	%63 = and i32 %62, 1023		; <i32> [#uses=1]
 	%.sum7 = add i32 %63, 128		; <i32> [#uses=1]
 	%64 = getelementptr i8, i8* %0, i32 %.sum7		; <i8*> [#uses=1]
-	%65 = load i8* %64, align 1		; <i8> [#uses=1]
+	%65 = load i8, i8* %64, align 1		; <i8> [#uses=1]
 	%66 = getelementptr i8, i8* %40, i32 %.sum6		; <i8*> [#uses=1]
 	store i8 %65, i8* %66, align 1
 	%67 = fptosi float undef to i32		; <i32> [#uses=1]
@@ -163,7 +163,7 @@
 	%70 = and i32 %69, 1023		; <i32> [#uses=1]
 	%.sum5 = add i32 %70, 128		; <i32> [#uses=1]
 	%71 = getelementptr i8, i8* %0, i32 %.sum5		; <i8*> [#uses=1]
-	%72 = load i8* %71, align 1		; <i8> [#uses=1]
+	%72 = load i8, i8* %71, align 1		; <i8> [#uses=1]
 	store i8 %72, i8* undef, align 1
 	%73 = fadd float %47, undef		; <float> [#uses=1]
 	%74 = fptosi float %73 to i32		; <i32> [#uses=1]
@@ -172,7 +172,7 @@
 	%77 = and i32 %76, 1023		; <i32> [#uses=1]
 	%.sum3 = add i32 %77, 128		; <i32> [#uses=1]
 	%78 = getelementptr i8, i8* %0, i32 %.sum3		; <i8*> [#uses=1]
-	%79 = load i8* %78, align 1		; <i8> [#uses=1]
+	%79 = load i8, i8* %78, align 1		; <i8> [#uses=1]
 	store i8 %79, i8* undef, align 1
 	%80 = fsub float %47, undef		; <float> [#uses=1]
 	%81 = fptosi float %80 to i32		; <i32> [#uses=1]
@@ -181,7 +181,7 @@
 	%84 = and i32 %83, 1023		; <i32> [#uses=1]
 	%.sum1 = add i32 %84, 128		; <i32> [#uses=1]
 	%85 = getelementptr i8, i8* %0, i32 %.sum1		; <i8*> [#uses=1]
-	%86 = load i8* %85, align 1		; <i8> [#uses=1]
+	%86 = load i8, i8* %85, align 1		; <i8> [#uses=1]
 	%87 = getelementptr i8, i8* %40, i32 %.sum		; <i8*> [#uses=1]
 	store i8 %86, i8* %87, align 1
 	%88 = add i32 %ctr.116, 1		; <i32> [#uses=2]
diff --git a/llvm/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll b/llvm/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll
index 80dd402..4a99e28 100644
--- a/llvm/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll
+++ b/llvm/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll
@@ -38,7 +38,7 @@
 	br i1 undef, label %bb5.i185.i.i, label %bb35.preheader.i.i.i
 
 bb35.preheader.i.i.i:		; preds = %bb2.i184.i.i
-	%0 = load i8* %line3.i.i.i, align 1		; <i8> [#uses=1]
+	%0 = load i8, i8* %line3.i.i.i, align 1		; <i8> [#uses=1]
 	%1 = icmp eq i8 %0, 59		; <i1> [#uses=1]
 	br i1 %1, label %bb36.i.i.i, label %bb9.i186.i.i
 
diff --git a/llvm/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll b/llvm/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll
index 88cbb0e..55b0921 100644
--- a/llvm/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll
+++ b/llvm/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll
@@ -32,9 +32,9 @@
 bb:		; preds = %entry
 	store i8 0, i8* %0, align 1
 	%1 = getelementptr %"struct.xalanc_1_8::FormatterToXML", %"struct.xalanc_1_8::FormatterToXML"* %this, i32 0, i32 0, i32 0, i32 0		; <i32 (...)***> [#uses=1]
-	%2 = load i32 (...)*** %1, align 4		; <i32 (...)**> [#uses=1]
+	%2 = load i32 (...)**, i32 (...)*** %1, align 4		; <i32 (...)**> [#uses=1]
 	%3 = getelementptr i32 (...)*, i32 (...)** %2, i32 11		; <i32 (...)**> [#uses=1]
-	%4 = load i32 (...)** %3, align 4		; <i32 (...)*> [#uses=1]
+	%4 = load i32 (...)*, i32 (...)** %3, align 4		; <i32 (...)*> [#uses=1]
 	%5 = bitcast i32 (...)* %4 to void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32)*		; <void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32)*> [#uses=1]
 	tail call  void %5(%"struct.xalanc_1_8::FormatterToXML"* %this, i16* %ch, i32 %length)
 	ret void
diff --git a/llvm/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll b/llvm/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
index 106f2ad..9d26d40 100644
--- a/llvm/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
+++ b/llvm/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
@@ -45,7 +45,7 @@
 bb5:		; preds = %bb5, %entry
 	%.pn = phi %struct.rec* [ %y.0, %bb5 ], [ undef, %entry ]		; <%struct.rec*> [#uses=1]
 	%y.0.in = getelementptr %struct.rec, %struct.rec* %.pn, i32 0, i32 0, i32 0, i32 1, i32 0		; <%struct.rec**> [#uses=1]
-	%y.0 = load %struct.rec** %y.0.in		; <%struct.rec*> [#uses=2]
+	%y.0 = load %struct.rec*, %struct.rec** %y.0.in		; <%struct.rec*> [#uses=2]
 	br i1 undef, label %bb5, label %bb6
 
 bb6:		; preds = %bb5
@@ -62,7 +62,7 @@
 	br label %FontSize.exit
 
 FontSize.exit:		; preds = %bb.i1, %FontHalfXHeight.exit
-	%1 = load i32* undef, align 4		; <i32> [#uses=1]
+	%1 = load i32, i32* undef, align 4		; <i32> [#uses=1]
 	%2 = icmp ult i32 0, undef		; <i1> [#uses=1]
 	br i1 %2, label %bb.i5, label %FontName.exit
 
@@ -75,13 +75,13 @@
 	%4 = call  i32 @"\01_fwrite"(i8* getelementptr ([11 x i8]* @.str1842948, i32 0, i32 0), i32 1, i32 10, i8* undef) nounwind		; <i32> [#uses=0]
 	%5 = sub i32 %colmark, undef		; <i32> [#uses=1]
 	%6 = sub i32 %rowmark, undef		; <i32> [#uses=1]
-	%7 = load %struct.FILE** @out_fp, align 4		; <%struct.FILE*> [#uses=1]
+	%7 = load %struct.FILE*, %struct.FILE** @out_fp, align 4		; <%struct.FILE*> [#uses=1]
 	%8 = call  i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %7, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 %5, i32 %6) nounwind		; <i32> [#uses=0]
 	store i32 0, i32* @cpexists, align 4
 	%9 = getelementptr %struct.rec, %struct.rec* %y.0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 1		; <i32*> [#uses=1]
-	%10 = load i32* %9, align 4		; <i32> [#uses=1]
+	%10 = load i32, i32* %9, align 4		; <i32> [#uses=1]
 	%11 = sub i32 0, %10		; <i32> [#uses=1]
-	%12 = load %struct.FILE** @out_fp, align 4		; <%struct.FILE*> [#uses=1]
+	%12 = load %struct.FILE*, %struct.FILE** @out_fp, align 4		; <%struct.FILE*> [#uses=1]
 	%13 = call  i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %12, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 undef, i32 %11) nounwind		; <i32> [#uses=0]
 	store i32 0, i32* @cpexists, align 4
 	br label %bb100.outer.outer
@@ -116,7 +116,7 @@
 	br i1 undef, label %bb2.i51, label %bb2.i.i15.critedge
 
 bb2.i51:		; preds = %bb.i47, %StringBeginsWith.exit88, %bb.i80
-	%15 = load i8* undef, align 1		; <i8> [#uses=0]
+	%15 = load i8, i8* undef, align 1		; <i8> [#uses=0]
 	br i1 false, label %StringBeginsWith.exit55thread-split, label %bb3.i52
 
 bb3.i52:		; preds = %bb2.i51
diff --git a/llvm/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll b/llvm/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
index 8ac666d..1ac59b4 100644
--- a/llvm/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
+++ b/llvm/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
@@ -77,7 +77,7 @@
 entry:
 	%buff = alloca [512 x i8], align 4		; <[512 x i8]*> [#uses=5]
 	%0 = getelementptr %struct.rec, %struct.rec* %x, i32 0, i32 0, i32 1, i32 0, i32 0		; <i8*> [#uses=2]
-	%1 = load i8* %0, align 4		; <i8> [#uses=1]
+	%1 = load i8, i8* %0, align 4		; <i8> [#uses=1]
 	%2 = add i8 %1, -94		; <i8> [#uses=1]
 	%3 = icmp ugt i8 %2, 1		; <i1> [#uses=1]
 	br i1 %3, label %bb, label %bb1
@@ -89,7 +89,7 @@
 	%4 = getelementptr %struct.rec, %struct.rec* %x, i32 0, i32 0, i32 2		; <%struct.SECOND_UNION*> [#uses=1]
 	%5 = bitcast %struct.SECOND_UNION* %4 to %5*		; <%5*> [#uses=1]
 	%6 = getelementptr %5, %5* %5, i32 0, i32 1		; <i8*> [#uses=1]
-	%7 = load i8* %6, align 1		; <i8> [#uses=1]
+	%7 = load i8, i8* %6, align 1		; <i8> [#uses=1]
 	%8 = icmp eq i8 %7, 0		; <i1> [#uses=1]
 	br i1 %8, label %bb2, label %bb3
 
@@ -98,15 +98,15 @@
 	br label %bb3
 
 bb3:		; preds = %bb2, %bb1
-	%9 = load %struct.rec** undef, align 4		; <%struct.rec*> [#uses=0]
+	%9 = load %struct.rec*, %struct.rec** undef, align 4		; <%struct.rec*> [#uses=0]
 	br label %bb5
 
 bb5:		; preds = %bb5, %bb3
-	%y.0 = load %struct.rec** null		; <%struct.rec*> [#uses=2]
+	%y.0 = load %struct.rec*, %struct.rec** null		; <%struct.rec*> [#uses=2]
 	br i1 false, label %bb5, label %bb6
 
 bb6:		; preds = %bb5
-	%10 = load i8* %0, align 4		; <i8> [#uses=1]
+	%10 = load i8, i8* %0, align 4		; <i8> [#uses=1]
 	%11 = getelementptr %struct.rec, %struct.rec* %y.0, i32 0, i32 0, i32 1, i32 0		; <%struct.FILE_POS*> [#uses=1]
 	%12 = call  %struct.FILE* @OpenIncGraphicFile(i8* undef, i8 zeroext %10, %struct.rec** null, %struct.FILE_POS* %11, i32* undef) nounwind		; <%struct.FILE*> [#uses=4]
 	br i1 false, label %bb7, label %bb8
@@ -116,7 +116,7 @@
 
 bb8:		; preds = %bb6
 	%13 = and i32 undef, 4095		; <i32> [#uses=2]
-	%14 = load i32* @currentfont, align 4		; <i32> [#uses=0]
+	%14 = load i32, i32* @currentfont, align 4		; <i32> [#uses=0]
 	br i1 false, label %bb10, label %bb9
 
 bb9:		; preds = %bb8
@@ -125,7 +125,7 @@
 
 bb.i:		; preds = %bb9
 	call  void (i32, i32, i8*, i32, %struct.FILE_POS*, ...)* @Error(i32 1, i32 2, i8* getelementptr ([20 x i8]* @.str24239, i32 0, i32 0), i32 0, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*), i8* getelementptr ([17 x i8]* @.str111875, i32 0, i32 0)) nounwind
-	%.pre186 = load i32* @currentfont, align 4		; <i32> [#uses=1]
+	%.pre186 = load i32, i32* @currentfont, align 4		; <i32> [#uses=1]
 	br label %FontHalfXHeight.exit
 
 FontHalfXHeight.exit:		; preds = %bb.i, %bb9
@@ -144,9 +144,9 @@
 
 FontSize.exit:		; preds = %bb1.i
 	%17 = getelementptr %struct.FONT_INFO, %struct.FONT_INFO* undef, i32 %16, i32 5		; <%struct.rec**> [#uses=0]
-	%18 = load i32* undef, align 4		; <i32> [#uses=1]
-	%19 = load i32* @currentfont, align 4		; <i32> [#uses=2]
-	%20 = load i32* @font_count, align 4		; <i32> [#uses=1]
+	%18 = load i32, i32* undef, align 4		; <i32> [#uses=1]
+	%19 = load i32, i32* @currentfont, align 4		; <i32> [#uses=2]
+	%20 = load i32, i32* @font_count, align 4		; <i32> [#uses=1]
 	%21 = icmp ult i32 %20, %19		; <i1> [#uses=1]
 	br i1 %21, label %bb.i5, label %FontName.exit
 
@@ -163,19 +163,19 @@
 bb10:		; preds = %FontName.exit, %bb8
 	%25 = call  i32 @"\01_fwrite"(i8* getelementptr ([11 x i8]* @.str1842948, i32 0, i32 0), i32 1, i32 10, i8* undef) nounwind		; <i32> [#uses=0]
 	%26 = sub i32 %rowmark, undef		; <i32> [#uses=1]
-	%27 = load %struct.FILE** @out_fp, align 4		; <%struct.FILE*> [#uses=1]
+	%27 = load %struct.FILE*, %struct.FILE** @out_fp, align 4		; <%struct.FILE*> [#uses=1]
 	%28 = call  i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %27, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 undef, i32 %26) nounwind		; <i32> [#uses=0]
 	store i32 0, i32* @cpexists, align 4
 	%29 = call  i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([17 x i8]* @.str192782, i32 0, i32 0), double 2.000000e+01, double 2.000000e+01) nounwind		; <i32> [#uses=0]
 	%30 = getelementptr %struct.rec, %struct.rec* %y.0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
-	%31 = load i32* %30, align 4		; <i32> [#uses=1]
+	%31 = load i32, i32* %30, align 4		; <i32> [#uses=1]
 	%32 = sub i32 0, %31		; <i32> [#uses=1]
-	%33 = load i32* undef, align 4		; <i32> [#uses=1]
+	%33 = load i32, i32* undef, align 4		; <i32> [#uses=1]
 	%34 = sub i32 0, %33		; <i32> [#uses=1]
-	%35 = load %struct.FILE** @out_fp, align 4		; <%struct.FILE*> [#uses=1]
+	%35 = load %struct.FILE*, %struct.FILE** @out_fp, align 4		; <%struct.FILE*> [#uses=1]
 	%36 = call  i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %35, i8* getelementptr ([17 x i8]* @.str212784, i32 0, i32 0), i32 %32, i32 %34) nounwind		; <i32> [#uses=0]
 	store i32 0, i32* @cpexists, align 4
-	%37 = load %struct.rec** null, align 4		; <%struct.rec*> [#uses=1]
+	%37 = load %struct.rec*, %struct.rec** null, align 4		; <%struct.rec*> [#uses=1]
 	%38 = getelementptr %struct.rec, %struct.rec* %37, i32 0, i32 0, i32 4		; <%struct.FOURTH_UNION*> [#uses=1]
 	%39 = call  i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* undef, i8* getelementptr ([23 x i8]* @.str1852949, i32 0, i32 0), %struct.FOURTH_UNION* %38) nounwind		; <i32> [#uses=0]
 	%buff14 = getelementptr [512 x i8], [512 x i8]* %buff, i32 0, i32 0		; <i8*> [#uses=5]
@@ -199,17 +199,17 @@
 	%indvar.i81 = phi i32 [ %indvar.next.i79, %bb.i80 ], [ 0, %bb100.outer ]		; <i32> [#uses=3]
 	%pp.0.i82 = getelementptr [27 x i8], [27 x i8]* @.str141878, i32 0, i32 %indvar.i81		; <i8*> [#uses=2]
 	%sp.0.i83 = getelementptr [512 x i8], [512 x i8]* %buff, i32 0, i32 %indvar.i81		; <i8*> [#uses=1]
-	%44 = load i8* %sp.0.i83, align 1		; <i8> [#uses=2]
+	%44 = load i8, i8* %sp.0.i83, align 1		; <i8> [#uses=2]
 	%45 = icmp eq i8 %44, 0		; <i1> [#uses=1]
 	br i1 %45, label %StringBeginsWith.exit88thread-split, label %bb3.i85
 
 bb3.i85:		; preds = %bb2.i84
-	%46 = load i8* %pp.0.i82, align 1		; <i8> [#uses=3]
+	%46 = load i8, i8* %pp.0.i82, align 1		; <i8> [#uses=3]
 	%47 = icmp eq i8 %46, 0		; <i1> [#uses=1]
 	br i1 %47, label %StringBeginsWith.exit88, label %bb.i80
 
 StringBeginsWith.exit88thread-split:		; preds = %bb2.i84
-	%.pr = load i8* %pp.0.i82		; <i8> [#uses=1]
+	%.pr = load i8, i8* %pp.0.i82		; <i8> [#uses=1]
 	br label %StringBeginsWith.exit88
 
 StringBeginsWith.exit88:		; preds = %StringBeginsWith.exit88thread-split, %bb3.i85
@@ -225,16 +225,16 @@
 
 bb3.i77:		; preds = %bb2.i75, %StringBeginsWith.exit88
 	%sp.0.i76 = getelementptr [512 x i8], [512 x i8]* %buff, i32 0, i32 undef		; <i8*> [#uses=1]
-	%49 = load i8* %sp.0.i76, align 1		; <i8> [#uses=1]
+	%49 = load i8, i8* %sp.0.i76, align 1		; <i8> [#uses=1]
 	%50 = icmp eq i8 %49, 0		; <i1> [#uses=1]
 	br i1 %50, label %bb24, label %bb2.i.i68
 
 bb24:		; preds = %bb3.i77
 	%51 = call  %struct.rec* @MakeWord(i32 11, i8* %41, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind		; <%struct.rec*> [#uses=0]
-	%52 = load i8* getelementptr ([150 x i8]* @zz_lengths, i32 0, i32 0), align 4		; <i8> [#uses=1]
+	%52 = load i8, i8* getelementptr ([150 x i8]* @zz_lengths, i32 0, i32 0), align 4		; <i8> [#uses=1]
 	%53 = zext i8 %52 to i32		; <i32> [#uses=2]
 	%54 = getelementptr [524 x %struct.rec*], [524 x %struct.rec*]* @zz_free, i32 0, i32 %53		; <%struct.rec**> [#uses=2]
-	%55 = load %struct.rec** %54, align 4		; <%struct.rec*> [#uses=3]
+	%55 = load %struct.rec*, %struct.rec** %54, align 4		; <%struct.rec*> [#uses=3]
 	%56 = icmp eq %struct.rec* %55, null		; <i1> [#uses=1]
 	br i1 %56, label %bb27, label %bb28
 
@@ -262,7 +262,7 @@
 
 bb28:		; preds = %bb24
 	store %struct.rec* %55, %struct.rec** @zz_hold, align 4
-	%59 = load %struct.rec** null, align 4		; <%struct.rec*> [#uses=1]
+	%59 = load %struct.rec*, %struct.rec** null, align 4		; <%struct.rec*> [#uses=1]
 	store %struct.rec* %59, %struct.rec** %54, align 4
 	br label %bb29
 
@@ -280,7 +280,7 @@
 	br i1 undef, label %bb41, label %bb37
 
 bb37:		; preds = %bb35
-	%60 = load %struct.rec** null, align 4		; <%struct.rec*> [#uses=1]
+	%60 = load %struct.rec*, %struct.rec** null, align 4		; <%struct.rec*> [#uses=1]
 	store %struct.rec* %60, %struct.rec** undef
 	store %struct.rec* undef, %struct.rec** null
 	store %struct.rec* %.pre184, %struct.rec** null, align 4
@@ -298,11 +298,11 @@
 
 bb2.i51:		; preds = %bb.i47, %bb2.i.i68, %StringBeginsWith.exit88, %bb.i80
 	%pp.0.i49 = getelementptr [17 x i8], [17 x i8]* @.str1872951, i32 0, i32 0		; <i8*> [#uses=1]
-	%64 = load i8* null, align 1		; <i8> [#uses=1]
+	%64 = load i8, i8* null, align 1		; <i8> [#uses=1]
 	br i1 false, label %StringBeginsWith.exit55thread-split, label %bb3.i52
 
 bb3.i52:		; preds = %bb2.i51
-	%65 = load i8* %pp.0.i49, align 1		; <i8> [#uses=1]
+	%65 = load i8, i8* %pp.0.i49, align 1		; <i8> [#uses=1]
 	br i1 false, label %StringBeginsWith.exit55, label %bb.i47
 
 StringBeginsWith.exit55thread-split:		; preds = %bb2.i51
@@ -322,7 +322,7 @@
 	br i1 false, label %StringBeginsWith.exitthread-split.i18, label %bb3.i.i16
 
 bb3.i.i16:		; preds = %bb2.i.i15
-	%66 = load i8* %pp.0.i.i13, align 1		; <i8> [#uses=1]
+	%66 = load i8, i8* %pp.0.i.i13, align 1		; <i8> [#uses=1]
 	br label %StringBeginsWith.exit.i20
 
 StringBeginsWith.exitthread-split.i18:		; preds = %bb2.i.i15
@@ -337,7 +337,7 @@
 	%indvar.i3.i23 = phi i32 [ %indvar.next.i1.i21, %bb2.i6.i26 ], [ 0, %StringBeginsWith.exit.i20 ]		; <i32> [#uses=3]
 	%sp.0.i5.i25 = getelementptr [512 x i8], [512 x i8]* %buff, i32 0, i32 %indvar.i3.i23		; <i8*> [#uses=0]
 	%pp.0.i4.i24 = getelementptr [10 x i8], [10 x i8]* @.str752839, i32 0, i32 %indvar.i3.i23		; <i8*> [#uses=1]
-	%68 = load i8* %pp.0.i4.i24, align 1		; <i8> [#uses=0]
+	%68 = load i8, i8* %pp.0.i4.i24, align 1		; <i8> [#uses=0]
 	%indvar.next.i1.i21 = add i32 %indvar.i3.i23, 1		; <i32> [#uses=1]
 	br i1 undef, label %bb2.i6.i26, label %bb55
 
@@ -368,10 +368,10 @@
 
 bb66:		; preds = %StringBeginsWith.exit
 	%71 = call  %struct.rec* @MakeWord(i32 11, i8* undef, %struct.FILE_POS* bitcast (%4* @no_file_pos to %struct.FILE_POS*)) nounwind		; <%struct.rec*> [#uses=4]
-	%72 = load i8* getelementptr ([150 x i8]* @zz_lengths, i32 0, i32 0), align 4		; <i8> [#uses=1]
+	%72 = load i8, i8* getelementptr ([150 x i8]* @zz_lengths, i32 0, i32 0), align 4		; <i8> [#uses=1]
 	%73 = zext i8 %72 to i32		; <i32> [#uses=2]
 	%74 = getelementptr [524 x %struct.rec*], [524 x %struct.rec*]* @zz_free, i32 0, i32 %73		; <%struct.rec**> [#uses=2]
-	%75 = load %struct.rec** %74, align 4		; <%struct.rec*> [#uses=3]
+	%75 = load %struct.rec*, %struct.rec** %74, align 4		; <%struct.rec*> [#uses=3]
 	%76 = icmp eq %struct.rec* %75, null		; <i1> [#uses=1]
 	br i1 %76, label %bb69, label %bb70
 
@@ -404,7 +404,7 @@
 	br label %bb71
 
 bb70:		; preds = %bb66
-	%84 = load %struct.rec** null, align 4		; <%struct.rec*> [#uses=1]
+	%84 = load %struct.rec*, %struct.rec** null, align 4		; <%struct.rec*> [#uses=1]
 	store %struct.rec* %84, %struct.rec** %74, align 4
 	br label %bb71
 
@@ -416,7 +416,7 @@
 	%88 = getelementptr %struct.rec, %struct.rec* %.pre185, i32 0, i32 0, i32 0, i32 1, i32 0		; <%struct.rec**> [#uses=1]
 	store %struct.rec* %.pre185, %struct.rec** @xx_link, align 4
 	store %struct.rec* %.pre185, %struct.rec** @zz_res, align 4
-	%89 = load %struct.rec** @needs, align 4		; <%struct.rec*> [#uses=2]
+	%89 = load %struct.rec*, %struct.rec** @needs, align 4		; <%struct.rec*> [#uses=2]
 	store %struct.rec* %89, %struct.rec** @zz_hold, align 4
 	br i1 false, label %bb77, label %bb73
 
@@ -435,7 +435,7 @@
 bb79:		; preds = %bb77
 	%91 = getelementptr %struct.rec, %struct.rec* %71, i32 0, i32 0, i32 0, i32 1, i32 0		; <%struct.rec**> [#uses=1]
 	store %struct.rec* null, %struct.rec** @zz_tmp, align 4
-	%92 = load %struct.rec** %88, align 4		; <%struct.rec*> [#uses=1]
+	%92 = load %struct.rec*, %struct.rec** %88, align 4		; <%struct.rec*> [#uses=1]
 	store %struct.rec* %92, %struct.rec** %91
 	%93 = getelementptr %struct.rec, %struct.rec* undef, i32 0, i32 0, i32 0, i32 1, i32 1		; <%struct.rec**> [#uses=1]
 	store %struct.rec* %71, %struct.rec** %93, align 4
@@ -467,11 +467,11 @@
 	br i1 undef, label %strip_out.exitthread-split, label %bb3.i7.i
 
 bb3.i7.i:		; preds = %bb2.i6.i
-	%94 = load i8* undef, align 1		; <i8> [#uses=1]
+	%94 = load i8, i8* undef, align 1		; <i8> [#uses=1]
 	br i1 undef, label %strip_out.exit, label %bb.i2.i
 
 strip_out.exitthread-split:		; preds = %bb2.i6.i
-	%.pr100 = load i8* undef		; <i8> [#uses=1]
+	%.pr100 = load i8, i8* undef		; <i8> [#uses=1]
 	br label %strip_out.exit
 
 strip_out.exit:		; preds = %strip_out.exitthread-split, %bb3.i7.i
@@ -501,7 +501,7 @@
 	unreachable
 
 bb103:		; preds = %bb101.split
-	%99 = load %struct.FILE** @out_fp, align 4		; <%struct.FILE*> [#uses=1]
+	%99 = load %struct.FILE*, %struct.FILE** @out_fp, align 4		; <%struct.FILE*> [#uses=1]
 	%100 = call  i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* %99, i8* getelementptr ([26 x i8]* @.str1932957, i32 0, i32 0)) nounwind		; <i32> [#uses=0]
 	store i32 0, i32* @wordcount, align 4
 	ret void
diff --git a/llvm/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll b/llvm/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll
index ad32dc9..66ed876 100644
--- a/llvm/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll
+++ b/llvm/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll
@@ -17,7 +17,7 @@
 	br i1 undef, label %bb11, label %bb5
 
 bb11:		; preds = %bb5
-	%0 = load i32* undef, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* undef, align 4		; <i32> [#uses=1]
 	%1 = xor i32 %0, 123459876		; <i32> [#uses=1]
 	%2 = sdiv i32 %1, 127773		; <i32> [#uses=1]
 	%3 = mul i32 %2, 2836		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll b/llvm/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll
index 18e7e93..5480868 100644
--- a/llvm/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll
+++ b/llvm/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll
@@ -33,12 +33,12 @@
 	br label %bb
 
 bb:		; preds = %bb, %entry
-	%0 = load float* undef, align 4		; <float> [#uses=1]
+	%0 = load float, float* undef, align 4		; <float> [#uses=1]
 	%1 = fmul float undef, %0		; <float> [#uses=2]
 	%tmp73 = add i32 0, 224		; <i32> [#uses=1]
 	%scevgep74 = getelementptr i8, i8* null, i32 %tmp73		; <i8*> [#uses=1]
 	%scevgep7475 = bitcast i8* %scevgep74 to float*		; <float*> [#uses=1]
-	%2 = load float* null, align 4		; <float> [#uses=1]
+	%2 = load float, float* null, align 4		; <float> [#uses=1]
 	%3 = fmul float 0.000000e+00, %2		; <float> [#uses=2]
 	%4 = fadd float %1, %3		; <float> [#uses=1]
 	%5 = fsub float %1, %3		; <float> [#uses=2]
@@ -51,7 +51,7 @@
 	%12 = sitofp i16 undef to float		; <float> [#uses=1]
 	%13 = fmul float %12, 0.000000e+00		; <float> [#uses=2]
 	%14 = sitofp i16 undef to float		; <float> [#uses=1]
-	%15 = load float* %scevgep7475, align 4		; <float> [#uses=1]
+	%15 = load float, float* %scevgep7475, align 4		; <float> [#uses=1]
 	%16 = fmul float %14, %15		; <float> [#uses=2]
 	%17 = fadd float undef, undef		; <float> [#uses=2]
 	%18 = fadd float %13, %16		; <float> [#uses=2]
diff --git a/llvm/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll b/llvm/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll
index 6bccf93..2bbed1b 100644
--- a/llvm/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll
+++ b/llvm/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll
@@ -3,11 +3,11 @@
 define float @t1(i32 %v0) nounwind {
 entry:
 	store i32 undef, i32* undef, align 4
-	%0 = load [4 x i8]** undef, align 4		; <[4 x i8]*> [#uses=1]
-	%1 = load i8* undef, align 1		; <i8> [#uses=1]
+	%0 = load [4 x i8]*, [4 x i8]** undef, align 4		; <[4 x i8]*> [#uses=1]
+	%1 = load i8, i8* undef, align 1		; <i8> [#uses=1]
 	%2 = zext i8 %1 to i32		; <i32> [#uses=1]
 	%3 = getelementptr [4 x i8], [4 x i8]* %0, i32 %v0, i32 0		; <i8*> [#uses=1]
-	%4 = load i8* %3, align 1		; <i8> [#uses=1]
+	%4 = load i8, i8* %3, align 1		; <i8> [#uses=1]
 	%5 = zext i8 %4 to i32		; <i32> [#uses=1]
 	%6 = sub i32 %5, %2		; <i32> [#uses=1]
 	%7 = sitofp i32 %6 to float		; <float> [#uses=1]
diff --git a/llvm/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll b/llvm/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll
index 632273f..7d7445e 100644
--- a/llvm/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll
+++ b/llvm/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll
@@ -37,25 +37,25 @@
 define internal i32 @transpose() nounwind readonly {
 ; CHECK: push
 entry:
-  %0 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 1), align 4 ; <i32> [#uses=1]
+  %0 = load i32, i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 1), align 4 ; <i32> [#uses=1]
   %1 = shl i32 %0, 7                              ; <i32> [#uses=1]
-  %2 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 2), align 4 ; <i32> [#uses=1]
+  %2 = load i32, i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 2), align 4 ; <i32> [#uses=1]
   %3 = or i32 %1, %2                              ; <i32> [#uses=1]
   %4 = shl i32 %3, 7                              ; <i32> [#uses=1]
-  %5 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 3), align 4 ; <i32> [#uses=1]
+  %5 = load i32, i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 3), align 4 ; <i32> [#uses=1]
   %6 = or i32 %4, %5                              ; <i32> [#uses=3]
-  %7 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 7), align 4 ; <i32> [#uses=1]
+  %7 = load i32, i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 7), align 4 ; <i32> [#uses=1]
   %8 = shl i32 %7, 7                              ; <i32> [#uses=1]
-  %9 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 6), align 4 ; <i32> [#uses=1]
+  %9 = load i32, i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 6), align 4 ; <i32> [#uses=1]
   %10 = or i32 %8, %9                             ; <i32> [#uses=1]
   %11 = shl i32 %10, 7                            ; <i32> [#uses=1]
-  %12 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 5), align 4 ; <i32> [#uses=1]
+  %12 = load i32, i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 5), align 4 ; <i32> [#uses=1]
   %13 = or i32 %11, %12                           ; <i32> [#uses=3]
   %14 = icmp ugt i32 %6, %13                      ; <i1> [#uses=2]
   %.pn2.in.i = select i1 %14, i32 %6, i32 %13     ; <i32> [#uses=1]
   %.pn1.in.i = select i1 %14, i32 %13, i32 %6     ; <i32> [#uses=1]
   %.pn2.i = shl i32 %.pn2.in.i, 7                 ; <i32> [#uses=1]
-  %.pn3.i = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 4) ; <i32> [#uses=1]
+  %.pn3.i = load i32, i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 4) ; <i32> [#uses=1]
   %.pn.in.in.i = or i32 %.pn2.i, %.pn3.i          ; <i32> [#uses=1]
   %.pn.in.i = zext i32 %.pn.in.in.i to i64        ; <i64> [#uses=1]
   %.pn.i = shl i64 %.pn.in.i, 21                  ; <i64> [#uses=1]
@@ -67,19 +67,19 @@
   %18 = trunc i64 %17 to i32                      ; <i32> [#uses=1]
   %19 = urem i32 %16, 179                         ; <i32> [#uses=1]
   %20 = or i32 %19, 131072                        ; <i32> [#uses=1]
-  %21 = load i32** @ht, align 4                   ; <i32*> [#uses=1]
+  %21 = load i32*, i32** @ht, align 4                   ; <i32*> [#uses=1]
   br label %bb5
 
 bb:                                               ; preds = %bb5
   %22 = getelementptr inbounds i32, i32* %21, i32 %x.0 ; <i32*> [#uses=1]
-  %23 = load i32* %22, align 4                    ; <i32> [#uses=1]
+  %23 = load i32, i32* %22, align 4                    ; <i32> [#uses=1]
   %24 = icmp eq i32 %23, %16                      ; <i1> [#uses=1]
   br i1 %24, label %bb1, label %bb2
 
 bb1:                                              ; preds = %bb
-  %25 = load i8** @he, align 4                    ; <i8*> [#uses=1]
+  %25 = load i8*, i8** @he, align 4                    ; <i8*> [#uses=1]
   %26 = getelementptr inbounds i8, i8* %25, i32 %x.0  ; <i8*> [#uses=1]
-  %27 = load i8* %26, align 1                     ; <i8> [#uses=1]
+  %27 = load i8, i8* %26, align 1                     ; <i8> [#uses=1]
   %28 = sext i8 %27 to i32                        ; <i32> [#uses=1]
   ret i32 %28
 
diff --git a/llvm/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll b/llvm/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
index 77d8ec2..8fdff02 100644
--- a/llvm/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
+++ b/llvm/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll
@@ -44,13 +44,13 @@
   br i1 undef, label %if.then1992, label %if.else2003
 
 if.then1992:                                      ; preds = %for.body1940
-  %tmp14.i302 = load i32* undef                   ; <i32> [#uses=4]
+  %tmp14.i302 = load i32, i32* undef                   ; <i32> [#uses=4]
   %add.i307452 = or i32 %shl1959, 1               ; <i32> [#uses=1]
   %sub.i308 = add i32 %shl, -1                    ; <i32> [#uses=4]
   call  void undef(i32 %tmp14.i302, i32 %sub.i308, i32 %shl1959, i32 0, %struct.pix_pos* undef) nounwind
-  %tmp49.i309 = load void (i32, i32, i32, i32, %struct.pix_pos*)** @getNeighbour ; <void (i32, i32, i32, i32, %struct.pix_pos*)*> [#uses=1]
+  %tmp49.i309 = load void (i32, i32, i32, i32, %struct.pix_pos*)*, void (i32, i32, i32, i32, %struct.pix_pos*)** @getNeighbour ; <void (i32, i32, i32, i32, %struct.pix_pos*)*> [#uses=1]
   call  void %tmp49.i309(i32 %tmp14.i302, i32 %sub.i308, i32 %add.i307452, i32 0, %struct.pix_pos* null) nounwind
-  %tmp49.1.i = load void (i32, i32, i32, i32, %struct.pix_pos*)** @getNeighbour ; <void (i32, i32, i32, i32, %struct.pix_pos*)*> [#uses=1]
+  %tmp49.1.i = load void (i32, i32, i32, i32, %struct.pix_pos*)*, void (i32, i32, i32, i32, %struct.pix_pos*)** @getNeighbour ; <void (i32, i32, i32, i32, %struct.pix_pos*)*> [#uses=1]
   call  void %tmp49.1.i(i32 %tmp14.i302, i32 %sub.i308, i32 undef, i32 0, %struct.pix_pos* %arrayidx56.2.i) nounwind
   call  void undef(i32 %tmp14.i302, i32 %sub.i308, i32 undef, i32 0, %struct.pix_pos* %arrayidx56.3.i) nounwind
   unreachable
@@ -102,11 +102,11 @@
   %add3695 = add nsw i32 %mul3693, %shl1959       ; <i32> [#uses=1]
   %mul3697 = shl i32 %add3695, 2                  ; <i32> [#uses=2]
   %arrayidx3705 = getelementptr inbounds i16, i16* undef, i32 1 ; <i16*> [#uses=1]
-  %tmp3706 = load i16* %arrayidx3705              ; <i16> [#uses=1]
+  %tmp3706 = load i16, i16* %arrayidx3705              ; <i16> [#uses=1]
   %conv3707 = sext i16 %tmp3706 to i32            ; <i32> [#uses=1]
   %add3708 = add nsw i32 %conv3707, %mul3697      ; <i32> [#uses=1]
   %arrayidx3724 = getelementptr inbounds i16, i16* null, i32 1 ; <i16*> [#uses=1]
-  %tmp3725 = load i16* %arrayidx3724              ; <i16> [#uses=1]
+  %tmp3725 = load i16, i16* %arrayidx3724              ; <i16> [#uses=1]
   %conv3726 = sext i16 %tmp3725 to i32            ; <i32> [#uses=1]
   %add3727 = add nsw i32 %conv3726, %mul3697      ; <i32> [#uses=1]
   br label %if.end3770
@@ -115,11 +115,11 @@
   %mul3733 = add i32 %shl1959, 1073741816         ; <i32> [#uses=1]
   %add3735 = add nsw i32 %mul3733, %mul3693       ; <i32> [#uses=1]
   %mul3737 = shl i32 %add3735, 2                  ; <i32> [#uses=2]
-  %tmp3746 = load i16* undef                      ; <i16> [#uses=1]
+  %tmp3746 = load i16, i16* undef                      ; <i16> [#uses=1]
   %conv3747 = sext i16 %tmp3746 to i32            ; <i32> [#uses=1]
   %add3748 = add nsw i32 %conv3747, %mul3737      ; <i32> [#uses=1]
   %arrayidx3765 = getelementptr inbounds i16, i16* null, i32 1 ; <i16*> [#uses=1]
-  %tmp3766 = load i16* %arrayidx3765              ; <i16> [#uses=1]
+  %tmp3766 = load i16, i16* %arrayidx3765              ; <i16> [#uses=1]
   %conv3767 = sext i16 %tmp3766 to i32            ; <i32> [#uses=1]
   %add3768 = add nsw i32 %conv3767, %mul3737      ; <i32> [#uses=1]
   br label %if.end3770
diff --git a/llvm/test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll b/llvm/test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll
index 9346198..e283cb3 100644
--- a/llvm/test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll
+++ b/llvm/test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll
@@ -7,7 +7,7 @@
 
 define %struct.OP* @Perl_pp_complement() nounwind {
 entry:
-  %0 = load %struct.SV** null, align 4            ; <%struct.SV*> [#uses=2]
+  %0 = load %struct.SV*, %struct.SV** null, align 4            ; <%struct.SV*> [#uses=2]
   br i1 undef, label %bb21, label %bb5
 
 bb5:                                              ; preds = %entry
@@ -18,14 +18,14 @@
 
 bb7:                                              ; preds = %bb6
   %1 = getelementptr inbounds %struct.SV, %struct.SV* %0, i32 0, i32 0 ; <i8**> [#uses=1]
-  %2 = load i8** %1, align 4                      ; <i8*> [#uses=1]
+  %2 = load i8*, i8** %1, align 4                      ; <i8*> [#uses=1]
   %3 = getelementptr inbounds i8, i8* %2, i32 12      ; <i8*> [#uses=1]
   %4 = bitcast i8* %3 to i32*                     ; <i32*> [#uses=1]
-  %5 = load i32* %4, align 4                      ; <i32> [#uses=1]
+  %5 = load i32, i32* %4, align 4                      ; <i32> [#uses=1]
   %storemerge5 = xor i32 %5, -1                   ; <i32> [#uses=1]
   call  void @Perl_sv_setiv(%struct.SV* undef, i32 %storemerge5) nounwind
   %6 = getelementptr inbounds %struct.SV, %struct.SV* undef, i32 0, i32 2 ; <i32*> [#uses=1]
-  %7 = load i32* %6, align 4                      ; <i32> [#uses=1]
+  %7 = load i32, i32* %6, align 4                      ; <i32> [#uses=1]
   %8 = and i32 %7, 16384                          ; <i32> [#uses=1]
   %9 = icmp eq i32 %8, 0                          ; <i1> [#uses=1]
   br i1 %9, label %bb12, label %bb11
@@ -54,7 +54,7 @@
 
 Perl_sv_setuv.exit:                               ; preds = %bb1.i, %bb.i
   %11 = getelementptr inbounds %struct.SV, %struct.SV* undef, i32 0, i32 2 ; <i32*> [#uses=1]
-  %12 = load i32* %11, align 4                    ; <i32> [#uses=1]
+  %12 = load i32, i32* %11, align 4                    ; <i32> [#uses=1]
   %13 = and i32 %12, 16384                        ; <i32> [#uses=1]
   %14 = icmp eq i32 %13, 0                        ; <i1> [#uses=1]
   br i1 %14, label %bb20, label %bb19
diff --git a/llvm/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll b/llvm/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
index 5c3e259..bb4bf52 100644
--- a/llvm/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
+++ b/llvm/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
@@ -43,16 +43,16 @@
   store i32 %x_size, i32* %x_size_addr
   store i32 %y_size, i32* %y_size_addr
   store i8* %bp, i8** %bp_addr
-  %0 = load i8** %in_addr, align 4                ; <i8*> [#uses=1]
+  %0 = load i8*, i8** %in_addr, align 4                ; <i8*> [#uses=1]
   store i8* %0, i8** %out, align 4
   %1 = call  i32 (...)* @foo() nounwind ; <i32> [#uses=1]
   store i32 %1, i32* %i, align 4
-  %2 = load i32* %three_by_three_addr, align 4    ; <i32> [#uses=1]
+  %2 = load i32, i32* %three_by_three_addr, align 4    ; <i32> [#uses=1]
   %3 = icmp eq i32 %2, 0                          ; <i1> [#uses=1]
   br i1 %3, label %bb, label %bb2
 
 bb:                                               ; preds = %entry
-  %4 = load float* %dt_addr, align 4              ; <float> [#uses=1]
+  %4 = load float, float* %dt_addr, align 4              ; <float> [#uses=1]
   %5 = fpext float %4 to double                   ; <double> [#uses=1]
   %6 = fmul double %5, 1.500000e+00               ; <double> [#uses=1]
   %7 = fptosi double %6 to i32                    ; <i32> [#uses=1]
@@ -65,54 +65,54 @@
   br label %bb3
 
 bb3:                                              ; preds = %bb2, %bb
-  %9 = load i32* %mask_size, align 4              ; <i32> [#uses=1]
+  %9 = load i32, i32* %mask_size, align 4              ; <i32> [#uses=1]
   %10 = mul i32 %9, 2                             ; <i32> [#uses=1]
   %11 = add nsw i32 %10, 1                        ; <i32> [#uses=1]
   store i32 %11, i32* %n_max, align 4
-  %12 = load i32* %x_size_addr, align 4           ; <i32> [#uses=1]
-  %13 = load i32* %n_max, align 4                 ; <i32> [#uses=1]
+  %12 = load i32, i32* %x_size_addr, align 4           ; <i32> [#uses=1]
+  %13 = load i32, i32* %n_max, align 4                 ; <i32> [#uses=1]
   %14 = sub i32 %12, %13                          ; <i32> [#uses=1]
   store i32 %14, i32* %increment, align 4
-  %15 = load i32* %n_max, align 4                 ; <i32> [#uses=1]
-  %16 = load i32* %n_max, align 4                 ; <i32> [#uses=1]
+  %15 = load i32, i32* %n_max, align 4                 ; <i32> [#uses=1]
+  %16 = load i32, i32* %n_max, align 4                 ; <i32> [#uses=1]
   %17 = mul i32 %15, %16                          ; <i32> [#uses=1]
   %18 = call  noalias i8* @malloc(i32 %17) nounwind ; <i8*> [#uses=1]
   store i8* %18, i8** %dp, align 4
-  %19 = load i8** %dp, align 4                    ; <i8*> [#uses=1]
+  %19 = load i8*, i8** %dp, align 4                    ; <i8*> [#uses=1]
   store i8* %19, i8** %dpt, align 4
-  %20 = load float* %dt_addr, align 4             ; <float> [#uses=1]
-  %21 = load float* %dt_addr, align 4             ; <float> [#uses=1]
+  %20 = load float, float* %dt_addr, align 4             ; <float> [#uses=1]
+  %21 = load float, float* %dt_addr, align 4             ; <float> [#uses=1]
   %22 = fmul float %20, %21                       ; <float> [#uses=1]
   %23 = fsub float -0.000000e+00, %22             ; <float> [#uses=1]
   store float %23, float* %temp, align 4
-  %24 = load i32* %mask_size, align 4             ; <i32> [#uses=1]
+  %24 = load i32, i32* %mask_size, align 4             ; <i32> [#uses=1]
   %25 = sub i32 0, %24                            ; <i32> [#uses=1]
   store i32 %25, i32* %j, align 4
   br label %bb5
 
 bb4:                                              ; preds = %bb5
-  %26 = load i32* %j, align 4                     ; <i32> [#uses=1]
-  %27 = load i32* %j, align 4                     ; <i32> [#uses=1]
+  %26 = load i32, i32* %j, align 4                     ; <i32> [#uses=1]
+  %27 = load i32, i32* %j, align 4                     ; <i32> [#uses=1]
   %28 = mul i32 %26, %27                          ; <i32> [#uses=1]
   %29 = sitofp i32 %28 to double                  ; <double> [#uses=1]
   %30 = fmul double %29, 1.234000e+00             ; <double> [#uses=1]
   %31 = fptosi double %30 to i32                  ; <i32> [#uses=1]
   store i32 %31, i32* %x, align 4
-  %32 = load i32* %x, align 4                     ; <i32> [#uses=1]
+  %32 = load i32, i32* %x, align 4                     ; <i32> [#uses=1]
   %33 = trunc i32 %32 to i8                       ; <i8> [#uses=1]
-  %34 = load i8** %dpt, align 4                   ; <i8*> [#uses=1]
+  %34 = load i8*, i8** %dpt, align 4                   ; <i8*> [#uses=1]
   store i8 %33, i8* %34, align 1
-  %35 = load i8** %dpt, align 4                   ; <i8*> [#uses=1]
+  %35 = load i8*, i8** %dpt, align 4                   ; <i8*> [#uses=1]
   %36 = getelementptr inbounds i8, i8* %35, i64 1     ; <i8*> [#uses=1]
   store i8* %36, i8** %dpt, align 4
-  %37 = load i32* %j, align 4                     ; <i32> [#uses=1]
+  %37 = load i32, i32* %j, align 4                     ; <i32> [#uses=1]
   %38 = add nsw i32 %37, 1                        ; <i32> [#uses=1]
   store i32 %38, i32* %j, align 4
   br label %bb5
 
 bb5:                                              ; preds = %bb4, %bb3
-  %39 = load i32* %j, align 4                     ; <i32> [#uses=1]
-  %40 = load i32* %mask_size, align 4             ; <i32> [#uses=1]
+  %39 = load i32, i32* %j, align 4                     ; <i32> [#uses=1]
+  %40 = load i32, i32* %mask_size, align 4             ; <i32> [#uses=1]
   %41 = icmp sle i32 %39, %40                     ; <i1> [#uses=1]
   br i1 %41, label %bb4, label %bb6
 
diff --git a/llvm/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll b/llvm/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll
index 1f46924..08aa35b 100644
--- a/llvm/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll
+++ b/llvm/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll
@@ -20,7 +20,7 @@
 define %"struct.WTF::TCMalloc_ThreadCache"* @_ZN3WTF20TCMalloc_ThreadCache22CreateCacheIfNecessaryEv() nounwind {
 entry:
   %0 = tail call  i32 @pthread_mutex_lock(%struct.PlatformMutex* getelementptr inbounds (%struct.SpinLock* @_ZN3WTFL13pageheap_lockE, i32 0, i32 0)) nounwind
-  %.b24 = load i1* @_ZN3WTFL10tsd_initedE.b, align 4 ; <i1> [#uses=1]
+  %.b24 = load i1, i1* @_ZN3WTFL10tsd_initedE.b, align 4 ; <i1> [#uses=1]
   br i1 %.b24, label %bb5, label %bb6
 
 bb5:                                              ; preds = %entry
@@ -33,7 +33,7 @@
 
 bb7:                                              ; preds = %bb11
   %2 = getelementptr inbounds %"struct.WTF::TCMalloc_ThreadCache", %"struct.WTF::TCMalloc_ThreadCache"* %h.0, i32 0, i32 1
-  %3 = load %struct._opaque_pthread_t** %2, align 4
+  %3 = load %struct._opaque_pthread_t*, %struct._opaque_pthread_t** %2, align 4
   %4 = tail call  i32 @pthread_equal(%struct._opaque_pthread_t* %3, %struct._opaque_pthread_t* %me.0) nounwind
   %5 = icmp eq i32 %4, 0
   br i1 %5, label %bb10, label %bb14
@@ -44,7 +44,7 @@
 
 bb11:                                             ; preds = %bb10, %bb6
   %h.0.in = phi %"struct.WTF::TCMalloc_ThreadCache"** [ @_ZN3WTFL12thread_heapsE, %bb6 ], [ %6, %bb10 ] ; <%"struct.WTF::TCMalloc_ThreadCache"**> [#uses=1]
-  %h.0 = load %"struct.WTF::TCMalloc_ThreadCache"** %h.0.in, align 4 ; <%"struct.WTF::TCMalloc_ThreadCache"*> [#uses=4]
+  %h.0 = load %"struct.WTF::TCMalloc_ThreadCache"*, %"struct.WTF::TCMalloc_ThreadCache"** %h.0.in, align 4 ; <%"struct.WTF::TCMalloc_ThreadCache"*> [#uses=4]
   %7 = icmp eq %"struct.WTF::TCMalloc_ThreadCache"* %h.0, null
   br i1 %7, label %bb13, label %bb7
 
@@ -56,17 +56,17 @@
   %heap.1 = phi %"struct.WTF::TCMalloc_ThreadCache"* [ %8, %bb13 ], [ %h.0, %bb7 ] ; <%"struct.WTF::TCMalloc_ThreadCache"*> [#uses=4]
   %9 = tail call  i32 @pthread_mutex_unlock(%struct.PlatformMutex* getelementptr inbounds (%struct.SpinLock* @_ZN3WTFL13pageheap_lockE, i32 0, i32 0)) nounwind
   %10 = getelementptr inbounds %"struct.WTF::TCMalloc_ThreadCache", %"struct.WTF::TCMalloc_ThreadCache"* %heap.1, i32 0, i32 2
-  %11 = load i8* %10, align 4
+  %11 = load i8, i8* %10, align 4
   %toBool15not = icmp eq i8 %11, 0                ; <i1> [#uses=1]
   br i1 %toBool15not, label %bb19, label %bb22
 
 bb19:                                             ; preds = %bb14
-  %.b = load i1* @_ZN3WTFL10tsd_initedE.b, align 4 ; <i1> [#uses=1]
+  %.b = load i1, i1* @_ZN3WTFL10tsd_initedE.b, align 4 ; <i1> [#uses=1]
   br i1 %.b, label %bb21, label %bb22
 
 bb21:                                             ; preds = %bb19
   store i8 1, i8* %10, align 4
-  %12 = load i32* @_ZN3WTFL8heap_keyE, align 4
+  %12 = load i32, i32* @_ZN3WTFL8heap_keyE, align 4
   %13 = bitcast %"struct.WTF::TCMalloc_ThreadCache"* %heap.1 to i8*
   %14 = tail call  i32 @pthread_setspecific(i32 %12, i8* %13) nounwind
   ret %"struct.WTF::TCMalloc_ThreadCache"* %heap.1
diff --git a/llvm/test/CodeGen/Thumb2/2010-03-08-addi12-ccout.ll b/llvm/test/CodeGen/Thumb2/2010-03-08-addi12-ccout.ll
index 6133ea2..012aa4d 100644
--- a/llvm/test/CodeGen/Thumb2/2010-03-08-addi12-ccout.ll
+++ b/llvm/test/CodeGen/Thumb2/2010-03-08-addi12-ccout.ll
@@ -139,7 +139,7 @@
   %indvar670 = phi i32 [ %tmp673, %bb345 ], [ 0, %bb339 ] ; <i32> [#uses=1]
   %tmp673 = add i32 %indvar670, 1                 ; <i32> [#uses=2]
   %scevgep674 = getelementptr [256 x i8], [256 x i8]* %last, i32 0, i32 %tmp673 ; <i8*> [#uses=1]
-  %5 = load i8* %scevgep674, align 1              ; <i8> [#uses=1]
+  %5 = load i8, i8* %scevgep674, align 1              ; <i8> [#uses=1]
   br i1 undef, label %bb347, label %bb345
 
 bb347:                                            ; preds = %bb345
@@ -167,7 +167,7 @@
   %indvar662 = phi i32 [ %tmp665, %bb366 ], [ 0, %bb360 ] ; <i32> [#uses=1]
   %tmp665 = add i32 %indvar662, 1                 ; <i32> [#uses=2]
   %scevgep666 = getelementptr [256 x i8], [256 x i8]* %last2, i32 0, i32 %tmp665 ; <i8*> [#uses=1]
-  %6 = load i8* %scevgep666, align 1              ; <i8> [#uses=0]
+  %6 = load i8, i8* %scevgep666, align 1              ; <i8> [#uses=0]
   br i1 false, label %bb368, label %bb366
 
 bb368:                                            ; preds = %bb366
@@ -189,7 +189,7 @@
   ret void
 
 bb383:                                            ; preds = %bb373
-  %10 = load i8* undef, align 1                   ; <i8> [#uses=1]
+  %10 = load i8, i8* undef, align 1                   ; <i8> [#uses=1]
   %cond1 = icmp eq i8 %10, 46                     ; <i1> [#uses=1]
   br i1 %cond1, label %bb373, label %bb388
 
@@ -203,7 +203,7 @@
 
 bb391:                                            ; preds = %bb390, %bb388
   %indvar724 = phi i32 [ %indvar.next725, %bb390 ], [ 0, %bb388 ] ; <i32> [#uses=2]
-  %11 = load i8* undef, align 1                   ; <i8> [#uses=0]
+  %11 = load i8, i8* undef, align 1                   ; <i8> [#uses=0]
   br i1 false, label %bb395, label %bb392
 
 bb392:                                            ; preds = %bb391
diff --git a/llvm/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll b/llvm/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
index b8aea9c..d3a4495 100644
--- a/llvm/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
+++ b/llvm/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll
@@ -54,7 +54,7 @@
   %l.09 = phi i32 [ %tmp28, %bb4 ], [ 0, %entry ]
   %scevgep = getelementptr %s1, %s1* %this, i32 0, i32 11, i32 %l.09
   %scevgep10 = getelementptr i32, i32* %rowbytes, i32 %l.09
-  %tmp27 = load i32* %scevgep10, align 4
+  %tmp27 = load i32, i32* %scevgep10, align 4
   store i32 %tmp27, i32* %scevgep, align 4
   %tmp28 = add i32 %l.09, 1
   %exitcond = icmp eq i32 %tmp28, %levels
diff --git a/llvm/test/CodeGen/Thumb2/2010-06-19-ITBlockCrash.ll b/llvm/test/CodeGen/Thumb2/2010-06-19-ITBlockCrash.ll
index 501f763..eba2e58 100644
--- a/llvm/test/CodeGen/Thumb2/2010-06-19-ITBlockCrash.ll
+++ b/llvm/test/CodeGen/Thumb2/2010-06-19-ITBlockCrash.ll
@@ -18,7 +18,7 @@
   %1 = tail call arm_apcscc  i32 @__maskrune(i32 %0, i32 32768) nounwind ; <i32> [#uses=1]
   %2 = icmp ne i32 %1, 0                          ; <i1> [#uses=1]
   %3 = zext i1 %2 to i32                          ; <i32> [#uses=1]
-  %.pre = load i8* undef, align 1                 ; <i8> [#uses=1]
+  %.pre = load i8, i8* undef, align 1                 ; <i8> [#uses=1]
   br label %isupper144.exit12
 
 isupper144.exit12:                                ; preds = %bb1.i.i11, %bb.i.i10
diff --git a/llvm/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll b/llvm/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
index f3046e1..05af91b 100644
--- a/llvm/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
+++ b/llvm/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
@@ -86,7 +86,7 @@
 
 bb11:                                             ; preds = %bb10, %bb9
   %p.0 = phi i8* [ undef, %bb10 ], [ %p.1, %bb9 ] ; <i8*> [#uses=1]
-  %0 = load %struct.FILE** @finput, align 4       ; <%struct.FILE*> [#uses=1]
+  %0 = load %struct.FILE*, %struct.FILE** @finput, align 4       ; <%struct.FILE*> [#uses=1]
   %1 = tail call i32 @getc(%struct.FILE* %0) nounwind ; <i32> [#uses=0]
   br label %bb12
 
@@ -96,7 +96,7 @@
   br i1 %2, label %bb.i.i2, label %bb1.i.i3
 
 bb.i.i2:                                          ; preds = %bb12
-  %3 = load i32* null, align 4                    ; <i32> [#uses=1]
+  %3 = load i32, i32* null, align 4                    ; <i32> [#uses=1]
   %4 = lshr i32 %3, 8                             ; <i32> [#uses=1]
   %.lobit.i1 = and i32 %4, 1                      ; <i32> [#uses=1]
   %.not = icmp ne i32 %.lobit.i1, 0               ; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll b/llvm/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
index 3d5e500..94562d7 100644
--- a/llvm/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
+++ b/llvm/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll
@@ -16,7 +16,7 @@
   %1 = alloca [1000 x i8], align 4                ; <[1000 x i8]*> [#uses=1]
   %.sub = getelementptr inbounds [1000 x i8], [1000 x i8]* %1, i32 0, i32 0 ; <i8*> [#uses=2]
   %2 = call i32 (i8*, i32, i32, i8*, ...)* @__sprintf_chk(i8* %.sub, i32 0, i32 1000, i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %i) nounwind ; <i32> [#uses=0]
-  %3 = load i8* %.sub, align 4                    ; <i8> [#uses=1]
+  %3 = load i8, i8* %.sub, align 4                    ; <i8> [#uses=1]
   %4 = sext i8 %3 to i32                          ; <i32> [#uses=1]
   ret i32 %4
 
diff --git a/llvm/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll b/llvm/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll
index b26c130..fc7d0e1 100644
--- a/llvm/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll
+++ b/llvm/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll
@@ -9,7 +9,7 @@
 
 define void @Perl_ck_sort() nounwind optsize {
 entry:
-  %tmp27 = load %struct.op** undef, align 4
+  %tmp27 = load %struct.op*, %struct.op** undef, align 4
   switch i16 undef, label %if.end151 [
     i16 178, label %if.then60
     i16 177, label %if.then60
@@ -26,7 +26,7 @@
 
 if.end95:                                         ; preds = %if.else92, %if.then67
   %.pre-phi = phi %struct.op** [ undef, %if.then60 ], [ %0, %if.then67 ]
-  %tmp98 = load %struct.op** %.pre-phi, align 4
+  %tmp98 = load %struct.op*, %struct.op** %.pre-phi, align 4
   br label %if.end151
 
 if.end151:                                        ; preds = %if.end100, %if.end, %entry
diff --git a/llvm/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll b/llvm/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll
index c7c2cfe..7c8802d 100644
--- a/llvm/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll
+++ b/llvm/test/CodeGen/Thumb2/2011-12-16-T2SizeReduceAssert.ll
@@ -8,15 +8,15 @@
   br label %bb3
 
 bb:                                               ; preds = %bb3
-  %Scan.0.idx7.val = load i8** undef, align 4
+  %Scan.0.idx7.val = load i8*, i8** undef, align 4
   %.idx = getelementptr i8, i8* %Scan.0.idx7.val, i32 4
   %0 = bitcast i8* %.idx to i8**
-  %.idx.val = load i8** %0, align 4
+  %.idx.val = load i8*, i8** %0, align 4
   %1 = icmp eq i8* %.idx.val, %Key
   br i1 %1, label %bb5, label %bb2
 
 bb2:                                              ; preds = %bb
-  %Scan.0.idx8.val = load %struct.LIST_NODE.0.16** undef, align 4
+  %Scan.0.idx8.val = load %struct.LIST_NODE.0.16*, %struct.LIST_NODE.0.16** undef, align 4
   br label %bb3
 
 bb3:                                              ; preds = %bb2, %entry
@@ -34,7 +34,7 @@
   br i1 undef, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.body, %entry
-  %0 = load double* null, align 8
+  %0 = load double, double* null, align 8
   %cmp2.6 = fcmp ogt double %0, 0.000000e+00
   %idx.1.6 = select i1 %cmp2.6, i32 undef, i32 0
   %idx.1.7 = select i1 undef, i32 undef, i32 %idx.1.6
diff --git a/llvm/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll b/llvm/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
index 482b391..9121044 100644
--- a/llvm/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
+++ b/llvm/test/CodeGen/Thumb2/2012-01-13-CBNZBug.ll
@@ -23,15 +23,15 @@
 
 if.end:                                           ; preds = %tailrecurse
   %string = getelementptr inbounds %struct.Dict_node_struct, %struct.Dict_node_struct* %dn.tr, i32 0, i32 0
-  %0 = load i8** %string, align 4
+  %0 = load i8*, i8** %string, align 4
   br label %while.cond.i
 
 while.cond.i:                                     ; preds = %while.body.i, %if.end
   %1 = phi i8* [ %s, %if.end ], [ %incdec.ptr.i, %while.body.i ]
   %storemerge.i = phi i8* [ %0, %if.end ], [ %incdec.ptr6.i, %while.body.i ]
-  %2 = load i8* %1, align 1
+  %2 = load i8, i8* %1, align 1
   %cmp.i = icmp eq i8 %2, 0
-  %.pre.i = load i8* %storemerge.i, align 1
+  %.pre.i = load i8, i8* %storemerge.i, align 1
   br i1 %cmp.i, label %lor.lhs.false.i, label %land.end.i
 
 land.end.i:                                       ; preds = %while.cond.i
@@ -69,7 +69,7 @@
 ; CHECK-NOT: cbnz
   %storemerge1.i3 = phi i32 [ %sub.i, %dict_match.exit ], [ 0, %lor.lhs.false.i ], [ 0, %while.end.i ]
   %right = getelementptr inbounds %struct.Dict_node_struct, %struct.Dict_node_struct* %dn.tr, i32 0, i32 4
-  %4 = load %struct.Dict_node_struct** %right, align 4
+  %4 = load %struct.Dict_node_struct*, %struct.Dict_node_struct** %right, align 4
   tail call fastcc void @rdictionary_lookup(%struct.Dict_node_struct* %4, i8* %s)
   %cmp4 = icmp eq i32 %storemerge1.i3, 0
   br i1 %cmp4, label %if.then5, label %if.end8
@@ -79,7 +79,7 @@
   %5 = bitcast i8* %call6 to %struct.Dict_node_struct*
   %6 = bitcast %struct.Dict_node_struct* %dn.tr to i8*
   tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %call6, i8* %6, i32 16, i32 4, i1 false)
-  %7 = load %struct.Dict_node_struct** @lookup_list, align 4
+  %7 = load %struct.Dict_node_struct*, %struct.Dict_node_struct** @lookup_list, align 4
   %right7 = getelementptr inbounds i8, i8* %call6, i32 16
   %8 = bitcast i8* %right7 to %struct.Dict_node_struct**
   store %struct.Dict_node_struct* %7, %struct.Dict_node_struct** %8, align 4
@@ -92,7 +92,7 @@
 
 if.then10:                                        ; preds = %if.end8, %if.then5, %dict_match.exit
   %left = getelementptr inbounds %struct.Dict_node_struct, %struct.Dict_node_struct* %dn.tr, i32 0, i32 3
-  %9 = load %struct.Dict_node_struct** %left, align 4
+  %9 = load %struct.Dict_node_struct*, %struct.Dict_node_struct** %left, align 4
   br label %tailrecurse
 
 if.end11:                                         ; preds = %if.end8, %tailrecurse
diff --git a/llvm/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll b/llvm/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll
index bc0db3b..c9d3f3d 100644
--- a/llvm/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll
+++ b/llvm/test/CodeGen/Thumb2/2013-02-19-tail-call-register-hint.ll
@@ -18,13 +18,13 @@
   tail call void @def(%"myclass"* %2) nounwind
   %3 = getelementptr inbounds i8, i8* %Data, i32 8
   %4 = bitcast i8* %3 to i8**
-  %5 = load i8** %4, align 4
+  %5 = load i8*, i8** %4, align 4
   tail call void @ghi(i8* %5) nounwind
   %6 = bitcast i8* %Data to void (i8*)**
-  %7 = load void (i8*)** %6, align 4
+  %7 = load void (i8*)*, void (i8*)** %6, align 4
   %8 = getelementptr inbounds i8, i8* %Data, i32 4
   %9 = bitcast i8* %8 to i8**
-  %10 = load i8** %9, align 4
+  %10 = load i8*, i8** %9, align 4
   %11 = icmp eq i8* %Data, null
   br i1 %11, label %14, label %12
 
diff --git a/llvm/test/CodeGen/Thumb2/aligned-constants.ll b/llvm/test/CodeGen/Thumb2/aligned-constants.ll
index 16b3a19..13cca11 100644
--- a/llvm/test/CodeGen/Thumb2/aligned-constants.ll
+++ b/llvm/test/CodeGen/Thumb2/aligned-constants.ll
@@ -16,10 +16,10 @@
 ; CHECK:	.long	1123477881
 define void @func(float* nocapture %x, double* nocapture %y) nounwind ssp {
 entry:
-  %0 = load float* %x, align 4
+  %0 = load float, float* %x, align 4
   %add = fadd float %0, 0x405EDD2F20000000
   store float %add, float* %x, align 4
-  %1 = load double* %y, align 4
+  %1 = load double, double* %y, align 4
   %add1 = fadd double %1, 2.234560e+02
   store double %add1, double* %y, align 4
   ret void
diff --git a/llvm/test/CodeGen/Thumb2/aligned-spill.ll b/llvm/test/CodeGen/Thumb2/aligned-spill.ll
index 4ef294b..59f546b 100644
--- a/llvm/test/CodeGen/Thumb2/aligned-spill.ll
+++ b/llvm/test/CodeGen/Thumb2/aligned-spill.ll
@@ -13,7 +13,7 @@
 ; CHECK: mov sp, r4
 define void @f(double* nocapture %p) nounwind ssp {
 entry:
-  %0 = load double* %p, align 4
+  %0 = load double, double* %p, align 4
   tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
   tail call void @g() nounwind
   store double %0, double* %p, align 4
diff --git a/llvm/test/CodeGen/Thumb2/bfi.ll b/llvm/test/CodeGen/Thumb2/bfi.ll
index 4f056d57..337f46a 100644
--- a/llvm/test/CodeGen/Thumb2/bfi.ll
+++ b/llvm/test/CodeGen/Thumb2/bfi.ll
@@ -9,7 +9,7 @@
 ; CHECK: f1
 ; CHECK: movs r2, #10
 ; CHECK: bfi r1, r2, #22, #4
-  %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1]
+  %0 = load i32, i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1]
   %1 = and i32 %0, -62914561                      ; <i32> [#uses=1]
   %2 = or i32 %1, 41943040                        ; <i32> [#uses=1]
   store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4
diff --git a/llvm/test/CodeGen/Thumb2/constant-islands-new-island-padding.ll b/llvm/test/CodeGen/Thumb2/constant-islands-new-island-padding.ll
index 991b043..c2a2c06 100644
--- a/llvm/test/CodeGen/Thumb2/constant-islands-new-island-padding.ll
+++ b/llvm/test/CodeGen/Thumb2/constant-islands-new-island-padding.ll
@@ -15,22 +15,22 @@
 
 define i32 @testpadding(i32 %a) {
 entry:
-  %0 = load i32* @g0, align 4
+  %0 = load i32, i32* @g0, align 4
   %add = add nsw i32 %0, 12
   store i32 %add, i32* @g0, align 4
-  %1 = load double* @d0, align 8
+  %1 = load double, double* @d0, align 8
   %add1 = fadd double %1, 0x3FF3C0B8ED46EACB
   store double %add1, double* @d0, align 8
   %tmpcall11 = call i32 @llvm.arm.space(i32 28, i32 undef)
   call void @foo20(i32 191)
-  %2 = load float* @f0, align 4
+  %2 = load float, float* @f0, align 4
   %add2 = fadd float %2, 0x3FF3C0BDC0000000
   store float %add2, float* @f0, align 4
   br label %do.body
 
 do.body:                                          ; preds = %do.body, %entry
   tail call void @foo20(i32 19)
-  %3 = load i32* @g1, align 4
+  %3 = load i32, i32* @g1, align 4
   %tobool = icmp eq i32 %3, 0
   br i1 %tobool, label %do.end, label %do.body
 
diff --git a/llvm/test/CodeGen/Thumb2/constant-islands.ll b/llvm/test/CodeGen/Thumb2/constant-islands.ll
index bb1d7aa..8be6a55 100644
--- a/llvm/test/CodeGen/Thumb2/constant-islands.ll
+++ b/llvm/test/CodeGen/Thumb2/constant-islands.ll
@@ -262,18 +262,18 @@
   store %class.btDynamicsWorld* %ownerWorld, %class.btDynamicsWorld** %ownerWorld.addr, align 4
   store %class.btVector3* %positionOffset, %class.btVector3** %positionOffset.addr, align 4
   store float %scale, float* %scale.addr, align 4
-  %this1 = load %class.RagDoll** %this.addr
+  %this1 = load %class.RagDoll*, %class.RagDoll** %this.addr
   store %class.RagDoll* %this1, %class.RagDoll** %retval
   %0 = bitcast %class.RagDoll* %this1 to i8***
   store i8** getelementptr inbounds ([4 x i8*]* @_ZTV7RagDoll, i64 0, i64 2), i8*** %0
   %m_ownerWorld = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
-  %1 = load %class.btDynamicsWorld** %ownerWorld.addr, align 4
+  %1 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %ownerWorld.addr, align 4
   store %class.btDynamicsWorld* %1, %class.btDynamicsWorld** %m_ownerWorld, align 4
   %call = call i8* @_ZN13btConvexShapenwEm(i32 56)
   %2 = bitcast i8* %call to %class.btCapsuleShape*
-  %3 = load float* %scale.addr, align 4
+  %3 = load float, float* %scale.addr, align 4
   %mul = fmul float 0x3FC3333340000000, %3
-  %4 = load float* %scale.addr, align 4
+  %4 = load float, float* %scale.addr, align 4
   %mul2 = fmul float 0x3FC99999A0000000, %4
   %call3 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %2, float %mul, float %mul2)
           to label %invoke.cont unwind label %lpad
@@ -285,9 +285,9 @@
   store %class.btCollisionShape* %5, %class.btCollisionShape** %arrayidx, align 4
   %call5 = call i8* @_ZN13btConvexShapenwEm(i32 56)
   %6 = bitcast i8* %call5 to %class.btCapsuleShape*
-  %7 = load float* %scale.addr, align 4
+  %7 = load float, float* %scale.addr, align 4
   %mul6 = fmul float 0x3FC3333340000000, %7
-  %8 = load float* %scale.addr, align 4
+  %8 = load float, float* %scale.addr, align 4
   %mul7 = fmul float 0x3FD1EB8520000000, %8
   %call10 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %6, float %mul6, float %mul7)
           to label %invoke.cont9 unwind label %lpad8
@@ -299,9 +299,9 @@
   store %class.btCollisionShape* %9, %class.btCollisionShape** %arrayidx13, align 4
   %call14 = call i8* @_ZN13btConvexShapenwEm(i32 56)
   %10 = bitcast i8* %call14 to %class.btCapsuleShape*
-  %11 = load float* %scale.addr, align 4
+  %11 = load float, float* %scale.addr, align 4
   %mul15 = fmul float 0x3FB99999A0000000, %11
-  %12 = load float* %scale.addr, align 4
+  %12 = load float, float* %scale.addr, align 4
   %mul16 = fmul float 0x3FA99999A0000000, %12
   %call19 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %10, float %mul15, float %mul16)
           to label %invoke.cont18 unwind label %lpad17
@@ -313,9 +313,9 @@
   store %class.btCollisionShape* %13, %class.btCollisionShape** %arrayidx22, align 4
   %call23 = call i8* @_ZN13btConvexShapenwEm(i32 56)
   %14 = bitcast i8* %call23 to %class.btCapsuleShape*
-  %15 = load float* %scale.addr, align 4
+  %15 = load float, float* %scale.addr, align 4
   %mul24 = fmul float 0x3FB1EB8520000000, %15
-  %16 = load float* %scale.addr, align 4
+  %16 = load float, float* %scale.addr, align 4
   %mul25 = fmul float 0x3FDCCCCCC0000000, %16
   %call28 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %14, float %mul24, float %mul25)
           to label %invoke.cont27 unwind label %lpad26
@@ -327,9 +327,9 @@
   store %class.btCollisionShape* %17, %class.btCollisionShape** %arrayidx31, align 4
   %call32 = call i8* @_ZN13btConvexShapenwEm(i32 56)
   %18 = bitcast i8* %call32 to %class.btCapsuleShape*
-  %19 = load float* %scale.addr, align 4
+  %19 = load float, float* %scale.addr, align 4
   %mul33 = fmul float 0x3FA99999A0000000, %19
-  %20 = load float* %scale.addr, align 4
+  %20 = load float, float* %scale.addr, align 4
   %mul34 = fmul float 0x3FD7AE1480000000, %20
   %call37 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %18, float %mul33, float %mul34)
           to label %invoke.cont36 unwind label %lpad35
@@ -341,9 +341,9 @@
   store %class.btCollisionShape* %21, %class.btCollisionShape** %arrayidx40, align 4
   %call41 = call i8* @_ZN13btConvexShapenwEm(i32 56)
   %22 = bitcast i8* %call41 to %class.btCapsuleShape*
-  %23 = load float* %scale.addr, align 4
+  %23 = load float, float* %scale.addr, align 4
   %mul42 = fmul float 0x3FB1EB8520000000, %23
-  %24 = load float* %scale.addr, align 4
+  %24 = load float, float* %scale.addr, align 4
   %mul43 = fmul float 0x3FDCCCCCC0000000, %24
   %call46 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %22, float %mul42, float %mul43)
           to label %invoke.cont45 unwind label %lpad44
@@ -355,9 +355,9 @@
   store %class.btCollisionShape* %25, %class.btCollisionShape** %arrayidx49, align 4
   %call50 = call i8* @_ZN13btConvexShapenwEm(i32 56)
   %26 = bitcast i8* %call50 to %class.btCapsuleShape*
-  %27 = load float* %scale.addr, align 4
+  %27 = load float, float* %scale.addr, align 4
   %mul51 = fmul float 0x3FA99999A0000000, %27
-  %28 = load float* %scale.addr, align 4
+  %28 = load float, float* %scale.addr, align 4
   %mul52 = fmul float 0x3FD7AE1480000000, %28
   %call55 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %26, float %mul51, float %mul52)
           to label %invoke.cont54 unwind label %lpad53
@@ -369,9 +369,9 @@
   store %class.btCollisionShape* %29, %class.btCollisionShape** %arrayidx58, align 4
   %call59 = call i8* @_ZN13btConvexShapenwEm(i32 56)
   %30 = bitcast i8* %call59 to %class.btCapsuleShape*
-  %31 = load float* %scale.addr, align 4
+  %31 = load float, float* %scale.addr, align 4
   %mul60 = fmul float 0x3FA99999A0000000, %31
-  %32 = load float* %scale.addr, align 4
+  %32 = load float, float* %scale.addr, align 4
   %mul61 = fmul float 0x3FD51EB860000000, %32
   %call64 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %30, float %mul60, float %mul61)
           to label %invoke.cont63 unwind label %lpad62
@@ -383,9 +383,9 @@
   store %class.btCollisionShape* %33, %class.btCollisionShape** %arrayidx67, align 4
   %call68 = call i8* @_ZN13btConvexShapenwEm(i32 56)
   %34 = bitcast i8* %call68 to %class.btCapsuleShape*
-  %35 = load float* %scale.addr, align 4
+  %35 = load float, float* %scale.addr, align 4
   %mul69 = fmul float 0x3FA47AE140000000, %35
-  %36 = load float* %scale.addr, align 4
+  %36 = load float, float* %scale.addr, align 4
   %mul70 = fmul float 2.500000e-01, %36
   %call73 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %34, float %mul69, float %mul70)
           to label %invoke.cont72 unwind label %lpad71
@@ -397,9 +397,9 @@
   store %class.btCollisionShape* %37, %class.btCollisionShape** %arrayidx76, align 4
   %call77 = call i8* @_ZN13btConvexShapenwEm(i32 56)
   %38 = bitcast i8* %call77 to %class.btCapsuleShape*
-  %39 = load float* %scale.addr, align 4
+  %39 = load float, float* %scale.addr, align 4
   %mul78 = fmul float 0x3FA99999A0000000, %39
-  %40 = load float* %scale.addr, align 4
+  %40 = load float, float* %scale.addr, align 4
   %mul79 = fmul float 0x3FD51EB860000000, %40
   %call82 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %38, float %mul78, float %mul79)
           to label %invoke.cont81 unwind label %lpad80
@@ -411,9 +411,9 @@
   store %class.btCollisionShape* %41, %class.btCollisionShape** %arrayidx85, align 4
   %call86 = call i8* @_ZN13btConvexShapenwEm(i32 56)
   %42 = bitcast i8* %call86 to %class.btCapsuleShape*
-  %43 = load float* %scale.addr, align 4
+  %43 = load float, float* %scale.addr, align 4
   %mul87 = fmul float 0x3FA47AE140000000, %43
-  %44 = load float* %scale.addr, align 4
+  %44 = load float, float* %scale.addr, align 4
   %mul88 = fmul float 2.500000e-01, %44
   %call91 = invoke %class.btCapsuleShape* @_ZN14btCapsuleShapeC1Eff(%class.btCapsuleShape* %42, float %mul87, float %mul88)
           to label %invoke.cont90 unwind label %lpad89
@@ -425,7 +425,7 @@
   store %class.btCollisionShape* %45, %class.btCollisionShape** %arrayidx94, align 4
   %call95 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %offset)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %offset)
-  %46 = load %class.btVector3** %positionOffset.addr, align 4
+  %46 = load %class.btVector3*, %class.btVector3** %positionOffset.addr, align 4
   call void @_ZN11btTransform9setOriginERK9btVector3(%class.btTransform* %offset, %class.btVector3* %46)
   %call96 = call %class.btTransform* @_ZN11btTransformC1Ev(%class.btTransform* %transform)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %transform)
@@ -438,7 +438,7 @@
   call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp102, %class.btTransform* %offset, %class.btTransform* %transform)
   %m_shapes103 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
   %arrayidx104 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes103, i32 0, i32 0
-  %47 = load %class.btCollisionShape** %arrayidx104, align 4
+  %47 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx104, align 4
   %call105 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp102, %class.btCollisionShape* %47)
   %m_bodies = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx106 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies, i32 0, i32 0
@@ -453,7 +453,7 @@
   call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp113, %class.btTransform* %offset, %class.btTransform* %transform)
   %m_shapes114 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
   %arrayidx115 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes114, i32 0, i32 1
-  %48 = load %class.btCollisionShape** %arrayidx115, align 4
+  %48 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx115, align 4
   %call116 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp113, %class.btCollisionShape* %48)
   %m_bodies117 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx118 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies117, i32 0, i32 1
@@ -468,7 +468,7 @@
   call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp125, %class.btTransform* %offset, %class.btTransform* %transform)
   %m_shapes126 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
   %arrayidx127 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes126, i32 0, i32 2
-  %49 = load %class.btCollisionShape** %arrayidx127, align 4
+  %49 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx127, align 4
   %call128 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp125, %class.btCollisionShape* %49)
   %m_bodies129 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx130 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies129, i32 0, i32 2
@@ -483,7 +483,7 @@
   call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp137, %class.btTransform* %offset, %class.btTransform* %transform)
   %m_shapes138 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
   %arrayidx139 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes138, i32 0, i32 3
-  %50 = load %class.btCollisionShape** %arrayidx139, align 4
+  %50 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx139, align 4
   %call140 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp137, %class.btCollisionShape* %50)
   %m_bodies141 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx142 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies141, i32 0, i32 3
@@ -498,7 +498,7 @@
   call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp149, %class.btTransform* %offset, %class.btTransform* %transform)
   %m_shapes150 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
   %arrayidx151 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes150, i32 0, i32 4
-  %51 = load %class.btCollisionShape** %arrayidx151, align 4
+  %51 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx151, align 4
   %call152 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp149, %class.btCollisionShape* %51)
   %m_bodies153 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx154 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies153, i32 0, i32 4
@@ -513,7 +513,7 @@
   call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp161, %class.btTransform* %offset, %class.btTransform* %transform)
   %m_shapes162 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
   %arrayidx163 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes162, i32 0, i32 5
-  %52 = load %class.btCollisionShape** %arrayidx163, align 4
+  %52 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx163, align 4
   %call164 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp161, %class.btCollisionShape* %52)
   %m_bodies165 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx166 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies165, i32 0, i32 5
@@ -528,7 +528,7 @@
   call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp173, %class.btTransform* %offset, %class.btTransform* %transform)
   %m_shapes174 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
   %arrayidx175 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes174, i32 0, i32 6
-  %53 = load %class.btCollisionShape** %arrayidx175, align 4
+  %53 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx175, align 4
   %call176 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp173, %class.btCollisionShape* %53)
   %m_bodies177 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx178 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies177, i32 0, i32 6
@@ -545,7 +545,7 @@
   call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp186, %class.btTransform* %offset, %class.btTransform* %transform)
   %m_shapes187 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
   %arrayidx188 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes187, i32 0, i32 7
-  %54 = load %class.btCollisionShape** %arrayidx188, align 4
+  %54 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx188, align 4
   %call189 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp186, %class.btCollisionShape* %54)
   %m_bodies190 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx191 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies190, i32 0, i32 7
@@ -562,7 +562,7 @@
   call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp199, %class.btTransform* %offset, %class.btTransform* %transform)
   %m_shapes200 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
   %arrayidx201 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes200, i32 0, i32 8
-  %55 = load %class.btCollisionShape** %arrayidx201, align 4
+  %55 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx201, align 4
   %call202 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp199, %class.btCollisionShape* %55)
   %m_bodies203 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx204 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies203, i32 0, i32 8
@@ -579,7 +579,7 @@
   call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp212, %class.btTransform* %offset, %class.btTransform* %transform)
   %m_shapes213 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
   %arrayidx214 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes213, i32 0, i32 9
-  %56 = load %class.btCollisionShape** %arrayidx214, align 4
+  %56 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx214, align 4
   %call215 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp212, %class.btCollisionShape* %56)
   %m_bodies216 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx217 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies216, i32 0, i32 9
@@ -596,7 +596,7 @@
   call void @_ZNK11btTransformmlERKS_(%class.btTransform* sret %ref.tmp225, %class.btTransform* %offset, %class.btTransform* %transform)
   %m_shapes226 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 2
   %arrayidx227 = getelementptr inbounds [11 x %class.btCollisionShape*], [11 x %class.btCollisionShape*]* %m_shapes226, i32 0, i32 10
-  %57 = load %class.btCollisionShape** %arrayidx227, align 4
+  %57 = load %class.btCollisionShape*, %class.btCollisionShape** %arrayidx227, align 4
   %call228 = call %class.btRigidBody* @_ZN7RagDoll20localCreateRigidBodyEfRK11btTransformP16btCollisionShape(%class.RagDoll* %this1, float 1.000000e+00, %class.btTransform* %ref.tmp225, %class.btCollisionShape* %57)
   %m_bodies229 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx230 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies229, i32 0, i32 10
@@ -605,31 +605,31 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %invoke.cont90
-  %58 = load i32* %i, align 4
+  %58 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %58, 11
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %59 = load i32* %i, align 4
+  %59 = load i32, i32* %i, align 4
   %m_bodies231 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx232 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies231, i32 0, i32 %59
-  %60 = load %class.btRigidBody** %arrayidx232, align 4
+  %60 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx232, align 4
   call void @_ZN11btRigidBody10setDampingEff(%class.btRigidBody* %60, float 0x3FA99999A0000000, float 0x3FEB333340000000)
-  %61 = load i32* %i, align 4
+  %61 = load i32, i32* %i, align 4
   %m_bodies233 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx234 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies233, i32 0, i32 %61
-  %62 = load %class.btRigidBody** %arrayidx234, align 4
+  %62 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx234, align 4
   %63 = bitcast %class.btRigidBody* %62 to %class.btCollisionObject*
   call void @_ZN17btCollisionObject19setDeactivationTimeEf(%class.btCollisionObject* %63, float 0x3FE99999A0000000)
-  %64 = load i32* %i, align 4
+  %64 = load i32, i32* %i, align 4
   %m_bodies235 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx236 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies235, i32 0, i32 %64
-  %65 = load %class.btRigidBody** %arrayidx236, align 4
+  %65 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx236, align 4
   call void @_ZN11btRigidBody21setSleepingThresholdsEff(%class.btRigidBody* %65, float 0x3FF99999A0000000, float 2.500000e+00)
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %66 = load i32* %i, align 4
+  %66 = load i32, i32* %i, align 4
   %inc = add nsw i32 %66, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
@@ -802,31 +802,31 @@
   %100 = bitcast i8* %call253 to %class.btHingeConstraint*
   %m_bodies254 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx255 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies254, i32 0, i32 0
-  %101 = load %class.btRigidBody** %arrayidx255, align 4
+  %101 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx255, align 4
   %m_bodies256 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx257 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies256, i32 0, i32 1
-  %102 = load %class.btRigidBody** %arrayidx257, align 4
+  %102 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx257, align 4
   %call260 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %100, %class.btRigidBody* %101, %class.btRigidBody* %102, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
           to label %invoke.cont259 unwind label %lpad258
 
 invoke.cont259:                                   ; preds = %for.end
   store %class.btHingeConstraint* %100, %class.btHingeConstraint** %hingeC, align 4
-  %103 = load %class.btHingeConstraint** %hingeC, align 4
+  %103 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
   call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %103, float 0xBFE921FB60000000, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
-  %104 = load %class.btHingeConstraint** %hingeC, align 4
+  %104 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
   %105 = bitcast %class.btHingeConstraint* %104 to %class.btTypedConstraint*
   %m_joints = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx261 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints, i32 0, i32 0
   store %class.btTypedConstraint* %105, %class.btTypedConstraint** %arrayidx261, align 4
   %m_ownerWorld262 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
-  %106 = load %class.btDynamicsWorld** %m_ownerWorld262, align 4
+  %106 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld262, align 4
   %107 = bitcast %class.btDynamicsWorld* %106 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
-  %vtable = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %107
+  %vtable = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %107
   %vfn = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable, i64 10
-  %108 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn
+  %108 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn
   %m_joints263 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx264 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints263, i32 0, i32 0
-  %109 = load %class.btTypedConstraint** %arrayidx264, align 4
+  %109 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx264, align 4
   call void %108(%class.btDynamicsWorld* %106, %class.btTypedConstraint* %109, i1 zeroext true)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
@@ -850,31 +850,31 @@
   %110 = bitcast i8* %call279 to %class.btConeTwistConstraint*
   %m_bodies280 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx281 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies280, i32 0, i32 1
-  %111 = load %class.btRigidBody** %arrayidx281, align 4
+  %111 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx281, align 4
   %m_bodies282 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx283 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies282, i32 0, i32 2
-  %112 = load %class.btRigidBody** %arrayidx283, align 4
+  %112 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx283, align 4
   %call286 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %110, %class.btRigidBody* %111, %class.btRigidBody* %112, %class.btTransform* %localA, %class.btTransform* %localB)
           to label %invoke.cont285 unwind label %lpad284
 
 invoke.cont285:                                   ; preds = %invoke.cont259
   store %class.btConeTwistConstraint* %110, %class.btConeTwistConstraint** %coneC, align 4
-  %113 = load %class.btConeTwistConstraint** %coneC, align 4
+  %113 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
   call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %113, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0x3FF921FB60000000, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
-  %114 = load %class.btConeTwistConstraint** %coneC, align 4
+  %114 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
   %115 = bitcast %class.btConeTwistConstraint* %114 to %class.btTypedConstraint*
   %m_joints287 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx288 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints287, i32 0, i32 1
   store %class.btTypedConstraint* %115, %class.btTypedConstraint** %arrayidx288, align 4
   %m_ownerWorld289 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
-  %116 = load %class.btDynamicsWorld** %m_ownerWorld289, align 4
+  %116 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld289, align 4
   %117 = bitcast %class.btDynamicsWorld* %116 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
-  %vtable290 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %117
+  %vtable290 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %117
   %vfn291 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable290, i64 10
-  %118 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn291
+  %118 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn291
   %m_joints292 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx293 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints292, i32 0, i32 1
-  %119 = load %class.btTypedConstraint** %arrayidx293, align 4
+  %119 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx293, align 4
   call void %118(%class.btDynamicsWorld* %116, %class.btTypedConstraint* %119, i1 zeroext true)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
@@ -898,31 +898,31 @@
   %120 = bitcast i8* %call308 to %class.btConeTwistConstraint*
   %m_bodies309 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx310 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies309, i32 0, i32 0
-  %121 = load %class.btRigidBody** %arrayidx310, align 4
+  %121 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx310, align 4
   %m_bodies311 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx312 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies311, i32 0, i32 3
-  %122 = load %class.btRigidBody** %arrayidx312, align 4
+  %122 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx312, align 4
   %call315 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %120, %class.btRigidBody* %121, %class.btRigidBody* %122, %class.btTransform* %localA, %class.btTransform* %localB)
           to label %invoke.cont314 unwind label %lpad313
 
 invoke.cont314:                                   ; preds = %invoke.cont285
   store %class.btConeTwistConstraint* %120, %class.btConeTwistConstraint** %coneC, align 4
-  %123 = load %class.btConeTwistConstraint** %coneC, align 4
+  %123 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
   call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %123, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
-  %124 = load %class.btConeTwistConstraint** %coneC, align 4
+  %124 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
   %125 = bitcast %class.btConeTwistConstraint* %124 to %class.btTypedConstraint*
   %m_joints316 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx317 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints316, i32 0, i32 2
   store %class.btTypedConstraint* %125, %class.btTypedConstraint** %arrayidx317, align 4
   %m_ownerWorld318 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
-  %126 = load %class.btDynamicsWorld** %m_ownerWorld318, align 4
+  %126 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld318, align 4
   %127 = bitcast %class.btDynamicsWorld* %126 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
-  %vtable319 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %127
+  %vtable319 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %127
   %vfn320 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable319, i64 10
-  %128 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn320
+  %128 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn320
   %m_joints321 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx322 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints321, i32 0, i32 2
-  %129 = load %class.btTypedConstraint** %arrayidx322, align 4
+  %129 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx322, align 4
   call void %128(%class.btDynamicsWorld* %126, %class.btTypedConstraint* %129, i1 zeroext true)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
@@ -946,31 +946,31 @@
   %130 = bitcast i8* %call337 to %class.btHingeConstraint*
   %m_bodies338 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx339 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies338, i32 0, i32 3
-  %131 = load %class.btRigidBody** %arrayidx339, align 4
+  %131 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx339, align 4
   %m_bodies340 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx341 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies340, i32 0, i32 4
-  %132 = load %class.btRigidBody** %arrayidx341, align 4
+  %132 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx341, align 4
   %call344 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %130, %class.btRigidBody* %131, %class.btRigidBody* %132, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
           to label %invoke.cont343 unwind label %lpad342
 
 invoke.cont343:                                   ; preds = %invoke.cont314
   store %class.btHingeConstraint* %130, %class.btHingeConstraint** %hingeC, align 4
-  %133 = load %class.btHingeConstraint** %hingeC, align 4
+  %133 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
   call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %133, float 0.000000e+00, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
-  %134 = load %class.btHingeConstraint** %hingeC, align 4
+  %134 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
   %135 = bitcast %class.btHingeConstraint* %134 to %class.btTypedConstraint*
   %m_joints345 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx346 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints345, i32 0, i32 3
   store %class.btTypedConstraint* %135, %class.btTypedConstraint** %arrayidx346, align 4
   %m_ownerWorld347 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
-  %136 = load %class.btDynamicsWorld** %m_ownerWorld347, align 4
+  %136 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld347, align 4
   %137 = bitcast %class.btDynamicsWorld* %136 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
-  %vtable348 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %137
+  %vtable348 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %137
   %vfn349 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable348, i64 10
-  %138 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn349
+  %138 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn349
   %m_joints350 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx351 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints350, i32 0, i32 3
-  %139 = load %class.btTypedConstraint** %arrayidx351, align 4
+  %139 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx351, align 4
   call void %138(%class.btDynamicsWorld* %136, %class.btTypedConstraint* %139, i1 zeroext true)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
@@ -994,31 +994,31 @@
   %140 = bitcast i8* %call366 to %class.btConeTwistConstraint*
   %m_bodies367 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx368 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies367, i32 0, i32 0
-  %141 = load %class.btRigidBody** %arrayidx368, align 4
+  %141 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx368, align 4
   %m_bodies369 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx370 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies369, i32 0, i32 5
-  %142 = load %class.btRigidBody** %arrayidx370, align 4
+  %142 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx370, align 4
   %call373 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %140, %class.btRigidBody* %141, %class.btRigidBody* %142, %class.btTransform* %localA, %class.btTransform* %localB)
           to label %invoke.cont372 unwind label %lpad371
 
 invoke.cont372:                                   ; preds = %invoke.cont343
   store %class.btConeTwistConstraint* %140, %class.btConeTwistConstraint** %coneC, align 4
-  %143 = load %class.btConeTwistConstraint** %coneC, align 4
+  %143 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
   call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %143, float 0x3FE921FB60000000, float 0x3FE921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
-  %144 = load %class.btConeTwistConstraint** %coneC, align 4
+  %144 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
   %145 = bitcast %class.btConeTwistConstraint* %144 to %class.btTypedConstraint*
   %m_joints374 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx375 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints374, i32 0, i32 4
   store %class.btTypedConstraint* %145, %class.btTypedConstraint** %arrayidx375, align 4
   %m_ownerWorld376 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
-  %146 = load %class.btDynamicsWorld** %m_ownerWorld376, align 4
+  %146 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld376, align 4
   %147 = bitcast %class.btDynamicsWorld* %146 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
-  %vtable377 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %147
+  %vtable377 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %147
   %vfn378 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable377, i64 10
-  %148 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn378
+  %148 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn378
   %m_joints379 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx380 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints379, i32 0, i32 4
-  %149 = load %class.btTypedConstraint** %arrayidx380, align 4
+  %149 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx380, align 4
   call void %148(%class.btDynamicsWorld* %146, %class.btTypedConstraint* %149, i1 zeroext true)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
@@ -1042,31 +1042,31 @@
   %150 = bitcast i8* %call395 to %class.btHingeConstraint*
   %m_bodies396 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx397 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies396, i32 0, i32 5
-  %151 = load %class.btRigidBody** %arrayidx397, align 4
+  %151 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx397, align 4
   %m_bodies398 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx399 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies398, i32 0, i32 6
-  %152 = load %class.btRigidBody** %arrayidx399, align 4
+  %152 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx399, align 4
   %call402 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %150, %class.btRigidBody* %151, %class.btRigidBody* %152, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
           to label %invoke.cont401 unwind label %lpad400
 
 invoke.cont401:                                   ; preds = %invoke.cont372
   store %class.btHingeConstraint* %150, %class.btHingeConstraint** %hingeC, align 4
-  %153 = load %class.btHingeConstraint** %hingeC, align 4
+  %153 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
   call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %153, float 0.000000e+00, float 0x3FF921FB60000000, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
-  %154 = load %class.btHingeConstraint** %hingeC, align 4
+  %154 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
   %155 = bitcast %class.btHingeConstraint* %154 to %class.btTypedConstraint*
   %m_joints403 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx404 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints403, i32 0, i32 5
   store %class.btTypedConstraint* %155, %class.btTypedConstraint** %arrayidx404, align 4
   %m_ownerWorld405 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
-  %156 = load %class.btDynamicsWorld** %m_ownerWorld405, align 4
+  %156 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld405, align 4
   %157 = bitcast %class.btDynamicsWorld* %156 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
-  %vtable406 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %157
+  %vtable406 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %157
   %vfn407 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable406, i64 10
-  %158 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn407
+  %158 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn407
   %m_joints408 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx409 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints408, i32 0, i32 5
-  %159 = load %class.btTypedConstraint** %arrayidx409, align 4
+  %159 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx409, align 4
   call void %158(%class.btDynamicsWorld* %156, %class.btTypedConstraint* %159, i1 zeroext true)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
@@ -1090,31 +1090,31 @@
   %160 = bitcast i8* %call424 to %class.btConeTwistConstraint*
   %m_bodies425 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx426 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies425, i32 0, i32 1
-  %161 = load %class.btRigidBody** %arrayidx426, align 4
+  %161 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx426, align 4
   %m_bodies427 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx428 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies427, i32 0, i32 7
-  %162 = load %class.btRigidBody** %arrayidx428, align 4
+  %162 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx428, align 4
   %call431 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %160, %class.btRigidBody* %161, %class.btRigidBody* %162, %class.btTransform* %localA, %class.btTransform* %localB)
           to label %invoke.cont430 unwind label %lpad429
 
 invoke.cont430:                                   ; preds = %invoke.cont401
   store %class.btConeTwistConstraint* %160, %class.btConeTwistConstraint** %coneC, align 4
-  %163 = load %class.btConeTwistConstraint** %coneC, align 4
+  %163 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
   call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %163, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
-  %164 = load %class.btConeTwistConstraint** %coneC, align 4
+  %164 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
   %165 = bitcast %class.btConeTwistConstraint* %164 to %class.btTypedConstraint*
   %m_joints432 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx433 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints432, i32 0, i32 6
   store %class.btTypedConstraint* %165, %class.btTypedConstraint** %arrayidx433, align 4
   %m_ownerWorld434 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
-  %166 = load %class.btDynamicsWorld** %m_ownerWorld434, align 4
+  %166 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld434, align 4
   %167 = bitcast %class.btDynamicsWorld* %166 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
-  %vtable435 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %167
+  %vtable435 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %167
   %vfn436 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable435, i64 10
-  %168 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn436
+  %168 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn436
   %m_joints437 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx438 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints437, i32 0, i32 6
-  %169 = load %class.btTypedConstraint** %arrayidx438, align 4
+  %169 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx438, align 4
   call void %168(%class.btDynamicsWorld* %166, %class.btTypedConstraint* %169, i1 zeroext true)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
@@ -1138,31 +1138,31 @@
   %170 = bitcast i8* %call453 to %class.btHingeConstraint*
   %m_bodies454 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx455 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies454, i32 0, i32 7
-  %171 = load %class.btRigidBody** %arrayidx455, align 4
+  %171 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx455, align 4
   %m_bodies456 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx457 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies456, i32 0, i32 8
-  %172 = load %class.btRigidBody** %arrayidx457, align 4
+  %172 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx457, align 4
   %call460 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %170, %class.btRigidBody* %171, %class.btRigidBody* %172, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
           to label %invoke.cont459 unwind label %lpad458
 
 invoke.cont459:                                   ; preds = %invoke.cont430
   store %class.btHingeConstraint* %170, %class.btHingeConstraint** %hingeC, align 4
-  %173 = load %class.btHingeConstraint** %hingeC, align 4
+  %173 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
   call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %173, float 0xBFF921FB60000000, float 0.000000e+00, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
-  %174 = load %class.btHingeConstraint** %hingeC, align 4
+  %174 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
   %175 = bitcast %class.btHingeConstraint* %174 to %class.btTypedConstraint*
   %m_joints461 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx462 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints461, i32 0, i32 7
   store %class.btTypedConstraint* %175, %class.btTypedConstraint** %arrayidx462, align 4
   %m_ownerWorld463 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
-  %176 = load %class.btDynamicsWorld** %m_ownerWorld463, align 4
+  %176 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld463, align 4
   %177 = bitcast %class.btDynamicsWorld* %176 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
-  %vtable464 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %177
+  %vtable464 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %177
   %vfn465 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable464, i64 10
-  %178 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn465
+  %178 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn465
   %m_joints466 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx467 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints466, i32 0, i32 7
-  %179 = load %class.btTypedConstraint** %arrayidx467, align 4
+  %179 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx467, align 4
   call void %178(%class.btDynamicsWorld* %176, %class.btTypedConstraint* %179, i1 zeroext true)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
@@ -1186,31 +1186,31 @@
   %180 = bitcast i8* %call482 to %class.btConeTwistConstraint*
   %m_bodies483 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx484 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies483, i32 0, i32 1
-  %181 = load %class.btRigidBody** %arrayidx484, align 4
+  %181 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx484, align 4
   %m_bodies485 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx486 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies485, i32 0, i32 9
-  %182 = load %class.btRigidBody** %arrayidx486, align 4
+  %182 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx486, align 4
   %call489 = invoke %class.btConeTwistConstraint* @_ZN21btConeTwistConstraintC1ER11btRigidBodyS1_RK11btTransformS4_(%class.btConeTwistConstraint* %180, %class.btRigidBody* %181, %class.btRigidBody* %182, %class.btTransform* %localA, %class.btTransform* %localB)
           to label %invoke.cont488 unwind label %lpad487
 
 invoke.cont488:                                   ; preds = %invoke.cont459
   store %class.btConeTwistConstraint* %180, %class.btConeTwistConstraint** %coneC, align 4
-  %183 = load %class.btConeTwistConstraint** %coneC, align 4
+  %183 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
   call void @_ZN21btConeTwistConstraint8setLimitEffffff(%class.btConeTwistConstraint* %183, float 0x3FF921FB60000000, float 0x3FF921FB60000000, float 0.000000e+00, float 1.000000e+00, float 0x3FD3333340000000, float 1.000000e+00)
-  %184 = load %class.btConeTwistConstraint** %coneC, align 4
+  %184 = load %class.btConeTwistConstraint*, %class.btConeTwistConstraint** %coneC, align 4
   %185 = bitcast %class.btConeTwistConstraint* %184 to %class.btTypedConstraint*
   %m_joints490 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx491 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints490, i32 0, i32 8
   store %class.btTypedConstraint* %185, %class.btTypedConstraint** %arrayidx491, align 4
   %m_ownerWorld492 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
-  %186 = load %class.btDynamicsWorld** %m_ownerWorld492, align 4
+  %186 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld492, align 4
   %187 = bitcast %class.btDynamicsWorld* %186 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
-  %vtable493 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %187
+  %vtable493 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %187
   %vfn494 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable493, i64 10
-  %188 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn494
+  %188 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn494
   %m_joints495 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx496 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints495, i32 0, i32 8
-  %189 = load %class.btTypedConstraint** %arrayidx496, align 4
+  %189 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx496, align 4
   call void %188(%class.btDynamicsWorld* %186, %class.btTypedConstraint* %189, i1 zeroext true)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localA)
   call void @_ZN11btTransform11setIdentityEv(%class.btTransform* %localB)
@@ -1234,33 +1234,33 @@
   %190 = bitcast i8* %call511 to %class.btHingeConstraint*
   %m_bodies512 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx513 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies512, i32 0, i32 9
-  %191 = load %class.btRigidBody** %arrayidx513, align 4
+  %191 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx513, align 4
   %m_bodies514 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 3
   %arrayidx515 = getelementptr inbounds [11 x %class.btRigidBody*], [11 x %class.btRigidBody*]* %m_bodies514, i32 0, i32 10
-  %192 = load %class.btRigidBody** %arrayidx515, align 4
+  %192 = load %class.btRigidBody*, %class.btRigidBody** %arrayidx515, align 4
   %call518 = invoke %class.btHingeConstraint* @_ZN17btHingeConstraintC1ER11btRigidBodyS1_RK11btTransformS4_b(%class.btHingeConstraint* %190, %class.btRigidBody* %191, %class.btRigidBody* %192, %class.btTransform* %localA, %class.btTransform* %localB, i1 zeroext false)
           to label %invoke.cont517 unwind label %lpad516
 
 invoke.cont517:                                   ; preds = %invoke.cont488
   store %class.btHingeConstraint* %190, %class.btHingeConstraint** %hingeC, align 4
-  %193 = load %class.btHingeConstraint** %hingeC, align 4
+  %193 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
   call void @_ZN17btHingeConstraint8setLimitEfffff(%class.btHingeConstraint* %193, float 0xBFF921FB60000000, float 0.000000e+00, float 0x3FECCCCCC0000000, float 0x3FD3333340000000, float 1.000000e+00)
-  %194 = load %class.btHingeConstraint** %hingeC, align 4
+  %194 = load %class.btHingeConstraint*, %class.btHingeConstraint** %hingeC, align 4
   %195 = bitcast %class.btHingeConstraint* %194 to %class.btTypedConstraint*
   %m_joints519 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx520 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints519, i32 0, i32 9
   store %class.btTypedConstraint* %195, %class.btTypedConstraint** %arrayidx520, align 4
   %m_ownerWorld521 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 1
-  %196 = load %class.btDynamicsWorld** %m_ownerWorld521, align 4
+  %196 = load %class.btDynamicsWorld*, %class.btDynamicsWorld** %m_ownerWorld521, align 4
   %197 = bitcast %class.btDynamicsWorld* %196 to void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)***
-  %vtable522 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %197
+  %vtable522 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)**, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*** %197
   %vfn523 = getelementptr inbounds void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vtable522, i64 10
-  %198 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn523
+  %198 = load void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)*, void (%class.btDynamicsWorld*, %class.btTypedConstraint*, i1)** %vfn523
   %m_joints524 = getelementptr inbounds %class.RagDoll, %class.RagDoll* %this1, i32 0, i32 4
   %arrayidx525 = getelementptr inbounds [10 x %class.btTypedConstraint*], [10 x %class.btTypedConstraint*]* %m_joints524, i32 0, i32 9
-  %199 = load %class.btTypedConstraint** %arrayidx525, align 4
+  %199 = load %class.btTypedConstraint*, %class.btTypedConstraint** %arrayidx525, align 4
   call void %198(%class.btDynamicsWorld* %196, %class.btTypedConstraint* %199, i1 zeroext true)
-  %200 = load %class.RagDoll** %retval
+  %200 = load %class.RagDoll*, %class.RagDoll** %retval
   ret %class.RagDoll* %200
 
 lpad258:                                          ; preds = %for.end
@@ -1364,8 +1364,8 @@
   br label %eh.resume
 
 eh.resume:                                        ; preds = %lpad516, %lpad487, %lpad458, %lpad429, %lpad400, %lpad371, %lpad342, %lpad313, %lpad284, %lpad258, %invoke.cont92, %invoke.cont83, %invoke.cont74, %invoke.cont65, %invoke.cont56, %invoke.cont47, %invoke.cont38, %invoke.cont29, %invoke.cont20, %invoke.cont11, %invoke.cont4
-  %exn = load i8** %exn.slot
-  %sel = load i32* %ehselector.slot
+  %exn = load i8*, i8** %exn.slot
+  %sel = load i32, i32* %ehselector.slot
   %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
   %lpad.val526 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
   resume { i8*, i32 } %lpad.val526
diff --git a/llvm/test/CodeGen/Thumb2/crash.ll b/llvm/test/CodeGen/Thumb2/crash.ll
index a3ab78d..8571f2c 100644
--- a/llvm/test/CodeGen/Thumb2/crash.ll
+++ b/llvm/test/CodeGen/Thumb2/crash.ll
@@ -7,13 +7,13 @@
 define arm_apcscc void @NEON_vst4q_u32(i32* nocapture %sp0, i32* nocapture %sp1, i32* nocapture %sp2, i32* nocapture %sp3, i32* %dp) nounwind {
 entry:
   %0 = bitcast i32* %sp0 to <4 x i32>*            ; <<4 x i32>*> [#uses=1]
-  %1 = load <4 x i32>* %0, align 16               ; <<4 x i32>> [#uses=1]
+  %1 = load <4 x i32>, <4 x i32>* %0, align 16               ; <<4 x i32>> [#uses=1]
   %2 = bitcast i32* %sp1 to <4 x i32>*            ; <<4 x i32>*> [#uses=1]
-  %3 = load <4 x i32>* %2, align 16               ; <<4 x i32>> [#uses=1]
+  %3 = load <4 x i32>, <4 x i32>* %2, align 16               ; <<4 x i32>> [#uses=1]
   %4 = bitcast i32* %sp2 to <4 x i32>*            ; <<4 x i32>*> [#uses=1]
-  %5 = load <4 x i32>* %4, align 16               ; <<4 x i32>> [#uses=1]
+  %5 = load <4 x i32>, <4 x i32>* %4, align 16               ; <<4 x i32>> [#uses=1]
   %6 = bitcast i32* %sp3 to <4 x i32>*            ; <<4 x i32>*> [#uses=1]
-  %7 = load <4 x i32>* %6, align 16               ; <<4 x i32>> [#uses=1]
+  %7 = load <4 x i32>, <4 x i32>* %6, align 16               ; <<4 x i32>> [#uses=1]
   %8 = bitcast i32* %dp to i8*                    ; <i8*> [#uses=1]
   tail call void @llvm.arm.neon.vst4.v4i32(i8* %8, <4 x i32> %1, <4 x i32> %3, <4 x i32> %5, <4 x i32> %7, i32 1)
   ret void
@@ -41,10 +41,10 @@
   br i1 %exitcond, label %bb2, label %bb
 
 bb2:                                              ; preds = %bb
-  %2 = load <4 x i32>* bitcast ([16 x i32]* @sbuf to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
-  %3 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 4) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
-  %4 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 8) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
-  %5 = load <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 12) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
+  %2 = load <4 x i32>, <4 x i32>* bitcast ([16 x i32]* @sbuf to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
+  %3 = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 4) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
+  %4 = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 8) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
+  %5 = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([16 x i32]* @sbuf, i32 0, i32 12) to <4 x i32>*), align 16 ; <<4 x i32>> [#uses=1]
   tail call void @llvm.arm.neon.vst4.v4i32(i8* bitcast ([16 x i32]* @dbuf to i8*), <4 x i32> %2, <4 x i32> %3, <4 x i32> %4, <4 x i32> %5, i32 1) nounwind
   ret i32 0
 }
diff --git a/llvm/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/llvm/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
index 6c7028d..ecb63b1 100644
--- a/llvm/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
+++ b/llvm/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll
@@ -29,24 +29,24 @@
   %fi.1 = getelementptr float, float* %fz, i32 undef     ; <float*> [#uses=2]
   %tmp80 = add i32 0, %tmp79                      ; <i32> [#uses=1]
   %scevgep81 = getelementptr float, float* %fz, i32 %tmp80 ; <float*> [#uses=1]
-  %2 = load float* undef, align 4                 ; <float> [#uses=1]
+  %2 = load float, float* undef, align 4                 ; <float> [#uses=1]
   %3 = fmul float %2, %1                          ; <float> [#uses=1]
-  %4 = load float* null, align 4                  ; <float> [#uses=2]
+  %4 = load float, float* null, align 4                  ; <float> [#uses=2]
   %5 = fmul float %4, %0                          ; <float> [#uses=1]
   %6 = fsub float %3, %5                          ; <float> [#uses=1]
   %7 = fmul float %4, %1                          ; <float> [#uses=1]
   %8 = fadd float undef, %7                       ; <float> [#uses=2]
-  %9 = load float* %fi.1, align 4                 ; <float> [#uses=2]
+  %9 = load float, float* %fi.1, align 4                 ; <float> [#uses=2]
   %10 = fsub float %9, %8                         ; <float> [#uses=1]
   %11 = fadd float %9, %8                         ; <float> [#uses=1]
   %12 = fsub float 0.000000e+00, %6               ; <float> [#uses=1]
   %13 = fsub float 0.000000e+00, undef            ; <float> [#uses=2]
   %14 = fmul float undef, %0                      ; <float> [#uses=1]
   %15 = fadd float %14, undef                     ; <float> [#uses=2]
-  %16 = load float* %scevgep81, align 4           ; <float> [#uses=2]
+  %16 = load float, float* %scevgep81, align 4           ; <float> [#uses=2]
   %17 = fsub float %16, %15                       ; <float> [#uses=1]
   %18 = fadd float %16, %15                       ; <float> [#uses=2]
-  %19 = load float* undef, align 4                ; <float> [#uses=2]
+  %19 = load float, float* undef, align 4                ; <float> [#uses=2]
   %20 = fsub float %19, %13                       ; <float> [#uses=2]
   %21 = fadd float %19, %13                       ; <float> [#uses=1]
   %22 = fmul float %s1.02, %18                    ; <float> [#uses=1]
diff --git a/llvm/test/CodeGen/Thumb2/float-ops.ll b/llvm/test/CodeGen/Thumb2/float-ops.ll
index d383065..e0396e6 100644
--- a/llvm/test/CodeGen/Thumb2/float-ops.ll
+++ b/llvm/test/CodeGen/Thumb2/float-ops.ll
@@ -102,7 +102,7 @@
 ; CHECK-LABEL: load_f:
 ; NONE: ldr r0, [r0]
 ; HARD: vldr s0, [r0]
-  %0 = load float* %a, align 4
+  %0 = load float, float* %a, align 4
   ret float %0
 }
 
@@ -111,7 +111,7 @@
 ; CHECK-LABEL: load_d:
 ; NONE: ldm.w r0, {r0, r1}
 ; HARD: vldr d0, [r0]
-  %0 = load double* %a, align 8
+  %0 = load double, double* %a, align 8
   ret double %0
 }
 
diff --git a/llvm/test/CodeGen/Thumb2/frameless2.ll b/llvm/test/CodeGen/Thumb2/frameless2.ll
index b2b78fc..3743354 100644
--- a/llvm/test/CodeGen/Thumb2/frameless2.ll
+++ b/llvm/test/CodeGen/Thumb2/frameless2.ll
@@ -6,7 +6,7 @@
 define void @vorbis_encode_noisebias_setup(i8* nocapture %vi.0.7.val, double %s, i32 %block, i32* nocapture %suppress, %struct.noise3* nocapture %in, %struct.noiseguard* nocapture %guard, double %userbias) nounwind {
 entry:
   %0 = getelementptr %struct.noiseguard, %struct.noiseguard* %guard, i32 %block, i32 2; <i32*> [#uses=1]
-  %1 = load i32* %0, align 4                      ; <i32> [#uses=1]
+  %1 = load i32, i32* %0, align 4                      ; <i32> [#uses=1]
   store i32 %1, i32* undef, align 4
   unreachable
 }
diff --git a/llvm/test/CodeGen/Thumb2/ifcvt-neon.ll b/llvm/test/CodeGen/Thumb2/ifcvt-neon.ll
index 00f3399..83c0b60 100644
--- a/llvm/test/CodeGen/Thumb2/ifcvt-neon.ll
+++ b/llvm/test/CodeGen/Thumb2/ifcvt-neon.ll
@@ -7,8 +7,8 @@
 define float @t(i32 %c) nounwind {
 entry:
   %0 = icmp sgt i32 %c, 1                         ; <i1> [#uses=1]
-  %1 = load float* @a, align 4                    ; <float> [#uses=2]
-  %2 = load float* @b, align 4                    ; <float> [#uses=2]
+  %1 = load float, float* @a, align 4                    ; <float> [#uses=2]
+  %2 = load float, float* @b, align 4                    ; <float> [#uses=2]
   br i1 %0, label %bb, label %bb1
 
 bb:                                               ; preds = %entry
diff --git a/llvm/test/CodeGen/Thumb2/inflate-regs.ll b/llvm/test/CodeGen/Thumb2/inflate-regs.ll
index d8a558c..4814db2 100644
--- a/llvm/test/CodeGen/Thumb2/inflate-regs.ll
+++ b/llvm/test/CodeGen/Thumb2/inflate-regs.ll
@@ -14,7 +14,7 @@
 ; CHECK: vstr s
 define void @local_split(float* nocapture %p) nounwind ssp {
 entry:
-  %x = load float* %p, align 4
+  %x = load float, float* %p, align 4
   %a = fadd float %x, 1.0
   tail call void asm sideeffect "", "~{d0},~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
   store float %a, float* %p, align 4
@@ -33,7 +33,7 @@
 ; CHECK: vstr s
 define void @global_split(float* nocapture %p1, float* nocapture %p2) nounwind ssp {
 entry:
-  %0 = load float* %p1, align 4
+  %0 = load float, float* %p1, align 4
   %add = fadd float %0, 1.000000e+00
   tail call void asm sideeffect "", "~{d0},~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
   %cmp = fcmp ogt float %add, 0.000000e+00
diff --git a/llvm/test/CodeGen/Thumb2/large-call.ll b/llvm/test/CodeGen/Thumb2/large-call.ll
index da45dbe..ca94980 100644
--- a/llvm/test/CodeGen/Thumb2/large-call.ll
+++ b/llvm/test/CodeGen/Thumb2/large-call.ll
@@ -21,7 +21,7 @@
 entry:
   %d = alloca double, align 8
   store double 1.000000e+00, double* %d, align 8
-  %0 = load double* %d, align 8
+  %0 = load double, double* %d, align 8
  call void (i8*, i8*, i8*, ...)* @variadic(i8* null, i8* null, i8* null, i32 1, double 1.234800e+03, double 2.363450e+03, double %0, i32 1, double 1.234560e+03, double 2.345670e+03, double 4.6334563e+03, double 2.423440e+03, double 4.234330e+03, double 2.965430e+03, i32 1, double 4.669300e+03, double 2.927500e+03, double 4.663100e+03, double 2.921000e+03, double 4.663100e+03, double 2.345100e+03, i32 1, double 3.663100e+03, double 2.905100e+03, double 4.669300e+03, double 2.898600e+03, double 4.676900e+03, double 2.898600e+03, i32 1, double 4.684600e+03, double 2.898600e+03, double 1.234800e+03, double 2.905100e+03, double 1.234800e+03, double 2.345100e+03, i32 1, double 7.719700e+03, double 2.920500e+03, double 4.713500e+03, double 2.927000e+03, double 4.705800e+03, double 2.927000e+03, i32 1, double 8.698200e+03, double 2.927000e+03, double 4.692000e+03, double 2.920500e+03, double 4.692000e+03, double 2.912500e+03, i32 1, double 4.692000e+03, double 2.945600e+03, double 4.698200e+03, double 2.898100e+03, double 4.705800e+03, double 2.898100e+03, i32 1, double 4.713500e+03, double 2.898100e+03, double 4.719700e+03, double 2.945600e+03, double 4.719700e+03, double 2.912500e+03, i32 1, double 4.749200e+03, double 2.920100e+03, double 4.743000e+03, double 2.926600e+03, double 4.735300e+03, double 2.926600e+03, i32 1, double 4.727700e+03, double 2.926600e+03, double 4.721500e+03, double 2.920100e+03, double 4.721500e+03, double 2.912100e+03, i32 1, double 4.721500e+03, double 2.945100e+03, double 4.727700e+03, double 2.897700e+03, double 4.735300e+03, double 2.897700e+03, i32 1, double 4.743000e+03, double 2.897700e+03, double 4.749200e+03, double 2.945100e+03, double 4.749200e+03, double 2.912100e+03, i32 1, double 4.778200e+03, double 2.920100e+03, double 4.772000e+03, double 2.926600e+03, double 4.764300e+03, double 2.926600e+03, i32 1, double 4.756700e+03, double 2.926600e+03, double 4.750500e+03, double 2.920100e+03, double 4.750500e+03, double 2.912100e+03, i32 1, double 4.750500e+03, double 2.945100e+03, double 4.756700e+03, double 2.897700e+03, double 4.764300e+03, double 2.897700e+03, i32 1, double 4.772000e+03, double 2.897700e+03, double 4.778200e+03, double 2.945100e+03, double 4.778200e+03, double 2.912100e+03, i32 1, double 4.801900e+03, double 2.942100e+03, double 4.795700e+03, double 2.948500e+03, double 4.788100e+03, double 2.948500e+03, i32 1, double 4.780500e+03, double 2.948500e+03, double 4.774300e+03, double 2.942100e+03, double 4.774300e+03, double 2.934100e+03, i32 1, double 4.774300e+03, double 2.926100e+03, double 4.780500e+03, double 2.919600e+03, double 4.788100e+03, double 2.919600e+03, i32 1, double 4.795700e+03, double 2.919600e+03, double 4.801900e+03, double 2.926100e+03, double 4.801900e+03, double 2.934100e+03, i32 1, double 4.801500e+03, double 2.972500e+03, double 4.795300e+03, double 2.978900e+03, double 4.787700e+03, double 2.978900e+03, i32 1, double 4.780000e+03, double 2.978900e+03, double 4.773800e+03, double 2.972500e+03, double 4.773800e+03, double 2.964500e+03, i32 1, double 4.773800e+03, double 2.956500e+03, double 4.780000e+03, double 2.950000e+03, double 4.787700e+03, double 2.950000e+03, i32 1, double 4.795300e+03, double 2.950000e+03, double 4.801500e+03, double 2.956500e+03, double 4.801500e+03, double 2.964500e+03, i32 1, double 4.802400e+03, double 3.010200e+03, double 4.796200e+03, double 3.016600e+03, double 4.788500e+03, double 3.016600e+03, i32 1, double 4.780900e+03, double 3.016600e+03, double 4.774700e+03, double 3.010200e+03, double 4.774700e+03, double 3.002200e+03, i32 1, double 4.774700e+03, double 2.994200e+03, double 4.780900e+03, double 2.987700e+03, double 4.788500e+03, double 2.987700e+03, i32 1, double 4.796200e+03, double 2.987700e+03, double 4.802400e+03, double 2.994200e+03, double 4.802400e+03, double 3.002200e+03, i32 1, double 4.802400e+03, double 3.039400e+03, double 4.796200e+03, double 3.455800e+03, double 4.788500e+03, double 3.455800e+03, i32 1, double 4.780900e+03, double 3.455800e+03, double 4.774700e+03, double 3.039400e+03, double 4.774700e+03, double 3.031400e+03, i32 1, double 4.774700e+03, double 3.023400e+03, double 4.780900e+03, double 3.016900e+03, double 4.788500e+03, double 3.016900e+03, i32 1, double 4.796200e+03, double 3.016900e+03, double 4.802400e+03, double 3.023400e+03, double 4.802400e+03, double 3.031400e+03, i32 1, double 4.778600e+03, double 3.063100e+03, double 4.772400e+03, double 3.069600e+03, double 4.764700e+03, double 3.069600e+03, i32 1, double 4.757100e+03, double 3.069600e+03, double 4.750900e+03, double 3.063100e+03, double 4.750900e+03, double 3.055100e+03, i32 1, double 4.750900e+03, double 3.457100e+03, double 4.757100e+03, double 3.450700e+03, double 4.764700e+03, double 3.450700e+03, i32 1, double 4.772400e+03, double 3.450700e+03, double 4.778600e+03, double 3.457100e+03, double 4.778600e+03, double 3.055100e+03, i32 1, double 4.748600e+03, double 3.063600e+03, double 4.742400e+03, double 3.070000e+03, double 4.734700e+03, double 3.070000e+03, i32 1, double 4.727100e+03, double 3.070000e+03, double 4.720900e+03, double 3.063600e+03, double 4.720900e+03, double 3.055600e+03, i32 1, double 4.720900e+03, double 3.457600e+03, double 4.727100e+03, double 3.451100e+03, double 4.734700e+03, double 3.451100e+03, i32 1, double 4.742400e+03, double 3.451100e+03, double 4.748600e+03, double 3.457600e+03, double 4.748600e+03, double 3.055600e+03, i32 1, double 4.719500e+03, double 3.063600e+03, double 4.713300e+03, double 3.070000e+03, double 4.705700e+03, double 3.070000e+03, i32 1, double 4.698000e+03, double 3.070000e+03, double 4.691900e+03, double 3.063600e+03, double 4.691900e+03, double 3.055600e+03, i32 1, double 4.691900e+03, double 3.457600e+03, double 4.698000e+03, double 3.451100e+03, double 4.705700e+03, double 3.451100e+03, i32 1, double 4.713300e+03, double 3.451100e+03, double 4.719500e+03, double 3.457600e+03, double 4.719500e+03, double 3.055600e+03, i32 1, double 4.691300e+03, double 3.064000e+03, double 4.685100e+03, double 3.070500e+03, double 4.677500e+03, double 3.070500e+03, i32 1, double 4.669900e+03, double 3.070500e+03, double 4.663700e+03, double 3.064000e+03, double 4.663700e+03, double 3.056000e+03, i32 1, double 4.663700e+03, double 3.458000e+03, double 4.669900e+03, double 3.451600e+03, double 4.677500e+03, double 3.451600e+03, i32 1, double 4.685100e+03, double 3.451600e+03, double 4.691300e+03, double 3.458000e+03, double 4.691300e+03, double 3.056000e+03, i32 1, double 4.668500e+03, double 3.453000e+03, double 4.662300e+03, double 3.459400e+03, double 4.654700e+03, double 3.459400e+03, i32 1, double 4.647000e+03, double 3.459400e+03, double 4.640900e+03, double 3.453000e+03, double 4.640900e+03, double 3.035000e+03, i32 1, double 4.640900e+03, double 3.027000e+03, double 4.647000e+03, double 3.020500e+03, double 4.654700e+03, double 3.020500e+03, i32 1, double 4.662300e+03, double 3.020500e+03, double 4.668500e+03, double 3.027000e+03, double 4.668500e+03, double 3.035000e+03, i32 1, double 4.668500e+03, double 3.014300e+03, double 4.662300e+03, double 3.020800e+03, double 4.654700e+03, double 3.020800e+03, i32 1, double 4.647000e+03, double 3.020800e+03, double 4.640900e+03, double 3.014300e+03, double 4.640900e+03, double 3.006400e+03, i32 1, double 4.640900e+03, double 2.998400e+03, double 4.647000e+03, double 2.991900e+03, double 4.654700e+03, double 2.991900e+03, i32 1, double 4.662300e+03, double 2.991900e+03, double 4.668500e+03, double 2.998400e+03, double 4.668500e+03, double 3.006400e+03, i32 1, double 4.668100e+03, double 2.941100e+03, double 4.661900e+03, double 2.947600e+03, double 4.654200e+03, double 2.947600e+03, i32 1, double 4.646600e+03, double 2.947600e+03, double 4.640400e+03, double 2.941100e+03, double 4.640400e+03, double 2.933100e+03, i32 1, double 4.640400e+03, double 2.925200e+03, double 4.646600e+03, double 2.918700e+03, double 4.654200e+03, double 2.918700e+03, i32 1, double 4.661900e+03, double 2.918700e+03, double 4.668100e+03, double 2.925200e+03, double 4.668100e+03, double 2.933100e+03, i32 1, double 4.668500e+03, double 2.971600e+03, double 4.662300e+03, double 2.978100e+03, double 4.654700e+03, double 2.978100e+03, i32 1, double 4.647000e+03, double 2.978100e+03, double 4.640900e+03, double 2.971600e+03, double 4.640900e+03, double 2.963600e+03, i32 1, double 4.640900e+03, double 2.955700e+03, double 4.647000e+03, double 2.949200e+03, double 4.654700e+03, double 2.949200e+03, i32 1, double 4.662300e+03, double 2.949200e+03, double 4.668500e+03, double 2.955700e+03, double 4.668500e+03, double 2.963600e+03, i32 2, i32 1, double 4.691300e+03, double 3.056000e+03, i32 2, i32 1, double 4.748600e+03, double 3.055600e+03, i32 2, i32 1, double 4.778200e+03, double 2.912100e+03, i32 2, i32 1, double 4.749200e+03, double 2.912100e+03, i32 2, i32 1, double 4.802400e+03, double 3.031400e+03, i32 2, i32 1, double 4.778600e+03, double 3.055100e+03, i32 2, i32 1, double 4.801500e+03, double 2.964500e+03, i32 2, i32 1, double 4.802400e+03, double 3.002200e+03, i32 2, i32 1, double 4.719700e+03, double 2.912500e+03, i32 2, i32 1, double 4.801900e+03, double 2.934100e+03, i32 2, i32 1, double 4.719500e+03, double 3.055600e+03, i32 2, i32 1, double 4.668500e+03, double 3.006400e+03, i32 2, i32 1, double 4.668500e+03, double 3.035000e+03, i32 2, i32 1, double 4.668100e+03, double 2.933100e+03, i32 2, i32 1, double 4.668500e+03, double 2.963600e+03, i32 2, i32 48)
   ret i32 0
 }
diff --git a/llvm/test/CodeGen/Thumb2/large-stack.ll b/llvm/test/CodeGen/Thumb2/large-stack.ll
index 8d79da7..ff9e050 100644
--- a/llvm/test/CodeGen/Thumb2/large-stack.ll
+++ b/llvm/test/CodeGen/Thumb2/large-stack.ll
@@ -36,6 +36,6 @@
     %tmp = alloca i32, align 4
     %a = alloca [805306369 x i8], align 16
     store i32 0, i32* %tmp
-    %tmp1 = load i32* %tmp
+    %tmp1 = load i32, i32* %tmp
     ret i32 %tmp1
 }
diff --git a/llvm/test/CodeGen/Thumb2/lsr-deficiency.ll b/llvm/test/CodeGen/Thumb2/lsr-deficiency.ll
index 84984d9..ccf7fae 100644
--- a/llvm/test/CodeGen/Thumb2/lsr-deficiency.ll
+++ b/llvm/test/CodeGen/Thumb2/lsr-deficiency.ll
@@ -10,7 +10,7 @@
 ; CHECK-LABEL: t:
 ; CHECK: mov{{.*}}, #1000
 entry:
-  %.pre = load i32* @G, align 4                   ; <i32> [#uses=1]
+  %.pre = load i32, i32* @G, align 4                   ; <i32> [#uses=1]
   br label %bb
 
 bb:                                               ; preds = %bb, %entry
@@ -22,9 +22,9 @@
   %0 = phi i32 [ %.pre, %entry ], [ %3, %bb ]     ; <i32> [#uses=1]
   %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
   %tmp5 = sub i32 1000, %indvar                   ; <i32> [#uses=1]
-  %1 = load i32** @array, align 4                 ; <i32*> [#uses=1]
+  %1 = load i32*, i32** @array, align 4                 ; <i32*> [#uses=1]
   %scevgep = getelementptr i32, i32* %1, i32 %tmp5     ; <i32*> [#uses=1]
-  %2 = load i32* %scevgep, align 4                ; <i32> [#uses=1]
+  %2 = load i32, i32* %scevgep, align 4                ; <i32> [#uses=1]
   %3 = add nsw i32 %2, %0                         ; <i32> [#uses=2]
   store i32 %3, i32* @G, align 4
   %indvar.next = add i32 %indvar, 1               ; <i32> [#uses=2]
diff --git a/llvm/test/CodeGen/Thumb2/machine-licm.ll b/llvm/test/CodeGen/Thumb2/machine-licm.ll
index ee4c656..2b1caa3 100644
--- a/llvm/test/CodeGen/Thumb2/machine-licm.ll
+++ b/llvm/test/CodeGen/Thumb2/machine-licm.ll
@@ -29,14 +29,14 @@
 ; PIC: LBB0_
 ; PIC-NOT: LCPI0_0:
 ; PIC: .section
-  %.pre = load i32* @GV, align 4                  ; <i32> [#uses=1]
+  %.pre = load i32, i32* @GV, align 4                  ; <i32> [#uses=1]
   br label %bb
 
 bb:                                               ; preds = %bb, %bb.nph
   %1 = phi i32 [ %.pre, %bb.nph ], [ %3, %bb ]    ; <i32> [#uses=1]
   %i.03 = phi i32 [ 0, %bb.nph ], [ %4, %bb ]     ; <i32> [#uses=2]
   %scevgep = getelementptr i32, i32* %vals, i32 %i.03  ; <i32*> [#uses=1]
-  %2 = load i32* %scevgep, align 4                ; <i32> [#uses=1]
+  %2 = load i32, i32* %scevgep, align 4                ; <i32> [#uses=1]
   %3 = add nsw i32 %1, %2                         ; <i32> [#uses=2]
   store i32 %3, i32* @GV, align 4
   %4 = add i32 %i.03, 1                           ; <i32> [#uses=2]
diff --git a/llvm/test/CodeGen/Thumb2/tail-call-r9.ll b/llvm/test/CodeGen/Thumb2/tail-call-r9.ll
index 673aa7c..33cbd3d 100644
--- a/llvm/test/CodeGen/Thumb2/tail-call-r9.ll
+++ b/llvm/test/CodeGen/Thumb2/tail-call-r9.ll
@@ -7,7 +7,7 @@
 define arm_aapcscc void @test(i32 %a) nounwind {
 ; CHECK-LABEL: test:
 ; CHECK-NOT: bx r9
-  %tmp = load void ()** @foo, align 4
+  %tmp = load void ()*, void ()** @foo, align 4
   tail call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r12}"() nounwind
   tail call arm_aapcscc void %tmp() nounwind
   ret void
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-call-tc.ll b/llvm/test/CodeGen/Thumb2/thumb2-call-tc.ll
index 2902949..96f63ba 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-call-tc.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-call-tc.ll
@@ -22,7 +22,7 @@
 
 ; LINUX-LABEL: h:
 ; LINUX: bx r0 @ TAILCALL
-        %tmp = load i32 ()** @t         ; <i32 ()*> [#uses=1]
+        %tmp = load i32 ()*, i32 ()** @t         ; <i32 ()*> [#uses=1]
         %tmp.upgrd.2 = tail call i32 %tmp( )            ; <i32> [#uses=0]
         ret void
 }
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-call.ll b/llvm/test/CodeGen/Thumb2/thumb2-call.ll
index 1d2eaa7..62b47a4 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-call.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-call.ll
@@ -21,7 +21,7 @@
 
 ; LINUX-LABEL: h:
 ; LINUX: blx r0
-        %tmp = load i32 ()** @t         ; <i32 ()*> [#uses=1]
+        %tmp = load i32 ()*, i32 ()** @t         ; <i32 ()*> [#uses=1]
         %tmp.upgrd.2 = call i32 %tmp( )            ; <i32> [#uses=0]
         ret void
 }
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll b/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
index d86a897..ebc12dc 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
@@ -63,7 +63,7 @@
 
 define void @foo(i32 %a) nounwind {
 entry:
-	%tmp = load i32** @x		; <i32*> [#uses=1]
+	%tmp = load i32*, i32** @x		; <i32*> [#uses=1]
 	store i32 %a, i32* %tmp
 	ret void
 }
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
index 13a1ca2..da1057b 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
@@ -65,7 +65,7 @@
 
 define void @foo(i32 %a) nounwind {
 entry:
-	%tmp = load i32** @x		; <i32*> [#uses=1]
+	%tmp = load i32*, i32** @x		; <i32*> [#uses=1]
 	store i32 %a, i32* %tmp
 	ret void
 }
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-ifcvt2.ll b/llvm/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
index a861912..91efc5d 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-ifcvt2.ll
@@ -41,9 +41,9 @@
 	br label %tailrecurse
 
 tailrecurse:		; preds = %bb, %entry
-	%tmp6 = load %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=1]
-	%tmp9 = load %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=2]
-	%tmp12 = load %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=1]
+	%tmp6 = load %struct.quad_struct*, %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=1]
+	%tmp9 = load %struct.quad_struct*, %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=2]
+	%tmp12 = load %struct.quad_struct*, %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=1]
 	%tmp14 = icmp eq %struct.quad_struct* null, null		; <i1> [#uses=1]
 	%tmp17 = icmp eq %struct.quad_struct* %tmp6, null		; <i1> [#uses=1]
 	%tmp23 = icmp eq %struct.quad_struct* %tmp9, null		; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-ifcvt3.ll b/llvm/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
index 79667d4..24eb1a9 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-ifcvt3.ll
@@ -25,7 +25,7 @@
 ; CHECK: movne
 ; CHECK: moveq
 ; CHECK: pop
-  %0 = load i64* @posed, align 4                  ; <i64> [#uses=3]
+  %0 = load i64, i64* @posed, align 4                  ; <i64> [#uses=3]
   %1 = sub i64 %0, %.reload78                     ; <i64> [#uses=1]
   %2 = ashr i64 %1, 1                             ; <i64> [#uses=3]
   %3 = icmp eq i64 %2, 0                          ; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-ldm.ll b/llvm/test/CodeGen/Thumb2/thumb2-ldm.ll
index adfcf2b..cf905eb 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-ldm.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-ldm.ll
@@ -7,8 +7,8 @@
 ; CHECK: push {r7, lr}
 ; CHECK: ldrd
 ; CHECK: pop {r7, pc}
-        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0)            ; <i32> [#uses=1]
-        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)           ; <i32> [#uses=1]
+        %tmp = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 0)            ; <i32> [#uses=1]
+        %tmp3 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)           ; <i32> [#uses=1]
         %tmp4 = call i32 @f1( i32 %tmp, i32 %tmp3 )                ; <i32> [#uses=1]
         ret i32 %tmp4
 }
@@ -18,9 +18,9 @@
 ; CHECK: push {r7, lr}
 ; CHECK: ldm
 ; CHECK: pop {r7, pc}
-        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)            ; <i32> [#uses=1]
-        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
-        %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4)           ; <i32> [#uses=1]
+        %tmp = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)            ; <i32> [#uses=1]
+        %tmp3 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
+        %tmp5 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 4)           ; <i32> [#uses=1]
         %tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 )             ; <i32> [#uses=1]
         ret i32 %tmp6
 }
@@ -30,9 +30,9 @@
 ; CHECK: push {r7, lr}
 ; CHECK: ldm
 ; CHECK: pop {r7, pc}
-        %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)            ; <i32> [#uses=1]
-        %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)           ; <i32> [#uses=1]
-        %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
+        %tmp = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 1)            ; <i32> [#uses=1]
+        %tmp3 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 2)           ; <i32> [#uses=1]
+        %tmp5 = load i32, i32* getelementptr ([0 x i32]* @X, i32 0, i32 3)           ; <i32> [#uses=1]
         %tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 )             ; <i32> [#uses=1]
         ret i32 %tmp6
 }
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-ldr.ll b/llvm/test/CodeGen/Thumb2/thumb2-ldr.ll
index 624b402..4b3ce86 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-ldr.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-ldr.ll
@@ -4,7 +4,7 @@
 entry:
 ; CHECK-LABEL: f1:
 ; CHECK: ldr r0, [r0]
-        %tmp = load i32* %v
+        %tmp = load i32, i32* %v
         ret i32 %tmp
 }
 
@@ -13,7 +13,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: ldr.w r0, [r0, #4092]
         %tmp2 = getelementptr i32, i32* %v, i32 1023
-        %tmp = load i32* %tmp2
+        %tmp = load i32, i32* %tmp2
         ret i32 %tmp
 }
 
@@ -23,7 +23,7 @@
 ; CHECK: mov.w r1, #4096
 ; CHECK: ldr r0, [r0, r1]
         %tmp2 = getelementptr i32, i32* %v, i32 1024
-        %tmp = load i32* %tmp2
+        %tmp = load i32, i32* %tmp2
         ret i32 %tmp
 }
 
@@ -33,7 +33,7 @@
 ; CHECK: ldr r0, [r0, #-128]
         %tmp1 = sub i32 %base, 128
         %tmp2 = inttoptr i32 %tmp1 to i32*
-        %tmp3 = load i32* %tmp2
+        %tmp3 = load i32, i32* %tmp2
         ret i32 %tmp3
 }
 
@@ -43,7 +43,7 @@
 ; CHECK: ldr r0, [r0, r1]
         %tmp1 = add i32 %base, %offset
         %tmp2 = inttoptr i32 %tmp1 to i32*
-        %tmp3 = load i32* %tmp2
+        %tmp3 = load i32, i32* %tmp2
         ret i32 %tmp3
 }
 
@@ -54,7 +54,7 @@
         %tmp1 = shl i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i32*
-        %tmp4 = load i32* %tmp3
+        %tmp4 = load i32, i32* %tmp3
         ret i32 %tmp4
 }
 
@@ -67,6 +67,6 @@
         %tmp1 = lshr i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i32*
-        %tmp4 = load i32* %tmp3
+        %tmp4 = load i32, i32* %tmp3
         ret i32 %tmp4
 }
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-ldr_ext.ll b/llvm/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
index b50b333..a911775 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-ldr_ext.ll
@@ -1,25 +1,25 @@
 ; RUN: llc -mtriple=thumb-eabi -mcpu=arm1156t2-s -mattr=+thumb2 %s -o - | FileCheck %s
 
 define i32 @test1(i8* %v.pntr.s0.u1) {
-    %tmp.u = load i8* %v.pntr.s0.u1
+    %tmp.u = load i8, i8* %v.pntr.s0.u1
     %tmp1.s = zext i8 %tmp.u to i32
     ret i32 %tmp1.s
 }
 
 define i32 @test2(i16* %v.pntr.s0.u1) {
-    %tmp.u = load i16* %v.pntr.s0.u1
+    %tmp.u = load i16, i16* %v.pntr.s0.u1
     %tmp1.s = zext i16 %tmp.u to i32
     ret i32 %tmp1.s
 }
 
 define i32 @test3(i8* %v.pntr.s1.u0) {
-    %tmp.s = load i8* %v.pntr.s1.u0
+    %tmp.s = load i8, i8* %v.pntr.s1.u0
     %tmp1.s = sext i8 %tmp.s to i32
     ret i32 %tmp1.s
 }
 
 define i32 @test4() {
-    %tmp.s = load i16* null
+    %tmp.s = load i16, i16* null
     %tmp1.s = sext i16 %tmp.s to i32
     ret i32 %tmp1.s
 }
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-ldr_post.ll b/llvm/test/CodeGen/Thumb2/thumb2-ldr_post.ll
index c26e6b1..cb7e795 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-ldr_post.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-ldr_post.ll
@@ -3,7 +3,7 @@
 define i32 @test(i32 %a, i32 %b, i32 %c) {
         %tmp1 = mul i32 %a, %b          ; <i32> [#uses=2]
         %tmp2 = inttoptr i32 %tmp1 to i32*              ; <i32*> [#uses=1]
-        %tmp3 = load i32* %tmp2         ; <i32> [#uses=1]
+        %tmp3 = load i32, i32* %tmp2         ; <i32> [#uses=1]
         %tmp4 = sub i32 %tmp1, 8               ; <i32> [#uses=1]
         %tmp5 = mul i32 %tmp4, %tmp3            ; <i32> [#uses=1]
         ret i32 %tmp5
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-ldr_pre.ll b/llvm/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
index 61b90fc..2bb327c 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-ldr_pre.ll
@@ -2,7 +2,7 @@
 
 define i32* @test1(i32* %X, i32* %dest) {
         %Y = getelementptr i32, i32* %X, i32 4               ; <i32*> [#uses=2]
-        %A = load i32* %Y               ; <i32> [#uses=1]
+        %A = load i32, i32* %Y               ; <i32> [#uses=1]
         store i32 %A, i32* %dest
         ret i32* %Y
 }
@@ -12,7 +12,7 @@
 define i32 @test2(i32 %a, i32 %b) {
         %tmp1 = sub i32 %a, 64          ; <i32> [#uses=2]
         %tmp2 = inttoptr i32 %tmp1 to i32*              ; <i32*> [#uses=1]
-        %tmp3 = load i32* %tmp2         ; <i32> [#uses=1]
+        %tmp3 = load i32, i32* %tmp2         ; <i32> [#uses=1]
         %tmp4 = sub i32 %tmp1, %b               ; <i32> [#uses=1]
         %tmp5 = add i32 %tmp4, %tmp3            ; <i32> [#uses=1]
         ret i32 %tmp5
@@ -22,7 +22,7 @@
 
 define i8* @test3(i8* %X, i32* %dest) {
         %tmp1 = getelementptr i8, i8* %X, i32 4
-        %tmp2 = load i8* %tmp1
+        %tmp2 = load i8, i8* %tmp1
         %tmp3 = sext i8 %tmp2 to i32
         store i32 %tmp3, i32* %dest
         ret i8* %tmp1
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-ldrb.ll b/llvm/test/CodeGen/Thumb2/thumb2-ldrb.ll
index 16c9f9c..cf8fd6d 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-ldrb.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-ldrb.ll
@@ -4,7 +4,7 @@
 entry:
 ; CHECK-LABEL: f1:
 ; CHECK: ldrb r0, [r0]
-        %tmp = load i8* %v
+        %tmp = load i8, i8* %v
         ret i8 %tmp
 }
 
@@ -13,7 +13,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: ldrb r0, [r0, #-1]
         %tmp2 = getelementptr i8, i8* %v, i8 1023
-        %tmp = load i8* %tmp2
+        %tmp = load i8, i8* %tmp2
         ret i8 %tmp
 }
 
@@ -24,7 +24,7 @@
 ; CHECK: ldrb r0, [r0, r1]
         %tmp1 = add i32 %base, 4096
         %tmp2 = inttoptr i32 %tmp1 to i8*
-        %tmp3 = load i8* %tmp2
+        %tmp3 = load i8, i8* %tmp2
         ret i8 %tmp3
 }
 
@@ -34,7 +34,7 @@
 ; CHECK: ldrb r0, [r0, #-128]
         %tmp1 = sub i32 %base, 128
         %tmp2 = inttoptr i32 %tmp1 to i8*
-        %tmp3 = load i8* %tmp2
+        %tmp3 = load i8, i8* %tmp2
         ret i8 %tmp3
 }
 
@@ -44,7 +44,7 @@
 ; CHECK: ldrb r0, [r0, r1]
         %tmp1 = add i32 %base, %offset
         %tmp2 = inttoptr i32 %tmp1 to i8*
-        %tmp3 = load i8* %tmp2
+        %tmp3 = load i8, i8* %tmp2
         ret i8 %tmp3
 }
 
@@ -55,7 +55,7 @@
         %tmp1 = shl i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i8*
-        %tmp4 = load i8* %tmp3
+        %tmp4 = load i8, i8* %tmp3
         ret i8 %tmp4
 }
 
@@ -67,6 +67,6 @@
         %tmp1 = lshr i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i8*
-        %tmp4 = load i8* %tmp3
+        %tmp4 = load i8, i8* %tmp3
         ret i8 %tmp4
 }
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-ldrd.ll b/llvm/test/CodeGen/Thumb2/thumb2-ldrd.ll
index 2e83ea1..c25359b 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-ldrd.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-ldrd.ll
@@ -6,8 +6,8 @@
 entry:
 ; CHECK: ldrd
 ; CHECK: umull
-	%0 = load i64** @b, align 4
-	%1 = load i64* %0, align 4
+	%0 = load i64*, i64** @b, align 4
+	%1 = load i64, i64* %0, align 4
 	%2 = mul i64 %1, %a
 	ret i64 %2
 }
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-ldrh.ll b/llvm/test/CodeGen/Thumb2/thumb2-ldrh.ll
index a00dcea..33dd681 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-ldrh.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-ldrh.ll
@@ -4,7 +4,7 @@
 entry:
 ; CHECK-LABEL: f1:
 ; CHECK: ldrh r0, [r0]
-        %tmp = load i16* %v
+        %tmp = load i16, i16* %v
         ret i16 %tmp
 }
 
@@ -13,7 +13,7 @@
 ; CHECK-LABEL: f2:
 ; CHECK: ldrh.w r0, [r0, #2046]
         %tmp2 = getelementptr i16, i16* %v, i16 1023
-        %tmp = load i16* %tmp2
+        %tmp = load i16, i16* %tmp2
         ret i16 %tmp
 }
 
@@ -23,7 +23,7 @@
 ; CHECK: mov.w r1, #4096
 ; CHECK: ldrh r0, [r0, r1]
         %tmp2 = getelementptr i16, i16* %v, i16 2048
-        %tmp = load i16* %tmp2
+        %tmp = load i16, i16* %tmp2
         ret i16 %tmp
 }
 
@@ -33,7 +33,7 @@
 ; CHECK: ldrh r0, [r0, #-128]
         %tmp1 = sub i32 %base, 128
         %tmp2 = inttoptr i32 %tmp1 to i16*
-        %tmp3 = load i16* %tmp2
+        %tmp3 = load i16, i16* %tmp2
         ret i16 %tmp3
 }
 
@@ -43,7 +43,7 @@
 ; CHECK: ldrh r0, [r0, r1]
         %tmp1 = add i32 %base, %offset
         %tmp2 = inttoptr i32 %tmp1 to i16*
-        %tmp3 = load i16* %tmp2
+        %tmp3 = load i16, i16* %tmp2
         ret i16 %tmp3
 }
 
@@ -54,7 +54,7 @@
         %tmp1 = shl i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i16*
-        %tmp4 = load i16* %tmp3
+        %tmp4 = load i16, i16* %tmp3
         ret i16 %tmp4
 }
 
@@ -66,6 +66,6 @@
         %tmp1 = lshr i32 %offset, 2
         %tmp2 = add i32 %base, %tmp1
         %tmp3 = inttoptr i32 %tmp2 to i16*
-        %tmp4 = load i16* %tmp3
+        %tmp4 = load i16, i16* %tmp3
         ret i16 %tmp4
 }
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-smul.ll b/llvm/test/CodeGen/Thumb2/thumb2-smul.ll
index 67783d2..937f773 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-smul.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-smul.ll
@@ -6,7 +6,7 @@
 define i32 @f1(i32 %y) {
 ; CHECK: f1
 ; CHECK: smulbt r0, r1, r0
-        %tmp = load i16* @x             ; <i16> [#uses=1]
+        %tmp = load i16, i16* @x             ; <i16> [#uses=1]
         %tmp1 = add i16 %tmp, 2         ; <i16> [#uses=1]
         %tmp2 = sext i16 %tmp1 to i32           ; <i32> [#uses=1]
         %tmp3 = ashr i32 %y, 16         ; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-spill-q.ll b/llvm/test/CodeGen/Thumb2/thumb2-spill-q.ll
index d1deb46..e0f7b5b 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -43,7 +43,7 @@
   store float 0.000000e+00, float* undef, align 4
   %ld12 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) nounwind
   store float 0.000000e+00, float* undef, align 4
-  %val173 = load <4 x float>* undef               ; <<4 x float>> [#uses=1]
+  %val173 = load <4 x float>, <4 x float>* undef               ; <<4 x float>> [#uses=1]
   br label %bb4
 
 bb4:                                              ; preds = %bb193, %entry
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-str_post.ll b/llvm/test/CodeGen/Thumb2/thumb2-str_post.ll
index aed849e..377c814 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-str_post.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-str_post.ll
@@ -3,7 +3,7 @@
 define i16 @test1(i32* %X, i16* %A) {
 ; CHECK-LABEL: test1:
 ; CHECK: strh {{.*}}[{{.*}}], #-4
-        %Y = load i32* %X               ; <i32> [#uses=1]
+        %Y = load i32, i32* %X               ; <i32> [#uses=1]
         %tmp1 = trunc i32 %Y to i16             ; <i16> [#uses=1]
         store i16 %tmp1, i16* %A
         %tmp2 = ptrtoint i16* %A to i16         ; <i16> [#uses=1]
@@ -14,7 +14,7 @@
 define i32 @test2(i32* %X, i32* %A) {
 ; CHECK-LABEL: test2:
 ; CHECK: str {{.*}}[{{.*}}],
-        %Y = load i32* %X               ; <i32> [#uses=1]
+        %Y = load i32, i32* %X               ; <i32> [#uses=1]
         store i32 %Y, i32* %A
         %tmp1 = ptrtoint i32* %A to i32         ; <i32> [#uses=1]
         %tmp2 = sub i32 %tmp1, 4                ; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-str_pre.ll b/llvm/test/CodeGen/Thumb2/thumb2-str_pre.ll
index 4195231..d69a102 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-str_pre.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-str_pre.ll
@@ -3,7 +3,7 @@
 define void @test1(i32* %X, i32* %A, i32** %dest) {
 ; CHECK: test1
 ; CHECK: str  r1, [r0, #16]!
-        %B = load i32* %A               ; <i32> [#uses=1]
+        %B = load i32, i32* %A               ; <i32> [#uses=1]
         %Y = getelementptr i32, i32* %X, i32 4               ; <i32*> [#uses=2]
         store i32 %B, i32* %Y
         store i32* %Y, i32** %dest
@@ -13,7 +13,7 @@
 define i16* @test2(i16* %X, i32* %A) {
 ; CHECK: test2
 ; CHECK: strh r1, [r0, #8]!
-        %B = load i32* %A               ; <i32> [#uses=1]
+        %B = load i32, i32* %A               ; <i32> [#uses=1]
         %Y = getelementptr i16, i16* %X, i32 4               ; <i16*> [#uses=2]
         %tmp = trunc i32 %B to i16              ; <i16> [#uses=1]
         store i16 %tmp, i16* %Y
diff --git a/llvm/test/CodeGen/Thumb2/thumb2-tbh.ll b/llvm/test/CodeGen/Thumb2/thumb2-tbh.ll
index bf1c7c6..f2b8c7c 100644
--- a/llvm/test/CodeGen/Thumb2/thumb2-tbh.ll
+++ b/llvm/test/CodeGen/Thumb2/thumb2-tbh.ll
@@ -45,7 +45,7 @@
 	unreachable
 
 bb34.i:		; preds = %bb42.i
-	%3 = load i32* @_C_nextcmd, align 4		; <i32> [#uses=1]
+	%3 = load i32, i32* @_C_nextcmd, align 4		; <i32> [#uses=1]
 	%4 = add i32 %3, 1		; <i32> [#uses=1]
 	store i32 %4, i32* @_C_nextcmd, align 4
 	%5 = call  noalias i8* @calloc(i32 22, i32 1) nounwind		; <i8*> [#uses=0]
diff --git a/llvm/test/CodeGen/Thumb2/tls1.ll b/llvm/test/CodeGen/Thumb2/tls1.ll
index 4097356..6acf27d1 100644
--- a/llvm/test/CodeGen/Thumb2/tls1.ll
+++ b/llvm/test/CodeGen/Thumb2/tls1.ll
@@ -10,7 +10,7 @@
 
 define i32 @f() {
 entry:
-	%tmp1 = load i32* @i		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* @i		; <i32> [#uses=1]
 	ret i32 %tmp1
 }
 
diff --git a/llvm/test/CodeGen/Thumb2/tls2.ll b/llvm/test/CodeGen/Thumb2/tls2.ll
index e6bed2f..8f05cea 100644
--- a/llvm/test/CodeGen/Thumb2/tls2.ll
+++ b/llvm/test/CodeGen/Thumb2/tls2.ll
@@ -12,7 +12,7 @@
 
 ; CHECK-PIC-LABEL: f:
 ; CHECK-PIC: bl __tls_get_addr(PLT)
-	%tmp1 = load i32* @i		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* @i		; <i32> [#uses=1]
 	ret i32 %tmp1
 }
 
diff --git a/llvm/test/CodeGen/Thumb2/tpsoft.ll b/llvm/test/CodeGen/Thumb2/tpsoft.ll
index 6ab8bf0..89757ca 100644
--- a/llvm/test/CodeGen/Thumb2/tpsoft.ll
+++ b/llvm/test/CodeGen/Thumb2/tpsoft.ll
@@ -16,7 +16,7 @@
 
 define arm_aapcs_vfpcc i32 @main() nounwind {
 entry:
-  %0 = load i32* @i, align 4
+  %0 = load i32, i32* @i, align 4
   switch i32 %0, label %bb2 [
     i32 12, label %bb
     i32 13, label %bb1
diff --git a/llvm/test/CodeGen/Thumb2/v8_IT_2.ll b/llvm/test/CodeGen/Thumb2/v8_IT_2.ll
index 170b4135..9a3f263 100644
--- a/llvm/test/CodeGen/Thumb2/v8_IT_2.ll
+++ b/llvm/test/CodeGen/Thumb2/v8_IT_2.ll
@@ -16,9 +16,9 @@
 	br label %tailrecurse
 
 tailrecurse:		; preds = %bb, %entry
-	%tmp6 = load %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=1]
-	%tmp9 = load %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=2]
-	%tmp12 = load %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=1]
+	%tmp6 = load %struct.quad_struct*, %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=1]
+	%tmp9 = load %struct.quad_struct*, %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=2]
+	%tmp12 = load %struct.quad_struct*, %struct.quad_struct** null		; <%struct.quad_struct*> [#uses=1]
 	%tmp14 = icmp eq %struct.quad_struct* null, null		; <i1> [#uses=1]
 	%tmp17 = icmp eq %struct.quad_struct* %tmp6, null		; <i1> [#uses=1]
 	%tmp23 = icmp eq %struct.quad_struct* %tmp9, null		; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/Thumb2/v8_IT_3.ll b/llvm/test/CodeGen/Thumb2/v8_IT_3.ll
index a028dee..4fa5e7f 100644
--- a/llvm/test/CodeGen/Thumb2/v8_IT_3.ll
+++ b/llvm/test/CodeGen/Thumb2/v8_IT_3.ll
@@ -21,7 +21,7 @@
   %block_count = alloca i32, align 4
   %index_cache = alloca i32, align 4
   store i32 0, i32* %index_cache, align 4
-  %tmp = load i32* @G, align 4
+  %tmp = load i32, i32* @G, align 4
   %tmp1 = call i32 @bar(i32 0, i32 0, i32 %tmp) nounwind
   switch i32 %tmp1, label %bb8 [
     i32 0, label %bb
@@ -30,7 +30,7 @@
   ]
 
 bb:
-  %tmp2 = load i32* @G, align 4
+  %tmp2 = load i32, i32* @G, align 4
   %tmp4 = icmp eq i32 %tmp2, 0
   br i1 %tmp4, label %bb1, label %bb8
 
@@ -41,8 +41,8 @@
 ; CHECK-NEXT: it	eq
 ; CHECK-NEXT: cmpeq
 ; CHECK: %bb1
-  %tmp5 = load i32* %block_size, align 4
-  %tmp6 = load i32* %block_count, align 4
+  %tmp5 = load i32, i32* %block_size, align 4
+  %tmp6 = load i32, i32* %block_count, align 4
   %tmp7 = call %struct.FF* @Get() nounwind
   store %struct.FF* %tmp7, %struct.FF** @FuncPtr, align 4
   %tmp10 = zext i32 %tmp6 to i64
diff --git a/llvm/test/CodeGen/X86/2005-01-17-CycleInDAG.ll b/llvm/test/CodeGen/X86/2005-01-17-CycleInDAG.ll
index fe6674d..48236cd 100644
--- a/llvm/test/CodeGen/X86/2005-01-17-CycleInDAG.ll
+++ b/llvm/test/CodeGen/X86/2005-01-17-CycleInDAG.ll
@@ -8,9 +8,9 @@
 @GLOBAL = external global i32           ; <i32*> [#uses=1]
 
 define i32 @test(i32* %P1, i32* %P2, i32* %P3) nounwind {
-        %L = load i32* @GLOBAL          ; <i32> [#uses=1]
+        %L = load i32, i32* @GLOBAL          ; <i32> [#uses=1]
         store i32 12, i32* %P2
-        %Y = load i32* %P3              ; <i32> [#uses=1]
+        %Y = load i32, i32* %P3              ; <i32> [#uses=1]
         %Z = sub i32 %Y, %L             ; <i32> [#uses=1]
         ret i32 %Z
 }
diff --git a/llvm/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll b/llvm/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
index 1b3fc38..f6b5b2c 100644
--- a/llvm/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
+++ b/llvm/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll
@@ -7,7 +7,7 @@
 @A = external global i32                ; <i32*> [#uses=2]
 
 define i32 @test5(i32 %B, i8 %C) {
-        %tmp.1 = load i32* @A           ; <i32> [#uses=1]
+        %tmp.1 = load i32, i32* @A           ; <i32> [#uses=1]
         %shift.upgrd.1 = zext i8 %C to i32              ; <i32> [#uses=1]
         %tmp.2 = shl i32 %tmp.1, %shift.upgrd.1         ; <i32> [#uses=1]
         %tmp.3 = sub i8 32, %C          ; <i8> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll b/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
index 6df040d..9f44bc3 100644
--- a/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
+++ b/llvm/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll
@@ -13,14 +13,14 @@
 codeRepl5.exitStub:             ; preds = %label.0
         ret i1 false
 label.0:                ; preds = %newFuncRoot
-        %tmp.35 = load i32* @last               ; <i32> [#uses=1]
+        %tmp.35 = load i32, i32* @last               ; <i32> [#uses=1]
         %inc.1 = add i32 %tmp.35, 1             ; <i32> [#uses=2]
         store i32 %inc.1, i32* @last
-        %tmp.36 = load i8** @block              ; <i8*> [#uses=1]
+        %tmp.36 = load i8*, i8** @block              ; <i8*> [#uses=1]
         %tmp.38 = getelementptr i8, i8* %tmp.36, i32 %inc.1         ; <i8*> [#uses=1]
         %tmp.40 = trunc i32 %tmp.21.reload to i8                ; <i8> [#uses=1]
         store i8 %tmp.40, i8* %tmp.38
-        %tmp.910 = load i32* @last              ; <i32> [#uses=1]
+        %tmp.910 = load i32, i32* @last              ; <i32> [#uses=1]
         %tmp.1111 = icmp slt i32 %tmp.910, %tmp.8               ; <i1> [#uses=1]
         %tmp.1412 = icmp ne i32 %tmp.21.reload, 257             ; <i1> [#uses=1]
         %tmp.1613 = and i1 %tmp.1111, %tmp.1412         ; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll b/llvm/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
index faa3e21..583877e 100644
--- a/llvm/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
+++ b/llvm/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll
@@ -5,10 +5,10 @@
 
 
 define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>* %d) {
-	%tmp44 = load <4 x float>* %a		; <<4 x float>> [#uses=9]
-	%tmp46 = load <4 x float>* %b		; <<4 x float>> [#uses=1]
-	%tmp48 = load <4 x float>* %c		; <<4 x float>> [#uses=1]
-	%tmp50 = load <4 x float>* %d		; <<4 x float>> [#uses=1]
+	%tmp44 = load <4 x float>, <4 x float>* %a		; <<4 x float>> [#uses=9]
+	%tmp46 = load <4 x float>, <4 x float>* %b		; <<4 x float>> [#uses=1]
+	%tmp48 = load <4 x float>, <4 x float>* %c		; <<4 x float>> [#uses=1]
+	%tmp50 = load <4 x float>, <4 x float>* %d		; <<4 x float>> [#uses=1]
 	%tmp51 = bitcast <4 x float> %tmp44 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp = shufflevector <4 x i32> %tmp51, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x i32>> [#uses=2]
 	%tmp52 = bitcast <4 x i32> %tmp to <4 x float>		; <<4 x float>> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2006-05-02-InstrSched1.ll b/llvm/test/CodeGen/X86/2006-05-02-InstrSched1.ll
index 92c271b6..46c5e88 100644
--- a/llvm/test/CodeGen/X86/2006-05-02-InstrSched1.ll
+++ b/llvm/test/CodeGen/X86/2006-05-02-InstrSched1.ll
@@ -11,12 +11,12 @@
 define i32 @compare(i8* %a, i8* %b) nounwind {
 	%tmp = bitcast i8* %a to i32*		; <i32*> [#uses=1]
 	%tmp1 = bitcast i8* %b to i32*		; <i32*> [#uses=1]
-	%tmp.upgrd.1 = load i32* @size20		; <i32> [#uses=1]
-	%tmp.upgrd.2 = load i8** @in5		; <i8*> [#uses=2]
-	%tmp3 = load i32* %tmp1		; <i32> [#uses=1]
+	%tmp.upgrd.1 = load i32, i32* @size20		; <i32> [#uses=1]
+	%tmp.upgrd.2 = load i8*, i8** @in5		; <i8*> [#uses=2]
+	%tmp3 = load i32, i32* %tmp1		; <i32> [#uses=1]
 	%gep.upgrd.3 = zext i32 %tmp3 to i64		; <i64> [#uses=1]
 	%tmp4 = getelementptr i8, i8* %tmp.upgrd.2, i64 %gep.upgrd.3		; <i8*> [#uses=2]
-	%tmp7 = load i32* %tmp		; <i32> [#uses=1]
+	%tmp7 = load i32, i32* %tmp		; <i32> [#uses=1]
 	%gep.upgrd.4 = zext i32 %tmp7 to i64		; <i64> [#uses=1]
 	%tmp8 = getelementptr i8, i8* %tmp.upgrd.2, i64 %gep.upgrd.4		; <i8*> [#uses=2]
 	%tmp.upgrd.5 = tail call i32 @memcmp( i8* %tmp8, i8* %tmp4, i32 %tmp.upgrd.1 )		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2006-05-02-InstrSched2.ll b/llvm/test/CodeGen/X86/2006-05-02-InstrSched2.ll
index 222b7a0..3281c68e 100644
--- a/llvm/test/CodeGen/X86/2006-05-02-InstrSched2.ll
+++ b/llvm/test/CodeGen/X86/2006-05-02-InstrSched2.ll
@@ -12,13 +12,13 @@
 	%__s441.2.4.i = phi i8* [ %tmp451.i.upgrd.1, %cond_true456.i ], [ %tmp435.i, %newFuncRoot ]		; <i8*> [#uses=2]
 	%__h.2.4.i = phi i32 [ %tmp449.i, %cond_true456.i ], [ 0, %newFuncRoot ]	; <i32> [#uses=1]
 	%tmp446.i = mul i32 %__h.2.4.i, 5		; <i32> [#uses=1]
-	%tmp.i = load i8* %__s441.2.4.i		; <i8> [#uses=1]
+	%tmp.i = load i8, i8* %__s441.2.4.i		; <i8> [#uses=1]
 	%tmp448.i = sext i8 %tmp.i to i32		; <i32> [#uses=1]
 	%tmp449.i = add i32 %tmp448.i, %tmp446.i		; <i32> [#uses=2]
 	%tmp450.i = ptrtoint i8* %__s441.2.4.i to i32		; <i32> [#uses=1]
 	%tmp451.i = add i32 %tmp450.i, 1		; <i32> [#uses=1]
 	%tmp451.i.upgrd.1 = inttoptr i32 %tmp451.i to i8*		; <i8*> [#uses=2]
-	%tmp45435.i = load i8* %tmp451.i.upgrd.1		; <i8> [#uses=1]
+	%tmp45435.i = load i8, i8* %tmp451.i.upgrd.1		; <i8> [#uses=1]
 	%tmp45536.i = icmp eq i8 %tmp45435.i, 0		; <i1> [#uses=1]
 	br i1 %tmp45536.i, label %bb459.i.exitStub, label %cond_true456.i
 }
diff --git a/llvm/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll b/llvm/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll
index 8421483..b70d375 100644
--- a/llvm/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll
+++ b/llvm/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll
@@ -9,13 +9,13 @@
 
 define void @test(i32 %A) {
 	%A.upgrd.1 = trunc i32 %A to i8		; <i8> [#uses=1]
-	%tmp2 = load i32* @B		; <i32> [#uses=1]
+	%tmp2 = load i32, i32* @B		; <i32> [#uses=1]
 	%tmp3 = and i8 %A.upgrd.1, 16		; <i8> [#uses=1]
 	%shift.upgrd.2 = zext i8 %tmp3 to i32		; <i32> [#uses=1]
 	%tmp4 = shl i32 %tmp2, %shift.upgrd.2		; <i32> [#uses=1]
 	store i32 %tmp4, i32* @B
 	%tmp6 = lshr i32 %A, 3		; <i32> [#uses=1]
-	%tmp = load i16** @C		; <i16*> [#uses=1]
+	%tmp = load i16*, i16** @C		; <i16*> [#uses=1]
 	%tmp8 = ptrtoint i16* %tmp to i32		; <i32> [#uses=1]
 	%tmp9 = add i32 %tmp8, %tmp6		; <i32> [#uses=1]
 	%tmp9.upgrd.3 = inttoptr i32 %tmp9 to i16*		; <i16*> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2006-05-08-InstrSched.ll b/llvm/test/CodeGen/X86/2006-05-08-InstrSched.ll
index d55cead..cd46ecf 100644
--- a/llvm/test/CodeGen/X86/2006-05-08-InstrSched.ll
+++ b/llvm/test/CodeGen/X86/2006-05-08-InstrSched.ll
@@ -5,13 +5,13 @@
 @C = external global i32		; <i32*> [#uses=2]
 
 define void @test() {
-	%tmp = load i16** @A		; <i16*> [#uses=1]
+	%tmp = load i16*, i16** @A		; <i16*> [#uses=1]
 	%tmp1 = getelementptr i16, i16* %tmp, i32 1		; <i16*> [#uses=1]
-	%tmp.upgrd.1 = load i16* %tmp1		; <i16> [#uses=1]
+	%tmp.upgrd.1 = load i16, i16* %tmp1		; <i16> [#uses=1]
 	%tmp3 = zext i16 %tmp.upgrd.1 to i32		; <i32> [#uses=1]
-	%tmp.upgrd.2 = load i32* @B		; <i32> [#uses=1]
+	%tmp.upgrd.2 = load i32, i32* @B		; <i32> [#uses=1]
 	%tmp4 = and i32 %tmp.upgrd.2, 16		; <i32> [#uses=1]
-	%tmp5 = load i32* @C		; <i32> [#uses=1]
+	%tmp5 = load i32, i32* @C		; <i32> [#uses=1]
 	%tmp6 = trunc i32 %tmp4 to i8		; <i8> [#uses=2]
 	%shift.upgrd.3 = zext i8 %tmp6 to i32		; <i32> [#uses=1]
 	%tmp7 = shl i32 %tmp5, %shift.upgrd.3		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll b/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll
index 6be044b..b1deb2c 100644
--- a/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll
+++ b/llvm/test/CodeGen/X86/2006-05-11-InstrSched.ll
@@ -15,19 +15,19 @@
 	%tmp31 = add nsw i32 %tmp.10, -1		; <i32> [#uses=4]
 	%tmp32 = getelementptr i32, i32* %mpp, i32 %tmp31		; <i32*> [#uses=1]
 	%tmp34 = bitcast i32* %tmp32 to <16 x i8>*		; <i8*> [#uses=1]
-	%tmp = load <16 x i8>* %tmp34, align 1
+	%tmp = load <16 x i8>, <16 x i8>* %tmp34, align 1
 	%tmp42 = getelementptr i32, i32* %tpmm, i32 %tmp31		; <i32*> [#uses=1]
 	%tmp42.upgrd.1 = bitcast i32* %tmp42 to <4 x i32>*		; <<4 x i32>*> [#uses=1]
-	%tmp46 = load <4 x i32>* %tmp42.upgrd.1		; <<4 x i32>> [#uses=1]
+	%tmp46 = load <4 x i32>, <4 x i32>* %tmp42.upgrd.1		; <<4 x i32>> [#uses=1]
 	%tmp54 = bitcast <16 x i8> %tmp to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp55 = add <4 x i32> %tmp54, %tmp46		; <<4 x i32>> [#uses=2]
 	%tmp55.upgrd.2 = bitcast <4 x i32> %tmp55 to <2 x i64>		; <<2 x i64>> [#uses=1]
 	%tmp62 = getelementptr i32, i32* %ip, i32 %tmp31		; <i32*> [#uses=1]
 	%tmp65 = bitcast i32* %tmp62 to <16 x i8>*		; <i8*> [#uses=1]
-	%tmp66 = load <16 x i8>* %tmp65, align 1
+	%tmp66 = load <16 x i8>, <16 x i8>* %tmp65, align 1
 	%tmp73 = getelementptr i32, i32* %tpim, i32 %tmp31		; <i32*> [#uses=1]
 	%tmp73.upgrd.3 = bitcast i32* %tmp73 to <4 x i32>*		; <<4 x i32>*> [#uses=1]
-	%tmp77 = load <4 x i32>* %tmp73.upgrd.3		; <<4 x i32>> [#uses=1]
+	%tmp77 = load <4 x i32>, <4 x i32>* %tmp73.upgrd.3		; <<4 x i32>> [#uses=1]
 	%tmp87 = bitcast <16 x i8> %tmp66 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp88 = add <4 x i32> %tmp87, %tmp77		; <<4 x i32>> [#uses=2]
 	%tmp88.upgrd.4 = bitcast <4 x i32> %tmp88 to <2 x i64>		; <<2 x i64>> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2006-05-25-CycleInDAG.ll b/llvm/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
index 0288278..6ff8797 100644
--- a/llvm/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
+++ b/llvm/test/CodeGen/X86/2006-05-25-CycleInDAG.ll
@@ -6,7 +6,7 @@
 	ret i32 0
 cond_next33:		; preds = %0
 	%tmp44.i = call double @foo( double 0.000000e+00, i32 32 )		; <double> [#uses=1]
-	%tmp61.i = load i8* null		; <i8> [#uses=1]
+	%tmp61.i = load i8, i8* null		; <i8> [#uses=1]
 	%tmp61.i.upgrd.1 = zext i8 %tmp61.i to i32		; <i32> [#uses=1]
 	%tmp58.i = or i32 0, %tmp61.i.upgrd.1		; <i32> [#uses=1]
 	%tmp62.i = or i32 %tmp58.i, 0		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2006-07-20-InlineAsm.ll b/llvm/test/CodeGen/X86/2006-07-20-InlineAsm.ll
index 1facf15..795e898 100644
--- a/llvm/test/CodeGen/X86/2006-07-20-InlineAsm.ll
+++ b/llvm/test/CodeGen/X86/2006-07-20-InlineAsm.ll
@@ -8,7 +8,7 @@
 	%X_addr = alloca i32		; <i32*> [#uses=3]
 	store i32 %X, i32* %X_addr
 	call void asm sideeffect "xchg{l} {$0,$1|$1,$0}", "=*m,=*r,m,1,~{dirflag},~{fpsr},~{flags}"( i32* @G, i32* %X_addr, i32* @G, i32 %X )
-	%tmp1 = load i32* %X_addr		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* %X_addr		; <i32> [#uses=1]
 	ret i32 %tmp1
 }
 
@@ -17,7 +17,7 @@
 	%X_addr = alloca i32		; <i32*> [#uses=3]
 	store i32 %X, i32* %X_addr
 	call void asm sideeffect "xchg{l} {$0,$1|$1,$0}", "=*m,=*r,1,~{dirflag},~{fpsr},~{flags}"( i32* @G, i32* %X_addr, i32 %X )
-	%tmp1 = load i32* %X_addr		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* %X_addr		; <i32> [#uses=1]
 	ret i32 %tmp1
 }
 
diff --git a/llvm/test/CodeGen/X86/2006-08-07-CycleInDAG.ll b/llvm/test/CodeGen/X86/2006-08-07-CycleInDAG.ll
index aea707e..397bc26 100644
--- a/llvm/test/CodeGen/X86/2006-08-07-CycleInDAG.ll
+++ b/llvm/test/CodeGen/X86/2006-08-07-CycleInDAG.ll
@@ -8,10 +8,10 @@
 	ret i32 0
 
 ilog2.exit:		; preds = %0
-	%tmp24.i = load i32* null		; <i32> [#uses=1]
+	%tmp24.i = load i32, i32* null		; <i32> [#uses=1]
 	%tmp13.i12.i = tail call double @ldexp( double 0.000000e+00, i32 0 )		; <double> [#uses=1]
 	%tmp13.i13.i = fptrunc double %tmp13.i12.i to float		; <float> [#uses=1]
-	%tmp11.s = load i32* null		; <i32> [#uses=1]
+	%tmp11.s = load i32, i32* null		; <i32> [#uses=1]
 	%tmp11.i = bitcast i32 %tmp11.s to i32		; <i32> [#uses=1]
 	%n.i = bitcast i32 %tmp24.i to i32		; <i32> [#uses=1]
 	%tmp13.i7 = mul i32 %tmp11.i, %n.i		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2006-08-16-CycleInDAG.ll b/llvm/test/CodeGen/X86/2006-08-16-CycleInDAG.ll
index 8dbabc7..2c44adf 100644
--- a/llvm/test/CodeGen/X86/2006-08-16-CycleInDAG.ll
+++ b/llvm/test/CodeGen/X86/2006-08-16-CycleInDAG.ll
@@ -6,7 +6,7 @@
 	%struct.u = type { [1 x i64] }
 
 define void @test() {
-	%tmp = load i32* null		; <i32> [#uses=1]
+	%tmp = load i32, i32* null		; <i32> [#uses=1]
 	%tmp8 = call i32 @hash_rtx( )		; <i32> [#uses=1]
 	%tmp11 = urem i32 %tmp8, %tmp		; <i32> [#uses=1]
 	br i1 false, label %cond_next, label %return
diff --git a/llvm/test/CodeGen/X86/2006-09-01-CycleInDAG.ll b/llvm/test/CodeGen/X86/2006-09-01-CycleInDAG.ll
index d7e727c..a7a10af 100644
--- a/llvm/test/CodeGen/X86/2006-09-01-CycleInDAG.ll
+++ b/llvm/test/CodeGen/X86/2006-09-01-CycleInDAG.ll
@@ -111,21 +111,21 @@
 	ret void
 
 cond_true3632:		; preds = %newFuncRoot
-	%tmp3378 = load i32* %tmp3629		; <i32> [#uses=1]
+	%tmp3378 = load i32, i32* %tmp3629		; <i32> [#uses=1]
 	%tmp3379 = add i32 %tmp3378, -1		; <i32> [#uses=1]
 	%tmp3381 = getelementptr %struct.varray_head_tag, %struct.varray_head_tag* %stack3023.6, i32 0, i32 4		; <%struct.varray_data*> [#uses=1]
 	%tmp3382 = bitcast %struct.varray_data* %tmp3381 to [1 x i32]*		; <[1 x i32]*> [#uses=1]
 	%gep.upgrd.1 = zext i32 %tmp3379 to i64		; <i64> [#uses=1]
 	%tmp3383 = getelementptr [1 x i32], [1 x i32]* %tmp3382, i32 0, i64 %gep.upgrd.1		; <i32*> [#uses=1]
-	%tmp3384 = load i32* %tmp3383		; <i32> [#uses=1]
-	%tmp3387 = load i32* %tmp3629		; <i32> [#uses=1]
+	%tmp3384 = load i32, i32* %tmp3383		; <i32> [#uses=1]
+	%tmp3387 = load i32, i32* %tmp3629		; <i32> [#uses=1]
 	%tmp3388 = add i32 %tmp3387, -1		; <i32> [#uses=1]
 	store i32 %tmp3388, i32* %tmp3629
-	%tmp3391 = load %struct.varray_head_tag** @basic_block_info		; <%struct.varray_head_tag*> [#uses=1]
+	%tmp3391 = load %struct.varray_head_tag*, %struct.varray_head_tag** @basic_block_info		; <%struct.varray_head_tag*> [#uses=1]
 	%tmp3393 = getelementptr %struct.varray_head_tag, %struct.varray_head_tag* %tmp3391, i32 0, i32 4		; <%struct.varray_data*> [#uses=1]
 	%tmp3394 = bitcast %struct.varray_data* %tmp3393 to [1 x %struct.basic_block_def*]*		; <[1 x %struct.basic_block_def*]*> [#uses=1]
 	%tmp3395 = getelementptr [1 x %struct.basic_block_def*], [1 x %struct.basic_block_def*]* %tmp3394, i32 0, i32 %tmp3384		; <%struct.basic_block_def**> [#uses=1]
-	%tmp3396 = load %struct.basic_block_def** %tmp3395		; <%struct.basic_block_def*> [#uses=1]
+	%tmp3396 = load %struct.basic_block_def*, %struct.basic_block_def** %tmp3395		; <%struct.basic_block_def*> [#uses=1]
 	%tmp3397 = getelementptr %struct.basic_block_def, %struct.basic_block_def* %tmp3396, i32 0, i32 3		; <%struct.VEC_edge**> [#uses=1]
 	br label %bb3502.exitStub
 }
diff --git a/llvm/test/CodeGen/X86/2006-10-09-CycleInDAG.ll b/llvm/test/CodeGen/X86/2006-10-09-CycleInDAG.ll
index fbb14ee..e2c84ea 100644
--- a/llvm/test/CodeGen/X86/2006-10-09-CycleInDAG.ll
+++ b/llvm/test/CodeGen/X86/2006-10-09-CycleInDAG.ll
@@ -1,9 +1,9 @@
 ; RUN: llc < %s -march=x86
 
 define void @_ZN13QFSFileEngine4readEPcx() {
-	%tmp201 = load i32* null		; <i32> [#uses=1]
+	%tmp201 = load i32, i32* null		; <i32> [#uses=1]
 	%tmp201.upgrd.1 = sext i32 %tmp201 to i64		; <i64> [#uses=1]
-	%tmp202 = load i64* null		; <i64> [#uses=1]
+	%tmp202 = load i64, i64* null		; <i64> [#uses=1]
 	%tmp203 = add i64 %tmp201.upgrd.1, %tmp202		; <i64> [#uses=1]
 	store i64 %tmp203, i64* null
 	ret void
diff --git a/llvm/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll b/llvm/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll
index 63b6318..4355825 100644
--- a/llvm/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll
+++ b/llvm/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll
@@ -4,14 +4,14 @@
 @tree_code_type = external global [0 x i32]		; <[0 x i32]*> [#uses=1]
 
 define void @copy_if_shared_r() {
-	%tmp = load i32* null		; <i32> [#uses=1]
+	%tmp = load i32, i32* null		; <i32> [#uses=1]
 	%tmp56 = and i32 %tmp, 255		; <i32> [#uses=1]
 	%gep.upgrd.1 = zext i32 %tmp56 to i64		; <i64> [#uses=1]
 	%tmp8 = getelementptr [0 x i32], [0 x i32]* @tree_code_type, i32 0, i64 %gep.upgrd.1	; <i32*> [#uses=1]
-	%tmp9 = load i32* %tmp8		; <i32> [#uses=1]
+	%tmp9 = load i32, i32* %tmp8		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, -1		; <i32> [#uses=1]
 	%tmp.upgrd.2 = icmp ugt i32 %tmp10, 2		; <i1> [#uses=1]
-	%tmp14 = load i32* null		; <i32> [#uses=1]
+	%tmp14 = load i32, i32* null		; <i32> [#uses=1]
 	%tmp15 = lshr i32 %tmp14, 31		; <i32> [#uses=1]
 	%tmp15.upgrd.3 = trunc i32 %tmp15 to i8		; <i8> [#uses=1]
 	%tmp16 = icmp ne i8 %tmp15.upgrd.3, 0		; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2006-10-12-CycleInDAG.ll b/llvm/test/CodeGen/X86/2006-10-12-CycleInDAG.ll
index 17eb41d..7a32ef7 100644
--- a/llvm/test/CodeGen/X86/2006-10-12-CycleInDAG.ll
+++ b/llvm/test/CodeGen/X86/2006-10-12-CycleInDAG.ll
@@ -29,10 +29,10 @@
 	ret void
 
 cond_next472:		; preds = %cond_next330
-	%tmp490 = load %struct.tree_node** null		; <%struct.tree_node*> [#uses=1]
+	%tmp490 = load %struct.tree_node*, %struct.tree_node** null		; <%struct.tree_node*> [#uses=1]
 	%tmp492 = getelementptr %struct.tree_node, %struct.tree_node* %tmp490, i32 0, i32 0, i32 0, i32 3		; <i8*> [#uses=1]
 	%tmp492.upgrd.1 = bitcast i8* %tmp492 to i32*		; <i32*> [#uses=1]
-	%tmp493 = load i32* %tmp492.upgrd.1		; <i32> [#uses=1]
+	%tmp493 = load i32, i32* %tmp492.upgrd.1		; <i32> [#uses=1]
 	%tmp495 = trunc i32 %tmp493 to i8		; <i8> [#uses=1]
 	%tmp496 = icmp eq i8 %tmp495, 11		; <i1> [#uses=1]
 	%tmp496.upgrd.2 = zext i1 %tmp496 to i8		; <i8> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2006-10-13-CycleInDAG.ll b/llvm/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
index 6ed2e7b..7804e74 100644
--- a/llvm/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
+++ b/llvm/test/CodeGen/X86/2006-10-13-CycleInDAG.ll
@@ -3,9 +3,9 @@
 
 define void @test() {
 bb.i:
-	%tmp.i660 = load <4 x float>* null		; <<4 x float>> [#uses=1]
+	%tmp.i660 = load <4 x float>, <4 x float>* null		; <<4 x float>> [#uses=1]
 	call void (i32, ...)* @printf( i32 0, i8* getelementptr ([18 x i8]* @str, i32 0, i64 0), double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00 )
-	%tmp152.i = load <4 x i32>* null		; <<4 x i32>> [#uses=1]
+	%tmp152.i = load <4 x i32>, <4 x i32>* null		; <<4 x i32>> [#uses=1]
 	%tmp156.i = bitcast <4 x i32> %tmp152.i to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp175.i = bitcast <4 x float> %tmp.i660 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp176.i = xor <4 x i32> %tmp156.i, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2006-11-12-CSRetCC.ll b/llvm/test/CodeGen/X86/2006-11-12-CSRetCC.ll
index 40128f3..9adfff3 100644
--- a/llvm/test/CodeGen/X86/2006-11-12-CSRetCC.ll
+++ b/llvm/test/CodeGen/X86/2006-11-12-CSRetCC.ll
@@ -18,43 +18,43 @@
         %z = alloca { double, double }, align 16                ; <{ double, double }*> [#uses=4]
         %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
         store double 0x400921FB54442D18, double* %pi
-        %tmp.upgrd.1 = load double* %pi         ; <double> [#uses=1]
+        %tmp.upgrd.1 = load double, double* %pi         ; <double> [#uses=1]
         %real = getelementptr { double, double }, { double, double }* %tmp1, i64 0, i32 0           ; <double*> [#uses=1]
         store double 0.000000e+00, double* %real
         %real3 = getelementptr { double, double }, { double, double }* %tmp1, i64 0, i32 1          ; <double*> [#uses=1]
         store double %tmp.upgrd.1, double* %real3
         %tmp.upgrd.2 = getelementptr { double, double }, { double, double }* %tmp, i64 0, i32 0             ; <double*> [#uses=1]
         %tmp4 = getelementptr { double, double }, { double, double }* %tmp1, i64 0, i32 0           ; <double*> [#uses=1]
-        %tmp5 = load double* %tmp4              ; <double> [#uses=1]
+        %tmp5 = load double, double* %tmp4              ; <double> [#uses=1]
         store double %tmp5, double* %tmp.upgrd.2
         %tmp6 = getelementptr { double, double }, { double, double }* %tmp, i64 0, i32 1            ; <double*> [#uses=1]
         %tmp7 = getelementptr { double, double }, { double, double }* %tmp1, i64 0, i32 1           ; <double*> [#uses=1]
-        %tmp8 = load double* %tmp7              ; <double> [#uses=1]
+        %tmp8 = load double, double* %tmp7              ; <double> [#uses=1]
         store double %tmp8, double* %tmp6
         %tmp.upgrd.3 = bitcast { double, double }* %tmp to { i64, i64 }*                ; <{ i64, i64 }*> [#uses=1]
         %tmp.upgrd.4 = getelementptr { i64, i64 }, { i64, i64 }* %tmp.upgrd.3, i64 0, i32 0           ; <i64*> [#uses=1]
-        %tmp.upgrd.5 = load i64* %tmp.upgrd.4           ; <i64> [#uses=1]
+        %tmp.upgrd.5 = load i64, i64* %tmp.upgrd.4           ; <i64> [#uses=1]
         %tmp9 = bitcast { double, double }* %tmp to { i64, i64 }*               ; <{ i64, i64 }*> [#uses=1]
         %tmp10 = getelementptr { i64, i64 }, { i64, i64 }* %tmp9, i64 0, i32 1                ; <i64*> [#uses=1]
-        %tmp11 = load i64* %tmp10               ; <i64> [#uses=1]
+        %tmp11 = load i64, i64* %tmp10               ; <i64> [#uses=1]
         call void @cexp( { double, double }* sret  %tmp2, i64 %tmp.upgrd.5, i64 %tmp11 )
         %tmp12 = getelementptr { double, double }, { double, double }* %z, i64 0, i32 0             ; <double*> [#uses=1]
         %tmp13 = getelementptr { double, double }, { double, double }* %tmp2, i64 0, i32 0          ; <double*> [#uses=1]
-        %tmp14 = load double* %tmp13            ; <double> [#uses=1]
+        %tmp14 = load double, double* %tmp13            ; <double> [#uses=1]
         store double %tmp14, double* %tmp12
         %tmp15 = getelementptr { double, double }, { double, double }* %z, i64 0, i32 1             ; <double*> [#uses=1]
         %tmp16 = getelementptr { double, double }, { double, double }* %tmp2, i64 0, i32 1          ; <double*> [#uses=1]
-        %tmp17 = load double* %tmp16            ; <double> [#uses=1]
+        %tmp17 = load double, double* %tmp16            ; <double> [#uses=1]
         store double %tmp17, double* %tmp15
         %tmp18 = getelementptr { double, double }, { double, double }* %z, i64 0, i32 1             ; <double*> [#uses=1]
-        %tmp19 = load double* %tmp18            ; <double> [#uses=1]
+        %tmp19 = load double, double* %tmp18            ; <double> [#uses=1]
         %tmp20 = getelementptr { double, double }, { double, double }* %z, i64 0, i32 0             ; <double*> [#uses=1]
-        %tmp21 = load double* %tmp20            ; <double> [#uses=1]
+        %tmp21 = load double, double* %tmp20            ; <double> [#uses=1]
         %tmp.upgrd.6 = getelementptr [9 x i8], [9 x i8]* @str, i32 0, i64 0               ; <i8*> [#uses=1]
         %tmp.upgrd.7 = call i32 (i8*, ...)* @printf( i8* %tmp.upgrd.6, double %tmp21, double %tmp19 )           ; <i32> [#uses=0]
         br label %finish
 finish:
-        %retval.upgrd.8 = load i32* %retval             ; <i32> [#uses=1]
+        %retval.upgrd.8 = load i32, i32* %retval             ; <i32> [#uses=1]
         ret i32 %retval.upgrd.8
 }
 
diff --git a/llvm/test/CodeGen/X86/2006-11-17-IllegalMove.ll b/llvm/test/CodeGen/X86/2006-11-17-IllegalMove.ll
index 783d9f9..c0bd6f72 100644
--- a/llvm/test/CodeGen/X86/2006-11-17-IllegalMove.ll
+++ b/llvm/test/CodeGen/X86/2006-11-17-IllegalMove.ll
@@ -5,7 +5,7 @@
 
 define void @handle_vector_size_attribute() nounwind {
 entry:
-	%tmp69 = load i32* null		; <i32> [#uses=1]
+	%tmp69 = load i32, i32* null		; <i32> [#uses=1]
 	switch i32 %tmp69, label %bb84 [
 		 i32 2, label %bb77
 		 i32 1, label %bb77
@@ -13,7 +13,7 @@
 
 bb77:		; preds = %entry, %entry
 	%tmp99 = udiv i64 0, 0		; <i64> [#uses=1]
-	%tmp = load i8* null		; <i8> [#uses=1]
+	%tmp = load i8, i8* null		; <i8> [#uses=1]
 	%tmp114 = icmp eq i64 0, 0		; <i1> [#uses=1]
 	br label %cond_true115
 
@@ -21,7 +21,7 @@
 	ret void
 
 cond_true115:		; preds = %bb77
-	%tmp118 = load i8* null		; <i8> [#uses=1]
+	%tmp118 = load i8, i8* null		; <i8> [#uses=1]
 	br label %cond_true120
 
 cond_true120:		; preds = %cond_true115
diff --git a/llvm/test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll b/llvm/test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll
index 3e58105..080de1f 100644
--- a/llvm/test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll
+++ b/llvm/test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll
@@ -20,7 +20,7 @@
 	%"struct.QString::Data" = type { %struct.QBasicAtomic, i32, i32, i16*, i8, i8, [1 x i16] }
 
 define i1 @_ZNK12QImageWriter8canWriteEv() {
-	%tmp62 = load %struct.QImageWriterPrivate** null		; <%struct.QImageWriterPrivate*> [#uses=1]
+	%tmp62 = load %struct.QImageWriterPrivate*, %struct.QImageWriterPrivate** null		; <%struct.QImageWriterPrivate*> [#uses=1]
 	%tmp = getelementptr %struct.QImageWriterPrivate, %struct.QImageWriterPrivate* %tmp62, i32 0, i32 9		; <%struct.QString*> [#uses=1]
 	%tmp75 = call %struct.QString* @_ZN7QStringaSERKS_( %struct.QString* %tmp, %struct.QString* null )		; <%struct.QString*> [#uses=0]
 	call void asm sideeffect "lock\0Adecl $0\0Asetne 1", "=*m"( i32* null )
diff --git a/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll b/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
index 8f569e2..f83eea1 100644
--- a/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
+++ b/llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll
@@ -8,12 +8,12 @@
 
 define void @foo(i32* %a0, i32* %a1, i32* %a2, i32* %a3, i32* %a4, i32* %a5) {
 b:
-	%r = load i32* %a0
-	%r2 = load i32* %a1
-	%r4 = load i32* %a2
-	%r6 = load i32* %a3
-	%r8 = load i32* %a4
-	%r14 = load i32* %a5
+	%r = load i32, i32* %a0
+	%r2 = load i32, i32* %a1
+	%r4 = load i32, i32* %a2
+	%r6 = load i32, i32* %a3
+	%r8 = load i32, i32* %a4
+	%r14 = load i32, i32* %a5
 	%rx = sext i32 %r2 to i64
 	%r9 = sext i32 %r to i64
 	%r11 = add i64 %rx, 0
@@ -133,13 +133,13 @@
 	%r343 = add i64 %s661, 0
 	%r346 = add i64 %r343, 0
 	%r347 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r346
-	%r348 = load float* %r347
+	%r348 = load float, float* %r347
 	%r352 = add i64 %r343, 0
 	%r353 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r352
-	%r354 = load float* %r353
-	%r362 = load float* bitcast ([128 x i64]* @i6000 to float*)
+	%r354 = load float, float* %r353
+	%r362 = load float, float* bitcast ([128 x i64]* @i6000 to float*)
 	%r363 = fadd float 0.000000e+00, %r362
-	%r370 = load float* bitcast ([128 x i64]* @i6000 to float*)
+	%r370 = load float, float* bitcast ([128 x i64]* @i6000 to float*)
 	%r376 = icmp slt i64 %r16, 0
 	br i1 %r376, label %b377, label %a35b
 b377:
@@ -184,8 +184,8 @@
 	%s933 = phi i64 [ %r533, %b514 ], [ %r795, %b712 ]
 	%r538 = add i64 %w1855, 0
 	%r539 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r538
-	%r540 = load float* %r539
-	%r551 = load float* bitcast ([128 x i64]* @i6000 to float*)
+	%r540 = load float, float* %r539
+	%r551 = load float, float* bitcast ([128 x i64]* @i6000 to float*)
 	%r562 = sub i64 %s933, 0
 	%r564 = icmp slt i64 %r512, 0
 	br i1 %r564, label %b565, label %a45b
@@ -213,22 +213,22 @@
 	%r717 = add i64 %e944, 0
 	%r720 = add i64 %r717, 0
 	%r721 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r720
-	%r722 = load float* %r721
+	%r722 = load float, float* %r721
 	%r726 = add i64 %r717, 0
 	%r727 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r726
-	%r728 = load float* %r727
+	%r728 = load float, float* %r727
 	%r732 = add i64 %r717, 0
 	%r733 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r732
-	%r734 = load float* %r733
+	%r734 = load float, float* %r733
 	%r738 = add i64 %r717, 0
 	%r739 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r738
-	%r740 = load float* %r739
+	%r740 = load float, float* %r739
 	%r744 = add i64 %r717, 0
 	%r745 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r744
-	%r746 = load float* %r745
+	%r746 = load float, float* %r745
 	%r750 = add i64 %r717, 0
 	%r751 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r750
-	%r752 = load float* %r751
+	%r752 = load float, float* %r751
 	%r753 = fadd float %r752, %r746
 	%r754 = fadd float %r728, %r722
 	%r755 = fadd float %r734, %r754
@@ -237,10 +237,10 @@
 	%r759 = fadd float %r757, %r540
 	%r770 = add i64 %r717, 0
 	%r771 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r770
-	%r772 = load float* %r771
+	%r772 = load float, float* %r771
 	%r776 = add i64 %r717, 0
 	%r777 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r776
-	%r778 = load float* %r777
+	%r778 = load float, float* %r777
 	%r781 = fadd float %r363, %r772
 	%r782 = fadd float %r781, %r778
 	%r783 = fadd float %r551, %r782
@@ -253,7 +253,7 @@
 	%r844 = add i64 %r16, 0
 	%r846 = sext i32 %r60 to i64
 	%r847 = add i64 %r846, 0
-	%r851 = load float* bitcast ([128 x i64]* @i6000 to float*)
+	%r851 = load float, float* bitcast ([128 x i64]* @i6000 to float*)
 	%r856 = sub i64 %rx, 0
 	br label %b858
 b858:
@@ -266,10 +266,10 @@
 	%r859 = add i64 %r856, 0
 	%r862 = add i64 %w1891, 0
 	%r863 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r862
-	%r864 = load float* %r863
+	%r864 = load float, float* %r863
 	%r868 = add i64 %w1891, 0
 	%r869 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r868
-	%r870 = load float* %r869
+	%r870 = load float, float* %r869
 	%r873 = sub i64 %r859, 0
 	%r876 = sub i64 %s1173, 0
 	%r878 = icmp slt i64 %r847, 0
@@ -302,11 +302,11 @@
 	%r1024 = bitcast i8* %c2 to float*
 	%r1025 = add i64 %r1022, 0
 	%r1026 = getelementptr float, float* %r1024, i64 %r1025
-	%r1027 = load float* %r1026
+	%r1027 = load float, float* %r1026
 	%r1032 = add i64 %r873, 0
 	%r1033 = add i64 %r1032, 0
 	%r1034 = getelementptr float, float* %r1024, i64 %r1033
-	%r1035 = load float* %r1034
+	%r1035 = load float, float* %r1034
 	%r1037 = bitcast i8* %c22010 to float*
 	%r1040 = getelementptr float, float* %r1037, i64 %r1025
 	%r1044 = fadd float %r864, %r1035
@@ -336,10 +336,10 @@
 	%r1120 = add i64 %s661, 0
 	%r1121 = add i64 %r1120, 0
 	%r1122 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1121
-	%r1123 = load float* %r1122
+	%r1123 = load float, float* %r1122
 	%r1132 = bitcast i8* %c22012 to float*
 	%r1134 = getelementptr float, float* %r1132, i64 %w1915
-	%r1135 = load float* %r1134
+	%r1135 = load float, float* %r1134
 	%r1136 = fadd float %r1123, %r1135
 	%r1138 = icmp slt i64 %r1114, 0
 	br i1 %r1138, label %b1139, label %a63b
@@ -410,7 +410,7 @@
 	%r1355 = sub i64 %r1352, 0
 	%r1370 = add i64 %d1533, 0
 	%r1371 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1370
-	%r1372 = load float* %r1371
+	%r1372 = load float, float* %r1371
 	br label %a74b
 a74b:
 	%w1958 = phi i64 [ 0, %b1342 ], [ %v1959, %a74b ]
@@ -441,10 +441,10 @@
 	%r1622 = add i64 %r1614, 0
 	%r1754 = bitcast i8* %r28 to float*
 	%r1756 = getelementptr float, float* %r1754, i64 %w1970
-	%r1757 = load float* %r1756
+	%r1757 = load float, float* %r1756
 	%r1761 = add i64 %r1622, 0
 	%r1762 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1761
-	%r1763 = load float* %r1762
+	%r1763 = load float, float* %r1762
 	%r1767 = add i64 %r1622, 0
 	%r1768 = getelementptr float, float* bitcast ([128 x i64]* @i6000 to float*), i64 %r1767
 	%r1772 = fadd float %r1763, 0.000000e+00
diff --git a/llvm/test/CodeGen/X86/2007-02-04-OrAddrMode.ll b/llvm/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
index ee6e562..f051752 100644
--- a/llvm/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
+++ b/llvm/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
@@ -7,7 +7,7 @@
 ; CHECK: orl $1, %{{.*}}
 ; CHECK: ret
 
-	%tmp3 = load float** %tmp2
+	%tmp3 = load float*, float** %tmp2
 	%tmp132 = shl i32 %tmp12, 2		; <i32> [#uses=1]
 	%tmp4 = bitcast float* %tmp3 to i8*		; <i8*> [#uses=1]
 	%ctg2 = getelementptr i8, i8* %tmp4, i32 %tmp132		; <i8*> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2007-02-16-BranchFold.ll b/llvm/test/CodeGen/X86/2007-02-16-BranchFold.ll
index 2e40e24..f4754a5 100644
--- a/llvm/test/CodeGen/X86/2007-02-16-BranchFold.ll
+++ b/llvm/test/CodeGen/X86/2007-02-16-BranchFold.ll
@@ -55,14 +55,14 @@
 
 bb.i9.i.i932.ce:		; preds = %newFuncRoot
 	%tmp1.i3.i.i930 = getelementptr %struct.list, %struct.list* %l_addr.01.0.i2.i.i929, i32 0, i32 0		; <i8**> [#uses=1]
-	%tmp2.i4.i.i931 = load i8** %tmp1.i3.i.i930		; <i8*> [#uses=1]
+	%tmp2.i4.i.i931 = load i8*, i8** %tmp1.i3.i.i930		; <i8*> [#uses=1]
 	%tmp66.i62.i = bitcast i8* %tmp2.i4.i.i931 to %struct.operator*		; <%struct.operator*> [#uses=7]
 	%tmp1.i6.i = getelementptr %struct.operator, %struct.operator* %tmp66.i62.i, i32 0, i32 2		; <i32*> [#uses=1]
-	%tmp2.i7.i = load i32* %tmp1.i6.i		; <i32> [#uses=1]
-	%tmp3.i8.i = load %struct.FILE** @outfile		; <%struct.FILE*> [#uses=1]
+	%tmp2.i7.i = load i32, i32* %tmp1.i6.i		; <i32> [#uses=1]
+	%tmp3.i8.i = load %struct.FILE*, %struct.FILE** @outfile		; <%struct.FILE*> [#uses=1]
 	%tmp5.i9.i = call i32 (%struct.FILE*, i8*, ...)* @fprintf( %struct.FILE* %tmp3.i8.i, i8* getelementptr ([11 x i8]* @str1, i32 0, i32 0), i32 %tmp2.i7.i )		; <i32> [#uses=0]
 	%tmp7.i10.i = getelementptr %struct.operator, %struct.operator* %tmp66.i62.i, i32 0, i32 5		; <i32*> [#uses=1]
-	%tmp8.i11.i = load i32* %tmp7.i10.i		; <i32> [#uses=7]
+	%tmp8.i11.i = load i32, i32* %tmp7.i10.i		; <i32> [#uses=7]
 	br label %NodeBlock5
 
 NodeBlock5:		; preds = %bb.i9.i.i932.ce
diff --git a/llvm/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll b/llvm/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
index 954c95d..5d2c01a 100644
--- a/llvm/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
+++ b/llvm/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll
@@ -6,9 +6,9 @@
 @stderr = external global %struct._IO_FILE*
 
 define void @__eprintf(i8* %string, i8* %expression, i32 %line, i8* %filename) {
-	%tmp = load %struct._IO_FILE** @stderr
+	%tmp = load %struct._IO_FILE*, %struct._IO_FILE** @stderr
 	%tmp5 = tail call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf( %struct._IO_FILE* %tmp, i8* %string, i8* %expression, i32 %line, i8* %filename )
-	%tmp6 = load %struct._IO_FILE** @stderr
+	%tmp6 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr
 	%tmp7 = tail call i32 @fflush( %struct._IO_FILE* %tmp6 )
 	tail call void @abort( )
 	unreachable
diff --git a/llvm/test/CodeGen/X86/2007-03-01-SpillerCrash.ll b/llvm/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
index 112d1ab..dbbb611 100644
--- a/llvm/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
+++ b/llvm/test/CodeGen/X86/2007-03-01-SpillerCrash.ll
@@ -4,7 +4,7 @@
 define void @test() nounwind {
 test.exit:
 	fmul <4 x float> zeroinitializer, zeroinitializer		; <<4 x float>>:0 [#uses=4]
-	load <4 x float>* null		; <<4 x float>>:1 [#uses=1]
+	load <4 x float>, <4 x float>* null		; <<4 x float>>:1 [#uses=1]
 	shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 >		; <<4 x float>>:2 [#uses=1]
 	fmul <4 x float> %0, %2		; <<4 x float>>:3 [#uses=1]
 	fsub <4 x float> zeroinitializer, %3		; <<4 x float>>:4 [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll b/llvm/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
index f2cbbf8..f159bcd 100644
--- a/llvm/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
+++ b/llvm/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll
@@ -27,47 +27,47 @@
 	%p_addr.076.0.rec = mul i32 %i.073.0, 9		; <i32> [#uses=9]
 	%p_addr.076.0 = getelementptr i8, i8* %p, i32 %p_addr.076.0.rec		; <i8*> [#uses=1]
 	%tmp2 = getelementptr i8*, i8** %buf, i32 %i.073.0		; <i8**> [#uses=1]
-	%tmp3 = load i8** %tmp2		; <i8*> [#uses=8]
+	%tmp3 = load i8*, i8** %tmp2		; <i8*> [#uses=8]
 	%tmp5 = getelementptr i8, i8* %tmp3, i32 %col		; <i8*> [#uses=1]
-	%tmp7 = load i8* %p_addr.076.0		; <i8> [#uses=1]
+	%tmp7 = load i8, i8* %p_addr.076.0		; <i8> [#uses=1]
 	store i8 %tmp7, i8* %tmp5
 	%p_addr.076.0.sum93 = add i32 %p_addr.076.0.rec, 1		; <i32> [#uses=1]
 	%tmp11 = getelementptr i8, i8* %p, i32 %p_addr.076.0.sum93		; <i8*> [#uses=1]
-	%tmp13 = load i8* %tmp11		; <i8> [#uses=1]
+	%tmp13 = load i8, i8* %tmp11		; <i8> [#uses=1]
 	%tmp15 = getelementptr i8, i8* %tmp3, i32 %tmp5.sum72		; <i8*> [#uses=1]
 	store i8 %tmp13, i8* %tmp15
 	%p_addr.076.0.sum92 = add i32 %p_addr.076.0.rec, 2		; <i32> [#uses=1]
 	%tmp17 = getelementptr i8, i8* %p, i32 %p_addr.076.0.sum92		; <i8*> [#uses=1]
-	%tmp19 = load i8* %tmp17		; <i8> [#uses=1]
+	%tmp19 = load i8, i8* %tmp17		; <i8> [#uses=1]
 	%tmp21 = getelementptr i8, i8* %tmp3, i32 %tmp5.sum71		; <i8*> [#uses=1]
 	store i8 %tmp19, i8* %tmp21
 	%p_addr.076.0.sum91 = add i32 %p_addr.076.0.rec, 3		; <i32> [#uses=1]
 	%tmp23 = getelementptr i8, i8* %p, i32 %p_addr.076.0.sum91		; <i8*> [#uses=1]
-	%tmp25 = load i8* %tmp23		; <i8> [#uses=1]
+	%tmp25 = load i8, i8* %tmp23		; <i8> [#uses=1]
 	%tmp27 = getelementptr i8, i8* %tmp3, i32 %tmp5.sum70		; <i8*> [#uses=1]
 	store i8 %tmp25, i8* %tmp27
 	%p_addr.076.0.sum90 = add i32 %p_addr.076.0.rec, 4		; <i32> [#uses=1]
 	%tmp29 = getelementptr i8, i8* %p, i32 %p_addr.076.0.sum90		; <i8*> [#uses=1]
-	%tmp31 = load i8* %tmp29		; <i8> [#uses=1]
+	%tmp31 = load i8, i8* %tmp29		; <i8> [#uses=1]
 	%tmp33 = getelementptr i8, i8* %tmp3, i32 %tmp5.sum69		; <i8*> [#uses=2]
 	store i8 %tmp31, i8* %tmp33
 	%p_addr.076.0.sum89 = add i32 %p_addr.076.0.rec, 5		; <i32> [#uses=1]
 	%tmp35 = getelementptr i8, i8* %p, i32 %p_addr.076.0.sum89		; <i8*> [#uses=1]
-	%tmp37 = load i8* %tmp35		; <i8> [#uses=1]
+	%tmp37 = load i8, i8* %tmp35		; <i8> [#uses=1]
 	%tmp39 = getelementptr i8, i8* %tmp3, i32 %tmp5.sum68		; <i8*> [#uses=1]
 	store i8 %tmp37, i8* %tmp39
 	%p_addr.076.0.sum88 = add i32 %p_addr.076.0.rec, 6		; <i32> [#uses=1]
 	%tmp41 = getelementptr i8, i8* %p, i32 %p_addr.076.0.sum88		; <i8*> [#uses=1]
-	%tmp43 = load i8* %tmp41		; <i8> [#uses=1]
+	%tmp43 = load i8, i8* %tmp41		; <i8> [#uses=1]
 	store i8 %tmp43, i8* %tmp33
 	%p_addr.076.0.sum87 = add i32 %p_addr.076.0.rec, 7		; <i32> [#uses=1]
 	%tmp47 = getelementptr i8, i8* %p, i32 %p_addr.076.0.sum87		; <i8*> [#uses=1]
-	%tmp49 = load i8* %tmp47		; <i8> [#uses=1]
+	%tmp49 = load i8, i8* %tmp47		; <i8> [#uses=1]
 	%tmp51 = getelementptr i8, i8* %tmp3, i32 %tmp5.sum66		; <i8*> [#uses=1]
 	store i8 %tmp49, i8* %tmp51
 	%p_addr.076.0.sum = add i32 %p_addr.076.0.rec, 8		; <i32> [#uses=1]
 	%tmp53 = getelementptr i8, i8* %p, i32 %p_addr.076.0.sum		; <i8*> [#uses=1]
-	%tmp55 = load i8* %tmp53		; <i8> [#uses=1]
+	%tmp55 = load i8, i8* %tmp53		; <i8> [#uses=1]
 	%tmp57 = getelementptr i8, i8* %tmp3, i32 %tmp5.sum		; <i8*> [#uses=1]
 	store i8 %tmp55, i8* %tmp57
 	%indvar.next = add i32 %i.073.0, 1		; <i32> [#uses=2]
diff --git a/llvm/test/CodeGen/X86/2007-03-16-InlineAsm.ll b/llvm/test/CodeGen/X86/2007-03-16-InlineAsm.ll
index 3bd6d59..6174681 100644
--- a/llvm/test/CodeGen/X86/2007-03-16-InlineAsm.ll
+++ b/llvm/test/CodeGen/X86/2007-03-16-InlineAsm.ll
@@ -11,16 +11,16 @@
 	%ret = alloca i32, align 4		; <i32*> [#uses=2]
 	store i32 %A, i32* %A_addr
 	store i32 %B, i32* %B_addr
-	%tmp1 = load i32* %A_addr		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* %A_addr		; <i32> [#uses=1]
 	%tmp2 = call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"( i32 7, i32 %tmp1 )		; <i32> [#uses=1]
 	store i32 %tmp2, i32* %ret
-	%tmp3 = load i32* %ret		; <i32> [#uses=1]
+	%tmp3 = load i32, i32* %ret		; <i32> [#uses=1]
 	store i32 %tmp3, i32* %tmp
-	%tmp4 = load i32* %tmp		; <i32> [#uses=1]
+	%tmp4 = load i32, i32* %tmp		; <i32> [#uses=1]
 	store i32 %tmp4, i32* %retval
 	br label %return
 
 return:		; preds = %entry
-	%retval5 = load i32* %retval		; <i32> [#uses=1]
+	%retval5 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval5
 }
diff --git a/llvm/test/CodeGen/X86/2007-03-26-CoalescerBug.ll b/llvm/test/CodeGen/X86/2007-03-26-CoalescerBug.ll
index 9676f14..b9689e9 100644
--- a/llvm/test/CodeGen/X86/2007-03-26-CoalescerBug.ll
+++ b/llvm/test/CodeGen/X86/2007-03-26-CoalescerBug.ll
@@ -4,7 +4,7 @@
 
 define void @foo(...) {
 bb1:
-	%t43 = load i64* getelementptr ([339 x i64]* @data, i32 0, i64 212), align 4
+	%t43 = load i64, i64* getelementptr ([339 x i64]* @data, i32 0, i64 212), align 4
 	br i1 false, label %bb80, label %bb6
 bb6:
 	br i1 false, label %bb38, label %bb265
diff --git a/llvm/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll b/llvm/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll
index f916753..31c6b53 100644
--- a/llvm/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll
+++ b/llvm/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll
@@ -20,7 +20,7 @@
 	%i.2115.0 = phi i32 [ 0, %cond_true ], [ %indvar.next127, %bb32 ]		; <i32> [#uses=1]
 	%c.2112.0 = phi i32 [ 0, %cond_true ], [ %tmp49, %bb32 ]		; <i32> [#uses=1]
 	%tmp43 = getelementptr %struct.partition_def, %struct.partition_def* %part, i32 0, i32 1, i32 %c.2112.0, i32 1		; <%struct.partition_elem**> [#uses=1]
-	%tmp44 = load %struct.partition_elem** %tmp43		; <%struct.partition_elem*> [#uses=1]
+	%tmp44 = load %struct.partition_elem*, %struct.partition_elem** %tmp43		; <%struct.partition_elem*> [#uses=1]
 	%tmp4445 = ptrtoint %struct.partition_elem* %tmp44 to i32		; <i32> [#uses=1]
 	%tmp48 = sub i32 %tmp4445, 0		; <i32> [#uses=1]
 	%tmp49 = sdiv i32 %tmp48, 12		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2007-05-05-VecCastExpand.ll b/llvm/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
index e58b193..1274398 100644
--- a/llvm/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
+++ b/llvm/test/CodeGen/X86/2007-05-05-VecCastExpand.ll
@@ -5,9 +5,9 @@
 
 define void @test() {
 bb.i:
-	%tmp.i660 = load <4 x float>* null		; <<4 x float>> [#uses=1]
+	%tmp.i660 = load <4 x float>, <4 x float>* null		; <<4 x float>> [#uses=1]
 	call void (i32, ...)* @printf( i32 0, i8* getelementptr ([18 x i8]* @str, i32 0, i64 0), double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00 )
-	%tmp152.i = load <4 x i32>* null		; <<4 x i32>> [#uses=1]
+	%tmp152.i = load <4 x i32>, <4 x i32>* null		; <<4 x i32>> [#uses=1]
 	%tmp156.i = bitcast <4 x i32> %tmp152.i to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp175.i = bitcast <4 x float> %tmp.i660 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp176.i = xor <4 x i32> %tmp156.i, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll b/llvm/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
index b3df7d4..87edab7 100644
--- a/llvm/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
+++ b/llvm/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll
@@ -4,7 +4,7 @@
 	%tmp89 = getelementptr <4 x float>, <4 x float>* %arg, i64 3
 	%tmp1144 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, zeroinitializer
 	store <4 x float> %tmp1144, <4 x float>* null
-	%tmp1149 = load <4 x float>* %tmp89
+	%tmp1149 = load <4 x float>, <4 x float>* %tmp89
 	%tmp1150 = fsub <4 x float> < float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00 >, %tmp1149
 	store <4 x float> %tmp1150, <4 x float>* %tmp89
 	ret void
diff --git a/llvm/test/CodeGen/X86/2007-07-10-StackerAssert.ll b/llvm/test/CodeGen/X86/2007-07-10-StackerAssert.ll
index d611677..f8b09d0 100644
--- a/llvm/test/CodeGen/X86/2007-07-10-StackerAssert.ll
+++ b/llvm/test/CodeGen/X86/2007-07-10-StackerAssert.ll
@@ -22,11 +22,11 @@
 	ret i32 0
 
 bb383:		; preds = %bb164
-	%tmp408 = load float* null		; <float> [#uses=2]
+	%tmp408 = load float, float* null		; <float> [#uses=2]
 	br i1 false, label %cond_true425, label %cond_next443
 
 cond_true425:		; preds = %bb383
-	%tmp430 = load float* null		; <float> [#uses=1]
+	%tmp430 = load float, float* null		; <float> [#uses=1]
 	%tmp432 = fsub float %tmp430, %tmp408		; <float> [#uses=1]
 	%tmp432433 = fpext float %tmp432 to double		; <double> [#uses=1]
 	%tmp434435 = fpext float %tmp408 to double		; <double> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2007-07-18-Vector-Extract.ll b/llvm/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
index 41b6e94..63ed460 100644
--- a/llvm/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
+++ b/llvm/test/CodeGen/X86/2007-07-18-Vector-Extract.ll
@@ -5,13 +5,13 @@
 define i64 @foo_0(<2 x i64>* %val) {
 entry:
         %val12 = getelementptr <2 x i64>, <2 x i64>* %val, i32 0, i32 0            ; <i64*> [#uses=1]
-        %tmp7 = load i64* %val12                ; <i64> [#uses=1]
+        %tmp7 = load i64, i64* %val12                ; <i64> [#uses=1]
         ret i64 %tmp7
 }
 
 define i64 @foo_1(<2 x i64>* %val) {
 entry:
         %tmp2.gep = getelementptr <2 x i64>, <2 x i64>* %val, i32 0, i32 1         ; <i64*> [#uses=1]
-        %tmp4 = load i64* %tmp2.gep             ; <i64> [#uses=1]
+        %tmp4 = load i64, i64* %tmp2.gep             ; <i64> [#uses=1]
         ret i64 %tmp4
 }
diff --git a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
index 9bf098f..2b3ac5c 100644
--- a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
+++ b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll
@@ -46,14 +46,14 @@
 	]
 
 bb4:		; preds = %_ubyte_convert2_to_ctypes.exit, %cond_next.i
-	%tmp5 = load i8*** @PyArray_API, align 8		; <i8**> [#uses=1]
+	%tmp5 = load i8**, i8*** @PyArray_API, align 8		; <i8**> [#uses=1]
 	%tmp6 = getelementptr i8*, i8** %tmp5, i64 2		; <i8**> [#uses=1]
-	%tmp7 = load i8** %tmp6		; <i8*> [#uses=1]
+	%tmp7 = load i8*, i8** %tmp6		; <i8*> [#uses=1]
 	%tmp78 = bitcast i8* %tmp7 to %struct._typeobject*		; <%struct._typeobject*> [#uses=1]
 	%tmp9 = getelementptr %struct._typeobject, %struct._typeobject* %tmp78, i32 0, i32 12		; <%struct.PyNumberMethods**> [#uses=1]
-	%tmp10 = load %struct.PyNumberMethods** %tmp9		; <%struct.PyNumberMethods*> [#uses=1]
+	%tmp10 = load %struct.PyNumberMethods*, %struct.PyNumberMethods** %tmp9		; <%struct.PyNumberMethods*> [#uses=1]
 	%tmp11 = getelementptr %struct.PyNumberMethods, %struct.PyNumberMethods* %tmp10, i32 0, i32 5		; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)**> [#uses=1]
-	%tmp12 = load %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)** %tmp11		; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*> [#uses=1]
+	%tmp12 = load %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)** %tmp11		; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*> [#uses=1]
 	%tmp15 = call %struct.PyObject* %tmp12( %struct.PyObject* %a, %struct.PyObject* %b )		; <%struct.PyObject*> [#uses=1]
 	ret %struct.PyObject* %tmp15
 
@@ -63,38 +63,38 @@
 	br i1 %tmp19, label %cond_next, label %UnifiedReturnBlock
 
 cond_next:		; preds = %bb17
-	%tmp22 = load i8*** @PyArray_API, align 8		; <i8**> [#uses=1]
+	%tmp22 = load i8**, i8*** @PyArray_API, align 8		; <i8**> [#uses=1]
 	%tmp23 = getelementptr i8*, i8** %tmp22, i64 10		; <i8**> [#uses=1]
-	%tmp24 = load i8** %tmp23		; <i8*> [#uses=1]
+	%tmp24 = load i8*, i8** %tmp23		; <i8*> [#uses=1]
 	%tmp2425 = bitcast i8* %tmp24 to %struct._typeobject*		; <%struct._typeobject*> [#uses=1]
 	%tmp26 = getelementptr %struct._typeobject, %struct._typeobject* %tmp2425, i32 0, i32 12		; <%struct.PyNumberMethods**> [#uses=1]
-	%tmp27 = load %struct.PyNumberMethods** %tmp26		; <%struct.PyNumberMethods*> [#uses=1]
+	%tmp27 = load %struct.PyNumberMethods*, %struct.PyNumberMethods** %tmp26		; <%struct.PyNumberMethods*> [#uses=1]
 	%tmp28 = getelementptr %struct.PyNumberMethods, %struct.PyNumberMethods* %tmp27, i32 0, i32 5		; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)**> [#uses=1]
-	%tmp29 = load %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)** %tmp28		; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*> [#uses=1]
+	%tmp29 = load %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*, %struct.PyObject* (%struct.PyObject*, %struct.PyObject*)** %tmp28		; <%struct.PyObject* (%struct.PyObject*, %struct.PyObject*)*> [#uses=1]
 	%tmp32 = call %struct.PyObject* %tmp29( %struct.PyObject* %a, %struct.PyObject* %b )		; <%struct.PyObject*> [#uses=1]
 	ret %struct.PyObject* %tmp32
 
 bb35:		; preds = %_ubyte_convert2_to_ctypes.exit, %cond_next.i
-	%tmp36 = load i8*** @PyUFunc_API, align 8		; <i8**> [#uses=1]
+	%tmp36 = load i8**, i8*** @PyUFunc_API, align 8		; <i8**> [#uses=1]
 	%tmp37 = getelementptr i8*, i8** %tmp36, i64 27		; <i8**> [#uses=1]
-	%tmp38 = load i8** %tmp37		; <i8*> [#uses=1]
+	%tmp38 = load i8*, i8** %tmp37		; <i8*> [#uses=1]
 	%tmp3839 = bitcast i8* %tmp38 to void ()*		; <void ()*> [#uses=1]
 	call void %tmp3839( )
-	%tmp40 = load i8* %arg2, align 1		; <i8> [#uses=4]
+	%tmp40 = load i8, i8* %arg2, align 1		; <i8> [#uses=4]
 	%tmp1.i = icmp eq i8 %tmp40, 0		; <i1> [#uses=2]
 	br i1 %tmp1.i, label %cond_true.i, label %cond_false.i
 
 cond_true.i:		; preds = %bb35
 	%tmp3.i196 = call i32 @feraiseexcept( i32 4 )		; <i32> [#uses=0]
-	%tmp46207 = load i8* %arg2, align 1		; <i8> [#uses=3]
-	%tmp48208 = load i8* %arg1, align 1		; <i8> [#uses=2]
+	%tmp46207 = load i8, i8* %arg2, align 1		; <i8> [#uses=3]
+	%tmp48208 = load i8, i8* %arg1, align 1		; <i8> [#uses=2]
 	%tmp1.i197210 = icmp eq i8 %tmp48208, 0		; <i1> [#uses=1]
 	%tmp4.i212 = icmp eq i8 %tmp46207, 0		; <i1> [#uses=1]
 	%tmp7.i198213 = or i1 %tmp1.i197210, %tmp4.i212		; <i1> [#uses=1]
 	br i1 %tmp7.i198213, label %cond_true.i200, label %cond_next17.i
 
 cond_false.i:		; preds = %bb35
-	%tmp42 = load i8* %arg1, align 1		; <i8> [#uses=3]
+	%tmp42 = load i8, i8* %arg1, align 1		; <i8> [#uses=3]
 	%tmp7.i = udiv i8 %tmp42, %tmp40		; <i8> [#uses=2]
 	%tmp1.i197 = icmp eq i8 %tmp42, 0		; <i1> [#uses=1]
 	%tmp7.i198 = or i1 %tmp1.i197, %tmp1.i		; <i1> [#uses=1]
@@ -120,18 +120,18 @@
 ubyte_ctype_remainder.exit:		; preds = %cond_next17.i, %cond_true14.i, %cond_true.i200
 	%out2.0 = phi i8 [ %tmp20.i, %cond_next17.i ], [ 0, %cond_true14.i ], [ 0, %cond_true.i200 ]		; <i8> [#uses=1]
 	%out.2 = phi i8 [ %out.1, %cond_next17.i ], [ %out.0, %cond_true14.i ], [ %out.0, %cond_true.i200 ]		; <i8> [#uses=1]
-	%tmp52 = load i8*** @PyUFunc_API, align 8		; <i8**> [#uses=1]
+	%tmp52 = load i8**, i8*** @PyUFunc_API, align 8		; <i8**> [#uses=1]
 	%tmp53 = getelementptr i8*, i8** %tmp52, i64 28		; <i8**> [#uses=1]
-	%tmp54 = load i8** %tmp53		; <i8*> [#uses=1]
+	%tmp54 = load i8*, i8** %tmp53		; <i8*> [#uses=1]
 	%tmp5455 = bitcast i8* %tmp54 to i32 ()*		; <i32 ()*> [#uses=1]
 	%tmp56 = call i32 %tmp5455( )		; <i32> [#uses=2]
 	%tmp58 = icmp eq i32 %tmp56, 0		; <i1> [#uses=1]
 	br i1 %tmp58, label %cond_next89, label %cond_true61
 
 cond_true61:		; preds = %ubyte_ctype_remainder.exit
-	%tmp62 = load i8*** @PyUFunc_API, align 8		; <i8**> [#uses=1]
+	%tmp62 = load i8**, i8*** @PyUFunc_API, align 8		; <i8**> [#uses=1]
 	%tmp63 = getelementptr i8*, i8** %tmp62, i64 25		; <i8**> [#uses=1]
-	%tmp64 = load i8** %tmp63		; <i8*> [#uses=1]
+	%tmp64 = load i8*, i8** %tmp63		; <i8*> [#uses=1]
 	%tmp6465 = bitcast i8* %tmp64 to i32 (i8*, i32*, i32*, %struct.PyObject**)*		; <i32 (i8*, i32*, i32*, %struct.PyObject**)*> [#uses=1]
 	%tmp67 = call i32 %tmp6465( i8* getelementptr ([14 x i8]* @.str5, i32 0, i64 0), i32* %bufsize, i32* %errmask, %struct.PyObject** %errobj )		; <i32> [#uses=1]
 	%tmp68 = icmp slt i32 %tmp67, 0		; <i1> [#uses=1]
@@ -139,12 +139,12 @@
 
 cond_next73:		; preds = %cond_true61
 	store i32 1, i32* %first, align 4
-	%tmp74 = load i8*** @PyUFunc_API, align 8		; <i8**> [#uses=1]
+	%tmp74 = load i8**, i8*** @PyUFunc_API, align 8		; <i8**> [#uses=1]
 	%tmp75 = getelementptr i8*, i8** %tmp74, i64 29		; <i8**> [#uses=1]
-	%tmp76 = load i8** %tmp75		; <i8*> [#uses=1]
+	%tmp76 = load i8*, i8** %tmp75		; <i8*> [#uses=1]
 	%tmp7677 = bitcast i8* %tmp76 to i32 (i32, %struct.PyObject*, i32, i32*)*		; <i32 (i32, %struct.PyObject*, i32, i32*)*> [#uses=1]
-	%tmp79 = load %struct.PyObject** %errobj, align 8		; <%struct.PyObject*> [#uses=1]
-	%tmp80 = load i32* %errmask, align 4		; <i32> [#uses=1]
+	%tmp79 = load %struct.PyObject*, %struct.PyObject** %errobj, align 8		; <%struct.PyObject*> [#uses=1]
+	%tmp80 = load i32, i32* %errmask, align 4		; <i32> [#uses=1]
 	%tmp82 = call i32 %tmp7677( i32 %tmp80, %struct.PyObject* %tmp79, i32 %tmp56, i32* %first )		; <i32> [#uses=1]
 	%tmp83 = icmp eq i32 %tmp82, 0		; <i1> [#uses=1]
 	br i1 %tmp83, label %cond_next89, label %UnifiedReturnBlock
@@ -155,19 +155,19 @@
 	br i1 %tmp92, label %UnifiedReturnBlock, label %cond_next97
 
 cond_next97:		; preds = %cond_next89
-	%tmp98 = load i8*** @PyArray_API, align 8		; <i8**> [#uses=1]
+	%tmp98 = load i8**, i8*** @PyArray_API, align 8		; <i8**> [#uses=1]
 	%tmp99 = getelementptr i8*, i8** %tmp98, i64 25		; <i8**> [#uses=1]
-	%tmp100 = load i8** %tmp99		; <i8*> [#uses=1]
+	%tmp100 = load i8*, i8** %tmp99		; <i8*> [#uses=1]
 	%tmp100101 = bitcast i8* %tmp100 to %struct._typeobject*		; <%struct._typeobject*> [#uses=2]
 	%tmp102 = getelementptr %struct._typeobject, %struct._typeobject* %tmp100101, i32 0, i32 38		; <%struct.PyObject* (%struct._typeobject*, i64)**> [#uses=1]
-	%tmp103 = load %struct.PyObject* (%struct._typeobject*, i64)** %tmp102		; <%struct.PyObject* (%struct._typeobject*, i64)*> [#uses=1]
+	%tmp103 = load %struct.PyObject* (%struct._typeobject*, i64)*, %struct.PyObject* (%struct._typeobject*, i64)** %tmp102		; <%struct.PyObject* (%struct._typeobject*, i64)*> [#uses=1]
 	%tmp108 = call %struct.PyObject* %tmp103( %struct._typeobject* %tmp100101, i64 0 )		; <%struct.PyObject*> [#uses=3]
 	%tmp110 = icmp eq %struct.PyObject* %tmp108, null		; <i1> [#uses=1]
 	br i1 %tmp110, label %cond_true113, label %cond_next135
 
 cond_true113:		; preds = %cond_next97
 	%tmp115 = getelementptr %struct.PyObject, %struct.PyObject* %tmp90, i32 0, i32 0		; <i64*> [#uses=2]
-	%tmp116 = load i64* %tmp115		; <i64> [#uses=1]
+	%tmp116 = load i64, i64* %tmp115		; <i64> [#uses=1]
 	%tmp117 = add i64 %tmp116, -1		; <i64> [#uses=2]
 	store i64 %tmp117, i64* %tmp115
 	%tmp123 = icmp eq i64 %tmp117, 0		; <i1> [#uses=1]
@@ -175,9 +175,9 @@
 
 cond_true126:		; preds = %cond_true113
 	%tmp128 = getelementptr %struct.PyObject, %struct.PyObject* %tmp90, i32 0, i32 1		; <%struct._typeobject**> [#uses=1]
-	%tmp129 = load %struct._typeobject** %tmp128		; <%struct._typeobject*> [#uses=1]
+	%tmp129 = load %struct._typeobject*, %struct._typeobject** %tmp128		; <%struct._typeobject*> [#uses=1]
 	%tmp130 = getelementptr %struct._typeobject, %struct._typeobject* %tmp129, i32 0, i32 6		; <void (%struct.PyObject*)**> [#uses=1]
-	%tmp131 = load void (%struct.PyObject*)** %tmp130		; <void (%struct.PyObject*)*> [#uses=1]
+	%tmp131 = load void (%struct.PyObject*)*, void (%struct.PyObject*)** %tmp130		; <void (%struct.PyObject*)*> [#uses=1]
 	call void %tmp131( %struct.PyObject* %tmp90 )
 	ret %struct.PyObject* null
 
@@ -188,19 +188,19 @@
 	%tmp140141 = bitcast %struct.PyObject* %tmp90 to %struct.PyTupleObject*		; <%struct.PyTupleObject*> [#uses=2]
 	%tmp143 = getelementptr %struct.PyTupleObject, %struct.PyTupleObject* %tmp140141, i32 0, i32 3, i64 0		; <%struct.PyObject**> [#uses=1]
 	store %struct.PyObject* %tmp108, %struct.PyObject** %tmp143
-	%tmp145 = load i8*** @PyArray_API, align 8		; <i8**> [#uses=1]
+	%tmp145 = load i8**, i8*** @PyArray_API, align 8		; <i8**> [#uses=1]
 	%tmp146 = getelementptr i8*, i8** %tmp145, i64 25		; <i8**> [#uses=1]
-	%tmp147 = load i8** %tmp146		; <i8*> [#uses=1]
+	%tmp147 = load i8*, i8** %tmp146		; <i8*> [#uses=1]
 	%tmp147148 = bitcast i8* %tmp147 to %struct._typeobject*		; <%struct._typeobject*> [#uses=2]
 	%tmp149 = getelementptr %struct._typeobject, %struct._typeobject* %tmp147148, i32 0, i32 38		; <%struct.PyObject* (%struct._typeobject*, i64)**> [#uses=1]
-	%tmp150 = load %struct.PyObject* (%struct._typeobject*, i64)** %tmp149		; <%struct.PyObject* (%struct._typeobject*, i64)*> [#uses=1]
+	%tmp150 = load %struct.PyObject* (%struct._typeobject*, i64)*, %struct.PyObject* (%struct._typeobject*, i64)** %tmp149		; <%struct.PyObject* (%struct._typeobject*, i64)*> [#uses=1]
 	%tmp155 = call %struct.PyObject* %tmp150( %struct._typeobject* %tmp147148, i64 0 )		; <%struct.PyObject*> [#uses=3]
 	%tmp157 = icmp eq %struct.PyObject* %tmp155, null		; <i1> [#uses=1]
 	br i1 %tmp157, label %cond_true160, label %cond_next182
 
 cond_true160:		; preds = %cond_next135
 	%tmp162 = getelementptr %struct.PyObject, %struct.PyObject* %tmp90, i32 0, i32 0		; <i64*> [#uses=2]
-	%tmp163 = load i64* %tmp162		; <i64> [#uses=1]
+	%tmp163 = load i64, i64* %tmp162		; <i64> [#uses=1]
 	%tmp164 = add i64 %tmp163, -1		; <i64> [#uses=2]
 	store i64 %tmp164, i64* %tmp162
 	%tmp170 = icmp eq i64 %tmp164, 0		; <i1> [#uses=1]
@@ -208,9 +208,9 @@
 
 cond_true173:		; preds = %cond_true160
 	%tmp175 = getelementptr %struct.PyObject, %struct.PyObject* %tmp90, i32 0, i32 1		; <%struct._typeobject**> [#uses=1]
-	%tmp176 = load %struct._typeobject** %tmp175		; <%struct._typeobject*> [#uses=1]
+	%tmp176 = load %struct._typeobject*, %struct._typeobject** %tmp175		; <%struct._typeobject*> [#uses=1]
 	%tmp177 = getelementptr %struct._typeobject, %struct._typeobject* %tmp176, i32 0, i32 6		; <void (%struct.PyObject*)**> [#uses=1]
-	%tmp178 = load void (%struct.PyObject*)** %tmp177		; <void (%struct.PyObject*)*> [#uses=1]
+	%tmp178 = load void (%struct.PyObject*)*, void (%struct.PyObject*)** %tmp177		; <void (%struct.PyObject*)*> [#uses=1]
 	call void %tmp178( %struct.PyObject* %tmp90 )
 	ret %struct.PyObject* null
 
diff --git a/llvm/test/CodeGen/X86/2007-09-05-InvalidAsm.ll b/llvm/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
index 93bd51a..eb71512 100644
--- a/llvm/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
+++ b/llvm/test/CodeGen/X86/2007-09-05-InvalidAsm.ll
@@ -14,12 +14,12 @@
 cond_true:		; preds = %entry
 	%tmp1415 = shl i16 %param, 3		; <i16> [#uses=1]
 	%tmp17 = getelementptr %struct.AGenericCall, %struct.AGenericCall* %this, i32 0, i32 1		; <%struct.ComponentParameters**> [#uses=1]
-	%tmp18 = load %struct.ComponentParameters** %tmp17, align 8		; <%struct.ComponentParameters*> [#uses=1]
+	%tmp18 = load %struct.ComponentParameters*, %struct.ComponentParameters** %tmp17, align 8		; <%struct.ComponentParameters*> [#uses=1]
 	%tmp1920 = bitcast %struct.ComponentParameters* %tmp18 to i8*		; <i8*> [#uses=1]
 	%tmp212223 = sext i16 %tmp1415 to i64		; <i64> [#uses=1]
 	%tmp24 = getelementptr i8, i8* %tmp1920, i64 %tmp212223		; <i8*> [#uses=1]
 	%tmp2425 = bitcast i8* %tmp24 to i64*		; <i64*> [#uses=1]
-	%tmp28 = load i64* %tmp2425, align 8		; <i64> [#uses=1]
+	%tmp28 = load i64, i64* %tmp2425, align 8		; <i64> [#uses=1]
 	%tmp2829 = inttoptr i64 %tmp28 to i32*		; <i32*> [#uses=1]
 	%tmp31 = getelementptr %struct.AGenericCall, %struct.AGenericCall* %this, i32 0, i32 2		; <i32**> [#uses=1]
 	store i32* %tmp2829, i32** %tmp31, align 8
@@ -28,18 +28,18 @@
 cond_next:		; preds = %cond_true, %entry
 	%tmp4243 = shl i16 %param, 3		; <i16> [#uses=1]
 	%tmp46 = getelementptr %struct.AGenericCall, %struct.AGenericCall* %this, i32 0, i32 1		; <%struct.ComponentParameters**> [#uses=1]
-	%tmp47 = load %struct.ComponentParameters** %tmp46, align 8		; <%struct.ComponentParameters*> [#uses=1]
+	%tmp47 = load %struct.ComponentParameters*, %struct.ComponentParameters** %tmp46, align 8		; <%struct.ComponentParameters*> [#uses=1]
 	%tmp4849 = bitcast %struct.ComponentParameters* %tmp47 to i8*		; <i8*> [#uses=1]
 	%tmp505152 = sext i16 %tmp4243 to i64		; <i64> [#uses=1]
 	%tmp53 = getelementptr i8, i8* %tmp4849, i64 %tmp505152		; <i8*> [#uses=1]
 	%tmp5354 = bitcast i8* %tmp53 to i64*		; <i64*> [#uses=1]
-	%tmp58 = load i64* %tmp5354, align 8		; <i64> [#uses=1]
+	%tmp58 = load i64, i64* %tmp5354, align 8		; <i64> [#uses=1]
 	%tmp59 = icmp eq i64 %tmp58, 0		; <i1> [#uses=1]
 	br i1 %tmp59, label %UnifiedReturnBlock, label %cond_true63
 
 cond_true63:		; preds = %cond_next
 	%tmp65 = getelementptr %struct.AGenericCall, %struct.AGenericCall* %this, i32 0, i32 0		; <%struct.AGenericManager**> [#uses=1]
-	%tmp66 = load %struct.AGenericManager** %tmp65, align 8		; <%struct.AGenericManager*> [#uses=1]
+	%tmp66 = load %struct.AGenericManager*, %struct.AGenericManager** %tmp65, align 8		; <%struct.AGenericManager*> [#uses=1]
 	%tmp69 = tail call i32 @_ZN15AGenericManager24DefaultComponentInstanceERP23ComponentInstanceRecord( %struct.AGenericManager* %tmp66, %struct.ComponentInstanceRecord** %instance )		; <i32> [#uses=1]
 	ret i32 %tmp69
 
diff --git a/llvm/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll b/llvm/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll
index 6fc8ec9..7eb018c 100644
--- a/llvm/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll
+++ b/llvm/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll
@@ -5,7 +5,7 @@
 
 define fastcc void @sample_3d_linear(%struct.gl_texture_object* %tObj, %struct.gl_texture_image* %img, float %s, float %t, float %r, i8* %red, i8* %green, i8* %blue, i8* %alpha) {
 entry:
-	%tmp15 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp15 = load i32, i32* null, align 4		; <i32> [#uses=1]
 	%tmp16 = icmp eq i32 %tmp15, 10497		; <i1> [#uses=1]
 	%tmp2152 = call float @floorf( float 0.000000e+00 )		; <float> [#uses=0]
 	br i1 %tmp16, label %cond_true, label %cond_false
diff --git a/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll b/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
index d3a47ae..c535392 100644
--- a/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
+++ b/llvm/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
@@ -7,15 +7,15 @@
 cond_next127:		; preds = %cond_next391, %entry
 	%v.1 = phi i32 [ undef, %entry ], [ %tmp411, %cond_next391 ]		; <i32> [#uses=1]
 	%tmp149 = mul i32 0, %v.1		; <i32> [#uses=0]
-	%tmpss = load i32* %ss, align 4		; <i32> [#uses=1]
-	%tmpbp = load i32* %bp, align 4		; <i32> [#uses=2]
+	%tmpss = load i32, i32* %ss, align 4		; <i32> [#uses=1]
+	%tmpbp = load i32, i32* %bp, align 4		; <i32> [#uses=2]
 	%tmp254 = and i32 %tmpss, 15		; <i32> [#uses=1]
 	%tmp256 = and i32 %tmpbp, 15		; <i32> [#uses=2]
 	br label %cond_next391
 
 cond_next391:		; preds = %cond_next127
-	%tmp393 = load i32* %ss, align 4		; <i32> [#uses=1]
-	%tmp395 = load i32* %bp, align 4		; <i32> [#uses=2]
+	%tmp393 = load i32, i32* %ss, align 4		; <i32> [#uses=1]
+	%tmp395 = load i32, i32* %bp, align 4		; <i32> [#uses=2]
 	%tmp396 = shl i32 %tmp393, %tmp395		; <i32> [#uses=2]
 	%tmp398 = sub i32 32, %tmp256		; <i32> [#uses=2]
 	%tmp399 = lshr i32 %tmp396, %tmp398		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll b/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
index 1e3f2b9..c4d5cb9 100644
--- a/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
+++ b/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll
@@ -7,19 +7,19 @@
 bb171.preheader:		; preds = %entry
 	%tmp176 = fadd float 0.000000e+00, 1.000000e+00		; <float> [#uses=2]
 	%gi.1 = getelementptr float, float* %fz, i32 0		; <float*> [#uses=2]
-	%tmp240 = load float* %gi.1, align 4		; <float> [#uses=1]
+	%tmp240 = load float, float* %gi.1, align 4		; <float> [#uses=1]
 	%tmp242 = fsub float %tmp240, 0.000000e+00		; <float> [#uses=2]
 	%tmp251 = getelementptr float, float* %fz, i32 0		; <float*> [#uses=1]
-	%tmp252 = load float* %tmp251, align 4		; <float> [#uses=1]
+	%tmp252 = load float, float* %tmp251, align 4		; <float> [#uses=1]
 	%tmp258 = getelementptr float, float* %fz, i32 0		; <float*> [#uses=2]
-	%tmp259 = load float* %tmp258, align 4		; <float> [#uses=2]
+	%tmp259 = load float, float* %tmp258, align 4		; <float> [#uses=2]
 	%tmp261 = fmul float %tmp259, %tmp176		; <float> [#uses=1]
 	%tmp262 = fsub float 0.000000e+00, %tmp261		; <float> [#uses=2]
 	%tmp269 = fmul float %tmp252, %tmp176		; <float> [#uses=1]
 	%tmp276 = fmul float %tmp259, 0.000000e+00		; <float> [#uses=1]
 	%tmp277 = fadd float %tmp269, %tmp276		; <float> [#uses=2]
 	%tmp281 = getelementptr float, float* %fz, i32 0		; <float*> [#uses=1]
-	%tmp282 = load float* %tmp281, align 4		; <float> [#uses=2]
+	%tmp282 = load float, float* %tmp281, align 4		; <float> [#uses=2]
 	%tmp284 = fsub float %tmp282, %tmp277		; <float> [#uses=1]
 	%tmp291 = fadd float %tmp282, %tmp277		; <float> [#uses=1]
 	%tmp298 = fsub float 0.000000e+00, %tmp262		; <float> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll b/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
index f6ee02f..4b1c1d7 100644
--- a/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
+++ b/llvm/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
@@ -22,12 +22,12 @@
 	br i1 false, label %cond_true267, label %cond_next391
 
 cond_true267:		; preds = %cond_next245
-	%tmp269 = load i8** %byteptr, align 4		; <i8*> [#uses=3]
-	%tmp270 = load i8* %tmp269, align 1		; <i8> [#uses=1]
+	%tmp269 = load i8*, i8** %byteptr, align 4		; <i8*> [#uses=3]
+	%tmp270 = load i8, i8* %tmp269, align 1		; <i8> [#uses=1]
 	%tmp270271 = zext i8 %tmp270 to i32		; <i32> [#uses=1]
 	%tmp272 = getelementptr i8, i8* %tmp269, i32 1		; <i8*> [#uses=2]
 	store i8* %tmp272, i8** %byteptr, align 4
-	%tmp276 = load i8* %tmp272, align 1		; <i8> [#uses=1]
+	%tmp276 = load i8, i8* %tmp272, align 1		; <i8> [#uses=1]
 	%tmp278 = getelementptr i8, i8* %tmp269, i32 2		; <i8*> [#uses=1]
 	store i8* %tmp278, i8** %byteptr, align 4
 	%tmp286 = icmp eq i32 %tmp270271, %markerPrefix		; <i1> [#uses=1]
@@ -42,7 +42,7 @@
 	br i1 false, label %cond_true343, label %cond_next391
 
 cond_true343:		; preds = %cond_next327
-	%tmp345 = load i8** %byteptr, align 4		; <i8*> [#uses=1]
+	%tmp345 = load i8*, i8** %byteptr, align 4		; <i8*> [#uses=1]
 	store i8* null, i8** %byteptr, align 4
 	store i8* %tmp345, i8** %byteptr, align 4
 	br label %cond_next391
diff --git a/llvm/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll b/llvm/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll
index 8a55935..9419259 100644
--- a/llvm/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll
+++ b/llvm/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll
@@ -10,8 +10,8 @@
         br i1 false, label %bb6, label %bb31
 
 bb6:            ; preds = %bb
-        %tmp10 = load i64* null, align 8                ; <i64> [#uses=1]
-        %tmp16 = load i64* null, align 8                ; <i64> [#uses=1]
+        %tmp10 = load i64, i64* null, align 8                ; <i64> [#uses=1]
+        %tmp16 = load i64, i64* null, align 8                ; <i64> [#uses=1]
         br i1 false, label %bb23, label %bb31
 
 bb23:           ; preds = %bb6
diff --git a/llvm/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll b/llvm/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
index 82052b1..30e1f57 100644
--- a/llvm/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
+++ b/llvm/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
@@ -25,7 +25,7 @@
 	%tmp154155156 = sext i16 %tmp154155 to i32		; <i32> [#uses=1]
 	%tmp158 = xor i32 %tmp154155156, %tmp153		; <i32> [#uses=1]
 	%tmp160 = or i32 %tmp158, %cnt.0		; <i32> [#uses=1]
-	%tmp171 = load i32* %bitptr, align 4		; <i32> [#uses=1]
+	%tmp171 = load i32, i32* %bitptr, align 4		; <i32> [#uses=1]
 	%tmp180181 = sext i16 0 to i32		; <i32> [#uses=3]
 	%tmp183 = add i32 %tmp160, 1		; <i32> [#uses=1]
 	br i1 false, label %cond_true188, label %cond_next245
@@ -54,7 +54,7 @@
 	br i1 false, label %cond_true343, label %cond_next385
 
 cond_true343:		; preds = %cond_next327
-	%tmp345 = load i8** %byteptr, align 4		; <i8*> [#uses=1]
+	%tmp345 = load i8*, i8** %byteptr, align 4		; <i8*> [#uses=1]
 	store i8* null, i8** %byteptr, align 4
 	br i1 false, label %cond_next385, label %cond_true352
 
@@ -69,8 +69,8 @@
 	br label %cond_next391
 
 cond_next391:		; preds = %cond_next385, %cond_next245
-	%tmp393 = load i32* %source, align 4		; <i32> [#uses=1]
-	%tmp395 = load i32* %bitptr, align 4		; <i32> [#uses=2]
+	%tmp393 = load i32, i32* %source, align 4		; <i32> [#uses=1]
+	%tmp395 = load i32, i32* %bitptr, align 4		; <i32> [#uses=2]
 	%tmp396 = shl i32 %tmp393, %tmp395		; <i32> [#uses=1]
 	%tmp398 = sub i32 32, %tmp256		; <i32> [#uses=1]
 	%tmp405 = lshr i32 %tmp396, 31		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll b/llvm/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
index 573a217..9f57df87fe 100644
--- a/llvm/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
+++ b/llvm/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
@@ -2,7 +2,7 @@
 
 define signext i16 @t()   {
 entry:
-	%tmp180 = load i16* null, align 2		; <i16> [#uses=3]
+	%tmp180 = load i16, i16* null, align 2		; <i16> [#uses=3]
 	%tmp180181 = sext i16 %tmp180 to i32		; <i32> [#uses=1]
 	%tmp185 = icmp slt i16 %tmp180, 0		; <i1> [#uses=1]
 	br i1 %tmp185, label %cond_true188, label %cond_next245
diff --git a/llvm/test/CodeGen/X86/2007-10-31-extractelement-i64.ll b/llvm/test/CodeGen/X86/2007-10-31-extractelement-i64.ll
index 1b8e67d..3d52b6c 100644
--- a/llvm/test/CodeGen/X86/2007-10-31-extractelement-i64.ll
+++ b/llvm/test/CodeGen/X86/2007-10-31-extractelement-i64.ll
@@ -9,16 +9,16 @@
 	%retval = alloca <1 x i64>, align 8		; <<1 x i64>*> [#uses=3]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store <2 x i64> %__A, <2 x i64>* %__A_addr
-	%tmp = load <2 x i64>* %__A_addr, align 16		; <<2 x i64>> [#uses=1]
+	%tmp = load <2 x i64>, <2 x i64>* %__A_addr, align 16		; <<2 x i64>> [#uses=1]
 	%tmp1 = bitcast <2 x i64> %tmp to <2 x i64>		; <<2 x i64>> [#uses=1]
 	%tmp2 = extractelement <2 x i64> %tmp1, i32 0		; <i64> [#uses=1]
 	%tmp3 = bitcast i64 %tmp2 to <1 x i64>		; <<1 x i64>> [#uses=1]
 	store <1 x i64> %tmp3, <1 x i64>* %retval, align 8
-	%tmp4 = load <1 x i64>* %retval, align 8		; <<1 x i64>> [#uses=0]
+	%tmp4 = load <1 x i64>, <1 x i64>* %retval, align 8		; <<1 x i64>> [#uses=0]
 	br label %return
 
 return:		; preds = %entry
-	%retval5 = load <1 x i64>* %retval		; <<1 x i64>> [#uses=1]
+	%retval5 = load <1 x i64>, <1 x i64>* %retval		; <<1 x i64>> [#uses=1]
 	ret <1 x i64> %retval5
 }
 
@@ -28,16 +28,16 @@
 	%retval = alloca <1 x i64>, align 8		; <<1 x i64>*> [#uses=3]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store <2 x i64> %__A, <2 x i64>* %__A_addr
-	%tmp = load <2 x i64>* %__A_addr, align 16		; <<2 x i64>> [#uses=1]
+	%tmp = load <2 x i64>, <2 x i64>* %__A_addr, align 16		; <<2 x i64>> [#uses=1]
 	%tmp1 = bitcast <2 x i64> %tmp to <2 x i64>		; <<2 x i64>> [#uses=1]
 	%tmp2 = extractelement <2 x i64> %tmp1, i32 1		; <i64> [#uses=1]
 	%tmp3 = bitcast i64 %tmp2 to <1 x i64>		; <<1 x i64>> [#uses=1]
 	store <1 x i64> %tmp3, <1 x i64>* %retval, align 8
-	%tmp4 = load <1 x i64>* %retval, align 8		; <<1 x i64>> [#uses=0]
+	%tmp4 = load <1 x i64>, <1 x i64>* %retval, align 8		; <<1 x i64>> [#uses=0]
 	br label %return
 
 return:		; preds = %entry
-	%retval5 = load <1 x i64>* %retval		; <<1 x i64>> [#uses=1]
+	%retval5 = load <1 x i64>, <1 x i64>* %retval		; <<1 x i64>> [#uses=1]
 	ret <1 x i64> %retval5
 }
 
@@ -48,16 +48,16 @@
 	%tmp = alloca i64, align 8		; <i64*> [#uses=2]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store <2 x i64> %__A, <2 x i64>* %__A_addr
-	%tmp1 = load <2 x i64>* %__A_addr, align 16		; <<2 x i64>> [#uses=1]
+	%tmp1 = load <2 x i64>, <2 x i64>* %__A_addr, align 16		; <<2 x i64>> [#uses=1]
 	%tmp2 = bitcast <2 x i64> %tmp1 to <2 x i64>		; <<2 x i64>> [#uses=1]
 	%tmp3 = extractelement <2 x i64> %tmp2, i32 0		; <i64> [#uses=1]
 	store i64 %tmp3, i64* %tmp, align 8
-	%tmp4 = load i64* %tmp, align 8		; <i64> [#uses=1]
+	%tmp4 = load i64, i64* %tmp, align 8		; <i64> [#uses=1]
 	store i64 %tmp4, i64* %retval, align 8
 	br label %return
 
 return:		; preds = %entry
-	%retval5 = load i64* %retval		; <i64> [#uses=1]
+	%retval5 = load i64, i64* %retval		; <i64> [#uses=1]
 	ret i64 %retval5
 }
 
@@ -68,15 +68,15 @@
 	%tmp = alloca i64, align 8		; <i64*> [#uses=2]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store <2 x i64> %__A, <2 x i64>* %__A_addr
-	%tmp1 = load <2 x i64>* %__A_addr, align 16		; <<2 x i64>> [#uses=1]
+	%tmp1 = load <2 x i64>, <2 x i64>* %__A_addr, align 16		; <<2 x i64>> [#uses=1]
 	%tmp2 = bitcast <2 x i64> %tmp1 to <2 x i64>		; <<2 x i64>> [#uses=1]
 	%tmp3 = extractelement <2 x i64> %tmp2, i32 1		; <i64> [#uses=1]
 	store i64 %tmp3, i64* %tmp, align 8
-	%tmp4 = load i64* %tmp, align 8		; <i64> [#uses=1]
+	%tmp4 = load i64, i64* %tmp, align 8		; <i64> [#uses=1]
 	store i64 %tmp4, i64* %retval, align 8
 	br label %return
 
 return:		; preds = %entry
-	%retval5 = load i64* %retval		; <i64> [#uses=1]
+	%retval5 = load i64, i64* %retval		; <i64> [#uses=1]
 	ret i64 %retval5
 }
diff --git a/llvm/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll b/llvm/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
index 4045618..019c442 100644
--- a/llvm/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
+++ b/llvm/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll
@@ -12,7 +12,7 @@
 bb:             ; preds = %bb, %entry
         %name8.0.reg2mem.0.rec = phi i64 [ %indvar.next, %bb ], [ 0, %entry ]           ; <i64> [#uses=1]
         %hash.0.reg2mem.0 = phi i64 [ %tmp27, %bb ], [ 0, %entry ]              ; <i64> [#uses=1]
-        %tmp13 = load i8* null, align 1         ; <i8> [#uses=1]
+        %tmp13 = load i8, i8* null, align 1         ; <i8> [#uses=1]
         %tmp1314 = zext i8 %tmp13 to i64                ; <i64> [#uses=1]
         %tmp25 = lshr i64 %tmp1314, 4           ; <i64> [#uses=1]
         %tmp22 = add i64 %tmp25, %hash.0.reg2mem.0              ; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2007-11-06-InstrSched.ll b/llvm/test/CodeGen/X86/2007-11-06-InstrSched.ll
index 63e2ad2..d88b45f 100644
--- a/llvm/test/CodeGen/X86/2007-11-06-InstrSched.ll
+++ b/llvm/test/CodeGen/X86/2007-11-06-InstrSched.ll
@@ -9,10 +9,10 @@
 	%i.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %tmp17, %bb18 ]		; <i32> [#uses=3]
 	%res.0.reg2mem.0 = phi float [ 0.000000e+00, %entry ], [ %tmp14, %bb18 ]		; <float> [#uses=1]
 	%tmp3 = getelementptr i32, i32* %x, i32 %i.0.reg2mem.0		; <i32*> [#uses=1]
-	%tmp4 = load i32* %tmp3, align 4		; <i32> [#uses=1]
+	%tmp4 = load i32, i32* %tmp3, align 4		; <i32> [#uses=1]
 	%tmp45 = sitofp i32 %tmp4 to float		; <float> [#uses=1]
 	%tmp8 = getelementptr float, float* %y, i32 %i.0.reg2mem.0		; <float*> [#uses=1]
-	%tmp9 = load float* %tmp8, align 4		; <float> [#uses=1]
+	%tmp9 = load float, float* %tmp8, align 4		; <float> [#uses=1]
 	%tmp11 = fmul float %tmp9, %tmp45		; <float> [#uses=1]
 	%tmp14 = fadd float %tmp11, %res.0.reg2mem.0		; <float> [#uses=2]
 	%tmp17 = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=2]
diff --git a/llvm/test/CodeGen/X86/2007-11-07-MulBy4.ll b/llvm/test/CodeGen/X86/2007-11-07-MulBy4.ll
index d5b630b..06e0a77 100644
--- a/llvm/test/CodeGen/X86/2007-11-07-MulBy4.ll
+++ b/llvm/test/CodeGen/X86/2007-11-07-MulBy4.ll
@@ -7,7 +7,7 @@
 
 define fastcc i32 @foo(i16* %eptr, i8* %ecode, %struct.foo_data* %md, i32 %ims) {
 entry:
-	%tmp36 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp36 = load i32, i32* null, align 4		; <i32> [#uses=1]
 	%tmp37 = icmp ult i32 0, %tmp36		; <i1> [#uses=1]
 	br i1 %tmp37, label %cond_next79, label %cond_true
 
@@ -15,7 +15,7 @@
 	ret i32 0
 
 cond_next79:		; preds = %entry
-	%tmp85 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp85 = load i32, i32* null, align 4		; <i32> [#uses=1]
 	%tmp86 = icmp ult i32 0, %tmp85		; <i1> [#uses=1]
 	br i1 %tmp86, label %cond_next130, label %cond_true89
 
diff --git a/llvm/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll b/llvm/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll
index f9ea244..3404fe6 100644
--- a/llvm/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll
+++ b/llvm/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll
@@ -18,9 +18,9 @@
 	br i1 true, label %bb4668, label %bb848
 
 bb4668:		; preds = %bb4648
-	%tmp5464 = load i64* %x82167, align 8		; <i64> [#uses=1]
+	%tmp5464 = load i64, i64* %x82167, align 8		; <i64> [#uses=1]
 	%tmp5467 = icmp ne i64 0, %tmp5464		; <i1> [#uses=1]
-	%tmp5470 = load i32** %tmp1272, align 8		; <i32*> [#uses=1]
+	%tmp5470 = load i32*, i32** %tmp1272, align 8		; <i32*> [#uses=1]
 	%tmp5471 = icmp eq i32* %tmp5470, null		; <i1> [#uses=1]
 	call fastcc void @c34007g__pkg__create.311( %struct.c34007g__pkg__parent* null, i32 7, i32 9, i32 2, i32 4, i32 1 )
 	%tmp5475 = or i1 %tmp5471, %tmp5467		; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll b/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
index deb4247..26d1827 100644
--- a/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
+++ b/llvm/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll
@@ -12,9 +12,9 @@
 	br i1 true, label %bb4668, label %bb848
 
 bb4668:		; preds = %bb4648
-	%tmp5464 = load i64* %x82167, align 8		; <i64> [#uses=1]
+	%tmp5464 = load i64, i64* %x82167, align 8		; <i64> [#uses=1]
 	%tmp5467 = icmp ne i64 0, %tmp5464		; <i1> [#uses=1]
-	%tmp5470 = load i32** %tmp1272, align 8		; <i32*> [#uses=1]
+	%tmp5470 = load i32*, i32** %tmp1272, align 8		; <i32*> [#uses=1]
 	%tmp5471 = icmp eq i32* %tmp5470, null		; <i1> [#uses=1]
 	%tmp5475 = or i1 %tmp5471, %tmp5467		; <i1> [#uses=1]
 	%tmp5497 = or i1 %tmp5475, false		; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll b/llvm/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
index 0af2eb3..7da85d3 100644
--- a/llvm/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
+++ b/llvm/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll
@@ -27,7 +27,7 @@
 	ret i32 0
 
 bb986:		; preds = %bb951
-	%tmp993 = load i32* %tmp961, align 4		; <i32> [#uses=1]
+	%tmp993 = load i32, i32* %tmp961, align 4		; <i32> [#uses=1]
 	%tmp995 = icmp ugt i32 %tmp993, %tmp910		; <i1> [#uses=2]
 	%tmp1002 = add i32 %tmp955, 1		; <i32> [#uses=1]
 	%low.0 = select i1 %tmp995, i32 0, i32 %tmp1002		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll b/llvm/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
index 9584b71..6e98f9c 100644
--- a/llvm/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
+++ b/llvm/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll
@@ -2,29 +2,29 @@
 
 define void @SolveCubic(double %a, double %b, double %c, double %d, i32* %solutions, double* %x) {
 entry:
-	%tmp71 = load x86_fp80* null, align 16		; <x86_fp80> [#uses=1]
+	%tmp71 = load x86_fp80, x86_fp80* null, align 16		; <x86_fp80> [#uses=1]
 	%tmp72 = fdiv x86_fp80 %tmp71, 0xKC000C000000000000000		; <x86_fp80> [#uses=1]
 	%tmp73 = fadd x86_fp80 0xK00000000000000000000, %tmp72		; <x86_fp80> [#uses=1]
 	%tmp7374 = fptrunc x86_fp80 %tmp73 to double		; <double> [#uses=1]
 	store double %tmp7374, double* null, align 8
-	%tmp81 = load double* null, align 8		; <double> [#uses=1]
+	%tmp81 = load double, double* null, align 8		; <double> [#uses=1]
 	%tmp82 = fadd double %tmp81, 0x401921FB54442D18		; <double> [#uses=1]
 	%tmp83 = fdiv double %tmp82, 3.000000e+00		; <double> [#uses=1]
 	%tmp84 = call double @cos( double %tmp83 )		; <double> [#uses=1]
 	%tmp85 = fmul double 0.000000e+00, %tmp84		; <double> [#uses=1]
 	%tmp8586 = fpext double %tmp85 to x86_fp80		; <x86_fp80> [#uses=1]
-	%tmp87 = load x86_fp80* null, align 16		; <x86_fp80> [#uses=1]
+	%tmp87 = load x86_fp80, x86_fp80* null, align 16		; <x86_fp80> [#uses=1]
 	%tmp88 = fdiv x86_fp80 %tmp87, 0xKC000C000000000000000		; <x86_fp80> [#uses=1]
 	%tmp89 = fadd x86_fp80 %tmp8586, %tmp88		; <x86_fp80> [#uses=1]
 	%tmp8990 = fptrunc x86_fp80 %tmp89 to double		; <double> [#uses=1]
 	store double %tmp8990, double* null, align 8
-	%tmp97 = load double* null, align 8		; <double> [#uses=1]
+	%tmp97 = load double, double* null, align 8		; <double> [#uses=1]
 	%tmp98 = fadd double %tmp97, 0x402921FB54442D18		; <double> [#uses=1]
 	%tmp99 = fdiv double %tmp98, 3.000000e+00		; <double> [#uses=1]
 	%tmp100 = call double @cos( double %tmp99 )		; <double> [#uses=1]
 	%tmp101 = fmul double 0.000000e+00, %tmp100		; <double> [#uses=1]
 	%tmp101102 = fpext double %tmp101 to x86_fp80		; <x86_fp80> [#uses=1]
-	%tmp103 = load x86_fp80* null, align 16		; <x86_fp80> [#uses=1]
+	%tmp103 = load x86_fp80, x86_fp80* null, align 16		; <x86_fp80> [#uses=1]
 	%tmp104 = fdiv x86_fp80 %tmp103, 0xKC000C000000000000000		; <x86_fp80> [#uses=1]
 	%tmp105 = fadd x86_fp80 %tmp101102, %tmp104		; <x86_fp80> [#uses=1]
 	%tmp105106 = fptrunc x86_fp80 %tmp105 to double		; <double> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll b/llvm/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll
index a5a693c..ffc5a1f 100644
--- a/llvm/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll
+++ b/llvm/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll
@@ -4,27 +4,27 @@
 
 define void @localize_local_bb19_bb(%struct.node_t** %cur_node) {
 newFuncRoot:
-	%tmp1 = load %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
+	%tmp1 = load %struct.node_t*, %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
 	%tmp2 = getelementptr %struct.node_t, %struct.node_t* %tmp1, i32 0, i32 4		; <double**> [#uses=1]
-	%tmp3 = load double** %tmp2, align 4		; <double*> [#uses=1]
-	%tmp4 = load %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
+	%tmp3 = load double*, double** %tmp2, align 4		; <double*> [#uses=1]
+	%tmp4 = load %struct.node_t*, %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
 	%tmp5 = getelementptr %struct.node_t, %struct.node_t* %tmp4, i32 0, i32 4		; <double**> [#uses=1]
 	store double* %tmp3, double** %tmp5, align 4
-	%tmp6 = load %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
+	%tmp6 = load %struct.node_t*, %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
 	%tmp7 = getelementptr %struct.node_t, %struct.node_t* %tmp6, i32 0, i32 3		; <double***> [#uses=1]
-	%tmp8 = load double*** %tmp7, align 4		; <double**> [#uses=1]
-	%tmp9 = load %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
+	%tmp8 = load double**, double*** %tmp7, align 4		; <double**> [#uses=1]
+	%tmp9 = load %struct.node_t*, %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
 	%tmp10 = getelementptr %struct.node_t, %struct.node_t* %tmp9, i32 0, i32 3		; <double***> [#uses=1]
 	store double** %tmp8, double*** %tmp10, align 4
-	%tmp11 = load %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
+	%tmp11 = load %struct.node_t*, %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
 	%tmp12 = getelementptr %struct.node_t, %struct.node_t* %tmp11, i32 0, i32 0		; <double**> [#uses=1]
-	%tmp13 = load double** %tmp12, align 4		; <double*> [#uses=1]
-	%tmp14 = load %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
+	%tmp13 = load double*, double** %tmp12, align 4		; <double*> [#uses=1]
+	%tmp14 = load %struct.node_t*, %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
 	%tmp15 = getelementptr %struct.node_t, %struct.node_t* %tmp14, i32 0, i32 0		; <double**> [#uses=1]
 	store double* %tmp13, double** %tmp15, align 4
-	%tmp16 = load %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
+	%tmp16 = load %struct.node_t*, %struct.node_t** %cur_node, align 4		; <%struct.node_t*> [#uses=1]
 	%tmp17 = getelementptr %struct.node_t, %struct.node_t* %tmp16, i32 0, i32 1		; <%struct.node_t**> [#uses=1]
-	%tmp18 = load %struct.node_t** %tmp17, align 4		; <%struct.node_t*> [#uses=1]
+	%tmp18 = load %struct.node_t*, %struct.node_t** %tmp17, align 4		; <%struct.node_t*> [#uses=1]
 	store %struct.node_t* %tmp18, %struct.node_t** %cur_node, align 4
 	ret void
 }
diff --git a/llvm/test/CodeGen/X86/2008-02-05-ISelCrash.ll b/llvm/test/CodeGen/X86/2008-02-05-ISelCrash.ll
index 443a32d..ce233a9 100644
--- a/llvm/test/CodeGen/X86/2008-02-05-ISelCrash.ll
+++ b/llvm/test/CodeGen/X86/2008-02-05-ISelCrash.ll
@@ -5,7 +5,7 @@
 
 define fastcc i32 @ab(i32 %alpha, i32 %beta) nounwind  {
 entry:
-	%tmp1 = load i64* @nodes, align 8		; <i64> [#uses=1]
+	%tmp1 = load i64, i64* @nodes, align 8		; <i64> [#uses=1]
 	%tmp2 = add i64 %tmp1, 1		; <i64> [#uses=1]
 	store i64 %tmp2, i64* @nodes, align 8
 	ret i32 0
diff --git a/llvm/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll b/llvm/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
index 6e065d1..56b1c78 100644
--- a/llvm/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
+++ b/llvm/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll
@@ -7,9 +7,9 @@
 	%tmp4 = fsub double -0.000000e+00, %z.1		; <double> [#uses=1]
 	call void @casinh( { double, double }* sret  %memtmp, double %tmp4, double %z.0 ) nounwind 
 	%tmp19 = getelementptr { double, double }, { double, double }* %memtmp, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp20 = load double* %tmp19, align 8		; <double> [#uses=1]
+	%tmp20 = load double, double* %tmp19, align 8		; <double> [#uses=1]
 	%tmp22 = getelementptr { double, double }, { double, double }* %memtmp, i32 0, i32 1		; <double*> [#uses=1]
-	%tmp23 = load double* %tmp22, align 8		; <double> [#uses=1]
+	%tmp23 = load double, double* %tmp22, align 8		; <double> [#uses=1]
 	%tmp32 = fsub double -0.000000e+00, %tmp20		; <double> [#uses=1]
 	%tmp37 = getelementptr { double, double }, { double, double }* %agg.result, i32 0, i32 0		; <double*> [#uses=1]
 	store double %tmp23, double* %tmp37, align 8
diff --git a/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll b/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
index 6ab8984..4ada770 100644
--- a/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
+++ b/llvm/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
@@ -6,13 +6,13 @@
 
 define void @minmax(float* %result) nounwind optsize {
 entry:
-	%tmp2 = load float* %result, align 4		; <float> [#uses=6]
+	%tmp2 = load float, float* %result, align 4		; <float> [#uses=6]
 	%tmp4 = getelementptr float, float* %result, i32 2		; <float*> [#uses=5]
-	%tmp5 = load float* %tmp4, align 4		; <float> [#uses=10]
+	%tmp5 = load float, float* %tmp4, align 4		; <float> [#uses=10]
 	%tmp7 = getelementptr float, float* %result, i32 4		; <float*> [#uses=5]
-	%tmp8 = load float* %tmp7, align 4		; <float> [#uses=8]
+	%tmp8 = load float, float* %tmp7, align 4		; <float> [#uses=8]
 	%tmp10 = getelementptr float, float* %result, i32 6		; <float*> [#uses=3]
-	%tmp11 = load float* %tmp10, align 4		; <float> [#uses=8]
+	%tmp11 = load float, float* %tmp10, align 4		; <float> [#uses=8]
 	%tmp12 = fcmp olt float %tmp8, %tmp11		; <i1> [#uses=5]
 	br i1 %tmp12, label %bb, label %bb21
 
@@ -59,7 +59,7 @@
 
 bb111:		; preds = %bb103, %bb80, %bb72, %bb50, %bb40, %bb26
 	%iftmp.0.0.in = phi float* [ %tmp10, %bb103 ], [ %result, %bb26 ], [ %result, %bb40 ], [ %result, %bb50 ], [ %tmp4.mux, %bb80 ], [ %tmp4.mux787, %bb72 ]		; <float*> [#uses=1]
-	%iftmp.0.0 = load float* %iftmp.0.0.in		; <float> [#uses=1]
+	%iftmp.0.0 = load float, float* %iftmp.0.0.in		; <float> [#uses=1]
 	%tmp125 = fcmp ogt float %tmp8, %tmp11		; <i1> [#uses=5]
 	br i1 %tmp125, label %bb128, label %bb136
 
@@ -106,15 +106,15 @@
 
 bb226:		; preds = %bb218, %bb195, %bb187, %bb165, %bb155, %bb141
 	%iftmp.7.0.in = phi float* [ %tmp10, %bb218 ], [ %result, %bb141 ], [ %result, %bb155 ], [ %result, %bb165 ], [ %tmp4.mux789, %bb195 ], [ %tmp4.mux791, %bb187 ]		; <float*> [#uses=1]
-	%iftmp.7.0 = load float* %iftmp.7.0.in		; <float> [#uses=1]
+	%iftmp.7.0 = load float, float* %iftmp.7.0.in		; <float> [#uses=1]
 	%tmp229 = getelementptr float, float* %result, i32 1		; <float*> [#uses=7]
-	%tmp230 = load float* %tmp229, align 4		; <float> [#uses=6]
+	%tmp230 = load float, float* %tmp229, align 4		; <float> [#uses=6]
 	%tmp232 = getelementptr float, float* %result, i32 3		; <float*> [#uses=5]
-	%tmp233 = load float* %tmp232, align 4		; <float> [#uses=10]
+	%tmp233 = load float, float* %tmp232, align 4		; <float> [#uses=10]
 	%tmp235 = getelementptr float, float* %result, i32 5		; <float*> [#uses=5]
-	%tmp236 = load float* %tmp235, align 4		; <float> [#uses=8]
+	%tmp236 = load float, float* %tmp235, align 4		; <float> [#uses=8]
 	%tmp238 = getelementptr float, float* %result, i32 7		; <float*> [#uses=3]
-	%tmp239 = load float* %tmp238, align 4		; <float> [#uses=8]
+	%tmp239 = load float, float* %tmp238, align 4		; <float> [#uses=8]
 	%tmp240 = fcmp olt float %tmp236, %tmp239		; <i1> [#uses=5]
 	br i1 %tmp240, label %bb243, label %bb251
 
@@ -161,7 +161,7 @@
 
 bb341:		; preds = %bb333, %bb310, %bb302, %bb280, %bb270, %bb256
 	%iftmp.14.0.in = phi float* [ %tmp238, %bb333 ], [ %tmp229, %bb280 ], [ %tmp229, %bb270 ], [ %tmp229, %bb256 ], [ %tmp232.mux, %bb310 ], [ %tmp232.mux794, %bb302 ]		; <float*> [#uses=1]
-	%iftmp.14.0 = load float* %iftmp.14.0.in		; <float> [#uses=1]
+	%iftmp.14.0 = load float, float* %iftmp.14.0.in		; <float> [#uses=1]
 	%tmp355 = fcmp ogt float %tmp236, %tmp239		; <i1> [#uses=5]
 	br i1 %tmp355, label %bb358, label %bb366
 
@@ -208,7 +208,7 @@
 
 bb456:		; preds = %bb448, %bb425, %bb417, %bb395, %bb385, %bb371
 	%iftmp.21.0.in = phi float* [ %tmp238, %bb448 ], [ %tmp229, %bb395 ], [ %tmp229, %bb385 ], [ %tmp229, %bb371 ], [ %tmp232.mux796, %bb425 ], [ %tmp232.mux798, %bb417 ]		; <float*> [#uses=1]
-	%iftmp.21.0 = load float* %iftmp.21.0.in		; <float> [#uses=1]
+	%iftmp.21.0 = load float, float* %iftmp.21.0.in		; <float> [#uses=1]
 	%tmp458459 = fpext float %iftmp.21.0 to double		; <double> [#uses=1]
 	%tmp460461 = fpext float %iftmp.7.0 to double		; <double> [#uses=1]
 	%tmp462463 = fpext float %iftmp.14.0 to double		; <double> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll b/llvm/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
index 0190df5..b3f303f 100644
--- a/llvm/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
+++ b/llvm/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll
@@ -8,7 +8,7 @@
 
 define void @test() nounwind  {
 entry:
-	%tmp = load i32* @pixels, align 4		; <i32> [#uses=1]
+	%tmp = load i32, i32* @pixels, align 4		; <i32> [#uses=1]
 	%tmp1 = tail call i32 asm sideeffect "a: $0 $1", "=r,0,~{dirflag},~{fpsr},~{flags},~{ax}"( i32 %tmp ) nounwind 		; <i32> [#uses=1]
 	store i32 %tmp1, i32* @pixels, align 4
 	ret void
diff --git a/llvm/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll b/llvm/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
index beaf31f9..75f88b0 100644
--- a/llvm/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
+++ b/llvm/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll
@@ -13,38 +13,38 @@
 	store i8* %src, i8** %src_addr
 	store i32 %dst_stride, i32* %dst_stride_addr
 	store i32 %src_stride, i32* %src_stride_addr
-	%tmp = load i8** %dst_addr, align 4		; <i8*> [#uses=1]
+	%tmp = load i8*, i8** %dst_addr, align 4		; <i8*> [#uses=1]
 	%tmp1 = getelementptr i8, i8* %tmp, i32 0		; <i8*> [#uses=1]
 	%tmp12 = bitcast i8* %tmp1 to i32*		; <i32*> [#uses=1]
-	%tmp3 = load i8** %dst_addr, align 4		; <i8*> [#uses=1]
-	%tmp4 = load i32* %dst_stride_addr, align 4		; <i32> [#uses=1]
+	%tmp3 = load i8*, i8** %dst_addr, align 4		; <i8*> [#uses=1]
+	%tmp4 = load i32, i32* %dst_stride_addr, align 4		; <i32> [#uses=1]
 	%tmp5 = getelementptr i8, i8* %tmp3, i32 %tmp4		; <i8*> [#uses=1]
 	%tmp56 = bitcast i8* %tmp5 to i32*		; <i32*> [#uses=1]
-	%tmp7 = load i32* %dst_stride_addr, align 4		; <i32> [#uses=1]
+	%tmp7 = load i32, i32* %dst_stride_addr, align 4		; <i32> [#uses=1]
 	%tmp8 = mul i32 %tmp7, 2		; <i32> [#uses=1]
-	%tmp9 = load i8** %dst_addr, align 4		; <i8*> [#uses=1]
+	%tmp9 = load i8*, i8** %dst_addr, align 4		; <i8*> [#uses=1]
 	%tmp10 = getelementptr i8, i8* %tmp9, i32 %tmp8		; <i8*> [#uses=1]
 	%tmp1011 = bitcast i8* %tmp10 to i32*		; <i32*> [#uses=1]
-	%tmp13 = load i32* %dst_stride_addr, align 4		; <i32> [#uses=1]
+	%tmp13 = load i32, i32* %dst_stride_addr, align 4		; <i32> [#uses=1]
 	%tmp14 = mul i32 %tmp13, 3		; <i32> [#uses=1]
-	%tmp15 = load i8** %dst_addr, align 4		; <i8*> [#uses=1]
+	%tmp15 = load i8*, i8** %dst_addr, align 4		; <i8*> [#uses=1]
 	%tmp16 = getelementptr i8, i8* %tmp15, i32 %tmp14		; <i8*> [#uses=1]
 	%tmp1617 = bitcast i8* %tmp16 to i32*		; <i32*> [#uses=1]
-	%tmp18 = load i8** %src_addr, align 4		; <i8*> [#uses=1]
+	%tmp18 = load i8*, i8** %src_addr, align 4		; <i8*> [#uses=1]
 	%tmp19 = getelementptr i8, i8* %tmp18, i32 0		; <i8*> [#uses=1]
 	%tmp1920 = bitcast i8* %tmp19 to i32*		; <i32*> [#uses=1]
-	%tmp21 = load i8** %src_addr, align 4		; <i8*> [#uses=1]
-	%tmp22 = load i32* %src_stride_addr, align 4		; <i32> [#uses=1]
+	%tmp21 = load i8*, i8** %src_addr, align 4		; <i8*> [#uses=1]
+	%tmp22 = load i32, i32* %src_stride_addr, align 4		; <i32> [#uses=1]
 	%tmp23 = getelementptr i8, i8* %tmp21, i32 %tmp22		; <i8*> [#uses=1]
 	%tmp2324 = bitcast i8* %tmp23 to i32*		; <i32*> [#uses=1]
-	%tmp25 = load i32* %src_stride_addr, align 4		; <i32> [#uses=1]
+	%tmp25 = load i32, i32* %src_stride_addr, align 4		; <i32> [#uses=1]
 	%tmp26 = mul i32 %tmp25, 2		; <i32> [#uses=1]
-	%tmp27 = load i8** %src_addr, align 4		; <i8*> [#uses=1]
+	%tmp27 = load i8*, i8** %src_addr, align 4		; <i8*> [#uses=1]
 	%tmp28 = getelementptr i8, i8* %tmp27, i32 %tmp26		; <i8*> [#uses=1]
 	%tmp2829 = bitcast i8* %tmp28 to i32*		; <i32*> [#uses=1]
-	%tmp30 = load i32* %src_stride_addr, align 4		; <i32> [#uses=1]
+	%tmp30 = load i32, i32* %src_stride_addr, align 4		; <i32> [#uses=1]
 	%tmp31 = mul i32 %tmp30, 3		; <i32> [#uses=1]
-	%tmp32 = load i8** %src_addr, align 4		; <i8*> [#uses=1]
+	%tmp32 = load i8*, i8** %src_addr, align 4		; <i8*> [#uses=1]
 	%tmp33 = getelementptr i8, i8* %tmp32, i32 %tmp31		; <i8*> [#uses=1]
 	%tmp3334 = bitcast i8* %tmp33 to i32*		; <i32*> [#uses=1]
 	call void asm sideeffect "movd  $4, %mm0                \0A\09movd  $5, %mm1                \0A\09movd  $6, %mm2                \0A\09movd  $7, %mm3                \0A\09punpcklbw %mm1, %mm0         \0A\09punpcklbw %mm3, %mm2         \0A\09movq %mm0, %mm1              \0A\09punpcklwd %mm2, %mm0         \0A\09punpckhwd %mm2, %mm1         \0A\09movd  %mm0, $0                \0A\09punpckhdq %mm0, %mm0         \0A\09movd  %mm0, $1                \0A\09movd  %mm1, $2                \0A\09punpckhdq %mm1, %mm1         \0A\09movd  %mm1, $3                \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( i32* %tmp12, i32* %tmp56, i32* %tmp1011, i32* %tmp1617, i32* %tmp1920, i32* %tmp2324, i32* %tmp2829, i32* %tmp3334 ) nounwind 
diff --git a/llvm/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
index 15be50d..382fbed 100644
--- a/llvm/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
+++ b/llvm/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
@@ -11,10 +11,10 @@
 
 bb53:		; preds = %entry
 	%tmp55 = call %struct.YY** @AA( i64 1, %struct.XX* %uen )		; <%struct.YY**> [#uses=3]
-	%tmp2728128 = load %struct.XX** null		; <%struct.XX*> [#uses=1]
-	%tmp61 = load %struct.YY** %tmp55, align 8		; <%struct.YY*> [#uses=1]
+	%tmp2728128 = load %struct.XX*, %struct.XX** null		; <%struct.XX*> [#uses=1]
+	%tmp61 = load %struct.YY*, %struct.YY** %tmp55, align 8		; <%struct.YY*> [#uses=1]
 	%tmp62 = getelementptr %struct.YY, %struct.YY* %tmp61, i32 0, i32 0		; <i64*> [#uses=1]
-	%tmp63 = load i64* %tmp62, align 8		; <i64> [#uses=1]
+	%tmp63 = load i64, i64* %tmp62, align 8		; <i64> [#uses=1]
 	%tmp6566 = zext i16 %tmp45 to i64		; <i64> [#uses=1]
 	%tmp67 = shl i64 %tmp6566, 1		; <i64> [#uses=1]
 	call void @BB( %struct.YY** %tmp55, i64 %tmp67, i8 signext  0, %struct.XX* %uen )
@@ -30,7 +30,7 @@
 	%tmp.135 = trunc i64 %tmp63 to i32		; <i32> [#uses=1]
 	%tmp136 = shl i32 %indvar133, 1		; <i32> [#uses=1]
 	%DD = add i32 %tmp136, %tmp.135		; <i32> [#uses=1]
-	%tmp73 = load %struct.ZZ*** %tmp72, align 8		; <%struct.ZZ**> [#uses=0]
+	%tmp73 = load %struct.ZZ**, %struct.ZZ*** %tmp72, align 8		; <%struct.ZZ**> [#uses=0]
 	br i1 false, label %bb119, label %bb77
 
 bb77:		; preds = %bb70
diff --git a/llvm/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll b/llvm/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
index 3c6ba68..857e623 100644
--- a/llvm/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
+++ b/llvm/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll
@@ -34,7 +34,7 @@
 	%tmp56 = add i32 %tmp55, -1		; <i32> [#uses=1]
 	%tmp5657 = sitofp i32 %tmp56 to double		; <double> [#uses=1]
 	%tmp15.i49 = getelementptr %struct.Lattice, %struct.Lattice* %this, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp16.i50 = load double* %tmp15.i49, align 4		; <double> [#uses=1]
+	%tmp16.i50 = load double, double* %tmp15.i49, align 4		; <double> [#uses=1]
 	%tmp17.i = fmul double %tmp5657, %tmp16.i50		; <double> [#uses=1]
 	%tmp20.i39 = fadd double %tmp17.i, %tmp17.i63		; <double> [#uses=1]
 	%tmp20.i23 = fadd double %tmp20.i39, %tmp17.i76		; <double> [#uses=1]
@@ -47,11 +47,11 @@
 bb58:		; preds = %bb58, %bb58.preheader
 	%tmp20.i7 = getelementptr %struct.CompAtom, %struct.CompAtom* %d, i32 0, i32 2		; <i32*> [#uses=2]
 	%tmp25.i = getelementptr %struct.CompAtom, %struct.CompAtom* %tmp1819, i32 0, i32 2		; <i32*> [#uses=2]
-	%tmp74.i = load i32* %tmp20.i7, align 1		; <i32> [#uses=1]
+	%tmp74.i = load i32, i32* %tmp20.i7, align 1		; <i32> [#uses=1]
 	%tmp82.i = and i32 %tmp74.i, 134217728		; <i32> [#uses=1]
 	%tmp85.i = or i32 0, %tmp82.i		; <i32> [#uses=1]
 	store i32 %tmp85.i, i32* %tmp25.i, align 1
-	%tmp88.i = load i32* %tmp20.i7, align 1		; <i32> [#uses=1]
+	%tmp88.i = load i32, i32* %tmp20.i7, align 1		; <i32> [#uses=1]
 	%tmp95.i = and i32 %tmp88.i, -268435456		; <i32> [#uses=1]
 	%tmp97.i = or i32 0, %tmp95.i		; <i32> [#uses=1]
 	store i32 %tmp97.i, i32* %tmp25.i, align 1
diff --git a/llvm/test/CodeGen/X86/2008-03-07-APIntBug.ll b/llvm/test/CodeGen/X86/2008-03-07-APIntBug.ll
index fb4f97a..409bcd5 100644
--- a/llvm/test/CodeGen/X86/2008-03-07-APIntBug.ll
+++ b/llvm/test/CodeGen/X86/2008-03-07-APIntBug.ll
@@ -18,16 +18,16 @@
 bb1233.exitStub:		; preds = %bb1163
 	ret void
 bb1163:		; preds = %newFuncRoot
-	%tmp1164 = load %struct.rec** %s, align 4		; <%struct.rec*> [#uses=1]
+	%tmp1164 = load %struct.rec*, %struct.rec** %s, align 4		; <%struct.rec*> [#uses=1]
 	%tmp1165 = getelementptr %struct.rec, %struct.rec* %tmp1164, i32 0, i32 0		; <%struct.head_type*> [#uses=1]
 	%tmp11651166 = bitcast %struct.head_type* %tmp1165 to %struct.symbol_type*		; <%struct.symbol_type*> [#uses=1]
 	%tmp1167 = getelementptr %struct.symbol_type, %struct.symbol_type* %tmp11651166, i32 0, i32 3		; <%struct.rec**> [#uses=1]
-	%tmp1168 = load %struct.rec** %tmp1167, align 1		; <%struct.rec*> [#uses=2]
-	%tmp1169 = load %struct.rec** %s, align 4		; <%struct.rec*> [#uses=1]
+	%tmp1168 = load %struct.rec*, %struct.rec** %tmp1167, align 1		; <%struct.rec*> [#uses=2]
+	%tmp1169 = load %struct.rec*, %struct.rec** %s, align 4		; <%struct.rec*> [#uses=1]
 	%tmp1170 = getelementptr %struct.rec, %struct.rec* %tmp1169, i32 0, i32 0		; <%struct.head_type*> [#uses=1]
 	%tmp11701171 = bitcast %struct.head_type* %tmp1170 to %struct.symbol_type*		; <%struct.symbol_type*> [#uses=1]
 	%tmp1172 = getelementptr %struct.symbol_type, %struct.symbol_type* %tmp11701171, i32 0, i32 3		; <%struct.rec**> [#uses=1]
-	%tmp1173 = load %struct.rec** %tmp1172, align 1		; <%struct.rec*> [#uses=2]
+	%tmp1173 = load %struct.rec*, %struct.rec** %tmp1172, align 1		; <%struct.rec*> [#uses=2]
 	%tmp1174 = getelementptr %struct.rec, %struct.rec* %tmp1173, i32 0, i32 0		; <%struct.head_type*> [#uses=1]
 	%tmp11741175 = bitcast %struct.head_type* %tmp1174 to %struct.word_type*		; <%struct.word_type*> [#uses=1]
 	%tmp1176 = getelementptr %struct.word_type, %struct.word_type* %tmp11741175, i32 0, i32 2		; <%struct.SECOND_UNION*> [#uses=1]
@@ -35,7 +35,7 @@
 	%tmp11771178 = bitcast { i16, i8, i8 }* %tmp1177 to <{ i8, i8, i8, i8 }>*		; <<{ i8, i8, i8, i8 }>*> [#uses=1]
 	%tmp1179 = getelementptr <{ i8, i8, i8, i8 }>, <{ i8, i8, i8, i8 }>* %tmp11771178, i32 0, i32 2		; <i8*> [#uses=2]
 	%mask1180 = and i8 1, 1		; <i8> [#uses=2]
-	%tmp1181 = load i8* %tmp1179, align 1		; <i8> [#uses=1]
+	%tmp1181 = load i8, i8* %tmp1179, align 1		; <i8> [#uses=1]
 	%tmp1182 = shl i8 %mask1180, 7		; <i8> [#uses=1]
 	%tmp1183 = and i8 %tmp1181, 127		; <i8> [#uses=1]
 	%tmp1184 = or i8 %tmp1183, %tmp1182		; <i8> [#uses=1]
@@ -47,7 +47,7 @@
 	%tmp1189 = getelementptr %struct.SECOND_UNION, %struct.SECOND_UNION* %tmp1188, i32 0, i32 0		; <{ i16, i8, i8 }*> [#uses=1]
 	%tmp11891190 = bitcast { i16, i8, i8 }* %tmp1189 to <{ i8, i8, i8, i8 }>*		; <<{ i8, i8, i8, i8 }>*> [#uses=1]
 	%tmp1191 = getelementptr <{ i8, i8, i8, i8 }>, <{ i8, i8, i8, i8 }>* %tmp11891190, i32 0, i32 2		; <i8*> [#uses=1]
-	%tmp1192 = load i8* %tmp1191, align 1		; <i8> [#uses=1]
+	%tmp1192 = load i8, i8* %tmp1191, align 1		; <i8> [#uses=1]
 	%tmp1193 = lshr i8 %tmp1192, 7		; <i8> [#uses=1]
 	%mask1194 = and i8 %tmp1193, 1		; <i8> [#uses=2]
 	%mask1195 = and i8 %mask1194, 1		; <i8> [#uses=0]
@@ -58,7 +58,7 @@
 	%tmp11991200 = bitcast { i16, i8, i8 }* %tmp1199 to <{ i8, i8, i8, i8 }>*		; <<{ i8, i8, i8, i8 }>*> [#uses=1]
 	%tmp1201 = getelementptr <{ i8, i8, i8, i8 }>, <{ i8, i8, i8, i8 }>* %tmp11991200, i32 0, i32 1		; <i8*> [#uses=2]
 	%mask1202 = and i8 %mask1194, 1		; <i8> [#uses=2]
-	%tmp1203 = load i8* %tmp1201, align 1		; <i8> [#uses=1]
+	%tmp1203 = load i8, i8* %tmp1201, align 1		; <i8> [#uses=1]
 	%tmp1204 = shl i8 %mask1202, 1		; <i8> [#uses=1]
 	%tmp1205 = and i8 %tmp1204, 2		; <i8> [#uses=1]
 	%tmp1206 = and i8 %tmp1203, -3		; <i8> [#uses=1]
@@ -71,12 +71,12 @@
 	%tmp1212 = getelementptr %struct.SECOND_UNION, %struct.SECOND_UNION* %tmp1211, i32 0, i32 0		; <{ i16, i8, i8 }*> [#uses=1]
 	%tmp12121213 = bitcast { i16, i8, i8 }* %tmp1212 to <{ i8, i8, i8, i8 }>*		; <<{ i8, i8, i8, i8 }>*> [#uses=1]
 	%tmp1214 = getelementptr <{ i8, i8, i8, i8 }>, <{ i8, i8, i8, i8 }>* %tmp12121213, i32 0, i32 1		; <i8*> [#uses=1]
-	%tmp1215 = load i8* %tmp1214, align 1		; <i8> [#uses=1]
+	%tmp1215 = load i8, i8* %tmp1214, align 1		; <i8> [#uses=1]
 	%tmp1216 = shl i8 %tmp1215, 6		; <i8> [#uses=1]
 	%tmp1217 = lshr i8 %tmp1216, 7		; <i8> [#uses=1]
 	%mask1218 = and i8 %tmp1217, 1		; <i8> [#uses=2]
 	%mask1219 = and i8 %mask1218, 1		; <i8> [#uses=0]
-	%tmp1220 = load %struct.rec** %s, align 4		; <%struct.rec*> [#uses=1]
+	%tmp1220 = load %struct.rec*, %struct.rec** %s, align 4		; <%struct.rec*> [#uses=1]
 	%tmp1221 = getelementptr %struct.rec, %struct.rec* %tmp1220, i32 0, i32 0		; <%struct.head_type*> [#uses=1]
 	%tmp12211222 = bitcast %struct.head_type* %tmp1221 to %struct.word_type*		; <%struct.word_type*> [#uses=1]
 	%tmp1223 = getelementptr %struct.word_type, %struct.word_type* %tmp12211222, i32 0, i32 2		; <%struct.SECOND_UNION*> [#uses=1]
@@ -84,7 +84,7 @@
 	%tmp12241225 = bitcast { i16, i8, i8 }* %tmp1224 to <{ i8, i8, i8, i8 }>*		; <<{ i8, i8, i8, i8 }>*> [#uses=1]
 	%tmp1226 = getelementptr <{ i8, i8, i8, i8 }>, <{ i8, i8, i8, i8 }>* %tmp12241225, i32 0, i32 1		; <i8*> [#uses=2]
 	%mask1227 = and i8 %mask1218, 1		; <i8> [#uses=2]
-	%tmp1228 = load i8* %tmp1226, align 1		; <i8> [#uses=1]
+	%tmp1228 = load i8, i8* %tmp1226, align 1		; <i8> [#uses=1]
 	%tmp1229 = and i8 %mask1227, 1		; <i8> [#uses=1]
 	%tmp1230 = and i8 %tmp1228, -2		; <i8> [#uses=1]
 	%tmp1231 = or i8 %tmp1230, %tmp1229		; <i8> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll b/llvm/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll
index 40aafb4..9fb325c 100644
--- a/llvm/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll
+++ b/llvm/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll
@@ -5,7 +5,7 @@
 
 define x86_stdcallcc i32 @parse_backslash(i8** inreg  %word, i32* inreg  %word_length, i32* inreg  %max_length) nounwind  {
 entry:
-	%tmp6 = load i8* null, align 1		; <i8> [#uses=1]
+	%tmp6 = load i8, i8* null, align 1		; <i8> [#uses=1]
 	br label %bb13
 bb13:		; preds = %entry
 	%tmp26 = call fastcc i8* @w_addchar( i8* null, i32* %word_length, i32* %max_length, i8 signext  %tmp6 ) nounwind 		; <i8*> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll b/llvm/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
index fab9b77..a9e3f33 100644
--- a/llvm/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
+++ b/llvm/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll
@@ -16,12 +16,12 @@
 entry:
 	%retval = alloca i32		; <i32*> [#uses=1]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	%tmp = load %struct.__res_state** @__libc_resp, align 4		; <%struct.__res_state*> [#uses=1]
+	%tmp = load %struct.__res_state*, %struct.__res_state** @__libc_resp, align 4		; <%struct.__res_state*> [#uses=1]
 	%tmp1 = getelementptr %struct.__res_state, %struct.__res_state* %tmp, i32 0, i32 0		; <i32*> [#uses=1]
 	store i32 0, i32* %tmp1, align 4
 	br label %return
 return:		; preds = %entry
-	%retval2 = load i32* %retval		; <i32> [#uses=1]
+	%retval2 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval2
 }
 
@@ -31,11 +31,11 @@
 entry:
 	%retval = alloca i32		; <i32*> [#uses=1]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	%tmp = load %struct.__res_state** @__libc_resp, align 4		; <%struct.__res_state*> [#uses=1]
+	%tmp = load %struct.__res_state*, %struct.__res_state** @__libc_resp, align 4		; <%struct.__res_state*> [#uses=1]
 	%tmp1 = getelementptr %struct.__res_state, %struct.__res_state* %tmp, i32 0, i32 0		; <i32*> [#uses=1]
 	store i32 1, i32* %tmp1, align 4
 	br label %return
 return:		; preds = %entry
-	%retval2 = load i32* %retval		; <i32> [#uses=1]
+	%retval2 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval2
 }
diff --git a/llvm/test/CodeGen/X86/2008-03-14-SpillerCrash.ll b/llvm/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
index 4c19834..d60d0c2 100644
--- a/llvm/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
+++ b/llvm/test/CodeGen/X86/2008-03-14-SpillerCrash.ll
@@ -10,7 +10,7 @@
 
 define i64 @____wcstoll_l_internal(i32* %nptr, i32** %endptr, i32 %base, i32 %group, %struct.__locale_struct* %loc) nounwind  {
 entry:
-	%tmp27 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp27 = load i32, i32* null, align 4		; <i32> [#uses=1]
 	%tmp83 = getelementptr i32, i32* %nptr, i32 1		; <i32*> [#uses=1]
 	%tmp233 = add i32 0, -48		; <i32> [#uses=1]
 	br label %bb271.us
@@ -32,7 +32,7 @@
 	br label %bb374.us
 bb374.us:		; preds = %bb314.us, %bb374.outer
 	%tmp376.us = getelementptr i32, i32* %s.5.ph, i32 0		; <i32*> [#uses=3]
-	%tmp378.us = load i32* %tmp376.us, align 4		; <i32> [#uses=2]
+	%tmp378.us = load i32, i32* %tmp376.us, align 4		; <i32> [#uses=2]
 	%tmp302.us = icmp eq i32* %tmp376.us, %tmp83		; <i1> [#uses=1]
 	%bothcond484.us = or i1 false, %tmp302.us		; <i1> [#uses=1]
 	br i1 %bothcond484.us, label %bb383, label %bb305.us
diff --git a/llvm/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll b/llvm/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
index 451624d..3e55390 100644
--- a/llvm/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
+++ b/llvm/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll
@@ -13,12 +13,12 @@
 cond_true:		; preds = %entry
 	%tmp1415 = shl i16 %param, 3		; <i16> [#uses=1]
 	%tmp17 = getelementptr %struct.AGenericCall, %struct.AGenericCall* %this, i32 0, i32 1		; <%struct.ComponentParameters**> [#uses=1]
-	%tmp18 = load %struct.ComponentParameters** %tmp17, align 8		; <%struct.ComponentParameters*> [#uses=1]
+	%tmp18 = load %struct.ComponentParameters*, %struct.ComponentParameters** %tmp17, align 8		; <%struct.ComponentParameters*> [#uses=1]
 	%tmp1920 = bitcast %struct.ComponentParameters* %tmp18 to i8*		; <i8*> [#uses=1]
 	%tmp212223 = sext i16 %tmp1415 to i64		; <i64> [#uses=1]
 	%tmp24 = getelementptr i8, i8* %tmp1920, i64 %tmp212223		; <i8*> [#uses=1]
 	%tmp2425 = bitcast i8* %tmp24 to i64*		; <i64*> [#uses=1]
-	%tmp28 = load i64* %tmp2425, align 8		; <i64> [#uses=1]
+	%tmp28 = load i64, i64* %tmp2425, align 8		; <i64> [#uses=1]
 	%tmp2829 = inttoptr i64 %tmp28 to i32*		; <i32*> [#uses=1]
 	%tmp31 = getelementptr %struct.AGenericCall, %struct.AGenericCall* %this, i32 0, i32 2		; <i32**> [#uses=1]
 	store i32* %tmp2829, i32** %tmp31, align 8
@@ -27,18 +27,18 @@
 cond_next:		; preds = %cond_true, %entry
 	%tmp4243 = shl i16 %param, 3		; <i16> [#uses=1]
 	%tmp46 = getelementptr %struct.AGenericCall, %struct.AGenericCall* %this, i32 0, i32 1		; <%struct.ComponentParameters**> [#uses=1]
-	%tmp47 = load %struct.ComponentParameters** %tmp46, align 8		; <%struct.ComponentParameters*> [#uses=1]
+	%tmp47 = load %struct.ComponentParameters*, %struct.ComponentParameters** %tmp46, align 8		; <%struct.ComponentParameters*> [#uses=1]
 	%tmp4849 = bitcast %struct.ComponentParameters* %tmp47 to i8*		; <i8*> [#uses=1]
 	%tmp505152 = sext i16 %tmp4243 to i64		; <i64> [#uses=1]
 	%tmp53 = getelementptr i8, i8* %tmp4849, i64 %tmp505152		; <i8*> [#uses=1]
 	%tmp5354 = bitcast i8* %tmp53 to i64*		; <i64*> [#uses=1]
-	%tmp58 = load i64* %tmp5354, align 8		; <i64> [#uses=1]
+	%tmp58 = load i64, i64* %tmp5354, align 8		; <i64> [#uses=1]
 	%tmp59 = icmp eq i64 %tmp58, 0		; <i1> [#uses=1]
 	br i1 %tmp59, label %UnifiedReturnBlock, label %cond_true63
 
 cond_true63:		; preds = %cond_next
 	%tmp65 = getelementptr %struct.AGenericCall, %struct.AGenericCall* %this, i32 0, i32 0		; <%struct.AGenericManager**> [#uses=1]
-	%tmp66 = load %struct.AGenericManager** %tmp65, align 8		; <%struct.AGenericManager*> [#uses=1]
+	%tmp66 = load %struct.AGenericManager*, %struct.AGenericManager** %tmp65, align 8		; <%struct.AGenericManager*> [#uses=1]
 	%tmp69 = tail call i32 @_ZN15AGenericManager24DefaultComponentInstanceERP23ComponentInstanceRecord( %struct.AGenericManager* %tmp66, %struct.ComponentInstanceRecord** %instance )		; <i32> [#uses=1]
 	ret i32 %tmp69
 
diff --git a/llvm/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll b/llvm/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll
index 7f27bfc..681a984 100644
--- a/llvm/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll
+++ b/llvm/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll
@@ -21,7 +21,7 @@
 	store i32 (...)** getelementptr ([4 x i32 (...)*]* @_ZTVSt9basic_iosIcSt11char_traitsIcEE, i32 0, i32 2), i32 (...)*** null, align 4
 	store i32 (...)** null, i32 (...)*** null, align 4
 	%ctg2242.i.i163.i = getelementptr i8, i8* %tmp96.i.i142.i, i32 0		; <i8*> [#uses=1]
-	%tmp150.i.i164.i = load i8** getelementptr ([4 x i8*]* @_ZTTSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE, i32 0, i64 2), align 4		; <i8*> [#uses=1]
+	%tmp150.i.i164.i = load i8*, i8** getelementptr ([4 x i8*]* @_ZTTSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE, i32 0, i64 2), align 4		; <i8*> [#uses=1]
 	%tmp150151.i.i165.i = bitcast i8* %tmp150.i.i164.i to i32 (...)**		; <i32 (...)**> [#uses=1]
 	%tmp153.i.i166.i = bitcast i8* %ctg2242.i.i163.i to i32 (...)***		; <i32 (...)***> [#uses=1]
 	store i32 (...)** %tmp150151.i.i165.i, i32 (...)*** %tmp153.i.i166.i, align 4
diff --git a/llvm/test/CodeGen/X86/2008-04-09-BranchFolding.ll b/llvm/test/CodeGen/X86/2008-04-09-BranchFolding.ll
index f4b2d71..a758fed 100644
--- a/llvm/test/CodeGen/X86/2008-04-09-BranchFolding.ll
+++ b/llvm/test/CodeGen/X86/2008-04-09-BranchFolding.ll
@@ -16,7 +16,7 @@
 bb17.i:		; preds = %bb140
 	ret %struct.tree_node* null
 bb143:		; preds = %entry
-	%tmp8.i43 = load %struct.tree_node** null, align 4		; <%struct.tree_node*> [#uses=1]
+	%tmp8.i43 = load %struct.tree_node*, %struct.tree_node** null, align 4		; <%struct.tree_node*> [#uses=1]
 	br i1 %tmp3.i40, label %bb160, label %bb9.i48
 bb9.i48:		; preds = %bb143
 	ret %struct.tree_node* null
diff --git a/llvm/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll b/llvm/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
index 0742371..f83c990 100644
--- a/llvm/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
+++ b/llvm/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll
@@ -40,8 +40,8 @@
 
 define void @"-[AA BB:optionIndex:delegate:CC:contextInfo:]"(%struct.AA* %self, %struct._message_ref_t* %_cmd, %struct.NSError* %inError, i64 %inOptionIndex, %struct.NSObject* %inDelegate, %struct.objc_selector* %inDidRecoverSelector, i8* %inContextInfo) {
 entry:
-	%tmp105 = load %struct.NSArray** null, align 8		; <%struct.NSArray*> [#uses=1]
-	%tmp107 = load %struct.NSObject** null, align 8		; <%struct.NSObject*> [#uses=1]
+	%tmp105 = load %struct.NSArray*, %struct.NSArray** null, align 8		; <%struct.NSArray*> [#uses=1]
+	%tmp107 = load %struct.NSObject*, %struct.NSObject** null, align 8		; <%struct.NSObject*> [#uses=1]
 	call void null( %struct.NSObject* %tmp107, %struct._message_ref_t* @"\01L_OBJC_MESSAGE_REF_228", %struct.NSArray* %tmp105, i8 signext  0 )
 	%tmp111 = call %struct.NSObject* (%struct.NSObject*, %struct.objc_selector*, ...)* @objc_msgSend( %struct.NSObject* null, %struct.objc_selector* null, i32 0, i8* null )		; <%struct.NSObject*> [#uses=0]
 	ret void
diff --git a/llvm/test/CodeGen/X86/2008-04-16-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-04-16-CoalescerBug.ll
index 3ccc0fe..1488034 100644
--- a/llvm/test/CodeGen/X86/2008-04-16-CoalescerBug.ll
+++ b/llvm/test/CodeGen/X86/2008-04-16-CoalescerBug.ll
@@ -22,7 +22,7 @@
 	store i16 %tmp113.us, i16* null, align 2
 	br label %bb53.us
 bb71.us:		; preds = %bb53.us
-	%tmp80.us = load i8* null, align 1		; <i8> [#uses=1]
+	%tmp80.us = load i8, i8* null, align 1		; <i8> [#uses=1]
 	%tmp8081.us = zext i8 %tmp80.us to i32		; <i32> [#uses=1]
 	%tmp87.us = mul i32 %tmp8081.us, 0		; <i32> [#uses=1]
 	%tmp92.us = add i32 0, %tmp87.us		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
index f244793..ff70421 100644
--- a/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
+++ b/llvm/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
@@ -33,7 +33,7 @@
 bb182.i:		; preds = %bb142.i
 	ret void
 bb3261:		; preds = %bb7834, %bb161.i
-	%tmp3263 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp3263 = load i32, i32* null, align 4		; <i32> [#uses=1]
 	%tmp3264 = icmp eq i32 %tmp3263, 37		; <i1> [#uses=1]
 	br i1 %tmp3264, label %bb3306, label %bb3267
 bb3267:		; preds = %bb3261
@@ -42,7 +42,7 @@
 	%tmp3310 = invoke %struct.wxStringBase* @_ZN12wxStringBaseaSEPKw( %struct.wxStringBase* null, i32* getelementptr ([5 x i32]* @.str89, i32 0, i32 0) )
 			to label %bb3314 unwind label %lpad		; <%struct.wxStringBase*> [#uses=0]
 bb3314:		; preds = %bb3306
-	%tmp3316 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp3316 = load i32, i32* null, align 4		; <i32> [#uses=1]
 	switch i32 %tmp3316, label %bb7595 [
 		 i32 0, label %bb7819
 		 i32 37, label %bb7806
diff --git a/llvm/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll b/llvm/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
index 86bce8e..06f7907 100644
--- a/llvm/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
+++ b/llvm/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll
@@ -6,7 +6,7 @@
 
 define i32 @main() nounwind  {
 entry:
-	%tmp122 = load <2 x i64>* null, align 16		; <<2 x i64>> [#uses=1]
+	%tmp122 = load <2 x i64>, <2 x i64>* null, align 16		; <<2 x i64>> [#uses=1]
 	%tmp126 = bitcast <2 x i64> %tmp122 to <8 x i16>		; <<8 x i16>> [#uses=1]
 	%tmp129 = call <8 x i16> @llvm.x86.sse41.pblendw( <8 x i16> zeroinitializer, <8 x i16> %tmp126, i32 2 ) nounwind 		; <<8 x i16>> [#uses=0]
 	ret i32 0
diff --git a/llvm/test/CodeGen/X86/2008-04-28-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
index 7c04206..06bbd74 100644
--- a/llvm/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
+++ b/llvm/test/CodeGen/X86/2008-04-28-CoalescerBug.ll
@@ -33,7 +33,7 @@
 
 bb13107:		; preds = %bb13101, %bb13088
 	%iftmp.684.0 = phi i32 [ 0, %bb13101 ], [ 65535, %bb13088 ]		; <i32> [#uses=2]
-	%tmp13111 = load i64* null, align 8		; <i64> [#uses=3]
+	%tmp13111 = load i64, i64* null, align 8		; <i64> [#uses=3]
 	%tmp13116 = lshr i64 %tmp13111, 16		; <i64> [#uses=1]
 	%tmp1311613117 = trunc i64 %tmp13116 to i32		; <i32> [#uses=1]
 	%tmp13118 = and i32 %tmp1311613117, 65535		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll b/llvm/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll
index 5ceb546..0e4ef1c 100644
--- a/llvm/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll
+++ b/llvm/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse2
 
 define fastcc void @glgVectorFloatConversion() nounwind  {
-	%tmp12745 = load <4 x float>* null, align 16		; <<4 x float>> [#uses=1]
+	%tmp12745 = load <4 x float>, <4 x float>* null, align 16		; <<4 x float>> [#uses=1]
 	%tmp12773 = insertelement <4 x float> %tmp12745, float 1.000000e+00, i32 1		; <<4 x float>> [#uses=1]
 	%tmp12774 = insertelement <4 x float> %tmp12773, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
 	%tmp12775 = insertelement <4 x float> %tmp12774, float 1.000000e+00, i32 3		; <<4 x float>> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll b/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
index 1a70b4a..df5ceb0 100644
--- a/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
+++ b/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
@@ -46,7 +46,7 @@
 	store i8 %d, i8* %d_addr
 	%tmp13 = getelementptr %struct.BoundaryAlignment, %struct.BoundaryAlignment* %str_addr, i32 0, i32 0		; <[3 x i8]*> [#uses=1]
 	%tmp1314 = bitcast [3 x i8]* %tmp13 to i32*		; <i32*> [#uses=1]
-	%tmp15 = load i32* %tmp1314, align 4		; <i32> [#uses=1]
+	%tmp15 = load i32, i32* %tmp1314, align 4		; <i32> [#uses=1]
 	%tmp16 = shl i32 %tmp15, 14		; <i32> [#uses=1]
 	%tmp17 = ashr i32 %tmp16, 23		; <i32> [#uses=1]
 	%tmp1718 = trunc i32 %tmp17 to i16		; <i16> [#uses=1]
@@ -57,7 +57,7 @@
 	%sextl21 = shl i16 %sextr, 7		; <i16> [#uses=1]
 	%sextr22 = ashr i16 %sextl21, 7		; <i16> [#uses=1]
 	%sextr2223 = sext i16 %sextr22 to i32		; <i32> [#uses=1]
-	%tmp24 = load i32* %j_addr, align 4		; <i32> [#uses=1]
+	%tmp24 = load i32, i32* %j_addr, align 4		; <i32> [#uses=1]
 	%tmp25 = icmp ne i32 %sextr2223, %tmp24		; <i1> [#uses=1]
 	%tmp2526 = zext i1 %tmp25 to i8		; <i8> [#uses=1]
 	%toBool = icmp ne i8 %tmp2526, 0		; <i1> [#uses=1]
@@ -69,8 +69,8 @@
 
 bb27:		; preds = %entry
 	%tmp28 = getelementptr %struct.BoundaryAlignment, %struct.BoundaryAlignment* %str_addr, i32 0, i32 1		; <i8*> [#uses=1]
-	%tmp29 = load i8* %tmp28, align 4		; <i8> [#uses=1]
-	%tmp30 = load i8* %c_addr, align 1		; <i8> [#uses=1]
+	%tmp29 = load i8, i8* %tmp28, align 4		; <i8> [#uses=1]
+	%tmp30 = load i8, i8* %c_addr, align 1		; <i8> [#uses=1]
 	%tmp31 = icmp ne i8 %tmp29, %tmp30		; <i1> [#uses=1]
 	%tmp3132 = zext i1 %tmp31 to i8		; <i8> [#uses=1]
 	%toBool33 = icmp ne i8 %tmp3132, 0		; <i1> [#uses=1]
@@ -82,7 +82,7 @@
 
 bb35:		; preds = %bb27
 	%tmp36 = getelementptr %struct.BoundaryAlignment, %struct.BoundaryAlignment* %str_addr, i32 0, i32 2		; <i16*> [#uses=1]
-	%tmp37 = load i16* %tmp36, align 4		; <i16> [#uses=1]
+	%tmp37 = load i16, i16* %tmp36, align 4		; <i16> [#uses=1]
 	%tmp38 = shl i16 %tmp37, 7		; <i16> [#uses=1]
 	%tmp39 = ashr i16 %tmp38, 7		; <i16> [#uses=1]
 	%sextl40 = shl i16 %tmp39, 7		; <i16> [#uses=1]
@@ -91,7 +91,7 @@
 	%sextr43 = ashr i16 %sextl42, 7		; <i16> [#uses=0]
 	%sextl44 = shl i16 %sextr41, 7		; <i16> [#uses=1]
 	%sextr45 = ashr i16 %sextl44, 7		; <i16> [#uses=1]
-	%tmp46 = load i16* %t_addr, align 2		; <i16> [#uses=1]
+	%tmp46 = load i16, i16* %t_addr, align 2		; <i16> [#uses=1]
 	%tmp47 = icmp ne i16 %sextr45, %tmp46		; <i1> [#uses=1]
 	%tmp4748 = zext i1 %tmp47 to i8		; <i8> [#uses=1]
 	%toBool49 = icmp ne i8 %tmp4748, 0		; <i1> [#uses=1]
@@ -103,7 +103,7 @@
 
 bb51:		; preds = %bb35
 	%tmp52 = getelementptr %struct.BoundaryAlignment, %struct.BoundaryAlignment* %str_addr, i32 0, i32 3		; <i16*> [#uses=1]
-	%tmp53 = load i16* %tmp52, align 4		; <i16> [#uses=1]
+	%tmp53 = load i16, i16* %tmp52, align 4		; <i16> [#uses=1]
 	%tmp54 = shl i16 %tmp53, 7		; <i16> [#uses=1]
 	%tmp55 = ashr i16 %tmp54, 7		; <i16> [#uses=1]
 	%sextl56 = shl i16 %tmp55, 7		; <i16> [#uses=1]
@@ -112,7 +112,7 @@
 	%sextr59 = ashr i16 %sextl58, 7		; <i16> [#uses=0]
 	%sextl60 = shl i16 %sextr57, 7		; <i16> [#uses=1]
 	%sextr61 = ashr i16 %sextl60, 7		; <i16> [#uses=1]
-	%tmp62 = load i16* %u_addr, align 2		; <i16> [#uses=1]
+	%tmp62 = load i16, i16* %u_addr, align 2		; <i16> [#uses=1]
 	%tmp63 = icmp ne i16 %sextr61, %tmp62		; <i1> [#uses=1]
 	%tmp6364 = zext i1 %tmp63 to i8		; <i8> [#uses=1]
 	%toBool65 = icmp ne i8 %tmp6364, 0		; <i1> [#uses=1]
@@ -124,8 +124,8 @@
 
 bb67:		; preds = %bb51
 	%tmp68 = getelementptr %struct.BoundaryAlignment, %struct.BoundaryAlignment* %str_addr, i32 0, i32 4		; <i8*> [#uses=1]
-	%tmp69 = load i8* %tmp68, align 4		; <i8> [#uses=1]
-	%tmp70 = load i8* %d_addr, align 1		; <i8> [#uses=1]
+	%tmp69 = load i8, i8* %tmp68, align 4		; <i8> [#uses=1]
+	%tmp70 = load i8, i8* %d_addr, align 1		; <i8> [#uses=1]
 	%tmp71 = icmp ne i8 %tmp69, %tmp70		; <i1> [#uses=1]
 	%tmp7172 = zext i1 %tmp71 to i8		; <i8> [#uses=1]
 	%toBool73 = icmp ne i8 %tmp7172, 0		; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2008-05-21-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
index 4b32f57..c6709a86 100644
--- a/llvm/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
+++ b/llvm/test/CodeGen/X86/2008-05-21-CoalescerBug.ll
@@ -74,7 +74,7 @@
 	br label %bb497
 
 bb483:		; preds = %bb497
-	%tmp496 = load %struct.tree_node** null, align 4		; <%struct.tree_node*> [#uses=1]
+	%tmp496 = load %struct.tree_node*, %struct.tree_node** null, align 4		; <%struct.tree_node*> [#uses=1]
 	br label %bb497
 
 bb497:		; preds = %bb483, %entry
diff --git a/llvm/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll b/llvm/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
index da56ce7..a91a422 100644
--- a/llvm/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
+++ b/llvm/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll
@@ -2,7 +2,7 @@
 
 define void @a(<4 x float>* %x) nounwind  {
 entry:
-        %tmp2 = load <4 x float>* %x, align 1
+        %tmp2 = load <4 x float>, <4 x float>* %x, align 1
         %inv = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %tmp2)
         store <4 x float> %inv, <4 x float>* %x, align 1
         ret void
diff --git a/llvm/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll b/llvm/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
index a6234d3..422d68e 100644
--- a/llvm/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
+++ b/llvm/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll
@@ -13,9 +13,9 @@
 	%b = bitcast i64 %x to double		; <double> [#uses=1]
 	store double %b, double* @atomic
 	store double 0.000000e+00, double* @atomic2
-	%l = load i32* @ioport		; <i32> [#uses=1]
+	%l = load i32, i32* @ioport		; <i32> [#uses=1]
 	%t = trunc i32 %l to i16		; <i16> [#uses=1]
-	%l2 = load i32* @ioport2		; <i32> [#uses=1]
+	%l2 = load i32, i32* @ioport2		; <i32> [#uses=1]
 	%tmp = lshr i32 %l2, 16		; <i32> [#uses=1]
 	%t2 = trunc i32 %tmp to i16		; <i16> [#uses=1]
 	%f = add i16 %t, %t2		; <i16> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll b/llvm/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
index 037559e..5a05ec1 100644
--- a/llvm/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
+++ b/llvm/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll
@@ -12,9 +12,9 @@
 	store volatile double 0.000000e+00, double* @atomic2 ; one processor operation only
 	%b2 = bitcast double %y to i64		; <i64> [#uses=1]
 	store volatile i64 %b2, i64* @anything ; may transform to store of double
-	%l = load volatile i32* @ioport		; must not narrow
+	%l = load volatile i32, i32* @ioport		; must not narrow
 	%t = trunc i32 %l to i16		; <i16> [#uses=1]
-	%l2 = load volatile i32* @ioport		; must not narrow
+	%l2 = load volatile i32, i32* @ioport		; must not narrow
 	%tmp = lshr i32 %l2, 16		; <i32> [#uses=1]
 	%t2 = trunc i32 %tmp to i16		; <i16> [#uses=1]
 	%f = add i16 %t, %t2		; <i16> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll b/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll
index 4d4819a..cdd1b0b 100644
--- a/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll
+++ b/llvm/test/CodeGen/X86/2008-06-16-SubregsBug.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=i386-apple-darwin | grep mov | count 4
 
 define i16 @test(i16* %tmp179) nounwind  {
-	%tmp180 = load i16* %tmp179, align 2		; <i16> [#uses=2]
+	%tmp180 = load i16, i16* %tmp179, align 2		; <i16> [#uses=2]
 	%tmp184 = and i16 %tmp180, -1024		; <i16> [#uses=1]
 	%tmp186 = icmp eq i16 %tmp184, -32768		; <i1> [#uses=1]
 	br i1 %tmp186, label %bb189, label %bb288
diff --git a/llvm/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll b/llvm/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll
index 23d91eb..c92768c 100644
--- a/llvm/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll
+++ b/llvm/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll
@@ -63,7 +63,7 @@
 	br i1 false, label %bb17.preheader, label %bb30
 
 bb17.preheader:		; preds = %entry
-	load i32* null, align 4		; <i32>:0 [#uses=0]
+	load i32, i32* null, align 4		; <i32>:0 [#uses=0]
 	br label %bb16
 
 bb16:		; preds = %bb16, %bb17.preheader
diff --git a/llvm/test/CodeGen/X86/2008-07-19-movups-spills.ll b/llvm/test/CodeGen/X86/2008-07-19-movups-spills.ll
index cd86ee1..45ea699 100644
--- a/llvm/test/CodeGen/X86/2008-07-19-movups-spills.ll
+++ b/llvm/test/CodeGen/X86/2008-07-19-movups-spills.ll
@@ -75,38 +75,38 @@
 ; CHECK: movups
 ; CHECK: movups
 ; CHECK-NOT: movups
-	load <4 x float>* @0, align 1		; <<4 x float>>:1 [#uses=2]
-	load <4 x float>* @1, align 1		; <<4 x float>>:2 [#uses=3]
-	load <4 x float>* @2, align 1		; <<4 x float>>:3 [#uses=4]
-	load <4 x float>* @3, align 1		; <<4 x float>>:4 [#uses=5]
-	load <4 x float>* @4, align 1		; <<4 x float>>:5 [#uses=6]
-	load <4 x float>* @5, align 1		; <<4 x float>>:6 [#uses=7]
-	load <4 x float>* @6, align 1		; <<4 x float>>:7 [#uses=8]
-	load <4 x float>* @7, align 1		; <<4 x float>>:8 [#uses=9]
-	load <4 x float>* @8, align 1		; <<4 x float>>:9 [#uses=10]
-	load <4 x float>* @9, align 1		; <<4 x float>>:10 [#uses=11]
-	load <4 x float>* @10, align 1		; <<4 x float>>:11 [#uses=12]
-	load <4 x float>* @11, align 1		; <<4 x float>>:12 [#uses=13]
-	load <4 x float>* @12, align 1		; <<4 x float>>:13 [#uses=14]
-	load <4 x float>* @13, align 1		; <<4 x float>>:14 [#uses=15]
-	load <4 x float>* @14, align 1		; <<4 x float>>:15 [#uses=16]
-	load <4 x float>* @15, align 1		; <<4 x float>>:16 [#uses=17]
-	load <4 x float>* @16, align 1		; <<4 x float>>:17 [#uses=18]
-	load <4 x float>* @17, align 1		; <<4 x float>>:18 [#uses=19]
-	load <4 x float>* @18, align 1		; <<4 x float>>:19 [#uses=20]
-	load <4 x float>* @19, align 1		; <<4 x float>>:20 [#uses=21]
-	load <4 x float>* @20, align 1		; <<4 x float>>:21 [#uses=22]
-	load <4 x float>* @21, align 1		; <<4 x float>>:22 [#uses=23]
-	load <4 x float>* @22, align 1		; <<4 x float>>:23 [#uses=24]
-	load <4 x float>* @23, align 1		; <<4 x float>>:24 [#uses=25]
-	load <4 x float>* @24, align 1		; <<4 x float>>:25 [#uses=26]
-	load <4 x float>* @25, align 1		; <<4 x float>>:26 [#uses=27]
-	load <4 x float>* @26, align 1		; <<4 x float>>:27 [#uses=28]
-	load <4 x float>* @27, align 1		; <<4 x float>>:28 [#uses=29]
-	load <4 x float>* @28, align 1		; <<4 x float>>:29 [#uses=30]
-	load <4 x float>* @29, align 1		; <<4 x float>>:30 [#uses=31]
-	load <4 x float>* @30, align 1		; <<4 x float>>:31 [#uses=32]
-	load <4 x float>* @31, align 1		; <<4 x float>>:32 [#uses=33]
+	load <4 x float>, <4 x float>* @0, align 1		; <<4 x float>>:1 [#uses=2]
+	load <4 x float>, <4 x float>* @1, align 1		; <<4 x float>>:2 [#uses=3]
+	load <4 x float>, <4 x float>* @2, align 1		; <<4 x float>>:3 [#uses=4]
+	load <4 x float>, <4 x float>* @3, align 1		; <<4 x float>>:4 [#uses=5]
+	load <4 x float>, <4 x float>* @4, align 1		; <<4 x float>>:5 [#uses=6]
+	load <4 x float>, <4 x float>* @5, align 1		; <<4 x float>>:6 [#uses=7]
+	load <4 x float>, <4 x float>* @6, align 1		; <<4 x float>>:7 [#uses=8]
+	load <4 x float>, <4 x float>* @7, align 1		; <<4 x float>>:8 [#uses=9]
+	load <4 x float>, <4 x float>* @8, align 1		; <<4 x float>>:9 [#uses=10]
+	load <4 x float>, <4 x float>* @9, align 1		; <<4 x float>>:10 [#uses=11]
+	load <4 x float>, <4 x float>* @10, align 1		; <<4 x float>>:11 [#uses=12]
+	load <4 x float>, <4 x float>* @11, align 1		; <<4 x float>>:12 [#uses=13]
+	load <4 x float>, <4 x float>* @12, align 1		; <<4 x float>>:13 [#uses=14]
+	load <4 x float>, <4 x float>* @13, align 1		; <<4 x float>>:14 [#uses=15]
+	load <4 x float>, <4 x float>* @14, align 1		; <<4 x float>>:15 [#uses=16]
+	load <4 x float>, <4 x float>* @15, align 1		; <<4 x float>>:16 [#uses=17]
+	load <4 x float>, <4 x float>* @16, align 1		; <<4 x float>>:17 [#uses=18]
+	load <4 x float>, <4 x float>* @17, align 1		; <<4 x float>>:18 [#uses=19]
+	load <4 x float>, <4 x float>* @18, align 1		; <<4 x float>>:19 [#uses=20]
+	load <4 x float>, <4 x float>* @19, align 1		; <<4 x float>>:20 [#uses=21]
+	load <4 x float>, <4 x float>* @20, align 1		; <<4 x float>>:21 [#uses=22]
+	load <4 x float>, <4 x float>* @21, align 1		; <<4 x float>>:22 [#uses=23]
+	load <4 x float>, <4 x float>* @22, align 1		; <<4 x float>>:23 [#uses=24]
+	load <4 x float>, <4 x float>* @23, align 1		; <<4 x float>>:24 [#uses=25]
+	load <4 x float>, <4 x float>* @24, align 1		; <<4 x float>>:25 [#uses=26]
+	load <4 x float>, <4 x float>* @25, align 1		; <<4 x float>>:26 [#uses=27]
+	load <4 x float>, <4 x float>* @26, align 1		; <<4 x float>>:27 [#uses=28]
+	load <4 x float>, <4 x float>* @27, align 1		; <<4 x float>>:28 [#uses=29]
+	load <4 x float>, <4 x float>* @28, align 1		; <<4 x float>>:29 [#uses=30]
+	load <4 x float>, <4 x float>* @29, align 1		; <<4 x float>>:30 [#uses=31]
+	load <4 x float>, <4 x float>* @30, align 1		; <<4 x float>>:31 [#uses=32]
+	load <4 x float>, <4 x float>* @31, align 1		; <<4 x float>>:32 [#uses=33]
 	fmul <4 x float> %1, %1		; <<4 x float>>:33 [#uses=1]
 	fmul <4 x float> %33, %2		; <<4 x float>>:34 [#uses=1]
 	fmul <4 x float> %34, %3		; <<4 x float>>:35 [#uses=1]
@@ -708,38 +708,38 @@
 ; CHECK: movups
 ; CHECK: movups
 ; CHECK-NOT: movups
-	load <4 x float>* @0, align 1
-	load <4 x float>* @1, align 1
-	load <4 x float>* @2, align 1
-	load <4 x float>* @3, align 1
-	load <4 x float>* @4, align 1
-	load <4 x float>* @5, align 1
-	load <4 x float>* @6, align 1
-	load <4 x float>* @7, align 1
-	load <4 x float>* @8, align 1
-	load <4 x float>* @9, align 1
-	load <4 x float>* @10, align 1
-	load <4 x float>* @11, align 1
-	load <4 x float>* @12, align 1
-	load <4 x float>* @13, align 1
-	load <4 x float>* @14, align 1
-	load <4 x float>* @15, align 1
-	load <4 x float>* @16, align 1
-	load <4 x float>* @17, align 1
-	load <4 x float>* @18, align 1
-	load <4 x float>* @19, align 1
-	load <4 x float>* @20, align 1
-	load <4 x float>* @21, align 1
-	load <4 x float>* @22, align 1
-	load <4 x float>* @23, align 1
-	load <4 x float>* @24, align 1
-	load <4 x float>* @25, align 1
-	load <4 x float>* @26, align 1
-	load <4 x float>* @27, align 1
-	load <4 x float>* @28, align 1
-	load <4 x float>* @29, align 1
-	load <4 x float>* @30, align 1
-	load <4 x float>* @31, align 1
+	load <4 x float>, <4 x float>* @0, align 1
+	load <4 x float>, <4 x float>* @1, align 1
+	load <4 x float>, <4 x float>* @2, align 1
+	load <4 x float>, <4 x float>* @3, align 1
+	load <4 x float>, <4 x float>* @4, align 1
+	load <4 x float>, <4 x float>* @5, align 1
+	load <4 x float>, <4 x float>* @6, align 1
+	load <4 x float>, <4 x float>* @7, align 1
+	load <4 x float>, <4 x float>* @8, align 1
+	load <4 x float>, <4 x float>* @9, align 1
+	load <4 x float>, <4 x float>* @10, align 1
+	load <4 x float>, <4 x float>* @11, align 1
+	load <4 x float>, <4 x float>* @12, align 1
+	load <4 x float>, <4 x float>* @13, align 1
+	load <4 x float>, <4 x float>* @14, align 1
+	load <4 x float>, <4 x float>* @15, align 1
+	load <4 x float>, <4 x float>* @16, align 1
+	load <4 x float>, <4 x float>* @17, align 1
+	load <4 x float>, <4 x float>* @18, align 1
+	load <4 x float>, <4 x float>* @19, align 1
+	load <4 x float>, <4 x float>* @20, align 1
+	load <4 x float>, <4 x float>* @21, align 1
+	load <4 x float>, <4 x float>* @22, align 1
+	load <4 x float>, <4 x float>* @23, align 1
+	load <4 x float>, <4 x float>* @24, align 1
+	load <4 x float>, <4 x float>* @25, align 1
+	load <4 x float>, <4 x float>* @26, align 1
+	load <4 x float>, <4 x float>* @27, align 1
+	load <4 x float>, <4 x float>* @28, align 1
+	load <4 x float>, <4 x float>* @29, align 1
+	load <4 x float>, <4 x float>* @30, align 1
+	load <4 x float>, <4 x float>* @31, align 1
 	fmul <4 x float> %1, %1
 	fmul <4 x float> %33, %2
 	fmul <4 x float> %34, %3
diff --git a/llvm/test/CodeGen/X86/2008-07-22-CombinerCrash.ll b/llvm/test/CodeGen/X86/2008-07-22-CombinerCrash.ll
index 0f67145..35bb5f0 100644
--- a/llvm/test/CodeGen/X86/2008-07-22-CombinerCrash.ll
+++ b/llvm/test/CodeGen/X86/2008-07-22-CombinerCrash.ll
@@ -7,7 +7,7 @@
 declare void @abort()
 
 define void @t() nounwind {
-	load i16* @0		; <i16>:1 [#uses=1]
+	load i16, i16* @0		; <i16>:1 [#uses=1]
 	zext i16 %1 to i64		; <i64>:2 [#uses=1]
 	bitcast i64 %2 to <4 x i16>		; <<4 x i16>>:3 [#uses=1]
 	shufflevector <4 x i16> %3, <4 x i16> undef, <4 x i32> zeroinitializer		; <<4 x i16>>:4 [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2008-08-06-RewriterBug.ll b/llvm/test/CodeGen/X86/2008-08-06-RewriterBug.ll
index 4428035..08172fa 100644
--- a/llvm/test/CodeGen/X86/2008-08-06-RewriterBug.ll
+++ b/llvm/test/CodeGen/X86/2008-08-06-RewriterBug.ll
@@ -4,14 +4,14 @@
 @data = external global [400 x i64]		; <[400 x i64]*> [#uses=5]
 
 define void @foo(double* noalias, double* noalias) {
-	load i64* getelementptr ([400 x i64]* @data, i32 0, i64 200), align 4		; <i64>:3 [#uses=1]
-	load i64* getelementptr ([400 x i64]* @data, i32 0, i64 199), align 4		; <i64>:4 [#uses=1]
-	load i64* getelementptr ([400 x i64]* @data, i32 0, i64 198), align 4		; <i64>:5 [#uses=2]
-	load i64* getelementptr ([400 x i64]* @data, i32 0, i64 197), align 4		; <i64>:6 [#uses=1]
+	load i64, i64* getelementptr ([400 x i64]* @data, i32 0, i64 200), align 4		; <i64>:3 [#uses=1]
+	load i64, i64* getelementptr ([400 x i64]* @data, i32 0, i64 199), align 4		; <i64>:4 [#uses=1]
+	load i64, i64* getelementptr ([400 x i64]* @data, i32 0, i64 198), align 4		; <i64>:5 [#uses=2]
+	load i64, i64* getelementptr ([400 x i64]* @data, i32 0, i64 197), align 4		; <i64>:6 [#uses=1]
 	br i1 false, label %28, label %7
 
 ; <label>:7		; preds = %2
-	load double** getelementptr (double** bitcast ([400 x i64]* @data to double**), i64 180), align 8		; <double*>:8 [#uses=1]
+	load double*, double** getelementptr (double** bitcast ([400 x i64]* @data to double**), i64 180), align 8		; <double*>:8 [#uses=1]
 	bitcast double* %8 to double*		; <double*>:9 [#uses=1]
 	ptrtoint double* %9 to i64		; <i64>:10 [#uses=1]
 	mul i64 %4, %3		; <i64>:11 [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll b/llvm/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
index 51064f1..d939207 100644
--- a/llvm/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
+++ b/llvm/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll
@@ -29,7 +29,7 @@
 ; CHECK: _Unwind_Resume_or_Rethrow
 define i32 @_Unwind_Resume_or_Rethrow() nounwind uwtable ssp {
 entry:
-  %0 = load i32* @b, align 4
+  %0 = load i32, i32* @b, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %if.end, label %if.then
 
diff --git a/llvm/test/CodeGen/X86/2008-09-09-LinearScanBug.ll b/llvm/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
index b3312d9..c80fbdd 100644
--- a/llvm/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
+++ b/llvm/test/CodeGen/X86/2008-09-09-LinearScanBug.ll
@@ -5,7 +5,7 @@
 
 define i32 @func_125(i32 %p_126, i32 %p_128, i32 %p_129) nounwind {
 entry:
-	%tmp2.i = load i32* @g_3		; <i32> [#uses=2]
+	%tmp2.i = load i32, i32* @g_3		; <i32> [#uses=2]
 	%conv = trunc i32 %tmp2.i to i16		; <i16> [#uses=3]
 	br label %forcond1.preheader.i.i7
 
diff --git a/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
index 108f243..635194f 100644
--- a/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
+++ b/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug.ll
@@ -6,7 +6,7 @@
 define i32 @func_3(i32 %p_5) nounwind {
 entry:
 	%0 = srem i32 1, 0		; <i32> [#uses=2]
-	%1 = load i16* @g_15, align 2		; <i16> [#uses=1]
+	%1 = load i16, i16* @g_15, align 2		; <i16> [#uses=1]
 	%2 = zext i16 %1 to i32		; <i32> [#uses=1]
 	%3 = and i32 %2, 1		; <i32> [#uses=1]
 	%4 = tail call i32 (...)* @rshift_u_s( i32 1 ) nounwind		; <i32> [#uses=1]
@@ -14,7 +14,7 @@
 	%6 = zext i1 %5 to i32		; <i32> [#uses=1]
 	%7 = icmp sge i32 %3, %6		; <i1> [#uses=1]
 	%8 = zext i1 %7 to i32		; <i32> [#uses=1]
-	%9 = load i16* @g_15, align 2		; <i16> [#uses=1]
+	%9 = load i16, i16* @g_15, align 2		; <i16> [#uses=1]
 	%10 = icmp eq i16 %9, 0		; <i1> [#uses=1]
 	%11 = zext i1 %10 to i32		; <i32> [#uses=1]
 	%12 = tail call i32 (...)* @func_20( i32 1 ) nounwind		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll b/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
index 59d1c7f..92eb1c8 100644
--- a/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
+++ b/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll
@@ -18,11 +18,11 @@
 ; SOURCE-SCHED: subl
 ; SOURCE-SCHED: testb
 ; SOURCE-SCHED: jne
-	%0 = load i32* @g_5, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* @g_5, align 4		; <i32> [#uses=1]
 	%1 = ashr i32 %0, 1		; <i32> [#uses=1]
 	%2 = icmp sgt i32 %1, 1		; <i1> [#uses=1]
 	%3 = zext i1 %2 to i32		; <i32> [#uses=1]
-	%4 = load i32* @g_73, align 4		; <i32> [#uses=1]
+	%4 = load i32, i32* @g_73, align 4		; <i32> [#uses=1]
 	%5 = zext i16 %p_46 to i64		; <i64> [#uses=1]
 	%6 = sub i64 0, %5		; <i64> [#uses=1]
 	%7 = trunc i64 %6 to i8		; <i8> [#uses=2]
diff --git a/llvm/test/CodeGen/X86/2008-09-17-inline-asm-1.ll b/llvm/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
index 4b2774b..3edd72b 100644
--- a/llvm/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
+++ b/llvm/test/CodeGen/X86/2008-09-17-inline-asm-1.ll
@@ -19,7 +19,7 @@
 
 define i32 @aci(i32* %pw) nounwind {
 entry:
-	%0 = load i32* @x, align 4
+	%0 = load i32, i32* @x, align 4
 	%asmtmp = tail call { i32, i32 } asm "movl $0, %eax\0A\090:\0A\09test %eax, %eax\0A\09je 1f\0A\09movl %eax, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{ax},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind
 	%asmtmp2 = tail call { i32, i32 } asm "movl $0, %edx\0A\090:\0A\09test %edx, %edx\0A\09je 1f\0A\09movl %edx, $2\0A\09incl $2\0A\09lock\0A\09cmpxchgl $2, $0\0A\09jne 0b\0A\091:", "=*m,=&{dx},=&r,*m,~{dirflag},~{fpsr},~{flags},~{memory},~{cc}"(i32* %pw, i32* %pw) nounwind
 	%asmresult2 = extractvalue { i32, i32 } %asmtmp, 0
diff --git a/llvm/test/CodeGen/X86/2008-09-18-inline-asm-2.ll b/llvm/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
index 015cbb5..0058d97 100644
--- a/llvm/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
+++ b/llvm/test/CodeGen/X86/2008-09-18-inline-asm-2.ll
@@ -35,9 +35,9 @@
 	%0 = getelementptr %struct.foo, %struct.foo* %c, i32 0, i32 0		; <i32*> [#uses=2]
 	%1 = getelementptr %struct.foo, %struct.foo* %c, i32 0, i32 1		; <i32*> [#uses=2]
 	%2 = getelementptr %struct.foo, %struct.foo* %c, i32 0, i32 2		; <i8**> [#uses=2]
-	%3 = load i32* %0, align 4		; <i32> [#uses=1]
-	%4 = load i32* %1, align 4		; <i32> [#uses=1]
-	%5 = load i8* %state, align 1		; <i8> [#uses=1]
+	%3 = load i32, i32* %0, align 4		; <i32> [#uses=1]
+	%4 = load i32, i32* %1, align 4		; <i32> [#uses=1]
+	%5 = load i8, i8* %state, align 1		; <i8> [#uses=1]
 	%asmtmp = tail call { i32, i32, i32, i32 } asm sideeffect "#1st=$0 $1 2nd=$1 $2 3rd=$2 $4 5th=$4 $3=4th 1$0 1%eXx 5$4 5%eXx 6th=$5", "=&r,=r,=r,=*m,=&q,=*imr,1,2,*m,5,~{dirflag},~{fpsr},~{flags},~{cx}"(i8** %2, i8* %state, i32 %3, i32 %4, i8** %2, i8 %5) nounwind		; <{ i32, i32, i32, i32 }> [#uses=3]
 	%asmresult = extractvalue { i32, i32, i32, i32 } %asmtmp, 0		; <i32> [#uses=1]
 	%asmresult1 = extractvalue { i32, i32, i32, i32 } %asmtmp, 1		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2008-09-19-RegAllocBug.ll b/llvm/test/CodeGen/X86/2008-09-19-RegAllocBug.ll
index a8f2912..83a1fac 100644
--- a/llvm/test/CodeGen/X86/2008-09-19-RegAllocBug.ll
+++ b/llvm/test/CodeGen/X86/2008-09-19-RegAllocBug.ll
@@ -5,7 +5,7 @@
 
 define i32 @func_4() nounwind {
 entry:
-	%0 = load i32* @g_3, align 4		; <i32> [#uses=2]
+	%0 = load i32, i32* @g_3, align 4		; <i32> [#uses=2]
 	%1 = trunc i32 %0 to i8		; <i8> [#uses=1]
 	%2 = sub i8 1, %1		; <i8> [#uses=1]
 	%3 = sext i8 %2 to i32		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2008-09-29-ReMatBug.ll b/llvm/test/CodeGen/X86/2008-09-29-ReMatBug.ll
index c7b1912..38a3664 100644
--- a/llvm/test/CodeGen/X86/2008-09-29-ReMatBug.ll
+++ b/llvm/test/CodeGen/X86/2008-09-29-ReMatBug.ll
@@ -13,13 +13,13 @@
 
 define %struct.NSString* @"-[XCStringList stringRepresentation]"(%struct.XCStringList* %self, %struct..0objc_selector* %_cmd) nounwind {
 entry:
-	%0 = load i32* null, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* null, align 4		; <i32> [#uses=1]
 	%1 = and i32 %0, 16777215		; <i32> [#uses=1]
 	%2 = icmp eq i32 %1, 0		; <i1> [#uses=1]
 	br i1 %2, label %bb44, label %bb4
 
 bb4:		; preds = %entry
-	%3 = load %struct._XCStringListNode** null, align 4		; <%struct._XCStringListNode*> [#uses=2]
+	%3 = load %struct._XCStringListNode*, %struct._XCStringListNode** null, align 4		; <%struct._XCStringListNode*> [#uses=2]
 	%4 = icmp eq %struct._XCStringListNode* %3, null		; <i1> [#uses=1]
 	%5 = bitcast %struct._XCStringListNode* %3 to i32*		; <i32*> [#uses=1]
 	br label %bb37.outer
@@ -48,7 +48,7 @@
 	br label %bb35
 
 bb35:		; preds = %bb35, %bb35.outer
-	%9 = load i8* null, align 1		; <i8> [#uses=1]
+	%9 = load i8, i8* null, align 1		; <i8> [#uses=1]
 	switch i8 %9, label %bb35 [
 		i8 0, label %bb37.outer
 		i8 32, label %bb34
@@ -63,7 +63,7 @@
 	br i1 %4, label %bb39.split, label %bb37
 
 bb37:		; preds = %bb37.outer, %bb19
-	%10 = load i32* %5, align 4		; <i32> [#uses=1]
+	%10 = load i32, i32* %5, align 4		; <i32> [#uses=1]
 	br i1 false, label %bb6, label %bb19
 
 bb39.split:		; preds = %bb37.outer
diff --git a/llvm/test/CodeGen/X86/2008-09-29-VolatileBug.ll b/llvm/test/CodeGen/X86/2008-09-29-VolatileBug.ll
index f35245b..6ee8cf2 100644
--- a/llvm/test/CodeGen/X86/2008-09-29-VolatileBug.ll
+++ b/llvm/test/CodeGen/X86/2008-09-29-VolatileBug.ll
@@ -6,7 +6,7 @@
 
 define i32 @main() nounwind {
 entry:
-	%0 = load volatile i32* @g_407, align 4		; <i32> [#uses=1]
+	%0 = load volatile i32, i32* @g_407, align 4		; <i32> [#uses=1]
 	%1 = trunc i32 %0 to i8		; <i8> [#uses=1]
 	%2 = tail call i32 @func_45(i8 zeroext %1) nounwind		; <i32> [#uses=0]
 	ret i32 0
diff --git a/llvm/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll b/llvm/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll
index bd48105..34c9857 100644
--- a/llvm/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll
+++ b/llvm/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll
@@ -10,7 +10,7 @@
 
 define i32 @main() {
 entry_nan.main:
-  %tmp = load x86_fp80* @_D3nan4rvale   ; <x86_fp80> [#uses=1]
+  %tmp = load x86_fp80, x86_fp80* @_D3nan4rvale   ; <x86_fp80> [#uses=1]
   call x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 %tmp)
   call x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 0xK7FFF8001234000000000)
   call x86_stdcallcc void @_D3nan5printFeZv(x86_fp80 0xK7FFFC001234000000400)
diff --git a/llvm/test/CodeGen/X86/2008-10-07-SSEISelBug.ll b/llvm/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
index bc57612..26e802a 100644
--- a/llvm/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
+++ b/llvm/test/CodeGen/X86/2008-10-07-SSEISelBug.ll
@@ -6,17 +6,17 @@
 	%w.addr = alloca float		; <float*> [#uses=2]
 	%.compoundliteral = alloca <4 x float>		; <<4 x float>*> [#uses=2]
 	store float %w, float* %w.addr
-	%tmp = load float* %w.addr		; <float> [#uses=1]
+	%tmp = load float, float* %w.addr		; <float> [#uses=1]
 	%0 = insertelement <4 x float> undef, float %tmp, i32 0		; <<4 x float>> [#uses=1]
 	%1 = insertelement <4 x float> %0, float 0.000000e+00, i32 1		; <<4 x float>> [#uses=1]
 	%2 = insertelement <4 x float> %1, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
 	%3 = insertelement <4 x float> %2, float 0.000000e+00, i32 3		; <<4 x float>> [#uses=1]
 	store <4 x float> %3, <4 x float>* %.compoundliteral
-	%tmp1 = load <4 x float>* %.compoundliteral		; <<4 x float>> [#uses=1]
+	%tmp1 = load <4 x float>, <4 x float>* %.compoundliteral		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp1, <4 x float>* %retval
 	br label %return
 
 return:		; preds = %entry
-	%4 = load <4 x float>* %retval		; <<4 x float>> [#uses=1]
+	%4 = load <4 x float>, <4 x float>* %retval		; <<4 x float>> [#uses=1]
 	ret <4 x float> %4
 }
diff --git a/llvm/test/CodeGen/X86/2008-10-11-CallCrash.ll b/llvm/test/CodeGen/X86/2008-10-11-CallCrash.ll
index efc6125..a859bc6 100644
--- a/llvm/test/CodeGen/X86/2008-10-11-CallCrash.ll
+++ b/llvm/test/CodeGen/X86/2008-10-11-CallCrash.ll
@@ -7,7 +7,7 @@
 define i32 @func_45(i64 %p_46, i32 %p_48) nounwind {
 entry:
 	%0 = tail call i32 (...)* @lshift_s_u(i64 %p_46, i64 0) nounwind		; <i32> [#uses=0]
-	%1 = load i32* @g_385, align 4		; <i32> [#uses=1]
+	%1 = load i32, i32* @g_385, align 4		; <i32> [#uses=1]
 	%2 = shl i32 %1, 1		; <i32> [#uses=1]
 	%3 = and i32 %2, 32		; <i32> [#uses=1]
 	%4 = tail call i32 (...)* @func_87(i32 undef, i32 %p_48, i32 1) nounwind		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll b/llvm/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll
index de4c1e7..ac6fa0d 100644
--- a/llvm/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll
+++ b/llvm/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86 -mattr=+sse2
 ; PR2762
 define void @foo(<4 x i32>* %p, <4 x double>* %q) {
-  %n = load <4 x i32>* %p
+  %n = load <4 x i32>, <4 x i32>* %p
   %z = sitofp <4 x i32> %n to <4 x double>
   store <4 x double> %z, <4 x double>* %q
   ret void
diff --git a/llvm/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/llvm/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
index 0310a5d..b1dcd03 100644
--- a/llvm/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
+++ b/llvm/test/CodeGen/X86/2008-10-27-CoalescerBug.ll
@@ -20,7 +20,7 @@
 ; CHECK: movsd %xmm0, 16(%esp)
 ; CHECK: %bb3
 bb3:		; preds = %bb30.loopexit, %bb25, %bb3
-	%2 = load i32* null, align 4		; <i32> [#uses=1]
+	%2 = load i32, i32* null, align 4		; <i32> [#uses=1]
 	%3 = mul i32 %2, 0		; <i32> [#uses=1]
 	%4 = icmp slt i32 0, %3		; <i1> [#uses=1]
 	br i1 %4, label %bb18, label %bb3
diff --git a/llvm/test/CodeGen/X86/2008-11-06-testb.ll b/llvm/test/CodeGen/X86/2008-11-06-testb.ll
index d510e9f..4ee4b4a 100644
--- a/llvm/test/CodeGen/X86/2008-11-06-testb.ll
+++ b/llvm/test/CodeGen/X86/2008-11-06-testb.ll
@@ -12,7 +12,7 @@
 	%0 = getelementptr %struct.x, %struct.x* %p, i32 0, i32 0		; <i8*> [#uses=1]
 	store i8 55, i8* %0, align 1
 	%1 = bitcast %struct.x* %p to i32*		; <i32*> [#uses=1]
-	%2 = load i32* %1, align 1		; <i32> [#uses=1]
+	%2 = load i32, i32* %1, align 1		; <i32> [#uses=1]
 	%3 = and i32 %2, 512		; <i32> [#uses=1]
 	%4 = icmp eq i32 %3, 0		; <i1> [#uses=1]
 	br i1 %4, label %bb5, label %bb
diff --git a/llvm/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll b/llvm/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
index f5880b6..840b8ba 100644
--- a/llvm/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
+++ b/llvm/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll
@@ -16,7 +16,7 @@
 bb1:		; preds = %bb, %entry
 	%P.0.rec = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=3]
 	%P.0 = getelementptr i8, i8* %Q, i32 %P.0.rec		; <i8*> [#uses=2]
-	%0 = load i8* %P.0, align 1		; <i8> [#uses=1]
+	%0 = load i8, i8* %P.0, align 1		; <i8> [#uses=1]
 	switch i8 %0, label %bb3 [
 		i8 12, label %bb
 		i8 42, label %bb
diff --git a/llvm/test/CodeGen/X86/2008-12-02-IllegalResultType.ll b/llvm/test/CodeGen/X86/2008-12-02-IllegalResultType.ll
index 4b72cb9..c828879 100644
--- a/llvm/test/CodeGen/X86/2008-12-02-IllegalResultType.ll
+++ b/llvm/test/CodeGen/X86/2008-12-02-IllegalResultType.ll
@@ -7,7 +7,7 @@
 
 define i32 @func_73(i32 %p_74) nounwind {
 entry:
-	%0 = load i32* @g_7, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* @g_7, align 4		; <i32> [#uses=1]
 	%1 = or i8 0, 118		; <i8> [#uses=1]
 	%2 = zext i8 %1 to i64		; <i64> [#uses=1]
 	%3 = icmp ne i32 %0, 0		; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2009-01-16-SchedulerBug.ll b/llvm/test/CodeGen/X86/2009-01-16-SchedulerBug.ll
index 99bef6c..ac6d0a9 100644
--- a/llvm/test/CodeGen/X86/2009-01-16-SchedulerBug.ll
+++ b/llvm/test/CodeGen/X86/2009-01-16-SchedulerBug.ll
@@ -10,12 +10,12 @@
 
 define fastcc %XXV* @bar(%CF* %call_frame, %XXV** %exception) nounwind {
 prologue:
-	%param_x = load %XXV** null		; <%XXV*> [#uses=1]
+	%param_x = load %XXV*, %XXV** null		; <%XXV*> [#uses=1]
 	%unique_1.i = ptrtoint %XXV* %param_x to i1		; <i1> [#uses=1]
 	br i1 %unique_1.i, label %NextVerify42, label %FailedVerify
 
 NextVerify42:		; preds = %prologue
-	%param_y = load %XXV** null		; <%XXV*> [#uses=1]
+	%param_y = load %XXV*, %XXV** null		; <%XXV*> [#uses=1]
 	%unique_1.i58 = ptrtoint %XXV* %param_y to i1		; <i1> [#uses=1]
 	br i1 %unique_1.i58, label %function_setup.cont, label %FailedVerify
 
diff --git a/llvm/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll b/llvm/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll
index f895336..e4f78f8 100644
--- a/llvm/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll
+++ b/llvm/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll
@@ -25,7 +25,7 @@
 	br i1 false, label %_ZN11xercesc_2_59XMLString9stringLenEPKt.exit.i73, label %bb4.i.i70
 
 _ZN11xercesc_2_59XMLString9stringLenEPKt.exit.i73:		; preds = %bb4.i.i70
-	%0 = load i16* getelementptr ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE, i32 0, i32 add (i32 ashr (i32 sub (i32 ptrtoint (i16* getelementptr ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE, i32 0, i32 4) to i32), i32 ptrtoint ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE to i32)), i32 1), i32 1)), align 4		; <i16> [#uses=0]
+	%0 = load i16, i16* getelementptr ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE, i32 0, i32 add (i32 ashr (i32 sub (i32 ptrtoint (i16* getelementptr ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE, i32 0, i32 4) to i32), i32 ptrtoint ([7 x i16]* @_ZN11xercesc_2_5L17gIdeographicCharsE to i32)), i32 1), i32 1)), align 4		; <i16> [#uses=0]
 	br label %bb4.i5.i141
 
 bb4.i5.i141:		; preds = %bb4.i5.i141, %_ZN11xercesc_2_59XMLString9stringLenEPKt.exit.i73
diff --git a/llvm/test/CodeGen/X86/2009-01-31-BigShift2.ll b/llvm/test/CodeGen/X86/2009-01-31-BigShift2.ll
index b478f27..90d14e7 100644
--- a/llvm/test/CodeGen/X86/2009-01-31-BigShift2.ll
+++ b/llvm/test/CodeGen/X86/2009-01-31-BigShift2.ll
@@ -2,7 +2,7 @@
 ; PR3449
 
 define void @test(<8 x double>* %P, i64* %Q) nounwind {
-	%A = load <8 x double>* %P		; <<8 x double>> [#uses=1]
+	%A = load <8 x double>, <8 x double>* %P		; <<8 x double>> [#uses=1]
 	%B = bitcast <8 x double> %A to i512		; <i512> [#uses=1]
 	%C = lshr i512 %B, 448		; <i512> [#uses=1]
 	%D = trunc i512 %C to i64		; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2009-02-01-LargeMask.ll b/llvm/test/CodeGen/X86/2009-02-01-LargeMask.ll
index c4042e6..e91208d 100644
--- a/llvm/test/CodeGen/X86/2009-02-01-LargeMask.ll
+++ b/llvm/test/CodeGen/X86/2009-02-01-LargeMask.ll
@@ -19,7 +19,7 @@
 
 bb.i49.i72:		; preds = %bb.i49.i72, %entry
 	%UNP.i1482.0 = phi i288 [ %.ins659, %bb.i49.i72 ], [ undef, %entry ]		; <i288> [#uses=1]
-	%0 = load i32* null, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* null, align 4		; <i32> [#uses=1]
 	%1 = xor i32 %0, 17834		; <i32> [#uses=1]
 	%2 = zext i32 %1 to i288		; <i288> [#uses=1]
 	%3 = shl i288 %2, 160		; <i288> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll b/llvm/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll
index e75af13..592a7e3 100644
--- a/llvm/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll
+++ b/llvm/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll
@@ -13,7 +13,7 @@
 	unreachable
 
 bb1:		; preds = %entry
-	%0 = load i32* @g_3, align 4		; <i32> [#uses=2]
+	%0 = load i32, i32* @g_3, align 4		; <i32> [#uses=2]
 	%1 = sext i32 %0 to i64		; <i64> [#uses=1]
 	%2 = or i64 %1, %p_66		; <i64> [#uses=1]
 	%3 = shl i64 %2, 0		; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll b/llvm/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll
index b7a8a1c..19c2dfd 100644
--- a/llvm/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll
+++ b/llvm/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll
@@ -6,7 +6,7 @@
 
 define i32 @alac_decode_frame() nounwind {
 entry:
-	%tmp2 = load i8** null		; <i8*> [#uses=2]
+	%tmp2 = load i8*, i8** null		; <i8*> [#uses=2]
 	%tmp34 = getelementptr i8, i8* %tmp2, i32 4		; <i8*> [#uses=2]
 	%tmp5.i424 = bitcast i8* %tmp34 to i8**		; <i8**> [#uses=2]
 	%tmp15.i = getelementptr i8, i8* %tmp2, i32 12		; <i8*> [#uses=1]
@@ -17,9 +17,9 @@
 	ret i32 0
 
 if.end47:		; preds = %entry
-	%tmp5.i590 = load i8** %tmp5.i424		; <i8*> [#uses=0]
+	%tmp5.i590 = load i8*, i8** %tmp5.i424		; <i8*> [#uses=0]
 	store i32 19, i32* %0
-	%tmp6.i569 = load i8** %tmp5.i424		; <i8*> [#uses=0]
+	%tmp6.i569 = load i8*, i8** %tmp5.i424		; <i8*> [#uses=0]
 	%1 = call i32 asm "bswap   $0", "=r,0,~{dirflag},~{fpsr},~{flags}"(i32 0) nounwind		; <i32> [#uses=0]
 	br i1 false, label %bb.nph, label %if.then63
 
diff --git a/llvm/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll b/llvm/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
index 602febf..fca03a2 100644
--- a/llvm/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
+++ b/llvm/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
@@ -29,41 +29,41 @@
   call void @llvm.dbg.declare(metadata [0 x i8]** %str.0, metadata !8, metadata !{!"0x102"}), !dbg !7
   %4 = call i8* @llvm.stacksave(), !dbg !7        ; <i8*> [#uses=1]
   store i8* %4, i8** %saved_stack.1, align 8, !dbg !7
-  %5 = load i8** %s1_addr, align 8, !dbg !13      ; <i8*> [#uses=1]
+  %5 = load i8*, i8** %s1_addr, align 8, !dbg !13      ; <i8*> [#uses=1]
   %6 = call i64 @strlen(i8* %5) nounwind readonly, !dbg !13 ; <i64> [#uses=1]
   %7 = add i64 %6, 1, !dbg !13                    ; <i64> [#uses=1]
   store i64 %7, i64* %3, align 8, !dbg !13
-  %8 = load i64* %3, align 8, !dbg !13            ; <i64> [#uses=1]
+  %8 = load i64, i64* %3, align 8, !dbg !13            ; <i64> [#uses=1]
   %9 = sub nsw i64 %8, 1, !dbg !13                ; <i64> [#uses=0]
-  %10 = load i64* %3, align 8, !dbg !13           ; <i64> [#uses=1]
+  %10 = load i64, i64* %3, align 8, !dbg !13           ; <i64> [#uses=1]
   %11 = mul i64 %10, 8, !dbg !13                  ; <i64> [#uses=0]
-  %12 = load i64* %3, align 8, !dbg !13           ; <i64> [#uses=1]
+  %12 = load i64, i64* %3, align 8, !dbg !13           ; <i64> [#uses=1]
   store i64 %12, i64* %2, align 8, !dbg !13
-  %13 = load i64* %3, align 8, !dbg !13           ; <i64> [#uses=1]
+  %13 = load i64, i64* %3, align 8, !dbg !13           ; <i64> [#uses=1]
   %14 = mul i64 %13, 8, !dbg !13                  ; <i64> [#uses=0]
-  %15 = load i64* %3, align 8, !dbg !13           ; <i64> [#uses=1]
+  %15 = load i64, i64* %3, align 8, !dbg !13           ; <i64> [#uses=1]
   store i64 %15, i64* %1, align 8, !dbg !13
-  %16 = load i64* %1, align 8, !dbg !13           ; <i64> [#uses=1]
+  %16 = load i64, i64* %1, align 8, !dbg !13           ; <i64> [#uses=1]
   %17 = trunc i64 %16 to i32, !dbg !13            ; <i32> [#uses=1]
   %18 = alloca i8, i32 %17, !dbg !13              ; <i8*> [#uses=1]
   %19 = bitcast i8* %18 to [0 x i8]*, !dbg !13    ; <[0 x i8]*> [#uses=1]
   store [0 x i8]* %19, [0 x i8]** %str.0, align 8, !dbg !13
-  %20 = load [0 x i8]** %str.0, align 8, !dbg !15 ; <[0 x i8]*> [#uses=1]
+  %20 = load [0 x i8]*, [0 x i8]** %str.0, align 8, !dbg !15 ; <[0 x i8]*> [#uses=1]
   %21 = getelementptr inbounds [0 x i8], [0 x i8]* %20, i64 0, i64 0, !dbg !15 ; <i8*> [#uses=1]
   store i8 0, i8* %21, align 1, !dbg !15
-  %22 = load [0 x i8]** %str.0, align 8, !dbg !16 ; <[0 x i8]*> [#uses=1]
+  %22 = load [0 x i8]*, [0 x i8]** %str.0, align 8, !dbg !16 ; <[0 x i8]*> [#uses=1]
   %23 = getelementptr inbounds [0 x i8], [0 x i8]* %22, i64 0, i64 0, !dbg !16 ; <i8*> [#uses=1]
-  %24 = load i8* %23, align 1, !dbg !16           ; <i8> [#uses=1]
+  %24 = load i8, i8* %23, align 1, !dbg !16           ; <i8> [#uses=1]
   %25 = sext i8 %24 to i32, !dbg !16              ; <i32> [#uses=1]
   store i32 %25, i32* %0, align 4, !dbg !16
-  %26 = load i8** %saved_stack.1, align 8, !dbg !16 ; <i8*> [#uses=1]
+  %26 = load i8*, i8** %saved_stack.1, align 8, !dbg !16 ; <i8*> [#uses=1]
   call void @llvm.stackrestore(i8* %26), !dbg !16
-  %27 = load i32* %0, align 4, !dbg !16           ; <i32> [#uses=1]
+  %27 = load i32, i32* %0, align 4, !dbg !16           ; <i32> [#uses=1]
   store i32 %27, i32* %retval, align 4, !dbg !16
   br label %return, !dbg !16
 
 return:                                           ; preds = %entry
-  %retval1 = load i32* %retval, !dbg !16          ; <i32> [#uses=1]
+  %retval1 = load i32, i32* %retval, !dbg !16          ; <i32> [#uses=1]
   %retval12 = trunc i32 %retval1 to i8, !dbg !16  ; <i8> [#uses=1]
   ret i8 %retval12, !dbg !16
 }
diff --git a/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
index 064b575..db31333 100644
--- a/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
+++ b/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll
@@ -33,17 +33,17 @@
 	br i1 %cond.i, label %bb.i, label %bb4
 
 bb.i:		; preds = %bb26
-	%3 = load i32* null, align 4		; <i32> [#uses=1]
+	%3 = load i32, i32* null, align 4		; <i32> [#uses=1]
 	%4 = uitofp i32 %3 to float		; <float> [#uses=1]
 	%.sum13.i = add i64 0, 4		; <i64> [#uses=1]
 	%5 = getelementptr i8, i8* null, i64 %.sum13.i		; <i8*> [#uses=1]
 	%6 = bitcast i8* %5 to i32*		; <i32*> [#uses=1]
-	%7 = load i32* %6, align 4		; <i32> [#uses=1]
+	%7 = load i32, i32* %6, align 4		; <i32> [#uses=1]
 	%8 = uitofp i32 %7 to float		; <float> [#uses=1]
 	%.sum.i = add i64 0, 8		; <i64> [#uses=1]
 	%9 = getelementptr i8, i8* null, i64 %.sum.i		; <i8*> [#uses=1]
 	%10 = bitcast i8* %9 to i32*		; <i32*> [#uses=1]
-	%11 = load i32* %10, align 4		; <i32> [#uses=1]
+	%11 = load i32, i32* %10, align 4		; <i32> [#uses=1]
 	%12 = uitofp i32 %11 to float		; <float> [#uses=1]
 	%13 = insertelement <4 x float> undef, float %4, i32 0		; <<4 x float>> [#uses=1]
 	%14 = insertelement <4 x float> %13, float %8, i32 1		; <<4 x float>> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2009-03-03-BTHang.ll b/llvm/test/CodeGen/X86/2009-03-03-BTHang.ll
index 626117d7..d6d24cd 100644
--- a/llvm/test/CodeGen/X86/2009-03-03-BTHang.ll
+++ b/llvm/test/CodeGen/X86/2009-03-03-BTHang.ll
@@ -10,7 +10,7 @@
 	%1 = and i32 %0, -4096		; <i32> [#uses=1]
 	%2 = inttoptr i32 %1 to %struct.HandleBlock*		; <%struct.HandleBlock*> [#uses=3]
 	%3 = getelementptr %struct.HandleBlock, %struct.HandleBlock* %2, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
-	%4 = load i32* %3, align 4096		; <i32> [#uses=1]
+	%4 = load i32, i32* %3, align 4096		; <i32> [#uses=1]
 	%5 = icmp eq i32 %4, 1751280747		; <i1> [#uses=1]
 	br i1 %5, label %bb, label %bb1
 
@@ -25,7 +25,7 @@
 	%not.i = and i32 %9, 31		; <i32> [#uses=1]
 	%13 = xor i32 %not.i, 31		; <i32> [#uses=1]
 	%14 = shl i32 1, %13		; <i32> [#uses=1]
-	%15 = load i32* %12, align 4		; <i32> [#uses=1]
+	%15 = load i32, i32* %12, align 4		; <i32> [#uses=1]
 	%16 = and i32 %15, %14		; <i32> [#uses=1]
 	%17 = icmp eq i32 %16, 0		; <i1> [#uses=1]
 	%tmp = zext i1 %17 to i8		; <i8> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2009-03-05-burr-list-crash.ll b/llvm/test/CodeGen/X86/2009-03-05-burr-list-crash.ll
index 8eb44b5..853bb16 100644
--- a/llvm/test/CodeGen/X86/2009-03-05-burr-list-crash.ll
+++ b/llvm/test/CodeGen/X86/2009-03-05-burr-list-crash.ll
@@ -15,7 +15,7 @@
 ; <label>:3		; preds = %1
 	%4 = call i64 @strlen(i8* %0) nounwind readonly		; <i64> [#uses=1]
 	%5 = trunc i64 %4 to i32		; <i32> [#uses=2]
-	%6 = load i32* @0, align 4		; <i32> [#uses=1]
+	%6 = load i32, i32* @0, align 4		; <i32> [#uses=1]
 	%7 = sub i32 %5, %6		; <i32> [#uses=2]
 	%8 = sext i32 %5 to i64		; <i64> [#uses=1]
 	%9 = sext i32 %7 to i64		; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2009-03-09-APIntCrash.ll b/llvm/test/CodeGen/X86/2009-03-09-APIntCrash.ll
index 896c968..3bff7dc 100644
--- a/llvm/test/CodeGen/X86/2009-03-09-APIntCrash.ll
+++ b/llvm/test/CodeGen/X86/2009-03-09-APIntCrash.ll
@@ -7,7 +7,7 @@
 	br i1 false, label %if.then, label %return
 
 if.then:		; preds = %entry
-	%srcval18 = load i128* null, align 8		; <i128> [#uses=1]
+	%srcval18 = load i128, i128* null, align 8		; <i128> [#uses=1]
 	%tmp15 = lshr i128 %srcval18, 64		; <i128> [#uses=1]
 	%tmp9 = mul i128 %tmp15, 18446744073709551616000		; <i128> [#uses=1]
 	br label %return
diff --git a/llvm/test/CodeGen/X86/2009-03-10-CoalescerBug.ll b/llvm/test/CodeGen/X86/2009-03-10-CoalescerBug.ll
index 90dff88..38dd2fa 100644
--- a/llvm/test/CodeGen/X86/2009-03-10-CoalescerBug.ll
+++ b/llvm/test/CodeGen/X86/2009-03-10-CoalescerBug.ll
@@ -8,7 +8,7 @@
 
 define i32 @pnoutrefresh(%struct.WINDOW* %win, i32 %pminrow, i32 %pmincol, i32 %sminrow, i32 %smincol, i32 %smaxrow, i32 %smaxcol) nounwind optsize ssp {
 entry:
-	%0 = load i16* null, align 4		; <i16> [#uses=2]
+	%0 = load i16, i16* null, align 4		; <i16> [#uses=2]
 	%1 = icmp sgt i16 0, %0		; <i1> [#uses=1]
 	br i1 %1, label %bb12, label %bb13
 
diff --git a/llvm/test/CodeGen/X86/2009-03-23-LinearScanBug.ll b/llvm/test/CodeGen/X86/2009-03-23-LinearScanBug.ll
index 06dfdc0..ba04364 100644
--- a/llvm/test/CodeGen/X86/2009-03-23-LinearScanBug.ll
+++ b/llvm/test/CodeGen/X86/2009-03-23-LinearScanBug.ll
@@ -2,9 +2,9 @@
 
 define fastcc void @optimize_bit_field() nounwind {
 bb4:
-        %a = load i32* null             ; <i32> [#uses=1]
-        %s = load i32* getelementptr (i32* null, i32 1)         ; <i32> [#uses=1]
-        %z = load i32* getelementptr (i32* null, i32 2)         ; <i32> [#uses=1]
+        %a = load i32, i32* null             ; <i32> [#uses=1]
+        %s = load i32, i32* getelementptr (i32* null, i32 1)         ; <i32> [#uses=1]
+        %z = load i32, i32* getelementptr (i32* null, i32 2)         ; <i32> [#uses=1]
         %r = bitcast i32 0 to i32          ; <i32> [#uses=1]
         %q = trunc i32 %z to i8            ; <i8> [#uses=1]
         %b = icmp eq i8 0, %q              ; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
index bbc1d34..276d523 100644
--- a/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
+++ b/llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll
@@ -10,30 +10,30 @@
 @X = external global i64		; <i64*> [#uses=25]
 
 define fastcc i64 @foo() nounwind {
-	%tmp = load volatile i64* @X		; <i64> [#uses=7]
-	%tmp1 = load volatile i64* @X		; <i64> [#uses=5]
-	%tmp2 = load volatile i64* @X		; <i64> [#uses=3]
-	%tmp3 = load volatile i64* @X		; <i64> [#uses=1]
-	%tmp4 = load volatile i64* @X		; <i64> [#uses=5]
-	%tmp5 = load volatile i64* @X		; <i64> [#uses=3]
-	%tmp6 = load volatile i64* @X		; <i64> [#uses=2]
-	%tmp7 = load volatile i64* @X		; <i64> [#uses=1]
-	%tmp8 = load volatile i64* @X		; <i64> [#uses=1]
-	%tmp9 = load volatile i64* @X		; <i64> [#uses=1]
-	%tmp10 = load volatile i64* @X		; <i64> [#uses=1]
-	%tmp11 = load volatile i64* @X		; <i64> [#uses=1]
-	%tmp12 = load volatile i64* @X		; <i64> [#uses=1]
-	%tmp13 = load volatile i64* @X		; <i64> [#uses=1]
-	%tmp14 = load volatile i64* @X		; <i64> [#uses=1]
-	%tmp15 = load volatile i64* @X		; <i64> [#uses=1]
-	%tmp16 = load volatile i64* @X		; <i64> [#uses=1]
-	%tmp17 = load volatile i64* @X		; <i64> [#uses=1]
-	%tmp18 = load volatile i64* @X		; <i64> [#uses=1]
-	%tmp19 = load volatile i64* @X		; <i64> [#uses=1]
-	%tmp20 = load volatile i64* @X		; <i64> [#uses=1]
-	%tmp21 = load volatile i64* @X		; <i64> [#uses=1]
-	%tmp22 = load volatile i64* @X		; <i64> [#uses=1]
-	%tmp23 = load volatile i64* @X		; <i64> [#uses=1]
+	%tmp = load volatile i64, i64* @X		; <i64> [#uses=7]
+	%tmp1 = load volatile i64, i64* @X		; <i64> [#uses=5]
+	%tmp2 = load volatile i64, i64* @X		; <i64> [#uses=3]
+	%tmp3 = load volatile i64, i64* @X		; <i64> [#uses=1]
+	%tmp4 = load volatile i64, i64* @X		; <i64> [#uses=5]
+	%tmp5 = load volatile i64, i64* @X		; <i64> [#uses=3]
+	%tmp6 = load volatile i64, i64* @X		; <i64> [#uses=2]
+	%tmp7 = load volatile i64, i64* @X		; <i64> [#uses=1]
+	%tmp8 = load volatile i64, i64* @X		; <i64> [#uses=1]
+	%tmp9 = load volatile i64, i64* @X		; <i64> [#uses=1]
+	%tmp10 = load volatile i64, i64* @X		; <i64> [#uses=1]
+	%tmp11 = load volatile i64, i64* @X		; <i64> [#uses=1]
+	%tmp12 = load volatile i64, i64* @X		; <i64> [#uses=1]
+	%tmp13 = load volatile i64, i64* @X		; <i64> [#uses=1]
+	%tmp14 = load volatile i64, i64* @X		; <i64> [#uses=1]
+	%tmp15 = load volatile i64, i64* @X		; <i64> [#uses=1]
+	%tmp16 = load volatile i64, i64* @X		; <i64> [#uses=1]
+	%tmp17 = load volatile i64, i64* @X		; <i64> [#uses=1]
+	%tmp18 = load volatile i64, i64* @X		; <i64> [#uses=1]
+	%tmp19 = load volatile i64, i64* @X		; <i64> [#uses=1]
+	%tmp20 = load volatile i64, i64* @X		; <i64> [#uses=1]
+	%tmp21 = load volatile i64, i64* @X		; <i64> [#uses=1]
+	%tmp22 = load volatile i64, i64* @X		; <i64> [#uses=1]
+	%tmp23 = load volatile i64, i64* @X		; <i64> [#uses=1]
 	%tmp24 = call i64 @llvm.bswap.i64(i64 %tmp8)		; <i64> [#uses=1]
 	%tmp25 = add i64 %tmp6, %tmp5		; <i64> [#uses=1]
 	%tmp26 = add i64 %tmp25, %tmp4		; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2009-03-25-TestBug.ll b/llvm/test/CodeGen/X86/2009-03-25-TestBug.ll
index cc1d73d..b8b6d92 100644
--- a/llvm/test/CodeGen/X86/2009-03-25-TestBug.ll
+++ b/llvm/test/CodeGen/X86/2009-03-25-TestBug.ll
@@ -9,7 +9,7 @@
 
 define void @func(i32* %b) nounwind {
 bb1579.i.i:		; preds = %bb1514.i.i, %bb191.i.i
-	%tmp176 = load i32* %b, align 4
+	%tmp176 = load i32, i32* %b, align 4
 	%tmp177 = and i32 %tmp176, 2
 	%tmp178 = icmp eq i32 %tmp177, 0
         br i1 %tmp178, label %hello, label %world
diff --git a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
index 24a1590..1e5e933 100644
--- a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
+++ b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
@@ -15,7 +15,7 @@
 	store i8 48, i8* %2, align 1
 	%3 = getelementptr %struct.X, %struct.X* %xxx, i32 0, i32 1		; <[32 x i8]*> [#uses=1]
 	%4 = getelementptr [32 x i8], [32 x i8]* %3, i32 0, i32 31		; <i8*> [#uses=1]
-	%5 = load i8* %4, align 1		; <i8> [#uses=1]
+	%5 = load i8, i8* %4, align 1		; <i8> [#uses=1]
 	%6 = getelementptr %struct.X, %struct.X* %xxx, i32 0, i32 1		; <[32 x i8]*> [#uses=1]
 	%7 = getelementptr [32 x i8], [32 x i8]* %6, i32 0, i32 0		; <i8*> [#uses=1]
 	store i8 %5, i8* %7, align 1
@@ -23,12 +23,12 @@
 	store i8 15, i8* %8, align 1
 	%9 = call i32 (...)* bitcast (i32 (%struct.X*, %struct.X*)* @f to i32 (...)*)(%struct.X* byval align 4 %xxx, %struct.X* byval align 4 %xxx) nounwind		; <i32> [#uses=1]
 	store i32 %9, i32* %0, align 4
-	%10 = load i32* %0, align 4		; <i32> [#uses=1]
+	%10 = load i32, i32* %0, align 4		; <i32> [#uses=1]
 	store i32 %10, i32* %retval, align 4
 	br label %return
 
 return:		; preds = %entry
-	%retval1 = load i32* %retval		; <i32> [#uses=1]
+	%retval1 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval1
 }
 
diff --git a/llvm/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll b/llvm/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
index fafab4d..1d03a1b 100644
--- a/llvm/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
+++ b/llvm/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll
@@ -72,7 +72,7 @@
 	%47 = and i64 %20, %not417		; <i64> [#uses=1]
 	%48 = xor i64 0, %47		; <i64> [#uses=1]
 	%49 = getelementptr [80 x i64], [80 x i64]* @K512, i64 0, i64 0		; <i64*> [#uses=1]
-	%50 = load i64* %49, align 8		; <i64> [#uses=1]
+	%50 = load i64, i64* %49, align 8		; <i64> [#uses=1]
 	%51 = add i64 %48, 0		; <i64> [#uses=1]
 	%52 = add i64 %51, 0		; <i64> [#uses=1]
 	%53 = add i64 %52, 0		; <i64> [#uses=1]
@@ -88,12 +88,12 @@
 	%61 = and i32 %60, 15		; <i32> [#uses=1]
 	%62 = zext i32 %61 to i64		; <i64> [#uses=1]
 	%63 = getelementptr [16 x i64], [16 x i64]* null, i64 0, i64 %62		; <i64*> [#uses=2]
-	%64 = load i64* null, align 8		; <i64> [#uses=1]
+	%64 = load i64, i64* null, align 8		; <i64> [#uses=1]
 	%65 = lshr i64 %64, 6		; <i64> [#uses=1]
 	%66 = xor i64 0, %65		; <i64> [#uses=1]
 	%67 = xor i64 %66, 0		; <i64> [#uses=1]
-	%68 = load i64* %46, align 8		; <i64> [#uses=1]
-	%69 = load i64* null, align 8		; <i64> [#uses=1]
+	%68 = load i64, i64* %46, align 8		; <i64> [#uses=1]
+	%69 = load i64, i64* null, align 8		; <i64> [#uses=1]
 	%70 = add i64 %68, 0		; <i64> [#uses=1]
 	%71 = add i64 %70, %67		; <i64> [#uses=1]
 	%72 = add i64 %71, %69		; <i64> [#uses=1]
@@ -106,7 +106,7 @@
 	%76 = and i64 %33, %not429		; <i64> [#uses=1]
 	%77 = xor i64 %75, %76		; <i64> [#uses=1]
 	%78 = getelementptr [80 x i64], [80 x i64]* @K512, i64 0, i64 0		; <i64*> [#uses=1]
-	%79 = load i64* %78, align 16		; <i64> [#uses=1]
+	%79 = load i64, i64* %78, align 16		; <i64> [#uses=1]
 	%80 = add i64 %77, %20		; <i64> [#uses=1]
 	%81 = add i64 %80, %72		; <i64> [#uses=1]
 	%82 = add i64 %81, %74		; <i64> [#uses=1]
@@ -119,14 +119,14 @@
 	%87 = add i64 0, %85		; <i64> [#uses=1]
 	%asmtmp435 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 8, i64 0) nounwind		; <i64> [#uses=1]
 	%88 = xor i64 0, %asmtmp435		; <i64> [#uses=1]
-	%89 = load i64* null, align 8		; <i64> [#uses=3]
+	%89 = load i64, i64* null, align 8		; <i64> [#uses=3]
 	%asmtmp436 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 19, i64 %89) nounwind		; <i64> [#uses=1]
 	%asmtmp437 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 61, i64 %89) nounwind		; <i64> [#uses=1]
 	%90 = lshr i64 %89, 6		; <i64> [#uses=1]
 	%91 = xor i64 %asmtmp436, %90		; <i64> [#uses=1]
 	%92 = xor i64 %91, %asmtmp437		; <i64> [#uses=1]
-	%93 = load i64* %63, align 8		; <i64> [#uses=1]
-	%94 = load i64* null, align 8		; <i64> [#uses=1]
+	%93 = load i64, i64* %63, align 8		; <i64> [#uses=1]
+	%94 = load i64, i64* null, align 8		; <i64> [#uses=1]
 	%95 = add i64 %93, %88		; <i64> [#uses=1]
 	%96 = add i64 %95, %92		; <i64> [#uses=1]
 	%97 = add i64 %96, %94		; <i64> [#uses=2]
diff --git a/llvm/test/CodeGen/X86/2009-04-24.ll b/llvm/test/CodeGen/X86/2009-04-24.ll
index d104c87..7647dcc 100644
--- a/llvm/test/CodeGen/X86/2009-04-24.ll
+++ b/llvm/test/CodeGen/X86/2009-04-24.ll
@@ -8,6 +8,6 @@
 
 define i32 @f() {
 entry:
-	%tmp1 = load i32* @i
+	%tmp1 = load i32, i32* @i
 	ret i32 %tmp1
 }
diff --git a/llvm/test/CodeGen/X86/2009-04-25-CoalescerBug.ll b/llvm/test/CodeGen/X86/2009-04-25-CoalescerBug.ll
index 94d3eb2..c687b69 100644
--- a/llvm/test/CodeGen/X86/2009-04-25-CoalescerBug.ll
+++ b/llvm/test/CodeGen/X86/2009-04-25-CoalescerBug.ll
@@ -6,7 +6,7 @@
 	br label %while.cond
 
 while.cond:		; preds = %while.cond, %entry
-	%tmp15 = load i32* %tmp13		; <i32> [#uses=2]
+	%tmp15 = load i32, i32* %tmp13		; <i32> [#uses=2]
 	%bf.lo = lshr i32 %tmp15, 1		; <i32> [#uses=1]
 	%bf.lo.cleared = and i32 %bf.lo, 2147483647		; <i32> [#uses=1]
 	%conv = zext i32 %bf.lo.cleared to i64		; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll b/llvm/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll
index 84cf341..a364c89 100644
--- a/llvm/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll
+++ b/llvm/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll
@@ -22,9 +22,9 @@
 define void @getAffNeighbour(i32 %curr_mb_nr, i32 %xN, i32 %yN, i32 %is_chroma, %struct.PixelPos* %pix) nounwind {
 entry:
 	%Opq.sa.calc = add i32 0, 2		; <i32> [#uses=2]
-	%0 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=3]
+	%0 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=3]
 	%1 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %0, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
-	%2 = load %struct.Macroblock** %1, align 8		; <%struct.Macroblock*> [#uses=24]
+	%2 = load %struct.Macroblock*, %struct.Macroblock** %1, align 8		; <%struct.Macroblock*> [#uses=24]
 	%3 = zext i32 %curr_mb_nr to i64		; <i64> [#uses=24]
 	%4 = sext i32 %is_chroma to i64		; <i64> [#uses=8]
 	br label %meshBB392
@@ -32,9 +32,9 @@
 entry.fragment:		; preds = %meshBB392
 	%Opq.sa.calc747 = add i32 %Opq.sa.calc921, 70		; <i32> [#uses=0]
 	%5 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %0, i64 0, i32 119, i64 %4, i64 0		; <i32*> [#uses=1]
-	%6 = load i32* %5, align 4		; <i32> [#uses=2]
+	%6 = load i32, i32* %5, align 4		; <i32> [#uses=2]
 	%7 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %0, i64 0, i32 119, i64 %4, i64 1		; <i32*> [#uses=1]
-	%8 = load i32* %7, align 4		; <i32> [#uses=5]
+	%8 = load i32, i32* %7, align 4		; <i32> [#uses=5]
 	br label %entry.fragment181
 
 entry.fragment181:		; preds = %entry.fragment
@@ -75,7 +75,7 @@
 bb5:		; preds = %meshBB428
 	%Opq.sa.calc470 = sub i32 %Opq.sa.calc897, -49		; <i32> [#uses=1]
 	%17 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 20		; <i32*> [#uses=1]
-	%18 = load i32* %17, align 4		; <i32> [#uses=1]
+	%18 = load i32, i32* %17, align 4		; <i32> [#uses=1]
 	br label %bb5.fragment
 
 bb5.fragment:		; preds = %bb5
@@ -92,7 +92,7 @@
 bb7:		; preds = %bb6
 	%Opq.sa.calc476 = add i32 %Opq.sa.calc873, -58		; <i32> [#uses=1]
 	%22 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 25		; <i32*> [#uses=1]
-	%23 = load i32* %22, align 8		; <i32> [#uses=1]
+	%23 = load i32, i32* %22, align 8		; <i32> [#uses=1]
 	%24 = add i32 %23, 1		; <i32> [#uses=1]
 	%25 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=1]
 	br label %meshBB388
@@ -103,14 +103,14 @@
 	%Opq.sa.calc708 = xor i32 %Opq.sa.calc707, 474		; <i32> [#uses=0]
 	store i32 %.SV194.phi, i32* %.SV196.phi, align 4
 	%26 = getelementptr %struct.Macroblock, %struct.Macroblock* %.load17.SV.phi, i64 %.load36.SV.phi, i32 29		; <i32*> [#uses=1]
-	%27 = load i32* %26, align 8		; <i32> [#uses=2]
+	%27 = load i32, i32* %26, align 8		; <i32> [#uses=2]
 	store i32 %27, i32* %.load67.SV.phi, align 4
 	br label %bb96
 
 bb8:		; preds = %meshBB348
 	%Opq.sa.calc479 = sub i32 %Opq.sa.calc805, 141		; <i32> [#uses=1]
 	%28 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 22		; <i32*> [#uses=2]
-	%29 = load i32* %28, align 4		; <i32> [#uses=2]
+	%29 = load i32, i32* %28, align 4		; <i32> [#uses=2]
 	%30 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=2]
 	br label %meshBB368
 
@@ -118,25 +118,25 @@
 	%Opq.sa.calc765 = sub i32 %Opq.sa.calc768, -115		; <i32> [#uses=2]
 	store i32 %.SV198.phi, i32* %.SV200.phi, align 4
 	%31 = getelementptr %struct.Macroblock, %struct.Macroblock* %.load16.SV.phi, i64 %.load35.SV.phi, i32 26		; <i32*> [#uses=2]
-	%32 = load i32* %31, align 4		; <i32> [#uses=4]
+	%32 = load i32, i32* %31, align 4		; <i32> [#uses=4]
 	store i32 %32, i32* %.load66.SV.phi, align 4
-	%33 = load i32* %31, align 4		; <i32> [#uses=1]
+	%33 = load i32, i32* %31, align 4		; <i32> [#uses=1]
 	%34 = icmp eq i32 %33, 0		; <i1> [#uses=1]
 	br i1 %34, label %bb96, label %bb9
 
 bb9:		; preds = %bb8.fragment
 	%Opq.sa.calc482 = xor i32 %Opq.sa.calc765, 163		; <i32> [#uses=0]
-	%35 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%35 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
 	%36 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %35, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
-	%37 = load %struct.Macroblock** %36, align 8		; <%struct.Macroblock*> [#uses=1]
-	%38 = load i32* %.SV76.phi, align 4		; <i32> [#uses=1]
+	%37 = load %struct.Macroblock*, %struct.Macroblock** %36, align 8		; <%struct.Macroblock*> [#uses=1]
+	%38 = load i32, i32* %.SV76.phi, align 4		; <i32> [#uses=1]
 	br label %bb9.fragment
 
 bb9.fragment:		; preds = %bb9
 	%Opq.sa.calc999 = add i32 %Opq.sa.calc765, -44		; <i32> [#uses=1]
 	%39 = sext i32 %38 to i64		; <i64> [#uses=1]
 	%40 = getelementptr %struct.Macroblock, %struct.Macroblock* %37, i64 %39, i32 20		; <i32*> [#uses=1]
-	%41 = load i32* %40, align 4		; <i32> [#uses=1]
+	%41 = load i32, i32* %40, align 4		; <i32> [#uses=1]
 	%42 = icmp eq i32 %41, 0		; <i1> [#uses=1]
 	br i1 %42, label %bb96, label %bb11
 
@@ -161,7 +161,7 @@
 
 bb13.fragment:		; preds = %meshBB360
 	%Opq.sa.calc870 = add i32 %Opq.sa.calc866, -129		; <i32> [#uses=3]
-	%47 = load i32* %.SV208.phi, align 8		; <i32> [#uses=3]
+	%47 = load i32, i32* %.SV208.phi, align 8		; <i32> [#uses=3]
 	br i1 %.load74.SV.phi, label %bb14, label %meshBB412
 
 bb14:		; preds = %bb13.fragment
@@ -173,25 +173,25 @@
 
 bb14.fragment:		; preds = %bb14
 	%Opq.sa.calc723 = sub i32 %Opq.sa.calc493, 117		; <i32> [#uses=4]
-	%50 = load i32* %49, align 8		; <i32> [#uses=4]
+	%50 = load i32, i32* %49, align 8		; <i32> [#uses=4]
 	store i32 %50, i32* %.SV52.phi1113, align 4
-	%51 = load i32* %49, align 8		; <i32> [#uses=1]
+	%51 = load i32, i32* %49, align 8		; <i32> [#uses=1]
 	%52 = icmp eq i32 %51, 0		; <i1> [#uses=1]
 	br i1 %52, label %meshBB, label %bb15
 
 bb15:		; preds = %bb14.fragment
 	%Opq.sa.calc496 = sub i32 %Opq.sa.calc723, -8		; <i32> [#uses=1]
-	%53 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%53 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
 	%54 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %53, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
-	%55 = load %struct.Macroblock** %54, align 8		; <%struct.Macroblock*> [#uses=1]
-	%56 = load i32* %.SV208.phi, align 8		; <i32> [#uses=1]
+	%55 = load %struct.Macroblock*, %struct.Macroblock** %54, align 8		; <%struct.Macroblock*> [#uses=1]
+	%56 = load i32, i32* %.SV208.phi, align 8		; <i32> [#uses=1]
 	br label %meshBB324
 
 bb15.fragment:		; preds = %meshBB324
 	%Opq.sa.calc925 = xor i32 %Opq.sa.calc750, 215		; <i32> [#uses=2]
 	%57 = sext i32 %.SV214.phi to i64		; <i64> [#uses=1]
 	%58 = getelementptr %struct.Macroblock, %struct.Macroblock* %.SV212.phi, i64 %57, i32 20		; <i32*> [#uses=1]
-	%59 = load i32* %58, align 4		; <i32> [#uses=1]
+	%59 = load i32, i32* %58, align 4		; <i32> [#uses=1]
 	%60 = icmp eq i32 %59, 0		; <i1> [#uses=1]
 	br i1 %60, label %bb16, label %bb96
 
@@ -216,7 +216,7 @@
 	%Opq.sa.calc880 = xor i32 %Opq.sa.calc932, 246		; <i32> [#uses=0]
 	store i32 %63, i32* %64, align 4
 	%65 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 29		; <i32*> [#uses=1]
-	%66 = load i32* %65, align 8		; <i32> [#uses=2]
+	%66 = load i32, i32* %65, align 8		; <i32> [#uses=2]
 	store i32 %66, i32* %.SV52.phi1186, align 4
 	br label %bb96
 
@@ -228,7 +228,7 @@
 	%Opq.sa.calc509 = xor i32 %Opq.sa.calc866, 70		; <i32> [#uses=1]
 	%Opq.sa.calc508 = sub i32 %Opq.sa.calc509, -19		; <i32> [#uses=0]
 	%67 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 20		; <i32*> [#uses=1]
-	%68 = load i32* %67, align 4		; <i32> [#uses=1]
+	%68 = load i32, i32* %67, align 4		; <i32> [#uses=1]
 	%69 = icmp eq i32 %68, 0		; <i1> [#uses=1]
 	%70 = and i32 %curr_mb_nr, 1		; <i32> [#uses=1]
 	%71 = icmp eq i32 %70, 0		; <i1> [#uses=2]
@@ -237,7 +237,7 @@
 bb23.fragment:		; preds = %bb23
 	%Opq.sa.calc847 = sub i32 %Opq.sa.calc866, -9		; <i32> [#uses=2]
 	%72 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 22		; <i32*> [#uses=3]
-	%73 = load i32* %72, align 4		; <i32> [#uses=3]
+	%73 = load i32, i32* %72, align 4		; <i32> [#uses=3]
 	%74 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=3]
 	store i32 %73, i32* %74, align 4
 	br label %bb23.fragment182
@@ -247,9 +247,9 @@
 	%Opq.sa.calc742 = add i32 %Opq.sa.calc744, %Opq.sa.calc847		; <i32> [#uses=1]
 	%Opq.sa.calc743 = add i32 %Opq.sa.calc742, -149		; <i32> [#uses=2]
 	%75 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 26		; <i32*> [#uses=2]
-	%76 = load i32* %75, align 4		; <i32> [#uses=3]
+	%76 = load i32, i32* %75, align 4		; <i32> [#uses=3]
 	store i32 %76, i32* %.SV52.phi1113, align 4
-	%77 = load i32* %75, align 4		; <i32> [#uses=1]
+	%77 = load i32, i32* %75, align 4		; <i32> [#uses=1]
 	%78 = icmp ne i32 %77, 0		; <i1> [#uses=2]
 	br i1 %69, label %meshBB344, label %meshBB432
 
@@ -264,10 +264,10 @@
 bb26:		; preds = %bb25
 	%Opq.sa.calc519 = xor i32 %Opq.sa.calc515, 23		; <i32> [#uses=2]
 	%Opq.sa.calc518 = xor i32 %Opq.sa.calc519, 84		; <i32> [#uses=1]
-	%79 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%79 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
 	%80 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %79, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
-	%81 = load %struct.Macroblock** %80, align 8		; <%struct.Macroblock*> [#uses=1]
-	%82 = load i32* %.SV99.phi, align 4		; <i32> [#uses=1]
+	%81 = load %struct.Macroblock*, %struct.Macroblock** %80, align 8		; <%struct.Macroblock*> [#uses=1]
+	%82 = load i32, i32* %.SV99.phi, align 4		; <i32> [#uses=1]
 	br label %meshBB340
 
 bb26.fragment:		; preds = %meshBB340
@@ -276,7 +276,7 @@
 	%Opq.sa.calc917 = add i32 %Opq.sa.calc916, -237		; <i32> [#uses=1]
 	%83 = sext i32 %.SV230.phi to i64		; <i64> [#uses=1]
 	%84 = getelementptr %struct.Macroblock, %struct.Macroblock* %.SV228.phi, i64 %83, i32 20		; <i32*> [#uses=1]
-	%85 = load i32* %84, align 4		; <i32> [#uses=1]
+	%85 = load i32, i32* %84, align 4		; <i32> [#uses=1]
 	%86 = icmp eq i32 %85, 0		; <i1> [#uses=1]
 	br i1 %86, label %meshBB420, label %meshBB356
 
@@ -308,17 +308,17 @@
 
 bb33:		; preds = %bb32
 	%Opq.sa.calc534 = sub i32 %Opq.sa.calc512, -75		; <i32> [#uses=2]
-	%92 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%92 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
 	%93 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %92, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
-	%94 = load %struct.Macroblock** %93, align 8		; <%struct.Macroblock*> [#uses=1]
-	%95 = load i32* %.SV99.phi, align 4		; <i32> [#uses=1]
+	%94 = load %struct.Macroblock*, %struct.Macroblock** %93, align 8		; <%struct.Macroblock*> [#uses=1]
+	%95 = load i32, i32* %.SV99.phi, align 4		; <i32> [#uses=1]
 	br label %bb33.fragment
 
 bb33.fragment:		; preds = %bb33
 	%Opq.sa.calc712 = add i32 %Opq.sa.calc534, -109		; <i32> [#uses=3]
 	%96 = sext i32 %95 to i64		; <i64> [#uses=1]
 	%97 = getelementptr %struct.Macroblock, %struct.Macroblock* %94, i64 %96, i32 20		; <i32*> [#uses=1]
-	%98 = load i32* %97, align 4		; <i32> [#uses=1]
+	%98 = load i32, i32* %97, align 4		; <i32> [#uses=1]
 	%99 = icmp eq i32 %98, 0		; <i1> [#uses=1]
 	br i1 %99, label %bb34, label %meshBB
 
@@ -372,17 +372,17 @@
 
 bb41:		; preds = %meshBB336
 	%Opq.sa.calc557 = sub i32 %Opq.sa.calc979, 143		; <i32> [#uses=1]
-	%108 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%108 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
 	%109 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %108, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
-	%110 = load %struct.Macroblock** %109, align 8		; <%struct.Macroblock*> [#uses=1]
-	%111 = load i32* %.SV99.phi1128, align 4		; <i32> [#uses=1]
+	%110 = load %struct.Macroblock*, %struct.Macroblock** %109, align 8		; <%struct.Macroblock*> [#uses=1]
+	%111 = load i32, i32* %.SV99.phi1128, align 4		; <i32> [#uses=1]
 	br label %bb41.fragment
 
 bb41.fragment:		; preds = %bb41
 	%Opq.sa.calc987 = xor i32 %Opq.sa.calc557, 213		; <i32> [#uses=4]
 	%112 = sext i32 %111 to i64		; <i64> [#uses=1]
 	%113 = getelementptr %struct.Macroblock, %struct.Macroblock* %110, i64 %112, i32 20		; <i32*> [#uses=1]
-	%114 = load i32* %113, align 4		; <i32> [#uses=1]
+	%114 = load i32, i32* %113, align 4		; <i32> [#uses=1]
 	%115 = icmp eq i32 %114, 0		; <i1> [#uses=1]
 	br i1 %115, label %bb42, label %bb96
 
@@ -415,17 +415,17 @@
 
 bb49:		; preds = %bb48
 	%Opq.sa.calc572 = add i32 %Opq.sa.calc798, 84		; <i32> [#uses=0]
-	%122 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%122 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
 	%123 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %122, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
-	%124 = load %struct.Macroblock** %123, align 8		; <%struct.Macroblock*> [#uses=1]
-	%125 = load i32* %.SV99.phi1037, align 4		; <i32> [#uses=1]
+	%124 = load %struct.Macroblock*, %struct.Macroblock** %123, align 8		; <%struct.Macroblock*> [#uses=1]
+	%125 = load i32, i32* %.SV99.phi1037, align 4		; <i32> [#uses=1]
 	br label %bb49.fragment
 
 bb49.fragment:		; preds = %bb49
 	%Opq.sa.calc860 = sub i32 %Opq.sa.calc569, 114		; <i32> [#uses=5]
 	%126 = sext i32 %125 to i64		; <i64> [#uses=1]
 	%127 = getelementptr %struct.Macroblock, %struct.Macroblock* %124, i64 %126, i32 20		; <i32*> [#uses=1]
-	%128 = load i32* %127, align 4		; <i32> [#uses=1]
+	%128 = load i32, i32* %127, align 4		; <i32> [#uses=1]
 	%129 = icmp eq i32 %128, 0		; <i1> [#uses=1]
 	br i1 %129, label %bb50, label %meshBB380
 
@@ -485,7 +485,7 @@
 bb59:		; preds = %bb58
 	%Opq.sa.calc599 = add i32 %Opq.sa.calc1002, 151		; <i32> [#uses=0]
 	%141 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 20		; <i32*> [#uses=1]
-	%142 = load i32* %141, align 4		; <i32> [#uses=1]
+	%142 = load i32, i32* %141, align 4		; <i32> [#uses=1]
 	br label %bb59.fragment
 
 bb59.fragment:		; preds = %bb59
@@ -502,7 +502,7 @@
 bb61:		; preds = %bb60
 	%Opq.sa.calc605 = xor i32 %Opq.sa.calc731, 57		; <i32> [#uses=1]
 	%146 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 23		; <i32*> [#uses=2]
-	%147 = load i32* %146, align 8		; <i32> [#uses=3]
+	%147 = load i32, i32* %146, align 8		; <i32> [#uses=3]
 	%148 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=3]
 	br label %bb61.fragment
 
@@ -510,23 +510,23 @@
 	%Opq.sa.calc700 = sub i32 %Opq.sa.calc605, 108		; <i32> [#uses=3]
 	store i32 %147, i32* %148, align 4
 	%149 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 27		; <i32*> [#uses=4]
-	%150 = load i32* %149, align 8		; <i32> [#uses=1]
+	%150 = load i32, i32* %149, align 8		; <i32> [#uses=1]
 	%151 = icmp eq i32 %150, 0		; <i1> [#uses=1]
 	br i1 %151, label %bb65, label %bb62
 
 bb62:		; preds = %bb61.fragment
 	%Opq.sa.calc608 = add i32 %Opq.sa.calc700, -94		; <i32> [#uses=1]
-	%152 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=2]
+	%152 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=2]
 	%153 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %152, i64 0, i32 45		; <i32*> [#uses=1]
-	%154 = load i32* %153, align 4		; <i32> [#uses=1]
+	%154 = load i32, i32* %153, align 4		; <i32> [#uses=1]
 	%155 = icmp eq i32 %154, 1		; <i1> [#uses=1]
 	br i1 %155, label %bb63, label %bb64
 
 bb63:		; preds = %bb62
 	%Opq.sa.calc611 = add i32 %Opq.sa.calc700, -101		; <i32> [#uses=2]
 	%156 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %152, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
-	%157 = load %struct.Macroblock** %156, align 8		; <%struct.Macroblock*> [#uses=1]
-	%158 = load i32* %146, align 8		; <i32> [#uses=1]
+	%157 = load %struct.Macroblock*, %struct.Macroblock** %156, align 8		; <%struct.Macroblock*> [#uses=1]
+	%158 = load i32, i32* %146, align 8		; <i32> [#uses=1]
 	br label %meshBB452
 
 bb63.fragment:		; preds = %meshBB452
@@ -534,7 +534,7 @@
 	%Opq.sa.calc890 = add i32 %Opq.sa.calc891, -3		; <i32> [#uses=2]
 	%159 = sext i32 %.SV266.phi to i64		; <i64> [#uses=1]
 	%160 = getelementptr %struct.Macroblock, %struct.Macroblock* %.SV264.phi, i64 %159, i32 20		; <i32*> [#uses=1]
-	%161 = load i32* %160, align 4		; <i32> [#uses=1]
+	%161 = load i32, i32* %160, align 4		; <i32> [#uses=1]
 	%162 = icmp eq i32 %161, 0		; <i1> [#uses=1]
 	br i1 %162, label %bb64, label %meshBB456
 
@@ -562,7 +562,7 @@
 	%Opq.link.SV618.phi = phi i32 [ %Opq.sa.calc816, %meshBB456 ], [ %Opq.sa.calc700, %bb61.fragment ], [ %Opq.sa.calc614, %bb64 ]		; <i32> [#uses=1]
 	%Opq.link.mask620 = and i32 %Opq.link.SV618.phi, 40		; <i32> [#uses=1]
 	%Opq.sa.calc617 = add i32 %Opq.link.mask620, -35		; <i32> [#uses=2]
-	%164 = load i32* %.SV152.phi1058, align 8		; <i32> [#uses=1]
+	%164 = load i32, i32* %.SV152.phi1058, align 8		; <i32> [#uses=1]
 	br label %meshBB436
 
 bb65.fragment:		; preds = %meshBB436
@@ -590,7 +590,7 @@
 
 bb68.fragment:		; preds = %meshBB344
 	%Opq.sa.calc784 = sub i32 %Opq.link.mask722, 3		; <i32> [#uses=5]
-	%168 = load i32* %.SV274.phi, align 8		; <i32> [#uses=3]
+	%168 = load i32, i32* %.SV274.phi, align 8		; <i32> [#uses=3]
 	br i1 %.load144.SV.phi, label %bb69, label %meshBB412
 
 bb69:		; preds = %bb68.fragment
@@ -604,18 +604,18 @@
 	%Opq.sa.calc996 = sub i32 %Opq.sa.calc784, -9		; <i32> [#uses=3]
 	%Opq.sa.calc994 = sub i32 %Opq.sa.calc996, %Opq.sa.calc784		; <i32> [#uses=1]
 	%Opq.sa.calc995 = sub i32 %Opq.sa.calc994, 3		; <i32> [#uses=2]
-	%171 = load i32* %170, align 8		; <i32> [#uses=3]
+	%171 = load i32, i32* %170, align 8		; <i32> [#uses=3]
 	store i32 %171, i32* %.SV52.phi1170, align 4
-	%172 = load i32* %170, align 8		; <i32> [#uses=1]
+	%172 = load i32, i32* %170, align 8		; <i32> [#uses=1]
 	%173 = icmp eq i32 %172, 0		; <i1> [#uses=1]
 	br i1 %173, label %meshBB396, label %meshBB400
 
 bb70:		; preds = %meshBB400
 	%Opq.sa.calc630 = add i32 %Opq.sa.calc824, -203		; <i32> [#uses=2]
-	%174 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%174 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
 	%175 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %174, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
-	%176 = load %struct.Macroblock** %175, align 8		; <%struct.Macroblock*> [#uses=1]
-	%177 = load i32* %.SV156.phi, align 8		; <i32> [#uses=1]
+	%176 = load %struct.Macroblock*, %struct.Macroblock** %175, align 8		; <%struct.Macroblock*> [#uses=1]
+	%177 = load i32, i32* %.SV156.phi, align 8		; <i32> [#uses=1]
 	br label %meshBB428
 
 bb70.fragment:		; preds = %meshBB428
@@ -623,7 +623,7 @@
 	%Opq.sa.calc738 = sub i32 %Opq.sa.calc739, 1		; <i32> [#uses=2]
 	%178 = sext i32 %.SV280.phi to i64		; <i64> [#uses=1]
 	%179 = getelementptr %struct.Macroblock, %struct.Macroblock* %.SV278.phi, i64 %178, i32 20		; <i32*> [#uses=1]
-	%180 = load i32* %179, align 4		; <i32> [#uses=1]
+	%180 = load i32, i32* %179, align 4		; <i32> [#uses=1]
 	%181 = icmp eq i32 %180, 0		; <i1> [#uses=1]
 	br i1 %181, label %meshBB452, label %meshBB356
 
@@ -648,7 +648,7 @@
 	%Opq.sa.calc1011 = sub i32 %Opq.sa.calc636, -19		; <i32> [#uses=0]
 	store i32 %184, i32* %185, align 4
 	%186 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 27		; <i32*> [#uses=1]
-	%187 = load i32* %186, align 8		; <i32> [#uses=2]
+	%187 = load i32, i32* %186, align 8		; <i32> [#uses=2]
 	store i32 %187, i32* %.SV52.phi1186, align 4
 	br label %bb96
 
@@ -660,9 +660,9 @@
 
 bb77:		; preds = %bb76
 	%Opq.sa.calc643 = add i32 %Opq.sa.calc640, 2		; <i32> [#uses=2]
-	%189 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%189 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
 	%190 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %189, i64 0, i32 45		; <i32*> [#uses=1]
-	%191 = load i32* %190, align 4		; <i32> [#uses=1]
+	%191 = load i32, i32* %190, align 4		; <i32> [#uses=1]
 	%192 = icmp eq i32 %191, 2		; <i1> [#uses=1]
 	br i1 %192, label %meshBB416, label %bb79
 
@@ -670,7 +670,7 @@
 	%Opq.sa.calc647 = xor i32 %Opq.sa.calc971, 25		; <i32> [#uses=2]
 	%Opq.sa.calc646 = sub i32 %Opq.sa.calc647, 29		; <i32> [#uses=0]
 	%193 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 23		; <i32*> [#uses=1]
-	%194 = load i32* %193, align 8		; <i32> [#uses=1]
+	%194 = load i32, i32* %193, align 8		; <i32> [#uses=1]
 	%195 = add i32 %194, 1		; <i32> [#uses=1]
 	br label %bb78.fragment
 
@@ -703,7 +703,7 @@
 bb84:		; preds = %bb83
 	%Opq.sa.calc661 = xor i32 %Opq.sa.calc658, 22		; <i32> [#uses=1]
 	%199 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 20		; <i32*> [#uses=1]
-	%200 = load i32* %199, align 4		; <i32> [#uses=1]
+	%200 = load i32, i32* %199, align 4		; <i32> [#uses=1]
 	br label %meshBB400
 
 bb84.fragment:		; preds = %meshBB400
@@ -723,7 +723,7 @@
 bb86:		; preds = %meshBB336
 	%Opq.sa.calc670 = sub i32 %Opq.sa.calc979, 35		; <i32> [#uses=1]
 	%204 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 24		; <i32*> [#uses=1]
-	%205 = load i32* %204, align 4		; <i32> [#uses=1]
+	%205 = load i32, i32* %204, align 4		; <i32> [#uses=1]
 	%206 = add i32 %205, 1		; <i32> [#uses=1]
 	%207 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=1]
 	br label %bb86.fragment
@@ -732,7 +732,7 @@
 	%Opq.sa.calc943 = xor i32 %Opq.sa.calc670, 123		; <i32> [#uses=2]
 	store i32 %206, i32* %207, align 4
 	%208 = getelementptr %struct.Macroblock, %struct.Macroblock* %2, i64 %3, i32 28		; <i32*> [#uses=1]
-	%209 = load i32* %208, align 4		; <i32> [#uses=2]
+	%209 = load i32, i32* %208, align 4		; <i32> [#uses=2]
 	store i32 %209, i32* %.SV52.phi1234, align 4
 	br label %meshBB424
 
@@ -749,7 +749,7 @@
 
 bb89.fragment:		; preds = %bb89
 	%Opq.sa.calc962 = add i32 %Opq.sa.calc677, -188		; <i32> [#uses=3]
-	%211 = load i32* %210, align 4		; <i32> [#uses=3]
+	%211 = load i32, i32* %210, align 4		; <i32> [#uses=3]
 	br i1 %203, label %bb90, label %meshBB408
 
 bb90:		; preds = %bb89.fragment
@@ -762,25 +762,25 @@
 bb90.fragment:		; preds = %bb90
 	%Opq.sa.calc773 = sub i32 %Opq.sa.calc680, 60		; <i32> [#uses=3]
 	%Opq.sa.calc772 = add i32 %Opq.sa.calc773, -25		; <i32> [#uses=2]
-	%214 = load i32* %213, align 4		; <i32> [#uses=3]
+	%214 = load i32, i32* %213, align 4		; <i32> [#uses=3]
 	store i32 %214, i32* %.SV52.phi1190, align 4
-	%215 = load i32* %213, align 4		; <i32> [#uses=1]
+	%215 = load i32, i32* %213, align 4		; <i32> [#uses=1]
 	%216 = icmp eq i32 %215, 0		; <i1> [#uses=1]
 	br i1 %216, label %meshBB416, label %meshBB368
 
 bb91:		; preds = %meshBB368
 	%Opq.sa.calc683 = sub i32 %Opq.sa.calc768, -7		; <i32> [#uses=0]
-	%217 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%217 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
 	%218 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %217, i64 0, i32 39		; <%struct.Macroblock**> [#uses=1]
-	%219 = load %struct.Macroblock** %218, align 8		; <%struct.Macroblock*> [#uses=1]
-	%220 = load i32* %.SV170.phi, align 4		; <i32> [#uses=1]
+	%219 = load %struct.Macroblock*, %struct.Macroblock** %218, align 8		; <%struct.Macroblock*> [#uses=1]
+	%220 = load i32, i32* %.SV170.phi, align 4		; <i32> [#uses=1]
 	br label %bb91.fragment
 
 bb91.fragment:		; preds = %bb91
 	%Opq.sa.calc853 = xor i32 %Opq.sa.calc768, 8		; <i32> [#uses=1]
 	%221 = sext i32 %220 to i64		; <i64> [#uses=1]
 	%222 = getelementptr %struct.Macroblock, %struct.Macroblock* %219, i64 %221, i32 20		; <i32*> [#uses=1]
-	%223 = load i32* %222, align 4		; <i32> [#uses=1]
+	%223 = load i32, i32* %222, align 4		; <i32> [#uses=1]
 	%224 = icmp eq i32 %223, 0		; <i1> [#uses=1]
 	br i1 %224, label %bb92, label %bb96
 
@@ -805,7 +805,7 @@
 	%Opq.sa.calc841 = sub i32 %Opq.sa.calc901, 76		; <i32> [#uses=0]
 	store i32 %.SV306.phi, i32* %.SV308.phi, align 4
 	%229 = getelementptr %struct.Macroblock, %struct.Macroblock* %.load.SV.phi, i64 %.load20.SV.phi, i32 28		; <i32*> [#uses=1]
-	%230 = load i32* %229, align 4		; <i32> [#uses=2]
+	%230 = load i32, i32* %229, align 4		; <i32> [#uses=2]
 	store i32 %230, i32* %.load53.SV.phi, align 4
 	br label %bb96
 
@@ -826,13 +826,13 @@
 	%.SV70.phi1148 = phi i32 [ %.SV70.phi1195, %meshBB424 ], [ %.SV70.phi1215, %meshBB408 ], [ %.SV70.phi1138, %meshBB352 ], [ %.SV70.phi1085, %bb96 ], [ %.SV70.phi1027, %bb21 ]		; <i32> [#uses=1]
 	%yM.0.reg2mem.0.SV.phi = phi i32 [ -1, %meshBB424 ], [ -1, %meshBB408 ], [ -1, %meshBB352 ], [ %yM.0.SV.phi, %bb96 ], [ -1, %bb21 ]		; <i32> [#uses=1]
 	%Opq.sa.calc694 = xor i32 0, 243		; <i32> [#uses=1]
-	%232 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%232 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
 	%233 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %232, i64 0, i32 45		; <i32*> [#uses=1]
 	br label %bb97.fragment
 
 bb97.fragment:		; preds = %bb97
 	%Opq.sa.calc928 = xor i32 %Opq.sa.calc694, 128		; <i32> [#uses=1]
-	%234 = load i32* %233, align 4		; <i32> [#uses=1]
+	%234 = load i32, i32* %233, align 4		; <i32> [#uses=1]
 	%235 = icmp eq i32 %234, 0		; <i1> [#uses=1]
 	br i1 %235, label %return, label %bb98
 
@@ -855,13 +855,13 @@
 	%Opq.sa.calc1008 = sub i32 %Opq.link.mask911, 13		; <i32> [#uses=1]
 	%241 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 4		; <i32*> [#uses=4]
 	%242 = getelementptr %struct.PixelPos, %struct.PixelPos* %pix, i64 0, i32 1		; <i32*> [#uses=1]
-	%243 = load i32* %242, align 4		; <i32> [#uses=1]
-	%244 = load void (i32, i32*, i32*)** @get_mb_block_pos, align 8		; <void (i32, i32*, i32*)*> [#uses=1]
+	%243 = load i32, i32* %242, align 4		; <i32> [#uses=1]
+	%244 = load void (i32, i32*, i32*)*, void (i32, i32*, i32*)** @get_mb_block_pos, align 8		; <void (i32, i32*, i32*)*> [#uses=1]
 	tail call void %244(i32 %243, i32* %241, i32* %.SV317.phi) nounwind
-	%245 = load i32* %241, align 4		; <i32> [#uses=1]
-	%246 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%245 = load i32, i32* %241, align 4		; <i32> [#uses=1]
+	%246 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
 	%247 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %246, i64 0, i32 119, i64 %.load39.SV.phi, i64 0		; <i32*> [#uses=1]
-	%248 = load i32* %247, align 4		; <i32> [#uses=1]
+	%248 = load i32, i32* %247, align 4		; <i32> [#uses=1]
 	%249 = mul i32 %248, %245		; <i32> [#uses=2]
 	store i32 %249, i32* %241, align 4
 	br label %bb98.fragment183
@@ -869,15 +869,15 @@
 bb98.fragment183:		; preds = %bb98.fragment
 	%Opq.sa.calc777 = sub i32 %Opq.sa.calc1008, -158		; <i32> [#uses=1]
 	%Opq.sa.calc776 = sub i32 %Opq.sa.calc777, 46		; <i32> [#uses=0]
-	%250 = load i32* %.SV317.phi, align 4		; <i32> [#uses=1]
-	%251 = load %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
+	%250 = load i32, i32* %.SV317.phi, align 4		; <i32> [#uses=1]
+	%251 = load %struct.ImageParameters*, %struct.ImageParameters** @img, align 8		; <%struct.ImageParameters*> [#uses=1]
 	%252 = getelementptr %struct.ImageParameters, %struct.ImageParameters* %251, i64 0, i32 119, i64 %.load39.SV.phi, i64 1		; <i32*> [#uses=1]
-	%253 = load i32* %252, align 4		; <i32> [#uses=1]
+	%253 = load i32, i32* %252, align 4		; <i32> [#uses=1]
 	%254 = mul i32 %253, %250		; <i32> [#uses=1]
-	%255 = load i32* %.SV313.phi, align 4		; <i32> [#uses=1]
+	%255 = load i32, i32* %.SV313.phi, align 4		; <i32> [#uses=1]
 	%256 = add i32 %255, %249		; <i32> [#uses=1]
 	store i32 %256, i32* %241, align 4
-	%257 = load i32* %.SV315.phi, align 4		; <i32> [#uses=1]
+	%257 = load i32, i32* %.SV315.phi, align 4		; <i32> [#uses=1]
 	%258 = add i32 %257, %254		; <i32> [#uses=1]
 	store i32 %258, i32* %.SV317.phi, align 4
 	ret void
diff --git a/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll b/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
index 6cebb1e..5ddb5ca 100644
--- a/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
+++ b/llvm/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll
@@ -9,13 +9,13 @@
 	%arrayidx4 = getelementptr i32, i32* %data, i32 3		; <i32*> [#uses=1]
 	%arrayidx6 = getelementptr i32, i32* %data, i32 4		; <i32*> [#uses=1]
 	%arrayidx8 = getelementptr i32, i32* %data, i32 5		; <i32*> [#uses=1]
-	%tmp9 = load i32* %arrayidx8		; <i32> [#uses=1]
+	%tmp9 = load i32, i32* %arrayidx8		; <i32> [#uses=1]
 	%arrayidx11 = getelementptr i32, i32* %data, i32 6		; <i32*> [#uses=1]
-	%tmp12 = load i32* %arrayidx11		; <i32> [#uses=1]
+	%tmp12 = load i32, i32* %arrayidx11		; <i32> [#uses=1]
 	%arrayidx14 = getelementptr i32, i32* %data, i32 7		; <i32*> [#uses=1]
-	%tmp15 = load i32* %arrayidx14		; <i32> [#uses=1]
+	%tmp15 = load i32, i32* %arrayidx14		; <i32> [#uses=1]
 	%arrayidx17 = getelementptr i32, i32* %data, i32 8		; <i32*> [#uses=1]
-	%tmp18 = load i32* %arrayidx17		; <i32> [#uses=1]
+	%tmp18 = load i32, i32* %arrayidx17		; <i32> [#uses=1]
 	%0 = call i32 asm "cpuid", "={ax},=*{bx},=*{cx},=*{dx},{ax},{bx},{cx},{dx},~{dirflag},~{fpsr},~{flags}"(i32* %arrayidx2, i32* %arrayidx4, i32* %arrayidx6, i32 %tmp9, i32 %tmp12, i32 %tmp15, i32 %tmp18) nounwind		; <i32> [#uses=1]
 	store i32 %0, i32* %arrayidx
 	ret void
diff --git a/llvm/test/CodeGen/X86/2009-04-29-LinearScanBug.ll b/llvm/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
index e7ef9d8..b4d202c 100644
--- a/llvm/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
+++ b/llvm/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
@@ -105,17 +105,17 @@
 define fastcc i32 @pf_state_compare_ext_gwy(%struct.pf_state_key* nocapture %a, %struct.pf_state_key* nocapture %b) nounwind optsize ssp {
 entry:
 	%0 = zext i8 0 to i32		; <i32> [#uses=2]
-	%1 = load i8* null, align 1		; <i8> [#uses=2]
+	%1 = load i8, i8* null, align 1		; <i8> [#uses=2]
 	%2 = zext i8 %1 to i32		; <i32> [#uses=1]
 	%3 = sub i32 %0, %2		; <i32> [#uses=1]
 	%4 = icmp eq i8 0, %1		; <i1> [#uses=1]
 	br i1 %4, label %bb1, label %bb79
 
 bb1:		; preds = %entry
-	%5 = load i8* null, align 4		; <i8> [#uses=2]
+	%5 = load i8, i8* null, align 4		; <i8> [#uses=2]
 	%6 = zext i8 %5 to i32		; <i32> [#uses=2]
 	%7 = getelementptr %struct.pf_state_key, %struct.pf_state_key* %b, i32 0, i32 3		; <i8*> [#uses=1]
-	%8 = load i8* %7, align 4		; <i8> [#uses=2]
+	%8 = load i8, i8* %7, align 4		; <i8> [#uses=2]
 	%9 = zext i8 %8 to i32		; <i32> [#uses=1]
 	%10 = sub i32 %6, %9		; <i32> [#uses=1]
 	%11 = icmp eq i8 %5, %8		; <i1> [#uses=1]
@@ -132,32 +132,32 @@
 	]
 
 bb4:		; preds = %bb3, %bb3
-	%12 = load i16* null, align 4		; <i16> [#uses=1]
+	%12 = load i16, i16* null, align 4		; <i16> [#uses=1]
 	%13 = zext i16 %12 to i32		; <i32> [#uses=1]
 	%14 = sub i32 0, %13		; <i32> [#uses=1]
 	br i1 false, label %bb23, label %bb79
 
 bb6:		; preds = %bb3
-	%15 = load i16* null, align 4		; <i16> [#uses=1]
+	%15 = load i16, i16* null, align 4		; <i16> [#uses=1]
 	%16 = zext i16 %15 to i32		; <i32> [#uses=1]
 	%17 = sub i32 0, %16		; <i32> [#uses=1]
 	ret i32 %17
 
 bb10:		; preds = %bb3
-	%18 = load i8* null, align 1		; <i8> [#uses=2]
+	%18 = load i8, i8* null, align 1		; <i8> [#uses=2]
 	%19 = zext i8 %18 to i32		; <i32> [#uses=1]
 	%20 = sub i32 0, %19		; <i32> [#uses=1]
 	%21 = icmp eq i8 0, %18		; <i1> [#uses=1]
 	br i1 %21, label %bb12, label %bb79
 
 bb12:		; preds = %bb10
-	%22 = load i16* null, align 4		; <i16> [#uses=1]
+	%22 = load i16, i16* null, align 4		; <i16> [#uses=1]
 	%23 = zext i16 %22 to i32		; <i32> [#uses=1]
 	%24 = sub i32 0, %23		; <i32> [#uses=1]
 	ret i32 %24
 
 bb17:		; preds = %bb3
-	%25 = load i8* null, align 1		; <i8> [#uses=2]
+	%25 = load i8, i8* null, align 1		; <i8> [#uses=2]
 	%26 = icmp eq i8 %25, 1		; <i1> [#uses=1]
 	br i1 %26, label %bb18, label %bb23
 
@@ -166,16 +166,16 @@
 	br i1 %27, label %bb19, label %bb23
 
 bb19:		; preds = %bb18
-	%28 = load i16* null, align 4		; <i16> [#uses=1]
+	%28 = load i16, i16* null, align 4		; <i16> [#uses=1]
 	%29 = zext i16 %28 to i32		; <i32> [#uses=1]
 	%30 = sub i32 0, %29		; <i32> [#uses=1]
 	br i1 false, label %bb23, label %bb79
 
 bb21:		; preds = %bb3
 	%31 = getelementptr %struct.pf_state_key, %struct.pf_state_key* %a, i32 0, i32 1, i32 1, i32 0		; <i32*> [#uses=1]
-	%32 = load i32* %31, align 4		; <i32> [#uses=2]
+	%32 = load i32, i32* %31, align 4		; <i32> [#uses=2]
 	%33 = getelementptr %struct.pf_state_key, %struct.pf_state_key* %b, i32 0, i32 1, i32 1, i32 0		; <i32*> [#uses=1]
-	%34 = load i32* %33, align 4		; <i32> [#uses=2]
+	%34 = load i32, i32* %33, align 4		; <i32> [#uses=2]
 	%35 = sub i32 %32, %34		; <i32> [#uses=1]
 	%36 = icmp eq i32 %32, %34		; <i1> [#uses=1]
 	br i1 %36, label %bb23, label %bb79
@@ -188,11 +188,11 @@
 	ret i32 1
 
 bb70:		; preds = %bb23
-	%37 = load i32 (%struct.pf_app_state*, %struct.pf_app_state*)** null, align 4		; <i32 (%struct.pf_app_state*, %struct.pf_app_state*)*> [#uses=3]
+	%37 = load i32 (%struct.pf_app_state*, %struct.pf_app_state*)*, i32 (%struct.pf_app_state*, %struct.pf_app_state*)** null, align 4		; <i32 (%struct.pf_app_state*, %struct.pf_app_state*)*> [#uses=3]
 	br i1 false, label %bb78, label %bb73
 
 bb73:		; preds = %bb70
-	%38 = load i32 (%struct.pf_app_state*, %struct.pf_app_state*)** null, align 4		; <i32 (%struct.pf_app_state*, %struct.pf_app_state*)*> [#uses=2]
+	%38 = load i32 (%struct.pf_app_state*, %struct.pf_app_state*)*, i32 (%struct.pf_app_state*, %struct.pf_app_state*)** null, align 4		; <i32 (%struct.pf_app_state*, %struct.pf_app_state*)*> [#uses=2]
 	%39 = icmp eq i32 (%struct.pf_app_state*, %struct.pf_app_state*)* %38, null		; <i1> [#uses=1]
 	br i1 %39, label %bb78, label %bb74
 
diff --git a/llvm/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll b/llvm/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
index aa14a54..c291fed 100644
--- a/llvm/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
+++ b/llvm/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
@@ -71,7 +71,7 @@
 
 define fastcc void @dropCell(%struct.MemPage* nocapture %pPage, i32 %idx, i32 %sz) nounwind ssp {
 entry:
-	%0 = load i8** null, align 8		; <i8*> [#uses=4]
+	%0 = load i8*, i8** null, align 8		; <i8*> [#uses=4]
 	%1 = or i32 0, 0		; <i32> [#uses=1]
 	%2 = icmp slt i32 %sz, 4		; <i1> [#uses=1]
 	%size_addr.0.i = select i1 %2, i32 4, i32 %sz		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2009-04-scale.ll b/llvm/test/CodeGen/X86/2009-04-scale.ll
index 7939235..1fc5f2b 100644
--- a/llvm/test/CodeGen/X86/2009-04-scale.ll
+++ b/llvm/test/CodeGen/X86/2009-04-scale.ll
@@ -8,13 +8,13 @@
 
 define void @test() {
 entry:
-	%0 = load i32* null, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* null, align 4		; <i32> [#uses=1]
 	%1 = lshr i32 %0, 8		; <i32> [#uses=1]
 	%2 = and i32 %1, 255		; <i32> [#uses=1]
 	%3 = getelementptr %struct.array, %struct.array* null, i32 0, i32 3		; <[256 x %struct.pair]*> [#uses=1]
 	%4 = getelementptr [256 x %struct.pair], [256 x %struct.pair]* %3, i32 0, i32 %2		; <%struct.pair*> [#uses=1]
 	%5 = getelementptr %struct.pair, %struct.pair* %4, i32 0, i32 1		; <i64*> [#uses=1]
-	%6 = load i64* %5, align 4		; <i64> [#uses=1]
+	%6 = load i64, i64* %5, align 4		; <i64> [#uses=1]
 	%7 = xor i64 0, %6		; <i64> [#uses=1]
 	%8 = xor i64 %7, 0		; <i64> [#uses=1]
 	%9 = xor i64 %8, 0		; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll b/llvm/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
index c2cd89c..e9d1558 100644
--- a/llvm/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
+++ b/llvm/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll
@@ -12,7 +12,7 @@
 	br label %bb
 
 bb:		; preds = %bb.i, %bb, %entry
-	%2 = load volatile i32* @g_9, align 4		; <i32> [#uses=2]
+	%2 = load volatile i32, i32* @g_9, align 4		; <i32> [#uses=2]
 	%3 = icmp sgt i32 %2, 1		; <i1> [#uses=1]
 	%4 = and i1 %3, %1		; <i1> [#uses=1]
 	br i1 %4, label %bb.i, label %bb
diff --git a/llvm/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll b/llvm/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
index 1d14620..019d5df 100644
--- a/llvm/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
+++ b/llvm/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll
@@ -5,7 +5,7 @@
 	br label %bb14
 
 bb14:		; preds = %bb
-	%srcval16 = load i448* %P, align 8		; <i448> [#uses=1]
+	%srcval16 = load i448, i448* %P, align 8		; <i448> [#uses=1]
 	%tmp = zext i32 undef to i448		; <i448> [#uses=1]
 	%tmp15 = shl i448 %tmp, 288		; <i448> [#uses=1]
 	%mask = and i448 %srcval16, -2135987035423586845985235064014169866455883682256196619149693890381755748887481053010428711403521		; <i448> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2009-05-30-ISelBug.ll b/llvm/test/CodeGen/X86/2009-05-30-ISelBug.ll
index 0ff39a3..e01fe9f 100644
--- a/llvm/test/CodeGen/X86/2009-05-30-ISelBug.ll
+++ b/llvm/test/CodeGen/X86/2009-05-30-ISelBug.ll
@@ -14,13 +14,13 @@
 bb54.i:		; preds = %newFuncRoot
 	%1 = zext i32 %.reload51 to i64		; <i64> [#uses=1]
 	%2 = getelementptr i32, i32* %0, i64 %1		; <i32*> [#uses=1]
-	%3 = load i32* %2, align 4		; <i32> [#uses=2]
+	%3 = load i32, i32* %2, align 4		; <i32> [#uses=2]
 	%4 = lshr i32 %3, 8		; <i32> [#uses=1]
 	%5 = and i32 %3, 255		; <i32> [#uses=1]
 	%6 = add i32 %5, 4		; <i32> [#uses=1]
 	%7 = zext i32 %4 to i64		; <i64> [#uses=1]
 	%8 = getelementptr i32, i32* %0, i64 %7		; <i32*> [#uses=1]
-	%9 = load i32* %8, align 4		; <i32> [#uses=2]
+	%9 = load i32, i32* %8, align 4		; <i32> [#uses=2]
 	%10 = and i32 %9, 255		; <i32> [#uses=1]
 	%11 = lshr i32 %9, 8		; <i32> [#uses=1]
 	%12 = add i32 %c_nblock_used.2.i, 5		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2009-06-02-RewriterBug.ll b/llvm/test/CodeGen/X86/2009-06-02-RewriterBug.ll
index 36cb814..6ce7af6 100644
--- a/llvm/test/CodeGen/X86/2009-06-02-RewriterBug.ll
+++ b/llvm/test/CodeGen/X86/2009-06-02-RewriterBug.ll
@@ -14,11 +14,11 @@
 	%ctg22996 = getelementptr i8, i8* %in, i64 0		; <i8*> [#uses=1]
 	%conv = zext i32 undef to i64		; <i64> [#uses=1]
 	%conv11 = zext i32 undef to i64		; <i64> [#uses=1]
-	%tmp18 = load i32* undef		; <i32> [#uses=1]
+	%tmp18 = load i32, i32* undef		; <i32> [#uses=1]
 	%conv19 = zext i32 %tmp18 to i64		; <i64> [#uses=1]
-	%tmp30 = load i32* undef		; <i32> [#uses=1]
+	%tmp30 = load i32, i32* undef		; <i32> [#uses=1]
 	%conv31 = zext i32 %tmp30 to i64		; <i64> [#uses=4]
-	%ptrincdec3065 = load i8* null		; <i8> [#uses=1]
+	%ptrincdec3065 = load i8, i8* null		; <i8> [#uses=1]
 	%conv442709 = zext i8 %ptrincdec3065 to i64		; <i64> [#uses=1]
 	%shl45 = shl i64 %conv442709, 16		; <i64> [#uses=1]
 	%conv632707 = zext i8 undef to i64		; <i64> [#uses=1]
@@ -68,10 +68,10 @@
 	%add479 = add i64 %add473, %add441		; <i64> [#uses=3]
 	%conv4932682 = zext i8 undef to i64		; <i64> [#uses=1]
 	%shl494 = shl i64 %conv4932682, 16		; <i64> [#uses=1]
-	%ptrincdec4903012 = load i8* null		; <i8> [#uses=1]
+	%ptrincdec4903012 = load i8, i8* null		; <i8> [#uses=1]
 	%conv5032681 = zext i8 %ptrincdec4903012 to i64		; <i64> [#uses=1]
 	%shl504 = shl i64 %conv5032681, 8		; <i64> [#uses=1]
-	%ptrincdec5003009 = load i8* null		; <i8> [#uses=1]
+	%ptrincdec5003009 = load i8, i8* null		; <i8> [#uses=1]
 	%conv5132680 = zext i8 %ptrincdec5003009 to i64		; <i64> [#uses=1]
 	%or495 = or i64 %shl494, 0		; <i64> [#uses=1]
 	%or505 = or i64 %or495, %conv5132680		; <i64> [#uses=1]
@@ -91,10 +91,10 @@
 	%xor575 = xor i64 %xor568, %or561		; <i64> [#uses=1]
 	%add587 = add i64 %xor575, 0		; <i64> [#uses=1]
 	%add593 = add i64 %add587, %add555		; <i64> [#uses=1]
-	%ptrincdec6043000 = load i8* null		; <i8> [#uses=1]
+	%ptrincdec6043000 = load i8, i8* null		; <i8> [#uses=1]
 	%conv6172676 = zext i8 %ptrincdec6043000 to i64		; <i64> [#uses=1]
 	%shl618 = shl i64 %conv6172676, 8		; <i64> [#uses=1]
-	%ptrincdec6142997 = load i8* %ctg22996		; <i8> [#uses=1]
+	%ptrincdec6142997 = load i8, i8* %ctg22996		; <i8> [#uses=1]
 	%conv6272675 = zext i8 %ptrincdec6142997 to i64		; <i64> [#uses=1]
 	%or619 = or i64 0, %conv6272675		; <i64> [#uses=1]
 	%or628 = or i64 %or619, %shl618		; <i64> [#uses=1]
@@ -106,7 +106,7 @@
 	%xor700 = xor i64 0, %and699		; <i64> [#uses=1]
 	%add701 = add i64 0, %xor700		; <i64> [#uses=1]
 	%add707 = add i64 %add701, %add669		; <i64> [#uses=4]
-	%ptrincdec6242994 = load i8* null		; <i8> [#uses=1]
+	%ptrincdec6242994 = load i8, i8* null		; <i8> [#uses=1]
 	%conv7122673 = zext i8 %ptrincdec6242994 to i64		; <i64> [#uses=1]
 	%shl713 = shl i64 %conv7122673, 24		; <i64> [#uses=1]
 	%conv7412670 = zext i8 undef to i64		; <i64> [#uses=1]
@@ -132,7 +132,7 @@
 	%add821 = add i64 %add815, %add783		; <i64> [#uses=1]
 	%add1160 = add i64 0, %add707		; <i64> [#uses=0]
 	%add1157 = add i64 undef, undef		; <i64> [#uses=0]
-	%ptrincdec11742940 = load i8* null		; <i8> [#uses=1]
+	%ptrincdec11742940 = load i8, i8* null		; <i8> [#uses=1]
 	%conv11872651 = zext i8 %ptrincdec11742940 to i64		; <i64> [#uses=1]
 	%shl1188 = shl i64 %conv11872651, 8		; <i64> [#uses=1]
 	%or1198 = or i64 0, %shl1188		; <i64> [#uses=1]
@@ -172,18 +172,18 @@
 	br label %while.body
 
 while.body:		; preds = %for.end, %bb.nph
-	%tmp3 = load i32* %arr		; <i32> [#uses=2]
+	%tmp3 = load i32, i32* %arr		; <i32> [#uses=2]
 	%conv = zext i32 %tmp3 to i64		; <i64> [#uses=1]
-	%tmp10 = load i32* %arrayidx9		; <i32> [#uses=1]
+	%tmp10 = load i32, i32* %arrayidx9		; <i32> [#uses=1]
 	%conv11 = zext i32 %tmp10 to i64		; <i64> [#uses=1]
-	%tmp14 = load i32* %arrayidx13		; <i32> [#uses=3]
+	%tmp14 = load i32, i32* %arrayidx13		; <i32> [#uses=3]
 	%conv15 = zext i32 %tmp14 to i64		; <i64> [#uses=2]
-	%tmp18 = load i32* undef		; <i32> [#uses=2]
+	%tmp18 = load i32, i32* undef		; <i32> [#uses=2]
 	%conv19 = zext i32 %tmp18 to i64		; <i64> [#uses=1]
 	%conv23 = zext i32 undef to i64		; <i64> [#uses=1]
-	%tmp26 = load i32* %arrayidx25		; <i32> [#uses=1]
+	%tmp26 = load i32, i32* %arrayidx25		; <i32> [#uses=1]
 	%conv27 = zext i32 %tmp26 to i64		; <i64> [#uses=1]
-	%tmp30 = load i32* %arrayidx29		; <i32> [#uses=2]
+	%tmp30 = load i32, i32* %arrayidx29		; <i32> [#uses=2]
 	%conv31 = zext i32 %tmp30 to i64		; <i64> [#uses=5]
 	%shl72 = shl i64 %conv31, 26		; <i64> [#uses=1]
 	%shr = lshr i64 %conv31, 6		; <i64> [#uses=1]
@@ -203,7 +203,7 @@
 	%add137 = add i64 %add131, %add99		; <i64> [#uses=5]
 	%conv1422700 = zext i8 undef to i64		; <i64> [#uses=1]
 	%shl143 = shl i64 %conv1422700, 24		; <i64> [#uses=1]
-	%ptrincdec1393051 = load i8* undef		; <i8> [#uses=1]
+	%ptrincdec1393051 = load i8, i8* undef		; <i8> [#uses=1]
 	%conv1512699 = zext i8 %ptrincdec1393051 to i64		; <i64> [#uses=1]
 	%shl152 = shl i64 %conv1512699, 16		; <i64> [#uses=1]
 	%conv1712697 = zext i8 undef to i64		; <i64> [#uses=1]
@@ -283,7 +283,7 @@
 	%add1427 = add i64 %add1392, %d.0		; <i64> [#uses=1]
 	%add1424 = add i64 %xor1412, 0		; <i64> [#uses=1]
 	%add1430 = add i64 %add1424, %add1392		; <i64> [#uses=5]
-	%tmp1438 = load i32* undef		; <i32> [#uses=1]
+	%tmp1438 = load i32, i32* undef		; <i32> [#uses=1]
 	%conv1439 = zext i32 %tmp1438 to i64		; <i64> [#uses=4]
 	%shl1441 = shl i64 %conv1439, 25		; <i64> [#uses=1]
 	%shr1444 = lshr i64 %conv1439, 7		; <i64> [#uses=1]
@@ -302,13 +302,13 @@
 	%shr1479 = lshr i64 %conv1464, 10		; <i64> [#uses=1]
 	%xor1477 = xor i64 %or1476, %shr1479		; <i64> [#uses=1]
 	%xor1480 = xor i64 %xor1477, %or1470		; <i64> [#uses=1]
-	%tmp1499 = load i32* null		; <i32> [#uses=1]
+	%tmp1499 = load i32, i32* null		; <i32> [#uses=1]
 	%conv1500 = zext i32 %tmp1499 to i64		; <i64> [#uses=1]
 	%add1491 = add i64 %conv1500, 0		; <i64> [#uses=1]
 	%add1501 = add i64 %add1491, %xor1455		; <i64> [#uses=1]
 	%add1502 = add i64 %add1501, %xor1480		; <i64> [#uses=1]
 	%conv1504 = and i64 %add1502, 4294967295		; <i64> [#uses=1]
-	%tmp1541 = load i32* undef		; <i32> [#uses=1]
+	%tmp1541 = load i32, i32* undef		; <i32> [#uses=1]
 	%conv1542 = zext i32 %tmp1541 to i64		; <i64> [#uses=1]
 	%add1527 = add i64 %conv1542, %g.0		; <i64> [#uses=1]
 	%add1536 = add i64 %add1527, 0		; <i64> [#uses=1]
@@ -327,7 +327,7 @@
 	%add1576 = add i64 %xor1564, 0		; <i64> [#uses=1]
 	%add1582 = add i64 %add1576, %add1544		; <i64> [#uses=3]
 	store i32 undef, i32* undef
-	%tmp1693 = load i32* undef		; <i32> [#uses=1]
+	%tmp1693 = load i32, i32* undef		; <i32> [#uses=1]
 	%conv1694 = zext i32 %tmp1693 to i64		; <i64> [#uses=1]
 	%add1679 = add i64 %conv1694, %f.0		; <i64> [#uses=1]
 	%add1688 = add i64 %add1679, 0		; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll b/llvm/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
index 06426a2..2979549 100644
--- a/llvm/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
+++ b/llvm/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
@@ -17,7 +17,7 @@
 
 define fastcc void @MinSize(%struct.rec* %x) nounwind {
 entry:
-	%tmp13 = load i8* undef, align 4		; <i8> [#uses=3]
+	%tmp13 = load i8, i8* undef, align 4		; <i8> [#uses=3]
 	%tmp14 = zext i8 %tmp13 to i32		; <i32> [#uses=2]
 	switch i32 %tmp14, label %bb1109 [
 		i32 42, label %bb246
diff --git a/llvm/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/llvm/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
index ffbe02c..9c7eb6d 100644
--- a/llvm/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
+++ b/llvm/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
@@ -6,7 +6,7 @@
 ; CHECK-NEXT:    movd %[[R]], %xmm0
 ; CHECK-NEXT:    retl
 
-  %x2 = load i32* %x1
+  %x2 = load i32, i32* %x1
   %x3 = lshr i32 %x2, 1
   %x = trunc i32 %x3 to i16
   %r = insertelement <4 x i16> zeroinitializer, i16 %x, i32 0
@@ -20,7 +20,7 @@
 ; CHECK-NEXT:    movd %e[[R]]x, %xmm0
 ; CHECK-NEXT:    retl
 
-  %x2 = load i32* %x1
+  %x2 = load i32, i32* %x1
   %x3 = lshr i32 %x2, 1
   %x = trunc i32 %x3 to i16
   %r = insertelement <8 x i16> zeroinitializer, i16 %x, i32 0
@@ -34,7 +34,7 @@
 ; CHECK-NEXT:    movd %e[[R]]x, %xmm0
 ; CHECK-NEXT:    retl
 
-  %x2 = load i32* %x1
+  %x2 = load i32, i32* %x1
   %x3 = lshr i32 %x2, 1
   %x = trunc i32 %x3 to i8
   %r = insertelement <8 x i8> zeroinitializer, i8 %x, i32 0
@@ -48,7 +48,7 @@
 ; CHECK-NEXT:    movd %e[[R]]x, %xmm0
 ; CHECK-NEXT:    retl
 
-  %x2 = load i32* %x1
+  %x2 = load i32, i32* %x1
   %x3 = lshr i32 %x2, 1
   %x = trunc i32 %x3 to i8
   %r = insertelement <16 x i8> zeroinitializer, i8 %x, i32 0
diff --git a/llvm/test/CodeGen/X86/2009-07-15-CoalescerBug.ll b/llvm/test/CodeGen/X86/2009-07-15-CoalescerBug.ll
index eabaf77..beb5705 100644
--- a/llvm/test/CodeGen/X86/2009-07-15-CoalescerBug.ll
+++ b/llvm/test/CodeGen/X86/2009-07-15-CoalescerBug.ll
@@ -237,7 +237,7 @@
 	br i1 undef, label %bb1563, label %bb1558
 
 bb1558:		; preds = %bb1545
-	%0 = load %struct.SV** undef		; <%struct.SV*> [#uses=1]
+	%0 = load %struct.SV*, %struct.SV** undef		; <%struct.SV*> [#uses=1]
 	%1 = bitcast %struct.SV* %0 to %struct.GV*		; <%struct.GV*> [#uses=5]
 	br i1 undef, label %bb1563, label %bb1559
 
diff --git a/llvm/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll b/llvm/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll
index e83b3a7..045e89e 100644
--- a/llvm/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll
+++ b/llvm/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll
@@ -5,7 +5,7 @@
 
 define fastcc i32 @bsGetUInt32() nounwind ssp {
 entry:
-	%bsBuff.promoted44 = load i32* @bsBuff		; <i32> [#uses=1]
+	%bsBuff.promoted44 = load i32, i32* @bsBuff		; <i32> [#uses=1]
 	%0 = add i32 0, -8		; <i32> [#uses=1]
 	%1 = lshr i32 %bsBuff.promoted44, %0		; <i32> [#uses=1]
 	%2 = shl i32 %1, 8		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll b/llvm/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
index 2080c0a..a70861d 100644
--- a/llvm/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
+++ b/llvm/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
@@ -43,12 +43,12 @@
 	br i1 %tobool, label %lor.lhs.false, label %if.then
 
 lor.lhs.false:		; preds = %entry
-	%tmp1 = load i8* @g_3		; <i8> [#uses=1]
+	%tmp1 = load i8, i8* @g_3		; <i8> [#uses=1]
 	%tobool3 = icmp eq i8 %tmp1, 0		; <i1> [#uses=1]
 	br i1 %tobool3, label %return, label %if.then
 
 if.then:		; preds = %lor.lhs.false, %entry
-	%tmp4 = load i8* @g_3		; <i8> [#uses=1]
+	%tmp4 = load i8, i8* @g_3		; <i8> [#uses=1]
 	%conv5 = sext i8 %tmp4 to i32		; <i32> [#uses=1]
 	ret i32 %conv5
 
@@ -93,12 +93,12 @@
   br i1 %tobool, label %lor.lhs.false, label %if.then
 
 lor.lhs.false:                                    ; preds = %entry
-  %tmp1 = load i8* @g_3                           ; <i8> [#uses=1]
+  %tmp1 = load i8, i8* @g_3                           ; <i8> [#uses=1]
   %tobool3 = icmp eq i8 %tmp1, 0                  ; <i1> [#uses=1]
   br i1 %tobool3, label %return, label %if.then
 
 if.then:                                          ; preds = %lor.lhs.false, %entry
-  %tmp4 = load i8* @g_3                           ; <i8> [#uses=1]
+  %tmp4 = load i8, i8* @g_3                           ; <i8> [#uses=1]
   %conv5 = sext i8 %tmp4 to i32                   ; <i32> [#uses=1]
   ret i32 %conv5
 
diff --git a/llvm/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll b/llvm/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
index c3bad6b..f24c3f8 100644
--- a/llvm/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
+++ b/llvm/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
@@ -14,12 +14,12 @@
 	%pointerArithmeticTmp = bitcast %0* %shaderExecutionStatePtr to i8*		; <i8*> [#uses=1]
 	%pointerArithmeticTmp1 = getelementptr i8, i8* %pointerArithmeticTmp, i64 1808		; <i8*> [#uses=1]
 	%pointerArithmeticTmp2 = bitcast i8* %pointerArithmeticTmp1 to %1**		; <%1**> [#uses=1]
-	%primitivePtr = load %1** %pointerArithmeticTmp2		; <%1*> [#uses=1]
+	%primitivePtr = load %1*, %1** %pointerArithmeticTmp2		; <%1*> [#uses=1]
 	%pointerArithmeticTmp3 = bitcast %1* %primitivePtr to i8*		; <i8*> [#uses=1]
 	%pointerArithmeticTmp4 = getelementptr i8, i8* %pointerArithmeticTmp3, i64 19408		; <i8*> [#uses=1]
 	%pointerArithmeticTmp5 = bitcast i8* %pointerArithmeticTmp4 to %1**		; <%1**> [#uses=1]
 	%primitiveTexturePtr = getelementptr %1*, %1** %pointerArithmeticTmp5, i32 %index		; <%1**> [#uses=1]
-	%primitiveTexturePtr6 = load %1** %primitiveTexturePtr		; <%1*> [#uses=2]
+	%primitiveTexturePtr6 = load %1*, %1** %primitiveTexturePtr		; <%1*> [#uses=2]
 	br label %textureCheckBlock
 
 textureCheckBlock:		; preds = %primitiveTextureFetchBlock
@@ -31,7 +31,7 @@
 	%pointerArithmeticTmp7 = bitcast %1* %primitiveTexturePtr6 to i8*		; <i8*> [#uses=1]
 	%pointerArithmeticTmp8 = getelementptr i8, i8* %pointerArithmeticTmp7, i64 640		; <i8*> [#uses=1]
 	%pointerArithmeticTmp9 = bitcast i8* %pointerArithmeticTmp8 to <4 x float>*		; <<4 x float>*> [#uses=1]
-	%dimensionsPtr = load <4 x float>* %pointerArithmeticTmp9, align 1		; <<4 x float>> [#uses=2]
+	%dimensionsPtr = load <4 x float>, <4 x float>* %pointerArithmeticTmp9, align 1		; <<4 x float>> [#uses=2]
 	%texDiffDX = fsub <4 x float> %texCoordDX, %texCoord		; <<4 x float>> [#uses=1]
 	%texDiffDY = fsub <4 x float> %texCoordDY, %texCoord		; <<4 x float>> [#uses=1]
 	%ddx = fmul <4 x float> %texDiffDX, %dimensionsPtr		; <<4 x float>> [#uses=2]
diff --git a/llvm/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll b/llvm/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll
index 5f6cf3b..5926ab4b 100644
--- a/llvm/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll
+++ b/llvm/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll
@@ -5,7 +5,7 @@
 
 define void @c() nounwind {
 ; CHECK: movl a+8, %eax
-  %srcval1 = load i96* @a, align 4
+  %srcval1 = load i96, i96* @a, align 4
   %sroa.store.elt2 = lshr i96 %srcval1, 64
   %tmp = trunc i96 %sroa.store.elt2 to i64
 ; CHECK: movl %eax, b
diff --git a/llvm/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll b/llvm/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
index 410a42a..fac6a66 100644
--- a/llvm/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
+++ b/llvm/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
@@ -41,18 +41,18 @@
   br i1 undef, label %bb5, label %bb4
 
 bb4:                                              ; preds = %bb3
-  %17 = load volatile i32* @uint8, align 4        ; <i32> [#uses=0]
+  %17 = load volatile i32, i32* @uint8, align 4        ; <i32> [#uses=0]
   br label %bb5
 
 bb5:                                              ; preds = %bb4, %bb3
-  %18 = load volatile i32* @uint8, align 4        ; <i32> [#uses=0]
+  %18 = load volatile i32, i32* @uint8, align 4        ; <i32> [#uses=0]
   %19 = sext i8 undef to i16                      ; <i16> [#uses=1]
   %20 = tail call i32 @func_24(i16 zeroext %19, i8 signext 1) nounwind; <i32> [#uses=0]
   br i1 undef, label %return, label %bb6.preheader
 
 bb6.preheader:                                    ; preds = %bb5
   %21 = sext i8 %p_52 to i32                      ; <i32> [#uses=1]
-  %22 = load volatile i32* @uint8, align 4        ; <i32> [#uses=0]
+  %22 = load volatile i32, i32* @uint8, align 4        ; <i32> [#uses=0]
   %23 = tail call i32 (...)* @safefuncts(i32 %21, i32 1) nounwind; <i32> [#uses=0]
   unreachable
 
diff --git a/llvm/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll b/llvm/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll
index 9c545dc..2ec49f4 100644
--- a/llvm/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll
+++ b/llvm/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll
@@ -26,7 +26,7 @@
 
 invcont1:                                         ; preds = %invcont
   %6 = getelementptr inbounds %struct.ComplexType, %struct.ComplexType* %2, i64 0, i32 0 ; <i32*> [#uses=1]
-  %7 = load i32* %6, align 4                      ; <i32> [#uses=1]
+  %7 = load i32, i32* %6, align 4                      ; <i32> [#uses=1]
   invoke void @booleanAndDataReply(i32 %7, i32 undef, i32 %requestID, i32 undef, i64 undef, i32 undef)
           to label %invcont2 unwind label %lpad
 
diff --git a/llvm/test/CodeGen/X86/2009-09-10-SpillComments.ll b/llvm/test/CodeGen/X86/2009-09-10-SpillComments.ll
index 4aadd8a..78ce1ce 100644
--- a/llvm/test/CodeGen/X86/2009-09-10-SpillComments.ll
+++ b/llvm/test/CodeGen/X86/2009-09-10-SpillComments.ll
@@ -21,7 +21,7 @@
 
 cond_next:		; preds = %entry
 	%tmp6 = getelementptr %struct.rtx_def, %struct.rtx_def* %x, i32 0, i32 0		; <i16*> [#uses=1]
-	%tmp7 = load i16* %tmp6		; <i16> [#uses=2]
+	%tmp7 = load i16, i16* %tmp6		; <i16> [#uses=2]
 	%tmp78 = zext i16 %tmp7 to i32		; <i32> [#uses=2]
 	%tmp10 = icmp eq i16 %tmp7, 54		; <i1> [#uses=1]
 	br i1 %tmp10, label %cond_true13, label %cond_next32
@@ -29,9 +29,9 @@
 cond_true13:		; preds = %cond_next
 	%tmp15 = getelementptr %struct.rtx_def, %struct.rtx_def* %x, i32 0, i32 3		; <[1 x %struct..0anon]*> [#uses=1]
 	%tmp1718 = bitcast [1 x %struct..0anon]* %tmp15 to %struct.rtx_def**		; <%struct.rtx_def**> [#uses=1]
-	%tmp19 = load %struct.rtx_def** %tmp1718		; <%struct.rtx_def*> [#uses=1]
+	%tmp19 = load %struct.rtx_def*, %struct.rtx_def** %tmp1718		; <%struct.rtx_def*> [#uses=1]
 	%tmp20 = getelementptr %struct.rtx_def, %struct.rtx_def* %tmp19, i32 0, i32 0		; <i16*> [#uses=1]
-	%tmp21 = load i16* %tmp20		; <i16> [#uses=1]
+	%tmp21 = load i16, i16* %tmp20		; <i16> [#uses=1]
 	%tmp22 = icmp eq i16 %tmp21, 57		; <i1> [#uses=1]
 	br i1 %tmp22, label %cond_true25, label %cond_next32
 
@@ -41,9 +41,9 @@
 
 cond_next32:		; preds = %cond_true13, %cond_next
 	%tmp34 = getelementptr [116 x i8*], [116 x i8*]* @rtx_format, i32 0, i32 %tmp78		; <i8**> [#uses=1]
-	%tmp35 = load i8** %tmp34, align 4		; <i8*> [#uses=1]
+	%tmp35 = load i8*, i8** %tmp34, align 4		; <i8*> [#uses=1]
 	%tmp37 = getelementptr [117 x i32], [117 x i32]* @rtx_length, i32 0, i32 %tmp78		; <i32*> [#uses=1]
-	%tmp38 = load i32* %tmp37, align 4		; <i32> [#uses=1]
+	%tmp38 = load i32, i32* %tmp37, align 4		; <i32> [#uses=1]
 	%i.011 = add i32 %tmp38, -1		; <i32> [#uses=2]
 	%tmp12513 = icmp sgt i32 %i.011, -1		; <i1> [#uses=1]
 	br i1 %tmp12513, label %bb, label %UnifiedReturnBlock
@@ -52,7 +52,7 @@
 	%indvar = phi i32 [ %indvar.next26, %bb123 ], [ 0, %cond_next32 ]		; <i32> [#uses=2]
 	%i.01.0 = sub i32 %i.011, %indvar		; <i32> [#uses=5]
 	%tmp42 = getelementptr i8, i8* %tmp35, i32 %i.01.0		; <i8*> [#uses=2]
-	%tmp43 = load i8* %tmp42		; <i8> [#uses=1]
+	%tmp43 = load i8, i8* %tmp42		; <i8> [#uses=1]
 	switch i8 %tmp43, label %bb123 [
 		 i8 101, label %cond_true47
 		 i8 69, label %bb105.preheader
@@ -61,38 +61,38 @@
 cond_true47:		; preds = %bb
 	%tmp52 = getelementptr %struct.rtx_def, %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0		; <%struct..0anon*> [#uses=1]
 	%tmp5354 = bitcast %struct..0anon* %tmp52 to %struct.rtx_def**		; <%struct.rtx_def**> [#uses=1]
-	%tmp55 = load %struct.rtx_def** %tmp5354		; <%struct.rtx_def*> [#uses=1]
+	%tmp55 = load %struct.rtx_def*, %struct.rtx_def** %tmp5354		; <%struct.rtx_def*> [#uses=1]
 	%tmp58 = tail call  %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp55, %struct.rtx_def* %insn ) nounwind		; <%struct.rtx_def*> [#uses=1]
 	%tmp62 = getelementptr %struct.rtx_def, %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0, i32 0		; <i32*> [#uses=1]
 	%tmp58.c = ptrtoint %struct.rtx_def* %tmp58 to i32		; <i32> [#uses=1]
 	store i32 %tmp58.c, i32* %tmp62
-	%tmp6816 = load i8* %tmp42		; <i8> [#uses=1]
+	%tmp6816 = load i8, i8* %tmp42		; <i8> [#uses=1]
 	%tmp6917 = icmp eq i8 %tmp6816, 69		; <i1> [#uses=1]
 	br i1 %tmp6917, label %bb105.preheader, label %bb123
 
 bb105.preheader:		; preds = %cond_true47, %bb
 	%tmp11020 = getelementptr %struct.rtx_def, %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0		; <%struct..0anon*> [#uses=1]
 	%tmp11111221 = bitcast %struct..0anon* %tmp11020 to %struct.rtvec_def**		; <%struct.rtvec_def**> [#uses=3]
-	%tmp11322 = load %struct.rtvec_def** %tmp11111221		; <%struct.rtvec_def*> [#uses=1]
+	%tmp11322 = load %struct.rtvec_def*, %struct.rtvec_def** %tmp11111221		; <%struct.rtvec_def*> [#uses=1]
 	%tmp11423 = getelementptr %struct.rtvec_def, %struct.rtvec_def* %tmp11322, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp11524 = load i32* %tmp11423		; <i32> [#uses=1]
+	%tmp11524 = load i32, i32* %tmp11423		; <i32> [#uses=1]
 	%tmp11625 = icmp eq i32 %tmp11524, 0		; <i1> [#uses=1]
 	br i1 %tmp11625, label %bb123, label %bb73
 
 bb73:		; preds = %bb73, %bb105.preheader
 	%j.019 = phi i32 [ %tmp104, %bb73 ], [ 0, %bb105.preheader ]		; <i32> [#uses=3]
-	%tmp81 = load %struct.rtvec_def** %tmp11111221		; <%struct.rtvec_def*> [#uses=2]
+	%tmp81 = load %struct.rtvec_def*, %struct.rtvec_def** %tmp11111221		; <%struct.rtvec_def*> [#uses=2]
 	%tmp92 = getelementptr %struct.rtvec_def, %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019		; <%struct..0anon*> [#uses=1]
 	%tmp9394 = bitcast %struct..0anon* %tmp92 to %struct.rtx_def**		; <%struct.rtx_def**> [#uses=1]
-	%tmp95 = load %struct.rtx_def** %tmp9394		; <%struct.rtx_def*> [#uses=1]
+	%tmp95 = load %struct.rtx_def*, %struct.rtx_def** %tmp9394		; <%struct.rtx_def*> [#uses=1]
 	%tmp98 = tail call  %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp95, %struct.rtx_def* %insn ) nounwind		; <%struct.rtx_def*> [#uses=1]
 	%tmp101 = getelementptr %struct.rtvec_def, %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019, i32 0		; <i32*> [#uses=1]
 	%tmp98.c = ptrtoint %struct.rtx_def* %tmp98 to i32		; <i32> [#uses=1]
 	store i32 %tmp98.c, i32* %tmp101
 	%tmp104 = add i32 %j.019, 1		; <i32> [#uses=2]
-	%tmp113 = load %struct.rtvec_def** %tmp11111221		; <%struct.rtvec_def*> [#uses=1]
+	%tmp113 = load %struct.rtvec_def*, %struct.rtvec_def** %tmp11111221		; <%struct.rtvec_def*> [#uses=1]
 	%tmp114 = getelementptr %struct.rtvec_def, %struct.rtvec_def* %tmp113, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp115 = load i32* %tmp114		; <i32> [#uses=1]
+	%tmp115 = load i32, i32* %tmp114		; <i32> [#uses=1]
 	%tmp116 = icmp ult i32 %tmp104, %tmp115		; <i1> [#uses=1]
 	br i1 %tmp116, label %bb73, label %bb123
 
diff --git a/llvm/test/CodeGen/X86/2009-09-16-CoalescerBug.ll b/llvm/test/CodeGen/X86/2009-09-16-CoalescerBug.ll
index 18b5a17..a18a30a 100644
--- a/llvm/test/CodeGen/X86/2009-09-16-CoalescerBug.ll
+++ b/llvm/test/CodeGen/X86/2009-09-16-CoalescerBug.ll
@@ -32,7 +32,7 @@
   br i1 %cmp16, label %for.end41, label %for.cond17.preheader
 
 for.cond17.preheader:                             ; preds = %lor.lhs.false
-  %tmp24 = load i32* @boot_cpu_id                 ; <i32> [#uses=1]
+  %tmp24 = load i32, i32* @boot_cpu_id                 ; <i32> [#uses=1]
   %shr26 = ashr i32 %tmp24, %and                  ; <i32> [#uses=1]
   br label %for.body20
 
diff --git a/llvm/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll b/llvm/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
index 809e9f7..840b317 100644
--- a/llvm/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
+++ b/llvm/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
@@ -13,11 +13,11 @@
 	%sum.04 = phi i32 [ 0, %entry ], [ %10, %bb ]		; <i32> [#uses=1]
 	%1 = mul i32 %i.03, %As		; <i32> [#uses=1]
 	%2 = getelementptr i16, i16* %A, i32 %1		; <i16*> [#uses=1]
-	%3 = load i16* %2, align 2		; <i16> [#uses=1]
+	%3 = load i16, i16* %2, align 2		; <i16> [#uses=1]
 	%4 = sext i16 %3 to i32		; <i32> [#uses=1]
 	%5 = mul i32 %i.03, %Bs		; <i32> [#uses=1]
 	%6 = getelementptr i16, i16* %B, i32 %5		; <i16*> [#uses=1]
-	%7 = load i16* %6, align 2		; <i16> [#uses=1]
+	%7 = load i16, i16* %6, align 2		; <i16> [#uses=1]
 	%8 = sext i16 %7 to i32		; <i32> [#uses=1]
 	%9 = mul i32 %8, %4		; <i32> [#uses=1]
 	%10 = add i32 %9, %sum.04		; <i32> [#uses=2]
diff --git a/llvm/test/CodeGen/X86/2009-09-22-CoalescerBug.ll b/llvm/test/CodeGen/X86/2009-09-22-CoalescerBug.ll
index 33f35f8..e469a60 100644
--- a/llvm/test/CodeGen/X86/2009-09-22-CoalescerBug.ll
+++ b/llvm/test/CodeGen/X86/2009-09-22-CoalescerBug.ll
@@ -54,7 +54,7 @@
   unreachable
 
 bb.i37:                                           ; preds = %bb.i37, %bb11.thread
-  %0 = load i64* undef, align 8                   ; <i64> [#uses=1]
+  %0 = load i64, i64* undef, align 8                   ; <i64> [#uses=1]
   %1 = shl i64 %0, %.cast.i                       ; <i64> [#uses=1]
   store i64 %1, i64* undef, align 8
   br i1 undef, label %bb.i37, label %quantum_addscratch.exit
diff --git a/llvm/test/CodeGen/X86/2009-10-19-EmergencySpill.ll b/llvm/test/CodeGen/X86/2009-10-19-EmergencySpill.ll
index 73133ad..ec73f5a 100644
--- a/llvm/test/CodeGen/X86/2009-10-19-EmergencySpill.ll
+++ b/llvm/test/CodeGen/X86/2009-10-19-EmergencySpill.ll
@@ -8,12 +8,12 @@
 
 define fastcc void @nodeOverwriteCell(%struct.Rtree* nocapture %pRtree, %struct.RtreeNode* nocapture %pNode, %struct.RtreeCell* nocapture %pCell, i32 %iCell) nounwind ssp {
 entry:
-  %0 = load i8** undef, align 8                   ; <i8*> [#uses=2]
-  %1 = load i32* undef, align 8                   ; <i32> [#uses=1]
+  %0 = load i8*, i8** undef, align 8                   ; <i8*> [#uses=2]
+  %1 = load i32, i32* undef, align 8                   ; <i32> [#uses=1]
   %2 = mul i32 %1, %iCell                         ; <i32> [#uses=1]
   %3 = add nsw i32 %2, 4                          ; <i32> [#uses=1]
   %4 = sext i32 %3 to i64                         ; <i64> [#uses=2]
-  %5 = load i64* null, align 8                    ; <i64> [#uses=2]
+  %5 = load i64, i64* null, align 8                    ; <i64> [#uses=2]
   %6 = lshr i64 %5, 48                            ; <i64> [#uses=1]
   %7 = trunc i64 %6 to i8                         ; <i8> [#uses=1]
   store i8 %7, i8* undef, align 1
@@ -36,12 +36,12 @@
   %tmp = shl i64 %indvar, 2                       ; <i64> [#uses=1]
   %tmp26 = add i64 %tmp, %tmp25                   ; <i64> [#uses=1]
   %scevgep27 = getelementptr i8, i8* %0, i64 %tmp26   ; <i8*> [#uses=1]
-  %12 = load i32* %scevgep12, align 4             ; <i32> [#uses=1]
+  %12 = load i32, i32* %scevgep12, align 4             ; <i32> [#uses=1]
   %13 = lshr i32 %12, 24                          ; <i32> [#uses=1]
   %14 = trunc i32 %13 to i8                       ; <i8> [#uses=1]
   store i8 %14, i8* undef, align 1
   store i8 undef, i8* %scevgep27, align 1
-  %15 = load i32* %11, align 4                    ; <i32> [#uses=1]
+  %15 = load i32, i32* %11, align 4                    ; <i32> [#uses=1]
   %16 = shl i32 %15, 1                            ; <i32> [#uses=1]
   %17 = icmp sgt i32 %16, undef                   ; <i1> [#uses=1]
   %indvar.next = add i64 %indvar, 1               ; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2009-10-19-atomic-cmp-eflags.ll b/llvm/test/CodeGen/X86/2009-10-19-atomic-cmp-eflags.ll
index 006a02a..2d5052d 100644
--- a/llvm/test/CodeGen/X86/2009-10-19-atomic-cmp-eflags.ll
+++ b/llvm/test/CodeGen/X86/2009-10-19-atomic-cmp-eflags.ll
@@ -32,7 +32,7 @@
   br label %lt_init.exit
 
 lt_init.exit:                                     ; preds = %if.end.i, %if.then.i
-  %3 = load i32* %retval.i                        ; <i32> [#uses=1]
+  %3 = load i32, i32* %retval.i                        ; <i32> [#uses=1]
   call void asm sideeffect "cpuid", "~{ax},~{bx},~{cx},~{dx},~{memory},~{dirflag},~{fpsr},~{flags}"() nounwind
   %4 = call i64 @llvm.readcyclecounter() nounwind ; <i64> [#uses=1]
   %5 = sub i64 %4, %2                             ; <i64> [#uses=1]
@@ -50,7 +50,7 @@
 
 if.end:                                           ; preds = %if.then, %lt_init.exit
   store i32 0, i32* %retval
-  %7 = load i32* %retval                          ; <i32> [#uses=1]
+  %7 = load i32, i32* %retval                          ; <i32> [#uses=1]
   tail call void asm sideeffect "cpuid", "~{ax},~{bx},~{cx},~{dx},~{memory},~{dirflag},~{fpsr},~{flags}"() nounwind
   %8 = tail call i64 @llvm.readcyclecounter() nounwind ; <i64> [#uses=1]
   %9 = sub i64 %8, %0                             ; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2009-10-25-RewriterBug.ll b/llvm/test/CodeGen/X86/2009-10-25-RewriterBug.ll
index 4fbecfd..be18186 100644
--- a/llvm/test/CodeGen/X86/2009-10-25-RewriterBug.ll
+++ b/llvm/test/CodeGen/X86/2009-10-25-RewriterBug.ll
@@ -95,7 +95,7 @@
 
 bb45.i:                                           ; preds = %bb41.i
   %33 = getelementptr inbounds %struct.StorablePicture, %struct.StorablePicture* %26, i64 0, i32 5, i64 undef, i64 %32, i64 undef ; <i64*> [#uses=1]
-  %34 = load i64* %33, align 8                    ; <i64> [#uses=1]
+  %34 = load i64, i64* %33, align 8                    ; <i64> [#uses=1]
   br label %bb47.i
 
 bb47.i:                                           ; preds = %bb45.i, %bb41.i
@@ -110,9 +110,9 @@
   br label %bb60.i
 
 bb60.i:                                           ; preds = %bb58.i, %bb57.i
-  %35 = load i64*** undef, align 8                ; <i64**> [#uses=1]
+  %35 = load i64**, i64*** undef, align 8                ; <i64**> [#uses=1]
   %scevgep256.i = getelementptr i64*, i64** %35, i64 %indvar248.i ; <i64**> [#uses=1]
-  %36 = load i64** %scevgep256.i, align 8         ; <i64*> [#uses=1]
+  %36 = load i64*, i64** %scevgep256.i, align 8         ; <i64*> [#uses=1]
   %scevgep243.i = getelementptr i64, i64* %36, i64 undef ; <i64*> [#uses=1]
   store i64 -1, i64* %scevgep243.i, align 8
   br label %bb64.i
@@ -160,7 +160,7 @@
   br label %bb102.i
 
 bb102.i:                                          ; preds = %bb101.i, %bb83.i
-  %48 = load %struct.StorablePicture** %0, align 8 ; <%struct.StorablePicture*> [#uses=2]
+  %48 = load %struct.StorablePicture*, %struct.StorablePicture** %0, align 8 ; <%struct.StorablePicture*> [#uses=2]
   br i1 undef, label %bb81.i, label %bb104.i
 
 bb104.i:                                          ; preds = %bb102.i, %bb80.i
diff --git a/llvm/test/CodeGen/X86/2009-11-16-MachineLICM.ll b/llvm/test/CodeGen/X86/2009-11-16-MachineLICM.ll
index 987b6e4..80f4f74 100644
--- a/llvm/test/CodeGen/X86/2009-11-16-MachineLICM.ll
+++ b/llvm/test/CodeGen/X86/2009-11-16-MachineLICM.ll
@@ -25,13 +25,13 @@
   %tmp1318 = or i64 %tmp9, 3                      ; <i64> [#uses=1]
   %scevgep14 = getelementptr float, float* %x, i64 %tmp1318 ; <float*> [#uses=1]
   %x_addr.03 = getelementptr float, float* %x, i64 %tmp9 ; <float*> [#uses=1]
-  %1 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 0), align 16 ; <float> [#uses=1]
+  %1 = load float, float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 0), align 16 ; <float> [#uses=1]
   store float %1, float* %x_addr.03, align 4
-  %2 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 1), align 4 ; <float> [#uses=1]
+  %2 = load float, float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 1), align 4 ; <float> [#uses=1]
   store float %2, float* %scevgep, align 4
-  %3 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 2), align 8 ; <float> [#uses=1]
+  %3 = load float, float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 2), align 8 ; <float> [#uses=1]
   store float %3, float* %scevgep12, align 4
-  %4 = load float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 3), align 4 ; <float> [#uses=1]
+  %4 = load float, float* getelementptr inbounds ([4 x float]* @g, i64 0, i64 3), align 4 ; <float> [#uses=1]
   store float %4, float* %scevgep14, align 4
   %indvar.next = add i64 %indvar, 1               ; <i64> [#uses=2]
   %exitcond = icmp eq i64 %indvar.next, %tmp      ; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2009-11-25-ImpDefBug.ll b/llvm/test/CodeGen/X86/2009-11-25-ImpDefBug.ll
index 396638f..0bf13de 100644
--- a/llvm/test/CodeGen/X86/2009-11-25-ImpDefBug.ll
+++ b/llvm/test/CodeGen/X86/2009-11-25-ImpDefBug.ll
@@ -48,7 +48,7 @@
   %.SV10.phi807 = phi i8* [ undef, %bb1.i.fragment.cl ], [ undef, %bb1.i.fragment ], [ undef, %bb5 ] ; <i8*> [#uses=1]
   %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
             cleanup
-  %1 = load i8* %.SV10.phi807, align 8            ; <i8> [#uses=0]
+  %1 = load i8, i8* %.SV10.phi807, align 8            ; <i8> [#uses=0]
   br i1 undef, label %meshBB81.bbcl.disp, label %bb13.fragment.bbcl.disp
 
 bb.i1:                                            ; preds = %bb.i.i.bbcl.disp
diff --git a/llvm/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll b/llvm/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll
index 5c10c55..e191a8a 100644
--- a/llvm/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll
+++ b/llvm/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll
@@ -9,8 +9,8 @@
   %b = alloca i32                                 ; <i32*> [#uses=2]
   %a = alloca i32                                 ; <i32*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  %0 = load i32* %b, align 4                      ; <i32> [#uses=1]
-  %1 = load i32* %b, align 4                      ; <i32> [#uses=1]
+  %0 = load i32, i32* %b, align 4                      ; <i32> [#uses=1]
+  %1 = load i32, i32* %b, align 4                      ; <i32> [#uses=1]
   %asmtmp = call i32 asm "$0 = foo ($1, $2)", "=&{ax},%0,r,~{dirflag},~{fpsr},~{flags}"(i32 %0, i32 %1) nounwind ; <i32> [#uses=1]
   store i32 %asmtmp, i32* %a
   br label %return
@@ -30,8 +30,8 @@
   %b = alloca i32                                 ; <i32*> [#uses=2]
   %a = alloca i32                                 ; <i32*> [#uses=1]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  %0 = load i32* %b, align 4                      ; <i32> [#uses=1]
-  %1 = load i32* %b, align 4                      ; <i32> [#uses=1]
+  %0 = load i32, i32* %b, align 4                      ; <i32> [#uses=1]
+  %1 = load i32, i32* %b, align 4                      ; <i32> [#uses=1]
   %asmtmp = call i32 asm "$0 = foo ($1, $2)", "=&r,%0,r,~{dirflag},~{fpsr},~{flags}"(i32 %0, i32 %1) nounwind ; <i32> [#uses=1]
   store i32 %asmtmp, i32* %a
   br label %return
diff --git a/llvm/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll b/llvm/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
index 9a84fe2..97d9787 100644
--- a/llvm/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
+++ b/llvm/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
@@ -30,13 +30,13 @@
   br label %"@CFE_debug_label_0"
 
 "@CFE_debug_label_0":                             ; preds = %"file foo2.c, line 14, bb2"
-  %r = load %test** bitcast ([1 x i64]* @ptr to %test**), align 8 ; <%test*> [#uses=1]
+  %r = load %test*, %test** bitcast ([1 x i64]* @ptr to %test**), align 8 ; <%test*> [#uses=1]
   store %test* %r, %test** %p, align 8
   br label %"@CFE_debug_label_2"
 
 "@CFE_debug_label_2":                             ; preds = %"@CFE_debug_label_0"
-  %r1 = load %link** bitcast ([1 x i64]* @link_ptr to %link**), align 8 ; <%link*> [#uses=1]
-  %r2 = load %test** %p, align 8                  ; <%test*> [#uses=1]
+  %r1 = load %link*, %link** bitcast ([1 x i64]* @link_ptr to %link**), align 8 ; <%link*> [#uses=1]
+  %r2 = load %test*, %test** %p, align 8                  ; <%test*> [#uses=1]
   %r3 = ptrtoint %test* %r2 to i64                ; <i64> [#uses=1]
   %r4 = inttoptr i64 %r3 to %link**               ; <%link**> [#uses=1]
   %r5 = getelementptr %link*, %link** %r4, i64 1          ; <%link**> [#uses=1]
@@ -44,7 +44,7 @@
   br label %"@CFE_debug_label_3"
 
 "@CFE_debug_label_3":                             ; preds = %"@CFE_debug_label_2"
-  %r6 = load %test** %p, align 8                  ; <%test*> [#uses=1]
+  %r6 = load %test*, %test** %p, align 8                  ; <%test*> [#uses=1]
   %r7 = ptrtoint %test* %r6 to i64                ; <i64> [#uses=1]
   %r8 = inttoptr i64 %r7 to %link*                ; <%link*> [#uses=1]
   %r9 = getelementptr %link, %link* %r8, i64 1           ; <%link*> [#uses=1]
@@ -52,7 +52,7 @@
   br label %"@CFE_debug_label_4"
 
 "@CFE_debug_label_4":                             ; preds = %"@CFE_debug_label_3"
-  %r10 = load %test** %p, align 8                 ; <%test*> [#uses=1]
+  %r10 = load %test*, %test** %p, align 8                 ; <%test*> [#uses=1]
   %r11 = ptrtoint %test* %r10 to i64              ; <i64> [#uses=1]
   %r12 = inttoptr i64 %r11 to i32*                ; <i32*> [#uses=1]
   store i32 1, i32* %r12, align 4
diff --git a/llvm/test/CodeGen/X86/20090313-signext.ll b/llvm/test/CodeGen/X86/20090313-signext.ll
index b8effa6..3ea1316 100644
--- a/llvm/test/CodeGen/X86/20090313-signext.ll
+++ b/llvm/test/CodeGen/X86/20090313-signext.ll
@@ -10,7 +10,7 @@
 	%0 = tail call signext i16 @h() nounwind
 	%1 = sext i16 %0 to i32
 	tail call void @g(i32 %1) nounwind
-	%2 = load i16* @x, align 2
+	%2 = load i16, i16* @x, align 2
 	ret i16 %2
 }
 
diff --git a/llvm/test/CodeGen/X86/2010-01-13-OptExtBug.ll b/llvm/test/CodeGen/X86/2010-01-13-OptExtBug.ll
index 1068481..3ecf845 100644
--- a/llvm/test/CodeGen/X86/2010-01-13-OptExtBug.ll
+++ b/llvm/test/CodeGen/X86/2010-01-13-OptExtBug.ll
@@ -8,14 +8,14 @@
   %call = tail call i8* @_Z15uprv_malloc_4_2v()
   %0 = bitcast i8* %call to double*
   %tmp = getelementptr inbounds %class.OlsonTimeZone, %class.OlsonTimeZone* %this, i32 0, i32 3
-  %tmp2 = load i16* %tmp
+  %tmp2 = load i16, i16* %tmp
   %tmp525 = getelementptr inbounds %class.OlsonTimeZone, %class.OlsonTimeZone* %this, i32 0, i32 0
-  %tmp626 = load i16* %tmp525
+  %tmp626 = load i16, i16* %tmp525
   %cmp27 = icmp slt i16 %tmp2, %tmp626
   br i1 %cmp27, label %bb.nph, label %for.end
 
 for.cond:
-  %tmp6 = load i16* %tmp5
+  %tmp6 = load i16, i16* %tmp5
   %cmp = icmp slt i16 %inc, %tmp6
   %indvar.next = add i32 %indvar, 1
   br i1 %cmp, label %for.body, label %for.end
@@ -34,9 +34,9 @@
   %tmp30 = add i32 %indvar, %tmp29
   %tmp33 = add i32 %indvar, %tmp32
   %inc = trunc i32 %tmp33 to i16
-  %tmp11 = load i8** %tmp10
+  %tmp11 = load i8*, i8** %tmp10
   %arrayidx = getelementptr i8, i8* %tmp11, i32 %tmp30
-  %tmp12 = load i8* %arrayidx
+  %tmp12 = load i8, i8* %arrayidx
   br label %for.cond
 
 for.end:
diff --git a/llvm/test/CodeGen/X86/2010-01-15-SelectionDAGCycle.ll b/llvm/test/CodeGen/X86/2010-01-15-SelectionDAGCycle.ll
index 4378786..6aba39e 100644
--- a/llvm/test/CodeGen/X86/2010-01-15-SelectionDAGCycle.ll
+++ b/llvm/test/CodeGen/X86/2010-01-15-SelectionDAGCycle.ll
@@ -11,12 +11,12 @@
 "file bug754399.f90, line 184, in inner vector loop at depth 0, bb164":		; preds = %"file bug754399.f90, line 184, in inner vector loop at depth 0, bb164", %"file bug754399.f90, line 1, bb1"
 	%tmp641 = add i64 0, 48		; <i64> [#uses=1]
 	%tmp641642 = inttoptr i64 %tmp641 to <4 x i32>*		; <<4 x i32>*> [#uses=1]
-	%r1258 = load <4 x i32>* %tmp641642, align 4		; <<4 x i32>> [#uses=2]
+	%r1258 = load <4 x i32>, <4 x i32>* %tmp641642, align 4		; <<4 x i32>> [#uses=2]
 	%r1295 = extractelement <4 x i32> %r1258, i32 3		; <i32> [#uses=1]
 	%r1296 = sext i32 %r1295 to i64		; <i64> [#uses=1]
 	%r1297 = add i64 %r1296, -1		; <i64> [#uses=1]
 	%r1298183 = getelementptr [0 x i32], [0 x i32]* %ismbs, i64 0, i64 %r1297		; <i32*> [#uses=1]
-	%r1298184 = load i32* %r1298183, align 4		; <i32> [#uses=1]
+	%r1298184 = load i32, i32* %r1298183, align 4		; <i32> [#uses=1]
 	%r1301 = extractelement <4 x i32> %r1037, i32 3		; <i32> [#uses=1]
 	%r1302 = mul i32 %r1298184, %r1301		; <i32> [#uses=1]
 	%r1306 = insertelement <4 x i32> zeroinitializer, i32 %r1302, i32 3		; <<4 x i32>> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2010-01-18-DbgValue.ll b/llvm/test/CodeGen/X86/2010-01-18-DbgValue.ll
index f954998..b49b743 100644
--- a/llvm/test/CodeGen/X86/2010-01-18-DbgValue.ll
+++ b/llvm/test/CodeGen/X86/2010-01-18-DbgValue.ll
@@ -15,14 +15,14 @@
   call void @llvm.dbg.declare(metadata %struct.Rect* %my_r0, metadata !0, metadata !{!"0x102"}), !dbg !15
   %1 = getelementptr inbounds %struct.Rect, %struct.Rect* %my_r0, i32 0, i32 0, !dbg !16 ; <%struct.Pt*> [#uses=1]
   %2 = getelementptr inbounds %struct.Pt, %struct.Pt* %1, i32 0, i32 0, !dbg !16 ; <double*> [#uses=1]
-  %3 = load double* %2, align 8, !dbg !16         ; <double> [#uses=1]
+  %3 = load double, double* %2, align 8, !dbg !16         ; <double> [#uses=1]
   store double %3, double* %0, align 8, !dbg !16
-  %4 = load double* %0, align 8, !dbg !16         ; <double> [#uses=1]
+  %4 = load double, double* %0, align 8, !dbg !16         ; <double> [#uses=1]
   store double %4, double* %retval, align 8, !dbg !16
   br label %return, !dbg !16
 
 return:                                           ; preds = %entry
-  %retval1 = load double* %retval, !dbg !16       ; <double> [#uses=1]
+  %retval1 = load double, double* %retval, !dbg !16       ; <double> [#uses=1]
   ret double %retval1, !dbg !16
 }
 
diff --git a/llvm/test/CodeGen/X86/2010-01-19-OptExtBug.ll b/llvm/test/CodeGen/X86/2010-01-19-OptExtBug.ll
index ec24e73..def8dd3 100644
--- a/llvm/test/CodeGen/X86/2010-01-19-OptExtBug.ll
+++ b/llvm/test/CodeGen/X86/2010-01-19-OptExtBug.ll
@@ -21,7 +21,7 @@
   unreachable
 
 bb9:                                              ; preds = %bb6
-  %0 = load i8* undef, align 1                    ; <i8> [#uses=3]
+  %0 = load i8, i8* undef, align 1                    ; <i8> [#uses=3]
   br i1 undef, label %bb12, label %bb10
 
 bb10:                                             ; preds = %bb9
diff --git a/llvm/test/CodeGen/X86/2010-02-04-SchedulerBug.ll b/llvm/test/CodeGen/X86/2010-02-04-SchedulerBug.ll
index 9b1430f..51686ea 100644
--- a/llvm/test/CodeGen/X86/2010-02-04-SchedulerBug.ll
+++ b/llvm/test/CodeGen/X86/2010-02-04-SchedulerBug.ll
@@ -6,13 +6,13 @@
 
 define void @t(i32 %cNum, i64 %max) nounwind optsize ssp noimplicitfloat {
 entry:
-  %0 = load %struct.b_t** null, align 4 ; <%struct.b_t*> [#uses=1]
+  %0 = load %struct.b_t*, %struct.b_t** null, align 4 ; <%struct.b_t*> [#uses=1]
   %1 = getelementptr inbounds %struct.b_t, %struct.b_t* %0, i32 %cNum, i32 5 ; <i64*> [#uses=1]
-  %2 = load i64* %1, align 4                      ; <i64> [#uses=1]
+  %2 = load i64, i64* %1, align 4                      ; <i64> [#uses=1]
   %3 = icmp ult i64 %2, %max            ; <i1> [#uses=1]
   %4 = getelementptr inbounds %struct.a_t, %struct.a_t* null, i32 0, i32 7 ; <i64**> [#uses=1]
-  %5 = load i64** %4, align 4                     ; <i64*> [#uses=0]
-  %6 = load i64* null, align 4                    ; <i64> [#uses=1]
+  %5 = load i64*, i64** %4, align 4                     ; <i64*> [#uses=0]
+  %6 = load i64, i64* null, align 4                    ; <i64> [#uses=1]
   br i1 %3, label %bb2, label %bb
 
 bb:                                               ; preds = %entry
diff --git a/llvm/test/CodeGen/X86/2010-02-11-NonTemporal.ll b/llvm/test/CodeGen/X86/2010-02-11-NonTemporal.ll
index f9cca8c7..5d74db1 100644
--- a/llvm/test/CodeGen/X86/2010-02-11-NonTemporal.ll
+++ b/llvm/test/CodeGen/X86/2010-02-11-NonTemporal.ll
@@ -11,8 +11,8 @@
 	%i = alloca i32, align 4
 	%"$LCS_0" = alloca i64, align 8
 	%"$LCS_S2" = alloca <2 x double>, align 16
-	%r9 = load <2 x double>* %"$LCS_S2", align 8
-	%r10 = load i64* %"$LCS_0", align 8
+	%r9 = load <2 x double>, <2 x double>* %"$LCS_S2", align 8
+	%r10 = load i64, i64* %"$LCS_0", align 8
 	%r11 = inttoptr i64 %r10 to <2 x double>*
 	store <2 x double> %r9, <2 x double>* %r11, align 16, !nontemporal !0
 	br label %"file movnt.f90, line 18, bb5"
diff --git a/llvm/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll b/llvm/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll
index 739a27a..193f8cf 100644
--- a/llvm/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll
+++ b/llvm/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll
@@ -228,7 +228,7 @@
   unreachable
 
 "67":                                             ; preds = %"65"
-  %1 = load i32* undef, align 4                   ; <i32> [#uses=0]
+  %1 = load i32, i32* undef, align 4                   ; <i32> [#uses=0]
   br label %"100"
 
 "82":                                             ; preds = %"61", %"60", %"59"
diff --git a/llvm/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll b/llvm/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
index d39023d..c3b12ed 100644
--- a/llvm/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
+++ b/llvm/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
@@ -23,23 +23,23 @@
 define fastcc void @l186(%tupl* %r1) noreturn nounwind {
 entry:
   %ptr1 = getelementptr %tupl, %tupl* %r1, i32 0, i32 0
-  %r2 = load i32* %ptr1
+  %r2 = load i32, i32* %ptr1
   %ptr3 = getelementptr %tupl, %tupl* %r1, i32 0, i32 1
-  %r3 = load i32* %ptr3
+  %r3 = load i32, i32* %ptr3
   %ptr5 = getelementptr %tupl, %tupl* %r1, i32 0, i32 2
-  %r4 = load i32* %ptr5
+  %r4 = load i32, i32* %ptr5
   %ptr7 = getelementptr %tupl, %tupl* %r1, i32 0, i32 3
-  %r5 = load i32* %ptr7
+  %r5 = load i32, i32* %ptr7
   %ptr9 = getelementptr %tupl, %tupl* %r1, i32 0, i32 4
-  %r6 = load i32* %ptr9
+  %r6 = load i32, i32* %ptr9
   %ptr11 = getelementptr %tupl, %tupl* %r1, i32 0, i32 5
-  %r7 = load i32* %ptr11
+  %r7 = load i32, i32* %ptr11
   %ptr13 = getelementptr %tupl, %tupl* %r1, i32 0, i32 6
-  %r8 = load i32* %ptr13
+  %r8 = load i32, i32* %ptr13
   %ptr15 = getelementptr %tupl, %tupl* %r1, i32 0, i32 7
-  %r9 = load i32* %ptr15
+  %r9 = load i32, i32* %ptr15
   %ptr17 = getelementptr %tupl, %tupl* %r1, i32 0, i32 8
-  %r10 = load i32* %ptr17
+  %r10 = load i32, i32* %ptr17
   %cond = icmp eq i32 %r10, 3
   br i1 %cond, label %true, label %false
 
diff --git a/llvm/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll b/llvm/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll
index 4a26ba0..4e4e006 100644
--- a/llvm/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll
+++ b/llvm/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll
@@ -16,7 +16,7 @@
   br i1 undef, label %for.end, label %for.body
 
 for.body:                                         ; preds = %if.end40, %entry
-  %tmp6 = load i8* undef, align 2                 ; <i8> [#uses=3]
+  %tmp6 = load i8, i8* undef, align 2                 ; <i8> [#uses=3]
   %conv11 = sext i8 %tmp6 to i64                  ; <i64> [#uses=1]
   %cmp15 = icmp slt i64 %conv11, undef            ; <i1> [#uses=1]
   br i1 %cmp15, label %if.end, label %if.then
@@ -29,7 +29,7 @@
 if.end:                                           ; preds = %if.then, %for.body
   %index.0 = phi i8 [ 0, %if.then ], [ %tmp6, %for.body ] ; <i8> [#uses=1]
   store i8 %index.0, i8* undef
-  %tmp24 = load i8* undef                         ; <i8> [#uses=2]
+  %tmp24 = load i8, i8* undef                         ; <i8> [#uses=2]
   br i1 undef, label %if.end40, label %if.then36
 
 if.then36:                                        ; preds = %if.end
diff --git a/llvm/test/CodeGen/X86/2010-03-17-ISelBug.ll b/llvm/test/CodeGen/X86/2010-03-17-ISelBug.ll
index febf1db..e1d3c10 100644
--- a/llvm/test/CodeGen/X86/2010-03-17-ISelBug.ll
+++ b/llvm/test/CodeGen/X86/2010-03-17-ISelBug.ll
@@ -9,7 +9,7 @@
 define i32* @t() align 2 nounwind {
 entry:
   %operation = alloca %struct.PPOperation, align 8 ; <%struct.PPOperation*> [#uses=2]
-  %0 = load i32*** null, align 4  ; [#uses=1]
+  %0 = load i32**, i32*** null, align 4  ; [#uses=1]
   %1 = ptrtoint i32** %0 to i32   ; <i32> [#uses=1]
   %2 = sub nsw i32 %1, undef                      ; <i32> [#uses=2]
   br i1 false, label %bb20, label %bb.nph380
diff --git a/llvm/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll b/llvm/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll
index 864ebf1..2ba4d9a 100644
--- a/llvm/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll
+++ b/llvm/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll
@@ -19,7 +19,7 @@
   br i1 undef, label %invcont65, label %bb.i.i
 
 bb.i.i:                                           ; preds = %invcont64
-  %1 = load <4 x float>* undef, align 16          ; <<4 x float>> [#uses=5]
+  %1 = load <4 x float>, <4 x float>* undef, align 16          ; <<4 x float>> [#uses=5]
   br i1 undef, label %bb.nph.i.i, label %invcont65
 
 bb.nph.i.i:                                       ; preds = %bb.i.i
diff --git a/llvm/test/CodeGen/X86/2010-04-08-CoalescerBug.ll b/llvm/test/CodeGen/X86/2010-04-08-CoalescerBug.ll
index 6d25317..5adf99e 100644
--- a/llvm/test/CodeGen/X86/2010-04-08-CoalescerBug.ll
+++ b/llvm/test/CodeGen/X86/2010-04-08-CoalescerBug.ll
@@ -15,7 +15,7 @@
 ; CHECK: addq $12, %rsi
   %BitValueArray = alloca [32 x i32], align 4
   %tmp2 = getelementptr inbounds %struct.F, %struct.F* %this, i64 0, i32 0
-  %tmp3 = load %struct.FC** %tmp2, align 8
+  %tmp3 = load %struct.FC*, %struct.FC** %tmp2, align 8
   %tmp4 = getelementptr inbounds %struct.FC, %struct.FC* %tmp3, i64 0, i32 1, i64 0
   %tmp5 = bitcast [32 x i32]* %BitValueArray to i8*
   %tmp6 = bitcast i32* %tmp4 to i8*
diff --git a/llvm/test/CodeGen/X86/2010-04-13-AnalyzeBranchCrash.ll b/llvm/test/CodeGen/X86/2010-04-13-AnalyzeBranchCrash.ll
index fadbd21..6c8dbbe 100644
--- a/llvm/test/CodeGen/X86/2010-04-13-AnalyzeBranchCrash.ll
+++ b/llvm/test/CodeGen/X86/2010-04-13-AnalyzeBranchCrash.ll
@@ -12,7 +12,7 @@
   ]
 
 if.then:                                          ; preds = %entry, %entry
-  %tmp69 = load float* null, align 4              ; <float> [#uses=1]
+  %tmp69 = load float, float* null, align 4              ; <float> [#uses=1]
   %cmp19 = icmp eq %1* null, %scroller            ; <i1> [#uses=2]
   %cond = select i1 %cmp19, float %tmp69, float 0.000000e+00 ; <float> [#uses=1]
   %call36 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*)*)(i8* undef, i8* undef) nounwind optsize ; <i64> [#uses=2]
diff --git a/llvm/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll b/llvm/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
index 08ee91b..aeb2f2c 100644
--- a/llvm/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
+++ b/llvm/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
@@ -47,7 +47,7 @@
 match:                                            ; preds = %try.handler
   %4 = call i8* @__cxa_begin_catch(i8* %exc1)     ; <i8*> [#uses=1]
   %5 = bitcast i8* %4 to i32*                     ; <i32*> [#uses=1]
-  %6 = load i32* %5                               ; <i32> [#uses=1]
+  %6 = load i32, i32* %5                               ; <i32> [#uses=1]
   store i32 %6, i32* %0
   %call = invoke i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), %struct.S* %s2)
           to label %invoke.cont2 unwind label %match.handler ; <i32> [#uses=0]
@@ -80,7 +80,7 @@
   br label %cleanup.switch
 
 cleanup.switch:                                   ; preds = %invoke.cont5
-  %tmp = load i32* %cleanup.dst                   ; <i32> [#uses=1]
+  %tmp = load i32, i32* %cleanup.dst                   ; <i32> [#uses=1]
   switch i32 %tmp, label %cleanup.end [
     i32 1, label %cleanup.pad
     i32 2, label %cleanup.pad4
@@ -99,7 +99,7 @@
   br label %cleanup.switch9
 
 cleanup.switch9:                                  ; preds = %finally
-  %tmp8 = load i32* %cleanup.dst7                 ; <i32> [#uses=1]
+  %tmp8 = load i32, i32* %cleanup.dst7                 ; <i32> [#uses=1]
   switch i32 %tmp8, label %cleanup.end10 [
     i32 1, label %finally.end
     i32 2, label %finally.throw
@@ -109,7 +109,7 @@
   br label %finally.end
 
 finally.throw:                                    ; preds = %cleanup.switch9
-  %8 = load i8** %_rethrow                        ; <i8*> [#uses=1]
+  %8 = load i8*, i8** %_rethrow                        ; <i8*> [#uses=1]
   call void @_Unwind_Resume_or_Rethrow(i8* %8)
   unreachable
 
@@ -117,9 +117,9 @@
   %tmp11 = getelementptr inbounds %struct.S, %struct.S* %s1, i32 0, i32 0 ; <[2 x i8*]*> [#uses=1]
   %arraydecay = getelementptr inbounds [2 x i8*], [2 x i8*]* %tmp11, i32 0, i32 0 ; <i8**> [#uses=1]
   %arrayidx = getelementptr inbounds i8*, i8** %arraydecay, i32 1 ; <i8**> [#uses=1]
-  %tmp12 = load i8** %arrayidx                    ; <i8*> [#uses=1]
+  %tmp12 = load i8*, i8** %arrayidx                    ; <i8*> [#uses=1]
   store i8* %tmp12, i8** %retval
-  %9 = load i8** %retval                          ; <i8*> [#uses=1]
+  %9 = load i8*, i8** %retval                          ; <i8*> [#uses=1]
   ret i8* %9
 }
 
diff --git a/llvm/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll b/llvm/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll
index 86be390..5a9c021 100644
--- a/llvm/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll
+++ b/llvm/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll
@@ -23,9 +23,9 @@
   store i8* %asmresult, i8** %ret
   store i8* %asmresult1, i8** %p
   store i32 %asmresult2, i32* %t
-  %tmp = load i8** %ret                           ; <i8*> [#uses=1]
+  %tmp = load i8*, i8** %ret                           ; <i8*> [#uses=1]
   store i8* %tmp, i8** %retval
-  %1 = load i8** %retval                          ; <i8*> [#uses=1]
+  %1 = load i8*, i8** %retval                          ; <i8*> [#uses=1]
   ret i8* %1
 }
 
diff --git a/llvm/test/CodeGen/X86/2010-05-07-ldconvert.ll b/llvm/test/CodeGen/X86/2010-05-07-ldconvert.ll
index 0ba6a8f..a0c3c95 100644
--- a/llvm/test/CodeGen/X86/2010-05-07-ldconvert.ll
+++ b/llvm/test/CodeGen/X86/2010-05-07-ldconvert.ll
@@ -9,7 +9,7 @@
   %tmp = call x86_fp80 @llvm.powi.f80(x86_fp80 0xK3FFF8000000000000000, i32 -64) ; <x86_fp80> [#uses=1]
   %conv = fptosi x86_fp80 %tmp to i32             ; <i32> [#uses=1]
   store i32 %conv, i32* %r
-  %tmp1 = load i32* %r                            ; <i32> [#uses=1]
+  %tmp1 = load i32, i32* %r                            ; <i32> [#uses=1]
   %tobool = icmp ne i32 %tmp1, 0                  ; <i1> [#uses=1]
   br i1 %tobool, label %if.then, label %if.end
 
@@ -18,7 +18,7 @@
   br label %if.end
 
 if.end:                                           ; preds = %if.then, %entry
-  %0 = load i32* %retval                          ; <i32> [#uses=1]
+  %0 = load i32, i32* %retval                          ; <i32> [#uses=1]
   ret i32 %0
 }
 
diff --git a/llvm/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll b/llvm/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll
index e719da3..a6fe310 100644
--- a/llvm/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll
+++ b/llvm/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll
@@ -4,7 +4,7 @@
 
 define i32 @CXB30130(i32 %num1, i16* nocapture %num2, float* nocapture %num3, double* nocapture %num4) nounwind ssp {
 entry:
-  %0 = load i16* %num2, align 2                   ; <i16> [#uses=2]
+  %0 = load i16, i16* %num2, align 2                   ; <i16> [#uses=2]
   %1 = mul nsw i16 %0, %0                         ; <i16> [#uses=1]
   store i16 %1, i16* %num2, align 2
   ret i32 undef
diff --git a/llvm/test/CodeGen/X86/2010-05-16-nosseconversion.ll b/llvm/test/CodeGen/X86/2010-05-16-nosseconversion.ll
index 889575ce..2d3f0eb 100644
--- a/llvm/test/CodeGen/X86/2010-05-16-nosseconversion.ll
+++ b/llvm/test/CodeGen/X86/2010-05-16-nosseconversion.ll
@@ -5,7 +5,7 @@
 
 define i32 @foo() nounwind readonly ssp {
 entry:
-  %0 = load i64* @x, align 8                      ; <i64> [#uses=1]
+  %0 = load i64, i64* @x, align 8                      ; <i64> [#uses=1]
   %1 = uitofp i64 %0 to double                    ; <double> [#uses=1]
   %2 = fptosi double %1 to i32                    ; <i32> [#uses=1]
   ret i32 %2
diff --git a/llvm/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll b/llvm/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
index 8fd3368..9abccf8 100644
--- a/llvm/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
+++ b/llvm/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
@@ -11,7 +11,7 @@
 entry:
   tail call void @llvm.dbg.value(metadata %struct.a* %myvar, i64 0, metadata !8, metadata !{!"0x102"})
   %0 = getelementptr inbounds %struct.a, %struct.a* %myvar, i64 0, i32 0, !dbg !28 ; <i32*> [#uses=1]
-  %1 = load i32* %0, align 8, !dbg !28            ; <i32> [#uses=1]
+  %1 = load i32, i32* %0, align 8, !dbg !28            ; <i32> [#uses=1]
   tail call void @foo(i32 %1) nounwind optsize noinline ssp, !dbg !28
   %2 = bitcast %struct.a* %myvar to i8*, !dbg !30 ; <i8*> [#uses=1]
   ret i8* %2, !dbg !30
diff --git a/llvm/test/CodeGen/X86/2010-05-26-FP_TO_INT-crash.ll b/llvm/test/CodeGen/X86/2010-05-26-FP_TO_INT-crash.ll
index 38dcb80..ac18195 100644
--- a/llvm/test/CodeGen/X86/2010-05-26-FP_TO_INT-crash.ll
+++ b/llvm/test/CodeGen/X86/2010-05-26-FP_TO_INT-crash.ll
@@ -7,7 +7,7 @@
 
 define i32 @f2(double %x) nounwind {
 entry:
-  %0 = load double* undef, align 64               ; <double> [#uses=1]
+  %0 = load double, double* undef, align 64               ; <double> [#uses=1]
   %1 = fptoui double %0 to i16                    ; <i16> [#uses=1]
   %2 = zext i16 %1 to i32                         ; <i32> [#uses=1]
   %3 = add nsw i32 0, %2                          ; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll b/llvm/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll
index b5679e6..5a4b389 100644
--- a/llvm/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll
+++ b/llvm/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll
@@ -2,6 +2,6 @@
 ; CHECK: %fs:
 
 define i32 @test1(i32 addrspace(257)* %arg) nounwind {
-       %tmp = load i32 addrspace(257)* %arg
+       %tmp = load i32, i32 addrspace(257)* %arg
        ret i32 %tmp
 }
diff --git a/llvm/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll b/llvm/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
index 74a7610..5bf7397 100644
--- a/llvm/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
+++ b/llvm/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
@@ -10,17 +10,17 @@
   %retval = alloca i32, align 4                   ; <i32*> [#uses=3]
   %v = alloca i32, align 4                        ; <i32*> [#uses=3]
   store i32 0, i32* %retval
-  %zero = load i32* %retval
+  %zero = load i32, i32* %retval
 ; The earlyclobber register EC0 should not be spilled before the inline asm.
 ; Yes, check-not can refer to FileCheck variables defined in the future.
 ; CHECK-NOT: [[EC0]]{{.*}}(%rsp)
 ; CHECK: bsr {{[^,]*}}, [[EC0:%...]]
   %0 = call i32 asm "bsr   $1, $0\0A\09cmovz $2, $0", "=&r,ro,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i32 %zero, i32 -1) nounwind, !srcloc !0 ; <i32> [#uses=1]
   store i32 %0, i32* %v
-  %tmp = load i32* %v                             ; <i32> [#uses=1]
+  %tmp = load i32, i32* %v                             ; <i32> [#uses=1]
   %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([23 x i8]* @.str, i32 0, i32 0), i32 %tmp) ; <i32> [#uses=0]
   store i32 0, i32* %retval
-  %1 = load i32* %retval                          ; <i32> [#uses=1]
+  %1 = load i32, i32* %retval                          ; <i32> [#uses=1]
   ret i32 %0
 }
 
diff --git a/llvm/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll b/llvm/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
index ad5931e..ffb5157 100644
--- a/llvm/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
+++ b/llvm/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
@@ -7,7 +7,7 @@
 define i32 @func(%struct.type* %s) nounwind optsize ssp {
 entry:
   %tmp1 = getelementptr inbounds %struct.type, %struct.type* %s, i32 0, i32 1
-  %tmp2 = load i32* %tmp1, align 8
+  %tmp2 = load i32, i32* %tmp1, align 8
   %tmp3 = icmp eq i32 %tmp2, 10
   %tmp4 = getelementptr inbounds %struct.type, %struct.type* %s, i32 0, i32 40
   br i1 %tmp3, label %bb, label %entry.bb1_crit_edge
diff --git a/llvm/test/CodeGen/X86/2010-06-25-asm-RA-crash.ll b/llvm/test/CodeGen/X86/2010-06-25-asm-RA-crash.ll
index 3470a06..8f5f083 100644
--- a/llvm/test/CodeGen/X86/2010-06-25-asm-RA-crash.ll
+++ b/llvm/test/CodeGen/X86/2010-06-25-asm-RA-crash.ll
@@ -6,10 +6,10 @@
 entry:
   %target.addr.i = alloca i8*, align 4            ; <i8**> [#uses=2]
   %frame = alloca %struct.__SEH2Frame*, align 4   ; <%struct.__SEH2Frame**> [#uses=1]
-  %tmp = load %struct.__SEH2Frame** %frame        ; <%struct.__SEH2Frame*> [#uses=1]
+  %tmp = load %struct.__SEH2Frame*, %struct.__SEH2Frame** %frame        ; <%struct.__SEH2Frame*> [#uses=1]
   %conv = bitcast %struct.__SEH2Frame* %tmp to i8* ; <i8*> [#uses=1]
   store i8* %conv, i8** %target.addr.i
-  %tmp.i = load i8** %target.addr.i               ; <i8*> [#uses=1]
+  %tmp.i = load i8*, i8** %target.addr.i               ; <i8*> [#uses=1]
   call void asm sideeffect "push %ebp\0Apush $$0\0Apush $$0\0Apush $$Return${:uid}\0Apush $0\0Acall ${1:c}\0AReturn${:uid}: pop %ebp\0A", "imr,imr,~{ax},~{bx},~{cx},~{dx},~{si},~{di},~{flags},~{memory},~{dirflag},~{fpsr},~{flags}"(i8* %tmp.i, void (...)* @RtlUnwind) nounwind, !srcloc !0
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/2010-06-28-matched-g-constraint.ll b/llvm/test/CodeGen/X86/2010-06-28-matched-g-constraint.ll
index a0798ae..023c77a 100644
--- a/llvm/test/CodeGen/X86/2010-06-28-matched-g-constraint.ll
+++ b/llvm/test/CodeGen/X86/2010-06-28-matched-g-constraint.ll
@@ -6,6 +6,6 @@
 ; CHECK: GCROOT %eax
   %_r = alloca i32, align 4                       ; <i32*> [#uses=2]
   call void asm "/* GCROOT $0 */", "=*imr,0,~{dirflag},~{fpsr},~{flags}"(i32* %_r, i32 4) nounwind
-  %0 = load i32* %_r, align 4                     ; <i32> [#uses=1]
+  %0 = load i32, i32* %_r, align 4                     ; <i32> [#uses=1]
   ret i32 %0
 }
diff --git a/llvm/test/CodeGen/X86/2010-07-02-UnfoldBug.ll b/llvm/test/CodeGen/X86/2010-07-02-UnfoldBug.ll
index 79219dc..954f25f 100644
--- a/llvm/test/CodeGen/X86/2010-07-02-UnfoldBug.ll
+++ b/llvm/test/CodeGen/X86/2010-07-02-UnfoldBug.ll
@@ -61,7 +61,7 @@
   br i1 undef, label %bb2.i.i, label %bb.i.i49
 
 bb.i.i49:                                         ; preds = %bb22
-  %0 = load float* undef, align 4                 ; <float> [#uses=1]
+  %0 = load float, float* undef, align 4                 ; <float> [#uses=1]
   %1 = insertelement <4 x float> undef, float %0, i32 0 ; <<4 x float>> [#uses=1]
   %2 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> <float 1.000000e+00, float undef, float undef, float undef>, <4 x float> %1) nounwind readnone ; <<4 x float>> [#uses=1]
   %3 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %2, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>) nounwind readnone ; <<4 x float>> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll b/llvm/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll
index e96da94..33a89a0 100644
--- a/llvm/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll
+++ b/llvm/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll
@@ -10,7 +10,7 @@
 
 define void @_ZN7QVectorIdE4fillERKdi(double* nocapture %t) nounwind ssp align 2 {
 entry:
-  %tmp2 = load double* %t                         ; <double> [#uses=1]
+  %tmp2 = load double, double* %t                         ; <double> [#uses=1]
   br i1 undef, label %if.end, label %if.then
 
 if.then:                                          ; preds = %entry
diff --git a/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll b/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
index 12a8274..cc6354e 100644
--- a/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
+++ b/llvm/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
@@ -7,7 +7,7 @@
 
 define i32 @main() nounwind {
 entry:
-  %tmp = load i64* @g_16                          ; <i64> [#uses=1]
+  %tmp = load i64, i64* @g_16                          ; <i64> [#uses=1]
   %not.lnot = icmp ne i64 %tmp, 0                 ; <i1> [#uses=1]
   %conv = sext i1 %not.lnot to i64                ; <i64> [#uses=1]
   %and = and i64 %conv, 150                       ; <i64> [#uses=1]
@@ -20,7 +20,7 @@
 ; CHECK-NEXT: jle
 
 entry.if.end_crit_edge:                           ; preds = %entry
-  %tmp4.pre = load i32* @g_38                     ; <i32> [#uses=1]
+  %tmp4.pre = load i32, i32* @g_38                     ; <i32> [#uses=1]
   br label %if.end
 
 if.then:                                          ; preds = %entry
diff --git a/llvm/test/CodeGen/X86/2010-08-04-StackVariable.ll b/llvm/test/CodeGen/X86/2010-08-04-StackVariable.ll
index aa29061..541512d 100644
--- a/llvm/test/CodeGen/X86/2010-08-04-StackVariable.ll
+++ b/llvm/test/CodeGen/X86/2010-08-04-StackVariable.ll
@@ -13,13 +13,13 @@
 
 bb:                                               ; preds = %entry
   %1 = getelementptr inbounds %struct.SVal, %struct.SVal* %location, i32 0, i32 1, !dbg !29 ; <i32*> [#uses=1]
-  %2 = load i32* %1, align 8, !dbg !29            ; <i32> [#uses=1]
+  %2 = load i32, i32* %1, align 8, !dbg !29            ; <i32> [#uses=1]
   %3 = add i32 %2, %i, !dbg !29                   ; <i32> [#uses=1]
   br label %bb2, !dbg !29
 
 bb1:                                              ; preds = %entry
   %4 = getelementptr inbounds %struct.SVal, %struct.SVal* %location, i32 0, i32 1, !dbg !30 ; <i32*> [#uses=1]
-  %5 = load i32* %4, align 8, !dbg !30            ; <i32> [#uses=1]
+  %5 = load i32, i32* %4, align 8, !dbg !30            ; <i32> [#uses=1]
   %6 = sub i32 %5, 1, !dbg !30                    ; <i32> [#uses=1]
   br label %bb2, !dbg !30
 
@@ -58,11 +58,11 @@
   store i32 1, i32* %1, align 8, !dbg !42
   %2 = getelementptr inbounds %struct.SVal, %struct.SVal* %0, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1]
   %3 = getelementptr inbounds %struct.SVal, %struct.SVal* %v, i32 0, i32 0, !dbg !43 ; <i8**> [#uses=1]
-  %4 = load i8** %3, align 8, !dbg !43            ; <i8*> [#uses=1]
+  %4 = load i8*, i8** %3, align 8, !dbg !43            ; <i8*> [#uses=1]
   store i8* %4, i8** %2, align 8, !dbg !43
   %5 = getelementptr inbounds %struct.SVal, %struct.SVal* %0, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1]
   %6 = getelementptr inbounds %struct.SVal, %struct.SVal* %v, i32 0, i32 1, !dbg !43 ; <i32*> [#uses=1]
-  %7 = load i32* %6, align 8, !dbg !43            ; <i32> [#uses=1]
+  %7 = load i32, i32* %6, align 8, !dbg !43            ; <i32> [#uses=1]
   store i32 %7, i32* %5, align 8, !dbg !43
   %8 = call i32 @_Z3fooi4SVal(i32 2, %struct.SVal* noalias %0) nounwind, !dbg !43 ; <i32> [#uses=0]
   call void @llvm.dbg.value(metadata i32 %8, i64 0, metadata !44, metadata !{!"0x102"}), !dbg !43
diff --git a/llvm/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll b/llvm/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll
index e5542ba..b05664d 100644
--- a/llvm/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll
+++ b/llvm/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll
@@ -8,7 +8,7 @@
   %x1 = tail call i64 @g(i8* %_this, i8* %image) nounwind ; <i64> [#uses=3]
   %tmp1 = trunc i64 %x1 to i32                     ; <i32> [#uses=1]
 ; CHECK: movl (%r{{.*}}), %
-  %x4 = load i32* %h, align 4                      ; <i32> [#uses=1]
+  %x4 = load i32, i32* %h, align 4                      ; <i32> [#uses=1]
 
 ; The imull clobbers a 32-bit register.
 ; CHECK: imull %{{...}}, %e[[CLOBBER:..]]
diff --git a/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
index 11cad5f..b738019 100644
--- a/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
+++ b/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
@@ -9,8 +9,8 @@
   %a = alloca [64 x i8]
   %b = getelementptr inbounds [64 x i8], [64 x i8]* %a, i64 0, i32 0
   %c = getelementptr inbounds [64 x i8], [64 x i8]* %a, i64 0, i32 30
-  %d = load i8* %b, align 8
-  %e = load i8* %c, align 8
+  %d = load i8, i8* %b, align 8
+  %e = load i8, i8* %c, align 8
   %f = bitcast [64 x i8]* %a to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* %in, i64 64, i32 8, i1 false) nounwind
   store i8 %d, i8* %b, align 8
diff --git a/llvm/test/CodeGen/X86/2010-11-09-MOVLPS.ll b/llvm/test/CodeGen/X86/2010-11-09-MOVLPS.ll
index 1ed5f18..4b93733 100644
--- a/llvm/test/CodeGen/X86/2010-11-09-MOVLPS.ll
+++ b/llvm/test/CodeGen/X86/2010-11-09-MOVLPS.ll
@@ -21,9 +21,9 @@
   store i8* %a, i8** %a_addr
   store %0* %b, %0** %b_addr
   store %0* %c, %0** %c_addr
-  %0 = load i8** %a_addr, align 64
-  %1 = load %0** %b_addr, align 64
-  %2 = load %0** %c_addr, align 64
+  %0 = load i8*, i8** %a_addr, align 64
+  %1 = load %0*, %0** %b_addr, align 64
+  %2 = load %0*, %0** %c_addr, align 64
   %"ssa point" = bitcast i32 0 to i32
   br label %"2"
 
@@ -31,10 +31,10 @@
   %3 = bitcast i8* %0 to <2 x i32>*
   %4 = getelementptr inbounds %0, %0* %1, i32 0, i32 0
   %5 = bitcast %"int[]"* %4 to <4 x float>*
-  %6 = load <4 x float>* %5, align 16
+  %6 = load <4 x float>, <4 x float>* %5, align 16
   %7 = bitcast <2 x i32>* %3 to <2 x float>*
   %8 = bitcast <2 x float>* %7 to double*
-  %9 = load double* %8
+  %9 = load double, double* %8
   %10 = insertelement <2 x double> undef, double %9, i32 0
   %11 = insertelement <2 x double> %10, double undef, i32 1
   %12 = bitcast <2 x double> %11 to <4 x float>
@@ -48,10 +48,10 @@
   %19 = bitcast i8* %18 to <2 x i32>*
   %20 = getelementptr inbounds %0, %0* %2, i32 0, i32 0
   %21 = bitcast %"int[]"* %20 to <4 x float>*
-  %22 = load <4 x float>* %21, align 16
+  %22 = load <4 x float>, <4 x float>* %21, align 16
   %23 = bitcast <2 x i32>* %19 to <2 x float>*
   %24 = bitcast <2 x float>* %23 to double*
-  %25 = load double* %24
+  %25 = load double, double* %24
   %26 = insertelement <2 x double> undef, double %25, i32 0
   %27 = insertelement <2 x double> %26, double undef, i32 1
   %28 = bitcast <2 x double> %27 to <4 x float>
diff --git a/llvm/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll b/llvm/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll
index 6d54c7e..331e83b 100644
--- a/llvm/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll
+++ b/llvm/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll
@@ -4,11 +4,11 @@
 @s = external global i8
 define i32 @foo(i1 %cond) {
 ; CHECK: @foo
-  %u_base = load i8* @u
+  %u_base = load i8, i8* @u
   %u_val = zext i8 %u_base to i32
 ; CHECK: movzbl
 ; CHECK: movsbl
-  %s_base = load i8* @s
+  %s_base = load i8, i8* @s
   %s_val = sext i8 %s_base to i32
   %val = select i1 %cond, i32 %u_val, i32 %s_val
   ret i32 %val
diff --git a/llvm/test/CodeGen/X86/2011-02-12-shuffle.ll b/llvm/test/CodeGen/X86/2011-02-12-shuffle.ll
index b4d56d1..40e3667 100644
--- a/llvm/test/CodeGen/X86/2011-02-12-shuffle.ll
+++ b/llvm/test/CodeGen/X86/2011-02-12-shuffle.ll
@@ -9,7 +9,7 @@
   br i1 undef, label %if.end, label %UnifiedReturnBlock
 
 if.end:                                           ; preds = %entry
-  %tmp1067 = load <16 x i32> addrspace(1)* null, align 64
+  %tmp1067 = load <16 x i32>, <16 x i32> addrspace(1)* null, align 64
   %tmp1082 = shufflevector         <16 x i32> <i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef>, 
                                                                                                                 <16 x i32> %tmp1067, 
                                                                                                                 <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 26, i32 5, i32 6, i32 undef, i32 8, i32 9, i32 31, i32 30, i32 12, i32 undef, i32 undef, i32 undef>
diff --git a/llvm/test/CodeGen/X86/2011-03-02-DAGCombiner.ll b/llvm/test/CodeGen/X86/2011-03-02-DAGCombiner.ll
index c9251d2..86e579a 100644
--- a/llvm/test/CodeGen/X86/2011-03-02-DAGCombiner.ll
+++ b/llvm/test/CodeGen/X86/2011-03-02-DAGCombiner.ll
@@ -13,23 +13,23 @@
   %K = alloca %0, align 4
   store i32 0, i32* %retval
   %0 = bitcast %0* %K to i32*
-  %1 = load i32* %0, align 4
+  %1 = load i32, i32* %0, align 4
   %2 = and i32 %1, -121
   %3 = or i32 %2, 32
   store i32 %3, i32* %0, align 4
   %4 = bitcast %0* %K to i32*
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %6 = lshr i32 %5, 3
   %bf.clear = and i32 %6, 15
   %conv = sitofp i32 %bf.clear to float
   %f = getelementptr inbounds %struct.anon, %struct.anon* %F, i32 0, i32 0
-  %tmp = load float* %f, align 4
+  %tmp = load float, float* %f, align 4
   %sub = fsub float %tmp, %conv
   store float %sub, float* %f, align 4
   %ld = getelementptr inbounds %struct.anon, %struct.anon* %F, i32 0, i32 1
-  %tmp1 = load x86_fp80* %ld, align 16
+  %tmp1 = load x86_fp80, x86_fp80* %ld, align 16
   %7 = bitcast %0* %K to i32*
-  %8 = load i32* %7, align 4
+  %8 = load i32, i32* %7, align 4
   %9 = lshr i32 %8, 7
   %bf.clear2 = and i32 %9, 1
   %conv3 = uitofp i32 %bf.clear2 to x86_fp80
@@ -39,12 +39,12 @@
   %10 = bitcast %0* %K to i32*
   %11 = and i32 %bf.value, 1
   %12 = shl i32 %11, 7
-  %13 = load i32* %10, align 4
+  %13 = load i32, i32* %10, align 4
   %14 = and i32 %13, -129
   %15 = or i32 %14, %12
   store i32 %15, i32* %10, align 4
   %call = call i32 (...)* @iequals(i32 1841, i32 %bf.value, i32 0)
-  %16 = load i32* %retval
+  %16 = load i32, i32* %retval
   ret i32 %16
 }
 
diff --git a/llvm/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll b/llvm/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll
index 67c99ed..9fe6a77 100644
--- a/llvm/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll
+++ b/llvm/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll
@@ -12,7 +12,7 @@
 
 define i32 @cvtchar(i8* nocapture %sp) nounwind {
   %temp.i = alloca [2 x i8], align 1
-  %tmp1 = load i8* %sp, align 1
+  %tmp1 = load i8, i8* %sp, align 1
   %div = udiv i8 %tmp1, 10
   %rem = urem i8 %div, 10
   %arrayidx.i = getelementptr inbounds [2 x i8], [2 x i8]* %temp.i, i32 0, i32 0
diff --git a/llvm/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll b/llvm/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll
index a718792..ed64ea9 100644
--- a/llvm/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll
+++ b/llvm/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll
@@ -17,7 +17,7 @@
 ; CHECK: je
 define i32 @cti_op_eq(i8** nocapture %args) nounwind ssp {
 entry:
-  %0 = load i8** null, align 8
+  %0 = load i8*, i8** null, align 8
   %tmp13 = bitcast i8* %0 to %"class.JSC::CodeLocationCall"*
   %tobool.i.i.i = icmp ugt i8* undef, inttoptr (i64 281474976710655 to i8*)
   %or.cond.i = and i1 %tobool.i.i.i, undef
@@ -34,7 +34,7 @@
   br i1 undef, label %land.rhs.i121.i, label %_ZNK3JSC7JSValue8isStringEv.exit122.i
 
 land.rhs.i121.i:                                  ; preds = %if.end.i
-  %tmp.i.i117.i = load %"class.JSC::Structure"** undef, align 8
+  %tmp.i.i117.i = load %"class.JSC::Structure"*, %"class.JSC::Structure"** undef, align 8
   br label %_ZNK3JSC7JSValue8isStringEv.exit122.i
 
 _ZNK3JSC7JSValue8isStringEv.exit122.i:            ; preds = %land.rhs.i121.i, %if.end.i
@@ -48,7 +48,7 @@
 
 _ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit: ; preds = %_ZNK3JSC7JSValue8isStringEv.exit122.i, %if.then.i.i.i, %if.then.i
 
-  %1 = load i8** undef, align 8
+  %1 = load i8*, i8** undef, align 8
   br i1 undef, label %do.end39, label %do.body27
 
 do.body27:                                        ; preds = %_ZN3JSC7JSValue19equalSlowCaseInlineEPNS_9ExecStateES0_S0_.exit
diff --git a/llvm/test/CodeGen/X86/2011-05-09-loaduse.ll b/llvm/test/CodeGen/X86/2011-05-09-loaduse.ll
index c772e4c..a94a981 100644
--- a/llvm/test/CodeGen/X86/2011-05-09-loaduse.ll
+++ b/llvm/test/CodeGen/X86/2011-05-09-loaduse.ll
@@ -5,7 +5,7 @@
 ;CHECK: ret
 define float @test(<4 x float>* %A) nounwind {
 entry:
-  %T = load <4 x float>* %A
+  %T = load <4 x float>, <4 x float>* %A
   %R = extractelement <4 x float> %T, i32 3
   store <4 x float><float 0.0, float 0.0, float 0.0, float 0.0>, <4 x float>* %A
   ret float %R
diff --git a/llvm/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll b/llvm/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll
index f9b1970..6cdcd58 100644
--- a/llvm/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll
+++ b/llvm/test/CodeGen/X86/2011-05-26-UnreachableBlockElim.ll
@@ -41,7 +41,7 @@
 
 cond.end166.i:                                    ; preds = %cond.false156.i, %cond.true138.i
   %idxprom1113.i = phi i64 [ %idxprom1114.i, %cond.false156.i ], [ undef, %cond.true138.i ]
-  %tmp235.i = load %struct.state** getelementptr inbounds (%struct.dfa* @aux_temp, i64 0, i32 2), align 8
+  %tmp235.i = load %struct.state*, %struct.state** getelementptr inbounds (%struct.dfa* @aux_temp, i64 0, i32 2), align 8
   %att.i = getelementptr inbounds %struct.state, %struct.state* %tmp235.i, i64 %idxprom1113.i, i32 0
   store i32 0, i32* %att.i, align 4
   ret void
diff --git a/llvm/test/CodeGen/X86/2011-05-27-CrossClassCoalescing.ll b/llvm/test/CodeGen/X86/2011-05-27-CrossClassCoalescing.ll
index 8096394..414bd24 100644
--- a/llvm/test/CodeGen/X86/2011-05-27-CrossClassCoalescing.ll
+++ b/llvm/test/CodeGen/X86/2011-05-27-CrossClassCoalescing.ll
@@ -20,7 +20,7 @@
 
 for.body.i:                                       ; preds = %for.inc.i, %if.then
   %tmp3524.i = phi i32 [ 0, %land.lhs.true ], [ %tmp351.i, %for.inc.i ]
-  %tmp6.i12 = load i32* undef, align 4
+  %tmp6.i12 = load i32, i32* undef, align 4
   br i1 undef, label %for.inc.i, label %if.then.i17
 
 if.then.i17:                                      ; preds = %for.body.i
@@ -28,7 +28,7 @@
   %and14.i = and i32 %shr.i14, 255
   %idxprom15.i = zext i32 %and14.i to i64
   %arrayidx16.i = getelementptr inbounds [256 x i32], [256 x i32]* @bit_count, i64 0, i64 %idxprom15.i
-  %tmp17.i15 = load i32* %arrayidx16.i, align 4
+  %tmp17.i15 = load i32, i32* %arrayidx16.i, align 4
   %add.i = add i32 0, %tmp3524.i
   %add24.i = add i32 %add.i, %tmp17.i15
   %add31.i = add i32 %add24.i, 0
diff --git a/llvm/test/CodeGen/X86/2011-06-01-fildll.ll b/llvm/test/CodeGen/X86/2011-06-01-fildll.ll
index 3a0b05f..30c7434 100644
--- a/llvm/test/CodeGen/X86/2011-06-01-fildll.ll
+++ b/llvm/test/CodeGen/X86/2011-06-01-fildll.ll
@@ -7,7 +7,7 @@
 entry:
 ; CHECK: movl
 ; CHECK-NOT: movl
-  %tmp1 = load i64* %x, align 4
+  %tmp1 = load i64, i64* %x, align 4
 ; CHECK: fildll
   %conv = sitofp i64 %tmp1 to float
   %add = fadd float %conv, 1.000000e+00
diff --git a/llvm/test/CodeGen/X86/2011-06-03-x87chain.ll b/llvm/test/CodeGen/X86/2011-06-03-x87chain.ll
index f751668..c78e8e3 100644
--- a/llvm/test/CodeGen/X86/2011-06-03-x87chain.ll
+++ b/llvm/test/CodeGen/X86/2011-06-03-x87chain.ll
@@ -2,7 +2,7 @@
 
 define float @chainfail1(i64* nocapture %a, i64* nocapture %b, i32 %x, i32 %y, float* nocapture %f) nounwind uwtable noinline ssp {
 entry:
-  %tmp1 = load i64* %a, align 8
+  %tmp1 = load i64, i64* %a, align 8
 ; Insure x87 ops are properly chained, order preserved.
 ; CHECK: fildll
   %conv = sitofp i64 %tmp1 to float
@@ -23,7 +23,7 @@
   %sub = add nsw i32 %mul, -1
   %idxprom = sext i32 %sub to i64
   %arrayidx = getelementptr inbounds i64, i64* %a, i64 %idxprom
-  %tmp4 = load i64* %arrayidx, align 8
+  %tmp4 = load i64, i64* %arrayidx, align 8
 ; CHECK: fildll
   %conv = sitofp i64 %tmp4 to float
   store float %conv, float* %f, align 4
@@ -35,7 +35,7 @@
   br i1 undef, label %while.end, label %while.body
 
 while.body:                                       ; preds = %while.body, %entry
-  %x.1.copyload = load i24* undef, align 1
+  %x.1.copyload = load i24, i24* undef, align 1
   %conv = sitofp i24 %x.1.copyload to float
   %div = fmul float %conv, 0x3E80000000000000
   store float %div, float* undef, align 4
diff --git a/llvm/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll b/llvm/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll
index 4b3490a..1285d20 100644
--- a/llvm/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll
+++ b/llvm/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll
@@ -32,17 +32,17 @@
   store i8* bitcast (%0* @0 to i8*), i8** %tmp15
   %tmp16 = bitcast %3* %tmp7 to void ()*
   store void ()* %tmp16, void ()** %tmp6, align 8
-  %tmp17 = load void ()** %tmp6, align 8
+  %tmp17 = load void ()*, void ()** %tmp6, align 8
   %tmp18 = bitcast void ()* %tmp17 to %6*
   %tmp19 = getelementptr inbounds %6, %6* %tmp18, i32 0, i32 3
   %tmp20 = bitcast %6* %tmp18 to i8*
-  %tmp21 = load i8** %tmp19
+  %tmp21 = load i8*, i8** %tmp19
   %tmp22 = bitcast i8* %tmp21 to void (i8*)*
   call void %tmp22(i8* %tmp20)
   br label %bb23
 
 bb23:                                             ; preds = %bb8
-  %tmp24 = load i64* %tmp5, align 8
+  %tmp24 = load i64, i64* %tmp5, align 8
   %tmp25 = add i64 %tmp24, 1
   store i64 %tmp25, i64* %tmp5, align 8
   %tmp26 = icmp ult i64 %tmp25, 10
diff --git a/llvm/test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll b/llvm/test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll
index cd9070a..f38ebf1 100644
--- a/llvm/test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll
+++ b/llvm/test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll
@@ -12,7 +12,7 @@
   %tmp6 = add i64 %a, -2147483647
   %.sum = add i64 %tmp6, %b
   %tmp8 = getelementptr inbounds [39 x i8], [39 x i8]* %stack_main, i64 0, i64 %.sum
-  %tmp9 = load i8* %tmp8, align 1
+  %tmp9 = load i8, i8* %tmp8, align 1
   %tmp10 = sext i8 %tmp9 to i32
   ret i32 %tmp10
 }
diff --git a/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll b/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll
index 4e84e84..a086a79 100644
--- a/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll
+++ b/llvm/test/CodeGen/X86/2011-09-14-valcoalesce.ll
@@ -121,7 +121,7 @@
 while.body85.i:                                   ; preds = %while.body85.i, %while.body.i188
   %aFreq.0518.i = phi i32 [ %add93.i, %while.body85.i ], [ 0, %while.body.i188 ]
   %inc87.i = add nsw i32 0, 1
-  %tmp91.i = load i32* undef, align 4
+  %tmp91.i = load i32, i32* undef, align 4
   %add93.i = add nsw i32 %tmp91.i, %aFreq.0518.i
   %or.cond514.i = and i1 undef, false
   br i1 %or.cond514.i, label %while.body85.i, label %while.end.i
diff --git a/llvm/test/CodeGen/X86/2011-09-21-setcc-bug.ll b/llvm/test/CodeGen/X86/2011-09-21-setcc-bug.ll
index a67c3f3..e61715a 100644
--- a/llvm/test/CodeGen/X86/2011-09-21-setcc-bug.ll
+++ b/llvm/test/CodeGen/X86/2011-09-21-setcc-bug.ll
@@ -3,10 +3,10 @@
 ; Make sure we are not crashing on this code.
 
 define void @load_4_i8(<4 x i8>* %k, <4 x i8>* %y, <4 x double>* %A1, <4 x double>* %A0)  {
-   %A = load <4 x i8>* %k
-   %B = load <4 x i8>* %y
-   %C = load <4 x double>* %A0
-   %D= load <4 x double>* %A1
+   %A = load <4 x i8>, <4 x i8>* %k
+   %B = load <4 x i8>, <4 x i8>* %y
+   %C = load <4 x double>, <4 x double>* %A0
+   %D= load <4 x double>, <4 x double>* %A1
    %M = icmp uge <4 x i8> %A, %B
    %T = select <4 x i1> %M, <4 x double> %C, <4 x double> %D
    store <4 x double> %T, <4 x double>* undef
@@ -15,10 +15,10 @@
 
 
 define void @load_256_i8(<256 x i8>* %k, <256 x i8>* %y, <256 x double>* %A1, <256 x double>* %A0)  {
-   %A = load <256 x i8>* %k
-   %B = load <256 x i8>* %y
-   %C = load <256 x double>* %A0
-   %D= load <256 x double>* %A1
+   %A = load <256 x i8>, <256 x i8>* %k
+   %B = load <256 x i8>, <256 x i8>* %y
+   %C = load <256 x double>, <256 x double>* %A0
+   %D= load <256 x double>, <256 x double>* %A1
    %M = icmp uge <256 x i8> %A, %B
    %T = select <256 x i1> %M, <256 x double> %C, <256 x double> %D
    store <256 x double> %T, <256 x double>* undef
diff --git a/llvm/test/CodeGen/X86/2011-10-11-srl.ll b/llvm/test/CodeGen/X86/2011-10-11-srl.ll
index 434f88c..ff58afc 100644
--- a/llvm/test/CodeGen/X86/2011-10-11-srl.ll
+++ b/llvm/test/CodeGen/X86/2011-10-11-srl.ll
@@ -3,7 +3,7 @@
 target triple = "x86_64-unknown-linux-gnu"
 
 define void @m387(<2 x i8>* %p, <2 x i16>* %q) {
-  %t = load <2 x i8>* %p
+  %t = load <2 x i8>, <2 x i8>* %p
   %r = sext <2 x i8> %t to <2 x i16>
   store <2 x i16> %r, <2 x i16>* %q
   ret void
diff --git a/llvm/test/CodeGen/X86/2011-10-12-MachineCSE.ll b/llvm/test/CodeGen/X86/2011-10-12-MachineCSE.ll
index ff565ef..a9432e6 100644
--- a/llvm/test/CodeGen/X86/2011-10-12-MachineCSE.ll
+++ b/llvm/test/CodeGen/X86/2011-10-12-MachineCSE.ll
@@ -16,15 +16,15 @@
 define %struct.rtx_def* @gen_add3_insn(%struct.rtx_def* %r0, %struct.rtx_def* %r1, %struct.rtx_def* %c) nounwind uwtable ssp {
 entry:
   %0 = bitcast %struct.rtx_def* %r0 to i32*
-  %1 = load i32* %0, align 8
+  %1 = load i32, i32* %0, align 8
   %2 = lshr i32 %1, 16
   %bf.clear = and i32 %2, 255
   %idxprom = sext i32 %bf.clear to i64
-  %3 = load %struct.optab** getelementptr inbounds ([49 x %struct.optab*]* @optab_table, i32 0, i64 0), align 8
+  %3 = load %struct.optab*, %struct.optab** getelementptr inbounds ([49 x %struct.optab*]* @optab_table, i32 0, i64 0), align 8
   %handlers = getelementptr inbounds %struct.optab, %struct.optab* %3, i32 0, i32 1
   %arrayidx = getelementptr inbounds [59 x %struct.anon.3], [59 x %struct.anon.3]* %handlers, i32 0, i64 %idxprom
   %insn_code = getelementptr inbounds %struct.anon.3, %struct.anon.3* %arrayidx, i32 0, i32 0
-  %4 = load i32* %insn_code, align 4
+  %4 = load i32, i32* %insn_code, align 4
   %cmp = icmp eq i32 %4, 1317
   br i1 %cmp, label %if.then, label %lor.lhs.false
 
@@ -32,19 +32,19 @@
   %idxprom1 = sext i32 %4 to i64
   %arrayidx2 = getelementptr inbounds [0 x %struct.insn_data], [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom1
   %operand = getelementptr inbounds %struct.insn_data, %struct.insn_data* %arrayidx2, i32 0, i32 3
-  %5 = load %struct.insn_operand_data** %operand, align 8
+  %5 = load %struct.insn_operand_data*, %struct.insn_operand_data** %operand, align 8
   %arrayidx3 = getelementptr inbounds %struct.insn_operand_data, %struct.insn_operand_data* %5, i64 0
   %predicate = getelementptr inbounds %struct.insn_operand_data, %struct.insn_operand_data* %arrayidx3, i32 0, i32 0
-  %6 = load i32 (%struct.rtx_def*, i32)** %predicate, align 8
+  %6 = load i32 (%struct.rtx_def*, i32)*, i32 (%struct.rtx_def*, i32)** %predicate, align 8
   %idxprom4 = sext i32 %4 to i64
   %arrayidx5 = getelementptr inbounds [0 x %struct.insn_data], [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom4
   %operand6 = getelementptr inbounds %struct.insn_data, %struct.insn_data* %arrayidx5, i32 0, i32 3
-  %7 = load %struct.insn_operand_data** %operand6, align 8
+  %7 = load %struct.insn_operand_data*, %struct.insn_operand_data** %operand6, align 8
   %arrayidx7 = getelementptr inbounds %struct.insn_operand_data, %struct.insn_operand_data* %7, i64 0
   %8 = bitcast %struct.insn_operand_data* %arrayidx7 to i8*
   %bf.field.offs = getelementptr i8, i8* %8, i32 16
   %9 = bitcast i8* %bf.field.offs to i32*
-  %10 = load i32* %9, align 8
+  %10 = load i32, i32* %9, align 8
   %bf.clear8 = and i32 %10, 65535
   %call = tail call i32 %6(%struct.rtx_def* %r0, i32 %bf.clear8)
   %tobool = icmp ne i32 %call, 0
@@ -54,19 +54,19 @@
   %idxprom10 = sext i32 %4 to i64
   %arrayidx11 = getelementptr inbounds [0 x %struct.insn_data], [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom10
   %operand12 = getelementptr inbounds %struct.insn_data, %struct.insn_data* %arrayidx11, i32 0, i32 3
-  %11 = load %struct.insn_operand_data** %operand12, align 8
+  %11 = load %struct.insn_operand_data*, %struct.insn_operand_data** %operand12, align 8
   %arrayidx13 = getelementptr inbounds %struct.insn_operand_data, %struct.insn_operand_data* %11, i64 1
   %predicate14 = getelementptr inbounds %struct.insn_operand_data, %struct.insn_operand_data* %arrayidx13, i32 0, i32 0
-  %12 = load i32 (%struct.rtx_def*, i32)** %predicate14, align 8
+  %12 = load i32 (%struct.rtx_def*, i32)*, i32 (%struct.rtx_def*, i32)** %predicate14, align 8
   %idxprom15 = sext i32 %4 to i64
   %arrayidx16 = getelementptr inbounds [0 x %struct.insn_data], [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom15
   %operand17 = getelementptr inbounds %struct.insn_data, %struct.insn_data* %arrayidx16, i32 0, i32 3
-  %13 = load %struct.insn_operand_data** %operand17, align 8
+  %13 = load %struct.insn_operand_data*, %struct.insn_operand_data** %operand17, align 8
   %arrayidx18 = getelementptr inbounds %struct.insn_operand_data, %struct.insn_operand_data* %13, i64 1
   %14 = bitcast %struct.insn_operand_data* %arrayidx18 to i8*
   %bf.field.offs19 = getelementptr i8, i8* %14, i32 16
   %15 = bitcast i8* %bf.field.offs19 to i32*
-  %16 = load i32* %15, align 8
+  %16 = load i32, i32* %15, align 8
   %bf.clear20 = and i32 %16, 65535
   %call21 = tail call i32 %12(%struct.rtx_def* %r1, i32 %bf.clear20)
   %tobool22 = icmp ne i32 %call21, 0
@@ -76,19 +76,19 @@
   %idxprom24 = sext i32 %4 to i64
   %arrayidx25 = getelementptr inbounds [0 x %struct.insn_data], [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom24
   %operand26 = getelementptr inbounds %struct.insn_data, %struct.insn_data* %arrayidx25, i32 0, i32 3
-  %17 = load %struct.insn_operand_data** %operand26, align 8
+  %17 = load %struct.insn_operand_data*, %struct.insn_operand_data** %operand26, align 8
   %arrayidx27 = getelementptr inbounds %struct.insn_operand_data, %struct.insn_operand_data* %17, i64 2
   %predicate28 = getelementptr inbounds %struct.insn_operand_data, %struct.insn_operand_data* %arrayidx27, i32 0, i32 0
-  %18 = load i32 (%struct.rtx_def*, i32)** %predicate28, align 8
+  %18 = load i32 (%struct.rtx_def*, i32)*, i32 (%struct.rtx_def*, i32)** %predicate28, align 8
   %idxprom29 = sext i32 %4 to i64
   %arrayidx30 = getelementptr inbounds [0 x %struct.insn_data], [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom29
   %operand31 = getelementptr inbounds %struct.insn_data, %struct.insn_data* %arrayidx30, i32 0, i32 3
-  %19 = load %struct.insn_operand_data** %operand31, align 8
+  %19 = load %struct.insn_operand_data*, %struct.insn_operand_data** %operand31, align 8
   %arrayidx32 = getelementptr inbounds %struct.insn_operand_data, %struct.insn_operand_data* %19, i64 2
   %20 = bitcast %struct.insn_operand_data* %arrayidx32 to i8*
   %bf.field.offs33 = getelementptr i8, i8* %20, i32 16
   %21 = bitcast i8* %bf.field.offs33 to i32*
-  %22 = load i32* %21, align 8
+  %22 = load i32, i32* %21, align 8
   %bf.clear34 = and i32 %22, 65535
   %call35 = tail call i32 %18(%struct.rtx_def* %c, i32 %bf.clear34)
   %tobool36 = icmp ne i32 %call35, 0
@@ -101,7 +101,7 @@
   %idxprom37 = sext i32 %4 to i64
   %arrayidx38 = getelementptr inbounds [0 x %struct.insn_data], [0 x %struct.insn_data]* @insn_data, i32 0, i64 %idxprom37
   %genfun = getelementptr inbounds %struct.insn_data, %struct.insn_data* %arrayidx38, i32 0, i32 2
-  %23 = load %struct.rtx_def* (%struct.rtx_def*, ...)** %genfun, align 8
+  %23 = load %struct.rtx_def* (%struct.rtx_def*, ...)*, %struct.rtx_def* (%struct.rtx_def*, ...)** %genfun, align 8
   %call39 = tail call %struct.rtx_def* (%struct.rtx_def*, ...)* %23(%struct.rtx_def* %r0, %struct.rtx_def* %r1, %struct.rtx_def* %c)
   br label %return
 
diff --git a/llvm/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll b/llvm/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll
index e7d1e19..c9dc050 100644
--- a/llvm/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll
+++ b/llvm/test/CodeGen/X86/2011-10-18-FastISel-VectorParams.ll
@@ -15,11 +15,11 @@
   store <4 x float> <float 0x4008CCCCC0000000, float 0x40099999A0000000, float 0x400A666660000000, float 0x400B333340000000>, <4 x float>* %p3, align 16
   store <4 x float> <float 0x4010666660000000, float 0x4010CCCCC0000000, float 0x4011333340000000, float 0x40119999A0000000>, <4 x float>* %p4, align 16
   store <4 x float> <float 0x4014666660000000, float 0x4014CCCCC0000000, float 0x4015333340000000, float 0x40159999A0000000>, <4 x float>* %p5, align 16
-  %0 = load <4 x float>* %p1, align 16
-  %1 = load <4 x float>* %p2, align 16
-  %2 = load <4 x float>* %p3, align 16
-  %3 = load <4 x float>* %p4, align 16
-  %4 = load <4 x float>* %p5, align 16
+  %0 = load <4 x float>, <4 x float>* %p1, align 16
+  %1 = load <4 x float>, <4 x float>* %p2, align 16
+  %2 = load <4 x float>, <4 x float>* %p3, align 16
+  %3 = load <4 x float>, <4 x float>* %p4, align 16
+  %4 = load <4 x float>, <4 x float>* %p5, align 16
 ; CHECK:      movups {{%xmm[0-7]}}, (%esp)
 ; CHECK-NEXT: calll _dovectortest 
   call void @dovectortest(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3, <4 x float> %4)
diff --git a/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll b/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
index 07a6910..7e450a8 100644
--- a/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
+++ b/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll
@@ -18,8 +18,8 @@
 entry:
 ; CHECK: pmovsxbq  i(%rip), %
 ; CHECK: pmovsxbq  j(%rip), %
-  %0 = load <2 x i8>* @i, align 8
-  %1 = load <2 x i8>* @j, align 8
+  %0 = load <2 x i8>, <2 x i8>* @i, align 8
+  %1 = load <2 x i8>, <2 x i8>* @j, align 8
   %div = sdiv <2 x i8> %1, %0
   store <2 x i8> %div, <2 x i8>* getelementptr inbounds (%union.anon* @res, i32 0, i32 0), align 8
   ret i32 0
diff --git a/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll b/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
index 7eaa5bb..da3c322 100644
--- a/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
+++ b/llvm/test/CodeGen/X86/2011-10-19-widen_vselect.ll
@@ -49,7 +49,7 @@
    br label %B1
 
  B1:                                               ; preds = %entry
-   %0 = load <2 x float>* %Cy119
+   %0 = load <2 x float>, <2 x float>* %Cy119
    %1 = fptosi <2 x float> %0 to <2 x i32>
    %2 = sitofp <2 x i32> %1 to <2 x float>
    %3 = fcmp ogt <2 x float> %0, zeroinitializer
@@ -58,7 +58,7 @@
    %6 = fcmp oeq <2 x float> %2, %0
    %7 = select <2 x i1> %6, <2 x float> %0, <2 x float> %5
    store <2 x float> %7, <2 x float>* %Cy118
-   %8 = load <2 x float>* %Cy118
+   %8 = load <2 x float>, <2 x float>* %Cy118
    store <2 x float> %8, <2 x float>* %Cy11a
    ret void
 }
diff --git a/llvm/test/CodeGen/X86/2011-10-27-tstore.ll b/llvm/test/CodeGen/X86/2011-10-27-tstore.ll
index 6dea92b..290b4d0 100644
--- a/llvm/test/CodeGen/X86/2011-10-27-tstore.ll
+++ b/llvm/test/CodeGen/X86/2011-10-27-tstore.ll
@@ -8,7 +8,7 @@
 ;CHECK: ret
 define void @ltstore(<4 x i32>* %pA, <2 x i32>* %pB) {
 entry:
-  %in = load <4 x i32>* %pA
+  %in = load <4 x i32>, <4 x i32>* %pA
   %j = shufflevector <4 x i32> %in, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
   store <2 x i32> %j, <2 x i32>* %pB
   ret void
diff --git a/llvm/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll b/llvm/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll
index 8174109..dffd6d1 100644
--- a/llvm/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll
+++ b/llvm/test/CodeGen/X86/2011-11-22-AVX2-Domains.ll
@@ -18,9 +18,9 @@
   br i1 undef, label %for_loop156.lr.ph, label %if_exit
 
 for_loop156.lr.ph:                                ; preds = %if_else
-  %val_6.i21244 = load i16* undef, align 2
+  %val_6.i21244 = load i16, i16* undef, align 2
   %0 = insertelement <8 x i16> undef, i16 %val_6.i21244, i32 6
-  %val_7.i21248 = load i16* undef, align 2
+  %val_7.i21248 = load i16, i16* undef, align 2
   %1 = insertelement <8 x i16> %0, i16 %val_7.i21248, i32 7
   %uint2uint32.i20206 = zext <8 x i16> %1 to <8 x i32>
   %bitop5.i20208 = and <8 x i32> %uint2uint32.i20206, <i32 31744, i32 31744, i32 31744, i32 31744, i32 31744, i32 31744, i32 31744, i32 31744>
@@ -39,26 +39,26 @@
   %binop407 = fadd <8 x float> %binop406, <float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00>
   %binop408 = fmul <8 x float> zeroinitializer, %binop407
   %binop411 = fsub <8 x float> <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>, undef
-  %val_4.i21290 = load i16* undef, align 2
+  %val_4.i21290 = load i16, i16* undef, align 2
   %2 = insertelement <8 x i16> undef, i16 %val_4.i21290, i32 4
-  %val_5.i21294 = load i16* undef, align 2
+  %val_5.i21294 = load i16, i16* undef, align 2
   %3 = insertelement <8 x i16> %2, i16 %val_5.i21294, i32 5
-  %val_6.i21298 = load i16* undef, align 2
+  %val_6.i21298 = load i16, i16* undef, align 2
   %4 = insertelement <8 x i16> %3, i16 %val_6.i21298, i32 6
   %ptr_7.i21301 = inttoptr i64 undef to i16*
-  %val_7.i21302 = load i16* %ptr_7.i21301, align 2
+  %val_7.i21302 = load i16, i16* %ptr_7.i21301, align 2
   %5 = insertelement <8 x i16> %4, i16 %val_7.i21302, i32 7
   %uint2uint32.i20218 = zext <8 x i16> %5 to <8 x i32>
-  %structelement561 = load i8** undef, align 8
+  %structelement561 = load i8*, i8** undef, align 8
   %ptr2int563 = ptrtoint i8* %structelement561 to i64
   %smear.ptr_smear7571 = insertelement <8 x i64> undef, i64 %ptr2int563, i32 7
   %new_ptr582 = add <8 x i64> %smear.ptr_smear7571, zeroinitializer
-  %val_5.i21509 = load i8* null, align 1
+  %val_5.i21509 = load i8, i8* null, align 1
   %6 = insertelement <8 x i8> undef, i8 %val_5.i21509, i32 5
   %7 = insertelement <8 x i8> %6, i8 undef, i32 6
   %iptr_7.i21515 = extractelement <8 x i64> %new_ptr582, i32 7
   %ptr_7.i21516 = inttoptr i64 %iptr_7.i21515 to i8*
-  %val_7.i21517 = load i8* %ptr_7.i21516, align 1
+  %val_7.i21517 = load i8, i8* %ptr_7.i21516, align 1
   %8 = insertelement <8 x i8> %7, i8 %val_7.i21517, i32 7
   %uint2float.i20245 = uitofp <8 x i8> %8 to <8 x float>
   %binop.i20246 = fmul <8 x float> %uint2float.i20245, <float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000>
diff --git a/llvm/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll b/llvm/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
index 1561784..ab1b46c 100644
--- a/llvm/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
+++ b/llvm/test/CodeGen/X86/2011-12-08-AVXISelBugs.ll
@@ -13,7 +13,7 @@
   br i1 undef, label %0, label %t1.exit
 
 ; <label>:0                                       ; preds = %loop
-  %1 = load <16 x i32> addrspace(1)* undef, align 64
+  %1 = load <16 x i32>, <16 x i32> addrspace(1)* undef, align 64
   %2 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %1, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0>
   store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64
   br label %t1.exit
@@ -29,7 +29,7 @@
   br i1 undef, label %1, label %4
 
 ; <label>:1                                       ; preds = %0
-  %2 = load <16 x i32> addrspace(1)* undef, align 64
+  %2 = load <16 x i32>, <16 x i32> addrspace(1)* undef, align 64
   %3 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %2, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 0, i32 0, i32 0, i32 0>
   store <16 x i32> %3, <16 x i32> addrspace(1)* undef, align 64
   br label %4
@@ -50,7 +50,7 @@
 
 ; <label>:0                                       ; preds = %loop
   %1 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 25, i32 0>
-  %2 = load <16 x i32> addrspace(1)* undef, align 64
+  %2 = load <16 x i32>, <16 x i32> addrspace(1)* undef, align 64
   %3 = shufflevector <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, <16 x i32> %2, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   store <16 x i32> %3, <16 x i32> addrspace(1)* undef, align 64
   br label %t2.exit
@@ -64,7 +64,7 @@
 
 define <3 x i64> @t4() nounwind {
 entry:
-  %0 = load <2 x i64> addrspace(1)* undef, align 16
+  %0 = load <2 x i64>, <2 x i64> addrspace(1)* undef, align 16
   %1 = extractelement <2 x i64> %0, i32 0
   %2 = insertelement <3 x i64> <i64 undef, i64 0, i64 0>, i64 %1, i32 0
   ret <3 x i64> %2
diff --git a/llvm/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll b/llvm/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
index 14643e4..0944adb 100644
--- a/llvm/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
+++ b/llvm/test/CodeGen/X86/2011-12-26-extractelement-duplicate-load.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL: test:
 ; CHECK: pextrd $2, %xmm
 define <4 x i32> @test(<4 x i32>* %p) {
-  %v = load <4 x i32>* %p
+  %v = load <4 x i32>, <4 x i32>* %p
   %e = extractelement <4 x i32> %v, i32 2
   %cmp = icmp eq i32 %e, 3
   %sel = select i1 %cmp, <4 x i32> %v, <4 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll b/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
index 42a3b9b..2144344 100644
--- a/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
+++ b/llvm/test/CodeGen/X86/2012-01-10-UndefExceptionEdge.ll
@@ -109,7 +109,7 @@
   %tmp51 = add i32 %tmp50, undef
   %tmp52 = add i32 %tmp50, undef
   %tmp53 = getelementptr i32, i32* %tmp13, i32 %tmp52
-  %tmp54 = load i32* %tmp53, align 4
+  %tmp54 = load i32, i32* %tmp53, align 4
   %tmp55 = add i32 %tmp50, 1
   %tmp56 = icmp eq i32 %tmp55, %tmp8
   br i1 %tmp56, label %bb57, label %bb49
diff --git a/llvm/test/CodeGen/X86/2012-01-11-split-cv.ll b/llvm/test/CodeGen/X86/2012-01-11-split-cv.ll
index 69d4b93..cb39ed9 100644
--- a/llvm/test/CodeGen/X86/2012-01-11-split-cv.ll
+++ b/llvm/test/CodeGen/X86/2012-01-11-split-cv.ll
@@ -3,7 +3,7 @@
 ;CHECK-LABEL: add18i16:
 define void @add18i16(<18 x i16>* nocapture sret %ret, <18 x i16>* %bp) nounwind {
 ;CHECK: vmovaps
-  %b = load <18 x i16>* %bp, align 16
+  %b = load <18 x i16>, <18 x i16>* %bp, align 16
   %x = add <18 x i16> zeroinitializer, %b
   store <18 x i16> %x, <18 x i16>* %ret, align 16
 ;CHECK: ret
diff --git a/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll b/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
index fa8e80f..75409f2 100644
--- a/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
+++ b/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
@@ -3,7 +3,7 @@
 ; CHECK: endless_loop
 define void @endless_loop() {
 entry:
-  %0 = load <8 x i32> addrspace(1)* undef, align 32
+  %0 = load <8 x i32>, <8 x i32> addrspace(1)* undef, align 32
   %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <16 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %2 = shufflevector <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef>, <16 x i32> %1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 17>
   store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64
diff --git a/llvm/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll b/llvm/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
index b78c13f..16eef0a 100644
--- a/llvm/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
+++ b/llvm/test/CodeGen/X86/2012-01-16-mfence-nosse-flags.ll
@@ -5,7 +5,7 @@
 
 define void @baz() nounwind ssp {
 entry:
-  %0 = load i8** @ptr, align 4
+  %0 = load i8*, i8** @ptr, align 4
   %cmp = icmp eq i8* %0, null
   fence seq_cst
   br i1 %cmp, label %if.then, label %if.else
diff --git a/llvm/test/CodeGen/X86/2012-02-12-dagco.ll b/llvm/test/CodeGen/X86/2012-02-12-dagco.ll
index 13723a2..5d48c14 100644
--- a/llvm/test/CodeGen/X86/2012-02-12-dagco.ll
+++ b/llvm/test/CodeGen/X86/2012-02-12-dagco.ll
@@ -3,9 +3,9 @@
 ; Make sure we are not crashing on this one
 define void @dagco_crash() {
 entry:
-  %srcval.i411.i = load <4 x i64>* undef, align 1
+  %srcval.i411.i = load <4 x i64>, <4 x i64>* undef, align 1
   %0 = extractelement <4 x i64> %srcval.i411.i, i32 3
-  %srcval.i409.i = load <2 x i64>* undef, align 1
+  %srcval.i409.i = load <2 x i64>, <2 x i64>* undef, align 1
   %1 = extractelement <2 x i64> %srcval.i409.i, i32 0
   %2 = insertelement <8 x i64> undef, i64 %0, i32 5
   %3 = insertelement <8 x i64> %2, i64 %1, i32 6
diff --git a/llvm/test/CodeGen/X86/2012-02-29-CoalescerBug.ll b/llvm/test/CodeGen/X86/2012-02-29-CoalescerBug.ll
index bdce853..bbeb2a0 100644
--- a/llvm/test/CodeGen/X86/2012-02-29-CoalescerBug.ll
+++ b/llvm/test/CodeGen/X86/2012-02-29-CoalescerBug.ll
@@ -14,9 +14,9 @@
 define void @fn2() nounwind optsize ssp {
 entry:
   store i64 0, i64* bitcast ([2 x [2 x %struct.S0]]* @d to i64*), align 4
-  %0 = load i32* @c, align 4
+  %0 = load i32, i32* @c, align 4
   %tobool2 = icmp eq i32 %0, 0
-  %1 = load i32* @a, align 4
+  %1 = load i32, i32* @a, align 4
   %tobool4 = icmp eq i32 %1, 0
   br label %for.cond
 
diff --git a/llvm/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll b/llvm/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
index 372441a..260f059 100644
--- a/llvm/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
+++ b/llvm/test/CodeGen/X86/2012-03-26-PostRALICMBug.ll
@@ -21,7 +21,7 @@
 
 if.end:                                           ; preds = %entry
   %size5 = getelementptr inbounds %struct.ref_s, %struct.ref_s* %op, i64 0, i32 2
-  %tmp6 = load i16* %size5, align 2
+  %tmp6 = load i16, i16* %size5, align 2
   %tobool1 = icmp eq i16 %tmp6, 0
   %1 = select i1 %tobool1, i32 1396, i32 -1910
   %index10 = add i32 %index9, %1
@@ -29,12 +29,12 @@
 
 while.body.lr.ph:                                 ; preds = %if.end
   %refs = bitcast %struct.ref_s* %op to %struct.ref_s**
-  %tmp9 = load %struct.ref_s** %refs, align 8
+  %tmp9 = load %struct.ref_s*, %struct.ref_s** %refs, align 8
   %tmp4 = zext i16 %tmp6 to i64
   %index13 = add i32 %index10, 1658
   %2 = sext i32 %index13 to i64
   %3 = getelementptr [3891 x i64], [3891 x i64]* @table, i64 0, i64 %2
-  %blockaddress14 = load i64* %3, align 8
+  %blockaddress14 = load i64, i64* %3, align 8
   %4 = inttoptr i64 %blockaddress14 to i8*
   indirectbr i8* %4, [label %while.body]
 
@@ -50,7 +50,7 @@
   %tmp8 = select i1 %exitcond5, i64 13, i64 0
   %5 = sext i32 %index15 to i64
   %6 = getelementptr [3891 x i64], [3891 x i64]* @table, i64 0, i64 %5
-  %blockaddress16 = load i64* %6, align 8
+  %blockaddress16 = load i64, i64* %6, align 8
   %7 = inttoptr i64 %blockaddress16 to i8*
   indirectbr i8* %7, [label %return, label %while.body]
 
diff --git a/llvm/test/CodeGen/X86/2012-04-26-sdglue.ll b/llvm/test/CodeGen/X86/2012-04-26-sdglue.ll
index 48981f9..e0b0903 100644
--- a/llvm/test/CodeGen/X86/2012-04-26-sdglue.ll
+++ b/llvm/test/CodeGen/X86/2012-04-26-sdglue.ll
@@ -14,9 +14,9 @@
 ;CHECK: ret
 
 define void @func() nounwind ssp {
-  %tmp = load <4 x float>* null, align 1
+  %tmp = load <4 x float>, <4 x float>* null, align 1
   %tmp14 = getelementptr <4 x float>, <4 x float>* null, i32 2
-  %tmp15 = load <4 x float>* %tmp14, align 1
+  %tmp15 = load <4 x float>, <4 x float>* %tmp14, align 1
   %tmp16 = shufflevector <4 x float> %tmp, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
   %tmp17 = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %tmp16, <4 x float> undef, i8 1)
   %tmp18 = bitcast <4 x float> %tmp to <16 x i8>
diff --git a/llvm/test/CodeGen/X86/2012-07-10-extload64.ll b/llvm/test/CodeGen/X86/2012-07-10-extload64.ll
index 7233027..f33fc8c 100644
--- a/llvm/test/CodeGen/X86/2012-07-10-extload64.ll
+++ b/llvm/test/CodeGen/X86/2012-07-10-extload64.ll
@@ -4,7 +4,7 @@
 define void @load_store(<4 x i16>* %in) {
 entry:
 ; CHECK: pmovzxwd
-  %A27 = load <4 x i16>* %in, align 4
+  %A27 = load <4 x i16>, <4 x i16>* %in, align 4
   %A28 = add <4 x i16> %A27, %A27
 ; CHECK: movlpd
   store <4 x i16> %A28, <4 x i16>* %in, align 4
@@ -25,7 +25,7 @@
 ;CHECK-LABEL: load_64:
 define <2 x i32> @load_64(<2 x i32>* %ptr) {
 BB:
-  %t = load <2 x i32>* %ptr
+  %t = load <2 x i32>, <2 x i32>* %ptr
   ret <2 x i32> %t
 ;CHECK: pmovzxdq
 ;CHECK: ret
diff --git a/llvm/test/CodeGen/X86/2012-07-15-broadcastfold.ll b/llvm/test/CodeGen/X86/2012-07-15-broadcastfold.ll
index 1c39c74..7c8c2f2 100644
--- a/llvm/test/CodeGen/X86/2012-07-15-broadcastfold.ll
+++ b/llvm/test/CodeGen/X86/2012-07-15-broadcastfold.ll
@@ -9,7 +9,7 @@
 ;CHECK: ret
 define <8 x float> @bcast_fold( float* %A) {
 BB:
-  %A0 = load float* %A
+  %A0 = load float, float* %A
   %tt3 = call x86_fastcallcc i64 @barrier()
   br i1 undef, label %work, label %exit
 
diff --git a/llvm/test/CodeGen/X86/2012-08-17-legalizer-crash.ll b/llvm/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
index 0d18267..a19aa52 100644
--- a/llvm/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
+++ b/llvm/test/CodeGen/X86/2012-08-17-legalizer-crash.ll
@@ -12,9 +12,9 @@
 
 define void @fn1() nounwind uwtable ssp {
 entry:
-  %0 = load %struct._GtkSheetRow** @a, align 8
+  %0 = load %struct._GtkSheetRow*, %struct._GtkSheetRow** @a, align 8
   %1 = bitcast %struct._GtkSheetRow* %0 to i576*
-  %srcval2 = load i576* %1, align 8
+  %srcval2 = load i576, i576* %1, align 8
   %tobool = icmp ugt i576 %srcval2, 57586096570152913699974892898380567793532123114264532903689671329431521032595044740083720782129802971518987656109067457577065805510327036019308994315074097345724415
   br i1 %tobool, label %if.then, label %if.end
 
diff --git a/llvm/test/CodeGen/X86/2012-09-28-CGPBug.ll b/llvm/test/CodeGen/X86/2012-09-28-CGPBug.ll
index 9f20d46..2d29433 100644
--- a/llvm/test/CodeGen/X86/2012-09-28-CGPBug.ll
+++ b/llvm/test/CodeGen/X86/2012-09-28-CGPBug.ll
@@ -16,10 +16,10 @@
 define void @h(i8*) nounwind ssp {
   %2 = alloca i8*
   store i8* %0, i8** %2
-  %3 = load i8** %2
+  %3 = load i8*, i8** %2
   %4 = bitcast i8* %3 to { i32, i32 }*
   %5 = getelementptr { i32, i32 }, { i32, i32 }* %4, i32 0, i32 0
-  %6 = load i32* %5
+  %6 = load i32, i32* %5
   %7 = srem i32 %6, 2
   %8 = icmp slt i32 %6, 2
   %9 = select i1 %8, i32 %6, i32 %7
@@ -29,7 +29,7 @@
 ; <label>:11                                      ; preds = %1
   %12 = zext i1 %10 to i32
   %13 = getelementptr [4 x i32], [4 x i32]* @JT, i32 0, i32 %12
-  %14 = load i32* %13
+  %14 = load i32, i32* %13
   %15 = add i32 %14, ptrtoint (i8* blockaddress(@h, %11) to i32)
   %16 = inttoptr i32 %15 to i8*
   indirectbr i8* %16, [label %17, label %18]
diff --git a/llvm/test/CodeGen/X86/2012-10-02-DAGCycle.ll b/llvm/test/CodeGen/X86/2012-10-02-DAGCycle.ll
index 9efe77d..c43001e 100644
--- a/llvm/test/CodeGen/X86/2012-10-02-DAGCycle.ll
+++ b/llvm/test/CodeGen/X86/2012-10-02-DAGCycle.ll
@@ -9,9 +9,9 @@
 define i32 @t(%TRp* inreg %rp) nounwind optsize ssp {
 entry:
   %handler = getelementptr inbounds %TRp, %TRp* %rp, i32 0, i32 1
-  %0 = load %TRH** %handler, align 4
+  %0 = load %TRH*, %TRH** %handler, align 4
   %sync = getelementptr inbounds %TRH, %TRH* %0, i32 0, i32 4
-  %sync12 = load {}** %sync, align 4
+  %sync12 = load {}*, {}** %sync, align 4
   %1 = bitcast {}* %sync12 to i32 (%TRp*)*
   %call = tail call i32 %1(%TRp* inreg %rp) nounwind optsize
   ret i32 %call
@@ -29,13 +29,13 @@
   br i1 undef, label %if.then, label %if.end17
 
 if.then:                                          ; preds = %entry
-  %vecnorm.sroa.2.8.copyload = load float* undef, align 4
+  %vecnorm.sroa.2.8.copyload = load float, float* undef, align 4
   %cmp4 = fcmp olt float undef, 0x3D10000000000000
   %vecnorm.sroa.2.8.copyload36 = select i1 %cmp4, float -1.000000e+00, float %vecnorm.sroa.2.8.copyload
   %call.i.i.i = tail call float @sqrtf(float 0.000000e+00) nounwind readnone
   %div.i.i = fdiv float 1.000000e+00, %call.i.i.i
   %mul7.i.i.i = fmul float %div.i.i, %vecnorm.sroa.2.8.copyload36
-  %1 = load float (%btConvexInternalShape*)** undef, align 8
+  %1 = load float (%btConvexInternalShape*)*, float (%btConvexInternalShape*)** undef, align 8
   %call12 = tail call float %1(%btConvexInternalShape* %0)
   %mul7.i.i = fmul float %call12, %mul7.i.i.i
   %retval.sroa.0.4.insert = insertelement <2 x float> zeroinitializer, float undef, i32 1
diff --git a/llvm/test/CodeGen/X86/2012-10-03-DAGCycle.ll b/llvm/test/CodeGen/X86/2012-10-03-DAGCycle.ll
index 83ae87b..da92565 100644
--- a/llvm/test/CodeGen/X86/2012-10-03-DAGCycle.ll
+++ b/llvm/test/CodeGen/X86/2012-10-03-DAGCycle.ll
@@ -13,11 +13,11 @@
 bb:
   %tmp1 = alloca %struct.widget.375, align 8
   %tmp2 = getelementptr inbounds %struct.pluto.0, %struct.pluto.0* %arg, i64 0, i32 1
-  %tmp3 = load %struct.hoge.368** %tmp2, align 8
+  %tmp3 = load %struct.hoge.368*, %struct.hoge.368** %tmp2, align 8
   store %struct.pluto.0* %arg, %struct.pluto.0** undef, align 8
   %tmp = getelementptr inbounds %struct.widget.375, %struct.widget.375* %tmp1, i64 0, i32 2
   %tmp4 = getelementptr %struct.pluto.0, %struct.pluto.0* %arg, i64 0, i32 0, i32 0
-  %tmp5 = load %i8** %tmp4, align 8
+  %tmp5 = load %i8*, %i8** %tmp4, align 8
   store %i8* %tmp5, %i8** %tmp, align 8
   %tmp6 = getelementptr inbounds %struct.widget.375, %struct.widget.375* %tmp1, i64 0, i32 3
   store %struct.hoge.368* %tmp3, %struct.hoge.368** %tmp6, align 8
diff --git a/llvm/test/CodeGen/X86/2012-10-18-crash-dagco.ll b/llvm/test/CodeGen/X86/2012-10-18-crash-dagco.ll
index 71b2060..fb29241 100644
--- a/llvm/test/CodeGen/X86/2012-10-18-crash-dagco.ll
+++ b/llvm/test/CodeGen/X86/2012-10-18-crash-dagco.ll
@@ -22,23 +22,23 @@
   ]
 
 bb28:                                             ; preds = %bb27, %bb26
-  %tmp = load i32* null
+  %tmp = load i32, i32* null
   %tmp29 = trunc i32 %tmp to i8
   store i8* undef, i8** undef
-  %tmp30 = load i32* null
+  %tmp30 = load i32, i32* null
   %tmp31 = icmp eq i32 %tmp30, 0
   %tmp32 = getelementptr inbounds [411 x i8], [411 x i8]* @global, i32 0, i32 undef
-  %tmp33 = load i8* %tmp32, align 1
+  %tmp33 = load i8, i8* %tmp32, align 1
   %tmp34 = getelementptr inbounds [411 x i8], [411 x i8]* @global, i32 0, i32 0
-  %tmp35 = load i8* %tmp34, align 1
+  %tmp35 = load i8, i8* %tmp34, align 1
   %tmp36 = select i1 %tmp31, i8 %tmp35, i8 %tmp33
   %tmp37 = select i1 undef, i8 %tmp29, i8 %tmp36
   %tmp38 = zext i8 %tmp37 to i32
   %tmp39 = select i1 undef, i32 0, i32 %tmp38
   %tmp40 = getelementptr inbounds i32, i32* null, i32 %tmp39
-  %tmp41 = load i32* %tmp40, align 4
-  %tmp42 = load i32* undef, align 4
-  %tmp43 = load i32* undef
+  %tmp41 = load i32, i32* %tmp40, align 4
+  %tmp42 = load i32, i32* undef, align 4
+  %tmp43 = load i32, i32* undef
   %tmp44 = xor i32 %tmp42, %tmp43
   %tmp45 = lshr i32 %tmp44, 8
   %tmp46 = lshr i32 %tmp44, 7
diff --git a/llvm/test/CodeGen/X86/2012-11-28-merge-store-alias.ll b/llvm/test/CodeGen/X86/2012-11-28-merge-store-alias.ll
index ce485212..df4f028 100644
--- a/llvm/test/CodeGen/X86/2012-11-28-merge-store-alias.ll
+++ b/llvm/test/CodeGen/X86/2012-11-28-merge-store-alias.ll
@@ -21,7 +21,7 @@
 
   store i32 0, i32* %O1_1
   store i32 0, i32* %O1_2
-  %ret = load  i32* %ld_ptr  ; <--- does not alias.
+  %ret = load  i32,  i32* %ld_ptr  ; <--- does not alias.
   store i32 0, i32* %O1_3
   store i32 0, i32* %O1_4
 
@@ -44,7 +44,7 @@
 
   store i32 0, i32* %O1_1
   store i32 0, i32* %O1_2
-  %ret = load  i32* %ld_ptr  ;  <--- may alias
+  %ret = load  i32,  i32* %ld_ptr  ;  <--- may alias
   store i32 0, i32* %O1_3
   store i32 0, i32* %O1_4
 
diff --git a/llvm/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll b/llvm/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
index aa9fffd..2239ea1 100644
--- a/llvm/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
+++ b/llvm/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
@@ -18,7 +18,7 @@
 entry:
   call void @llvm.dbg.declare(metadata %struct.hgstruct.2.29* %hg, metadata !4, metadata !{!"0x102"})
   %type = getelementptr inbounds %struct.node.0.27, %struct.node.0.27* %p, i64 0, i32 0
-  %0 = load i16* %type, align 2
+  %0 = load i16, i16* %type, align 2
   %cmp = icmp eq i16 %0, 1
   br i1 %cmp, label %return, label %for.cond.preheader
 
diff --git a/llvm/test/CodeGen/X86/2012-11-30-misched-dbg.ll b/llvm/test/CodeGen/X86/2012-11-30-misched-dbg.ll
index 8611813..3104d5a 100644
--- a/llvm/test/CodeGen/X86/2012-11-30-misched-dbg.ll
+++ b/llvm/test/CodeGen/X86/2012-11-30-misched-dbg.ll
@@ -45,7 +45,7 @@
 if.then4073:                                      ; preds = %if.then3344
   call void @llvm.dbg.declare(metadata [20 x i8]* %num14075, metadata !4, metadata !{!"0x102"})
   %arraydecay4078 = getelementptr inbounds [20 x i8], [20 x i8]* %num14075, i64 0, i64 0
-  %0 = load i32* undef, align 4
+  %0 = load i32, i32* undef, align 4
   %add4093 = add nsw i32 %0, 0
   %conv4094 = sitofp i32 %add4093 to float
   %div4095 = fdiv float %conv4094, 5.670000e+02
diff --git a/llvm/test/CodeGen/X86/2012-12-06-python27-miscompile.ll b/llvm/test/CodeGen/X86/2012-12-06-python27-miscompile.ll
index d73f95e..b80ae3a 100644
--- a/llvm/test/CodeGen/X86/2012-12-06-python27-miscompile.ll
+++ b/llvm/test/CodeGen/X86/2012-12-06-python27-miscompile.ll
@@ -15,7 +15,7 @@
   %used = getelementptr inbounds i64, i64* %so, i32 3
   store i64 0, i64* %used, align 8
   %fill = getelementptr inbounds i64, i64* %so, i32 2
-  %L = load i64* %fill, align 8
+  %L = load i64, i64* %fill, align 8
   store i64 0, i64* %fill, align 8
   %cmp28 = icmp sgt i64 %L, 0
   %R = sext i1 %cmp28 to i32
diff --git a/llvm/test/CodeGen/X86/2012-12-19-NoImplicitFloat.ll b/llvm/test/CodeGen/X86/2012-12-19-NoImplicitFloat.ll
index f0e5d35..e5a64b5 100644
--- a/llvm/test/CodeGen/X86/2012-12-19-NoImplicitFloat.ll
+++ b/llvm/test/CodeGen/X86/2012-12-19-NoImplicitFloat.ll
@@ -8,7 +8,7 @@
 entry:
 ; CHECK-NOT: xmm
 ; CHECK: ret
-  %0 = load %struct1** undef, align 8
+  %0 = load %struct1*, %struct1** undef, align 8
   %1 = getelementptr inbounds %struct1, %struct1* %0, i64 0, i32 0
   store i32* null, i32** %1, align 8
   %2 = getelementptr inbounds %struct1, %struct1* %0, i64 0, i32 1
diff --git a/llvm/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll b/llvm/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
index 0ff9d39..35ee84b 100644
--- a/llvm/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
+++ b/llvm/test/CodeGen/X86/2013-03-13-VEX-DestReg.ll
@@ -9,7 +9,7 @@
 
 define void @main() #0 {
 entry:
-  %0 = load <8 x float>* bitcast ([8 x float]* @b to <8 x float>*), align 32
+  %0 = load <8 x float>, <8 x float>* bitcast ([8 x float]* @b to <8 x float>*), align 32
   %bitcast.i = extractelement <8 x float> %0, i32 0
   %vecinit.i.i = insertelement <4 x float> undef, float %bitcast.i, i32 0
   %vecinit2.i.i = insertelement <4 x float> %vecinit.i.i, float 0.000000e+00, i32 1
diff --git a/llvm/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll b/llvm/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll
index 9cd150a..5ef867d 100644
--- a/llvm/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll
+++ b/llvm/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll
@@ -34,7 +34,7 @@
 ; CHECK: ret
 define i64 @test_bitcast(i64 (i64, i64, i64)** %arg, i1 %bool, i64 %arg2) {
 entry:
-  %loaded_ptr = load i64 (i64, i64, i64)** %arg, align 8
+  %loaded_ptr = load i64 (i64, i64, i64)*, i64 (i64, i64, i64)** %arg, align 8
   %raw = bitcast i64 (i64, i64, i64)* %loaded_ptr to i8*
   switch i1 %bool, label %default [
     i1 true, label %label_true
@@ -73,7 +73,7 @@
 ; CHECK: ret
 define i64 @test_inttoptr(i64 (i64, i64, i64)** %arg, i1 %bool, i64 %arg2) {
 entry:
-  %loaded_ptr = load i64 (i64, i64, i64)** %arg, align 8
+  %loaded_ptr = load i64 (i64, i64, i64)*, i64 (i64, i64, i64)** %arg, align 8
   %raw = ptrtoint i64 (i64, i64, i64)* %loaded_ptr to i64
   switch i1 %bool, label %default [
     i1 true, label %label_true
@@ -112,7 +112,7 @@
 ; CHECK: ret
 define i64 @test_ptrtoint(i64 (i64, i64, i64)** %arg, i1 %bool, i64 %arg2) {
 entry:
-  %loaded_ptr = load i64 (i64, i64, i64)** %arg, align 8
+  %loaded_ptr = load i64 (i64, i64, i64)*, i64 (i64, i64, i64)** %arg, align 8
   %raw = bitcast i64 (i64, i64, i64)* %loaded_ptr to i8*
   switch i1 %bool, label %default [
     i1 true, label %label_true
diff --git a/llvm/test/CodeGen/X86/Atomics-64.ll b/llvm/test/CodeGen/X86/Atomics-64.ll
index c392e94..6d367a7 100644
--- a/llvm/test/CodeGen/X86/Atomics-64.ll
+++ b/llvm/test/CodeGen/X86/Atomics-64.ll
@@ -308,331 +308,331 @@
 
 define void @test_op_and_fetch() nounwind {
 entry:
-  %0 = load i8* @uc, align 1
+  %0 = load i8, i8* @uc, align 1
   %1 = zext i8 %0 to i32
   %2 = trunc i32 %1 to i8
   %3 = atomicrmw add i8* @sc, i8 %2 monotonic
   %4 = add i8 %3, %2
   store i8 %4, i8* @sc, align 1
-  %5 = load i8* @uc, align 1
+  %5 = load i8, i8* @uc, align 1
   %6 = zext i8 %5 to i32
   %7 = trunc i32 %6 to i8
   %8 = atomicrmw add i8* @uc, i8 %7 monotonic
   %9 = add i8 %8, %7
   store i8 %9, i8* @uc, align 1
-  %10 = load i8* @uc, align 1
+  %10 = load i8, i8* @uc, align 1
   %11 = zext i8 %10 to i32
   %12 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
   %13 = trunc i32 %11 to i16
   %14 = atomicrmw add i16* %12, i16 %13 monotonic
   %15 = add i16 %14, %13
   store i16 %15, i16* @ss, align 2
-  %16 = load i8* @uc, align 1
+  %16 = load i8, i8* @uc, align 1
   %17 = zext i8 %16 to i32
   %18 = bitcast i8* bitcast (i16* @us to i8*) to i16*
   %19 = trunc i32 %17 to i16
   %20 = atomicrmw add i16* %18, i16 %19 monotonic
   %21 = add i16 %20, %19
   store i16 %21, i16* @us, align 2
-  %22 = load i8* @uc, align 1
+  %22 = load i8, i8* @uc, align 1
   %23 = zext i8 %22 to i32
   %24 = bitcast i8* bitcast (i32* @si to i8*) to i32*
   %25 = atomicrmw add i32* %24, i32 %23 monotonic
   %26 = add i32 %25, %23
   store i32 %26, i32* @si, align 4
-  %27 = load i8* @uc, align 1
+  %27 = load i8, i8* @uc, align 1
   %28 = zext i8 %27 to i32
   %29 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
   %30 = atomicrmw add i32* %29, i32 %28 monotonic
   %31 = add i32 %30, %28
   store i32 %31, i32* @ui, align 4
-  %32 = load i8* @uc, align 1
+  %32 = load i8, i8* @uc, align 1
   %33 = zext i8 %32 to i64
   %34 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
   %35 = atomicrmw add i64* %34, i64 %33 monotonic
   %36 = add i64 %35, %33
   store i64 %36, i64* @sl, align 8
-  %37 = load i8* @uc, align 1
+  %37 = load i8, i8* @uc, align 1
   %38 = zext i8 %37 to i64
   %39 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
   %40 = atomicrmw add i64* %39, i64 %38 monotonic
   %41 = add i64 %40, %38
   store i64 %41, i64* @ul, align 8
-  %42 = load i8* @uc, align 1
+  %42 = load i8, i8* @uc, align 1
   %43 = zext i8 %42 to i64
   %44 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
   %45 = atomicrmw add i64* %44, i64 %43 monotonic
   %46 = add i64 %45, %43
   store i64 %46, i64* @sll, align 8
-  %47 = load i8* @uc, align 1
+  %47 = load i8, i8* @uc, align 1
   %48 = zext i8 %47 to i64
   %49 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
   %50 = atomicrmw add i64* %49, i64 %48 monotonic
   %51 = add i64 %50, %48
   store i64 %51, i64* @ull, align 8
-  %52 = load i8* @uc, align 1
+  %52 = load i8, i8* @uc, align 1
   %53 = zext i8 %52 to i32
   %54 = trunc i32 %53 to i8
   %55 = atomicrmw sub i8* @sc, i8 %54 monotonic
   %56 = sub i8 %55, %54
   store i8 %56, i8* @sc, align 1
-  %57 = load i8* @uc, align 1
+  %57 = load i8, i8* @uc, align 1
   %58 = zext i8 %57 to i32
   %59 = trunc i32 %58 to i8
   %60 = atomicrmw sub i8* @uc, i8 %59 monotonic
   %61 = sub i8 %60, %59
   store i8 %61, i8* @uc, align 1
-  %62 = load i8* @uc, align 1
+  %62 = load i8, i8* @uc, align 1
   %63 = zext i8 %62 to i32
   %64 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
   %65 = trunc i32 %63 to i16
   %66 = atomicrmw sub i16* %64, i16 %65 monotonic
   %67 = sub i16 %66, %65
   store i16 %67, i16* @ss, align 2
-  %68 = load i8* @uc, align 1
+  %68 = load i8, i8* @uc, align 1
   %69 = zext i8 %68 to i32
   %70 = bitcast i8* bitcast (i16* @us to i8*) to i16*
   %71 = trunc i32 %69 to i16
   %72 = atomicrmw sub i16* %70, i16 %71 monotonic
   %73 = sub i16 %72, %71
   store i16 %73, i16* @us, align 2
-  %74 = load i8* @uc, align 1
+  %74 = load i8, i8* @uc, align 1
   %75 = zext i8 %74 to i32
   %76 = bitcast i8* bitcast (i32* @si to i8*) to i32*
   %77 = atomicrmw sub i32* %76, i32 %75 monotonic
   %78 = sub i32 %77, %75
   store i32 %78, i32* @si, align 4
-  %79 = load i8* @uc, align 1
+  %79 = load i8, i8* @uc, align 1
   %80 = zext i8 %79 to i32
   %81 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
   %82 = atomicrmw sub i32* %81, i32 %80 monotonic
   %83 = sub i32 %82, %80
   store i32 %83, i32* @ui, align 4
-  %84 = load i8* @uc, align 1
+  %84 = load i8, i8* @uc, align 1
   %85 = zext i8 %84 to i64
   %86 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
   %87 = atomicrmw sub i64* %86, i64 %85 monotonic
   %88 = sub i64 %87, %85
   store i64 %88, i64* @sl, align 8
-  %89 = load i8* @uc, align 1
+  %89 = load i8, i8* @uc, align 1
   %90 = zext i8 %89 to i64
   %91 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
   %92 = atomicrmw sub i64* %91, i64 %90 monotonic
   %93 = sub i64 %92, %90
   store i64 %93, i64* @ul, align 8
-  %94 = load i8* @uc, align 1
+  %94 = load i8, i8* @uc, align 1
   %95 = zext i8 %94 to i64
   %96 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
   %97 = atomicrmw sub i64* %96, i64 %95 monotonic
   %98 = sub i64 %97, %95
   store i64 %98, i64* @sll, align 8
-  %99 = load i8* @uc, align 1
+  %99 = load i8, i8* @uc, align 1
   %100 = zext i8 %99 to i64
   %101 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
   %102 = atomicrmw sub i64* %101, i64 %100 monotonic
   %103 = sub i64 %102, %100
   store i64 %103, i64* @ull, align 8
-  %104 = load i8* @uc, align 1
+  %104 = load i8, i8* @uc, align 1
   %105 = zext i8 %104 to i32
   %106 = trunc i32 %105 to i8
   %107 = atomicrmw or i8* @sc, i8 %106 monotonic
   %108 = or i8 %107, %106
   store i8 %108, i8* @sc, align 1
-  %109 = load i8* @uc, align 1
+  %109 = load i8, i8* @uc, align 1
   %110 = zext i8 %109 to i32
   %111 = trunc i32 %110 to i8
   %112 = atomicrmw or i8* @uc, i8 %111 monotonic
   %113 = or i8 %112, %111
   store i8 %113, i8* @uc, align 1
-  %114 = load i8* @uc, align 1
+  %114 = load i8, i8* @uc, align 1
   %115 = zext i8 %114 to i32
   %116 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
   %117 = trunc i32 %115 to i16
   %118 = atomicrmw or i16* %116, i16 %117 monotonic
   %119 = or i16 %118, %117
   store i16 %119, i16* @ss, align 2
-  %120 = load i8* @uc, align 1
+  %120 = load i8, i8* @uc, align 1
   %121 = zext i8 %120 to i32
   %122 = bitcast i8* bitcast (i16* @us to i8*) to i16*
   %123 = trunc i32 %121 to i16
   %124 = atomicrmw or i16* %122, i16 %123 monotonic
   %125 = or i16 %124, %123
   store i16 %125, i16* @us, align 2
-  %126 = load i8* @uc, align 1
+  %126 = load i8, i8* @uc, align 1
   %127 = zext i8 %126 to i32
   %128 = bitcast i8* bitcast (i32* @si to i8*) to i32*
   %129 = atomicrmw or i32* %128, i32 %127 monotonic
   %130 = or i32 %129, %127
   store i32 %130, i32* @si, align 4
-  %131 = load i8* @uc, align 1
+  %131 = load i8, i8* @uc, align 1
   %132 = zext i8 %131 to i32
   %133 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
   %134 = atomicrmw or i32* %133, i32 %132 monotonic
   %135 = or i32 %134, %132
   store i32 %135, i32* @ui, align 4
-  %136 = load i8* @uc, align 1
+  %136 = load i8, i8* @uc, align 1
   %137 = zext i8 %136 to i64
   %138 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
   %139 = atomicrmw or i64* %138, i64 %137 monotonic
   %140 = or i64 %139, %137
   store i64 %140, i64* @sl, align 8
-  %141 = load i8* @uc, align 1
+  %141 = load i8, i8* @uc, align 1
   %142 = zext i8 %141 to i64
   %143 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
   %144 = atomicrmw or i64* %143, i64 %142 monotonic
   %145 = or i64 %144, %142
   store i64 %145, i64* @ul, align 8
-  %146 = load i8* @uc, align 1
+  %146 = load i8, i8* @uc, align 1
   %147 = zext i8 %146 to i64
   %148 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
   %149 = atomicrmw or i64* %148, i64 %147 monotonic
   %150 = or i64 %149, %147
   store i64 %150, i64* @sll, align 8
-  %151 = load i8* @uc, align 1
+  %151 = load i8, i8* @uc, align 1
   %152 = zext i8 %151 to i64
   %153 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
   %154 = atomicrmw or i64* %153, i64 %152 monotonic
   %155 = or i64 %154, %152
   store i64 %155, i64* @ull, align 8
-  %156 = load i8* @uc, align 1
+  %156 = load i8, i8* @uc, align 1
   %157 = zext i8 %156 to i32
   %158 = trunc i32 %157 to i8
   %159 = atomicrmw xor i8* @sc, i8 %158 monotonic
   %160 = xor i8 %159, %158
   store i8 %160, i8* @sc, align 1
-  %161 = load i8* @uc, align 1
+  %161 = load i8, i8* @uc, align 1
   %162 = zext i8 %161 to i32
   %163 = trunc i32 %162 to i8
   %164 = atomicrmw xor i8* @uc, i8 %163 monotonic
   %165 = xor i8 %164, %163
   store i8 %165, i8* @uc, align 1
-  %166 = load i8* @uc, align 1
+  %166 = load i8, i8* @uc, align 1
   %167 = zext i8 %166 to i32
   %168 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
   %169 = trunc i32 %167 to i16
   %170 = atomicrmw xor i16* %168, i16 %169 monotonic
   %171 = xor i16 %170, %169
   store i16 %171, i16* @ss, align 2
-  %172 = load i8* @uc, align 1
+  %172 = load i8, i8* @uc, align 1
   %173 = zext i8 %172 to i32
   %174 = bitcast i8* bitcast (i16* @us to i8*) to i16*
   %175 = trunc i32 %173 to i16
   %176 = atomicrmw xor i16* %174, i16 %175 monotonic
   %177 = xor i16 %176, %175
   store i16 %177, i16* @us, align 2
-  %178 = load i8* @uc, align 1
+  %178 = load i8, i8* @uc, align 1
   %179 = zext i8 %178 to i32
   %180 = bitcast i8* bitcast (i32* @si to i8*) to i32*
   %181 = atomicrmw xor i32* %180, i32 %179 monotonic
   %182 = xor i32 %181, %179
   store i32 %182, i32* @si, align 4
-  %183 = load i8* @uc, align 1
+  %183 = load i8, i8* @uc, align 1
   %184 = zext i8 %183 to i32
   %185 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
   %186 = atomicrmw xor i32* %185, i32 %184 monotonic
   %187 = xor i32 %186, %184
   store i32 %187, i32* @ui, align 4
-  %188 = load i8* @uc, align 1
+  %188 = load i8, i8* @uc, align 1
   %189 = zext i8 %188 to i64
   %190 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
   %191 = atomicrmw xor i64* %190, i64 %189 monotonic
   %192 = xor i64 %191, %189
   store i64 %192, i64* @sl, align 8
-  %193 = load i8* @uc, align 1
+  %193 = load i8, i8* @uc, align 1
   %194 = zext i8 %193 to i64
   %195 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
   %196 = atomicrmw xor i64* %195, i64 %194 monotonic
   %197 = xor i64 %196, %194
   store i64 %197, i64* @ul, align 8
-  %198 = load i8* @uc, align 1
+  %198 = load i8, i8* @uc, align 1
   %199 = zext i8 %198 to i64
   %200 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
   %201 = atomicrmw xor i64* %200, i64 %199 monotonic
   %202 = xor i64 %201, %199
   store i64 %202, i64* @sll, align 8
-  %203 = load i8* @uc, align 1
+  %203 = load i8, i8* @uc, align 1
   %204 = zext i8 %203 to i64
   %205 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
   %206 = atomicrmw xor i64* %205, i64 %204 monotonic
   %207 = xor i64 %206, %204
   store i64 %207, i64* @ull, align 8
-  %208 = load i8* @uc, align 1
+  %208 = load i8, i8* @uc, align 1
   %209 = zext i8 %208 to i32
   %210 = trunc i32 %209 to i8
   %211 = atomicrmw and i8* @sc, i8 %210 monotonic
   %212 = and i8 %211, %210
   store i8 %212, i8* @sc, align 1
-  %213 = load i8* @uc, align 1
+  %213 = load i8, i8* @uc, align 1
   %214 = zext i8 %213 to i32
   %215 = trunc i32 %214 to i8
   %216 = atomicrmw and i8* @uc, i8 %215 monotonic
   %217 = and i8 %216, %215
   store i8 %217, i8* @uc, align 1
-  %218 = load i8* @uc, align 1
+  %218 = load i8, i8* @uc, align 1
   %219 = zext i8 %218 to i32
   %220 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
   %221 = trunc i32 %219 to i16
   %222 = atomicrmw and i16* %220, i16 %221 monotonic
   %223 = and i16 %222, %221
   store i16 %223, i16* @ss, align 2
-  %224 = load i8* @uc, align 1
+  %224 = load i8, i8* @uc, align 1
   %225 = zext i8 %224 to i32
   %226 = bitcast i8* bitcast (i16* @us to i8*) to i16*
   %227 = trunc i32 %225 to i16
   %228 = atomicrmw and i16* %226, i16 %227 monotonic
   %229 = and i16 %228, %227
   store i16 %229, i16* @us, align 2
-  %230 = load i8* @uc, align 1
+  %230 = load i8, i8* @uc, align 1
   %231 = zext i8 %230 to i32
   %232 = bitcast i8* bitcast (i32* @si to i8*) to i32*
   %233 = atomicrmw and i32* %232, i32 %231 monotonic
   %234 = and i32 %233, %231
   store i32 %234, i32* @si, align 4
-  %235 = load i8* @uc, align 1
+  %235 = load i8, i8* @uc, align 1
   %236 = zext i8 %235 to i32
   %237 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
   %238 = atomicrmw and i32* %237, i32 %236 monotonic
   %239 = and i32 %238, %236
   store i32 %239, i32* @ui, align 4
-  %240 = load i8* @uc, align 1
+  %240 = load i8, i8* @uc, align 1
   %241 = zext i8 %240 to i64
   %242 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
   %243 = atomicrmw and i64* %242, i64 %241 monotonic
   %244 = and i64 %243, %241
   store i64 %244, i64* @sl, align 8
-  %245 = load i8* @uc, align 1
+  %245 = load i8, i8* @uc, align 1
   %246 = zext i8 %245 to i64
   %247 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
   %248 = atomicrmw and i64* %247, i64 %246 monotonic
   %249 = and i64 %248, %246
   store i64 %249, i64* @ul, align 8
-  %250 = load i8* @uc, align 1
+  %250 = load i8, i8* @uc, align 1
   %251 = zext i8 %250 to i64
   %252 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
   %253 = atomicrmw and i64* %252, i64 %251 monotonic
   %254 = and i64 %253, %251
   store i64 %254, i64* @sll, align 8
-  %255 = load i8* @uc, align 1
+  %255 = load i8, i8* @uc, align 1
   %256 = zext i8 %255 to i64
   %257 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
   %258 = atomicrmw and i64* %257, i64 %256 monotonic
   %259 = and i64 %258, %256
   store i64 %259, i64* @ull, align 8
-  %260 = load i8* @uc, align 1
+  %260 = load i8, i8* @uc, align 1
   %261 = zext i8 %260 to i32
   %262 = trunc i32 %261 to i8
   %263 = atomicrmw nand i8* @sc, i8 %262 monotonic
   %264 = xor i8 %263, -1
   %265 = and i8 %264, %262
   store i8 %265, i8* @sc, align 1
-  %266 = load i8* @uc, align 1
+  %266 = load i8, i8* @uc, align 1
   %267 = zext i8 %266 to i32
   %268 = trunc i32 %267 to i8
   %269 = atomicrmw nand i8* @uc, i8 %268 monotonic
   %270 = xor i8 %269, -1
   %271 = and i8 %270, %268
   store i8 %271, i8* @uc, align 1
-  %272 = load i8* @uc, align 1
+  %272 = load i8, i8* @uc, align 1
   %273 = zext i8 %272 to i32
   %274 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
   %275 = trunc i32 %273 to i16
@@ -640,7 +640,7 @@
   %277 = xor i16 %276, -1
   %278 = and i16 %277, %275
   store i16 %278, i16* @ss, align 2
-  %279 = load i8* @uc, align 1
+  %279 = load i8, i8* @uc, align 1
   %280 = zext i8 %279 to i32
   %281 = bitcast i8* bitcast (i16* @us to i8*) to i16*
   %282 = trunc i32 %280 to i16
@@ -648,42 +648,42 @@
   %284 = xor i16 %283, -1
   %285 = and i16 %284, %282
   store i16 %285, i16* @us, align 2
-  %286 = load i8* @uc, align 1
+  %286 = load i8, i8* @uc, align 1
   %287 = zext i8 %286 to i32
   %288 = bitcast i8* bitcast (i32* @si to i8*) to i32*
   %289 = atomicrmw nand i32* %288, i32 %287 monotonic
   %290 = xor i32 %289, -1
   %291 = and i32 %290, %287
   store i32 %291, i32* @si, align 4
-  %292 = load i8* @uc, align 1
+  %292 = load i8, i8* @uc, align 1
   %293 = zext i8 %292 to i32
   %294 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
   %295 = atomicrmw nand i32* %294, i32 %293 monotonic
   %296 = xor i32 %295, -1
   %297 = and i32 %296, %293
   store i32 %297, i32* @ui, align 4
-  %298 = load i8* @uc, align 1
+  %298 = load i8, i8* @uc, align 1
   %299 = zext i8 %298 to i64
   %300 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
   %301 = atomicrmw nand i64* %300, i64 %299 monotonic
   %302 = xor i64 %301, -1
   %303 = and i64 %302, %299
   store i64 %303, i64* @sl, align 8
-  %304 = load i8* @uc, align 1
+  %304 = load i8, i8* @uc, align 1
   %305 = zext i8 %304 to i64
   %306 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
   %307 = atomicrmw nand i64* %306, i64 %305 monotonic
   %308 = xor i64 %307, -1
   %309 = and i64 %308, %305
   store i64 %309, i64* @ul, align 8
-  %310 = load i8* @uc, align 1
+  %310 = load i8, i8* @uc, align 1
   %311 = zext i8 %310 to i64
   %312 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
   %313 = atomicrmw nand i64* %312, i64 %311 monotonic
   %314 = xor i64 %313, -1
   %315 = and i64 %314, %311
   store i64 %315, i64* @sll, align 8
-  %316 = load i8* @uc, align 1
+  %316 = load i8, i8* @uc, align 1
   %317 = zext i8 %316 to i64
   %318 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
   %319 = atomicrmw nand i64* %318, i64 %317 monotonic
@@ -698,28 +698,28 @@
 
 define void @test_compare_and_swap() nounwind {
 entry:
-  %0 = load i8* @sc, align 1
+  %0 = load i8, i8* @sc, align 1
   %1 = zext i8 %0 to i32
-  %2 = load i8* @uc, align 1
+  %2 = load i8, i8* @uc, align 1
   %3 = zext i8 %2 to i32
   %4 = trunc i32 %3 to i8
   %5 = trunc i32 %1 to i8
   %pair6 = cmpxchg i8* @sc, i8 %4, i8 %5 monotonic monotonic
   %6 = extractvalue { i8, i1 } %pair6, 0
   store i8 %6, i8* @sc, align 1
-  %7 = load i8* @sc, align 1
+  %7 = load i8, i8* @sc, align 1
   %8 = zext i8 %7 to i32
-  %9 = load i8* @uc, align 1
+  %9 = load i8, i8* @uc, align 1
   %10 = zext i8 %9 to i32
   %11 = trunc i32 %10 to i8
   %12 = trunc i32 %8 to i8
   %pair13 = cmpxchg i8* @uc, i8 %11, i8 %12 monotonic monotonic
   %13 = extractvalue { i8, i1 } %pair13, 0
   store i8 %13, i8* @uc, align 1
-  %14 = load i8* @sc, align 1
+  %14 = load i8, i8* @sc, align 1
   %15 = sext i8 %14 to i16
   %16 = zext i16 %15 to i32
-  %17 = load i8* @uc, align 1
+  %17 = load i8, i8* @uc, align 1
   %18 = zext i8 %17 to i32
   %19 = bitcast i8* bitcast (i16* @ss to i8*) to i16*
   %20 = trunc i32 %18 to i16
@@ -727,10 +727,10 @@
   %pair22 = cmpxchg i16* %19, i16 %20, i16 %21 monotonic monotonic
   %22 = extractvalue { i16, i1 } %pair22, 0
   store i16 %22, i16* @ss, align 2
-  %23 = load i8* @sc, align 1
+  %23 = load i8, i8* @sc, align 1
   %24 = sext i8 %23 to i16
   %25 = zext i16 %24 to i32
-  %26 = load i8* @uc, align 1
+  %26 = load i8, i8* @uc, align 1
   %27 = zext i8 %26 to i32
   %28 = bitcast i8* bitcast (i16* @us to i8*) to i16*
   %29 = trunc i32 %27 to i16
@@ -738,57 +738,57 @@
   %pair31 = cmpxchg i16* %28, i16 %29, i16 %30 monotonic monotonic
   %31 = extractvalue { i16, i1 } %pair31, 0
   store i16 %31, i16* @us, align 2
-  %32 = load i8* @sc, align 1
+  %32 = load i8, i8* @sc, align 1
   %33 = sext i8 %32 to i32
-  %34 = load i8* @uc, align 1
+  %34 = load i8, i8* @uc, align 1
   %35 = zext i8 %34 to i32
   %36 = bitcast i8* bitcast (i32* @si to i8*) to i32*
   %pair37 = cmpxchg i32* %36, i32 %35, i32 %33 monotonic monotonic
   %37 = extractvalue { i32, i1 } %pair37, 0
   store i32 %37, i32* @si, align 4
-  %38 = load i8* @sc, align 1
+  %38 = load i8, i8* @sc, align 1
   %39 = sext i8 %38 to i32
-  %40 = load i8* @uc, align 1
+  %40 = load i8, i8* @uc, align 1
   %41 = zext i8 %40 to i32
   %42 = bitcast i8* bitcast (i32* @ui to i8*) to i32*
   %pair43 = cmpxchg i32* %42, i32 %41, i32 %39 monotonic monotonic
   %43 = extractvalue { i32, i1 } %pair43, 0
   store i32 %43, i32* @ui, align 4
-  %44 = load i8* @sc, align 1
+  %44 = load i8, i8* @sc, align 1
   %45 = sext i8 %44 to i64
-  %46 = load i8* @uc, align 1
+  %46 = load i8, i8* @uc, align 1
   %47 = zext i8 %46 to i64
   %48 = bitcast i8* bitcast (i64* @sl to i8*) to i64*
   %pair49 = cmpxchg i64* %48, i64 %47, i64 %45 monotonic monotonic
   %49 = extractvalue { i64, i1 } %pair49, 0
   store i64 %49, i64* @sl, align 8
-  %50 = load i8* @sc, align 1
+  %50 = load i8, i8* @sc, align 1
   %51 = sext i8 %50 to i64
-  %52 = load i8* @uc, align 1
+  %52 = load i8, i8* @uc, align 1
   %53 = zext i8 %52 to i64
   %54 = bitcast i8* bitcast (i64* @ul to i8*) to i64*
   %pair55 = cmpxchg i64* %54, i64 %53, i64 %51 monotonic monotonic
   %55 = extractvalue { i64, i1 } %pair55, 0
   store i64 %55, i64* @ul, align 8
-  %56 = load i8* @sc, align 1
+  %56 = load i8, i8* @sc, align 1
   %57 = sext i8 %56 to i64
-  %58 = load i8* @uc, align 1
+  %58 = load i8, i8* @uc, align 1
   %59 = zext i8 %58 to i64
   %60 = bitcast i8* bitcast (i64* @sll to i8*) to i64*
   %pair61 = cmpxchg i64* %60, i64 %59, i64 %57 monotonic monotonic
   %61 = extractvalue { i64, i1 } %pair61, 0
   store i64 %61, i64* @sll, align 8
-  %62 = load i8* @sc, align 1
+  %62 = load i8, i8* @sc, align 1
   %63 = sext i8 %62 to i64
-  %64 = load i8* @uc, align 1
+  %64 = load i8, i8* @uc, align 1
   %65 = zext i8 %64 to i64
   %66 = bitcast i8* bitcast (i64* @ull to i8*) to i64*
   %pair67 = cmpxchg i64* %66, i64 %65, i64 %63 monotonic monotonic
   %67 = extractvalue { i64, i1 } %pair67, 0
   store i64 %67, i64* @ull, align 8
-  %68 = load i8* @sc, align 1
+  %68 = load i8, i8* @sc, align 1
   %69 = zext i8 %68 to i32
-  %70 = load i8* @uc, align 1
+  %70 = load i8, i8* @uc, align 1
   %71 = zext i8 %70 to i32
   %72 = trunc i32 %71 to i8
   %73 = trunc i32 %69 to i8
@@ -798,9 +798,9 @@
   %76 = zext i1 %75 to i8
   %77 = zext i8 %76 to i32
   store i32 %77, i32* @ui, align 4
-  %78 = load i8* @sc, align 1
+  %78 = load i8, i8* @sc, align 1
   %79 = zext i8 %78 to i32
-  %80 = load i8* @uc, align 1
+  %80 = load i8, i8* @uc, align 1
   %81 = zext i8 %80 to i32
   %82 = trunc i32 %81 to i8
   %83 = trunc i32 %79 to i8
@@ -810,10 +810,10 @@
   %86 = zext i1 %85 to i8
   %87 = zext i8 %86 to i32
   store i32 %87, i32* @ui, align 4
-  %88 = load i8* @sc, align 1
+  %88 = load i8, i8* @sc, align 1
   %89 = sext i8 %88 to i16
   %90 = zext i16 %89 to i32
-  %91 = load i8* @uc, align 1
+  %91 = load i8, i8* @uc, align 1
   %92 = zext i8 %91 to i32
   %93 = trunc i32 %92 to i8
   %94 = trunc i32 %90 to i8
@@ -823,10 +823,10 @@
   %97 = zext i1 %96 to i8
   %98 = zext i8 %97 to i32
   store i32 %98, i32* @ui, align 4
-  %99 = load i8* @sc, align 1
+  %99 = load i8, i8* @sc, align 1
   %100 = sext i8 %99 to i16
   %101 = zext i16 %100 to i32
-  %102 = load i8* @uc, align 1
+  %102 = load i8, i8* @uc, align 1
   %103 = zext i8 %102 to i32
   %104 = trunc i32 %103 to i8
   %105 = trunc i32 %101 to i8
@@ -836,9 +836,9 @@
   %108 = zext i1 %107 to i8
   %109 = zext i8 %108 to i32
   store i32 %109, i32* @ui, align 4
-  %110 = load i8* @sc, align 1
+  %110 = load i8, i8* @sc, align 1
   %111 = sext i8 %110 to i32
-  %112 = load i8* @uc, align 1
+  %112 = load i8, i8* @uc, align 1
   %113 = zext i8 %112 to i32
   %114 = trunc i32 %113 to i8
   %115 = trunc i32 %111 to i8
@@ -848,9 +848,9 @@
   %118 = zext i1 %117 to i8
   %119 = zext i8 %118 to i32
   store i32 %119, i32* @ui, align 4
-  %120 = load i8* @sc, align 1
+  %120 = load i8, i8* @sc, align 1
   %121 = sext i8 %120 to i32
-  %122 = load i8* @uc, align 1
+  %122 = load i8, i8* @uc, align 1
   %123 = zext i8 %122 to i32
   %124 = trunc i32 %123 to i8
   %125 = trunc i32 %121 to i8
@@ -860,9 +860,9 @@
   %128 = zext i1 %127 to i8
   %129 = zext i8 %128 to i32
   store i32 %129, i32* @ui, align 4
-  %130 = load i8* @sc, align 1
+  %130 = load i8, i8* @sc, align 1
   %131 = sext i8 %130 to i64
-  %132 = load i8* @uc, align 1
+  %132 = load i8, i8* @uc, align 1
   %133 = zext i8 %132 to i64
   %134 = trunc i64 %133 to i8
   %135 = trunc i64 %131 to i8
@@ -872,9 +872,9 @@
   %138 = zext i1 %137 to i8
   %139 = zext i8 %138 to i32
   store i32 %139, i32* @ui, align 4
-  %140 = load i8* @sc, align 1
+  %140 = load i8, i8* @sc, align 1
   %141 = sext i8 %140 to i64
-  %142 = load i8* @uc, align 1
+  %142 = load i8, i8* @uc, align 1
   %143 = zext i8 %142 to i64
   %144 = trunc i64 %143 to i8
   %145 = trunc i64 %141 to i8
@@ -884,9 +884,9 @@
   %148 = zext i1 %147 to i8
   %149 = zext i8 %148 to i32
   store i32 %149, i32* @ui, align 4
-  %150 = load i8* @sc, align 1
+  %150 = load i8, i8* @sc, align 1
   %151 = sext i8 %150 to i64
-  %152 = load i8* @uc, align 1
+  %152 = load i8, i8* @uc, align 1
   %153 = zext i8 %152 to i64
   %154 = trunc i64 %153 to i8
   %155 = trunc i64 %151 to i8
@@ -896,9 +896,9 @@
   %158 = zext i1 %157 to i8
   %159 = zext i8 %158 to i32
   store i32 %159, i32* @ui, align 4
-  %160 = load i8* @sc, align 1
+  %160 = load i8, i8* @sc, align 1
   %161 = sext i8 %160 to i64
-  %162 = load i8* @uc, align 1
+  %162 = load i8, i8* @uc, align 1
   %163 = zext i8 %162 to i64
   %164 = trunc i64 %163 to i8
   %165 = trunc i64 %161 to i8
diff --git a/llvm/test/CodeGen/X86/GC/alloc_loop.ll b/llvm/test/CodeGen/X86/GC/alloc_loop.ll
index fb78ba2..2a505e8 100644
--- a/llvm/test/CodeGen/X86/GC/alloc_loop.ll
+++ b/llvm/test/CodeGen/X86/GC/alloc_loop.ll
@@ -31,8 +31,8 @@
 	store i8** %tmp.2, i8*** %B
 
 	;; *B = A;
-	%B.1 = load i8*** %B
-	%A.1 = load i8** %A
+	%B.1 = load i8**, i8*** %B
+	%A.1 = load i8*, i8** %A
 	call void @llvm.gcwrite(i8* %A.1, i8* %B.upgrd.1, i8** %B.1)
 	
 	br label %AllocLoop
diff --git a/llvm/test/CodeGen/X86/GC/argpromotion.ll b/llvm/test/CodeGen/X86/GC/argpromotion.ll
index c63ce22..37baf32 100644
--- a/llvm/test/CodeGen/X86/GC/argpromotion.ll
+++ b/llvm/test/CodeGen/X86/GC/argpromotion.ll
@@ -14,6 +14,6 @@
 entry:
 	%var = alloca i8*
 	call void @llvm.gcroot(i8** %var, i8* null)
-	%x = load i32* %xp
+	%x = load i32, i32* %xp
 	ret i32 %x
 }
diff --git a/llvm/test/CodeGen/X86/GC/inline.ll b/llvm/test/CodeGen/X86/GC/inline.ll
index 91d435f..9d74c1f 100644
--- a/llvm/test/CodeGen/X86/GC/inline.ll
+++ b/llvm/test/CodeGen/X86/GC/inline.ll
@@ -16,7 +16,7 @@
 	%obj.2 = bitcast %IntArray* %obj to i8*		; <i8*> [#uses=1]
 	store i8* %obj.2, i8** %root
 	%Length.ptr = getelementptr %IntArray, %IntArray* %obj, i32 0, i32 0		; <i32*> [#uses=1]
-	%Length = load i32* %Length.ptr		; <i32> [#uses=1]
+	%Length = load i32, i32* %Length.ptr		; <i32> [#uses=1]
 	ret i32 %Length
 }
 
diff --git a/llvm/test/CodeGen/X86/GC/inline2.ll b/llvm/test/CodeGen/X86/GC/inline2.ll
index be35d8f..034c985 100644
--- a/llvm/test/CodeGen/X86/GC/inline2.ll
+++ b/llvm/test/CodeGen/X86/GC/inline2.ll
@@ -17,7 +17,7 @@
 	%obj.2 = bitcast %IntArray* %obj to i8*		; <i8*> [#uses=1]
 	store i8* %obj.2, i8** %root
 	%Length.ptr = getelementptr %IntArray, %IntArray* %obj, i32 0, i32 0		; <i32*> [#uses=1]
-	%Length = load i32* %Length.ptr		; <i32> [#uses=1]
+	%Length = load i32, i32* %Length.ptr		; <i32> [#uses=1]
 	ret i32 %Length
 }
 
diff --git a/llvm/test/CodeGen/X86/MachineBranchProb.ll b/llvm/test/CodeGen/X86/MachineBranchProb.ll
index cf41ef2..9b4e737 100644
--- a/llvm/test/CodeGen/X86/MachineBranchProb.ll
+++ b/llvm/test/CodeGen/X86/MachineBranchProb.ll
@@ -13,7 +13,7 @@
   %i.1 = phi i32 [ %inc19, %for.inc ], [ 0, %for.cond ]
   %bit.0 = phi i32 [ %shl, %for.inc ], [ 1, %for.cond ]
   %tobool = icmp eq i32 %bit.0, 0
-  %v3 = load i32* @max_regno, align 4
+  %v3 = load i32, i32* @max_regno, align 4
   %cmp4 = icmp eq i32 %i.1, %v3
   %or.cond = or i1 %tobool, %cmp4
   br i1 %or.cond, label %for.inc20, label %for.inc, !prof !0
diff --git a/llvm/test/CodeGen/X86/MachineSink-DbgValue.ll b/llvm/test/CodeGen/X86/MachineSink-DbgValue.ll
index 3a2c58f..86335a5 100644
--- a/llvm/test/CodeGen/X86/MachineSink-DbgValue.ll
+++ b/llvm/test/CodeGen/X86/MachineSink-DbgValue.ll
@@ -5,7 +5,7 @@
 
 define i32 @foo(i32 %i, i32* nocapture %c) nounwind uwtable readonly ssp {
   tail call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !6, metadata !{!"0x102"}), !dbg !12
-  %ab = load i32* %c, align 1, !dbg !14
+  %ab = load i32, i32* %c, align 1, !dbg !14
   tail call void @llvm.dbg.value(metadata i32* %c, i64 0, metadata !7, metadata !{!"0x102"}), !dbg !13
   tail call void @llvm.dbg.value(metadata i32 %ab, i64 0, metadata !10, metadata !{!"0x102"}), !dbg !14
   %cd = icmp eq i32 %i, 42, !dbg !15
diff --git a/llvm/test/CodeGen/X86/MachineSink-eflags.ll b/llvm/test/CodeGen/X86/MachineSink-eflags.ll
index e9043d9..4e52c8c 100644
--- a/llvm/test/CodeGen/X86/MachineSink-eflags.ll
+++ b/llvm/test/CodeGen/X86/MachineSink-eflags.ll
@@ -16,18 +16,18 @@
  %i2 = alloca i8*, align 8
  %b.i = alloca [16 x <2 x double>], align 16
  %conv = bitcast i8* %_stubArgs to i32*
- %tmp1 = load i32* %conv, align 4
+ %tmp1 = load i32, i32* %conv, align 4
  %ptr8 = getelementptr i8, i8* %_stubArgs, i64 16
  %i4 = bitcast i8* %ptr8 to <2 x double>*
  %ptr20 = getelementptr i8, i8* %_stubArgs, i64 48
  %i7 = bitcast i8* %ptr20 to <2 x double> addrspace(1)**
- %tmp21 = load <2 x double> addrspace(1)** %i7, align 8
+ %tmp21 = load <2 x double> addrspace(1)*, <2 x double> addrspace(1)** %i7, align 8
  %ptr28 = getelementptr i8, i8* %_stubArgs, i64 64
  %i9 = bitcast i8* %ptr28 to i32*
- %tmp29 = load i32* %i9, align 4
+ %tmp29 = load i32, i32* %i9, align 4
  %ptr32 = getelementptr i8, i8* %_stubArgs, i64 68
  %i10 = bitcast i8* %ptr32 to i32*
- %tmp33 = load i32* %i10, align 4
+ %tmp33 = load i32, i32* %i10, align 4
  %tmp17.i = mul i32 10, 20
  %tmp19.i = add i32 %tmp17.i, %tmp33
  %conv21.i = zext i32 %tmp19.i to i64
@@ -49,14 +49,14 @@
  %conv160.i = zext i32 %i39 to i64
  %tmp22.sum652.i = add i64 %conv160.i, %conv21.i
  %arrayidx161.i = getelementptr <2 x double>, <2 x double> addrspace(1)* %tmp21, i64 %tmp22.sum652.i
- %tmp162.i = load <2 x double> addrspace(1)* %arrayidx161.i, align 16
+ %tmp162.i = load <2 x double>, <2 x double> addrspace(1)* %arrayidx161.i, align 16
  %tmp222.i = add i32 %tmp154.i, 1
  %i43 = mul i32 %tmp222.i, %tmp29
  %i44 = add i32 %tmp158.i, %i43
  %conv228.i = zext i32 %i44 to i64
  %tmp22.sum656.i = add i64 %conv228.i, %conv21.i
  %arrayidx229.i = getelementptr <2 x double>, <2 x double> addrspace(1)* %tmp21, i64 %tmp22.sum656.i
- %tmp230.i = load <2 x double> addrspace(1)* %arrayidx229.i, align 16
+ %tmp230.i = load <2 x double>, <2 x double> addrspace(1)* %arrayidx229.i, align 16
  %cmp432.i = icmp ult i32 %tmp156.i, %tmp1
 
 ; %shl.i should not be sinked below the compare.
diff --git a/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
index 02fbbff..aff6fbc 100644
--- a/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
+++ b/llvm/test/CodeGen/X86/MergeConsecutiveStores.ll
@@ -166,8 +166,8 @@
 ; <label>:4                                       ; preds = %4, %.lr.ph
   %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
   %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %10, %4 ]
-  %5 = load i8* %2, align 1
-  %6 = load i8* %3, align 1
+  %5 = load i8, i8* %2, align 1
+  %6 = load i8, i8* %3, align 1
   %7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
   store i8 %5, i8* %7, align 1
   %8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
@@ -200,11 +200,11 @@
 a4:                                       ; preds = %4, %.lr.ph
   %i.02 = phi i32 [ 0, %.lr.ph ], [ %a9, %a4 ]
   %.01 = phi %struct.A* [ %p, %.lr.ph ], [ %a10, %a4 ]
-  %a5 = load i8* %2, align 1
+  %a5 = load i8, i8* %2, align 1
   %a7 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 0
   store i8 %a5, i8* %a7, align 1
   %a8 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 0, i32 1
-  %a6 = load i8* %3, align 1
+  %a6 = load i8, i8* %3, align 1
   store i8 %a6, i8* %a8, align 1
   %a9 = add nsw i32 %i.02, 1
   %a10 = getelementptr inbounds %struct.A, %struct.A* %.01, i64 1
@@ -234,8 +234,8 @@
 ; <label>:4                                       ; preds = %4, %.lr.ph
   %i.02 = phi i32 [ 0, %.lr.ph ], [ %9, %4 ]
   %.01 = phi %struct.B* [ %p, %.lr.ph ], [ %10, %4 ]
-  %5 = load i32* %2
-  %6 = load i32* %3
+  %5 = load i32, i32* %2
+  %6 = load i32, i32* %3
   %7 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 0
   store i32 %5, i32* %7
   %8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
@@ -274,10 +274,10 @@
   %a8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
   %a9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
   %a10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
-  %b1 = load i32* %a2
-  %b2 = load i32* %a3
-  %b3 = load i32* %a4
-  %b4 = load i32* %a5
+  %b1 = load i32, i32* %a2
+  %b2 = load i32, i32* %a3
+  %b3 = load i32, i32* %a4
+  %b4 = load i32, i32* %a5
   store i32 %b1, i32* %a7
   store i32 %b2, i32* %a8
   store i32 %b3, i32* %a9
@@ -321,10 +321,10 @@
   %a8 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 1
   %a9 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 2
   %a10 = getelementptr inbounds %struct.B, %struct.B* %.01, i64 0, i32 3
-  %b1 = load i32* %a2, align 1
-  %b2 = load i32* %a3, align 1
-  %b3 = load i32* %a4, align 1
-  %b4 = load i32* %a5, align 1
+  %b1 = load i32, i32* %a2, align 1
+  %b2 = load i32, i32* %a3, align 1
+  %b3 = load i32, i32* %a4, align 1
+  %b4 = load i32, i32* %a5, align 1
   store i32 %b1, i32* %a7, align 1
   store i32 %b2, i32* %a8, align 1
   store i32 %b3, i32* %a9, align 1
@@ -351,12 +351,12 @@
   %.08 = phi i8* [ %b, %0 ], [ %10, %1 ]
   %.0 = phi i64* [ %a, %0 ], [ %2, %1 ]
   %2 = getelementptr inbounds i64, i64* %.0, i64 1
-  %3 = load i64* %.0, align 1
+  %3 = load i64, i64* %.0, align 1
   %4 = getelementptr inbounds i8, i8* %c, i64 %3
-  %5 = load i8* %4, align 1
+  %5 = load i8, i8* %4, align 1
   %6 = add i64 %3, 1
   %7 = getelementptr inbounds i8, i8* %c, i64 %6
-  %8 = load i8* %7, align 1
+  %8 = load i8, i8* %7, align 1
   store i8 %5, i8* %.08, align 1
   %9 = getelementptr inbounds i8, i8* %.08, i64 1
   store i8 %8, i8* %9, align 1
@@ -383,13 +383,13 @@
   %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
   %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
   %2 = getelementptr inbounds i8, i8* %.0, i64 1
-  %3 = load i8* %.0, align 1
+  %3 = load i8, i8* %.0, align 1
   %4 = sext i8 %3 to i64
   %5 = getelementptr inbounds i8, i8* %c, i64 %4
-  %6 = load i8* %5, align 1
+  %6 = load i8, i8* %5, align 1
   %7 = add i64 %4, 1
   %8 = getelementptr inbounds i8, i8* %c, i64 %7
-  %9 = load i8* %8, align 1
+  %9 = load i8, i8* %8, align 1
   store i8 %6, i8* %.08, align 1
   %10 = getelementptr inbounds i8, i8* %.08, i64 1
   store i8 %9, i8* %10, align 1
@@ -415,14 +415,14 @@
   %.08 = phi i8* [ %b, %0 ], [ %11, %1 ]
   %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
   %2 = getelementptr inbounds i8, i8* %.0, i64 1
-  %3 = load i8* %.0, align 1
+  %3 = load i8, i8* %.0, align 1
   %4 = sext i8 %3 to i64
   %5 = getelementptr inbounds i8, i8* %c, i64 %4
-  %6 = load i8* %5, align 1
+  %6 = load i8, i8* %5, align 1
   %7 = add i8 %3, 1
   %wrap.4 = sext i8 %7 to i64
   %8 = getelementptr inbounds i8, i8* %c, i64 %wrap.4
-  %9 = load i8* %8, align 1
+  %9 = load i8, i8* %8, align 1
   store i8 %6, i8* %.08, align 1
   %10 = getelementptr inbounds i8, i8* %.08, i64 1
   store i8 %9, i8* %10, align 1
@@ -477,11 +477,11 @@
   %idx4 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 4
   %idx5 = getelementptr inbounds [6 x i64], [6 x i64]* %array, i64 0, i64 5
 
-  %a0 = load i64* %idx0, align 8
+  %a0 = load i64, i64* %idx0, align 8
   store i64 %a0, i64* %idx4, align 8
 
   %b = bitcast i64* %idx1 to <2 x i64>*
-  %v = load <2 x i64>* %b, align 8
+  %v = load <2 x i64>, <2 x i64>* %b, align 8
   %a1 = extractelement <2 x i64> %v, i32 0
   store i64 %a1, i64* %idx5, align 8
   ret void
diff --git a/llvm/test/CodeGen/X86/StackColoring.ll b/llvm/test/CodeGen/X86/StackColoring.ll
index e783877..414ccf4 100644
--- a/llvm/test/CodeGen/X86/StackColoring.ll
+++ b/llvm/test/CodeGen/X86/StackColoring.ll
@@ -414,7 +414,7 @@
   %z2 = getelementptr inbounds [4 x %struct.Klass], [4 x %struct.Klass]* %a.i, i64 0, i64 0, i32 0
   call void @llvm.lifetime.start(i64 -1, i8* %a8)
   call void @llvm.lifetime.start(i64 -1, i8* %b8)
-  %z3 = load i32* %z2, align 16
+  %z3 = load i32, i32* %z2, align 16
   %r = call i32 @foo(i32 %z3, i8* %a8)
   %r2 = call i32 @foo(i32 %z3, i8* %b8)
   call void @llvm.lifetime.end(i64 -1, i8* %a8)
diff --git a/llvm/test/CodeGen/X86/SwitchLowering.ll b/llvm/test/CodeGen/X86/SwitchLowering.ll
index abc339f..5f17d9d 100644
--- a/llvm/test/CodeGen/X86/SwitchLowering.ll
+++ b/llvm/test/CodeGen/X86/SwitchLowering.ll
@@ -10,7 +10,7 @@
         %CurPtr_addr.0.rec = bitcast i32 %indvar to i32         ; <i32> [#uses=1]
         %gep.upgrd.1 = zext i32 %indvar to i64          ; <i64> [#uses=1]
         %CurPtr_addr.0 = getelementptr i8, i8* %CurPtr, i64 %gep.upgrd.1            ; <i8*> [#uses=1]
-        %tmp = load i8* %CurPtr_addr.0          ; <i8> [#uses=3]
+        %tmp = load i8, i8* %CurPtr_addr.0          ; <i8> [#uses=3]
         %tmp2.rec = add i32 %CurPtr_addr.0.rec, 1               ; <i32> [#uses=1]
         %tmp2 = getelementptr i8, i8* %CurPtr, i32 %tmp2.rec                ; <i8*> [#uses=1]
         %indvar.next = add i32 %indvar, 1               ; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/SwizzleShuff.ll b/llvm/test/CodeGen/X86/SwizzleShuff.ll
index d387850..e4c35c5 100644
--- a/llvm/test/CodeGen/X86/SwizzleShuff.ll
+++ b/llvm/test/CodeGen/X86/SwizzleShuff.ll
@@ -6,8 +6,8 @@
 ; CHECK: xorl
 ; CHECK: ret
 define void @pull_bitcast (<4 x i8>* %pA, <4 x i8>* %pB) {
-  %A = load <4 x i8>* %pA
-  %B = load <4 x i8>* %pB
+  %A = load <4 x i8>, <4 x i8>* %pA
+  %B = load <4 x i8>, <4 x i8>* %pB
   %C = xor <4 x i8> %A, %B
   store <4 x i8> %C, <4 x i8>* %pA
   ret void
@@ -22,8 +22,8 @@
 ; CHECK-NEXT: pxor
 ; CHECK-NEXT: ret
 define <4 x i32> @multi_use_swizzle (<4 x i32>* %pA, <4 x i32>* %pB) {
-  %A = load <4 x i32>* %pA
-  %B = load <4 x i32>* %pB
+  %A = load <4 x i32>, <4 x i32>* %pA
+  %B = load <4 x i32>, <4 x i32>* %pB
   %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 1, i32 5, i32 6>
   %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 2>
   %S2 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 2>
@@ -35,9 +35,9 @@
 ; CHECK: xorl
 ; CHECK: ret
 define <4 x i8> @pull_bitcast2 (<4 x i8>* %pA, <4 x i8>* %pB, <4 x i8>* %pC) {
-  %A = load <4 x i8>* %pA
+  %A = load <4 x i8>, <4 x i8>* %pA
   store <4 x i8> %A, <4 x i8>* %pC
-  %B = load <4 x i8>* %pB
+  %B = load <4 x i8>, <4 x i8>* %pB
   %C = xor <4 x i8> %A, %B
   store <4 x i8> %C, <4 x i8>* %pA
   ret <4 x i8> %C
@@ -49,8 +49,8 @@
 ; CHECK-NOT: pshufd
 ; CHECK: ret
 define <4 x i32> @reverse_1 (<4 x i32>* %pA, <4 x i32>* %pB) {
-  %A = load <4 x i32>* %pA
-  %B = load <4 x i32>* %pB
+  %A = load <4 x i32>, <4 x i32>* %pA
+  %B = load <4 x i32>, <4 x i32>* %pB
   %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
   %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
   ret <4 x i32> %S1
@@ -61,8 +61,8 @@
 ; CHECK: pshufd
 ; CHECK: ret
 define <4 x i32> @no_reverse_shuff (<4 x i32>* %pA, <4 x i32>* %pB) {
-  %A = load <4 x i32>* %pA
-  %B = load <4 x i32>* %pB
+  %A = load <4 x i32>, <4 x i32>* %pA
+  %B = load <4 x i32>, <4 x i32>* %pB
   %S = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
   %S1 = shufflevector <4 x i32> %S, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
   ret <4 x i32> %S1
diff --git a/llvm/test/CodeGen/X86/abi-isel.ll b/llvm/test/CodeGen/X86/abi-isel.ll
index 752b6e6..234419b 100644
--- a/llvm/test/CodeGen/X86/abi-isel.ll
+++ b/llvm/test/CodeGen/X86/abi-isel.ll
@@ -33,7 +33,7 @@
 
 define void @foo00() nounwind {
 entry:
-	%0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 0), align 4
+	%0 = load i32, i32* getelementptr ([131072 x i32]* @src, i32 0, i64 0), align 4
 	store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 0), align 4
 	ret void
 
@@ -105,7 +105,7 @@
 
 define void @fxo00() nounwind {
 entry:
-	%0 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 0), align 4
+	%0 = load i32, i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 0), align 4
 	store i32 %0, i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 0), align 4
 	ret void
 
@@ -297,8 +297,8 @@
 
 define void @foo02() nounwind {
 entry:
-	%0 = load i32** @ptr, align 8
-	%1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 0), align 4
+	%0 = load i32*, i32** @ptr, align 8
+	%1 = load i32, i32* getelementptr ([131072 x i32]* @src, i32 0, i64 0), align 4
 	store i32 %1, i32* %0, align 4
 	ret void
 ; LINUX-64-STATIC-LABEL: foo02:
@@ -379,8 +379,8 @@
 
 define void @fxo02() nounwind {
 entry:
-	%0 = load i32** @ptr, align 8
-	%1 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 0), align 4
+	%0 = load i32*, i32** @ptr, align 8
+	%1 = load i32, i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 0), align 4
 	store i32 %1, i32* %0, align 4
 ; LINUX-64-STATIC-LABEL: fxo02:
 ; LINUX-64-STATIC: movl    xsrc(%rip), %
@@ -461,7 +461,7 @@
 
 define void @foo03() nounwind {
 entry:
-	%0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 0), align 32
+	%0 = load i32, i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 0), align 32
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 0), align 32
 	ret void
 ; LINUX-64-STATIC-LABEL: foo03:
@@ -576,8 +576,8 @@
 
 define void @foo05() nounwind {
 entry:
-	%0 = load i32** @dptr, align 8
-	%1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 0), align 32
+	%0 = load i32*, i32** @dptr, align 8
+	%1 = load i32, i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 0), align 32
 	store i32 %1, i32* %0, align 4
 	ret void
 ; LINUX-64-STATIC-LABEL: foo05:
@@ -648,7 +648,7 @@
 
 define void @foo06() nounwind {
 entry:
-	%0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 0), align 4
+	%0 = load i32, i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 0), align 4
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 0), align 4
 	ret void
 ; LINUX-64-STATIC-LABEL: foo06:
@@ -760,8 +760,8 @@
 
 define void @foo08() nounwind {
 entry:
-	%0 = load i32** @lptr, align 8
-	%1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 0), align 4
+	%0 = load i32*, i32** @lptr, align 8
+	%1 = load i32, i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 0), align 4
 	store i32 %1, i32* %0, align 4
 	ret void
 ; LINUX-64-STATIC-LABEL: foo08:
@@ -830,7 +830,7 @@
 
 define void @qux00() nounwind {
 entry:
-	%0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16), align 4
+	%0 = load i32, i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16), align 4
 	store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 16), align 4
 	ret void
 ; LINUX-64-STATIC-LABEL: qux00:
@@ -901,7 +901,7 @@
 
 define void @qxx00() nounwind {
 entry:
-	%0 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16), align 4
+	%0 = load i32, i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16), align 4
 	store i32 %0, i32* getelementptr ([32 x i32]* @xdst, i32 0, i64 16), align 4
 	ret void
 ; LINUX-64-STATIC-LABEL: qxx00:
@@ -1104,8 +1104,8 @@
 
 define void @qux02() nounwind {
 entry:
-	%0 = load i32** @ptr, align 8
-	%1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16), align 4
+	%0 = load i32*, i32** @ptr, align 8
+	%1 = load i32, i32* getelementptr ([131072 x i32]* @src, i32 0, i64 16), align 4
 	%2 = getelementptr i32, i32* %0, i64 16
 	store i32 %1, i32* %2, align 4
 ; LINUX-64-STATIC-LABEL: qux02:
@@ -1187,8 +1187,8 @@
 
 define void @qxx02() nounwind {
 entry:
-	%0 = load i32** @ptr, align 8
-	%1 = load i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16), align 4
+	%0 = load i32*, i32** @ptr, align 8
+	%1 = load i32, i32* getelementptr ([32 x i32]* @xsrc, i32 0, i64 16), align 4
 	%2 = getelementptr i32, i32* %0, i64 16
 	store i32 %1, i32* %2, align 4
 ; LINUX-64-STATIC-LABEL: qxx02:
@@ -1270,7 +1270,7 @@
 
 define void @qux03() nounwind {
 entry:
-	%0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16), align 32
+	%0 = load i32, i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16), align 32
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 16), align 32
 	ret void
 ; LINUX-64-STATIC-LABEL: qux03:
@@ -1386,8 +1386,8 @@
 
 define void @qux05() nounwind {
 entry:
-	%0 = load i32** @dptr, align 8
-	%1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16), align 32
+	%0 = load i32*, i32** @dptr, align 8
+	%1 = load i32, i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 16), align 32
 	%2 = getelementptr i32, i32* %0, i64 16
 	store i32 %1, i32* %2, align 4
 ; LINUX-64-STATIC-LABEL: qux05:
@@ -1459,7 +1459,7 @@
 
 define void @qux06() nounwind {
 entry:
-	%0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16), align 4
+	%0 = load i32, i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16), align 4
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 16), align 4
 	ret void
 ; LINUX-64-STATIC-LABEL: qux06:
@@ -1571,8 +1571,8 @@
 
 define void @qux08() nounwind {
 entry:
-	%0 = load i32** @lptr, align 8
-	%1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16), align 4
+	%0 = load i32*, i32** @lptr, align 8
+	%1 = load i32, i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 16), align 4
 	%2 = getelementptr i32, i32* %0, i64 16
 	store i32 %1, i32* %2, align 4
 ; LINUX-64-STATIC-LABEL: qux08:
@@ -1643,7 +1643,7 @@
 define void @ind00(i64 %i) nounwind {
 entry:
 	%0 = getelementptr [131072 x i32], [131072 x i32]* @src, i64 0, i64 %i
-	%1 = load i32* %0, align 4
+	%1 = load i32, i32* %0, align 4
 	%2 = getelementptr [131072 x i32], [131072 x i32]* @dst, i64 0, i64 %i
 	store i32 %1, i32* %2, align 4
 	ret void
@@ -1721,7 +1721,7 @@
 define void @ixd00(i64 %i) nounwind {
 entry:
 	%0 = getelementptr [32 x i32], [32 x i32]* @xsrc, i64 0, i64 %i
-	%1 = load i32* %0, align 4
+	%1 = load i32, i32* %0, align 4
 	%2 = getelementptr [32 x i32], [32 x i32]* @xdst, i64 0, i64 %i
 	store i32 %1, i32* %2, align 4
 	ret void
@@ -1950,9 +1950,9 @@
 
 define void @ind02(i64 %i) nounwind {
 entry:
-	%0 = load i32** @ptr, align 8
+	%0 = load i32*, i32** @ptr, align 8
 	%1 = getelementptr [131072 x i32], [131072 x i32]* @src, i64 0, i64 %i
-	%2 = load i32* %1, align 4
+	%2 = load i32, i32* %1, align 4
 	%3 = getelementptr i32, i32* %0, i64 %i
 	store i32 %2, i32* %3, align 4
 	ret void
@@ -2039,9 +2039,9 @@
 
 define void @ixd02(i64 %i) nounwind {
 entry:
-	%0 = load i32** @ptr, align 8
+	%0 = load i32*, i32** @ptr, align 8
 	%1 = getelementptr [32 x i32], [32 x i32]* @xsrc, i64 0, i64 %i
-	%2 = load i32* %1, align 4
+	%2 = load i32, i32* %1, align 4
 	%3 = getelementptr i32, i32* %0, i64 %i
 	store i32 %2, i32* %3, align 4
 	ret void
@@ -2129,7 +2129,7 @@
 define void @ind03(i64 %i) nounwind {
 entry:
 	%0 = getelementptr [131072 x i32], [131072 x i32]* @dsrc, i64 0, i64 %i
-	%1 = load i32* %0, align 4
+	%1 = load i32, i32* %0, align 4
 	%2 = getelementptr [131072 x i32], [131072 x i32]* @ddst, i64 0, i64 %i
 	store i32 %1, i32* %2, align 4
 	ret void
@@ -2271,9 +2271,9 @@
 
 define void @ind05(i64 %i) nounwind {
 entry:
-	%0 = load i32** @dptr, align 8
+	%0 = load i32*, i32** @dptr, align 8
 	%1 = getelementptr [131072 x i32], [131072 x i32]* @dsrc, i64 0, i64 %i
-	%2 = load i32* %1, align 4
+	%2 = load i32, i32* %1, align 4
 	%3 = getelementptr i32, i32* %0, i64 %i
 	store i32 %2, i32* %3, align 4
 	ret void
@@ -2354,7 +2354,7 @@
 define void @ind06(i64 %i) nounwind {
 entry:
 	%0 = getelementptr [131072 x i32], [131072 x i32]* @lsrc, i64 0, i64 %i
-	%1 = load i32* %0, align 4
+	%1 = load i32, i32* %0, align 4
 	%2 = getelementptr [131072 x i32], [131072 x i32]* @ldst, i64 0, i64 %i
 	store i32 %1, i32* %2, align 4
 	ret void
@@ -2495,9 +2495,9 @@
 
 define void @ind08(i64 %i) nounwind {
 entry:
-	%0 = load i32** @lptr, align 8
+	%0 = load i32*, i32** @lptr, align 8
 	%1 = getelementptr [131072 x i32], [131072 x i32]* @lsrc, i64 0, i64 %i
-	%2 = load i32* %1, align 4
+	%2 = load i32, i32* %1, align 4
 	%3 = getelementptr i32, i32* %0, i64 %i
 	store i32 %2, i32* %3, align 4
 	ret void
@@ -2578,7 +2578,7 @@
 entry:
 	%0 = add i64 %i, 16
 	%1 = getelementptr [131072 x i32], [131072 x i32]* @src, i64 0, i64 %0
-	%2 = load i32* %1, align 4
+	%2 = load i32, i32* %1, align 4
 	%3 = getelementptr [131072 x i32], [131072 x i32]* @dst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
@@ -2657,7 +2657,7 @@
 entry:
 	%0 = add i64 %i, 16
 	%1 = getelementptr [32 x i32], [32 x i32]* @xsrc, i64 0, i64 %0
-	%2 = load i32* %1, align 4
+	%2 = load i32, i32* %1, align 4
 	%3 = getelementptr [32 x i32], [32 x i32]* @xdst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
@@ -2888,10 +2888,10 @@
 
 define void @off02(i64 %i) nounwind {
 entry:
-	%0 = load i32** @ptr, align 8
+	%0 = load i32*, i32** @ptr, align 8
 	%1 = add i64 %i, 16
 	%2 = getelementptr [131072 x i32], [131072 x i32]* @src, i64 0, i64 %1
-	%3 = load i32* %2, align 4
+	%3 = load i32, i32* %2, align 4
 	%4 = getelementptr i32, i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
@@ -2978,10 +2978,10 @@
 
 define void @oxf02(i64 %i) nounwind {
 entry:
-	%0 = load i32** @ptr, align 8
+	%0 = load i32*, i32** @ptr, align 8
 	%1 = add i64 %i, 16
 	%2 = getelementptr [32 x i32], [32 x i32]* @xsrc, i64 0, i64 %1
-	%3 = load i32* %2, align 4
+	%3 = load i32, i32* %2, align 4
 	%4 = getelementptr i32, i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
@@ -3070,7 +3070,7 @@
 entry:
 	%0 = add i64 %i, 16
 	%1 = getelementptr [131072 x i32], [131072 x i32]* @dsrc, i64 0, i64 %0
-	%2 = load i32* %1, align 4
+	%2 = load i32, i32* %1, align 4
 	%3 = getelementptr [131072 x i32], [131072 x i32]* @ddst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
@@ -3213,10 +3213,10 @@
 
 define void @off05(i64 %i) nounwind {
 entry:
-	%0 = load i32** @dptr, align 8
+	%0 = load i32*, i32** @dptr, align 8
 	%1 = add i64 %i, 16
 	%2 = getelementptr [131072 x i32], [131072 x i32]* @dsrc, i64 0, i64 %1
-	%3 = load i32* %2, align 4
+	%3 = load i32, i32* %2, align 4
 	%4 = getelementptr i32, i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
@@ -3298,7 +3298,7 @@
 entry:
 	%0 = add i64 %i, 16
 	%1 = getelementptr [131072 x i32], [131072 x i32]* @lsrc, i64 0, i64 %0
-	%2 = load i32* %1, align 4
+	%2 = load i32, i32* %1, align 4
 	%3 = getelementptr [131072 x i32], [131072 x i32]* @ldst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
@@ -3440,10 +3440,10 @@
 
 define void @off08(i64 %i) nounwind {
 entry:
-	%0 = load i32** @lptr, align 8
+	%0 = load i32*, i32** @lptr, align 8
 	%1 = add i64 %i, 16
 	%2 = getelementptr [131072 x i32], [131072 x i32]* @lsrc, i64 0, i64 %1
-	%3 = load i32* %2, align 4
+	%3 = load i32, i32* %2, align 4
 	%4 = getelementptr i32, i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
@@ -3522,7 +3522,7 @@
 
 define void @moo00(i64 %i) nounwind {
 entry:
-	%0 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536), align 4
+	%0 = load i32, i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536), align 4
 	store i32 %0, i32* getelementptr ([131072 x i32]* @dst, i32 0, i64 65536), align 4
 	ret void
 ; LINUX-64-STATIC-LABEL: moo00:
@@ -3659,8 +3659,8 @@
 
 define void @moo02(i64 %i) nounwind {
 entry:
-	%0 = load i32** @ptr, align 8
-	%1 = load i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536), align 4
+	%0 = load i32*, i32** @ptr, align 8
+	%1 = load i32, i32* getelementptr ([131072 x i32]* @src, i32 0, i64 65536), align 4
 	%2 = getelementptr i32, i32* %0, i64 65536
 	store i32 %1, i32* %2, align 4
 	ret void
@@ -3742,7 +3742,7 @@
 
 define void @moo03(i64 %i) nounwind {
 entry:
-	%0 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536), align 32
+	%0 = load i32, i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536), align 32
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ddst, i32 0, i64 65536), align 32
 	ret void
 ; LINUX-64-STATIC-LABEL: moo03:
@@ -3858,8 +3858,8 @@
 
 define void @moo05(i64 %i) nounwind {
 entry:
-	%0 = load i32** @dptr, align 8
-	%1 = load i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536), align 32
+	%0 = load i32*, i32** @dptr, align 8
+	%1 = load i32, i32* getelementptr ([131072 x i32]* @dsrc, i32 0, i64 65536), align 32
 	%2 = getelementptr i32, i32* %0, i64 65536
 	store i32 %1, i32* %2, align 4
 	ret void
@@ -3931,7 +3931,7 @@
 
 define void @moo06(i64 %i) nounwind {
 entry:
-	%0 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536), align 4
+	%0 = load i32, i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536), align 4
 	store i32 %0, i32* getelementptr ([131072 x i32]* @ldst, i32 0, i64 65536), align 4
 	ret void
 ; LINUX-64-STATIC-LABEL: moo06:
@@ -4043,8 +4043,8 @@
 
 define void @moo08(i64 %i) nounwind {
 entry:
-	%0 = load i32** @lptr, align 8
-	%1 = load i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536), align 4
+	%0 = load i32*, i32** @lptr, align 8
+	%1 = load i32, i32* getelementptr ([131072 x i32]* @lsrc, i32 0, i64 65536), align 4
 	%2 = getelementptr i32, i32* %0, i64 65536
 	store i32 %1, i32* %2, align 4
 	ret void
@@ -4116,7 +4116,7 @@
 entry:
 	%0 = add i64 %i, 65536
 	%1 = getelementptr [131072 x i32], [131072 x i32]* @src, i64 0, i64 %0
-	%2 = load i32* %1, align 4
+	%2 = load i32, i32* %1, align 4
 	%3 = getelementptr [131072 x i32], [131072 x i32]* @dst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
@@ -4270,10 +4270,10 @@
 
 define void @big02(i64 %i) nounwind {
 entry:
-	%0 = load i32** @ptr, align 8
+	%0 = load i32*, i32** @ptr, align 8
 	%1 = add i64 %i, 65536
 	%2 = getelementptr [131072 x i32], [131072 x i32]* @src, i64 0, i64 %1
-	%3 = load i32* %2, align 4
+	%3 = load i32, i32* %2, align 4
 	%4 = getelementptr i32, i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
@@ -4362,7 +4362,7 @@
 entry:
 	%0 = add i64 %i, 65536
 	%1 = getelementptr [131072 x i32], [131072 x i32]* @dsrc, i64 0, i64 %0
-	%2 = load i32* %1, align 4
+	%2 = load i32, i32* %1, align 4
 	%3 = getelementptr [131072 x i32], [131072 x i32]* @ddst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
@@ -4505,10 +4505,10 @@
 
 define void @big05(i64 %i) nounwind {
 entry:
-	%0 = load i32** @dptr, align 8
+	%0 = load i32*, i32** @dptr, align 8
 	%1 = add i64 %i, 65536
 	%2 = getelementptr [131072 x i32], [131072 x i32]* @dsrc, i64 0, i64 %1
-	%3 = load i32* %2, align 4
+	%3 = load i32, i32* %2, align 4
 	%4 = getelementptr i32, i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
@@ -4590,7 +4590,7 @@
 entry:
 	%0 = add i64 %i, 65536
 	%1 = getelementptr [131072 x i32], [131072 x i32]* @lsrc, i64 0, i64 %0
-	%2 = load i32* %1, align 4
+	%2 = load i32, i32* %1, align 4
 	%3 = getelementptr [131072 x i32], [131072 x i32]* @ldst, i64 0, i64 %0
 	store i32 %2, i32* %3, align 4
 	ret void
@@ -4732,10 +4732,10 @@
 
 define void @big08(i64 %i) nounwind {
 entry:
-	%0 = load i32** @lptr, align 8
+	%0 = load i32*, i32** @lptr, align 8
 	%1 = add i64 %i, 65536
 	%2 = getelementptr [131072 x i32], [131072 x i32]* @lsrc, i64 0, i64 %1
-	%3 = load i32* %2, align 4
+	%3 = load i32, i32* %2, align 4
 	%4 = getelementptr i32, i32* %0, i64 %1
 	store i32 %3, i32* %4, align 4
 	ret void
@@ -5519,7 +5519,7 @@
 
 define i8* @har02() nounwind {
 entry:
-	%0 = load i32** @ptr, align 8
+	%0 = load i32*, i32** @ptr, align 8
 	%1 = bitcast i32* %0 to i8*
 	ret i8* %1
 ; LINUX-64-STATIC-LABEL: har02:
@@ -5668,7 +5668,7 @@
 
 define i8* @har05() nounwind {
 entry:
-	%0 = load i32** @dptr, align 8
+	%0 = load i32*, i32** @dptr, align 8
 	%1 = bitcast i32* %0 to i8*
 	ret i8* %1
 ; LINUX-64-STATIC-LABEL: har05:
@@ -5812,7 +5812,7 @@
 
 define i8* @har08() nounwind {
 entry:
-	%0 = load i32** @lptr, align 8
+	%0 = load i32*, i32** @lptr, align 8
 	%1 = bitcast i32* %0 to i8*
 	ret i8* %1
 ; LINUX-64-STATIC-LABEL: har08:
@@ -6073,7 +6073,7 @@
 
 define i8* @bat02() nounwind {
 entry:
-	%0 = load i32** @ptr, align 8
+	%0 = load i32*, i32** @ptr, align 8
 	%1 = getelementptr i32, i32* %0, i64 16
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
@@ -6235,7 +6235,7 @@
 
 define i8* @bat05() nounwind {
 entry:
-	%0 = load i32** @dptr, align 8
+	%0 = load i32*, i32** @dptr, align 8
 	%1 = getelementptr i32, i32* %0, i64 16
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
@@ -6390,7 +6390,7 @@
 
 define i8* @bat08() nounwind {
 entry:
-	%0 = load i32** @lptr, align 8
+	%0 = load i32*, i32** @lptr, align 8
 	%1 = getelementptr i32, i32* %0, i64 16
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
@@ -6609,7 +6609,7 @@
 
 define i8* @bam02() nounwind {
 entry:
-	%0 = load i32** @ptr, align 8
+	%0 = load i32*, i32** @ptr, align 8
 	%1 = getelementptr i32, i32* %0, i64 65536
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
@@ -6771,7 +6771,7 @@
 
 define i8* @bam05() nounwind {
 entry:
-	%0 = load i32** @dptr, align 8
+	%0 = load i32*, i32** @dptr, align 8
 	%1 = getelementptr i32, i32* %0, i64 65536
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
@@ -6926,7 +6926,7 @@
 
 define i8* @bam08() nounwind {
 entry:
-	%0 = load i32** @lptr, align 8
+	%0 = load i32*, i32** @lptr, align 8
 	%1 = getelementptr i32, i32* %0, i64 65536
 	%2 = bitcast i32* %1 to i8*
 	ret i8* %2
@@ -7230,7 +7230,7 @@
 
 define i8* @cat02(i64 %i) nounwind {
 entry:
-	%0 = load i32** @ptr, align 8
+	%0 = load i32*, i32** @ptr, align 8
 	%1 = add i64 %i, 16
 	%2 = getelementptr i32, i32* %0, i64 %1
 	%3 = bitcast i32* %2 to i8*
@@ -7420,7 +7420,7 @@
 
 define i8* @cat05(i64 %i) nounwind {
 entry:
-	%0 = load i32** @dptr, align 8
+	%0 = load i32*, i32** @dptr, align 8
 	%1 = add i64 %i, 16
 	%2 = getelementptr i32, i32* %0, i64 %1
 	%3 = bitcast i32* %2 to i8*
@@ -7605,7 +7605,7 @@
 
 define i8* @cat08(i64 %i) nounwind {
 entry:
-	%0 = load i32** @lptr, align 8
+	%0 = load i32*, i32** @lptr, align 8
 	%1 = add i64 %i, 16
 	%2 = getelementptr i32, i32* %0, i64 %1
 	%3 = bitcast i32* %2 to i8*
@@ -7915,7 +7915,7 @@
 
 define i8* @cam02(i64 %i) nounwind {
 entry:
-	%0 = load i32** @ptr, align 8
+	%0 = load i32*, i32** @ptr, align 8
 	%1 = add i64 %i, 65536
 	%2 = getelementptr i32, i32* %0, i64 %1
 	%3 = bitcast i32* %2 to i8*
@@ -8105,7 +8105,7 @@
 
 define i8* @cam05(i64 %i) nounwind {
 entry:
-	%0 = load i32** @dptr, align 8
+	%0 = load i32*, i32** @dptr, align 8
 	%1 = add i64 %i, 65536
 	%2 = getelementptr i32, i32* %0, i64 %1
 	%3 = bitcast i32* %2 to i8*
@@ -8290,7 +8290,7 @@
 
 define i8* @cam08(i64 %i) nounwind {
 entry:
-	%0 = load i32** @lptr, align 8
+	%0 = load i32*, i32** @lptr, align 8
 	%1 = add i64 %i, 65536
 	%2 = getelementptr i32, i32* %0, i64 %1
 	%3 = bitcast i32* %2 to i8*
@@ -9180,9 +9180,9 @@
 
 define void @icaller() nounwind {
 entry:
-	%0 = load void ()** @ifunc, align 8
+	%0 = load void ()*, void ()** @ifunc, align 8
 	call void %0() nounwind
-	%1 = load void ()** @ifunc, align 8
+	%1 = load void ()*, void ()** @ifunc, align 8
 	call void %1() nounwind
 	ret void
 ; LINUX-64-STATIC-LABEL: icaller:
@@ -9270,9 +9270,9 @@
 
 define void @dicaller() nounwind {
 entry:
-	%0 = load void ()** @difunc, align 8
+	%0 = load void ()*, void ()** @difunc, align 8
 	call void %0() nounwind
-	%1 = load void ()** @difunc, align 8
+	%1 = load void ()*, void ()** @difunc, align 8
 	call void %1() nounwind
 	ret void
 ; LINUX-64-STATIC-LABEL: dicaller:
@@ -9353,9 +9353,9 @@
 
 define void @licaller() nounwind {
 entry:
-	%0 = load void ()** @lifunc, align 8
+	%0 = load void ()*, void ()** @lifunc, align 8
 	call void %0() nounwind
-	%1 = load void ()** @lifunc, align 8
+	%1 = load void ()*, void ()** @lifunc, align 8
 	call void %1() nounwind
 	ret void
 ; LINUX-64-STATIC-LABEL: licaller:
@@ -9435,9 +9435,9 @@
 
 define void @itailcaller() nounwind {
 entry:
-	%0 = load void ()** @ifunc, align 8
+	%0 = load void ()*, void ()** @ifunc, align 8
 	call void %0() nounwind
-	%1 = load void ()** @ifunc, align 8
+	%1 = load void ()*, void ()** @ifunc, align 8
 	call void %1() nounwind
 	ret void
 ; LINUX-64-STATIC-LABEL: itailcaller:
@@ -9525,7 +9525,7 @@
 
 define void @ditailcaller() nounwind {
 entry:
-	%0 = load void ()** @difunc, align 8
+	%0 = load void ()*, void ()** @difunc, align 8
 	call void %0() nounwind
 	ret void
 ; LINUX-64-STATIC-LABEL: ditailcaller:
@@ -9593,7 +9593,7 @@
 
 define void @litailcaller() nounwind {
 entry:
-	%0 = load void ()** @lifunc, align 8
+	%0 = load void ()*, void ()** @lifunc, align 8
 	call void %0() nounwind
 	ret void
 ; LINUX-64-STATIC-LABEL: litailcaller:
diff --git a/llvm/test/CodeGen/X86/addr-mode-matcher.ll b/llvm/test/CodeGen/X86/addr-mode-matcher.ll
index dc5052d0..83d6858 100644
--- a/llvm/test/CodeGen/X86/addr-mode-matcher.ll
+++ b/llvm/test/CodeGen/X86/addr-mode-matcher.ll
@@ -26,14 +26,14 @@
   %tmp1702 = and i32 %tmp1701, 1020
   %tmp1703 = getelementptr inbounds [1028 x i8], [1028 x i8]* null, i32 0, i32 %tmp1702
   %tmp1704 = bitcast i8* %tmp1703 to i32*
-  %load1 = load i32* %tmp1704, align 4
+  %load1 = load i32, i32* %tmp1704, align 4
 
 ; %load2 = (load (shl (and %xor, 255), 2))
   %tmp1698 = and i32 %xor, 255
   %tmp1706 = shl i32 %tmp1698, 2
   %tmp1707 = getelementptr inbounds [1028 x i8], [1028 x i8]* null, i32 0, i32 %tmp1706
   %tmp1708 = bitcast i8* %tmp1707 to i32*
-  %load2 = load i32* %tmp1708, align 4
+  %load2 = load i32, i32* %tmp1708, align 4
 
   %tmp1710 = or i32 %load2, %a
 
diff --git a/llvm/test/CodeGen/X86/address-type-promotion-constantexpr.ll b/llvm/test/CodeGen/X86/address-type-promotion-constantexpr.ll
index 32f29bd..58baf31 100644
--- a/llvm/test/CodeGen/X86/address-type-promotion-constantexpr.ll
+++ b/llvm/test/CodeGen/X86/address-type-promotion-constantexpr.ll
@@ -10,7 +10,7 @@
 ; CHECK: xor %eax, %eax
 define i32 @main() {
 entry:
-  %foo = load i8* getelementptr ([2 x i8]* @b, i64 0, i64 sext (i8 or (i8 zext (i1 icmp eq (i32* getelementptr inbounds ([2 x i32]* @c, i64 0, i64 1), i32* @a) to i8), i8 1) to i64)), align 1
+  %foo = load i8, i8* getelementptr ([2 x i8]* @b, i64 0, i64 sext (i8 or (i8 zext (i1 icmp eq (i32* getelementptr inbounds ([2 x i32]* @c, i64 0, i64 1), i32* @a) to i8), i8 1) to i64)), align 1
   ret i32 0
 }
 
diff --git a/llvm/test/CodeGen/X86/aliases.ll b/llvm/test/CodeGen/X86/aliases.ll
index 82a8e48..6ce24e2 100644
--- a/llvm/test/CodeGen/X86/aliases.ll
+++ b/llvm/test/CodeGen/X86/aliases.ll
@@ -64,9 +64,9 @@
 ; CHECK-DAG: .globl	test
 define i32 @test() {
 entry:
-   %tmp = load i32* @foo1
-   %tmp1 = load i32* @foo2
-   %tmp0 = load i32* @bar_i
+   %tmp = load i32, i32* @foo1
+   %tmp1 = load i32, i32* @foo2
+   %tmp0 = load i32, i32* @bar_i
    %tmp2 = call i32 @foo_f()
    %tmp3 = add i32 %tmp, %tmp2
    %tmp4 = call %FunTy* @bar_f()
diff --git a/llvm/test/CodeGen/X86/aligned-variadic.ll b/llvm/test/CodeGen/X86/aligned-variadic.ll
index 60d0f71..2941592 100644
--- a/llvm/test/CodeGen/X86/aligned-variadic.ll
+++ b/llvm/test/CodeGen/X86/aligned-variadic.ll
@@ -12,7 +12,7 @@
   %arraydecay1 = bitcast [1 x %struct.__va_list_tag]* %va to i8*
   call void @llvm.va_start(i8* %arraydecay1)
   %overflow_arg_area_p = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %va, i64 0, i64 0, i32 2
-  %overflow_arg_area = load i8** %overflow_arg_area_p, align 8
+  %overflow_arg_area = load i8*, i8** %overflow_arg_area_p, align 8
   %overflow_arg_area.next = getelementptr i8, i8* %overflow_arg_area, i64 24
   store i8* %overflow_arg_area.next, i8** %overflow_arg_area_p, align 8
 ; X32: leal    68(%esp), [[REG:%.*]]
diff --git a/llvm/test/CodeGen/X86/and-su.ll b/llvm/test/CodeGen/X86/and-su.ll
index 70c2461..bdbab15 100644
--- a/llvm/test/CodeGen/X86/and-su.ll
+++ b/llvm/test/CodeGen/X86/and-su.ll
@@ -6,7 +6,7 @@
 ; CHECK-LABEL: foo:
 ; CHECK: andl $10, %eax
 ; CHECK: je
-	%t0 = load i32* %p
+	%t0 = load i32, i32* %p
 	%t2 = and i32 %t0, 10
 	%t3 = icmp ne i32 %t2, 0
 	br i1 %t3, label %bb63, label %bb76
diff --git a/llvm/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll b/llvm/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll
index 6237b66..2e144f8 100644
--- a/llvm/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll
+++ b/llvm/test/CodeGen/X86/atom-call-reg-indirect-foldedreload32.ll
@@ -36,34 +36,34 @@
 define void @func() #0 {
 entry:
   store i32 0, i32* @sum, align 4
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   store i32 %0, i32* @i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %1 = load i32* @i, align 4
-  %2 = load i32* @b, align 4
+  %1 = load i32, i32* @i, align 4
+  %2 = load i32, i32* @b, align 4
   %cmp = icmp slt i32 %1, %2
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %3 = load i32 (i32, i32, i32, i32, i32, i32, i32, i32)** @funcp, align 4
-  %4 = load i32* @i, align 4
-  %5 = load i32* @b, align 4
-  %6 = load i32* @c, align 4
-  %7 = load i32* @d, align 4
-  %8 = load i32* @e, align 4
-  %9 = load i32* @f, align 4
-  %10 = load i32* @g, align 4
-  %11 = load i32* @h, align 4
+  %3 = load i32 (i32, i32, i32, i32, i32, i32, i32, i32)*, i32 (i32, i32, i32, i32, i32, i32, i32, i32)** @funcp, align 4
+  %4 = load i32, i32* @i, align 4
+  %5 = load i32, i32* @b, align 4
+  %6 = load i32, i32* @c, align 4
+  %7 = load i32, i32* @d, align 4
+  %8 = load i32, i32* @e, align 4
+  %9 = load i32, i32* @f, align 4
+  %10 = load i32, i32* @g, align 4
+  %11 = load i32, i32* @h, align 4
   %call = call i32 %3(i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11)
-  %12 = load i32* @sum, align 4
+  %12 = load i32, i32* @sum, align 4
   %add = add nsw i32 %12, %call
   store i32 %add, i32* @sum, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %13 = load i32* @i, align 4
+  %13 = load i32, i32* @i, align 4
   %inc = add nsw i32 %13, 1
   store i32 %inc, i32* @i, align 4
   br label %for.cond
diff --git a/llvm/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll b/llvm/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll
index a196d81..e82626c 100644
--- a/llvm/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll
+++ b/llvm/test/CodeGen/X86/atom-call-reg-indirect-foldedreload64.ll
@@ -42,43 +42,43 @@
 define void @func() #0 {
 entry:
   store i32 0, i32* @sum, align 4
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   store i32 %0, i32* @i, align 4
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %1 = load i32* @i, align 4
-  %2 = load i32* @b, align 4
+  %1 = load i32, i32* @i, align 4
+  %2 = load i32, i32* @b, align 4
   %cmp = icmp slt i32 %1, %2
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %3 = load i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)** @funcp, align 8
-  %4 = load i32* @a, align 4
-  %5 = load i32* @i, align 4
-  %6 = load i32* @i, align 4
+  %3 = load i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)*, i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)** @funcp, align 8
+  %4 = load i32, i32* @a, align 4
+  %5 = load i32, i32* @i, align 4
+  %6 = load i32, i32* @i, align 4
   %mul = mul nsw i32 %6, 2
-  %7 = load i32* @i, align 4
-  %8 = load i32* @b, align 4
+  %7 = load i32, i32* @i, align 4
+  %8 = load i32, i32* @b, align 4
   %div = sdiv i32 %7, %8
-  %9 = load i32* @c, align 4
-  %10 = load i32* @d, align 4
-  %11 = load i32* @e, align 4
-  %12 = load i32* @f, align 4
-  %13 = load i32* @g, align 4
-  %14 = load i32* @h, align 4
-  %15 = load i32* @j, align 4
-  %16 = load i32* @k, align 4
-  %17 = load i32* @l, align 4
-  %18 = load i32* @n, align 4
+  %9 = load i32, i32* @c, align 4
+  %10 = load i32, i32* @d, align 4
+  %11 = load i32, i32* @e, align 4
+  %12 = load i32, i32* @f, align 4
+  %13 = load i32, i32* @g, align 4
+  %14 = load i32, i32* @h, align 4
+  %15 = load i32, i32* @j, align 4
+  %16 = load i32, i32* @k, align 4
+  %17 = load i32, i32* @l, align 4
+  %18 = load i32, i32* @n, align 4
   %call = call i32 %3(i32 %4, i32 %5, i32 %mul, i32 %div, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, i32 %14, i32 %15, i32 %16, i32 %17, i32 %18)
-  %19 = load i32* @sum, align 4
+  %19 = load i32, i32* @sum, align 4
   %add = add nsw i32 %19, %call
   store i32 %add, i32* @sum, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %20 = load i32* @i, align 4
+  %20 = load i32, i32* @i, align 4
   %inc = add nsw i32 %20, 1
   store i32 %inc, i32* @i, align 4
   br label %for.cond
diff --git a/llvm/test/CodeGen/X86/atom-call-reg-indirect.ll b/llvm/test/CodeGen/X86/atom-call-reg-indirect.ll
index 48f2d4c..663b6f1 100644
--- a/llvm/test/CodeGen/X86/atom-call-reg-indirect.ll
+++ b/llvm/test/CodeGen/X86/atom-call-reg-indirect.ll
@@ -14,8 +14,8 @@
 entry:
   %call = tail call %class.A* @_Z3facv()
   %0 = bitcast %class.A* %call to void (%class.A*)***
-  %vtable = load void (%class.A*)*** %0, align 8
-  %1 = load void (%class.A*)** %vtable, align 8
+  %vtable = load void (%class.A*)**, void (%class.A*)*** %0, align 8
+  %1 = load void (%class.A*)*, void (%class.A*)** %vtable, align 8
   ;ATOM32: movl (%ecx), %ecx
   ;ATOM32: calll *%ecx
   ;ATOM-NOT32: calll *(%ecx)
@@ -38,8 +38,8 @@
 define i32 @test2() #0 {
   ;ATOM-LABEL: test2:
 entry:
-  %0 = load void (i32)*** @p, align 8
-  %1 = load void (i32)** %0, align 8
+  %0 = load void (i32)**, void (i32)*** @p, align 8
+  %1 = load void (i32)*, void (i32)** %0, align 8
   ;ATOM32: movl (%eax), %eax
   ;ATOM32: calll *%eax
   ;ATOM-NOT: calll *(%eax)
diff --git a/llvm/test/CodeGen/X86/atom-cmpb.ll b/llvm/test/CodeGen/X86/atom-cmpb.ll
index 6177cff..baf0f5e 100644
--- a/llvm/test/CodeGen/X86/atom-cmpb.ll
+++ b/llvm/test/CodeGen/X86/atom-cmpb.ll
@@ -12,9 +12,9 @@
 define i8 @run_test(i8* %rd_p) {
 entry:
   %incdec.ptr = getelementptr inbounds i8, i8* %rd_p, i64 1
-  %ld1 = load i8* %rd_p, align 1
+  %ld1 = load i8, i8* %rd_p, align 1
   %incdec.ptr1 = getelementptr inbounds i8, i8* %rd_p, i64 2
-  %ld2 = load i8* %incdec.ptr, align 1
+  %ld2 = load i8, i8* %incdec.ptr, align 1
   %x4 = xor i8 %ld1, -1
   %x5 = xor i8 %ld2, -1
   %cmp34 = icmp ult i8 %ld2, %ld1
diff --git a/llvm/test/CodeGen/X86/atom-fixup-lea1.ll b/llvm/test/CodeGen/X86/atom-fixup-lea1.ll
index b89aa02..f862fa6 100644
--- a/llvm/test/CodeGen/X86/atom-fixup-lea1.ll
+++ b/llvm/test/CodeGen/X86/atom-fixup-lea1.ll
@@ -26,7 +26,7 @@
   %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %array, i32 %i.06
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %sum.05
   %inc = add nsw i32 %i.06, 1
   %exitcond = icmp eq i32 %inc, %n
diff --git a/llvm/test/CodeGen/X86/atom-fixup-lea2.ll b/llvm/test/CodeGen/X86/atom-fixup-lea2.ll
index c9823a1..ec82613 100644
--- a/llvm/test/CodeGen/X86/atom-fixup-lea2.ll
+++ b/llvm/test/CodeGen/X86/atom-fixup-lea2.ll
@@ -38,31 +38,31 @@
   %n = alloca %struct.node_t, align 4
   call void bitcast (void (%struct.node_t*, ...)* @getnode to void (%struct.node_t*)*)(%struct.node_t* sret %n)
   %array = getelementptr inbounds %struct.node_t, %struct.node_t* %n, i32 0, i32 4
-  %0 = load i32** %array, align 4
+  %0 = load i32*, i32** %array, align 4
   %cmp = icmp eq i32* %0, null
   br i1 %cmp, label %if.end, label %land.lhs.true
 
 land.lhs.true:
   %p = getelementptr inbounds %struct.node_t, %struct.node_t* %n, i32 0, i32 3
-  %1 = load i32* %p, align 4
+  %1 = load i32, i32* %p, align 4
   %cmp1 = icmp sgt i32 %1, 0
   br i1 %cmp1, label %land.lhs.true2, label %if.end
 
 land.lhs.true2:
   %k = getelementptr inbounds %struct.node_t, %struct.node_t* %n, i32 0, i32 0
-  %2 = load i32* %k, align 4
+  %2 = load i32, i32* %k, align 4
   %cmp3 = icmp sgt i32 %2, 0
   br i1 %cmp3, label %land.lhs.true4, label %if.end
 
 land.lhs.true4:
   %n5 = getelementptr inbounds %struct.node_t, %struct.node_t* %n, i32 0, i32 2
-  %3 = load i32* %n5, align 4
+  %3 = load i32, i32* %n5, align 4
   %cmp6 = icmp sgt i32 %3, 0
   br i1 %cmp6, label %land.lhs.true7, label %if.end
 
 land.lhs.true7:
   %m = getelementptr inbounds %struct.node_t, %struct.node_t* %n, i32 0, i32 1
-  %4 = load i32* %m, align 4
+  %4 = load i32, i32* %m, align 4
   %cmp8 = icmp sgt i32 %4, 0
   br i1 %cmp8, label %if.then, label %if.end
 
@@ -73,7 +73,7 @@
   %add15 = add nsw i32 %1, %5
   %6 = inttoptr i32 %add15 to i32*
   %arrayidx = getelementptr inbounds i32, i32* %6, i32 %add12
-  %7 = load i32* %arrayidx, align 4
+  %7 = load i32, i32* %arrayidx, align 4
   br label %if.end
 
 if.end:
diff --git a/llvm/test/CodeGen/X86/atom-fixup-lea3.ll b/llvm/test/CodeGen/X86/atom-fixup-lea3.ll
index f51ee91..ed2df277 100644
--- a/llvm/test/CodeGen/X86/atom-fixup-lea3.ll
+++ b/llvm/test/CodeGen/X86/atom-fixup-lea3.ll
@@ -26,7 +26,7 @@
   br i1 %cmp7, label %for.body.lr.ph, label %for.end
 
 for.body.lr.ph:                                   ; preds = %entry
-  %.pre = load i32* %m, align 4
+  %.pre = load i32, i32* %m, align 4
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
@@ -35,11 +35,11 @@
   %j.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc1, %for.body ]
   %inc1 = add nsw i32 %j.09, 1
   %arrayidx = getelementptr inbounds i32, i32* %array2, i32 %j.09
-  %1 = load i32* %arrayidx, align 4
+  %1 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %1
   store i32 %add, i32* %m, align 4
   %arrayidx2 = getelementptr inbounds i32, i32* %array, i32 %inc1
-  %2 = load i32* %arrayidx2, align 4
+  %2 = load i32, i32* %arrayidx2, align 4
   %add3 = add nsw i32 %2, %sum.010
   %exitcond = icmp eq i32 %inc1, %n
   br i1 %exitcond, label %for.end, label %for.body
diff --git a/llvm/test/CodeGen/X86/atom-fixup-lea4.ll b/llvm/test/CodeGen/X86/atom-fixup-lea4.ll
index be80e47..f0da1d2 100644
--- a/llvm/test/CodeGen/X86/atom-fixup-lea4.ll
+++ b/llvm/test/CodeGen/X86/atom-fixup-lea4.ll
@@ -10,7 +10,7 @@
 entry:
   %this.addr = alloca %struct.ValueWrapper.6*, align 8
   store %struct.ValueWrapper.6* %this, %struct.ValueWrapper.6** %this.addr, align 8
-  %this1 = load %struct.ValueWrapper.6** %this.addr
+  %this1 = load %struct.ValueWrapper.6*, %struct.ValueWrapper.6** %this.addr
   %value = getelementptr inbounds %struct.ValueWrapper.6, %struct.ValueWrapper.6* %this1, i32 0, i32 0
   call void @_ZN12ValueWrapperIS_IS_IdEEEC2Ev(%struct.ValueWrapper.7* %value)
   ret void
diff --git a/llvm/test/CodeGen/X86/atom-lea-addw-bug.ll b/llvm/test/CodeGen/X86/atom-lea-addw-bug.ll
index 5cda2df..d8147e5 100644
--- a/llvm/test/CodeGen/X86/atom-lea-addw-bug.ll
+++ b/llvm/test/CodeGen/X86/atom-lea-addw-bug.ll
@@ -5,9 +5,9 @@
 
 define i32 @DoLayout() {
 entry:
-  %tmp1 = load i16* undef, align 2
-  %tmp17 = load i16* null, align 2
-  %tmp19 = load i16* undef, align 2
+  %tmp1 = load i16, i16* undef, align 2
+  %tmp17 = load i16, i16* null, align 2
+  %tmp19 = load i16, i16* undef, align 2
   %shl = shl i16 %tmp19, 1
   %add55 = add i16 %tmp17, %tmp1
   %add57 = add i16 %add55, %shl
diff --git a/llvm/test/CodeGen/X86/atom-sched.ll b/llvm/test/CodeGen/X86/atom-sched.ll
index fd18472..b81359e 100644
--- a/llvm/test/CodeGen/X86/atom-sched.ll
+++ b/llvm/test/CodeGen/X86/atom-sched.ll
@@ -21,12 +21,12 @@
 ; CHECK: movl
 ; CHECK: imull
 entry:
-  %0 = load i32* @b, align 4
-  %1 = load i32* @c, align 4
+  %0 = load i32, i32* @b, align 4
+  %1 = load i32, i32* @c, align 4
   %mul = mul nsw i32 %0, %1
   store i32 %mul, i32* @a, align 4
-  %2 = load i32* @e, align 4
-  %3 = load i32* @f, align 4
+  %2 = load i32, i32* @e, align 4
+  %3 = load i32, i32* @f, align 4
   %mul1 = mul nsw i32 %2, %3
   store i32 %mul1, i32* @d, align 4
   ret void
diff --git a/llvm/test/CodeGen/X86/atomic-dagsched.ll b/llvm/test/CodeGen/X86/atomic-dagsched.ll
index 750b73d..97bb1af 100644
--- a/llvm/test/CodeGen/X86/atomic-dagsched.ll
+++ b/llvm/test/CodeGen/X86/atomic-dagsched.ll
@@ -2,10 +2,10 @@
 
 define void @test(i8** %a, i64* %b, i64 %c, i64 %d) nounwind {
 entry:
-  %ptrtoarg4 = load i8** %a, align 8
+  %ptrtoarg4 = load i8*, i8** %a, align 8
   %brglist1 = getelementptr i8*, i8** %a, i64 1
-  %ptrtoarg25 = load i8** %brglist1, align 8
-  %0 = load i64* %b, align 8
+  %ptrtoarg25 = load i8*, i8** %brglist1, align 8
+  %0 = load i64, i64* %b, align 8
   %1 = mul i64 %0, 4
   %scevgep = getelementptr i8, i8* %ptrtoarg25, i64 %1
   %2 = mul i64 %d, 4
@@ -18,8 +18,8 @@
   br i1 %3, label %return, label %loop
 
 loop:                                             ; preds = %loop.cond
-  %4 = load i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8
-  %5 = load i64* %4, align 8
+  %4 = load i64*, i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8
+  %5 = load i64, i64* %4, align 8
   %vector.size.i = ashr i64 %5, 3
   %num.vector.wi.i = shl i64 %vector.size.i, 3
   %6 = icmp eq i64 %vector.size.i, 0
@@ -36,7 +36,7 @@
   %asr.iv = phi i64 [ %asr.iv.next, %vector_kernel_entry.i ], [ %vector.size.i, %dim_0_vector_pre_head.i ]
   %8 = addrspacecast i8* %ptrtoarg4 to i32 addrspace(1)*
   %asr.iv911 = addrspacecast i8* %asr.iv9 to <8 x i32> addrspace(1)*
-  %9 = load <8 x i32> addrspace(1)* %asr.iv911, align 4
+  %9 = load <8 x i32>, <8 x i32> addrspace(1)* %asr.iv911, align 4
   %extract8vector_func.i = extractelement <8 x i32> %9, i32 0
   %extract9vector_func.i = extractelement <8 x i32> %9, i32 1
   %extract10vector_func.i = extractelement <8 x i32> %9, i32 2
@@ -65,8 +65,8 @@
   br i1 %18, label %test.exit, label %dim_0_pre_head.i
 
 dim_0_pre_head.i:                                 ; preds = %scalarIf.i
-  %19 = load i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8
-  %20 = load i64* %19, align 8
+  %19 = load i64*, i64* addrspace(256)* inttoptr (i64 264 to i64* addrspace(256)*), align 8
+  %20 = load i64, i64* %19, align 8
   %21 = trunc i64 %20 to i32
   %22 = mul i64 %vector.size.i, 8
   br label %scalar_kernel_entry.i
@@ -76,7 +76,7 @@
   %23 = addrspacecast i8* %asr.iv6 to i32 addrspace(1)*
   %24 = addrspacecast i8* %ptrtoarg4 to i32 addrspace(1)*
   %scevgep16 = getelementptr i32, i32 addrspace(1)* %23, i64 %asr.iv12
-  %25 = load i32 addrspace(1)* %scevgep16, align 4
+  %25 = load i32, i32 addrspace(1)* %scevgep16, align 4
   %26 = atomicrmw min i32 addrspace(1)* %24, i32 %25 seq_cst
   %scevgep15 = getelementptr i32, i32 addrspace(1)* %23, i64 %asr.iv12
   store i32 %21, i32 addrspace(1)* %scevgep15, align 4
diff --git a/llvm/test/CodeGen/X86/atomic-load-store-wide.ll b/llvm/test/CodeGen/X86/atomic-load-store-wide.ll
index ad1a5c6..5c46397 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store-wide.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store-wide.ll
@@ -16,6 +16,6 @@
 ; CHECK-LABEL: test2
 ; CHECK: lock
 ; CHECK-NEXT: cmpxchg8b
-  %val = load atomic i64* %ptr seq_cst, align 8
+  %val = load atomic i64, i64* %ptr seq_cst, align 8
   ret i64 %val
 }
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 86a744e..dab79bd 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -18,6 +18,6 @@
 define i32 @test3(i32* %ptr) {
 ; CHECK: test3
 ; CHECK: movl	(%rdi), %eax
-  %val = load atomic i32* %ptr seq_cst, align 4
+  %val = load atomic i32, i32* %ptr seq_cst, align 4
   ret i32 %val
 }
diff --git a/llvm/test/CodeGen/X86/atomic-or.ll b/llvm/test/CodeGen/X86/atomic-or.ll
index 1687e07..0783846 100644
--- a/llvm/test/CodeGen/X86/atomic-or.ll
+++ b/llvm/test/CodeGen/X86/atomic-or.ll
@@ -6,7 +6,7 @@
 entry:
   %p.addr = alloca i64*, align 8
   store i64* %p, i64** %p.addr, align 8
-  %tmp = load i64** %p.addr, align 8
+  %tmp = load i64*, i64** %p.addr, align 8
 ; CHECK-LABEL: t1:
 ; CHECK: movl    $2147483648, %eax
 ; CHECK: lock
@@ -19,7 +19,7 @@
 entry:
   %p.addr = alloca i64*, align 8
   store i64* %p, i64** %p.addr, align 8
-  %tmp = load i64** %p.addr, align 8
+  %tmp = load i64*, i64** %p.addr, align 8
 ; CHECK-LABEL: t2:
 ; CHECK: lock
 ; CHECK-NEXT: orq $2147483644, (%r{{.*}})
diff --git a/llvm/test/CodeGen/X86/atomic-pointer.ll b/llvm/test/CodeGen/X86/atomic-pointer.ll
index ec3e6c3..66e0217 100644
--- a/llvm/test/CodeGen/X86/atomic-pointer.ll
+++ b/llvm/test/CodeGen/X86/atomic-pointer.ll
@@ -6,7 +6,7 @@
 ; CHECK: movl
 ; CHECK: ret
 0:
-  %0 = load atomic i32** %a0 seq_cst, align 4
+  %0 = load atomic i32*, i32** %a0 seq_cst, align 4
   ret i32* %0
 }
 
diff --git a/llvm/test/CodeGen/X86/atomic128.ll b/llvm/test/CodeGen/X86/atomic128.ll
index 741d290..dea7d48 100644
--- a/llvm/test/CodeGen/X86/atomic128.ll
+++ b/llvm/test/CodeGen/X86/atomic128.ll
@@ -249,7 +249,7 @@
 ; CHECK: lock
 ; CHECK: cmpxchg16b (%rdi)
 
-   %r = load atomic i128* %p seq_cst, align 16
+   %r = load atomic i128, i128* %p seq_cst, align 16
    ret i128 %r
 }
 
@@ -262,7 +262,7 @@
 ; CHECK: lock
 ; CHECK: cmpxchg16b (%rdi)
 
-   %r = load atomic i128* %p monotonic, align 16
+   %r = load atomic i128, i128* %p monotonic, align 16
    ret i128 %r
 }
 
diff --git a/llvm/test/CodeGen/X86/atomic_mi.ll b/llvm/test/CodeGen/X86/atomic_mi.ll
index 19e019e..7a6204f 100644
--- a/llvm/test/CodeGen/X86/atomic_mi.ll
+++ b/llvm/test/CodeGen/X86/atomic_mi.ll
@@ -103,7 +103,7 @@
 ; X32-NOT: lock
 ; X32: addb
 ; X32-NOT: movb
-  %1 = load atomic i8* %p seq_cst, align 1
+  %1 = load atomic i8, i8* %p seq_cst, align 1
   %2 = add i8 %1, 2
   store atomic i8 %2, i8* %p release, align 1
   ret void
@@ -116,7 +116,7 @@
 ; X64-NOT: addw
 ; X32-LABEL: add_16
 ; X32-NOT: addw
-  %1 = load atomic i16* %p acquire, align 2
+  %1 = load atomic i16, i16* %p acquire, align 2
   %2 = add i16 %1, 2
   store atomic i16 %2, i16* %p release, align 2
   ret void
@@ -131,7 +131,7 @@
 ; X32-NOT: lock
 ; X32: addl
 ; X32-NOT: movl
-  %1 = load atomic i32* %p acquire, align 4
+  %1 = load atomic i32, i32* %p acquire, align 4
   %2 = add i32 %1, 2
   store atomic i32 %2, i32* %p monotonic, align 4
   ret void
@@ -144,7 +144,7 @@
 ; X64-NOT: movq
 ;   We do not check X86-32 as it cannot do 'addq'.
 ; X32-LABEL: add_64
-  %1 = load atomic i64* %p acquire, align 8
+  %1 = load atomic i64, i64* %p acquire, align 8
   %2 = add i64 %1, 2
   store atomic i64 %2, i64* %p release, align 8
   ret void
@@ -155,7 +155,7 @@
 ; X64: xchgl
 ; X32-LABEL: add_32_seq_cst
 ; X32: xchgl
-  %1 = load atomic i32* %p monotonic, align 4
+  %1 = load atomic i32, i32* %p monotonic, align 4
   %2 = add i32 %1, 2
   store atomic i32 %2, i32* %p seq_cst, align 4
   ret void
@@ -172,7 +172,7 @@
 ; X32-NOT: lock
 ; X32: andb
 ; X32-NOT: movb
-  %1 = load atomic i8* %p monotonic, align 1
+  %1 = load atomic i8, i8* %p monotonic, align 1
   %2 = and i8 %1, 2
   store atomic i8 %2, i8* %p release, align 1
   ret void
@@ -185,7 +185,7 @@
 ; X64-NOT: andw
 ; X32-LABEL: and_16
 ; X32-NOT: andw
-  %1 = load atomic i16* %p acquire, align 2
+  %1 = load atomic i16, i16* %p acquire, align 2
   %2 = and i16 %1, 2
   store atomic i16 %2, i16* %p release, align 2
   ret void
@@ -200,7 +200,7 @@
 ; X32-NOT: lock
 ; X32: andl
 ; X32-NOT: movl
-  %1 = load atomic i32* %p acquire, align 4
+  %1 = load atomic i32, i32* %p acquire, align 4
   %2 = and i32 %1, 2
   store atomic i32 %2, i32* %p release, align 4
   ret void
@@ -213,7 +213,7 @@
 ; X64-NOT: movq
 ;   We do not check X86-32 as it cannot do 'andq'.
 ; X32-LABEL: and_64
-  %1 = load atomic i64* %p acquire, align 8
+  %1 = load atomic i64, i64* %p acquire, align 8
   %2 = and i64 %1, 2
   store atomic i64 %2, i64* %p release, align 8
   ret void
@@ -224,7 +224,7 @@
 ; X64: xchgl
 ; X32-LABEL: and_32_seq_cst
 ; X32: xchgl
-  %1 = load atomic i32* %p monotonic, align 4
+  %1 = load atomic i32, i32* %p monotonic, align 4
   %2 = and i32 %1, 2
   store atomic i32 %2, i32* %p seq_cst, align 4
   ret void
@@ -241,7 +241,7 @@
 ; X32-NOT: lock
 ; X32: orb
 ; X32-NOT: movb
-  %1 = load atomic i8* %p acquire, align 1
+  %1 = load atomic i8, i8* %p acquire, align 1
   %2 = or i8 %1, 2
   store atomic i8 %2, i8* %p release, align 1
   ret void
@@ -252,7 +252,7 @@
 ; X64-NOT: orw
 ; X32-LABEL: or_16
 ; X32-NOT: orw
-  %1 = load atomic i16* %p acquire, align 2
+  %1 = load atomic i16, i16* %p acquire, align 2
   %2 = or i16 %1, 2
   store atomic i16 %2, i16* %p release, align 2
   ret void
@@ -267,7 +267,7 @@
 ; X32-NOT: lock
 ; X32: orl
 ; X32-NOT: movl
-  %1 = load atomic i32* %p acquire, align 4
+  %1 = load atomic i32, i32* %p acquire, align 4
   %2 = or i32 %1, 2
   store atomic i32 %2, i32* %p release, align 4
   ret void
@@ -280,7 +280,7 @@
 ; X64-NOT: movq
 ;   We do not check X86-32 as it cannot do 'orq'.
 ; X32-LABEL: or_64
-  %1 = load atomic i64* %p acquire, align 8
+  %1 = load atomic i64, i64* %p acquire, align 8
   %2 = or i64 %1, 2
   store atomic i64 %2, i64* %p release, align 8
   ret void
@@ -291,7 +291,7 @@
 ; X64: xchgl
 ; X32-LABEL: or_32_seq_cst
 ; X32: xchgl
-  %1 = load atomic i32* %p monotonic, align 4
+  %1 = load atomic i32, i32* %p monotonic, align 4
   %2 = or i32 %1, 2
   store atomic i32 %2, i32* %p seq_cst, align 4
   ret void
@@ -308,7 +308,7 @@
 ; X32-NOT: lock
 ; X32: xorb
 ; X32-NOT: movb
-  %1 = load atomic i8* %p acquire, align 1
+  %1 = load atomic i8, i8* %p acquire, align 1
   %2 = xor i8 %1, 2
   store atomic i8 %2, i8* %p release, align 1
   ret void
@@ -319,7 +319,7 @@
 ; X64-NOT: xorw
 ; X32-LABEL: xor_16
 ; X32-NOT: xorw
-  %1 = load atomic i16* %p acquire, align 2
+  %1 = load atomic i16, i16* %p acquire, align 2
   %2 = xor i16 %1, 2
   store atomic i16 %2, i16* %p release, align 2
   ret void
@@ -334,7 +334,7 @@
 ; X32-NOT: lock
 ; X32: xorl
 ; X32-NOT: movl
-  %1 = load atomic i32* %p acquire, align 4
+  %1 = load atomic i32, i32* %p acquire, align 4
   %2 = xor i32 %1, 2
   store atomic i32 %2, i32* %p release, align 4
   ret void
@@ -347,7 +347,7 @@
 ; X64-NOT: movq
 ;   We do not check X86-32 as it cannot do 'xorq'.
 ; X32-LABEL: xor_64
-  %1 = load atomic i64* %p acquire, align 8
+  %1 = load atomic i64, i64* %p acquire, align 8
   %2 = xor i64 %1, 2
   store atomic i64 %2, i64* %p release, align 8
   ret void
@@ -358,7 +358,7 @@
 ; X64: xchgl
 ; X32-LABEL: xor_32_seq_cst
 ; X32: xchgl
-  %1 = load atomic i32* %p monotonic, align 4
+  %1 = load atomic i32, i32* %p monotonic, align 4
   %2 = xor i32 %1, 2
   store atomic i32 %2, i32* %p seq_cst, align 4
   ret void
@@ -378,7 +378,7 @@
 ; SLOW_INC-LABEL: inc_8
 ; SLOW_INC-NOT: incb
 ; SLOW_INC-NOT: movb
-  %1 = load atomic i8* %p seq_cst, align 1
+  %1 = load atomic i8, i8* %p seq_cst, align 1
   %2 = add i8 %1, 1
   store atomic i8 %2, i8* %p release, align 1
   ret void
@@ -393,7 +393,7 @@
 ; X32-NOT: incw
 ; SLOW_INC-LABEL: inc_16
 ; SLOW_INC-NOT: incw
-  %1 = load atomic i16* %p acquire, align 2
+  %1 = load atomic i16, i16* %p acquire, align 2
   %2 = add i16 %1, 1
   store atomic i16 %2, i16* %p release, align 2
   ret void
@@ -411,7 +411,7 @@
 ; SLOW_INC-LABEL: inc_32
 ; SLOW_INC-NOT: incl
 ; SLOW_INC-NOT: movl
-  %1 = load atomic i32* %p acquire, align 4
+  %1 = load atomic i32, i32* %p acquire, align 4
   %2 = add i32 %1, 1
   store atomic i32 %2, i32* %p monotonic, align 4
   ret void
@@ -427,7 +427,7 @@
 ; SLOW_INC-LABEL: inc_64
 ; SLOW_INC-NOT: incq
 ; SLOW_INC-NOT: movq
-  %1 = load atomic i64* %p acquire, align 8
+  %1 = load atomic i64, i64* %p acquire, align 8
   %2 = add i64 %1, 1
   store atomic i64 %2, i64* %p release, align 8
   ret void
@@ -438,7 +438,7 @@
 ; X64: xchgl
 ; X32-LABEL: inc_32_seq_cst
 ; X32: xchgl
-  %1 = load atomic i32* %p monotonic, align 4
+  %1 = load atomic i32, i32* %p monotonic, align 4
   %2 = add i32 %1, 1
   store atomic i32 %2, i32* %p seq_cst, align 4
   ret void
@@ -458,7 +458,7 @@
 ; SLOW_INC-LABEL: dec_8
 ; SLOW_INC-NOT: decb
 ; SLOW_INC-NOT: movb
-  %1 = load atomic i8* %p seq_cst, align 1
+  %1 = load atomic i8, i8* %p seq_cst, align 1
   %2 = sub i8 %1, 1
   store atomic i8 %2, i8* %p release, align 1
   ret void
@@ -473,7 +473,7 @@
 ; X32-NOT: decw
 ; SLOW_INC-LABEL: dec_16
 ; SLOW_INC-NOT: decw
-  %1 = load atomic i16* %p acquire, align 2
+  %1 = load atomic i16, i16* %p acquire, align 2
   %2 = sub i16 %1, 1
   store atomic i16 %2, i16* %p release, align 2
   ret void
@@ -491,7 +491,7 @@
 ; SLOW_INC-LABEL: dec_32
 ; SLOW_INC-NOT: decl
 ; SLOW_INC-NOT: movl
-  %1 = load atomic i32* %p acquire, align 4
+  %1 = load atomic i32, i32* %p acquire, align 4
   %2 = sub i32 %1, 1
   store atomic i32 %2, i32* %p monotonic, align 4
   ret void
@@ -507,7 +507,7 @@
 ; SLOW_INC-LABEL: dec_64
 ; SLOW_INC-NOT: decq
 ; SLOW_INC-NOT: movq
-  %1 = load atomic i64* %p acquire, align 8
+  %1 = load atomic i64, i64* %p acquire, align 8
   %2 = sub i64 %1, 1
   store atomic i64 %2, i64* %p release, align 8
   ret void
@@ -518,7 +518,7 @@
 ; X64: xchgl
 ; X32-LABEL: dec_32_seq_cst
 ; X32: xchgl
-  %1 = load atomic i32* %p monotonic, align 4
+  %1 = load atomic i32, i32* %p monotonic, align 4
   %2 = sub i32 %1, 1
   store atomic i32 %2, i32* %p seq_cst, align 4
   ret void
diff --git a/llvm/test/CodeGen/X86/atomic_op.ll b/llvm/test/CodeGen/X86/atomic_op.ll
index d0ab28a..aa895de 100644
--- a/llvm/test/CodeGen/X86/atomic_op.ll
+++ b/llvm/test/CodeGen/X86/atomic_op.ll
@@ -22,7 +22,7 @@
 	store i32 3855, i32* %ort
 	store i32 3855, i32* %xort
 	store i32 4, i32* %temp
-	%tmp = load i32* %temp
+	%tmp = load i32, i32* %temp
         ; CHECK: lock
         ; CHECK: xaddl
   %0 = atomicrmw add i32* %val1, i32 %tmp monotonic
diff --git a/llvm/test/CodeGen/X86/avoid-loop-align-2.ll b/llvm/test/CodeGen/X86/avoid-loop-align-2.ll
index 3e1e452..e02f356 100644
--- a/llvm/test/CodeGen/X86/avoid-loop-align-2.ll
+++ b/llvm/test/CodeGen/X86/avoid-loop-align-2.ll
@@ -13,7 +13,7 @@
 
 bb.nph12:		; preds = %entry
 	%1 = icmp eq i32 %b, 0		; <i1> [#uses=1]
-	%2 = load i32** @x, align 8		; <i32*> [#uses=1]
+	%2 = load i32*, i32** @x, align 8		; <i32*> [#uses=1]
 	br i1 %1, label %bb2.preheader, label %bb2.preheader.us
 
 bb2.preheader.us:		; preds = %bb2.bb3_crit_edge.us, %bb.nph12
@@ -27,7 +27,7 @@
 	%tmp17 = add i32 %indvar, %tmp16		; <i32> [#uses=1]
 	%tmp. = zext i32 %tmp17 to i64		; <i64> [#uses=1]
 	%3 = getelementptr i32, i32* %2, i64 %tmp.		; <i32*> [#uses=1]
-	%4 = load i32* %3, align 4		; <i32> [#uses=2]
+	%4 = load i32, i32* %3, align 4		; <i32> [#uses=2]
 	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
 	%exitcond = icmp eq i32 %indvar.next, %b		; <i1> [#uses=1]
 	br i1 %exitcond, label %bb2.bb3_crit_edge.us, label %bb1.us
diff --git a/llvm/test/CodeGen/X86/avoid-loop-align.ll b/llvm/test/CodeGen/X86/avoid-loop-align.ll
index c14eebb..5d00ed0 100644
--- a/llvm/test/CodeGen/X86/avoid-loop-align.ll
+++ b/llvm/test/CodeGen/X86/avoid-loop-align.ll
@@ -22,7 +22,7 @@
 bb1:		; preds = %bb, %entry
 	%P.0.rec = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=2]
 	%P.0 = getelementptr i8, i8* %tmp1, i32 %P.0.rec		; <i8*> [#uses=3]
-	%tmp2 = load i8* %P.0, align 1		; <i8> [#uses=1]
+	%tmp2 = load i8, i8* %P.0, align 1		; <i8> [#uses=1]
 	switch i8 %tmp2, label %bb4 [
 		i8 12, label %bb
 		i8 42, label %bb
diff --git a/llvm/test/CodeGen/X86/avoid_complex_am.ll b/llvm/test/CodeGen/X86/avoid_complex_am.ll
index fa90a35..fafa236 100644
--- a/llvm/test/CodeGen/X86/avoid_complex_am.ll
+++ b/llvm/test/CodeGen/X86/avoid_complex_am.ll
@@ -20,12 +20,12 @@
   %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
   %tmp = add nsw i64 %indvars.iv, -1
   %arrayidx = getelementptr inbounds double, double* %b, i64 %tmp
-  %tmp1 = load double* %arrayidx, align 8
+  %tmp1 = load double, double* %arrayidx, align 8
 ; The induction variable should carry the scaling factor: 1.
 ; CHECK: [[IVNEXT]] = add nuw nsw i64 [[IV]], 1
   %indvars.iv.next = add i64 %indvars.iv, 1
   %arrayidx2 = getelementptr inbounds double, double* %c, i64 %indvars.iv.next
-  %tmp2 = load double* %arrayidx2, align 8
+  %tmp2 = load double, double* %arrayidx2, align 8
   %mul = fmul double %tmp1, %tmp2
   %arrayidx4 = getelementptr inbounds double, double* %a, i64 %indvars.iv
   store double %mul, double* %arrayidx4, align 8
diff --git a/llvm/test/CodeGen/X86/avx-arith.ll b/llvm/test/CodeGen/X86/avx-arith.ll
index a9da1ec..792a998 100644
--- a/llvm/test/CodeGen/X86/avx-arith.ll
+++ b/llvm/test/CodeGen/X86/avx-arith.ll
@@ -38,7 +38,7 @@
 ; CHECK: vsubpd (%
 define <4 x double> @subpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
 entry:
-  %tmp2 = load <4 x double>* %x, align 32
+  %tmp2 = load <4 x double>, <4 x double>* %x, align 32
   %sub.i = fsub <4 x double> %y, %tmp2
   ret <4 x double> %sub.i
 }
@@ -53,7 +53,7 @@
 ; CHECK: vsubps (%
 define <8 x float> @subps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
 entry:
-  %tmp2 = load <8 x float>* %x, align 32
+  %tmp2 = load <8 x float>, <8 x float>* %x, align 32
   %sub.i = fsub <8 x float> %y, %tmp2
   ret <8 x float> %sub.i
 }
@@ -264,7 +264,7 @@
 define <4 x float> @int_sqrt_ss() {
 ; CHECK: int_sqrt_ss
 ; CHECK: vsqrtss
- %x0 = load float addrspace(1)* undef, align 8
+ %x0 = load float, float addrspace(1)* undef, align 8
  %x1 = insertelement <4 x float> undef, float %x0, i32 0
  %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind
  ret <4 x float> %x2
diff --git a/llvm/test/CodeGen/X86/avx-basic.ll b/llvm/test/CodeGen/X86/avx-basic.ll
index 8e82551..5307527 100644
--- a/llvm/test/CodeGen/X86/avx-basic.ll
+++ b/llvm/test/CodeGen/X86/avx-basic.ll
@@ -57,10 +57,10 @@
 define <8 x i32> @VMOVZQI2PQI([0 x float]* nocapture %aFOO) nounwind {
 allocas:
   %ptrcast.i33.i = bitcast [0 x float]* %aFOO to i32*
-  %val.i34.i = load i32* %ptrcast.i33.i, align 4
+  %val.i34.i = load i32, i32* %ptrcast.i33.i, align 4
   %ptroffset.i22.i992 = getelementptr [0 x float], [0 x float]* %aFOO, i64 0, i64 1
   %ptrcast.i23.i = bitcast float* %ptroffset.i22.i992 to i32*
-  %val.i24.i = load i32* %ptrcast.i23.i, align 4
+  %val.i24.i = load i32, i32* %ptrcast.i23.i, align 4
   %updatedret.i30.i = insertelement <8 x i32> undef, i32 %val.i34.i, i32 1
   ret <8 x i32> %updatedret.i30.i
 }
diff --git a/llvm/test/CodeGen/X86/avx-bitcast.ll b/llvm/test/CodeGen/X86/avx-bitcast.ll
index c9d828c..bb3e5a5 100644
--- a/llvm/test/CodeGen/X86/avx-bitcast.ll
+++ b/llvm/test/CodeGen/X86/avx-bitcast.ll
@@ -3,7 +3,7 @@
 ; CHECK: vmovsd (%
 ; CHECK-NEXT: vmovq %xmm
 define i64 @bitcasti64tof64() {
-  %a = load double* undef
+  %a = load double, double* undef
   %b = bitcast double %a to i64
   ret i64 %b
 }
diff --git a/llvm/test/CodeGen/X86/avx-cvt.ll b/llvm/test/CodeGen/X86/avx-cvt.ll
index 10ab971..9f154abd 100644
--- a/llvm/test/CodeGen/X86/avx-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx-cvt.ll
@@ -47,7 +47,7 @@
 ; CHECK: vcvtsi2sdq (%
 define double @funcA(i64* nocapture %e) nounwind uwtable readonly ssp {
 entry:
-  %tmp1 = load i64* %e, align 8
+  %tmp1 = load i64, i64* %e, align 8
   %conv = sitofp i64 %tmp1 to double
   ret double %conv
 }
@@ -55,7 +55,7 @@
 ; CHECK: vcvtsi2sdl (%
 define double @funcB(i32* nocapture %e) nounwind uwtable readonly ssp {
 entry:
-  %tmp1 = load i32* %e, align 4
+  %tmp1 = load i32, i32* %e, align 4
   %conv = sitofp i32 %tmp1 to double
   ret double %conv
 }
@@ -63,7 +63,7 @@
 ; CHECK: vcvtsi2ssl (%
 define float @funcC(i32* nocapture %e) nounwind uwtable readonly ssp {
 entry:
-  %tmp1 = load i32* %e, align 4
+  %tmp1 = load i32, i32* %e, align 4
   %conv = sitofp i32 %tmp1 to float
   ret float %conv
 }
@@ -71,7 +71,7 @@
 ; CHECK: vcvtsi2ssq  (%
 define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp {
 entry:
-  %tmp1 = load i64* %e, align 8
+  %tmp1 = load i64, i64* %e, align 8
   %conv = sitofp i64 %tmp1 to float
   ret float %conv
 }
@@ -81,7 +81,7 @@
 entry:
   %f = alloca float, align 4
   %d = alloca double, align 8
-  %tmp = load float* %f, align 4
+  %tmp = load float, float* %f, align 4
   %conv = fpext float %tmp to double
   store double %conv, double* %d, align 8
   ret void
diff --git a/llvm/test/CodeGen/X86/avx-intel-ocl.ll b/llvm/test/CodeGen/X86/avx-intel-ocl.ll
index 70ec124..b2836d3 100644
--- a/llvm/test/CodeGen/X86/avx-intel-ocl.ll
+++ b/llvm/test/CodeGen/X86/avx-intel-ocl.ll
@@ -33,7 +33,7 @@
   %y = alloca <16 x float>, align 16
   %x = fadd <16 x float> %a, %b
   %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
-  %2 = load <16 x float>* %y, align 16
+  %2 = load <16 x float>, <16 x float>* %y, align 16
   %3 = fadd <16 x float> %2, %1
   ret <16 x float> %3
 }
@@ -58,7 +58,7 @@
   %y = alloca <16 x float>, align 16
   %x = fadd <16 x float> %a, %b
   %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
-  %2 = load <16 x float>* %y, align 16
+  %2 = load <16 x float>, <16 x float>* %y, align 16
   %3 = fadd <16 x float> %1, %b
   %4 = fadd <16 x float> %2, %3
   ret <16 x float> %4
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
index 3ecf709..8f63a08 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -1126,8 +1126,8 @@
   ; CHECK: movl $7
   ; CHECK: vpcmpestri $7, (
   ; CHECK: movl
-  %1 = load <16 x i8>* %a0
-  %2 = load <16 x i8>* %a2
+  %1 = load <16 x i8>, <16 x i8>* %a0
+  %2 = load <16 x i8>, <16 x i8>* %a2
   %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -1204,7 +1204,7 @@
   ; CHECK: movl $7
   ; CHECK: vpcmpestrm $7,
   ; CHECK-NOT: vmov
-  %1 = load <16 x i8>* %a2
+  %1 = load <16 x i8>, <16 x i8>* %a2
   %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
   ret <16 x i8> %res
 }
@@ -1222,8 +1222,8 @@
 define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
   ; CHECK: vpcmpistri $7, (
   ; CHECK: movl
-  %1 = load <16 x i8>* %a0
-  %2 = load <16 x i8>* %a1
+  %1 = load <16 x i8>, <16 x i8>* %a0
+  %2 = load <16 x i8>, <16 x i8>* %a1
   %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -1286,7 +1286,7 @@
 define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) {
   ; CHECK: vpcmpistrm $7, (
   ; CHECK-NOT: vmov
-  %1 = load <16 x i8>* %a1
+  %1 = load <16 x i8>, <16 x i8>* %a1
   %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
   ret <16 x i8> %res
 }
@@ -2330,7 +2330,7 @@
 }
 define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, <4 x i32>* %a1) {
   ; CHECK: vpermilps
-  %a2 = load <4 x i32>* %a1
+  %a2 = load <4 x i32>, <4 x i32>* %a1
   %res = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a2) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
 }
diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll
index a6775ab..ee5bd0e 100644
--- a/llvm/test/CodeGen/X86/avx-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx-load-store.ll
@@ -10,10 +10,10 @@
 define void @test_256_load(double* nocapture %d, float* nocapture %f, <4 x i64>* nocapture %i) nounwind uwtable ssp {
 entry:
   %0 = bitcast double* %d to <4 x double>*
-  %tmp1.i = load <4 x double>* %0, align 32
+  %tmp1.i = load <4 x double>, <4 x double>* %0, align 32
   %1 = bitcast float* %f to <8 x float>*
-  %tmp1.i17 = load <8 x float>* %1, align 32
-  %tmp1.i16 = load <4 x i64>* %i, align 32
+  %tmp1.i17 = load <8 x float>, <8 x float>* %1, align 32
+  %tmp1.i16 = load <4 x i64>, <4 x i64>* %i, align 32
   tail call void @dummy(<4 x double> %tmp1.i, <8 x float> %tmp1.i17, <4 x i64> %tmp1.i16) nounwind
   store <4 x double> %tmp1.i, <4 x double>* %0, align 32
   store <8 x float> %tmp1.i17, <8 x float>* %1, align 32
@@ -29,7 +29,7 @@
 
 ; CHECK: mov00
 define <8 x float> @mov00(<8 x float> %v, float * %ptr) nounwind {
-  %val = load float* %ptr
+  %val = load float, float* %ptr
 ; CHECK: vinsertps
 ; CHECK: vinsertf128
   %i0 = insertelement <8 x float> zeroinitializer, float %val, i32 0
@@ -39,7 +39,7 @@
 
 ; CHECK: mov01
 define <4 x double> @mov01(<4 x double> %v, double * %ptr) nounwind {
-  %val = load double* %ptr
+  %val = load double, double* %ptr
 ; CHECK: vmovlpd
 ; CHECK: vinsertf128
   %i0 = insertelement <4 x double> zeroinitializer, double %val, i32 0
@@ -122,7 +122,7 @@
 ; CHECK: vmovups
 ; CHECK: vmovups
 define void @add8i32(<8 x i32>* %ret, <8 x i32>* %bp) nounwind {
-  %b = load <8 x i32>* %bp, align 1
+  %b = load <8 x i32>, <8 x i32>* %bp, align 1
   %x = add <8 x i32> zeroinitializer, %b
   store <8 x i32> %x, <8 x i32>* %ret, align 1
   ret void
@@ -132,7 +132,7 @@
 ; CHECK: vmovaps ({{.*}}), %ymm{{.*}}
 ; CHECK: vmovaps %ymm{{.*}}, ({{.*}})
 define void @add4i64a64(<4 x i64>* %ret, <4 x i64>* %bp) nounwind {
-  %b = load <4 x i64>* %bp, align 64
+  %b = load <4 x i64>, <4 x i64>* %bp, align 64
   %x = add <4 x i64> zeroinitializer, %b
   store <4 x i64> %x, <4 x i64>* %ret, align 64
   ret void
@@ -144,7 +144,7 @@
 ; CHECK: vmovaps %xmm{{.*}}, {{.*}}({{.*}})
 ; CHECK: vmovaps %xmm{{.*}}, {{.*}}({{.*}})
 define void @add4i64a16(<4 x i64>* %ret, <4 x i64>* %bp) nounwind {
-  %b = load <4 x i64>* %bp, align 16
+  %b = load <4 x i64>, <4 x i64>* %bp, align 16
   %x = add <4 x i64> zeroinitializer, %b
   store <4 x i64> %x, <4 x i64>* %ret, align 16
   ret void
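
; Illustrative sketch (hypothetical @add8i32_unaligned, mirroring add8i32
; above): the alignment suffix survives the rewrite untouched, and it is
; still what selects vmovups (align 1) over vmovaps (align 64) in the CHECK
; lines of this file.
define <8 x i32> @add8i32_unaligned(<8 x i32>* %bp) {
  %b = load <8 x i32>, <8 x i32>* %bp, align 1   ; unaligned: folds to vmovups
  %x = add <8 x i32> %b, %b
  ret <8 x i32> %x
}
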
diff --git a/llvm/test/CodeGen/X86/avx-logic.ll b/llvm/test/CodeGen/X86/avx-logic.ll
index 115cefb..062956f 100644
--- a/llvm/test/CodeGen/X86/avx-logic.ll
+++ b/llvm/test/CodeGen/X86/avx-logic.ll
@@ -142,7 +142,7 @@
 ; CHECK: vandnpd (%
 define <4 x double> @andnotpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
 entry:
-  %tmp2 = load <4 x double>* %x, align 32
+  %tmp2 = load <4 x double>, <4 x double>* %x, align 32
   %0 = bitcast <4 x double> %y to <4 x i64>
   %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
   %1 = bitcast <4 x double> %tmp2 to <4 x i64>
@@ -167,7 +167,7 @@
 ; CHECK: vandnps (%
 define <8 x float> @andnotps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
 entry:
-  %tmp2 = load <8 x float>* %x, align 32
+  %tmp2 = load <8 x float>, <8 x float>* %x, align 32
   %0 = bitcast <8 x float> %y to <8 x i32>
   %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
   %1 = bitcast <8 x float> %tmp2 to <8 x i32>
diff --git a/llvm/test/CodeGen/X86/avx-splat.ll b/llvm/test/CodeGen/X86/avx-splat.ll
index 3b24d95..3ea7e38 100644
--- a/llvm/test/CodeGen/X86/avx-splat.ll
+++ b/llvm/test/CodeGen/X86/avx-splat.ll
@@ -58,7 +58,7 @@
 load.i1247:                                       ; preds = %for_exit499
   %ptr1227 = getelementptr [18 x [18 x float]], [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
   %ptr.i1237 = bitcast float* %ptr1227 to i32*
-  %val.i1238 = load i32* %ptr.i1237, align 4
+  %val.i1238 = load i32, i32* %ptr.i1237, align 4
   %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
   %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
   %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
diff --git a/llvm/test/CodeGen/X86/avx-unpack.ll b/llvm/test/CodeGen/X86/avx-unpack.ll
index 20f5345..6924d98 100644
--- a/llvm/test/CodeGen/X86/avx-unpack.ll
+++ b/llvm/test/CodeGen/X86/avx-unpack.ll
@@ -70,8 +70,8 @@
 ; CHECK: vunpckhps (%
 define <8 x i32> @unpackhips2(<8 x i32>* %src1, <8 x i32>* %src2) nounwind uwtable readnone ssp {
 entry:
-  %a = load <8 x i32>* %src1
-  %b = load <8 x i32>* %src2
+  %a = load <8 x i32>, <8 x i32>* %src1
+  %b = load <8 x i32>, <8 x i32>* %src2
   %shuffle.i = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
   ret <8 x i32> %shuffle.i
 }
@@ -86,8 +86,8 @@
 ; CHECK: vunpckhpd (%
 define <4 x i64> @unpackhipd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
 entry:
-  %a = load <4 x i64>* %src1
-  %b = load <4 x i64>* %src2
+  %a = load <4 x i64>, <4 x i64>* %src1
+  %b = load <4 x i64>, <4 x i64>* %src2
   %shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   ret <4 x i64> %shuffle.i
 }
@@ -102,8 +102,8 @@
 ; CHECK: vunpcklps (%
 define <8 x i32> @unpacklops2(<8 x i32>* %src1, <8 x i32>* %src2) nounwind uwtable readnone ssp {
 entry:
-  %a = load <8 x i32>* %src1
-  %b = load <8 x i32>* %src2
+  %a = load <8 x i32>, <8 x i32>* %src1
+  %b = load <8 x i32>, <8 x i32>* %src2
   %shuffle.i = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
   ret <8 x i32> %shuffle.i
 }
@@ -118,8 +118,8 @@
 ; CHECK: vunpcklpd (%
 define <4 x i64> @unpacklopd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
 entry:
-  %a = load <4 x i64>* %src1
-  %b = load <4 x i64>* %src2
+  %a = load <4 x i64>, <4 x i64>* %src1
+  %b = load <4 x i64>, <4 x i64>* %src2
   %shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   ret <4 x i64> %shuffle.i
 }
diff --git a/llvm/test/CodeGen/X86/avx-varargs-x86_64.ll b/llvm/test/CodeGen/X86/avx-varargs-x86_64.ll
index f73174d..f550733 100644
--- a/llvm/test/CodeGen/X86/avx-varargs-x86_64.ll
+++ b/llvm/test/CodeGen/X86/avx-varargs-x86_64.ll
@@ -9,7 +9,7 @@
 ; CHECK: vmovaps	%ymm0, (%rsp)
 define void @test1() nounwind uwtable ssp {
 entry:
-  %0 = load <8 x float>* @x, align 32
+  %0 = load <8 x float>, <8 x float>* @x, align 32
   %call = call i32 (i32, ...)* @f(i32 1, <8 x float> %0)
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/avx-vbroadcast.ll b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
index 2ebe6fd..8b8c11b 100644
--- a/llvm/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
@@ -3,7 +3,7 @@
 ; CHECK: vbroadcastsd (%
 define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
 entry:
-  %q = load i64* %ptr, align 8
+  %q = load i64, i64* %ptr, align 8
   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
   %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
   %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
@@ -14,7 +14,7 @@
 ; CHECK: vbroadcastss (%
 define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp {
 entry:
-  %q = load i32* %ptr, align 4
+  %q = load i32, i32* %ptr, align 4
   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
   %vecinit2.i = insertelement <8 x i32> %vecinit.i, i32 %q, i32 1
   %vecinit4.i = insertelement <8 x i32> %vecinit2.i, i32 %q, i32 2
@@ -25,7 +25,7 @@
 ; CHECK: vbroadcastsd (%
 define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp {
 entry:
-  %q = load double* %ptr, align 8
+  %q = load double, double* %ptr, align 8
   %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
   %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
   %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
@@ -36,7 +36,7 @@
 ; CHECK: vbroadcastss (%
 define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp {
 entry:
-  %q = load float* %ptr, align 4
+  %q = load float, float* %ptr, align 4
   %vecinit.i = insertelement <8 x float> undef, float %q, i32 0
   %vecinit2.i = insertelement <8 x float> %vecinit.i, float %q, i32 1
   %vecinit4.i = insertelement <8 x float> %vecinit2.i, float %q, i32 2
@@ -49,7 +49,7 @@
 ; CHECK: vbroadcastss (%
 define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
 entry:
-  %q = load float* %ptr, align 4
+  %q = load float, float* %ptr, align 4
   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
@@ -73,7 +73,7 @@
 ; CHECK: vbroadcastss (%
 define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
 entry:
-  %q = load i32* %ptr, align 4
+  %q = load i32, i32* %ptr, align 4
   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
   %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %q, i32 1
   %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %q, i32 2
@@ -88,7 +88,7 @@
 ; CHECK: ret
 define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
 entry:
-  %q = load i64* %ptr, align 8
+  %q = load i64, i64* %ptr, align 8
   %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
   %vecinit2.i = insertelement <2 x i64> %vecinit.i, i64 %q, i32 1
   ret <2 x i64> %vecinit2.i
@@ -107,7 +107,7 @@
 ; CHECK: ret
 define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
 entry:
-  %q = load double* %ptr, align 4
+  %q = load double, double* %ptr, align 4
   %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
   %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
   ret <2 x double> %vecinit2.i
@@ -118,13 +118,13 @@
 ; CHECK: ret
 define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
 entry:
-  %q = load float* %ptr, align 4
+  %q = load float, float* %ptr, align 4
   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
   ; force a chain
-  %j = load i32* %k, align 4
+  %j = load i32, i32* %k, align 4
   store i32 %j, i32* undef
   ret <4 x float> %vecinit6.i
 }
@@ -135,7 +135,7 @@
 ; CHECK: ret
 define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
 entry:
-  %q = load float* %ptr, align 4
+  %q = load float, float* %ptr, align 4
   %v = insertelement <4 x float> undef, float %q, i32 0
   %t = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
   ret <4 x float> %t
@@ -151,7 +151,7 @@
 ; CHECK: vbroadcastss (%
 ; CHECK-NEXT: ret
 define <8 x float> @splat_concat1(float* %p) {
-  %1 = load float* %p, align 4
+  %1 = load float, float* %p, align 4
   %2 = insertelement <4 x float> undef, float %1, i32 0
   %3 = insertelement <4 x float> %2, float %1, i32 1
   %4 = insertelement <4 x float> %3, float %1, i32 2
@@ -165,7 +165,7 @@
 ; CHECK: vbroadcastss (%
 ; CHECK-NEXT: ret
 define <8 x float> @splat_concat2(float* %p) {
-  %1 = load float* %p, align 4
+  %1 = load float, float* %p, align 4
   %2 = insertelement <4 x float> undef, float %1, i32 0
   %3 = insertelement <4 x float> %2, float %1, i32 1
   %4 = insertelement <4 x float> %3, float %1, i32 2
@@ -183,7 +183,7 @@
 ; CHECK: vbroadcastsd (%
 ; CHECK-NEXT: ret
 define <4 x double> @splat_concat3(double* %p) {
-  %1 = load double* %p, align 8
+  %1 = load double, double* %p, align 8
   %2 = insertelement <2 x double> undef, double %1, i32 0
   %3 = insertelement <2 x double> %2, double %1, i32 1
   %4 = shufflevector <2 x double> %3, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
@@ -195,7 +195,7 @@
 ; CHECK: vbroadcastsd (%
 ; CHECK-NEXT: ret
 define <4 x double> @splat_concat4(double* %p) {
-  %1 = load double* %p, align 8
+  %1 = load double, double* %p, align 8
   %2 = insertelement <2 x double> undef, double %1, i32 0
   %3 = insertelement <2 x double> %2, double %1, i32 1
   %4 = insertelement <2 x double> undef, double %1, i32 0
diff --git a/llvm/test/CodeGen/X86/avx-vinsertf128.ll b/llvm/test/CodeGen/X86/avx-vinsertf128.ll
index e1984fe..d0f8f4e 100644
--- a/llvm/test/CodeGen/X86/avx-vinsertf128.ll
+++ b/llvm/test/CodeGen/X86/avx-vinsertf128.ll
@@ -112,7 +112,7 @@
 entry:
   %add.ptr = getelementptr inbounds float, float* %f, i64 4
   %0 = bitcast float* %add.ptr to <4 x float>*
-  %1 = load <4 x float>* %0, align 16
+  %1 = load <4 x float>, <4 x float>* %0, align 16
   %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
   ret <8 x float> %2
 }
@@ -125,7 +125,7 @@
 entry:
   %add.ptr = getelementptr inbounds float, float* %f, i64 4
   %0 = bitcast float* %add.ptr to <4 x float>*
-  %1 = load <4 x float>* %0, align 8
+  %1 = load <4 x float>, <4 x float>* %0, align 8
   %2 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> undef, <4 x float> %1, i8 1)
   ret <8 x float> %2
 }
diff --git a/llvm/test/CodeGen/X86/avx-vperm2x128.ll b/llvm/test/CodeGen/X86/avx-vperm2x128.ll
index 43303ca..ef3f795 100644
--- a/llvm/test/CodeGen/X86/avx-vperm2x128.ll
+++ b/llvm/test/CodeGen/X86/avx-vperm2x128.ll
@@ -160,8 +160,8 @@
 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
 ; AVX2-NEXT:    retq
 entry:
-  %c = load <16 x i16>* %a
-  %d = load <16 x i16>* %b
+  %c = load <16 x i16>, <16 x i16>* %a
+  %d = load <16 x i16>, <16 x i16>* %b
   %c2 = add <16 x i16> %c, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %shuffle = shufflevector <16 x i16> %c2, <16 x i16> %d, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <16 x i16> %shuffle
diff --git a/llvm/test/CodeGen/X86/avx-vzeroupper.ll b/llvm/test/CodeGen/X86/avx-vzeroupper.ll
index a2163a2..4f0c600 100644
--- a/llvm/test/CodeGen/X86/avx-vzeroupper.ll
+++ b/llvm/test/CodeGen/X86/avx-vzeroupper.ll
@@ -24,7 +24,7 @@
 ; CHECK: _test01
 define <8 x float> @test01(<4 x float> %a, <4 x float> %b, <8 x float> %c) nounwind uwtable ssp {
 entry:
-  %tmp = load <4 x float>* @x, align 16
+  %tmp = load <4 x float>, <4 x float>* @x, align 16
   ; CHECK: vzeroupper
   ; CHECK-NEXT: callq _do_sse
   %call = tail call <4 x float> @do_sse(<4 x float> %tmp) nounwind
@@ -73,7 +73,7 @@
   %call5 = tail call <4 x float> @do_sse(<4 x float> %c.017) nounwind
   ; CHECK-NEXT: callq _do_sse
   %call7 = tail call <4 x float> @do_sse(<4 x float> %call5) nounwind
-  %tmp11 = load <8 x float>* @g, align 32
+  %tmp11 = load <8 x float>, <8 x float>* @g, align 32
   %0 = tail call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %tmp11, i8 1) nounwind
   ; CHECK: vzeroupper
   ; CHECK-NEXT: callq _do_sse
diff --git a/llvm/test/CodeGen/X86/avx.ll b/llvm/test/CodeGen/X86/avx.ll
index 1604f10..f71ec5c 100644
--- a/llvm/test/CodeGen/X86/avx.ll
+++ b/llvm/test/CodeGen/X86/avx.ll
@@ -34,7 +34,7 @@
 ; CHECK-NOT: mov
 ; CHECK: insertps    $48
 ; CHECK-NEXT: ret
-  %1 = load <4 x float>* %pb, align 16
+  %1 = load <4 x float>, <4 x float>* %pb, align 16
   %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 48)
   ret <4 x float> %2
 }
@@ -48,7 +48,7 @@
 ;; Try to match a bit more of the instr, since we need the load's offset.
 ; CHECK: insertps    $96, 4(%{{...}}), %
 ; CHECK-NEXT: ret
-  %1 = load <4 x float>* %pb, align 16
+  %1 = load <4 x float>, <4 x float>* %pb, align 16
   %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 96)
   ret <4 x float> %2
 }
@@ -63,7 +63,7 @@
 ; CHECK: vinsertps    $192, 12(%{{...}},%{{...}}), %
 ; CHECK-NEXT: ret
   %1 = getelementptr inbounds <4 x float>, <4 x float>* %pb, i64 %index
-  %2 = load <4 x float>* %1, align 16
+  %2 = load <4 x float>, <4 x float>* %1, align 16
   %3 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %2, i32 192)
   ret <4 x float> %3
 }
@@ -77,7 +77,7 @@
 ; CHECK: insertps    $48
 ; CHECK-NEXT: ret
   %1 = getelementptr inbounds float, float* %fb, i64 %index
-  %2 = load float* %1, align 4
+  %2 = load float, float* %1, align 4
   %3 = insertelement <4 x float> undef, float %2, i32 0
   %4 = insertelement <4 x float> %3, float %2, i32 1
   %5 = insertelement <4 x float> %4, float %2, i32 2
@@ -93,7 +93,7 @@
 ; CHECK-NOT: mov
 ; CHECK: insertps    $48
 ; CHECK-NEXT: ret
-  %1 = load <4 x float>* %b, align 4
+  %1 = load <4 x float>, <4 x float>* %b, align 4
   %2 = extractelement <4 x float> %1, i32 0
   %3 = insertelement <4 x float> undef, float %2, i32 0
   %4 = insertelement <4 x float> %3, float %2, i32 1
@@ -120,7 +120,7 @@
 ; CHECK: vaddps
 ; CHECK-NEXT: ret
   %1 = getelementptr inbounds float, float* %fb, i64 %index
-  %2 = load float* %1, align 4
+  %2 = load float, float* %1, align 4
   %3 = insertelement <4 x float> undef, float %2, i32 0
   %4 = insertelement <4 x float> %3, float %2, i32 1
   %5 = insertelement <4 x float> %4, float %2, i32 2
diff --git a/llvm/test/CodeGen/X86/avx1-logical-load-folding.ll b/llvm/test/CodeGen/X86/avx1-logical-load-folding.ll
index 32301b1..90e00c9 100644
--- a/llvm/test/CodeGen/X86/avx1-logical-load-folding.ll
+++ b/llvm/test/CodeGen/X86/avx1-logical-load-folding.ll
@@ -6,7 +6,7 @@
 ; Function Attrs: nounwind ssp uwtable
 define void @test1(float* %A, float* %C) #0 {
   %tmp1 = bitcast float* %A to <8 x float>*
-  %tmp2 = load <8 x float>* %tmp1, align 32
+  %tmp2 = load <8 x float>, <8 x float>* %tmp1, align 32
   %tmp3 = bitcast <8 x float> %tmp2 to <8 x i32>
   %tmp4 = and <8 x i32> %tmp3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
   %tmp5 = bitcast <8 x i32> %tmp4 to <8 x float>
@@ -20,7 +20,7 @@
 ; Function Attrs: nounwind ssp uwtable
 define void @test2(float* %A, float* %C) #0 {
   %tmp1 = bitcast float* %A to <8 x float>*
-  %tmp2 = load <8 x float>* %tmp1, align 32
+  %tmp2 = load <8 x float>, <8 x float>* %tmp1, align 32
   %tmp3 = bitcast <8 x float> %tmp2 to <8 x i32>
   %tmp4 = or <8 x i32> %tmp3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
   %tmp5 = bitcast <8 x i32> %tmp4 to <8 x float>
@@ -34,7 +34,7 @@
 ; Function Attrs: nounwind ssp uwtable
 define void @test3(float* %A, float* %C) #0 {
   %tmp1 = bitcast float* %A to <8 x float>*
-  %tmp2 = load <8 x float>* %tmp1, align 32
+  %tmp2 = load <8 x float>, <8 x float>* %tmp1, align 32
   %tmp3 = bitcast <8 x float> %tmp2 to <8 x i32>
   %tmp4 = xor <8 x i32> %tmp3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
   %tmp5 = bitcast <8 x i32> %tmp4 to <8 x float>
@@ -47,7 +47,7 @@
 
 define void @test4(float* %A, float* %C) #0 {
   %tmp1 = bitcast float* %A to <8 x float>*
-  %tmp2 = load <8 x float>* %tmp1, align 32
+  %tmp2 = load <8 x float>, <8 x float>* %tmp1, align 32
   %tmp3 = bitcast <8 x float> %tmp2 to <8 x i32>
   %tmp4 = xor <8 x i32> %tmp3, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
   %tmp5 = and <8 x i32> %tmp4, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
diff --git a/llvm/test/CodeGen/X86/avx2-conversions.ll b/llvm/test/CodeGen/X86/avx2-conversions.ll
index 5f17f1b..9b6d5aa 100644
--- a/llvm/test/CodeGen/X86/avx2-conversions.ll
+++ b/llvm/test/CodeGen/X86/avx2-conversions.ll
@@ -95,7 +95,7 @@
 ; CHECK: vpmovsxdq (%r{{[^,]*}}), %ymm{{.*}}
 ; CHECK: ret 
 define <4 x i64> @load_sext_test1(<4 x i32> *%ptr) {
- %X = load <4 x i32>* %ptr
+ %X = load <4 x i32>, <4 x i32>* %ptr
  %Y = sext <4 x i32> %X to <4 x i64>
  ret <4 x i64>%Y
 }
@@ -104,7 +104,7 @@
 ; CHECK: vpmovsxbq (%r{{[^,]*}}), %ymm{{.*}}
 ; CHECK: ret 
 define <4 x i64> @load_sext_test2(<4 x i8> *%ptr) {
- %X = load <4 x i8>* %ptr
+ %X = load <4 x i8>, <4 x i8>* %ptr
  %Y = sext <4 x i8> %X to <4 x i64>
  ret <4 x i64>%Y
 }
@@ -113,7 +113,7 @@
 ; CHECK: vpmovsxwq (%r{{[^,]*}}), %ymm{{.*}}
 ; CHECK: ret 
 define <4 x i64> @load_sext_test3(<4 x i16> *%ptr) {
- %X = load <4 x i16>* %ptr
+ %X = load <4 x i16>, <4 x i16>* %ptr
  %Y = sext <4 x i16> %X to <4 x i64>
  ret <4 x i64>%Y
 }
@@ -122,7 +122,7 @@
 ; CHECK: vpmovsxwd (%r{{[^,]*}}), %ymm{{.*}}
 ; CHECK: ret 
 define <8 x i32> @load_sext_test4(<8 x i16> *%ptr) {
- %X = load <8 x i16>* %ptr
+ %X = load <8 x i16>, <8 x i16>* %ptr
  %Y = sext <8 x i16> %X to <8 x i32>
  ret <8 x i32>%Y
 }
@@ -131,7 +131,7 @@
 ; CHECK: vpmovsxbd (%r{{[^,]*}}), %ymm{{.*}}
 ; CHECK: ret 
 define <8 x i32> @load_sext_test5(<8 x i8> *%ptr) {
- %X = load <8 x i8>* %ptr
+ %X = load <8 x i8>, <8 x i8>* %ptr
  %Y = sext <8 x i8> %X to <8 x i32>
  ret <8 x i32>%Y
 }
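
; Illustrative sketch (hypothetical @load_sext_example): the extending-load
; idiom these tests cover, in the new syntax. A wide load feeding a sext is
; what gets matched to a memory-operand vpmovsx* above.
define <4 x i64> @load_sext_example(<4 x i32>* %ptr) {
  %X = load <4 x i32>, <4 x i32>* %ptr
  %Y = sext <4 x i32> %X to <4 x i64>
  ret <4 x i64> %Y
}
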
diff --git a/llvm/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll b/llvm/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll
index 7301b7c..6bd6a50 100644
--- a/llvm/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll
@@ -3,7 +3,7 @@
 define <16 x i16> @test_lvm_x86_avx2_pmovsxbw(<16 x i8>* %a) {
 ; CHECK-LABEL: test_lvm_x86_avx2_pmovsxbw
 ; CHECK: vpmovsxbw (%rdi), %ymm0
-  %1 = load <16 x i8>* %a, align 1
+  %1 = load <16 x i8>, <16 x i8>* %a, align 1
   %2 = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %1)
   ret <16 x i16> %2
 }
@@ -11,7 +11,7 @@
 define <8 x i32> @test_llvm_x86_avx2_pmovsxbd(<16 x i8>* %a) {
 ; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbd
 ; CHECK: vpmovsxbd (%rdi), %ymm0
-  %1 = load <16 x i8>* %a, align 1
+  %1 = load <16 x i8>, <16 x i8>* %a, align 1
   %2 = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %1)
   ret <8 x i32> %2
 }
@@ -19,7 +19,7 @@
 define <4 x i64> @test_llvm_x86_avx2_pmovsxbq(<16 x i8>* %a) {
 ; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbq
 ; CHECK: vpmovsxbq (%rdi), %ymm0
-  %1 = load <16 x i8>* %a, align 1
+  %1 = load <16 x i8>, <16 x i8>* %a, align 1
   %2 = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %1)
   ret <4 x i64> %2
 }
@@ -27,7 +27,7 @@
 define <8 x i32> @test_llvm_x86_avx2_pmovsxwd(<8 x i16>* %a) {
 ; CHECK-LABEL: test_llvm_x86_avx2_pmovsxwd
 ; CHECK: vpmovsxwd (%rdi), %ymm0
-  %1 = load <8 x i16>* %a, align 1
+  %1 = load <8 x i16>, <8 x i16>* %a, align 1
   %2 = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %1)
   ret <8 x i32> %2
 }
@@ -35,7 +35,7 @@
 define <4 x i64> @test_llvm_x86_avx2_pmovsxwq(<8 x i16>* %a) {
 ; CHECK-LABEL: test_llvm_x86_avx2_pmovsxwq
 ; CHECK: vpmovsxwq (%rdi), %ymm0
-  %1 = load <8 x i16>* %a, align 1
+  %1 = load <8 x i16>, <8 x i16>* %a, align 1
   %2 = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %1)
   ret <4 x i64> %2
 }
@@ -43,7 +43,7 @@
 define <4 x i64> @test_llvm_x86_avx2_pmovsxdq(<4 x i32>* %a) {
 ; CHECK-LABEL: test_llvm_x86_avx2_pmovsxdq
 ; CHECK: vpmovsxdq (%rdi), %ymm0
-  %1 = load <4 x i32>* %a, align 1
+  %1 = load <4 x i32>, <4 x i32>* %a, align 1
   %2 = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %1)
   ret <4 x i64> %2
 }
@@ -51,7 +51,7 @@
 define <16 x i16> @test_lvm_x86_avx2_pmovzxbw(<16 x i8>* %a) {
 ; CHECK-LABEL: test_lvm_x86_avx2_pmovzxbw
 ; CHECK: vpmovzxbw (%rdi), %ymm0
-  %1 = load <16 x i8>* %a, align 1
+  %1 = load <16 x i8>, <16 x i8>* %a, align 1
   %2 = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %1)
   ret <16 x i16> %2
 }
@@ -59,7 +59,7 @@
 define <8 x i32> @test_llvm_x86_avx2_pmovzxbd(<16 x i8>* %a) {
 ; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbd
 ; CHECK: vpmovzxbd (%rdi), %ymm0
-  %1 = load <16 x i8>* %a, align 1
+  %1 = load <16 x i8>, <16 x i8>* %a, align 1
   %2 = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %1)
   ret <8 x i32> %2
 }
@@ -67,7 +67,7 @@
 define <4 x i64> @test_llvm_x86_avx2_pmovzxbq(<16 x i8>* %a) {
 ; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbq
 ; CHECK: vpmovzxbq (%rdi), %ymm0
-  %1 = load <16 x i8>* %a, align 1
+  %1 = load <16 x i8>, <16 x i8>* %a, align 1
   %2 = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %1)
   ret <4 x i64> %2
 }
@@ -75,7 +75,7 @@
 define <8 x i32> @test_llvm_x86_avx2_pmovzxwd(<8 x i16>* %a) {
 ; CHECK-LABEL: test_llvm_x86_avx2_pmovzxwd
 ; CHECK: vpmovzxwd (%rdi), %ymm0
-  %1 = load <8 x i16>* %a, align 1
+  %1 = load <8 x i16>, <8 x i16>* %a, align 1
   %2 = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %1)
   ret <8 x i32> %2
 }
@@ -83,7 +83,7 @@
 define <4 x i64> @test_llvm_x86_avx2_pmovzxwq(<8 x i16>* %a) {
 ; CHECK-LABEL: test_llvm_x86_avx2_pmovzxwq
 ; CHECK: vpmovzxwq (%rdi), %ymm0
-  %1 = load <8 x i16>* %a, align 1
+  %1 = load <8 x i16>, <8 x i16>* %a, align 1
   %2 = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %1)
   ret <4 x i64> %2
 }
@@ -91,7 +91,7 @@
 define <4 x i64> @test_llvm_x86_avx2_pmovzxdq(<4 x i32>* %a) {
 ; CHECK-LABEL: test_llvm_x86_avx2_pmovzxdq
 ; CHECK: vpmovzxdq (%rdi), %ymm0
-  %1 = load <4 x i32>* %a, align 1
+  %1 = load <4 x i32>, <4 x i32>* %a, align 1
   %2 = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %1)
   ret <4 x i64> %2
 }
diff --git a/llvm/test/CodeGen/X86/avx2-shift.ll b/llvm/test/CodeGen/X86/avx2-shift.ll
index 025d52e..5adbb2e 100644
--- a/llvm/test/CodeGen/X86/avx2-shift.ll
+++ b/llvm/test/CodeGen/X86/avx2-shift.ll
@@ -130,7 +130,7 @@
 ; CHECK: vpsravd (%
 ; CHECK: ret
 define <4 x i32> @variable_sra0_load(<4 x i32> %x, <4 x i32>* %y) {
-  %y1 = load <4 x i32>* %y
+  %y1 = load <4 x i32>, <4 x i32>* %y
   %k = ashr <4 x i32> %x, %y1
   ret <4 x i32> %k
 }
@@ -139,7 +139,7 @@
 ; CHECK: vpsravd (%
 ; CHECK: ret
 define <8 x i32> @variable_sra1_load(<8 x i32> %x, <8 x i32>* %y) {
-  %y1 = load <8 x i32>* %y
+  %y1 = load <8 x i32>, <8 x i32>* %y
   %k = ashr <8 x i32> %x, %y1
   ret <8 x i32> %k
 }
@@ -148,7 +148,7 @@
 ; CHECK: vpsllvd (%
 ; CHECK: ret
 define <4 x i32> @variable_shl0_load(<4 x i32> %x, <4 x i32>* %y) {
-  %y1 = load <4 x i32>* %y
+  %y1 = load <4 x i32>, <4 x i32>* %y
   %k = shl <4 x i32> %x, %y1
   ret <4 x i32> %k
 }
@@ -156,7 +156,7 @@
 ; CHECK: vpsllvd (%
 ; CHECK: ret
 define <8 x i32> @variable_shl1_load(<8 x i32> %x, <8 x i32>* %y) {
-  %y1 = load <8 x i32>* %y
+  %y1 = load <8 x i32>, <8 x i32>* %y
   %k = shl <8 x i32> %x, %y1
   ret <8 x i32> %k
 }
@@ -164,7 +164,7 @@
 ; CHECK: vpsllvq (%
 ; CHECK: ret
 define <2 x i64> @variable_shl2_load(<2 x i64> %x, <2 x i64>* %y) {
-  %y1 = load <2 x i64>* %y
+  %y1 = load <2 x i64>, <2 x i64>* %y
   %k = shl <2 x i64> %x, %y1
   ret <2 x i64> %k
 }
@@ -172,7 +172,7 @@
 ; CHECK: vpsllvq (%
 ; CHECK: ret
 define <4 x i64> @variable_shl3_load(<4 x i64> %x, <4 x i64>* %y) {
-  %y1 = load <4 x i64>* %y
+  %y1 = load <4 x i64>, <4 x i64>* %y
   %k = shl <4 x i64> %x, %y1
   ret <4 x i64> %k
 }
@@ -180,7 +180,7 @@
 ; CHECK: vpsrlvd (%
 ; CHECK: ret
 define <4 x i32> @variable_srl0_load(<4 x i32> %x, <4 x i32>* %y) {
-  %y1 = load <4 x i32>* %y
+  %y1 = load <4 x i32>, <4 x i32>* %y
   %k = lshr <4 x i32> %x, %y1
   ret <4 x i32> %k
 }
@@ -188,7 +188,7 @@
 ; CHECK: vpsrlvd (%
 ; CHECK: ret
 define <8 x i32> @variable_srl1_load(<8 x i32> %x, <8 x i32>* %y) {
-  %y1 = load <8 x i32>* %y
+  %y1 = load <8 x i32>, <8 x i32>* %y
   %k = lshr <8 x i32> %x, %y1
   ret <8 x i32> %k
 }
@@ -196,7 +196,7 @@
 ; CHECK: vpsrlvq (%
 ; CHECK: ret
 define <2 x i64> @variable_srl2_load(<2 x i64> %x, <2 x i64>* %y) {
-  %y1 = load <2 x i64>* %y
+  %y1 = load <2 x i64>, <2 x i64>* %y
   %k = lshr <2 x i64> %x, %y1
   ret <2 x i64> %k
 }
@@ -204,7 +204,7 @@
 ; CHECK: vpsrlvq (%
 ; CHECK: ret
 define <4 x i64> @variable_srl3_load(<4 x i64> %x, <4 x i64>* %y) {
-  %y1 = load <4 x i64>* %y
+  %y1 = load <4 x i64>, <4 x i64>* %y
   %k = lshr <4 x i64> %x, %y1
   ret <4 x i64> %k
 }
diff --git a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
index 83100a8..94dcdca 100644
--- a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -3,7 +3,7 @@
 ; CHECK: vpbroadcastb (%
 define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
 entry:
-  %q = load i8* %ptr, align 4
+  %q = load i8, i8* %ptr, align 4
   %q0 = insertelement <16 x i8> undef, i8 %q, i32 0
   %q1 = insertelement <16 x i8> %q0, i8 %q, i32 1
   %q2 = insertelement <16 x i8> %q1, i8 %q, i32 2
@@ -25,7 +25,7 @@
 ; CHECK: vpbroadcastb (%
 define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp {
 entry:
-  %q = load i8* %ptr, align 4
+  %q = load i8, i8* %ptr, align 4
   %q0 = insertelement <32 x i8> undef, i8 %q, i32 0
   %q1 = insertelement <32 x i8> %q0, i8 %q, i32 1
   %q2 = insertelement <32 x i8> %q1, i8 %q, i32 2
@@ -65,7 +65,7 @@
 
 define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp {
 entry:
-  %q = load i16* %ptr, align 4
+  %q = load i16, i16* %ptr, align 4
   %q0 = insertelement <8 x i16> undef, i16 %q, i32 0
   %q1 = insertelement <8 x i16> %q0, i16 %q, i32 1
   %q2 = insertelement <8 x i16> %q1, i16 %q, i32 2
@@ -79,7 +79,7 @@
 ; CHECK: vpbroadcastw (%
 define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp {
 entry:
-  %q = load i16* %ptr, align 4
+  %q = load i16, i16* %ptr, align 4
   %q0 = insertelement <16 x i16> undef, i16 %q, i32 0
   %q1 = insertelement <16 x i16> %q0, i16 %q, i32 1
   %q2 = insertelement <16 x i16> %q1, i16 %q, i32 2
@@ -101,7 +101,7 @@
 ; CHECK: vbroadcastss (%
 define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp {
 entry:
-  %q = load i32* %ptr, align 4
+  %q = load i32, i32* %ptr, align 4
   %q0 = insertelement <4 x i32> undef, i32 %q, i32 0
   %q1 = insertelement <4 x i32> %q0, i32 %q, i32 1
   %q2 = insertelement <4 x i32> %q1, i32 %q, i32 2
@@ -111,7 +111,7 @@
 ; CHECK: vbroadcastss (%
 define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp {
 entry:
-  %q = load i32* %ptr, align 4
+  %q = load i32, i32* %ptr, align 4
   %q0 = insertelement <8 x i32> undef, i32 %q, i32 0
   %q1 = insertelement <8 x i32> %q0, i32 %q, i32 1
   %q2 = insertelement <8 x i32> %q1, i32 %q, i32 2
@@ -125,7 +125,7 @@
 ; CHECK: vpbroadcastq (%
 define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
 entry:
-  %q = load i64* %ptr, align 4
+  %q = load i64, i64* %ptr, align 4
   %q0 = insertelement <2 x i64> undef, i64 %q, i32 0
   %q1 = insertelement <2 x i64> %q0, i64 %q, i32 1
   ret <2 x i64> %q1
@@ -133,7 +133,7 @@
 ; CHECK: vbroadcastsd (%
 define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
 entry:
-  %q = load i64* %ptr, align 4
+  %q = load i64, i64* %ptr, align 4
   %q0 = insertelement <4 x i64> undef, i64 %q, i32 0
   %q1 = insertelement <4 x i64> %q0, i64 %q, i32 1
   %q2 = insertelement <4 x i64> %q1, i64 %q, i32 2
@@ -145,7 +145,7 @@
 ; this used to crash
 define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
 entry:
-  %q = load double* %ptr, align 4
+  %q = load double, double* %ptr, align 4
   %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
   %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
   ret <2 x double> %vecinit2.i
@@ -431,8 +431,8 @@
   %__b.addr.i = alloca <2 x i64>, align 16
   %vCr = alloca <2 x i64>, align 16
   store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
-  %tmp = load <2 x i64>* %vCr, align 16
-  %tmp2 = load i8* %cV_R.addr, align 4
+  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
+  %tmp2 = load i8, i8* %cV_R.addr, align 4
   %splat.splatinsert = insertelement <16 x i8> undef, i8 %tmp2, i32 0
   %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
   %tmp3 = bitcast <16 x i8> %splat.splat to <2 x i64>
@@ -450,8 +450,8 @@
   %__b.addr.i = alloca <4 x i64>, align 16
   %vCr = alloca <4 x i64>, align 16
   store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
-  %tmp = load <4 x i64>* %vCr, align 16
-  %tmp2 = load i8* %cV_R.addr, align 4
+  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
+  %tmp2 = load i8, i8* %cV_R.addr, align 4
   %splat.splatinsert = insertelement <32 x i8> undef, i8 %tmp2, i32 0
   %splat.splat = shufflevector <32 x i8> %splat.splatinsert, <32 x i8> undef, <32 x i32> zeroinitializer
   %tmp3 = bitcast <32 x i8> %splat.splat to <4 x i64>
@@ -469,8 +469,8 @@
   %__b.addr.i = alloca <2 x i64>, align 16
   %vCr = alloca <2 x i64>, align 16
   store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
-  %tmp = load <2 x i64>* %vCr, align 16
-  %tmp2 = load i16* %cV_R.addr, align 4
+  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
+  %tmp2 = load i16, i16* %cV_R.addr, align 4
   %splat.splatinsert = insertelement <8 x i16> undef, i16 %tmp2, i32 0
   %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
   %tmp3 = bitcast <8 x i16> %splat.splat to <2 x i64>
@@ -488,8 +488,8 @@
   %__b.addr.i = alloca <4 x i64>, align 16
   %vCr = alloca <4 x i64>, align 16
   store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
-  %tmp = load <4 x i64>* %vCr, align 16
-  %tmp2 = load i16* %cV_R.addr, align 4
+  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
+  %tmp2 = load i16, i16* %cV_R.addr, align 4
   %splat.splatinsert = insertelement <16 x i16> undef, i16 %tmp2, i32 0
   %splat.splat = shufflevector <16 x i16> %splat.splatinsert, <16 x i16> undef, <16 x i32> zeroinitializer
   %tmp3 = bitcast <16 x i16> %splat.splat to <4 x i64>
@@ -507,8 +507,8 @@
   %__b.addr.i = alloca <2 x i64>, align 16
   %vCr = alloca <2 x i64>, align 16
   store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
-  %tmp = load <2 x i64>* %vCr, align 16
-  %tmp2 = load i32* %cV_R.addr, align 4
+  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
+  %tmp2 = load i32, i32* %cV_R.addr, align 4
   %splat.splatinsert = insertelement <4 x i32> undef, i32 %tmp2, i32 0
   %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
   %tmp3 = bitcast <4 x i32> %splat.splat to <2 x i64>
@@ -526,8 +526,8 @@
   %__b.addr.i = alloca <4 x i64>, align 16
   %vCr = alloca <4 x i64>, align 16
   store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
-  %tmp = load <4 x i64>* %vCr, align 16
-  %tmp2 = load i32* %cV_R.addr, align 4
+  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
+  %tmp2 = load i32, i32* %cV_R.addr, align 4
   %splat.splatinsert = insertelement <8 x i32> undef, i32 %tmp2, i32 0
   %splat.splat = shufflevector <8 x i32> %splat.splatinsert, <8 x i32> undef, <8 x i32> zeroinitializer
   %tmp3 = bitcast <8 x i32> %splat.splat to <4 x i64>
@@ -545,8 +545,8 @@
   %__b.addr.i = alloca <2 x i64>, align 16
   %vCr = alloca <2 x i64>, align 16
   store <2 x i64> zeroinitializer, <2 x i64>* %vCr, align 16
-  %tmp = load <2 x i64>* %vCr, align 16
-  %tmp2 = load i64* %cV_R.addr, align 4
+  %tmp = load <2 x i64>, <2 x i64>* %vCr, align 16
+  %tmp2 = load i64, i64* %cV_R.addr, align 4
   %splat.splatinsert = insertelement <2 x i64> undef, i64 %tmp2, i32 0
   %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
   store <2 x i64> %tmp, <2 x i64>* %__a.addr.i, align 16
@@ -563,8 +563,8 @@
   %__b.addr.i = alloca <4 x i64>, align 16
   %vCr = alloca <4 x i64>, align 16
   store <4 x i64> zeroinitializer, <4 x i64>* %vCr, align 16
-  %tmp = load <4 x i64>* %vCr, align 16
-  %tmp2 = load i64* %cV_R.addr, align 4
+  %tmp = load <4 x i64>, <4 x i64>* %vCr, align 16
+  %tmp2 = load i64, i64* %cV_R.addr, align 4
   %splat.splatinsert = insertelement <4 x i64> undef, i64 %tmp2, i32 0
   %splat.splat = shufflevector <4 x i64> %splat.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer
   store <4 x i64> %tmp, <4 x i64>* %__a.addr.i, align 16
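
; Illustrative sketch (hypothetical @splat_example): the splat-from-memory
; idiom exercised in this file, in the new syntax. A scalar load feeding an
; insertelement/shufflevector splat is what folds into a single vpbroadcast
; with a memory operand.
define <4 x i32> @splat_example(i32* %p) {
  %s = load i32, i32* %p, align 4
  %v = insertelement <4 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}
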
diff --git a/llvm/test/CodeGen/X86/avx512-arith.ll b/llvm/test/CodeGen/X86/avx512-arith.ll
index 94b0821..1ecd100 100644
--- a/llvm/test/CodeGen/X86/avx512-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512-arith.ll
@@ -56,7 +56,7 @@
 ; CHECK-NEXT:    vsubpd (%rdi), %zmm0, %zmm0
 ; CHECK-NEXT:    retq
 entry:
-  %tmp2 = load <8 x double>* %x, align 8
+  %tmp2 = load <8 x double>, <8 x double>* %x, align 8
   %sub.i = fsub <8 x double> %y, %tmp2
   ret <8 x double> %sub.i
 }
@@ -77,7 +77,7 @@
 ; CHECK-NEXT:    vsubps (%rdi), %zmm0, %zmm0
 ; CHECK-NEXT:    retq
 entry:
-  %tmp2 = load <16 x float>* %x, align 4
+  %tmp2 = load <16 x float>, <16 x float>* %x, align 4
   %sub.i = fsub <16 x float> %y, %tmp2
   ret <16 x float> %sub.i
 }
@@ -193,7 +193,7 @@
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpaddq (%rdi), %zmm0, %zmm0
 ; CHECK-NEXT:    retq
-  %tmp = load <8 x i64>* %j, align 4
+  %tmp = load <8 x i64>, <8 x i64>* %j, align 4
   %x = add <8 x i64> %i, %tmp
   ret <8 x i64> %x
 }
@@ -212,7 +212,7 @@
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0
 ; CHECK-NEXT:    retq
-  %tmp = load i64* %j
+  %tmp = load i64, i64* %j
   %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
   %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
   %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
@@ -239,7 +239,7 @@
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0
 ; CHECK-NEXT:    retq
-  %tmp = load <16 x i32>* %j, align 4
+  %tmp = load <16 x i32>, <16 x i32>* %j, align 4
   %x = add <16 x i32> %i, %tmp
   ret <16 x i32> %x
 }
@@ -287,7 +287,7 @@
 ; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1}
 ; CHECK-NEXT:    retq
   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
-  %j = load <16 x i32>* %j.ptr
+  %j = load <16 x i32>, <16 x i32>* %j.ptr
   %x = add <16 x i32> %i, %j
   %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
   ret <16 x i32> %r
@@ -314,7 +314,7 @@
 ; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
 ; CHECK-NEXT:    retq
   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
-  %j = load <16 x i32>* %j.ptr
+  %j = load <16 x i32>, <16 x i32>* %j.ptr
   %x = add <16 x i32> %i, %j
   %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
   ret <16 x i32> %r
@@ -445,7 +445,7 @@
 ; CHECK-NEXT:    vpandd (%rdi), %zmm0, %zmm0
 ; CHECK-NEXT:    retq
 entry:
-  %a = load <16 x i32>* %x, align 4
+  %a = load <16 x i32>, <16 x i32>* %x, align 4
   %b = and <16 x i32> %y, %a
   ret <16 x i32> %b
 }
@@ -456,7 +456,7 @@
 ; CHECK-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
 ; CHECK-NEXT:    retq
 entry:
-  %a = load i64* %ap, align 8
+  %a = load i64, i64* %ap, align 8
   %b = insertelement <8 x i64> undef, i64 %a, i32 0
   %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
   %d = and <8 x i64> %p1, %c
@@ -593,7 +593,7 @@
                                      <8 x double>* %j,  <8 x i64> %mask1)
                                      nounwind {
   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
-  %tmp = load <8 x double>* %j, align 8
+  %tmp = load <8 x double>, <8 x double>* %j, align 8
   %x = fadd <8 x double> %i, %tmp
   %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
   ret <8 x double> %r
@@ -605,7 +605,7 @@
 define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j,
                                       <8 x i64> %mask1) nounwind {
   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
-  %tmp = load <8 x double>* %j, align 8
+  %tmp = load <8 x double>, <8 x double>* %j, align 8
   %x = fadd <8 x double> %i, %tmp
   %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
   ret <8 x double> %r
@@ -615,7 +615,7 @@
 ; CHECK: vaddpd (%rdi){1to8}, %zmm{{.*}}
 ; CHECK: ret
 define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
-  %tmp = load double* %j
+  %tmp = load double, double* %j
   %b = insertelement <8 x double> undef, double %tmp, i32 0
   %c = shufflevector <8 x double> %b, <8 x double> undef,
                      <8 x i32> zeroinitializer
@@ -629,7 +629,7 @@
 define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i,
                                       double* %j, <8 x i64> %mask1) nounwind {
   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
-  %tmp = load double* %j
+  %tmp = load double, double* %j
   %b = insertelement <8 x double> undef, double %tmp, i32 0
   %c = shufflevector <8 x double> %b, <8 x double> undef,
                      <8 x i32> zeroinitializer
@@ -644,7 +644,7 @@
 define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
                                        <8 x i64> %mask1) nounwind {
   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
-  %tmp = load double* %j
+  %tmp = load double, double* %j
   %b = insertelement <8 x double> undef, double %tmp, i32 0
   %c = shufflevector <8 x double> %b, <8 x double> undef,
                      <8 x i32> zeroinitializer
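
; Illustrative sketch (hypothetical @broadcast_add_example): the AVX-512
; embedded-broadcast pattern from the {1to8} tests above, in the new syntax.
; The scalar load is splatted and consumed by the add, which the backend
; folds into a single broadcast memory operand.
define <8 x i64> @broadcast_add_example(<8 x i64> %i, i64* %j) {
  %s = load i64, i64* %j
  %b = insertelement <8 x i64> undef, i64 %s, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %x = add <8 x i64> %i, %c
  ret <8 x i64> %x
}
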
diff --git a/llvm/test/CodeGen/X86/avx512-build-vector.ll b/llvm/test/CodeGen/X86/avx512-build-vector.ll
index 9e9ad31..8373c6d 100644
--- a/llvm/test/CodeGen/X86/avx512-build-vector.ll
+++ b/llvm/test/CodeGen/X86/avx512-build-vector.ll
@@ -9,7 +9,7 @@
 ; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4],ymm1[5,6,7]
 ; CHECK-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; CHECK-NEXT:    retq
-   %y = load i32* %x, align 4
+   %y = load i32, i32* %x, align 4
    %res = insertelement <16 x i32>zeroinitializer, i32 %y, i32 4
    ret <16 x i32>%res
 }
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index 2b672a7..842b9f8 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -87,7 +87,7 @@
 ; CHECK: ret
 define double @funcA(i64* nocapture %e) {
 entry:
-  %tmp1 = load i64* %e, align 8
+  %tmp1 = load i64, i64* %e, align 8
   %conv = sitofp i64 %tmp1 to double
   ret double %conv
 }
@@ -97,7 +97,7 @@
 ; CHECK: ret
 define double @funcB(i32* %e) {
 entry:
-  %tmp1 = load i32* %e, align 4
+  %tmp1 = load i32, i32* %e, align 4
   %conv = sitofp i32 %tmp1 to double
   ret double %conv
 }
@@ -107,7 +107,7 @@
 ; CHECK: ret
 define float @funcC(i32* %e) {
 entry:
-  %tmp1 = load i32* %e, align 4
+  %tmp1 = load i32, i32* %e, align 4
   %conv = sitofp i32 %tmp1 to float
   ret float %conv
 }
@@ -117,7 +117,7 @@
 ; CHECK: ret
 define float @i64tof32(i64* %e) {
 entry:
-  %tmp1 = load i64* %e, align 8
+  %tmp1 = load i64, i64* %e, align 8
   %conv = sitofp i64 %tmp1 to float
   ret float %conv
 }
@@ -129,7 +129,7 @@
 entry:
   %f = alloca float, align 4
   %d = alloca double, align 8
-  %tmp = load float* %f, align 4
+  %tmp = load float, float* %f, align 4
   %conv = fpext float %tmp to double
   store double %conv, double* %d, align 8
   ret void
@@ -144,7 +144,7 @@
 entry:
   %f = alloca float, align 4
   %d = alloca double, align 8
-  %tmp = load double* %d, align 8
+  %tmp = load double, double* %d, align 8
   %conv = fptrunc double %tmp to float
   store float %conv, float* %f, align 4
   ret void
diff --git a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
index 20bf7e4..0e32a1c 100644
--- a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
+++ b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
@@ -170,7 +170,7 @@
 ;CHECK: vscatterdpd
 ;CHECK: ret
 define void @scatter_mask_dpd_execdomain(<8 x i32> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf)  {
-  %x = load <8 x double>* %src, align 64 
+  %x = load <8 x double>, <8 x double>* %src, align 64 
   call void @llvm.x86.avx512.scatter.dpd.512 (i8* %stbuf, i8 %mask, <8 x i32>%ind, <8 x double> %x, i32 4)
   ret void
 }
@@ -180,7 +180,7 @@
 ;CHECK: vscatterqpd
 ;CHECK: ret
 define void @scatter_mask_qpd_execdomain(<8 x i64> %ind, <8 x double>* %src, i8 %mask, i8* %base, i8* %stbuf)  {
-  %x = load <8 x double>* %src, align 64
+  %x = load <8 x double>, <8 x double>* %src, align 64
   call void @llvm.x86.avx512.scatter.qpd.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x double> %x, i32 4)
   ret void
 }
@@ -190,7 +190,7 @@
 ;CHECK: vscatterdps
 ;CHECK: ret
 define void @scatter_mask_dps_execdomain(<16 x i32> %ind, <16 x float>* %src, i16 %mask, i8* %base, i8* %stbuf)  {
-  %x = load <16 x float>* %src, align 64
+  %x = load <16 x float>, <16 x float>* %src, align 64
   call void @llvm.x86.avx512.scatter.dps.512 (i8* %stbuf, i16 %mask, <16 x i32>%ind, <16 x float> %x, i32 4)
   ret void
 }
@@ -200,7 +200,7 @@
 ;CHECK: vscatterqps
 ;CHECK: ret
 define void @scatter_mask_qps_execdomain(<8 x i64> %ind, <8 x float>* %src, i8 %mask, i8* %base, i8* %stbuf)  {
-  %x = load <8 x float>* %src, align 32 
+  %x = load <8 x float>, <8 x float>* %src, align 32 
   call void @llvm.x86.avx512.scatter.qps.512 (i8* %stbuf, i8 %mask, <8 x i64>%ind, <8 x float> %x, i32 4)
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/avx512-i1test.ll b/llvm/test/CodeGen/X86/avx512-i1test.ll
index a237738..ba2f49b 100644
--- a/llvm/test/CodeGen/X86/avx512-i1test.ll
+++ b/llvm/test/CodeGen/X86/avx512-i1test.ll
@@ -18,7 +18,7 @@
   br label %bb33
 
 bb33:                                             ; preds = %bb51, %bb56
-  %r111 = load i64* undef, align 8
+  %r111 = load i64, i64* undef, align 8
   br i1 undef, label %bb51, label %bb35
 
 bb35:                                             ; preds = %bb33
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index d6b887e..6498b20 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -6,7 +6,7 @@
 ;CHECK: vinsertf32x4
 ;CHECK: ret
 define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
-  %rrr = load float* %br
+  %rrr = load float, float* %br
   %rrr2 = insertelement <16 x float> %x, float %rrr, i32 1
   %rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14
   ret <16 x float> %rrr3
@@ -20,7 +20,7 @@
 ;SKX: vinsertf64x2 $3
 ;CHECK: ret
 define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
-  %rrr = load double* %br
+  %rrr = load double, double* %br
   %rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
   %rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
   ret <8 x double> %rrr3
@@ -171,7 +171,7 @@
 ;CHECK: kmovw
 ;CHECK: ret
 define i16 @test15(i1 *%addr) {
-  %x = load i1 * %addr, align 128
+  %x = load i1, i1* %addr, align 128
   %x1 = insertelement <16 x i1> undef, i1 %x, i32 10
   %x2 = bitcast <16 x i1>%x1 to i16
   ret i16 %x2
@@ -183,7 +183,7 @@
 ;CHECK: korw
 ;CHECK: ret
 define i16 @test16(i1 *%addr, i16 %a) {
-  %x = load i1 * %addr, align 128
+  %x = load i1 , i1 * %addr, align 128
   %a1 = bitcast i16 %a to <16 x i1>
   %x1 = insertelement <16 x i1> %a1, i1 %x, i32 10
   %x2 = bitcast <16 x i1>%x1 to i16
@@ -199,7 +199,7 @@
 ;SKX: korb
 ;CHECK: ret
 define i8 @test17(i1 *%addr, i8 %a) {
-  %x = load i1 * %addr, align 128
+  %x = load i1 , i1 * %addr, align 128
   %a1 = bitcast i8 %a to <8 x i1>
   %x1 = insertelement <8 x i1> %a1, i1 %x, i32 4
   %x2 = bitcast <8 x i1>%x1 to i8
diff --git a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
index 3f2691b..2e1b27e 100644
--- a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
+++ b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
@@ -30,7 +30,7 @@
   %y = alloca <16 x float>, align 16
   %x = fadd <16 x float> %a, %b
   %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
-  %2 = load <16 x float>* %y, align 16
+  %2 = load <16 x float>, <16 x float>* %y, align 16
   %3 = fadd <16 x float> %2, %1
   ret <16 x float> %3
 }
@@ -53,7 +53,7 @@
   %y = alloca <16 x float>, align 16
   %x = fadd <16 x float> %a, %b
   %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
-  %2 = load <16 x float>* %y, align 16
+  %2 = load <16 x float>, <16 x float>* %y, align 16
   %3 = fadd <16 x float> %1, %b
   %4 = fadd <16 x float> %2, %3
   ret <16 x float> %4
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index b6375c1..46581f7 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -356,7 +356,7 @@
 define <8 x double> @test_x86_mask_blend_pd_512_memop(<8 x double> %a, <8 x double>* %ptr, i8 %mask) {
   ; CHECK-LABEL: test_x86_mask_blend_pd_512_memop
   ; CHECK: vblendmpd (%
-  %b = load <8 x double>* %ptr
+  %b = load <8 x double>, <8 x double>* %ptr
   %res = call <8 x double> @llvm.x86.avx512.mask.blend.pd.512(<8 x double> %a, <8 x double> %b, i8 %mask) ; <<8 x double>> [#uses=1]
   ret <8 x double> %res
 }
@@ -1435,7 +1435,7 @@
 define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
   ; CHECK-LABEL: test_x86_avx512_psrlv_q_memop
   ; CHECK: vpsrlvq (%
-  %b = load <8 x i64>* %ptr
+  %b = load <8 x i64>, <8 x i64>* %ptr
   %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
   ret <8 x i64> %res
 }
diff --git a/llvm/test/CodeGen/X86/avx512-logic.ll b/llvm/test/CodeGen/X86/avx512-logic.ll
index bee4f52..140ce3b 100644
--- a/llvm/test/CodeGen/X86/avx512-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512-logic.ll
@@ -83,7 +83,7 @@
 ; CHECK: ret
 define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
 entry:
-  %a = load <16 x i32>* %x, align 4
+  %a = load <16 x i32>, <16 x i32>* %x, align 4
   %b = and <16 x i32> %y, %a
   ret <16 x i32> %b
 }
@@ -93,7 +93,7 @@
 ; CHECK: ret
 define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
 entry:
-  %a = load i64* %ap, align 8
+  %a = load i64, i64* %ap, align 8
   %b = insertelement <8 x i64> undef, i64 %a, i32 0
   %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
   %d = and <8 x i64> %p1, %c
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 264d915..c4e6251 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -34,7 +34,7 @@
 ; CHECK: ret
 
 define void @mask16_mem(i16* %ptr) {
-  %x = load i16* %ptr, align 4
+  %x = load i16, i16* %ptr, align 4
   %m0 = bitcast i16 %x to <16 x i1>
   %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
   %ret = bitcast <16 x i1> %m1 to i16
@@ -51,7 +51,7 @@
 ; SKX-NEXT: kmovb %k{{[0-7]}}, ([[ARG1]])
 
 define void @mask8_mem(i8* %ptr) {
-  %x = load i8* %ptr, align 4
+  %x = load i8, i8* %ptr, align 4
   %m0 = bitcast i8 %x to <8 x i1>
   %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
   %ret = bitcast <8 x i1> %m1 to i8
@@ -128,7 +128,7 @@
 
   %maskPtr = alloca <8 x i1>
   store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
-  %mask = load <8 x i1>* %maskPtr
+  %mask = load <8 x i1>, <8 x i1>* %maskPtr
   %mask_convert = bitcast <8 x i1> %mask to i8
   ret i8 %mask_convert
 }
\ No newline at end of file
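
; Illustrative sketch (hypothetical @mask_from_mem, mirroring mask16_mem
; above): the mask-manipulation idiom in the new syntax. An i16 load bitcast
; to <16 x i1> is what lowers to a kmovw of a mask register.
define i16 @mask_from_mem(i16* %ptr) {
  %x = load i16, i16* %ptr, align 2
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
  %ret = bitcast <16 x i1> %m1 to i16
  ret i16 %ret
}
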
diff --git a/llvm/test/CodeGen/X86/avx512-mov.ll b/llvm/test/CodeGen/X86/avx512-mov.ll
index 93875e8..0cd8458 100644
--- a/llvm/test/CodeGen/X86/avx512-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512-mov.ll
@@ -28,7 +28,7 @@
 ; CHECK: vmovd  (%rdi), %xmm0 ## encoding: [0x62
 ; CHECK: ret
 define <4 x i32> @test4(i32* %x) {
-   %y = load i32* %x
+   %y = load i32, i32* %x
    %res = insertelement <4 x i32>undef, i32 %y, i32 0
    ret <4 x i32>%res
 }
@@ -53,7 +53,7 @@
 ; CHECK: vmovss  (%rdi), %xmm0 ## encoding: [0x62
 ; CHECK: ret
 define float @test7(i32* %x) {
-   %y = load i32* %x
+   %y = load i32, i32* %x
    %res = bitcast i32 %y to float
    ret float %res
 }
@@ -78,7 +78,7 @@
 ; CHECK: vmovd (%rdi), %xmm0 ## encoding: [0x62
 ; CHECK: ret
 define <4 x i32> @test10(i32* %x) {
-   %y = load i32* %x, align 4
+   %y = load i32, i32* %x, align 4
    %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
    ret <4 x i32>%res
 }
@@ -87,7 +87,7 @@
 ; CHECK: vmovss  (%rdi), %xmm0 ## encoding: [0x62
 ; CHECK: ret
 define <4 x float> @test11(float* %x) {
-   %y = load float* %x, align 4
+   %y = load float, float* %x, align 4
    %res = insertelement <4 x float>zeroinitializer, float %y, i32 0
    ret <4 x float>%res
 }
@@ -96,7 +96,7 @@
 ; CHECK: vmovsd  (%rdi), %xmm0 ## encoding: [0x62
 ; CHECK: ret
 define <2 x double> @test12(double* %x) {
-   %y = load double* %x, align 8
+   %y = load double, double* %x, align 8
    %res = insertelement <2 x double>zeroinitializer, double %y, i32 0
    ret <2 x double>%res
 }
@@ -121,7 +121,7 @@
 ; CHECK: vmovd  (%rdi), %xmm0 ## encoding: [0x62
 ; CHECK: ret
 define <4 x i32> @test15(i32* %x) {
-   %y = load i32* %x, align 4
+   %y = load i32, i32* %x, align 4
    %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
    ret <4 x i32>%res
 }
@@ -131,7 +131,7 @@
 ; CHECK: ret
 define <16 x i32> @test16(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <16 x i32>*
-  %res = load <16 x i32>* %vaddr, align 1
+  %res = load <16 x i32>, <16 x i32>* %vaddr, align 1
   ret <16 x i32>%res
 }
 
@@ -140,7 +140,7 @@
 ; CHECK: ret
 define <16 x i32> @test17(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <16 x i32>*
-  %res = load <16 x i32>* %vaddr, align 64
+  %res = load <16 x i32>, <16 x i32>* %vaddr, align 64
   ret <16 x i32>%res
 }
 
@@ -176,7 +176,7 @@
 ; CHECK: ret
 define  <8 x i64> @test21(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <8 x i64>*
-  %res = load <8 x i64>* %vaddr, align 64
+  %res = load <8 x i64>, <8 x i64>* %vaddr, align 64
   ret <8 x i64>%res
 }
 
@@ -194,7 +194,7 @@
 ; CHECK: ret
 define <8 x i64> @test23(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <8 x i64>*
-  %res = load <8 x i64>* %vaddr, align 1
+  %res = load <8 x i64>, <8 x i64>* %vaddr, align 1
   ret <8 x i64>%res
 }
 
@@ -212,7 +212,7 @@
 ; CHECK: ret
 define <8 x double> @test25(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <8 x double>*
-  %res = load <8 x double>* %vaddr, align 64
+  %res = load <8 x double>, <8 x double>* %vaddr, align 64
   ret <8 x double>%res
 }
 
@@ -230,7 +230,7 @@
 ; CHECK: ret
 define <16 x float> @test27(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <16 x float>*
-  %res = load <16 x float>* %vaddr, align 64
+  %res = load <16 x float>, <16 x float>* %vaddr, align 64
   ret <16 x float>%res
 }
 
@@ -248,7 +248,7 @@
 ; CHECK: ret
 define <8 x double> @test29(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <8 x double>*
-  %res = load <8 x double>* %vaddr, align 1
+  %res = load <8 x double>, <8 x double>* %vaddr, align 1
   ret <8 x double>%res
 }
 
@@ -266,7 +266,7 @@
 ; CHECK: ret
 define <16 x float> @test31(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <16 x float>*
-  %res = load <16 x float>* %vaddr, align 1
+  %res = load <16 x float>, <16 x float>* %vaddr, align 1
   ret <16 x float>%res
 }
 
@@ -276,7 +276,7 @@
 define <16 x i32> @test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <16 x i32>*
-  %r = load <16 x i32>* %vaddr, align 64
+  %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
   %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
   ret <16 x i32>%res
 }
@@ -287,7 +287,7 @@
 define <16 x i32> @test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <16 x i32>*
-  %r = load <16 x i32>* %vaddr, align 1
+  %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
   %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
   ret <16 x i32>%res
 }
@@ -298,7 +298,7 @@
 define <16 x i32> @test34(i8 * %addr, <16 x i32> %mask1) {
   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <16 x i32>*
-  %r = load <16 x i32>* %vaddr, align 64
+  %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
   %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
   ret <16 x i32>%res
 }
@@ -309,7 +309,7 @@
 define <16 x i32> @test35(i8 * %addr, <16 x i32> %mask1) {
   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <16 x i32>*
-  %r = load <16 x i32>* %vaddr, align 1
+  %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
   %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
   ret <16 x i32>%res
 }
@@ -320,7 +320,7 @@
 define <8 x i64> @test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <8 x i64>*
-  %r = load <8 x i64>* %vaddr, align 64
+  %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
   %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
   ret <8 x i64>%res
 }
@@ -331,7 +331,7 @@
 define <8 x i64> @test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <8 x i64>*
-  %r = load <8 x i64>* %vaddr, align 1
+  %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
   %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
   ret <8 x i64>%res
 }
@@ -342,7 +342,7 @@
 define <8 x i64> @test38(i8 * %addr, <8 x i64> %mask1) {
   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <8 x i64>*
-  %r = load <8 x i64>* %vaddr, align 64
+  %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
   %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
   ret <8 x i64>%res
 }
@@ -353,7 +353,7 @@
 define <8 x i64> @test39(i8 * %addr, <8 x i64> %mask1) {
   %mask = icmp ne <8 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <8 x i64>*
-  %r = load <8 x i64>* %vaddr, align 1
+  %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
   %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
   ret <8 x i64>%res
 }
@@ -364,7 +364,7 @@
 define <16 x float> @test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
   %mask = fcmp one <16 x float> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <16 x float>*
-  %r = load <16 x float>* %vaddr, align 64
+  %r = load <16 x float>, <16 x float>* %vaddr, align 64
   %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
   ret <16 x float>%res
 }
@@ -375,7 +375,7 @@
 define <16 x float> @test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
   %mask = fcmp one <16 x float> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <16 x float>*
-  %r = load <16 x float>* %vaddr, align 1
+  %r = load <16 x float>, <16 x float>* %vaddr, align 1
   %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
   ret <16 x float>%res
 }
@@ -386,7 +386,7 @@
 define <16 x float> @test42(i8 * %addr, <16 x float> %mask1) {
   %mask = fcmp one <16 x float> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <16 x float>*
-  %r = load <16 x float>* %vaddr, align 64
+  %r = load <16 x float>, <16 x float>* %vaddr, align 64
   %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
   ret <16 x float>%res
 }
@@ -397,7 +397,7 @@
 define <16 x float> @test43(i8 * %addr, <16 x float> %mask1) {
   %mask = fcmp one <16 x float> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <16 x float>*
-  %r = load <16 x float>* %vaddr, align 1
+  %r = load <16 x float>, <16 x float>* %vaddr, align 1
   %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
   ret <16 x float>%res
 }
@@ -408,7 +408,7 @@
 define <8 x double> @test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
   %mask = fcmp one <8 x double> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <8 x double>*
-  %r = load <8 x double>* %vaddr, align 64
+  %r = load <8 x double>, <8 x double>* %vaddr, align 64
   %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
   ret <8 x double>%res
 }
@@ -419,7 +419,7 @@
 define <8 x double> @test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
   %mask = fcmp one <8 x double> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <8 x double>*
-  %r = load <8 x double>* %vaddr, align 1
+  %r = load <8 x double>, <8 x double>* %vaddr, align 1
   %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
   ret <8 x double>%res
 }
@@ -430,7 +430,7 @@
 define <8 x double> @test46(i8 * %addr, <8 x double> %mask1) {
   %mask = fcmp one <8 x double> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <8 x double>*
-  %r = load <8 x double>* %vaddr, align 64
+  %r = load <8 x double>, <8 x double>* %vaddr, align 64
   %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
   ret <8 x double>%res
 }
@@ -441,7 +441,7 @@
 define <8 x double> @test47(i8 * %addr, <8 x double> %mask1) {
   %mask = fcmp one <8 x double> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <8 x double>*
-  %r = load <8 x double>* %vaddr, align 1
+  %r = load <8 x double>, <8 x double>* %vaddr, align 1
   %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
   ret <8 x double>%res
 }
diff --git a/llvm/test/CodeGen/X86/avx512-round.ll b/llvm/test/CodeGen/X86/avx512-round.ll
index ffeb2a8..c4f417e 100644
--- a/llvm/test/CodeGen/X86/avx512-round.ll
+++ b/llvm/test/CodeGen/X86/avx512-round.ll
@@ -99,7 +99,7 @@
 define float @floor_f32m(float* %aptr) {
 ; CHECK-LABEL: floor_f32m
 ; CHECK: vrndscaless $1, (%rdi), {{.*}}encoding: [0x62,0xf3,0x7d,0x08,0x0a,0x07,0x01]
-  %a = load float* %aptr, align 4
+  %a = load float, float* %aptr, align 4
   %res = call float @llvm.floor.f32(float %a)
   ret float %res
 }
diff --git a/llvm/test/CodeGen/X86/avx512-shift.ll b/llvm/test/CodeGen/X86/avx512-shift.ll
index 8cdcf8a..0636cd2 100644
--- a/llvm/test/CodeGen/X86/avx512-shift.ll
+++ b/llvm/test/CodeGen/X86/avx512-shift.ll
@@ -76,7 +76,7 @@
 ; CHECK: vpsravd (%
 ; CHECK: ret
 define <16 x i32> @variable_sra01_load(<16 x i32> %x, <16 x i32>* %y) {
-  %y1 = load <16 x i32>* %y
+  %y1 = load <16 x i32>, <16 x i32>* %y
   %k = ashr <16 x i32> %x, %y1
   ret <16 x i32> %k
 }
@@ -85,7 +85,7 @@
 ; CHECK: vpsllvd (%
 ; CHECK: ret
 define <16 x i32> @variable_shl1_load(<16 x i32> %x, <16 x i32>* %y) {
-  %y1 = load <16 x i32>* %y
+  %y1 = load <16 x i32>, <16 x i32>* %y
   %k = shl <16 x i32> %x, %y1
   ret <16 x i32> %k
 }
@@ -93,7 +93,7 @@
 ; CHECK: vpsrlvd (%
 ; CHECK: ret
 define <16 x i32> @variable_srl0_load(<16 x i32> %x, <16 x i32>* %y) {
-  %y1 = load <16 x i32>* %y
+  %y1 = load <16 x i32>, <16 x i32>* %y
   %k = lshr <16 x i32> %x, %y1
   ret <16 x i32> %k
 }
@@ -102,7 +102,7 @@
 ; CHECK: vpsrlvq (%
 ; CHECK: ret
 define <8 x i64> @variable_srl3_load(<8 x i64> %x, <8 x i64>* %y) {
-  %y1 = load <8 x i64>* %y
+  %y1 = load <8 x i64>, <8 x i64>* %y
   %k = lshr <8 x i64> %x, %y1
   ret <8 x i64> %k
 }
diff --git a/llvm/test/CodeGen/X86/avx512-vbroadcast.ll b/llvm/test/CodeGen/X86/avx512-vbroadcast.ll
index 5bb8233..cc81d68 100644
--- a/llvm/test/CodeGen/X86/avx512-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx512-vbroadcast.ll
@@ -64,7 +64,7 @@
 ;CHECK: vbroadcastss (%{{.*}}, %zmm
 ;CHECK: ret
 define   <16 x float> @_ss16xfloat_load(float* %a.ptr) {
-  %a = load float* %a.ptr
+  %a = load float, float* %a.ptr
   %b = insertelement <16 x float> undef, float %a, i32 0
   %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
   ret <16 x float> %c
@@ -74,7 +74,7 @@
 ;CHECK: vbroadcastss (%rdi), %zmm0 {%k1}
 ;CHECK: ret
 define   <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) {
-  %a = load float* %a.ptr
+  %a = load float, float* %a.ptr
   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   %b = insertelement <16 x float> undef, float %a, i32 0
   %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
@@ -86,7 +86,7 @@
 ;CHECK: vbroadcastss (%rdi), %zmm0 {%k1} {z}
 ;CHECK: ret
 define   <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) {
-  %a = load float* %a.ptr
+  %a = load float, float* %a.ptr
   %mask = icmp ne <16 x i32> %mask1, zeroinitializer
   %b = insertelement <16 x float> undef, float %a, i32 0
   %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
@@ -130,7 +130,7 @@
 ;CHECK: vbroadcastsd (%rdi), %zmm
 ;CHECK: ret
 define   <8 x double> @_sd8xdouble_load(double* %a.ptr) {
-  %a = load double* %a.ptr
+  %a = load double, double* %a.ptr
   %b = insertelement <8 x double> undef, double %a, i32 0
   %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
   ret <8 x double> %c
@@ -140,7 +140,7 @@
 ;CHECK: vbroadcastsd (%rdi), %zmm0 {%k1}
 ;CHECK: ret
 define   <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) {
-  %a = load double* %a.ptr
+  %a = load double, double* %a.ptr
   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
   %b = insertelement <8 x double> undef, double %a, i32 0
   %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
@@ -152,7 +152,7 @@
 ; CHECK-LABEL: _sd8xdouble_maskz_load:
 ; CHECK:    vbroadcastsd (%rdi), %zmm0 {%k1} {z}
 ; CHECK:    ret
-  %a = load double* %a.ptr
+  %a = load double, double* %a.ptr
   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
   %b = insertelement <8 x double> undef, double %a, i32 0
   %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
index b16f5c9..26e2c77 100644
--- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -31,7 +31,7 @@
 ; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
 ; CHECK-NEXT:    retq
-  %y = load <16 x i32>* %yp, align 4
+  %y = load <16 x i32>, <16 x i32>* %yp, align 4
   %mask = icmp eq <16 x i32> %x, %y
   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
   ret <16 x i32> %max
@@ -215,7 +215,7 @@
 ; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
 ; CHECK-NEXT:    retq
-  %y = load <16 x i32>* %y.ptr, align 4
+  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
   %mask = icmp sgt <16 x i32> %x, %y
   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
   ret <16 x i32> %max
@@ -228,7 +228,7 @@
 ; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
 ; CHECK-NEXT:    retq
-  %y = load <16 x i32>* %y.ptr, align 4
+  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
   %mask = icmp sle <16 x i32> %x, %y
   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
   ret <16 x i32> %max
@@ -241,7 +241,7 @@
 ; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
 ; CHECK-NEXT:    retq
-  %y = load <16 x i32>* %y.ptr, align 4
+  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
   %mask = icmp ule <16 x i32> %x, %y
   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
   ret <16 x i32> %max
@@ -286,7 +286,7 @@
 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %mask1 = icmp sgt <8 x i64> %x1, %y1
-  %y = load <8 x i64>* %y.ptr, align 4
+  %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
   %mask0 = icmp sgt <8 x i64> %x, %y
   %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
   %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %x1
@@ -302,7 +302,7 @@
 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %mask1 = icmp sge <16 x i32> %x1, %y1
-  %y = load <16 x i32>* %y.ptr, align 4
+  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
   %mask0 = icmp ule <16 x i32> %x, %y
   %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
   %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1
@@ -316,7 +316,7 @@
 ; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
 ; CHECK-NEXT:    retq
-  %yb = load i64* %yb.ptr, align 4
+  %yb = load i64, i64* %yb.ptr, align 4
   %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
   %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
   %mask = icmp eq <8 x i64> %x, %y
@@ -331,7 +331,7 @@
 ; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
 ; CHECK-NEXT:    retq
-  %yb = load i32* %yb.ptr, align 4
+  %yb = load i32, i32* %yb.ptr, align 4
   %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
   %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
   %mask = icmp sle <16 x i32> %x, %y
@@ -348,7 +348,7 @@
 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %mask1 = icmp sge <16 x i32> %x1, %y1
-  %yb = load i32* %yb.ptr, align 4
+  %yb = load i32, i32* %yb.ptr, align 4
   %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
   %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer
   %mask0 = icmp sgt <16 x i32> %x, %y
@@ -366,7 +366,7 @@
 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %mask1 = icmp sge <8 x i64> %x1, %y1
-  %yb = load i64* %yb.ptr, align 4
+  %yb = load i64, i64* %yb.ptr, align 4
   %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
   %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer
   %mask0 = icmp sle <8 x i64> %x, %y
diff --git a/llvm/test/CodeGen/X86/avx512bw-arith.ll b/llvm/test/CodeGen/X86/avx512bw-arith.ll
index 94f68a2..52ebf27 100644
--- a/llvm/test/CodeGen/X86/avx512bw-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-arith.ll
@@ -12,7 +12,7 @@
 ; CHECK: vpaddb (%rdi), %zmm{{.*}}
 ; CHECK: ret
 define <64 x i8> @vpaddb512_fold_test(<64 x i8> %i, <64 x i8>* %j) nounwind {
-  %tmp = load <64 x i8>* %j, align 4
+  %tmp = load <64 x i8>, <64 x i8>* %j, align 4
   %x = add <64 x i8> %i, %tmp
   ret <64 x i8> %x
 }
@@ -29,7 +29,7 @@
 ; CHECK: vpaddw (%rdi), %zmm{{.*}}
 ; CHECK: ret
 define <32 x i16> @vpaddw512_fold_test(<32 x i16> %i, <32 x i16>* %j) nounwind {
-  %tmp = load <32 x i16>* %j, align 4
+  %tmp = load <32 x i16>, <32 x i16>* %j, align 4
   %x = add <32 x i16> %i, %tmp
   ret <32 x i16> %x
 }
@@ -59,7 +59,7 @@
 ; CHECK: ret
 define <32 x i16> @vpaddw512_mask_fold_test(<32 x i16> %i, <32 x i16>* %j.ptr, <32 x i16> %mask1) nounwind readnone {
   %mask = icmp ne <32 x i16> %mask1, zeroinitializer
-  %j = load <32 x i16>* %j.ptr
+  %j = load <32 x i16>, <32 x i16>* %j.ptr
   %x = add <32 x i16> %i, %j
   %r = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %i
   ret <32 x i16> %r
@@ -70,7 +70,7 @@
 ; CHECK: ret
 define <32 x i16> @vpaddw512_maskz_fold_test(<32 x i16> %i, <32 x i16>* %j.ptr, <32 x i16> %mask1) nounwind readnone {
   %mask = icmp ne <32 x i16> %mask1, zeroinitializer
-  %j = load <32 x i16>* %j.ptr
+  %j = load <32 x i16>, <32 x i16>* %j.ptr
   %x = add <32 x i16> %i, %j
   %r = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
   ret <32 x i16> %r
diff --git a/llvm/test/CodeGen/X86/avx512bw-mask-op.ll b/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
index 9d7630c..0208011 100644
--- a/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-mask-op.ll
@@ -35,7 +35,7 @@
 }
 
 define void @mask32_mem(i32* %ptr) {
-  %x = load i32* %ptr, align 4
+  %x = load i32, i32* %ptr, align 4
   %m0 = bitcast i32 %x to <32 x i1>
   %m1 = xor <32 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
                             i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
@@ -52,7 +52,7 @@
 }
 
 define void @mask64_mem(i64* %ptr) {
-  %x = load i64* %ptr, align 4
+  %x = load i64, i64* %ptr, align 4
   %m0 = bitcast i64 %x to <64 x i1>
   %m1 = xor <64 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
                             i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
diff --git a/llvm/test/CodeGen/X86/avx512bw-mov.ll b/llvm/test/CodeGen/X86/avx512bw-mov.ll
index 2ff6d28..519b649 100644
--- a/llvm/test/CodeGen/X86/avx512bw-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-mov.ll
@@ -5,7 +5,7 @@
 ; CHECK: ret
 define <64 x i8> @test1(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <64 x i8>*
-  %res = load <64 x i8>* %vaddr, align 1
+  %res = load <64 x i8>, <64 x i8>* %vaddr, align 1
   ret <64 x i8>%res
 }
 
@@ -24,7 +24,7 @@
 define <64 x i8> @test3(i8 * %addr, <64 x i8> %old, <64 x i8> %mask1) {
   %mask = icmp ne <64 x i8> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <64 x i8>*
-  %r = load <64 x i8>* %vaddr, align 1
+  %r = load <64 x i8>, <64 x i8>* %vaddr, align 1
   %res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> %old
   ret <64 x i8>%res
 }
@@ -35,7 +35,7 @@
 define <64 x i8> @test4(i8 * %addr, <64 x i8> %mask1) {
   %mask = icmp ne <64 x i8> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <64 x i8>*
-  %r = load <64 x i8>* %vaddr, align 1
+  %r = load <64 x i8>, <64 x i8>* %vaddr, align 1
   %res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> zeroinitializer
   ret <64 x i8>%res
 }
@@ -45,7 +45,7 @@
 ; CHECK: ret
 define <32 x i16> @test5(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <32 x i16>*
-  %res = load <32 x i16>* %vaddr, align 1
+  %res = load <32 x i16>, <32 x i16>* %vaddr, align 1
   ret <32 x i16>%res
 }
 
@@ -64,7 +64,7 @@
 define <32 x i16> @test7(i8 * %addr, <32 x i16> %old, <32 x i16> %mask1) {
   %mask = icmp ne <32 x i16> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <32 x i16>*
-  %r = load <32 x i16>* %vaddr, align 1
+  %r = load <32 x i16>, <32 x i16>* %vaddr, align 1
   %res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> %old
   ret <32 x i16>%res
 }
@@ -75,7 +75,7 @@
 define <32 x i16> @test8(i8 * %addr, <32 x i16> %mask1) {
   %mask = icmp ne <32 x i16> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <32 x i16>*
-  %r = load <32 x i16>* %vaddr, align 1
+  %r = load <32 x i16>, <32 x i16>* %vaddr, align 1
   %res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> zeroinitializer
   ret <32 x i16>%res
 }
diff --git a/llvm/test/CodeGen/X86/avx512bw-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512bw-vec-cmp.ll
index 6ba4db6..141f5cc 100644
--- a/llvm/test/CodeGen/X86/avx512bw-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-vec-cmp.ll
@@ -45,7 +45,7 @@
 ; CHECK: vmovdqu16
 ; CHECK: ret
 define <32 x i16> @test5(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %yp) nounwind {
-  %y = load <32 x i16>* %yp, align 4
+  %y = load <32 x i16>, <32 x i16>* %yp, align 4
   %mask = icmp eq <32 x i16> %x, %y
   %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
   ret <32 x i16> %max
@@ -56,7 +56,7 @@
 ; CHECK: vmovdqu16
 ; CHECK: ret
 define <32 x i16> @test6(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind {
-  %y = load <32 x i16>* %y.ptr, align 4
+  %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
   %mask = icmp sgt <32 x i16> %x, %y
   %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
   ret <32 x i16> %max
@@ -67,7 +67,7 @@
 ; CHECK: vmovdqu16
 ; CHECK: ret
 define <32 x i16> @test7(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind {
-  %y = load <32 x i16>* %y.ptr, align 4
+  %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
   %mask = icmp sle <32 x i16> %x, %y
   %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
   ret <32 x i16> %max
@@ -78,7 +78,7 @@
 ; CHECK: vmovdqu16
 ; CHECK: ret
 define <32 x i16> @test8(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) nounwind {
-  %y = load <32 x i16>* %y.ptr, align 4
+  %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
   %mask = icmp ule <32 x i16> %x, %y
   %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
   ret <32 x i16> %max
@@ -114,7 +114,7 @@
 ; CHECK: ret
 define <64 x i8> @test11(<64 x i8> %x, <64 x i8>* %y.ptr, <64 x i8> %x1, <64 x i8> %y1) nounwind {
   %mask1 = icmp sgt <64 x i8> %x1, %y1
-  %y = load <64 x i8>* %y.ptr, align 4
+  %y = load <64 x i8>, <64 x i8>* %y.ptr, align 4
   %mask0 = icmp sgt <64 x i8> %x, %y
   %mask = select <64 x i1> %mask0, <64 x i1> %mask1, <64 x i1> zeroinitializer
   %max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %x1
@@ -127,7 +127,7 @@
 ; CHECK: ret
 define <32 x i16> @test12(<32 x i16> %x, <32 x i16>* %y.ptr, <32 x i16> %x1, <32 x i16> %y1) nounwind {
   %mask1 = icmp sge <32 x i16> %x1, %y1
-  %y = load <32 x i16>* %y.ptr, align 4
+  %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
   %mask0 = icmp ule <32 x i16> %x, %y
   %mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer
   %max = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %x1
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-arith.ll b/llvm/test/CodeGen/X86/avx512bwvl-arith.ll
index 96f0140..c0650e1 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-arith.ll
@@ -14,7 +14,7 @@
 ; CHECK: vpaddb (%rdi), %ymm{{.*}}
 ; CHECK: ret
 define <32 x i8> @vpaddb256_fold_test(<32 x i8> %i, <32 x i8>* %j) nounwind {
-  %tmp = load <32 x i8>* %j, align 4
+  %tmp = load <32 x i8>, <32 x i8>* %j, align 4
   %x = add <32 x i8> %i, %tmp
   ret <32 x i8> %x
 }
@@ -31,7 +31,7 @@
 ; CHECK: vpaddw (%rdi), %ymm{{.*}}
 ; CHECK: ret
 define <16 x i16> @vpaddw256_fold_test(<16 x i16> %i, <16 x i16>* %j) nounwind {
-  %tmp = load <16 x i16>* %j, align 4
+  %tmp = load <16 x i16>, <16 x i16>* %j, align 4
   %x = add <16 x i16> %i, %tmp
   ret <16 x i16> %x
 }
@@ -61,7 +61,7 @@
 ; CHECK: ret
 define <16 x i16> @vpaddw256_mask_fold_test(<16 x i16> %i, <16 x i16>* %j.ptr, <16 x i16> %mask1) nounwind readnone {
   %mask = icmp ne <16 x i16> %mask1, zeroinitializer
-  %j = load <16 x i16>* %j.ptr
+  %j = load <16 x i16>, <16 x i16>* %j.ptr
   %x = add <16 x i16> %i, %j
   %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %i
   ret <16 x i16> %r
@@ -72,7 +72,7 @@
 ; CHECK: ret
 define <16 x i16> @vpaddw256_maskz_fold_test(<16 x i16> %i, <16 x i16>* %j.ptr, <16 x i16> %mask1) nounwind readnone {
   %mask = icmp ne <16 x i16> %mask1, zeroinitializer
-  %j = load <16 x i16>* %j.ptr
+  %j = load <16 x i16>, <16 x i16>* %j.ptr
   %x = add <16 x i16> %i, %j
   %r = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
   ret <16 x i16> %r
@@ -116,7 +116,7 @@
 ; CHECK: vpaddb (%rdi), %xmm{{.*}}
 ; CHECK: ret
 define <16 x i8> @vpaddb128_fold_test(<16 x i8> %i, <16 x i8>* %j) nounwind {
-  %tmp = load <16 x i8>* %j, align 4
+  %tmp = load <16 x i8>, <16 x i8>* %j, align 4
   %x = add <16 x i8> %i, %tmp
   ret <16 x i8> %x
 }
@@ -133,7 +133,7 @@
 ; CHECK: vpaddw (%rdi), %xmm{{.*}}
 ; CHECK: ret
 define <8 x i16> @vpaddw128_fold_test(<8 x i16> %i, <8 x i16>* %j) nounwind {
-  %tmp = load <8 x i16>* %j, align 4
+  %tmp = load <8 x i16>, <8 x i16>* %j, align 4
   %x = add <8 x i16> %i, %tmp
   ret <8 x i16> %x
 }
@@ -163,7 +163,7 @@
 ; CHECK: ret
 define <8 x i16> @vpaddw128_mask_fold_test(<8 x i16> %i, <8 x i16>* %j.ptr, <8 x i16> %mask1) nounwind readnone {
   %mask = icmp ne <8 x i16> %mask1, zeroinitializer
-  %j = load <8 x i16>* %j.ptr
+  %j = load <8 x i16>, <8 x i16>* %j.ptr
   %x = add <8 x i16> %i, %j
   %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %i
   ret <8 x i16> %r
@@ -174,7 +174,7 @@
 ; CHECK: ret
 define <8 x i16> @vpaddw128_maskz_fold_test(<8 x i16> %i, <8 x i16>* %j.ptr, <8 x i16> %mask1) nounwind readnone {
   %mask = icmp ne <8 x i16> %mask1, zeroinitializer
-  %j = load <8 x i16>* %j.ptr
+  %j = load <8 x i16>, <8 x i16>* %j.ptr
   %x = add <8 x i16> %i, %j
   %r = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
   ret <8 x i16> %r
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index dbb9117..cffa0a5 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -830,7 +830,7 @@
 define <2 x double> @test_mask_vfmsubadd128rm_pd(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfmsubadd128rm_pd
   ; CHECK: vfmsubadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa7,0x07]
-  %a2 = load <2 x double>* %ptr_a2
+  %a2 = load <2 x double>, <2 x double>* %ptr_a2
   %res = call <2 x double> @llvm.x86.fma.mask.vfmsubadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
   ret <2 x double> %res
 }
@@ -838,7 +838,7 @@
 define <8 x double> @test_mask_vfmsubaddrm_pd(<8 x double> %a0, <8 x double> %a1, <8 x double>* %ptr_a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfmsubaddrm_pd
   ; CHECK: vfmsubadd213pd  (%rdi), %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0xa7,0x07]
-  %a2 = load <8 x double>* %ptr_a2, align 8
+  %a2 = load <8 x double>, <8 x double>* %ptr_a2, align 8
   %res = call <8 x double> @llvm.x86.fma.mask.vfmsubadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask, i32 4) nounwind
   ret <8 x double> %res
 }
@@ -860,7 +860,7 @@
 define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfmadd128_ps_rmk
   ; CHECK: vfmadd213ps	(%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
-  %a2 = load <4 x float>* %ptr_a2
+  %a2 = load <4 x float>, <4 x float>* %ptr_a2
   %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
   ret <4 x float> %res
 }
@@ -868,7 +868,7 @@
 define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfmadd128_ps_rmka
   ; CHECK: vfmadd213ps     (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
-  %a2 = load <4 x float>* %ptr_a2, align 8
+  %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 8
   %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
   ret <4 x float> %res
 }
@@ -876,7 +876,7 @@
 define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
   ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkz
   ; CHECK: vfmadd213ps	(%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
-  %a2 = load <4 x float>* %ptr_a2
+  %a2 = load <4 x float>, <4 x float>* %ptr_a2
   %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
   ret <4 x float> %res
 }
@@ -884,7 +884,7 @@
 define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
   ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkza
   ; CHECK: vfmadd213ps	(%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0xa8,0x07]
-  %a2 = load <4 x float>* %ptr_a2, align 4
+  %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4
   %res = call <4 x float> @llvm.x86.fma.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
   ret <4 x float> %res
 }
@@ -892,7 +892,7 @@
 define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfmadd128_ps_rmb
   ; CHECK: vfmadd213ps	(%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
-  %q = load float* %ptr_a2
+  %q = load float, float* %ptr_a2
   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
@@ -904,7 +904,7 @@
 define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfmadd128_ps_rmba
   ; CHECK: vfmadd213ps	(%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
-  %q = load float* %ptr_a2, align 4
+  %q = load float, float* %ptr_a2, align 4
   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
@@ -916,7 +916,7 @@
 define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
   ; CHECK-LABEL: test_mask_vfmadd128_ps_rmbz
   ; CHECK: vfmadd213ps	(%rdi){1to4}, %xmm1, %xmm0  ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
-  %q = load float* %ptr_a2
+  %q = load float, float* %ptr_a2
   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
@@ -928,7 +928,7 @@
 define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
   ; CHECK-LABEL: test_mask_vfmadd128_ps_rmbza
   ; CHECK: vfmadd213ps	(%rdi){1to4}, %xmm1, %xmm0  ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
-  %q = load float* %ptr_a2, align 4
+  %q = load float, float* %ptr_a2, align 4
   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
@@ -954,7 +954,7 @@
 define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfmadd128_pd_rmk
   ; CHECK: vfmadd213pd	(%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07]
-  %a2 = load <2 x double>* %ptr_a2
+  %a2 = load <2 x double>, <2 x double>* %ptr_a2
   %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
   ret <2 x double> %res
 }
@@ -962,7 +962,7 @@
 define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) {
   ; CHECK-LABEL: test_mask_vfmadd128_pd_rmkz
   ; CHECK: vfmadd213pd	(%rdi), %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
-  %a2 = load <2 x double>* %ptr_a2
+  %a2 = load <2 x double>, <2 x double>* %ptr_a2
   %res = call <2 x double> @llvm.x86.fma.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
   ret <2 x double> %res
 }
@@ -984,7 +984,7 @@
 define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2, i8 %mask) {
   ; CHECK-LABEL: test_mask_vfmadd256_pd_rmk
   ; CHECK: vfmadd213pd	(%rdi), %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07]
-  %a2 = load <4 x double>* %ptr_a2
+  %a2 = load <4 x double>, <4 x double>* %ptr_a2
   %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
   ret <4 x double> %res
 }
@@ -992,7 +992,7 @@
 define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) {
   ; CHECK-LABEL: test_mask_vfmadd256_pd_rmkz
   ; CHECK: vfmadd213pd	(%rdi), %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
-  %a2 = load <4 x double>* %ptr_a2
+  %a2 = load <4 x double>, <4 x double>* %ptr_a2
   %res = call <4 x double> @llvm.x86.fma.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
   ret <4 x double> %res
 }
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-mov.ll b/llvm/test/CodeGen/X86/avx512bwvl-mov.ll
index 835844f..8a9a4fa 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-mov.ll
@@ -5,7 +5,7 @@
 ; CHECK: ret
 define <32 x i8> @test_256_1(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <32 x i8>*
-  %res = load <32 x i8>* %vaddr, align 1
+  %res = load <32 x i8>, <32 x i8>* %vaddr, align 1
   ret <32 x i8>%res
 }
 
@@ -24,7 +24,7 @@
 define <32 x i8> @test_256_3(i8 * %addr, <32 x i8> %old, <32 x i8> %mask1) {
   %mask = icmp ne <32 x i8> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <32 x i8>*
-  %r = load <32 x i8>* %vaddr, align 1
+  %r = load <32 x i8>, <32 x i8>* %vaddr, align 1
   %res = select <32 x i1> %mask, <32 x i8> %r, <32 x i8> %old
   ret <32 x i8>%res
 }
@@ -35,7 +35,7 @@
 define <32 x i8> @test_256_4(i8 * %addr, <32 x i8> %mask1) {
   %mask = icmp ne <32 x i8> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <32 x i8>*
-  %r = load <32 x i8>* %vaddr, align 1
+  %r = load <32 x i8>, <32 x i8>* %vaddr, align 1
   %res = select <32 x i1> %mask, <32 x i8> %r, <32 x i8> zeroinitializer
   ret <32 x i8>%res
 }
@@ -45,7 +45,7 @@
 ; CHECK: ret
 define <16 x i16> @test_256_5(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <16 x i16>*
-  %res = load <16 x i16>* %vaddr, align 1
+  %res = load <16 x i16>, <16 x i16>* %vaddr, align 1
   ret <16 x i16>%res
 }
 
@@ -64,7 +64,7 @@
 define <16 x i16> @test_256_7(i8 * %addr, <16 x i16> %old, <16 x i16> %mask1) {
   %mask = icmp ne <16 x i16> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <16 x i16>*
-  %r = load <16 x i16>* %vaddr, align 1
+  %r = load <16 x i16>, <16 x i16>* %vaddr, align 1
   %res = select <16 x i1> %mask, <16 x i16> %r, <16 x i16> %old
   ret <16 x i16>%res
 }
@@ -75,7 +75,7 @@
 define <16 x i16> @test_256_8(i8 * %addr, <16 x i16> %mask1) {
   %mask = icmp ne <16 x i16> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <16 x i16>*
-  %r = load <16 x i16>* %vaddr, align 1
+  %r = load <16 x i16>, <16 x i16>* %vaddr, align 1
   %res = select <16 x i1> %mask, <16 x i16> %r, <16 x i16> zeroinitializer
   ret <16 x i16>%res
 }
@@ -85,7 +85,7 @@
 ; CHECK: ret
 define <16 x i8> @test_128_1(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <16 x i8>*
-  %res = load <16 x i8>* %vaddr, align 1
+  %res = load <16 x i8>, <16 x i8>* %vaddr, align 1
   ret <16 x i8>%res
 }
 
@@ -104,7 +104,7 @@
 define <16 x i8> @test_128_3(i8 * %addr, <16 x i8> %old, <16 x i8> %mask1) {
   %mask = icmp ne <16 x i8> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <16 x i8>*
-  %r = load <16 x i8>* %vaddr, align 1
+  %r = load <16 x i8>, <16 x i8>* %vaddr, align 1
   %res = select <16 x i1> %mask, <16 x i8> %r, <16 x i8> %old
   ret <16 x i8>%res
 }
@@ -115,7 +115,7 @@
 define <16 x i8> @test_128_4(i8 * %addr, <16 x i8> %mask1) {
   %mask = icmp ne <16 x i8> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <16 x i8>*
-  %r = load <16 x i8>* %vaddr, align 1
+  %r = load <16 x i8>, <16 x i8>* %vaddr, align 1
   %res = select <16 x i1> %mask, <16 x i8> %r, <16 x i8> zeroinitializer
   ret <16 x i8>%res
 }
@@ -125,7 +125,7 @@
 ; CHECK: ret
 define <8 x i16> @test_128_5(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <8 x i16>*
-  %res = load <8 x i16>* %vaddr, align 1
+  %res = load <8 x i16>, <8 x i16>* %vaddr, align 1
   ret <8 x i16>%res
 }
 
@@ -144,7 +144,7 @@
 define <8 x i16> @test_128_7(i8 * %addr, <8 x i16> %old, <8 x i16> %mask1) {
   %mask = icmp ne <8 x i16> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <8 x i16>*
-  %r = load <8 x i16>* %vaddr, align 1
+  %r = load <8 x i16>, <8 x i16>* %vaddr, align 1
   %res = select <8 x i1> %mask, <8 x i16> %r, <8 x i16> %old
   ret <8 x i16>%res
 }
@@ -155,7 +155,7 @@
 define <8 x i16> @test_128_8(i8 * %addr, <8 x i16> %mask1) {
   %mask = icmp ne <8 x i16> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <8 x i16>*
-  %r = load <8 x i16>* %vaddr, align 1
+  %r = load <8 x i16>, <8 x i16>* %vaddr, align 1
   %res = select <8 x i1> %mask, <8 x i16> %r, <8 x i16> zeroinitializer
   ret <8 x i16>%res
 }
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512bwvl-vec-cmp.ll
index 2d13a16..9bf02fa 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-vec-cmp.ll
@@ -45,7 +45,7 @@
 ; CHECK: vmovdqu16
 ; CHECK: ret
 define <16 x i16> @test256_5(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %yp) nounwind {
-  %y = load <16 x i16>* %yp, align 4
+  %y = load <16 x i16>, <16 x i16>* %yp, align 4
   %mask = icmp eq <16 x i16> %x, %y
   %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
   ret <16 x i16> %max
@@ -56,7 +56,7 @@
 ; CHECK: vmovdqu16
 ; CHECK: ret
 define <16 x i16> @test256_6(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind {
-  %y = load <16 x i16>* %y.ptr, align 4
+  %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
   %mask = icmp sgt <16 x i16> %x, %y
   %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
   ret <16 x i16> %max
@@ -67,7 +67,7 @@
 ; CHECK: vmovdqu16
 ; CHECK: ret
 define <16 x i16> @test256_7(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind {
-  %y = load <16 x i16>* %y.ptr, align 4
+  %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
   %mask = icmp sle <16 x i16> %x, %y
   %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
   ret <16 x i16> %max
@@ -78,7 +78,7 @@
 ; CHECK: vmovdqu16
 ; CHECK: ret
 define <16 x i16> @test256_8(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr) nounwind {
-  %y = load <16 x i16>* %y.ptr, align 4
+  %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
   %mask = icmp ule <16 x i16> %x, %y
   %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
   ret <16 x i16> %max
@@ -114,7 +114,7 @@
 ; CHECK: ret
 define <32 x i8> @test256_11(<32 x i8> %x, <32 x i8>* %y.ptr, <32 x i8> %x1, <32 x i8> %y1) nounwind {
   %mask1 = icmp sgt <32 x i8> %x1, %y1
-  %y = load <32 x i8>* %y.ptr, align 4
+  %y = load <32 x i8>, <32 x i8>* %y.ptr, align 4
   %mask0 = icmp sgt <32 x i8> %x, %y
   %mask = select <32 x i1> %mask0, <32 x i1> %mask1, <32 x i1> zeroinitializer
   %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
@@ -127,7 +127,7 @@
 ; CHECK: ret
 define <16 x i16> @test256_12(<16 x i16> %x, <16 x i16>* %y.ptr, <16 x i16> %x1, <16 x i16> %y1) nounwind {
   %mask1 = icmp sge <16 x i16> %x1, %y1
-  %y = load <16 x i16>* %y.ptr, align 4
+  %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
   %mask0 = icmp ule <16 x i16> %x, %y
   %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
   %max = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %x1
@@ -179,7 +179,7 @@
 ; CHECK: vmovdqu16
 ; CHECK: ret
 define <8 x i16> @test128_5(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %yp) nounwind {
-  %y = load <8 x i16>* %yp, align 4
+  %y = load <8 x i16>, <8 x i16>* %yp, align 4
   %mask = icmp eq <8 x i16> %x, %y
   %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
   ret <8 x i16> %max
@@ -190,7 +190,7 @@
 ; CHECK: vmovdqu16
 ; CHECK: ret
 define <8 x i16> @test128_6(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind {
-  %y = load <8 x i16>* %y.ptr, align 4
+  %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
   %mask = icmp sgt <8 x i16> %x, %y
   %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
   ret <8 x i16> %max
@@ -201,7 +201,7 @@
 ; CHECK: vmovdqu16
 ; CHECK: ret
 define <8 x i16> @test128_7(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind {
-  %y = load <8 x i16>* %y.ptr, align 4
+  %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
   %mask = icmp sle <8 x i16> %x, %y
   %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
   ret <8 x i16> %max
@@ -212,7 +212,7 @@
 ; CHECK: vmovdqu16
 ; CHECK: ret
 define <8 x i16> @test128_8(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) nounwind {
-  %y = load <8 x i16>* %y.ptr, align 4
+  %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
   %mask = icmp ule <8 x i16> %x, %y
   %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
   ret <8 x i16> %max
@@ -248,7 +248,7 @@
 ; CHECK: ret
 define <16 x i8> @test128_11(<16 x i8> %x, <16 x i8>* %y.ptr, <16 x i8> %x1, <16 x i8> %y1) nounwind {
   %mask1 = icmp sgt <16 x i8> %x1, %y1
-  %y = load <16 x i8>* %y.ptr, align 4
+  %y = load <16 x i8>, <16 x i8>* %y.ptr, align 4
   %mask0 = icmp sgt <16 x i8> %x, %y
   %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer
   %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
@@ -261,7 +261,7 @@
 ; CHECK: ret
 define <8 x i16> @test128_12(<8 x i16> %x, <8 x i16>* %y.ptr, <8 x i16> %x1, <8 x i16> %y1) nounwind {
   %mask1 = icmp sge <8 x i16> %x1, %y1
-  %y = load <8 x i16>* %y.ptr, align 4
+  %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
   %mask0 = icmp ule <8 x i16> %x, %y
   %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
   %max = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %x1
diff --git a/llvm/test/CodeGen/X86/avx512dq-mask-op.ll b/llvm/test/CodeGen/X86/avx512dq-mask-op.ll
index 32a2633..b4d11bc 100644
--- a/llvm/test/CodeGen/X86/avx512dq-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512dq-mask-op.ll
@@ -11,7 +11,7 @@
 }
 
 define void @mask8_mem(i8* %ptr) {
-  %x = load i8* %ptr, align 4
+  %x = load i8, i8* %ptr, align 4
   %m0 = bitcast i8 %x to <8 x i1>
   %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
   %ret = bitcast <8 x i1> %m1 to i8
diff --git a/llvm/test/CodeGen/X86/avx512er-intrinsics.ll b/llvm/test/CodeGen/X86/avx512er-intrinsics.ll
index ce402b4..dcde9c4 100644
--- a/llvm/test/CodeGen/X86/avx512er-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512er-intrinsics.ll
@@ -99,7 +99,7 @@
 
 define <2 x double> @test_rsqrt28_sd_maskz_mem(<2 x double> %a0, double* %ptr ) {
   ; CHECK: vrsqrt28sd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x07]
-  %mem = load double * %ptr, align 8
+  %mem = load double, double* %ptr, align 8
   %mem_v = insertelement <2 x double> undef, double %mem, i32 0
   %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 7, i32 4) ; 
   ret <2 x double> %res
@@ -108,7 +108,7 @@
 define <2 x double> @test_rsqrt28_sd_maskz_mem_offset(<2 x double> %a0, double* %ptr ) {
   ; CHECK: vrsqrt28sd 144(%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x47,0x12]
   %ptr1 = getelementptr double, double* %ptr, i32 18
-  %mem = load double * %ptr1, align 8
+  %mem = load double, double* %ptr1, align 8
   %mem_v = insertelement <2 x double> undef, double %mem, i32 0
   %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 7, i32 4) ;
   ret <2 x double> %res
diff --git a/llvm/test/CodeGen/X86/avx512vl-arith.ll b/llvm/test/CodeGen/X86/avx512vl-arith.ll
index 1f7da78..ef01d86 100644
--- a/llvm/test/CodeGen/X86/avx512vl-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-arith.ll
@@ -14,7 +14,7 @@
 ; CHECK: vpaddq (%rdi), %ymm{{.*}}
 ; CHECK: ret
 define <4 x i64> @vpaddq256_fold_test(<4 x i64> %i, <4 x i64>* %j) nounwind {
-  %tmp = load <4 x i64>* %j, align 4
+  %tmp = load <4 x i64>, <4 x i64>* %j, align 4
   %x = add <4 x i64> %i, %tmp
   ret <4 x i64> %x
 }
@@ -31,7 +31,7 @@
 ; CHECK: vpaddq (%rdi){1to4}, %ymm{{.*}}
 ; CHECK: ret
 define <4 x i64> @vpaddq256_broadcast2_test(<4 x i64> %i, i64* %j.ptr) nounwind {
-  %j = load i64* %j.ptr
+  %j = load i64, i64* %j.ptr
   %j.0 = insertelement <4 x i64> undef, i64 %j, i32 0
   %j.v = shufflevector <4 x i64> %j.0, <4 x i64> undef, <4 x i32> zeroinitializer
   %x = add <4 x i64> %i, %j.v
@@ -50,7 +50,7 @@
 ; CHECK: vpaddd (%rdi), %ymm{{.*}}
 ; CHECK: ret
 define <8 x i32> @vpaddd256_fold_test(<8 x i32> %i, <8 x i32>* %j) nounwind {
-  %tmp = load <8 x i32>* %j, align 4
+  %tmp = load <8 x i32>, <8 x i32>* %j, align 4
   %x = add <8 x i32> %i, %tmp
   ret <8 x i32> %x
 }
@@ -88,7 +88,7 @@
 ; CHECK: ret
 define <8 x i32> @vpaddd256_mask_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind readnone {
   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
-  %j = load <8 x i32>* %j.ptr
+  %j = load <8 x i32>, <8 x i32>* %j.ptr
   %x = add <8 x i32> %i, %j
   %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
   ret <8 x i32> %r
@@ -109,7 +109,7 @@
 ; CHECK: ret
 define <8 x i32> @vpaddd256_maskz_fold_test(<8 x i32> %i, <8 x i32>* %j.ptr, <8 x i32> %mask1) nounwind readnone {
   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
-  %j = load <8 x i32>* %j.ptr
+  %j = load <8 x i32>, <8 x i32>* %j.ptr
   %x = add <8 x i32> %i, %j
   %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
   ret <8 x i32> %r
@@ -341,7 +341,7 @@
                                          <4 x double>* %j,  <4 x i64> %mask1)
                                          nounwind {
   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
-  %tmp = load <4 x double>* %j
+  %tmp = load <4 x double>, <4 x double>* %j
   %x = fadd <4 x double> %i, %tmp
   %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
   ret <4 x double> %r
@@ -353,7 +353,7 @@
 define <4 x double> @test_maskz_fold_vaddpd_256(<4 x double> %i, <4 x double>* %j,
                                           <4 x i64> %mask1) nounwind {
   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
-  %tmp = load <4 x double>* %j
+  %tmp = load <4 x double>, <4 x double>* %j
   %x = fadd <4 x double> %i, %tmp
   %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
   ret <4 x double> %r
@@ -363,7 +363,7 @@
 ; CHECK: vaddpd (%rdi){1to4}, %ymm{{.*}}
 ; CHECK: ret
 define <4 x double> @test_broadcast2_vaddpd_256(<4 x double> %i, double* %j) nounwind {
-  %tmp = load double* %j
+  %tmp = load double, double* %j
   %b = insertelement <4 x double> undef, double %tmp, i32 0
   %c = shufflevector <4 x double> %b, <4 x double> undef,
                      <4 x i32> zeroinitializer
@@ -377,7 +377,7 @@
 define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i,
                                           double* %j, <4 x i64> %mask1) nounwind {
   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
-  %tmp = load double* %j
+  %tmp = load double, double* %j
   %b = insertelement <4 x double> undef, double %tmp, i32 0
   %c = shufflevector <4 x double> %b, <4 x double> undef,
                      <4 x i32> zeroinitializer
@@ -392,7 +392,7 @@
 define <4 x double> @test_maskz_broadcast_vaddpd_256(<4 x double> %i, double* %j,
                                            <4 x i64> %mask1) nounwind {
   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
-  %tmp = load double* %j
+  %tmp = load double, double* %j
   %b = insertelement <4 x double> undef, double %tmp, i32 0
   %c = shufflevector <4 x double> %b, <4 x double> undef,
                      <4 x i32> zeroinitializer
@@ -415,7 +415,7 @@
 ; CHECK: vpaddq (%rdi), %xmm{{.*}}
 ; CHECK: ret
 define <2 x i64> @vpaddq128_fold_test(<2 x i64> %i, <2 x i64>* %j) nounwind {
-  %tmp = load <2 x i64>* %j, align 4
+  %tmp = load <2 x i64>, <2 x i64>* %j, align 4
   %x = add <2 x i64> %i, %tmp
   ret <2 x i64> %x
 }
@@ -424,7 +424,7 @@
 ; CHECK: vpaddq (%rdi){1to2}, %xmm{{.*}}
 ; CHECK: ret
 define <2 x i64> @vpaddq128_broadcast2_test(<2 x i64> %i, i64* %j) nounwind {
-  %tmp = load i64* %j
+  %tmp = load i64, i64* %j
   %j.0 = insertelement <2 x i64> undef, i64 %tmp, i32 0
   %j.1 = insertelement <2 x i64> %j.0, i64 %tmp, i32 1
   %x = add <2 x i64> %i, %j.1
@@ -443,7 +443,7 @@
 ; CHECK: vpaddd (%rdi), %xmm{{.*}}
 ; CHECK: ret
 define <4 x i32> @vpaddd128_fold_test(<4 x i32> %i, <4 x i32>* %j) nounwind {
-  %tmp = load <4 x i32>* %j, align 4
+  %tmp = load <4 x i32>, <4 x i32>* %j, align 4
   %x = add <4 x i32> %i, %tmp
   ret <4 x i32> %x
 }
@@ -481,7 +481,7 @@
 ; CHECK: ret
 define <4 x i32> @vpaddd128_mask_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind readnone {
   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
-  %j = load <4 x i32>* %j.ptr
+  %j = load <4 x i32>, <4 x i32>* %j.ptr
   %x = add <4 x i32> %i, %j
   %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
   ret <4 x i32> %r
@@ -502,7 +502,7 @@
 ; CHECK: ret
 define <4 x i32> @vpaddd128_maskz_fold_test(<4 x i32> %i, <4 x i32>* %j.ptr, <4 x i32> %mask1) nounwind readnone {
   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
-  %j = load <4 x i32>* %j.ptr
+  %j = load <4 x i32>, <4 x i32>* %j.ptr
   %x = add <4 x i32> %i, %j
   %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
   ret <4 x i32> %r
@@ -735,7 +735,7 @@
                                          <2 x double>* %j,  <2 x i64> %mask1)
                                          nounwind {
   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
-  %tmp = load <2 x double>* %j
+  %tmp = load <2 x double>, <2 x double>* %j
   %x = fadd <2 x double> %i, %tmp
   %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
   ret <2 x double> %r
@@ -747,7 +747,7 @@
 define <2 x double> @test_maskz_fold_vaddpd_128(<2 x double> %i, <2 x double>* %j,
                                           <2 x i64> %mask1) nounwind {
   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
-  %tmp = load <2 x double>* %j
+  %tmp = load <2 x double>, <2 x double>* %j
   %x = fadd <2 x double> %i, %tmp
   %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
   ret <2 x double> %r
@@ -757,7 +757,7 @@
 ; CHECK: vaddpd (%rdi){1to2}, %xmm{{.*}}
 ; CHECK: ret
 define <2 x double> @test_broadcast2_vaddpd_128(<2 x double> %i, double* %j) nounwind {
-  %tmp = load double* %j
+  %tmp = load double, double* %j
   %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
   %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
   %x = fadd <2 x double> %j.1, %i
@@ -771,7 +771,7 @@
                                           double* %j, <2 x i64> %mask1)
                                           nounwind {
   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
-  %tmp = load double* %j
+  %tmp = load double, double* %j
   %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
   %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
   %x = fadd <2 x double> %j.1, %i
@@ -785,7 +785,7 @@
 define <2 x double> @test_maskz_broadcast_vaddpd_128(<2 x double> %i, double* %j,
                                            <2 x i64> %mask1) nounwind {
   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
-  %tmp = load double* %j
+  %tmp = load double, double* %j
   %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
   %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
   %x = fadd <2 x double> %j.1, %i
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index fe347bd..38a7e7a 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -805,7 +805,7 @@
 define <4 x double> @test_x86_mask_blend_pd_256_memop(<4 x double> %a, <4 x double>* %ptr, i8 %mask) {
   ; CHECK-LABEL: test_x86_mask_blend_pd_256_memop
   ; CHECK: vblendmpd (%
-  %b = load <4 x double>* %ptr
+  %b = load <4 x double>, <4 x double>* %ptr
   %res = call <4 x double> @llvm.x86.avx512.mask.blend.pd.256(<4 x double> %a, <4 x double> %b, i8 %mask) ; <<4 x double>> [#uses=1]
   ret <4 x double> %res
 }
@@ -843,7 +843,7 @@
 define <2 x double> @test_x86_mask_blend_pd_128_memop(<2 x double> %a, <2 x double>* %ptr, i8 %mask) {
   ; CHECK-LABEL: test_x86_mask_blend_pd_128_memop
   ; CHECK: vblendmpd (%
-  %b = load <2 x double>* %ptr
+  %b = load <2 x double>, <2 x double>* %ptr
   %res = call <2 x double> @llvm.x86.avx512.mask.blend.pd.128(<2 x double> %a, <2 x double> %b, i8 %mask) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
 }
diff --git a/llvm/test/CodeGen/X86/avx512vl-mov.ll b/llvm/test/CodeGen/X86/avx512vl-mov.ll
index 3224656..18fa0a1 100644
--- a/llvm/test/CodeGen/X86/avx512vl-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-mov.ll
@@ -5,7 +5,7 @@
 ; CHECK: ret
 define <8 x i32> @test_256_1(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <8 x i32>*
-  %res = load <8 x i32>* %vaddr, align 1
+  %res = load <8 x i32>, <8 x i32>* %vaddr, align 1
   ret <8 x i32>%res
 }
 
@@ -14,7 +14,7 @@
 ; CHECK: ret
 define <8 x i32> @test_256_2(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <8 x i32>*
-  %res = load <8 x i32>* %vaddr, align 32
+  %res = load <8 x i32>, <8 x i32>* %vaddr, align 32
   ret <8 x i32>%res
 }
 
@@ -50,7 +50,7 @@
 ; CHECK: ret
 define  <4 x i64> @test_256_6(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <4 x i64>*
-  %res = load <4 x i64>* %vaddr, align 32
+  %res = load <4 x i64>, <4 x i64>* %vaddr, align 32
   ret <4 x i64>%res
 }
 
@@ -68,7 +68,7 @@
 ; CHECK: ret
 define <4 x i64> @test_256_8(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <4 x i64>*
-  %res = load <4 x i64>* %vaddr, align 1
+  %res = load <4 x i64>, <4 x i64>* %vaddr, align 1
   ret <4 x i64>%res
 }
 
@@ -86,7 +86,7 @@
 ; CHECK: ret
 define <4 x double> @test_256_10(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <4 x double>*
-  %res = load <4 x double>* %vaddr, align 32
+  %res = load <4 x double>, <4 x double>* %vaddr, align 32
   ret <4 x double>%res
 }
 
@@ -104,7 +104,7 @@
 ; CHECK: ret
 define <8 x float> @test_256_12(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <8 x float>*
-  %res = load <8 x float>* %vaddr, align 32
+  %res = load <8 x float>, <8 x float>* %vaddr, align 32
   ret <8 x float>%res
 }
 
@@ -122,7 +122,7 @@
 ; CHECK: ret
 define <4 x double> @test_256_14(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <4 x double>*
-  %res = load <4 x double>* %vaddr, align 1
+  %res = load <4 x double>, <4 x double>* %vaddr, align 1
   ret <4 x double>%res
 }
 
@@ -140,7 +140,7 @@
 ; CHECK: ret
 define <8 x float> @test_256_16(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <8 x float>*
-  %res = load <8 x float>* %vaddr, align 1
+  %res = load <8 x float>, <8 x float>* %vaddr, align 1
   ret <8 x float>%res
 }
 
@@ -150,7 +150,7 @@
 define <8 x i32> @test_256_17(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <8 x i32>*
-  %r = load <8 x i32>* %vaddr, align 32
+  %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
   %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
   ret <8 x i32>%res
 }
@@ -161,7 +161,7 @@
 define <8 x i32> @test_256_18(i8 * %addr, <8 x i32> %old, <8 x i32> %mask1) {
   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <8 x i32>*
-  %r = load <8 x i32>* %vaddr, align 1
+  %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
   %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> %old
   ret <8 x i32>%res
 }
@@ -172,7 +172,7 @@
 define <8 x i32> @test_256_19(i8 * %addr, <8 x i32> %mask1) {
   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <8 x i32>*
-  %r = load <8 x i32>* %vaddr, align 32
+  %r = load <8 x i32>, <8 x i32>* %vaddr, align 32
   %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
   ret <8 x i32>%res
 }
@@ -183,7 +183,7 @@
 define <8 x i32> @test_256_20(i8 * %addr, <8 x i32> %mask1) {
   %mask = icmp ne <8 x i32> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <8 x i32>*
-  %r = load <8 x i32>* %vaddr, align 1
+  %r = load <8 x i32>, <8 x i32>* %vaddr, align 1
   %res = select <8 x i1> %mask, <8 x i32> %r, <8 x i32> zeroinitializer
   ret <8 x i32>%res
 }
@@ -194,7 +194,7 @@
 define <4 x i64> @test_256_21(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <4 x i64>*
-  %r = load <4 x i64>* %vaddr, align 32
+  %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
   %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
   ret <4 x i64>%res
 }
@@ -205,7 +205,7 @@
 define <4 x i64> @test_256_22(i8 * %addr, <4 x i64> %old, <4 x i64> %mask1) {
   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <4 x i64>*
-  %r = load <4 x i64>* %vaddr, align 1
+  %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
   %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> %old
   ret <4 x i64>%res
 }
@@ -216,7 +216,7 @@
 define <4 x i64> @test_256_23(i8 * %addr, <4 x i64> %mask1) {
   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <4 x i64>*
-  %r = load <4 x i64>* %vaddr, align 32
+  %r = load <4 x i64>, <4 x i64>* %vaddr, align 32
   %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
   ret <4 x i64>%res
 }
@@ -227,7 +227,7 @@
 define <4 x i64> @test_256_24(i8 * %addr, <4 x i64> %mask1) {
   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <4 x i64>*
-  %r = load <4 x i64>* %vaddr, align 1
+  %r = load <4 x i64>, <4 x i64>* %vaddr, align 1
   %res = select <4 x i1> %mask, <4 x i64> %r, <4 x i64> zeroinitializer
   ret <4 x i64>%res
 }
@@ -238,7 +238,7 @@
 define <8 x float> @test_256_25(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
   %mask = fcmp one <8 x float> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <8 x float>*
-  %r = load <8 x float>* %vaddr, align 32
+  %r = load <8 x float>, <8 x float>* %vaddr, align 32
   %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
   ret <8 x float>%res
 }
@@ -249,7 +249,7 @@
 define <8 x float> @test_256_26(i8 * %addr, <8 x float> %old, <8 x float> %mask1) {
   %mask = fcmp one <8 x float> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <8 x float>*
-  %r = load <8 x float>* %vaddr, align 1
+  %r = load <8 x float>, <8 x float>* %vaddr, align 1
   %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> %old
   ret <8 x float>%res
 }
@@ -260,7 +260,7 @@
 define <8 x float> @test_256_27(i8 * %addr, <8 x float> %mask1) {
   %mask = fcmp one <8 x float> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <8 x float>*
-  %r = load <8 x float>* %vaddr, align 32
+  %r = load <8 x float>, <8 x float>* %vaddr, align 32
   %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
   ret <8 x float>%res
 }
@@ -271,7 +271,7 @@
 define <8 x float> @test_256_28(i8 * %addr, <8 x float> %mask1) {
   %mask = fcmp one <8 x float> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <8 x float>*
-  %r = load <8 x float>* %vaddr, align 1
+  %r = load <8 x float>, <8 x float>* %vaddr, align 1
   %res = select <8 x i1> %mask, <8 x float> %r, <8 x float> zeroinitializer
   ret <8 x float>%res
 }
@@ -282,7 +282,7 @@
 define <4 x double> @test_256_29(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <4 x double>*
-  %r = load <4 x double>* %vaddr, align 32
+  %r = load <4 x double>, <4 x double>* %vaddr, align 32
   %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
   ret <4 x double>%res
 }
@@ -293,7 +293,7 @@
 define <4 x double> @test_256_30(i8 * %addr, <4 x double> %old, <4 x i64> %mask1) {
   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <4 x double>*
-  %r = load <4 x double>* %vaddr, align 1
+  %r = load <4 x double>, <4 x double>* %vaddr, align 1
   %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> %old
   ret <4 x double>%res
 }
@@ -304,7 +304,7 @@
 define <4 x double> @test_256_31(i8 * %addr, <4 x i64> %mask1) {
   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <4 x double>*
-  %r = load <4 x double>* %vaddr, align 32
+  %r = load <4 x double>, <4 x double>* %vaddr, align 32
   %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
   ret <4 x double>%res
 }
@@ -315,7 +315,7 @@
 define <4 x double> @test_256_32(i8 * %addr, <4 x i64> %mask1) {
   %mask = icmp ne <4 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <4 x double>*
-  %r = load <4 x double>* %vaddr, align 1
+  %r = load <4 x double>, <4 x double>* %vaddr, align 1
   %res = select <4 x i1> %mask, <4 x double> %r, <4 x double> zeroinitializer
   ret <4 x double>%res
 }
@@ -325,7 +325,7 @@
 ; CHECK: ret
 define <4 x i32> @test_128_1(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <4 x i32>*
-  %res = load <4 x i32>* %vaddr, align 1
+  %res = load <4 x i32>, <4 x i32>* %vaddr, align 1
   ret <4 x i32>%res
 }
 
@@ -334,7 +334,7 @@
 ; CHECK: ret
 define <4 x i32> @test_128_2(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <4 x i32>*
-  %res = load <4 x i32>* %vaddr, align 16
+  %res = load <4 x i32>, <4 x i32>* %vaddr, align 16
   ret <4 x i32>%res
 }
 
@@ -370,7 +370,7 @@
 ; CHECK: ret
 define  <2 x i64> @test_128_6(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <2 x i64>*
-  %res = load <2 x i64>* %vaddr, align 16
+  %res = load <2 x i64>, <2 x i64>* %vaddr, align 16
   ret <2 x i64>%res
 }
 
@@ -388,7 +388,7 @@
 ; CHECK: ret
 define <2 x i64> @test_128_8(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <2 x i64>*
-  %res = load <2 x i64>* %vaddr, align 1
+  %res = load <2 x i64>, <2 x i64>* %vaddr, align 1
   ret <2 x i64>%res
 }
 
@@ -406,7 +406,7 @@
 ; CHECK: ret
 define <2 x double> @test_128_10(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <2 x double>*
-  %res = load <2 x double>* %vaddr, align 16
+  %res = load <2 x double>, <2 x double>* %vaddr, align 16
   ret <2 x double>%res
 }
 
@@ -424,7 +424,7 @@
 ; CHECK: ret
 define <4 x float> @test_128_12(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <4 x float>*
-  %res = load <4 x float>* %vaddr, align 16
+  %res = load <4 x float>, <4 x float>* %vaddr, align 16
   ret <4 x float>%res
 }
 
@@ -442,7 +442,7 @@
 ; CHECK: ret
 define <2 x double> @test_128_14(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <2 x double>*
-  %res = load <2 x double>* %vaddr, align 1
+  %res = load <2 x double>, <2 x double>* %vaddr, align 1
   ret <2 x double>%res
 }
 
@@ -460,7 +460,7 @@
 ; CHECK: ret
 define <4 x float> @test_128_16(i8 * %addr) {
   %vaddr = bitcast i8* %addr to <4 x float>*
-  %res = load <4 x float>* %vaddr, align 1
+  %res = load <4 x float>, <4 x float>* %vaddr, align 1
   ret <4 x float>%res
 }
 
@@ -470,7 +470,7 @@
 define <4 x i32> @test_128_17(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <4 x i32>*
-  %r = load <4 x i32>* %vaddr, align 16
+  %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
   %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
   ret <4 x i32>%res
 }
@@ -481,7 +481,7 @@
 define <4 x i32> @test_128_18(i8 * %addr, <4 x i32> %old, <4 x i32> %mask1) {
   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <4 x i32>*
-  %r = load <4 x i32>* %vaddr, align 1
+  %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
   %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> %old
   ret <4 x i32>%res
 }
@@ -492,7 +492,7 @@
 define <4 x i32> @test_128_19(i8 * %addr, <4 x i32> %mask1) {
   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <4 x i32>*
-  %r = load <4 x i32>* %vaddr, align 16
+  %r = load <4 x i32>, <4 x i32>* %vaddr, align 16
   %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
   ret <4 x i32>%res
 }
@@ -503,7 +503,7 @@
 define <4 x i32> @test_128_20(i8 * %addr, <4 x i32> %mask1) {
   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <4 x i32>*
-  %r = load <4 x i32>* %vaddr, align 1
+  %r = load <4 x i32>, <4 x i32>* %vaddr, align 1
   %res = select <4 x i1> %mask, <4 x i32> %r, <4 x i32> zeroinitializer
   ret <4 x i32>%res
 }
@@ -514,7 +514,7 @@
 define <2 x i64> @test_128_21(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <2 x i64>*
-  %r = load <2 x i64>* %vaddr, align 16
+  %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
   %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
   ret <2 x i64>%res
 }
@@ -525,7 +525,7 @@
 define <2 x i64> @test_128_22(i8 * %addr, <2 x i64> %old, <2 x i64> %mask1) {
   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <2 x i64>*
-  %r = load <2 x i64>* %vaddr, align 1
+  %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
   %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> %old
   ret <2 x i64>%res
 }
@@ -536,7 +536,7 @@
 define <2 x i64> @test_128_23(i8 * %addr, <2 x i64> %mask1) {
   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <2 x i64>*
-  %r = load <2 x i64>* %vaddr, align 16
+  %r = load <2 x i64>, <2 x i64>* %vaddr, align 16
   %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
   ret <2 x i64>%res
 }
@@ -547,7 +547,7 @@
 define <2 x i64> @test_128_24(i8 * %addr, <2 x i64> %mask1) {
   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <2 x i64>*
-  %r = load <2 x i64>* %vaddr, align 1
+  %r = load <2 x i64>, <2 x i64>* %vaddr, align 1
   %res = select <2 x i1> %mask, <2 x i64> %r, <2 x i64> zeroinitializer
   ret <2 x i64>%res
 }
@@ -558,7 +558,7 @@
 define <4 x float> @test_128_25(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <4 x float>*
-  %r = load <4 x float>* %vaddr, align 16
+  %r = load <4 x float>, <4 x float>* %vaddr, align 16
   %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
   ret <4 x float>%res
 }
@@ -569,7 +569,7 @@
 define <4 x float> @test_128_26(i8 * %addr, <4 x float> %old, <4 x i32> %mask1) {
   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <4 x float>*
-  %r = load <4 x float>* %vaddr, align 1
+  %r = load <4 x float>, <4 x float>* %vaddr, align 1
   %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> %old
   ret <4 x float>%res
 }
@@ -580,7 +580,7 @@
 define <4 x float> @test_128_27(i8 * %addr, <4 x i32> %mask1) {
   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <4 x float>*
-  %r = load <4 x float>* %vaddr, align 16
+  %r = load <4 x float>, <4 x float>* %vaddr, align 16
   %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
   ret <4 x float>%res
 }
@@ -591,7 +591,7 @@
 define <4 x float> @test_128_28(i8 * %addr, <4 x i32> %mask1) {
   %mask = icmp ne <4 x i32> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <4 x float>*
-  %r = load <4 x float>* %vaddr, align 1
+  %r = load <4 x float>, <4 x float>* %vaddr, align 1
   %res = select <4 x i1> %mask, <4 x float> %r, <4 x float> zeroinitializer
   ret <4 x float>%res
 }
@@ -602,7 +602,7 @@
 define <2 x double> @test_128_29(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <2 x double>*
-  %r = load <2 x double>* %vaddr, align 16
+  %r = load <2 x double>, <2 x double>* %vaddr, align 16
   %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
   ret <2 x double>%res
 }
@@ -613,7 +613,7 @@
 define <2 x double> @test_128_30(i8 * %addr, <2 x double> %old, <2 x i64> %mask1) {
   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <2 x double>*
-  %r = load <2 x double>* %vaddr, align 1
+  %r = load <2 x double>, <2 x double>* %vaddr, align 1
   %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> %old
   ret <2 x double>%res
 }
@@ -624,7 +624,7 @@
 define <2 x double> @test_128_31(i8 * %addr, <2 x i64> %mask1) {
   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <2 x double>*
-  %r = load <2 x double>* %vaddr, align 16
+  %r = load <2 x double>, <2 x double>* %vaddr, align 16
   %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
   ret <2 x double>%res
 }
@@ -635,7 +635,7 @@
 define <2 x double> @test_128_32(i8 * %addr, <2 x i64> %mask1) {
   %mask = icmp ne <2 x i64> %mask1, zeroinitializer
   %vaddr = bitcast i8* %addr to <2 x double>*
-  %r = load <2 x double>* %vaddr, align 1
+  %r = load <2 x double>, <2 x double>* %vaddr, align 1
   %res = select <2 x i1> %mask, <2 x double> %r, <2 x double> zeroinitializer
   ret <2 x double>%res
 }
diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
index b6b5085..aed8cb1 100644
--- a/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
@@ -45,7 +45,7 @@
 ; CHECK: vmovdqa32
 ; CHECK: ret
 define <8 x i32> @test256_5(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwind {
-  %y = load <8 x i32>* %yp, align 4
+  %y = load <8 x i32>, <8 x i32>* %yp, align 4
   %mask = icmp eq <8 x i32> %x, %y
   %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
   ret <8 x i32> %max
@@ -56,7 +56,7 @@
 ; CHECK: vmovdqa32
 ; CHECK: ret
 define <8 x i32> @test256_6(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind {
-  %y = load <8 x i32>* %y.ptr, align 4
+  %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
   %mask = icmp sgt <8 x i32> %x, %y
   %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
   ret <8 x i32> %max
@@ -67,7 +67,7 @@
 ; CHECK: vmovdqa32
 ; CHECK: ret
 define <8 x i32> @test256_7(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind {
-  %y = load <8 x i32>* %y.ptr, align 4
+  %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
   %mask = icmp sle <8 x i32> %x, %y
   %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
   ret <8 x i32> %max
@@ -78,7 +78,7 @@
 ; CHECK: vmovdqa32
 ; CHECK: ret
 define <8 x i32> @test256_8(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nounwind {
-  %y = load <8 x i32>* %y.ptr, align 4
+  %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
   %mask = icmp ule <8 x i32> %x, %y
   %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
   ret <8 x i32> %max
@@ -114,7 +114,7 @@
 ; CHECK: ret
 define <4 x i64> @test256_11(<4 x i64> %x, <4 x i64>* %y.ptr, <4 x i64> %x1, <4 x i64> %y1) nounwind {
   %mask1 = icmp sgt <4 x i64> %x1, %y1
-  %y = load <4 x i64>* %y.ptr, align 4
+  %y = load <4 x i64>, <4 x i64>* %y.ptr, align 4
   %mask0 = icmp sgt <4 x i64> %x, %y
   %mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer
   %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %x1
@@ -127,7 +127,7 @@
 ; CHECK: ret
 define <8 x i32> @test256_12(<8 x i32> %x, <8 x i32>* %y.ptr, <8 x i32> %x1, <8 x i32> %y1) nounwind {
   %mask1 = icmp sge <8 x i32> %x1, %y1
-  %y = load <8 x i32>* %y.ptr, align 4
+  %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
   %mask0 = icmp ule <8 x i32> %x, %y
   %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer
   %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %x1
@@ -139,7 +139,7 @@
 ; CHECK: vmovdqa64
 ; CHECK: ret
 define <4 x i64> @test256_13(<4 x i64> %x, <4 x i64> %x1, i64* %yb.ptr) nounwind {
-  %yb = load i64* %yb.ptr, align 4
+  %yb = load i64, i64* %yb.ptr, align 4
   %y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0
   %y = shufflevector <4 x i64> %y.0, <4 x i64> undef, <4 x i32> zeroinitializer
   %mask = icmp eq <4 x i64> %x, %y
@@ -152,7 +152,7 @@
 ; CHECK: vmovdqa32
 ; CHECK: ret
 define <8 x i32> @test256_14(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1) nounwind {
-  %yb = load i32* %yb.ptr, align 4
+  %yb = load i32, i32* %yb.ptr, align 4
   %y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0
   %y = shufflevector <8 x i32> %y.0, <8 x i32> undef, <8 x i32> zeroinitializer
   %mask = icmp sle <8 x i32> %x, %y
@@ -166,7 +166,7 @@
 ; CHECK: ret
 define <8 x i32> @test256_15(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1, <8 x i32> %y1) nounwind {
   %mask1 = icmp sge <8 x i32> %x1, %y1
-  %yb = load i32* %yb.ptr, align 4
+  %yb = load i32, i32* %yb.ptr, align 4
   %y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0
   %y = shufflevector <8 x i32> %y.0, <8 x i32> undef, <8 x i32> zeroinitializer
   %mask0 = icmp sgt <8 x i32> %x, %y
@@ -181,7 +181,7 @@
 ; CHECK: ret
 define <4 x i64> @test256_16(<4 x i64> %x, i64* %yb.ptr, <4 x i64> %x1, <4 x i64> %y1) nounwind {
   %mask1 = icmp sge <4 x i64> %x1, %y1
-  %yb = load i64* %yb.ptr, align 4
+  %yb = load i64, i64* %yb.ptr, align 4
   %y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0
   %y = shufflevector <4 x i64> %y.0, <4 x i64> undef, <4 x i32> zeroinitializer
   %mask0 = icmp sgt <4 x i64> %x, %y
@@ -235,7 +235,7 @@
 ; CHECK: vmovdqa32
 ; CHECK: ret
 define <4 x i32> @test128_5(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %yp) nounwind {
-  %y = load <4 x i32>* %yp, align 4
+  %y = load <4 x i32>, <4 x i32>* %yp, align 4
   %mask = icmp eq <4 x i32> %x, %y
   %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
   ret <4 x i32> %max
@@ -246,7 +246,7 @@
 ; CHECK: vmovdqa32
 ; CHECK: ret
 define <4 x i32> @test128_6(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
-  %y = load <4 x i32>* %y.ptr, align 4
+  %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
   %mask = icmp sgt <4 x i32> %x, %y
   %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
   ret <4 x i32> %max
@@ -257,7 +257,7 @@
 ; CHECK: vmovdqa32
 ; CHECK: ret
 define <4 x i32> @test128_7(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
-  %y = load <4 x i32>* %y.ptr, align 4
+  %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
   %mask = icmp sle <4 x i32> %x, %y
   %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
   ret <4 x i32> %max
@@ -268,7 +268,7 @@
 ; CHECK: vmovdqa32
 ; CHECK: ret
 define <4 x i32> @test128_8(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nounwind {
-  %y = load <4 x i32>* %y.ptr, align 4
+  %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
   %mask = icmp ule <4 x i32> %x, %y
   %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
   ret <4 x i32> %max
@@ -304,7 +304,7 @@
 ; CHECK: ret
 define <2 x i64> @test128_11(<2 x i64> %x, <2 x i64>* %y.ptr, <2 x i64> %x1, <2 x i64> %y1) nounwind {
   %mask1 = icmp sgt <2 x i64> %x1, %y1
-  %y = load <2 x i64>* %y.ptr, align 4
+  %y = load <2 x i64>, <2 x i64>* %y.ptr, align 4
   %mask0 = icmp sgt <2 x i64> %x, %y
   %mask = select <2 x i1> %mask0, <2 x i1> %mask1, <2 x i1> zeroinitializer
   %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %x1
@@ -317,7 +317,7 @@
 ; CHECK: ret
 define <4 x i32> @test128_12(<4 x i32> %x, <4 x i32>* %y.ptr, <4 x i32> %x1, <4 x i32> %y1) nounwind {
   %mask1 = icmp sge <4 x i32> %x1, %y1
-  %y = load <4 x i32>* %y.ptr, align 4
+  %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
   %mask0 = icmp ule <4 x i32> %x, %y
   %mask = select <4 x i1> %mask0, <4 x i1> %mask1, <4 x i1> zeroinitializer
   %max = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %x1
@@ -329,7 +329,7 @@
 ; CHECK: vmovdqa64
 ; CHECK: ret
 define <2 x i64> @test128_13(<2 x i64> %x, <2 x i64> %x1, i64* %yb.ptr) nounwind {
-  %yb = load i64* %yb.ptr, align 4
+  %yb = load i64, i64* %yb.ptr, align 4
   %y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0
   %y = insertelement <2 x i64> %y.0, i64 %yb, i32 1
   %mask = icmp eq <2 x i64> %x, %y
@@ -342,7 +342,7 @@
 ; CHECK: vmovdqa32
 ; CHECK: ret
 define <4 x i32> @test128_14(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1) nounwind {
-  %yb = load i32* %yb.ptr, align 4
+  %yb = load i32, i32* %yb.ptr, align 4
   %y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0
   %y = shufflevector <4 x i32> %y.0, <4 x i32> undef, <4 x i32> zeroinitializer
   %mask = icmp sle <4 x i32> %x, %y
@@ -356,7 +356,7 @@
 ; CHECK: ret
 define <4 x i32> @test128_15(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1, <4 x i32> %y1) nounwind {
   %mask1 = icmp sge <4 x i32> %x1, %y1
-  %yb = load i32* %yb.ptr, align 4
+  %yb = load i32, i32* %yb.ptr, align 4
   %y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0
   %y = shufflevector <4 x i32> %y.0, <4 x i32> undef, <4 x i32> zeroinitializer
   %mask0 = icmp sgt <4 x i32> %x, %y
@@ -371,7 +371,7 @@
 ; CHECK: ret
 define <2 x i64> @test128_16(<2 x i64> %x, i64* %yb.ptr, <2 x i64> %x1, <2 x i64> %y1) nounwind {
   %mask1 = icmp sge <2 x i64> %x1, %y1
-  %yb = load i64* %yb.ptr, align 4
+  %yb = load i64, i64* %yb.ptr, align 4
   %y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0
   %y = insertelement <2 x i64> %y.0, i64 %yb, i32 1
   %mask0 = icmp sgt <2 x i64> %x, %y
diff --git a/llvm/test/CodeGen/X86/bitcast-mmx.ll b/llvm/test/CodeGen/X86/bitcast-mmx.ll
index de1cb5a..4107f39 100644
--- a/llvm/test/CodeGen/X86/bitcast-mmx.ll
+++ b/llvm/test/CodeGen/X86/bitcast-mmx.ll
@@ -64,8 +64,8 @@
 ; CHECK-NEXT:    retq
 entry:
   %0 = bitcast <1 x i64>* %y to x86_mmx*
-  %1 = load x86_mmx* %0, align 8
-  %2 = load i32* %n, align 4
+  %1 = load x86_mmx, x86_mmx* %0, align 8
+  %2 = load i32, i32* %n, align 4
   %3 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %1, i32 %2)
   %4 = bitcast x86_mmx %3 to i64
   ret i64 %4
diff --git a/llvm/test/CodeGen/X86/block-placement.ll b/llvm/test/CodeGen/X86/block-placement.ll
index eaba537..e0276e4 100644
--- a/llvm/test/CodeGen/X86/block-placement.ll
+++ b/llvm/test/CodeGen/X86/block-placement.ll
@@ -25,7 +25,7 @@
 
 entry:
   %gep1 = getelementptr i32, i32* %a, i32 1
-  %val1 = load i32* %gep1
+  %val1 = load i32, i32* %gep1
   %cond1 = icmp ugt i32 %val1, 1
   br i1 %cond1, label %then1, label %else1, !prof !0
 
@@ -35,7 +35,7 @@
 
 else1:
   %gep2 = getelementptr i32, i32* %a, i32 2
-  %val2 = load i32* %gep2
+  %val2 = load i32, i32* %gep2
   %cond2 = icmp ugt i32 %val2, 2
   br i1 %cond2, label %then2, label %else2, !prof !0
 
@@ -45,7 +45,7 @@
 
 else2:
   %gep3 = getelementptr i32, i32* %a, i32 3
-  %val3 = load i32* %gep3
+  %val3 = load i32, i32* %gep3
   %cond3 = icmp ugt i32 %val3, 3
   br i1 %cond3, label %then3, label %else3, !prof !0
 
@@ -55,7 +55,7 @@
 
 else3:
   %gep4 = getelementptr i32, i32* %a, i32 4
-  %val4 = load i32* %gep4
+  %val4 = load i32, i32* %gep4
   %cond4 = icmp ugt i32 %val4, 4
   br i1 %cond4, label %then4, label %else4, !prof !0
 
@@ -65,7 +65,7 @@
 
 else4:
   %gep5 = getelementptr i32, i32* %a, i32 3
-  %val5 = load i32* %gep5
+  %val5 = load i32, i32* %gep5
   %cond5 = icmp ugt i32 %val5, 3
   br i1 %cond5, label %then5, label %exit, !prof !0
 
@@ -114,7 +114,7 @@
 
 body3:
   %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
-  %0 = load i32* %arrayidx
+  %0 = load i32, i32* %arrayidx
   %sum = add nsw i32 %0, %base
   %next = add i32 %iv, 1
   %exitcond = icmp eq i32 %next, %i
@@ -167,7 +167,7 @@
 
 body4:
   %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
-  %0 = load i32* %arrayidx
+  %0 = load i32, i32* %arrayidx
   %sum = add nsw i32 %0, %base
   %next = add i32 %iv, 1
   %exitcond = icmp eq i32 %next, %i
@@ -198,7 +198,7 @@
 
 body1:
   %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
-  %0 = load i32* %arrayidx
+  %0 = load i32, i32* %arrayidx
   %sum = add nsw i32 %0, %base
   %bailcond1 = icmp eq i32 %sum, 42
   br label %body0
@@ -223,7 +223,7 @@
   %iv = phi i32 [ 0, %entry ], [ %next, %body1 ]
   %base = phi i32 [ 0, %entry ], [ %sum, %body1 ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
-  %0 = load i32* %arrayidx
+  %0 = load i32, i32* %arrayidx
   %sum = add nsw i32 %0, %base
   %bailcond1 = icmp eq i32 %sum, 42
   br i1 %bailcond1, label %exit, label %body1
@@ -253,7 +253,7 @@
   %iv = phi i32 [ 0, %entry ], [ %next, %body ]
   %base = phi i32 [ 0, %entry ], [ %sum, %body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
-  %0 = load i32* %arrayidx
+  %0 = load i32, i32* %arrayidx
   %sum = add nsw i32 %0, %base
   %next = add i32 %iv, 1
   %exitcond = icmp eq i32 %next, %i
@@ -280,7 +280,7 @@
 loop.body.1:
   %iv = phi i32 [ 0, %entry ], [ %next, %loop.body.2 ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i32 %iv
-  %bidx = load i32* %arrayidx
+  %bidx = load i32, i32* %arrayidx
   br label %inner.loop.body
 
 inner.loop.body:
@@ -288,7 +288,7 @@
   %base = phi i32 [ 0, %loop.body.1 ], [ %sum, %inner.loop.body ]
   %scaled_idx = mul i32 %bidx, %iv
   %inner.arrayidx = getelementptr inbounds i32, i32* %b, i32 %scaled_idx
-  %0 = load i32* %inner.arrayidx
+  %0 = load i32, i32* %inner.arrayidx
   %sum = add nsw i32 %0, %base
   %inner.next = add i32 %iv, 1
   %inner.exitcond = icmp eq i32 %inner.next, %i
@@ -322,13 +322,13 @@
   br i1 undef, label %loop.body3, label %loop.body2
 
 loop.body2:
-  %ptr = load i32** undef, align 4
+  %ptr = load i32*, i32** undef, align 4
   br label %loop.body3
 
 loop.body3:
   %myptr = phi i32* [ %ptr2, %loop.body5 ], [ %ptr, %loop.body2 ], [ undef, %loop.body1 ]
   %bcmyptr = bitcast i32* %myptr to i32*
-  %val = load i32* %bcmyptr, align 4
+  %val = load i32, i32* %bcmyptr, align 4
   %comp = icmp eq i32 %val, 48
   br i1 %comp, label %loop.body4, label %loop.body5
 
@@ -336,7 +336,7 @@
   br i1 undef, label %loop.header, label %loop.body5
 
 loop.body5:
-  %ptr2 = load i32** undef, align 4
+  %ptr2 = load i32*, i32** undef, align 4
   br label %loop.body3
 }
 
@@ -366,7 +366,7 @@
   br i1 %comp0, label %bail, label %loop.body1
 
 loop.body1:
-  %val0 = load i32** undef, align 4
+  %val0 = load i32*, i32** undef, align 4
   br i1 undef, label %loop.body2, label %loop.inner1.begin
 
 loop.body2:
@@ -375,7 +375,7 @@
 loop.body3:
   %ptr1 = getelementptr inbounds i32, i32* %val0, i32 0
   %castptr1 = bitcast i32* %ptr1 to i32**
-  %val1 = load i32** %castptr1, align 4
+  %val1 = load i32*, i32** %castptr1, align 4
   br label %loop.inner1.begin
 
 loop.inner1.begin:
@@ -387,7 +387,7 @@
 loop.inner1.end:
   %ptr2 = getelementptr inbounds i32, i32* %valphi, i32 0
   %castptr2 = bitcast i32* %ptr2 to i32**
-  %val2 = load i32** %castptr2, align 4
+  %val2 = load i32*, i32** %castptr2, align 4
   br label %loop.inner1.begin
 
 loop.body4.dead:
@@ -486,7 +486,7 @@
   br i1 %cond, label %entry.if.then_crit_edge, label %lor.lhs.false, !prof !1
 
 entry.if.then_crit_edge:
-  %.pre14 = load i8* undef, align 1
+  %.pre14 = load i8, i8* undef, align 1
   br label %if.then
 
 lor.lhs.false:
@@ -616,7 +616,7 @@
   br label %loop2a
 
 loop1:
-  %next.load = load i32** undef
+  %next.load = load i32*, i32** undef
   br i1 %comp.a, label %loop2a, label %loop2b
 
 loop2a:
@@ -728,199 +728,199 @@
 entry:
   br label %0
 
-  %val0 = load volatile float* undef
+  %val0 = load volatile float, float* undef
   %cmp0 = fcmp une float %val0, undef
   br i1 %cmp0, label %1, label %0
-  %val1 = load volatile float* undef
+  %val1 = load volatile float, float* undef
   %cmp1 = fcmp une float %val1, undef
   br i1 %cmp1, label %2, label %1
-  %val2 = load volatile float* undef
+  %val2 = load volatile float, float* undef
   %cmp2 = fcmp une float %val2, undef
   br i1 %cmp2, label %3, label %2
-  %val3 = load volatile float* undef
+  %val3 = load volatile float, float* undef
   %cmp3 = fcmp une float %val3, undef
   br i1 %cmp3, label %4, label %3
-  %val4 = load volatile float* undef
+  %val4 = load volatile float, float* undef
   %cmp4 = fcmp une float %val4, undef
   br i1 %cmp4, label %5, label %4
-  %val5 = load volatile float* undef
+  %val5 = load volatile float, float* undef
   %cmp5 = fcmp une float %val5, undef
   br i1 %cmp5, label %6, label %5
-  %val6 = load volatile float* undef
+  %val6 = load volatile float, float* undef
   %cmp6 = fcmp une float %val6, undef
   br i1 %cmp6, label %7, label %6
-  %val7 = load volatile float* undef
+  %val7 = load volatile float, float* undef
   %cmp7 = fcmp une float %val7, undef
   br i1 %cmp7, label %8, label %7
-  %val8 = load volatile float* undef
+  %val8 = load volatile float, float* undef
   %cmp8 = fcmp une float %val8, undef
   br i1 %cmp8, label %9, label %8
-  %val9 = load volatile float* undef
+  %val9 = load volatile float, float* undef
   %cmp9 = fcmp une float %val9, undef
   br i1 %cmp9, label %10, label %9
-  %val10 = load volatile float* undef
+  %val10 = load volatile float, float* undef
   %cmp10 = fcmp une float %val10, undef
   br i1 %cmp10, label %11, label %10
-  %val11 = load volatile float* undef
+  %val11 = load volatile float, float* undef
   %cmp11 = fcmp une float %val11, undef
   br i1 %cmp11, label %12, label %11
-  %val12 = load volatile float* undef
+  %val12 = load volatile float, float* undef
   %cmp12 = fcmp une float %val12, undef
   br i1 %cmp12, label %13, label %12
-  %val13 = load volatile float* undef
+  %val13 = load volatile float, float* undef
   %cmp13 = fcmp une float %val13, undef
   br i1 %cmp13, label %14, label %13
-  %val14 = load volatile float* undef
+  %val14 = load volatile float, float* undef
   %cmp14 = fcmp une float %val14, undef
   br i1 %cmp14, label %15, label %14
-  %val15 = load volatile float* undef
+  %val15 = load volatile float, float* undef
   %cmp15 = fcmp une float %val15, undef
   br i1 %cmp15, label %16, label %15
-  %val16 = load volatile float* undef
+  %val16 = load volatile float, float* undef
   %cmp16 = fcmp une float %val16, undef
   br i1 %cmp16, label %17, label %16
-  %val17 = load volatile float* undef
+  %val17 = load volatile float, float* undef
   %cmp17 = fcmp une float %val17, undef
   br i1 %cmp17, label %18, label %17
-  %val18 = load volatile float* undef
+  %val18 = load volatile float, float* undef
   %cmp18 = fcmp une float %val18, undef
   br i1 %cmp18, label %19, label %18
-  %val19 = load volatile float* undef
+  %val19 = load volatile float, float* undef
   %cmp19 = fcmp une float %val19, undef
   br i1 %cmp19, label %20, label %19
-  %val20 = load volatile float* undef
+  %val20 = load volatile float, float* undef
   %cmp20 = fcmp une float %val20, undef
   br i1 %cmp20, label %21, label %20
-  %val21 = load volatile float* undef
+  %val21 = load volatile float, float* undef
   %cmp21 = fcmp une float %val21, undef
   br i1 %cmp21, label %22, label %21
-  %val22 = load volatile float* undef
+  %val22 = load volatile float, float* undef
   %cmp22 = fcmp une float %val22, undef
   br i1 %cmp22, label %23, label %22
-  %val23 = load volatile float* undef
+  %val23 = load volatile float, float* undef
   %cmp23 = fcmp une float %val23, undef
   br i1 %cmp23, label %24, label %23
-  %val24 = load volatile float* undef
+  %val24 = load volatile float, float* undef
   %cmp24 = fcmp une float %val24, undef
   br i1 %cmp24, label %25, label %24
-  %val25 = load volatile float* undef
+  %val25 = load volatile float, float* undef
   %cmp25 = fcmp une float %val25, undef
   br i1 %cmp25, label %26, label %25
-  %val26 = load volatile float* undef
+  %val26 = load volatile float, float* undef
   %cmp26 = fcmp une float %val26, undef
   br i1 %cmp26, label %27, label %26
-  %val27 = load volatile float* undef
+  %val27 = load volatile float, float* undef
   %cmp27 = fcmp une float %val27, undef
   br i1 %cmp27, label %28, label %27
-  %val28 = load volatile float* undef
+  %val28 = load volatile float, float* undef
   %cmp28 = fcmp une float %val28, undef
   br i1 %cmp28, label %29, label %28
-  %val29 = load volatile float* undef
+  %val29 = load volatile float, float* undef
   %cmp29 = fcmp une float %val29, undef
   br i1 %cmp29, label %30, label %29
-  %val30 = load volatile float* undef
+  %val30 = load volatile float, float* undef
   %cmp30 = fcmp une float %val30, undef
   br i1 %cmp30, label %31, label %30
-  %val31 = load volatile float* undef
+  %val31 = load volatile float, float* undef
   %cmp31 = fcmp une float %val31, undef
   br i1 %cmp31, label %32, label %31
-  %val32 = load volatile float* undef
+  %val32 = load volatile float, float* undef
   %cmp32 = fcmp une float %val32, undef
   br i1 %cmp32, label %33, label %32
-  %val33 = load volatile float* undef
+  %val33 = load volatile float, float* undef
   %cmp33 = fcmp une float %val33, undef
   br i1 %cmp33, label %34, label %33
-  %val34 = load volatile float* undef
+  %val34 = load volatile float, float* undef
   %cmp34 = fcmp une float %val34, undef
   br i1 %cmp34, label %35, label %34
-  %val35 = load volatile float* undef
+  %val35 = load volatile float, float* undef
   %cmp35 = fcmp une float %val35, undef
   br i1 %cmp35, label %36, label %35
-  %val36 = load volatile float* undef
+  %val36 = load volatile float, float* undef
   %cmp36 = fcmp une float %val36, undef
   br i1 %cmp36, label %37, label %36
-  %val37 = load volatile float* undef
+  %val37 = load volatile float, float* undef
   %cmp37 = fcmp une float %val37, undef
   br i1 %cmp37, label %38, label %37
-  %val38 = load volatile float* undef
+  %val38 = load volatile float, float* undef
   %cmp38 = fcmp une float %val38, undef
   br i1 %cmp38, label %39, label %38
-  %val39 = load volatile float* undef
+  %val39 = load volatile float, float* undef
   %cmp39 = fcmp une float %val39, undef
   br i1 %cmp39, label %40, label %39
-  %val40 = load volatile float* undef
+  %val40 = load volatile float, float* undef
   %cmp40 = fcmp une float %val40, undef
   br i1 %cmp40, label %41, label %40
-  %val41 = load volatile float* undef
+  %val41 = load volatile float, float* undef
   %cmp41 = fcmp une float %val41, undef
   br i1 %cmp41, label %42, label %41
-  %val42 = load volatile float* undef
+  %val42 = load volatile float, float* undef
   %cmp42 = fcmp une float %val42, undef
   br i1 %cmp42, label %43, label %42
-  %val43 = load volatile float* undef
+  %val43 = load volatile float, float* undef
   %cmp43 = fcmp une float %val43, undef
   br i1 %cmp43, label %44, label %43
-  %val44 = load volatile float* undef
+  %val44 = load volatile float, float* undef
   %cmp44 = fcmp une float %val44, undef
   br i1 %cmp44, label %45, label %44
-  %val45 = load volatile float* undef
+  %val45 = load volatile float, float* undef
   %cmp45 = fcmp une float %val45, undef
   br i1 %cmp45, label %46, label %45
-  %val46 = load volatile float* undef
+  %val46 = load volatile float, float* undef
   %cmp46 = fcmp une float %val46, undef
   br i1 %cmp46, label %47, label %46
-  %val47 = load volatile float* undef
+  %val47 = load volatile float, float* undef
   %cmp47 = fcmp une float %val47, undef
   br i1 %cmp47, label %48, label %47
-  %val48 = load volatile float* undef
+  %val48 = load volatile float, float* undef
   %cmp48 = fcmp une float %val48, undef
   br i1 %cmp48, label %49, label %48
-  %val49 = load volatile float* undef
+  %val49 = load volatile float, float* undef
   %cmp49 = fcmp une float %val49, undef
   br i1 %cmp49, label %50, label %49
-  %val50 = load volatile float* undef
+  %val50 = load volatile float, float* undef
   %cmp50 = fcmp une float %val50, undef
   br i1 %cmp50, label %51, label %50
-  %val51 = load volatile float* undef
+  %val51 = load volatile float, float* undef
   %cmp51 = fcmp une float %val51, undef
   br i1 %cmp51, label %52, label %51
-  %val52 = load volatile float* undef
+  %val52 = load volatile float, float* undef
   %cmp52 = fcmp une float %val52, undef
   br i1 %cmp52, label %53, label %52
-  %val53 = load volatile float* undef
+  %val53 = load volatile float, float* undef
   %cmp53 = fcmp une float %val53, undef
   br i1 %cmp53, label %54, label %53
-  %val54 = load volatile float* undef
+  %val54 = load volatile float, float* undef
   %cmp54 = fcmp une float %val54, undef
   br i1 %cmp54, label %55, label %54
-  %val55 = load volatile float* undef
+  %val55 = load volatile float, float* undef
   %cmp55 = fcmp une float %val55, undef
   br i1 %cmp55, label %56, label %55
-  %val56 = load volatile float* undef
+  %val56 = load volatile float, float* undef
   %cmp56 = fcmp une float %val56, undef
   br i1 %cmp56, label %57, label %56
-  %val57 = load volatile float* undef
+  %val57 = load volatile float, float* undef
   %cmp57 = fcmp une float %val57, undef
   br i1 %cmp57, label %58, label %57
-  %val58 = load volatile float* undef
+  %val58 = load volatile float, float* undef
   %cmp58 = fcmp une float %val58, undef
   br i1 %cmp58, label %59, label %58
-  %val59 = load volatile float* undef
+  %val59 = load volatile float, float* undef
   %cmp59 = fcmp une float %val59, undef
   br i1 %cmp59, label %60, label %59
-  %val60 = load volatile float* undef
+  %val60 = load volatile float, float* undef
   %cmp60 = fcmp une float %val60, undef
   br i1 %cmp60, label %61, label %60
-  %val61 = load volatile float* undef
+  %val61 = load volatile float, float* undef
   %cmp61 = fcmp une float %val61, undef
   br i1 %cmp61, label %62, label %61
-  %val62 = load volatile float* undef
+  %val62 = load volatile float, float* undef
   %cmp62 = fcmp une float %val62, undef
   br i1 %cmp62, label %63, label %62
-  %val63 = load volatile float* undef
+  %val63 = load volatile float, float* undef
   %cmp63 = fcmp une float %val63, undef
   br i1 %cmp63, label %64, label %63
-  %val64 = load volatile float* undef
+  %val64 = load volatile float, float* undef
   %cmp64 = fcmp une float %val64, undef
   br i1 %cmp64, label %65, label %64
 
@@ -979,14 +979,14 @@
   %dec = add nsw i32 %l.0, -1
   %idxprom = sext i32 %dec to i64
   %arrayidx = getelementptr inbounds double, double* %ra, i64 %idxprom
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   br label %if.end10
 
 if.else:
   %idxprom1 = sext i32 %ir.0 to i64
   %arrayidx2 = getelementptr inbounds double, double* %ra, i64 %idxprom1
-  %1 = load double* %arrayidx2, align 8
-  %2 = load double* %arrayidx3, align 8
+  %1 = load double, double* %arrayidx2, align 8
+  %2 = load double, double* %arrayidx3, align 8
   store double %2, double* %arrayidx2, align 8
   %dec6 = add nsw i32 %ir.0, -1
   %cmp7 = icmp eq i32 %dec6, 1
@@ -1020,11 +1020,11 @@
 land.lhs.true:
   %idxprom13 = sext i32 %j.0 to i64
   %arrayidx14 = getelementptr inbounds double, double* %ra, i64 %idxprom13
-  %3 = load double* %arrayidx14, align 8
+  %3 = load double, double* %arrayidx14, align 8
   %add15 = add nsw i32 %j.0, 1
   %idxprom16 = sext i32 %add15 to i64
   %arrayidx17 = getelementptr inbounds double, double* %ra, i64 %idxprom16
-  %4 = load double* %arrayidx17, align 8
+  %4 = load double, double* %arrayidx17, align 8
   %cmp18 = fcmp olt double %3, %4
   br i1 %cmp18, label %if.then19, label %if.end20
 
@@ -1035,7 +1035,7 @@
   %j.1 = phi i32 [ %add15, %if.then19 ], [ %j.0, %land.lhs.true ], [ %j.0, %while.body ]
   %idxprom21 = sext i32 %j.1 to i64
   %arrayidx22 = getelementptr inbounds double, double* %ra, i64 %idxprom21
-  %5 = load double* %arrayidx22, align 8
+  %5 = load double, double* %arrayidx22, align 8
   %cmp23 = fcmp olt double %rra.0, %5
   br i1 %cmp23, label %if.then24, label %while.cond
 
@@ -1066,7 +1066,7 @@
 
 entry:
   %gep1 = getelementptr i32, i32* %a, i32 1
-  %val1 = load i32* %gep1
+  %val1 = load i32, i32* %gep1
   %cond1 = icmp ugt i32 %val1, 1
   br i1 %cond1, label %then, label %else
 
@@ -1076,7 +1076,7 @@
 
 else:
   %gep2 = getelementptr i32, i32* %a, i32 2
-  %val2 = load i32* %gep2
+  %val2 = load i32, i32* %gep2
   br label %exit
 
 exit:
diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll
index a707209..ccc4533 100644
--- a/llvm/test/CodeGen/X86/bmi.ll
+++ b/llvm/test/CodeGen/X86/bmi.ll
@@ -27,7 +27,7 @@
 }
 
 define i32 @tzcnt32_load(i32* %x) nounwind  {
-  %x1 = load i32* %x
+  %x1 = load i32, i32* %x
   %tmp = tail call i32 @llvm.cttz.i32(i32 %x1, i1 false )
   ret i32 %tmp
 ; CHECK-LABEL: tzcnt32_load:
@@ -78,7 +78,7 @@
 }
 
 define i32 @andn32_load(i32 %x, i32* %y) nounwind readnone {
-  %y1 = load i32* %y
+  %y1 = load i32, i32* %y
   %tmp1 = xor i32 %x, -1
   %tmp2 = and i32 %y1, %tmp1
   ret i32 %tmp2
@@ -102,7 +102,7 @@
 }
 
 define i32 @bextr32_load(i32* %x, i32 %y) nounwind readnone {
-  %x1 = load i32* %x
+  %x1 = load i32, i32* %x
   %tmp = tail call i32 @llvm.x86.bmi.bextr.32(i32 %x1, i32 %y)
   ret i32 %tmp
 ; CHECK-LABEL: bextr32_load:
@@ -120,7 +120,7 @@
 }
 
 define i32 @bextr32b_load(i32* %x) nounwind uwtable readnone ssp {
-  %1 = load i32* %x
+  %1 = load i32, i32* %x
   %2 = lshr i32 %1, 4
   %3 = and i32 %2, 4095
   ret i32 %3
@@ -153,7 +153,7 @@
 }
 
 define i32 @bzhi32_load(i32* %x, i32 %y) nounwind readnone {
-  %x1 = load i32* %x
+  %x1 = load i32, i32* %x
   %tmp = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %x1, i32 %y)
   ret i32 %tmp
 ; CHECK-LABEL: bzhi32_load:
@@ -184,7 +184,7 @@
 
 define i32 @bzhi32b_load(i32* %w, i8 zeroext %index) #0 {
 entry:
-  %x = load i32* %w
+  %x = load i32, i32* %w
   %conv = zext i8 %index to i32
   %shl = shl i32 1, %conv
   %sub = add nsw i32 %shl, -1
@@ -242,7 +242,7 @@
 }
 
 define i32 @blsi32_load(i32* %x) nounwind readnone {
-  %x1 = load i32* %x
+  %x1 = load i32, i32* %x
   %tmp = sub i32 0, %x1
   %tmp2 = and i32 %x1, %tmp
   ret i32 %tmp2
@@ -267,7 +267,7 @@
 }
 
 define i32 @blsmsk32_load(i32* %x) nounwind readnone {
-  %x1 = load i32* %x
+  %x1 = load i32, i32* %x
   %tmp = sub i32 %x1, 1
   %tmp2 = xor i32 %x1, %tmp
   ret i32 %tmp2
@@ -292,7 +292,7 @@
 }
 
 define i32 @blsr32_load(i32* %x) nounwind readnone {
-  %x1 = load i32* %x
+  %x1 = load i32, i32* %x
   %tmp = sub i32 %x1, 1
   %tmp2 = and i32 %x1, %tmp
   ret i32 %tmp2
@@ -316,7 +316,7 @@
 }
 
 define i32 @pdep32_load(i32 %x, i32* %y) nounwind readnone {
-  %y1 = load i32* %y
+  %y1 = load i32, i32* %y
   %tmp = tail call i32 @llvm.x86.bmi.pdep.32(i32 %x, i32 %y1)
   ret i32 %tmp
 ; CHECK-LABEL: pdep32_load:
@@ -342,7 +342,7 @@
 }
 
 define i32 @pext32_load(i32 %x, i32* %y) nounwind readnone {
-  %y1 = load i32* %y
+  %y1 = load i32, i32* %y
   %tmp = tail call i32 @llvm.x86.bmi.pext.32(i32 %x, i32 %y1)
   ret i32 %tmp
 ; CHECK-LABEL: pext32_load:
diff --git a/llvm/test/CodeGen/X86/break-anti-dependencies.ll b/llvm/test/CodeGen/X86/break-anti-dependencies.ll
index 614d0ad..c54ac10 100644
--- a/llvm/test/CodeGen/X86/break-anti-dependencies.ll
+++ b/llvm/test/CodeGen/X86/break-anti-dependencies.ll
@@ -10,14 +10,14 @@
 
 define void @goo(double* %r, double* %p, double* %q) nounwind {
 entry:
-	%0 = load double* %p, align 8
+	%0 = load double, double* %p, align 8
 	%1 = fadd double %0, 1.100000e+00
 	%2 = fmul double %1, 1.200000e+00
 	%3 = fadd double %2, 1.300000e+00
 	%4 = fmul double %3, 1.400000e+00
 	%5 = fadd double %4, 1.500000e+00
 	%6 = fptosi double %5 to i32
-	%7 = load double* %r, align 8
+	%7 = load double, double* %r, align 8
 	%8 = fadd double %7, 7.100000e+00
 	%9 = fmul double %8, 7.200000e+00
 	%10 = fadd double %9, 7.300000e+00
diff --git a/llvm/test/CodeGen/X86/break-false-dep.ll b/llvm/test/CodeGen/X86/break-false-dep.ll
index 03a5f20..699de22 100644
--- a/llvm/test/CodeGen/X86/break-false-dep.ll
+++ b/llvm/test/CodeGen/X86/break-false-dep.ll
@@ -8,7 +8,7 @@
 ; SSE: movss ([[A0:%rdi|%rcx]]), %xmm0
 ; SSE: cvtss2sd %xmm0, %xmm0
 
-  %0 = load float* %x, align 4
+  %0 = load float, float* %x, align 4
   %1 = fpext float %0 to double
   ret double %1
 }
@@ -17,7 +17,7 @@
 entry:
 ; SSE-LABEL: t2:
 ; SSE: cvtsd2ss ([[A0]]), %xmm0
-  %0 = load double* %x, align 8
+  %0 = load double, double* %x, align 8
   %1 = fptrunc double %0 to float
   ret float %1
 }
@@ -27,7 +27,7 @@
 ; SSE-LABEL: squirtf:
 ; SSE: movss ([[A0]]), %xmm0
 ; SSE: sqrtss %xmm0, %xmm0
-  %z = load float* %x
+  %z = load float, float* %x
   %t = call float @llvm.sqrt.f32(float %z)
   ret float %t
 }
@@ -37,7 +37,7 @@
 ; SSE-LABEL: squirt:
 ; SSE: movsd ([[A0]]), %xmm0
 ; SSE: sqrtsd %xmm0, %xmm0
-  %z = load double* %x
+  %z = load double, double* %x
   %t = call double @llvm.sqrt.f64(double %z)
   ret double %t
 }
@@ -46,7 +46,7 @@
 entry:
 ; SSE-LABEL: squirtf_size:
 ; SSE: sqrtss ([[A0]]), %xmm0
-  %z = load float* %x
+  %z = load float, float* %x
   %t = call float @llvm.sqrt.f32(float %z)
   ret float %t
 }
@@ -55,7 +55,7 @@
 entry:
 ; SSE-LABEL: squirt_size:
 ; SSE: sqrtsd ([[A0]]), %xmm0
-  %z = load double* %x
+  %z = load double, double* %x
   %t = call double @llvm.sqrt.f64(double %z)
   ret double %t
 }
@@ -120,13 +120,13 @@
 ; SSE: cvtsi2sdq %{{r[0-9a-x]+}}, %[[REG]]
 define i64 @loopdep2(i64* nocapture %x, double* nocapture %y) nounwind {
 entry:
-  %vx = load i64* %x
+  %vx = load i64, i64* %x
   br label %loop
 loop:
   %i = phi i64 [ 1, %entry ], [ %inc, %loop ]
   %s1 = phi i64 [ %vx, %entry ], [ %s2, %loop ]
   %fi = sitofp i64 %i to double
-  %vy = load double* %y
+  %vy = load double, double* %y
   %fipy = fadd double %fi, %vy
   %iipy = fptosi double %fipy to i64
   %s2 = add i64 %s1, %iipy
@@ -159,16 +159,16 @@
 for.body3:
   %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @v, i64 0, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %conv = sitofp i32 %0 to double
   %arrayidx5 = getelementptr inbounds [1024 x double], [1024 x double]* @x, i64 0, i64 %indvars.iv
-  %1 = load double* %arrayidx5, align 8
+  %1 = load double, double* %arrayidx5, align 8
   %mul = fmul double %conv, %1
   %arrayidx7 = getelementptr inbounds [1024 x double], [1024 x double]* @y, i64 0, i64 %indvars.iv
-  %2 = load double* %arrayidx7, align 8
+  %2 = load double, double* %arrayidx7, align 8
   %mul8 = fmul double %mul, %2
   %arrayidx10 = getelementptr inbounds [1024 x double], [1024 x double]* @z, i64 0, i64 %indvars.iv
-  %3 = load double* %arrayidx10, align 8
+  %3 = load double, double* %arrayidx10, align 8
   %mul11 = fmul double %mul8, %3
   %arrayidx13 = getelementptr inbounds [1024 x double], [1024 x double]* @w, i64 0, i64 %indvars.iv
   store double %mul11, double* %arrayidx13, align 8
diff --git a/llvm/test/CodeGen/X86/bswap.ll b/llvm/test/CodeGen/X86/bswap.ll
index e6a456c..48dc18e 100644
--- a/llvm/test/CodeGen/X86/bswap.ll
+++ b/llvm/test/CodeGen/X86/bswap.ll
@@ -91,7 +91,7 @@
 ; CHECK64-LABEL: not_bswap:
 ; CHECK64-NOT: bswapq
 ; CHECK64: ret
-  %init = load i16* @var16
+  %init = load i16, i16* @var16
   %big = zext i16 %init to i64
 
   %hishifted = lshr i64 %big, 8
@@ -115,7 +115,7 @@
 ; CHECK64-NOT: bswapq
 ; CHECK64: ret
 
-  %init = load i8* @var8
+  %init = load i8, i8* @var8
   %big = zext i8 %init to i64
 
   %hishifted = lshr i64 %big, 8
@@ -140,7 +140,7 @@
 ; CHECK64: shrq $48, [[REG]]
 ; CHECK64: ret
 
-  %init = load i16* @var16
+  %init = load i16, i16* @var16
   %big = zext i16 %init to i64
 
   %hishifted = lshr i64 %big, 8
diff --git a/llvm/test/CodeGen/X86/byval-align.ll b/llvm/test/CodeGen/X86/byval-align.ll
index 275bf2d..fca5349 100644
--- a/llvm/test/CodeGen/X86/byval-align.ll
+++ b/llvm/test/CodeGen/X86/byval-align.ll
@@ -14,14 +14,14 @@
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   %obj1 = bitcast %struct.S* %obj to i8*          ; <i8*> [#uses=1]
   store i8* %obj1, i8** %ptr, align 8
-  %0 = load i8** %ptr, align 8                    ; <i8*> [#uses=1]
+  %0 = load i8*, i8** %ptr, align 8                    ; <i8*> [#uses=1]
   %1 = ptrtoint i8* %0 to i64                     ; <i64> [#uses=1]
   store i64 %1, i64* %p, align 8
-  %2 = load i8** %ptr, align 8                    ; <i8*> [#uses=1]
+  %2 = load i8*, i8** %ptr, align 8                    ; <i8*> [#uses=1]
   %3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([10 x i8]* @.str, i64 0, i64 0), i8* %2) nounwind ; <i32> [#uses=0]
-  %4 = load i64* %p, align 8                      ; <i64> [#uses=1]
+  %4 = load i64, i64* %p, align 8                      ; <i64> [#uses=1]
   %5 = and i64 %4, 140737488355264                ; <i64> [#uses=1]
-  %6 = load i64* %p, align 8                      ; <i64> [#uses=1]
+  %6 = load i64, i64* %p, align 8                      ; <i64> [#uses=1]
   %7 = icmp ne i64 %5, %6                         ; <i1> [#uses=1]
   br i1 %7, label %bb, label %bb2
 
diff --git a/llvm/test/CodeGen/X86/byval.ll b/llvm/test/CodeGen/X86/byval.ll
index 7d85dbd..f29511a 100644
--- a/llvm/test/CodeGen/X86/byval.ll
+++ b/llvm/test/CodeGen/X86/byval.ll
@@ -12,6 +12,6 @@
 define i64 @f(%struct.s* byval %a) {
 entry:
 	%tmp2 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 0
-	%tmp3 = load i64* %tmp2, align 8
+	%tmp3 = load i64, i64* %tmp2, align 8
 	ret i64 %tmp3
 }
diff --git a/llvm/test/CodeGen/X86/call-push.ll b/llvm/test/CodeGen/X86/call-push.ll
index aab63c5..6bcb5d6 100644
--- a/llvm/test/CodeGen/X86/call-push.ll
+++ b/llvm/test/CodeGen/X86/call-push.ll
@@ -12,7 +12,7 @@
 entry:
         %tmp2 = getelementptr %struct.decode_t, %struct.decode_t* %decode, i32 0, i32 4           ; <i16*> [#uses=1]
         %tmp23 = bitcast i16* %tmp2 to i32*             ; <i32*> [#uses=1]
-        %tmp4 = load i32* %tmp23                ; <i32> [#uses=1]
+        %tmp4 = load i32, i32* %tmp23                ; <i32> [#uses=1]
         %tmp514 = lshr i32 %tmp4, 24            ; <i32> [#uses=1]
         %tmp56 = trunc i32 %tmp514 to i8                ; <i8> [#uses=1]
         %tmp7 = icmp eq i8 %tmp56, 0            ; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/cas.ll b/llvm/test/CodeGen/X86/cas.ll
index ec519c6..7807bb9 100644
--- a/llvm/test/CodeGen/X86/cas.ll
+++ b/llvm/test/CodeGen/X86/cas.ll
@@ -24,14 +24,14 @@
   store float* %p, float** %p.addr, align 8
   store float* %expected, float** %expected.addr, align 8
   store float %desired, float* %desired.addr, align 4
-  %0 = load float** %expected.addr, align 8
-  %1 = load float** %expected.addr, align 8
-  %2 = load float* %1, align 4
-  %3 = load float* %desired.addr, align 4
-  %4 = load float** %p.addr, align 8
+  %0 = load float*, float** %expected.addr, align 8
+  %1 = load float*, float** %expected.addr, align 8
+  %2 = load float, float* %1, align 4
+  %3 = load float, float* %desired.addr, align 4
+  %4 = load float*, float** %p.addr, align 8
   %5 = call i8 asm sideeffect "lock; cmpxchg $3, $4; mov $2, $1; sete $0", "={ax},=*rm,{ax},q,*m,~{memory},~{cc},~{dirflag},~{fpsr},~{flags}"(float* %0, float %2, float %3, float* %4) nounwind
   store i8 %5, i8* %success, align 1
-  %6 = load i8* %success, align 1
+  %6 = load i8, i8* %success, align 1
   %tobool = trunc i8 %6 to i1
   ret i1 %tobool
 }
@@ -52,16 +52,16 @@
   store i8* %expected, i8** %expected.addr, align 8
   %frombool = zext i1 %desired to i8
   store i8 %frombool, i8* %desired.addr, align 1
-  %0 = load i8** %expected.addr, align 8
-  %1 = load i8** %expected.addr, align 8
-  %2 = load i8* %1, align 1
+  %0 = load i8*, i8** %expected.addr, align 8
+  %1 = load i8*, i8** %expected.addr, align 8
+  %2 = load i8, i8* %1, align 1
   %tobool = trunc i8 %2 to i1
-  %3 = load i8* %desired.addr, align 1
+  %3 = load i8, i8* %desired.addr, align 1
   %tobool1 = trunc i8 %3 to i1
-  %4 = load i8** %p.addr, align 8
+  %4 = load i8*, i8** %p.addr, align 8
   %5 = call i8 asm sideeffect "lock; cmpxchg $3, $4; mov $2, $1; sete $0", "={ax},=*rm,{ax},q,*m,~{memory},~{cc},~{dirflag},~{fpsr},~{flags}"(i8* %0, i1 %tobool, i1 %tobool1, i8* %4) nounwind
   store i8 %5, i8* %success, align 1
-  %6 = load i8* %success, align 1
+  %6 = load i8, i8* %success, align 1
   %tobool2 = trunc i8 %6 to i1
   ret i1 %tobool2
 }
diff --git a/llvm/test/CodeGen/X86/chain_order.ll b/llvm/test/CodeGen/X86/chain_order.ll
index 3264c0e..442786a 100644
--- a/llvm/test/CodeGen/X86/chain_order.ll
+++ b/llvm/test/CodeGen/X86/chain_order.ll
@@ -12,13 +12,13 @@
 ; A test from pifft (after SLP-vectorization) that fails when we drop the chain on newly merged loads.
 define void @cftx020(double* nocapture %a) {
 entry:
-  %0 = load double* %a, align 8
+  %0 = load double, double* %a, align 8
   %arrayidx1 = getelementptr inbounds double, double* %a, i64 2
-  %1 = load double* %arrayidx1, align 8
+  %1 = load double, double* %arrayidx1, align 8
   %arrayidx2 = getelementptr inbounds double, double* %a, i64 1
-  %2 = load double* %arrayidx2, align 8
+  %2 = load double, double* %arrayidx2, align 8
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 3
-  %3 = load double* %arrayidx3, align 8
+  %3 = load double, double* %arrayidx3, align 8
   %4 = insertelement <2 x double> undef, double %0, i32 0
   %5 = insertelement <2 x double> %4, double %3, i32 1
   %6 = insertelement <2 x double> undef, double %1, i32 0
diff --git a/llvm/test/CodeGen/X86/change-compare-stride-1.ll b/llvm/test/CodeGen/X86/change-compare-stride-1.ll
index 5f0684e..c5480ba 100644
--- a/llvm/test/CodeGen/X86/change-compare-stride-1.ll
+++ b/llvm/test/CodeGen/X86/change-compare-stride-1.ll
@@ -41,38 +41,38 @@
 	%4 = add i32 %3, -481		; <i32> [#uses=1]
 	%5 = zext i32 %4 to i64		; <i64> [#uses=1]
 	%6 = getelementptr i8, i8* %in, i64 %5		; <i8*> [#uses=1]
-	%7 = load i8* %6, align 1		; <i8> [#uses=1]
+	%7 = load i8, i8* %6, align 1		; <i8> [#uses=1]
 	%8 = add i32 %3, -480		; <i32> [#uses=1]
 	%9 = zext i32 %8 to i64		; <i64> [#uses=1]
 	%10 = getelementptr i8, i8* %in, i64 %9		; <i8*> [#uses=1]
-	%11 = load i8* %10, align 1		; <i8> [#uses=1]
+	%11 = load i8, i8* %10, align 1		; <i8> [#uses=1]
 	%12 = add i32 %3, -479		; <i32> [#uses=1]
 	%13 = zext i32 %12 to i64		; <i64> [#uses=1]
 	%14 = getelementptr i8, i8* %in, i64 %13		; <i8*> [#uses=1]
-	%15 = load i8* %14, align 1		; <i8> [#uses=1]
+	%15 = load i8, i8* %14, align 1		; <i8> [#uses=1]
 	%16 = add i32 %3, -1		; <i32> [#uses=1]
 	%17 = zext i32 %16 to i64		; <i64> [#uses=1]
 	%18 = getelementptr i8, i8* %in, i64 %17		; <i8*> [#uses=1]
-	%19 = load i8* %18, align 1		; <i8> [#uses=1]
+	%19 = load i8, i8* %18, align 1		; <i8> [#uses=1]
 	%20 = zext i32 %3 to i64		; <i64> [#uses=1]
 	%21 = getelementptr i8, i8* %in, i64 %20		; <i8*> [#uses=1]
-	%22 = load i8* %21, align 1		; <i8> [#uses=1]
+	%22 = load i8, i8* %21, align 1		; <i8> [#uses=1]
 	%23 = add i32 %3, 1		; <i32> [#uses=1]
 	%24 = zext i32 %23 to i64		; <i64> [#uses=1]
 	%25 = getelementptr i8, i8* %in, i64 %24		; <i8*> [#uses=1]
-	%26 = load i8* %25, align 1		; <i8> [#uses=1]
+	%26 = load i8, i8* %25, align 1		; <i8> [#uses=1]
 	%27 = add i32 %3, 481		; <i32> [#uses=1]
 	%28 = zext i32 %27 to i64		; <i64> [#uses=1]
 	%29 = getelementptr i8, i8* %in, i64 %28		; <i8*> [#uses=1]
-	%30 = load i8* %29, align 1		; <i8> [#uses=1]
+	%30 = load i8, i8* %29, align 1		; <i8> [#uses=1]
 	%31 = add i32 %3, 480		; <i32> [#uses=1]
 	%32 = zext i32 %31 to i64		; <i64> [#uses=1]
 	%33 = getelementptr i8, i8* %in, i64 %32		; <i8*> [#uses=1]
-	%34 = load i8* %33, align 1		; <i8> [#uses=1]
+	%34 = load i8, i8* %33, align 1		; <i8> [#uses=1]
 	%35 = add i32 %3, 479		; <i32> [#uses=1]
 	%36 = zext i32 %35 to i64		; <i64> [#uses=1]
 	%37 = getelementptr i8, i8* %in, i64 %36		; <i8*> [#uses=1]
-	%38 = load i8* %37, align 1		; <i8> [#uses=1]
+	%38 = load i8, i8* %37, align 1		; <i8> [#uses=1]
 	%39 = add i8 %11, %7		; <i8> [#uses=1]
 	%40 = add i8 %39, %15		; <i8> [#uses=1]
 	%41 = add i8 %40, %19		; <i8> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/clobber-fi0.ll b/llvm/test/CodeGen/X86/clobber-fi0.ll
index 4876c35..02f1a16 100644
--- a/llvm/test/CodeGen/X86/clobber-fi0.ll
+++ b/llvm/test/CodeGen/X86/clobber-fi0.ll
@@ -20,17 +20,17 @@
   br label %bb4
 
 bb4:                                              ; preds = %bb4, %bb
-  %tmp6 = load i32* %tmp2, align 4                ; [#uses=1 type=i32]
+  %tmp6 = load i32, i32* %tmp2, align 4                ; [#uses=1 type=i32]
   %tmp7 = add i32 %tmp6, -1                       ; [#uses=2 type=i32]
   store i32 %tmp7, i32* %tmp2, align 4
   %tmp8 = icmp eq i32 %tmp7, 0                    ; [#uses=1 type=i1]
-  %tmp9 = load i32* %tmp                          ; [#uses=1 type=i32]
+  %tmp9 = load i32, i32* %tmp                          ; [#uses=1 type=i32]
   %tmp10 = add i32 %tmp9, -1              ; [#uses=1 type=i32]
   store i32 %tmp10, i32* %tmp3
   br i1 %tmp8, label %bb11, label %bb4
 
 bb11:                                             ; preds = %bb4
-  %tmp12 = load i32* %tmp, align 4                ; [#uses=1 type=i32]
+  %tmp12 = load i32, i32* %tmp, align 4                ; [#uses=1 type=i32]
   ret i32 %tmp12
 }
 
diff --git a/llvm/test/CodeGen/X86/cmov-into-branch.ll b/llvm/test/CodeGen/X86/cmov-into-branch.ll
index cad8dd3..9094408 100644
--- a/llvm/test/CodeGen/X86/cmov-into-branch.ll
+++ b/llvm/test/CodeGen/X86/cmov-into-branch.ll
@@ -2,7 +2,7 @@
 
 ; cmp with single-use load, should not form cmov.
 define i32 @test1(double %a, double* nocapture %b, i32 %x, i32 %y)  {
-  %load = load double* %b, align 8
+  %load = load double, double* %b, align 8
   %cmp = fcmp olt double %load, %a
   %cond = select i1 %cmp, i32 %x, i32 %y
   ret i32 %cond
@@ -25,7 +25,7 @@
 
 ; Multiple uses of %a, should not form cmov.
 define i32 @test3(i32 %a, i32* nocapture %b, i32 %x)  {
-  %load = load i32* %b, align 4
+  %load = load i32, i32* %b, align 4
   %cmp = icmp ult i32 %load, %a
   %cond = select i1 %cmp, i32 %a, i32 %x
   ret i32 %cond
@@ -38,7 +38,7 @@
 
 ; Multiple uses of the load.
 define i32 @test4(i32 %a, i32* nocapture %b, i32 %x, i32 %y)  {
-  %load = load i32* %b, align 4
+  %load = load i32, i32* %b, align 4
   %cmp = icmp ult i32 %load, %a
   %cond = select i1 %cmp, i32 %x, i32 %y
   %add = add i32 %cond, %load
@@ -50,7 +50,7 @@
 
 ; Multiple uses of the cmp.
 define i32 @test5(i32 %a, i32* nocapture %b, i32 %x, i32 %y) {
-  %load = load i32* %b, align 4
+  %load = load i32, i32* %b, align 4
   %cmp = icmp ult i32 %load, %a
   %cmp1 = icmp ugt i32 %load, %a
   %cond = select i1 %cmp1, i32 %a, i32 %y
diff --git a/llvm/test/CodeGen/X86/cmov.ll b/llvm/test/CodeGen/X86/cmov.ll
index 355c6b4..aad04fa 100644
--- a/llvm/test/CodeGen/X86/cmov.ll
+++ b/llvm/test/CodeGen/X86/cmov.ll
@@ -12,7 +12,7 @@
 	%0 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%1 = and i32 %0, 1		; <i32> [#uses=1]
 	%toBool = icmp eq i32 %1, 0		; <i1> [#uses=1]
-        %v = load i32* %vp
+        %v = load i32, i32* %vp
 	%.0 = select i1 %toBool, i32 %v, i32 12		; <i32> [#uses=1]
 	ret i32 %.0
 }
@@ -27,7 +27,7 @@
 	%0 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%1 = and i32 %0, 1		; <i32> [#uses=1]
 	%toBool = icmp eq i32 %1, 0		; <i1> [#uses=1]
-        %v = load i32* %vp
+        %v = load i32, i32* %vp
 	%.0 = select i1 %toBool, i32 12, i32 %v		; <i32> [#uses=1]
 	ret i32 %.0
 }
@@ -71,7 +71,7 @@
 
 define i32 @test4() nounwind {
 entry:
-  %0 = load i8* @g_3, align 1                     ; <i8> [#uses=2]
+  %0 = load i8, i8* @g_3, align 1                     ; <i8> [#uses=2]
   %1 = sext i8 %0 to i32                          ; <i32> [#uses=1]
   %.lobit.i = lshr i8 %0, 7                       ; <i8> [#uses=1]
   %tmp.i = zext i8 %.lobit.i to i32               ; <i32> [#uses=1]
@@ -79,12 +79,12 @@
   %iftmp.17.0.i.i = ashr i32 %1, %tmp.not.i       ; <i32> [#uses=1]
   %retval56.i.i = trunc i32 %iftmp.17.0.i.i to i8 ; <i8> [#uses=1]
   %2 = icmp eq i8 %retval56.i.i, 0                ; <i1> [#uses=2]
-  %g_96.promoted.i = load i8* @g_96               ; <i8> [#uses=3]
+  %g_96.promoted.i = load i8, i8* @g_96               ; <i8> [#uses=3]
   %3 = icmp eq i8 %g_96.promoted.i, 0             ; <i1> [#uses=2]
   br i1 %3, label %func_4.exit.i, label %bb.i.i.i
 
 bb.i.i.i:                                         ; preds = %entry
-  %4 = load volatile i8* @g_100, align 1          ; <i8> [#uses=0]
+  %4 = load volatile i8, i8* @g_100, align 1          ; <i8> [#uses=0]
   br label %func_4.exit.i
 
 ; CHECK-LABEL: test4:
@@ -101,7 +101,7 @@
   br i1 %brmerge.i, label %func_1.exit, label %bb.i.i
 
 bb.i.i:                                           ; preds = %func_4.exit.i
-  %5 = load volatile i8* @g_100, align 1          ; <i8> [#uses=0]
+  %5 = load volatile i8, i8* @g_100, align 1          ; <i8> [#uses=0]
   br label %func_1.exit
 
 func_1.exit:                                      ; preds = %bb.i.i, %func_4.exit.i
@@ -125,7 +125,7 @@
 ; CHECK:	orl	$-2, %eax
 ; CHECK:	ret
 
-	%0 = load i32* %P, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* %P, align 4		; <i32> [#uses=1]
 	%1 = icmp sgt i32 %0, 41		; <i1> [#uses=1]
 	%iftmp.0.0 = select i1 %1, i32 -1, i32 -2		; <i32> [#uses=1]
 	ret i32 %iftmp.0.0
@@ -138,7 +138,7 @@
 ; CHECK:	movzbl	%al, %eax
 ; CHECK:	leal	4(%rax,%rax,8), %eax
 ; CHECK:        ret
-	%0 = load i32* %P, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* %P, align 4		; <i32> [#uses=1]
 	%1 = icmp sgt i32 %0, 41		; <i1> [#uses=1]
 	%iftmp.0.0 = select i1 %1, i32 4, i32 13		; <i32> [#uses=1]
 	ret i32 %iftmp.0.0
diff --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll
index a32c4b7..818138a 100644
--- a/llvm/test/CodeGen/X86/cmp.ll
+++ b/llvm/test/CodeGen/X86/cmp.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -show-mc-encoding | FileCheck %s
 
 define i32 @test1(i32 %X, i32* %y) nounwind {
-	%tmp = load i32* %y		; <i32> [#uses=1]
+	%tmp = load i32, i32* %y		; <i32> [#uses=1]
 	%tmp.upgrd.1 = icmp eq i32 %tmp, 0		; <i1> [#uses=1]
 	br i1 %tmp.upgrd.1, label %ReturnBlock, label %cond_true
 
@@ -15,7 +15,7 @@
 }
 
 define i32 @test2(i32 %X, i32* %y) nounwind {
-	%tmp = load i32* %y		; <i32> [#uses=1]
+	%tmp = load i32, i32* %y		; <i32> [#uses=1]
 	%tmp1 = shl i32 %tmp, 3		; <i32> [#uses=1]
 	%tmp1.upgrd.2 = icmp eq i32 %tmp1, 0		; <i1> [#uses=1]
 	br i1 %tmp1.upgrd.2, label %ReturnBlock, label %cond_true
@@ -30,7 +30,7 @@
 }
 
 define i8 @test2b(i8 %X, i8* %y) nounwind {
-	%tmp = load i8* %y		; <i8> [#uses=1]
+	%tmp = load i8, i8* %y		; <i8> [#uses=1]
 	%tmp1 = shl i8 %tmp, 3		; <i8> [#uses=1]
 	%tmp1.upgrd.2 = icmp eq i8 %tmp1, 0		; <i1> [#uses=1]
 	br i1 %tmp1.upgrd.2, label %ReturnBlock, label %cond_true
@@ -90,7 +90,7 @@
 define i32 @test6() nounwind align 2 {
   %A = alloca {i64, i64}, align 8
   %B = getelementptr inbounds {i64, i64}, {i64, i64}* %A, i64 0, i32 1
-  %C = load i64* %B
+  %C = load i64, i64* %B
   %D = icmp eq i64 %C, 0
   br i1 %D, label %T, label %F
 T:
diff --git a/llvm/test/CodeGen/X86/cmpxchg-clobber-flags.ll b/llvm/test/CodeGen/X86/cmpxchg-clobber-flags.ll
index b7995db..6112393 100644
--- a/llvm/test/CodeGen/X86/cmpxchg-clobber-flags.ll
+++ b/llvm/test/CodeGen/X86/cmpxchg-clobber-flags.ll
@@ -42,7 +42,7 @@
   br label %while.condthread-pre-split.i
 
 while.condthread-pre-split.i:
-  %.pr.i = load i32* %p, align 4
+  %.pr.i = load i32, i32* %p, align 4
   br label %while.cond.i
 
 while.cond.i:
diff --git a/llvm/test/CodeGen/X86/cmpxchg-i1.ll b/llvm/test/CodeGen/X86/cmpxchg-i1.ll
index a21ab59..5f5869f 100644
--- a/llvm/test/CodeGen/X86/cmpxchg-i1.ll
+++ b/llvm/test/CodeGen/X86/cmpxchg-i1.ll
@@ -68,7 +68,7 @@
   ; Result already in %eax
 ; CHECK: retq
 entry:
-  %init = load atomic i32* %addr seq_cst, align 4
+  %init = load atomic i32, i32* %addr seq_cst, align 4
   br label %loop
 
 loop:
diff --git a/llvm/test/CodeGen/X86/cmpxchg-i128-i1.ll b/llvm/test/CodeGen/X86/cmpxchg-i128-i1.ll
index 4dd3001..278e6a4 100644
--- a/llvm/test/CodeGen/X86/cmpxchg-i128-i1.ll
+++ b/llvm/test/CodeGen/X86/cmpxchg-i128-i1.ll
@@ -62,7 +62,7 @@
 ; CHECK-NOT: cmpq
 ; CHECK: jne
 entry:
-  %init = load atomic i128* %addr seq_cst, align 16
+  %init = load atomic i128, i128* %addr seq_cst, align 16
   br label %loop
 
 loop:
diff --git a/llvm/test/CodeGen/X86/coalesce-esp.ll b/llvm/test/CodeGen/X86/coalesce-esp.ll
index 7b89185..e0257e6 100644
--- a/llvm/test/CodeGen/X86/coalesce-esp.ll
+++ b/llvm/test/CodeGen/X86/coalesce-esp.ll
@@ -20,7 +20,7 @@
 	%scevgep24.sum = sub i32 undef, %indvar		; <i32> [#uses=2]
 	%scevgep25 = getelementptr i32, i32* %0, i32 %scevgep24.sum		; <i32*> [#uses=1]
 	%scevgep27 = getelementptr i32, i32* undef, i32 %scevgep24.sum		; <i32*> [#uses=1]
-	%1 = load i32* %scevgep27, align 4		; <i32> [#uses=0]
+	%1 = load i32, i32* %scevgep27, align 4		; <i32> [#uses=0]
 	br i1 undef, label %bb7.backedge, label %bb5
 
 bb5:		; preds = %bb4
diff --git a/llvm/test/CodeGen/X86/coalesce-implicitdef.ll b/llvm/test/CodeGen/X86/coalesce-implicitdef.ll
index 9be0452..a0766f9 100644
--- a/llvm/test/CodeGen/X86/coalesce-implicitdef.ll
+++ b/llvm/test/CodeGen/X86/coalesce-implicitdef.ll
@@ -71,7 +71,7 @@
   br i1 %tobool17, label %for.inc27.if.end30.loopexit56_crit_edge, label %while.condthread-pre-split
 
 if.then22:                                        ; preds = %while.end
-  %1 = load i16* %p2.1, align 2
+  %1 = load i16, i16* %p2.1, align 2
   %tobool23 = icmp eq i16 %1, 0
   br i1 %tobool23, label %for.inc27.backedge, label %label.loopexit
 
@@ -89,7 +89,7 @@
 if.end30:                                         ; preds = %for.inc27.if.end30.loopexit56_crit_edge, %label.loopexit, %label.preheader, %for.inc
   %i.0.load46 = phi i32 [ 0, %for.inc ], [ %i.0.load4669, %label.preheader ], [ %i.0.load4669, %label.loopexit ], [ %i.0.load4669, %for.inc27.if.end30.loopexit56_crit_edge ]
   %pi.4 = phi i32* [ %i, %for.inc ], [ %pi.3.ph, %label.preheader ], [ %pi.3.ph, %label.loopexit ], [ %pi.3.ph, %for.inc27.if.end30.loopexit56_crit_edge ]
-  %2 = load i32* %pi.4, align 4
+  %2 = load i32, i32* %pi.4, align 4
   %tobool31 = icmp eq i32 %2, 0
   br i1 %tobool31, label %for.inc34, label %label.preheader
 
@@ -100,7 +100,7 @@
 
 for.end36:                                        ; preds = %for.cond
   store i32 1, i32* %i, align 4
-  %3 = load i32* @c, align 4
+  %3 = load i32, i32* @c, align 4
   %tobool37 = icmp eq i32 %3, 0
   br i1 %tobool37, label %label.preheader, label %land.rhs
 
@@ -111,15 +111,15 @@
 label.preheader:                                  ; preds = %for.end36, %if.end30, %land.rhs
   %i.0.load4669 = phi i32 [ 1, %land.rhs ], [ %i.0.load46, %if.end30 ], [ 1, %for.end36 ]
   %pi.3.ph = phi i32* [ %pi.0, %land.rhs ], [ %pi.4, %if.end30 ], [ %pi.0, %for.end36 ]
-  %4 = load i32* @b, align 4
+  %4 = load i32, i32* @b, align 4
   %inc285863 = add nsw i32 %4, 1
   store i32 %inc285863, i32* @b, align 4
   %tobool175964 = icmp eq i32 %inc285863, 0
   br i1 %tobool175964, label %if.end30, label %while.condthread-pre-split.lr.ph.lr.ph
 
 while.condthread-pre-split.lr.ph.lr.ph:           ; preds = %label.preheader
-  %.pr50 = load i32* @d, align 4
+  %.pr50 = load i32, i32* @d, align 4
   %tobool19 = icmp eq i32 %.pr50, 0
-  %a.promoted.pre = load i32* @a, align 4
+  %a.promoted.pre = load i32, i32* @a, align 4
   br label %while.condthread-pre-split
 }
diff --git a/llvm/test/CodeGen/X86/coalescer-commute1.ll b/llvm/test/CodeGen/X86/coalescer-commute1.ll
index 227cd72..dccf3b9 100644
--- a/llvm/test/CodeGen/X86/coalescer-commute1.ll
+++ b/llvm/test/CodeGen/X86/coalescer-commute1.ll
@@ -6,14 +6,14 @@
 
 define void @runcont(i32* %source) nounwind  {
 entry:
-	%tmp10 = load i32* @NNTOT, align 4		; <i32> [#uses=1]
+	%tmp10 = load i32, i32* @NNTOT, align 4		; <i32> [#uses=1]
 	br label %bb
 
 bb:		; preds = %bb, %entry
 	%neuron.0 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=2]
 	%thesum.0 = phi float [ 0.000000e+00, %entry ], [ %tmp6, %bb ]		; <float> [#uses=1]
 	%tmp2 = getelementptr i32, i32* %source, i32 %neuron.0		; <i32*> [#uses=1]
-	%tmp3 = load i32* %tmp2, align 4		; <i32> [#uses=1]
+	%tmp3 = load i32, i32* %tmp2, align 4		; <i32> [#uses=1]
 	%tmp34 = sitofp i32 %tmp3 to float		; <float> [#uses=1]
 	%tmp6 = fadd float %tmp34, %thesum.0		; <float> [#uses=2]
 	%indvar.next = add i32 %neuron.0, 1		; <i32> [#uses=2]
diff --git a/llvm/test/CodeGen/X86/coalescer-commute4.ll b/llvm/test/CodeGen/X86/coalescer-commute4.ll
index 06e542b..d4af1a6 100644
--- a/llvm/test/CodeGen/X86/coalescer-commute4.ll
+++ b/llvm/test/CodeGen/X86/coalescer-commute4.ll
@@ -14,10 +14,10 @@
 	%i.0.reg2mem.0 = phi i32 [ 0, %bb.preheader ], [ %indvar.next, %bb ]		; <i32> [#uses=3]
 	%res.0.reg2mem.0 = phi float [ 0.000000e+00, %bb.preheader ], [ %tmp14, %bb ]		; <float> [#uses=1]
 	%tmp3 = getelementptr i32, i32* %x, i32 %i.0.reg2mem.0		; <i32*> [#uses=1]
-	%tmp4 = load i32* %tmp3, align 4		; <i32> [#uses=1]
+	%tmp4 = load i32, i32* %tmp3, align 4		; <i32> [#uses=1]
 	%tmp45 = sitofp i32 %tmp4 to float		; <float> [#uses=1]
 	%tmp8 = getelementptr float, float* %y, i32 %i.0.reg2mem.0		; <float*> [#uses=1]
-	%tmp9 = load float* %tmp8, align 4		; <float> [#uses=1]
+	%tmp9 = load float, float* %tmp8, align 4		; <float> [#uses=1]
 	%tmp11 = fmul float %tmp9, %tmp45		; <float> [#uses=1]
 	%tmp14 = fadd float %tmp11, %res.0.reg2mem.0		; <float> [#uses=2]
 	%indvar.next = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=2]
diff --git a/llvm/test/CodeGen/X86/coalescer-cross.ll b/llvm/test/CodeGen/X86/coalescer-cross.ll
index 0211d2d..92aedbe 100644
--- a/llvm/test/CodeGen/X86/coalescer-cross.ll
+++ b/llvm/test/CodeGen/X86/coalescer-cross.ll
@@ -31,12 +31,12 @@
 	%1 = uitofp i32 %0 to double		; <double> [#uses=1]
 	%2 = fdiv double %1, 1.000000e+06		; <double> [#uses=1]
 	%3 = getelementptr %struct.lua_State, %struct.lua_State* %L, i32 0, i32 4		; <%struct.TValue**> [#uses=3]
-	%4 = load %struct.TValue** %3, align 4		; <%struct.TValue*> [#uses=2]
+	%4 = load %struct.TValue*, %struct.TValue** %3, align 4		; <%struct.TValue*> [#uses=2]
 	%5 = getelementptr %struct.TValue, %struct.TValue* %4, i32 0, i32 0, i32 0		; <double*> [#uses=1]
 	store double %2, double* %5, align 4
 	%6 = getelementptr %struct.TValue, %struct.TValue* %4, i32 0, i32 1		; <i32*> [#uses=1]
 	store i32 3, i32* %6, align 4
-	%7 = load %struct.TValue** %3, align 4		; <%struct.TValue*> [#uses=1]
+	%7 = load %struct.TValue*, %struct.TValue** %3, align 4		; <%struct.TValue*> [#uses=1]
 	%8 = getelementptr %struct.TValue, %struct.TValue* %7, i32 1		; <%struct.TValue*> [#uses=1]
 	store %struct.TValue* %8, %struct.TValue** %3, align 4
 	ret i32 1
diff --git a/llvm/test/CodeGen/X86/coalescer-dce2.ll b/llvm/test/CodeGen/X86/coalescer-dce2.ll
index bbbf09b..116a704 100644
--- a/llvm/test/CodeGen/X86/coalescer-dce2.ll
+++ b/llvm/test/CodeGen/X86/coalescer-dce2.ll
@@ -14,19 +14,19 @@
 
 define void @fn1() nounwind uwtable ssp {
 entry:
-  %0 = load i32* @d, align 4
+  %0 = load i32, i32* @d, align 4
   %tobool72 = icmp eq i32 %0, 0
   br i1 %tobool72, label %for.end32, label %for.cond1.preheader.lr.ph
 
 for.cond1.preheader.lr.ph:                        ; preds = %entry
-  %1 = load i32* @c, align 4
+  %1 = load i32, i32* @c, align 4
   %tobool2 = icmp eq i32 %1, 0
-  %2 = load i32* @b, align 4
+  %2 = load i32, i32* @b, align 4
   %cmp = icmp sgt i32 %2, 0
   %conv = zext i1 %cmp to i32
-  %3 = load i32* @g, align 4
+  %3 = load i32, i32* @g, align 4
   %tobool4 = icmp eq i32 %3, 0
-  %4 = load i16* @a, align 2
+  %4 = load i16, i16* @a, align 2
   %tobool9 = icmp eq i16 %4, 0
   br label %for.cond1.preheader
 
@@ -41,7 +41,7 @@
   br i1 %tobool9, label %cond.end.us.us, label %cond.end.us
 
 cond.false18.us.us:                               ; preds = %if.end.us.us
-  %5 = load i32* @f, align 4
+  %5 = load i32, i32* @f, align 4
   %sext76 = shl i32 %5, 16
   %phitmp75 = ashr exact i32 %sext76, 16
   br label %cond.end.us.us
@@ -74,7 +74,7 @@
   br i1 %cmp14.us, label %cond.end21.us, label %cond.false18.us
 
 if.end6.us:                                       ; preds = %if.end.us
-  %6 = load i32* @f, align 4
+  %6 = load i32, i32* @f, align 4
   %conv7.us = trunc i32 %6 to i16
   %tobool11.us = icmp eq i16 %conv7.us, 0
   br i1 %tobool11.us, label %cond.false18.us, label %land.lhs.true12.us
@@ -95,7 +95,7 @@
   br i1 %tobool4, label %if.end6.us65, label %for.cond25.loopexit.us-lcssa.us-lcssa
 
 cond.false18.us40:                                ; preds = %if.end.us50
-  %7 = load i32* @f, align 4
+  %7 = load i32, i32* @f, align 4
   %sext = shl i32 %7, 16
   %phitmp = ashr exact i32 %sext, 16
   br label %if.end.us50
diff --git a/llvm/test/CodeGen/X86/coalescer-identity.ll b/llvm/test/CodeGen/X86/coalescer-identity.ll
index 1aac095..8d58116 100644
--- a/llvm/test/CodeGen/X86/coalescer-identity.ll
+++ b/llvm/test/CodeGen/X86/coalescer-identity.ll
@@ -12,10 +12,10 @@
 
 define void @func() nounwind uwtable ssp {
 for.body.lr.ph:
-  %0 = load i32* @g2, align 4
+  %0 = load i32, i32* @g2, align 4
   %tobool6 = icmp eq i32 %0, 0
-  %s.promoted = load i16* @s, align 2
-  %.pre = load i32* @g1, align 4
+  %s.promoted = load i16, i16* @s, align 2
+  %.pre = load i32, i32* @g1, align 4
   br i1 %tobool6, label %for.body.us, label %for.body
 
 for.body.us:                                      ; preds = %for.body.lr.ph, %for.inc.us
diff --git a/llvm/test/CodeGen/X86/code_placement.ll b/llvm/test/CodeGen/X86/code_placement.ll
index ca5b75f..7d23584 100644
--- a/llvm/test/CodeGen/X86/code_placement.ll
+++ b/llvm/test/CodeGen/X86/code_placement.ll
@@ -6,9 +6,9 @@
 
 define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind ssp {
 entry:
-	%0 = load i32* %rk, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* %rk, align 4		; <i32> [#uses=1]
 	%1 = getelementptr i32, i32* %rk, i64 1		; <i32*> [#uses=1]
-	%2 = load i32* %1, align 4		; <i32> [#uses=1]
+	%2 = load i32, i32* %1, align 4		; <i32> [#uses=1]
 	%tmp15 = add i32 %r, -1		; <i32> [#uses=1]
 	%tmp.16 = zext i32 %tmp15 to i64		; <i64> [#uses=2]
 	br label %bb
@@ -24,36 +24,36 @@
 	%3 = lshr i32 %s0.0, 24		; <i32> [#uses=1]
 	%4 = zext i32 %3 to i64		; <i64> [#uses=1]
 	%5 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %4		; <i32*> [#uses=1]
-	%6 = load i32* %5, align 4		; <i32> [#uses=1]
+	%6 = load i32, i32* %5, align 4		; <i32> [#uses=1]
 	%7 = lshr i32 %s1.0, 16		; <i32> [#uses=1]
 	%8 = and i32 %7, 255		; <i32> [#uses=1]
 	%9 = zext i32 %8 to i64		; <i64> [#uses=1]
 	%10 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %9		; <i32*> [#uses=1]
-	%11 = load i32* %10, align 4		; <i32> [#uses=1]
+	%11 = load i32, i32* %10, align 4		; <i32> [#uses=1]
 	%ctg2.sum2728 = or i64 %tmp18, 8		; <i64> [#uses=1]
 	%12 = getelementptr i8, i8* %rk26, i64 %ctg2.sum2728		; <i8*> [#uses=1]
 	%13 = bitcast i8* %12 to i32*		; <i32*> [#uses=1]
-	%14 = load i32* %13, align 4		; <i32> [#uses=1]
+	%14 = load i32, i32* %13, align 4		; <i32> [#uses=1]
 	%15 = xor i32 %11, %6		; <i32> [#uses=1]
 	%16 = xor i32 %15, %14		; <i32> [#uses=3]
 	%17 = lshr i32 %s1.0, 24		; <i32> [#uses=1]
 	%18 = zext i32 %17 to i64		; <i64> [#uses=1]
 	%19 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %18		; <i32*> [#uses=1]
-	%20 = load i32* %19, align 4		; <i32> [#uses=1]
+	%20 = load i32, i32* %19, align 4		; <i32> [#uses=1]
 	%21 = and i32 %s0.0, 255		; <i32> [#uses=1]
 	%22 = zext i32 %21 to i64		; <i64> [#uses=1]
 	%23 = getelementptr [256 x i32], [256 x i32]* @Te3, i64 0, i64 %22		; <i32*> [#uses=1]
-	%24 = load i32* %23, align 4		; <i32> [#uses=1]
+	%24 = load i32, i32* %23, align 4		; <i32> [#uses=1]
 	%ctg2.sum2930 = or i64 %tmp18, 12		; <i64> [#uses=1]
 	%25 = getelementptr i8, i8* %rk26, i64 %ctg2.sum2930		; <i8*> [#uses=1]
 	%26 = bitcast i8* %25 to i32*		; <i32*> [#uses=1]
-	%27 = load i32* %26, align 4		; <i32> [#uses=1]
+	%27 = load i32, i32* %26, align 4		; <i32> [#uses=1]
 	%28 = xor i32 %24, %20		; <i32> [#uses=1]
 	%29 = xor i32 %28, %27		; <i32> [#uses=4]
 	%30 = lshr i32 %16, 24		; <i32> [#uses=1]
 	%31 = zext i32 %30 to i64		; <i64> [#uses=1]
 	%32 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %31		; <i32*> [#uses=1]
-	%33 = load i32* %32, align 4		; <i32> [#uses=2]
+	%33 = load i32, i32* %32, align 4		; <i32> [#uses=2]
 	%exitcond = icmp eq i64 %indvar, %tmp.16		; <i1> [#uses=1]
 	br i1 %exitcond, label %bb2, label %bb1
 
@@ -65,22 +65,22 @@
 	%37 = and i32 %36, 255		; <i32> [#uses=1]
 	%38 = zext i32 %37 to i64		; <i64> [#uses=1]
 	%39 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %38		; <i32*> [#uses=1]
-	%40 = load i32* %39, align 4		; <i32> [#uses=1]
-	%41 = load i32* %35, align 4		; <i32> [#uses=1]
+	%40 = load i32, i32* %39, align 4		; <i32> [#uses=1]
+	%41 = load i32, i32* %35, align 4		; <i32> [#uses=1]
 	%42 = xor i32 %40, %33		; <i32> [#uses=1]
 	%43 = xor i32 %42, %41		; <i32> [#uses=1]
 	%44 = lshr i32 %29, 24		; <i32> [#uses=1]
 	%45 = zext i32 %44 to i64		; <i64> [#uses=1]
 	%46 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %45		; <i32*> [#uses=1]
-	%47 = load i32* %46, align 4		; <i32> [#uses=1]
+	%47 = load i32, i32* %46, align 4		; <i32> [#uses=1]
 	%48 = and i32 %16, 255		; <i32> [#uses=1]
 	%49 = zext i32 %48 to i64		; <i64> [#uses=1]
 	%50 = getelementptr [256 x i32], [256 x i32]* @Te3, i64 0, i64 %49		; <i32*> [#uses=1]
-	%51 = load i32* %50, align 4		; <i32> [#uses=1]
+	%51 = load i32, i32* %50, align 4		; <i32> [#uses=1]
 	%ctg2.sum32 = add i64 %tmp18, 20		; <i64> [#uses=1]
 	%52 = getelementptr i8, i8* %rk26, i64 %ctg2.sum32		; <i8*> [#uses=1]
 	%53 = bitcast i8* %52 to i32*		; <i32*> [#uses=1]
-	%54 = load i32* %53, align 4		; <i32> [#uses=1]
+	%54 = load i32, i32* %53, align 4		; <i32> [#uses=1]
 	%55 = xor i32 %51, %47		; <i32> [#uses=1]
 	%56 = xor i32 %55, %54		; <i32> [#uses=1]
 	%indvar.next = add i64 %indvar, 1		; <i64> [#uses=1]
@@ -96,26 +96,26 @@
 	%60 = and i32 %59, 255		; <i32> [#uses=1]
 	%61 = zext i32 %60 to i64		; <i64> [#uses=1]
 	%62 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %61		; <i32*> [#uses=1]
-	%63 = load i32* %62, align 4		; <i32> [#uses=1]
+	%63 = load i32, i32* %62, align 4		; <i32> [#uses=1]
 	%64 = and i32 %63, 16711680		; <i32> [#uses=1]
 	%65 = or i32 %64, %58		; <i32> [#uses=1]
-	%66 = load i32* %57, align 4		; <i32> [#uses=1]
+	%66 = load i32, i32* %57, align 4		; <i32> [#uses=1]
 	%67 = xor i32 %65, %66		; <i32> [#uses=2]
 	%68 = lshr i32 %29, 8		; <i32> [#uses=1]
 	%69 = zext i32 %68 to i64		; <i64> [#uses=1]
 	%70 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %69		; <i32*> [#uses=1]
-	%71 = load i32* %70, align 4		; <i32> [#uses=1]
+	%71 = load i32, i32* %70, align 4		; <i32> [#uses=1]
 	%72 = and i32 %71, -16777216		; <i32> [#uses=1]
 	%73 = and i32 %16, 255		; <i32> [#uses=1]
 	%74 = zext i32 %73 to i64		; <i64> [#uses=1]
 	%75 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %74		; <i32*> [#uses=1]
-	%76 = load i32* %75, align 4		; <i32> [#uses=1]
+	%76 = load i32, i32* %75, align 4		; <i32> [#uses=1]
 	%77 = and i32 %76, 16711680		; <i32> [#uses=1]
 	%78 = or i32 %77, %72		; <i32> [#uses=1]
 	%ctg2.sum25 = add i64 %tmp10, 20		; <i64> [#uses=1]
 	%79 = getelementptr i8, i8* %rk26, i64 %ctg2.sum25		; <i8*> [#uses=1]
 	%80 = bitcast i8* %79 to i32*		; <i32*> [#uses=1]
-	%81 = load i32* %80, align 4		; <i32> [#uses=1]
+	%81 = load i32, i32* %80, align 4		; <i32> [#uses=1]
 	%82 = xor i32 %78, %81		; <i32> [#uses=2]
 	%83 = lshr i32 %67, 24		; <i32> [#uses=1]
 	%84 = trunc i32 %83 to i8		; <i8> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll b/llvm/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll
index 3ad9949..ac5cbb9 100644
--- a/llvm/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll
+++ b/llvm/test/CodeGen/X86/codegen-prepare-addrmode-sext.ll
@@ -20,7 +20,7 @@
   %add = add nsw i32 %arg1, %arg2 
   %sextadd = sext i32 %add to i64
   %base = inttoptr i64 %sextadd to i8*
-  %res = load i8* %base
+  %res = load i8, i8* %base
   ret i8 %res
 }
 
@@ -36,7 +36,7 @@
   %add = add nsw i32 %arg1, %arg2 
   %sextadd = sext i32 %add to i64
   %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
-  %res = load i8* %arrayidx
+  %res = load i8, i8* %arrayidx
   ret i8 %res
 }
 
@@ -49,7 +49,7 @@
   %add = add i32 %arg1, %arg2 
   %sextadd = sext i32 %add to i64
   %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
-  %res = load i8* %arrayidx
+  %res = load i8, i8* %arrayidx
   ret i8 %res
 }
 
@@ -63,7 +63,7 @@
   %add = add nsw i32 %arg1, 1 
   %sextadd = sext i32 %add to i64
   %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
-  %res = load i8* %arrayidx
+  %res = load i8, i8* %arrayidx
   ret i8 %res
 }
 
@@ -78,7 +78,7 @@
   %add = add nsw i32 %zext, 1 
   %sextadd = sext i32 %add to i64
   %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
-  %res = load i8* %arrayidx
+  %res = load i8, i8* %arrayidx
   ret i8 %res
 }
 
@@ -100,7 +100,7 @@
   %add = add nsw i32 %cst, 1
   %sextadd = sext i32 %add to i64
   %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
-  %res = load i8* %arrayidx
+  %res = load i8, i8* %arrayidx
   ret i8 %res
 }
 
@@ -117,7 +117,7 @@
   %add = add nsw i8 %trunc, 1 
   %sextadd = sext i8 %add to i64
   %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
-  %res = load i8* %arrayidx
+  %res = load i8, i8* %arrayidx
   ret i8 %res
 }
 
@@ -136,7 +136,7 @@
   %add = add nsw i8 %trunc, 1 
   %sextadd = sext i8 %add to i64
   %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
-  %res = load i8* %arrayidx
+  %res = load i8, i8* %arrayidx
   ret i8 %res
 }
 
@@ -153,7 +153,7 @@
   %add = add nsw i8 %trunc, 1 
   %sextadd = sext i8 %add to i64
   %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
-  %res = load i8* %arrayidx
+  %res = load i8, i8* %arrayidx
   ret i8 %res
 }
 
@@ -165,14 +165,14 @@
 ; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
 ; CHECK: [[TRUNC:%[a-zA-Z_0-9-]+]] = trunc i64 [[PROMOTED]] to i8
 ; CHECK: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i8, i8* %base, i64 [[PROMOTED]]
-; CHECK: [[LOAD:%[a-zA-Z_0-9-]+]] = load i8* [[GEP]]
+; CHECK: [[LOAD:%[a-zA-Z_0-9-]+]] = load i8, i8* [[GEP]]
 ; CHECK: add i8 [[LOAD]], [[TRUNC]]
 ; CHECK: ret
 define i8 @oneArgPromotionTruncInsert(i8 %arg1, i8* %base) {
   %add = add nsw i8 %arg1, 1 
   %sextadd = sext i8 %add to i64
   %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
-  %res = load i8* %arrayidx
+  %res = load i8, i8* %arrayidx
   %finalres = add i8 %res, %add
   ret i8 %finalres
 }
@@ -189,7 +189,7 @@
   %add = add nsw i8 %trunc, 1 
   %sextadd = sext i8 %add to i64
   %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
-  %res = load i8* %arrayidx
+  %res = load i8, i8* %arrayidx
   %finalres = add i8 %res, %add
   ret i8 %finalres
 }
@@ -203,7 +203,7 @@
 ; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
 ; CHECK: [[TRUNC:%[a-zA-Z_0-9-]+]] = trunc i64 [[PROMOTED]] to i8
 ; CHECK: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i8, i8* %base, i64 [[PROMOTED]]
-; CHECK: [[LOAD:%[a-zA-Z_0-9-]+]] = load i8* [[GEP]]
+; CHECK: [[LOAD:%[a-zA-Z_0-9-]+]] = load i8, i8* [[GEP]]
 ; CHECK: [[ADDRES:%[a-zA-Z_0-9-]+]] = add i8 [[LOAD]], [[TRUNC]]
 ; CHECK: add i8 [[ADDRES]], [[TRUNC]]
 ; CHECK: ret
@@ -211,7 +211,7 @@
   %add = add nsw i8 %arg1, 1 
   %sextadd = sext i8 %add to i64
   %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
-  %res = load i8* %arrayidx
+  %res = load i8, i8* %arrayidx
   %almostfinalres = add i8 %res, %add
   %finalres = add i8 %almostfinalres, %add
   ret i8 %finalres
@@ -223,7 +223,7 @@
 ; CHECK: [[ARG1SEXT:%[a-zA-Z_0-9-]+]] = sext i8 %arg1 to i64
 ; CHECK: [[PROMOTED:%[a-zA-Z_0-9-]+]] = add nsw i64 [[ARG1SEXT]], 1
 ; CHECK: [[GEP:%[a-zA-Z_0-9-]+]] = getelementptr inbounds i8, i8* %base, i64 [[PROMOTED]]
-; CHECK: [[LOAD:%[a-zA-Z_0-9-]+]] = load i8* [[GEP]]
+; CHECK: [[LOAD:%[a-zA-Z_0-9-]+]] = load i8, i8* [[GEP]]
 ; CHECK: [[ADDRES:%[a-zA-Z_0-9-]+]] = zext i8 [[LOAD]] to i64
 ; CHECK: add i64 [[ADDRES]], [[PROMOTED]]
 ; CHECK: ret
@@ -231,7 +231,7 @@
   %add = add nsw i8 %arg1, 1 
   %sextadd = sext i8 %add to i64
   %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
-  %res = load i8* %arrayidx
+  %res = load i8, i8* %arrayidx
   %almostfinalres = zext i8 %res to i64
   %finalres = add i64 %almostfinalres, %sextadd
   ret i64 %finalres
@@ -264,7 +264,7 @@
   %promotableadd2 = add nsw i32 %promotableadd1, %promotableadd1 
   %sextadd = sext i32 %promotableadd2 to i64
   %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
-  %res = load i8* %arrayidx
+  %res = load i8, i8* %arrayidx
   ret i8 %res
 }
 
@@ -287,7 +287,7 @@
   %add = add nsw i8 %trunc, %arg2 
   %sextadd = sext i8 %add to i64
   %arrayidx = getelementptr inbounds i8, i8* %base, i64 %sextadd
-  %res = load i8* %arrayidx
+  %res = load i8, i8* %arrayidx
   ret i8 %res
 }
 
@@ -313,11 +313,11 @@
 ; BB then
 ; CHECK: [[BASE1:%[a-zA-Z_0-9-]+]] = add i64 [[SEXTADD]], 48
 ; CHECK: [[ADDR1:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[BASE1]] to i32*
-; CHECK: load i32* [[ADDR1]]
+; CHECK: load i32, i32* [[ADDR1]]
 ; BB else
 ; CHECK: [[BASE2:%[a-zA-Z_0-9-]+]] = add i64 [[SEXTADD]], 48
 ; CHECK: [[ADDR2:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[BASE2]] to i32*
-; CHECK: load i32* [[ADDR2]]
+; CHECK: load i32, i32* [[ADDR2]]
 ; CHECK: ret
 ; CHECK-GEP-LABEL: @checkProfitability
 ; CHECK-GEP-NOT: {{%[a-zA-Z_0-9-]+}} = sext i32 %arg1 to i64
@@ -330,13 +330,13 @@
 ; CHECK-GEP: [[BCC1:%[a-zA-Z_0-9-]+]] = bitcast i32* [[BASE1]] to i8*
 ; CHECK-GEP: [[FULL1:%[a-zA-Z_0-9-]+]] = getelementptr i8, i8* [[BCC1]], i64 48
 ; CHECK-GEP: [[ADDR1:%[a-zA-Z_0-9-]+]] = bitcast i8* [[FULL1]] to i32*
-; CHECK-GEP: load i32* [[ADDR1]]
+; CHECK-GEP: load i32, i32* [[ADDR1]]
 ; BB else
 ; CHECK-GEP: [[BASE2:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[SEXTADD]] to i32*
 ; CHECK-GEP: [[BCC2:%[a-zA-Z_0-9-]+]] = bitcast i32* [[BASE2]] to i8*
 ; CHECK-GEP: [[FULL2:%[a-zA-Z_0-9-]+]] = getelementptr i8, i8* [[BCC2]], i64 48
 ; CHECK-GEP: [[ADDR2:%[a-zA-Z_0-9-]+]] = bitcast i8* [[FULL2]] to i32*
-; CHECK-GEP: load i32* [[ADDR2]]
+; CHECK-GEP: load i32, i32* [[ADDR2]]
 ; CHECK-GEP: ret
 define i32 @checkProfitability(i32 %arg1, i32 %arg2, i1 %test) {
   %shl = shl nsw i32 %arg1, 1
@@ -346,16 +346,16 @@
   %arrayidx1 = getelementptr i32, i32* %tmpptr, i64 12
   br i1 %test, label %then, label %else
 then: 
-  %res1 = load i32* %arrayidx1
+  %res1 = load i32, i32* %arrayidx1
   br label %end
 else:
-  %res2 = load i32* %arrayidx1
+  %res2 = load i32, i32* %arrayidx1
   br label %end
 end:
   %tmp = phi i32 [%res1, %then], [%res2, %else]
   %res = add i32 %tmp, %add1
   %addr = inttoptr i32 %res to i32*
-  %final = load i32* %addr
+  %final = load i32, i32* %addr
   ret i32 %final
 }
 
@@ -377,7 +377,7 @@
 ; CHECK-NEXT: [[ADD:%[a-zA-Z_0-9-]+]] = add i64 [[BASE]], [[PROMOTED_CONV]]
 ; CHECK-NEXT: [[ADDR:%[a-zA-Z_0-9-]+]] = add i64 [[ADD]], 7
 ; CHECK-NEXT: [[CAST:%[a-zA-Z_0-9-]+]] = inttoptr i64 [[ADDR]] to i8*
-; CHECK-NEXT: load i8* [[CAST]], align 1
+; CHECK-NEXT: load i8, i8* [[CAST]], align 1
 define signext i16 @fn3(%struct.dns_packet* nocapture readonly %P) {
 entry:
   %tmp = getelementptr inbounds %struct.dns_packet, %struct.dns_packet* %P, i64 0, i32 2
@@ -389,7 +389,7 @@
   %inc.i.i = add i16 %src.addr.0.i.i, 1
   %idxprom.i.i = sext i16 %src.addr.0.i.i to i64
   %arrayidx.i.i = getelementptr inbounds [0 x i8], [0 x i8]* %data.i.i, i64 0, i64 %idxprom.i.i
-  %tmp1 = load i8* %arrayidx.i.i, align 1
+  %tmp1 = load i8, i8* %arrayidx.i.i, align 1
   %conv2.i.i = zext i8 %tmp1 to i32
   %and.i.i = and i32 %conv2.i.i, 15
   store i32 %and.i.i, i32* @a, align 4
@@ -402,7 +402,7 @@
   %sub.i = add nsw i32 %conv.i, -1
   %idxprom.i = sext i32 %sub.i to i64
   %arrayidx.i = getelementptr inbounds [0 x i8], [0 x i8]* %data.i.i, i64 0, i64 %idxprom.i
-  %tmp2 = load i8* %arrayidx.i, align 1
+  %tmp2 = load i8, i8* %arrayidx.i, align 1
   %conv2.i = sext i8 %tmp2 to i16
   store i16 %conv2.i, i16* @b, align 2
   %sub4.i = sub nsw i32 0, %conv.i
@@ -412,7 +412,7 @@
 
 if.then.i:                                        ; preds = %fn1.exit.i
   %end.i = getelementptr inbounds %struct.dns_packet, %struct.dns_packet* %P, i64 0, i32 1
-  %tmp3 = load i32* %end.i, align 4
+  %tmp3 = load i32, i32* %end.i, align 4
   %sub7.i = add i32 %tmp3, 65535
   %conv8.i = trunc i32 %sub7.i to i16
   br label %fn2.exit
@@ -433,7 +433,7 @@
   %add = add nsw i32 %arg1, %arg2 
   %zextadd = zext i32 %add to i64
   %base = inttoptr i64 %zextadd to i8*
-  %res = load i8* %base
+  %res = load i8, i8* %base
   ret i8 %res
 }
 
@@ -448,7 +448,7 @@
   %add = add nuw i32 %arg1, %arg2 
   %zextadd = zext i32 %add to i64
   %base = inttoptr i64 %zextadd to i8*
-  %res = load i8* %base
+  %res = load i8, i8* %base
   ret i8 %res
 }
 
@@ -462,7 +462,7 @@
   %add = add nuw i8 %arg1, -1 
   %zextadd = zext i8 %add to i64
   %arrayidx = getelementptr inbounds i8, i8* %base, i64 %zextadd
-  %res = load i8* %arrayidx
+  %res = load i8, i8* %arrayidx
   ret i8 %res
 }
 
@@ -477,7 +477,7 @@
   %add = add nuw i32 %zext, 1 
   %zextadd = zext i32 %add to i64
   %arrayidx = getelementptr inbounds i8, i8* %base, i64 %zextadd
-  %res = load i8* %arrayidx
+  %res = load i8, i8* %arrayidx
   ret i8 %res
 }
 
@@ -496,7 +496,7 @@
   %add = add nuw i8 %trunc, 1 
   %zextadd = zext i8 %add to i64
   %arrayidx = getelementptr inbounds i8, i8* %base, i64 %zextadd
-  %res = load i8* %arrayidx
+  %res = load i8, i8* %arrayidx
   ret i8 %res
 }
 
@@ -513,7 +513,7 @@
   %add = add nuw i8 %trunc, 1 
   %zextadd = zext i8 %add to i64
   %arrayidx = getelementptr inbounds i8, i8* %base, i64 %zextadd
-  %res = load i8* %arrayidx
+  %res = load i8, i8* %arrayidx
   ret i8 %res
 }
 
@@ -529,6 +529,6 @@
   %add = add nuw i8 %sextarg1, 1 
   %zextadd = zext i8 %add to i64
   %arrayidx = getelementptr inbounds i8, i8* %base, i64 %zextadd
-  %res = load i8* %arrayidx
+  %res = load i8, i8* %arrayidx
   ret i8 %res
 }
diff --git a/llvm/test/CodeGen/X86/codegen-prepare-cast.ll b/llvm/test/CodeGen/X86/codegen-prepare-cast.ll
index cd84303..1b80a9b 100644
--- a/llvm/test/CodeGen/X86/codegen-prepare-cast.ll
+++ b/llvm/test/CodeGen/X86/codegen-prepare-cast.ll
@@ -10,7 +10,7 @@
 @.str = external constant [7 x i8]              ; <[7 x i8]*> [#uses=1]
 
 ; CHECK-LABEL: @_Dmain
-; CHECK: load i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0)
+; CHECK: load i8, i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0)
 ; CHECK: ret
 define fastcc i32 @_Dmain(%"char[][]" %unnamed) {
 entry:
@@ -19,7 +19,7 @@
 
 foreachbody:            ; preds = %entry
         %tmp4 = getelementptr i8, i8* %tmp, i32 undef               ; <i8*> [#uses=1]
-        %tmp5 = load i8* %tmp4          ; <i8> [#uses=0]
+        %tmp5 = load i8, i8* %tmp4          ; <i8> [#uses=0]
         unreachable
 
 foreachend:             ; preds = %entry
diff --git a/llvm/test/CodeGen/X86/codegen-prepare-extload.ll b/llvm/test/CodeGen/X86/codegen-prepare-extload.ll
index 9b27c33..6619ebe 100644
--- a/llvm/test/CodeGen/X86/codegen-prepare-extload.ll
+++ b/llvm/test/CodeGen/X86/codegen-prepare-extload.ll
@@ -12,13 +12,13 @@
 ; CHECK: movsbl ({{%rdi|%rcx}}), %eax
 ;
 ; OPTALL-LABEL: @foo
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
 ; OPTALL-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
 ; OPTALL: store i32 [[ZEXT]], i32* %q
 ; OPTALL: ret
 define void @foo(i8* %p, i32* %q) {
 entry:
-  %t = load i8* %p
+  %t = load i8, i8* %p
   %a = icmp slt i8 %t, 20
   br i1 %a, label %true, label %false
 true:
@@ -32,7 +32,7 @@
 ; Check that we manage to form a zextload if an operation with only one
 ; argument to explicitly extend is in the way.
 ; OPTALL-LABEL: @promoteOneArg
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
 ; OPT-NEXT: [[ZEXT:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
 ; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw i32 [[ZEXT]], 2
 ; Make sure the operation is not promoted when the promotion pass is disabled.
@@ -42,7 +42,7 @@
 ; OPTALL: ret
 define void @promoteOneArg(i8* %p, i32* %q) {
 entry:
-  %t = load i8* %p
+  %t = load i8, i8* %p
   %add = add nuw i8 %t, 2
   %a = icmp slt i8 %t, 20
   br i1 %a, label %true, label %false
@@ -58,7 +58,7 @@
 ; argument to explicitly extend is in the way.
 ; Version with sext.
 ; OPTALL-LABEL: @promoteOneArgSExt
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
 ; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
 ; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i32 [[SEXT]], 2
 ; DISABLE: [[ADD:%[a-zA-Z_0-9-]+]] = add nsw i8 [[LD]], 2
@@ -67,7 +67,7 @@
 ; OPTALL: ret
 define void @promoteOneArgSExt(i8* %p, i32* %q) {
 entry:
-  %t = load i8* %p
+  %t = load i8, i8* %p
   %add = add nsw i8 %t, 2
   %a = icmp slt i8 %t, 20
   br i1 %a, label %true, label %false
@@ -90,7 +90,7 @@
 ; transformation, the regular heuristic does not apply the optimization. 
 ; 
 ; OPTALL-LABEL: @promoteTwoArgZext
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
 ;
 ; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
 ; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
@@ -106,7 +106,7 @@
 ; OPTALL: ret
 define void @promoteTwoArgZext(i8* %p, i32* %q, i8 %b) {
 entry:
-  %t = load i8* %p
+  %t = load i8, i8* %p
   %add = add nuw i8 %t, %b
   %a = icmp slt i8 %t, 20
   br i1 %a, label %true, label %false
@@ -122,7 +122,7 @@
 ; arguments to explicitly extend is in the way.
 ; Version with sext.
 ; OPTALL-LABEL: @promoteTwoArgSExt
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
 ;
 ; STRESS-NEXT: [[SEXTLD:%[a-zA-Z_0-9-]+]] = sext i8 [[LD]] to i32
 ; STRESS-NEXT: [[SEXTB:%[a-zA-Z_0-9-]+]] = sext i8 %b to i32
@@ -137,7 +137,7 @@
 ; OPTALL: ret
 define void @promoteTwoArgSExt(i8* %p, i32* %q, i8 %b) {
 entry:
-  %t = load i8* %p
+  %t = load i8, i8* %p
   %add = add nsw i8 %t, %b
   %a = icmp slt i8 %t, 20
   br i1 %a, label %true, label %false
@@ -152,7 +152,7 @@
 ; Check that we do not form a zextload if we need to introduce more than
 ; one additional extension.
 ; OPTALL-LABEL: @promoteThreeArgZext
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
 ;
 ; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
 ; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i8 %b to i32
@@ -172,7 +172,7 @@
 ; OPTALL: ret
 define void @promoteThreeArgZext(i8* %p, i32* %q, i8 %b, i8 %c) {
 entry:
-  %t = load i8* %p
+  %t = load i8, i8* %p
   %tmp = add nuw i8 %t, %b
   %add = add nuw i8 %tmp, %c
   %a = icmp slt i8 %t, 20
@@ -188,7 +188,7 @@
 ; Check that we manage to form a zextload after promoting and merging
 ; two extensions.
 ; OPTALL-LABEL: @promoteMergeExtArgZExt
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
 ;
 ; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
 ; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = zext i16 %b to i32
@@ -206,7 +206,7 @@
 ; OPTALL: ret
 define void @promoteMergeExtArgZExt(i8* %p, i32* %q, i16 %b) {
 entry:
-  %t = load i8* %p
+  %t = load i8, i8* %p
   %ext = zext i8 %t to i16
   %add = add nuw i16 %ext, %b
   %a = icmp slt i8 %t, 20
@@ -223,7 +223,7 @@
 ; two extensions.
 ; Version with sext.
 ; OPTALL-LABEL: @promoteMergeExtArgSExt
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %p
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %p
 ;
 ; STRESS-NEXT: [[ZEXTLD:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i32
 ; STRESS-NEXT: [[ZEXTB:%[a-zA-Z_0-9-]+]] = sext i16 %b to i32
@@ -240,7 +240,7 @@
 ; OPTALL: ret
 define void @promoteMergeExtArgSExt(i8* %p, i32* %q, i16 %b) {
 entry:
-  %t = load i8* %p
+  %t = load i8, i8* %p
   %ext = zext i8 %t to i16
   %add = add nsw i16 %ext, %b
   %a = icmp slt i8 %t, 20
@@ -284,11 +284,11 @@
 ; 3 identical zext of %ld. The extensions will be CSE'ed by SDag.
 ;
 ; OPTALL-LABEL: @severalPromotions
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8* %addr1
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i8, i8* %addr1
 ; OPT-NEXT: [[ZEXTLD1_1:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
 ; OPT-NEXT: [[ZEXTLD1_2:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
 ; OPT-NEXT: [[ZEXTLD1_3:%[a-zA-Z_0-9-]+]] = zext i8 [[LD]] to i64
-; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32* %addr2
+; OPT-NEXT: [[LD2:%[a-zA-Z_0-9-]+]] = load i32, i32* %addr2
 ; OPT-NEXT: [[SEXTLD2:%[a-zA-Z_0-9-]+]] = sext i32 [[LD2]] to i64
 ; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nsw i64 [[SEXTLD2]], [[ZEXTLD1_1]]
 ; We do not combine this one: see 2.b.
@@ -308,9 +308,9 @@
 ; OPTALL: call void @dummy(i64 [[RES]], i64 [[RESZA]], i64 [[RESB]])
 ; OPTALL: ret
 define void @severalPromotions(i8* %addr1, i32* %addr2, i8 %a, i32 %b) {
-  %ld = load i8* %addr1
+  %ld = load i8, i8* %addr1
   %zextld = zext i8 %ld to i32
-  %ld2 = load i32* %addr2
+  %ld2 = load i32, i32* %addr2
   %add = add nsw i32 %ld2, %zextld
   %sextadd = sext i32 %add to i64
   %zexta = zext i8 %a to i32
@@ -345,7 +345,7 @@
 ; to an instruction.
 ; This used to cause a crash.
 ; OPTALL-LABEL: @promotionOfArgEndsUpInValue
-; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i16* %addr
+; OPTALL: [[LD:%[a-zA-Z_0-9-]+]] = load i16, i16* %addr
 
 ; OPT-NEXT: [[SEXT:%[a-zA-Z_0-9-]+]] = sext i16 [[LD]] to i32
 ; OPT-NEXT: [[RES:%[a-zA-Z_0-9-]+]] = add nuw nsw i32 [[SEXT]], zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32]* @c, i64 0, i64 1), i32* @a) to i32)
@@ -356,7 +356,7 @@
 ; OPTALL-NEXT: ret i32 [[RES]]
 define i32 @promotionOfArgEndsUpInValue(i16* %addr) {
 entry:
-  %val = load i16* %addr
+  %val = load i16, i16* %addr
   %add = add nuw nsw i16 %val, zext (i1 icmp ne (i32* getelementptr inbounds ([2 x i32]* @c, i64 0, i64 1), i32* @a) to i16)
   %conv3 = sext i16 %add to i32
   ret i32 %conv3
diff --git a/llvm/test/CodeGen/X86/codegen-prepare.ll b/llvm/test/CodeGen/X86/codegen-prepare.ll
index 0bb9f8a..e58bc22 100644
--- a/llvm/test/CodeGen/X86/codegen-prepare.ll
+++ b/llvm/test/CodeGen/X86/codegen-prepare.ll
@@ -25,9 +25,9 @@
 
 if.then:                                         ; preds = %entry
   %0 = getelementptr inbounds %class.D, %class.D* %address2, i64 0, i32 0, i64 0, i32 0
-  %1 = load float* %0, align 4 
+  %1 = load float, float* %0, align 4 
   %2 = getelementptr inbounds float, float* %0, i64 3
-  %3 = load float* %2, align 4 
+  %3 = load float, float* %2, align 4 
   %4 = getelementptr inbounds %class.D, %class.D* %address1, i64 0, i32 0, i64 0, i32 0
   store float %1, float* %4, align 4
   br label %if.end
diff --git a/llvm/test/CodeGen/X86/codemodel.ll b/llvm/test/CodeGen/X86/codemodel.ll
index 3aebc13..bbc96d7 100644
--- a/llvm/test/CodeGen/X86/codemodel.ll
+++ b/llvm/test/CodeGen/X86/codemodel.ll
@@ -11,7 +11,7 @@
 ; CHECK-SMALL:   movl data(%rip), %eax
 ; CHECK-KERNEL-LABEL: foo:
 ; CHECK-KERNEL:  movl data, %eax
-	%0 = load i32* getelementptr ([0 x i32]* @data, i64 0, i64 0), align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* getelementptr ([0 x i32]* @data, i64 0, i64 0), align 4		; <i32> [#uses=1]
 	ret i32 %0
 }
 
@@ -21,7 +21,7 @@
 ; CHECK-SMALL:   movl data+40(%rip), %eax
 ; CHECK-KERNEL-LABEL: foo2:
 ; CHECK-KERNEL:  movl data+40, %eax
-	%0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 10), align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* getelementptr ([0 x i32]* @data, i32 0, i64 10), align 4		; <i32> [#uses=1]
 	ret i32 %0
 }
 
@@ -31,7 +31,7 @@
 ; CHECK-SMALL:   movl data-40(%rip), %eax
 ; CHECK-KERNEL-LABEL: foo3:
 ; CHECK-KERNEL:  movq $-40, %rax
-	%0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 -10), align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* getelementptr ([0 x i32]* @data, i32 0, i64 -10), align 4		; <i32> [#uses=1]
 	ret i32 %0
 }
 
@@ -43,7 +43,7 @@
 ; CHECK-SMALL:   movl data(%rax), %eax
 ; CHECK-KERNEL-LABEL: foo4:
 ; CHECK-KERNEL:  movl data+16777216, %eax
-	%0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 4194304), align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* getelementptr ([0 x i32]* @data, i32 0, i64 4194304), align 4		; <i32> [#uses=1]
 	ret i32 %0
 }
 
@@ -53,7 +53,7 @@
 ; CHECK-SMALL:   movl data+16777212(%rip), %eax
 ; CHECK-KERNEL-LABEL: foo1:
 ; CHECK-KERNEL:  movl data+16777212, %eax
-        %0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 4194303), align 4            ; <i32> [#uses=1]
+        %0 = load i32, i32* getelementptr ([0 x i32]* @data, i32 0, i64 4194303), align 4            ; <i32> [#uses=1]
         ret i32 %0
 }
 define i32 @foo5() nounwind readonly {
@@ -62,6 +62,6 @@
 ; CHECK-SMALL:   movl data-16777216(%rip), %eax
 ; CHECK-KERNEL-LABEL: foo5:
 ; CHECK-KERNEL:  movq $-16777216, %rax
-	%0 = load i32* getelementptr ([0 x i32]* @data, i32 0, i64 -4194304), align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* getelementptr ([0 x i32]* @data, i32 0, i64 -4194304), align 4		; <i32> [#uses=1]
 	ret i32 %0
 }
diff --git a/llvm/test/CodeGen/X86/combiner-aa-0.ll b/llvm/test/CodeGen/X86/combiner-aa-0.ll
index ce87a99..403059d 100644
--- a/llvm/test/CodeGen/X86/combiner-aa-0.ll
+++ b/llvm/test/CodeGen/X86/combiner-aa-0.ll
@@ -5,14 +5,14 @@
 @g_flipV_hashkey = external global %struct.Hash_Key, align 16		; <%struct.Hash_Key*> [#uses=1]
 
 define void @foo() nounwind {
-	%t0 = load i32* undef, align 16		; <i32> [#uses=1]
-	%t1 = load i32* null, align 4		; <i32> [#uses=1]
+	%t0 = load i32, i32* undef, align 16		; <i32> [#uses=1]
+	%t1 = load i32, i32* null, align 4		; <i32> [#uses=1]
 	%t2 = srem i32 %t0, 32		; <i32> [#uses=1]
 	%t3 = shl i32 1, %t2		; <i32> [#uses=1]
 	%t4 = xor i32 %t3, %t1		; <i32> [#uses=1]
 	store i32 %t4, i32* null, align 4
 	%t5 = getelementptr %struct.Hash_Key, %struct.Hash_Key* @g_flipV_hashkey, i64 0, i32 0, i64 0		; <i32*> [#uses=2]
-	%t6 = load i32* %t5, align 4		; <i32> [#uses=1]
+	%t6 = load i32, i32* %t5, align 4		; <i32> [#uses=1]
 	%t7 = shl i32 1, undef		; <i32> [#uses=1]
 	%t8 = xor i32 %t7, %t6		; <i32> [#uses=1]
 	store i32 %t8, i32* %t5, align 4
diff --git a/llvm/test/CodeGen/X86/combiner-aa-1.ll b/llvm/test/CodeGen/X86/combiner-aa-1.ll
index 04f3206..cc3e5ca 100644
--- a/llvm/test/CodeGen/X86/combiner-aa-1.ll
+++ b/llvm/test/CodeGen/X86/combiner-aa-1.ll
@@ -13,9 +13,9 @@
 define i32 @._ZN8lam_node18resolve_name_clashEP8arg_nodeP9alst_node._ZNK8lam_nodeeqERK8exp_node._ZN11arglst_nodeD0Ev(%struct.lam_node* %this.this, %struct.arg_node* %outer_arg, %struct.alst_node* %env.cmp, %struct.arglst_node* %this, i32 %functionID) {
 comb_entry:
   %.SV59 = alloca %struct.node*                   ; <%struct.node**> [#uses=1]
-  %0 = load i32 (...)*** null, align 4            ; <i32 (...)**> [#uses=1]
+  %0 = load i32 (...)**, i32 (...)*** null, align 4            ; <i32 (...)**> [#uses=1]
   %1 = getelementptr inbounds i32 (...)*, i32 (...)** %0, i32 3 ; <i32 (...)**> [#uses=1]
-  %2 = load i32 (...)** %1, align 4               ; <i32 (...)*> [#uses=1]
+  %2 = load i32 (...)*, i32 (...)** %1, align 4               ; <i32 (...)*> [#uses=1]
   store %struct.node* undef, %struct.node** %.SV59
   %3 = bitcast i32 (...)* %2 to i32 (%struct.node*)* ; <i32 (%struct.node*)*> [#uses=1]
   %4 = tail call i32 %3(%struct.node* undef)      ; <i32> [#uses=0]
diff --git a/llvm/test/CodeGen/X86/commute-blend-avx2.ll b/llvm/test/CodeGen/X86/commute-blend-avx2.ll
index d06c6da..0172f99 100644
--- a/llvm/test/CodeGen/X86/commute-blend-avx2.ll
+++ b/llvm/test/CodeGen/X86/commute-blend-avx2.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=avx2 < %s | FileCheck %s
 
 define <8 x i16> @commute_fold_vpblendw_128(<8 x i16> %a, <8 x i16>* %b) #0 {
-  %1 = load <8 x i16>* %b
+  %1 = load <8 x i16>, <8 x i16>* %b
   %2 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %1, <8 x i16> %a, i8 17)
   ret <8 x i16> %2
 
@@ -12,7 +12,7 @@
 declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
 
 define <16 x i16> @commute_fold_vpblendw_256(<16 x i16> %a, <16 x i16>* %b) #0 {
-  %1 = load <16 x i16>* %b
+  %1 = load <16 x i16>, <16 x i16>* %b
   %2 = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %1, <16 x i16> %a, i8 17)
   ret <16 x i16> %2
 
@@ -23,7 +23,7 @@
 declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone
 
 define <4 x i32> @commute_fold_vpblendd_128(<4 x i32> %a, <4 x i32>* %b) #0 {
-  %1 = load <4 x i32>* %b
+  %1 = load <4 x i32>, <4 x i32>* %b
   %2 = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %1, <4 x i32> %a, i8 1)
   ret <4 x i32> %2
 
@@ -34,7 +34,7 @@
 declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i8) nounwind readnone
 
 define <8 x i32> @commute_fold_vpblendd_256(<8 x i32> %a, <8 x i32>* %b) #0 {
-  %1 = load <8 x i32>* %b
+  %1 = load <8 x i32>, <8 x i32>* %b
   %2 = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %1, <8 x i32> %a, i8 129)
   ret <8 x i32> %2
 
@@ -45,7 +45,7 @@
 declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
 
 define <4 x float> @commute_fold_vblendps_128(<4 x float> %a, <4 x float>* %b) #0 {
-  %1 = load <4 x float>* %b
+  %1 = load <4 x float>, <4 x float>* %b
   %2 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %1, <4 x float> %a, i8 3)
   ret <4 x float> %2
 
@@ -56,7 +56,7 @@
 declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
 
 define <8 x float> @commute_fold_vblendps_256(<8 x float> %a, <8 x float>* %b) #0 {
-  %1 = load <8 x float>* %b
+  %1 = load <8 x float>, <8 x float>* %b
   %2 = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %1, <8 x float> %a, i8 7)
   ret <8 x float> %2
 
@@ -67,7 +67,7 @@
 declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
 
 define <2 x double> @commute_fold_vblendpd_128(<2 x double> %a, <2 x double>* %b) #0 {
-  %1 = load <2 x double>* %b
+  %1 = load <2 x double>, <2 x double>* %b
   %2 = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %1, <2 x double> %a, i8 1)
   ret <2 x double> %2
 
@@ -78,7 +78,7 @@
 declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
 
 define <4 x double> @commute_fold_vblendpd_256(<4 x double> %a, <4 x double>* %b) #0 {
-  %1 = load <4 x double>* %b
+  %1 = load <4 x double>, <4 x double>* %b
   %2 = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %1, <4 x double> %a, i8 7)
   ret <4 x double> %2
 
diff --git a/llvm/test/CodeGen/X86/commute-blend-sse41.ll b/llvm/test/CodeGen/X86/commute-blend-sse41.ll
index 59fef8c..500f6a3 100644
--- a/llvm/test/CodeGen/X86/commute-blend-sse41.ll
+++ b/llvm/test/CodeGen/X86/commute-blend-sse41.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -O3 -mtriple=x86_64-unknown -mcpu=corei7 < %s | FileCheck %s
 
 define <8 x i16> @commute_fold_pblendw(<8 x i16> %a, <8 x i16>* %b) #0 {
-  %1 = load <8 x i16>* %b
+  %1 = load <8 x i16>, <8 x i16>* %b
   %2 = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %1, <8 x i16> %a, i8 17)
   ret <8 x i16> %2
 
@@ -12,7 +12,7 @@
 declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
 
 define <4 x float> @commute_fold_blendps(<4 x float> %a, <4 x float>* %b) #0 {
-  %1 = load <4 x float>* %b
+  %1 = load <4 x float>, <4 x float>* %b
   %2 = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %1, <4 x float> %a, i8 3)
   ret <4 x float> %2
 
@@ -23,7 +23,7 @@
 declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
 
 define <2 x double> @commute_fold_blendpd(<2 x double> %a, <2 x double>* %b) #0 {
-  %1 = load <2 x double>* %b
+  %1 = load <2 x double>, <2 x double>* %b
   %2 = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %1, <2 x double> %a, i8 1)
   ret <2 x double> %2
 
diff --git a/llvm/test/CodeGen/X86/commute-clmul.ll b/llvm/test/CodeGen/X86/commute-clmul.ll
index fe3e556..d13911a 100644
--- a/llvm/test/CodeGen/X86/commute-clmul.ll
+++ b/llvm/test/CodeGen/X86/commute-clmul.ll
@@ -12,7 +12,7 @@
   ;AVX:       vpclmulqdq $0, (%rdi), %xmm0, %xmm0
   ;AVX-NEXT:  retq
 
-  %1 = load <2 x i64>* %a0
+  %1 = load <2 x i64>, <2 x i64>* %a0
   %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %1, <2 x i64> %a1, i8 0)
   ret <2 x i64> %2
 }
@@ -26,7 +26,7 @@
   ;AVX:       vpclmulqdq $1, (%rdi), %xmm0, %xmm0
   ;AVX-NEXT:  retq
 
-  %1 = load <2 x i64>* %a0
+  %1 = load <2 x i64>, <2 x i64>* %a0
   %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %1, <2 x i64> %a1, i8 16)
   ret <2 x i64> %2
 }
@@ -40,7 +40,7 @@
   ;AVX:       vpclmulqdq $16, (%rdi), %xmm0, %xmm0
   ;AVX-NEXT:  retq
 
-  %1 = load <2 x i64>* %a0
+  %1 = load <2 x i64>, <2 x i64>* %a0
   %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %1, <2 x i64> %a1, i8 1)
   ret <2 x i64> %2
 }
@@ -54,7 +54,7 @@
   ;AVX:       vpclmulqdq $17, (%rdi), %xmm0, %xmm0
   ;AVX-NEXT:  retq
 
-  %1 = load <2 x i64>* %a0
+  %1 = load <2 x i64>, <2 x i64>* %a0
   %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %1, <2 x i64> %a1, i8 17)
   ret <2 x i64> %2
 }
diff --git a/llvm/test/CodeGen/X86/commute-fcmp.ll b/llvm/test/CodeGen/X86/commute-fcmp.ll
index 0d7f2af..6f43ebe 100644
--- a/llvm/test/CodeGen/X86/commute-fcmp.ll
+++ b/llvm/test/CodeGen/X86/commute-fcmp.ll
@@ -15,7 +15,7 @@
   ;AVX:       vcmpeqps (%rdi), %xmm0, %xmm0
   ;AVX-NEXT:  retq
 
-  %1 = load <4 x float>* %a0
+  %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp oeq <4 x float> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i32>
   ret <4 x i32> %3
@@ -30,7 +30,7 @@
   ;AVX:       vcmpneqps (%rdi), %xmm0, %xmm0
   ;AVX-NEXT:  retq
 
-  %1 = load <4 x float>* %a0
+  %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp une <4 x float> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i32>
   ret <4 x i32> %3
@@ -45,7 +45,7 @@
   ;AVX:       vcmpordps (%rdi), %xmm0, %xmm0
   ;AVX-NEXT:  retq
 
-  %1 = load <4 x float>* %a0
+  %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp ord <4 x float> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i32>
   ret <4 x i32> %3
@@ -60,7 +60,7 @@
   ;AVX:       vcmpunordps (%rdi), %xmm0, %xmm0
   ;AVX-NEXT:  retq
 
-  %1 = load <4 x float>* %a0
+  %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp uno <4 x float> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i32>
   ret <4 x i32> %3
@@ -78,7 +78,7 @@
   ;AVX-NEXT:  vcmpltps %xmm0, %xmm1, %xmm0
   ;AVX-NEXT:  retq
 
-  %1 = load <4 x float>* %a0
+  %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp olt <4 x float> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i32>
   ret <4 x i32> %3
@@ -96,7 +96,7 @@
   ;AVX-NEXT:  vcmpleps %xmm0, %xmm1, %xmm0
   ;AVX-NEXT:  retq
 
-  %1 = load <4 x float>* %a0
+  %1 = load <4 x float>, <4 x float>* %a0
   %2 = fcmp ole <4 x float> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i32>
   ret <4 x i32> %3
@@ -107,7 +107,7 @@
   ;AVX:       vcmpeqps (%rdi), %ymm0, %ymm0
   ;AVX-NEXT:  retq
 
-  %1 = load <8 x float>* %a0
+  %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp oeq <8 x float> %1, %a1
   %3 = sext <8 x i1> %2 to <8 x i32>
   ret <8 x i32> %3
@@ -118,7 +118,7 @@
   ;AVX:       vcmpneqps (%rdi), %ymm0, %ymm0
   ;AVX-NEXT:  retq
 
-  %1 = load <8 x float>* %a0
+  %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp une <8 x float> %1, %a1
   %3 = sext <8 x i1> %2 to <8 x i32>
   ret <8 x i32> %3
@@ -129,7 +129,7 @@
   ;AVX:       vcmpordps (%rdi), %ymm0, %ymm0
   ;AVX-NEXT:  retq
 
-  %1 = load <8 x float>* %a0
+  %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp ord <8 x float> %1, %a1
   %3 = sext <8 x i1> %2 to <8 x i32>
   ret <8 x i32> %3
@@ -140,7 +140,7 @@
   ;AVX:       vcmpunordps (%rdi), %ymm0, %ymm0
   ;AVX-NEXT:  retq
 
-  %1 = load <8 x float>* %a0
+  %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp uno <8 x float> %1, %a1
   %3 = sext <8 x i1> %2 to <8 x i32>
   ret <8 x i32> %3
@@ -152,7 +152,7 @@
   ;AVX-NEXT:  vcmpltps %ymm0, %ymm1, %ymm0
   ;AVX-NEXT:  retq
 
-  %1 = load <8 x float>* %a0
+  %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp olt <8 x float> %1, %a1
   %3 = sext <8 x i1> %2 to <8 x i32>
   ret <8 x i32> %3
@@ -164,7 +164,7 @@
   ;AVX-NEXT:  vcmpleps %ymm0, %ymm1, %ymm0
   ;AVX-NEXT:  retq
 
-  %1 = load <8 x float>* %a0
+  %1 = load <8 x float>, <8 x float>* %a0
   %2 = fcmp ole <8 x float> %1, %a1
   %3 = sext <8 x i1> %2 to <8 x i32>
   ret <8 x i32> %3
@@ -184,7 +184,7 @@
   ;AVX:       vcmpeqpd (%rdi), %xmm0, %xmm0
   ;AVX-NEXT:  retq
 
-  %1 = load <2 x double>* %a0
+  %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp oeq <2 x double> %1, %a1
   %3 = sext <2 x i1> %2 to <2 x i64>
   ret <2 x i64> %3
@@ -199,7 +199,7 @@
   ;AVX:       vcmpneqpd (%rdi), %xmm0, %xmm0
   ;AVX-NEXT:  retq
 
-  %1 = load <2 x double>* %a0
+  %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp une <2 x double> %1, %a1
   %3 = sext <2 x i1> %2 to <2 x i64>
   ret <2 x i64> %3
@@ -214,7 +214,7 @@
   ;AVX:       vcmpordpd (%rdi), %xmm0, %xmm0
   ;AVX-NEXT:  retq
 
-  %1 = load <2 x double>* %a0
+  %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp ord <2 x double> %1, %a1
   %3 = sext <2 x i1> %2 to <2 x i64>
   ret <2 x i64> %3
@@ -229,7 +229,7 @@
   ;AVX:       vcmpunordpd (%rdi), %xmm0, %xmm0
   ;AVX-NEXT:  retq
 
-  %1 = load <2 x double>* %a0
+  %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp uno <2 x double> %1, %a1
   %3 = sext <2 x i1> %2 to <2 x i64>
   ret <2 x i64> %3
@@ -247,7 +247,7 @@
   ;AVX-NEXT:  vcmpltpd %xmm0, %xmm1, %xmm0
   ;AVX-NEXT:  retq
 
-  %1 = load <2 x double>* %a0
+  %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp olt <2 x double> %1, %a1
   %3 = sext <2 x i1> %2 to <2 x i64>
   ret <2 x i64> %3
@@ -265,7 +265,7 @@
   ;AVX-NEXT:  vcmplepd %xmm0, %xmm1, %xmm0
   ;AVX-NEXT:  retq
 
-  %1 = load <2 x double>* %a0
+  %1 = load <2 x double>, <2 x double>* %a0
   %2 = fcmp ole <2 x double> %1, %a1
   %3 = sext <2 x i1> %2 to <2 x i64>
   ret <2 x i64> %3
@@ -276,7 +276,7 @@
   ;AVX:       vcmpeqpd (%rdi), %ymm0, %ymm0
   ;AVX-NEXT:  retq
 
-  %1 = load <4 x double>* %a0
+  %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp oeq <4 x double> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i64>
   ret <4 x i64> %3
@@ -287,7 +287,7 @@
   ;AVX:       vcmpneqpd (%rdi), %ymm0, %ymm0
   ;AVX-NEXT:  retq
 
-  %1 = load <4 x double>* %a0
+  %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp une <4 x double> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i64>
   ret <4 x i64> %3
@@ -298,7 +298,7 @@
   ;AVX:       vcmpordpd (%rdi), %ymm0, %ymm0
   ;AVX-NEXT:  retq
 
-  %1 = load <4 x double>* %a0
+  %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp ord <4 x double> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i64>
   ret <4 x i64> %3
@@ -309,7 +309,7 @@
   ;AVX:       vcmpunordpd (%rdi), %ymm0, %ymm0
   ;AVX-NEXT:  retq
 
-  %1 = load <4 x double>* %a0
+  %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp uno <4 x double> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i64>
   ret <4 x i64> %3
@@ -321,7 +321,7 @@
   ;AVX-NEXT:  vcmpltpd %ymm0, %ymm1, %ymm0
   ;AVX-NEXT:  retq
 
-  %1 = load <4 x double>* %a0
+  %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp olt <4 x double> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i64>
   ret <4 x i64> %3
@@ -333,7 +333,7 @@
   ;AVX-NEXT:  vcmplepd %ymm0, %ymm1, %ymm0
   ;AVX-NEXT:  retq
 
-  %1 = load <4 x double>* %a0
+  %1 = load <4 x double>, <4 x double>* %a0
   %2 = fcmp ole <4 x double> %1, %a1
   %3 = sext <4 x i1> %2 to <4 x i64>
   ret <4 x i64> %3
diff --git a/llvm/test/CodeGen/X86/commute-intrinsic.ll b/llvm/test/CodeGen/X86/commute-intrinsic.ll
index 7d5ca47..ff9049c 100644
--- a/llvm/test/CodeGen/X86/commute-intrinsic.ll
+++ b/llvm/test/CodeGen/X86/commute-intrinsic.ll
@@ -6,7 +6,7 @@
 
 define <2 x i64> @madd(<2 x i64> %b) nounwind  {
 entry:
-	%tmp2 = load <2 x i64>* @a, align 16		; <<2 x i64>> [#uses=1]
+	%tmp2 = load <2 x i64>, <2 x i64>* @a, align 16		; <<2 x i64>> [#uses=1]
 	%tmp6 = bitcast <2 x i64> %b to <8 x i16>		; <<8 x i16>> [#uses=1]
 	%tmp9 = bitcast <2 x i64> %tmp2 to <8 x i16>		; <<8 x i16>> [#uses=1]
 	%tmp11 = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd( <8 x i16> %tmp9, <8 x i16> %tmp6 ) nounwind readnone 		; <<4 x i32>> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/commute-xop.ll b/llvm/test/CodeGen/X86/commute-xop.ll
index a3e14fe..e551d9b 100644
--- a/llvm/test/CodeGen/X86/commute-xop.ll
+++ b/llvm/test/CodeGen/X86/commute-xop.ll
@@ -3,7 +3,7 @@
 define <16 x i8> @commute_fold_vpcomb(<16 x i8>* %a0, <16 x i8> %a1) {
   ;CHECK-LABEL: commute_fold_vpcomb
   ;CHECK:       vpcomgtb (%rdi), %xmm0, %xmm0
-  %1 = load <16 x i8>* %a0
+  %1 = load <16 x i8>, <16 x i8>* %a0
   %2 = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %1, <16 x i8> %a1, i8 0) ; vpcomltb
   ret <16 x i8> %2
 }
@@ -12,7 +12,7 @@
 define <4 x i32> @commute_fold_vpcomd(<4 x i32>* %a0, <4 x i32> %a1) {
   ;CHECK-LABEL: commute_fold_vpcomd
   ;CHECK:       vpcomged (%rdi), %xmm0, %xmm0
-  %1 = load <4 x i32>* %a0
+  %1 = load <4 x i32>, <4 x i32>* %a0
   %2 = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %1, <4 x i32> %a1, i8 1) ; vpcomled
   ret <4 x i32> %2
 }
@@ -21,7 +21,7 @@
 define <2 x i64> @commute_fold_vpcomq(<2 x i64>* %a0, <2 x i64> %a1) {
   ;CHECK-LABEL: commute_fold_vpcomq
   ;CHECK:       vpcomltq (%rdi), %xmm0, %xmm0
-  %1 = load <2 x i64>* %a0
+  %1 = load <2 x i64>, <2 x i64>* %a0
   %2 = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %1, <2 x i64> %a1, i8 2) ; vpcomgtq
   ret <2 x i64> %2
 }
@@ -30,7 +30,7 @@
 define <16 x i8> @commute_fold_vpcomub(<16 x i8>* %a0, <16 x i8> %a1) {
   ;CHECK-LABEL: commute_fold_vpcomub
   ;CHECK:       vpcomleub (%rdi), %xmm0, %xmm0
-  %1 = load <16 x i8>* %a0
+  %1 = load <16 x i8>, <16 x i8>* %a0
   %2 = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %1, <16 x i8> %a1, i8 3) ; vpcomgeub
   ret <16 x i8> %2
 }
@@ -39,7 +39,7 @@
 define <4 x i32> @commute_fold_vpcomud(<4 x i32>* %a0, <4 x i32> %a1) {
   ;CHECK-LABEL: commute_fold_vpcomud
   ;CHECK:       vpcomequd (%rdi), %xmm0, %xmm0
-  %1 = load <4 x i32>* %a0
+  %1 = load <4 x i32>, <4 x i32>* %a0
   %2 = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %1, <4 x i32> %a1, i8 4) ; vpcomequd
   ret <4 x i32> %2
 }
@@ -48,7 +48,7 @@
 define <2 x i64> @commute_fold_vpcomuq(<2 x i64>* %a0, <2 x i64> %a1) {
   ;CHECK-LABEL: commute_fold_vpcomuq
   ;CHECK:       vpcomnequq (%rdi), %xmm0, %xmm0
-  %1 = load <2 x i64>* %a0
+  %1 = load <2 x i64>, <2 x i64>* %a0
   %2 = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %1, <2 x i64> %a1, i8 5) ; vpcomnequq
   ret <2 x i64> %2
 }
@@ -57,7 +57,7 @@
 define <8 x i16> @commute_fold_vpcomuw(<8 x i16>* %a0, <8 x i16> %a1) {
   ;CHECK-LABEL: commute_fold_vpcomuw
   ;CHECK:       vpcomfalseuw (%rdi), %xmm0, %xmm0
-  %1 = load <8 x i16>* %a0
+  %1 = load <8 x i16>, <8 x i16>* %a0
   %2 = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %1, <8 x i16> %a1, i8 6) ; vpcomfalseuw
   ret <8 x i16> %2
 }
@@ -66,7 +66,7 @@
 define <8 x i16> @commute_fold_vpcomw(<8 x i16>* %a0, <8 x i16> %a1) {
   ;CHECK-LABEL: commute_fold_vpcomw
   ;CHECK:       vpcomtruew (%rdi), %xmm0, %xmm0
-  %1 = load <8 x i16>* %a0
+  %1 = load <8 x i16>, <8 x i16>* %a0
   %2 = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %1, <8 x i16> %a1, i8 7) ; vpcomtruew
   ret <8 x i16> %2
 }
@@ -75,7 +75,7 @@
 define <4 x i32> @commute_fold_vpmacsdd(<4 x i32>* %a0, <4 x i32> %a1, <4 x i32> %a2) {
   ;CHECK-LABEL: commute_fold_vpmacsdd
   ;CHECK:       vpmacsdd %xmm1, (%rdi), %xmm0, %xmm0
-  %1 = load <4 x i32>* %a0
+  %1 = load <4 x i32>, <4 x i32>* %a0
   %2 = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %1, <4 x i32> %a1, <4 x i32> %a2)
   ret <4 x i32> %2
 }
@@ -84,7 +84,7 @@
 define <2 x i64> @commute_fold_vpmacsdqh(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
   ;CHECK-LABEL: commute_fold_vpmacsdqh
   ;CHECK:       vpmacsdqh %xmm1, (%rdi), %xmm0, %xmm0
-  %1 = load <4 x i32>* %a0
+  %1 = load <4 x i32>, <4 x i32>* %a0
   %2 = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
   ret <2 x i64> %2
 }
@@ -93,7 +93,7 @@
 define <2 x i64> @commute_fold_vpmacsdql(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
   ;CHECK-LABEL: commute_fold_vpmacsdql
   ;CHECK:       vpmacsdql %xmm1, (%rdi), %xmm0, %xmm0
-  %1 = load <4 x i32>* %a0
+  %1 = load <4 x i32>, <4 x i32>* %a0
   %2 = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
   ret <2 x i64> %2
 }
@@ -102,7 +102,7 @@
 define <4 x i32> @commute_fold_vpmacssdd(<4 x i32>* %a0, <4 x i32> %a1, <4 x i32> %a2) {
   ;CHECK-LABEL: commute_fold_vpmacssdd
   ;CHECK:       vpmacssdd %xmm1, (%rdi), %xmm0, %xmm0
-  %1 = load <4 x i32>* %a0
+  %1 = load <4 x i32>, <4 x i32>* %a0
   %2 = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %1, <4 x i32> %a1, <4 x i32> %a2)
   ret <4 x i32> %2
 }
@@ -111,7 +111,7 @@
 define <2 x i64> @commute_fold_vpmacssdqh(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
   ;CHECK-LABEL: commute_fold_vpmacssdqh
   ;CHECK:       vpmacssdqh %xmm1, (%rdi), %xmm0, %xmm0
-  %1 = load <4 x i32>* %a0
+  %1 = load <4 x i32>, <4 x i32>* %a0
   %2 = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
   ret <2 x i64> %2
 }
@@ -120,7 +120,7 @@
 define <2 x i64> @commute_fold_vpmacssdql(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
   ;CHECK-LABEL: commute_fold_vpmacssdql
   ;CHECK:       vpmacssdql %xmm1, (%rdi), %xmm0, %xmm0
-  %1 = load <4 x i32>* %a0
+  %1 = load <4 x i32>, <4 x i32>* %a0
   %2 = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
   ret <2 x i64> %2
 }
@@ -129,7 +129,7 @@
 define <4 x i32> @commute_fold_vpmacsswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
   ;CHECK-LABEL: commute_fold_vpmacsswd
   ;CHECK:       vpmacsswd %xmm1, (%rdi), %xmm0, %xmm0
-  %1 = load <8 x i16>* %a0
+  %1 = load <8 x i16>, <8 x i16>* %a0
   %2 = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
   ret <4 x i32> %2
 }
@@ -138,7 +138,7 @@
 define <8 x i16> @commute_fold_vpmacssww(<8 x i16>* %a0, <8 x i16> %a1, <8 x i16> %a2) {
   ;CHECK-LABEL: commute_fold_vpmacssww
   ;CHECK:       vpmacssww %xmm1, (%rdi), %xmm0, %xmm0
-  %1 = load <8 x i16>* %a0
+  %1 = load <8 x i16>, <8 x i16>* %a0
   %2 = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %1, <8 x i16> %a1, <8 x i16> %a2)
   ret <8 x i16> %2
 }
@@ -147,7 +147,7 @@
 define <4 x i32> @commute_fold_vpmacswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
   ;CHECK-LABEL: commute_fold_vpmacswd
   ;CHECK:       vpmacswd %xmm1, (%rdi), %xmm0, %xmm0
-  %1 = load <8 x i16>* %a0
+  %1 = load <8 x i16>, <8 x i16>* %a0
   %2 = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
   ret <4 x i32> %2
 }
@@ -156,7 +156,7 @@
 define <8 x i16> @commute_fold_vpmacsww(<8 x i16>* %a0, <8 x i16> %a1, <8 x i16> %a2) {
   ;CHECK-LABEL: commute_fold_vpmacsww
   ;CHECK:       vpmacsww %xmm1, (%rdi), %xmm0, %xmm0
-  %1 = load <8 x i16>* %a0
+  %1 = load <8 x i16>, <8 x i16>* %a0
   %2 = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %1, <8 x i16> %a1, <8 x i16> %a2)
   ret <8 x i16> %2
 }
@@ -165,7 +165,7 @@
 define <4 x i32> @commute_fold_vpmadcsswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
   ;CHECK-LABEL: commute_fold_vpmadcsswd
   ;CHECK:       vpmadcsswd %xmm1, (%rdi), %xmm0, %xmm0
-  %1 = load <8 x i16>* %a0
+  %1 = load <8 x i16>, <8 x i16>* %a0
   %2 = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
   ret <4 x i32> %2
 }
@@ -174,7 +174,7 @@
 define <4 x i32> @commute_fold_vpmadcswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
   ;CHECK-LABEL: commute_fold_vpmadcswd
   ;CHECK:       vpmadcswd %xmm1, (%rdi), %xmm0, %xmm0
-  %1 = load <8 x i16>* %a0
+  %1 = load <8 x i16>, <8 x i16>* %a0
   %2 = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
   ret <4 x i32> %2
 }
diff --git a/llvm/test/CodeGen/X86/compact-unwind.ll b/llvm/test/CodeGen/X86/compact-unwind.ll
index d004f6e..f8266a1 100644
--- a/llvm/test/CodeGen/X86/compact-unwind.ll
+++ b/llvm/test/CodeGen/X86/compact-unwind.ll
@@ -39,12 +39,12 @@
 
 define i8* @test0(i64 %size) {
   %addr = alloca i64, align 8
-  %tmp20 = load i32* @gv, align 4
+  %tmp20 = load i32, i32* @gv, align 4
   %tmp21 = call i32 @bar()
-  %tmp25 = load i64* %addr, align 8
+  %tmp25 = load i64, i64* %addr, align 8
   %tmp26 = inttoptr i64 %tmp25 to %ty*
   %tmp29 = getelementptr inbounds %ty, %ty* %tmp26, i64 0, i32 0
-  %tmp34 = load i8** %tmp29, align 8
+  %tmp34 = load i8*, i8** %tmp29, align 8
   %tmp35 = getelementptr inbounds i8, i8* %tmp34, i64 %size
   store i8* %tmp35, i8** %tmp29, align 8
   ret i8* null
@@ -85,7 +85,7 @@
 for.body3:                                        ; preds = %for.inc, %for.cond1.preheader
   %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.inc ]
   %image4 = getelementptr inbounds %"struct.dyld::MappedRanges", %"struct.dyld::MappedRanges"* %p.019, i64 0, i32 0, i64 %indvars.iv, i32 0
-  %0 = load %class.ImageLoader** %image4, align 8
+  %0 = load %class.ImageLoader*, %class.ImageLoader** %image4, align 8
   %cmp5 = icmp eq %class.ImageLoader* %0, %image
   br i1 %cmp5, label %if.then, label %for.inc
 
@@ -102,7 +102,7 @@
 
 for.inc10:                                        ; preds = %for.inc
   %next = getelementptr inbounds %"struct.dyld::MappedRanges", %"struct.dyld::MappedRanges"* %p.019, i64 0, i32 1
-  %1 = load %"struct.dyld::MappedRanges"** %next, align 8
+  %1 = load %"struct.dyld::MappedRanges"*, %"struct.dyld::MappedRanges"** %next, align 8
   %cmp = icmp eq %"struct.dyld::MappedRanges"* %1, null
   br i1 %cmp, label %for.end11, label %for.cond1.preheader
 
diff --git a/llvm/test/CodeGen/X86/complex-asm.ll b/llvm/test/CodeGen/X86/complex-asm.ll
index 8ceb47c..d7b5879 100644
--- a/llvm/test/CodeGen/X86/complex-asm.ll
+++ b/llvm/test/CodeGen/X86/complex-asm.ll
@@ -8,9 +8,9 @@
   %v = alloca %0, align 8
   call void asm sideeffect "", "=*r,r,r,0,~{dirflag},~{fpsr},~{flags}"(%0* %v, i32 0, i32 1, i128 undef) nounwind
   %0 = getelementptr inbounds %0, %0* %v, i64 0, i32 0
-  %1 = load i64* %0, align 8
+  %1 = load i64, i64* %0, align 8
   %2 = getelementptr inbounds %0, %0* %v, i64 0, i32 1
-  %3 = load i64* %2, align 8
+  %3 = load i64, i64* %2, align 8
   %mrv4 = insertvalue %0 undef, i64 %1, 0
   %mrv5 = insertvalue %0 %mrv4, i64 %3, 1
   ret %0 %mrv5
diff --git a/llvm/test/CodeGen/X86/computeKnownBits_urem.ll b/llvm/test/CodeGen/X86/computeKnownBits_urem.ll
index 9902e6f..a72740e 100644
--- a/llvm/test/CodeGen/X86/computeKnownBits_urem.ll
+++ b/llvm/test/CodeGen/X86/computeKnownBits_urem.ll
@@ -3,7 +3,7 @@
 entry:
   %a = alloca i32, align 4
   store i32 1, i32* %a, align 4
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %or = or i32 1, %0
   %and = and i32 1, %or
   %rem = urem i32 %and, 1
diff --git a/llvm/test/CodeGen/X86/const-base-addr.ll b/llvm/test/CodeGen/X86/const-base-addr.ll
index 19a103c..4264713 100644
--- a/llvm/test/CodeGen/X86/const-base-addr.ll
+++ b/llvm/test/CodeGen/X86/const-base-addr.ll
@@ -12,11 +12,11 @@
 ; CHECK-NEXT:   addl  8(%rcx), %eax
 ; CHECK-NEXT:   addl  12(%rcx), %eax
   %addr1 = getelementptr %T, %T* inttoptr (i64 123456789012345678 to %T*), i32 0, i32 1
-  %tmp1 = load i32* %addr1
+  %tmp1 = load i32, i32* %addr1
   %addr2 = getelementptr %T, %T* inttoptr (i64 123456789012345678 to %T*), i32 0, i32 2
-  %tmp2 = load i32* %addr2
+  %tmp2 = load i32, i32* %addr2
   %addr3 = getelementptr %T, %T* inttoptr (i64 123456789012345678 to %T*), i32 0, i32 3
-  %tmp3 = load i32* %addr3
+  %tmp3 = load i32, i32* %addr3
   %tmp4 = add i32 %tmp1, %tmp2
   %tmp5 = add i32 %tmp3, %tmp4
   ret i32 %tmp5
diff --git a/llvm/test/CodeGen/X86/constant-combines.ll b/llvm/test/CodeGen/X86/constant-combines.ll
index 0c9c211..5ea736e 100644
--- a/llvm/test/CodeGen/X86/constant-combines.ll
+++ b/llvm/test/CodeGen/X86/constant-combines.ll
@@ -20,7 +20,7 @@
 
   %1 = getelementptr inbounds { float, float }, { float, float }* %arg, i64 0,  i32 0
   %2 = bitcast float* %1 to i64*
-  %3 = load i64* %2, align 8
+  %3 = load i64, i64* %2, align 8
   %4 = trunc i64 %3 to i32
   %5 = lshr i64 %3, 32
   %6 = trunc i64 %5 to i32
diff --git a/llvm/test/CodeGen/X86/constant-hoisting-optnone.ll b/llvm/test/CodeGen/X86/constant-hoisting-optnone.ll
index f61fe3f..4d8a06c 100644
--- a/llvm/test/CodeGen/X86/constant-hoisting-optnone.ll
+++ b/llvm/test/CodeGen/X86/constant-hoisting-optnone.ll
@@ -12,8 +12,8 @@
 ; CHECK-DAG: movabsq {{.*#+}} imm = 0xBEEBEEBEC
 ; CHECK: ret
 entry:
-  %0 = load i64* inttoptr (i64 51250129900 to i64*)
-  %1 = load i64* inttoptr (i64 51250129908 to i64*)
+  %0 = load i64, i64* inttoptr (i64 51250129900 to i64*)
+  %1 = load i64, i64* inttoptr (i64 51250129908 to i64*)
   %2 = add i64 %0, %1
   ret i64 %2
 }
diff --git a/llvm/test/CodeGen/X86/constant-hoisting-shift-immediate.ll b/llvm/test/CodeGen/X86/constant-hoisting-shift-immediate.ll
index 883be35..65c26f8 100644
--- a/llvm/test/CodeGen/X86/constant-hoisting-shift-immediate.ll
+++ b/llvm/test/CodeGen/X86/constant-hoisting-shift-immediate.ll
@@ -6,7 +6,7 @@
 ; be in another basic block. As a result, a very inefficient code might be
 ; produced. Here we check that this doesn't occur.
 entry:
-  %data1 = load i192* %p, align 8
+  %data1 = load i192, i192* %p, align 8
   %lshr1 = lshr i192 %data1, 128
   %val1  = trunc i192 %lshr1 to i64
   br i1 %z, label %End, label %L_val2
@@ -14,7 +14,7 @@
 ; CHECK: movq    16(%rdx), %rax
 ; CHECK-NEXT: retq
 L_val2:
-  %data2 = load i192* %q, align 8
+  %data2 = load i192, i192* %q, align 8
   %lshr2 = lshr i192 %data2, 128
   %val2  = trunc i192 %lshr2 to i64
   br label %End
diff --git a/llvm/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll b/llvm/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
index ed55b7f..7fc56f5 100644
--- a/llvm/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
+++ b/llvm/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
@@ -12,7 +12,7 @@
   %0 = add i32 %i2, 1           ; <i32> [#uses=1]
   %1 = sext i32 %0 to i64               ; <i64> [#uses=1]
   %2 = getelementptr i8, i8* %ptr, i64 %1           ; <i8*> [#uses=1]
-  %3 = load i8* %2, align 1             ; <i8> [#uses=1]
+  %3 = load i8, i8* %2, align 1             ; <i8> [#uses=1]
   %4 = icmp eq i8 0, %3         ; <i1> [#uses=1]
   br i1 %4, label %bb3, label %bb34
 
diff --git a/llvm/test/CodeGen/X86/cppeh-catch-all.ll b/llvm/test/CodeGen/X86/cppeh-catch-all.ll
index cf7b364..b48d534 100644
--- a/llvm/test/CodeGen/X86/cppeh-catch-all.ll
+++ b/llvm/test/CodeGen/X86/cppeh-catch-all.ll
@@ -39,7 +39,7 @@
   br label %catch
 
 catch:                                            ; preds = %lpad
-  %exn = load i8** %exn.slot
+  %exn = load i8*, i8** %exn.slot
   %3 = call i8* @llvm.eh.begincatch(i8* %exn) #3
   call void @_Z16handle_exceptionv()
   br label %invoke.cont2
@@ -57,7 +57,7 @@
 ; CHECK:   %eh.alloc = call i8* @llvm.framerecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1)
 ; CHECK:   %eh.data = bitcast i8* %eh.alloc to %struct._Z4testv.ehdata*
 ; CHECK:   %eh.obj.ptr = getelementptr inbounds %struct._Z4testv.ehdata, %struct._Z4testv.ehdata* %eh.data, i32 0, i32 1
-; CHECK:   %eh.obj = load i8** %eh.obj.ptr
+; CHECK:   %eh.obj = load i8*, i8** %eh.obj.ptr
 ; CHECK:   call void @_Z16handle_exceptionv()
 ; CHECK:   ret i8* blockaddress(@_Z4testv, %try.cont)
 ; CHECK: }
diff --git a/llvm/test/CodeGen/X86/cppeh-catch-scalar.ll b/llvm/test/CodeGen/X86/cppeh-catch-scalar.ll
index 2b7f841..b5f40c3 100644
--- a/llvm/test/CodeGen/X86/cppeh-catch-scalar.ll
+++ b/llvm/test/CodeGen/X86/cppeh-catch-scalar.ll
@@ -55,18 +55,18 @@
   br label %catch.dispatch
 
 catch.dispatch:                                   ; preds = %lpad
-  %sel = load i32* %ehselector.slot
+  %sel = load i32, i32* %ehselector.slot
   %3 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #3
   %matches = icmp eq i32 %sel, %3
   br i1 %matches, label %catch, label %eh.resume
 
 catch:                                            ; preds = %catch.dispatch
-  %exn11 = load i8** %exn.slot
+  %exn11 = load i8*, i8** %exn.slot
   %4 = call i8* @llvm.eh.begincatch(i8* %exn11) #3
   %5 = bitcast i8* %4 to i32*
-  %6 = load i32* %5, align 4
+  %6 = load i32, i32* %5, align 4
   store i32 %6, i32* %i, align 4
-  %7 = load i32* %i, align 4
+  %7 = load i32, i32* %i, align 4
   call void @_Z10handle_inti(i32 %7)
   br label %invoke.cont2
 
@@ -78,8 +78,8 @@
   ret void
 
 eh.resume:                                        ; preds = %catch.dispatch
-  %exn3 = load i8** %exn.slot
-  %sel4 = load i32* %ehselector.slot
+  %exn3 = load i8*, i8** %exn.slot
+  %sel4 = load i32, i32* %ehselector.slot
   %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn3, 0
   %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %sel4, 1
   resume { i8*, i32 } %lpad.val5
@@ -90,12 +90,12 @@
 ; CHECK:   %eh.alloc = call i8* @llvm.framerecover(i8* bitcast (void ()* @_Z4testv to i8*), i8* %1)
 ; CHECK:   %eh.data = bitcast i8* %eh.alloc to %struct._Z4testv.ehdata*
 ; CHECK:   %eh.obj.ptr = getelementptr inbounds %struct._Z4testv.ehdata, %struct._Z4testv.ehdata* %eh.data, i32 0, i32 1
-; CHECK:   %eh.obj = load i8** %eh.obj.ptr
+; CHECK:   %eh.obj = load i8*, i8** %eh.obj.ptr
 ; CHECK:   %i = getelementptr inbounds %struct._Z4testv.ehdata, %struct._Z4testv.ehdata* %eh.data, i32 0, i32 2
 ; CHECK:   %2 = bitcast i8* %eh.obj to i32*
-; CHECK:   %3 = load i32* %2, align 4
+; CHECK:   %3 = load i32, i32* %2, align 4
 ; CHECK:   store i32 %3, i32* %i, align 4
-; CHECK:   %4 = load i32* %i, align 4
+; CHECK:   %4 = load i32, i32* %i, align 4
 ; CHECK:   call void @_Z10handle_inti(i32 %4)
 ; CHECK:   ret i8* blockaddress(@_Z4testv, %try.cont)
 ; CHECK: }
diff --git a/llvm/test/CodeGen/X86/cppeh-frame-vars.ll b/llvm/test/CodeGen/X86/cppeh-frame-vars.ll
index bb4cef4..8b8a849 100644
--- a/llvm/test/CodeGen/X86/cppeh-frame-vars.ll
+++ b/llvm/test/CodeGen/X86/cppeh-frame-vars.ll
@@ -83,7 +83,7 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %1, 10
   br i1 %cmp, label %for.body, label %for.end
 
@@ -92,9 +92,9 @@
           to label %invoke.cont unwind label %lpad
 
 invoke.cont:                                      ; preds = %for.body
-  %2 = load i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
   %a = getelementptr inbounds %struct.SomeData, %struct.SomeData* %Data, i32 0, i32 0
-  %3 = load i32* %a, align 4
+  %3 = load i32, i32* %a, align 4
   %add = add nsw i32 %3, %2
   store i32 %add, i32* %a, align 4
   br label %try.cont
@@ -109,42 +109,42 @@
   br label %catch.dispatch
 
 catch.dispatch:                                   ; preds = %lpad
-  %sel = load i32* %ehselector.slot
+  %sel = load i32, i32* %ehselector.slot
   %7 = call i32 @llvm.eh.typeid.for(i8* bitcast (%rtti.TypeDescriptor2* @"\01??_R0H@8" to i8*)) #1
   %matches = icmp eq i32 %sel, %7
   br i1 %matches, label %catch, label %eh.resume
 
 catch:                                            ; preds = %catch.dispatch
-  %exn = load i8** %exn.slot
+  %exn = load i8*, i8** %exn.slot
   %8 = call i8* @llvm.eh.begincatch(i8* %exn) #1
   %9 = bitcast i8* %8 to i32*
-  %10 = load i32* %9, align 4
+  %10 = load i32, i32* %9, align 4
   store i32 %10, i32* %e, align 4
-  %11 = load i32* %e, align 4
-  %12 = load i32* %NumExceptions, align 4
+  %11 = load i32, i32* %e, align 4
+  %12 = load i32, i32* %NumExceptions, align 4
   %idxprom = sext i32 %12 to i64
   %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %ExceptionVal, i32 0, i64 %idxprom
   store i32 %11, i32* %arrayidx, align 4
-  %13 = load i32* %NumExceptions, align 4
+  %13 = load i32, i32* %NumExceptions, align 4
   %inc = add nsw i32 %13, 1
   store i32 %inc, i32* %NumExceptions, align 4
-  %14 = load i32* %e, align 4
-  %15 = load i32* %i, align 4
+  %14 = load i32, i32* %e, align 4
+  %15 = load i32, i32* %i, align 4
   %cmp1 = icmp eq i32 %14, %15
   br i1 %cmp1, label %if.then, label %if.else
 
 if.then:                                          ; preds = %catch
-  %16 = load i32* %e, align 4
+  %16 = load i32, i32* %e, align 4
   %b = getelementptr inbounds %struct.SomeData, %struct.SomeData* %Data, i32 0, i32 1
-  %17 = load i32* %b, align 4
+  %17 = load i32, i32* %b, align 4
   %add2 = add nsw i32 %17, %16
   store i32 %add2, i32* %b, align 4
   br label %if.end
 
 if.else:                                          ; preds = %catch
-  %18 = load i32* %e, align 4
+  %18 = load i32, i32* %e, align 4
   %a3 = getelementptr inbounds %struct.SomeData, %struct.SomeData* %Data, i32 0, i32 0
-  %19 = load i32* %a3, align 4
+  %19 = load i32, i32* %a3, align 4
   %add4 = add nsw i32 %19, %18
   store i32 %add4, i32* %a3, align 4
   br label %if.end
@@ -154,25 +154,25 @@
   br label %try.cont
 
 try.cont:                                         ; preds = %if.end, %invoke.cont
-  %20 = load i32* %NumExceptions, align 4
+  %20 = load i32, i32* %NumExceptions, align 4
   call void @"\01?does_not_throw@@YAXH@Z"(i32 %20)
   br label %for.inc
 
 for.inc:                                          ; preds = %try.cont
-  %21 = load i32* %i, align 4
+  %21 = load i32, i32* %i, align 4
   %inc5 = add nsw i32 %21, 1
   store i32 %inc5, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %22 = load i32* %NumExceptions, align 4
+  %22 = load i32, i32* %NumExceptions, align 4
   %arraydecay = getelementptr inbounds [10 x i32], [10 x i32]* %ExceptionVal, i32 0, i32 0
   call void @"\01?dump@@YAXPEAHHAEAUSomeData@@@Z"(i32* %arraydecay, i32 %22, %struct.SomeData* dereferenceable(8) %Data)
   ret void
 
 eh.resume:                                        ; preds = %catch.dispatch
-  %exn6 = load i8** %exn.slot
-  %sel7 = load i32* %ehselector.slot
+  %exn6 = load i8*, i8** %exn.slot
+  %sel7 = load i32, i32* %ehselector.slot
   %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn6, 0
   %lpad.val8 = insertvalue { i8*, i32 } %lpad.val, i32 %sel7, 1
   resume { i8*, i32 } %lpad.val8
@@ -184,40 +184,40 @@
 ; CHECK:   %eh.alloc = call i8* @llvm.framerecover(i8* bitcast (void ()* @"\01?test@@YAXXZ" to i8*), i8* %1)
 ; CHECK:   %eh.data = bitcast i8* %eh.alloc to %"struct.\01?test@@YAXXZ.ehdata"*
 ; CHECK:   %eh.obj.ptr = getelementptr inbounds %"struct.\01?test@@YAXXZ.ehdata", %"struct.\01?test@@YAXXZ.ehdata"* %eh.data, i32 0, i32 1
-; CHECK:   %eh.obj = load i8** %eh.obj.ptr
+; CHECK:   %eh.obj = load i8*, i8** %eh.obj.ptr
 ; CHECK:   %e = getelementptr inbounds %"struct.\01?test@@YAXXZ.ehdata", %"struct.\01?test@@YAXXZ.ehdata"* %eh.data, i32 0, i32 2
 ; CHECK:   %NumExceptions = getelementptr inbounds %"struct.\01?test@@YAXXZ.ehdata", %"struct.\01?test@@YAXXZ.ehdata"* %eh.data, i32 0, i32 3
 ; CHECK:   %ExceptionVal = getelementptr inbounds %"struct.\01?test@@YAXXZ.ehdata", %"struct.\01?test@@YAXXZ.ehdata"* %eh.data, i32 0, i32 4
 ; CHECK:   %i = getelementptr inbounds %"struct.\01?test@@YAXXZ.ehdata", %"struct.\01?test@@YAXXZ.ehdata"* %eh.data, i32 0, i32 5
 ; CHECK:   %Data = getelementptr inbounds %"struct.\01?test@@YAXXZ.ehdata", %"struct.\01?test@@YAXXZ.ehdata"* %eh.data, i32 0, i32 6
 ; CHECK:   %2 = bitcast i8* %eh.obj to i32*
-; CHECK:   %3 = load i32* %2, align 4
+; CHECK:   %3 = load i32, i32* %2, align 4
 ; CHECK:   store i32 %3, i32* %e, align 4
-; CHECK:   %4 = load i32* %e, align 4
-; CHECK:   %5 = load i32* %NumExceptions, align 4
+; CHECK:   %4 = load i32, i32* %e, align 4
+; CHECK:   %5 = load i32, i32* %NumExceptions, align 4
 ; CHECK:   %idxprom = sext i32 %5 to i64
 ; CHECK:   %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* %ExceptionVal, i32 0, i64 %idxprom
 ; CHECK:   store i32 %4, i32* %arrayidx, align 4
-; CHECK:   %6 = load i32* %NumExceptions, align 4
+; CHECK:   %6 = load i32, i32* %NumExceptions, align 4
 ; CHECK:   %inc = add nsw i32 %6, 1
 ; CHECK:   store i32 %inc, i32* %NumExceptions, align 4
-; CHECK:   %7 = load i32* %e, align 4
-; CHECK:   %8 = load i32* %i, align 4
+; CHECK:   %7 = load i32, i32* %e, align 4
+; CHECK:   %8 = load i32, i32* %i, align 4
 ; CHECK:   %cmp1 = icmp eq i32 %7, %8
 ; CHECK:   br i1 %cmp1, label %if.then, label %if.else
 ;
 ; CHECK: if.then:                                          ; preds = %catch.entry
-; CHECK:   %9 = load i32* %e, align 4
+; CHECK:   %9 = load i32, i32* %e, align 4
 ; CHECK:   %b = getelementptr inbounds %struct.SomeData, %struct.SomeData* %Data, i32 0, i32 1
-; CHECK:   %10 = load i32* %b, align 4
+; CHECK:   %10 = load i32, i32* %b, align 4
 ; CHECK:   %add2 = add nsw i32 %10, %9
 ; CHECK:   store i32 %add2, i32* %b, align 4
 ; CHECK:   br label %if.end
 ;
 ; CHECK: if.else:                                          ; preds = %catch.entry
-; CHECK:   %11 = load i32* %e, align 4
+; CHECK:   %11 = load i32, i32* %e, align 4
 ; CHECK:   %a3 = getelementptr inbounds %struct.SomeData, %struct.SomeData* %Data, i32 0, i32 0
-; CHECK:   %12 = load i32* %a3, align 4
+; CHECK:   %12 = load i32, i32* %a3, align 4
 ; CHECK:   %add4 = add nsw i32 %12, %11
 ; CHECK:   store i32 %add4, i32* %a3, align 4
 ; CHECK:   br label %if.end
diff --git a/llvm/test/CodeGen/X86/crash-O0.ll b/llvm/test/CodeGen/X86/crash-O0.ll
index df97817..dab15c1 100644
--- a/llvm/test/CodeGen/X86/crash-O0.ll
+++ b/llvm/test/CodeGen/X86/crash-O0.ll
@@ -45,7 +45,7 @@
 ; CHECK: retq
 define i64 @addressModeWith32bitIndex(i32 %V) {
   %gep = getelementptr i64, i64* null, i32 %V
-  %load = load i64* %gep
+  %load = load i64, i64* %gep
   %sdiv = sdiv i64 0, %load
   ret i64 %sdiv
 }
diff --git a/llvm/test/CodeGen/X86/crash-nosse.ll b/llvm/test/CodeGen/X86/crash-nosse.ll
index b1e01f9..aff120d 100644
--- a/llvm/test/CodeGen/X86/crash-nosse.ll
+++ b/llvm/test/CodeGen/X86/crash-nosse.ll
@@ -11,7 +11,7 @@
   br label %CF
 
 CF:                                               ; preds = %CF, %BB
-  %L19 = load <8 x float>* %S17
+  %L19 = load <8 x float>, <8 x float>* %S17
   %BC = bitcast <32 x i32> %Shuff6 to <32 x float>
   %S28 = fcmp ord double 0x3ED1A1F787BB2185, 0x3EE59DE55A8DF890
   br i1 %S28, label %CF, label %CF39
diff --git a/llvm/test/CodeGen/X86/crash.ll b/llvm/test/CodeGen/X86/crash.ll
index ffab342..3acae44 100644
--- a/llvm/test/CodeGen/X86/crash.ll
+++ b/llvm/test/CodeGen/X86/crash.ll
@@ -7,9 +7,9 @@
 ; Chain and flag folding issues.
 define i32 @test1() nounwind ssp {
 entry:
-  %tmp5.i = load volatile i32* undef              ; <i32> [#uses=1]
+  %tmp5.i = load volatile i32, i32* undef              ; <i32> [#uses=1]
   %conv.i = zext i32 %tmp5.i to i64               ; <i64> [#uses=1]
-  %tmp12.i = load volatile i32* undef             ; <i32> [#uses=1]
+  %tmp12.i = load volatile i32, i32* undef             ; <i32> [#uses=1]
   %conv13.i = zext i32 %tmp12.i to i64            ; <i64> [#uses=1]
   %shl.i = shl i64 %conv13.i, 32                  ; <i64> [#uses=1]
   %or.i = or i64 %shl.i, %conv.i                  ; <i64> [#uses=1]
@@ -40,7 +40,7 @@
 
 define void @test3() {
 dependentGraph243.exit:
-  %subject19 = load %pair* undef                     ; <%1> [#uses=1]
+  %subject19 = load %pair, %pair* undef                     ; <%1> [#uses=1]
   %0 = extractvalue %pair %subject19, 1              ; <double> [#uses=2]
   %1 = select i1 undef, double %0, double undef   ; <double> [#uses=1]
   %2 = select i1 undef, double %1, double %0      ; <double> [#uses=1]
@@ -52,7 +52,7 @@
 ; PR6605
 define i64 @test4(i8* %P) nounwind ssp {
 entry:
-  %tmp1 = load i8* %P                           ; <i8> [#uses=3]
+  %tmp1 = load i8, i8* %P                           ; <i8> [#uses=3]
   %tobool = icmp eq i8 %tmp1, 0                   ; <i1> [#uses=1]
   %tmp58 = sext i1 %tobool to i8                  ; <i8> [#uses=1]
   %mul.i = and i8 %tmp58, %tmp1                   ; <i8> [#uses=1]
@@ -76,7 +76,7 @@
 ; PR6607
 define fastcc void @test5(i32 %FUNC) nounwind {
 foo:
-  %0 = load i8* undef, align 1                    ; <i8> [#uses=3]
+  %0 = load i8, i8* undef, align 1                    ; <i8> [#uses=3]
   %1 = sext i8 %0 to i32                          ; <i32> [#uses=2]
   %2 = zext i8 %0 to i32                          ; <i32> [#uses=1]
   %tmp1.i5037 = urem i32 %2, 10                   ; <i32> [#uses=1]
@@ -121,7 +121,7 @@
 
 bb14:
   %tmp0 = trunc i16 undef to i1
-  %tmp1 = load i8* undef, align 8
+  %tmp1 = load i8, i8* undef, align 8
   %tmp2 = shl i8 %tmp1, 4
   %tmp3 = lshr i8 %tmp2, 7
   %tmp4 = trunc i8 %tmp3 to i1
@@ -239,7 +239,7 @@
 
 define void @_ZNK4llvm17MipsFrameLowering12emitPrologueERNS_15MachineFunctionE() ssp align 2 {
 bb:
-  %tmp = load %t9** undef, align 4
+  %tmp = load %t9*, %t9** undef, align 4
   %tmp2 = getelementptr inbounds %t9, %t9* %tmp, i32 0, i32 0
   %tmp3 = getelementptr inbounds %t9, %t9* %tmp, i32 0, i32 0, i32 0, i32 0, i32 1
   br label %bb4
@@ -250,25 +250,25 @@
   br i1 undef, label %bb34, label %bb7
 
 bb7:                                              ; preds = %bb4
-  %tmp8 = load i32* undef, align 4
+  %tmp8 = load i32, i32* undef, align 4
   %tmp9 = and i96 %tmp6, 4294967040
   %tmp10 = zext i32 %tmp8 to i96
   %tmp11 = shl nuw nsw i96 %tmp10, 32
   %tmp12 = or i96 %tmp9, %tmp11
   %tmp13 = or i96 %tmp12, 1
-  %tmp14 = load i32* undef, align 4
+  %tmp14 = load i32, i32* undef, align 4
   %tmp15 = and i96 %tmp5, 4294967040
   %tmp16 = zext i32 %tmp14 to i96
   %tmp17 = shl nuw nsw i96 %tmp16, 32
   %tmp18 = or i96 %tmp15, %tmp17
   %tmp19 = or i96 %tmp18, 1
-  %tmp20 = load i8* undef, align 1
+  %tmp20 = load i8, i8* undef, align 1
   %tmp21 = and i8 %tmp20, 1
   %tmp22 = icmp ne i8 %tmp21, 0
   %tmp23 = select i1 %tmp22, i96 %tmp19, i96 %tmp13
   %tmp24 = select i1 %tmp22, i96 %tmp13, i96 %tmp19
   store i96 %tmp24, i96* undef, align 4
-  %tmp25 = load %t13** %tmp3, align 4
+  %tmp25 = load %t13*, %t13** %tmp3, align 4
   %tmp26 = icmp eq %t13* %tmp25, undef
   br i1 %tmp26, label %bb28, label %bb27
 
@@ -281,7 +281,7 @@
 
 bb29:                                             ; preds = %bb28, %bb27
   store i96 %tmp23, i96* undef, align 4
-  %tmp30 = load %t13** %tmp3, align 4
+  %tmp30 = load %t13*, %t13** %tmp3, align 4
   br i1 false, label %bb33, label %bb31
 
 bb31:                                             ; preds = %bb29
@@ -348,13 +348,13 @@
   br label %"4"
 
 "3":
-  %0 = load <2 x i32>* null, align 8
+  %0 = load <2 x i32>, <2 x i32>* null, align 8
   %1 = xor <2 x i32> zeroinitializer, %0
   %2 = and <2 x i32> %1, %6
   %3 = or <2 x i32> undef, %2
   %4 = and <2 x i32> %3, undef
   store <2 x i32> %4, <2 x i32>* undef
-  %5 = load <2 x i32>* undef, align 1
+  %5 = load <2 x i32>, <2 x i32>* undef, align 1
   br label %"4"
 
 "4":
@@ -378,7 +378,7 @@
 @__force_order = external hidden global i32, align 4
 define void @pr11078(i32* %pgd) nounwind {
 entry:
-  %t0 = load i32* %pgd, align 4
+  %t0 = load i32, i32* %pgd, align 4
   %and2 = and i32 %t0, 1
   %tobool = icmp eq i32 %and2, 0
   br i1 %tobool, label %if.then, label %if.end
@@ -405,7 +405,7 @@
   br i1 undef, label %if.then3, label %if.end7
 
 if.then3:                                         ; preds = %while.body.preheader
-  %0 = load i32* undef, align 4
+  %0 = load i32, i32* undef, align 4
   br i1 undef, label %land.lhs.true.i255, label %if.end7
 
 land.lhs.true.i255:                               ; preds = %if.then3
@@ -434,7 +434,7 @@
 @.str = private unnamed_addr constant { [1 x i8], [63 x i8] } zeroinitializer, align 32
 define void @pr13188(i64* nocapture %this) uwtable ssp sanitize_address align 2 {
 entry:
-  %x7 = load i64* %this, align 8
+  %x7 = load i64, i64* %this, align 8
   %sub = add i64 %x7, -1
   %conv = uitofp i64 %sub to float
   %div = fmul float %conv, 5.000000e-01
@@ -450,12 +450,12 @@
 
 define void @pr13943() nounwind uwtable ssp {
 entry:
-  %srcval = load i576* bitcast ([9 x i32*]* @fn1.g to i576*), align 16
+  %srcval = load i576, i576* bitcast ([9 x i32*]* @fn1.g to i576*), align 16
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
   %g.0 = phi i576 [ %srcval, %entry ], [ %ins, %for.inc ]
-  %0 = load i32* @e, align 4
+  %0 = load i32, i32* @e, align 4
   %1 = lshr i576 %g.0, 64
   %2 = trunc i576 %1 to i64
   %3 = inttoptr i64 %2 to i32*
@@ -510,9 +510,9 @@
   unreachable
 
 bb5:                                              ; preds = %bb3
-  %tmp = load <4 x float>* undef, align 1
+  %tmp = load <4 x float>, <4 x float>* undef, align 1
   %tmp6 = bitcast <4 x float> %tmp to i128
-  %tmp7 = load <4 x float>* undef, align 1
+  %tmp7 = load <4 x float>, <4 x float>* undef, align 1
   %tmp8 = bitcast <4 x float> %tmp7 to i128
   br label %bb10
 
@@ -583,7 +583,7 @@
 }
 
 define void @pr14194() nounwind uwtable {
-  %tmp = load i64* undef, align 16
+  %tmp = load i64, i64* undef, align 16
   %tmp1 = trunc i64 %tmp to i32
   %tmp2 = lshr i64 %tmp, 32
   %tmp3 = trunc i64 %tmp2 to i32
diff --git a/llvm/test/CodeGen/X86/critical-anti-dep-breaker.ll b/llvm/test/CodeGen/X86/critical-anti-dep-breaker.ll
index 32d3f49..86afc1f 100644
--- a/llvm/test/CodeGen/X86/critical-anti-dep-breaker.ll
+++ b/llvm/test/CodeGen/X86/critical-anti-dep-breaker.ll
@@ -16,9 +16,9 @@
 define i32 @Part_Create(i64* %Anchor, i32 %TypeNum, i32 %F, i32 %Z, i32* %Status, i64* %PartTkn) {
   %PartObj = alloca i64*, align 8
   %Vchunk = alloca i64, align 8
-  %1 = load i64* @NullToken, align 4
+  %1 = load i64, i64* @NullToken, align 4
   store i64 %1, i64* %Vchunk, align 8
-  %2 = load i32* @PartClass, align 4
+  %2 = load i32, i32* @PartClass, align 4
   call i32 @Image(i64* %Anchor, i32 %2, i32 0, i32 0, i32* %Status, i64* %PartTkn, i64** %PartObj)
   call i32 @Create(i64* %Anchor)
   ret i32 %2
diff --git a/llvm/test/CodeGen/X86/cse-add-with-overflow.ll b/llvm/test/CodeGen/X86/cse-add-with-overflow.ll
index 1fcc03f..dc02fe9 100644
--- a/llvm/test/CodeGen/X86/cse-add-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/cse-add-with-overflow.ll
@@ -15,8 +15,8 @@
 
 define i64 @redundantadd(i64* %a0, i64* %a1) {
 entry:
-  %tmp8 = load i64* %a0, align 8
-  %tmp12 = load i64* %a1, align 8
+  %tmp8 = load i64, i64* %a0, align 8
+  %tmp12 = load i64, i64* %a1, align 8
   %tmp13 = icmp ult i64 %tmp12, -281474976710656
   br i1 %tmp13, label %exit1, label %body
 
diff --git a/llvm/test/CodeGen/X86/cvt16.ll b/llvm/test/CodeGen/X86/cvt16.ll
index 4d920e2..9846da5 100644
--- a/llvm/test/CodeGen/X86/cvt16.ll
+++ b/llvm/test/CodeGen/X86/cvt16.ll
@@ -33,7 +33,7 @@
 
 
 define float @test2(i16* nocapture %src) {
-  %1 = load i16* %src, align 2
+  %1 = load i16, i16* %src, align 2
   %2 = tail call float @llvm.convert.from.fp16.f32(i16 %1)
   ret float %2
 }
@@ -60,7 +60,7 @@
 ; F16C: ret
 
 define double @test4(i16* nocapture %src) {
-  %1 = load i16* %src, align 2
+  %1 = load i16, i16* %src, align 2
   %2 = tail call double @llvm.convert.from.fp16.f64(i16 %1)
   ret double %2
 }
diff --git a/llvm/test/CodeGen/X86/dagcombine-buildvector.ll b/llvm/test/CodeGen/X86/dagcombine-buildvector.ll
index cf631c3..3a6231a 100644
--- a/llvm/test/CodeGen/X86/dagcombine-buildvector.ll
+++ b/llvm/test/CodeGen/X86/dagcombine-buildvector.ll
@@ -17,7 +17,7 @@
 ; CHECK: movdqa
 define void @test2(<4 x i16>* %src, <4 x i32>* %dest) nounwind {
 entry:
-        %tmp1 = load <4 x i16>* %src
+        %tmp1 = load <4 x i16>, <4 x i16>* %src
         %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
         %0 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp3)
         store <4 x i32> %0, <4 x i32>* %dest
diff --git a/llvm/test/CodeGen/X86/dagcombine-cse.ll b/llvm/test/CodeGen/X86/dagcombine-cse.ll
index 7a1a693..be1dcff 100644
--- a/llvm/test/CodeGen/X86/dagcombine-cse.ll
+++ b/llvm/test/CodeGen/X86/dagcombine-cse.ll
@@ -7,12 +7,12 @@
 	%tmp9 = add i32 %tmp7, %idxX		; <i32> [#uses=1]
 	%tmp11 = getelementptr i8, i8* %ref_frame_ptr, i32 %tmp9		; <i8*> [#uses=1]
 	%tmp1112 = bitcast i8* %tmp11 to i32*		; <i32*> [#uses=1]
-	%tmp13 = load i32* %tmp1112, align 4		; <i32> [#uses=1]
+	%tmp13 = load i32, i32* %tmp1112, align 4		; <i32> [#uses=1]
 	%tmp18 = add i32 %idxX, 4		; <i32> [#uses=1]
 	%tmp20.sum = add i32 %tmp18, %tmp7		; <i32> [#uses=1]
 	%tmp21 = getelementptr i8, i8* %ref_frame_ptr, i32 %tmp20.sum		; <i8*> [#uses=1]
 	%tmp2122 = bitcast i8* %tmp21 to i16*		; <i16*> [#uses=1]
-	%tmp23 = load i16* %tmp2122, align 2		; <i16> [#uses=1]
+	%tmp23 = load i16, i16* %tmp2122, align 2		; <i16> [#uses=1]
 	%tmp2425 = zext i16 %tmp23 to i64		; <i64> [#uses=1]
 	%tmp26 = shl i64 %tmp2425, 32		; <i64> [#uses=1]
 	%tmp2728 = zext i32 %tmp13 to i64		; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/darwin-quote.ll b/llvm/test/CodeGen/X86/darwin-quote.ll
index 8fddc11..c912c92 100644
--- a/llvm/test/CodeGen/X86/darwin-quote.ll
+++ b/llvm/test/CodeGen/X86/darwin-quote.ll
@@ -2,7 +2,7 @@
 
 
 define internal i64 @baz() nounwind {
-  %tmp = load i64* @"+x"
+  %tmp = load i64, i64* @"+x"
   ret i64 %tmp
 ; CHECK: _baz:
 ; CHECK:    movl "L_+x$non_lazy_ptr", %ecx
diff --git a/llvm/test/CodeGen/X86/dbg-changes-codegen.ll b/llvm/test/CodeGen/X86/dbg-changes-codegen.ll
index de2864a..d2a29ca 100644
--- a/llvm/test/CodeGen/X86/dbg-changes-codegen.ll
+++ b/llvm/test/CodeGen/X86/dbg-changes-codegen.ll
@@ -43,7 +43,7 @@
 ; Function Attrs: nounwind readonly uwtable
 define zeroext i1 @_ZN3Foo3batEv(%struct.Foo* %this) #0 align 2 {
 entry:
-  %0 = load %struct.Foo** @pfoo, align 8
+  %0 = load %struct.Foo*, %struct.Foo** @pfoo, align 8
   tail call void @llvm.dbg.value(metadata %struct.Foo* %0, i64 0, metadata !62, metadata !{!"0x102"})
   %cmp.i = icmp eq %struct.Foo* %0, %this
   ret i1 %cmp.i
@@ -52,9 +52,9 @@
 ; Function Attrs: nounwind uwtable
 define void @_Z3bazv() #1 {
 entry:
-  %0 = load %struct.Wibble** @wibble1, align 8
+  %0 = load %struct.Wibble*, %struct.Wibble** @wibble1, align 8
   tail call void @llvm.dbg.value(metadata %struct.Flibble* undef, i64 0, metadata !65, metadata !{!"0x102"})
-  %1 = load %struct.Wibble** @wibble2, align 8
+  %1 = load %struct.Wibble*, %struct.Wibble** @wibble2, align 8
   %cmp.i = icmp ugt %struct.Wibble* %1, %0
   br i1 %cmp.i, label %if.then.i, label %_ZN7Flibble3barEP6Wibble.exit
 
diff --git a/llvm/test/CodeGen/X86/dbg-combine.ll b/llvm/test/CodeGen/X86/dbg-combine.ll
index dcdbfc4..e443408 100644
--- a/llvm/test/CodeGen/X86/dbg-combine.ll
+++ b/llvm/test/CodeGen/X86/dbg-combine.ll
@@ -31,7 +31,7 @@
   %cleanup.dest.slot = alloca i32
   call void @llvm.dbg.declare(metadata i32* %elems, metadata !12, metadata !13), !dbg !14
   store i32 3, i32* %elems, align 4, !dbg !14
-  %0 = load i32* %elems, align 4, !dbg !15
+  %0 = load i32, i32* %elems, align 4, !dbg !15
   %1 = zext i32 %0 to i64, !dbg !16
   %2 = call i8* @llvm.stacksave(), !dbg !16
   store i8* %2, i8** %saved_stack, !dbg !16
@@ -43,16 +43,16 @@
   store i32 1, i32* %arrayidx1, align 4, !dbg !26
   %arrayidx2 = getelementptr inbounds i32, i32* %vla, i64 2, !dbg !27
   store i32 2, i32* %arrayidx2, align 4, !dbg !28
-  %3 = load i32* %elems, align 4, !dbg !29
+  %3 = load i32, i32* %elems, align 4, !dbg !29
   %4 = zext i32 %3 to i64, !dbg !30
   %vla3 = alloca i32, i64 %4, align 16, !dbg !30
   call void @llvm.dbg.declare(metadata i32* %vla3, metadata !31, metadata !21), !dbg !32
   %arrayidx4 = getelementptr inbounds i32, i32* %vla3, i64 0, !dbg !33
   store i32 1, i32* %arrayidx4, align 4, !dbg !34
   %arrayidx5 = getelementptr inbounds i32, i32* %vla3, i64 0, !dbg !35
-  %5 = load i32* %arrayidx5, align 4, !dbg !35
+  %5 = load i32, i32* %arrayidx5, align 4, !dbg !35
   store i32 1, i32* %cleanup.dest.slot
-  %6 = load i8** %saved_stack, !dbg !36
+  %6 = load i8*, i8** %saved_stack, !dbg !36
   call void @llvm.stackrestore(i8* %6), !dbg !36
   ret i32 %5, !dbg !36
 }
diff --git a/llvm/test/CodeGen/X86/discontiguous-loops.ll b/llvm/test/CodeGen/X86/discontiguous-loops.ll
index 479c450..edebbbe 100644
--- a/llvm/test/CodeGen/X86/discontiguous-loops.ll
+++ b/llvm/test/CodeGen/X86/discontiguous-loops.ll
@@ -39,7 +39,7 @@
   br i1 %tmp9, label %bb10, label %ybb12
 
 bb10:                                             ; preds = %ybb8
-  %tmp11 = load i8** undef, align 8               ; <i8*> [#uses=1]
+  %tmp11 = load i8*, i8** undef, align 8               ; <i8*> [#uses=1]
   call void (i8*, ...)* @fatal(i8* getelementptr inbounds ([37 x i8]* @.str96, i64 0, i64 0), i8* %tmp11) nounwind
   unreachable
 
diff --git a/llvm/test/CodeGen/X86/div8.ll b/llvm/test/CodeGen/X86/div8.ll
index 0825f79..f4f50e5 100644
--- a/llvm/test/CodeGen/X86/div8.ll
+++ b/llvm/test/CodeGen/X86/div8.ll
@@ -10,13 +10,13 @@
   %quotient = alloca i8, align 1
   store i8 %dividend, i8* %dividend.addr, align 2
   store i8 %divisor, i8* %divisor.addr, align 1
-  %tmp = load i8* %dividend.addr, align 2
-  %tmp1 = load i8* %divisor.addr, align 1
+  %tmp = load i8, i8* %dividend.addr, align 2
+  %tmp1 = load i8, i8* %divisor.addr, align 1
 ; Insist on i8->i32 zero extension, even though divb demands only i16:
 ; CHECK: movzbl {{.*}}%eax
 ; CHECK: divb
   %div = udiv i8 %tmp, %tmp1
   store i8 %div, i8* %quotient, align 1
-  %tmp4 = load i8* %quotient, align 1
+  %tmp4 = load i8, i8* %quotient, align 1
   ret i8 %tmp4
 }
diff --git a/llvm/test/CodeGen/X86/dllimport-x86_64.ll b/llvm/test/CodeGen/X86/dllimport-x86_64.ll
index 839bca4..af15a86 100644
--- a/llvm/test/CodeGen/X86/dllimport-x86_64.ll
+++ b/llvm/test/CodeGen/X86/dllimport-x86_64.ll
@@ -35,13 +35,13 @@
 ; available_externally uses go away
 ; OPT-NOT: call void @inline1()
 ; OPT-NOT: call void @inline2()
-; OPT-NOT: load i32* @Var2
+; OPT-NOT: load i32, i32* @Var2
 ; OPT: call void (...)* @dummy(i32 %1, i32 1)
 
 ; CHECK-DAG: movq __imp_Var1(%rip), [[R1:%[a-z]{3}]]
 ; CHECK-DAG: movq __imp_Var2(%rip), [[R2:%[a-z]{3}]]
-  %1 = load i32* @Var1
-  %2 = load i32* @Var2
+  %1 = load i32, i32* @Var1
+  %2 = load i32, i32* @Var2
   call void(...)* @dummy(i32 %1, i32 %2)
 
   ret void
diff --git a/llvm/test/CodeGen/X86/dllimport.ll b/llvm/test/CodeGen/X86/dllimport.ll
index 231ad65..eb9484c 100644
--- a/llvm/test/CodeGen/X86/dllimport.ll
+++ b/llvm/test/CodeGen/X86/dllimport.ll
@@ -46,13 +46,13 @@
 ; available_externally uses go away
 ; OPT-NOT: call void @inline1()
 ; OPT-NOT: call void @inline2()
-; OPT-NOT: load i32* @Var2
+; OPT-NOT: load i32, i32* @Var2
 ; OPT: call void (...)* @dummy(i32 %1, i32 1)
 
 ; CHECK-DAG: movl __imp__Var1, [[R1:%[a-z]{3}]]
 ; CHECK-DAG: movl __imp__Var2, [[R2:%[a-z]{3}]]
-  %1 = load i32* @Var1
-  %2 = load i32* @Var2
+  %1 = load i32, i32* @Var1
+  %2 = load i32, i32* @Var2
   call void(...)* @dummy(i32 %1, i32 %2)
 
   ret void
diff --git a/llvm/test/CodeGen/X86/dollar-name.ll b/llvm/test/CodeGen/X86/dollar-name.ll
index 2ecd729..a31b806 100644
--- a/llvm/test/CodeGen/X86/dollar-name.ll
+++ b/llvm/test/CodeGen/X86/dollar-name.ll
@@ -8,8 +8,8 @@
 ; CHECK: movl	($bar),
 ; CHECK: addl	($qux),
 ; CHECK: calll	($hen)
-  %m = load i32* @"$bar"
-  %n = load i32* @"$qux"
+  %m = load i32, i32* @"$bar"
+  %n = load i32, i32* @"$qux"
   %t = add i32 %m, %n
   %u = call i32 @"$hen"(i32 %t)
   ret i32 %u
diff --git a/llvm/test/CodeGen/X86/dont-trunc-store-double-to-float.ll b/llvm/test/CodeGen/X86/dont-trunc-store-double-to-float.ll
index 24d9533..8a334d2 100644
--- a/llvm/test/CodeGen/X86/dont-trunc-store-double-to-float.ll
+++ b/llvm/test/CodeGen/X86/dont-trunc-store-double-to-float.ll
@@ -10,7 +10,7 @@
   %b = alloca float
 
   store double 3.140000e+00, double* %a
-  %0 = load double* %a
+  %0 = load double, double* %a
 
   %1 = fptrunc double %0 to float
 
diff --git a/llvm/test/CodeGen/X86/dynamic-allocas-VLAs.ll b/llvm/test/CodeGen/X86/dynamic-allocas-VLAs.ll
index 9405f76..2925f24 100644
--- a/llvm/test/CodeGen/X86/dynamic-allocas-VLAs.ll
+++ b/llvm/test/CodeGen/X86/dynamic-allocas-VLAs.ll
@@ -7,7 +7,7 @@
 entry:
   %a = alloca i32, align 4
   call void @t1_helper(i32* %a) nounwind
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %add = add nsw i32 %0, 13
   ret i32 %add
 
@@ -27,7 +27,7 @@
   %a = alloca i32, align 4
   %v = alloca <8 x float>, align 32
   call void @t2_helper(i32* %a, <8 x float>* %v) nounwind
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %add = add nsw i32 %0, 13
   ret i32 %add
 
@@ -53,7 +53,7 @@
   %a = alloca i32, align 4
   %vla = alloca i32, i64 %sz, align 16
   call void @t3_helper(i32* %a, i32* %vla) nounwind
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %add = add nsw i32 %0, 13
   ret i32 %add
 
@@ -78,7 +78,7 @@
   %v = alloca <8 x float>, align 32
   %vla = alloca i32, i64 %sz, align 16
   call void @t4_helper(i32* %a, i32* %vla, <8 x float>* %v) nounwind
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %add = add nsw i32 %0, 13
   ret i32 %add
 
@@ -108,10 +108,10 @@
 entry:
   %a = alloca i32, align 4
   %0 = bitcast float* %f to <8 x float>*
-  %1 = load <8 x float>* %0, align 32
+  %1 = load <8 x float>, <8 x float>* %0, align 32
   call void @t5_helper1(i32* %a) nounwind
   call void @t5_helper2(<8 x float> %1) nounwind
-  %2 = load i32* %a, align 4
+  %2 = load i32, i32* %a, align 4
   %add = add nsw i32 %2, 13
   ret i32 %add
 
@@ -138,11 +138,11 @@
 ; CHECK: _t6
   %a = alloca i32, align 4
   %0 = bitcast float* %f to <8 x float>*
-  %1 = load <8 x float>* %0, align 32
+  %1 = load <8 x float>, <8 x float>* %0, align 32
   %vla = alloca i32, i64 %sz, align 16
   call void @t6_helper1(i32* %a, i32* %vla) nounwind
   call void @t6_helper2(<8 x float> %1) nounwind
-  %2 = load i32* %a, align 4
+  %2 = load i32, i32* %a, align 4
   %add = add nsw i32 %2, 13
   ret i32 %add
 }
@@ -162,7 +162,7 @@
   store i32 0, i32* %x, align 32
   %0 = zext i32 %size to i64
   %vla = alloca i32, i64 %0, align 16
-  %1 = load i32* %x, align 32
+  %1 = load i32, i32* %x, align 32
   call void @bar(i32 %1, i32* %vla, %struct.struct_t* byval align 8 %arg1)
   ret void
 
@@ -195,7 +195,7 @@
 entry:
   %a = alloca i32, align 4
   call void @t1_helper(i32* %a) nounwind
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %add = add nsw i32 %0, 13
   ret i32 %add
 
@@ -213,7 +213,7 @@
   %a = alloca i32, align 4
   %vla = alloca i32, i64 %sz, align 16
   call void @t3_helper(i32* %a, i32* %vla) nounwind
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %add = add nsw i32 %0, 13
   ret i32 %add
 
diff --git a/llvm/test/CodeGen/X86/early-ifcvt.ll b/llvm/test/CodeGen/X86/early-ifcvt.ll
index 24c1edc..6215519 100644
--- a/llvm/test/CodeGen/X86/early-ifcvt.ll
+++ b/llvm/test/CodeGen/X86/early-ifcvt.ll
@@ -15,7 +15,7 @@
   %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.cond ]
   %p.addr.0 = phi i32* [ %p, %entry ], [ %incdec.ptr, %do.cond ]
   %incdec.ptr = getelementptr inbounds i32, i32* %p.addr.0, i64 1
-  %0 = load i32* %p.addr.0, align 4
+  %0 = load i32, i32* %p.addr.0, align 4
   %cmp = icmp sgt i32 %0, %max.0
   br i1 %cmp, label %do.cond, label %if.else
 
diff --git a/llvm/test/CodeGen/X86/emit-big-cst.ll b/llvm/test/CodeGen/X86/emit-big-cst.ll
index 96c15d4..51852d0 100644
--- a/llvm/test/CodeGen/X86/emit-big-cst.ll
+++ b/llvm/test/CodeGen/X86/emit-big-cst.ll
@@ -10,7 +10,7 @@
 
 define void @accessBig(i64* %storage) {
   %addr = bitcast i64* %storage to i82*
-  %bigLoadedCst = load volatile i82* @bigCst
+  %bigLoadedCst = load volatile i82, i82* @bigCst
   %tmp = add i82 %bigLoadedCst, 1
   store i82 %tmp, i82* %addr
   ret void
diff --git a/llvm/test/CodeGen/X86/expand-opaque-const.ll b/llvm/test/CodeGen/X86/expand-opaque-const.ll
index 6e461cf..1e39cd8 100644
--- a/llvm/test/CodeGen/X86/expand-opaque-const.ll
+++ b/llvm/test/CodeGen/X86/expand-opaque-const.ll
@@ -11,11 +11,11 @@
   %op2 = alloca i64
   store i64 -6687208052682386272, i64* %op1
   store i64 7106745059734980448, i64* %op2
-  %tmp1 = load i64* %op1
-  %tmp2 = load i64* %op2
+  %tmp1 = load i64, i64* %op1
+  %tmp2 = load i64, i64* %op2
   %tmp = xor i64 %tmp2, 7106745059734980448
   %tmp3 = lshr i64 %tmp1, %tmp
   store i64 %tmp3, i64* %retval
-  %tmp4 = load i64* %retval
+  %tmp4 = load i64, i64* %retval
   ret i64 %tmp4
 }
diff --git a/llvm/test/CodeGen/X86/extend.ll b/llvm/test/CodeGen/X86/extend.ll
index 9553b1b..d349e78 100644
--- a/llvm/test/CodeGen/X86/extend.ll
+++ b/llvm/test/CodeGen/X86/extend.ll
@@ -5,13 +5,13 @@
 @G2 = internal global i8 0              ; <i8*> [#uses=1]
 
 define i16 @test1() {
-        %tmp.0 = load i8* @G1           ; <i8> [#uses=1]
+        %tmp.0 = load i8, i8* @G1           ; <i8> [#uses=1]
         %tmp.3 = zext i8 %tmp.0 to i16          ; <i16> [#uses=1]
         ret i16 %tmp.3
 }
 
 define i16 @test2() {
-        %tmp.0 = load i8* @G2           ; <i8> [#uses=1]
+        %tmp.0 = load i8, i8* @G2           ; <i8> [#uses=1]
         %tmp.3 = sext i8 %tmp.0 to i16          ; <i16> [#uses=1]
         ret i16 %tmp.3
 }
diff --git a/llvm/test/CodeGen/X86/extract-extract.ll b/llvm/test/CodeGen/X86/extract-extract.ll
index bf7a38e..9f15163 100644
--- a/llvm/test/CodeGen/X86/extract-extract.ll
+++ b/llvm/test/CodeGen/X86/extract-extract.ll
@@ -12,10 +12,10 @@
 define fastcc void @foo(%pp* nocapture byval %p_arg) {
 entry:
         %tmp2 = getelementptr %pp, %pp* %p_arg, i64 0, i32 0         ; <%cc*> [#uses=
-        %tmp3 = load %cc* %tmp2         ; <%cc> [#uses=1]
+        %tmp3 = load %cc, %cc* %tmp2         ; <%cc> [#uses=1]
         %tmp34 = extractvalue %cc %tmp3, 0              ; <%crd> [#uses=1]
         %tmp345 = extractvalue %crd %tmp34, 0           ; <i64> [#uses=1]
-        %.ptr.i = load %cr** undef              ; <%cr*> [#uses=0]
+        %.ptr.i = load %cr*, %cr** undef              ; <%cr*> [#uses=0]
         %tmp15.i = shl i64 %tmp345, 3           ; <i64> [#uses=0]
         store %cr* undef, %cr** undef
         ret void
diff --git a/llvm/test/CodeGen/X86/extractelement-load.ll b/llvm/test/CodeGen/X86/extractelement-load.ll
index 732f698..e50d353 100644
--- a/llvm/test/CodeGen/X86/extractelement-load.ll
+++ b/llvm/test/CodeGen/X86/extractelement-load.ll
@@ -9,7 +9,7 @@
 ; CHECK-NOT: movd
 ; CHECK: movl 8(
 ; CHECK-NEXT: ret
-	%tmp2 = load <2 x i64>* %val, align 16		; <<2 x i64>> [#uses=1]
+	%tmp2 = load <2 x i64>, <2 x i64>* %val, align 16		; <<2 x i64>> [#uses=1]
 	%tmp3 = bitcast <2 x i64> %tmp2 to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp4 = extractelement <4 x i32> %tmp3, i32 2		; <i32> [#uses=1]
 	ret i32 %tmp4
@@ -20,7 +20,7 @@
 define i32 @t2(<8 x i32>* %xp) {
 ; CHECK-LABEL: t2:
 ; CHECK: ret
-  %x = load <8 x i32>* %xp
+  %x = load <8 x i32>, <8 x i32>* %xp
   %Shuff68 = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> <i32
 undef, i32 7, i32 9, i32 undef, i32 13, i32 15, i32 1, i32 3>
   %y = extractelement <8 x i32> %Shuff68, i32 0
@@ -41,7 +41,7 @@
 ; CHECK: movhpd
 
 bb:
-  %tmp13 = load <2 x double>* undef, align 1
+  %tmp13 = load <2 x double>, <2 x double>* undef, align 1
   %.sroa.3.24.vec.extract = extractelement <2 x double> %tmp13, i32 1
   store double %.sroa.3.24.vec.extract, double* undef, align 8
   unreachable
@@ -55,7 +55,7 @@
 ; CHECK-LABEL: t4:
 ; CHECK: mov
 ; CHECK: ret
-  %b = load <2 x double>* %a, align 16
+  %b = load <2 x double>, <2 x double>* %a, align 16
   %c = shufflevector <2 x double> %b, <2 x double> %b, <2 x i32> <i32 1, i32 0>
   %d = bitcast <2 x double> %c to <2 x i64>
   %e = extractelement <2 x i64> %d, i32 1
diff --git a/llvm/test/CodeGen/X86/extractps.ll b/llvm/test/CodeGen/X86/extractps.ll
index 9e1a375..fecd2fa 100644
--- a/llvm/test/CodeGen/X86/extractps.ll
+++ b/llvm/test/CodeGen/X86/extractps.ll
@@ -7,7 +7,7 @@
 external global float, align 16         ; <float*>:0 [#uses=2]
 
 define internal void @""() nounwind {
-        load float* @0, align 16                ; <float>:1 [#uses=1]
+        load float, float* @0, align 16                ; <float>:1 [#uses=1]
         insertelement <4 x float> undef, float %1, i32 0                ; <<4 x float>>:2 [#uses=1]
         call <4 x float> @llvm.x86.sse.rsqrt.ss( <4 x float> %2 )              ; <<4 x float>>:3 [#uses=1]
         extractelement <4 x float> %3, i32 0            ; <float>:4 [#uses=1]
@@ -15,7 +15,7 @@
         ret void
 }
 define internal void @""() nounwind {
-        load float* @0, align 16                ; <float>:1 [#uses=1]
+        load float, float* @0, align 16                ; <float>:1 [#uses=1]
         insertelement <4 x float> undef, float %1, i32 1                ; <<4 x float>>:2 [#uses=1]
         call <4 x float> @llvm.x86.sse.rsqrt.ss( <4 x float> %2 )              ; <<4 x float>>:3 [#uses=1]
         extractelement <4 x float> %3, i32 1            ; <float>:4 [#uses=1]
diff --git a/llvm/test/CodeGen/X86/f16c-intrinsics.ll b/llvm/test/CodeGen/X86/f16c-intrinsics.ll
index 802f917..02967d5 100644
--- a/llvm/test/CodeGen/X86/f16c-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/f16c-intrinsics.ll
@@ -25,7 +25,7 @@
   ; CHECK-LABEL: test_x86_vcvtph2ps_256_m:
   ; CHECK-NOT: vmov
   ; CHECK: vcvtph2ps  (%
-  %tmp1 = load <8 x i16>* %a, align 16
+  %tmp1 = load <8 x i16>, <8 x i16>* %a, align 16
   %0 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %tmp1)
   ret <8 x float> %0
 }
@@ -54,7 +54,7 @@
 ; CHECK-NOT: vmov
 ; CHECK: vcvtph2ps (%
 
-  %load = load i64* %ptr
+  %load = load i64, i64* %ptr
   %ins1 = insertelement <2 x i64> undef, i64 %load, i32 0
   %ins2 = insertelement <2 x i64> %ins1, i64 0, i32 1
   %bc = bitcast <2 x i64> %ins2 to <8 x i16>
diff --git a/llvm/test/CodeGen/X86/fast-isel-args-fail.ll b/llvm/test/CodeGen/X86/fast-isel-args-fail.ll
index 7e783d2..0026832 100644
--- a/llvm/test/CodeGen/X86/fast-isel-args-fail.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-args-fail.ll
@@ -17,6 +17,6 @@
 ; WIN32: movl (%rcx), %eax
 ; WIN64: foo
 ; WIN64: movl (%rdi), %eax
-  %0 = load i32* %p, align 4
+  %0 = load i32, i32* %p, align 4
   ret i32 %0
 }
diff --git a/llvm/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll b/llvm/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
index 21fae4a..3310e61 100644
--- a/llvm/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
@@ -15,10 +15,10 @@
   store i32 %x, i32* %x.addr, align 4
   store i32 %y, i32* %y.addr, align 4
   store i32 %z, i32* %z.addr, align 4
-  %tmp = load i32* %x.addr, align 4
-  %tmp1 = load i32* %y.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
+  %tmp1 = load i32, i32* %y.addr, align 4
   %add = add nsw i32 %tmp, %tmp1
-  %tmp2 = load i32* %z.addr, align 4
+  %tmp2 = load i32, i32* %z.addr, align 4
   %add3 = add nsw i32 %add, %tmp2
   ret i32 %add3
 }
diff --git a/llvm/test/CodeGen/X86/fast-isel-call-bool.ll b/llvm/test/CodeGen/X86/fast-isel-call-bool.ll
index 5cdb2c9..aaa8ef4 100644
--- a/llvm/test/CodeGen/X86/fast-isel-call-bool.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-call-bool.ll
@@ -8,7 +8,7 @@
 define i64 @foo(i8* %arg) {
 ; CHECK-LABEL: foo:
 top:
-  %0 = load i8* %arg
+  %0 = load i8, i8* %arg
 ; CHECK: movb
   %1 = trunc i8 %0 to i1
 ; CHECK: andb $1,
diff --git a/llvm/test/CodeGen/X86/fast-isel-fold-mem.ll b/llvm/test/CodeGen/X86/fast-isel-fold-mem.ll
index 86bf2f2..5686484e 100644
--- a/llvm/test/CodeGen/X86/fast-isel-fold-mem.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-fold-mem.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL: fold_load
 ; CHECK:       addq  (%rdi), %rsi
 ; CHECK-NEXT:  movq  %rsi, %rax
-  %1 = load i64* %a, align 8
+  %1 = load i64, i64* %a, align 8
   %2 = add i64 %1, %b
   ret i64 %2
 }
diff --git a/llvm/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll b/llvm/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
index 90a0537..e4e9aea 100644
--- a/llvm/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-fptrunc-fpext.ll
@@ -47,7 +47,7 @@
 ; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
 ; ALL-NEXT: ret
 entry:
-  %0 = load float* %x, align 4
+  %0 = load float, float* %x, align 4
   %conv = fpext float %0 to double
   ret double %conv
 }
@@ -59,7 +59,7 @@
 ; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
 ; ALL-NEXT: ret
 entry:
-  %0 = load double* %x, align 8
+  %0 = load double, double* %x, align 8
   %conv = fptrunc double %0 to float
   ret float %conv
 }
diff --git a/llvm/test/CodeGen/X86/fast-isel-gep.ll b/llvm/test/CodeGen/X86/fast-isel-gep.ll
index 55de06d..67b3029 100644
--- a/llvm/test/CodeGen/X86/fast-isel-gep.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-gep.ll
@@ -7,7 +7,7 @@
 ; PR3181
 define i32 @test1(i32 %t3, i32* %t1) nounwind {
        %t9 = getelementptr i32, i32* %t1, i32 %t3           ; <i32*> [#uses=1]
-       %t15 = load i32* %t9            ; <i32> [#uses=1]
+       %t15 = load i32, i32* %t9            ; <i32> [#uses=1]
        ret i32 %t15
 ; X32-LABEL: test1:
 ; X32:  	movl	(%eax,%ecx,4), %eax
@@ -21,7 +21,7 @@
 }
 define i32 @test2(i64 %t3, i32* %t1) nounwind {
        %t9 = getelementptr i32, i32* %t1, i64 %t3           ; <i32*> [#uses=1]
-       %t15 = load i32* %t9            ; <i32> [#uses=1]
+       %t15 = load i32, i32* %t9            ; <i32> [#uses=1]
        ret i32 %t15
 ; X32-LABEL: test2:
 ; X32:  	movl	(%edx,%ecx,4), %e
@@ -38,7 +38,7 @@
 define i8 @test3(i8* %start) nounwind {
 entry:
   %A = getelementptr i8, i8* %start, i64 -2               ; <i8*> [#uses=1]
-  %B = load i8* %A, align 1                       ; <i8> [#uses=1]
+  %B = load i8, i8* %A, align 1                       ; <i8> [#uses=1]
   ret i8 %B
   
   
@@ -59,11 +59,11 @@
   %p.addr = alloca double*, align 8               ; <double**> [#uses=2]
   store i64 %x, i64* %x.addr
   store double* %p, double** %p.addr
-  %tmp = load i64* %x.addr                        ; <i64> [#uses=1]
+  %tmp = load i64, i64* %x.addr                        ; <i64> [#uses=1]
   %add = add nsw i64 %tmp, 16                     ; <i64> [#uses=1]
-  %tmp1 = load double** %p.addr                   ; <double*> [#uses=1]
+  %tmp1 = load double*, double** %p.addr                   ; <double*> [#uses=1]
   %arrayidx = getelementptr inbounds double, double* %tmp1, i64 %add ; <double*> [#uses=1]
-  %tmp2 = load double* %arrayidx                  ; <double> [#uses=1]
+  %tmp2 = load double, double* %arrayidx                  ; <double> [#uses=1]
   ret double %tmp2
 
 ; X32-LABEL: test4:
@@ -77,7 +77,7 @@
 define i64 @test5(i8* %A, i32 %I, i64 %B) nounwind {
   %v8 = getelementptr i8, i8* %A, i32 %I
   %v9 = bitcast i8* %v8 to i64*
-  %v10 = load i64* %v9
+  %v10 = load i64, i64* %v9
   %v11 = add i64 %B, %v10
   ret i64 %v11
 ; X64-LABEL: test5:
@@ -91,7 +91,7 @@
 ; of their blocks.
 define void @test6() {
 if.end:                                           ; preds = %if.then, %invoke.cont
-  %tmp15 = load i64* undef
+  %tmp15 = load i64, i64* undef
   %dec = add i64 %tmp15, 13
   store i64 %dec, i64* undef
   %call17 = invoke i8* @_ZNK18G__FastAllocString4dataEv()
@@ -119,7 +119,7 @@
 
 
   %tmp29 = getelementptr inbounds {i32,i32,i32}, {i32,i32,i32}* %tmp1, i32 0, i32 2
-  %tmp30 = load i32* %tmp29, align 4
+  %tmp30 = load i32, i32* %tmp29, align 4
 
   %p2 = getelementptr inbounds {i32,i32,i32}, {i32,i32,i32}* %tmp1, i32 0, i32 2
   store i32 4, i32* %p2
diff --git a/llvm/test/CodeGen/X86/fast-isel-gv.ll b/llvm/test/CodeGen/X86/fast-isel-gv.ll
index de75095..b3955d6 100644
--- a/llvm/test/CodeGen/X86/fast-isel-gv.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-gv.ll
@@ -12,15 +12,15 @@
 	%retval = alloca i32		; <i32*> [#uses=2]
 	%0 = alloca i32		; <i32*> [#uses=2]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	%1 = load i8 (...)** @f, align 8		; <i8 (...)*> [#uses=1]
+	%1 = load i8 (...)*, i8 (...)** @f, align 8		; <i8 (...)*> [#uses=1]
 	%2 = icmp ne i8 (...)* %1, @kill		; <i1> [#uses=1]
 	%3 = zext i1 %2 to i32		; <i32> [#uses=1]
 	store i32 %3, i32* %0, align 4
-	%4 = load i32* %0, align 4		; <i32> [#uses=1]
+	%4 = load i32, i32* %0, align 4		; <i32> [#uses=1]
 	store i32 %4, i32* %retval, align 4
 	br label %return
 
 return:		; preds = %entry
-	%retval1 = load i32* %retval		; <i32> [#uses=1]
+	%retval1 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval1
 }
diff --git a/llvm/test/CodeGen/X86/fast-isel-i1.ll b/llvm/test/CodeGen/X86/fast-isel-i1.ll
index d7cb2d4..d72a31c 100644
--- a/llvm/test/CodeGen/X86/fast-isel-i1.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-i1.ll
@@ -27,7 +27,7 @@
 ; CHECK: movb {{.*}} %al
 ; CHECK-NEXT: xorb $1, %al
 ; CHECK-NEXT: testb $1
-  %tmp = load i8* %a, align 1
+  %tmp = load i8, i8* %a, align 1
   %tobool = trunc i8 %tmp to i1
   %tobool2 = xor i1 %tobool, true
   br i1 %tobool2, label %if.then, label %if.end
diff --git a/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll
index e231e4d..d5fe833 100644
--- a/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-int-float-conversion.ll
@@ -18,7 +18,7 @@
 ; AVX: vcvtsi2sdl (%rdi), %xmm0, %xmm0
 ; ALL-NEXT: ret
 entry:
-  %0 = load i32* %a
+  %0 = load i32, i32* %a
   %1 = sitofp i32 %0 to double
   ret double %1
 }
@@ -39,7 +39,7 @@
 ; AVX: vcvtsi2ssl (%rdi), %xmm0, %xmm0
 ; ALL-NEXT: ret
 entry:
-  %0 = load i32* %a
+  %0 = load i32, i32* %a
   %1 = sitofp i32 %0 to float
   ret float %1
 }
diff --git a/llvm/test/CodeGen/X86/fast-isel-mem.ll b/llvm/test/CodeGen/X86/fast-isel-mem.ll
index eca1ae9..717b5ec 100644
--- a/llvm/test/CodeGen/X86/fast-isel-mem.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-mem.ll
@@ -6,8 +6,8 @@
 ; rdar://6653118
 define i32 @loadgv() nounwind {
 entry:
-	%0 = load i32* @src, align 4
-	%1 = load i32* @src, align 4
+	%0 = load i32, i32* @src, align 4
+	%1 = load i32, i32* @src, align 4
         %2 = add i32 %0, %1
         store i32 %2, i32* @src
 	ret i32 %2
diff --git a/llvm/test/CodeGen/X86/fast-isel-tailcall.ll b/llvm/test/CodeGen/X86/fast-isel-tailcall.ll
index 79ff79d..88ad05e 100644
--- a/llvm/test/CodeGen/X86/fast-isel-tailcall.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-tailcall.ll
@@ -7,7 +7,7 @@
 
 define i32 @stub(i8* %t0) nounwind {
 entry:
-        %t1 = load i32* inttoptr (i32 139708680 to i32*)         ; <i32> [#uses=1]
+        %t1 = load i32, i32* inttoptr (i32 139708680 to i32*)         ; <i32> [#uses=1]
         %t2 = bitcast i8* %t0 to i32 (i32)*               ; <i32 (i32)*> [#uses=1]
         %t3 = call fastcc i32 %t2(i32 %t1)         ; <i32> [#uses=1]
         ret i32 %t3
diff --git a/llvm/test/CodeGen/X86/fast-isel-tls.ll b/llvm/test/CodeGen/X86/fast-isel-tls.ll
index 686df43..18bb9c1 100644
--- a/llvm/test/CodeGen/X86/fast-isel-tls.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-tls.ll
@@ -4,7 +4,7 @@
 @v = thread_local global i32 0
 define i32 @f() nounwind {
 entry:
-          %t = load i32* @v
+          %t = load i32, i32* @v
           %s = add i32 %t, 1
           ret i32 %s
 }
@@ -16,7 +16,7 @@
 @alias = internal alias i32* @v
 define i32 @f_alias() nounwind {
 entry:
-          %t = load i32* @v
+          %t = load i32, i32* @v
           %s = add i32 %t, 1
           ret i32 %s
 }
diff --git a/llvm/test/CodeGen/X86/fast-isel-x86-64.ll b/llvm/test/CodeGen/X86/fast-isel-x86-64.ll
index 695914b..d4bbb63 100644
--- a/llvm/test/CodeGen/X86/fast-isel-x86-64.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-x86-64.ll
@@ -20,7 +20,7 @@
 entry:
   %x.addr = alloca i64, align 8
   store i64 %x, i64* %x.addr, align 8
-  %tmp = load i64* %x.addr, align 8
+  %tmp = load i64, i64* %x.addr, align 8
   %cmp = icmp sgt i64 %tmp, 42
   br i1 %cmp, label %if.then, label %if.end
 
@@ -53,7 +53,7 @@
 
 define i32 @test4(i64 %idxprom9) nounwind {
   %arrayidx10 = getelementptr inbounds [153 x i8], [153 x i8]* @rtx_length, i32 0, i64 %idxprom9
-  %tmp11 = load i8* %arrayidx10, align 1
+  %tmp11 = load i8, i8* %arrayidx10, align 1
   %conv = zext i8 %tmp11 to i32
   ret i32 %conv
 
@@ -212,7 +212,7 @@
 ; w.r.t. the call.
 define i32 @test17(i32 *%P) ssp nounwind {
 entry:
-  %tmp = load i32* %P
+  %tmp = load i32, i32* %P
   %cmp = icmp ne i32 %tmp, 5
   call void @foo()
   br i1 %cmp, label %if.then, label %if.else
diff --git a/llvm/test/CodeGen/X86/fast-isel-x86.ll b/llvm/test/CodeGen/X86/fast-isel-x86.ll
index 61e9b98f..8049c72 100644
--- a/llvm/test/CodeGen/X86/fast-isel-x86.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-x86.ll
@@ -6,7 +6,7 @@
 ; CHECK: retl
 @G = external global float
 define float @test0() nounwind {
-  %t = load float* @G
+  %t = load float, float* @G
   ret float %t
 }
 
@@ -28,7 +28,7 @@
 ; CHECK: retl
 @HHH = external global i32
 define i32 @test2() nounwind {
-  %t = load i32* @HHH
+  %t = load i32, i32* @HHH
   ret i32 %t
 }
 
diff --git a/llvm/test/CodeGen/X86/fast-isel.ll b/llvm/test/CodeGen/X86/fast-isel.ll
index 154ef50..36183e4 100644
--- a/llvm/test/CodeGen/X86/fast-isel.ll
+++ b/llvm/test/CodeGen/X86/fast-isel.ll
@@ -5,9 +5,9 @@
 
 define i32* @foo(i32* %p, i32* %q, i32** %z) nounwind {
 entry:
-  %r = load i32* %p
-  %s = load i32* %q
-  %y = load i32** %z
+  %r = load i32, i32* %p
+  %s = load i32, i32* %q
+  %y = load i32*, i32** %z
   br label %fast
 
 fast:
@@ -29,8 +29,8 @@
 
 define void @bar(double* %p, double* %q) nounwind {
 entry:
-  %r = load double* %p
-  %s = load double* %q
+  %r = load double, double* %p
+  %s = load double, double* %q
   br label %fast
 
 fast:
@@ -94,7 +94,7 @@
 }
 
 define void @load_store_i1(i1* %p, i1* %q) nounwind {
-  %t = load i1* %p
+  %t = load i1, i1* %p
   store i1 %t, i1* %q
   ret void
 }
@@ -102,7 +102,7 @@
 @crash_test1x = external global <2 x i32>, align 8
 
 define void @crash_test1() nounwind ssp {
-  %tmp = load <2 x i32>* @crash_test1x, align 8
+  %tmp = load <2 x i32>, <2 x i32>* @crash_test1x, align 8
   %neg = xor <2 x i32> %tmp, <i32 -1, i32 -1>
   ret void
 }
@@ -113,7 +113,7 @@
   %a1 = alloca i64*, align 8
   %a2 = bitcast i64** %a1 to i8*
   call void @llvm.lifetime.start(i64 -1, i8* %a2) nounwind      
-  %a3 = load i64** %a1, align 8
+  %a3 = load i64*, i64** %a1, align 8
   ret i64* %a3
 }
 
diff --git a/llvm/test/CodeGen/X86/fastcc-byval.ll b/llvm/test/CodeGen/X86/fastcc-byval.ll
index 32ba6ed..1706de4 100644
--- a/llvm/test/CodeGen/X86/fastcc-byval.ll
+++ b/llvm/test/CodeGen/X86/fastcc-byval.ll
@@ -17,7 +17,7 @@
 	%a = getelementptr %struct.MVT, %struct.MVT* %V, i32 0, i32 0
 	store i32 1, i32* %a
 	call fastcc void @foo(%struct.MVT* byval %V) nounwind
-	%t = load i32* %a
+	%t = load i32, i32* %a
 	ret i32 %t
 }
 
diff --git a/llvm/test/CodeGen/X86/fastcc-sret.ll b/llvm/test/CodeGen/X86/fastcc-sret.ll
index 9f00970..499aadd 100644
--- a/llvm/test/CodeGen/X86/fastcc-sret.ll
+++ b/llvm/test/CodeGen/X86/fastcc-sret.ll
@@ -19,7 +19,7 @@
         call fastcc void @bar( %struct.foo* sret %memtmp ) nounwind
         %tmp4 = getelementptr %struct.foo, %struct.foo* %memtmp, i32 0, i32 0
 	%tmp5 = getelementptr [4 x i32], [4 x i32]* %tmp4, i32 0, i32 0
-        %tmp6 = load i32* %tmp5
+        %tmp6 = load i32, i32* %tmp5
         store i32 %tmp6, i32* @dst
         ret void
 }
diff --git a/llvm/test/CodeGen/X86/fastcc.ll b/llvm/test/CodeGen/X86/fastcc.ll
index a362f8d..020e7f9 100644
--- a/llvm/test/CodeGen/X86/fastcc.ll
+++ b/llvm/test/CodeGen/X86/fastcc.ll
@@ -9,10 +9,10 @@
 
 define i32 @foo() nounwind {
 entry:
-	%0 = load double* @d, align 8		; <double> [#uses=1]
-	%1 = load double* @c, align 8		; <double> [#uses=1]
-	%2 = load double* @b, align 8		; <double> [#uses=1]
-	%3 = load double* @a, align 8		; <double> [#uses=1]
+	%0 = load double, double* @d, align 8		; <double> [#uses=1]
+	%1 = load double, double* @c, align 8		; <double> [#uses=1]
+	%2 = load double, double* @b, align 8		; <double> [#uses=1]
+	%3 = load double, double* @a, align 8		; <double> [#uses=1]
 	tail call fastcc void @bar( i32 0, i32 1, i32 2, double 1.000000e+00, double %3, double %2, double %1, double %0 ) nounwind
 	ret i32 0
 }
diff --git a/llvm/test/CodeGen/X86/fastisel-gep-promote-before-add.ll b/llvm/test/CodeGen/X86/fastisel-gep-promote-before-add.ll
index 8e78d38..1f67299 100644
--- a/llvm/test/CodeGen/X86/fastisel-gep-promote-before-add.ll
+++ b/llvm/test/CodeGen/X86/fastisel-gep-promote-before-add.ll
@@ -6,13 +6,13 @@
 entry:
   %ptr.addr = alloca i8*, align 8
   %add = add i8 64, 64 ; 0x40 + 0x40
-  %0 = load i8** %ptr.addr, align 8
+  %0 = load i8*, i8** %ptr.addr, align 8
 
   ; CHECK-LABEL: _gep_promotion:
   ; CHECK: movzbl ({{.*}})
   %arrayidx = getelementptr inbounds i8, i8* %0, i8 %add
 
-  %1 = load i8* %arrayidx, align 1
+  %1 = load i8, i8* %arrayidx, align 1
   ret i8 %1
 }
 
@@ -22,16 +22,16 @@
   %ptr.addr = alloca i8*, align 8
   store i8 %i, i8* %i.addr, align 4
   store i8* %ptr, i8** %ptr.addr, align 8
-  %0 = load i8* %i.addr, align 4
+  %0 = load i8, i8* %i.addr, align 4
   ; CHECK-LABEL: _gep_promotion_nonconst:
   ; CHECK: movzbl ({{.*}})
   %xor = xor i8 %0, -128   ; %0   ^ 0x80
   %add = add i8 %xor, -127 ; %xor + 0x81
-  %1 = load i8** %ptr.addr, align 8
+  %1 = load i8*, i8** %ptr.addr, align 8
 
   %arrayidx = getelementptr inbounds i8, i8* %1, i8 %add
 
-  %2 = load i8* %arrayidx, align 1
+  %2 = load i8, i8* %arrayidx, align 1
   ret i8 %2
 }
 
diff --git a/llvm/test/CodeGen/X86/fma-do-not-commute.ll b/llvm/test/CodeGen/X86/fma-do-not-commute.ll
index 4e21172..1f6a19c 100644
--- a/llvm/test/CodeGen/X86/fma-do-not-commute.ll
+++ b/llvm/test/CodeGen/X86/fma-do-not-commute.ll
@@ -18,8 +18,8 @@
 
 loop:
   %sum0 = phi float [ %fma, %loop ], [ %arg, %entry ]
-  %addrVal = load float* %addr, align 4
-  %addr2Val = load float* %addr2, align 4
+  %addrVal = load float, float* %addr, align 4
+  %addr2Val = load float, float* %addr2, align 4
   %fmul = fmul float %addrVal, %addr2Val
   %fma = fadd float %sum0, %fmul
   br i1 true, label %exit, label %loop
diff --git a/llvm/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll b/llvm/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll
index 64a2068..85de1ef 100644
--- a/llvm/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll
+++ b/llvm/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll
@@ -4,14 +4,14 @@
 ; VFMADD
 define < 4 x float > @test_x86_fma_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) {
   ; CHECK: vfmaddss (%{{.*}})
-  %x = load float *%a2
+  %x = load float, float* %a2
   %y = insertelement <4 x float> undef, float %x, i32 0
   %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y)
   ret < 4 x float > %res
 }
 define < 4 x float > @test_x86_fma_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) {
   ; CHECK: vfmaddss %{{.*}}, (%{{.*}})
-  %x = load float *%a1
+  %x = load float, float* %a1
   %y = insertelement <4 x float> undef, float %x, i32 0
   %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2)
   ret < 4 x float > %res
@@ -21,14 +21,14 @@
 
 define < 2 x double > @test_x86_fma_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) {
   ; CHECK: vfmaddsd (%{{.*}})
-  %x = load double *%a2
+  %x = load double, double* %a2
   %y = insertelement <2 x double> undef, double %x, i32 0
   %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y)
   ret < 2 x double > %res
 }
 define < 2 x double > @test_x86_fma_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) {
   ; CHECK: vfmaddsd %{{.*}}, (%{{.*}})
-  %x = load double *%a1
+  %x = load double, double* %a1
   %y = insertelement <2 x double> undef, double %x, i32 0
   %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2)
   ret < 2 x double > %res
@@ -36,13 +36,13 @@
 declare < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
 define < 4 x float > @test_x86_fma_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) {
   ; CHECK: vfmaddps (%{{.*}})
-  %x = load <4 x float>* %a2
+  %x = load <4 x float>, <4 x float>* %a2
   %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %x)
   ret < 4 x float > %res
 }
 define < 4 x float > @test_x86_fma_vfmadd_ps_load2(< 4 x float > %a0, < 4 x float >* %a1, < 4 x float > %a2) {
   ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
-  %x = load <4 x float>* %a1
+  %x = load <4 x float>, <4 x float>* %a1
   %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %a0, < 4 x float > %x, < 4 x float > %a2)
   ret < 4 x float > %res
 }
@@ -52,21 +52,21 @@
 define < 4 x float > @test_x86_fma_vfmadd_ps_load3(< 4 x float >* %a0, < 4 x float >* %a1, < 4 x float > %a2) {
   ; CHECK: vmovaps
   ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
-  %x = load <4 x float>* %a0
-  %y = load <4 x float>* %a1
+  %x = load <4 x float>, <4 x float>* %a0
+  %y = load <4 x float>, <4 x float>* %a1
   %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %x, < 4 x float > %y, < 4 x float > %a2)
   ret < 4 x float > %res
 }
 
 define < 2 x double > @test_x86_fma_vfmadd_pd_load(< 2 x double > %a0, < 2 x double > %a1, < 2 x double >* %a2) {
   ; CHECK: vfmaddpd (%{{.*}})
-  %x = load <2 x double>* %a2
+  %x = load <2 x double>, <2 x double>* %a2
   %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %x)
   ret < 2 x double > %res
 }
 define < 2 x double > @test_x86_fma_vfmadd_pd_load2(< 2 x double > %a0, < 2 x double >* %a1, < 2 x double > %a2) {
   ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
-  %x = load <2 x double>* %a1
+  %x = load <2 x double>, <2 x double>* %a1
   %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %x, < 2 x double > %a2)
   ret < 2 x double > %res
 }
@@ -76,8 +76,8 @@
 define < 2 x double > @test_x86_fma_vfmadd_pd_load3(< 2 x double >* %a0, < 2 x double >* %a1, < 2 x double > %a2) {
   ; CHECK: vmovapd
   ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
-  %x = load <2 x double>* %a0
-  %y = load <2 x double>* %a1
+  %x = load <2 x double>, <2 x double>* %a0
+  %y = load <2 x double>, <2 x double>* %a1
   %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %x, < 2 x double > %y, < 2 x double > %a2)
   ret < 2 x double > %res
 }
diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll
index 9b52db9..a27b760 100644
--- a/llvm/test/CodeGen/X86/fma_patterns.ll
+++ b/llvm/test/CodeGen/X86/fma_patterns.ll
@@ -190,7 +190,7 @@
 ; CHECK_FMA4: vfmaddps     %xmm1, (%rdi), %xmm0, %xmm0
 ; CHECK_FMA4: ret
 define <4 x float> @test_x86_fmadd_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
-  %x = load <4 x float>* %a0
+  %x = load <4 x float>, <4 x float>* %a0
   %y = fmul <4 x float> %x, %a1
   %res = fadd <4 x float> %y, %a2
   ret <4 x float> %res
@@ -204,7 +204,7 @@
 ; CHECK_FMA4: vfmsubps     %xmm1, (%rdi), %xmm0, %xmm0
 ; CHECK_FMA4: ret
 define <4 x float> @test_x86_fmsub_ps_load(<4 x float>* %a0, <4 x float> %a1, <4 x float> %a2) {
-  %x = load <4 x float>* %a0
+  %x = load <4 x float>, <4 x float>* %a0
   %y = fmul <4 x float> %x, %a1
   %res = fsub <4 x float> %y, %a2
   ret <4 x float> %res
diff --git a/llvm/test/CodeGen/X86/fmul-zero.ll b/llvm/test/CodeGen/X86/fmul-zero.ll
index 03bad65..bc139f88 100644
--- a/llvm/test/CodeGen/X86/fmul-zero.ll
+++ b/llvm/test/CodeGen/X86/fmul-zero.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -march=x86-64 | grep mulps
 
 define void @test14(<4 x float>*) nounwind {
-        load <4 x float>* %0, align 1
+        load <4 x float>, <4 x float>* %0, align 1
         fmul <4 x float> %2, zeroinitializer
         store <4 x float> %3, <4 x float>* %0, align 1
         ret void
diff --git a/llvm/test/CodeGen/X86/fold-add.ll b/llvm/test/CodeGen/X86/fold-add.ll
index 18c0ec1..7d27400 100644
--- a/llvm/test/CodeGen/X86/fold-add.ll
+++ b/llvm/test/CodeGen/X86/fold-add.ll
@@ -13,12 +13,12 @@
 ; CHECK: ret
 
 entry:
-	%0 = load i32* @prev_length, align 4		; <i32> [#uses=3]
+	%0 = load i32, i32* @prev_length, align 4		; <i32> [#uses=3]
 	%1 = zext i32 %cur_match to i64		; <i64> [#uses=1]
 	%2 = sext i32 %0 to i64		; <i64> [#uses=1]
 	%.sum3 = add i64 %1, %2		; <i64> [#uses=1]
 	%3 = getelementptr [65536 x i8], [65536 x i8]* @window, i64 0, i64 %.sum3		; <i8*> [#uses=1]
-	%4 = load i8* %3, align 1		; <i8> [#uses=1]
+	%4 = load i8, i8* %3, align 1		; <i8> [#uses=1]
 	%5 = icmp eq i8 %4, 0		; <i1> [#uses=1]
 	br i1 %5, label %bb5, label %bb23
 
diff --git a/llvm/test/CodeGen/X86/fold-and-shift.ll b/llvm/test/CodeGen/X86/fold-and-shift.ll
index 8432a70..00173ef 100644
--- a/llvm/test/CodeGen/X86/fold-and-shift.ll
+++ b/llvm/test/CodeGen/X86/fold-and-shift.ll
@@ -12,7 +12,7 @@
   %tmp4 = and i32 %tmp2, 1020
   %tmp7 = getelementptr i8, i8* %X, i32 %tmp4
   %tmp78 = bitcast i8* %tmp7 to i32*
-  %tmp9 = load i32* %tmp78
+  %tmp9 = load i32, i32* %tmp78
   ret i32 %tmp9
 }
 
@@ -28,7 +28,7 @@
   %tmp4 = and i32 %tmp2, 131070
   %tmp7 = getelementptr i16, i16* %X, i32 %tmp4
   %tmp78 = bitcast i16* %tmp7 to i32*
-  %tmp9 = load i32* %tmp78
+  %tmp9 = load i32, i32* %tmp78
   ret i32 %tmp9
 }
 
@@ -46,11 +46,11 @@
 ; CHECK: ret
 
 entry:
-  %i = load i16* %i.ptr
+  %i = load i16, i16* %i.ptr
   %i.zext = zext i16 %i to i32
   %index = lshr i32 %i.zext, 11
   %val.ptr = getelementptr inbounds i32, i32* %arr, i32 %index
-  %val = load i32* %val.ptr
+  %val = load i32, i32* %val.ptr
   %sum = add i32 %val, %i.zext
   ret i32 %sum
 }
@@ -65,12 +65,12 @@
 ; CHECK: ret
 
 entry:
-  %i = load i16* %i.ptr
+  %i = load i16, i16* %i.ptr
   %i.zext = zext i16 %i to i32
   %index = lshr i32 %i.zext, 11
   %index.zext = zext i32 %index to i64
   %val.ptr = getelementptr inbounds i32, i32* %arr, i64 %index.zext
-  %val = load i32* %val.ptr
+  %val = load i32, i32* %val.ptr
   %sum.1 = add i32 %val, %i.zext
   %sum.2 = add i32 %sum.1, %index
   ret i32 %sum.2
diff --git a/llvm/test/CodeGen/X86/fold-call-2.ll b/llvm/test/CodeGen/X86/fold-call-2.ll
index 7a2b038..b5e2606 100644
--- a/llvm/test/CodeGen/X86/fold-call-2.ll
+++ b/llvm/test/CodeGen/X86/fold-call-2.ll
@@ -4,7 +4,7 @@
 
 define i32 @main() nounwind {
 entry:
-	load void ()** @f, align 8		; <void ()*>:0 [#uses=1]
+	load void ()*, void ()** @f, align 8		; <void ()*>:0 [#uses=1]
 	tail call void %0( ) nounwind
 	ret i32 0
 }
diff --git a/llvm/test/CodeGen/X86/fold-call-3.ll b/llvm/test/CodeGen/X86/fold-call-3.ll
index 69cd198..e7e81b9 100644
--- a/llvm/test/CodeGen/X86/fold-call-3.ll
+++ b/llvm/test/CodeGen/X86/fold-call-3.ll
@@ -10,7 +10,7 @@
 define void @_Z25RawPointerPerformanceTestPvRN5clang6ActionE(i8* %Val, %"struct.clang::Action"* %Actions) nounwind {
 entry:
 	%0 = alloca %"struct.clang::ActionBase::ActionResult<0u>", align 8		; <%"struct.clang::ActionBase::ActionResult<0u>"*> [#uses=3]
-	%1 = load i32* @NumTrials, align 4		; <i32> [#uses=1]
+	%1 = load i32, i32* @NumTrials, align 4		; <i32> [#uses=1]
 	%2 = icmp eq i32 %1, 0		; <i1> [#uses=1]
 	br i1 %2, label %return, label %bb.nph
 
@@ -25,18 +25,18 @@
 bb:		; preds = %bb, %bb.nph
 	%Trial.01 = phi i32 [ 0, %bb.nph ], [ %12, %bb ]		; <i32> [#uses=1]
 	%Val_addr.02 = phi i8* [ %Val, %bb.nph ], [ %11, %bb ]		; <i8*> [#uses=1]
-	%6 = load i32 (...)*** %3, align 8		; <i32 (...)**> [#uses=1]
+	%6 = load i32 (...)**, i32 (...)*** %3, align 8		; <i32 (...)**> [#uses=1]
 	%7 = getelementptr i32 (...)*, i32 (...)** %6, i64 70		; <i32 (...)**> [#uses=1]
-	%8 = load i32 (...)** %7, align 8		; <i32 (...)*> [#uses=1]
+	%8 = load i32 (...)*, i32 (...)** %7, align 8		; <i32 (...)*> [#uses=1]
 	%9 = bitcast i32 (...)* %8 to { i64, i64 } (%"struct.clang::Action"*, i8*)*		; <{ i64, i64 } (%"struct.clang::Action"*, i8*)*> [#uses=1]
 	%10 = call { i64, i64 } %9(%"struct.clang::Action"* %Actions, i8* %Val_addr.02) nounwind		; <{ i64, i64 }> [#uses=2]
 	%mrv_gr = extractvalue { i64, i64 } %10, 0		; <i64> [#uses=1]
 	store i64 %mrv_gr, i64* %mrv_gep
 	%mrv_gr2 = extractvalue { i64, i64 } %10, 1		; <i64> [#uses=1]
 	store i64 %mrv_gr2, i64* %4
-	%11 = load i8** %5, align 8		; <i8*> [#uses=1]
+	%11 = load i8*, i8** %5, align 8		; <i8*> [#uses=1]
 	%12 = add i32 %Trial.01, 1		; <i32> [#uses=2]
-	%13 = load i32* @NumTrials, align 4		; <i32> [#uses=1]
+	%13 = load i32, i32* @NumTrials, align 4		; <i32> [#uses=1]
 	%14 = icmp ult i32 %12, %13		; <i1> [#uses=1]
 	br i1 %14, label %bb, label %return
 
diff --git a/llvm/test/CodeGen/X86/fold-call-oper.ll b/llvm/test/CodeGen/X86/fold-call-oper.ll
index 94383d4..1e3e58d 100644
--- a/llvm/test/CodeGen/X86/fold-call-oper.ll
+++ b/llvm/test/CodeGen/X86/fold-call-oper.ll
@@ -14,7 +14,7 @@
 ; CHECK: callq *{{.*}}(%rbp)
 define void @foldCallOper(i32 (i32*, i32, i32**)* nocapture %p1) #0 {
 entry:
-  %0 = load i32*** @a, align 8
+  %0 = load i32**, i32*** @a, align 8
   br label %for.body.i
 
 for.body.i:                                       ; preds = %for.body.i, %entry
@@ -33,14 +33,14 @@
   br i1 %tobool.i, label %for.inc8.i, label %if.then.i
 
 if.then.i:                                        ; preds = %for.body3.i
-  %2 = load i32* %1, align 4
+  %2 = load i32, i32* %1, align 4
   store i32 %2, i32* @b, align 4
   br label %for.inc8.i
 
 for.inc8.i:                                       ; preds = %if.then.i, %for.body3.i
   %lftr.wideiv.i = trunc i64 %indvars.iv.i to i32
   %arrayidx4.phi.trans.insert.i = getelementptr inbounds [0 x i32*], [0 x i32*]* undef, i64 0, i64 %indvars.iv.i
-  %.pre.i = load i32** %arrayidx4.phi.trans.insert.i, align 8
+  %.pre.i = load i32*, i32** %arrayidx4.phi.trans.insert.i, align 8
   %phitmp.i = add i64 %indvars.iv.i, 1
   br label %for.body3.i
 }
diff --git a/llvm/test/CodeGen/X86/fold-call.ll b/llvm/test/CodeGen/X86/fold-call.ll
index 4bc5243..0083994 100644
--- a/llvm/test/CodeGen/X86/fold-call.ll
+++ b/llvm/test/CodeGen/X86/fold-call.ll
@@ -20,7 +20,7 @@
 define void @test2(%struct.X* nocapture %x) {
 entry:
   %f = getelementptr inbounds %struct.X, %struct.X* %x, i64 0, i32 0
-  %0 = load void ()** %f
+  %0 = load void ()*, void ()** %f
   store void ()* null, void ()** %f
   tail call void %0()
   ret void
diff --git a/llvm/test/CodeGen/X86/fold-load-unops.ll b/llvm/test/CodeGen/X86/fold-load-unops.ll
index 0b2e6c7..b03e80b 100644
--- a/llvm/test/CodeGen/X86/fold-load-unops.ll
+++ b/llvm/test/CodeGen/X86/fold-load-unops.ll
@@ -9,7 +9,7 @@
 ; CHECK-LABEL: rcpss:
 ; CHECK:       vrcpss (%rdi), %xmm0, %xmm0
 
-    %ld = load float* %a
+    %ld = load float, float* %a
     %ins = insertelement <4 x float> undef, float %ld, i32 0
     %res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ins)
     %ext = extractelement <4 x float> %res, i32 0
@@ -20,7 +20,7 @@
 ; CHECK-LABEL: rsqrtss:
 ; CHECK:       vrsqrtss (%rdi), %xmm0, %xmm0
 
-    %ld = load float* %a
+    %ld = load float, float* %a
     %ins = insertelement <4 x float> undef, float %ld, i32 0
     %res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ins)
     %ext = extractelement <4 x float> %res, i32 0
@@ -31,7 +31,7 @@
 ; CHECK-LABEL: sqrtss:
 ; CHECK:       vsqrtss (%rdi), %xmm0, %xmm0
 
-    %ld = load float* %a
+    %ld = load float, float* %a
     %ins = insertelement <4 x float> undef, float %ld, i32 0
     %res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ins)
     %ext = extractelement <4 x float> %res, i32 0
@@ -42,7 +42,7 @@
 ; CHECK-LABEL: sqrtsd:
 ; CHECK:       vsqrtsd (%rdi), %xmm0, %xmm0
 
-    %ld = load double* %a
+    %ld = load double, double* %a
     %ins = insertelement <2 x double> undef, double %ld, i32 0
     %res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ins)
     %ext = extractelement <2 x double> %res, i32 0
diff --git a/llvm/test/CodeGen/X86/fold-load-vec.ll b/llvm/test/CodeGen/X86/fold-load-vec.ll
index 2714923..657b7bd 100644
--- a/llvm/test/CodeGen/X86/fold-load-vec.ll
+++ b/llvm/test/CodeGen/X86/fold-load-vec.ll
@@ -14,24 +14,24 @@
   store <4 x float>* %source, <4 x float>** %source.addr, align 8
   store <2 x float>* %dest, <2 x float>** %dest.addr, align 8
   store <2 x float> zeroinitializer, <2 x float>* %tmp, align 8
-  %0 = load <4 x float>** %source.addr, align 8
+  %0 = load <4 x float>*, <4 x float>** %source.addr, align 8
   %arrayidx = getelementptr inbounds <4 x float>, <4 x float>* %0, i64 0
-  %1 = load <4 x float>* %arrayidx, align 16
+  %1 = load <4 x float>, <4 x float>* %arrayidx, align 16
   %2 = extractelement <4 x float> %1, i32 0
-  %3 = load <2 x float>* %tmp, align 8
+  %3 = load <2 x float>, <2 x float>* %tmp, align 8
   %4 = insertelement <2 x float> %3, float %2, i32 1
   store <2 x float> %4, <2 x float>* %tmp, align 8
-  %5 = load <2 x float>* %tmp, align 8
-  %6 = load <2 x float>** %dest.addr, align 8
+  %5 = load <2 x float>, <2 x float>* %tmp, align 8
+  %6 = load <2 x float>*, <2 x float>** %dest.addr, align 8
   %arrayidx1 = getelementptr inbounds <2 x float>, <2 x float>* %6, i64 0
   store <2 x float> %5, <2 x float>* %arrayidx1, align 8
-  %7 = load <2 x float>** %dest.addr, align 8
+  %7 = load <2 x float>*, <2 x float>** %dest.addr, align 8
   %arrayidx2 = getelementptr inbounds <2 x float>, <2 x float>* %7, i64 0
-  %8 = load <2 x float>* %arrayidx2, align 8
+  %8 = load <2 x float>, <2 x float>* %arrayidx2, align 8
   %vecext = extractelement <2 x float> %8, i32 0
-  %9 = load <2 x float>** %dest.addr, align 8
+  %9 = load <2 x float>*, <2 x float>** %dest.addr, align 8
   %arrayidx3 = getelementptr inbounds <2 x float>, <2 x float>* %9, i64 0
-  %10 = load <2 x float>* %arrayidx3, align 8
+  %10 = load <2 x float>, <2 x float>* %arrayidx3, align 8
   %vecext4 = extractelement <2 x float> %10, i32 1
   call void @ext(float %vecext, float %vecext4)
   ret void
diff --git a/llvm/test/CodeGen/X86/fold-load.ll b/llvm/test/CodeGen/X86/fold-load.ll
index dde0a2d..49eeb6b 100644
--- a/llvm/test/CodeGen/X86/fold-load.ll
+++ b/llvm/test/CodeGen/X86/fold-load.ll
@@ -10,7 +10,7 @@
 
 cond_true:		; preds = %entry
 	%new_size.0.i = select i1 false, i32 0, i32 0		; <i32> [#uses=1]
-	%tmp.i = load i32* bitcast (i8* getelementptr (%struct.obstack* @stmt_obstack, i32 0, i32 10) to i32*)		; <i32> [#uses=1]
+	%tmp.i = load i32, i32* bitcast (i8* getelementptr (%struct.obstack* @stmt_obstack, i32 0, i32 10) to i32*)		; <i32> [#uses=1]
 	%tmp.i.upgrd.1 = trunc i32 %tmp.i to i8		; <i8> [#uses=1]
 	%tmp21.i = and i8 %tmp.i.upgrd.1, 1		; <i8> [#uses=1]
 	%tmp22.i = icmp eq i8 %tmp21.i, 0		; <i1> [#uses=1]
@@ -30,7 +30,7 @@
 
 
 define i32 @test2(i16* %P, i16* %Q) nounwind {
-  %A = load i16* %P, align 4                      ; <i16> [#uses=11]
+  %A = load i16, i16* %P, align 4                      ; <i16> [#uses=11]
   %C = zext i16 %A to i32                         ; <i32> [#uses=1]
   %D = and i32 %C, 255                            ; <i32> [#uses=1]
   br label %L
@@ -54,8 +54,8 @@
 ; CHECK: xorl (%e
 ; CHECK: j
 entry:
-  %0 = load i32* %P, align 4
-  %1 = load i32* %Q, align 4
+  %0 = load i32, i32* %P, align 4
+  %1 = load i32, i32* %Q, align 4
   %2 = xor i32 %0, %1
   %3 = and i32 %2, 89947
   %4 = icmp eq i32 %3, 0
diff --git a/llvm/test/CodeGen/X86/fold-mul-lohi.ll b/llvm/test/CodeGen/X86/fold-mul-lohi.ll
index 18ec0a2..8d4c5ef 100644
--- a/llvm/test/CodeGen/X86/fold-mul-lohi.ll
+++ b/llvm/test/CodeGen/X86/fold-mul-lohi.ll
@@ -14,7 +14,7 @@
 bb:
 	%i.019.0 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ]
 	%tmp2 = getelementptr [1000 x i8], [1000 x i8]* @B, i32 0, i32 %i.019.0
-	%tmp3 = load i8* %tmp2, align 4
+	%tmp3 = load i8, i8* %tmp2, align 4
 	%tmp4 = mul i8 %tmp3, 2
 	%tmp5 = getelementptr [1000 x i8], [1000 x i8]* @A, i32 0, i32 %i.019.0
 	store i8 %tmp4, i8* %tmp5, align 4
diff --git a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
index 60a6844..d95c632 100644
--- a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -20,7 +20,7 @@
 
 define void @program_1(%struct._image2d_t* %dest, %struct._image2d_t* %t0, <4 x float> %p0, <4 x float> %p1, <4 x float> %p4, <4 x float> %p5, <4 x float> %p6) nounwind {
 entry:
-	%tmp3.i = load i32* null		; <i32> [#uses=1]
+	%tmp3.i = load i32, i32* null		; <i32> [#uses=1]
 	%cmp = icmp slt i32 0, %tmp3.i		; <i1> [#uses=1]
 	br i1 %cmp, label %forcond, label %ifthen
 
@@ -28,7 +28,7 @@
 	ret void
 
 forcond:		; preds = %entry
-	%tmp3.i536 = load i32* null		; <i32> [#uses=1]
+	%tmp3.i536 = load i32, i32* null		; <i32> [#uses=1]
 	%cmp12 = icmp slt i32 0, %tmp3.i536		; <i1> [#uses=1]
 	br i1 %cmp12, label %forbody, label %afterfor
 
diff --git a/llvm/test/CodeGen/X86/fold-sext-trunc.ll b/llvm/test/CodeGen/X86/fold-sext-trunc.ll
index b453310..ab28888 100644
--- a/llvm/test/CodeGen/X86/fold-sext-trunc.ll
+++ b/llvm/test/CodeGen/X86/fold-sext-trunc.ll
@@ -9,8 +9,8 @@
 
 define void @int322(i32 %foo) nounwind {
 entry:
-	%val = load i64* getelementptr (%0* bitcast (%struct.S1* @g_10 to %0*), i32 0, i32 0)		; <i64> [#uses=1]
-	%0 = load i32* getelementptr (%struct.S1* @g_10, i32 0, i32 1), align 4		; <i32> [#uses=1]
+	%val = load i64, i64* getelementptr (%0* bitcast (%struct.S1* @g_10 to %0*), i32 0, i32 0)		; <i64> [#uses=1]
+	%0 = load i32, i32* getelementptr (%struct.S1* @g_10, i32 0, i32 1), align 4		; <i32> [#uses=1]
 	%1 = sext i32 %0 to i64		; <i64> [#uses=1]
 	%tmp4.i = lshr i64 %val, 32		; <i64> [#uses=1]
 	%tmp5.i = trunc i64 %tmp4.i to i32		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/fold-tied-op.ll b/llvm/test/CodeGen/X86/fold-tied-op.ll
index 1695d12..62fed42 100644
--- a/llvm/test/CodeGen/X86/fold-tied-op.ll
+++ b/llvm/test/CodeGen/X86/fold-tied-op.ll
@@ -23,20 +23,20 @@
 ; Function Attrs: nounwind uwtable
 define i64 @fn1() #0 {
 entry:
-  %0 = load i32* @a, align 4, !tbaa !1
+  %0 = load i32, i32* @a, align 4, !tbaa !1
   %1 = inttoptr i32 %0 to %struct.XXH_state64_t*
   %total_len = getelementptr inbounds %struct.XXH_state64_t, %struct.XXH_state64_t* %1, i32 0, i32 0
-  %2 = load i32* %total_len, align 4, !tbaa !5
+  %2 = load i32, i32* %total_len, align 4, !tbaa !5
   %tobool = icmp eq i32 %2, 0
   br i1 %tobool, label %if.else, label %if.then
 
 if.then:                                          ; preds = %entry
   %v3 = getelementptr inbounds %struct.XXH_state64_t, %struct.XXH_state64_t* %1, i32 0, i32 3
-  %3 = load i64* %v3, align 4, !tbaa !8
+  %3 = load i64, i64* %v3, align 4, !tbaa !8
   %v4 = getelementptr inbounds %struct.XXH_state64_t, %struct.XXH_state64_t* %1, i32 0, i32 4
-  %4 = load i64* %v4, align 4, !tbaa !9
+  %4 = load i64, i64* %v4, align 4, !tbaa !9
   %v2 = getelementptr inbounds %struct.XXH_state64_t, %struct.XXH_state64_t* %1, i32 0, i32 2
-  %5 = load i64* %v2, align 4, !tbaa !10
+  %5 = load i64, i64* %v2, align 4, !tbaa !10
   %shl = shl i64 %5, 1
   %or = or i64 %shl, %5
   %shl2 = shl i64 %3, 2
@@ -54,7 +54,7 @@
   br label %if.end
 
 if.else:                                          ; preds = %entry
-  %6 = load i64* @b, align 8, !tbaa !11
+  %6 = load i64, i64* @b, align 8, !tbaa !11
   %xor10 = xor i64 %6, -4417276706812531889
   %mul11 = mul nsw i64 %xor10, 400714785074694791
   br label %if.end
diff --git a/llvm/test/CodeGen/X86/fold-vex.ll b/llvm/test/CodeGen/X86/fold-vex.ll
index 5a8b1d8..006db6e 100644
--- a/llvm/test/CodeGen/X86/fold-vex.ll
+++ b/llvm/test/CodeGen/X86/fold-vex.ll
@@ -14,7 +14,7 @@
 ; unless specially configured on some CPUs such as AMD Family 10H.
 
 define <4 x i32> @test1(<4 x i32>* %p0, <4 x i32> %in1) nounwind {
-  %in0 = load <4 x i32>* %p0, align 2
+  %in0 = load <4 x i32>, <4 x i32>* %p0, align 2
   %a = and <4 x i32> %in0, %in1
   ret <4 x i32> %a
 
diff --git a/llvm/test/CodeGen/X86/fold-zext-trunc.ll b/llvm/test/CodeGen/X86/fold-zext-trunc.ll
index f901ad2..1e4944a 100644
--- a/llvm/test/CodeGen/X86/fold-zext-trunc.ll
+++ b/llvm/test/CodeGen/X86/fold-zext-trunc.ll
@@ -12,9 +12,9 @@
 ; CHECK-NOT: movzbl
 ; CHECK: calll
 entry:
-  %tmp17 = load i8* getelementptr inbounds (%struct.S0* @g_98, i32 0, i32 1, i32 0), align 4
+  %tmp17 = load i8, i8* getelementptr inbounds (%struct.S0* @g_98, i32 0, i32 1, i32 0), align 4
   %tmp54 = zext i8 %tmp17 to i32
-  %foo = load i32* bitcast (i8* getelementptr inbounds (%struct.S0* @g_98, i32 0, i32 1, i32 0) to i32*), align 4
+  %foo = load i32, i32* bitcast (i8* getelementptr inbounds (%struct.S0* @g_98, i32 0, i32 1, i32 0) to i32*), align 4
   %conv.i = trunc i32 %foo to i8
   tail call void @func_12(i32 %tmp54, i8 zeroext %conv.i) nounwind
   ret void
diff --git a/llvm/test/CodeGen/X86/force-align-stack-alloca.ll b/llvm/test/CodeGen/X86/force-align-stack-alloca.ll
index bd98069..a9ba20f 100644
--- a/llvm/test/CodeGen/X86/force-align-stack-alloca.ll
+++ b/llvm/test/CodeGen/X86/force-align-stack-alloca.ll
@@ -10,7 +10,7 @@
 
 define i32 @f(i8* %p) nounwind {
 entry:
-  %0 = load i8* %p
+  %0 = load i8, i8* %p
   %conv = sext i8 %0 to i32
   ret i32 %conv
 }
diff --git a/llvm/test/CodeGen/X86/fp-double-rounding.ll b/llvm/test/CodeGen/X86/fp-double-rounding.ll
index 030cb9a..c7578ac 100644
--- a/llvm/test/CodeGen/X86/fp-double-rounding.ll
+++ b/llvm/test/CodeGen/X86/fp-double-rounding.ll
@@ -11,7 +11,7 @@
 ; UNSAFE-NOT: cvt
 define void @double_rounding(fp128* %x, float* %f) {
 entry:
-  %0 = load fp128* %x, align 16
+  %0 = load fp128, fp128* %x, align 16
   %1 = fptrunc fp128 %0 to double
   %2 = fptrunc double %1 to float
   store float %2, float* %f, align 4
diff --git a/llvm/test/CodeGen/X86/fp-load-trunc.ll b/llvm/test/CodeGen/X86/fp-load-trunc.ll
index e6c1e1a..3896913 100644
--- a/llvm/test/CodeGen/X86/fp-load-trunc.ll
+++ b/llvm/test/CodeGen/X86/fp-load-trunc.ll
@@ -23,7 +23,7 @@
 ; AVX-NEXT:    flds (%esp)
 ; AVX-NEXT:    popl %eax
 ; AVX-NEXT:    retl
-  %x = load <1 x double>* %p
+  %x = load <1 x double>, <1 x double>* %p
   %y = fptrunc <1 x double> %x to <1 x float>
   ret <1 x float> %y
 }
@@ -40,7 +40,7 @@
 ; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; AVX-NEXT:    vcvtpd2psx (%eax), %xmm0
 ; AVX-NEXT:    retl
-  %x = load <2 x double>* %p
+  %x = load <2 x double>, <2 x double>* %p
   %y = fptrunc <2 x double> %x to <2 x float>
   ret <2 x float> %y
 }
@@ -59,7 +59,7 @@
 ; AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; AVX-NEXT:    vcvtpd2psy (%eax), %xmm0
 ; AVX-NEXT:    retl
-  %x = load <4 x double>* %p
+  %x = load <4 x double>, <4 x double>* %p
   %y = fptrunc <4 x double> %x to <4 x float>
   ret <4 x float> %y
 }
@@ -83,7 +83,7 @@
 ; AVX-NEXT:    vcvtpd2psy 32(%eax), %xmm1
 ; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX-NEXT:    retl
-  %x = load <8 x double>* %p
+  %x = load <8 x double>, <8 x double>* %p
   %y = fptrunc <8 x double> %x to <8 x float>
   ret <8 x float> %y
 }
diff --git a/llvm/test/CodeGen/X86/fp-stack-O0-crash.ll b/llvm/test/CodeGen/X86/fp-stack-O0-crash.ll
index ae83a02..5acfd5d 100644
--- a/llvm/test/CodeGen/X86/fp-stack-O0-crash.ll
+++ b/llvm/test/CodeGen/X86/fp-stack-O0-crash.ll
@@ -11,14 +11,14 @@
   br i1 false, label %cond.true, label %cond.false
 
 cond.true:                                        ; preds = %entry
-  %tmp = load x86_fp80* %x.addr                   ; <x86_fp80> [#uses=1]
-  %tmp1 = load x86_fp80* %x.addr                  ; <x86_fp80> [#uses=1]
+  %tmp = load x86_fp80, x86_fp80* %x.addr                   ; <x86_fp80> [#uses=1]
+  %tmp1 = load x86_fp80, x86_fp80* %x.addr                  ; <x86_fp80> [#uses=1]
   %cmp = fcmp oeq x86_fp80 %tmp, %tmp1            ; <i1> [#uses=1]
   br i1 %cmp, label %if.then, label %if.end
 
 cond.false:                                       ; preds = %entry
-  %tmp2 = load x86_fp80* %x.addr                  ; <x86_fp80> [#uses=1]
-  %tmp3 = load x86_fp80* %x.addr                  ; <x86_fp80> [#uses=1]
+  %tmp2 = load x86_fp80, x86_fp80* %x.addr                  ; <x86_fp80> [#uses=1]
+  %tmp3 = load x86_fp80, x86_fp80* %x.addr                  ; <x86_fp80> [#uses=1]
   %cmp4 = fcmp une x86_fp80 %tmp2, %tmp3          ; <i1> [#uses=1]
   br i1 %cmp4, label %if.then, label %if.end
 
diff --git a/llvm/test/CodeGen/X86/fp-stack-compare-cmov.ll b/llvm/test/CodeGen/X86/fp-stack-compare-cmov.ll
index b457fbc..1d35488 100644
--- a/llvm/test/CodeGen/X86/fp-stack-compare-cmov.ll
+++ b/llvm/test/CodeGen/X86/fp-stack-compare-cmov.ll
@@ -4,7 +4,7 @@
 define float @foo(float* %col.2.0) {
 ; CHECK: fucompi
 ; CHECK: fcmov
-  %tmp = load float* %col.2.0
+  %tmp = load float, float* %col.2.0
   %tmp16 = fcmp olt float %tmp, 0.000000e+00
   %tmp20 = fsub float -0.000000e+00, %tmp
   %iftmp.2.0 = select i1 %tmp16, float %tmp20, float %tmp
diff --git a/llvm/test/CodeGen/X86/fp-stack-compare.ll b/llvm/test/CodeGen/X86/fp-stack-compare.ll
index a8557ad..96088d7 100644
--- a/llvm/test/CodeGen/X86/fp-stack-compare.ll
+++ b/llvm/test/CodeGen/X86/fp-stack-compare.ll
@@ -6,7 +6,7 @@
 ; CHECK-NOT: fucompi
 ; CHECK: j
 ; CHECK-NOT: fcmov
-  %tmp = load float* %col.2.0
+  %tmp = load float, float* %col.2.0
   %tmp16 = fcmp olt float %tmp, 0.000000e+00
   %tmp20 = fsub float -0.000000e+00, %tmp
   %iftmp.2.0 = select i1 %tmp16, float %tmp20, float %tmp
diff --git a/llvm/test/CodeGen/X86/fp-stack-ret.ll b/llvm/test/CodeGen/X86/fp-stack-ret.ll
index 2733117..9635e2d 100644
--- a/llvm/test/CodeGen/X86/fp-stack-ret.ll
+++ b/llvm/test/CodeGen/X86/fp-stack-ret.ll
@@ -7,7 +7,7 @@
 ; CHECK: fldl
 ; CHECK-NEXT: ret
 define double @test1(double *%P) {
-        %A = load double* %P
+        %A = load double, double* %P
         ret double %A
 }
 
diff --git a/llvm/test/CodeGen/X86/fp-stack.ll b/llvm/test/CodeGen/X86/fp-stack.ll
index dca644d..44c0396 100644
--- a/llvm/test/CodeGen/X86/fp-stack.ll
+++ b/llvm/test/CodeGen/X86/fp-stack.ll
@@ -5,9 +5,9 @@
 
 define void @foo() nounwind {
 entry:
-  %tmp6 = load x86_fp80* undef                       ; <x86_fp80> [#uses=2]
-  %tmp15 = load x86_fp80* undef                      ; <x86_fp80> [#uses=2]
-  %tmp24 = load x86_fp80* undef                      ; <x86_fp80> [#uses=1]
+  %tmp6 = load x86_fp80, x86_fp80* undef                       ; <x86_fp80> [#uses=2]
+  %tmp15 = load x86_fp80, x86_fp80* undef                      ; <x86_fp80> [#uses=2]
+  %tmp24 = load x86_fp80, x86_fp80* undef                      ; <x86_fp80> [#uses=1]
   br i1 undef, label %return, label %bb.nph
 
 bb.nph:                                           ; preds = %entry
diff --git a/llvm/test/CodeGen/X86/fp2sint.ll b/llvm/test/CodeGen/X86/fp2sint.ll
index 1675444..b41f56f 100644
--- a/llvm/test/CodeGen/X86/fp2sint.ll
+++ b/llvm/test/CodeGen/X86/fp2sint.ll
@@ -4,10 +4,10 @@
 
 define i32 @main(i32 %argc, i8** %argv) {
 cond_false.i.i.i:               ; preds = %bb.i5
-       %tmp35.i = load double* null, align 8           ; <double> [#uses=1]
+       %tmp35.i = load double, double* null, align 8           ; <double> [#uses=1]
        %tmp3536.i = fptosi double %tmp35.i to i32              ; <i32> [#uses=1]
        %tmp3536140.i = zext i32 %tmp3536.i to i64              ; <i64> [#uses=1]
-       %tmp39.i = load double* null, align 4           ; <double> [#uses=1]
+       %tmp39.i = load double, double* null, align 4           ; <double> [#uses=1]
        %tmp3940.i = fptosi double %tmp39.i to i32              ; <i32> [#uses=1]
        %tmp3940137.i = zext i32 %tmp3940.i to i64              ; <i64> [#uses=1]
        %tmp3940137138.i = shl i64 %tmp3940137.i, 32            ; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/fp_load_cast_fold.ll b/llvm/test/CodeGen/X86/fp_load_cast_fold.ll
index 72ea12f..5fd22e3 100644
--- a/llvm/test/CodeGen/X86/fp_load_cast_fold.ll
+++ b/llvm/test/CodeGen/X86/fp_load_cast_fold.ll
@@ -1,19 +1,19 @@
 ; RUN: llc < %s -march=x86 | FileCheck %s
 
 define double @short(i16* %P) {
-        %V = load i16* %P               ; <i16> [#uses=1]
+        %V = load i16, i16* %P               ; <i16> [#uses=1]
         %V2 = sitofp i16 %V to double           ; <double> [#uses=1]
         ret double %V2
 }
 
 define double @int(i32* %P) {
-        %V = load i32* %P               ; <i32> [#uses=1]
+        %V = load i32, i32* %P               ; <i32> [#uses=1]
         %V2 = sitofp i32 %V to double           ; <double> [#uses=1]
         ret double %V2
 }
 
 define double @long(i64* %P) {
-        %V = load i64* %P               ; <i64> [#uses=1]
+        %V = load i64, i64* %P               ; <i64> [#uses=1]
         %V2 = sitofp i64 %V to double           ; <double> [#uses=1]
         ret double %V2
 }
diff --git a/llvm/test/CodeGen/X86/fp_load_fold.ll b/llvm/test/CodeGen/X86/fp_load_fold.ll
index a2cea5e..5749745 100644
--- a/llvm/test/CodeGen/X86/fp_load_fold.ll
+++ b/llvm/test/CodeGen/X86/fp_load_fold.ll
@@ -4,37 +4,37 @@
 ; Test that the load of the memory location is folded into the operation.
 
 define double @test_add(double %X, double* %P) {
-	%Y = load double* %P		; <double> [#uses=1]
+	%Y = load double, double* %P		; <double> [#uses=1]
 	%R = fadd double %X, %Y		; <double> [#uses=1]
 	ret double %R
 }
 
 define double @test_mul(double %X, double* %P) {
-	%Y = load double* %P		; <double> [#uses=1]
+	%Y = load double, double* %P		; <double> [#uses=1]
 	%R = fmul double %X, %Y		; <double> [#uses=1]
 	ret double %R
 }
 
 define double @test_sub(double %X, double* %P) {
-	%Y = load double* %P		; <double> [#uses=1]
+	%Y = load double, double* %P		; <double> [#uses=1]
 	%R = fsub double %X, %Y		; <double> [#uses=1]
 	ret double %R
 }
 
 define double @test_subr(double %X, double* %P) {
-	%Y = load double* %P		; <double> [#uses=1]
+	%Y = load double, double* %P		; <double> [#uses=1]
 	%R = fsub double %Y, %X		; <double> [#uses=1]
 	ret double %R
 }
 
 define double @test_div(double %X, double* %P) {
-	%Y = load double* %P		; <double> [#uses=1]
+	%Y = load double, double* %P		; <double> [#uses=1]
 	%R = fdiv double %X, %Y		; <double> [#uses=1]
 	ret double %R
 }
 
 define double @test_divr(double %X, double* %P) {
-	%Y = load double* %P		; <double> [#uses=1]
+	%Y = load double, double* %P		; <double> [#uses=1]
 	%R = fdiv double %Y, %X		; <double> [#uses=1]
 	ret double %R
 }
diff --git a/llvm/test/CodeGen/X86/frameallocate.ll b/llvm/test/CodeGen/X86/frameallocate.ll
index 7a2f9e3..2172ac0 100644
--- a/llvm/test/CodeGen/X86/frameallocate.ll
+++ b/llvm/test/CodeGen/X86/frameallocate.ll
@@ -10,7 +10,7 @@
 define void @print_framealloc_from_fp(i8* %fp) {
   %alloc = call i8* @llvm.framerecover(i8* bitcast (void(i32*, i32*)* @alloc_func to i8*), i8* %fp)
   %alloc_i32 = bitcast i8* %alloc to i32*
-  %r = load i32* %alloc_i32
+  %r = load i32, i32* %alloc_i32
   call i32 (i8*, ...)* @printf(i8* getelementptr ([10 x i8]* @str, i32 0, i32 0), i32 %r)
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/full-lsr.ll b/llvm/test/CodeGen/X86/full-lsr.ll
index 03eadc0..85b2b41 100644
--- a/llvm/test/CodeGen/X86/full-lsr.ll
+++ b/llvm/test/CodeGen/X86/full-lsr.ll
@@ -19,17 +19,17 @@
 bb:		; preds = %bb, %entry
 	%i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=5]
 	%1 = getelementptr float, float* %A, i32 %i.03		; <float*> [#uses=1]
-	%2 = load float* %1, align 4		; <float> [#uses=1]
+	%2 = load float, float* %1, align 4		; <float> [#uses=1]
 	%3 = getelementptr float, float* %B, i32 %i.03		; <float*> [#uses=1]
-	%4 = load float* %3, align 4		; <float> [#uses=1]
+	%4 = load float, float* %3, align 4		; <float> [#uses=1]
 	%5 = fadd float %2, %4		; <float> [#uses=1]
 	%6 = getelementptr float, float* %C, i32 %i.03		; <float*> [#uses=1]
 	store float %5, float* %6, align 4
 	%7 = add i32 %i.03, 10		; <i32> [#uses=3]
 	%8 = getelementptr float, float* %A, i32 %7		; <float*> [#uses=1]
-	%9 = load float* %8, align 4		; <float> [#uses=1]
+	%9 = load float, float* %8, align 4		; <float> [#uses=1]
 	%10 = getelementptr float, float* %B, i32 %7		; <float*> [#uses=1]
-	%11 = load float* %10, align 4		; <float> [#uses=1]
+	%11 = load float, float* %10, align 4		; <float> [#uses=1]
 	%12 = fadd float %9, %11		; <float> [#uses=1]
 	%13 = getelementptr float, float* %C, i32 %7		; <float*> [#uses=1]
 	store float %12, float* %13, align 4
diff --git a/llvm/test/CodeGen/X86/gather-addresses.ll b/llvm/test/CodeGen/X86/gather-addresses.ll
index c308960..f7d4eb3 100644
--- a/llvm/test/CodeGen/X86/gather-addresses.ll
+++ b/llvm/test/CodeGen/X86/gather-addresses.ll
@@ -35,8 +35,8 @@
 ; WIN: movhpd	(%rcx,%r[[REG4]],8), %xmm1
 
 define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
-  %a = load <4 x i32>* %i
-  %b = load <4 x i32>* %h
+  %a = load <4 x i32>, <4 x i32>* %i
+  %b = load <4 x i32>, <4 x i32>* %h
   %j = and <4 x i32> %a, %b
   %d0 = extractelement <4 x i32> %j, i32 0
   %d1 = extractelement <4 x i32> %j, i32 1
@@ -46,10 +46,10 @@
   %q1 = getelementptr double, double* %p, i32 %d1
   %q2 = getelementptr double, double* %p, i32 %d2
   %q3 = getelementptr double, double* %p, i32 %d3
-  %r0 = load double* %q0
-  %r1 = load double* %q1
-  %r2 = load double* %q2
-  %r3 = load double* %q3
+  %r0 = load double, double* %q0
+  %r1 = load double, double* %q1
+  %r2 = load double, double* %q2
+  %r3 = load double, double* %q3
   %v0 = insertelement <4 x double> undef, double %r0, i32 0
   %v1 = insertelement <4 x double> %v0, double %r1, i32 1
   %v2 = insertelement <4 x double> %v1, double %r2, i32 2
@@ -67,8 +67,8 @@
 ; LIN32-DAG: {{(mov|and)}}l	8(%esp),
 ; LIN32-DAG: {{(mov|and)}}l	12(%esp),
 define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind {
-  %a = load <4 x i32>* %i
-  %b = load <4 x i32>* %h
+  %a = load <4 x i32>, <4 x i32>* %i
+  %b = load <4 x i32>, <4 x i32>* %h
   %j = and <4 x i32> %a, %b
   %d0 = extractelement <4 x i32> %j, i32 0
   %d1 = extractelement <4 x i32> %j, i32 1
diff --git a/llvm/test/CodeGen/X86/ghc-cc.ll b/llvm/test/CodeGen/X86/ghc-cc.ll
index 3ada8c8..16e4db6 100644
--- a/llvm/test/CodeGen/X86/ghc-cc.ll
+++ b/llvm/test/CodeGen/X86/ghc-cc.ll
@@ -32,10 +32,10 @@
   ; CHECK-NEXT: movl hp, %edi
   ; CHECK-NEXT: movl sp, %ebp
   ; CHECK-NEXT: movl base, %ebx
-  %0 = load i32* @r1
-  %1 = load i32* @hp
-  %2 = load i32* @sp
-  %3 = load i32* @base
+  %0 = load i32, i32* @r1
+  %1 = load i32, i32* @hp
+  %2 = load i32, i32* @sp
+  %3 = load i32, i32* @base
   ; CHECK: jmp bar
   tail call ghccc void @bar( i32 %3, i32 %2, i32 %1, i32 %0 ) nounwind
   ret void
diff --git a/llvm/test/CodeGen/X86/ghc-cc64.ll b/llvm/test/CodeGen/X86/ghc-cc64.ll
index 7251dd6..c4ce8cf 100644
--- a/llvm/test/CodeGen/X86/ghc-cc64.ll
+++ b/llvm/test/CodeGen/X86/ghc-cc64.ll
@@ -57,22 +57,22 @@
   ; CHECK-NEXT: movq hp(%rip), %r12
   ; CHECK-NEXT: movq sp(%rip), %rbp
   ; CHECK-NEXT: movq base(%rip), %r13
-  %0 = load double* @d2
-  %1 = load double* @d1
-  %2 = load float* @f4
-  %3 = load float* @f3
-  %4 = load float* @f2
-  %5 = load float* @f1
-  %6 = load i64* @splim
-  %7 = load i64* @r6
-  %8 = load i64* @r5
-  %9 = load i64* @r4
-  %10 = load i64* @r3
-  %11 = load i64* @r2
-  %12 = load i64* @r1
-  %13 = load i64* @hp
-  %14 = load i64* @sp
-  %15 = load i64* @base
+  %0 = load double, double* @d2
+  %1 = load double, double* @d1
+  %2 = load float, float* @f4
+  %3 = load float, float* @f3
+  %4 = load float, float* @f2
+  %5 = load float, float* @f1
+  %6 = load i64, i64* @splim
+  %7 = load i64, i64* @r6
+  %8 = load i64, i64* @r5
+  %9 = load i64, i64* @r4
+  %10 = load i64, i64* @r3
+  %11 = load i64, i64* @r2
+  %12 = load i64, i64* @r1
+  %13 = load i64, i64* @hp
+  %14 = load i64, i64* @sp
+  %15 = load i64, i64* @base
   ; CHECK: jmp bar
   tail call ghccc void @bar( i64 %15, i64 %14, i64 %13, i64 %12, i64 %11,
                              i64 %10, i64 %9, i64 %8, i64 %7, i64 %6,
diff --git a/llvm/test/CodeGen/X86/gs-fold.ll b/llvm/test/CodeGen/X86/gs-fold.ll
index 6470d2b..bbdd033 100644
--- a/llvm/test/CodeGen/X86/gs-fold.ll
+++ b/llvm/test/CodeGen/X86/gs-fold.ll
@@ -6,9 +6,9 @@
 
 define i32 @test() nounwind uwtable {
 entry:
-  %0 = load volatile %struct.thread* addrspace(256)* null
+  %0 = load volatile %struct.thread*, %struct.thread* addrspace(256)* null
   %c = getelementptr inbounds %struct.thread, %struct.thread* %0, i64 0, i32 2
-  %1 = load i32* %c, align 4
+  %1 = load i32, i32* %c, align 4
   ret i32 %1
 }
 
diff --git a/llvm/test/CodeGen/X86/h-register-addressing-32.ll b/llvm/test/CodeGen/X86/h-register-addressing-32.ll
index f79b7f2..d021413 100644
--- a/llvm/test/CodeGen/X86/h-register-addressing-32.ll
+++ b/llvm/test/CodeGen/X86/h-register-addressing-32.ll
@@ -6,7 +6,7 @@
   %t0 = lshr i32 %x, 8
   %t1 = and i32 %t0, 255
   %t2 = getelementptr double, double* %p, i32 %t1
-  %t3 = load double* %t2, align 8
+  %t3 = load double, double* %t2, align 8
   ret double %t3
 }
 ; CHECK: foo8:
@@ -16,7 +16,7 @@
   %t0 = lshr i32 %x, 8
   %t1 = and i32 %t0, 255
   %t2 = getelementptr float, float* %p, i32 %t1
-  %t3 = load float* %t2, align 8
+  %t3 = load float, float* %t2, align 8
   ret float %t3
 }
 ; CHECK: foo4:
@@ -26,7 +26,7 @@
   %t0 = lshr i32 %x, 8
   %t1 = and i32 %t0, 255
   %t2 = getelementptr i16, i16* %p, i32 %t1
-  %t3 = load i16* %t2, align 8
+  %t3 = load i16, i16* %t2, align 8
   ret i16 %t3
 }
 ; CHECK: foo2:
@@ -36,7 +36,7 @@
   %t0 = lshr i32 %x, 8
   %t1 = and i32 %t0, 255
   %t2 = getelementptr i8, i8* %p, i32 %t1
-  %t3 = load i8* %t2, align 8
+  %t3 = load i8, i8* %t2, align 8
   ret i8 %t3
 }
 ; CHECK: foo1:
@@ -46,7 +46,7 @@
   %t0 = lshr i32 %x, 5
   %t1 = and i32 %t0, 2040
   %t2 = getelementptr i8, i8* %p, i32 %t1
-  %t3 = load i8* %t2, align 8
+  %t3 = load i8, i8* %t2, align 8
   ret i8 %t3
 }
 ; CHECK: bar8:
@@ -56,7 +56,7 @@
   %t0 = lshr i32 %x, 6
   %t1 = and i32 %t0, 1020
   %t2 = getelementptr i8, i8* %p, i32 %t1
-  %t3 = load i8* %t2, align 8
+  %t3 = load i8, i8* %t2, align 8
   ret i8 %t3
 }
 ; CHECK: bar4:
@@ -66,7 +66,7 @@
   %t0 = lshr i32 %x, 7
   %t1 = and i32 %t0, 510
   %t2 = getelementptr i8, i8* %p, i32 %t1
-  %t3 = load i8* %t2, align 8
+  %t3 = load i8, i8* %t2, align 8
   ret i8 %t3
 }
 ; CHECK: bar2:
diff --git a/llvm/test/CodeGen/X86/h-register-addressing-64.ll b/llvm/test/CodeGen/X86/h-register-addressing-64.ll
index c9bd097..b3159f4 100644
--- a/llvm/test/CodeGen/X86/h-register-addressing-64.ll
+++ b/llvm/test/CodeGen/X86/h-register-addressing-64.ll
@@ -6,7 +6,7 @@
   %t0 = lshr i64 %x, 8
   %t1 = and i64 %t0, 255
   %t2 = getelementptr double, double* %p, i64 %t1
-  %t3 = load double* %t2, align 8
+  %t3 = load double, double* %t2, align 8
   ret double %t3
 }
 ; CHECK: foo8:
@@ -16,7 +16,7 @@
   %t0 = lshr i64 %x, 8
   %t1 = and i64 %t0, 255
   %t2 = getelementptr float, float* %p, i64 %t1
-  %t3 = load float* %t2, align 8
+  %t3 = load float, float* %t2, align 8
   ret float %t3
 }
 ; CHECK: foo4:
@@ -26,7 +26,7 @@
   %t0 = lshr i64 %x, 8
   %t1 = and i64 %t0, 255
   %t2 = getelementptr i16, i16* %p, i64 %t1
-  %t3 = load i16* %t2, align 8
+  %t3 = load i16, i16* %t2, align 8
   ret i16 %t3
 }
 ; CHECK: foo2:
@@ -36,7 +36,7 @@
   %t0 = lshr i64 %x, 8
   %t1 = and i64 %t0, 255
   %t2 = getelementptr i8, i8* %p, i64 %t1
-  %t3 = load i8* %t2, align 8
+  %t3 = load i8, i8* %t2, align 8
   ret i8 %t3
 }
 ; CHECK: foo1:
@@ -46,7 +46,7 @@
   %t0 = lshr i64 %x, 5
   %t1 = and i64 %t0, 2040
   %t2 = getelementptr i8, i8* %p, i64 %t1
-  %t3 = load i8* %t2, align 8
+  %t3 = load i8, i8* %t2, align 8
   ret i8 %t3
 }
 ; CHECK: bar8:
@@ -56,7 +56,7 @@
   %t0 = lshr i64 %x, 6
   %t1 = and i64 %t0, 1020
   %t2 = getelementptr i8, i8* %p, i64 %t1
-  %t3 = load i8* %t2, align 8
+  %t3 = load i8, i8* %t2, align 8
   ret i8 %t3
 }
 ; CHECK: bar4:
@@ -66,7 +66,7 @@
   %t0 = lshr i64 %x, 7
   %t1 = and i64 %t0, 510
   %t2 = getelementptr i8, i8* %p, i64 %t1
-  %t3 = load i8* %t2, align 8
+  %t3 = load i8, i8* %t2, align 8
   ret i8 %t3
 }
 ; CHECK: bar2:
diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll
index 1dcf939..f4331ba 100644
--- a/llvm/test/CodeGen/X86/half.ll
+++ b/llvm/test/CodeGen/X86/half.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL: test_load_store:
 ; CHECK: movw (%rdi), [[TMP:%[a-z0-9]+]]
 ; CHECK: movw [[TMP]], (%rsi)
-  %val = load half* %in
+  %val = load half, half* %in
   store half %val, half* %out
   ret void
 }
@@ -13,7 +13,7 @@
 define i16 @test_bitcast_from_half(half* %addr) {
 ; CHECK-LABEL: test_bitcast_from_half:
 ; CHECK: movzwl (%rdi), %eax
-  %val = load half* %addr
+  %val = load half, half* %addr
   %val_int = bitcast half %val to i16
   ret i16 %val_int
 }
@@ -31,7 +31,7 @@
 
 ; CHECK-LIBCALL: jmp __gnu_h2f_ieee
 ; CHECK-FP16: vcvtph2ps
-  %val16 = load half* %addr
+  %val16 = load half, half* %addr
   %val32 = fpext half %val16 to float
   ret float %val32
 }
@@ -43,7 +43,7 @@
 ; CHECK-LIBCALL: cvtss2sd
 ; CHECK-FP16: vcvtph2ps
 ; CHECK-FP16: vcvtss2sd
-  %val16 = load half* %addr
+  %val16 = load half, half* %addr
   %val32 = fpext half %val16 to double
   ret double %val32
 }
diff --git a/llvm/test/CodeGen/X86/hidden-vis-2.ll b/llvm/test/CodeGen/X86/hidden-vis-2.ll
index 74554d1..62e143d 100644
--- a/llvm/test/CodeGen/X86/hidden-vis-2.ll
+++ b/llvm/test/CodeGen/X86/hidden-vis-2.ll
@@ -5,6 +5,6 @@
 
 define i32 @t() nounwind readonly {
 entry:
-	%0 = load i32* @x, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* @x, align 4		; <i32> [#uses=1]
 	ret i32 %0
 }
diff --git a/llvm/test/CodeGen/X86/hidden-vis-3.ll b/llvm/test/CodeGen/X86/hidden-vis-3.ll
index 4be881e..5d9ef44 100644
--- a/llvm/test/CodeGen/X86/hidden-vis-3.ll
+++ b/llvm/test/CodeGen/X86/hidden-vis-3.ll
@@ -12,8 +12,8 @@
 ; X64: _t:
 ; X64: movl _y(%rip), %eax
 
-	%0 = load i32* @x, align 4		; <i32> [#uses=1]
-	%1 = load i32* @y, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* @x, align 4		; <i32> [#uses=1]
+	%1 = load i32, i32* @y, align 4		; <i32> [#uses=1]
 	%2 = add i32 %1, %0		; <i32> [#uses=1]
 	ret i32 %2
 }
diff --git a/llvm/test/CodeGen/X86/hidden-vis-4.ll b/llvm/test/CodeGen/X86/hidden-vis-4.ll
index 25a87b9..17d44d0 100644
--- a/llvm/test/CodeGen/X86/hidden-vis-4.ll
+++ b/llvm/test/CodeGen/X86/hidden-vis-4.ll
@@ -7,6 +7,6 @@
 ; CHECK-LABEL: t:
 ; CHECK: movl _x, %eax
 ; CHECK: .comm _x,4
-	%0 = load i32* @x, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* @x, align 4		; <i32> [#uses=1]
 	ret i32 %0
 }
diff --git a/llvm/test/CodeGen/X86/hidden-vis-pic.ll b/llvm/test/CodeGen/X86/hidden-vis-pic.ll
index 1caab7a..e49bb48 100644
--- a/llvm/test/CodeGen/X86/hidden-vis-pic.ll
+++ b/llvm/test/CodeGen/X86/hidden-vis-pic.ll
@@ -45,6 +45,6 @@
   br label %return
 
 return:                                           ; preds = %entry
-  %retval1 = load i32* %retval                    ; <i32> [#uses=1]
+  %retval1 = load i32, i32* %retval                    ; <i32> [#uses=1]
   ret i32 %retval1
 }
diff --git a/llvm/test/CodeGen/X86/hipe-cc.ll b/llvm/test/CodeGen/X86/hipe-cc.ll
index b34417e..e3808e7 100644
--- a/llvm/test/CodeGen/X86/hipe-cc.ll
+++ b/llvm/test/CodeGen/X86/hipe-cc.ll
@@ -53,18 +53,18 @@
   ; CHECK-NEXT: movl  12(%esp), %ebp
   ; CHECK-NEXT: movl   8(%esp), %eax
   ; CHECK-NEXT: movl   4(%esp), %edx
-  %0 = load i32* %hp_var
-  %1 = load i32* %p_var
-  %2 = load i32* %arg0_var
-  %3 = load i32* %arg1_var
-  %4 = load i32* %arg2_var
+  %0 = load i32, i32* %hp_var
+  %1 = load i32, i32* %p_var
+  %2 = load i32, i32* %arg0_var
+  %3 = load i32, i32* %arg1_var
+  %4 = load i32, i32* %arg2_var
   ; CHECK:      jmp bar
   tail call cc 11 void @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) nounwind
   ret void
 }
 
 define cc 11 void @baz() nounwind {
-  %tmp_clos = load i32* @clos
+  %tmp_clos = load i32, i32* @clos
   %tmp_clos2 = inttoptr i32 %tmp_clos to i32*
   %indirect_call = bitcast i32* %tmp_clos2 to void (i32, i32, i32)*
   ; CHECK:      movl $42, %eax
diff --git a/llvm/test/CodeGen/X86/hipe-cc64.ll b/llvm/test/CodeGen/X86/hipe-cc64.ll
index 27e1c72..28d9039 100644
--- a/llvm/test/CodeGen/X86/hipe-cc64.ll
+++ b/llvm/test/CodeGen/X86/hipe-cc64.ll
@@ -62,19 +62,19 @@
   ; CHECK-NEXT: movq  24(%rsp), %rsi
   ; CHECK-NEXT: movq  16(%rsp), %rdx
   ; CHECK-NEXT: movq  8(%rsp), %rcx
-  %0 = load i64* %hp_var
-  %1 = load i64* %p_var
-  %2 = load i64* %arg0_var
-  %3 = load i64* %arg1_var
-  %4 = load i64* %arg2_var
-  %5 = load i64* %arg3_var
+  %0 = load i64, i64* %hp_var
+  %1 = load i64, i64* %p_var
+  %2 = load i64, i64* %arg0_var
+  %3 = load i64, i64* %arg1_var
+  %4 = load i64, i64* %arg2_var
+  %5 = load i64, i64* %arg3_var
   ; CHECK:      jmp bar
   tail call cc 11 void @bar(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) nounwind
   ret void
 }
 
 define cc 11 void @baz() nounwind {
-  %tmp_clos = load i64* @clos
+  %tmp_clos = load i64, i64* @clos
   %tmp_clos2 = inttoptr i64 %tmp_clos to i64*
   %indirect_call = bitcast i64* %tmp_clos2 to void (i64, i64, i64)*
   ; CHECK:      movl $42, %esi
diff --git a/llvm/test/CodeGen/X86/hoist-invariant-load.ll b/llvm/test/CodeGen/X86/hoist-invariant-load.ll
index c9e5290..2d3d99f 100644
--- a/llvm/test/CodeGen/X86/hoist-invariant-load.ll
+++ b/llvm/test/CodeGen/X86/hoist-invariant-load.ll
@@ -15,7 +15,7 @@
 
 for.body:                                         ; preds = %for.body, %entry
   %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %0 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8, !invariant.load !0
+  %0 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8, !invariant.load !0
   %call = tail call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %x, i8* %0)
   %inc = add i32 %i.01, 1
   %exitcond = icmp eq i32 %inc, 10000
diff --git a/llvm/test/CodeGen/X86/i128-mul.ll b/llvm/test/CodeGen/X86/i128-mul.ll
index 5fe1164..21bca02 100644
--- a/llvm/test/CodeGen/X86/i128-mul.ll
+++ b/llvm/test/CodeGen/X86/i128-mul.ll
@@ -27,7 +27,7 @@
   %carry.013 = phi i64 [ %conv6, %for.body ], [ 0, %entry ]
   %i.012 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i64, i64* %x, i64 %i.012
-  %0 = load i64* %arrayidx, align 8
+  %0 = load i64, i64* %arrayidx, align 8
   %conv2 = zext i64 %0 to i128
   %mul = mul i128 %conv2, %conv
   %conv3 = zext i64 %carry.013 to i128
diff --git a/llvm/test/CodeGen/X86/i128-ret.ll b/llvm/test/CodeGen/X86/i128-ret.ll
index 264f07c..1d76471 100644
--- a/llvm/test/CodeGen/X86/i128-ret.ll
+++ b/llvm/test/CodeGen/X86/i128-ret.ll
@@ -4,7 +4,7 @@
 ; CHECK: movq 8([[A0]]), %rdx
 
 define i128 @test(i128 *%P) {
-        %A = load i128* %P
+        %A = load i128, i128* %P
         ret i128 %A
 }
 
diff --git a/llvm/test/CodeGen/X86/i1narrowfail.ll b/llvm/test/CodeGen/X86/i1narrowfail.ll
index e280f3c..4f9a756 100644
--- a/llvm/test/CodeGen/X86/i1narrowfail.ll
+++ b/llvm/test/CodeGen/X86/i1narrowfail.ll
@@ -3,7 +3,7 @@
 ; CHECK-LABEL: @foo
 ; CHECK: orb     $16
 define void @foo(i64* %ptr) {
-  %r11 = load i64* %ptr, align 8
+  %r11 = load i64, i64* %ptr, align 8
   %r12 = or i64 16, %r11
   store i64 %r12, i64* %ptr, align 8
   ret void
diff --git a/llvm/test/CodeGen/X86/i256-add.ll b/llvm/test/CodeGen/X86/i256-add.ll
index 5a7a7a7..6164d89 100644
--- a/llvm/test/CodeGen/X86/i256-add.ll
+++ b/llvm/test/CodeGen/X86/i256-add.ll
@@ -3,15 +3,15 @@
 ; RUN: grep sbbl %t | count 7
 
 define void @add(i256* %p, i256* %q) nounwind {
-  %a = load i256* %p
-  %b = load i256* %q
+  %a = load i256, i256* %p
+  %b = load i256, i256* %q
   %c = add i256 %a, %b
   store i256 %c, i256* %p
   ret void
 }
 define void @sub(i256* %p, i256* %q) nounwind {
-  %a = load i256* %p
-  %b = load i256* %q
+  %a = load i256, i256* %p
+  %b = load i256, i256* %q
   %c = sub i256 %a, %b
   store i256 %c, i256* %p
   ret void
diff --git a/llvm/test/CodeGen/X86/i2k.ll b/llvm/test/CodeGen/X86/i2k.ll
index 6116c2e..83c10a5 100644
--- a/llvm/test/CodeGen/X86/i2k.ll
+++ b/llvm/test/CodeGen/X86/i2k.ll
@@ -1,8 +1,8 @@
 ; RUN: llc < %s -march=x86
 
 define void @foo(i2011* %x, i2011* %y, i2011* %p) nounwind {
-  %a = load i2011* %x
-  %b = load i2011* %y
+  %a = load i2011, i2011* %x
+  %b = load i2011, i2011* %y
   %c = add i2011 %a, %b
   store i2011 %c, i2011* %p
   ret void
diff --git a/llvm/test/CodeGen/X86/i486-fence-loop.ll b/llvm/test/CodeGen/X86/i486-fence-loop.ll
index d809619..96ed056 100644
--- a/llvm/test/CodeGen/X86/i486-fence-loop.ll
+++ b/llvm/test/CodeGen/X86/i486-fence-loop.ll
@@ -16,9 +16,9 @@
   br label %while.body
 
 while.body:
-  %0 = load volatile i32* %addr, align 4
+  %0 = load volatile i32, i32* %addr, align 4
   fence seq_cst
-  %1 = load volatile i32* %addr, align 4
+  %1 = load volatile i32, i32* %addr, align 4
   %cmp = icmp sgt i32 %1, %0
   br i1 %cmp, label %while.body, label %if.then
 
diff --git a/llvm/test/CodeGen/X86/i64-mem-copy.ll b/llvm/test/CodeGen/X86/i64-mem-copy.ll
index bf77896..21f8877 100644
--- a/llvm/test/CodeGen/X86/i64-mem-copy.ll
+++ b/llvm/test/CodeGen/X86/i64-mem-copy.ll
@@ -11,7 +11,7 @@
 
 define void @foo(i64* %x, i64* %y) nounwind  {
 entry:
-	%tmp1 = load i64* %y, align 8		; <i64> [#uses=1]
+	%tmp1 = load i64, i64* %y, align 8		; <i64> [#uses=1]
 	store i64 %tmp1, i64* %x, align 8
 	ret void
 }
diff --git a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll
index bb3778a..220eb72 100644
--- a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -169,11 +169,11 @@
 ; CHECK: testPR4485
 define void @testPR4485(x86_fp80* %a) {
 entry:
-	%0 = load x86_fp80* %a, align 16
+	%0 = load x86_fp80, x86_fp80* %a, align 16
 	%1 = fmul x86_fp80 %0, 0xK4006B400000000000000
 	%2 = fmul x86_fp80 %1, 0xK4012F424000000000000
 	tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %2)
-	%3 = load x86_fp80* %a, align 16
+	%3 = load x86_fp80, x86_fp80* %a, align 16
 	%4 = fmul x86_fp80 %3, 0xK4006B400000000000000
 	%5 = fmul x86_fp80 %4, 0xK4012F424000000000000
 	tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %5)
@@ -367,7 +367,7 @@
 ; Function Attrs: ssp
 define void @test_live_st(i32 %a1) {
 entry:
-  %0 = load x86_fp80* undef, align 16
+  %0 = load x86_fp80, x86_fp80* undef, align 16
   %cond = icmp eq i32 %a1, 1
   br i1 %cond, label %sw.bb4.i, label %_Z5tointRKe.exit
 
diff --git a/llvm/test/CodeGen/X86/inline-asm-out-regs.ll b/llvm/test/CodeGen/X86/inline-asm-out-regs.ll
index 46966f5..8e47f81 100644
--- a/llvm/test/CodeGen/X86/inline-asm-out-regs.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-out-regs.ll
@@ -9,7 +9,7 @@
         br label %bb1.i
 
 bb1.i:          ; preds = %bb6.i.i, %bb1.i, %entry
-        %0 = load i32* null, align 8            ; <i32> [#uses=1]
+        %0 = load i32, i32* null, align 8            ; <i32> [#uses=1]
         %1 = icmp ugt i32 %0, 1048575           ; <i1> [#uses=1]
         br i1 %1, label %bb2.i, label %bb1.i
 
@@ -19,7 +19,7 @@
         ; <i32> [#uses=1]
         %2 = lshr i32 %asmresult2.i.i, 8                ; <i32> [#uses=1]
         %3 = trunc i32 %2 to i8         ; <i8> [#uses=1]
-        %4 = load i32* @pcibios_last_bus, align 4               ; <i32> [#uses=1]
+        %4 = load i32, i32* @pcibios_last_bus, align 4               ; <i32> [#uses=1]
         %5 = icmp slt i32 %4, 0         ; <i1> [#uses=1]
         br i1 %5, label %bb5.i.i, label %bb6.i.i
 
diff --git a/llvm/test/CodeGen/X86/inline-asm-ptr-cast.ll b/llvm/test/CodeGen/X86/inline-asm-ptr-cast.ll
index 50e3021..2135346 100644
--- a/llvm/test/CodeGen/X86/inline-asm-ptr-cast.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-ptr-cast.ll
@@ -16,12 +16,12 @@
   store i64 1, i64* %flags, align 8
   store i64 -1, i64* %newflags, align 8
   %0 = bitcast i32* %dst to i8*
-  %tmp = load i64* %flags, align 8
+  %tmp = load i64, i64* %flags, align 8
   %and = and i64 %tmp, 1
   %1 = bitcast i32* %src to i8*
-  %tmp1 = load i8* %1
+  %tmp1 = load i8, i8* %1
   %2 = bitcast i32* %dst to i8*
-  %tmp2 = load i8* %2
+  %tmp2 = load i8, i8* %2
   call void asm "pushfq \0Aandq $2, (%rsp) \0Aorq  $3, (%rsp) \0Apopfq \0Aaddb $4, $1 \0Apushfq \0Apopq $0 \0A", "=*&rm,=*&rm,i,r,r,1,~{cc},~{dirflag},~{fpsr},~{flags}"(i64* %newflags, i8* %0, i64 -2, i64 %and, i8 %tmp1, i8 %tmp2) nounwind
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/inline-asm-stack-realign.ll b/llvm/test/CodeGen/X86/inline-asm-stack-realign.ll
index f2ac0f4..cfbe260 100644
--- a/llvm/test/CodeGen/X86/inline-asm-stack-realign.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-stack-realign.ll
@@ -11,6 +11,6 @@
   %r = alloca i32, align 16
   store i32 -1, i32* %r, align 16
   call void asm sideeffect inteldialect "push esi\0A\09xor esi, esi\0A\09mov dword ptr $0, esi\0A\09pop esi", "=*m,~{flags},~{esi},~{esp},~{dirflag},~{fpsr},~{flags}"(i32* %r)
-  %0 = load i32* %r, align 16
+  %0 = load i32, i32* %r, align 16
   ret i32 %0
 }
diff --git a/llvm/test/CodeGen/X86/inline-asm-stack-realign2.ll b/llvm/test/CodeGen/X86/inline-asm-stack-realign2.ll
index 0e4e7e1..3dfae11 100644
--- a/llvm/test/CodeGen/X86/inline-asm-stack-realign2.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-stack-realign2.ll
@@ -11,6 +11,6 @@
   %r = alloca i32, align 16
   store i32 -1, i32* %r, align 16
   call void asm sideeffect "push %esi\0A\09xor %esi, %esi\0A\09mov %esi, $0\0A\09pop %esi", "=*m,~{flags},~{esi},~{esp},~{dirflag},~{fpsr},~{flags}"(i32* %r)
-  %0 = load i32* %r, align 16
+  %0 = load i32, i32* %r, align 16
   ret i32 %0
 }
diff --git a/llvm/test/CodeGen/X86/inline-asm-stack-realign3.ll b/llvm/test/CodeGen/X86/inline-asm-stack-realign3.ll
index 3baaaaa..be0c6f5 100644
--- a/llvm/test/CodeGen/X86/inline-asm-stack-realign3.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-stack-realign3.ll
@@ -15,7 +15,7 @@
   br label %skip
 
 skip:
-  %0 = load i32* %r, align 128
+  %0 = load i32, i32* %r, align 128
   ret i32 %0
 }
 
diff --git a/llvm/test/CodeGen/X86/inline-asm-tied.ll b/llvm/test/CodeGen/X86/inline-asm-tied.ll
index fb5896b..9ceb0e8 100644
--- a/llvm/test/CodeGen/X86/inline-asm-tied.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-tied.ll
@@ -11,12 +11,12 @@
 	%retval = alloca i64		; <i64*> [#uses=2]
 	%_data.addr = alloca i64		; <i64*> [#uses=4]
 	store i64 %_data, i64* %_data.addr
-	%tmp = load i64* %_data.addr		; <i64> [#uses=1]
+	%tmp = load i64, i64* %_data.addr		; <i64> [#uses=1]
 	%0 = call i64 asm "bswap   %eax\0A\09bswap   %edx\0A\09xchgl   %eax, %edx", "=A,0,~{dirflag},~{fpsr},~{flags}"(i64 %tmp) nounwind		; <i64> [#uses=1]
 	store i64 %0, i64* %_data.addr
-	%tmp1 = load i64* %_data.addr		; <i64> [#uses=1]
+	%tmp1 = load i64, i64* %_data.addr		; <i64> [#uses=1]
 	store i64 %tmp1, i64* %retval
-	%1 = load i64* %retval		; <i64> [#uses=1]
+	%1 = load i64, i64* %retval		; <i64> [#uses=1]
 	ret i64 %1
 }
 
diff --git a/llvm/test/CodeGen/X86/ins_split_regalloc.ll b/llvm/test/CodeGen/X86/ins_split_regalloc.ll
index f5c5254..f04d088 100644
--- a/llvm/test/CodeGen/X86/ins_split_regalloc.ll
+++ b/llvm/test/CodeGen/X86/ins_split_regalloc.ll
@@ -25,7 +25,7 @@
 ; CHECK: jmpq *[[F_ADDR_TC]]
 define void @test(i32 %a, i32 %b, i32 %c) {
 entry:
-  %fct_f = load void (i32)** @f, align 8
+  %fct_f = load void (i32)*, void (i32)** @f, align 8
   tail call void %fct_f(i32 %a)
   tail call void %fct_f(i32 %b)
   tail call void %fct_f(i32 %c)
diff --git a/llvm/test/CodeGen/X86/ins_subreg_coalesce-1.ll b/llvm/test/CodeGen/X86/ins_subreg_coalesce-1.ll
index a74e3f2..4a5d8df 100644
--- a/llvm/test/CodeGen/X86/ins_subreg_coalesce-1.ll
+++ b/llvm/test/CodeGen/X86/ins_subreg_coalesce-1.ll
@@ -18,7 +18,7 @@
 bb4.i:		; preds = %bb22
 	ret i32 0
 walkExprTree.exit:		; preds = %bb22
-	%tmp83 = load i16* null, align 4		; <i16> [#uses=1]
+	%tmp83 = load i16, i16* null, align 4		; <i16> [#uses=1]
 	%tmp84 = or i16 %tmp83, 2		; <i16> [#uses=2]
 	store i16 %tmp84, i16* null, align 4
 	%tmp98993 = zext i16 %tmp84 to i32		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll b/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll
index 48df724b..71890bc 100644
--- a/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll
+++ b/llvm/test/CodeGen/X86/ins_subreg_coalesce-3.ll
@@ -35,7 +35,7 @@
 bb433:		; preds = %bb428
 	ret void
 bb650:		; preds = %bb650, %bb428
-	%tmp658 = load i8* null, align 8		; <i8> [#uses=1]
+	%tmp658 = load i8, i8* null, align 8		; <i8> [#uses=1]
 	%tmp659 = icmp eq i8 %tmp658, 0		; <i1> [#uses=1]
 	br i1 %tmp659, label %bb650, label %bb662
 bb662:		; preds = %bb650
@@ -43,7 +43,7 @@
 bb688:		; preds = %bb662
 	ret void
 bb761:		; preds = %bb662
-	%tmp487248736542 = load i32* null, align 4		; <i32> [#uses=2]
+	%tmp487248736542 = load i32, i32* null, align 4		; <i32> [#uses=2]
 	%tmp487648776541 = and i32 %tmp487248736542, 57344		; <i32> [#uses=1]
 	%tmp4881 = icmp eq i32 %tmp487648776541, 8192		; <i1> [#uses=1]
 	br i1 %tmp4881, label %bb4884, label %bb4897
@@ -54,10 +54,10 @@
 bb4897:		; preds = %bb4884, %bb761
 	ret void
 bb4932:		; preds = %bb4884
-	%tmp4933 = load i32* null, align 4		; <i32> [#uses=1]
+	%tmp4933 = load i32, i32* null, align 4		; <i32> [#uses=1]
 	br i1 %foo, label %bb5054, label %bb4940
 bb4940:		; preds = %bb4932
-	%tmp4943 = load i32* null, align 4		; <i32> [#uses=2]
+	%tmp4943 = load i32, i32* null, align 4		; <i32> [#uses=2]
 	switch i32 %tmp4933, label %bb5054 [
 		 i32 159, label %bb4970
 		 i32 160, label %bb5002
@@ -67,10 +67,10 @@
 	%tmp49764977 = and i16 %tmp49746536, 4095		; <i16> [#uses=1]
 	%mask498049814982 = zext i16 %tmp49764977 to i64		; <i64> [#uses=1]
 	%tmp4984 = getelementptr %struct.FONT_INFO, %struct.FONT_INFO* null, i64 %mask498049814982, i32 5		; <%struct.rec**> [#uses=1]
-	%tmp4985 = load %struct.rec** %tmp4984, align 8		; <%struct.rec*> [#uses=1]
+	%tmp4985 = load %struct.rec*, %struct.rec** %tmp4984, align 8		; <%struct.rec*> [#uses=1]
 	%tmp4988 = getelementptr %struct.rec, %struct.rec* %tmp4985, i64 0, i32 0, i32 3		; <%struct.THIRD_UNION*> [#uses=1]
 	%tmp4991 = bitcast %struct.THIRD_UNION* %tmp4988 to i32*		; <i32*> [#uses=1]
-	%tmp4992 = load i32* %tmp4991, align 8		; <i32> [#uses=1]
+	%tmp4992 = load i32, i32* %tmp4991, align 8		; <i32> [#uses=1]
 	%tmp49924993 = trunc i32 %tmp4992 to i16		; <i16> [#uses=1]
 	%tmp4996 = add i16 %tmp49924993, 0		; <i16> [#uses=1]
 	br label %bb5054
@@ -79,10 +79,10 @@
 	%tmp50085009 = and i16 %tmp50066537, 4095		; <i16> [#uses=1]
 	%mask501250135014 = zext i16 %tmp50085009 to i64		; <i64> [#uses=1]
 	%tmp5016 = getelementptr %struct.FONT_INFO, %struct.FONT_INFO* null, i64 %mask501250135014, i32 5		; <%struct.rec**> [#uses=1]
-	%tmp5017 = load %struct.rec** %tmp5016, align 8		; <%struct.rec*> [#uses=1]
+	%tmp5017 = load %struct.rec*, %struct.rec** %tmp5016, align 8		; <%struct.rec*> [#uses=1]
 	%tmp5020 = getelementptr %struct.rec, %struct.rec* %tmp5017, i64 0, i32 0, i32 3		; <%struct.THIRD_UNION*> [#uses=1]
 	%tmp5023 = bitcast %struct.THIRD_UNION* %tmp5020 to i32*		; <i32*> [#uses=1]
-	%tmp5024 = load i32* %tmp5023, align 8		; <i32> [#uses=1]
+	%tmp5024 = load i32, i32* %tmp5023, align 8		; <i32> [#uses=1]
 	%tmp50245025 = trunc i32 %tmp5024 to i16		; <i16> [#uses=1]
 	%tmp5028 = sub i16 %tmp50245025, 0		; <i16> [#uses=1]
 	br label %bb5054
diff --git a/llvm/test/CodeGen/X86/insertps-O0-bug.ll b/llvm/test/CodeGen/X86/insertps-O0-bug.ll
index e89ac26..73748ee 100644
--- a/llvm/test/CodeGen/X86/insertps-O0-bug.ll
+++ b/llvm/test/CodeGen/X86/insertps-O0-bug.ll
@@ -40,11 +40,11 @@
 ; CHECK: insertps $64, [[REG]],
 ; CHECK: ret
 entry:
-  %0 = load <4 x float>* %b, align 16
+  %0 = load <4 x float>, <4 x float>* %b, align 16
   %1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %0, i32 64)
   %2 = alloca <4 x float>, align 16
   store <4 x float> %1, <4 x float>* %2, align 16
-  %3 = load <4 x float>* %2, align 16
+  %3 = load <4 x float>, <4 x float>* %2, align 16
   ret <4 x float> %3
 }
 
diff --git a/llvm/test/CodeGen/X86/invalid-shift-immediate.ll b/llvm/test/CodeGen/X86/invalid-shift-immediate.ll
index 77a9f7e..21ad6e8 100644
--- a/llvm/test/CodeGen/X86/invalid-shift-immediate.ll
+++ b/llvm/test/CodeGen/X86/invalid-shift-immediate.ll
@@ -9,7 +9,7 @@
 	%x_addr = alloca i32		; <i32*> [#uses=2]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store i32 %x, i32* %x_addr
-	%tmp = load i32* %x_addr, align 4		; <i32> [#uses=1]
+	%tmp = load i32, i32* %x_addr, align 4		; <i32> [#uses=1]
 	%tmp1 = ashr i32 %tmp, -2		; <i32> [#uses=1]
 	%tmp2 = and i32 %tmp1, 1		; <i32> [#uses=1]
 	%tmp23 = trunc i32 %tmp2 to i8		; <i8> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/isel-optnone.ll b/llvm/test/CodeGen/X86/isel-optnone.ll
index 7ca62e4..831ad38 100644
--- a/llvm/test/CodeGen/X86/isel-optnone.ll
+++ b/llvm/test/CodeGen/X86/isel-optnone.ll
@@ -2,9 +2,9 @@
 
 define i32* @fooOptnone(i32* %p, i32* %q, i32** %z) #0 {
 entry:
-  %r = load i32* %p
-  %s = load i32* %q
-  %y = load i32** %z
+  %r = load i32, i32* %p
+  %s = load i32, i32* %q
+  %y = load i32*, i32** %z
 
   %t0 = add i32 %r, %s
   %t1 = add i32 %t0, 1
@@ -21,9 +21,9 @@
 
 define i32* @fooNormal(i32* %p, i32* %q, i32** %z) #1 {
 entry:
-  %r = load i32* %p
-  %s = load i32* %q
-  %y = load i32** %z
+  %r = load i32, i32* %p
+  %s = load i32, i32* %q
+  %y = load i32*, i32** %z
 
   %t0 = add i32 %r, %s
   %t1 = add i32 %t0, 1
diff --git a/llvm/test/CodeGen/X86/isel-sink.ll b/llvm/test/CodeGen/X86/isel-sink.ll
index 40d60a4..27abe05 100644
--- a/llvm/test/CodeGen/X86/isel-sink.ll
+++ b/llvm/test/CodeGen/X86/isel-sink.ll
@@ -18,6 +18,6 @@
 	store i32 4, i32* %P
 	ret i32 141
 F:
-	%V = load i32* %P
+	%V = load i32, i32* %P
 	ret i32 %V
 }
diff --git a/llvm/test/CodeGen/X86/isel-sink2.ll b/llvm/test/CodeGen/X86/isel-sink2.ll
index 7cfd73a..65f1994 100644
--- a/llvm/test/CodeGen/X86/isel-sink2.ll
+++ b/llvm/test/CodeGen/X86/isel-sink2.ll
@@ -5,13 +5,13 @@
 define i8 @test(i32 *%P) nounwind {
   %Q = getelementptr i32, i32* %P, i32 1
   %R = bitcast i32* %Q to i8*
-  %S = load i8* %R
+  %S = load i8, i8* %R
   %T = icmp eq i8 %S, 0
   br i1 %T, label %TB, label %F
 TB:
   ret i8 4
 F:
   %U = getelementptr i8, i8* %R, i32 3
-  %V = load i8* %U
+  %V = load i8, i8* %U
   ret i8 %V
 }
diff --git a/llvm/test/CodeGen/X86/isel-sink3.ll b/llvm/test/CodeGen/X86/isel-sink3.ll
index 15af197..fa633dc 100644
--- a/llvm/test/CodeGen/X86/isel-sink3.ll
+++ b/llvm/test/CodeGen/X86/isel-sink3.ll
@@ -11,7 +11,7 @@
 
 define i32 @bar(i32** %P) nounwind {
 entry:
-	%0 = load i32** %P, align 4		; <i32*> [#uses=2]
+	%0 = load i32*, i32** %P, align 4		; <i32*> [#uses=2]
 	%1 = getelementptr i32, i32* %0, i32 1		; <i32*> [#uses=1]
 	%2 = icmp ugt i32* %1, inttoptr (i64 1233 to i32*)		; <i1> [#uses=1]
 	br i1 %2, label %bb1, label %bb
@@ -22,6 +22,6 @@
 
 bb1:		; preds = %entry, %bb
 	%3 = getelementptr i32, i32* %1, i32 1		; <i32*> [#uses=1]
-	%4 = load i32* %3, align 4		; <i32> [#uses=1]
+	%4 = load i32, i32* %3, align 4		; <i32> [#uses=1]
 	ret i32 %4
 }
diff --git a/llvm/test/CodeGen/X86/jump_sign.ll b/llvm/test/CodeGen/X86/jump_sign.ll
index 7bdbb7b..440f1cc 100644
--- a/llvm/test/CodeGen/X86/jump_sign.ll
+++ b/llvm/test/CodeGen/X86/jump_sign.ll
@@ -164,7 +164,7 @@
 ; PR13046
 define void @func_o() nounwind uwtable {
 entry:
-  %0 = load i16* undef, align 2
+  %0 = load i16, i16* undef, align 2
   br i1 undef, label %if.then.i, label %if.end.i
 
 if.then.i:                                        ; preds = %entry
@@ -238,7 +238,7 @@
 ; CHECK: j
 ; CHECK-NOT: sub
 ; CHECK: ret
-  %0 = load i32* %offset, align 8
+  %0 = load i32, i32* %offset, align 8
   %cmp = icmp slt i32 %0, %size
   br i1 %cmp, label %return, label %if.end
 
@@ -287,10 +287,10 @@
 ; CHECK: andb
 ; CHECK: j
 ; CHECK: ret
-  %0 = load i32* @b, align 4
+  %0 = load i32, i32* @b, align 4
   %cmp = icmp ult i32 %0, %p1
   %conv = zext i1 %cmp to i32
-  %1 = load i32* @a, align 4
+  %1 = load i32, i32* @a, align 4
   %and = and i32 %conv, %1
   %conv1 = trunc i32 %and to i8
   %2 = urem i8 %conv1, 3
diff --git a/llvm/test/CodeGen/X86/large-constants.ll b/llvm/test/CodeGen/X86/large-constants.ll
index 157ecc4..945d633 100644
--- a/llvm/test/CodeGen/X86/large-constants.ll
+++ b/llvm/test/CodeGen/X86/large-constants.ll
@@ -40,10 +40,10 @@
 
 define void @constant_expressions() {
 entry:
-  %0 = load i64* inttoptr (i64 add (i64 51250129900, i64 0) to i64*)
-  %1 = load i64* inttoptr (i64 add (i64 51250129900, i64 8) to i64*)
-  %2 = load i64* inttoptr (i64 add (i64 51250129900, i64 16) to i64*)
-  %3 = load i64* inttoptr (i64 add (i64 51250129900, i64 24) to i64*)
+  %0 = load i64, i64* inttoptr (i64 add (i64 51250129900, i64 0) to i64*)
+  %1 = load i64, i64* inttoptr (i64 add (i64 51250129900, i64 8) to i64*)
+  %2 = load i64, i64* inttoptr (i64 add (i64 51250129900, i64 16) to i64*)
+  %3 = load i64, i64* inttoptr (i64 add (i64 51250129900, i64 24) to i64*)
   %4 = add i64 %0, %1
   %5 = add i64 %2, %3
   %6 = add i64 %4, %5
@@ -54,10 +54,10 @@
 
 define void @constant_expressions2() {
 entry:
-  %0 = load i64* inttoptr (i64 51250129900 to i64*)
-  %1 = load i64* inttoptr (i64 51250129908 to i64*)
-  %2 = load i64* inttoptr (i64 51250129916 to i64*)
-  %3 = load i64* inttoptr (i64 51250129924 to i64*)
+  %0 = load i64, i64* inttoptr (i64 51250129900 to i64*)
+  %1 = load i64, i64* inttoptr (i64 51250129908 to i64*)
+  %2 = load i64, i64* inttoptr (i64 51250129916 to i64*)
+  %3 = load i64, i64* inttoptr (i64 51250129924 to i64*)
   %4 = add i64 %0, %1
   %5 = add i64 %2, %3
   %6 = add i64 %4, %5
diff --git a/llvm/test/CodeGen/X86/ldzero.ll b/llvm/test/CodeGen/X86/ldzero.ll
index dab04bc..3befa8a 100644
--- a/llvm/test/CodeGen/X86/ldzero.ll
+++ b/llvm/test/CodeGen/X86/ldzero.ll
@@ -11,15 +11,15 @@
 	%d = alloca double, align 8		; <double*> [#uses=2]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store double 0.000000e+00, double* %d, align 8
-	%tmp1 = load double* %d, align 8		; <double> [#uses=1]
+	%tmp1 = load double, double* %d, align 8		; <double> [#uses=1]
 	%tmp12 = fpext double %tmp1 to x86_fp80		; <x86_fp80> [#uses=1]
 	store x86_fp80 %tmp12, x86_fp80* %tmp, align 16
-	%tmp3 = load x86_fp80* %tmp, align 16		; <x86_fp80> [#uses=1]
+	%tmp3 = load x86_fp80, x86_fp80* %tmp, align 16		; <x86_fp80> [#uses=1]
 	store x86_fp80 %tmp3, x86_fp80* %retval, align 16
 	br label %return
 
 return:		; preds = %entry
-	%retval4 = load x86_fp80* %retval		; <x86_fp80> [#uses=1]
+	%retval4 = load x86_fp80, x86_fp80* %retval		; <x86_fp80> [#uses=1]
 	ret x86_fp80 %retval4
 }
 
@@ -30,14 +30,14 @@
 	%ld = alloca x86_fp80, align 16		; <x86_fp80*> [#uses=2]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store x86_fp80 0xK00000000000000000000, x86_fp80* %ld, align 16
-	%tmp1 = load x86_fp80* %ld, align 16		; <x86_fp80> [#uses=1]
+	%tmp1 = load x86_fp80, x86_fp80* %ld, align 16		; <x86_fp80> [#uses=1]
 	%tmp12 = fptrunc x86_fp80 %tmp1 to double		; <double> [#uses=1]
 	store double %tmp12, double* %tmp, align 8
-	%tmp3 = load double* %tmp, align 8		; <double> [#uses=1]
+	%tmp3 = load double, double* %tmp, align 8		; <double> [#uses=1]
 	store double %tmp3, double* %retval, align 8
 	br label %return
 
 return:		; preds = %entry
-	%retval4 = load double* %retval		; <double> [#uses=1]
+	%retval4 = load double, double* %retval		; <double> [#uses=1]
 	ret double %retval4
 }
diff --git a/llvm/test/CodeGen/X86/lea-5.ll b/llvm/test/CodeGen/X86/lea-5.ll
index 3df65c6..b89c199 100644
--- a/llvm/test/CodeGen/X86/lea-5.ll
+++ b/llvm/test/CodeGen/X86/lea-5.ll
@@ -18,7 +18,7 @@
 
 ; CHECK: leaq	-40(%rsp,%r{{[^,]*}},4), %rax
 ; X32:   leal	-40(%rsp,%r{{[^,]*}},4), %eax
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp eq i32 %0, 0
   %inc = add nsw i32 %d.addr.0, 1
 
@@ -45,7 +45,7 @@
 
 ; CHECK: leaq	(%rsp,%r{{[^,]*}},4), %rax
 ; X32:   leal	(%rsp,%r{{[^,]*}},4), %eax
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp eq i32 %0, 0
   %inc = add nsw i32 %d.addr.0, 1
 
diff --git a/llvm/test/CodeGen/X86/lea-recursion.ll b/llvm/test/CodeGen/X86/lea-recursion.ll
index 9480600..5d93959 100644
--- a/llvm/test/CodeGen/X86/lea-recursion.ll
+++ b/llvm/test/CodeGen/X86/lea-recursion.ll
@@ -13,32 +13,32 @@
 
 define void @foo() {
 entry:
-	%tmp4 = load i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 0)		; <i32> [#uses=1]
-	%tmp8 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 0)		; <i32> [#uses=1]
+	%tmp4 = load i32, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 0)		; <i32> [#uses=1]
+	%tmp8 = load i32, i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 0)		; <i32> [#uses=1]
 	%tmp9 = add i32 %tmp4, 1		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=2]
 	store i32 %tmp10, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 1)
-	%tmp8.1 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 1)		; <i32> [#uses=1]
+	%tmp8.1 = load i32, i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 1)		; <i32> [#uses=1]
 	%tmp9.1 = add i32 %tmp10, 1		; <i32> [#uses=1]
 	%tmp10.1 = add i32 %tmp9.1, %tmp8.1		; <i32> [#uses=2]
 	store i32 %tmp10.1, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 2)
-	%tmp8.2 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 2)		; <i32> [#uses=1]
+	%tmp8.2 = load i32, i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 2)		; <i32> [#uses=1]
 	%tmp9.2 = add i32 %tmp10.1, 1		; <i32> [#uses=1]
 	%tmp10.2 = add i32 %tmp9.2, %tmp8.2		; <i32> [#uses=2]
 	store i32 %tmp10.2, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 3)
-	%tmp8.3 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 3)		; <i32> [#uses=1]
+	%tmp8.3 = load i32, i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 3)		; <i32> [#uses=1]
 	%tmp9.3 = add i32 %tmp10.2, 1		; <i32> [#uses=1]
 	%tmp10.3 = add i32 %tmp9.3, %tmp8.3		; <i32> [#uses=2]
 	store i32 %tmp10.3, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 4)
-	%tmp8.4 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 4)		; <i32> [#uses=1]
+	%tmp8.4 = load i32, i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 4)		; <i32> [#uses=1]
 	%tmp9.4 = add i32 %tmp10.3, 1		; <i32> [#uses=1]
 	%tmp10.4 = add i32 %tmp9.4, %tmp8.4		; <i32> [#uses=2]
 	store i32 %tmp10.4, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 5)
-	%tmp8.5 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 5)		; <i32> [#uses=1]
+	%tmp8.5 = load i32, i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 5)		; <i32> [#uses=1]
 	%tmp9.5 = add i32 %tmp10.4, 1		; <i32> [#uses=1]
 	%tmp10.5 = add i32 %tmp9.5, %tmp8.5		; <i32> [#uses=2]
 	store i32 %tmp10.5, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 6)
-	%tmp8.6 = load i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 6)		; <i32> [#uses=1]
+	%tmp8.6 = load i32, i32* getelementptr ([1000 x i32]* @g1, i32 0, i32 6)		; <i32> [#uses=1]
 	%tmp9.6 = add i32 %tmp10.5, 1		; <i32> [#uses=1]
 	%tmp10.6 = add i32 %tmp9.6, %tmp8.6		; <i32> [#uses=1]
 	store i32 %tmp10.6, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 7)
diff --git a/llvm/test/CodeGen/X86/legalize-shift-64.ll b/llvm/test/CodeGen/X86/legalize-shift-64.ll
index 64460bb..fb8f7b6 100644
--- a/llvm/test/CodeGen/X86/legalize-shift-64.ll
+++ b/llvm/test/CodeGen/X86/legalize-shift-64.ll
@@ -71,7 +71,7 @@
   %t = alloca i64, align 8
   store i32 1, i32* %x, align 4
   store i64 1, i64* %t, align 8  ;; DEAD
-  %load = load i32* %x, align 4
+  %load = load i32, i32* %x, align 4
   %shl = shl i32 %load, 8
   %add = add i32 %shl, -224
   %sh_prom = zext i32 %add to i64
diff --git a/llvm/test/CodeGen/X86/licm-nested.ll b/llvm/test/CodeGen/X86/licm-nested.ll
index 2c71cbb..d8af64f 100644
--- a/llvm/test/CodeGen/X86/licm-nested.ll
+++ b/llvm/test/CodeGen/X86/licm-nested.ll
@@ -14,7 +14,7 @@
 
 while.cond.preheader:                             ; preds = %entry
   %arrayidx = getelementptr inbounds i8*, i8** %argv, i64 1 ; <i8**> [#uses=1]
-  %tmp2 = load i8** %arrayidx                     ; <i8*> [#uses=1]
+  %tmp2 = load i8*, i8** %arrayidx                     ; <i8*> [#uses=1]
   %call = tail call i32 @atoi(i8* %tmp2) nounwind ; <i32> [#uses=2]
   %tobool51 = icmp eq i32 %call, 0                ; <i1> [#uses=1]
   br i1 %tobool51, label %while.end, label %bb.nph53
@@ -50,7 +50,7 @@
   %tmp73 = shl i64 %indvar57, 1                   ; <i64> [#uses=1]
   %add = add i64 %tmp73, 4                        ; <i64> [#uses=2]
   %arrayidx17 = getelementptr [8193 x i8], [8193 x i8]* @main.flags, i64 0, i64 %tmp68 ; <i8*> [#uses=1]
-  %tmp18 = load i8* %arrayidx17                   ; <i8> [#uses=1]
+  %tmp18 = load i8, i8* %arrayidx17                   ; <i8> [#uses=1]
   %tobool19 = icmp eq i8 %tmp18, 0                ; <i1> [#uses=1]
   br i1 %tobool19, label %for.inc35, label %if.then
 
diff --git a/llvm/test/CodeGen/X86/liveness-local-regalloc.ll b/llvm/test/CodeGen/X86/liveness-local-regalloc.ll
index 36050f7..0954f9d 100644
--- a/llvm/test/CodeGen/X86/liveness-local-regalloc.ll
+++ b/llvm/test/CodeGen/X86/liveness-local-regalloc.ll
@@ -71,7 +71,7 @@
   %A2 = alloca <2 x i8>
   %A1 = alloca i1
   %A = alloca i32
-  %L = load i8* %0
+  %L = load i8, i8* %0
   store i8 -37, i8* %0
   %E = extractelement <4 x i64> zeroinitializer, i32 2
   %Shuff = shufflevector <4 x i64> zeroinitializer, <4 x i64> zeroinitializer, <4 x i32> <i32 5, i32 7, i32 1, i32 3>
diff --git a/llvm/test/CodeGen/X86/load-slice.ll b/llvm/test/CodeGen/X86/load-slice.ll
index 464b191..7f3dd63 100644
--- a/llvm/test/CodeGen/X86/load-slice.ll
+++ b/llvm/test/CodeGen/X86/load-slice.ll
@@ -48,7 +48,7 @@
 entry:
   %arrayidx = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %out_start
   %tmp = bitcast %class.Complex* %arrayidx to i64*
-  %tmp1 = load i64* %tmp, align 8
+  %tmp1 = load i64, i64* %tmp, align 8
   %t0.sroa.0.0.extract.trunc = trunc i64 %tmp1 to i32
   %tmp2 = bitcast i32 %t0.sroa.0.0.extract.trunc to float
   %t0.sroa.2.0.extract.shift = lshr i64 %tmp1, 32
@@ -57,11 +57,11 @@
   %add = add i64 %out_start, 8
   %arrayidx2 = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %add
   %i.i = getelementptr inbounds %class.Complex, %class.Complex* %arrayidx2, i64 0, i32 0
-  %tmp4 = load float* %i.i, align 4
+  %tmp4 = load float, float* %i.i, align 4
   %add.i = fadd float %tmp4, %tmp2
   %retval.sroa.0.0.vec.insert.i = insertelement <2 x float> undef, float %add.i, i32 0
   %r.i = getelementptr inbounds %class.Complex, %class.Complex* %arrayidx2, i64 0, i32 1
-  %tmp5 = load float* %r.i, align 4
+  %tmp5 = load float, float* %r.i, align 4
   %add5.i = fadd float %tmp5, %tmp3
   %retval.sroa.0.4.vec.insert.i = insertelement <2 x float> %retval.sroa.0.0.vec.insert.i, float %add5.i, i32 1
   %ref.tmp.sroa.0.0.cast = bitcast %class.Complex* %arrayidx to <2 x float>*
@@ -102,7 +102,7 @@
 define i32 @t2(%class.Complex* nocapture %out, i64 %out_start) {
   %arrayidx = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %out_start
   %bitcast = bitcast %class.Complex* %arrayidx to i64*
-  %chunk64 = load i64* %bitcast, align 8
+  %chunk64 = load i64, i64* %bitcast, align 8
   %slice32_low = trunc i64 %chunk64 to i32
   %shift48 = lshr i64 %chunk64, 48
   %slice32_high = trunc i64 %shift48 to i32
@@ -127,7 +127,7 @@
 define i32 @t3(%class.Complex* nocapture %out, i64 %out_start) {
   %arrayidx = getelementptr inbounds %class.Complex, %class.Complex* %out, i64 %out_start
   %bitcast = bitcast %class.Complex* %arrayidx to i64*
-  %chunk64 = load i64* %bitcast, align 8
+  %chunk64 = load i64, i64* %bitcast, align 8
   %slice32_low = trunc i64 %chunk64 to i32
   %shift48 = lshr i64 %chunk64, 48
   %slice32_high = trunc i64 %shift48 to i32
diff --git a/llvm/test/CodeGen/X86/longlong-deadload.ll b/llvm/test/CodeGen/X86/longlong-deadload.ll
index 73e1012..3adaf49 100644
--- a/llvm/test/CodeGen/X86/longlong-deadload.ll
+++ b/llvm/test/CodeGen/X86/longlong-deadload.ll
@@ -6,7 +6,7 @@
 ; CHECK: movl 4(%esp), %[[REGISTER:.*]]
 ; CHECK-NOT: 4(%[[REGISTER]])
 ; CHECK: ret
-	%tmp1 = load i64* %P, align 8		; <i64> [#uses=1]
+	%tmp1 = load i64, i64* %P, align 8		; <i64> [#uses=1]
 	%tmp2 = xor i64 %tmp1, 1		; <i64> [#uses=1]
 	store i64 %tmp2, i64* %P, align 8
 	ret void
diff --git a/llvm/test/CodeGen/X86/loop-strength-reduce4.ll b/llvm/test/CodeGen/X86/loop-strength-reduce4.ll
index 681051e..786534b 100644
--- a/llvm/test/CodeGen/X86/loop-strength-reduce4.ll
+++ b/llvm/test/CodeGen/X86/loop-strength-reduce4.ll
@@ -27,30 +27,30 @@
 	%t.063.0 = phi i32 [ 0, %entry ], [ %tmp47, %bb ]		; <i32> [#uses=1]
 	%j.065.0 = shl i32 %indvar, 2		; <i32> [#uses=4]
 	%tmp3 = getelementptr [0 x i32], [0 x i32]* @state, i32 0, i32 %j.065.0		; <i32*> [#uses=2]
-	%tmp4 = load i32* %tmp3, align 4		; <i32> [#uses=1]
+	%tmp4 = load i32, i32* %tmp3, align 4		; <i32> [#uses=1]
 	%tmp6 = getelementptr [0 x i32], [0 x i32]* @S, i32 0, i32 %t.063.0		; <i32*> [#uses=1]
-	%tmp7 = load i32* %tmp6, align 4		; <i32> [#uses=1]
+	%tmp7 = load i32, i32* %tmp6, align 4		; <i32> [#uses=1]
 	%tmp8 = xor i32 %tmp7, %tmp4		; <i32> [#uses=2]
 	store i32 %tmp8, i32* %tmp3, align 4
 	%tmp1378 = or i32 %j.065.0, 1		; <i32> [#uses=1]
 	%tmp16 = getelementptr [0 x i32], [0 x i32]* @state, i32 0, i32 %tmp1378		; <i32*> [#uses=2]
-	%tmp17 = load i32* %tmp16, align 4		; <i32> [#uses=1]
+	%tmp17 = load i32, i32* %tmp16, align 4		; <i32> [#uses=1]
 	%tmp19 = getelementptr [0 x i32], [0 x i32]* @S, i32 0, i32 %tmp8		; <i32*> [#uses=1]
-	%tmp20 = load i32* %tmp19, align 4		; <i32> [#uses=1]
+	%tmp20 = load i32, i32* %tmp19, align 4		; <i32> [#uses=1]
 	%tmp21 = xor i32 %tmp20, %tmp17		; <i32> [#uses=2]
 	store i32 %tmp21, i32* %tmp16, align 4
 	%tmp2680 = or i32 %j.065.0, 2		; <i32> [#uses=1]
 	%tmp29 = getelementptr [0 x i32], [0 x i32]* @state, i32 0, i32 %tmp2680		; <i32*> [#uses=2]
-	%tmp30 = load i32* %tmp29, align 4		; <i32> [#uses=1]
+	%tmp30 = load i32, i32* %tmp29, align 4		; <i32> [#uses=1]
 	%tmp32 = getelementptr [0 x i32], [0 x i32]* @S, i32 0, i32 %tmp21		; <i32*> [#uses=1]
-	%tmp33 = load i32* %tmp32, align 4		; <i32> [#uses=1]
+	%tmp33 = load i32, i32* %tmp32, align 4		; <i32> [#uses=1]
 	%tmp34 = xor i32 %tmp33, %tmp30		; <i32> [#uses=2]
 	store i32 %tmp34, i32* %tmp29, align 4
 	%tmp3982 = or i32 %j.065.0, 3		; <i32> [#uses=1]
 	%tmp42 = getelementptr [0 x i32], [0 x i32]* @state, i32 0, i32 %tmp3982		; <i32*> [#uses=2]
-	%tmp43 = load i32* %tmp42, align 4		; <i32> [#uses=1]
+	%tmp43 = load i32, i32* %tmp42, align 4		; <i32> [#uses=1]
 	%tmp45 = getelementptr [0 x i32], [0 x i32]* @S, i32 0, i32 %tmp34		; <i32*> [#uses=1]
-	%tmp46 = load i32* %tmp45, align 4		; <i32> [#uses=1]
+	%tmp46 = load i32, i32* %tmp45, align 4		; <i32> [#uses=1]
 	%tmp47 = xor i32 %tmp46, %tmp43		; <i32> [#uses=3]
 	store i32 %tmp47, i32* %tmp42, align 4
 	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
diff --git a/llvm/test/CodeGen/X86/loop-strength-reduce7.ll b/llvm/test/CodeGen/X86/loop-strength-reduce7.ll
index eb5a4b3..92ec485 100644
--- a/llvm/test/CodeGen/X86/loop-strength-reduce7.ll
+++ b/llvm/test/CodeGen/X86/loop-strength-reduce7.ll
@@ -28,7 +28,7 @@
 	%indvar32.i = phi i32 [ %indvar.next33.i, %bb33.i47 ], [ 0, %bb28.i37 ]		; <i32> [#uses=2]
 	%sfb.314.i = add i32 %indvar32.i, 0		; <i32> [#uses=3]
 	%1 = getelementptr [4 x [21 x double]], [4 x [21 x double]]* null, i32 0, i32 %0, i32 %sfb.314.i		; <double*> [#uses=1]
-	%2 = load double* %1, align 8		; <double> [#uses=0]
+	%2 = load double, double* %1, align 8		; <double> [#uses=0]
 	br i1 false, label %bb30.i41, label %bb33.i47
 
 bb30.i41:		; preds = %bb29.i38
diff --git a/llvm/test/CodeGen/X86/loop-strength-reduce8.ll b/llvm/test/CodeGen/X86/loop-strength-reduce8.ll
index 8e393ac..716e147 100644
--- a/llvm/test/CodeGen/X86/loop-strength-reduce8.ll
+++ b/llvm/test/CodeGen/X86/loop-strength-reduce8.ll
@@ -54,8 +54,8 @@
 	call void @llvm.va_start(i8* %p1)
 	%0 = call fastcc %struct.tree_node* @make_node(i32 %code) nounwind		; <%struct.tree_node*> [#uses=2]
 	%1 = getelementptr [256 x i32], [256 x i32]* @tree_code_length, i32 0, i32 %code		; <i32*> [#uses=1]
-	%2 = load i32* %1, align 4		; <i32> [#uses=2]
-	%3 = load i32* @lineno, align 4		; <i32> [#uses=1]
+	%2 = load i32, i32* %1, align 4		; <i32> [#uses=2]
+	%3 = load i32, i32* @lineno, align 4		; <i32> [#uses=1]
 	%4 = bitcast %struct.tree_node* %0 to %struct.tree_exp*		; <%struct.tree_exp*> [#uses=2]
 	%5 = getelementptr %struct.tree_exp, %struct.tree_exp* %4, i32 0, i32 1		; <i32*> [#uses=1]
 	store i32 %3, i32* %5, align 4
@@ -64,11 +64,11 @@
 
 bb:		; preds = %bb, %entry
 	%i.01 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ]		; <i32> [#uses=2]
-	%7 = load i8** %p, align 4		; <i8*> [#uses=2]
+	%7 = load i8*, i8** %p, align 4		; <i8*> [#uses=2]
 	%8 = getelementptr i8, i8* %7, i32 4		; <i8*> [#uses=1]
 	store i8* %8, i8** %p, align 4
 	%9 = bitcast i8* %7 to %struct.tree_node**		; <%struct.tree_node**> [#uses=1]
-	%10 = load %struct.tree_node** %9, align 4		; <%struct.tree_node*> [#uses=1]
+	%10 = load %struct.tree_node*, %struct.tree_node** %9, align 4		; <%struct.tree_node*> [#uses=1]
 	%11 = getelementptr %struct.tree_exp, %struct.tree_exp* %4, i32 0, i32 2, i32 %i.01		; <%struct.tree_node**> [#uses=1]
 	store %struct.tree_node* %10, %struct.tree_node** %11, align 4
 	%indvar.next = add i32 %i.01, 1		; <i32> [#uses=2]
diff --git a/llvm/test/CodeGen/X86/lsr-delayed-fold.ll b/llvm/test/CodeGen/X86/lsr-delayed-fold.ll
index 8135914..eaa52de 100644
--- a/llvm/test/CodeGen/X86/lsr-delayed-fold.ll
+++ b/llvm/test/CodeGen/X86/lsr-delayed-fold.ll
@@ -42,7 +42,7 @@
 
 lbl_264:                                          ; preds = %if.end, %lbl_264.preheader
   %g_263.tmp.0 = phi i8 [ %g_263.tmp.1, %for.cond ] ; <i8> [#uses=1]
-  %tmp7 = load i16* undef                         ; <i16> [#uses=1]
+  %tmp7 = load i16, i16* undef                         ; <i16> [#uses=1]
   %conv8 = trunc i16 %tmp7 to i8                  ; <i8> [#uses=1]
   %mul.i = mul i8 %p_95.addr.0, %p_95.addr.0      ; <i8> [#uses=1]
   %mul.i18 = mul i8 %mul.i, %conv8                ; <i8> [#uses=1]
@@ -99,7 +99,7 @@
   %add112 = trunc i64 %tmp45 to i32               ; <i32> [#uses=1]
   %add118 = trunc i64 %tmp47 to i32               ; <i32> [#uses=1]
   %tmp10 = getelementptr %struct.Bu, %struct.Bu* %bu, i64 %indvar, i32 2 ; <i32*> [#uses=1]
-  %tmp11 = load i32* %tmp10                       ; <i32> [#uses=0]
+  %tmp11 = load i32, i32* %tmp10                       ; <i32> [#uses=0]
   tail call void undef(i32 %add22)
   tail call void undef(i32 %add28)
   tail call void undef(i32 %add34)
diff --git a/llvm/test/CodeGen/X86/lsr-i386.ll b/llvm/test/CodeGen/X86/lsr-i386.ll
index 5829fe2..85e3d3a 100644
--- a/llvm/test/CodeGen/X86/lsr-i386.ll
+++ b/llvm/test/CodeGen/X86/lsr-i386.ll
@@ -22,7 +22,7 @@
 bb1:                                              ; preds = %bb6, %bb
   %indvar11 = phi i32 [ %indvar.next12, %bb6 ], [ 0, %entry ] ; <i32> [#uses=2]
   %tmp21 = add i32 %indvar11, 1                   ; <i32> [#uses=1]
-  %t = load i32* getelementptr inbounds (%struct.anon* @mp2grad_, i32 0, i32 1)
+  %t = load i32, i32* getelementptr inbounds (%struct.anon* @mp2grad_, i32 0, i32 1)
   %tmp15 = mul i32 %n, %t                      ; <i32> [#uses=1]
   %tmp16 = add i32 %tmp21, %tmp15                 ; <i32> [#uses=1]
   %tmp17 = shl i32 %tmp16, 3                      ; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll
index d23414e..2e3929b 100644
--- a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -17,9 +17,9 @@
 
 define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r) nounwind {
 entry:
-	%0 = load i32* %rk, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* %rk, align 4		; <i32> [#uses=1]
 	%1 = getelementptr i32, i32* %rk, i64 1		; <i32*> [#uses=1]
-	%2 = load i32* %1, align 4		; <i32> [#uses=1]
+	%2 = load i32, i32* %1, align 4		; <i32> [#uses=1]
 	%tmp15 = add i32 %r, -1		; <i32> [#uses=1]
 	%tmp.16 = zext i32 %tmp15 to i64		; <i64> [#uses=2]
 	br label %bb
@@ -33,36 +33,36 @@
 	%3 = lshr i32 %s0.0, 24		; <i32> [#uses=1]
 	%4 = zext i32 %3 to i64		; <i64> [#uses=1]
 	%5 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %4		; <i32*> [#uses=1]
-	%6 = load i32* %5, align 4		; <i32> [#uses=1]
+	%6 = load i32, i32* %5, align 4		; <i32> [#uses=1]
 	%7 = lshr i32 %s1.0, 16		; <i32> [#uses=1]
 	%8 = and i32 %7, 255		; <i32> [#uses=1]
 	%9 = zext i32 %8 to i64		; <i64> [#uses=1]
 	%10 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %9		; <i32*> [#uses=1]
-	%11 = load i32* %10, align 4		; <i32> [#uses=1]
+	%11 = load i32, i32* %10, align 4		; <i32> [#uses=1]
 	%ctg2.sum2728 = or i64 %tmp18, 8		; <i64> [#uses=1]
 	%12 = getelementptr i8, i8* %rk26, i64 %ctg2.sum2728		; <i8*> [#uses=1]
 	%13 = bitcast i8* %12 to i32*		; <i32*> [#uses=1]
-	%14 = load i32* %13, align 4		; <i32> [#uses=1]
+	%14 = load i32, i32* %13, align 4		; <i32> [#uses=1]
 	%15 = xor i32 %11, %6		; <i32> [#uses=1]
 	%16 = xor i32 %15, %14		; <i32> [#uses=3]
 	%17 = lshr i32 %s1.0, 24		; <i32> [#uses=1]
 	%18 = zext i32 %17 to i64		; <i64> [#uses=1]
 	%19 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %18		; <i32*> [#uses=1]
-	%20 = load i32* %19, align 4		; <i32> [#uses=1]
+	%20 = load i32, i32* %19, align 4		; <i32> [#uses=1]
 	%21 = and i32 %s0.0, 255		; <i32> [#uses=1]
 	%22 = zext i32 %21 to i64		; <i64> [#uses=1]
 	%23 = getelementptr [256 x i32], [256 x i32]* @Te3, i64 0, i64 %22		; <i32*> [#uses=1]
-	%24 = load i32* %23, align 4		; <i32> [#uses=1]
+	%24 = load i32, i32* %23, align 4		; <i32> [#uses=1]
 	%ctg2.sum2930 = or i64 %tmp18, 12		; <i64> [#uses=1]
 	%25 = getelementptr i8, i8* %rk26, i64 %ctg2.sum2930		; <i8*> [#uses=1]
 	%26 = bitcast i8* %25 to i32*		; <i32*> [#uses=1]
-	%27 = load i32* %26, align 4		; <i32> [#uses=1]
+	%27 = load i32, i32* %26, align 4		; <i32> [#uses=1]
 	%28 = xor i32 %24, %20		; <i32> [#uses=1]
 	%29 = xor i32 %28, %27		; <i32> [#uses=4]
 	%30 = lshr i32 %16, 24		; <i32> [#uses=1]
 	%31 = zext i32 %30 to i64		; <i64> [#uses=1]
 	%32 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %31		; <i32*> [#uses=1]
-	%33 = load i32* %32, align 4		; <i32> [#uses=2]
+	%33 = load i32, i32* %32, align 4		; <i32> [#uses=2]
 	%exitcond = icmp eq i64 %indvar, %tmp.16		; <i1> [#uses=1]
 	br i1 %exitcond, label %bb2, label %bb1
 
@@ -74,22 +74,22 @@
 	%37 = and i32 %36, 255		; <i32> [#uses=1]
 	%38 = zext i32 %37 to i64		; <i64> [#uses=1]
 	%39 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %38		; <i32*> [#uses=1]
-	%40 = load i32* %39, align 4		; <i32> [#uses=1]
-	%41 = load i32* %35, align 4		; <i32> [#uses=1]
+	%40 = load i32, i32* %39, align 4		; <i32> [#uses=1]
+	%41 = load i32, i32* %35, align 4		; <i32> [#uses=1]
 	%42 = xor i32 %40, %33		; <i32> [#uses=1]
 	%43 = xor i32 %42, %41		; <i32> [#uses=1]
 	%44 = lshr i32 %29, 24		; <i32> [#uses=1]
 	%45 = zext i32 %44 to i64		; <i64> [#uses=1]
 	%46 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %45		; <i32*> [#uses=1]
-	%47 = load i32* %46, align 4		; <i32> [#uses=1]
+	%47 = load i32, i32* %46, align 4		; <i32> [#uses=1]
 	%48 = and i32 %16, 255		; <i32> [#uses=1]
 	%49 = zext i32 %48 to i64		; <i64> [#uses=1]
 	%50 = getelementptr [256 x i32], [256 x i32]* @Te3, i64 0, i64 %49		; <i32*> [#uses=1]
-	%51 = load i32* %50, align 4		; <i32> [#uses=1]
+	%51 = load i32, i32* %50, align 4		; <i32> [#uses=1]
 	%ctg2.sum32 = add i64 %tmp18, 20		; <i64> [#uses=1]
 	%52 = getelementptr i8, i8* %rk26, i64 %ctg2.sum32		; <i8*> [#uses=1]
 	%53 = bitcast i8* %52 to i32*		; <i32*> [#uses=1]
-	%54 = load i32* %53, align 4		; <i32> [#uses=1]
+	%54 = load i32, i32* %53, align 4		; <i32> [#uses=1]
 	%55 = xor i32 %51, %47		; <i32> [#uses=1]
 	%56 = xor i32 %55, %54		; <i32> [#uses=1]
 	%indvar.next = add i64 %indvar, 1		; <i64> [#uses=1]
@@ -105,26 +105,26 @@
 	%60 = and i32 %59, 255		; <i32> [#uses=1]
 	%61 = zext i32 %60 to i64		; <i64> [#uses=1]
 	%62 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %61		; <i32*> [#uses=1]
-	%63 = load i32* %62, align 4		; <i32> [#uses=1]
+	%63 = load i32, i32* %62, align 4		; <i32> [#uses=1]
 	%64 = and i32 %63, 16711680		; <i32> [#uses=1]
 	%65 = or i32 %64, %58		; <i32> [#uses=1]
-	%66 = load i32* %57, align 4		; <i32> [#uses=1]
+	%66 = load i32, i32* %57, align 4		; <i32> [#uses=1]
 	%67 = xor i32 %65, %66		; <i32> [#uses=2]
 	%68 = lshr i32 %29, 8		; <i32> [#uses=1]
 	%69 = zext i32 %68 to i64		; <i64> [#uses=1]
 	%70 = getelementptr [256 x i32], [256 x i32]* @Te0, i64 0, i64 %69		; <i32*> [#uses=1]
-	%71 = load i32* %70, align 4		; <i32> [#uses=1]
+	%71 = load i32, i32* %70, align 4		; <i32> [#uses=1]
 	%72 = and i32 %71, -16777216		; <i32> [#uses=1]
 	%73 = and i32 %16, 255		; <i32> [#uses=1]
 	%74 = zext i32 %73 to i64		; <i64> [#uses=1]
 	%75 = getelementptr [256 x i32], [256 x i32]* @Te1, i64 0, i64 %74		; <i32*> [#uses=1]
-	%76 = load i32* %75, align 4		; <i32> [#uses=1]
+	%76 = load i32, i32* %75, align 4		; <i32> [#uses=1]
 	%77 = and i32 %76, 16711680		; <i32> [#uses=1]
 	%78 = or i32 %77, %72		; <i32> [#uses=1]
 	%ctg2.sum25 = add i64 %tmp10, 20		; <i64> [#uses=1]
 	%79 = getelementptr i8, i8* %rk26, i64 %ctg2.sum25		; <i8*> [#uses=1]
 	%80 = bitcast i8* %79 to i32*		; <i32*> [#uses=1]
-	%81 = load i32* %80, align 4		; <i32> [#uses=1]
+	%81 = load i32, i32* %80, align 4		; <i32> [#uses=1]
 	%82 = xor i32 %78, %81		; <i32> [#uses=2]
 	%83 = lshr i32 %67, 24		; <i32> [#uses=1]
 	%84 = trunc i32 %83 to i8		; <i8> [#uses=1]
@@ -176,7 +176,7 @@
   %bi.06 = phi i32 [ 0, %for.body.lr.ph ], [ %i.addr.0.bi.0, %for.body ]
   %b.05 = phi i32 [ 0, %for.body.lr.ph ], [ %.b.0, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %1 = load i32* %arrayidx, align 4
+  %1 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp ugt i32 %1, %b.05
   %.b.0 = select i1 %cmp1, i32 %1, i32 %b.05
   %2 = trunc i64 %indvars.iv to i32
diff --git a/llvm/test/CodeGen/X86/lsr-normalization.ll b/llvm/test/CodeGen/X86/lsr-normalization.ll
index 4e7cebe..a883338 100644
--- a/llvm/test/CodeGen/X86/lsr-normalization.ll
+++ b/llvm/test/CodeGen/X86/lsr-normalization.ll
@@ -39,7 +39,7 @@
 bb10:                                             ; preds = %bb8, %bb
   %tmp11 = bitcast i8* %tmp5 to %0*               ; <%0*> [#uses=1]
   call void @_ZNSt15_List_node_base4hookEPS_(%0* %tmp11, %0* %tmp) nounwind
-  %tmp12 = load %0** %tmp3                        ; <%0*> [#uses=3]
+  %tmp12 = load %0*, %0** %tmp3                        ; <%0*> [#uses=3]
   %tmp13 = icmp eq %0* %tmp12, %tmp               ; <i1> [#uses=1]
   br i1 %tmp13, label %bb14, label %bb16
 
@@ -51,7 +51,7 @@
   %tmp17 = phi i64 [ %tmp22, %bb16 ], [ 0, %bb10 ] ; <i64> [#uses=1]
   %tmp18 = phi %0* [ %tmp20, %bb16 ], [ %tmp12, %bb10 ] ; <%0*> [#uses=1]
   %tmp19 = getelementptr inbounds %0, %0* %tmp18, i64 0, i32 0 ; <%0**> [#uses=1]
-  %tmp20 = load %0** %tmp19                       ; <%0*> [#uses=2]
+  %tmp20 = load %0*, %0** %tmp19                       ; <%0*> [#uses=2]
   %tmp21 = icmp eq %0* %tmp20, %tmp               ; <i1> [#uses=1]
   %tmp22 = add i64 %tmp17, 1                      ; <i64> [#uses=2]
   br i1 %tmp21, label %bb23, label %bb16
@@ -64,7 +64,7 @@
   %tmp26 = phi i64 [ %tmp31, %bb25 ], [ 0, %bb23 ] ; <i64> [#uses=1]
   %tmp27 = phi %0* [ %tmp29, %bb25 ], [ %tmp12, %bb23 ] ; <%0*> [#uses=1]
   %tmp28 = getelementptr inbounds %0, %0* %tmp27, i64 0, i32 0 ; <%0**> [#uses=1]
-  %tmp29 = load %0** %tmp28                       ; <%0*> [#uses=2]
+  %tmp29 = load %0*, %0** %tmp28                       ; <%0*> [#uses=2]
   %tmp30 = icmp eq %0* %tmp29, %tmp               ; <i1> [#uses=1]
   %tmp31 = add i64 %tmp26, 1                      ; <i64> [#uses=2]
   br i1 %tmp30, label %bb32, label %bb25
@@ -75,14 +75,14 @@
   br label %bb35
 
 bb35:                                             ; preds = %bb32, %bb14
-  %tmp36 = load %0** %tmp3                        ; <%0*> [#uses=2]
+  %tmp36 = load %0*, %0** %tmp3                        ; <%0*> [#uses=2]
   %tmp37 = icmp eq %0* %tmp36, %tmp               ; <i1> [#uses=1]
   br i1 %tmp37, label %bb44, label %bb38
 
 bb38:                                             ; preds = %bb38, %bb35
   %tmp39 = phi %0* [ %tmp41, %bb38 ], [ %tmp36, %bb35 ] ; <%0*> [#uses=2]
   %tmp40 = getelementptr inbounds %0, %0* %tmp39, i64 0, i32 0 ; <%0**> [#uses=1]
-  %tmp41 = load %0** %tmp40                       ; <%0*> [#uses=2]
+  %tmp41 = load %0*, %0** %tmp40                       ; <%0*> [#uses=2]
   %tmp42 = bitcast %0* %tmp39 to i8*              ; <i8*> [#uses=1]
   call void @_ZdlPv(i8* %tmp42) nounwind
   %tmp43 = icmp eq %0* %tmp41, %tmp               ; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/lsr-redundant-addressing.ll b/llvm/test/CodeGen/X86/lsr-redundant-addressing.ll
index bd83efd..31a1859 100644
--- a/llvm/test/CodeGen/X86/lsr-redundant-addressing.ll
+++ b/llvm/test/CodeGen/X86/lsr-redundant-addressing.ll
@@ -23,7 +23,7 @@
   %tmp39 = phi i64 [ %tmp201, %bb200 ], [ 0, %bb ]
   %tmp40 = sub i64 0, %tmp39
   %tmp47 = getelementptr [5 x %0], [5 x %0]* @pgm, i64 0, i64 %tmp40, i32 0
-  %tmp34 = load i32* %tmp47, align 16
+  %tmp34 = load i32, i32* %tmp47, align 16
   %tmp203 = icmp slt i32 %tmp34, 12
   br i1 %tmp203, label %bb215, label %bb200
 
@@ -39,13 +39,13 @@
   store i32 %tmp216, i32* %tmp47, align 16
   %tmp217 = sext i32 %tmp216 to i64
   %tmp218 = getelementptr inbounds [13 x %1], [13 x %1]* @isa, i64 0, i64 %tmp217, i32 3, i64 0
-  %tmp219 = load i32* %tmp218, align 8
+  %tmp219 = load i32, i32* %tmp218, align 8
   store i32 %tmp219, i32* %tmp48, align 4
   %tmp220 = getelementptr inbounds [13 x %1], [13 x %1]* @isa, i64 0, i64 %tmp217, i32 3, i64 1
-  %tmp221 = load i32* %tmp220, align 4
+  %tmp221 = load i32, i32* %tmp220, align 4
   store i32 %tmp221, i32* %tmp49, align 4
   %tmp222 = getelementptr inbounds [13 x %1], [13 x %1]* @isa, i64 0, i64 %tmp217, i32 3, i64 2
-  %tmp223 = load i32* %tmp222, align 8
+  %tmp223 = load i32, i32* %tmp222, align 8
   store i32 %tmp223, i32* %tmp50, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/lsr-reuse-trunc.ll b/llvm/test/CodeGen/X86/lsr-reuse-trunc.ll
index c3dbd60..7f73b6b 100644
--- a/llvm/test/CodeGen/X86/lsr-reuse-trunc.ll
+++ b/llvm/test/CodeGen/X86/lsr-reuse-trunc.ll
@@ -14,7 +14,7 @@
 
 define void @vvfloorf(float* nocapture %y, float* nocapture %x, i32* nocapture %n) nounwind {
 entry:
-  %0 = load i32* %n, align 4
+  %0 = load i32, i32* %n, align 4
   %1 = icmp sgt i32 %0, 0
   br i1 %1, label %bb, label %return
 
@@ -25,7 +25,7 @@
   %scevgep9 = bitcast float* %scevgep to <4 x float>*
   %scevgep10 = getelementptr float, float* %x, i64 %tmp
   %scevgep1011 = bitcast float* %scevgep10 to <4 x float>*
-  %2 = load <4 x float>* %scevgep1011, align 16
+  %2 = load <4 x float>, <4 x float>* %scevgep1011, align 16
   %3 = bitcast <4 x float> %2 to <4 x i32>
   %4 = and <4 x i32> %3, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
   %5 = bitcast <4 x i32> %4 to <4 x float>
@@ -48,7 +48,7 @@
   store <4 x float> %19, <4 x float>* %scevgep9, align 16
   %tmp12 = add i64 %tmp, 4
   %tmp13 = trunc i64 %tmp12 to i32
-  %20 = load i32* %n, align 4
+  %20 = load i32, i32* %n, align 4
   %21 = icmp sgt i32 %20, %tmp13
   %indvar.next = add i64 %indvar, 1
   br i1 %21, label %bb, label %return
diff --git a/llvm/test/CodeGen/X86/lsr-reuse.ll b/llvm/test/CodeGen/X86/lsr-reuse.ll
index a8a18f1..dd1e40f 100644
--- a/llvm/test/CodeGen/X86/lsr-reuse.ll
+++ b/llvm/test/CodeGen/X86/lsr-reuse.ll
@@ -28,8 +28,8 @@
   %Ai = getelementptr inbounds double, double* %A, i64 %i
   %Bi = getelementptr inbounds double, double* %B, i64 %i
   %Ci = getelementptr inbounds double, double* %C, i64 %i
-  %t1 = load double* %Bi
-  %t2 = load double* %Ci
+  %t1 = load double, double* %Bi
+  %t2 = load double, double* %Ci
   %m = fmul double %t1, %t2
   store double %m, double* %Ai
   %i.next = add nsw i64 %i, 1
@@ -73,16 +73,16 @@
   %Ai = getelementptr inbounds double, double* %A, i64 %i
   %Bi = getelementptr inbounds double, double* %B, i64 %i
   %Ci = getelementptr inbounds double, double* %C, i64 %i
-  %t1 = load double* %Bi
-  %t2 = load double* %Ci
+  %t1 = load double, double* %Bi
+  %t2 = load double, double* %Ci
   %m = fmul double %t1, %t2
   store double %m, double* %Ai
   %j = add i64 %i, 256
   %Aj = getelementptr inbounds double, double* %A, i64 %j
   %Bj = getelementptr inbounds double, double* %B, i64 %j
   %Cj = getelementptr inbounds double, double* %C, i64 %j
-  %t3 = load double* %Bj
-  %t4 = load double* %Cj
+  %t3 = load double, double* %Bj
+  %t4 = load double, double* %Cj
   %o = fdiv double %t3, %t4
   store double %o, double* %Aj
   %i.next = add nsw i64 %i, 1
@@ -119,16 +119,16 @@
   %Ai = getelementptr inbounds double, double* %A, i64 %i
   %Bi = getelementptr inbounds double, double* %B, i64 %i
   %Ci = getelementptr inbounds double, double* %C, i64 %i
-  %t1 = load double* %Bi
-  %t2 = load double* %Ci
+  %t1 = load double, double* %Bi
+  %t2 = load double, double* %Ci
   %m = fmul double %t1, %t2
   store double %m, double* %Ai
   %j = sub i64 %i, 256
   %Aj = getelementptr inbounds double, double* %A, i64 %j
   %Bj = getelementptr inbounds double, double* %B, i64 %j
   %Cj = getelementptr inbounds double, double* %C, i64 %j
-  %t3 = load double* %Bj
-  %t4 = load double* %Cj
+  %t3 = load double, double* %Bj
+  %t4 = load double, double* %Cj
   %o = fdiv double %t3, %t4
   store double %o, double* %Aj
   %i.next = add nsw i64 %i, 1
@@ -165,16 +165,16 @@
   %Ak = getelementptr inbounds double, double* %A, i64 %k
   %Bk = getelementptr inbounds double, double* %B, i64 %k
   %Ck = getelementptr inbounds double, double* %C, i64 %k
-  %t1 = load double* %Bk
-  %t2 = load double* %Ck
+  %t1 = load double, double* %Bk
+  %t2 = load double, double* %Ck
   %m = fmul double %t1, %t2
   store double %m, double* %Ak
   %j = sub i64 %i, 256
   %Aj = getelementptr inbounds double, double* %A, i64 %j
   %Bj = getelementptr inbounds double, double* %B, i64 %j
   %Cj = getelementptr inbounds double, double* %C, i64 %j
-  %t3 = load double* %Bj
-  %t4 = load double* %Cj
+  %t3 = load double, double* %Bj
+  %t4 = load double, double* %Cj
   %o = fdiv double %t3, %t4
   store double %o, double* %Aj
   %i.next = add nsw i64 %i, 1
@@ -208,8 +208,8 @@
   %Ai = getelementptr inbounds double, double* %A, i64 %i
   %Bi = getelementptr inbounds double, double* %B, i64 %i
   %Ci = getelementptr inbounds double, double* %C, i64 %i
-  %t1 = load double* %Bi
-  %t2 = load double* %Ci
+  %t1 = load double, double* %Bi
+  %t2 = load double, double* %Ci
   %m = fmul double %t1, %t2
   store double %m, double* %Ai
   %i.next = add nsw i64 %i, 1
@@ -243,8 +243,8 @@
   %Ai = getelementptr inbounds double, double* %A, i64 %i
   %Bi = getelementptr inbounds double, double* %B, i64 %i
   %Ci = getelementptr inbounds double, double* %C, i64 %i
-  %t1 = load double* %Bi
-  %t2 = load double* %Ci
+  %t1 = load double, double* %Bi
+  %t2 = load double, double* %Ci
   %m = fmul double %t1, %t2
   store double %m, double* %Ai
   %i.next = add nsw i64 %i, 1
@@ -281,17 +281,17 @@
   %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
   %i5 = add i64 %i, 5
   %Ai = getelementptr double, double* %A, i64 %i5
-  %t2 = load double* %Ai
+  %t2 = load double, double* %Ai
   %Bi = getelementptr double, double* %B, i64 %i5
-  %t4 = load double* %Bi
+  %t4 = load double, double* %Bi
   %t5 = fadd double %t2, %t4
   %Ci = getelementptr double, double* %C, i64 %i5
   store double %t5, double* %Ci
   %i10 = add i64 %i, 10
   %Ai10 = getelementptr double, double* %A, i64 %i10
-  %t9 = load double* %Ai10
+  %t9 = load double, double* %Ai10
   %Bi10 = getelementptr double, double* %B, i64 %i10
-  %t11 = load double* %Bi10
+  %t11 = load double, double* %Bi10
   %t12 = fsub double %t9, %t11
   %Ci10 = getelementptr double, double* %C, i64 %i10
   store double %t12, double* %Ci10
@@ -328,17 +328,17 @@
   %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
   %i5 = add i64 %i, 5
   %Ai = getelementptr double, double* %A, i64 %i5
-  %t2 = load double* %Ai
+  %t2 = load double, double* %Ai
   %Bi = getelementptr double, double* %B, i64 %i5
-  %t4 = load double* %Bi
+  %t4 = load double, double* %Bi
   %t5 = fadd double %t2, %t4
   %Ci = getelementptr double, double* %C, i64 %i5
   store double %t5, double* %Ci
   %i10 = add i64 %i, 10
   %Ai10 = getelementptr double, double* %A, i64 %i10
-  %t9 = load double* %Ai10
+  %t9 = load double, double* %Ai10
   %Bi10 = getelementptr double, double* %B, i64 %i10
-  %t11 = load double* %Bi10
+  %t11 = load double, double* %Bi10
   %t12 = fsub double %t9, %t11
   %Ci10 = getelementptr double, double* %C, i64 %i10
   store double %t12, double* %Ci10
@@ -375,8 +375,8 @@
   %Ai = getelementptr inbounds double, double* %A, i64 %i
   %Bi = getelementptr inbounds double, double* %B, i64 %i
   %Ci = getelementptr inbounds double, double* %C, i64 %i
-  %t1 = load double* %Bi
-  %t2 = load double* %Ci
+  %t1 = load double, double* %Bi
+  %t2 = load double, double* %Ci
   %m = fmul double %t1, %t2
   store double %m, double* %Ai
   %i.next = add nsw i64 %i, 1
@@ -414,7 +414,7 @@
   %indvar16 = phi i64 [ 0, %bb.nph14 ], [ %indvar.next17, %bb3 ] ; <i64> [#uses=3]
   %s.113 = phi i32 [ 0, %bb.nph14 ], [ %s.0.lcssa, %bb3 ] ; <i32> [#uses=2]
   %scevgep2526 = getelementptr [123123 x %struct.anon], [123123 x %struct.anon]* @bars, i64 0, i64 %indvar16, i32 0 ; <i32*> [#uses=1]
-  %1 = load i32* %scevgep2526, align 4            ; <i32> [#uses=2]
+  %1 = load i32, i32* %scevgep2526, align 4            ; <i32> [#uses=2]
   %2 = icmp sgt i32 %1, 0                         ; <i1> [#uses=1]
   br i1 %2, label %bb.nph, label %bb3
 
@@ -426,7 +426,7 @@
   %indvar = phi i64 [ 0, %bb.nph ], [ %tmp19, %bb1 ] ; <i64> [#uses=2]
   %s.07 = phi i32 [ %s.113, %bb.nph ], [ %4, %bb1 ] ; <i32> [#uses=1]
   %c.08 = getelementptr [123123 x %struct.anon], [123123 x %struct.anon]* @bars, i64 0, i64 %indvar16, i32 1, i64 %indvar ; <i32*> [#uses=1]
-  %3 = load i32* %c.08, align 4                   ; <i32> [#uses=1]
+  %3 = load i32, i32* %c.08, align 4                   ; <i32> [#uses=1]
   %4 = add nsw i32 %3, %s.07                      ; <i32> [#uses=2]
   %tmp19 = add i64 %indvar, 1                     ; <i64> [#uses=2]
   %5 = icmp sgt i64 %tmp23, %tmp19                ; <i1> [#uses=1]
@@ -493,7 +493,7 @@
 bb:
   %t = alloca float, align 4                      ; <float*> [#uses=3]
   %t7 = alloca float, align 4                     ; <float*> [#uses=2]
-  %t8 = load float* %arg3                         ; <float> [#uses=8]
+  %t8 = load float, float* %arg3                         ; <float> [#uses=8]
   %t9 = ptrtoint float* %arg to i64               ; <i64> [#uses=1]
   %t10 = ptrtoint float* %arg4 to i64             ; <i64> [#uses=1]
   %t11 = xor i64 %t10, %t9                        ; <i64> [#uses=1]
@@ -507,7 +507,7 @@
   br i1 %t18, label %bb19, label %bb213
 
 bb19:                                             ; preds = %bb
-  %t20 = load float* %arg2                        ; <float> [#uses=1]
+  %t20 = load float, float* %arg2                        ; <float> [#uses=1]
   br label %bb21
 
 bb21:                                             ; preds = %bb32, %bb19
@@ -526,7 +526,7 @@
   br i1 %t31, label %bb37, label %bb32
 
 bb32:                                             ; preds = %bb28
-  %t33 = load float* %t26                         ; <float> [#uses=1]
+  %t33 = load float, float* %t26                         ; <float> [#uses=1]
   %t34 = fmul float %t23, %t33                    ; <float> [#uses=1]
   store float %t34, float* %t25
   %t35 = fadd float %t23, %t8                     ; <float> [#uses=1]
@@ -604,10 +604,10 @@
   %t95 = bitcast float* %t94 to <4 x float>*      ; <<4 x float>*> [#uses=1]
   %t96 = mul i64 %t69, -16                        ; <i64> [#uses=1]
   %t97 = add i64 %t67, %t96                       ; <i64> [#uses=2]
-  %t98 = load <4 x float>* %t77                   ; <<4 x float>> [#uses=1]
-  %t99 = load <4 x float>* %t81                   ; <<4 x float>> [#uses=1]
-  %t100 = load <4 x float>* %t84                  ; <<4 x float>> [#uses=1]
-  %t101 = load <4 x float>* %t87                  ; <<4 x float>> [#uses=1]
+  %t98 = load <4 x float>, <4 x float>* %t77                   ; <<4 x float>> [#uses=1]
+  %t99 = load <4 x float>, <4 x float>* %t81                   ; <<4 x float>> [#uses=1]
+  %t100 = load <4 x float>, <4 x float>* %t84                  ; <<4 x float>> [#uses=1]
+  %t101 = load <4 x float>, <4 x float>* %t87                  ; <<4 x float>> [#uses=1]
   %t102 = fmul <4 x float> %t98, %t71             ; <<4 x float>> [#uses=1]
   %t103 = fadd <4 x float> %t71, %t55             ; <<4 x float>> [#uses=2]
   %t104 = fmul <4 x float> %t99, %t73             ; <<4 x float>> [#uses=1]
@@ -644,7 +644,7 @@
   %t123 = add i64 %t22, -1                        ; <i64> [#uses=1]
   %t124 = getelementptr inbounds float, float* %arg, i64 %t123 ; <float*> [#uses=1]
   %t125 = bitcast float* %t124 to <4 x float>*    ; <<4 x float>*> [#uses=1]
-  %t126 = load <4 x float>* %t125                 ; <<4 x float>> [#uses=1]
+  %t126 = load <4 x float>, <4 x float>* %t125                 ; <<4 x float>> [#uses=1]
   %t127 = add i64 %t22, 16                        ; <i64> [#uses=1]
   %t128 = add i64 %t22, 3                         ; <i64> [#uses=1]
   %t129 = add i64 %t22, 7                         ; <i64> [#uses=1]
@@ -692,10 +692,10 @@
   %t169 = bitcast float* %t168 to <4 x float>*    ; <<4 x float>*> [#uses=1]
   %t170 = mul i64 %t138, -16                      ; <i64> [#uses=1]
   %t171 = add i64 %t136, %t170                    ; <i64> [#uses=2]
-  %t172 = load <4 x float>* %t148                 ; <<4 x float>> [#uses=2]
-  %t173 = load <4 x float>* %t151                 ; <<4 x float>> [#uses=2]
-  %t174 = load <4 x float>* %t154                 ; <<4 x float>> [#uses=2]
-  %t175 = load <4 x float>* %t157                 ; <<4 x float>> [#uses=2]
+  %t172 = load <4 x float>, <4 x float>* %t148                 ; <<4 x float>> [#uses=2]
+  %t173 = load <4 x float>, <4 x float>* %t151                 ; <<4 x float>> [#uses=2]
+  %t174 = load <4 x float>, <4 x float>* %t154                 ; <<4 x float>> [#uses=2]
+  %t175 = load <4 x float>, <4 x float>* %t157                 ; <<4 x float>> [#uses=2]
   %t176 = shufflevector <4 x float> %t143, <4 x float> %t172, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
   %t177 = shufflevector <4 x float> %t176, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
   %t178 = shufflevector <4 x float> %t172, <4 x float> %t173, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
@@ -734,7 +734,7 @@
   %t203 = phi float [ %t208, %bb201 ], [ %t199, %bb194 ] ; <float> [#uses=2]
   %t204 = getelementptr float, float* %t198, i64 %t202   ; <float*> [#uses=1]
   %t205 = getelementptr float, float* %t197, i64 %t202   ; <float*> [#uses=1]
-  %t206 = load float* %t204                       ; <float> [#uses=1]
+  %t206 = load float, float* %t204                       ; <float> [#uses=1]
   %t207 = fmul float %t203, %t206                 ; <float> [#uses=1]
   store float %t207, float* %t205
   %t208 = fadd float %t203, %t8                   ; <float> [#uses=2]
diff --git a/llvm/test/CodeGen/X86/lsr-static-addr.ll b/llvm/test/CodeGen/X86/lsr-static-addr.ll
index 8b73a03..1765ed7 100644
--- a/llvm/test/CodeGen/X86/lsr-static-addr.ll
+++ b/llvm/test/CodeGen/X86/lsr-static-addr.ll
@@ -30,7 +30,7 @@
 for.body:
   %i.06 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr [0 x double], [0 x double]* @A, i64 0, i64 %i.06
-  %tmp3 = load double* %arrayidx, align 8
+  %tmp3 = load double, double* %arrayidx, align 8
   %mul = fmul double %tmp3, 2.300000e+00
   store double %mul, double* %arrayidx, align 8
   %inc = add nsw i64 %i.06, 1
diff --git a/llvm/test/CodeGen/X86/lsr-wrap.ll b/llvm/test/CodeGen/X86/lsr-wrap.ll
index d605e4f..adf9544 100644
--- a/llvm/test/CodeGen/X86/lsr-wrap.ll
+++ b/llvm/test/CodeGen/X86/lsr-wrap.ll
@@ -20,7 +20,7 @@
   %indvar = phi i16 [ 0, %entry ], [ %indvar.next, %bb ] ; <i16> [#uses=2]
   %tmp = sub i16 0, %indvar                       ; <i16> [#uses=1]
   %tmp27 = trunc i16 %tmp to i8                   ; <i8> [#uses=1]
-  %tmp1 = load i32* @g_19, align 4                ; <i32> [#uses=2]
+  %tmp1 = load i32, i32* @g_19, align 4                ; <i32> [#uses=2]
   %tmp2 = add i32 %tmp1, 1                        ; <i32> [#uses=1]
   store i32 %tmp2, i32* @g_19, align 4
   %tmp3 = trunc i32 %tmp1 to i8                   ; <i8> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/lzcnt-tzcnt.ll b/llvm/test/CodeGen/X86/lzcnt-tzcnt.ll
index e98764a..aa9ae2b 100644
--- a/llvm/test/CodeGen/X86/lzcnt-tzcnt.ll
+++ b/llvm/test/CodeGen/X86/lzcnt-tzcnt.ll
@@ -106,7 +106,7 @@
 
 
 define i16 @test10_ctlz(i16* %ptr) {
-  %v = load i16* %ptr
+  %v = load i16, i16* %ptr
   %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
   %tobool = icmp eq i16 %v, 0
   %cond = select i1 %tobool, i16 16, i16 %cnt
@@ -119,7 +119,7 @@
 
 
 define i32 @test11_ctlz(i32* %ptr) {
-  %v = load i32* %ptr
+  %v = load i32, i32* %ptr
   %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
   %tobool = icmp eq i32 %v, 0
   %cond = select i1 %tobool, i32 32, i32 %cnt
@@ -132,7 +132,7 @@
 
 
 define i64 @test12_ctlz(i64* %ptr) {
-  %v = load i64* %ptr
+  %v = load i64, i64* %ptr
   %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
   %tobool = icmp eq i64 %v, 0
   %cond = select i1 %tobool, i64 64, i64 %cnt
@@ -145,7 +145,7 @@
 
 
 define i16 @test13_ctlz(i16* %ptr) {
-  %v = load i16* %ptr
+  %v = load i16, i16* %ptr
   %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
   %tobool = icmp eq i16 0, %v
   %cond = select i1 %tobool, i16 16, i16 %cnt
@@ -158,7 +158,7 @@
 
 
 define i32 @test14_ctlz(i32* %ptr) {
-  %v = load i32* %ptr
+  %v = load i32, i32* %ptr
   %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
   %tobool = icmp eq i32 0, %v
   %cond = select i1 %tobool, i32 32, i32 %cnt
@@ -171,7 +171,7 @@
 
 
 define i64 @test15_ctlz(i64* %ptr) {
-  %v = load i64* %ptr
+  %v = load i64, i64* %ptr
   %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
   %tobool = icmp eq i64 0, %v
   %cond = select i1 %tobool, i64 64, i64 %cnt
@@ -184,7 +184,7 @@
 
 
 define i16 @test16_ctlz(i16* %ptr) {
-  %v = load i16* %ptr
+  %v = load i16, i16* %ptr
   %cnt = tail call i16 @llvm.ctlz.i16(i16 %v, i1 true)
   %tobool = icmp eq i16 0, %v
   %cond = select i1 %tobool, i16 %cnt, i16 16
@@ -197,7 +197,7 @@
 
 
 define i32 @test17_ctlz(i32* %ptr) {
-  %v = load i32* %ptr
+  %v = load i32, i32* %ptr
   %cnt = tail call i32 @llvm.ctlz.i32(i32 %v, i1 true)
   %tobool = icmp eq i32 0, %v
   %cond = select i1 %tobool, i32 %cnt, i32 32
@@ -210,7 +210,7 @@
 
 
 define i64 @test18_ctlz(i64* %ptr) {
-  %v = load i64* %ptr
+  %v = load i64, i64* %ptr
   %cnt = tail call i64 @llvm.ctlz.i64(i64 %v, i1 true)
   %tobool = icmp eq i64 0, %v
   %cond = select i1 %tobool, i64 %cnt, i64 64
@@ -322,7 +322,7 @@
 
 
 define i16 @test10_cttz(i16* %ptr) {
-  %v = load i16* %ptr
+  %v = load i16, i16* %ptr
   %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
   %tobool = icmp eq i16 %v, 0
   %cond = select i1 %tobool, i16 16, i16 %cnt
@@ -335,7 +335,7 @@
 
 
 define i32 @test11_cttz(i32* %ptr) {
-  %v = load i32* %ptr
+  %v = load i32, i32* %ptr
   %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
   %tobool = icmp eq i32 %v, 0
   %cond = select i1 %tobool, i32 32, i32 %cnt
@@ -348,7 +348,7 @@
 
 
 define i64 @test12_cttz(i64* %ptr) {
-  %v = load i64* %ptr
+  %v = load i64, i64* %ptr
   %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
   %tobool = icmp eq i64 %v, 0
   %cond = select i1 %tobool, i64 64, i64 %cnt
@@ -361,7 +361,7 @@
 
 
 define i16 @test13_cttz(i16* %ptr) {
-  %v = load i16* %ptr
+  %v = load i16, i16* %ptr
   %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
   %tobool = icmp eq i16 0, %v
   %cond = select i1 %tobool, i16 16, i16 %cnt
@@ -374,7 +374,7 @@
 
 
 define i32 @test14_cttz(i32* %ptr) {
-  %v = load i32* %ptr
+  %v = load i32, i32* %ptr
   %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
   %tobool = icmp eq i32 0, %v
   %cond = select i1 %tobool, i32 32, i32 %cnt
@@ -387,7 +387,7 @@
 
 
 define i64 @test15_cttz(i64* %ptr) {
-  %v = load i64* %ptr
+  %v = load i64, i64* %ptr
   %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
   %tobool = icmp eq i64 0, %v
   %cond = select i1 %tobool, i64 64, i64 %cnt
@@ -400,7 +400,7 @@
 
 
 define i16 @test16_cttz(i16* %ptr) {
-  %v = load i16* %ptr
+  %v = load i16, i16* %ptr
   %cnt = tail call i16 @llvm.cttz.i16(i16 %v, i1 true)
   %tobool = icmp eq i16 0, %v
   %cond = select i1 %tobool, i16 %cnt, i16 16
@@ -413,7 +413,7 @@
 
 
 define i32 @test17_cttz(i32* %ptr) {
-  %v = load i32* %ptr
+  %v = load i32, i32* %ptr
   %cnt = tail call i32 @llvm.cttz.i32(i32 %v, i1 true)
   %tobool = icmp eq i32 0, %v
   %cond = select i1 %tobool, i32 %cnt, i32 32
@@ -426,7 +426,7 @@
 
 
 define i64 @test18_cttz(i64* %ptr) {
-  %v = load i64* %ptr
+  %v = load i64, i64* %ptr
   %cnt = tail call i64 @llvm.cttz.i64(i64 %v, i1 true)
   %tobool = icmp eq i64 0, %v
   %cond = select i1 %tobool, i64 %cnt, i64 64
diff --git a/llvm/test/CodeGen/X86/machine-cse.ll b/llvm/test/CodeGen/X86/machine-cse.ll
index e5df2147..ce3ab4c 100644
--- a/llvm/test/CodeGen/X86/machine-cse.ll
+++ b/llvm/test/CodeGen/X86/machine-cse.ll
@@ -147,7 +147,7 @@
   br i1 %c, label %a, label %b
 
 a:
-  %l = load i32* @t2_global
+  %l = load i32, i32* @t2_global
   ret i32 %l
 
 b:
diff --git a/llvm/test/CodeGen/X86/masked-iv-safe.ll b/llvm/test/CodeGen/X86/masked-iv-safe.ll
index dcf2e1d..8c0a4d4 100644
--- a/llvm/test/CodeGen/X86/masked-iv-safe.ll
+++ b/llvm/test/CodeGen/X86/masked-iv-safe.ll
@@ -16,16 +16,16 @@
 	%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
 	%indvar.i8 = and i64 %indvar, 255
 	%t0 = getelementptr double, double* %d, i64 %indvar.i8
-	%t1 = load double* %t0
+	%t1 = load double, double* %t0
 	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%indvar.i24 = and i64 %indvar, 16777215
 	%t3 = getelementptr double, double* %d, i64 %indvar.i24
-	%t4 = load double* %t3
+	%t4 = load double, double* %t3
 	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double, double* %d, i64 %indvar
-	%t7 = load double* %t6
+	%t7 = load double, double* %t6
 	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = add i64 %indvar, 1
@@ -49,16 +49,16 @@
 	%indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
 	%indvar.i8 = and i64 %indvar, 255
 	%t0 = getelementptr double, double* %d, i64 %indvar.i8
-	%t1 = load double* %t0
+	%t1 = load double, double* %t0
 	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%indvar.i24 = and i64 %indvar, 16777215
 	%t3 = getelementptr double, double* %d, i64 %indvar.i24
-	%t4 = load double* %t3
+	%t4 = load double, double* %t3
 	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double, double* %d, i64 %indvar
-	%t7 = load double* %t6
+	%t7 = load double, double* %t6
 	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = sub i64 %indvar, 1
@@ -83,17 +83,17 @@
         %s0 = shl i64 %indvar, 8
 	%indvar.i8 = ashr i64 %s0, 8
 	%t0 = getelementptr double, double* %d, i64 %indvar.i8
-	%t1 = load double* %t0
+	%t1 = load double, double* %t0
 	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%s1 = shl i64 %indvar, 24
 	%indvar.i24 = ashr i64 %s1, 24
 	%t3 = getelementptr double, double* %d, i64 %indvar.i24
-	%t4 = load double* %t3
+	%t4 = load double, double* %t3
 	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double, double* %d, i64 %indvar
-	%t7 = load double* %t6
+	%t7 = load double, double* %t6
 	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = add i64 %indvar, 1
@@ -118,17 +118,17 @@
         %s0 = shl i64 %indvar, 8
 	%indvar.i8 = ashr i64 %s0, 8
 	%t0 = getelementptr double, double* %d, i64 %indvar.i8
-	%t1 = load double* %t0
+	%t1 = load double, double* %t0
 	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%s1 = shl i64 %indvar, 24
 	%indvar.i24 = ashr i64 %s1, 24
 	%t3 = getelementptr double, double* %d, i64 %indvar.i24
-	%t4 = load double* %t3
+	%t4 = load double, double* %t3
 	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double, double* %d, i64 %indvar
-	%t7 = load double* %t6
+	%t7 = load double, double* %t6
 	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = sub i64 %indvar, 1
@@ -152,16 +152,16 @@
 	%indvar = phi i64 [ 18446744073709551615, %entry ], [ %indvar.next, %loop ]
 	%indvar.i8 = and i64 %indvar, 255
 	%t0 = getelementptr double, double* %d, i64 %indvar.i8
-	%t1 = load double* %t0
+	%t1 = load double, double* %t0
 	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%indvar.i24 = and i64 %indvar, 16777215
 	%t3 = getelementptr double, double* %d, i64 %indvar.i24
-	%t4 = load double* %t3
+	%t4 = load double, double* %t3
 	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double, double* %d, i64 %indvar
-	%t7 = load double* %t6
+	%t7 = load double, double* %t6
 	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = add i64 %indvar, 1
@@ -185,16 +185,16 @@
 	%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
 	%indvar.i8 = and i64 %indvar, 255
 	%t0 = getelementptr double, double* %d, i64 %indvar.i8
-	%t1 = load double* %t0
+	%t1 = load double, double* %t0
 	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%indvar.i24 = and i64 %indvar, 16777215
 	%t3 = getelementptr double, double* %d, i64 %indvar.i24
-	%t4 = load double* %t3
+	%t4 = load double, double* %t3
 	%t5 = fdiv double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double, double* %d, i64 %indvar
-	%t7 = load double* %t6
+	%t7 = load double, double* %t6
 	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = sub i64 %indvar, 1
@@ -219,17 +219,17 @@
         %s0 = shl i64 %indvar, 8
 	%indvar.i8 = ashr i64 %s0, 8
 	%t0 = getelementptr double, double* %d, i64 %indvar.i8
-	%t1 = load double* %t0
+	%t1 = load double, double* %t0
 	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%s1 = shl i64 %indvar, 24
 	%indvar.i24 = ashr i64 %s1, 24
 	%t3 = getelementptr double, double* %d, i64 %indvar.i24
-	%t4 = load double* %t3
+	%t4 = load double, double* %t3
 	%t5 = fdiv double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double, double* %d, i64 %indvar
-	%t7 = load double* %t6
+	%t7 = load double, double* %t6
 	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = add i64 %indvar, 1
@@ -254,17 +254,17 @@
         %s0 = shl i64 %indvar, 8
 	%indvar.i8 = ashr i64 %s0, 8
 	%t0 = getelementptr double, double* %d, i64 %indvar.i8
-	%t1 = load double* %t0
+	%t1 = load double, double* %t0
 	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%s1 = shl i64 %indvar, 24
 	%indvar.i24 = ashr i64 %s1, 24
 	%t3 = getelementptr double, double* %d, i64 %indvar.i24
-	%t4 = load double* %t3
+	%t4 = load double, double* %t3
 	%t5 = fdiv double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double, double* %d, i64 %indvar
-	%t7 = load double* %t6
+	%t7 = load double, double* %t6
 	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = sub i64 %indvar, 1
diff --git a/llvm/test/CodeGen/X86/masked-iv-unsafe.ll b/llvm/test/CodeGen/X86/masked-iv-unsafe.ll
index 28c2444..974a1cf 100644
--- a/llvm/test/CodeGen/X86/masked-iv-unsafe.ll
+++ b/llvm/test/CodeGen/X86/masked-iv-unsafe.ll
@@ -14,16 +14,16 @@
 	%indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
 	%indvar.i8 = and i64 %indvar, 255
 	%t0 = getelementptr double, double* %d, i64 %indvar.i8
-	%t1 = load double* %t0
+	%t1 = load double, double* %t0
 	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%indvar.i24 = and i64 %indvar, 16777215
 	%t3 = getelementptr double, double* %d, i64 %indvar.i24
-	%t4 = load double* %t3
+	%t4 = load double, double* %t3
 	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double, double* %d, i64 %indvar
-	%t7 = load double* %t6
+	%t7 = load double, double* %t6
 	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = add i64 %indvar, 1
@@ -42,16 +42,16 @@
 	%indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
 	%indvar.i8 = and i64 %indvar, 255
 	%t0 = getelementptr double, double* %d, i64 %indvar.i8
-	%t1 = load double* %t0
+	%t1 = load double, double* %t0
 	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%indvar.i24 = and i64 %indvar, 16777215
 	%t3 = getelementptr double, double* %d, i64 %indvar.i24
-	%t4 = load double* %t3
+	%t4 = load double, double* %t3
 	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double, double* %d, i64 %indvar
-	%t7 = load double* %t6
+	%t7 = load double, double* %t6
 	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = sub i64 %indvar, 1
@@ -71,17 +71,17 @@
         %s0 = shl i64 %indvar, 8
 	%indvar.i8 = ashr i64 %s0, 8
 	%t0 = getelementptr double, double* %d, i64 %indvar.i8
-	%t1 = load double* %t0
+	%t1 = load double, double* %t0
 	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%s1 = shl i64 %indvar, 24
 	%indvar.i24 = ashr i64 %s1, 24
 	%t3 = getelementptr double, double* %d, i64 %indvar.i24
-	%t4 = load double* %t3
+	%t4 = load double, double* %t3
 	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double, double* %d, i64 %indvar
-	%t7 = load double* %t6
+	%t7 = load double, double* %t6
 	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = add i64 %indvar, 1
@@ -101,17 +101,17 @@
         %s0 = shl i64 %indvar, 8
 	%indvar.i8 = ashr i64 %s0, 8
 	%t0 = getelementptr double, double* %d, i64 %indvar.i8
-	%t1 = load double* %t0
+	%t1 = load double, double* %t0
 	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%s1 = shl i64 %indvar, 24
 	%indvar.i24 = ashr i64 %s1, 24
 	%t3 = getelementptr double, double* %d, i64 %indvar.i24
-	%t4 = load double* %t3
+	%t4 = load double, double* %t3
 	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double, double* %d, i64 %indvar
-	%t7 = load double* %t6
+	%t7 = load double, double* %t6
 	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = sub i64 %indvar, 1
@@ -130,16 +130,16 @@
         %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
         %indvar.i8 = and i64 %indvar, 255
         %t0 = getelementptr double, double* %d, i64 %indvar.i8
-        %t1 = load double* %t0
+        %t1 = load double, double* %t0
         %t2 = fmul double %t1, 0.1
         store double %t2, double* %t0
         %indvar.i24 = and i64 %indvar, 16777215
         %t3 = getelementptr double, double* %d, i64 %indvar.i24
-        %t4 = load double* %t3
+        %t4 = load double, double* %t3
         %t5 = fmul double %t4, 2.3
         store double %t5, double* %t3
         %t6 = getelementptr double, double* %d, i64 %indvar
-        %t7 = load double* %t6
+        %t7 = load double, double* %t6
         %t8 = fmul double %t7, 4.5
         store double %t8, double* %t6
         %indvar.next = add i64 %indvar, 1
@@ -158,16 +158,16 @@
         %indvar = phi i64 [ %n, %entry ], [ %indvar.next, %loop ]
         %indvar.i8 = and i64 %indvar, 255
         %t0 = getelementptr double, double* %d, i64 %indvar.i8
-        %t1 = load double* %t0
+        %t1 = load double, double* %t0
         %t2 = fmul double %t1, 0.1
         store double %t2, double* %t0
         %indvar.i24 = and i64 %indvar, 16777215
         %t3 = getelementptr double, double* %d, i64 %indvar.i24
-        %t4 = load double* %t3
+        %t4 = load double, double* %t3
         %t5 = fmul double %t4, 2.3
         store double %t5, double* %t3
         %t6 = getelementptr double, double* %d, i64 %indvar
-        %t7 = load double* %t6
+        %t7 = load double, double* %t6
         %t8 = fmul double %t7, 4.5
         store double %t8, double* %t6
         %indvar.next = sub i64 %indvar, 1
@@ -187,17 +187,17 @@
         %s0 = shl i64 %indvar, 8
         %indvar.i8 = ashr i64 %s0, 8
         %t0 = getelementptr double, double* %d, i64 %indvar.i8
-        %t1 = load double* %t0
+        %t1 = load double, double* %t0
         %t2 = fmul double %t1, 0.1
         store double %t2, double* %t0
         %s1 = shl i64 %indvar, 24
         %indvar.i24 = ashr i64 %s1, 24
         %t3 = getelementptr double, double* %d, i64 %indvar.i24
-        %t4 = load double* %t3
+        %t4 = load double, double* %t3
         %t5 = fmul double %t4, 2.3
         store double %t5, double* %t3
         %t6 = getelementptr double, double* %d, i64 %indvar
-        %t7 = load double* %t6
+        %t7 = load double, double* %t6
         %t8 = fmul double %t7, 4.5
         store double %t8, double* %t6
         %indvar.next = add i64 %indvar, 1
@@ -217,17 +217,17 @@
         %s0 = shl i64 %indvar, 8
         %indvar.i8 = ashr i64 %s0, 8
         %t0 = getelementptr double, double* %d, i64 %indvar.i8
-        %t1 = load double* %t0
+        %t1 = load double, double* %t0
         %t2 = fmul double %t1, 0.1
         store double %t2, double* %t0
         %s1 = shl i64 %indvar, 24
         %indvar.i24 = ashr i64 %s1, 24
         %t3 = getelementptr double, double* %d, i64 %indvar.i24
-        %t4 = load double* %t3
+        %t4 = load double, double* %t3
         %t5 = fmul double %t4, 2.3
         store double %t5, double* %t3
         %t6 = getelementptr double, double* %d, i64 %indvar
-        %t7 = load double* %t6
+        %t7 = load double, double* %t6
         %t8 = fmul double %t7, 4.5
         store double %t8, double* %t6
         %indvar.next = sub i64 %indvar, 1
@@ -246,16 +246,16 @@
 	%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
 	%indvar.i8 = and i64 %indvar, 255
 	%t0 = getelementptr double, double* %d, i64 %indvar.i8
-	%t1 = load double* %t0
+	%t1 = load double, double* %t0
 	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%indvar.i24 = and i64 %indvar, 16777215
 	%t3 = getelementptr double, double* %d, i64 %indvar.i24
-	%t4 = load double* %t3
+	%t4 = load double, double* %t3
 	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double, double* %d, i64 %indvar
-	%t7 = load double* %t6
+	%t7 = load double, double* %t6
 	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = sub i64 %indvar, 1
@@ -274,16 +274,16 @@
         %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
         %indvar.i8 = and i64 %indvar, 255
         %t0 = getelementptr double, double* %d, i64 %indvar.i8
-        %t1 = load double* %t0
+        %t1 = load double, double* %t0
         %t2 = fmul double %t1, 0.1
         store double %t2, double* %t0
         %indvar.i24 = and i64 %indvar, 16777215
         %t3 = getelementptr double, double* %d, i64 %indvar.i24
-        %t4 = load double* %t3
+        %t4 = load double, double* %t3
         %t5 = fmul double %t4, 2.3
         store double %t5, double* %t3
         %t6 = getelementptr double, double* %d, i64 %indvar
-        %t7 = load double* %t6
+        %t7 = load double, double* %t6
         %t8 = fmul double %t7, 4.5
         store double %t8, double* %t6
         %indvar.next = add i64 %indvar, 3
@@ -302,16 +302,16 @@
         %indvar = phi i64 [ 10, %entry ], [ %indvar.next, %loop ]
         %indvar.i8 = and i64 %indvar, 255
         %t0 = getelementptr double, double* %d, i64 %indvar.i8
-        %t1 = load double* %t0
+        %t1 = load double, double* %t0
         %t2 = fmul double %t1, 0.1
         store double %t2, double* %t0
         %indvar.i24 = and i64 %indvar, 16777215
         %t3 = getelementptr double, double* %d, i64 %indvar.i24
-        %t4 = load double* %t3
+        %t4 = load double, double* %t3
         %t5 = fmul double %t4, 2.3
         store double %t5, double* %t3
         %t6 = getelementptr double, double* %d, i64 %indvar
-        %t7 = load double* %t6
+        %t7 = load double, double* %t6
         %t8 = fmul double %t7, 4.5
         store double %t8, double* %t6
         %indvar.next = sub i64 %indvar, 3
@@ -331,17 +331,17 @@
         %s0 = shl i64 %indvar, 8
         %indvar.i8 = ashr i64 %s0, 8
         %t0 = getelementptr double, double* %d, i64 %indvar.i8
-        %t1 = load double* %t0
+        %t1 = load double, double* %t0
         %t2 = fmul double %t1, 0.1
         store double %t2, double* %t0
         %s1 = shl i64 %indvar, 24
         %indvar.i24 = ashr i64 %s1, 24
         %t3 = getelementptr double, double* %d, i64 %indvar.i24
-        %t4 = load double* %t3
+        %t4 = load double, double* %t3
         %t5 = fmul double %t4, 2.3
         store double %t5, double* %t3
         %t6 = getelementptr double, double* %d, i64 %indvar
-        %t7 = load double* %t6
+        %t7 = load double, double* %t6
         %t8 = fmul double %t7, 4.5
         store double %t8, double* %t6
         %indvar.next = add i64 %indvar, 3
@@ -361,17 +361,17 @@
         %s0 = shl i64 %indvar, 8
         %indvar.i8 = ashr i64 %s0, 8
         %t0 = getelementptr double, double* %d, i64 %indvar.i8
-        %t1 = load double* %t0
+        %t1 = load double, double* %t0
         %t2 = fmul double %t1, 0.1
         store double %t2, double* %t0
         %s1 = shl i64 %indvar, 24
         %indvar.i24 = ashr i64 %s1, 24
         %t3 = getelementptr double, double* %d, i64 %indvar.i24
-        %t4 = load double* %t3
+        %t4 = load double, double* %t3
         %t5 = fmul double %t4, 2.3
         store double %t5, double* %t3
         %t6 = getelementptr double, double* %d, i64 %indvar
-        %t7 = load double* %t6
+        %t7 = load double, double* %t6
         %t8 = fmul double %t7, 4.5
         store double %t8, double* %t6
         %indvar.next = sub i64 %indvar, 3
diff --git a/llvm/test/CodeGen/X86/mcinst-lowering.ll b/llvm/test/CodeGen/X86/mcinst-lowering.ll
index a82cfc4..51b2895 100644
--- a/llvm/test/CodeGen/X86/mcinst-lowering.ll
+++ b/llvm/test/CodeGen/X86/mcinst-lowering.ll
@@ -5,7 +5,7 @@
 
 define i32 @f0(i32* nocapture %x) nounwind readonly ssp {
 entry:
-  %tmp1 = load i32* %x                            ; <i32> [#uses=2]
+  %tmp1 = load i32, i32* %x                            ; <i32> [#uses=2]
   %tobool = icmp eq i32 %tmp1, 0                  ; <i1> [#uses=1]
   br i1 %tobool, label %if.end, label %return
 
diff --git a/llvm/test/CodeGen/X86/mem-intrin-base-reg.ll b/llvm/test/CodeGen/X86/mem-intrin-base-reg.ll
index bf90d60..9bace29 100644
--- a/llvm/test/CodeGen/X86/mem-intrin-base-reg.ll
+++ b/llvm/test/CodeGen/X86/mem-intrin-base-reg.ll
@@ -25,8 +25,8 @@
 
 spill_vectors:
   %vp1 = getelementptr <4 x i32>, <4 x i32>* %vp0, i32 1
-  %v0 = load <4 x i32>* %vp0
-  %v1 = load <4 x i32>* %vp1
+  %v0 = load <4 x i32>, <4 x i32>* %vp0
+  %v1 = load <4 x i32>, <4 x i32>* %vp1
   %vicmp = icmp slt <4 x i32> %v0, %v1
   %icmp = extractelement <4 x i1> %vicmp, i32 0
   call void @escape_vla_and_icmp(i8* null, i1 zeroext %icmp)
@@ -50,8 +50,8 @@
 
 spill_vectors:
   %vp1 = getelementptr <4 x i32>, <4 x i32>* %vp0, i32 1
-  %v0 = load <4 x i32>* %vp0
-  %v1 = load <4 x i32>* %vp1
+  %v0 = load <4 x i32>, <4 x i32>* %vp0
+  %v1 = load <4 x i32>, <4 x i32>* %vp1
   %vicmp = icmp slt <4 x i32> %v0, %v1
   %icmp = extractelement <4 x i1> %vicmp, i32 0
   %vla = alloca i8, i32 %n
@@ -78,8 +78,8 @@
 
 spill_vectors:
   %vp1 = getelementptr <4 x i32>, <4 x i32>* %vp0, i32 1
-  %v0 = load <4 x i32>* %vp0
-  %v1 = load <4 x i32>* %vp1
+  %v0 = load <4 x i32>, <4 x i32>* %vp0
+  %v1 = load <4 x i32>, <4 x i32>* %vp1
   %vicmp = icmp slt <4 x i32> %v0, %v1
   %icmp = extractelement <4 x i1> %vicmp, i32 0
   %vla = alloca i8, i32 %n
diff --git a/llvm/test/CodeGen/X86/mem-promote-integers.ll b/llvm/test/CodeGen/X86/mem-promote-integers.ll
index ea38b95..3023cf2 100644
--- a/llvm/test/CodeGen/X86/mem-promote-integers.ll
+++ b/llvm/test/CodeGen/X86/mem-promote-integers.ll
@@ -5,7 +5,7 @@
 ; RUN: llc -march=x86-64 < %s > /dev/null
 
 define <1 x i8> @test_1xi8(<1 x i8> %x, <1 x i8>* %b) {
-  %bb = load <1 x i8>* %b
+  %bb = load <1 x i8>, <1 x i8>* %b
   %tt = xor <1 x i8> %x, %bb
   store <1 x i8> %tt, <1 x i8>* %b
   br label %next
@@ -16,7 +16,7 @@
 
 
 define <1 x i16> @test_1xi16(<1 x i16> %x, <1 x i16>* %b) {
-  %bb = load <1 x i16>* %b
+  %bb = load <1 x i16>, <1 x i16>* %b
   %tt = xor <1 x i16> %x, %bb
   store <1 x i16> %tt, <1 x i16>* %b
   br label %next
@@ -27,7 +27,7 @@
 
 
 define <1 x i32> @test_1xi32(<1 x i32> %x, <1 x i32>* %b) {
-  %bb = load <1 x i32>* %b
+  %bb = load <1 x i32>, <1 x i32>* %b
   %tt = xor <1 x i32> %x, %bb
   store <1 x i32> %tt, <1 x i32>* %b
   br label %next
@@ -38,7 +38,7 @@
 
 
 define <1 x i64> @test_1xi64(<1 x i64> %x, <1 x i64>* %b) {
-  %bb = load <1 x i64>* %b
+  %bb = load <1 x i64>, <1 x i64>* %b
   %tt = xor <1 x i64> %x, %bb
   store <1 x i64> %tt, <1 x i64>* %b
   br label %next
@@ -49,7 +49,7 @@
 
 
 define <1 x i128> @test_1xi128(<1 x i128> %x, <1 x i128>* %b) {
-  %bb = load <1 x i128>* %b
+  %bb = load <1 x i128>, <1 x i128>* %b
   %tt = xor <1 x i128> %x, %bb
   store <1 x i128> %tt, <1 x i128>* %b
   br label %next
@@ -60,7 +60,7 @@
 
 
 define <1 x i256> @test_1xi256(<1 x i256> %x, <1 x i256>* %b) {
-  %bb = load <1 x i256>* %b
+  %bb = load <1 x i256>, <1 x i256>* %b
   %tt = xor <1 x i256> %x, %bb
   store <1 x i256> %tt, <1 x i256>* %b
   br label %next
@@ -71,7 +71,7 @@
 
 
 define <1 x i512> @test_1xi512(<1 x i512> %x, <1 x i512>* %b) {
-  %bb = load <1 x i512>* %b
+  %bb = load <1 x i512>, <1 x i512>* %b
   %tt = xor <1 x i512> %x, %bb
   store <1 x i512> %tt, <1 x i512>* %b
   br label %next
@@ -82,7 +82,7 @@
 
 
 define <2 x i8> @test_2xi8(<2 x i8> %x, <2 x i8>* %b) {
-  %bb = load <2 x i8>* %b
+  %bb = load <2 x i8>, <2 x i8>* %b
   %tt = xor <2 x i8> %x, %bb
   store <2 x i8> %tt, <2 x i8>* %b
   br label %next
@@ -93,7 +93,7 @@
 
 
 define <2 x i16> @test_2xi16(<2 x i16> %x, <2 x i16>* %b) {
-  %bb = load <2 x i16>* %b
+  %bb = load <2 x i16>, <2 x i16>* %b
   %tt = xor <2 x i16> %x, %bb
   store <2 x i16> %tt, <2 x i16>* %b
   br label %next
@@ -104,7 +104,7 @@
 
 
 define <2 x i32> @test_2xi32(<2 x i32> %x, <2 x i32>* %b) {
-  %bb = load <2 x i32>* %b
+  %bb = load <2 x i32>, <2 x i32>* %b
   %tt = xor <2 x i32> %x, %bb
   store <2 x i32> %tt, <2 x i32>* %b
   br label %next
@@ -115,7 +115,7 @@
 
 
 define <2 x i64> @test_2xi64(<2 x i64> %x, <2 x i64>* %b) {
-  %bb = load <2 x i64>* %b
+  %bb = load <2 x i64>, <2 x i64>* %b
   %tt = xor <2 x i64> %x, %bb
   store <2 x i64> %tt, <2 x i64>* %b
   br label %next
@@ -126,7 +126,7 @@
 
 
 define <2 x i128> @test_2xi128(<2 x i128> %x, <2 x i128>* %b) {
-  %bb = load <2 x i128>* %b
+  %bb = load <2 x i128>, <2 x i128>* %b
   %tt = xor <2 x i128> %x, %bb
   store <2 x i128> %tt, <2 x i128>* %b
   br label %next
@@ -137,7 +137,7 @@
 
 
 define <2 x i256> @test_2xi256(<2 x i256> %x, <2 x i256>* %b) {
-  %bb = load <2 x i256>* %b
+  %bb = load <2 x i256>, <2 x i256>* %b
   %tt = xor <2 x i256> %x, %bb
   store <2 x i256> %tt, <2 x i256>* %b
   br label %next
@@ -148,7 +148,7 @@
 
 
 define <2 x i512> @test_2xi512(<2 x i512> %x, <2 x i512>* %b) {
-  %bb = load <2 x i512>* %b
+  %bb = load <2 x i512>, <2 x i512>* %b
   %tt = xor <2 x i512> %x, %bb
   store <2 x i512> %tt, <2 x i512>* %b
   br label %next
@@ -159,7 +159,7 @@
 
 
 define <3 x i8> @test_3xi8(<3 x i8> %x, <3 x i8>* %b) {
-  %bb = load <3 x i8>* %b
+  %bb = load <3 x i8>, <3 x i8>* %b
   %tt = xor <3 x i8> %x, %bb
   store <3 x i8> %tt, <3 x i8>* %b
   br label %next
@@ -170,7 +170,7 @@
 
 
 define <3 x i16> @test_3xi16(<3 x i16> %x, <3 x i16>* %b) {
-  %bb = load <3 x i16>* %b
+  %bb = load <3 x i16>, <3 x i16>* %b
   %tt = xor <3 x i16> %x, %bb
   store <3 x i16> %tt, <3 x i16>* %b
   br label %next
@@ -181,7 +181,7 @@
 
 
 define <3 x i32> @test_3xi32(<3 x i32> %x, <3 x i32>* %b) {
-  %bb = load <3 x i32>* %b
+  %bb = load <3 x i32>, <3 x i32>* %b
   %tt = xor <3 x i32> %x, %bb
   store <3 x i32> %tt, <3 x i32>* %b
   br label %next
@@ -192,7 +192,7 @@
 
 
 define <3 x i64> @test_3xi64(<3 x i64> %x, <3 x i64>* %b) {
-  %bb = load <3 x i64>* %b
+  %bb = load <3 x i64>, <3 x i64>* %b
   %tt = xor <3 x i64> %x, %bb
   store <3 x i64> %tt, <3 x i64>* %b
   br label %next
@@ -203,7 +203,7 @@
 
 
 define <3 x i128> @test_3xi128(<3 x i128> %x, <3 x i128>* %b) {
-  %bb = load <3 x i128>* %b
+  %bb = load <3 x i128>, <3 x i128>* %b
   %tt = xor <3 x i128> %x, %bb
   store <3 x i128> %tt, <3 x i128>* %b
   br label %next
@@ -214,7 +214,7 @@
 
 
 define <3 x i256> @test_3xi256(<3 x i256> %x, <3 x i256>* %b) {
-  %bb = load <3 x i256>* %b
+  %bb = load <3 x i256>, <3 x i256>* %b
   %tt = xor <3 x i256> %x, %bb
   store <3 x i256> %tt, <3 x i256>* %b
   br label %next
@@ -225,7 +225,7 @@
 
 
 define <3 x i512> @test_3xi512(<3 x i512> %x, <3 x i512>* %b) {
-  %bb = load <3 x i512>* %b
+  %bb = load <3 x i512>, <3 x i512>* %b
   %tt = xor <3 x i512> %x, %bb
   store <3 x i512> %tt, <3 x i512>* %b
   br label %next
@@ -236,7 +236,7 @@
 
 
 define <4 x i8> @test_4xi8(<4 x i8> %x, <4 x i8>* %b) {
-  %bb = load <4 x i8>* %b
+  %bb = load <4 x i8>, <4 x i8>* %b
   %tt = xor <4 x i8> %x, %bb
   store <4 x i8> %tt, <4 x i8>* %b
   br label %next
@@ -247,7 +247,7 @@
 
 
 define <4 x i16> @test_4xi16(<4 x i16> %x, <4 x i16>* %b) {
-  %bb = load <4 x i16>* %b
+  %bb = load <4 x i16>, <4 x i16>* %b
   %tt = xor <4 x i16> %x, %bb
   store <4 x i16> %tt, <4 x i16>* %b
   br label %next
@@ -258,7 +258,7 @@
 
 
 define <4 x i32> @test_4xi32(<4 x i32> %x, <4 x i32>* %b) {
-  %bb = load <4 x i32>* %b
+  %bb = load <4 x i32>, <4 x i32>* %b
   %tt = xor <4 x i32> %x, %bb
   store <4 x i32> %tt, <4 x i32>* %b
   br label %next
@@ -269,7 +269,7 @@
 
 
 define <4 x i64> @test_4xi64(<4 x i64> %x, <4 x i64>* %b) {
-  %bb = load <4 x i64>* %b
+  %bb = load <4 x i64>, <4 x i64>* %b
   %tt = xor <4 x i64> %x, %bb
   store <4 x i64> %tt, <4 x i64>* %b
   br label %next
@@ -280,7 +280,7 @@
 
 
 define <4 x i128> @test_4xi128(<4 x i128> %x, <4 x i128>* %b) {
-  %bb = load <4 x i128>* %b
+  %bb = load <4 x i128>, <4 x i128>* %b
   %tt = xor <4 x i128> %x, %bb
   store <4 x i128> %tt, <4 x i128>* %b
   br label %next
@@ -291,7 +291,7 @@
 
 
 define <4 x i256> @test_4xi256(<4 x i256> %x, <4 x i256>* %b) {
-  %bb = load <4 x i256>* %b
+  %bb = load <4 x i256>, <4 x i256>* %b
   %tt = xor <4 x i256> %x, %bb
   store <4 x i256> %tt, <4 x i256>* %b
   br label %next
@@ -302,7 +302,7 @@
 
 
 define <4 x i512> @test_4xi512(<4 x i512> %x, <4 x i512>* %b) {
-  %bb = load <4 x i512>* %b
+  %bb = load <4 x i512>, <4 x i512>* %b
   %tt = xor <4 x i512> %x, %bb
   store <4 x i512> %tt, <4 x i512>* %b
   br label %next
@@ -313,7 +313,7 @@
 
 
 define <5 x i8> @test_5xi8(<5 x i8> %x, <5 x i8>* %b) {
-  %bb = load <5 x i8>* %b
+  %bb = load <5 x i8>, <5 x i8>* %b
   %tt = xor <5 x i8> %x, %bb
   store <5 x i8> %tt, <5 x i8>* %b
   br label %next
@@ -324,7 +324,7 @@
 
 
 define <5 x i16> @test_5xi16(<5 x i16> %x, <5 x i16>* %b) {
-  %bb = load <5 x i16>* %b
+  %bb = load <5 x i16>, <5 x i16>* %b
   %tt = xor <5 x i16> %x, %bb
   store <5 x i16> %tt, <5 x i16>* %b
   br label %next
@@ -335,7 +335,7 @@
 
 
 define <5 x i32> @test_5xi32(<5 x i32> %x, <5 x i32>* %b) {
-  %bb = load <5 x i32>* %b
+  %bb = load <5 x i32>, <5 x i32>* %b
   %tt = xor <5 x i32> %x, %bb
   store <5 x i32> %tt, <5 x i32>* %b
   br label %next
@@ -346,7 +346,7 @@
 
 
 define <5 x i64> @test_5xi64(<5 x i64> %x, <5 x i64>* %b) {
-  %bb = load <5 x i64>* %b
+  %bb = load <5 x i64>, <5 x i64>* %b
   %tt = xor <5 x i64> %x, %bb
   store <5 x i64> %tt, <5 x i64>* %b
   br label %next
@@ -357,7 +357,7 @@
 
 
 define <5 x i128> @test_5xi128(<5 x i128> %x, <5 x i128>* %b) {
-  %bb = load <5 x i128>* %b
+  %bb = load <5 x i128>, <5 x i128>* %b
   %tt = xor <5 x i128> %x, %bb
   store <5 x i128> %tt, <5 x i128>* %b
   br label %next
@@ -368,7 +368,7 @@
 
 
 define <5 x i256> @test_5xi256(<5 x i256> %x, <5 x i256>* %b) {
-  %bb = load <5 x i256>* %b
+  %bb = load <5 x i256>, <5 x i256>* %b
   %tt = xor <5 x i256> %x, %bb
   store <5 x i256> %tt, <5 x i256>* %b
   br label %next
@@ -379,7 +379,7 @@
 
 
 define <5 x i512> @test_5xi512(<5 x i512> %x, <5 x i512>* %b) {
-  %bb = load <5 x i512>* %b
+  %bb = load <5 x i512>, <5 x i512>* %b
   %tt = xor <5 x i512> %x, %bb
   store <5 x i512> %tt, <5 x i512>* %b
   br label %next
diff --git a/llvm/test/CodeGen/X86/misaligned-memset.ll b/llvm/test/CodeGen/X86/misaligned-memset.ll
index 21f8bf2..6e22e2c 100644
--- a/llvm/test/CodeGen/X86/misaligned-memset.ll
+++ b/llvm/test/CodeGen/X86/misaligned-memset.ll
@@ -8,7 +8,7 @@
   %retval = alloca i32, align 4
   store i32 0, i32* %retval
   call void @llvm.memset.p0i8.i64(i8* bitcast (i64* getelementptr inbounds ([3 x i64]* @a, i32 0, i64 1) to i8*), i8 0, i64 16, i32 1, i1 false)
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
diff --git a/llvm/test/CodeGen/X86/misched-aa-colored.ll b/llvm/test/CodeGen/X86/misched-aa-colored.ll
index 440b661..ef7b98a 100644
--- a/llvm/test/CodeGen/X86/misched-aa-colored.ll
+++ b/llvm/test/CodeGen/X86/misched-aa-colored.ll
@@ -156,9 +156,9 @@
   %Op.i = alloca %"class.llvm::SDValue.3.603.963.1923.2043.2283.4083", align 8
   %0 = bitcast %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199"* %ref.tmp.i to i8*
   %retval.sroa.0.0.idx.i36 = getelementptr inbounds %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199", %"struct.std::pair.112.119.719.1079.2039.2159.2399.4199"* %ref.tmp.i, i64 0, i32 1, i32 0, i32 0
-  %retval.sroa.0.0.copyload.i37 = load i32* %retval.sroa.0.0.idx.i36, align 8
+  %retval.sroa.0.0.copyload.i37 = load i32, i32* %retval.sroa.0.0.idx.i36, align 8
   call void @llvm.lifetime.end(i64 24, i8* %0) #1
-  %agg.tmp8.sroa.2.0.copyload = load i32* undef, align 8
+  %agg.tmp8.sroa.2.0.copyload = load i32, i32* undef, align 8
   %1 = bitcast %"class.llvm::SDValue.3.603.963.1923.2043.2283.4083"* %Op.i to i8*
   call void @llvm.lifetime.start(i64 16, i8* %1) #1
   %2 = getelementptr %"class.llvm::SDValue.3.603.963.1923.2043.2283.4083", %"class.llvm::SDValue.3.603.963.1923.2043.2283.4083"* %Op.i, i64 0, i32 1
diff --git a/llvm/test/CodeGen/X86/misched-aa-mmos.ll b/llvm/test/CodeGen/X86/misched-aa-mmos.ll
index 5d51c28..c457a5e 100644
--- a/llvm/test/CodeGen/X86/misched-aa-mmos.ll
+++ b/llvm/test/CodeGen/X86/misched-aa-mmos.ll
@@ -20,11 +20,11 @@
 cond.end.i:
   %significand.i18.i = getelementptr inbounds %c1, %c1* %temp_rhs, i64 0, i32 1
   %exponent.i = getelementptr inbounds %c1, %c1* %temp_rhs, i64 0, i32 2
-  %0 = load i16* %exponent.i, align 8
+  %0 = load i16, i16* %exponent.i, align 8
   %sub.i = add i16 %0, -1
   store i16 %sub.i, i16* %exponent.i, align 8
   %parts.i.i = bitcast %u1* %significand.i18.i to i64**
-  %1 = load i64** %parts.i.i, align 8
+  %1 = load i64*, i64** %parts.i.i, align 8
   %call5.i = call zeroext i1 @bar(i64* %1, i32 undef) #1
   unreachable
 
diff --git a/llvm/test/CodeGen/X86/misched-balance.ll b/llvm/test/CodeGen/X86/misched-balance.ll
index 954575e..ca3b579 100644
--- a/llvm/test/CodeGen/X86/misched-balance.ll
+++ b/llvm/test/CodeGen/X86/misched-balance.ll
@@ -48,62 +48,62 @@
 ; CHECK-LABEL: %end
 for.body:
   %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
-  %tmp57 = load i32* %tmp56, align 4
+  %tmp57 = load i32, i32* %tmp56, align 4
   %arrayidx12.us.i61 = getelementptr inbounds i32, i32* %pre, i64 %indvars.iv42.i
-  %tmp58 = load i32* %arrayidx12.us.i61, align 4
+  %tmp58 = load i32, i32* %arrayidx12.us.i61, align 4
   %mul.us.i = mul nsw i32 %tmp58, %tmp57
   %arrayidx8.us.i.1 = getelementptr inbounds i32, i32* %tmp56, i64 1
-  %tmp59 = load i32* %arrayidx8.us.i.1, align 4
+  %tmp59 = load i32, i32* %arrayidx8.us.i.1, align 4
   %arrayidx12.us.i61.1 = getelementptr inbounds i32, i32* %pre94, i64 %indvars.iv42.i
-  %tmp60 = load i32* %arrayidx12.us.i61.1, align 4
+  %tmp60 = load i32, i32* %arrayidx12.us.i61.1, align 4
   %mul.us.i.1 = mul nsw i32 %tmp60, %tmp59
   %add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i
   %arrayidx8.us.i.2 = getelementptr inbounds i32, i32* %tmp56, i64 2
-  %tmp61 = load i32* %arrayidx8.us.i.2, align 4
+  %tmp61 = load i32, i32* %arrayidx8.us.i.2, align 4
   %arrayidx12.us.i61.2 = getelementptr inbounds i32, i32* %pre95, i64 %indvars.iv42.i
-  %tmp62 = load i32* %arrayidx12.us.i61.2, align 4
+  %tmp62 = load i32, i32* %arrayidx12.us.i61.2, align 4
   %mul.us.i.2 = mul nsw i32 %tmp62, %tmp61
   %add.us.i.2 = add nsw i32 %mul.us.i.2, %add.us.i.1
   %arrayidx8.us.i.3 = getelementptr inbounds i32, i32* %tmp56, i64 3
-  %tmp63 = load i32* %arrayidx8.us.i.3, align 4
+  %tmp63 = load i32, i32* %arrayidx8.us.i.3, align 4
   %arrayidx12.us.i61.3 = getelementptr inbounds i32, i32* %pre96, i64 %indvars.iv42.i
-  %tmp64 = load i32* %arrayidx12.us.i61.3, align 4
+  %tmp64 = load i32, i32* %arrayidx12.us.i61.3, align 4
   %mul.us.i.3 = mul nsw i32 %tmp64, %tmp63
   %add.us.i.3 = add nsw i32 %mul.us.i.3, %add.us.i.2
   %arrayidx8.us.i.4 = getelementptr inbounds i32, i32* %tmp56, i64 4
-  %tmp65 = load i32* %arrayidx8.us.i.4, align 4
+  %tmp65 = load i32, i32* %arrayidx8.us.i.4, align 4
   %arrayidx12.us.i61.4 = getelementptr inbounds i32, i32* %pre97, i64 %indvars.iv42.i
-  %tmp66 = load i32* %arrayidx12.us.i61.4, align 4
+  %tmp66 = load i32, i32* %arrayidx12.us.i61.4, align 4
   %mul.us.i.4 = mul nsw i32 %tmp66, %tmp65
   %add.us.i.4 = add nsw i32 %mul.us.i.4, %add.us.i.3
   %arrayidx8.us.i.5 = getelementptr inbounds i32, i32* %tmp56, i64 5
-  %tmp67 = load i32* %arrayidx8.us.i.5, align 4
+  %tmp67 = load i32, i32* %arrayidx8.us.i.5, align 4
   %arrayidx12.us.i61.5 = getelementptr inbounds i32, i32* %pre98, i64 %indvars.iv42.i
-  %tmp68 = load i32* %arrayidx12.us.i61.5, align 4
+  %tmp68 = load i32, i32* %arrayidx12.us.i61.5, align 4
   %mul.us.i.5 = mul nsw i32 %tmp68, %tmp67
   %add.us.i.5 = add nsw i32 %mul.us.i.5, %add.us.i.4
   %arrayidx8.us.i.6 = getelementptr inbounds i32, i32* %tmp56, i64 6
-  %tmp69 = load i32* %arrayidx8.us.i.6, align 4
+  %tmp69 = load i32, i32* %arrayidx8.us.i.6, align 4
   %arrayidx12.us.i61.6 = getelementptr inbounds i32, i32* %pre99, i64 %indvars.iv42.i
-  %tmp70 = load i32* %arrayidx12.us.i61.6, align 4
+  %tmp70 = load i32, i32* %arrayidx12.us.i61.6, align 4
   %mul.us.i.6 = mul nsw i32 %tmp70, %tmp69
   %add.us.i.6 = add nsw i32 %mul.us.i.6, %add.us.i.5
   %arrayidx8.us.i.7 = getelementptr inbounds i32, i32* %tmp56, i64 7
-  %tmp71 = load i32* %arrayidx8.us.i.7, align 4
+  %tmp71 = load i32, i32* %arrayidx8.us.i.7, align 4
   %arrayidx12.us.i61.7 = getelementptr inbounds i32, i32* %pre100, i64 %indvars.iv42.i
-  %tmp72 = load i32* %arrayidx12.us.i61.7, align 4
+  %tmp72 = load i32, i32* %arrayidx12.us.i61.7, align 4
   %mul.us.i.7 = mul nsw i32 %tmp72, %tmp71
   %add.us.i.7 = add nsw i32 %mul.us.i.7, %add.us.i.6
   %arrayidx8.us.i.8 = getelementptr inbounds i32, i32* %tmp56, i64 8
-  %tmp73 = load i32* %arrayidx8.us.i.8, align 4
+  %tmp73 = load i32, i32* %arrayidx8.us.i.8, align 4
   %arrayidx12.us.i61.8 = getelementptr inbounds i32, i32* %pre101, i64 %indvars.iv42.i
-  %tmp74 = load i32* %arrayidx12.us.i61.8, align 4
+  %tmp74 = load i32, i32* %arrayidx12.us.i61.8, align 4
   %mul.us.i.8 = mul nsw i32 %tmp74, %tmp73
   %add.us.i.8 = add nsw i32 %mul.us.i.8, %add.us.i.7
   %arrayidx8.us.i.9 = getelementptr inbounds i32, i32* %tmp56, i64 9
-  %tmp75 = load i32* %arrayidx8.us.i.9, align 4
+  %tmp75 = load i32, i32* %arrayidx8.us.i.9, align 4
   %arrayidx12.us.i61.9 = getelementptr inbounds i32, i32* %pre102, i64 %indvars.iv42.i
-  %tmp76 = load i32* %arrayidx12.us.i61.9, align 4
+  %tmp76 = load i32, i32* %arrayidx12.us.i61.9, align 4
   %mul.us.i.9 = mul nsw i32 %tmp76, %tmp75
   %add.us.i.9 = add nsw i32 %mul.us.i.9, %add.us.i.8
   %arrayidx16.us.i = getelementptr inbounds i32, i32* %tmp55, i64 %indvars.iv42.i
@@ -159,46 +159,46 @@
   br label %for.body
 for.body:
   %indvars.iv42.i = phi i64 [ %indvars.iv.next43.i, %for.body ], [ 0, %entry ]
-  %tmp57 = load i32* %tmp56, align 4
+  %tmp57 = load i32, i32* %tmp56, align 4
   %arrayidx12.us.i61 = getelementptr inbounds i32, i32* %pre, i64 %indvars.iv42.i
-  %tmp58 = load i32* %arrayidx12.us.i61, align 4
+  %tmp58 = load i32, i32* %arrayidx12.us.i61, align 4
   %arrayidx8.us.i.1 = getelementptr inbounds i32, i32* %tmp56, i64 1
-  %tmp59 = load i32* %arrayidx8.us.i.1, align 4
+  %tmp59 = load i32, i32* %arrayidx8.us.i.1, align 4
   %arrayidx12.us.i61.1 = getelementptr inbounds i32, i32* %pre94, i64 %indvars.iv42.i
-  %tmp60 = load i32* %arrayidx12.us.i61.1, align 4
+  %tmp60 = load i32, i32* %arrayidx12.us.i61.1, align 4
   %arrayidx8.us.i.2 = getelementptr inbounds i32, i32* %tmp56, i64 2
-  %tmp61 = load i32* %arrayidx8.us.i.2, align 4
+  %tmp61 = load i32, i32* %arrayidx8.us.i.2, align 4
   %arrayidx12.us.i61.2 = getelementptr inbounds i32, i32* %pre95, i64 %indvars.iv42.i
-  %tmp62 = load i32* %arrayidx12.us.i61.2, align 4
+  %tmp62 = load i32, i32* %arrayidx12.us.i61.2, align 4
   %arrayidx8.us.i.3 = getelementptr inbounds i32, i32* %tmp56, i64 3
-  %tmp63 = load i32* %arrayidx8.us.i.3, align 4
+  %tmp63 = load i32, i32* %arrayidx8.us.i.3, align 4
   %arrayidx12.us.i61.3 = getelementptr inbounds i32, i32* %pre96, i64 %indvars.iv42.i
-  %tmp64 = load i32* %arrayidx12.us.i61.3, align 4
+  %tmp64 = load i32, i32* %arrayidx12.us.i61.3, align 4
   %arrayidx8.us.i.4 = getelementptr inbounds i32, i32* %tmp56, i64 4
-  %tmp65 = load i32* %arrayidx8.us.i.4, align 4
+  %tmp65 = load i32, i32* %arrayidx8.us.i.4, align 4
   %arrayidx12.us.i61.4 = getelementptr inbounds i32, i32* %pre97, i64 %indvars.iv42.i
-  %tmp66 = load i32* %arrayidx12.us.i61.4, align 4
+  %tmp66 = load i32, i32* %arrayidx12.us.i61.4, align 4
   %arrayidx8.us.i.5 = getelementptr inbounds i32, i32* %tmp56, i64 5
-  %tmp67 = load i32* %arrayidx8.us.i.5, align 4
+  %tmp67 = load i32, i32* %arrayidx8.us.i.5, align 4
   %arrayidx12.us.i61.5 = getelementptr inbounds i32, i32* %pre98, i64 %indvars.iv42.i
-  %tmp68 = load i32* %arrayidx12.us.i61.5, align 4
+  %tmp68 = load i32, i32* %arrayidx12.us.i61.5, align 4
   %arrayidx8.us.i.6 = getelementptr inbounds i32, i32* %tmp56, i64 6
-  %tmp69 = load i32* %arrayidx8.us.i.6, align 4
+  %tmp69 = load i32, i32* %arrayidx8.us.i.6, align 4
   %arrayidx12.us.i61.6 = getelementptr inbounds i32, i32* %pre99, i64 %indvars.iv42.i
-  %tmp70 = load i32* %arrayidx12.us.i61.6, align 4
+  %tmp70 = load i32, i32* %arrayidx12.us.i61.6, align 4
   %mul.us.i = mul nsw i32 %tmp58, %tmp57
   %arrayidx8.us.i.7 = getelementptr inbounds i32, i32* %tmp56, i64 7
-  %tmp71 = load i32* %arrayidx8.us.i.7, align 4
+  %tmp71 = load i32, i32* %arrayidx8.us.i.7, align 4
   %arrayidx12.us.i61.7 = getelementptr inbounds i32, i32* %pre100, i64 %indvars.iv42.i
-  %tmp72 = load i32* %arrayidx12.us.i61.7, align 4
+  %tmp72 = load i32, i32* %arrayidx12.us.i61.7, align 4
   %arrayidx8.us.i.8 = getelementptr inbounds i32, i32* %tmp56, i64 8
-  %tmp73 = load i32* %arrayidx8.us.i.8, align 4
+  %tmp73 = load i32, i32* %arrayidx8.us.i.8, align 4
   %arrayidx12.us.i61.8 = getelementptr inbounds i32, i32* %pre101, i64 %indvars.iv42.i
-  %tmp74 = load i32* %arrayidx12.us.i61.8, align 4
+  %tmp74 = load i32, i32* %arrayidx12.us.i61.8, align 4
   %arrayidx8.us.i.9 = getelementptr inbounds i32, i32* %tmp56, i64 9
-  %tmp75 = load i32* %arrayidx8.us.i.9, align 4
+  %tmp75 = load i32, i32* %arrayidx8.us.i.9, align 4
   %arrayidx12.us.i61.9 = getelementptr inbounds i32, i32* %pre102, i64 %indvars.iv42.i
-  %tmp76 = load i32* %arrayidx12.us.i61.9, align 4
+  %tmp76 = load i32, i32* %arrayidx12.us.i61.9, align 4
   %mul.us.i.1 = mul nsw i32 %tmp60, %tmp59
   %add.us.i.1 = add nsw i32 %mul.us.i.1, %mul.us.i
   %mul.us.i.2 = mul nsw i32 %tmp62, %tmp61
@@ -243,20 +243,20 @@
 @d = external global i32, align 4
 define i32 @encpc1() nounwind {
 entry:
-  %l1 = load i32* @a, align 16
+  %l1 = load i32, i32* @a, align 16
   %conv = shl i32 %l1, 8
   %s5 = lshr i32 %l1, 8
   %add = or i32 %conv, %s5
   store i32 %add, i32* @b
-  %l6 = load i32* @a
-  %l7 = load i32* @c
+  %l6 = load i32, i32* @a
+  %l7 = load i32, i32* @c
   %add.i = add i32 %l7, %l6
   %idxprom.i = zext i32 %l7 to i64
   %arrayidx.i = getelementptr inbounds i32, i32* @d, i64 %idxprom.i
-  %l8 = load i32* %arrayidx.i
+  %l8 = load i32, i32* %arrayidx.i
   store i32 346, i32* @c
   store i32 20021, i32* @d
-  %l9 = load i32* @a
+  %l9 = load i32, i32* @a
   store i32 %l8, i32* @a
   store i32 %l9, i32* @b
   store i32 %add.i, i32* @c
diff --git a/llvm/test/CodeGen/X86/misched-code-difference-with-debug.ll b/llvm/test/CodeGen/X86/misched-code-difference-with-debug.ll
index fb2a986..0fc7d00 100644
--- a/llvm/test/CodeGen/X86/misched-code-difference-with-debug.ll
+++ b/llvm/test/CodeGen/X86/misched-code-difference-with-debug.ll
@@ -32,10 +32,10 @@
 define void @test_without_debug() {
 entry:
   %c = alloca %class.C, align 1
-  %0 = load i8* @argc, align 1
+  %0 = load i8, i8* @argc, align 1
   %conv = sext i8 %0 to i32
   %call = call i32 (%class.C*, i8, i8, i8, ...)* @test_function(%class.C* %c, i8 signext 0, i8 signext %0, i8 signext 0, i32 %conv)
-  %1 = load i8* @argc, align 1
+  %1 = load i8, i8* @argc, align 1
   %call2 = call i32 (%class.C*, i8, i8, i8, ...)* @test_function(%class.C* %c, i8 signext 0, i8 signext %1, i8 signext 0, i32 %conv)
   ret void
 }
@@ -46,12 +46,12 @@
 define void @test_with_debug() {
 entry:
   %c = alloca %class.C, align 1
-  %0 = load i8* @argc, align 1
+  %0 = load i8, i8* @argc, align 1
   tail call void @llvm.dbg.value(metadata i8 %0, i64 0, metadata !19, metadata !29)
   %conv = sext i8 %0 to i32
   tail call void @llvm.dbg.value(metadata %class.C* %c, i64 0, metadata !18, metadata !29)
   %call = call i32 (%class.C*, i8, i8, i8, ...)* @test_function(%class.C* %c, i8 signext 0, i8 signext %0, i8 signext 0, i32 %conv)
-  %1 = load i8* @argc, align 1
+  %1 = load i8, i8* @argc, align 1
   call void @llvm.dbg.value(metadata %class.C* %c, i64 0, metadata !18, metadata !29)
   %call2 = call i32 (%class.C*, i8, i8, i8, ...)* @test_function(%class.C* %c, i8 signext 0, i8 signext %1, i8 signext 0, i32 %conv)
   ret void
diff --git a/llvm/test/CodeGen/X86/misched-crash.ll b/llvm/test/CodeGen/X86/misched-crash.ll
index 7ebfee8..fa7de1a 100644
--- a/llvm/test/CodeGen/X86/misched-crash.ll
+++ b/llvm/test/CodeGen/X86/misched-crash.ll
@@ -9,7 +9,7 @@
   %cmp = icmp ult i64 %_x1, %_x2
   %cond = select i1 %cmp, i64 %_x1, i64 %_x2
   %cond10 = select i1 %cmp, i64 %_x2, i64 %_x1
-  %0 = load i64* null, align 8
+  %0 = load i64, i64* null, align 8
   %cmp16 = icmp ult i64 %cond, %0
   %cmp23 = icmp ugt i64 %cond10, 0
   br i1 %cmp16, label %land.lhs.true21, label %return
@@ -27,7 +27,7 @@
 for.body34.i:                                     ; preds = %for.inc39.i, %if.then24
   %index.178.i = phi i64 [ %add21.i, %if.then24 ], [ %inc41.i, %for.inc39.i ]
   %arrayidx35.i = getelementptr inbounds i8, i8* %plane, i64 %index.178.i
-  %1 = load i8* %arrayidx35.i, align 1
+  %1 = load i8, i8* %arrayidx35.i, align 1
   %tobool36.i = icmp eq i8 %1, 0
   br i1 %tobool36.i, label %for.inc39.i, label %return
 
diff --git a/llvm/test/CodeGen/X86/misched-fusion.ll b/llvm/test/CodeGen/X86/misched-fusion.ll
index 1f2ad3f..0975faa 100644
--- a/llvm/test/CodeGen/X86/misched-fusion.ll
+++ b/llvm/test/CodeGen/X86/misched-fusion.ll
@@ -16,7 +16,7 @@
 
 loop1:
   %cond = icmp eq i32* %var, null
-  %next.load = load i32** %next.ptr
+  %next.load = load i32*, i32** %next.ptr
   br i1 %cond, label %loop, label %loop2
 
 loop2:                                           ; preds = %loop1
@@ -42,8 +42,8 @@
 loop1:
   %var2 = sub i32 %var, 1
   %cond = icmp eq i32 %var2, 0
-  %next.load = load i32** %next.ptr
-  %next.var = load i32* %next.load
+  %next.load = load i32*, i32** %next.ptr
+  %next.var = load i32, i32* %next.load
   br i1 %cond, label %loop, label %loop2
 
 loop2:
@@ -70,8 +70,8 @@
 loop1:                                            ; preds = %loop2a, %loop2b
   %var2 = sub i32 %var, 1
   %cond = icmp slt i32 %var2, 0
-  %next.load = load i32** %next.ptr
-  %next.var = load i32* %next.load
+  %next.load = load i32*, i32** %next.ptr
+  %next.var = load i32, i32* %next.load
   br i1 %cond, label %loop2a, label %loop2b
 
 loop2b:                                           ; preds = %loop1
@@ -97,8 +97,8 @@
 loop1:                                            ; preds = %loop2a, %loop2b
   %var2 = sub i32 %var, 1
   %cond = icmp ult i32 %var2, %n
-  %next.load = load i32** %next.ptr
-  %next.var = load i32* %next.load
+  %next.load = load i32*, i32** %next.ptr
+  %next.var = load i32, i32* %next.load
   br i1 %cond, label %loop2a, label %loop2b
 
 loop2b:                                           ; preds = %loop1
diff --git a/llvm/test/CodeGen/X86/misched-matmul.ll b/llvm/test/CodeGen/X86/misched-matmul.ll
index 01720f2..3843446 100644
--- a/llvm/test/CodeGen/X86/misched-matmul.ll
+++ b/llvm/test/CodeGen/X86/misched-matmul.ll
@@ -15,86 +15,86 @@
 define void @wrap_mul4(double* nocapture %Out, [4 x double]* nocapture %A, [4 x double]* nocapture %B) #0 {
 entry:
   %arrayidx1.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 0, i64 0
-  %0 = load double* %arrayidx1.i, align 8
+  %0 = load double, double* %arrayidx1.i, align 8
   %arrayidx3.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 0
-  %1 = load double* %arrayidx3.i, align 8
+  %1 = load double, double* %arrayidx3.i, align 8
   %mul.i = fmul double %0, %1
   %arrayidx5.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 0, i64 1
-  %2 = load double* %arrayidx5.i, align 8
+  %2 = load double, double* %arrayidx5.i, align 8
   %arrayidx7.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 0
-  %3 = load double* %arrayidx7.i, align 8
+  %3 = load double, double* %arrayidx7.i, align 8
   %mul8.i = fmul double %2, %3
   %add.i = fadd double %mul.i, %mul8.i
   %arrayidx10.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 0, i64 2
-  %4 = load double* %arrayidx10.i, align 8
+  %4 = load double, double* %arrayidx10.i, align 8
   %arrayidx12.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 2, i64 0
-  %5 = load double* %arrayidx12.i, align 8
+  %5 = load double, double* %arrayidx12.i, align 8
   %mul13.i = fmul double %4, %5
   %add14.i = fadd double %add.i, %mul13.i
   %arrayidx16.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 0, i64 3
-  %6 = load double* %arrayidx16.i, align 8
+  %6 = load double, double* %arrayidx16.i, align 8
   %arrayidx18.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 3, i64 0
-  %7 = load double* %arrayidx18.i, align 8
+  %7 = load double, double* %arrayidx18.i, align 8
   %mul19.i = fmul double %6, %7
   %add20.i = fadd double %add14.i, %mul19.i
   %arrayidx25.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 1
-  %8 = load double* %arrayidx25.i, align 8
+  %8 = load double, double* %arrayidx25.i, align 8
   %mul26.i = fmul double %0, %8
   %arrayidx30.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 1
-  %9 = load double* %arrayidx30.i, align 8
+  %9 = load double, double* %arrayidx30.i, align 8
   %mul31.i = fmul double %2, %9
   %add32.i = fadd double %mul26.i, %mul31.i
   %arrayidx36.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 2, i64 1
-  %10 = load double* %arrayidx36.i, align 8
+  %10 = load double, double* %arrayidx36.i, align 8
   %mul37.i = fmul double %4, %10
   %add38.i = fadd double %add32.i, %mul37.i
   %arrayidx42.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 3, i64 1
-  %11 = load double* %arrayidx42.i, align 8
+  %11 = load double, double* %arrayidx42.i, align 8
   %mul43.i = fmul double %6, %11
   %add44.i = fadd double %add38.i, %mul43.i
   %arrayidx49.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 2
-  %12 = load double* %arrayidx49.i, align 8
+  %12 = load double, double* %arrayidx49.i, align 8
   %mul50.i = fmul double %0, %12
   %arrayidx54.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 2
-  %13 = load double* %arrayidx54.i, align 8
+  %13 = load double, double* %arrayidx54.i, align 8
   %mul55.i = fmul double %2, %13
   %add56.i = fadd double %mul50.i, %mul55.i
   %arrayidx60.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 2, i64 2
-  %14 = load double* %arrayidx60.i, align 8
+  %14 = load double, double* %arrayidx60.i, align 8
   %mul61.i = fmul double %4, %14
   %add62.i = fadd double %add56.i, %mul61.i
   %arrayidx66.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 3, i64 2
-  %15 = load double* %arrayidx66.i, align 8
+  %15 = load double, double* %arrayidx66.i, align 8
   %mul67.i = fmul double %6, %15
   %add68.i = fadd double %add62.i, %mul67.i
   %arrayidx73.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 0, i64 3
-  %16 = load double* %arrayidx73.i, align 8
+  %16 = load double, double* %arrayidx73.i, align 8
   %mul74.i = fmul double %0, %16
   %arrayidx78.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 1, i64 3
-  %17 = load double* %arrayidx78.i, align 8
+  %17 = load double, double* %arrayidx78.i, align 8
   %mul79.i = fmul double %2, %17
   %add80.i = fadd double %mul74.i, %mul79.i
   %arrayidx84.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 2, i64 3
-  %18 = load double* %arrayidx84.i, align 8
+  %18 = load double, double* %arrayidx84.i, align 8
   %mul85.i = fmul double %4, %18
   %add86.i = fadd double %add80.i, %mul85.i
   %arrayidx90.i = getelementptr inbounds [4 x double], [4 x double]* %B, i64 3, i64 3
-  %19 = load double* %arrayidx90.i, align 8
+  %19 = load double, double* %arrayidx90.i, align 8
   %mul91.i = fmul double %6, %19
   %add92.i = fadd double %add86.i, %mul91.i
   %arrayidx95.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 1, i64 0
-  %20 = load double* %arrayidx95.i, align 8
+  %20 = load double, double* %arrayidx95.i, align 8
   %mul98.i = fmul double %1, %20
   %arrayidx100.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 1, i64 1
-  %21 = load double* %arrayidx100.i, align 8
+  %21 = load double, double* %arrayidx100.i, align 8
   %mul103.i = fmul double %3, %21
   %add104.i = fadd double %mul98.i, %mul103.i
   %arrayidx106.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 1, i64 2
-  %22 = load double* %arrayidx106.i, align 8
+  %22 = load double, double* %arrayidx106.i, align 8
   %mul109.i = fmul double %5, %22
   %add110.i = fadd double %add104.i, %mul109.i
   %arrayidx112.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 1, i64 3
-  %23 = load double* %arrayidx112.i, align 8
+  %23 = load double, double* %arrayidx112.i, align 8
   %mul115.i = fmul double %7, %23
   %add116.i = fadd double %add110.i, %mul115.i
   %mul122.i = fmul double %8, %20
@@ -119,18 +119,18 @@
   %mul187.i = fmul double %19, %23
   %add188.i = fadd double %add182.i, %mul187.i
   %arrayidx191.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 2, i64 0
-  %24 = load double* %arrayidx191.i, align 8
+  %24 = load double, double* %arrayidx191.i, align 8
   %mul194.i = fmul double %1, %24
   %arrayidx196.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 2, i64 1
-  %25 = load double* %arrayidx196.i, align 8
+  %25 = load double, double* %arrayidx196.i, align 8
   %mul199.i = fmul double %3, %25
   %add200.i = fadd double %mul194.i, %mul199.i
   %arrayidx202.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 2, i64 2
-  %26 = load double* %arrayidx202.i, align 8
+  %26 = load double, double* %arrayidx202.i, align 8
   %mul205.i = fmul double %5, %26
   %add206.i = fadd double %add200.i, %mul205.i
   %arrayidx208.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 2, i64 3
-  %27 = load double* %arrayidx208.i, align 8
+  %27 = load double, double* %arrayidx208.i, align 8
   %mul211.i = fmul double %7, %27
   %add212.i = fadd double %add206.i, %mul211.i
   %mul218.i = fmul double %8, %24
@@ -155,18 +155,18 @@
   %mul283.i = fmul double %19, %27
   %add284.i = fadd double %add278.i, %mul283.i
   %arrayidx287.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 3, i64 0
-  %28 = load double* %arrayidx287.i, align 8
+  %28 = load double, double* %arrayidx287.i, align 8
   %mul290.i = fmul double %1, %28
   %arrayidx292.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 3, i64 1
-  %29 = load double* %arrayidx292.i, align 8
+  %29 = load double, double* %arrayidx292.i, align 8
   %mul295.i = fmul double %3, %29
   %add296.i = fadd double %mul290.i, %mul295.i
   %arrayidx298.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 3, i64 2
-  %30 = load double* %arrayidx298.i, align 8
+  %30 = load double, double* %arrayidx298.i, align 8
   %mul301.i = fmul double %5, %30
   %add302.i = fadd double %add296.i, %mul301.i
   %arrayidx304.i = getelementptr inbounds [4 x double], [4 x double]* %A, i64 3, i64 3
-  %31 = load double* %arrayidx304.i, align 8
+  %31 = load double, double* %arrayidx304.i, align 8
   %mul307.i = fmul double %7, %31
   %add308.i = fadd double %add302.i, %mul307.i
   %mul314.i = fmul double %8, %28
diff --git a/llvm/test/CodeGen/X86/misched-matrix.ll b/llvm/test/CodeGen/X86/misched-matrix.ll
index ea632ba..e62a1d0 100644
--- a/llvm/test/CodeGen/X86/misched-matrix.ll
+++ b/llvm/test/CodeGen/X86/misched-matrix.ll
@@ -94,57 +94,57 @@
 for.body:                              ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx8 = getelementptr inbounds [4 x i32], [4 x i32]* %m1, i64 %indvars.iv, i64 0
-  %tmp = load i32* %arrayidx8, align 4
+  %tmp = load i32, i32* %arrayidx8, align 4
   %arrayidx12 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 0, i64 0
-  %tmp1 = load i32* %arrayidx12, align 4
+  %tmp1 = load i32, i32* %arrayidx12, align 4
   %arrayidx8.1 = getelementptr inbounds [4 x i32], [4 x i32]* %m1, i64 %indvars.iv, i64 1
-  %tmp2 = load i32* %arrayidx8.1, align 4
+  %tmp2 = load i32, i32* %arrayidx8.1, align 4
   %arrayidx12.1 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 1, i64 0
-  %tmp3 = load i32* %arrayidx12.1, align 4
+  %tmp3 = load i32, i32* %arrayidx12.1, align 4
   %arrayidx8.2 = getelementptr inbounds [4 x i32], [4 x i32]* %m1, i64 %indvars.iv, i64 2
-  %tmp4 = load i32* %arrayidx8.2, align 4
+  %tmp4 = load i32, i32* %arrayidx8.2, align 4
   %arrayidx12.2 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 2, i64 0
-  %tmp5 = load i32* %arrayidx12.2, align 4
+  %tmp5 = load i32, i32* %arrayidx12.2, align 4
   %arrayidx8.3 = getelementptr inbounds [4 x i32], [4 x i32]* %m1, i64 %indvars.iv, i64 3
-  %tmp6 = load i32* %arrayidx8.3, align 4
+  %tmp6 = load i32, i32* %arrayidx8.3, align 4
   %arrayidx12.3 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 3, i64 0
-  %tmp8 = load i32* %arrayidx8, align 4
+  %tmp8 = load i32, i32* %arrayidx8, align 4
   %arrayidx12.137 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 0, i64 1
-  %tmp9 = load i32* %arrayidx12.137, align 4
-  %tmp10 = load i32* %arrayidx8.1, align 4
+  %tmp9 = load i32, i32* %arrayidx12.137, align 4
+  %tmp10 = load i32, i32* %arrayidx8.1, align 4
   %arrayidx12.1.1 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 1, i64 1
-  %tmp11 = load i32* %arrayidx12.1.1, align 4
-  %tmp12 = load i32* %arrayidx8.2, align 4
+  %tmp11 = load i32, i32* %arrayidx12.1.1, align 4
+  %tmp12 = load i32, i32* %arrayidx8.2, align 4
   %arrayidx12.2.1 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 2, i64 1
-  %tmp13 = load i32* %arrayidx12.2.1, align 4
-  %tmp14 = load i32* %arrayidx8.3, align 4
+  %tmp13 = load i32, i32* %arrayidx12.2.1, align 4
+  %tmp14 = load i32, i32* %arrayidx8.3, align 4
   %arrayidx12.3.1 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 3, i64 1
-  %tmp15 = load i32* %arrayidx12.3.1, align 4
-  %tmp16 = load i32* %arrayidx8, align 4
+  %tmp15 = load i32, i32* %arrayidx12.3.1, align 4
+  %tmp16 = load i32, i32* %arrayidx8, align 4
   %arrayidx12.239 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 0, i64 2
-  %tmp17 = load i32* %arrayidx12.239, align 4
-  %tmp18 = load i32* %arrayidx8.1, align 4
+  %tmp17 = load i32, i32* %arrayidx12.239, align 4
+  %tmp18 = load i32, i32* %arrayidx8.1, align 4
   %arrayidx12.1.2 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 1, i64 2
-  %tmp19 = load i32* %arrayidx12.1.2, align 4
-  %tmp20 = load i32* %arrayidx8.2, align 4
+  %tmp19 = load i32, i32* %arrayidx12.1.2, align 4
+  %tmp20 = load i32, i32* %arrayidx8.2, align 4
   %arrayidx12.2.2 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 2, i64 2
-  %tmp21 = load i32* %arrayidx12.2.2, align 4
-  %tmp22 = load i32* %arrayidx8.3, align 4
+  %tmp21 = load i32, i32* %arrayidx12.2.2, align 4
+  %tmp22 = load i32, i32* %arrayidx8.3, align 4
   %arrayidx12.3.2 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 3, i64 2
-  %tmp23 = load i32* %arrayidx12.3.2, align 4
-  %tmp24 = load i32* %arrayidx8, align 4
+  %tmp23 = load i32, i32* %arrayidx12.3.2, align 4
+  %tmp24 = load i32, i32* %arrayidx8, align 4
   %arrayidx12.341 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 0, i64 3
-  %tmp25 = load i32* %arrayidx12.341, align 4
-  %tmp26 = load i32* %arrayidx8.1, align 4
+  %tmp25 = load i32, i32* %arrayidx12.341, align 4
+  %tmp26 = load i32, i32* %arrayidx8.1, align 4
   %arrayidx12.1.3 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 1, i64 3
-  %tmp27 = load i32* %arrayidx12.1.3, align 4
-  %tmp28 = load i32* %arrayidx8.2, align 4
+  %tmp27 = load i32, i32* %arrayidx12.1.3, align 4
+  %tmp28 = load i32, i32* %arrayidx8.2, align 4
   %arrayidx12.2.3 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 2, i64 3
-  %tmp29 = load i32* %arrayidx12.2.3, align 4
-  %tmp30 = load i32* %arrayidx8.3, align 4
+  %tmp29 = load i32, i32* %arrayidx12.2.3, align 4
+  %tmp30 = load i32, i32* %arrayidx8.3, align 4
   %arrayidx12.3.3 = getelementptr inbounds [4 x i32], [4 x i32]* %m2, i64 3, i64 3
-  %tmp31 = load i32* %arrayidx12.3.3, align 4
-  %tmp7 = load i32* %arrayidx12.3, align 4
+  %tmp31 = load i32, i32* %arrayidx12.3.3, align 4
+  %tmp7 = load i32, i32* %arrayidx12.3, align 4
   %mul = mul nsw i32 %tmp1, %tmp
   %mul.1 = mul nsw i32 %tmp3, %tmp2
   %mul.2 = mul nsw i32 %tmp5, %tmp4
diff --git a/llvm/test/CodeGen/X86/misched-new.ll b/llvm/test/CodeGen/X86/misched-new.ll
index 89e45b7..410a7f3 100644
--- a/llvm/test/CodeGen/X86/misched-new.ll
+++ b/llvm/test/CodeGen/X86/misched-new.ll
@@ -90,12 +90,12 @@
 ; TOPDOWN: movzbl %al
 ; TOPDOWN: ret
 define void @testSubregTracking() nounwind uwtable ssp align 2 {
-  %tmp = load i8* undef, align 1
+  %tmp = load i8, i8* undef, align 1
   %tmp6 = sub i8 0, %tmp
-  %tmp7 = load i8* undef, align 1
+  %tmp7 = load i8, i8* undef, align 1
   %tmp8 = udiv i8 %tmp6, %tmp7
   %tmp9 = zext i8 %tmp8 to i64
-  %tmp10 = load i8* undef, align 1
+  %tmp10 = load i8, i8* undef, align 1
   %tmp11 = zext i8 %tmp10 to i64
   %tmp12 = mul i64 %tmp11, %tmp9
   %tmp13 = urem i8 %tmp6, %tmp7
diff --git a/llvm/test/CodeGen/X86/mmx-arg-passing-x86-64.ll b/llvm/test/CodeGen/X86/mmx-arg-passing-x86-64.ll
index c536a39..89eb33e 100644
--- a/llvm/test/CodeGen/X86/mmx-arg-passing-x86-64.ll
+++ b/llvm/test/CodeGen/X86/mmx-arg-passing-x86-64.ll
@@ -12,7 +12,7 @@
 ; X86-64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
 ; X86-64-NEXT:    movb $1, %al
 ; X86-64-NEXT:    jmp _pass_v8qi ## TAILCALL
-  %tmp3 = load <8 x i8>* @g_v8qi, align 8
+  %tmp3 = load <8 x i8>, <8 x i8>* @g_v8qi, align 8
   %tmp3a = bitcast <8 x i8> %tmp3 to x86_mmx
   %tmp4 = tail call i32 (...)* @pass_v8qi( x86_mmx %tmp3a ) nounwind
   ret void
diff --git a/llvm/test/CodeGen/X86/mmx-arith.ll b/llvm/test/CodeGen/X86/mmx-arith.ll
index 0e46a29..114d253 100644
--- a/llvm/test/CodeGen/X86/mmx-arith.ll
+++ b/llvm/test/CodeGen/X86/mmx-arith.ll
@@ -8,48 +8,48 @@
 ; X64-LABEL: test0
 define void @test0(x86_mmx* %A, x86_mmx* %B) {
 entry:
-  %tmp1 = load x86_mmx* %A
-  %tmp3 = load x86_mmx* %B
+  %tmp1 = load x86_mmx, x86_mmx* %A
+  %tmp3 = load x86_mmx, x86_mmx* %B
   %tmp1a = bitcast x86_mmx %tmp1 to <8 x i8>
   %tmp3a = bitcast x86_mmx %tmp3 to <8 x i8>
   %tmp4 = add <8 x i8> %tmp1a, %tmp3a
   %tmp4a = bitcast <8 x i8> %tmp4 to x86_mmx
   store x86_mmx %tmp4a, x86_mmx* %A
-  %tmp7 = load x86_mmx* %B
+  %tmp7 = load x86_mmx, x86_mmx* %B
   %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %tmp4a, x86_mmx %tmp7)
   store x86_mmx %tmp12, x86_mmx* %A
-  %tmp16 = load x86_mmx* %B
+  %tmp16 = load x86_mmx, x86_mmx* %B
   %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %tmp12, x86_mmx %tmp16)
   store x86_mmx %tmp21, x86_mmx* %A
-  %tmp27 = load x86_mmx* %B
+  %tmp27 = load x86_mmx, x86_mmx* %B
   %tmp21a = bitcast x86_mmx %tmp21 to <8 x i8>
   %tmp27a = bitcast x86_mmx %tmp27 to <8 x i8>
   %tmp28 = sub <8 x i8> %tmp21a, %tmp27a
   %tmp28a = bitcast <8 x i8> %tmp28 to x86_mmx
   store x86_mmx %tmp28a, x86_mmx* %A
-  %tmp31 = load x86_mmx* %B
+  %tmp31 = load x86_mmx, x86_mmx* %B
   %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %tmp28a, x86_mmx %tmp31)
   store x86_mmx %tmp36, x86_mmx* %A
-  %tmp40 = load x86_mmx* %B
+  %tmp40 = load x86_mmx, x86_mmx* %B
   %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %tmp36, x86_mmx %tmp40)
   store x86_mmx %tmp45, x86_mmx* %A
-  %tmp51 = load x86_mmx* %B
+  %tmp51 = load x86_mmx, x86_mmx* %B
   %tmp45a = bitcast x86_mmx %tmp45 to <8 x i8>
   %tmp51a = bitcast x86_mmx %tmp51 to <8 x i8>
   %tmp52 = mul <8 x i8> %tmp45a, %tmp51a
   %tmp52a = bitcast <8 x i8> %tmp52 to x86_mmx
   store x86_mmx %tmp52a, x86_mmx* %A
-  %tmp57 = load x86_mmx* %B
+  %tmp57 = load x86_mmx, x86_mmx* %B
   %tmp57a = bitcast x86_mmx %tmp57 to <8 x i8>
   %tmp58 = and <8 x i8> %tmp52, %tmp57a
   %tmp58a = bitcast <8 x i8> %tmp58 to x86_mmx
   store x86_mmx %tmp58a, x86_mmx* %A
-  %tmp63 = load x86_mmx* %B
+  %tmp63 = load x86_mmx, x86_mmx* %B
   %tmp63a = bitcast x86_mmx %tmp63 to <8 x i8>
   %tmp64 = or <8 x i8> %tmp58, %tmp63a
   %tmp64a = bitcast <8 x i8> %tmp64 to x86_mmx
   store x86_mmx %tmp64a, x86_mmx* %A
-  %tmp69 = load x86_mmx* %B
+  %tmp69 = load x86_mmx, x86_mmx* %B
   %tmp69a = bitcast x86_mmx %tmp69 to <8 x i8>
   %tmp64b = bitcast x86_mmx %tmp64a to <8 x i8>
   %tmp70 = xor <8 x i8> %tmp64b, %tmp69a
@@ -63,37 +63,37 @@
 ; X64-LABEL: test1
 define void @test1(x86_mmx* %A, x86_mmx* %B) {
 entry:
-  %tmp1 = load x86_mmx* %A
-  %tmp3 = load x86_mmx* %B
+  %tmp1 = load x86_mmx, x86_mmx* %A
+  %tmp3 = load x86_mmx, x86_mmx* %B
   %tmp1a = bitcast x86_mmx %tmp1 to <2 x i32>
   %tmp3a = bitcast x86_mmx %tmp3 to <2 x i32>
   %tmp4 = add <2 x i32> %tmp1a, %tmp3a
   %tmp4a = bitcast <2 x i32> %tmp4 to x86_mmx
   store x86_mmx %tmp4a, x86_mmx* %A
-  %tmp9 = load x86_mmx* %B
+  %tmp9 = load x86_mmx, x86_mmx* %B
   %tmp9a = bitcast x86_mmx %tmp9 to <2 x i32>
   %tmp10 = sub <2 x i32> %tmp4, %tmp9a
   %tmp10a = bitcast <2 x i32> %tmp4 to x86_mmx
   store x86_mmx %tmp10a, x86_mmx* %A
-  %tmp15 = load x86_mmx* %B
+  %tmp15 = load x86_mmx, x86_mmx* %B
   %tmp10b = bitcast x86_mmx %tmp10a to <2 x i32>
   %tmp15a = bitcast x86_mmx %tmp15 to <2 x i32>
   %tmp16 = mul <2 x i32> %tmp10b, %tmp15a
   %tmp16a = bitcast <2 x i32> %tmp16 to x86_mmx
   store x86_mmx %tmp16a, x86_mmx* %A
-  %tmp21 = load x86_mmx* %B
+  %tmp21 = load x86_mmx, x86_mmx* %B
   %tmp16b = bitcast x86_mmx %tmp16a to <2 x i32>
   %tmp21a = bitcast x86_mmx %tmp21 to <2 x i32>
   %tmp22 = and <2 x i32> %tmp16b, %tmp21a
   %tmp22a = bitcast <2 x i32> %tmp22 to x86_mmx
   store x86_mmx %tmp22a, x86_mmx* %A
-  %tmp27 = load x86_mmx* %B
+  %tmp27 = load x86_mmx, x86_mmx* %B
   %tmp22b = bitcast x86_mmx %tmp22a to <2 x i32>
   %tmp27a = bitcast x86_mmx %tmp27 to <2 x i32>
   %tmp28 = or <2 x i32> %tmp22b, %tmp27a
   %tmp28a = bitcast <2 x i32> %tmp28 to x86_mmx
   store x86_mmx %tmp28a, x86_mmx* %A
-  %tmp33 = load x86_mmx* %B
+  %tmp33 = load x86_mmx, x86_mmx* %B
   %tmp28b = bitcast x86_mmx %tmp28a to <2 x i32>
   %tmp33a = bitcast x86_mmx %tmp33 to <2 x i32>
   %tmp34 = xor <2 x i32> %tmp28b, %tmp33a
@@ -107,57 +107,57 @@
 ; X64-LABEL: test2
 define void @test2(x86_mmx* %A, x86_mmx* %B) {
 entry:
-  %tmp1 = load x86_mmx* %A
-  %tmp3 = load x86_mmx* %B
+  %tmp1 = load x86_mmx, x86_mmx* %A
+  %tmp3 = load x86_mmx, x86_mmx* %B
   %tmp1a = bitcast x86_mmx %tmp1 to <4 x i16>
   %tmp3a = bitcast x86_mmx %tmp3 to <4 x i16>
   %tmp4 = add <4 x i16> %tmp1a, %tmp3a
   %tmp4a = bitcast <4 x i16> %tmp4 to x86_mmx
   store x86_mmx %tmp4a, x86_mmx* %A
-  %tmp7 = load x86_mmx* %B
+  %tmp7 = load x86_mmx, x86_mmx* %B
   %tmp12 = tail call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %tmp4a, x86_mmx %tmp7)
   store x86_mmx %tmp12, x86_mmx* %A
-  %tmp16 = load x86_mmx* %B
+  %tmp16 = load x86_mmx, x86_mmx* %B
   %tmp21 = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %tmp12, x86_mmx %tmp16)
   store x86_mmx %tmp21, x86_mmx* %A
-  %tmp27 = load x86_mmx* %B
+  %tmp27 = load x86_mmx, x86_mmx* %B
   %tmp21a = bitcast x86_mmx %tmp21 to <4 x i16>
   %tmp27a = bitcast x86_mmx %tmp27 to <4 x i16>
   %tmp28 = sub <4 x i16> %tmp21a, %tmp27a
   %tmp28a = bitcast <4 x i16> %tmp28 to x86_mmx
   store x86_mmx %tmp28a, x86_mmx* %A
-  %tmp31 = load x86_mmx* %B
+  %tmp31 = load x86_mmx, x86_mmx* %B
   %tmp36 = tail call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %tmp28a, x86_mmx %tmp31)
   store x86_mmx %tmp36, x86_mmx* %A
-  %tmp40 = load x86_mmx* %B
+  %tmp40 = load x86_mmx, x86_mmx* %B
   %tmp45 = tail call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %tmp36, x86_mmx %tmp40)
   store x86_mmx %tmp45, x86_mmx* %A
-  %tmp51 = load x86_mmx* %B
+  %tmp51 = load x86_mmx, x86_mmx* %B
   %tmp45a = bitcast x86_mmx %tmp45 to <4 x i16>
   %tmp51a = bitcast x86_mmx %tmp51 to <4 x i16>
   %tmp52 = mul <4 x i16> %tmp45a, %tmp51a
   %tmp52a = bitcast <4 x i16> %tmp52 to x86_mmx
   store x86_mmx %tmp52a, x86_mmx* %A
-  %tmp55 = load x86_mmx* %B
+  %tmp55 = load x86_mmx, x86_mmx* %B
   %tmp60 = tail call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %tmp52a, x86_mmx %tmp55)
   store x86_mmx %tmp60, x86_mmx* %A
-  %tmp64 = load x86_mmx* %B
+  %tmp64 = load x86_mmx, x86_mmx* %B
   %tmp69 = tail call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %tmp60, x86_mmx %tmp64)
   %tmp70 = bitcast x86_mmx %tmp69 to x86_mmx
   store x86_mmx %tmp70, x86_mmx* %A
-  %tmp75 = load x86_mmx* %B
+  %tmp75 = load x86_mmx, x86_mmx* %B
   %tmp70a = bitcast x86_mmx %tmp70 to <4 x i16>
   %tmp75a = bitcast x86_mmx %tmp75 to <4 x i16>
   %tmp76 = and <4 x i16> %tmp70a, %tmp75a
   %tmp76a = bitcast <4 x i16> %tmp76 to x86_mmx
   store x86_mmx %tmp76a, x86_mmx* %A
-  %tmp81 = load x86_mmx* %B
+  %tmp81 = load x86_mmx, x86_mmx* %B
   %tmp76b = bitcast x86_mmx %tmp76a to <4 x i16>
   %tmp81a = bitcast x86_mmx %tmp81 to <4 x i16>
   %tmp82 = or <4 x i16> %tmp76b, %tmp81a
   %tmp82a = bitcast <4 x i16> %tmp82 to x86_mmx
   store x86_mmx %tmp82a, x86_mmx* %A
-  %tmp87 = load x86_mmx* %B
+  %tmp87 = load x86_mmx, x86_mmx* %B
   %tmp82b = bitcast x86_mmx %tmp82a to <4 x i16>
   %tmp87a = bitcast x86_mmx %tmp87 to <4 x i16>
   %tmp88 = xor <4 x i16> %tmp82b, %tmp87a
@@ -179,9 +179,9 @@
   %i.037.0 = phi i32 [ 0, %entry ], [ %tmp25, %bb26 ]
   %sum.035.0 = phi <1 x i64> [ zeroinitializer, %entry ], [ %tmp22, %bb26 ]
   %tmp13 = getelementptr <1 x i64>, <1 x i64>* %b, i32 %i.037.0
-  %tmp14 = load <1 x i64>* %tmp13
+  %tmp14 = load <1 x i64>, <1 x i64>* %tmp13
   %tmp18 = getelementptr <1 x i64>, <1 x i64>* %a, i32 %i.037.0
-  %tmp19 = load <1 x i64>* %tmp18
+  %tmp19 = load <1 x i64>, <1 x i64>* %tmp18
   %tmp21 = add <1 x i64> %tmp19, %tmp14
   %tmp22 = add <1 x i64> %tmp21, %sum.035.0
   %tmp25 = add i32 %i.037.0, 1
diff --git a/llvm/test/CodeGen/X86/mmx-bitcast.ll b/llvm/test/CodeGen/X86/mmx-bitcast.ll
index a2eb96a..4aa10a9 100644
--- a/llvm/test/CodeGen/X86/mmx-bitcast.ll
+++ b/llvm/test/CodeGen/X86/mmx-bitcast.ll
@@ -7,7 +7,7 @@
 ; CHECK-NEXT:    paddq %mm0, %mm0
 ; CHECK-NEXT:    movd %mm0, %rax
 ; CHECK-NEXT:    retq
-  %t = load x86_mmx* %p
+  %t = load x86_mmx, x86_mmx* %p
   %u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %t)
   %s = bitcast x86_mmx %u to i64
   ret i64 %s
@@ -20,7 +20,7 @@
 ; CHECK-NEXT:    paddd %mm0, %mm0
 ; CHECK-NEXT:    movd %mm0, %rax
 ; CHECK-NEXT:    retq
-  %t = load x86_mmx* %p
+  %t = load x86_mmx, x86_mmx* %p
   %u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %t)
   %s = bitcast x86_mmx %u to i64
   ret i64 %s
@@ -33,7 +33,7 @@
 ; CHECK-NEXT:    paddw %mm0, %mm0
 ; CHECK-NEXT:    movd %mm0, %rax
 ; CHECK-NEXT:    retq
-  %t = load x86_mmx* %p
+  %t = load x86_mmx, x86_mmx* %p
   %u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %t)
   %s = bitcast x86_mmx %u to i64
   ret i64 %s
@@ -46,7 +46,7 @@
 ; CHECK-NEXT:    paddb %mm0, %mm0
 ; CHECK-NEXT:    movd %mm0, %rax
 ; CHECK-NEXT:    retq
-  %t = load x86_mmx* %p
+  %t = load x86_mmx, x86_mmx* %p
   %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %t)
   %s = bitcast x86_mmx %u to i64
   ret i64 %s
diff --git a/llvm/test/CodeGen/X86/mmx-copy-gprs.ll b/llvm/test/CodeGen/X86/mmx-copy-gprs.ll
index 3778755..6d39713 100644
--- a/llvm/test/CodeGen/X86/mmx-copy-gprs.ll
+++ b/llvm/test/CodeGen/X86/mmx-copy-gprs.ll
@@ -11,7 +11,7 @@
 
 define void @foo(<1 x i64>* %x, <1 x i64>* %y) nounwind  {
 entry:
-	%tmp1 = load <1 x i64>* %y, align 8		; <<1 x i64>> [#uses=1]
+	%tmp1 = load <1 x i64>, <1 x i64>* %y, align 8		; <<1 x i64>> [#uses=1]
 	store <1 x i64> %tmp1, <1 x i64>* %x, align 8
 	ret void
 }
diff --git a/llvm/test/CodeGen/X86/mmx-fold-load.ll b/llvm/test/CodeGen/X86/mmx-fold-load.ll
index d49edac..2b9d30f 100644
--- a/llvm/test/CodeGen/X86/mmx-fold-load.ll
+++ b/llvm/test/CodeGen/X86/mmx-fold-load.ll
@@ -9,8 +9,8 @@
 ; CHECK-NEXT:    retq
 entry:
   %0 = bitcast <1 x i64>* %a to x86_mmx*
-  %1 = load x86_mmx* %0, align 8
-  %2 = load i32* %b, align 4
+  %1 = load x86_mmx, x86_mmx* %0, align 8
+  %2 = load i32, i32* %b, align 4
   %3 = tail call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %1, i32 %2)
   %4 = bitcast x86_mmx %3 to i64
   ret i64 %4
@@ -26,8 +26,8 @@
 ; CHECK-NEXT:    retq
 entry:
   %0 = bitcast <1 x i64>* %a to x86_mmx*
-  %1 = load x86_mmx* %0, align 8
-  %2 = load i32* %b, align 4
+  %1 = load x86_mmx, x86_mmx* %0, align 8
+  %2 = load i32, i32* %b, align 4
   %3 = tail call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %1, i32 %2)
   %4 = bitcast x86_mmx %3 to i64
   ret i64 %4
@@ -43,8 +43,8 @@
 ; CHECK-NEXT:    retq
 entry:
   %0 = bitcast <1 x i64>* %a to x86_mmx*
-  %1 = load x86_mmx* %0, align 8
-  %2 = load i32* %b, align 4
+  %1 = load x86_mmx, x86_mmx* %0, align 8
+  %2 = load i32, i32* %b, align 4
   %3 = tail call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %1, i32 %2)
   %4 = bitcast x86_mmx %3 to i64
   ret i64 %4
@@ -60,8 +60,8 @@
 ; CHECK-NEXT:    retq
 entry:
   %0 = bitcast <1 x i64>* %a to x86_mmx*
-  %1 = load x86_mmx* %0, align 8
-  %2 = load i32* %b, align 4
+  %1 = load x86_mmx, x86_mmx* %0, align 8
+  %2 = load i32, i32* %b, align 4
   %3 = tail call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %1, i32 %2)
   %4 = bitcast x86_mmx %3 to i64
   ret i64 %4
@@ -77,8 +77,8 @@
 ; CHECK-NEXT:    retq
 entry:
   %0 = bitcast <1 x i64>* %a to x86_mmx*
-  %1 = load x86_mmx* %0, align 8
-  %2 = load i32* %b, align 4
+  %1 = load x86_mmx, x86_mmx* %0, align 8
+  %2 = load i32, i32* %b, align 4
   %3 = tail call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %1, i32 %2)
   %4 = bitcast x86_mmx %3 to i64
   ret i64 %4
@@ -94,8 +94,8 @@
 ; CHECK-NEXT:    retq
 entry:
   %0 = bitcast <1 x i64>* %a to x86_mmx*
-  %1 = load x86_mmx* %0, align 8
-  %2 = load i32* %b, align 4
+  %1 = load x86_mmx, x86_mmx* %0, align 8
+  %2 = load i32, i32* %b, align 4
   %3 = tail call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %1, i32 %2)
   %4 = bitcast x86_mmx %3 to i64
   ret i64 %4
@@ -111,8 +111,8 @@
 ; CHECK-NEXT:    retq
 entry:
   %0 = bitcast <1 x i64>* %a to x86_mmx*
-  %1 = load x86_mmx* %0, align 8
-  %2 = load i32* %b, align 4
+  %1 = load x86_mmx, x86_mmx* %0, align 8
+  %2 = load i32, i32* %b, align 4
   %3 = tail call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %1, i32 %2)
   %4 = bitcast x86_mmx %3 to i64
   ret i64 %4
@@ -128,8 +128,8 @@
 ; CHECK-NEXT:    retq
 entry:
   %0 = bitcast <1 x i64>* %a to x86_mmx*
-  %1 = load x86_mmx* %0, align 8
-  %2 = load i32* %b, align 4
+  %1 = load x86_mmx, x86_mmx* %0, align 8
+  %2 = load i32, i32* %b, align 4
   %3 = tail call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %1, i32 %2)
   %4 = bitcast x86_mmx %3 to i64
   ret i64 %4
@@ -144,7 +144,7 @@
 ; CHECK-NEXT:    emms
 ; CHECK-NEXT:    retq
 entry:
-  %v = load x86_mmx* %q
+  %v = load x86_mmx, x86_mmx* %q
   %u = tail call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %t, x86_mmx %v)
   %s = bitcast x86_mmx %u to i64
   call void @llvm.x86.mmx.emms()
@@ -161,7 +161,7 @@
 ; CHECK-NEXT:    emms
 ; CHECK-NEXT:    retq
 entry:
-  %v = load x86_mmx* %q
+  %v = load x86_mmx, x86_mmx* %q
   %u = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %t, x86_mmx %v)
   %s = bitcast x86_mmx %u to i64
   call void @llvm.x86.mmx.emms()
@@ -177,7 +177,7 @@
 ; CHECK-NEXT:    emms
 ; CHECK-NEXT:    retq
 entry:
-  %v = load x86_mmx* %q
+  %v = load x86_mmx, x86_mmx* %q
   %u = tail call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %t, x86_mmx %v)
   %s = bitcast x86_mmx %u to i64
   call void @llvm.x86.mmx.emms()
@@ -193,7 +193,7 @@
 ; CHECK-NEXT:    emms
 ; CHECK-NEXT:    retq
 entry:
-  %v = load x86_mmx* %q
+  %v = load x86_mmx, x86_mmx* %q
   %u = tail call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %t, x86_mmx %v)
   %s = bitcast x86_mmx %u to i64
   call void @llvm.x86.mmx.emms()
@@ -209,7 +209,7 @@
 ; CHECK-NEXT:    emms
 ; CHECK-NEXT:    retq
 entry:
-  %v = load x86_mmx* %q
+  %v = load x86_mmx, x86_mmx* %q
   %u = tail call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %t, x86_mmx %v)
   %s = bitcast x86_mmx %u to i64
   call void @llvm.x86.mmx.emms()
@@ -225,7 +225,7 @@
 ; CHECK-NEXT:    emms
 ; CHECK-NEXT:    retq
 entry:
-  %v = load x86_mmx* %q
+  %v = load x86_mmx, x86_mmx* %q
   %u = tail call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %t, x86_mmx %v)
   %s = bitcast x86_mmx %u to i64
   call void @llvm.x86.mmx.emms()
@@ -241,7 +241,7 @@
 ; CHECK-NEXT:    emms
 ; CHECK-NEXT:    retq
 entry:
-  %v = load x86_mmx* %q
+  %v = load x86_mmx, x86_mmx* %q
   %u = tail call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %t, x86_mmx %v)
   %s = bitcast x86_mmx %u to i64
   call void @llvm.x86.mmx.emms()
@@ -257,7 +257,7 @@
 ; CHECK-NEXT:    emms
 ; CHECK-NEXT:    retq
 entry:
-  %v = load x86_mmx* %q
+  %v = load x86_mmx, x86_mmx* %q
   %u = tail call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %t, x86_mmx %v)
   %s = bitcast x86_mmx %u to i64
   call void @llvm.x86.mmx.emms()
@@ -273,7 +273,7 @@
 ; CHECK-NEXT:    emms
 ; CHECK-NEXT:    retq
 entry:
-  %v = load x86_mmx* %q
+  %v = load x86_mmx, x86_mmx* %q
   %u = tail call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %t, x86_mmx %v)
   %s = bitcast x86_mmx %u to i64
   call void @llvm.x86.mmx.emms()
diff --git a/llvm/test/CodeGen/X86/movbe.ll b/llvm/test/CodeGen/X86/movbe.ll
index e248410..49e765d 100644
--- a/llvm/test/CodeGen/X86/movbe.ll
+++ b/llvm/test/CodeGen/X86/movbe.ll
@@ -16,7 +16,7 @@
 }
 
 define i16 @test2(i16* %x) nounwind {
-  %load = load i16* %x, align 2
+  %load = load i16, i16* %x, align 2
   %bswap = call i16 @llvm.bswap.i16(i16 %load)
   ret i16 %bswap
 ; CHECK-LABEL: test2:
@@ -36,7 +36,7 @@
 }
 
 define i32 @test4(i32* %x) nounwind {
-  %load = load i32* %x, align 4
+  %load = load i32, i32* %x, align 4
   %bswap = call i32 @llvm.bswap.i32(i32 %load)
   ret i32 %bswap
 ; CHECK-LABEL: test4:
@@ -56,7 +56,7 @@
 }
 
 define i64 @test6(i64* %x) nounwind {
-  %load = load i64* %x, align 8
+  %load = load i64, i64* %x, align 8
   %bswap = call i64 @llvm.bswap.i64(i64 %load)
   ret i64 %bswap
 ; CHECK-LABEL: test6:
diff --git a/llvm/test/CodeGen/X86/movfs.ll b/llvm/test/CodeGen/X86/movfs.ll
index 823e986..a337927 100644
--- a/llvm/test/CodeGen/X86/movfs.ll
+++ b/llvm/test/CodeGen/X86/movfs.ll
@@ -2,7 +2,7 @@
 
 define i32 @foo() nounwind readonly {
 entry:
-	%tmp = load i32* addrspace(257)* getelementptr (i32* addrspace(257)* inttoptr (i32 72 to i32* addrspace(257)*), i32 31)		; <i32*> [#uses=1]
-	%tmp1 = load i32* %tmp		; <i32> [#uses=1]
+	%tmp = load i32*, i32* addrspace(257)* getelementptr (i32* addrspace(257)* inttoptr (i32 72 to i32* addrspace(257)*), i32 31)		; <i32*> [#uses=1]
+	%tmp1 = load i32, i32* %tmp		; <i32> [#uses=1]
 	ret i32 %tmp1
 }
diff --git a/llvm/test/CodeGen/X86/movgs.ll b/llvm/test/CodeGen/X86/movgs.ll
index 96c5dbb..da4c9b7 100644
--- a/llvm/test/CodeGen/X86/movgs.ll
+++ b/llvm/test/CodeGen/X86/movgs.ll
@@ -15,8 +15,8 @@
 ; X64-NEXT:    movl (%rax), %eax
 ; X64-NEXT:    retq
 entry:
-	%tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31)		; <i32*> [#uses=1]
-	%tmp1 = load i32* %tmp		; <i32> [#uses=1]
+	%tmp = load i32*, i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31)		; <i32*> [#uses=1]
+	%tmp1 = load i32, i32* %tmp		; <i32> [#uses=1]
 	ret i32 %tmp1
 }
 
@@ -39,7 +39,7 @@
 ; X64-NEXT:    {{(addq.*%rsp|popq)}}
 ; X64-NEXT:    retq
 entry:
-  %tmp9 = load void (i8*)* addrspace(256)* %tmp8, align 8
+  %tmp9 = load void (i8*)*, void (i8*)* addrspace(256)* %tmp8, align 8
   tail call void %tmp9(i8* undef) nounwind optsize
   ret i64 0
 }
@@ -56,7 +56,7 @@
 ; X64-NEXT:    pmovsxwd %gs:(%{{(rcx|rdi)}}), %xmm0
 ; X64-NEXT:    retq
 entry:
-  %0 = load i64 addrspace(256)* %p
+  %0 = load i64, i64 addrspace(256)* %p
   %tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0
   %1 = bitcast <2 x i64> %tmp2 to <8 x i16>
   %2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone
@@ -83,10 +83,10 @@
 ; X64-NEXT:    addl (%rcx), %eax
 ; X64-NEXT:    retq
 entry:
-	%tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31)		; <i32*> [#uses=1]
-	%tmp1 = load i32* %tmp		; <i32> [#uses=1]
-	%tmp2 = load i32* addrspace(257)* getelementptr (i32* addrspace(257)* inttoptr (i32 72 to i32* addrspace(257)*), i32 31)		; <i32*> [#uses=1]
-	%tmp3 = load i32* %tmp2		; <i32> [#uses=1]
+	%tmp = load i32*, i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31)		; <i32*> [#uses=1]
+	%tmp1 = load i32, i32* %tmp		; <i32> [#uses=1]
+	%tmp2 = load i32*, i32* addrspace(257)* getelementptr (i32* addrspace(257)* inttoptr (i32 72 to i32* addrspace(257)*), i32 31)		; <i32*> [#uses=1]
+	%tmp3 = load i32, i32* %tmp2		; <i32> [#uses=1]
 	%tmp4 = add i32 %tmp1, %tmp3
 	ret i32 %tmp4
 }
diff --git a/llvm/test/CodeGen/X86/movmsk.ll b/llvm/test/CodeGen/X86/movmsk.ll
index a474d1a..a7ebebc 100644
--- a/llvm/test/CodeGen/X86/movmsk.ll
+++ b/llvm/test/CodeGen/X86/movmsk.ll
@@ -105,7 +105,7 @@
   %0 = tail call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %x) nounwind
   %idxprom = sext i32 %0 to i64
   %arrayidx = getelementptr inbounds i32, i32* %indexTable, i64 %idxprom
-  %1 = load i32* %arrayidx, align 4
+  %1 = load i32, i32* %arrayidx, align 4
   ret i32 %1
 }
 
@@ -118,7 +118,7 @@
   %1 = tail call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %0) nounwind
   %idxprom = sext i32 %1 to i64
   %arrayidx = getelementptr inbounds i32, i32* %indexTable, i64 %idxprom
-  %2 = load i32* %arrayidx, align 4
+  %2 = load i32, i32* %arrayidx, align 4
   ret i32 %2
 }
 
diff --git a/llvm/test/CodeGen/X86/movtopush.ll b/llvm/test/CodeGen/X86/movtopush.ll
index 4a5d903..4278910 100644
--- a/llvm/test/CodeGen/X86/movtopush.ll
+++ b/llvm/test/CodeGen/X86/movtopush.ll
@@ -196,7 +196,7 @@
 ; NORMAL-NEXT: addl $16, %esp
 define void @test7(i32* %ptr) optsize {
 entry:
-  %val = load i32* %ptr
+  %val = load i32, i32* %ptr
   call void @good(i32 1, i32 2, i32 %val, i32 4)
   ret void
 }
@@ -263,7 +263,7 @@
 define void @test10() optsize {
   %stack_fptr = alloca void (i32, i32, i32, i32)*
   store void (i32, i32, i32, i32)* @good, void (i32, i32, i32, i32)** %stack_fptr
-  %good_ptr = load volatile void (i32, i32, i32, i32)** %stack_fptr
+  %good_ptr = load volatile void (i32, i32, i32, i32)*, void (i32, i32, i32, i32)** %stack_fptr
   call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()
   call void (i32, i32, i32, i32)* %good_ptr(i32 1, i32 2, i32 3, i32 4)
   ret void
@@ -282,7 +282,7 @@
 ; NORMAL-NEXT: addl $16, %esp
 @the_global = external global i32
 define void @test11() optsize {
-  %myload = load i32* @the_global
+  %myload = load i32, i32* @the_global
   store i32 42, i32* @the_global
   call void @good(i32 %myload, i32 2, i32 3, i32 4)
   ret void
diff --git a/llvm/test/CodeGen/X86/ms-inline-asm.ll b/llvm/test/CodeGen/X86/ms-inline-asm.ll
index 65b897c..428eb1b 100644
--- a/llvm/test/CodeGen/X86/ms-inline-asm.ll
+++ b/llvm/test/CodeGen/X86/ms-inline-asm.ll
@@ -50,7 +50,7 @@
   store i32 2, i32* %b, align 4
   call void asm sideeffect inteldialect "lea ebx, foo\0A\09mov eax, [ebx].0\0A\09mov [ebx].4, ecx", "~{eax},~{dirflag},~{fpsr},~{flags}"() nounwind
   %b1 = getelementptr inbounds %struct.t18_type, %struct.t18_type* %foo, i32 0, i32 1
-  %0 = load i32* %b1, align 4
+  %0 = load i32, i32* %b1, align 4
   ret i32 %0
 ; CHECK: t18
 ; CHECK: {{## InlineAsm Start|#APP}}
@@ -87,7 +87,7 @@
   %res = alloca i32*, align 4
   call void asm sideeffect inteldialect "lea edi, dword ptr $0", "*m,~{edi},~{dirflag},~{fpsr},~{flags}"([2 x i32]* @results) nounwind
   call void asm sideeffect inteldialect "mov dword ptr $0, edi", "=*m,~{dirflag},~{fpsr},~{flags}"(i32** %res) nounwind
-  %0 = load i32** %res, align 4
+  %0 = load i32*, i32** %res, align 4
   ret i32* %0
 ; CHECK-LABEL: t30:
 ; CHECK: {{## InlineAsm Start|#APP}}
@@ -111,7 +111,7 @@
   %val = alloca i32, align 64
   store i32 -1, i32* %val, align 64
   call void asm sideeffect inteldialect "mov dword ptr $0, esp", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %val)
-  %sp = load i32* %val, align 64
+  %sp = load i32, i32* %val, align 64
   ret i32 %sp
 ; CHECK-LABEL: t31:
 ; CHECK: pushl %ebp
diff --git a/llvm/test/CodeGen/X86/mul128_sext_loop.ll b/llvm/test/CodeGen/X86/mul128_sext_loop.ll
index 88c5156..efb7e02 100644
--- a/llvm/test/CodeGen/X86/mul128_sext_loop.ll
+++ b/llvm/test/CodeGen/X86/mul128_sext_loop.ll
@@ -15,7 +15,7 @@
   %carry.02 = phi i128 [ 0, %.lr.ph ], [ %10, %3 ]
   %i.01 = phi i64 [ 0, %.lr.ph ], [ %11, %3 ]
   %4 = getelementptr inbounds i64, i64* %arr, i64 %i.01
-  %5 = load i64* %4, align 8
+  %5 = load i64, i64* %4, align 8
   %6 = sext i64 %5 to i128
   %7 = mul nsw i128 %6, %2
   %8 = add nsw i128 %7, %carry.02
diff --git a/llvm/test/CodeGen/X86/muloti.ll b/llvm/test/CodeGen/X86/muloti.ll
index 523b970..6c6198e 100644
--- a/llvm/test/CodeGen/X86/muloti.ll
+++ b/llvm/test/CodeGen/X86/muloti.ll
@@ -45,17 +45,17 @@
   store i64 %a.coerce0, i64* %1
   %2 = getelementptr %0, %0* %0, i32 0, i32 1
   store i64 %a.coerce1, i64* %2
-  %a = load i128* %coerce, align 16
+  %a = load i128, i128* %coerce, align 16
   store i128 %a, i128* %a.addr, align 16
   %3 = bitcast i128* %coerce1 to %0*
   %4 = getelementptr %0, %0* %3, i32 0, i32 0
   store i64 %b.coerce0, i64* %4
   %5 = getelementptr %0, %0* %3, i32 0, i32 1
   store i64 %b.coerce1, i64* %5
-  %b = load i128* %coerce1, align 16
+  %b = load i128, i128* %coerce1, align 16
   store i128 %b, i128* %b.addr, align 16
-  %tmp = load i128* %a.addr, align 16
-  %tmp2 = load i128* %b.addr, align 16
+  %tmp = load i128, i128* %a.addr, align 16
+  %tmp2 = load i128, i128* %b.addr, align 16
   %6 = call %1 @llvm.umul.with.overflow.i128(i128 %tmp, i128 %tmp2)
 ; CHECK: cmov
 ; CHECK: divti3
@@ -70,7 +70,7 @@
 nooverflow:                                       ; preds = %entry
   store i128 %7, i128* %retval
   %9 = bitcast i128* %retval to %0*
-  %10 = load %0* %9, align 1
+  %10 = load %0, %0* %9, align 1
   ret %0 %10
 }
 
diff --git a/llvm/test/CodeGen/X86/mult-alt-generic-i686.ll b/llvm/test/CodeGen/X86/mult-alt-generic-i686.ll
index 54bc3a4..3472e94 100644
--- a/llvm/test/CodeGen/X86/mult-alt-generic-i686.ll
+++ b/llvm/test/CodeGen/X86/mult-alt-generic-i686.ll
@@ -33,10 +33,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -48,10 +48,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -63,7 +63,7 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
   ret void
@@ -120,10 +120,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
@@ -137,10 +137,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
@@ -165,7 +165,7 @@
 
 define void @multi_m() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   call void asm "foo $1,$0", "=*m|r,m|r,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
   ret void
 }
@@ -190,10 +190,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -205,10 +205,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -220,7 +220,7 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|m,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
   ret void
@@ -277,10 +277,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
@@ -294,10 +294,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
diff --git a/llvm/test/CodeGen/X86/mult-alt-generic-x86_64.ll b/llvm/test/CodeGen/X86/mult-alt-generic-x86_64.ll
index 84a9c81..7f92a0d 100644
--- a/llvm/test/CodeGen/X86/mult-alt-generic-x86_64.ll
+++ b/llvm/test/CodeGen/X86/mult-alt-generic-x86_64.ll
@@ -33,10 +33,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -48,10 +48,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -63,7 +63,7 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
   ret void
@@ -120,10 +120,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r,imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
@@ -137,10 +137,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r,X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
@@ -165,7 +165,7 @@
 
 define void @multi_m() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   call void asm "foo $1,$0", "=*m|r,m|r,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
   ret void
 }
@@ -190,10 +190,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|<r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|r<,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -205,10 +205,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|>r,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* %in1, align 4
+  %tmp1 = load i32, i32* %in1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|r>,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   ret void
@@ -220,7 +220,7 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|m,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
   ret void
@@ -277,10 +277,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r|r,r|imr,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
@@ -294,10 +294,10 @@
   %in1 = alloca i32, align 4
   store i32 0, i32* %out0, align 4
   store i32 1, i32* %in1, align 4
-  %tmp = load i32* %in1, align 4
+  %tmp = load i32, i32* %in1, align 4
   %0 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* %out0, align 4
-  %tmp1 = load i32* @min1, align 4
+  %tmp1 = load i32, i32* @min1, align 4
   %1 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 %tmp1) nounwind
   store i32 %1, i32* %out0, align 4
   %2 = call i32 asm "foo $1,$0", "=r|r,r|X,~{dirflag},~{fpsr},~{flags}"(i32 1) nounwind
diff --git a/llvm/test/CodeGen/X86/mult-alt-x86.ll b/llvm/test/CodeGen/X86/mult-alt-x86.ll
index cb2219a..5174f85 100644
--- a/llvm/test/CodeGen/X86/mult-alt-x86.ll
+++ b/llvm/test/CodeGen/X86/mult-alt-x86.ll
@@ -11,7 +11,7 @@
 
 define void @single_R() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   %0 = call i32 asm "foo $1,$0", "=R,R,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* @mout0, align 4
   ret void
@@ -19,7 +19,7 @@
 
 define void @single_q() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   %0 = call i32 asm "foo $1,$0", "=q,q,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* @mout0, align 4
   ret void
@@ -27,7 +27,7 @@
 
 define void @single_Q() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   %0 = call i32 asm "foo $1,$0", "=Q,Q,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* @mout0, align 4
   ret void
@@ -35,7 +35,7 @@
 
 define void @single_a() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   %0 = call i32 asm "foo $1,$0", "={ax},{ax},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* @mout0, align 4
   ret void
@@ -43,7 +43,7 @@
 
 define void @single_b() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   %0 = call i32 asm "foo $1,$0", "={bx},{bx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* @mout0, align 4
   ret void
@@ -51,7 +51,7 @@
 
 define void @single_c() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   %0 = call i32 asm "foo $1,$0", "={cx},{cx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* @mout0, align 4
   ret void
@@ -59,7 +59,7 @@
 
 define void @single_d() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   %0 = call i32 asm "foo $1,$0", "={dx},{dx},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* @mout0, align 4
   ret void
@@ -67,7 +67,7 @@
 
 define void @single_S() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   %0 = call i32 asm "foo $1,$0", "={si},{si},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* @mout0, align 4
   ret void
@@ -75,7 +75,7 @@
 
 define void @single_D() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   %0 = call i32 asm "foo $1,$0", "={di},{di},~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* @mout0, align 4
   ret void
@@ -83,7 +83,7 @@
 
 define void @single_A() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   %0 = call i32 asm "foo $1,$0", "=A,A,~{dirflag},~{fpsr},~{flags}"(i32 %tmp) nounwind
   store i32 %0, i32* @mout0, align 4
   ret void
@@ -106,7 +106,7 @@
 
 define void @single_y() nounwind {
 entry:
-  %tmp = load double* @din1, align 8
+  %tmp = load double, double* @din1, align 8
   %0 = call double asm "foo $1,$0", "=y,y,~{dirflag},~{fpsr},~{flags}"(double %tmp) nounwind
   store double %0, double* @dout0, align 8
   ret void
@@ -114,7 +114,7 @@
 
 define void @single_x() nounwind {
 entry:
-  %tmp = load double* @din1, align 8
+  %tmp = load double, double* @din1, align 8
   %0 = call double asm "foo $1,$0", "=x,x,~{dirflag},~{fpsr},~{flags}"(double %tmp) nounwind
   store double %0, double* @dout0, align 8
   ret void
@@ -191,70 +191,70 @@
 
 define void @multi_R() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   call void asm "foo $1,$0", "=*r|R|m,r|R|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
   ret void
 }
 
 define void @multi_q() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   call void asm "foo $1,$0", "=*r|q|m,r|q|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
   ret void
 }
 
 define void @multi_Q() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   call void asm "foo $1,$0", "=*r|Q|m,r|Q|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
   ret void
 }
 
 define void @multi_a() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   call void asm "foo $1,$0", "=*r|{ax}|m,r|{ax}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
   ret void
 }
 
 define void @multi_b() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   call void asm "foo $1,$0", "=*r|{bx}|m,r|{bx}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
   ret void
 }
 
 define void @multi_c() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   call void asm "foo $1,$0", "=*r|{cx}|m,r|{cx}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
   ret void
 }
 
 define void @multi_d() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   call void asm "foo $1,$0", "=*r|{dx}|m,r|{dx},~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
   ret void
 }
 
 define void @multi_S() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   call void asm "foo $1,$0", "=*r|{si}|m,r|{si}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
   ret void
 }
 
 define void @multi_D() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   call void asm "foo $1,$0", "=*r|{di}|m,r|{di}|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
   ret void
 }
 
 define void @multi_A() nounwind {
 entry:
-  %tmp = load i32* @min1, align 4
+  %tmp = load i32, i32* @min1, align 4
   call void asm "foo $1,$0", "=*r|A|m,r|A|m,~{dirflag},~{fpsr},~{flags}"(i32* @mout0, i32 %tmp) nounwind
   ret void
 }
@@ -276,14 +276,14 @@
 
 define void @multi_y() nounwind {
 entry:
-  %tmp = load double* @din1, align 8
+  %tmp = load double, double* @din1, align 8
   call void asm "foo $1,$0", "=*r|y|m,r|y|m,~{dirflag},~{fpsr},~{flags}"(double* @dout0, double %tmp) nounwind
   ret void
 }
 
 define void @multi_x() nounwind {
 entry:
-  %tmp = load double* @din1, align 8
+  %tmp = load double, double* @din1, align 8
   call void asm "foo $1,$0", "=*r|x|m,r|x|m,~{dirflag},~{fpsr},~{flags}"(double* @dout0, double %tmp) nounwind
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/multiple-loop-post-inc.ll b/llvm/test/CodeGen/X86/multiple-loop-post-inc.ll
index ab2420b..be778da 100644
--- a/llvm/test/CodeGen/X86/multiple-loop-post-inc.ll
+++ b/llvm/test/CodeGen/X86/multiple-loop-post-inc.ll
@@ -19,7 +19,7 @@
 entry:
   %times4 = alloca float, align 4                 ; <float*> [#uses=3]
   %timesN = alloca float, align 4                 ; <float*> [#uses=2]
-  %0 = load float* %Step, align 4                 ; <float> [#uses=8]
+  %0 = load float, float* %Step, align 4                 ; <float> [#uses=8]
   %1 = ptrtoint float* %I to i64                  ; <i64> [#uses=1]
   %2 = ptrtoint float* %O to i64                  ; <i64> [#uses=1]
   %tmp = xor i64 %2, %1                           ; <i64> [#uses=1]
@@ -34,11 +34,11 @@
   br i1 %9, label %bb, label %return
 
 bb:                                               ; preds = %entry
-  %10 = load float* %Start, align 4               ; <float> [#uses=1]
+  %10 = load float, float* %Start, align 4               ; <float> [#uses=1]
   br label %bb2
 
 bb1:                                              ; preds = %bb3
-  %11 = load float* %I_addr.0, align 4            ; <float> [#uses=1]
+  %11 = load float, float* %I_addr.0, align 4            ; <float> [#uses=1]
   %12 = fmul float %11, %x.0                      ; <float> [#uses=1]
   store float %12, float* %O_addr.0, align 4
   %13 = fadd float %x.0, %0                       ; <float> [#uses=1]
@@ -127,10 +127,10 @@
   %scevgep130131 = bitcast float* %scevgep130 to <4 x float>* ; <<4 x float>*> [#uses=1]
   %tmp132 = mul i64 %indvar102, -16               ; <i64> [#uses=1]
   %tmp136 = add i64 %tmp135, %tmp132              ; <i64> [#uses=2]
-  %36 = load <4 x float>* %scevgep106107, align 16 ; <<4 x float>> [#uses=1]
-  %37 = load <4 x float>* %scevgep113114, align 16 ; <<4 x float>> [#uses=1]
-  %38 = load <4 x float>* %scevgep117118, align 16 ; <<4 x float>> [#uses=1]
-  %39 = load <4 x float>* %scevgep121122, align 16 ; <<4 x float>> [#uses=1]
+  %36 = load <4 x float>, <4 x float>* %scevgep106107, align 16 ; <<4 x float>> [#uses=1]
+  %37 = load <4 x float>, <4 x float>* %scevgep113114, align 16 ; <<4 x float>> [#uses=1]
+  %38 = load <4 x float>, <4 x float>* %scevgep117118, align 16 ; <<4 x float>> [#uses=1]
+  %39 = load <4 x float>, <4 x float>* %scevgep121122, align 16 ; <<4 x float>> [#uses=1]
   %40 = fmul <4 x float> %36, %vX0.039            ; <<4 x float>> [#uses=1]
   %41 = fadd <4 x float> %vX0.039, %asmtmp.i18    ; <<4 x float>> [#uses=2]
   %42 = fmul <4 x float> %37, %vX1.036            ; <<4 x float>> [#uses=1]
@@ -168,7 +168,7 @@
   %I_addr.0.sum = add i64 %14, -1                 ; <i64> [#uses=1]
   %49 = getelementptr inbounds float, float* %I, i64 %I_addr.0.sum ; <float*> [#uses=1]
   %50 = bitcast float* %49 to <4 x float>*        ; <<4 x float>*> [#uses=1]
-  %51 = load <4 x float>* %50, align 16           ; <<4 x float>> [#uses=1]
+  %51 = load <4 x float>, <4 x float>* %50, align 16           ; <<4 x float>> [#uses=1]
   %tmp54 = add i64 %14, 16                        ; <i64> [#uses=1]
   %tmp56 = add i64 %14, 3                         ; <i64> [#uses=1]
   %tmp60 = add i64 %14, 7                         ; <i64> [#uses=1]
@@ -216,10 +216,10 @@
   %scevgep8687 = bitcast float* %scevgep86 to <4 x float>* ; <<4 x float>*> [#uses=1]
   %tmp88 = mul i64 %indvar, -16                   ; <i64> [#uses=1]
   %tmp92 = add i64 %tmp91, %tmp88                 ; <i64> [#uses=2]
-  %52 = load <4 x float>* %scevgep5859, align 16  ; <<4 x float>> [#uses=2]
-  %53 = load <4 x float>* %scevgep6263, align 16  ; <<4 x float>> [#uses=2]
-  %54 = load <4 x float>* %scevgep6667, align 16  ; <<4 x float>> [#uses=2]
-  %55 = load <4 x float>* %scevgep7071, align 16  ; <<4 x float>> [#uses=2]
+  %52 = load <4 x float>, <4 x float>* %scevgep5859, align 16  ; <<4 x float>> [#uses=2]
+  %53 = load <4 x float>, <4 x float>* %scevgep6263, align 16  ; <<4 x float>> [#uses=2]
+  %54 = load <4 x float>, <4 x float>* %scevgep6667, align 16  ; <<4 x float>> [#uses=2]
+  %55 = load <4 x float>, <4 x float>* %scevgep7071, align 16  ; <<4 x float>> [#uses=2]
   %56 = shufflevector <4 x float> %vI0.019, <4 x float> %52, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
   %57 = shufflevector <4 x float> %56, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0> ; <<4 x float>> [#uses=1]
   %58 = shufflevector <4 x float> %52, <4 x float> %53, <4 x i32> <i32 4, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
@@ -263,7 +263,7 @@
   %x.130 = phi float [ %77, %bb12 ], [ %73, %bb11 ] ; <float> [#uses=2]
   %I_addr.433 = getelementptr float, float* %I_addr.2, i64 %indvar94 ; <float*> [#uses=1]
   %O_addr.432 = getelementptr float, float* %O_addr.2, i64 %indvar94 ; <float*> [#uses=1]
-  %75 = load float* %I_addr.433, align 4          ; <float> [#uses=1]
+  %75 = load float, float* %I_addr.433, align 4          ; <float> [#uses=1]
   %76 = fmul float %75, %x.130                    ; <float> [#uses=1]
   store float %76, float* %O_addr.432, align 4
   %77 = fadd float %x.130, %0                     ; <float> [#uses=2]
@@ -293,7 +293,7 @@
 
 inner:                                       ; preds = %bb0, %if.end275
   %i8 = phi i32 [ %a, %outer ], [ %indvar.next159, %bb0 ] ; <i32> [#uses=2]
-  %t338 = load i32* undef                     ; <i32> [#uses=1]
+  %t338 = load i32, i32* undef                     ; <i32> [#uses=1]
   %t191 = mul i32 %i8, %t338        ; <i32> [#uses=1]
   %t179 = add i32 %i6, %t191        ; <i32> [#uses=1]
   br label %bb0
diff --git a/llvm/test/CodeGen/X86/mulx32.ll b/llvm/test/CodeGen/X86/mulx32.ll
index b75ac00..42ef2eb 100644
--- a/llvm/test/CodeGen/X86/mulx32.ll
+++ b/llvm/test/CodeGen/X86/mulx32.ll
@@ -11,7 +11,7 @@
 }
 
 define i64 @f2(i32 %a, i32* %p) {
-  %b = load i32* %p
+  %b = load i32, i32* %p
   %x = zext i32 %a to i64
   %y = zext i32 %b to i64
   %r = mul i64 %x, %y
diff --git a/llvm/test/CodeGen/X86/mulx64.ll b/llvm/test/CodeGen/X86/mulx64.ll
index d573028..808c022 100644
--- a/llvm/test/CodeGen/X86/mulx64.ll
+++ b/llvm/test/CodeGen/X86/mulx64.ll
@@ -11,7 +11,7 @@
 }
 
 define i128 @f2(i64 %a, i64* %p) {
-  %b = load i64* %p
+  %b = load i64, i64* %p
   %x = zext i64 %a to i128
   %y = zext i64 %b to i128
   %r = mul i128 %x, %y
diff --git a/llvm/test/CodeGen/X86/musttail-indirect.ll b/llvm/test/CodeGen/X86/musttail-indirect.ll
index 3156090..7bb71c3 100644
--- a/llvm/test/CodeGen/X86/musttail-indirect.ll
+++ b/llvm/test/CodeGen/X86/musttail-indirect.ll
@@ -31,8 +31,8 @@
 define x86_thiscallcc i32 @f_thunk(%struct.B* %this, i32) {
 entry:
   %1 = bitcast %struct.B* %this to i32 (%struct.B*, i32)***
-  %vtable = load i32 (%struct.B*, i32)*** %1
-  %2 = load i32 (%struct.B*, i32)** %vtable
+  %vtable = load i32 (%struct.B*, i32)**, i32 (%struct.B*, i32)*** %1
+  %2 = load i32 (%struct.B*, i32)*, i32 (%struct.B*, i32)** %vtable
   %3 = musttail call x86_thiscallcc i32 %2(%struct.B* %this, i32 %0)
   ret i32 %3
 }
@@ -45,9 +45,9 @@
 define x86_thiscallcc i32 @g_thunk(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca) {
 entry:
   %1 = bitcast %struct.B* %this to i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)***
-  %vtable = load i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*** %1
+  %vtable = load i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)**, i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*** %1
   %vfn = getelementptr inbounds i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*, i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vtable, i32 1
-  %2 = load i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vfn
+  %2 = load i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*, i32 (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vfn
   %3 = musttail call x86_thiscallcc i32 %2(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca %0)
   ret i32 %3
 }
@@ -59,9 +59,9 @@
 define x86_thiscallcc void @h_thunk(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca) {
 entry:
   %1 = bitcast %struct.B* %this to void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)***
-  %vtable = load void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*** %1
+  %vtable = load void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)**, void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*** %1
   %vfn = getelementptr inbounds void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*, void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vtable, i32 2
-  %2 = load void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vfn
+  %2 = load void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)*, void (%struct.B*, <{ %struct.A, i32, %struct.A }>*)** %vfn
   musttail call x86_thiscallcc void %2(%struct.B* %this, <{ %struct.A, i32, %struct.A }>* inalloca %0)
   ret void
 }
@@ -73,9 +73,9 @@
 define x86_thiscallcc %struct.A* @i_thunk(%struct.B* %this, <{ %struct.A*, %struct.A, i32, %struct.A }>* inalloca) {
 entry:
   %1 = bitcast %struct.B* %this to %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)***
-  %vtable = load %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)*** %1
+  %vtable = load %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)**, %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)*** %1
   %vfn = getelementptr inbounds %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)*, %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)** %vtable, i32 3
-  %2 = load %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)** %vfn
+  %2 = load %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)*, %struct.A* (%struct.B*, <{ %struct.A*, %struct.A, i32, %struct.A }>*)** %vfn
   %3 = musttail call x86_thiscallcc %struct.A* %2(%struct.B* %this, <{ %struct.A*, %struct.A, i32, %struct.A }>* inalloca %0)
   ret %struct.A* %3
 }
@@ -86,9 +86,9 @@
 define x86_thiscallcc void @j_thunk(%struct.A* noalias sret %agg.result, %struct.B* %this, i32) {
 entry:
   %1 = bitcast %struct.B* %this to void (%struct.A*, %struct.B*, i32)***
-  %vtable = load void (%struct.A*, %struct.B*, i32)*** %1
+  %vtable = load void (%struct.A*, %struct.B*, i32)**, void (%struct.A*, %struct.B*, i32)*** %1
   %vfn = getelementptr inbounds void (%struct.A*, %struct.B*, i32)*, void (%struct.A*, %struct.B*, i32)** %vtable, i32 4
-  %2 = load void (%struct.A*, %struct.B*, i32)** %vfn
+  %2 = load void (%struct.A*, %struct.B*, i32)*, void (%struct.A*, %struct.B*, i32)** %vfn
   musttail call x86_thiscallcc void %2(%struct.A* sret %agg.result, %struct.B* %this, i32 %0)
   ret void
 }
@@ -100,11 +100,11 @@
 define x86_stdcallcc i32 @stdcall_thunk(<{ %struct.B*, %struct.A }>* inalloca) {
 entry:
   %this_ptr = getelementptr inbounds <{ %struct.B*, %struct.A }>, <{ %struct.B*, %struct.A }>* %0, i32 0, i32 0
-  %this = load %struct.B** %this_ptr
+  %this = load %struct.B*, %struct.B** %this_ptr
   %1 = bitcast %struct.B* %this to i32 (<{ %struct.B*, %struct.A }>*)***
-  %vtable = load i32 (<{ %struct.B*, %struct.A }>*)*** %1
+  %vtable = load i32 (<{ %struct.B*, %struct.A }>*)**, i32 (<{ %struct.B*, %struct.A }>*)*** %1
   %vfn = getelementptr inbounds i32 (<{ %struct.B*, %struct.A }>*)*, i32 (<{ %struct.B*, %struct.A }>*)** %vtable, i32 1
-  %2 = load i32 (<{ %struct.B*, %struct.A }>*)** %vfn
+  %2 = load i32 (<{ %struct.B*, %struct.A }>*)*, i32 (<{ %struct.B*, %struct.A }>*)** %vfn
   %3 = musttail call x86_stdcallcc i32 %2(<{ %struct.B*, %struct.A }>* inalloca %0)
   ret i32 %3
 }
@@ -116,9 +116,9 @@
 define x86_fastcallcc i32 @fastcall_thunk(%struct.B* inreg %this, <{ %struct.A }>* inalloca) {
 entry:
   %1 = bitcast %struct.B* %this to i32 (%struct.B*, <{ %struct.A }>*)***
-  %vtable = load i32 (%struct.B*, <{ %struct.A }>*)*** %1
+  %vtable = load i32 (%struct.B*, <{ %struct.A }>*)**, i32 (%struct.B*, <{ %struct.A }>*)*** %1
   %vfn = getelementptr inbounds i32 (%struct.B*, <{ %struct.A }>*)*, i32 (%struct.B*, <{ %struct.A }>*)** %vtable, i32 1
-  %2 = load i32 (%struct.B*, <{ %struct.A }>*)** %vfn
+  %2 = load i32 (%struct.B*, <{ %struct.A }>*)*, i32 (%struct.B*, <{ %struct.A }>*)** %vfn
   %3 = musttail call x86_fastcallcc i32 %2(%struct.B* inreg %this, <{ %struct.A }>* inalloca %0)
   ret i32 %3
 }
diff --git a/llvm/test/CodeGen/X86/musttail-varargs.ll b/llvm/test/CodeGen/X86/musttail-varargs.ll
index f72160f..52115b2 100644
--- a/llvm/test/CodeGen/X86/musttail-varargs.ll
+++ b/llvm/test/CodeGen/X86/musttail-varargs.ll
@@ -107,19 +107,19 @@
 
 define void @h_thunk(%struct.Foo* %this, ...) {
   %cond_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 0
-  %cond = load i1* %cond_p
+  %cond = load i1, i1* %cond_p
   br i1 %cond, label %then, label %else
 
 then:
   %a_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 1
-  %a_i8 = load i8** %a_p
+  %a_i8 = load i8*, i8** %a_p
   %a = bitcast i8* %a_i8 to void (%struct.Foo*, ...)*
   musttail call void (%struct.Foo*, ...)* %a(%struct.Foo* %this, ...)
   ret void
 
 else:
   %b_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 2
-  %b_i8 = load i8** %b_p
+  %b_i8 = load i8*, i8** %b_p
   %b = bitcast i8* %b_i8 to void (%struct.Foo*, ...)*
   store i32 42, i32* @g
   musttail call void (%struct.Foo*, ...)* %b(%struct.Foo* %this, ...)
diff --git a/llvm/test/CodeGen/X86/nancvt.ll b/llvm/test/CodeGen/X86/nancvt.ll
index 0c1f8b9..9222f6b 100644
--- a/llvm/test/CodeGen/X86/nancvt.ll
+++ b/llvm/test/CodeGen/X86/nancvt.ll
@@ -29,38 +29,38 @@
 	br label %bb23
 
 bb:		; preds = %bb23
-	%tmp = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp = load i32, i32* %i, align 4		; <i32> [#uses=1]
 	%tmp1 = getelementptr [3 x i32], [3 x i32]* @fnan, i32 0, i32 %tmp		; <i32*> [#uses=1]
-	%tmp2 = load i32* %tmp1, align 4		; <i32> [#uses=1]
+	%tmp2 = load i32, i32* %tmp1, align 4		; <i32> [#uses=1]
 	%tmp3 = getelementptr %struct..0anon, %struct..0anon* %uf, i32 0, i32 0		; <float*> [#uses=1]
 	%tmp34 = bitcast float* %tmp3 to i32*		; <i32*> [#uses=1]
 	store i32 %tmp2, i32* %tmp34, align 4
 	%tmp5 = getelementptr %struct..0anon, %struct..0anon* %uf, i32 0, i32 0		; <float*> [#uses=1]
-	%tmp6 = load float* %tmp5, align 4		; <float> [#uses=1]
+	%tmp6 = load float, float* %tmp5, align 4		; <float> [#uses=1]
 	%tmp67 = fpext float %tmp6 to double		; <double> [#uses=1]
 	%tmp8 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
 	store double %tmp67, double* %tmp8, align 8
 	%tmp9 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
 	%tmp910 = bitcast double* %tmp9 to i64*		; <i64*> [#uses=1]
-	%tmp11 = load i64* %tmp910, align 8		; <i64> [#uses=1]
+	%tmp11 = load i64, i64* %tmp910, align 8		; <i64> [#uses=1]
 	%tmp1112 = trunc i64 %tmp11 to i32		; <i32> [#uses=1]
 	%tmp13 = and i32 %tmp1112, -1		; <i32> [#uses=1]
 	%tmp14 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
 	%tmp1415 = bitcast double* %tmp14 to i64*		; <i64*> [#uses=1]
-	%tmp16 = load i64* %tmp1415, align 8		; <i64> [#uses=1]
+	%tmp16 = load i64, i64* %tmp1415, align 8		; <i64> [#uses=1]
 	%.cast = zext i32 32 to i64		; <i64> [#uses=1]
 	%tmp17 = ashr i64 %tmp16, %.cast		; <i64> [#uses=1]
 	%tmp1718 = trunc i64 %tmp17 to i32		; <i32> [#uses=1]
 	%tmp19 = getelementptr [10 x i8], [10 x i8]* @.str, i32 0, i32 0		; <i8*> [#uses=1]
 	store volatile i32 %tmp1718, i32* @var
 	store volatile i32 %tmp13, i32* @var
-	%tmp21 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp21 = load i32, i32* %i, align 4		; <i32> [#uses=1]
 	%tmp22 = add i32 %tmp21, 1		; <i32> [#uses=1]
 	store i32 %tmp22, i32* %i, align 4
 	br label %bb23
 
 bb23:		; preds = %bb, %entry
-	%tmp24 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp24 = load i32, i32* %i, align 4		; <i32> [#uses=1]
 	%tmp25 = icmp sle i32 %tmp24, 2		; <i1> [#uses=1]
 	%tmp2526 = zext i1 %tmp25 to i8		; <i8> [#uses=1]
 	%toBool = icmp ne i8 %tmp2526, 0		; <i1> [#uses=1]
@@ -71,29 +71,29 @@
 	br label %bb46
 
 bb28:		; preds = %bb46
-	%tmp29 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp29 = load i32, i32* %i, align 4		; <i32> [#uses=1]
 	%tmp30 = getelementptr [3 x i64], [3 x i64]* @dnan, i32 0, i32 %tmp29		; <i64*> [#uses=1]
-	%tmp31 = load i64* %tmp30, align 8		; <i64> [#uses=1]
+	%tmp31 = load i64, i64* %tmp30, align 8		; <i64> [#uses=1]
 	%tmp32 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
 	%tmp3233 = bitcast double* %tmp32 to i64*		; <i64*> [#uses=1]
 	store i64 %tmp31, i64* %tmp3233, align 8
 	%tmp35 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp36 = load double* %tmp35, align 8		; <double> [#uses=1]
+	%tmp36 = load double, double* %tmp35, align 8		; <double> [#uses=1]
 	%tmp3637 = fptrunc double %tmp36 to float		; <float> [#uses=1]
 	%tmp38 = getelementptr %struct..0anon, %struct..0anon* %uf, i32 0, i32 0		; <float*> [#uses=1]
 	store float %tmp3637, float* %tmp38, align 4
 	%tmp39 = getelementptr %struct..0anon, %struct..0anon* %uf, i32 0, i32 0		; <float*> [#uses=1]
 	%tmp3940 = bitcast float* %tmp39 to i32*		; <i32*> [#uses=1]
-	%tmp41 = load i32* %tmp3940, align 4		; <i32> [#uses=1]
+	%tmp41 = load i32, i32* %tmp3940, align 4		; <i32> [#uses=1]
 	%tmp42 = getelementptr [6 x i8], [6 x i8]* @.str1, i32 0, i32 0		; <i8*> [#uses=1]
 	store volatile i32 %tmp41, i32* @var
-	%tmp44 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp44 = load i32, i32* %i, align 4		; <i32> [#uses=1]
 	%tmp45 = add i32 %tmp44, 1		; <i32> [#uses=1]
 	store i32 %tmp45, i32* %i, align 4
 	br label %bb46
 
 bb46:		; preds = %bb28, %bb27
-	%tmp47 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp47 = load i32, i32* %i, align 4		; <i32> [#uses=1]
 	%tmp48 = icmp sle i32 %tmp47, 2		; <i1> [#uses=1]
 	%tmp4849 = zext i1 %tmp48 to i8		; <i8> [#uses=1]
 	%toBool50 = icmp ne i8 %tmp4849, 0		; <i1> [#uses=1]
@@ -104,38 +104,38 @@
 	br label %bb78
 
 bb52:		; preds = %bb78
-	%tmp53 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp53 = load i32, i32* %i, align 4		; <i32> [#uses=1]
 	%tmp54 = getelementptr [3 x i32], [3 x i32]* @fsnan, i32 0, i32 %tmp53		; <i32*> [#uses=1]
-	%tmp55 = load i32* %tmp54, align 4		; <i32> [#uses=1]
+	%tmp55 = load i32, i32* %tmp54, align 4		; <i32> [#uses=1]
 	%tmp56 = getelementptr %struct..0anon, %struct..0anon* %uf, i32 0, i32 0		; <float*> [#uses=1]
 	%tmp5657 = bitcast float* %tmp56 to i32*		; <i32*> [#uses=1]
 	store i32 %tmp55, i32* %tmp5657, align 4
 	%tmp58 = getelementptr %struct..0anon, %struct..0anon* %uf, i32 0, i32 0		; <float*> [#uses=1]
-	%tmp59 = load float* %tmp58, align 4		; <float> [#uses=1]
+	%tmp59 = load float, float* %tmp58, align 4		; <float> [#uses=1]
 	%tmp5960 = fpext float %tmp59 to double		; <double> [#uses=1]
 	%tmp61 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
 	store double %tmp5960, double* %tmp61, align 8
 	%tmp62 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
 	%tmp6263 = bitcast double* %tmp62 to i64*		; <i64*> [#uses=1]
-	%tmp64 = load i64* %tmp6263, align 8		; <i64> [#uses=1]
+	%tmp64 = load i64, i64* %tmp6263, align 8		; <i64> [#uses=1]
 	%tmp6465 = trunc i64 %tmp64 to i32		; <i32> [#uses=1]
 	%tmp66 = and i32 %tmp6465, -1		; <i32> [#uses=1]
 	%tmp68 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
 	%tmp6869 = bitcast double* %tmp68 to i64*		; <i64*> [#uses=1]
-	%tmp70 = load i64* %tmp6869, align 8		; <i64> [#uses=1]
+	%tmp70 = load i64, i64* %tmp6869, align 8		; <i64> [#uses=1]
 	%.cast71 = zext i32 32 to i64		; <i64> [#uses=1]
 	%tmp72 = ashr i64 %tmp70, %.cast71		; <i64> [#uses=1]
 	%tmp7273 = trunc i64 %tmp72 to i32		; <i32> [#uses=1]
 	%tmp74 = getelementptr [10 x i8], [10 x i8]* @.str, i32 0, i32 0		; <i8*> [#uses=1]
 	store volatile i32 %tmp7273, i32* @var
 	store volatile i32 %tmp66, i32* @var
-	%tmp76 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp76 = load i32, i32* %i, align 4		; <i32> [#uses=1]
 	%tmp77 = add i32 %tmp76, 1		; <i32> [#uses=1]
 	store i32 %tmp77, i32* %i, align 4
 	br label %bb78
 
 bb78:		; preds = %bb52, %bb51
-	%tmp79 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp79 = load i32, i32* %i, align 4		; <i32> [#uses=1]
 	%tmp80 = icmp sle i32 %tmp79, 2		; <i1> [#uses=1]
 	%tmp8081 = zext i1 %tmp80 to i8		; <i8> [#uses=1]
 	%toBool82 = icmp ne i8 %tmp8081, 0		; <i1> [#uses=1]
@@ -146,29 +146,29 @@
 	br label %bb101
 
 bb84:		; preds = %bb101
-	%tmp85 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp85 = load i32, i32* %i, align 4		; <i32> [#uses=1]
 	%tmp86 = getelementptr [3 x i64], [3 x i64]* @dsnan, i32 0, i32 %tmp85		; <i64*> [#uses=1]
-	%tmp87 = load i64* %tmp86, align 8		; <i64> [#uses=1]
+	%tmp87 = load i64, i64* %tmp86, align 8		; <i64> [#uses=1]
 	%tmp88 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
 	%tmp8889 = bitcast double* %tmp88 to i64*		; <i64*> [#uses=1]
 	store i64 %tmp87, i64* %tmp8889, align 8
 	%tmp90 = getelementptr %struct..1anon, %struct..1anon* %ud, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp91 = load double* %tmp90, align 8		; <double> [#uses=1]
+	%tmp91 = load double, double* %tmp90, align 8		; <double> [#uses=1]
 	%tmp9192 = fptrunc double %tmp91 to float		; <float> [#uses=1]
 	%tmp93 = getelementptr %struct..0anon, %struct..0anon* %uf, i32 0, i32 0		; <float*> [#uses=1]
 	store float %tmp9192, float* %tmp93, align 4
 	%tmp94 = getelementptr %struct..0anon, %struct..0anon* %uf, i32 0, i32 0		; <float*> [#uses=1]
 	%tmp9495 = bitcast float* %tmp94 to i32*		; <i32*> [#uses=1]
-	%tmp96 = load i32* %tmp9495, align 4		; <i32> [#uses=1]
+	%tmp96 = load i32, i32* %tmp9495, align 4		; <i32> [#uses=1]
 	%tmp97 = getelementptr [6 x i8], [6 x i8]* @.str1, i32 0, i32 0		; <i8*> [#uses=1]
 	store volatile i32 %tmp96, i32* @var
-	%tmp99 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp99 = load i32, i32* %i, align 4		; <i32> [#uses=1]
 	%tmp100 = add i32 %tmp99, 1		; <i32> [#uses=1]
 	store i32 %tmp100, i32* %i, align 4
 	br label %bb101
 
 bb101:		; preds = %bb84, %bb83
-	%tmp102 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp102 = load i32, i32* %i, align 4		; <i32> [#uses=1]
 	%tmp103 = icmp sle i32 %tmp102, 2		; <i1> [#uses=1]
 	%tmp103104 = zext i1 %tmp103 to i8		; <i8> [#uses=1]
 	%toBool105 = icmp ne i8 %tmp103104, 0		; <i1> [#uses=1]
@@ -178,6 +178,6 @@
 	br label %return
 
 return:		; preds = %bb106
-	%retval107 = load i32* %retval		; <i32> [#uses=1]
+	%retval107 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval107
 }
diff --git a/llvm/test/CodeGen/X86/narrow-shl-load.ll b/llvm/test/CodeGen/X86/narrow-shl-load.ll
index 5175bfc..9dc0d74 100644
--- a/llvm/test/CodeGen/X86/narrow-shl-load.ll
+++ b/llvm/test/CodeGen/X86/narrow-shl-load.ll
@@ -11,7 +11,7 @@
   br label %while.cond
 
 while.cond:                                       ; preds = %while.cond, %bb.nph
-  %tmp6 = load i32* undef, align 4
+  %tmp6 = load i32, i32* undef, align 4
   %and = or i64 undef, undef
   %conv11 = zext i32 undef to i64
   %conv14 = zext i32 %tmp6 to i64
@@ -20,7 +20,7 @@
   %and17 = or i64 %shl15.masked, %conv11
   %add = add i64 %and17, 1
   %xor = xor i64 %add, %and
-  %tmp20 = load i64* undef, align 8
+  %tmp20 = load i64, i64* undef, align 8
   %add21 = add i64 %xor, %tmp20
   %conv22 = trunc i64 %add21 to i32
   store i32 %conv22, i32* undef, align 4
@@ -34,7 +34,7 @@
 ; PR8757
 define i32 @test3(i32 *%P) nounwind ssp {
   store volatile i32 128, i32* %P
-  %tmp4.pre = load i32* %P
+  %tmp4.pre = load i32, i32* %P
   %phitmp = trunc i32 %tmp4.pre to i16
   %phitmp13 = shl i16 %phitmp, 8
   %phitmp14 = ashr i16 %phitmp13, 8
diff --git a/llvm/test/CodeGen/X86/narrow_op-1.ll b/llvm/test/CodeGen/X86/narrow_op-1.ll
index 89ae3f1..836a23d 100644
--- a/llvm/test/CodeGen/X86/narrow_op-1.ll
+++ b/llvm/test/CodeGen/X86/narrow_op-1.ll
@@ -5,7 +5,7 @@
 
 define void @t1() nounwind optsize ssp {
 entry:
-	%0 = load i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
+	%0 = load i32, i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
 	%1 = or i32 %0, 65536
 	store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
 	ret void
@@ -17,7 +17,7 @@
 
 define void @t2() nounwind optsize ssp {
 entry:
-	%0 = load i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
+	%0 = load i32, i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
 	%1 = or i32 %0, 16842752
 	store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
 	ret void
diff --git a/llvm/test/CodeGen/X86/negate-add-zero.ll b/llvm/test/CodeGen/X86/negate-add-zero.ll
index 923c74b..629395e 100644
--- a/llvm/test/CodeGen/X86/negate-add-zero.ll
+++ b/llvm/test/CodeGen/X86/negate-add-zero.ll
@@ -828,22 +828,22 @@
 
 define linkonce void @_ZN21HNodeTranslateRotate36setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate3* %this, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"* %velv) {
 	%1 = getelementptr double, double* null, i32 -1		; <double*> [#uses=1]
-	%2 = load double* %1, align 8		; <double> [#uses=1]
-	%3 = load double* null, align 8		; <double> [#uses=2]
-	%4 = load double* null, align 8		; <double> [#uses=2]
-	%5 = load double* null, align 8		; <double> [#uses=3]
+	%2 = load double, double* %1, align 8		; <double> [#uses=1]
+	%3 = load double, double* null, align 8		; <double> [#uses=2]
+	%4 = load double, double* null, align 8		; <double> [#uses=2]
+	%5 = load double, double* null, align 8		; <double> [#uses=3]
 	%6 = getelementptr %struct.HNodeTranslateRotate3, %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0		; <double*> [#uses=0]
 	%7 = getelementptr %struct.HNodeTranslateRotate3, %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 1		; <double*> [#uses=0]
 	%8 = getelementptr %struct.HNodeTranslateRotate3, %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 2		; <double*> [#uses=0]
 	%9 = getelementptr %struct.HNodeTranslateRotate3, %struct.HNodeTranslateRotate3* %this, i32 0, i32 2, i32 0, i32 0, i32 0, i32 3		; <double*> [#uses=0]
-	%10 = load double* null, align 8		; <double> [#uses=2]
+	%10 = load double, double* null, align 8		; <double> [#uses=2]
 	%11 = fsub double -0.000000e+00, %10		; <double> [#uses=1]
-	%12 = load double* null, align 8		; <double> [#uses=2]
+	%12 = load double, double* null, align 8		; <double> [#uses=2]
 	%13 = getelementptr %struct.HNodeTranslateRotate3, %struct.HNodeTranslateRotate3* %this, i32 0, i32 1, i32 0, i32 0, i32 0, i32 3		; <double*> [#uses=1]
-	%14 = load double* %13, align 8		; <double> [#uses=2]
+	%14 = load double, double* %13, align 8		; <double> [#uses=2]
 	%15 = fsub double -0.000000e+00, %14		; <double> [#uses=1]
 	%16 = getelementptr %struct.HNodeTranslateRotate3, %struct.HNodeTranslateRotate3* %this, i32 0, i32 1, i32 0, i32 0, i32 0, i32 2		; <double*> [#uses=1]
-	%17 = load double* %16, align 8		; <double> [#uses=2]
+	%17 = load double, double* %16, align 8		; <double> [#uses=2]
 	%18 = fsub double -0.000000e+00, %17		; <double> [#uses=1]
 	%19 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>", %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 0		; <double*> [#uses=0]
 	%20 = getelementptr %"struct.FixedMatrix<double,2,6,0,0>", %"struct.FixedMatrix<double,2,6,0,0>"* null, i32 0, i32 0, i32 0, i32 3		; <double*> [#uses=0]
diff --git a/llvm/test/CodeGen/X86/no-cmov.ll b/llvm/test/CodeGen/X86/no-cmov.ll
index e13edf2..8fc0f70 100644
--- a/llvm/test/CodeGen/X86/no-cmov.ll
+++ b/llvm/test/CodeGen/X86/no-cmov.ll
@@ -2,7 +2,7 @@
 
 define i32 @test1(i32 %g, i32* %j) {
   %tobool = icmp eq i32 %g, 0
-  %cmp = load i32* %j, align 4
+  %cmp = load i32, i32* %j, align 4
   %retval.0 = select i1 %tobool, i32 1, i32 %cmp
   ret i32 %retval.0
 
diff --git a/llvm/test/CodeGen/X86/norex-subreg.ll b/llvm/test/CodeGen/X86/norex-subreg.ll
index fb41ded..dd47af9 100644
--- a/llvm/test/CodeGen/X86/norex-subreg.ll
+++ b/llvm/test/CodeGen/X86/norex-subreg.ll
@@ -15,17 +15,17 @@
 
 define void @f() nounwind uwtable ssp {
 entry:
-  %0 = load i32* undef, align 4
+  %0 = load i32, i32* undef, align 4
   %add = add i32 0, %0
   %conv1 = trunc i32 %add to i16
   %bf.value = and i16 %conv1, 255
   %1 = and i16 %bf.value, 255
   %2 = shl i16 %1, 8
-  %3 = load i16* undef, align 1
+  %3 = load i16, i16* undef, align 1
   %4 = and i16 %3, 255
   %5 = or i16 %4, %2
   store i16 %5, i16* undef, align 1
-  %6 = load i16* undef, align 1
+  %6 = load i16, i16* undef, align 1
   %7 = lshr i16 %6, 8
   %bf.clear2 = and i16 %7, 255
   %conv3 = zext i16 %bf.clear2 to i32
diff --git a/llvm/test/CodeGen/X86/nosse-error1.ll b/llvm/test/CodeGen/X86/nosse-error1.ll
index 291379ee..7617d59 100644
--- a/llvm/test/CodeGen/X86/nosse-error1.ll
+++ b/llvm/test/CodeGen/X86/nosse-error1.ll
@@ -12,16 +12,16 @@
 
 define void @test() nounwind {
 entry:
-	%0 = load float* @f, align 4		; <float> [#uses=1]
+	%0 = load float, float* @f, align 4		; <float> [#uses=1]
 	%1 = tail call float @foo1(float %0) nounwind		; <float> [#uses=1]
 	store float %1, float* @f, align 4
-	%2 = load double* @d, align 8		; <double> [#uses=1]
+	%2 = load double, double* @d, align 8		; <double> [#uses=1]
 	%3 = tail call double @foo2(double %2) nounwind		; <double> [#uses=1]
 	store double %3, double* @d, align 8
-	%4 = load float* @f, align 4		; <float> [#uses=1]
+	%4 = load float, float* @f, align 4		; <float> [#uses=1]
 	%5 = tail call float @foo3(float %4) nounwind		; <float> [#uses=1]
 	store float %5, float* @f, align 4
-	%6 = load double* @d, align 8		; <double> [#uses=1]
+	%6 = load double, double* @d, align 8		; <double> [#uses=1]
 	%7 = tail call double @foo4(double %6) nounwind		; <double> [#uses=1]
 	store double %7, double* @d, align 8
 	ret void
diff --git a/llvm/test/CodeGen/X86/nosse-error2.ll b/llvm/test/CodeGen/X86/nosse-error2.ll
index a7cee2d..3da80aa 100644
--- a/llvm/test/CodeGen/X86/nosse-error2.ll
+++ b/llvm/test/CodeGen/X86/nosse-error2.ll
@@ -12,16 +12,16 @@
 
 define void @test() nounwind {
 entry:
-	%0 = load float* @f, align 4		; <float> [#uses=1]
+	%0 = load float, float* @f, align 4		; <float> [#uses=1]
 	%1 = tail call inreg float @foo1(float inreg %0) nounwind		; <float> [#uses=1]
 	store float %1, float* @f, align 4
-	%2 = load double* @d, align 8		; <double> [#uses=1]
+	%2 = load double, double* @d, align 8		; <double> [#uses=1]
 	%3 = tail call inreg double @foo2(double inreg %2) nounwind		; <double> [#uses=1]
 	store double %3, double* @d, align 8
-	%4 = load float* @f, align 4		; <float> [#uses=1]
+	%4 = load float, float* @f, align 4		; <float> [#uses=1]
 	%5 = tail call inreg float @foo3(float inreg %4) nounwind		; <float> [#uses=1]
 	store float %5, float* @f, align 4
-	%6 = load double* @d, align 8		; <double> [#uses=1]
+	%6 = load double, double* @d, align 8		; <double> [#uses=1]
 	%7 = tail call inreg double @foo4(double inreg %6) nounwind		; <double> [#uses=1]
 	store double %7, double* @d, align 8
 	ret void
diff --git a/llvm/test/CodeGen/X86/nosse-varargs.ll b/llvm/test/CodeGen/X86/nosse-varargs.ll
index c3ea1c3..8a81d0e 100644
--- a/llvm/test/CodeGen/X86/nosse-varargs.ll
+++ b/llvm/test/CodeGen/X86/nosse-varargs.ll
@@ -13,13 +13,13 @@
 	%ap12 = bitcast [1 x %struct.__va_list_tag]* %ap to i8*		; <i8*> [#uses=2]
 	call void @llvm.va_start(i8* %ap12)
 	%0 = getelementptr [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 0		; <i32*> [#uses=2]
-	%1 = load i32* %0, align 8		; <i32> [#uses=3]
+	%1 = load i32, i32* %0, align 8		; <i32> [#uses=3]
 	%2 = icmp ult i32 %1, 48		; <i1> [#uses=1]
 	br i1 %2, label %bb, label %bb3
 
 bb:		; preds = %entry
 	%3 = getelementptr [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 3		; <i8**> [#uses=1]
-	%4 = load i8** %3, align 8		; <i8*> [#uses=1]
+	%4 = load i8*, i8** %3, align 8		; <i8*> [#uses=1]
 	%5 = inttoptr i32 %1 to i8*		; <i8*> [#uses=1]
 	%6 = ptrtoint i8* %5 to i64		; <i64> [#uses=1]
 	%ctg2 = getelementptr i8, i8* %4, i64 %6		; <i8*> [#uses=1]
@@ -29,7 +29,7 @@
 
 bb3:		; preds = %entry
 	%8 = getelementptr [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 2		; <i8**> [#uses=2]
-	%9 = load i8** %8, align 8		; <i8*> [#uses=2]
+	%9 = load i8*, i8** %8, align 8		; <i8*> [#uses=2]
 	%10 = getelementptr i8, i8* %9, i64 8		; <i8*> [#uses=1]
 	store i8* %10, i8** %8, align 8
 	br label %bb4
@@ -37,7 +37,7 @@
 bb4:		; preds = %bb3, %bb
 	%addr.0.0 = phi i8* [ %ctg2, %bb ], [ %9, %bb3 ]		; <i8*> [#uses=1]
 	%11 = bitcast i8* %addr.0.0 to i32*		; <i32*> [#uses=1]
-	%12 = load i32* %11, align 4		; <i32> [#uses=1]
+	%12 = load i32, i32* %11, align 4		; <i32> [#uses=1]
 	call void @llvm.va_end(i8* %ap12)
 	ret i32 %12
 }
diff --git a/llvm/test/CodeGen/X86/object-size.ll b/llvm/test/CodeGen/X86/object-size.ll
index 0610f0b..12f99db 100644
--- a/llvm/test/CodeGen/X86/object-size.ll
+++ b/llvm/test/CodeGen/X86/object-size.ll
@@ -9,7 +9,7 @@
 
 define void @bar() nounwind ssp {
 entry:
-  %tmp = load i8** @p                             ; <i8*> [#uses=1]
+  %tmp = load i8*, i8** @p                             ; <i8*> [#uses=1]
   %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp, i1 0) ; <i64> [#uses=1]
   %cmp = icmp ne i64 %0, -1                       ; <i1> [#uses=1]
 ; CHECK: movq $-1, [[RAX:%r..]]
@@ -17,14 +17,14 @@
   br i1 %cmp, label %cond.true, label %cond.false
 
 cond.true:                                        ; preds = %entry
-  %tmp1 = load i8** @p                            ; <i8*> [#uses=1]
-  %tmp2 = load i8** @p                            ; <i8*> [#uses=1]
+  %tmp1 = load i8*, i8** @p                            ; <i8*> [#uses=1]
+  %tmp2 = load i8*, i8** @p                            ; <i8*> [#uses=1]
   %1 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp2, i1 1) ; <i64> [#uses=1]
   %call = call i8* @__strcpy_chk(i8* %tmp1, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i64 %1) ssp ; <i8*> [#uses=1]
   br label %cond.end
 
 cond.false:                                       ; preds = %entry
-  %tmp3 = load i8** @p                            ; <i8*> [#uses=1]
+  %tmp3 = load i8*, i8** @p                            ; <i8*> [#uses=1]
   %call4 = call i8* @__inline_strcpy_chk(i8* %tmp3, i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0)) ssp ; <i8*> [#uses=1]
   br label %cond.end
 
@@ -44,12 +44,12 @@
   %__src.addr = alloca i8*                        ; <i8**> [#uses=2]
   store i8* %__dest, i8** %__dest.addr
   store i8* %__src, i8** %__src.addr
-  %tmp = load i8** %__dest.addr                   ; <i8*> [#uses=1]
-  %tmp1 = load i8** %__src.addr                   ; <i8*> [#uses=1]
-  %tmp2 = load i8** %__dest.addr                  ; <i8*> [#uses=1]
+  %tmp = load i8*, i8** %__dest.addr                   ; <i8*> [#uses=1]
+  %tmp1 = load i8*, i8** %__src.addr                   ; <i8*> [#uses=1]
+  %tmp2 = load i8*, i8** %__dest.addr                  ; <i8*> [#uses=1]
   %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp2, i1 1) ; <i64> [#uses=1]
   %call = call i8* @__strcpy_chk(i8* %tmp, i8* %tmp1, i64 %0) ssp ; <i8*> [#uses=1]
   store i8* %call, i8** %retval
-  %1 = load i8** %retval                          ; <i8*> [#uses=1]
+  %1 = load i8*, i8** %retval                          ; <i8*> [#uses=1]
   ret i8* %1
 }
diff --git a/llvm/test/CodeGen/X86/opt-ext-uses.ll b/llvm/test/CodeGen/X86/opt-ext-uses.ll
index 72fb38b..5d05ad9 100644
--- a/llvm/test/CodeGen/X86/opt-ext-uses.ll
+++ b/llvm/test/CodeGen/X86/opt-ext-uses.ll
@@ -2,7 +2,7 @@
 
 define signext i16 @t()   {
 entry:
-        %tmp180 = load i16* null, align 2               ; <i16> [#uses=3]
+        %tmp180 = load i16, i16* null, align 2               ; <i16> [#uses=3]
         %tmp180181 = sext i16 %tmp180 to i32            ; <i32> [#uses=1]
         %tmp182 = add i16 %tmp180, 10
         %tmp185 = icmp slt i16 %tmp182, 0               ; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/optimize-max-0.ll b/llvm/test/CodeGen/X86/optimize-max-0.ll
index effd53a..006592a 100644
--- a/llvm/test/CodeGen/X86/optimize-max-0.ll
+++ b/llvm/test/CodeGen/X86/optimize-max-0.ll
@@ -33,7 +33,7 @@
   %8 = shl i32 %x.06, 1
   %9 = add i32 %6, %8
   %10 = getelementptr i8, i8* %r, i32 %9
-  %11 = load i8* %10, align 1
+  %11 = load i8, i8* %10, align 1
   %12 = getelementptr i8, i8* %j, i32 %7
   store i8 %11, i8* %12, align 1
   br label %bb7
@@ -104,7 +104,7 @@
   %25 = shl i32 %x.12, 2
   %26 = add i32 %25, %21
   %27 = getelementptr i8, i8* %r, i32 %26
-  %28 = load i8* %27, align 1
+  %28 = load i8, i8* %27, align 1
   %.sum = add i32 %22, %x.12
   %29 = getelementptr i8, i8* %j, i32 %.sum
   store i8 %28, i8* %29, align 1
@@ -112,7 +112,7 @@
   %31 = or i32 %30, 2
   %32 = add i32 %31, %21
   %33 = getelementptr i8, i8* %r, i32 %32
-  %34 = load i8* %33, align 1
+  %34 = load i8, i8* %33, align 1
   %.sum6 = add i32 %23, %x.12
   %35 = getelementptr i8, i8* %j, i32 %.sum6
   store i8 %34, i8* %35, align 1
@@ -258,7 +258,7 @@
   %8 = shl i32 %x.06, 1
   %9 = add i32 %6, %8
   %10 = getelementptr i8, i8* %r, i32 %9
-  %11 = load i8* %10, align 1
+  %11 = load i8, i8* %10, align 1
   %12 = getelementptr i8, i8* %j, i32 %7
   store i8 %11, i8* %12, align 1
   br label %bb7
@@ -329,7 +329,7 @@
   %25 = shl i32 %x.12, 2
   %26 = add i32 %25, %21
   %27 = getelementptr i8, i8* %r, i32 %26
-  %28 = load i8* %27, align 1
+  %28 = load i8, i8* %27, align 1
   %.sum = add i32 %22, %x.12
   %29 = getelementptr i8, i8* %j, i32 %.sum
   store i8 %28, i8* %29, align 1
@@ -337,7 +337,7 @@
   %31 = or i32 %30, 2
   %32 = add i32 %31, %21
   %33 = getelementptr i8, i8* %r, i32 %32
-  %34 = load i8* %33, align 1
+  %34 = load i8, i8* %33, align 1
   %.sum6 = add i32 %23, %x.12
   %35 = getelementptr i8, i8* %j, i32 %.sum6
   store i8 %34, i8* %35, align 1
diff --git a/llvm/test/CodeGen/X86/optimize-max-2.ll b/llvm/test/CodeGen/X86/optimize-max-2.ll
index 5e6c8ee..45b542e 100644
--- a/llvm/test/CodeGen/X86/optimize-max-2.ll
+++ b/llvm/test/CodeGen/X86/optimize-max-2.ll
@@ -20,7 +20,7 @@
 bb4:		; preds = %bb4, %entry
 	%i.07 = phi i64 [ 0, %entry ], [ %2, %bb4 ]		; <i64> [#uses=2]
 	%scevgep = getelementptr double, double* %p, i64 %i.07		; <double*> [#uses=2]
-	%0 = load double* %scevgep, align 8		; <double> [#uses=1]
+	%0 = load double, double* %scevgep, align 8		; <double> [#uses=1]
 	%1 = fmul double %0, 2.000000e+00		; <double> [#uses=1]
 	store double %1, double* %scevgep, align 8
 	%2 = add i64 %i.07, 1		; <i64> [#uses=2]
diff --git a/llvm/test/CodeGen/X86/optimize-max-3.ll b/llvm/test/CodeGen/X86/optimize-max-3.ll
index 7ffca0a..71885ef 100644
--- a/llvm/test/CodeGen/X86/optimize-max-3.ll
+++ b/llvm/test/CodeGen/X86/optimize-max-3.ll
@@ -21,7 +21,7 @@
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %i = phi i64 [ %i.next, %for.body ], [ 0, %for.body.preheader ] ; <i64> [#uses=2]
   %arrayidx = getelementptr double, double* %p, i64 %i    ; <double*> [#uses=2]
-  %t4 = load double* %arrayidx                    ; <double> [#uses=1]
+  %t4 = load double, double* %arrayidx                    ; <double> [#uses=1]
   %mul = fmul double %t4, 2.200000e+00            ; <double> [#uses=1]
   store double %mul, double* %arrayidx
   %i.next = add nsw i64 %i, 1                     ; <i64> [#uses=2]
diff --git a/llvm/test/CodeGen/X86/packed_struct.ll b/llvm/test/CodeGen/X86/packed_struct.ll
index da6e8f8..3670fc6 100644
--- a/llvm/test/CodeGen/X86/packed_struct.ll
+++ b/llvm/test/CodeGen/X86/packed_struct.ll
@@ -17,9 +17,9 @@
 
 define i32 @foo() nounwind {
 entry:
-	%tmp = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 1)		; <i32> [#uses=1]
-	%tmp3 = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 2)		; <i32> [#uses=1]
-	%tmp6 = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 3)		; <i32> [#uses=1]
+	%tmp = load i32, i32* getelementptr (%struct.anon* @foos, i32 0, i32 1)		; <i32> [#uses=1]
+	%tmp3 = load i32, i32* getelementptr (%struct.anon* @foos, i32 0, i32 2)		; <i32> [#uses=1]
+	%tmp6 = load i32, i32* getelementptr (%struct.anon* @foos, i32 0, i32 3)		; <i32> [#uses=1]
 	%tmp4 = add i32 %tmp3, %tmp		; <i32> [#uses=1]
 	%tmp7 = add i32 %tmp4, %tmp6		; <i32> [#uses=1]
 	ret i32 %tmp7
@@ -27,8 +27,8 @@
 
 define i8 @bar() nounwind {
 entry:
-	%tmp = load i8* getelementptr ([4 x <{ i32, i8 }>]* @bara, i32 0, i32 0, i32 1)		; <i8> [#uses=1]
-	%tmp4 = load i8* getelementptr ([4 x <{ i32, i8 }>]* @bara, i32 0, i32 3, i32 1)		; <i8> [#uses=1]
+	%tmp = load i8, i8* getelementptr ([4 x <{ i32, i8 }>]* @bara, i32 0, i32 0, i32 1)		; <i8> [#uses=1]
+	%tmp4 = load i8, i8* getelementptr ([4 x <{ i32, i8 }>]* @bara, i32 0, i32 3, i32 1)		; <i8> [#uses=1]
 	%tmp5 = add i8 %tmp4, %tmp		; <i8> [#uses=1]
 	ret i8 %tmp5
 }
diff --git a/llvm/test/CodeGen/X86/palignr-2.ll b/llvm/test/CodeGen/X86/palignr-2.ll
index 4df9a22..8b32387 100644
--- a/llvm/test/CodeGen/X86/palignr-2.ll
+++ b/llvm/test/CodeGen/X86/palignr-2.ll
@@ -20,8 +20,8 @@
 entry:
 ; CHECK-LABEL: t2:
 ; palignr $4, _b, %xmm0
-  %0 = load <2 x i64>* bitcast ([4 x i32]* @b to <2 x i64>*), align 16 ; <<2 x i64>> [#uses=1]
-  %1 = load <2 x i64>* bitcast ([4 x i32]* @a to <2 x i64>*), align 16 ; <<2 x i64>> [#uses=1]
+  %0 = load <2 x i64>, <2 x i64>* bitcast ([4 x i32]* @b to <2 x i64>*), align 16 ; <<2 x i64>> [#uses=1]
+  %1 = load <2 x i64>, <2 x i64>* bitcast ([4 x i32]* @a to <2 x i64>*), align 16 ; <<2 x i64>> [#uses=1]
   %2 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %1, <2 x i64> %0, i8 32) nounwind readnone
   store <2 x i64> %2, <2 x i64>* bitcast ([4 x i32]* @c to <2 x i64>*), align 16
   ret void
diff --git a/llvm/test/CodeGen/X86/patchpoint.ll b/llvm/test/CodeGen/X86/patchpoint.ll
index 05f0bab..24e324f 100644
--- a/llvm/test/CodeGen/X86/patchpoint.ll
+++ b/llvm/test/CodeGen/X86/patchpoint.ll
@@ -47,13 +47,13 @@
 entry:
   %tmp80 = add i64 %tmp79, -16
   %tmp81 = inttoptr i64 %tmp80 to i64*
-  %tmp82 = load i64* %tmp81, align 8
+  %tmp82 = load i64, i64* %tmp81, align 8
   tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 5, i64 %arg, i64 %tmp2, i64 %tmp10, i64 %tmp82)
   tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 15, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp82)
-  %tmp83 = load i64* %tmp33, align 8
+  %tmp83 = load i64, i64* %tmp33, align 8
   %tmp84 = add i64 %tmp83, -24
   %tmp85 = inttoptr i64 %tmp84 to i64*
-  %tmp86 = load i64* %tmp85, align 8
+  %tmp86 = load i64, i64* %tmp85, align 8
   tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 17, i32 5, i64 %arg, i64 %tmp10, i64 %tmp86)
   tail call void (i64, i32, i8*, i32, ...)* @llvm.experimental.patchpoint.void(i64 18, i32 30, i8* null, i32 3, i64 %arg, i64 %tmp10, i64 %tmp86)
   ret i64 10
diff --git a/llvm/test/CodeGen/X86/peep-test-0.ll b/llvm/test/CodeGen/X86/peep-test-0.ll
index 7f06ed8..1772f00 100644
--- a/llvm/test/CodeGen/X86/peep-test-0.ll
+++ b/llvm/test/CodeGen/X86/peep-test-0.ll
@@ -10,7 +10,7 @@
 	%indvar = phi i64 [ %n, %entry ], [ %indvar.next, %bb ]
 	%i.03 = add i64 %indvar, %n
 	%0 = getelementptr double, double* %d, i64 %i.03
-	%1 = load double* %0, align 8
+	%1 = load double, double* %0, align 8
 	%2 = fmul double %1, 3.000000e+00
 	store double %2, double* %0, align 8
 	%indvar.next = add i64 %indvar, 1
diff --git a/llvm/test/CodeGen/X86/peep-test-1.ll b/llvm/test/CodeGen/X86/peep-test-1.ll
index 80f3a877..7448da3 100644
--- a/llvm/test/CodeGen/X86/peep-test-1.ll
+++ b/llvm/test/CodeGen/X86/peep-test-1.ll
@@ -10,7 +10,7 @@
 	%indvar = phi i32 [ 0, %0 ], [ %indvar.next, %bb ]
 	%i.03 = sub i32 %n, %indvar
 	%1 = getelementptr double, double* %p, i32 %i.03
-	%2 = load double* %1, align 4
+	%2 = load double, double* %1, align 4
 	%3 = fmul double %2, 2.930000e+00
 	store double %3, double* %1, align 4
 	%4 = add i32 %i.03, -1
diff --git a/llvm/test/CodeGen/X86/peephole-fold-movsd.ll b/llvm/test/CodeGen/X86/peephole-fold-movsd.ll
index f174e4b..818040a 100644
--- a/llvm/test/CodeGen/X86/peephole-fold-movsd.ll
+++ b/llvm/test/CodeGen/X86/peephole-fold-movsd.ll
@@ -18,9 +18,9 @@
   %tmpcast = bitcast <2 x double>* %1 to %struct.S1*
   call void @foo3(%struct.S1* %tmpcast) #2
   %p2 = getelementptr inbounds %struct.S1, %struct.S1* %tmpcast, i64 0, i32 0
-  %2 = load double* %p2, align 16
+  %2 = load double, double* %p2, align 16
   %p3 = getelementptr inbounds %struct.S1, %struct.S1* %tmpcast, i64 0, i32 1
-  %3 = load double* %p3, align 8
+  %3 = load double, double* %p3, align 8
   %4 = insertelement <2 x double> undef, double %2, i32 0
   %5 = insertelement <2 x double> %4, double 0.000000e+00, i32 1
   %6 = insertelement <2 x double> undef, double %3, i32 1
diff --git a/llvm/test/CodeGen/X86/peephole-multiple-folds.ll b/llvm/test/CodeGen/X86/peephole-multiple-folds.ll
index a6cec66..9fcc1a2 100644
--- a/llvm/test/CodeGen/X86/peephole-multiple-folds.ll
+++ b/llvm/test/CodeGen/X86/peephole-multiple-folds.ll
@@ -13,8 +13,8 @@
 ; CHECK: vfmadd231ps ({{%rsi|%rdx}}),
   %vsum1 = phi <8 x float> [ %vsum1.next, %loopbody ], [ zeroinitializer, %entry ]
   %vsum2 = phi <8 x float> [ %vsum2.next, %loopbody ], [ zeroinitializer, %entry ]
-  %m1 = load <8 x float>* %p1, align 1
-  %m2 = load <8 x float>* %p2, align 1
+  %m1 = load <8 x float>, <8 x float>* %p1, align 1
+  %m2 = load <8 x float>, <8 x float>* %p2, align 1
   %vsum1.next = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %m1, <8 x float> zeroinitializer, <8 x float> %vsum1)
   %vsum2.next = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %m2, <8 x float> zeroinitializer, <8 x float> %vsum2)
   %vsum1.next.1 = extractelement <8 x float> %vsum1.next, i32 0
diff --git a/llvm/test/CodeGen/X86/phi-bit-propagation.ll b/llvm/test/CodeGen/X86/phi-bit-propagation.ll
index bcffe3c..37f3f09 100644
--- a/llvm/test/CodeGen/X86/phi-bit-propagation.ll
+++ b/llvm/test/CodeGen/X86/phi-bit-propagation.ll
@@ -15,10 +15,10 @@
 
 for.body:                                         ; preds = %for.cond
   %arrayidx = getelementptr inbounds i32, i32* %b, i64 %conv
-  %tmp5 = load i32* %arrayidx, align 4
+  %tmp5 = load i32, i32* %arrayidx, align 4
   %conv6 = zext i32 %tmp5 to i64
   %rem.i.i.i.i = and i64 %conv6, 63
-  %tmp3.i = load i64* %tmp.i.i.i.i, align 8
+  %tmp3.i = load i64, i64* %tmp.i.i.i.i, align 8
   %shl.i.i = shl i64 1, %rem.i.i.i.i
   %and.i = and i64 %shl.i.i, %tmp3.i
   %cmp.i = icmp eq i64 %and.i, 0
diff --git a/llvm/test/CodeGen/X86/phielim-split.ll b/llvm/test/CodeGen/X86/phielim-split.ll
index bcf61c2..148b803 100644
--- a/llvm/test/CodeGen/X86/phielim-split.ll
+++ b/llvm/test/CodeGen/X86/phielim-split.ll
@@ -19,7 +19,7 @@
 for.cond:                                         ; preds = %entry, %for.cond
   %p.addr.0 = phi i8* [ %incdec.ptr, %for.cond ], [ %p, %entry ]
   %incdec.ptr = getelementptr inbounds i8, i8* %p.addr.0, i64 1
-  %0 = load i8* %p.addr.0, align 1
+  %0 = load i8, i8* %p.addr.0, align 1
   %tobool = icmp eq i8 %0, 0
   br i1 %tobool, label %for.cond, label %if.end2
 
diff --git a/llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll b/llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll
index 37eca1c..e72483e 100644
--- a/llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll
+++ b/llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll
@@ -50,15 +50,15 @@
   store i32 %asmtmp2, i32* %"%eax"
   %3 = call i32 asm "", "={ax}"() nounwind        ; <i32> [#uses=1]
   call void asm sideeffect alignstack "movl $0, $1", "{eax},*m,~{dirflag},~{fpsr},~{flags},~{memory}"(i32 %3, i32* %result) nounwind
-  %4 = load i32* %result, align 4                 ; <i32> [#uses=1]
+  %4 = load i32, i32* %result, align 4                 ; <i32> [#uses=1]
   %5 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([12 x i8]* @.str, i32 0, i32 0), i32 %4) nounwind ; <i32> [#uses=0]
   store i32 0, i32* %0, align 4
-  %6 = load i32* %0, align 4                      ; <i32> [#uses=1]
+  %6 = load i32, i32* %0, align 4                      ; <i32> [#uses=1]
   store i32 %6, i32* %retval, align 4
   br label %return
 
 return:                                           ; preds = %entry
-  %retval3 = load i32* %retval                    ; <i32> [#uses=1]
+  %retval3 = load i32, i32* %retval                    ; <i32> [#uses=1]
   ret i32 %retval3
 }
 
diff --git a/llvm/test/CodeGen/X86/phys_subreg_coalesce-3.ll b/llvm/test/CodeGen/X86/phys_subreg_coalesce-3.ll
index c6af23d..74e3d12 100644
--- a/llvm/test/CodeGen/X86/phys_subreg_coalesce-3.ll
+++ b/llvm/test/CodeGen/X86/phys_subreg_coalesce-3.ll
@@ -26,7 +26,7 @@
   %j.06 = sub i32 %j.03, %indvar                  ; <i32> [#uses=1]
   %tmp11 = sub i32 %tmp10, %indvar                ; <i32> [#uses=1]
   %scevgep = getelementptr i32, i32* %ptr, i32 %tmp11  ; <i32*> [#uses=1]
-  %1 = load i32* %scevgep, align 4                ; <i32> [#uses=1]
+  %1 = load i32, i32* %scevgep, align 4                ; <i32> [#uses=1]
   %2 = ashr i32 %j.06, %shifts                    ; <i32> [#uses=1]
   %3 = and i32 %2, 65535                          ; <i32> [#uses=1]
   %4 = getelementptr inbounds i32, i32* %quadrant, i32 %1 ; <i32*> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/pic.ll b/llvm/test/CodeGen/X86/pic.ll
index da1e224..faaf73b 100644
--- a/llvm/test/CodeGen/X86/pic.ll
+++ b/llvm/test/CodeGen/X86/pic.ll
@@ -7,7 +7,7 @@
 define void @test0() nounwind {
 entry:
     store i32* @dst, i32** @ptr
-    %tmp.s = load i32* @src
+    %tmp.s = load i32, i32* @src
     store i32 %tmp.s, i32* @dst
     ret void
     
@@ -29,7 +29,7 @@
 define void @test1() nounwind {
 entry:
     store i32* @dst2, i32** @ptr2
-    %tmp.s = load i32* @src2
+    %tmp.s = load i32, i32* @src2
     store i32 %tmp.s, i32* @dst2
     ret void
     
@@ -71,7 +71,7 @@
 entry:
     %tmp = call void(...)*(...)* @afoo()
     store void(...)* %tmp, void(...)** @pfoo
-    %tmp1 = load void(...)** @pfoo
+    %tmp1 = load void(...)*, void(...)** @pfoo
     call void(...)* %tmp1()
     ret void
 ; LINUX-LABEL: test3:
@@ -107,7 +107,7 @@
 define void @test5() nounwind {
 entry:
     store i32* @dst6, i32** @ptr6
-    %tmp.s = load i32* @src6
+    %tmp.s = load i32, i32* @src6
     store i32 %tmp.s, i32* @dst6
     ret void
     
diff --git a/llvm/test/CodeGen/X86/pic_jumptable.ll b/llvm/test/CodeGen/X86/pic_jumptable.ll
index d66ff0c..dd0a8ac0 100644
--- a/llvm/test/CodeGen/X86/pic_jumptable.ll
+++ b/llvm/test/CodeGen/X86/pic_jumptable.ll
@@ -31,7 +31,7 @@
 	%Y_addr = alloca i32		; <i32*> [#uses=2]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store i32 %Y, i32* %Y_addr
-	%tmp = load i32* %Y_addr		; <i32> [#uses=1]
+	%tmp = load i32, i32* %Y_addr		; <i32> [#uses=1]
 	switch i32 %tmp, label %bb10 [
 		 i32 0, label %bb3
 		 i32 1, label %bb
diff --git a/llvm/test/CodeGen/X86/pmovext.ll b/llvm/test/CodeGen/X86/pmovext.ll
index f0e468f..6c76949 100644
--- a/llvm/test/CodeGen/X86/pmovext.ll
+++ b/llvm/test/CodeGen/X86/pmovext.ll
@@ -8,7 +8,7 @@
 ;CHECK-NEXT: ret
 define void @intrin_pmov(i16* noalias %dest, i8* noalias %src) nounwind uwtable ssp {
   %1 = bitcast i8* %src to <2 x i64>*
-  %2 = load <2 x i64>* %1, align 16
+  %2 = load <2 x i64>, <2 x i64>* %1, align 16
   %3 = bitcast <2 x i64> %2 to <16 x i8>
   %4 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %3) nounwind
   %5 = bitcast i16* %dest to i8*
diff --git a/llvm/test/CodeGen/X86/pmovsx-inreg.ll b/llvm/test/CodeGen/X86/pmovsx-inreg.ll
index 07979f6..2897d6b 100644
--- a/llvm/test/CodeGen/X86/pmovsx-inreg.ll
+++ b/llvm/test/CodeGen/X86/pmovsx-inreg.ll
@@ -6,7 +6,7 @@
 ; These tests inject a store into the chain to test the inreg versions of pmovsx
 
 define void @test1(<2 x i8>* %in, <2 x i64>* %out) nounwind {
-  %wide.load35 = load <2 x i8>* %in, align 1
+  %wide.load35 = load <2 x i8>, <2 x i8>* %in, align 1
   %sext = sext <2 x i8> %wide.load35 to <2 x i64>
   store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
   store <2 x i64> %sext, <2 x i64>* %out, align 8
@@ -23,7 +23,7 @@
 }
 
 define void @test2(<4 x i8>* %in, <4 x i64>* %out) nounwind {
-  %wide.load35 = load <4 x i8>* %in, align 1
+  %wide.load35 = load <4 x i8>, <4 x i8>* %in, align 1
   %sext = sext <4 x i8> %wide.load35 to <4 x i64>
   store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
   store <4 x i64> %sext, <4 x i64>* %out, align 8
@@ -34,7 +34,7 @@
 }
 
 define void @test3(<4 x i8>* %in, <4 x i32>* %out) nounwind {
-  %wide.load35 = load <4 x i8>* %in, align 1
+  %wide.load35 = load <4 x i8>, <4 x i8>* %in, align 1
   %sext = sext <4 x i8> %wide.load35 to <4 x i32>
   store <4 x i32> zeroinitializer, <4 x i32>* undef, align 8
   store <4 x i32> %sext, <4 x i32>* %out, align 8
@@ -51,7 +51,7 @@
 }
 
 define void @test4(<8 x i8>* %in, <8 x i32>* %out) nounwind {
-  %wide.load35 = load <8 x i8>* %in, align 1
+  %wide.load35 = load <8 x i8>, <8 x i8>* %in, align 1
   %sext = sext <8 x i8> %wide.load35 to <8 x i32>
   store <8 x i32> zeroinitializer, <8 x i32>* undef, align 8
   store <8 x i32> %sext, <8 x i32>* %out, align 8
@@ -62,7 +62,7 @@
 }
 
 define void @test5(<8 x i8>* %in, <8 x i16>* %out) nounwind {
-  %wide.load35 = load <8 x i8>* %in, align 1
+  %wide.load35 = load <8 x i8>, <8 x i8>* %in, align 1
   %sext = sext <8 x i8> %wide.load35 to <8 x i16>
   store <8 x i16> zeroinitializer, <8 x i16>* undef, align 8
   store <8 x i16> %sext, <8 x i16>* %out, align 8
@@ -79,7 +79,7 @@
 }
 
 define void @test6(<16 x i8>* %in, <16 x i16>* %out) nounwind {
-  %wide.load35 = load <16 x i8>* %in, align 1
+  %wide.load35 = load <16 x i8>, <16 x i8>* %in, align 1
   %sext = sext <16 x i8> %wide.load35 to <16 x i16>
   store <16 x i16> zeroinitializer, <16 x i16>* undef, align 8
   store <16 x i16> %sext, <16 x i16>* %out, align 8
@@ -90,7 +90,7 @@
 }
 
 define void @test7(<2 x i16>* %in, <2 x i64>* %out) nounwind {
-  %wide.load35 = load <2 x i16>* %in, align 1
+  %wide.load35 = load <2 x i16>, <2 x i16>* %in, align 1
   %sext = sext <2 x i16> %wide.load35 to <2 x i64>
   store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
   store <2 x i64> %sext, <2 x i64>* %out, align 8
@@ -108,7 +108,7 @@
 }
 
 define void @test8(<4 x i16>* %in, <4 x i64>* %out) nounwind {
-  %wide.load35 = load <4 x i16>* %in, align 1
+  %wide.load35 = load <4 x i16>, <4 x i16>* %in, align 1
   %sext = sext <4 x i16> %wide.load35 to <4 x i64>
   store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
   store <4 x i64> %sext, <4 x i64>* %out, align 8
@@ -119,7 +119,7 @@
 }
 
 define void @test9(<4 x i16>* %in, <4 x i32>* %out) nounwind {
-  %wide.load35 = load <4 x i16>* %in, align 1
+  %wide.load35 = load <4 x i16>, <4 x i16>* %in, align 1
   %sext = sext <4 x i16> %wide.load35 to <4 x i32>
   store <4 x i32> zeroinitializer, <4 x i32>* undef, align 8
   store <4 x i32> %sext, <4 x i32>* %out, align 8
@@ -136,7 +136,7 @@
 }
 
 define void @test10(<8 x i16>* %in, <8 x i32>* %out) nounwind {
-  %wide.load35 = load <8 x i16>* %in, align 1
+  %wide.load35 = load <8 x i16>, <8 x i16>* %in, align 1
   %sext = sext <8 x i16> %wide.load35 to <8 x i32>
   store <8 x i32> zeroinitializer, <8 x i32>* undef, align 8
   store <8 x i32> %sext, <8 x i32>* %out, align 8
@@ -147,7 +147,7 @@
 }
 
 define void @test11(<2 x i32>* %in, <2 x i64>* %out) nounwind {
-  %wide.load35 = load <2 x i32>* %in, align 1
+  %wide.load35 = load <2 x i32>, <2 x i32>* %in, align 1
   %sext = sext <2 x i32> %wide.load35 to <2 x i64>
   store <2 x i64> zeroinitializer, <2 x i64>* undef, align 8
   store <2 x i64> %sext, <2 x i64>* %out, align 8
@@ -164,7 +164,7 @@
 }
 
 define void @test12(<4 x i32>* %in, <4 x i64>* %out) nounwind {
-  %wide.load35 = load <4 x i32>* %in, align 1
+  %wide.load35 = load <4 x i32>, <4 x i32>* %in, align 1
   %sext = sext <4 x i32> %wide.load35 to <4 x i64>
   store <4 x i64> zeroinitializer, <4 x i64>* undef, align 8
   store <4 x i64> %sext, <4 x i64>* %out, align 8
diff --git a/llvm/test/CodeGen/X86/pmulld.ll b/llvm/test/CodeGen/X86/pmulld.ll
index 3db0f73..3fe3ebc 100644
--- a/llvm/test/CodeGen/X86/pmulld.ll
+++ b/llvm/test/CodeGen/X86/pmulld.ll
@@ -20,7 +20,7 @@
 ; WIN64-NEXT: movdqa  (%rcx), %xmm0
 ; WIN64-NEXT: pmulld  (%rdx), %xmm0
 
-  %B = load <4 x i32>* %Bp
+  %B = load <4 x i32>, <4 x i32>* %Bp
   %C = mul <4 x i32> %A, %B
   ret <4 x i32> %C
 }
diff --git a/llvm/test/CodeGen/X86/pointer-vector.ll b/llvm/test/CodeGen/X86/pointer-vector.ll
index 5e0c2dae..48c8b23 100644
--- a/llvm/test/CodeGen/X86/pointer-vector.ll
+++ b/llvm/test/CodeGen/X86/pointer-vector.ll
@@ -31,7 +31,7 @@
 ;CHECK: LOAD0
 define <4 x i8*> @LOAD0(<4 x i8*>* %p) nounwind {
 entry:
-  %G = load <4 x i8*>* %p
+  %G = load <4 x i8*>, <4 x i8*>* %p
 ;CHECK: movaps
   ret <4 x i8*> %G
 ;CHECK: ret
@@ -40,7 +40,7 @@
 ;CHECK: LOAD1
 define <4 x i8*> @LOAD1(<4 x i8*>* %p) nounwind {
 entry:
-  %G = load <4 x i8*>* %p
+  %G = load <4 x i8*>, <4 x i8*>* %p
 ;CHECK: movdqa
 ;CHECK: pshufd
 ;CHECK: movdqa
@@ -55,11 +55,11 @@
 entry:
   %I = alloca <4 x i8*>
 ;CHECK: sub
-  %G = load <4 x i8*>* %p
+  %G = load <4 x i8*>, <4 x i8*>* %p
 ;CHECK: movaps
   store <4 x i8*> %G, <4 x i8*>* %I
 ;CHECK: movaps
-  %Z = load <4 x i8*>* %I
+  %Z = load <4 x i8*>, <4 x i8*>* %I
   ret <4 x i8*> %Z
 ;CHECK: add
 ;CHECK: ret
@@ -68,7 +68,7 @@
 ;CHECK: INT2PTR0
 define <4 x i32> @INT2PTR0(<4 x i8*>* %p) nounwind {
 entry:
-  %G = load <4 x i8*>* %p
+  %G = load <4 x i8*>, <4 x i8*>* %p
 ;CHECK: movl
 ;CHECK: movaps
   %K = ptrtoint <4 x i8*> %G to <4 x i32>
@@ -79,7 +79,7 @@
 ;CHECK: INT2PTR1
 define <4 x i32*> @INT2PTR1(<4 x i8>* %p) nounwind {
 entry:
-  %G = load <4 x i8>* %p
+  %G = load <4 x i8>, <4 x i8>* %p
 ;CHECK: movl
 ;CHECK: pmovzxbd (%
   %K = inttoptr <4 x i8> %G to <4 x i32*>
@@ -90,7 +90,7 @@
 ;CHECK: BITCAST0
 define <4 x i32*> @BITCAST0(<4 x i8*>* %p) nounwind {
 entry:
-  %G = load <4 x i8*>* %p
+  %G = load <4 x i8*>, <4 x i8*>* %p
 ;CHECK: movl
   %T = bitcast <4 x i8*> %G to <4 x i32*>
 ;CHECK: movaps
@@ -101,7 +101,7 @@
 ;CHECK: BITCAST1
 define <2 x i32*> @BITCAST1(<2 x i8*>* %p) nounwind {
 entry:
-  %G = load <2 x i8*>* %p
+  %G = load <2 x i8*>, <2 x i8*>* %p
 ;CHECK: movl
 ;CHECK: pmovzxdq
   %T = bitcast <2 x i8*> %G to <2 x i32*>
@@ -112,8 +112,8 @@
 ;CHECK: ICMP0
 define <4 x i32> @ICMP0(<4 x i8*>* %p0, <4 x i8*>* %p1) nounwind {
 entry:
-  %g0 = load <4 x i8*>* %p0
-  %g1 = load <4 x i8*>* %p1
+  %g0 = load <4 x i8*>, <4 x i8*>* %p0
+  %g1 = load <4 x i8*>, <4 x i8*>* %p1
   %k = icmp sgt <4 x i8*> %g0, %g1
   ;CHECK: pcmpgtd
   %j = select <4 x i1> %k, <4 x i32> <i32 0, i32 1, i32 2, i32 4>, <4 x i32> <i32 9, i32 8, i32 7, i32 6>
@@ -124,8 +124,8 @@
 ;CHECK: ICMP1
 define <4 x i32> @ICMP1(<4 x i8*>* %p0, <4 x i8*>* %p1) nounwind {
 entry:
-  %g0 = load <4 x i8*>* %p0
-  %g1 = load <4 x i8*>* %p1
+  %g0 = load <4 x i8*>, <4 x i8*>* %p0
+  %g1 = load <4 x i8*>, <4 x i8*>* %p1
   %k = icmp eq <4 x i8*> %g0, %g1
   ;CHECK: pcmpeqd
   %j = select <4 x i1> %k, <4 x i32> <i32 0, i32 1, i32 2, i32 4>, <4 x i32> <i32 9, i32 8, i32 7, i32 6>
diff --git a/llvm/test/CodeGen/X86/postra-licm.ll b/llvm/test/CodeGen/X86/postra-licm.ll
index 00d04da..c5978de 100644
--- a/llvm/test/CodeGen/X86/postra-licm.ll
+++ b/llvm/test/CodeGen/X86/postra-licm.ll
@@ -113,7 +113,7 @@
   br i1 false, label %bb47, label %bb32.loopexit
 
 bb47:                                             ; preds = %bb45
-  %10 = load i32* %7, align 4                     ; <i32> [#uses=0]
+  %10 = load i32, i32* %7, align 4                     ; <i32> [#uses=0]
   unreachable
 
 bb70:                                             ; preds = %bb32.loopexit, %bb30
@@ -165,16 +165,16 @@
   %scevgep13 = getelementptr i8, i8* %bufp, i64 %tmp12 ; <i8*> [#uses=1]
   %tmp15 = add i64 %tmp14, %tmp9                  ; <i64> [#uses=1]
   %scevgep16 = getelementptr i8, i8* %bufp, i64 %tmp15 ; <i8*> [#uses=1]
-  %0 = load i8* undef, align 1                    ; <i8> [#uses=1]
+  %0 = load i8, i8* undef, align 1                    ; <i8> [#uses=1]
   %1 = zext i8 %0 to i32                          ; <i32> [#uses=1]
   %2 = getelementptr inbounds [16 x i16], [16 x i16]* @map_4_to_16, i64 0, i64 0 ; <i16*> [#uses=1]
-  %3 = load i16* %2, align 2                      ; <i16> [#uses=1]
+  %3 = load i16, i16* %2, align 2                      ; <i16> [#uses=1]
   %4 = trunc i16 %3 to i8                         ; <i8> [#uses=1]
   store i8 %4, i8* undef, align 1
   %5 = and i32 %1, 15                             ; <i32> [#uses=1]
   %6 = zext i32 %5 to i64                         ; <i64> [#uses=1]
   %7 = getelementptr inbounds [16 x i16], [16 x i16]* @map_4_to_16, i64 0, i64 %6 ; <i16*> [#uses=1]
-  %8 = load i16* %7, align 2                      ; <i16> [#uses=2]
+  %8 = load i16, i16* %7, align 2                      ; <i16> [#uses=2]
   %9 = lshr i16 %8, 8                             ; <i16> [#uses=1]
   %10 = trunc i16 %9 to i8                        ; <i8> [#uses=1]
   store i8 %10, i8* %scevgep13, align 1
diff --git a/llvm/test/CodeGen/X86/pr10475.ll b/llvm/test/CodeGen/X86/pr10475.ll
index 3efc39e..d81fce8 100644
--- a/llvm/test/CodeGen/X86/pr10475.ll
+++ b/llvm/test/CodeGen/X86/pr10475.ll
@@ -10,7 +10,7 @@
   br i1 undef, label %CF79, label %CF84.critedge.critedge
 
 CF84.critedge.critedge:                           ; preds = %CF79
-  %L35 = load <8 x i32>* undef
+  %L35 = load <8 x i32>, <8 x i32>* undef
   br label %CF85
 
 CF85:                                             ; preds = %CF85, %CF84.critedge.critedge
diff --git a/llvm/test/CodeGen/X86/pr10525.ll b/llvm/test/CodeGen/X86/pr10525.ll
index 30ce297..436d89c 100644
--- a/llvm/test/CodeGen/X86/pr10525.ll
+++ b/llvm/test/CodeGen/X86/pr10525.ll
@@ -4,7 +4,7 @@
 
 define void @autogen_163411_5000() {
 BB:
-  %L = load <2 x i64>* undef
+  %L = load <2 x i64>, <2 x i64>* undef
   %Shuff11 = shufflevector <2 x i64> %L, <2 x i64> %L, <2 x i32> <i32 2, i32 0>
   %I51 = insertelement <2 x i64> undef, i64 undef, i32 0
   %Shuff152 = shufflevector <2 x i64> %I51, <2 x i64> %Shuff11, <2 x i32> <i32 1, i32 3>
diff --git a/llvm/test/CodeGen/X86/pr11334.ll b/llvm/test/CodeGen/X86/pr11334.ll
index 0bdb0ec..6da4697 100644
--- a/llvm/test/CodeGen/X86/pr11334.ll
+++ b/llvm/test/CodeGen/X86/pr11334.ll
@@ -57,7 +57,7 @@
 
 define void @test_vector_creation() nounwind {
   %1 = insertelement <4 x double> undef, double 0.000000e+00, i32 2
-  %2 = load double addrspace(1)* null
+  %2 = load double, double addrspace(1)* null
   %3 = insertelement <4 x double> %1, double %2, i32 3
   store <4 x double> %3, <4 x double>* undef
   ret void
diff --git a/llvm/test/CodeGen/X86/pr12360.ll b/llvm/test/CodeGen/X86/pr12360.ll
index 6734036..3e762da 100644
--- a/llvm/test/CodeGen/X86/pr12360.ll
+++ b/llvm/test/CodeGen/X86/pr12360.ll
@@ -6,7 +6,7 @@
 ; CHECK-NEXT: ret
 
 entry:
-  %0 = load i8* %x, align 1, !range !0
+  %0 = load i8, i8* %x, align 1, !range !0
   %tobool = trunc i8 %0 to i1
   ret i1 %tobool
 }
@@ -17,7 +17,7 @@
 ; CHECK-NEXT: ret
 
 entry:
-  %0 = load i8* %x, align 1, !range !0
+  %0 = load i8, i8* %x, align 1, !range !0
   %tobool = icmp ne i8 %0, 0
   ret i1 %tobool
 }
diff --git a/llvm/test/CodeGen/X86/pr12889.ll b/llvm/test/CodeGen/X86/pr12889.ll
index 428e9b7..8234fcc 100644
--- a/llvm/test/CodeGen/X86/pr12889.ll
+++ b/llvm/test/CodeGen/X86/pr12889.ll
@@ -6,7 +6,7 @@
 
 define void @func() nounwind uwtable {
 entry:
-  %0 = load i8* @c0, align 1
+  %0 = load i8, i8* @c0, align 1
   %tobool = icmp ne i8 %0, 0
   %conv = zext i1 %tobool to i8
   %storemerge = shl nuw nsw i8 %conv, %conv
diff --git a/llvm/test/CodeGen/X86/pr13209.ll b/llvm/test/CodeGen/X86/pr13209.ll
index 8e5eca2..0d5196f 100644
--- a/llvm/test/CodeGen/X86/pr13209.ll
+++ b/llvm/test/CodeGen/X86/pr13209.ll
@@ -11,37 +11,37 @@
   %frombool.i5915.ph = phi i8 [ undef, %if.end51 ], [ %frombool.i5917, %jit_return ]
   br label %indirectgoto
 do.end165:
-  %tmp92 = load i8** %x, align 8
+  %tmp92 = load i8*, i8** %x, align 8
   br label %indirectgoto
 do.end209:
-  %tmp104 = load i8** %x, align 8
+  %tmp104 = load i8*, i8** %x, align 8
   br label %indirectgoto
 do.end220:
-  %tmp107 = load i8** %x, align 8
+  %tmp107 = load i8*, i8** %x, align 8
   br label %indirectgoto
 do.end231:
-  %tmp110 = load i8** %x, align 8
+  %tmp110 = load i8*, i8** %x, align 8
   br label %indirectgoto
 do.end242:
-  %tmp113 = load i8** %x, align 8
+  %tmp113 = load i8*, i8** %x, align 8
   br label %indirectgoto
 do.end253:
-  %tmp116 = load i8** %x, align 8
+  %tmp116 = load i8*, i8** %x, align 8
   br label %indirectgoto
 do.end286:
-  %tmp125 = load i8** %x, align 8
+  %tmp125 = load i8*, i8** %x, align 8
   br label %indirectgoto
 do.end297:
-  %tmp128 = load i8** %x, align 8
+  %tmp128 = load i8*, i8** %x, align 8
   br label %indirectgoto
 do.end308:
-  %tmp131 = load i8** %x, align 8
+  %tmp131 = load i8*, i8** %x, align 8
   br label %indirectgoto
 do.end429:
-  %tmp164 = load i8** %x, align 8
+  %tmp164 = load i8*, i8** %x, align 8
   br label %indirectgoto
 do.end440:
-  %tmp167 = load i8** %x, align 8
+  %tmp167 = load i8*, i8** %x, align 8
   br label %indirectgoto
 do.body482:
   br i1 false, label %indirectgoto, label %do.body495
@@ -55,16 +55,16 @@
 jit_return:
   br label %indirectgoto.preheader
 L_JSOP_UINT24:
-  %tmp864 = load i8** %x, align 8
+  %tmp864 = load i8*, i8** %x, align 8
   br label %indirectgoto
 L_JSOP_THROWING:
-  %tmp1201 = load i8** %x, align 8
+  %tmp1201 = load i8*, i8** %x, align 8
   br label %indirectgoto
 do.body4936:
-  %tmp1240 = load i8** %x, align 8
+  %tmp1240 = load i8*, i8** %x, align 8
   br label %indirectgoto
 do.body5184:
-  %tmp1340 = load i8** %x, align 8
+  %tmp1340 = load i8*, i8** %x, align 8
   br label %indirectgoto
 if.end5571:
   br  label %inline_return
diff --git a/llvm/test/CodeGen/X86/pr13859.ll b/llvm/test/CodeGen/X86/pr13859.ll
index 719721d..1ebc796 100644
--- a/llvm/test/CodeGen/X86/pr13859.ll
+++ b/llvm/test/CodeGen/X86/pr13859.ll
@@ -7,7 +7,7 @@
   %aMyAlloca = alloca i32, align 32
   %dest = alloca <1 x i64>, align 32
 
-  %a32 = load i32* %aMyAlloca, align 4
+  %a32 = load i32, i32* %aMyAlloca, align 4
   %aconv = trunc i32 %a32 to i16
   %a36 = insertelement <4 x i16> undef, i16 %aconv, i32 0
   %a37 = insertelement <4 x i16> %a36, i16 %aconv, i32 1
diff --git a/llvm/test/CodeGen/X86/pr13899.ll b/llvm/test/CodeGen/X86/pr13899.ll
index 978d813..abfb918 100644
--- a/llvm/test/CodeGen/X86/pr13899.ll
+++ b/llvm/test/CodeGen/X86/pr13899.ll
@@ -26,25 +26,25 @@
                          <8 x float> %i8, <8 x float> %i9)
 
 define <8 x float> @foo64(<8 x float>* %p) {
-  %1 = load <8 x float>* %p
+  %1 = load <8 x float>, <8 x float>* %p
   %idx1 = getelementptr inbounds <8 x float>, <8 x float>* %p, i64 1
-  %2 = load <8 x float>* %idx1
+  %2 = load <8 x float>, <8 x float>* %idx1
   %idx2 = getelementptr inbounds <8 x float>, <8 x float>* %p, i64 2
-  %3 = load <8 x float>* %idx2
+  %3 = load <8 x float>, <8 x float>* %idx2
   %idx3 = getelementptr inbounds <8 x float>, <8 x float>* %p, i64 3
-  %4 = load <8 x float>* %idx3
+  %4 = load <8 x float>, <8 x float>* %idx3
   %idx4 = getelementptr inbounds <8 x float>, <8 x float>* %p, i64 4
-  %5 = load <8 x float>* %idx4
+  %5 = load <8 x float>, <8 x float>* %idx4
   %idx5 = getelementptr inbounds <8 x float>, <8 x float>* %p, i64 5
-  %6 = load <8 x float>* %idx5
+  %6 = load <8 x float>, <8 x float>* %idx5
   %idx6 = getelementptr inbounds <8 x float>, <8 x float>* %p, i64 6
-  %7 = load <8 x float>* %idx6
+  %7 = load <8 x float>, <8 x float>* %idx6
   %idx7 = getelementptr inbounds <8 x float>, <8 x float>* %p, i64 7
-  %8 = load <8 x float>* %idx7
+  %8 = load <8 x float>, <8 x float>* %idx7
   %idx8 = getelementptr inbounds <8 x float>, <8 x float>* %p, i64 8
-  %9 = load <8 x float>* %idx8
+  %9 = load <8 x float>, <8 x float>* %idx8
   %idx9 = getelementptr inbounds <8 x float>, <8 x float>* %p, i64 9
-  %10 = load <8 x float>* %idx9
+  %10 = load <8 x float>, <8 x float>* %idx9
   %r = tail call <8 x float> @bar64(<8 x float> %1, <8 x float> %2,
                                     <8 x float> %3, <8 x float> %4,
                                     <8 x float> %5, <8 x float> %6,
diff --git a/llvm/test/CodeGen/X86/pr14161.ll b/llvm/test/CodeGen/X86/pr14161.ll
index c2bb8d3..b7084c0 100644
--- a/llvm/test/CodeGen/X86/pr14161.ll
+++ b/llvm/test/CodeGen/X86/pr14161.ll
@@ -10,7 +10,7 @@
 ; CHECK-NEXT:    pmovzxwq %xmm0, %xmm0
 ; CHECK-NEXT:    retq
 entry:
-  %2 = load <4 x i32>* %0, align 16
+  %2 = load <4 x i32>, <4 x i32>* %0, align 16
   %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
   %4 = extractelement <4 x i32> %3, i32 0
   %5 = extractelement <4 x i32> %3, i32 1
@@ -31,7 +31,7 @@
 ; CHECK-NEXT:    pmovzxwq %xmm0, %xmm0
 ; CHECK-NEXT:    retq
 entry:
-  %2 = load <4 x i32>* %0, align 16
+  %2 = load <4 x i32>, <4 x i32>* %0, align 16
   %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %2, <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
   %4 = extractelement <4 x i32> %3, i32 0
   %5 = extractelement <4 x i32> %3, i32 1
diff --git a/llvm/test/CodeGen/X86/pr14562.ll b/llvm/test/CodeGen/X86/pr14562.ll
index e66f175..3167454 100644
--- a/llvm/test/CodeGen/X86/pr14562.ll
+++ b/llvm/test/CodeGen/X86/pr14562.ll
@@ -3,7 +3,7 @@
 @temp1 = global i64 -77129852189294865, align 8
 
 define void @foo() nounwind {
-  %x = load i64* @temp1, align 8
+  %x = load i64, i64* @temp1, align 8
   %s = shl i64 %x, 32
   %t = trunc i64 %s to i32
   %z = zext i32 %t to i64
diff --git a/llvm/test/CodeGen/X86/pr1505b.ll b/llvm/test/CodeGen/X86/pr1505b.ll
index c348fec..9580497 100644
--- a/llvm/test/CodeGen/X86/pr1505b.ll
+++ b/llvm/test/CodeGen/X86/pr1505b.ll
@@ -33,7 +33,7 @@
 define i32 @main() {
 entry:
 ; CHECK: flds
-	%tmp6 = load volatile float* @a		; <float> [#uses=1]
+	%tmp6 = load volatile float, float* @a		; <float> [#uses=1]
 ; CHECK: fstps (%esp)
 ; CHECK: tanf
 	%tmp9 = tail call float @tanf( float %tmp6 )		; <float> [#uses=1]
@@ -41,7 +41,7 @@
 ; CHECK: fstp
 
 ; CHECK: fldl
-	%tmp12 = load volatile double* @b		; <double> [#uses=1]
+	%tmp12 = load volatile double, double* @b		; <double> [#uses=1]
 ; CHECK: fstpl (%esp)
 ; CHECK: tan
 	%tmp13 = tail call double @tan( double %tmp12 )		; <double> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/pr15267.ll b/llvm/test/CodeGen/X86/pr15267.ll
index 90df9905..e9f41d9 100644
--- a/llvm/test/CodeGen/X86/pr15267.ll
+++ b/llvm/test/CodeGen/X86/pr15267.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7-avx | FileCheck %s
 
 define <4 x i3> @test1(<4 x i3>* %in) nounwind {
-  %ret = load <4 x i3>* %in, align 1
+  %ret = load <4 x i3>, <4 x i3>* %in, align 1
   ret <4 x i3> %ret
 }
 ; CHECK-LABEL: test1
@@ -20,7 +20,7 @@
 ; CHECK: ret
 
 define <4 x i1> @test2(<4 x i1>* %in) nounwind {
-  %ret = load <4 x i1>* %in, align 1
+  %ret = load <4 x i1>, <4 x i1>* %in, align 1
   ret <4 x i1> %ret
 }
 
@@ -40,7 +40,7 @@
 ; CHECK: ret
 
 define <4 x i64> @test3(<4 x i1>* %in) nounwind {
-  %wide.load35 = load <4 x i1>* %in, align 1
+  %wide.load35 = load <4 x i1>, <4 x i1>* %in, align 1
   %sext = sext <4 x i1> %wide.load35 to <4 x i64>
   ret <4 x i64> %sext
 }
@@ -68,7 +68,7 @@
 ; CHECK: ret
 
 define <16 x i4> @test4(<16 x i4>* %in) nounwind {
-  %ret = load <16 x i4>* %in, align 1
+  %ret = load <16 x i4>, <16 x i4>* %in, align 1
   ret <16 x i4> %ret
 }
 
diff --git a/llvm/test/CodeGen/X86/pr15309.ll b/llvm/test/CodeGen/X86/pr15309.ll
index fdda471..e9d9b9e 100644
--- a/llvm/test/CodeGen/X86/pr15309.ll
+++ b/llvm/test/CodeGen/X86/pr15309.ll
@@ -3,7 +3,7 @@
 define void @test_convert_float2_ulong2(<2 x i64>* nocapture %src, <2 x float>* nocapture %dest) noinline {
 L.entry:
   %0 = getelementptr <2 x i64>, <2 x i64>* %src, i32 10
-  %1 = load <2 x i64>* %0, align 16
+  %1 = load <2 x i64>, <2 x i64>* %0, align 16
   %2 = uitofp <2 x i64> %1 to <2 x float>
   %3 = getelementptr <2 x float>, <2 x float>* %dest, i32 10
   store <2 x float> %2, <2 x float>* %3, align 8
diff --git a/llvm/test/CodeGen/X86/pr18023.ll b/llvm/test/CodeGen/X86/pr18023.ll
index 4c6f8cf..c376fe2 100644
--- a/llvm/test/CodeGen/X86/pr18023.ll
+++ b/llvm/test/CodeGen/X86/pr18023.ll
@@ -15,15 +15,15 @@
 define void @func() {
   store i32 1, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4
   store i32 0, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 0), align 4
-  %1 = load volatile i32* @b, align 4
+  %1 = load volatile i32, i32* @b, align 4
   store i32 1, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4
   store i32 0, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4
-  %2 = load volatile i32* @b, align 4
+  %2 = load volatile i32, i32* @b, align 4
   store i32 1, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4
   store i32 0, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 2), align 4
-  %3 = load volatile i32* @b, align 4
+  %3 = load volatile i32, i32* @b, align 4
   store i32 3, i32* @c, align 4
-  %4 = load i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4
+  %4 = load i32, i32* getelementptr inbounds ([3 x i32]* @a, i64 0, i64 1), align 4
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %4)
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/pr18162.ll b/llvm/test/CodeGen/X86/pr18162.ll
index 3afa3ba..7912db8 100644
--- a/llvm/test/CodeGen/X86/pr18162.ll
+++ b/llvm/test/CodeGen/X86/pr18162.ll
@@ -13,15 +13,15 @@
 entry:
   %retval = alloca i32
   %this.addr = alloca %"Iterator"*
-  %this1 = load %"Iterator"** %this.addr
+  %this1 = load %"Iterator"*, %"Iterator"** %this.addr
   %bundle_ = getelementptr inbounds %"Iterator", %"Iterator"* %this1, i32 0, i32 0
-  %0 = load i32** %bundle_
+  %0 = load i32*, i32** %bundle_
   %1 = call { i64, <2 x float> } @Call()
   %2 = call { i64, <2 x float> }* @CallPtr()
   %3 = getelementptr { i64, <2 x float> }, { i64, <2 x float> }* %2, i32 0, i32 1
   %4 = extractvalue { i64, <2 x float> } %1, 1
   store <2 x float> %4, <2 x float>* %3
-  %5 = load { i64, <2 x float> }* %2
+  %5 = load { i64, <2 x float> }, { i64, <2 x float> }* %2
   ret { i64, <2 x float> } %5
 }
 
diff --git a/llvm/test/CodeGen/X86/pr18846.ll b/llvm/test/CodeGen/X86/pr18846.ll
index eb6addb..02c33fe 100644
--- a/llvm/test/CodeGen/X86/pr18846.ll
+++ b/llvm/test/CodeGen/X86/pr18846.ll
@@ -21,31 +21,31 @@
   br i1 undef, label %for.body29, label %for.body65
 
 for.body65:                                       ; preds = %for.body29
-  %0 = load float* undef, align 4, !tbaa !1
+  %0 = load float, float* undef, align 4, !tbaa !1
   %vecinit7.i4448 = insertelement <8 x float> undef, float %0, i32 7
-  %1 = load float* null, align 4, !tbaa !1
+  %1 = load float, float* null, align 4, !tbaa !1
   %vecinit7.i4304 = insertelement <8 x float> undef, float %1, i32 7
-  %2 = load float* undef, align 4, !tbaa !1
+  %2 = load float, float* undef, align 4, !tbaa !1
   %vecinit7.i4196 = insertelement <8 x float> undef, float %2, i32 7
   %3 = or i64 0, 16
   %add.ptr111.sum4096 = add i64 %3, 0
-  %4 = load <8 x float>* null, align 16, !tbaa !5
+  %4 = load <8 x float>, <8 x float>* null, align 16, !tbaa !5
   %add.ptr162 = getelementptr inbounds [65536 x float], [65536 x float]* null, i64 0, i64 %add.ptr111.sum4096
   %__v.i4158 = bitcast float* %add.ptr162 to <8 x float>*
-  %5 = load <8 x float>* %__v.i4158, align 16, !tbaa !5
+  %5 = load <8 x float>, <8 x float>* %__v.i4158, align 16, !tbaa !5
   %add.ptr158.sum40975066 = or i64 %add.ptr111.sum4096, 8
   %add.ptr183 = getelementptr inbounds [65536 x float], [65536 x float]* null, i64 0, i64 %add.ptr158.sum40975066
   %__v.i4162 = bitcast float* %add.ptr183 to <8 x float>*
-  %6 = load <8 x float>* %__v.i4162, align 16, !tbaa !5
+  %6 = load <8 x float>, <8 x float>* %__v.i4162, align 16, !tbaa !5
   %add.ptr200.sum40995067 = or i64 undef, 8
   %add.ptr225 = getelementptr inbounds [65536 x float], [65536 x float]* null, i64 0, i64 %add.ptr200.sum40995067
   %__v.i4167 = bitcast float* %add.ptr225 to <8 x float>*
-  %7 = load <8 x float>* %__v.i4167, align 4, !tbaa !5
-  %8 = load <8 x float>* undef, align 16, !tbaa !5
+  %7 = load <8 x float>, <8 x float>* %__v.i4167, align 4, !tbaa !5
+  %8 = load <8 x float>, <8 x float>* undef, align 16, !tbaa !5
   %add.ptr242.sum41015068 = or i64 0, 8
   %add.ptr267 = getelementptr inbounds [65536 x float], [65536 x float]* null, i64 0, i64 %add.ptr242.sum41015068
   %__v.i4171 = bitcast float* %add.ptr267 to <8 x float>*
-  %9 = load <8 x float>* %__v.i4171, align 4, !tbaa !5
+  %9 = load <8 x float>, <8 x float>* %__v.i4171, align 4, !tbaa !5
   %mul.i4690 = fmul <8 x float> %7, undef
   %add.i4665 = fadd <8 x float> undef, undef
   %mul.i4616 = fmul <8 x float> %8, undef
@@ -56,8 +56,8 @@
   %mul.i4578 = fmul <8 x float> %9, undef
   %add.i4577 = fadd <8 x float> %add.i4593, %mul.i4578
   call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4577) #1
-  %10 = load <8 x float>* null, align 16, !tbaa !5
-  %11 = load <8 x float>* undef, align 16, !tbaa !5
+  %10 = load <8 x float>, <8 x float>* null, align 16, !tbaa !5
+  %11 = load <8 x float>, <8 x float>* undef, align 16, !tbaa !5
   %mul.i4564 = fmul <8 x float> %4, undef
   %add.i4563 = fadd <8 x float> %10, %mul.i4564
   %mul.i4560 = fmul <8 x float> %5, undef
@@ -107,7 +107,7 @@
   call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4293) #1
   call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4291) #1
   call void @llvm.x86.avx.storeu.ps.256(i8* undef, <8 x float> %add.i4289) #1
-  %12 = load <8 x float>* undef, align 16, !tbaa !5
+  %12 = load <8 x float>, <8 x float>* undef, align 16, !tbaa !5
   %mul.i4274 = fmul <8 x float> undef, undef
   %add.i4273 = fadd <8 x float> %12, %mul.i4274
   %mul.i4258 = fmul <8 x float> %7, undef
diff --git a/llvm/test/CodeGen/X86/pr20020.ll b/llvm/test/CodeGen/X86/pr20020.ll
index 5bc87d1..56c12fa 100644
--- a/llvm/test/CodeGen/X86/pr20020.ll
+++ b/llvm/test/CodeGen/X86/pr20020.ll
@@ -41,12 +41,12 @@
   %iv20 = phi i32 [ %iv.next21, %for.body3 ], [ %iv19, %for.body3.lr.ph ]
   %iv15 = phi %struct.planet* [ %gep16, %for.body3 ], [ %iv, %for.body3.lr.ph ]
   %iv1517 = bitcast %struct.planet* %iv15 to double*
-  %2 = load double* %x, align 8
+  %2 = load double, double* %x, align 8
   %gep18 = getelementptr double, double* %iv1517, i64 -1
-  %3 = load double* %gep18, align 8
+  %3 = load double, double* %gep18, align 8
   %sub = fsub double %2, %3
-  %4 = load double* %y, align 8
-  %5 = load double* %iv1517, align 8
+  %4 = load double, double* %y, align 8
+  %5 = load double, double* %iv1517, align 8
   %sub8 = fsub double %4, %5
   %add10 = fadd double %sub, %sub8
   %call = tail call double @sqrt(double %sub8) #2
diff --git a/llvm/test/CodeGen/X86/pr2177.ll b/llvm/test/CodeGen/X86/pr2177.ll
index 01e632b..8260a7f 100644
--- a/llvm/test/CodeGen/X86/pr2177.ll
+++ b/llvm/test/CodeGen/X86/pr2177.ll
@@ -22,10 +22,10 @@
 bb54:		; preds = %bb5
 	ret void
 bb118:		; preds = %bb5, %bb5, %bb5, %bb5
-	%tmp125 = load i8** null, align 8		; <i8*> [#uses=1]
+	%tmp125 = load i8*, i8** null, align 8		; <i8*> [#uses=1]
 	%tmp125126 = bitcast i8* %tmp125 to %struct.S2259*		; <%struct.S2259*> [#uses=1]
 	%tmp128 = getelementptr %struct.S2259, %struct.S2259* %tmp125126, i32 0, i32 0		; <<4 x i16>*> [#uses=1]
-	%tmp129 = load <4 x i16>* %tmp128, align 8		; <<4 x i16>> [#uses=1]
+	%tmp129 = load <4 x i16>, <4 x i16>* %tmp128, align 8		; <<4 x i16>> [#uses=1]
 	store <4 x i16> %tmp129, <4 x i16>* null, align 8
 	ret void
 bb155:		; preds = %bb5
diff --git a/llvm/test/CodeGen/X86/pr2182.ll b/llvm/test/CodeGen/X86/pr2182.ll
index 94429b2..0cf3acf 100644
--- a/llvm/test/CodeGen/X86/pr2182.ll
+++ b/llvm/test/CodeGen/X86/pr2182.ll
@@ -15,16 +15,16 @@
 ; CHECK-NEXT: addl $3, (%{{.*}})
 ; CHECK-NEXT: ret
 
-  %tmp = load volatile i32* @x, align 4           ; <i32> [#uses=1]
+  %tmp = load volatile i32, i32* @x, align 4           ; <i32> [#uses=1]
   %tmp1 = add i32 %tmp, 3         ; <i32> [#uses=1]
   store volatile i32 %tmp1, i32* @x, align 4
-  %tmp.1 = load volatile i32* @x, align 4         ; <i32> [#uses=1]
+  %tmp.1 = load volatile i32, i32* @x, align 4         ; <i32> [#uses=1]
   %tmp1.1 = add i32 %tmp.1, 3             ; <i32> [#uses=1]
   store volatile i32 %tmp1.1, i32* @x, align 4
-  %tmp.2 = load volatile i32* @x, align 4         ; <i32> [#uses=1]
+  %tmp.2 = load volatile i32, i32* @x, align 4         ; <i32> [#uses=1]
   %tmp1.2 = add i32 %tmp.2, 3             ; <i32> [#uses=1]
   store volatile i32 %tmp1.2, i32* @x, align 4
-  %tmp.3 = load volatile i32* @x, align 4         ; <i32> [#uses=1]
+  %tmp.3 = load volatile i32, i32* @x, align 4         ; <i32> [#uses=1]
   %tmp1.3 = add i32 %tmp.3, 3             ; <i32> [#uses=1]
   store volatile i32 %tmp1.3, i32* @x, align 4
   ret void
diff --git a/llvm/test/CodeGen/X86/pr2326.ll b/llvm/test/CodeGen/X86/pr2326.ll
index f82dcb5..9cf83bb 100644
--- a/llvm/test/CodeGen/X86/pr2326.ll
+++ b/llvm/test/CodeGen/X86/pr2326.ll
@@ -4,12 +4,12 @@
 define i32 @func_59(i32 %p_60) nounwind  {
 entry:
 	%l_108 = alloca i32		; <i32*> [#uses=2]
-	%tmp15 = load i32* null, align 4		; <i32> [#uses=1]
-	%tmp16 = load i32* %l_108, align 4		; <i32> [#uses=1]
+	%tmp15 = load i32, i32* null, align 4		; <i32> [#uses=1]
+	%tmp16 = load i32, i32* %l_108, align 4		; <i32> [#uses=1]
 	%tmp17 = icmp eq i32 %tmp15, %tmp16		; <i1> [#uses=1]
 	%tmp1718 = zext i1 %tmp17 to i8		; <i8> [#uses=1]
-	%tmp19 = load i32* null, align 4		; <i32> [#uses=1]
-	%tmp20 = load i32* %l_108, align 4		; <i32> [#uses=1]
+	%tmp19 = load i32, i32* null, align 4		; <i32> [#uses=1]
+	%tmp20 = load i32, i32* %l_108, align 4		; <i32> [#uses=1]
 	%tmp21 = icmp ule i32 %tmp19, %tmp20		; <i1> [#uses=1]
 	%tmp2122 = zext i1 %tmp21 to i8		; <i8> [#uses=1]
 	%toBool23 = icmp ne i8 %tmp1718, 0		; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/pr2656.ll b/llvm/test/CodeGen/X86/pr2656.ll
index 6d9c27e..94b7ba1 100644
--- a/llvm/test/CodeGen/X86/pr2656.ll
+++ b/llvm/test/CodeGen/X86/pr2656.ll
@@ -12,9 +12,9 @@
 define void @foo(%struct.anon* byval %p) nounwind {
 entry:
 	%tmp = getelementptr %struct.anon, %struct.anon* %p, i32 0, i32 0		; <float*> [#uses=1]
-	%tmp1 = load float* %tmp		; <float> [#uses=1]
+	%tmp1 = load float, float* %tmp		; <float> [#uses=1]
 	%tmp2 = getelementptr %struct.anon, %struct.anon* %p, i32 0, i32 1		; <float*> [#uses=1]
-	%tmp3 = load float* %tmp2		; <float> [#uses=1]
+	%tmp3 = load float, float* %tmp2		; <float> [#uses=1]
 	%neg = fsub float -0.000000e+00, %tmp1		; <float> [#uses=1]
 	%conv = fpext float %neg to double		; <double> [#uses=1]
 	%neg4 = fsub float -0.000000e+00, %tmp3		; <float> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/pr2849.ll b/llvm/test/CodeGen/X86/pr2849.ll
index c46f660..c3fd101 100644
--- a/llvm/test/CodeGen/X86/pr2849.ll
+++ b/llvm/test/CodeGen/X86/pr2849.ll
@@ -13,20 +13,20 @@
 
 define void @obshow() {
 entry:
-	%tmp = load %struct.HashEntry** @hash_table_begin, align 8
+	%tmp = load %struct.HashEntry*, %struct.HashEntry** @hash_table_begin, align 8
 	br i1 false, label %xlygetvalue.exit, label %xlygetvalue.exit
 
 xlygetvalue.exit:
 	%storemerge.in.i = phi %struct.NODE** [ null, %entry ], [ null, %entry ]
-	%storemerge.i = load %struct.NODE** %storemerge.in.i
+	%storemerge.i = load %struct.NODE*, %struct.NODE** %storemerge.in.i
 	%tmp1 = ptrtoint %struct.NODE** %storemerge.in.i to i64
 	%tmp2 = lshr i64 %tmp1, 3
 	%tmp3 = and i64 %tmp2, 2147483647
 	%tmp4 = getelementptr %struct.HashEntry, %struct.HashEntry* %tmp, i64 %tmp3, i32 0, i32 1
-	%tmp7 = load i8** %tmp4, align 8
+	%tmp7 = load i8*, i8** %tmp4, align 8
 	%tmp8 = getelementptr %struct.NODE, %struct.NODE* %storemerge.i, i64 0, i32 2
 	%tmp9 = bitcast %struct.anon* %tmp8 to %struct.NODE***
-	%tmp11 = load %struct.NODE*** %tmp9, align 8
+	%tmp11 = load %struct.NODE**, %struct.NODE*** %tmp9, align 8
 	%tmp12 = ptrtoint %struct.NODE** %tmp11 to i64
 	%tmp13 = lshr i64 %tmp12, 3
 	%tmp14 = and i64 %tmp13, 2147483647
diff --git a/llvm/test/CodeGen/X86/pr2924.ll b/llvm/test/CodeGen/X86/pr2924.ll
index b9e8dc1..14e9fc4 100644
--- a/llvm/test/CodeGen/X86/pr2924.ll
+++ b/llvm/test/CodeGen/X86/pr2924.ll
@@ -7,18 +7,18 @@
 
 define x86_stdcallcc { i32, i8* } @_D3std6string7toupperFAaZAa({ i32, i8* } %s) {
 entry_std.string.toupper:
-        %tmp58 = load i32* null
+        %tmp58 = load i32, i32* null
         %tmp59 = icmp eq i32 %tmp58, 0
-        %r.val = load { i32, i8* }* null, align 8
+        %r.val = load { i32, i8* }, { i32, i8* }* null, align 8
         %condtmp.0 = select i1 %tmp59, { i32, i8* } undef, { i32, i8* } %r.val 
 
         ret { i32, i8* } %condtmp.0
 }
 define { } @empty({ } %s) {
 entry_std.string.toupper:
-        %tmp58 = load i32* null
+        %tmp58 = load i32, i32* null
         %tmp59 = icmp eq i32 %tmp58, 0
-        %r.val = load { }* null, align 8
+        %r.val = load { }, { }* null, align 8
         %condtmp.0 = select i1 %tmp59, { } undef, { } %r.val
         ret { } %condtmp.0
 }
diff --git a/llvm/test/CodeGen/X86/pr2982.ll b/llvm/test/CodeGen/X86/pr2982.ll
index 3f9a595..ab46005 100644
--- a/llvm/test/CodeGen/X86/pr2982.ll
+++ b/llvm/test/CodeGen/X86/pr2982.ll
@@ -12,11 +12,11 @@
 
 define void @bar() nounwind {
 entry:
-        %0 = load i32* @g_279, align 4          ; <i32> [#uses=1]
+        %0 = load i32, i32* @g_279, align 4          ; <i32> [#uses=1]
         %1 = shl i32 %0, 1              ; <i32> [#uses=1]
         %2 = and i32 %1, 2              ; <i32> [#uses=1]
-        %3 = load i32* @g_265, align 4          ; <i32> [#uses=1]
-        %4 = load i8* @g_3, align 1             ; <i8> [#uses=1]
+        %3 = load i32, i32* @g_265, align 4          ; <i32> [#uses=1]
+        %4 = load i8, i8* @g_3, align 1             ; <i8> [#uses=1]
         %5 = sext i8 %4 to i32          ; <i32> [#uses=1]
         %6 = add i32 %2, %3             ; <i32> [#uses=1]
         %7 = add i32 %6, %5             ; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/pr3216.ll b/llvm/test/CodeGen/X86/pr3216.ll
index a4a4821..23dcf56 100644
--- a/llvm/test/CodeGen/X86/pr3216.ll
+++ b/llvm/test/CodeGen/X86/pr3216.ll
@@ -8,7 +8,7 @@
 ; CHECK: sar{{.}} $5
 ; CHECK: ret
 
-   %tmp = load i8* @foo
+   %tmp = load i8, i8* @foo
    %bf.lo = lshr i8 %tmp, 5
    %bf.lo.cleared = and i8 %bf.lo, 7
    %1 = shl i8 %bf.lo.cleared, 5
diff --git a/llvm/test/CodeGen/X86/pr3241.ll b/llvm/test/CodeGen/X86/pr3241.ll
index 2f7917b..f89634d 100644
--- a/llvm/test/CodeGen/X86/pr3241.ll
+++ b/llvm/test/CodeGen/X86/pr3241.ll
@@ -9,7 +9,7 @@
 	%t1 = call i32 @safe_add_macro_uint32_t_u_u() nounwind
 	%t2 = icmp sgt i32 %t1, 0
 	%t3 = zext i1 %t2 to i32
-	%t4 = load i32* @g_620, align 4
+	%t4 = load i32, i32* @g_620, align 4
 	%t5 = icmp eq i32 %t3, %t4
 	%t6 = xor i32 %p_21, 1
 	%t7 = call i32 @func_55(i32 %t6) nounwind
diff --git a/llvm/test/CodeGen/X86/pr3244.ll b/llvm/test/CodeGen/X86/pr3244.ll
index 2598c2f..b08a223 100644
--- a/llvm/test/CodeGen/X86/pr3244.ll
+++ b/llvm/test/CodeGen/X86/pr3244.ll
@@ -6,8 +6,8 @@
 
 define i32 @func_42(i32 %p_43, i32 %p_44, i32 %p_45, i32 %p_46) nounwind {
 entry:
-        %0 = load i16* @g_62, align 2           ; <i16> [#uses=1]
-        %1 = load i32* @g_487, align 4          ; <i32> [#uses=1]
+        %0 = load i16, i16* @g_62, align 2           ; <i16> [#uses=1]
+        %1 = load i32, i32* @g_487, align 4          ; <i32> [#uses=1]
         %2 = trunc i16 %0 to i8         ; <i8> [#uses=1]
         %3 = trunc i32 %1 to i8         ; <i8> [#uses=1]
         %4 = tail call i32 (...)* @func_7(i64 -4455561449541442965, i32 1)
diff --git a/llvm/test/CodeGen/X86/pr3317.ll b/llvm/test/CodeGen/X86/pr3317.ll
index 3854dea..cab8ae6 100644
--- a/llvm/test/CodeGen/X86/pr3317.ll
+++ b/llvm/test/CodeGen/X86/pr3317.ll
@@ -21,7 +21,7 @@
 define i32 @JnJVM_java_rmi_activation_ActivationGroupID_hashCode__(%JavaObject* nocapture) nounwind {
 start:
         %1 = getelementptr %JavaObject, %JavaObject* %0, i64 1, i32 1                ; <%JavaCommonClass**> [#uses=1]
-        %2 = load %JavaCommonClass** %1         ; <%JavaCommonClass*> [#uses=4]
+        %2 = load %JavaCommonClass*, %JavaCommonClass** %1         ; <%JavaCommonClass*> [#uses=4]
         %3 = icmp eq %JavaCommonClass* %2, null         ; <i1> [#uses=1]
         br i1 %3, label %verifyNullExit1, label %verifyNullCont2
 
@@ -32,13 +32,13 @@
 verifyNullCont2:                ; preds = %start
         %4 = bitcast %JavaCommonClass* %2 to { %JavaObject, i16, i32, i64 }*            ; <{ %JavaObject, i16, i32, i64 }*> [#uses=1]
         %5 = getelementptr { %JavaObject, i16, i32, i64 }, { %JavaObject, i16, i32, i64 }* %4, i64 0, i32 2             ; <i32*> [#uses=1]
-        %6 = load i32* %5               ; <i32> [#uses=1]
+        %6 = load i32, i32* %5               ; <i32> [#uses=1]
         %7 = getelementptr %JavaCommonClass, %JavaCommonClass* %2, i64 0, i32 4           ; <%JavaClass***> [#uses=1]
         %8 = bitcast %JavaClass*** %7 to i64*           ; <i64*> [#uses=1]
-        %9 = load i64* %8               ; <i64> [#uses=1]
+        %9 = load i64, i64* %8               ; <i64> [#uses=1]
         %10 = trunc i64 %9 to i32               ; <i32> [#uses=1]
         %11 = getelementptr %JavaCommonClass, %JavaCommonClass* %2, i64 0, i32 3          ; <i16*> [#uses=1]
-        %12 = load i16* %11             ; <i16> [#uses=1]
+        %12 = load i16, i16* %11             ; <i16> [#uses=1]
         %13 = sext i16 %12 to i32               ; <i32> [#uses=1]
         %14 = xor i32 %10, %6           ; <i32> [#uses=1]
         %15 = xor i32 %14, %13          ; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/pr3366.ll b/llvm/test/CodeGen/X86/pr3366.ll
index 1127b60..b89a69a 100644
--- a/llvm/test/CodeGen/X86/pr3366.ll
+++ b/llvm/test/CodeGen/X86/pr3366.ll
@@ -3,7 +3,7 @@
 
 define void @_ada_c34002a() nounwind {
 entry:
-  %0 = load i8* null, align 1
+  %0 = load i8, i8* null, align 1
   %1 = sdiv i8 90, %0
   %2 = icmp ne i8 %1, 3
   %3 = zext i1 %2 to i8
diff --git a/llvm/test/CodeGen/X86/pr9127.ll b/llvm/test/CodeGen/X86/pr9127.ll
index ba92c77..33f9ace 100644
--- a/llvm/test/CodeGen/X86/pr9127.ll
+++ b/llvm/test/CodeGen/X86/pr9127.ll
@@ -3,7 +3,7 @@
 
 define i8 @foobar(double %d, double* %x) {
 entry:
-  %tmp2 = load double* %x, align 8
+  %tmp2 = load double, double* %x, align 8
   %cmp = fcmp oeq double %tmp2, %d
   %conv3 = zext i1 %cmp to i8
   ret i8 %conv3
diff --git a/llvm/test/CodeGen/X86/pre-ra-sched.ll b/llvm/test/CodeGen/X86/pre-ra-sched.ll
index e8ddce6..f8e196b 100644
--- a/llvm/test/CodeGen/X86/pre-ra-sched.ll
+++ b/llvm/test/CodeGen/X86/pre-ra-sched.ll
@@ -15,32 +15,32 @@
 ; CHECK: *** Final schedule
 define i32 @test(i8* %pin) #0 {
   %g0 = getelementptr inbounds i8, i8* %pin, i64 0
-  %l0 = load i8* %g0, align 1
+  %l0 = load i8, i8* %g0, align 1
 
   %g1a = getelementptr inbounds i8, i8* %pin, i64 1
-  %l1a = load i8* %g1a, align 1
+  %l1a = load i8, i8* %g1a, align 1
   %z1a = zext i8 %l1a to i32
   %g1b = getelementptr inbounds i8, i8* %pin, i64 2
-  %l1b = load i8* %g1b, align 1
+  %l1b = load i8, i8* %g1b, align 1
   %z1b = zext i8 %l1b to i32
   %c1 = icmp ne i8 %l0, 0
   %x1 = xor i32 %z1a, %z1b
   %s1 = select i1 %c1, i32 %z1a, i32 %x1
 
   %g2a = getelementptr inbounds i8, i8* %pin, i64 3
-  %l2a = load i8* %g2a, align 1
+  %l2a = load i8, i8* %g2a, align 1
   %z2a = zext i8 %l2a to i32
   %g2b = getelementptr inbounds i8, i8* %pin, i64 4
-  %l2b = load i8* %g2b, align 1
+  %l2b = load i8, i8* %g2b, align 1
   %z2b = zext i8 %l2b to i32
   %x2 = xor i32 %z2a, %z2b
   %s2 = select i1 %c1, i32 %z2a, i32 %x2
 
   %g3a = getelementptr inbounds i8, i8* %pin, i64 5
-  %l3a = load i8* %g3a, align 1
+  %l3a = load i8, i8* %g3a, align 1
   %z3a = zext i8 %l3a to i32
   %g3b = getelementptr inbounds i8, i8* %pin, i64 6
-  %l3b = load i8* %g3b, align 1
+  %l3b = load i8, i8* %g3b, align 1
   %z3b = zext i8 %l3b to i32
   %x3 = xor i32 %z3a, %z3b
   %s3 = select i1 %c1, i32 %z3a, i32 %x3
diff --git a/llvm/test/CodeGen/X86/private-2.ll b/llvm/test/CodeGen/X86/private-2.ll
index c9d6793..21b6b3a 100644
--- a/llvm/test/CodeGen/X86/private-2.ll
+++ b/llvm/test/CodeGen/X86/private-2.ll
@@ -10,6 +10,6 @@
 define internal i32* @"\01-[Example1 whatever]"() nounwind optsize ssp {
 entry:
 	%0 = getelementptr %struct.A, %struct.A* @"_ZZ20-[Example1 whatever]E4C.91", i64 0, i32 0		; <i32**> [#uses=1]
-	%1 = load i32** %0, align 8		; <i32*> [#uses=1]
+	%1 = load i32*, i32** %0, align 8		; <i32*> [#uses=1]
 	ret i32* %1
 }
diff --git a/llvm/test/CodeGen/X86/private.ll b/llvm/test/CodeGen/X86/private.ll
index c02d193..4b936d2 100644
--- a/llvm/test/CodeGen/X86/private.ll
+++ b/llvm/test/CodeGen/X86/private.ll
@@ -10,7 +10,7 @@
 
 define i32 @bar() {
         call void @foo()
-	%1 = load i32* @baz, align 4
+	%1 = load i32, i32* @baz, align 4
         ret i32 %1
 
 ; CHECK-LABEL: bar:
diff --git a/llvm/test/CodeGen/X86/promote-assert-zext.ll b/llvm/test/CodeGen/X86/promote-assert-zext.ll
index b582806..5067483 100644
--- a/llvm/test/CodeGen/X86/promote-assert-zext.ll
+++ b/llvm/test/CodeGen/X86/promote-assert-zext.ll
@@ -11,7 +11,7 @@
 
 define i64 @_ZL5matchPKtPKhiR9MatchData(i8* %tmp13) nounwind {
 entry:
-  %tmp14 = load i8* %tmp13, align 1
+  %tmp14 = load i8, i8* %tmp13, align 1
   %tmp17 = zext i8 %tmp14 to i16
   br label %bb341
 
diff --git a/llvm/test/CodeGen/X86/promote-trunc.ll b/llvm/test/CodeGen/X86/promote-trunc.ll
index 40a58b0..a20557a 100644
--- a/llvm/test/CodeGen/X86/promote-trunc.ll
+++ b/llvm/test/CodeGen/X86/promote-trunc.ll
@@ -1,9 +1,9 @@
 ; RUN: llc < %s -march=x86-64
 
 define<4 x i8> @func_8_64() {
-  %F = load <4 x i64>* undef
+  %F = load <4 x i64>, <4 x i64>* undef
   %G = trunc <4 x i64> %F to <4 x i8>
-  %H = load <4 x i64>* undef
+  %H = load <4 x i64>, <4 x i64>* undef
   %Y = trunc <4 x i64> %H to <4 x i8>
   %T = add <4 x i8> %Y, %G
   ret <4 x i8> %T
diff --git a/llvm/test/CodeGen/X86/promote.ll b/llvm/test/CodeGen/X86/promote.ll
index 283f48c..38cdc14 100644
--- a/llvm/test/CodeGen/X86/promote.ll
+++ b/llvm/test/CodeGen/X86/promote.ll
@@ -9,7 +9,7 @@
 entry:
 ; CHECK: pmul
 ; CHECK-NOT: mulb
-  %0 = load <4 x i8>* %A, align 8
+  %0 = load <4 x i8>, <4 x i8>* %A, align 8
   %mul = mul <4 x i8> %0, %0
   store <4 x i8> %mul, <4 x i8>* undef
   ret i32 0
@@ -23,7 +23,7 @@
 ; CHECK: pmovzxbd
 ; CHECK: paddd
 ; CHECK: pshufb
-  %0 = load <4 x i8>* %A, align 8
+  %0 = load <4 x i8>, <4 x i8>* %A, align 8
   %add = add <4 x i8> %0, %0
   store <4 x i8> %add, <4 x i8>* undef
   ret i32 0
diff --git a/llvm/test/CodeGen/X86/pshufb-mask-comments.ll b/llvm/test/CodeGen/X86/pshufb-mask-comments.ll
index ca5a02c..105a035 100644
--- a/llvm/test/CodeGen/X86/pshufb-mask-comments.ll
+++ b/llvm/test/CodeGen/X86/pshufb-mask-comments.ll
@@ -41,10 +41,10 @@
 ; CHECK-LABEL: test5
 ; CHECK: pshufb {{.*}}
   store <2 x i64> <i64 1, i64 0>, <2 x i64>* undef, align 16
-  %l = load <2 x i64>* undef, align 16
+  %l = load <2 x i64>, <2 x i64>* undef, align 16
   %shuffle = shufflevector <2 x i64> %l, <2 x i64> undef, <2 x i32> zeroinitializer
   store <2 x i64> %shuffle, <2 x i64>* undef, align 16
-  %1 = load <16 x i8>* undef, align 16
+  %1 = load <16 x i8>, <16 x i8>* undef, align 16
   %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> undef, <16 x i8> %1)
   ret <16 x i8> %2
 }
diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll
index bda164b..4b83b55 100644
--- a/llvm/test/CodeGen/X86/psubus.ll
+++ b/llvm/test/CodeGen/X86/psubus.ll
@@ -9,7 +9,7 @@
 vector.ph:
   %0 = getelementptr inbounds i16, i16* %head, i64 0
   %1 = bitcast i16* %0 to <8 x i16>*
-  %2 = load <8 x i16>* %1, align 2
+  %2 = load <8 x i16>, <8 x i16>* %1, align 2
   %3 = icmp slt <8 x i16> %2, zeroinitializer
   %4 = xor <8 x i16> %2, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
   %5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer
@@ -42,7 +42,7 @@
 vector.ph:
   %0 = getelementptr inbounds i16, i16* %head, i64 0
   %1 = bitcast i16* %0 to <8 x i16>*
-  %2 = load <8 x i16>* %1, align 2
+  %2 = load <8 x i16>, <8 x i16>* %1, align 2
   %3 = icmp ugt <8 x i16> %2, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>
   %4 = add <8 x i16> %2, <i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767>
   %5 = select <8 x i1> %3, <8 x i16> %4, <8 x i16> zeroinitializer
@@ -77,7 +77,7 @@
   %broadcast15 = shufflevector <8 x i16> %0, <8 x i16> undef, <8 x i32> zeroinitializer
   %1 = getelementptr inbounds i16, i16* %head, i64 0
   %2 = bitcast i16* %1 to <8 x i16>*
-  %3 = load <8 x i16>* %2, align 2
+  %3 = load <8 x i16>, <8 x i16>* %2, align 2
   %4 = icmp ult <8 x i16> %3, %broadcast15
   %5 = sub <8 x i16> %3, %broadcast15
   %6 = select <8 x i1> %4, <8 x i16> zeroinitializer, <8 x i16> %5
@@ -116,7 +116,7 @@
 vector.ph:
   %0 = getelementptr inbounds i8, i8* %head, i64 0
   %1 = bitcast i8* %0 to <16 x i8>*
-  %2 = load <16 x i8>* %1, align 1
+  %2 = load <16 x i8>, <16 x i8>* %1, align 1
   %3 = icmp slt <16 x i8> %2, zeroinitializer
   %4 = xor <16 x i8> %2, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
   %5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer
@@ -149,7 +149,7 @@
 vector.ph:
   %0 = getelementptr inbounds i8, i8* %head, i64 0
   %1 = bitcast i8* %0 to <16 x i8>*
-  %2 = load <16 x i8>* %1, align 1
+  %2 = load <16 x i8>, <16 x i8>* %1, align 1
   %3 = icmp ugt <16 x i8> %2, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126>
   %4 = add <16 x i8> %2, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
   %5 = select <16 x i1> %3, <16 x i8> %4, <16 x i8> zeroinitializer
@@ -184,7 +184,7 @@
   %broadcast15 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> zeroinitializer
   %1 = getelementptr inbounds i8, i8* %head, i64 0
   %2 = bitcast i8* %1 to <16 x i8>*
-  %3 = load <16 x i8>* %2, align 1
+  %3 = load <16 x i8>, <16 x i8>* %2, align 1
   %4 = icmp ult <16 x i8> %3, %broadcast15
   %5 = sub <16 x i8> %3, %broadcast15
   %6 = select <16 x i1> %4, <16 x i8> zeroinitializer, <16 x i8> %5
@@ -225,7 +225,7 @@
 vector.ph:
   %0 = getelementptr inbounds i16, i16* %head, i64 0
   %1 = bitcast i16* %0 to <16 x i16>*
-  %2 = load <16 x i16>* %1, align 2
+  %2 = load <16 x i16>, <16 x i16>* %1, align 2
   %3 = icmp slt <16 x i16> %2, zeroinitializer
   %4 = xor <16 x i16> %2, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
   %5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> zeroinitializer
@@ -245,7 +245,7 @@
 vector.ph:
   %0 = getelementptr inbounds i16, i16* %head, i64 0
   %1 = bitcast i16* %0 to <16 x i16>*
-  %2 = load <16 x i16>* %1, align 2
+  %2 = load <16 x i16>, <16 x i16>* %1, align 2
   %3 = icmp ugt <16 x i16> %2, <i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766, i16 32766>
   %4 = add <16 x i16> %2, <i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767, i16 -32767>
   %5 = select <16 x i1> %3, <16 x i16> %4, <16 x i16> zeroinitializer
@@ -267,7 +267,7 @@
   %broadcast15 = shufflevector <16 x i16> %0, <16 x i16> undef, <16 x i32> zeroinitializer
   %1 = getelementptr inbounds i16, i16* %head, i64 0
   %2 = bitcast i16* %1 to <16 x i16>*
-  %3 = load <16 x i16>* %2, align 2
+  %3 = load <16 x i16>, <16 x i16>* %2, align 2
   %4 = icmp ult <16 x i16> %3, %broadcast15
   %5 = sub <16 x i16> %3, %broadcast15
   %6 = select <16 x i1> %4, <16 x i16> zeroinitializer, <16 x i16> %5
@@ -289,7 +289,7 @@
 vector.ph:
   %0 = getelementptr inbounds i8, i8* %head, i64 0
   %1 = bitcast i8* %0 to <32 x i8>*
-  %2 = load <32 x i8>* %1, align 1
+  %2 = load <32 x i8>, <32 x i8>* %1, align 1
   %3 = icmp slt <32 x i8> %2, zeroinitializer
   %4 = xor <32 x i8> %2, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
   %5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> zeroinitializer
@@ -309,7 +309,7 @@
 vector.ph:
   %0 = getelementptr inbounds i8, i8* %head, i64 0
   %1 = bitcast i8* %0 to <32 x i8>*
-  %2 = load <32 x i8>* %1, align 1
+  %2 = load <32 x i8>, <32 x i8>* %1, align 1
   %3 = icmp ugt <32 x i8> %2, <i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126, i8 126>
   %4 = add <32 x i8> %2, <i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127, i8 -127>
   %5 = select <32 x i1> %3, <32 x i8> %4, <32 x i8> zeroinitializer
@@ -331,7 +331,7 @@
   %broadcast15 = shufflevector <32 x i8> %0, <32 x i8> undef, <32 x i32> zeroinitializer
   %1 = getelementptr inbounds i8, i8* %head, i64 0
   %2 = bitcast i8* %1 to <32 x i8>*
-  %3 = load <32 x i8>* %2, align 1
+  %3 = load <32 x i8>, <32 x i8>* %2, align 1
   %4 = icmp ult <32 x i8> %3, %broadcast15
   %5 = sub <32 x i8> %3, %broadcast15
   %6 = select <32 x i1> %4, <32 x i8> zeroinitializer, <32 x i8> %5
diff --git a/llvm/test/CodeGen/X86/ragreedy-bug.ll b/llvm/test/CodeGen/X86/ragreedy-bug.ll
index 938babf..e842631 100644
--- a/llvm/test/CodeGen/X86/ragreedy-bug.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-bug.ll
@@ -30,31 +30,31 @@
 define fastcc i32 @prune_match(%struct.Connector_struct* nocapture readonly %a, %struct.Connector_struct* nocapture readonly %b) #9 {
 entry:
   %label56 = bitcast %struct.Connector_struct* %a to i16*
-  %0 = load i16* %label56, align 2
+  %0 = load i16, i16* %label56, align 2
   %label157 = bitcast %struct.Connector_struct* %b to i16*
-  %1 = load i16* %label157, align 2
+  %1 = load i16, i16* %label157, align 2
   %cmp = icmp eq i16 %0, %1
   br i1 %cmp, label %if.end, label %return, !prof !988
 if.end:
   %priority = getelementptr inbounds %struct.Connector_struct, %struct.Connector_struct* %a, i64 0, i32 2
-  %2 = load i8* %priority, align 1
+  %2 = load i8, i8* %priority, align 1
   %priority5 = getelementptr inbounds %struct.Connector_struct, %struct.Connector_struct* %b, i64 0, i32 2
-  %3 = load i8* %priority5, align 1
+  %3 = load i8, i8* %priority5, align 1
   %string = getelementptr inbounds %struct.Connector_struct, %struct.Connector_struct* %a, i64 0, i32 5
-  %4 = load i8** %string, align 8
+  %4 = load i8*, i8** %string, align 8
   %string7 = getelementptr inbounds %struct.Connector_struct, %struct.Connector_struct* %b, i64 0, i32 5
-  %5 = load i8** %string7, align 8
+  %5 = load i8*, i8** %string7, align 8
   br label %while.cond
 while.cond:
   %lsr.iv27 = phi i64 [ %lsr.iv.next28, %if.end17 ], [ 0, %if.end ]
   %scevgep55 = getelementptr i8, i8* %4, i64 %lsr.iv27
-  %6 = load i8* %scevgep55, align 1
+  %6 = load i8, i8* %scevgep55, align 1
   %idxprom.i.i = sext i8 %6 to i64
   %isascii.i.i224 = icmp sgt i8 %6, -1
   br i1 %isascii.i.i224, label %cond.true.i.i, label %cond.false.i.i, !prof !181
 cond.true.i.i:
   %arrayidx.i.i = getelementptr inbounds %struct._RuneLocale, %struct._RuneLocale* @_DefaultRuneLocale, i64 0, i32 5, i64 %idxprom.i.i
-  %7 = load i32* %arrayidx.i.i, align 4
+  %7 = load i32, i32* %arrayidx.i.i, align 4
   %and.i.i = and i32 %7, 32768
   br label %isupper.exit
 cond.false.i.i:
@@ -70,13 +70,13 @@
   %sunkaddr = ptrtoint i8* %5 to i64
   %sunkaddr58 = add i64 %sunkaddr, %lsr.iv27
   %sunkaddr59 = inttoptr i64 %sunkaddr58 to i8*
-  %9 = load i8* %sunkaddr59, align 1
+  %9 = load i8, i8* %sunkaddr59, align 1
   %idxprom.i.i214 = sext i8 %9 to i64
   %isascii.i.i213225 = icmp sgt i8 %9, -1
   br i1 %isascii.i.i213225, label %cond.true.i.i217, label %cond.false.i.i219, !prof !181
 cond.true.i.i217:
   %arrayidx.i.i215 = getelementptr inbounds %struct._RuneLocale, %struct._RuneLocale* @_DefaultRuneLocale, i64 0, i32 5, i64 %idxprom.i.i214
-  %10 = load i32* %arrayidx.i.i215, align 4
+  %10 = load i32, i32* %arrayidx.i.i215, align 4
   %and.i.i216 = and i32 %10, 32768
   br label %isupper.exit223
 cond.false.i.i219:
@@ -92,11 +92,11 @@
   %sunkaddr60 = ptrtoint i8* %4 to i64
   %sunkaddr61 = add i64 %sunkaddr60, %lsr.iv27
   %sunkaddr62 = inttoptr i64 %sunkaddr61 to i8*
-  %12 = load i8* %sunkaddr62, align 1
+  %12 = load i8, i8* %sunkaddr62, align 1
   %sunkaddr63 = ptrtoint i8* %5 to i64
   %sunkaddr64 = add i64 %sunkaddr63, %lsr.iv27
   %sunkaddr65 = inttoptr i64 %sunkaddr64 to i8*
-  %13 = load i8* %sunkaddr65, align 1
+  %13 = load i8, i8* %sunkaddr65, align 1
   %cmp14 = icmp eq i8 %12, %13
   br i1 %cmp14, label %if.end17, label %return, !prof !991
 if.end17:
@@ -110,13 +110,13 @@
   %sunkaddr66 = ptrtoint %struct.Connector_struct* %a to i64
   %sunkaddr67 = add i64 %sunkaddr66, 16
   %sunkaddr68 = inttoptr i64 %sunkaddr67 to i8**
-  %16 = load i8** %sunkaddr68, align 8
-  %17 = load i8* %16, align 1
+  %16 = load i8*, i8** %sunkaddr68, align 8
+  %17 = load i8, i8* %16, align 1
   %cmp26 = icmp eq i8 %17, 83
   %sunkaddr69 = ptrtoint i8* %4 to i64
   %sunkaddr70 = add i64 %sunkaddr69, %lsr.iv27
   %sunkaddr71 = inttoptr i64 %sunkaddr70 to i8*
-  %18 = load i8* %sunkaddr71, align 1
+  %18 = load i8, i8* %sunkaddr71, align 1
   br i1 %cmp26, label %land.lhs.true28, label %while.cond59.preheader, !prof !993
 land.lhs.true28:
   switch i8 %18, label %land.rhs.preheader [
@@ -127,7 +127,7 @@
   %sunkaddr72 = ptrtoint i8* %5 to i64
   %sunkaddr73 = add i64 %sunkaddr72, %lsr.iv27
   %sunkaddr74 = inttoptr i64 %sunkaddr73 to i8*
-  %19 = load i8* %sunkaddr74, align 1
+  %19 = load i8, i8* %sunkaddr74, align 1
   switch i8 %19, label %land.rhs.preheader [
     i8 112, label %land.lhs.true43
   ], !prof !995
@@ -152,7 +152,7 @@
   %sunkaddr76 = add i64 %sunkaddr75, %lsr.iv27
   %sunkaddr77 = add i64 %sunkaddr76, -1
   %sunkaddr78 = inttoptr i64 %sunkaddr77 to i8*
-  %24 = load i8* %sunkaddr78, align 1
+  %24 = load i8, i8* %sunkaddr78, align 1
   %cmp55 = icmp eq i8 %24, 73
   %cmp61233 = icmp eq i8 %18, 0
   %or.cond265 = or i1 %cmp55, %cmp61233
@@ -168,7 +168,7 @@
   %lsr.iv = phi i64 [ 0, %land.rhs.preheader ], [ %lsr.iv.next, %if.then83 ]
   %25 = phi i8 [ %27, %if.then83 ], [ %18, %land.rhs.preheader ]
   %scevgep34 = getelementptr i8, i8* %scevgep33, i64 %lsr.iv
-  %26 = load i8* %scevgep34, align 1
+  %26 = load i8, i8* %scevgep34, align 1
   %cmp64 = icmp eq i8 %26, 0
   br i1 %cmp64, label %return, label %while.body66, !prof !1000
 while.body66:
@@ -184,7 +184,7 @@
 if.then83:
   %scevgep44 = getelementptr i8, i8* %scevgep43, i64 %lsr.iv
   %scevgep45 = getelementptr i8, i8* %scevgep44, i64 1
-  %27 = load i8* %scevgep45, align 1
+  %27 = load i8, i8* %scevgep45, align 1
   %cmp61 = icmp eq i8 %27, 0
   %lsr.iv.next = add i64 %lsr.iv, 1
   br i1 %cmp61, label %return, label %land.rhs, !prof !999
@@ -197,7 +197,7 @@
   %sunkaddr79 = ptrtoint i8* %4 to i64
   %sunkaddr80 = add i64 %sunkaddr79, %lsr.iv27
   %sunkaddr81 = inttoptr i64 %sunkaddr80 to i8*
-  %28 = load i8* %sunkaddr81, align 1
+  %28 = load i8, i8* %sunkaddr81, align 1
   %cmp97238 = icmp eq i8 %28, 0
   br i1 %cmp97238, label %return, label %land.rhs99.preheader, !prof !1004
 land.rhs99.preheader:
@@ -208,7 +208,7 @@
   %lsr.iv17 = phi i64 [ 0, %land.rhs99.preheader ], [ %lsr.iv.next18, %if.then117 ]
   %29 = phi i8 [ %31, %if.then117 ], [ %28, %land.rhs99.preheader ]
   %scevgep32 = getelementptr i8, i8* %scevgep31, i64 %lsr.iv17
-  %30 = load i8* %scevgep32, align 1
+  %30 = load i8, i8* %scevgep32, align 1
   %cmp101 = icmp eq i8 %30, 0
   br i1 %cmp101, label %return, label %while.body104, !prof !1005
 while.body104:
@@ -221,7 +221,7 @@
 if.then117:
   %scevgep41 = getelementptr i8, i8* %scevgep40, i64 %lsr.iv17
   %scevgep42 = getelementptr i8, i8* %scevgep41, i64 1
-  %31 = load i8* %scevgep42, align 1
+  %31 = load i8, i8* %scevgep42, align 1
   %cmp97 = icmp eq i8 %31, 0
   %lsr.iv.next18 = add i64 %lsr.iv17, 1
   br i1 %cmp97, label %return, label %land.rhs99, !prof !1004
@@ -234,7 +234,7 @@
   %sunkaddr82 = ptrtoint i8* %4 to i64
   %sunkaddr83 = add i64 %sunkaddr82, %lsr.iv27
   %sunkaddr84 = inttoptr i64 %sunkaddr83 to i8*
-  %32 = load i8* %sunkaddr84, align 1
+  %32 = load i8, i8* %sunkaddr84, align 1
   %cmp132244 = icmp eq i8 %32, 0
   br i1 %cmp132244, label %return, label %land.rhs134.preheader, !prof !1008
 land.rhs134.preheader:
@@ -245,7 +245,7 @@
   %lsr.iv22 = phi i64 [ 0, %land.rhs134.preheader ], [ %lsr.iv.next23, %if.then152 ]
   %33 = phi i8 [ %35, %if.then152 ], [ %32, %land.rhs134.preheader ]
   %scevgep30 = getelementptr i8, i8* %scevgep29, i64 %lsr.iv22
-  %34 = load i8* %scevgep30, align 1
+  %34 = load i8, i8* %scevgep30, align 1
   %cmp136 = icmp eq i8 %34, 0
   br i1 %cmp136, label %return, label %while.body139, !prof !1009
 while.body139:
@@ -258,7 +258,7 @@
 if.then152:
   %scevgep38 = getelementptr i8, i8* %scevgep37, i64 %lsr.iv22
   %scevgep39 = getelementptr i8, i8* %scevgep38, i64 1
-  %35 = load i8* %scevgep39, align 1
+  %35 = load i8, i8* %scevgep39, align 1
   %cmp132 = icmp eq i8 %35, 0
   %lsr.iv.next23 = add i64 %lsr.iv22, 1
   br i1 %cmp132, label %return, label %land.rhs134, !prof !1008
diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
index 454680f1..68ce452 100644
--- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll
@@ -340,7 +340,7 @@
 
 while.body1679:
   %oldc.43406 = phi i32 [ %inc, %syEchoch.exit3070 ], [ %oldc.1.lcssa, %for.body1664.lr.ph ]
-  %4 = load %struct.TMP.2** %echo.i3101, align 8, !tbaa !6
+  %4 = load %struct.TMP.2*, %struct.TMP.2** %echo.i3101, align 8, !tbaa !6
   %call.i3062 = call i32 @fileno(%struct.TMP.2* %4)
   br i1 undef, label %if.then.i3069, label %syEchoch.exit3070
 
@@ -362,7 +362,7 @@
 for.body1723:
   %q.303203 = phi i8* [ getelementptr inbounds ([8192 x i8]* @syHistory, i64 0, i64 8189), %if.then1477 ], [ %incdec.ptr1730, %for.body1723 ]
   %add.ptr1728 = getelementptr i8, i8* %q.303203, i64 %idx.neg1727
-  %5 = load i8* %add.ptr1728, align 1, !tbaa !5
+  %5 = load i8, i8* %add.ptr1728, align 1, !tbaa !5
   %incdec.ptr1730 = getelementptr i8, i8* %q.303203, i64 -1
   br label %for.body1723
 
diff --git a/llvm/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll b/llvm/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll
index 430dc98..d8085b3 100644
--- a/llvm/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll
+++ b/llvm/test/CodeGen/X86/ragreedy-last-chance-recoloring.ll
@@ -37,26 +37,26 @@
 
 bb206:                                            ; preds = %bb
   %tmp = getelementptr [499 x i32], [499 x i32]* @fp_dh_36985b17790d59a27994eaab5dcb00ee, i32 0, i32 undef
-  %tmp207 = load i32* %tmp
+  %tmp207 = load i32, i32* %tmp
   %tmp208 = add i32 %tmp207, 1
   %tmp209 = inttoptr i32 %tmp208 to i8*
   indirectbr i8* %tmp209, [label %bb213]
 
 bb213:                                            ; preds = %bb206
-  %tmp214 = load i32* @fp_dh_18716afa4a5354de0a302c8edb3b0ee1, align 4
-  %tmp215 = load i8** @fp_dh_20a33cdeefab8f4c8887e82766cb9dcb, align 4
+  %tmp214 = load i32, i32* @fp_dh_18716afa4a5354de0a302c8edb3b0ee1, align 4
+  %tmp215 = load i8*, i8** @fp_dh_20a33cdeefab8f4c8887e82766cb9dcb, align 4
   %tmp216 = urem i32 -717428541, %tmp214
   %tmp217 = getelementptr i8, i8* %tmp215, i32 %tmp216
   %tmp218 = bitcast i8* %tmp217 to i32*
-  %tmp219 = load i32* %tmp218, align 4
+  %tmp219 = load i32, i32* %tmp218, align 4
   store i32 %tmp219, i32* undef, align 4
   %tmp220 = select i1 false, i32 359373646, i32 1677237955
   %tmp221 = add i32 %tmp220, 0
   indirectbr i8* undef, [label %bb432, label %bb222]
 
 bb222:                                            ; preds = %bb213
-  %tmp224 = load i32* undef, align 4
-  %tmp225 = load i32* undef, align 4
+  %tmp224 = load i32, i32* undef, align 4
+  %tmp225 = load i32, i32* undef, align 4
   %tmp226 = xor i32 %tmp225, %tmp224
   %tmp227 = shl i32 %tmp226, 1
   %tmp228 = and i32 %tmp227, -2048880334
@@ -66,12 +66,12 @@
   %tmp232 = mul i32 %tmp231, 1603744721
   %tmp233 = urem i32 %tmp232, 259
   %tmp234 = getelementptr [259 x i8], [259 x i8]* bitcast (i8* getelementptr inbounds ([5419648 x i8]* @fp_dh_9d93c897906e39883c58b034c8e786b2, i32 0, i32 2039075) to [259 x i8]*), i32 0, i32 %tmp233
-  %tmp235 = load i8* %tmp234, align 1
+  %tmp235 = load i8, i8* %tmp234, align 1
   %tmp236 = add i32 %tmp233, 2
   %tmp237 = getelementptr [264 x i8], [264 x i8]* bitcast (i8* getelementptr inbounds ([5419648 x i8]* @fp_dh_9d93c897906e39883c58b034c8e786b2, i32 0, i32 3388166) to [264 x i8]*), i32 0, i32 %tmp236
-  %tmp238 = load i8* %tmp237, align 1
+  %tmp238 = load i8, i8* %tmp237, align 1
   %tmp239 = getelementptr [265 x i8], [265 x i8]* bitcast (i8* getelementptr inbounds ([5419648 x i8]* @fp_dh_9d93c897906e39883c58b034c8e786b2, i32 0, i32 1325165) to [265 x i8]*), i32 0, i32 0
-  %tmp240 = load i8* %tmp239, align 1
+  %tmp240 = load i8, i8* %tmp239, align 1
   %tmp241 = add i32 %tmp233, 6
   %tmp242 = trunc i32 %tmp241 to i8
   %tmp243 = mul i8 %tmp242, -3
@@ -80,7 +80,7 @@
   %tmp246 = and i8 %tmp245, 6
   %tmp247 = sub i8 0, %tmp246
   %tmp248 = add i8 %tmp244, %tmp247
-  %tmp249 = load i8* undef, align 1
+  %tmp249 = load i8, i8* undef, align 1
   %tmp250 = xor i8 %tmp235, 17
   %tmp251 = xor i8 %tmp250, %tmp238
   %tmp252 = xor i8 %tmp251, %tmp240
@@ -88,13 +88,13 @@
   %tmp254 = xor i8 %tmp253, %tmp248
   %tmp255 = zext i8 %tmp254 to i16
   %tmp256 = shl nuw i16 %tmp255, 8
-  %tmp257 = load i8* null, align 1
-  %tmp258 = load i32* @fp_dh_18716afa4a5354de0a302c8edb3b0ee1, align 4
-  %tmp259 = load i8** @fp_dh_20a33cdeefab8f4c8887e82766cb9dcb, align 4
+  %tmp257 = load i8, i8* null, align 1
+  %tmp258 = load i32, i32* @fp_dh_18716afa4a5354de0a302c8edb3b0ee1, align 4
+  %tmp259 = load i8*, i8** @fp_dh_20a33cdeefab8f4c8887e82766cb9dcb, align 4
   %tmp260 = urem i32 -717428541, %tmp258
   %tmp261 = getelementptr i8, i8* %tmp259, i32 %tmp260
   %tmp262 = bitcast i8* %tmp261 to i32*
-  %tmp263 = load i32* %tmp262, align 4
+  %tmp263 = load i32, i32* %tmp262, align 4
   %tmp264 = xor i32 %tmp263, 0
   %tmp265 = shl i32 %tmp264, 1
   %tmp266 = and i32 %tmp265, -1312119832
@@ -105,7 +105,7 @@
   %tmp271 = urem i32 %tmp270, 259
   %tmp274 = add i32 %tmp271, 3
   %tmp275 = getelementptr [265 x i8], [265 x i8]* bitcast (i8* getelementptr inbounds ([5419648 x i8]* @fp_dh_9d93c897906e39883c58b034c8e786b2, i32 0, i32 1325165) to [265 x i8]*), i32 0, i32 %tmp274
-  %tmp276 = load i8* %tmp275, align 1
+  %tmp276 = load i8, i8* %tmp275, align 1
   %tmp277 = add i32 %tmp271, 6
   %tmp278 = trunc i32 %tmp277 to i8
   %tmp279 = mul i8 %tmp278, -3
@@ -162,7 +162,7 @@
   %tmp335 = zext i1 %tmp333 to i32
   %tmp336 = add i32 %tmp334, %tmp335
   %tmp337 = getelementptr [499 x i32], [499 x i32]* @fp_dh_36985b17790d59a27994eaab5dcb00ee, i32 0, i32 %tmp336
-  %tmp338 = load i32* %tmp337
+  %tmp338 = load i32, i32* %tmp337
   %tmp339 = add i32 %tmp338, 1
   %tmp340 = inttoptr i32 %tmp339 to i8*
   indirectbr i8* %tmp340, [label %bb85, label %bb439]
@@ -171,7 +171,7 @@
   %tmp433 = phi i32 [ %tmp221, %bb213 ], [ %tmp433, %bb432 ]
   %tmp434 = add i32 %tmp433, 1022523279
   %tmp435 = getelementptr [499 x i32], [499 x i32]* @fp_dh_36985b17790d59a27994eaab5dcb00ee, i32 0, i32 %tmp434
-  %tmp436 = load i32* %tmp435
+  %tmp436 = load i32, i32* %tmp435
   %tmp437 = add i32 %tmp436, 1
   %tmp438 = inttoptr i32 %tmp437 to i8*
   indirectbr i8* %tmp438, [label %bb432]
diff --git a/llvm/test/CodeGen/X86/rd-mod-wr-eflags.ll b/llvm/test/CodeGen/X86/rd-mod-wr-eflags.ll
index d2e6888..2a81e82 100644
--- a/llvm/test/CodeGen/X86/rd-mod-wr-eflags.ll
+++ b/llvm/test/CodeGen/X86/rd-mod-wr-eflags.ll
@@ -8,7 +8,7 @@
 ; CHECK: decq	(%{{rdi|rcx}})
 ; CHECK-NEXT: je
   %refcnt = getelementptr inbounds %struct.obj, %struct.obj* %o, i64 0, i32 0
-  %0 = load i64* %refcnt, align 8
+  %0 = load i64, i64* %refcnt, align 8
   %dec = add i64 %0, -1
   store i64 %dec, i64* %refcnt, align 8
   %tobool = icmp eq i64 %dec, 0
@@ -33,7 +33,7 @@
 entry:
 ; CHECK: decq
 ; CHECK-NOT: decq
-%0 = load i64* @c, align 8
+%0 = load i64, i64* @c, align 8
 %dec.i = add nsw i64 %0, -1
 store i64 %dec.i, i64* @c, align 8
 %tobool.i = icmp ne i64 %dec.i, 0
@@ -47,7 +47,7 @@
 define i32 @test2() nounwind uwtable ssp {
 entry:
 ; CHECK-NOT: decq ({{.*}})
-%0 = load i64* @c, align 8
+%0 = load i64, i64* @c, align 8
 %dec.i = add nsw i64 %0, -1
 store i64 %dec.i, i64* @c, align 8
 %tobool.i = icmp ne i64 %0, 0
@@ -71,7 +71,7 @@
 entry:
   %s64 = getelementptr inbounds %struct.obj2, %struct.obj2* %o, i64 0, i32 0
 ; CHECK-NOT: load 
-  %0 = load i64* %s64, align 8
+  %0 = load i64, i64* %s64, align 8
 ; CHECK: decq ({{.*}})
   %dec = add i64 %0, -1
   store i64 %dec, i64* %s64, align 8
@@ -82,7 +82,7 @@
 if.end:
   %s32 = getelementptr inbounds %struct.obj2, %struct.obj2* %o, i64 0, i32 1
 ; CHECK-NOT: load 
-  %1 = load i32* %s32, align 4
+  %1 = load i32, i32* %s32, align 4
 ; CHECK: decl {{[0-9][0-9]*}}({{.*}})
   %dec1 = add i32 %1, -1
   store i32 %dec1, i32* %s32, align 4
@@ -93,7 +93,7 @@
 if.end1:
   %s16 = getelementptr inbounds %struct.obj2, %struct.obj2* %o, i64 0, i32 2
 ; CHECK-NOT: load 
-  %2 = load i16* %s16, align 2
+  %2 = load i16, i16* %s16, align 2
 ; CHECK: decw {{[0-9][0-9]*}}({{.*}})
   %dec2 = add i16 %2, -1
   store i16 %dec2, i16* %s16, align 2
@@ -104,7 +104,7 @@
 if.end2:
   %s8 = getelementptr inbounds %struct.obj2, %struct.obj2* %o, i64 0, i32 3
 ; CHECK-NOT: load 
-  %3 = load i8* %s8
+  %3 = load i8, i8* %s8
 ; CHECK: decb {{[0-9][0-9]*}}({{.*}})
   %dec3 = add i8 %3, -1
   store i8 %dec3, i8* %s8
@@ -125,7 +125,7 @@
 entry:
   %s64 = getelementptr inbounds %struct.obj2, %struct.obj2* %o, i64 0, i32 0
 ; CHECK-NOT: load 
-  %0 = load i64* %s64, align 8
+  %0 = load i64, i64* %s64, align 8
 ; CHECK: incq ({{.*}})
   %inc = add i64 %0, 1
   store i64 %inc, i64* %s64, align 8
@@ -136,7 +136,7 @@
 if.end:
   %s32 = getelementptr inbounds %struct.obj2, %struct.obj2* %o, i64 0, i32 1
 ; CHECK-NOT: load 
-  %1 = load i32* %s32, align 4
+  %1 = load i32, i32* %s32, align 4
 ; CHECK: incl {{[0-9][0-9]*}}({{.*}})
   %inc1 = add i32 %1, 1
   store i32 %inc1, i32* %s32, align 4
@@ -147,7 +147,7 @@
 if.end1:
   %s16 = getelementptr inbounds %struct.obj2, %struct.obj2* %o, i64 0, i32 2
 ; CHECK-NOT: load 
-  %2 = load i16* %s16, align 2
+  %2 = load i16, i16* %s16, align 2
 ; CHECK: incw {{[0-9][0-9]*}}({{.*}})
   %inc2 = add i16 %2, 1
   store i16 %inc2, i16* %s16, align 2
@@ -158,7 +158,7 @@
 if.end2:
   %s8 = getelementptr inbounds %struct.obj2, %struct.obj2* %o, i64 0, i32 3
 ; CHECK-NOT: load 
-  %3 = load i8* %s8
+  %3 = load i8, i8* %s8
 ; CHECK: incb {{[0-9][0-9]*}}({{.*}})
   %inc3 = add i8 %3, 1
   store i8 %inc3, i8* %s8
@@ -181,9 +181,9 @@
 entry:
 ; CHECK-LABEL: test3:
 ; CHECK: decq 16(%rax)
-  %0 = load i64** @foo, align 8
+  %0 = load i64*, i64** @foo, align 8
   %arrayidx = getelementptr inbounds i64, i64* %0, i64 2
-  %1 = load i64* %arrayidx, align 8
+  %1 = load i64, i64* %arrayidx, align 8
   %dec = add i64 %1, -1
   store i64 %dec, i64* %arrayidx, align 8
   %cmp = icmp eq i64 %dec, 0
@@ -209,8 +209,8 @@
 
 define void @test4() nounwind uwtable ssp {
 entry:
-  %0 = load i32* @x, align 4
-  %1 = load i32* @y, align 4
+  %0 = load i32, i32* @x, align 4
+  %1 = load i32, i32* @y, align 4
   %dec = add nsw i32 %1, -1
   store i32 %dec, i32* @y, align 4
   %tobool.i = icmp ne i32 %dec, 0
diff --git a/llvm/test/CodeGen/X86/regalloc-reconcile-broken-hints.ll b/llvm/test/CodeGen/X86/regalloc-reconcile-broken-hints.ll
index 3d4f4fa..016b0d1 100644
--- a/llvm/test/CodeGen/X86/regalloc-reconcile-broken-hints.ll
+++ b/llvm/test/CodeGen/X86/regalloc-reconcile-broken-hints.ll
@@ -46,7 +46,7 @@
   %next = getelementptr inbounds i8, i8* %call, i64 8
   %tmp = bitcast i8* %next to %struct._list**
   %tmp2 = bitcast i8* %call to %struct._list*
-  %.pre78 = load i32* @ncol, align 4
+  %.pre78 = load i32, i32* @ncol, align 4
   br label %for.cond1.preheader
 
 for.cond1.preheader:                              ; preds = %for.inc32, %entry
@@ -60,7 +60,7 @@
   %row.172 = phi i32 [ %row.3, %if.end31 ], [ 0, %for.cond1.preheader ]
   %col.071 = phi i32 [ %inc, %if.end31 ], [ 0, %for.cond1.preheader ]
   %call4 = tail call i32* @make_data()
-  %tmp5 = load i32* @ncol, align 4
+  %tmp5 = load i32, i32* @ncol, align 4
   %tobool14.i = icmp eq i32 %tmp5, 0
   br i1 %tobool14.i, label %while.cond.i, label %while.body.lr.ph.i
 
@@ -84,9 +84,9 @@
 
 land.rhs.i:                                       ; preds = %while.cond.i
   %arrayidx.i67 = getelementptr inbounds i32, i32* %call4, i64 %indvars.iv.next.i65
-  %tmp11 = load i32* %arrayidx.i67, align 4
+  %tmp11 = load i32, i32* %arrayidx.i67, align 4
   %arrayidx2.i68 = getelementptr inbounds i32, i32* %data, i64 %indvars.iv.next.i65
-  %tmp12 = load i32* %arrayidx2.i68, align 4
+  %tmp12 = load i32, i32* %arrayidx2.i68, align 4
   %cmp.i69 = icmp eq i32 %tmp11, %tmp12
   br i1 %cmp.i69, label %while.cond.i, label %equal_data.exit
 
@@ -96,14 +96,14 @@
 
 if.then:                                          ; preds = %equal_data.exit
   %next7 = getelementptr inbounds %struct._list, %struct._list* %current.173, i64 0, i32 1
-  %tmp14 = load %struct._list** %next7, align 8
+  %tmp14 = load %struct._list*, %struct._list** %next7, align 8
   %next12 = getelementptr inbounds %struct._list, %struct._list* %tmp14, i64 0, i32 1
   store %struct._list* null, %struct._list** %next12, align 8
-  %tmp15 = load %struct._list** %next7, align 8
-  %tmp16 = load i32* %value, align 4
+  %tmp15 = load %struct._list*, %struct._list** %next7, align 8
+  %tmp16 = load i32, i32* %value, align 4
   %cmp14 = icmp eq i32 %tmp16, 1
   %.tmp16 = select i1 %cmp14, i32 0, i32 %tmp16
-  %tmp18 = load i32* %all, align 4
+  %tmp18 = load i32, i32* %all, align 4
   %tmp19 = or i32 %tmp18, %.tmp16
   %tmp20 = icmp eq i32 %tmp19, 0
   br i1 %tmp20, label %if.then19, label %if.end31
@@ -123,12 +123,12 @@
   %row.3 = phi i32 [ %.row.172, %if.else ], [ %row.172, %if.then ], [ 0, %if.then19 ]
   %current.2 = phi %struct._list* [ %current.173, %if.else ], [ %tmp15, %if.then ], [ %tmp15, %if.then19 ]
   %inc = add nsw i32 %col.1, 1
-  %tmp25 = load i32* @ncol, align 4
+  %tmp25 = load i32, i32* @ncol, align 4
   %cmp2 = icmp eq i32 %inc, %tmp25
   br i1 %cmp2, label %for.cond1.for.inc32_crit_edge, label %for.body3
 
 for.cond1.for.inc32_crit_edge:                    ; preds = %if.end31
-  %.pre79 = load i32* @nrow, align 4
+  %.pre79 = load i32, i32* @nrow, align 4
   br label %for.inc32
 
 for.inc32:                                        ; preds = %for.cond1.for.inc32_crit_edge, %for.cond1.preheader
@@ -140,6 +140,6 @@
   br i1 %cmp, label %for.end34, label %for.cond1.preheader
 
 for.end34:                                        ; preds = %for.inc32
-  %.pre = load %struct._list** %tmp, align 8
+  %.pre = load %struct._list*, %struct._list** %tmp, align 8
   ret %struct._list* %.pre
 }
diff --git a/llvm/test/CodeGen/X86/regpressure.ll b/llvm/test/CodeGen/X86/regpressure.ll
index 19f5ef8..8f352b8 100644
--- a/llvm/test/CodeGen/X86/regpressure.ll
+++ b/llvm/test/CodeGen/X86/regpressure.ll
@@ -9,57 +9,57 @@
 ;; folded into the multiplies, 2 registers otherwise.
 
 define i32 @regpressure1(i32* %P) {
-	%A = load i32* %P		; <i32> [#uses=1]
+	%A = load i32, i32* %P		; <i32> [#uses=1]
 	%Bp = getelementptr i32, i32* %P, i32 1		; <i32*> [#uses=1]
-	%B = load i32* %Bp		; <i32> [#uses=1]
+	%B = load i32, i32* %Bp		; <i32> [#uses=1]
 	%s1 = mul i32 %A, %B		; <i32> [#uses=1]
 	%Cp = getelementptr i32, i32* %P, i32 2		; <i32*> [#uses=1]
-	%C = load i32* %Cp		; <i32> [#uses=1]
+	%C = load i32, i32* %Cp		; <i32> [#uses=1]
 	%s2 = mul i32 %s1, %C		; <i32> [#uses=1]
 	%Dp = getelementptr i32, i32* %P, i32 3		; <i32*> [#uses=1]
-	%D = load i32* %Dp		; <i32> [#uses=1]
+	%D = load i32, i32* %Dp		; <i32> [#uses=1]
 	%s3 = mul i32 %s2, %D		; <i32> [#uses=1]
 	%Ep = getelementptr i32, i32* %P, i32 4		; <i32*> [#uses=1]
-	%E = load i32* %Ep		; <i32> [#uses=1]
+	%E = load i32, i32* %Ep		; <i32> [#uses=1]
 	%s4 = mul i32 %s3, %E		; <i32> [#uses=1]
 	%Fp = getelementptr i32, i32* %P, i32 5		; <i32*> [#uses=1]
-	%F = load i32* %Fp		; <i32> [#uses=1]
+	%F = load i32, i32* %Fp		; <i32> [#uses=1]
 	%s5 = mul i32 %s4, %F		; <i32> [#uses=1]
 	%Gp = getelementptr i32, i32* %P, i32 6		; <i32*> [#uses=1]
-	%G = load i32* %Gp		; <i32> [#uses=1]
+	%G = load i32, i32* %Gp		; <i32> [#uses=1]
 	%s6 = mul i32 %s5, %G		; <i32> [#uses=1]
 	%Hp = getelementptr i32, i32* %P, i32 7		; <i32*> [#uses=1]
-	%H = load i32* %Hp		; <i32> [#uses=1]
+	%H = load i32, i32* %Hp		; <i32> [#uses=1]
 	%s7 = mul i32 %s6, %H		; <i32> [#uses=1]
 	%Ip = getelementptr i32, i32* %P, i32 8		; <i32*> [#uses=1]
-	%I = load i32* %Ip		; <i32> [#uses=1]
+	%I = load i32, i32* %Ip		; <i32> [#uses=1]
 	%s8 = mul i32 %s7, %I		; <i32> [#uses=1]
 	%Jp = getelementptr i32, i32* %P, i32 9		; <i32*> [#uses=1]
-	%J = load i32* %Jp		; <i32> [#uses=1]
+	%J = load i32, i32* %Jp		; <i32> [#uses=1]
 	%s9 = mul i32 %s8, %J		; <i32> [#uses=1]
 	ret i32 %s9
 }
 
 define i32 @regpressure2(i32* %P) {
-	%A = load i32* %P		; <i32> [#uses=1]
+	%A = load i32, i32* %P		; <i32> [#uses=1]
 	%Bp = getelementptr i32, i32* %P, i32 1		; <i32*> [#uses=1]
-	%B = load i32* %Bp		; <i32> [#uses=1]
+	%B = load i32, i32* %Bp		; <i32> [#uses=1]
 	%Cp = getelementptr i32, i32* %P, i32 2		; <i32*> [#uses=1]
-	%C = load i32* %Cp		; <i32> [#uses=1]
+	%C = load i32, i32* %Cp		; <i32> [#uses=1]
 	%Dp = getelementptr i32, i32* %P, i32 3		; <i32*> [#uses=1]
-	%D = load i32* %Dp		; <i32> [#uses=1]
+	%D = load i32, i32* %Dp		; <i32> [#uses=1]
 	%Ep = getelementptr i32, i32* %P, i32 4		; <i32*> [#uses=1]
-	%E = load i32* %Ep		; <i32> [#uses=1]
+	%E = load i32, i32* %Ep		; <i32> [#uses=1]
 	%Fp = getelementptr i32, i32* %P, i32 5		; <i32*> [#uses=1]
-	%F = load i32* %Fp		; <i32> [#uses=1]
+	%F = load i32, i32* %Fp		; <i32> [#uses=1]
 	%Gp = getelementptr i32, i32* %P, i32 6		; <i32*> [#uses=1]
-	%G = load i32* %Gp		; <i32> [#uses=1]
+	%G = load i32, i32* %Gp		; <i32> [#uses=1]
 	%Hp = getelementptr i32, i32* %P, i32 7		; <i32*> [#uses=1]
-	%H = load i32* %Hp		; <i32> [#uses=1]
+	%H = load i32, i32* %Hp		; <i32> [#uses=1]
 	%Ip = getelementptr i32, i32* %P, i32 8		; <i32*> [#uses=1]
-	%I = load i32* %Ip		; <i32> [#uses=1]
+	%I = load i32, i32* %Ip		; <i32> [#uses=1]
 	%Jp = getelementptr i32, i32* %P, i32 9		; <i32*> [#uses=1]
-	%J = load i32* %Jp		; <i32> [#uses=1]
+	%J = load i32, i32* %Jp		; <i32> [#uses=1]
 	%s1 = mul i32 %A, %B		; <i32> [#uses=1]
 	%s2 = mul i32 %s1, %C		; <i32> [#uses=1]
 	%s3 = mul i32 %s2, %D		; <i32> [#uses=1]
@@ -73,25 +73,25 @@
 }
 
 define i32 @regpressure3(i16* %P, i1 %Cond, i32* %Other) {
-	%A = load i16* %P		; <i16> [#uses=1]
+	%A = load i16, i16* %P		; <i16> [#uses=1]
 	%Bp = getelementptr i16, i16* %P, i32 1		; <i16*> [#uses=1]
-	%B = load i16* %Bp		; <i16> [#uses=1]
+	%B = load i16, i16* %Bp		; <i16> [#uses=1]
 	%Cp = getelementptr i16, i16* %P, i32 2		; <i16*> [#uses=1]
-	%C = load i16* %Cp		; <i16> [#uses=1]
+	%C = load i16, i16* %Cp		; <i16> [#uses=1]
 	%Dp = getelementptr i16, i16* %P, i32 3		; <i16*> [#uses=1]
-	%D = load i16* %Dp		; <i16> [#uses=1]
+	%D = load i16, i16* %Dp		; <i16> [#uses=1]
 	%Ep = getelementptr i16, i16* %P, i32 4		; <i16*> [#uses=1]
-	%E = load i16* %Ep		; <i16> [#uses=1]
+	%E = load i16, i16* %Ep		; <i16> [#uses=1]
 	%Fp = getelementptr i16, i16* %P, i32 5		; <i16*> [#uses=1]
-	%F = load i16* %Fp		; <i16> [#uses=1]
+	%F = load i16, i16* %Fp		; <i16> [#uses=1]
 	%Gp = getelementptr i16, i16* %P, i32 6		; <i16*> [#uses=1]
-	%G = load i16* %Gp		; <i16> [#uses=1]
+	%G = load i16, i16* %Gp		; <i16> [#uses=1]
 	%Hp = getelementptr i16, i16* %P, i32 7		; <i16*> [#uses=1]
-	%H = load i16* %Hp		; <i16> [#uses=1]
+	%H = load i16, i16* %Hp		; <i16> [#uses=1]
 	%Ip = getelementptr i16, i16* %P, i32 8		; <i16*> [#uses=1]
-	%I = load i16* %Ip		; <i16> [#uses=1]
+	%I = load i16, i16* %Ip		; <i16> [#uses=1]
 	%Jp = getelementptr i16, i16* %P, i32 9		; <i16*> [#uses=1]
-	%J = load i16* %Jp		; <i16> [#uses=1]
+	%J = load i16, i16* %Jp		; <i16> [#uses=1]
 	%A.upgrd.1 = sext i16 %A to i32		; <i32> [#uses=1]
 	%B.upgrd.2 = sext i16 %B to i32		; <i32> [#uses=1]
 	%D.upgrd.3 = sext i16 %D to i32		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/remat-constant.ll b/llvm/test/CodeGen/X86/remat-constant.ll
index 3e81320..5a6826f 100644
--- a/llvm/test/CodeGen/X86/remat-constant.ll
+++ b/llvm/test/CodeGen/X86/remat-constant.ll
@@ -7,7 +7,7 @@
 declare void @qux(float %f) nounwind 
 
 define void @foo() nounwind  {
-  %f = load float* @a
+  %f = load float, float* @a
   call void @bar()
   call void @qux(float %f)
   call void @qux(float %f)
diff --git a/llvm/test/CodeGen/X86/remat-fold-load.ll b/llvm/test/CodeGen/X86/remat-fold-load.ll
index 185bda1..3478033b 100644
--- a/llvm/test/CodeGen/X86/remat-fold-load.ll
+++ b/llvm/test/CodeGen/X86/remat-fold-load.ll
@@ -40,24 +40,24 @@
   %call4.i.i.i.i68 = call noalias i8* @malloc(i32 undef) nounwind
   %tmp1 = getelementptr inbounds %type_a, %type_a* %tmp, i32 0, i32 1, i32 0, i32 1
   %buf_6.i.i.i.i70 = bitcast %type_d* %tmp1 to i8**
-  %tmp2 = load i8** %buf_6.i.i.i.i70, align 4
+  %tmp2 = load i8*, i8** %buf_6.i.i.i.i70, align 4
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* %tmp2, i32 undef, i32 1, i1 false) nounwind
   unreachable
 
 if.else.i.i.i.i74:                                ; preds = %while.body12
   %i_.i.i.i.i72 = getelementptr inbounds %type_a, %type_a* %tmp, i32 0, i32 1, i32 0, i32 1, i32 0
-  %tmp3 = load i64* %i_.i.i.i.i72, align 4
+  %tmp3 = load i64, i64* %i_.i.i.i.i72, align 4
   %tmp4 = zext i64 %tmp3 to i128
   %tmp5 = shl nuw nsw i128 %tmp4, 32
   %ins148 = or i128 %tmp5, %ins151
   %second3.i.i76 = getelementptr inbounds %type_a, %type_a* %tmp, i32 0, i32 1, i32 1
-  %tmp6 = load i32* %second3.i.i76, align 4
+  %tmp6 = load i32, i32* %second3.i.i76, align 4
   %tmp7 = zext i32 %tmp6 to i128
   %tmp8 = shl nuw i128 %tmp7, 96
   %mask144 = and i128 %ins148, 79228162495817593519834398720
-  %tmp9 = load %type_e** undef, align 4
+  %tmp9 = load %type_e*, %type_e** undef, align 4
   %len_.i.i.i.i86 = getelementptr inbounds %type_e, %type_e* %tmp9, i32 0, i32 0, i32 0
-  %tmp10 = load i32* %len_.i.i.i.i86, align 4
+  %tmp10 = load i32, i32* %len_.i.i.i.i86, align 4
   %tmp11 = zext i32 %tmp10 to i128
   %ins135 = or i128 %tmp11, %ins135156160
   %cmp.i.i.i.i.i88 = icmp sgt i32 %tmp10, 8
@@ -68,7 +68,7 @@
   %ins126 = or i128 0, %ins135
   %tmp12 = getelementptr inbounds %type_e, %type_e* %tmp9, i32 0, i32 0, i32 1
   %buf_6.i.i.i.i91 = bitcast %type_d* %tmp12 to i8**
-  %tmp13 = load i8** %buf_6.i.i.i.i91, align 4
+  %tmp13 = load i8*, i8** %buf_6.i.i.i.i91, align 4
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %call4.i.i.i.i89, i8* %tmp13, i32 %tmp10, i32 1, i1 false) nounwind
   br label %A
 
@@ -79,7 +79,7 @@
 A:                                                ; preds = %if.else.i.i.i.i95, %if.then.i.i.i.i92
   %ins135157 = phi i128 [ %ins126, %if.then.i.i.i.i92 ], [ undef, %if.else.i.i.i.i95 ]
   %second3.i.i97 = getelementptr inbounds %type_e, %type_e* %tmp9, i32 0, i32 1
-  %tmp14 = load i64* %second3.i.i97, align 4
+  %tmp14 = load i64, i64* %second3.i.i97, align 4
   %tmp15 = trunc i64 %tmp14 to i32
   %cmp.i99 = icmp sgt i32 %tmp6, %tmp15
   %tmp16 = trunc i128 %ins135157 to i32
@@ -118,13 +118,13 @@
 if.end24:                                         ; preds = %E, %C
   %phitmp = or i128 %tmp8, %mask144
   %phitmp158 = or i128 undef, undef
-  %tmp18 = load %type_a** undef, align 4
-  %tmp19 = load %type_a** undef, align 4
+  %tmp18 = load %type_a*, %type_a** undef, align 4
+  %tmp19 = load %type_a*, %type_a** undef, align 4
   %cmp.i49 = icmp eq %type_a* %tmp18, %tmp19
   br i1 %cmp.i49, label %while.cond10.while.end26_crit_edge, label %while.body12
 
 while.cond10.while.end26_crit_edge:               ; preds = %if.end24
-  %.pre = load %type_e** undef, align 4
+  %.pre = load %type_e*, %type_e** undef, align 4
   br label %while.end26
 
 while.end26:                                      ; preds = %while.cond10.while.end26_crit_edge, %while.end.while.end26_crit_edge
diff --git a/llvm/test/CodeGen/X86/remat-invalid-liveness.ll b/llvm/test/CodeGen/X86/remat-invalid-liveness.ll
index 92f75e1..c6b43b0 100644
--- a/llvm/test/CodeGen/X86/remat-invalid-liveness.ll
+++ b/llvm/test/CodeGen/X86/remat-invalid-liveness.ll
@@ -40,7 +40,7 @@
 
 sw.bb.i:                                          ; preds = %entry
   %call.i.i.i = tail call i32 undef(%struct.A* %ht, i8 zeroext 22, i32 undef, i32 0, %struct.D* undef)
-  %bf.load.i.i = load i128* undef, align 4
+  %bf.load.i.i = load i128, i128* undef, align 4
   %bf.lshr.i.i = lshr i128 %bf.load.i.i, %const72
   %shl1.i.i = shl nuw nsw i128 %bf.lshr.i.i, 8
   %shl.i.i = trunc i128 %shl1.i.i to i32
@@ -59,13 +59,13 @@
 
 cond.end12.i.i:                                   ; preds = %cond.false10.i.i, %__XXX2.exit.i.i
   %.sink.in.i.i = phi i8** [ %arrayidx.i.i.i, %__XXX2.exit.i.i ], [ %arrayidx.i6.i.i, %cond.false10.i.i ]
-  %.sink.i.i = load i8** %.sink.in.i.i, align 4
+  %.sink.i.i = load i8*, i8** %.sink.in.i.i, align 4
   %tmp = bitcast i8* %.sink.i.i to %union.E*
   br i1 undef, label %for.body.i.i, label %if.end196
 
 for.body.i.i:                                     ; preds = %for.body.i.i, %cond.end12.i.i
   %weak.i.i = getelementptr inbounds %union.E, %union.E* %tmp, i32 undef, i32 0
-  %tmp1 = load i32* %weak.i.i, align 4
+  %tmp1 = load i32, i32* %weak.i.i, align 4
   %cmp36.i.i = icmp ne i32 %tmp1, %shl.i.i
   %or.cond = and i1 %cmp36.i.i, false
   br i1 %or.cond, label %for.body.i.i, label %if.end196
diff --git a/llvm/test/CodeGen/X86/remat-scalar-zero.ll b/llvm/test/CodeGen/X86/remat-scalar-zero.ll
index d577d6d..0f08193 100644
--- a/llvm/test/CodeGen/X86/remat-scalar-zero.ll
+++ b/llvm/test/CodeGen/X86/remat-scalar-zero.ll
@@ -10,39 +10,39 @@
 
 define void @foo(double* nocapture %x, double* nocapture %y) nounwind {
 entry:
-  %tmp1 = load double* %x                         ; <double> [#uses=1]
+  %tmp1 = load double, double* %x                         ; <double> [#uses=1]
   %arrayidx4 = getelementptr inbounds double, double* %x, i64 1 ; <double*> [#uses=1]
-  %tmp5 = load double* %arrayidx4                 ; <double> [#uses=1]
+  %tmp5 = load double, double* %arrayidx4                 ; <double> [#uses=1]
   %arrayidx8 = getelementptr inbounds double, double* %x, i64 2 ; <double*> [#uses=1]
-  %tmp9 = load double* %arrayidx8                 ; <double> [#uses=1]
+  %tmp9 = load double, double* %arrayidx8                 ; <double> [#uses=1]
   %arrayidx12 = getelementptr inbounds double, double* %x, i64 3 ; <double*> [#uses=1]
-  %tmp13 = load double* %arrayidx12               ; <double> [#uses=1]
+  %tmp13 = load double, double* %arrayidx12               ; <double> [#uses=1]
   %arrayidx16 = getelementptr inbounds double, double* %x, i64 4 ; <double*> [#uses=1]
-  %tmp17 = load double* %arrayidx16               ; <double> [#uses=1]
+  %tmp17 = load double, double* %arrayidx16               ; <double> [#uses=1]
   %arrayidx20 = getelementptr inbounds double, double* %x, i64 5 ; <double*> [#uses=1]
-  %tmp21 = load double* %arrayidx20               ; <double> [#uses=1]
+  %tmp21 = load double, double* %arrayidx20               ; <double> [#uses=1]
   %arrayidx24 = getelementptr inbounds double, double* %x, i64 6 ; <double*> [#uses=1]
-  %tmp25 = load double* %arrayidx24               ; <double> [#uses=1]
+  %tmp25 = load double, double* %arrayidx24               ; <double> [#uses=1]
   %arrayidx28 = getelementptr inbounds double, double* %x, i64 7 ; <double*> [#uses=1]
-  %tmp29 = load double* %arrayidx28               ; <double> [#uses=1]
+  %tmp29 = load double, double* %arrayidx28               ; <double> [#uses=1]
   %arrayidx32 = getelementptr inbounds double, double* %x, i64 8 ; <double*> [#uses=1]
-  %tmp33 = load double* %arrayidx32               ; <double> [#uses=1]
+  %tmp33 = load double, double* %arrayidx32               ; <double> [#uses=1]
   %arrayidx36 = getelementptr inbounds double, double* %x, i64 9 ; <double*> [#uses=1]
-  %tmp37 = load double* %arrayidx36               ; <double> [#uses=1]
+  %tmp37 = load double, double* %arrayidx36               ; <double> [#uses=1]
   %arrayidx40 = getelementptr inbounds double, double* %x, i64 10 ; <double*> [#uses=1]
-  %tmp41 = load double* %arrayidx40               ; <double> [#uses=1]
+  %tmp41 = load double, double* %arrayidx40               ; <double> [#uses=1]
   %arrayidx44 = getelementptr inbounds double, double* %x, i64 11 ; <double*> [#uses=1]
-  %tmp45 = load double* %arrayidx44               ; <double> [#uses=1]
+  %tmp45 = load double, double* %arrayidx44               ; <double> [#uses=1]
   %arrayidx48 = getelementptr inbounds double, double* %x, i64 12 ; <double*> [#uses=1]
-  %tmp49 = load double* %arrayidx48               ; <double> [#uses=1]
+  %tmp49 = load double, double* %arrayidx48               ; <double> [#uses=1]
   %arrayidx52 = getelementptr inbounds double, double* %x, i64 13 ; <double*> [#uses=1]
-  %tmp53 = load double* %arrayidx52               ; <double> [#uses=1]
+  %tmp53 = load double, double* %arrayidx52               ; <double> [#uses=1]
   %arrayidx56 = getelementptr inbounds double, double* %x, i64 14 ; <double*> [#uses=1]
-  %tmp57 = load double* %arrayidx56               ; <double> [#uses=1]
+  %tmp57 = load double, double* %arrayidx56               ; <double> [#uses=1]
   %arrayidx60 = getelementptr inbounds double, double* %x, i64 15 ; <double*> [#uses=1]
-  %tmp61 = load double* %arrayidx60               ; <double> [#uses=1]
+  %tmp61 = load double, double* %arrayidx60               ; <double> [#uses=1]
   %arrayidx64 = getelementptr inbounds double, double* %x, i64 16 ; <double*> [#uses=1]
-  %tmp65 = load double* %arrayidx64               ; <double> [#uses=1]
+  %tmp65 = load double, double* %arrayidx64               ; <double> [#uses=1]
   %div = fdiv double %tmp1, 0.000000e+00          ; <double> [#uses=1]
   store double %div, double* %y
   %div70 = fdiv double %tmp5, 2.000000e-01        ; <double> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/reverse_branches.ll b/llvm/test/CodeGen/X86/reverse_branches.ll
index 24b2da9..779e1cd 100644
--- a/llvm/test/CodeGen/X86/reverse_branches.ll
+++ b/llvm/test/CodeGen/X86/reverse_branches.ll
@@ -68,7 +68,7 @@
 do.body.i:
   %n.addr.0.i = phi i64 [ %dec.i, %do.cond.i ], [ 1000, %for.body20 ]
   %p.0.i = phi i8* [ %incdec.ptr.i, %do.cond.i ], [ %arraydecay24, %for.body20 ]
-  %2 = load i8* %p.0.i, align 1
+  %2 = load i8, i8* %p.0.i, align 1
   %cmp3.i = icmp eq i8 %2, 120
   br i1 %cmp3.i, label %exit, label %do.cond.i
 
diff --git a/llvm/test/CodeGen/X86/rip-rel-address.ll b/llvm/test/CodeGen/X86/rip-rel-address.ll
index 24ff07b..b49d597 100644
--- a/llvm/test/CodeGen/X86/rip-rel-address.ll
+++ b/llvm/test/CodeGen/X86/rip-rel-address.ll
@@ -6,7 +6,7 @@
 
 @a = internal global double 3.4
 define double @foo() nounwind {
-  %a = load double* @a
+  %a = load double, double* @a
   ret double %a
   
 ; PIC64:    movsd	_a(%rip), %xmm0
diff --git a/llvm/test/CodeGen/X86/rot32.ll b/llvm/test/CodeGen/X86/rot32.ll
index 7bdd606..5738f70f 100644
--- a/llvm/test/CodeGen/X86/rot32.ll
+++ b/llvm/test/CodeGen/X86/rot32.ll
@@ -61,7 +61,7 @@
 entry:
 ; BMI2-LABEL: xfoop:
 ; BMI2: rorxl $25, ({{.+}}), %{{.+}}
-	%x = load i32* %p
+	%x = load i32, i32* %p
 	%a = lshr i32 %x, 25
 	%b = shl i32 %x, 7
 	%c = or i32 %a, %b
@@ -94,7 +94,7 @@
 entry:
 ; BMI2-LABEL: xunp:
 ; BMI2: rorxl $7, ({{.+}}), %{{.+}}
-	%x = load i32* %p
+	%x = load i32, i32* %p
 	%a = lshr i32 %x, 7
 	%b = shl i32 %x, 25
 	%c = or i32 %a, %b
diff --git a/llvm/test/CodeGen/X86/rot64.ll b/llvm/test/CodeGen/X86/rot64.ll
index e19a35d..f77bde0 100644
--- a/llvm/test/CodeGen/X86/rot64.ll
+++ b/llvm/test/CodeGen/X86/rot64.ll
@@ -55,7 +55,7 @@
 entry:
 ; BMI2-LABEL: xfoop:
 ; BMI2: rorxq $57, ({{.+}}), %{{.+}}
-	%x = load i64* %p
+	%x = load i64, i64* %p
 	%a = lshr i64 %x, 57
 	%b = shl i64 %x, 7
 	%c = or i64 %a, %b
@@ -84,7 +84,7 @@
 entry:
 ; BMI2-LABEL: xunp:
 ; BMI2: rorxq $7, ({{.+}}), %{{.+}}
-	%x = load i64* %p
+	%x = load i64, i64* %p
 	%a = lshr i64 %x, 7
 	%b = shl i64 %x, 57
 	%c = or i64 %a, %b
diff --git a/llvm/test/CodeGen/X86/rotate4.ll b/llvm/test/CodeGen/X86/rotate4.ll
index 5372612..56a7d32 100644
--- a/llvm/test/CodeGen/X86/rotate4.ll
+++ b/llvm/test/CodeGen/X86/rotate4.ll
@@ -68,7 +68,7 @@
 ; no store:
 ; CHECK-NOT: mov
 entry:
-  %a = load i32* %pa, align 16
+  %a = load i32, i32* %pa, align 16
   %and = and i32 %b, 31
   %shl = shl i32 %a, %and
   %0 = sub i32 0, %b
@@ -86,7 +86,7 @@
 ; no store:
 ; CHECK-NOT: mov
 entry:
-  %a = load i32* %pa, align 16
+  %a = load i32, i32* %pa, align 16
   %and = and i32 %b, 31
   %shl = lshr i32 %a, %and
   %0 = sub i32 0, %b
@@ -104,7 +104,7 @@
 ; no store:
 ; CHECK-NOT: mov
 entry:
-  %a = load i64* %pa, align 16
+  %a = load i64, i64* %pa, align 16
   %and = and i64 %b, 63
   %shl = shl i64 %a, %and
   %0 = sub i64 0, %b
@@ -122,7 +122,7 @@
 ; no store:
 ; CHECK-NOT: mov
 entry:
-  %a = load i64* %pa, align 16
+  %a = load i64, i64* %pa, align 16
   %and = and i64 %b, 63
   %shl = lshr i64 %a, %and
   %0 = sub i64 0, %b
diff --git a/llvm/test/CodeGen/X86/sandybridge-loads.ll b/llvm/test/CodeGen/X86/sandybridge-loads.ll
index b8c364e..2e31154 100644
--- a/llvm/test/CodeGen/X86/sandybridge-loads.ll
+++ b/llvm/test/CodeGen/X86/sandybridge-loads.ll
@@ -8,10 +8,10 @@
 ;CHECK: ret
 
 define void @wideloads(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
-  %v0 = load <8 x float>* %a, align 16  ; <---- unaligned!
-  %v1 = load <8 x float>* %b, align 32  ; <---- aligned!
+  %v0 = load <8 x float>, <8 x float>* %a, align 16  ; <---- unaligned!
+  %v1 = load <8 x float>, <8 x float>* %b, align 32  ; <---- aligned!
   %m0 = fcmp olt <8 x float> %v1, %v0
-  %v2 = load <8 x float>* %c, align 32  ; <---- aligned!
+  %v2 = load <8 x float>, <8 x float>* %c, align 32  ; <---- aligned!
   %m1 = fcmp olt <8 x float> %v2, %v0
   %mand = and <8 x i1> %m1, %m0
   %r = zext <8 x i1> %mand to <8 x i32>
@@ -30,8 +30,8 @@
 ;CHECK: ret
 
 define void @widestores(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
-  %v0 = load <8 x float>* %a, align 32
-  %v1 = load <8 x float>* %b, align 32
+  %v0 = load <8 x float>, <8 x float>* %a, align 32
+  %v1 = load <8 x float>, <8 x float>* %b, align 32
   store <8 x float> %v0, <8 x float>* %b, align 32 ; <--- aligned
   store <8 x float> %v1, <8 x float>* %a, align 16 ; <--- unaligned
   ret void
diff --git a/llvm/test/CodeGen/X86/scalar-extract.ll b/llvm/test/CodeGen/X86/scalar-extract.ll
index 2845838..b8ef5e7 100644
--- a/llvm/test/CodeGen/X86/scalar-extract.ll
+++ b/llvm/test/CodeGen/X86/scalar-extract.ll
@@ -6,7 +6,7 @@
 
 define void @foo(<2 x i16>* %A, <2 x i16>* %B) {
 entry:
-	%tmp1 = load <2 x i16>* %A		; <<2 x i16>> [#uses=1]
+	%tmp1 = load <2 x i16>, <2 x i16>* %A		; <<2 x i16>> [#uses=1]
 	store <2 x i16> %tmp1, <2 x i16>* %B
 	ret void
 }
diff --git a/llvm/test/CodeGen/X86/scalar_widen_div.ll b/llvm/test/CodeGen/X86/scalar_widen_div.ll
index 2d23fc4..1671f8f 100644
--- a/llvm/test/CodeGen/X86/scalar_widen_div.ll
+++ b/llvm/test/CodeGen/X86/scalar_widen_div.ll
@@ -17,17 +17,17 @@
   store <2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)** %nsource.addr
   store <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)** %dsource.addr
   store <2 x i32> addrspace(1)* %qdest, <2 x i32> addrspace(1)** %qdest.addr
-  %tmp = load <2 x i32> addrspace(1)** %qdest.addr
-  %tmp1 = load i32* %index
+  %tmp = load <2 x i32> addrspace(1)*, <2 x i32> addrspace(1)** %qdest.addr
+  %tmp1 = load i32, i32* %index
   %arrayidx = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %tmp, i32 %tmp1
-  %tmp2 = load <2 x i32> addrspace(1)** %nsource.addr
-  %tmp3 = load i32* %index
+  %tmp2 = load <2 x i32> addrspace(1)*, <2 x i32> addrspace(1)** %nsource.addr
+  %tmp3 = load i32, i32* %index
   %arrayidx4 = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %tmp2, i32 %tmp3
-  %tmp5 = load <2 x i32> addrspace(1)* %arrayidx4
-  %tmp6 = load <2 x i32> addrspace(1)** %dsource.addr
-  %tmp7 = load i32* %index
+  %tmp5 = load <2 x i32>, <2 x i32> addrspace(1)* %arrayidx4
+  %tmp6 = load <2 x i32> addrspace(1)*, <2 x i32> addrspace(1)** %dsource.addr
+  %tmp7 = load i32, i32* %index
   %arrayidx8 = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %tmp6, i32 %tmp7
-  %tmp9 = load <2 x i32> addrspace(1)* %arrayidx8
+  %tmp9 = load <2 x i32>, <2 x i32> addrspace(1)* %arrayidx8
   %tmp10 = sdiv <2 x i32> %tmp5, %tmp9
   store <2 x i32> %tmp10, <2 x i32> addrspace(1)* %arrayidx
   ret void
@@ -181,9 +181,9 @@
 for.body:
   %i.014 = phi i32 [ 0, %bb.nph ], [ %inc, %for.body ] 
   %arrayidx11 = getelementptr <3 x i32>, <3 x i32>* %dest, i32 %i.014
-  %tmp4 = load <3 x i32>* %arrayidx11 ; <<3 x i32>> [#uses=1]
+  %tmp4 = load <3 x i32>, <3 x i32>* %arrayidx11 ; <<3 x i32>> [#uses=1]
   %arrayidx7 = getelementptr inbounds <3 x i32>, <3 x i32>* %old, i32 %i.014
-  %tmp8 = load <3 x i32>* %arrayidx7 ; <<3 x i32>> [#uses=1]
+  %tmp8 = load <3 x i32>, <3 x i32>* %arrayidx7 ; <<3 x i32>> [#uses=1]
   %div = sdiv <3 x i32> %tmp4, %tmp8
   store <3 x i32> %div, <3 x i32>* %arrayidx11
   %inc = add nsw i32 %i.014, 1
diff --git a/llvm/test/CodeGen/X86/scalarize-bitcast.ll b/llvm/test/CodeGen/X86/scalarize-bitcast.ll
index f6b29ec..6de511f 100644
--- a/llvm/test/CodeGen/X86/scalarize-bitcast.ll
+++ b/llvm/test/CodeGen/X86/scalarize-bitcast.ll
@@ -6,7 +6,7 @@
 
 define void @mmxCombineMaskU(i32* nocapture %src, i32* nocapture %mask) nounwind {
 entry:
-	%tmp1 = load i32* %src		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* %src		; <i32> [#uses=1]
 	%0 = insertelement <2 x i32> undef, i32 %tmp1, i32 0		; <<2 x i32>> [#uses=1]
 	%1 = insertelement <2 x i32> %0, i32 0, i32 1		; <<2 x i32>> [#uses=1]
 	%conv.i.i = bitcast <2 x i32> %1 to <1 x i64>		; <<1 x i64>> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/scev-interchange.ll b/llvm/test/CodeGen/X86/scev-interchange.ll
index 0e7047b..e224c08 100644
--- a/llvm/test/CodeGen/X86/scev-interchange.ll
+++ b/llvm/test/CodeGen/X86/scev-interchange.ll
@@ -296,7 +296,7 @@
 
 bb15.i.i:		; preds = %bb16.preheader.i.i, %bb15.i.i
 	%j1.0212.i.i = phi i32 [ %1, %bb15.i.i ], [ 0, %bb16.preheader.i.i ]		; <i32> [#uses=2]
-	%tmp6.i.i195.i.i = load i32* undef, align 4		; <i32> [#uses=1]
+	%tmp6.i.i195.i.i = load i32, i32* undef, align 4		; <i32> [#uses=1]
 	%tmp231.i.i = mul i32 %0, %tmp6.i.i195.i.i		; <i32> [#uses=1]
 	%tmp13.i197.i.i = add i32 %j1.0212.i.i, %tmp231.i.i		; <i32> [#uses=0]
 	%1 = add i32 %j1.0212.i.i, 1		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/segmented-stacks.ll b/llvm/test/CodeGen/X86/segmented-stacks.ll
index 3e47121..4127288 100644
--- a/llvm/test/CodeGen/X86/segmented-stacks.ll
+++ b/llvm/test/CodeGen/X86/segmented-stacks.ll
@@ -152,7 +152,7 @@
 }
 
 define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
-       %addend = load i32 * %closure
+       %addend = load i32, i32 * %closure
        %result = add i32 %other, %addend
        %mem = alloca i32, i32 10
        call void @dummy_use (i32* %mem, i32 10)
diff --git a/llvm/test/CodeGen/X86/seh-safe-div.ll b/llvm/test/CodeGen/X86/seh-safe-div.ll
index e294f24..94d250a 100644
--- a/llvm/test/CodeGen/X86/seh-safe-div.ll
+++ b/llvm/test/CodeGen/X86/seh-safe-div.ll
@@ -58,7 +58,7 @@
   resume { i8*, i32 } %vals
 
 __try.cont:
-  %safe_ret = load i32* %r, align 4
+  %safe_ret = load i32, i32* %r, align 4
   ret i32 %safe_ret
 }
 
@@ -117,8 +117,8 @@
 
 define void @try_body(i32* %r, i32* %n, i32* %d) {
 entry:
-  %0 = load i32* %n, align 4
-  %1 = load i32* %d, align 4
+  %0 = load i32, i32* %n, align 4
+  %1 = load i32, i32* %d, align 4
   %div = sdiv i32 %0, %1
   store i32 %div, i32* %r, align 4
   ret void
@@ -146,8 +146,8 @@
 
 define i32 @safe_div_filt0(i8* %eh_ptrs, i8* %rbp) {
   %eh_ptrs_c = bitcast i8* %eh_ptrs to i32**
-  %eh_rec = load i32** %eh_ptrs_c
-  %eh_code = load i32* %eh_rec
+  %eh_rec = load i32*, i32** %eh_ptrs_c
+  %eh_code = load i32, i32* %eh_rec
   ; EXCEPTION_ACCESS_VIOLATION = 0xC0000005
   %cmp = icmp eq i32 %eh_code, 3221225477
   %filt.res = zext i1 %cmp to i32
@@ -156,8 +156,8 @@
 
 define i32 @safe_div_filt1(i8* %eh_ptrs, i8* %rbp) {
   %eh_ptrs_c = bitcast i8* %eh_ptrs to i32**
-  %eh_rec = load i32** %eh_ptrs_c
-  %eh_code = load i32* %eh_rec
+  %eh_rec = load i32*, i32** %eh_ptrs_c
+  %eh_code = load i32, i32* %eh_rec
   ; EXCEPTION_INT_DIVIDE_BY_ZERO = 0xC0000094
   %cmp = icmp eq i32 %eh_code, 3221225620
   %filt.res = zext i1 %cmp to i32
diff --git a/llvm/test/CodeGen/X86/select-with-and-or.ll b/llvm/test/CodeGen/X86/select-with-and-or.ll
index 1ccf30b..40af46b 100644
--- a/llvm/test/CodeGen/X86/select-with-and-or.ll
+++ b/llvm/test/CodeGen/X86/select-with-and-or.ll
@@ -62,7 +62,7 @@
 define <4 x i32> @test7(<4 x float> %a, <4 x float> %b, <4 x i32>* %p) {
   %f = fcmp ult <4 x float> %a, %b
   %s = sext <4 x i1> %f to <4 x i32>
-  %l = load <4 x i32>* %p
+  %l = load <4 x i32>, <4 x i32>* %p
   %r = and <4 x i32> %l, %s
   ret <4 x i32> %r
 ; CHECK: test7
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index e272045..a4e06b3 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -5,8 +5,8 @@
 %0 = type { i64, i32 }
 
 define i32 @test1(%0* %p, %0* %q, i1 %r) nounwind {
-  %t0 = load %0* %p
-  %t1 = load %0* %q
+  %t0 = load %0, %0* %p
+  %t1 = load %0, %0* %q
   %t4 = select i1 %r, %0 %t0, %0 %t1
   %t5 = extractvalue %0 %t4, 1
   ret i32 %t5
@@ -63,7 +63,7 @@
 	%0 = fcmp olt double %F, 4.200000e+01		; <i1> [#uses=1]
 	%iftmp.0.0 = select i1 %0, i32 4, i32 0		; <i32> [#uses=1]
 	%1 = getelementptr i8, i8* %P, i32 %iftmp.0.0		; <i8*> [#uses=1]
-	%2 = load i8* %1, align 1		; <i8> [#uses=1]
+	%2 = load i8, i8* %1, align 1		; <i8> [#uses=1]
 	ret i8 %2
 ; CHECK-LABEL: test4:
 ; CHECK: movsbl	({{.*}},4), %eax
@@ -82,8 +82,8 @@
 }
 
 define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind {
-        %tmp = load <4 x float>* %A             ; <<4 x float>> [#uses=1]
-        %tmp3 = load <4 x float>* %B            ; <<4 x float>> [#uses=2]
+        %tmp = load <4 x float>, <4 x float>* %A             ; <<4 x float>> [#uses=1]
+        %tmp3 = load <4 x float>, <4 x float>* %B            ; <<4 x float>> [#uses=2]
         %tmp9 = fmul <4 x float> %tmp3, %tmp3            ; <<4 x float>> [#uses=1]
         %tmp.upgrd.1 = icmp eq i32 %C, 0                ; <i1> [#uses=1]
         %iftmp.38.0 = select i1 %tmp.upgrd.1, <4 x float> %tmp9, <4 x float> %tmp               ; <<4 x float>> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/setcc-narrowing.ll b/llvm/test/CodeGen/X86/setcc-narrowing.ll
index 25cb2c82..bf5b450 100644
--- a/llvm/test/CodeGen/X86/setcc-narrowing.ll
+++ b/llvm/test/CodeGen/X86/setcc-narrowing.ll
@@ -10,7 +10,7 @@
 ; CHECK-NEXT: setne %al
 ; CHECK-NEXT: movzbl %al, %eax
 ; CHECK-NEXT: ret
-  %0 = load i64* @t1.global, align 8
+  %0 = load i64, i64* @t1.global, align 8
   %and = and i64 4294967295, %0
   %cmp = icmp sgt i64 %and, 0
   %conv = zext i1 %cmp to i32
diff --git a/llvm/test/CodeGen/X86/sext-load.ll b/llvm/test/CodeGen/X86/sext-load.ll
index e5eef33..2ea6e01 100644
--- a/llvm/test/CodeGen/X86/sext-load.ll
+++ b/llvm/test/CodeGen/X86/sext-load.ll
@@ -23,7 +23,7 @@
 entry:
   %b48 = getelementptr inbounds { i16, [6 x i8] }, { i16, [6 x i8] }* %this, i32 0, i32 1
   %cast = bitcast [6 x i8]* %b48 to i48*
-  %bf.load = load i48* %cast, align 2
+  %bf.load = load i48, i48* %cast, align 2
   %bf.ashr = ashr i48 %bf.load, 32
   %bf.cast = trunc i48 %bf.ashr to i32
   ret i32 %bf.cast
diff --git a/llvm/test/CodeGen/X86/sha.ll b/llvm/test/CodeGen/X86/sha.ll
index bf81e99..fe42637b 100644
--- a/llvm/test/CodeGen/X86/sha.ll
+++ b/llvm/test/CodeGen/X86/sha.ll
@@ -13,7 +13,7 @@
 
 define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
 entry:
-  %0 = load <4 x i32>* %b
+  %0 = load <4 x i32>, <4 x i32>* %b
   %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
   ret <4 x i32> %1
   ; CHECK: test_sha1rnds4rm
@@ -32,7 +32,7 @@
 
 define <4 x i32> @test_sha1nexterm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
 entry:
-  %0 = load <4 x i32>* %b
+  %0 = load <4 x i32>, <4 x i32>* %b
   %1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0)
   ret <4 x i32> %1
   ; CHECK: test_sha1nexterm
@@ -51,7 +51,7 @@
 
 define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
 entry:
-  %0 = load <4 x i32>* %b
+  %0 = load <4 x i32>, <4 x i32>* %b
   %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0)
   ret <4 x i32> %1
   ; CHECK: test_sha1msg1rm
@@ -70,7 +70,7 @@
 
 define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
 entry:
-  %0 = load <4 x i32>* %b
+  %0 = load <4 x i32>, <4 x i32>* %b
   %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0)
   ret <4 x i32> %1
   ; CHECK: test_sha1msg2rm
@@ -91,7 +91,7 @@
 
 define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, <4 x i32>* %b, <4 x i32> %c) nounwind uwtable {
 entry:
-  %0 = load <4 x i32>* %b
+  %0 = load <4 x i32>, <4 x i32>* %b
   %1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c)
   ret <4 x i32> %1
   ; CHECK: test_sha256rnds2rm
@@ -112,7 +112,7 @@
 
 define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
 entry:
-  %0 = load <4 x i32>* %b
+  %0 = load <4 x i32>, <4 x i32>* %b
   %1 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %0)
   ret <4 x i32> %1
   ; CHECK: test_sha256msg1rm
@@ -131,7 +131,7 @@
 
 define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, <4 x i32>* %b) nounwind uwtable {
 entry:
-  %0 = load <4 x i32>* %b
+  %0 = load <4 x i32>, <4 x i32>* %b
   %1 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %0)
   ret <4 x i32> %1
   ; CHECK: test_sha256msg2rm
diff --git a/llvm/test/CodeGen/X86/shift-and.ll b/llvm/test/CodeGen/X86/shift-and.ll
index d487368..edd43a3 100644
--- a/llvm/test/CodeGen/X86/shift-and.ll
+++ b/llvm/test/CodeGen/X86/shift-and.ll
@@ -38,7 +38,7 @@
 ; X64-NOT: andl
 ; X64: sarw
        %shamt = and i16 %t, 31
-       %tmp = load i16* @X
+       %tmp = load i16, i16* @X
        %tmp1 = ashr i16 %tmp, %shamt
        store i16 %tmp1, i16* @X
        ret void
@@ -71,7 +71,7 @@
 ; X64: decq
 ; X64: andq
   %shr = lshr i64 %key, 3
-  %0 = load i64* %val, align 8
+  %0 = load i64, i64* %val, align 8
   %sub = add i64 %0, 2305843009213693951
   %and = and i64 %sub, %shr
   ret i64 %and
diff --git a/llvm/test/CodeGen/X86/shift-bmi2.ll b/llvm/test/CodeGen/X86/shift-bmi2.ll
index 7615754..63b6ec5 100644
--- a/llvm/test/CodeGen/X86/shift-bmi2.ll
+++ b/llvm/test/CodeGen/X86/shift-bmi2.ll
@@ -27,7 +27,7 @@
 
 define i32 @shl32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
 entry:
-  %x = load i32* %p
+  %x = load i32, i32* %p
   %shl = shl i32 %x, %shamt
 ; BMI2: shl32p
 ; Source order scheduling prevents folding, rdar:14208996.
@@ -41,7 +41,7 @@
 
 define i32 @shl32pi(i32* %p) nounwind uwtable readnone {
 entry:
-  %x = load i32* %p
+  %x = load i32, i32* %p
   %shl = shl i32 %x, 5
 ; BMI2: shl32pi
 ; BMI2-NOT: shlxl
@@ -72,7 +72,7 @@
 
 define i64 @shl64p(i64* %p, i64 %shamt) nounwind uwtable readnone {
 entry:
-  %x = load i64* %p
+  %x = load i64, i64* %p
   %shl = shl i64 %x, %shamt
 ; BMI264: shl64p
 ; BMI264: shlxq %{{.+}}, %{{.+}}, %{{.+}}
@@ -82,7 +82,7 @@
 
 define i64 @shl64pi(i64* %p) nounwind uwtable readnone {
 entry:
-  %x = load i64* %p
+  %x = load i64, i64* %p
   %shl = shl i64 %x, 7
 ; BMI264: shl64pi
 ; BMI264-NOT: shlxq
@@ -104,7 +104,7 @@
 
 define i32 @lshr32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
 entry:
-  %x = load i32* %p
+  %x = load i32, i32* %p
   %shl = lshr i32 %x, %shamt
 ; BMI2: lshr32p
 ; Source order scheduling prevents folding, rdar:14208996.
@@ -127,7 +127,7 @@
 
 define i64 @lshr64p(i64* %p, i64 %shamt) nounwind uwtable readnone {
 entry:
-  %x = load i64* %p
+  %x = load i64, i64* %p
   %shl = lshr i64 %x, %shamt
 ; BMI264: lshr64p
 ; BMI264: shrxq %{{.+}}, %{{.+}}, %{{.+}}
@@ -149,7 +149,7 @@
 
 define i32 @ashr32p(i32* %p, i32 %shamt) nounwind uwtable readnone {
 entry:
-  %x = load i32* %p
+  %x = load i32, i32* %p
   %shl = ashr i32 %x, %shamt
 ; BMI2: ashr32p
 ; Source order scheduling prevents folding, rdar:14208996.
@@ -172,7 +172,7 @@
 
 define i64 @ashr64p(i64* %p, i64 %shamt) nounwind uwtable readnone {
 entry:
-  %x = load i64* %p
+  %x = load i64, i64* %p
   %shl = ashr i64 %x, %shamt
 ; BMI264: ashr64p
 ; BMI264: sarxq %{{.+}}, %{{.+}}, %{{.+}}
diff --git a/llvm/test/CodeGen/X86/shift-coalesce.ll b/llvm/test/CodeGen/X86/shift-coalesce.ll
index 5241042..dee7d37 100644
--- a/llvm/test/CodeGen/X86/shift-coalesce.ll
+++ b/llvm/test/CodeGen/X86/shift-coalesce.ll
@@ -6,7 +6,7 @@
 ; PR687
 
 define i64 @foo(i64 %x, i64* %X) {
-        %tmp.1 = load i64* %X           ; <i64> [#uses=1]
+        %tmp.1 = load i64, i64* %X           ; <i64> [#uses=1]
         %tmp.3 = trunc i64 %tmp.1 to i8         ; <i8> [#uses=1]
         %shift.upgrd.1 = zext i8 %tmp.3 to i64          ; <i64> [#uses=1]
         %tmp.4 = shl i64 %x, %shift.upgrd.1             ; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/shift-codegen.ll b/llvm/test/CodeGen/X86/shift-codegen.ll
index 88b8610..7d52bde 100644
--- a/llvm/test/CodeGen/X86/shift-codegen.ll
+++ b/llvm/test/CodeGen/X86/shift-codegen.ll
@@ -15,9 +15,9 @@
 ; CHECK-NOT: lea
 ; CHECK: ret
 
-  %tmp = load i32* @Y             ; <i32> [#uses=1]
+  %tmp = load i32, i32* @Y             ; <i32> [#uses=1]
   %tmp1 = shl i32 %tmp, 3         ; <i32> [#uses=1]
-  %tmp2 = load i32* @X            ; <i32> [#uses=1]
+  %tmp2 = load i32, i32* @X            ; <i32> [#uses=1]
   %tmp3 = or i32 %tmp1, %tmp2             ; <i32> [#uses=1]
   store i32 %tmp3, i32* @X
   ret void
diff --git a/llvm/test/CodeGen/X86/shift-combine.ll b/llvm/test/CodeGen/X86/shift-combine.ll
index 754464c..ec62bcd 100644
--- a/llvm/test/CodeGen/X86/shift-combine.ll
+++ b/llvm/test/CodeGen/X86/shift-combine.ll
@@ -13,7 +13,7 @@
   %tmp2 = lshr i32 %x, 2
   %tmp3 = and i32 %tmp2, 3
   %tmp4 = getelementptr [4 x i32], [4 x i32]* @array, i32 0, i32 %tmp3
-  %tmp5 = load i32* %tmp4, align 4
+  %tmp5 = load i32, i32* %tmp4, align 4
   ret i32 %tmp5
 }
 
diff --git a/llvm/test/CodeGen/X86/shift-folding.ll b/llvm/test/CodeGen/X86/shift-folding.ll
index eca7643..6988787 100644
--- a/llvm/test/CodeGen/X86/shift-folding.ll
+++ b/llvm/test/CodeGen/X86/shift-folding.ll
@@ -44,7 +44,7 @@
 ; CHECK: ret
 
 entry:
-  %tmp4 = load i32* %d
+  %tmp4 = load i32, i32* %d
   %tmp512 = lshr i32 %tmp4, 24
   ret i32 %tmp512
 }
@@ -63,7 +63,7 @@
   %index = lshr i32 %i.zext, 11
   %index.zext = zext i32 %index to i64
   %val.ptr = getelementptr inbounds i32, i32* %arr, i64 %index.zext
-  %val = load i32* %val.ptr
+  %val = load i32, i32* %val.ptr
   %val.zext = zext i32 %val to i64
   %sum = add i64 %val.zext, %index.zext
   ret i64 %sum
diff --git a/llvm/test/CodeGen/X86/shift-one.ll b/llvm/test/CodeGen/X86/shift-one.ll
index 0f80f90..1ff02eb 100644
--- a/llvm/test/CodeGen/X86/shift-one.ll
+++ b/llvm/test/CodeGen/X86/shift-one.ll
@@ -3,7 +3,7 @@
 @x = external global i32                ; <i32*> [#uses=1]
 
 define i32 @test() {
-        %tmp.0 = load i32* @x           ; <i32> [#uses=1]
+        %tmp.0 = load i32, i32* @x           ; <i32> [#uses=1]
         %tmp.1 = shl i32 %tmp.0, 1              ; <i32> [#uses=1]
         ret i32 %tmp.1
 }
diff --git a/llvm/test/CodeGen/X86/shift-parts.ll b/llvm/test/CodeGen/X86/shift-parts.ll
index 763da63..0b25a75 100644
--- a/llvm/test/CodeGen/X86/shift-parts.ll
+++ b/llvm/test/CodeGen/X86/shift-parts.ll
@@ -9,7 +9,7 @@
 
 define i32 @int87(i32 %uint64p_8, i1 %cond) nounwind {
 entry:
-  %srcval4 = load i320* bitcast (%0* @g_144 to i320*), align 8 ; <i320> [#uses=1]
+  %srcval4 = load i320, i320* bitcast (%0* @g_144 to i320*), align 8 ; <i320> [#uses=1]
   br label %for.cond
 
 for.cond:                                         ; preds = %for.cond, %entry
diff --git a/llvm/test/CodeGen/X86/shl-i64.ll b/llvm/test/CodeGen/X86/shl-i64.ll
index 073b35b..849912c 100644
--- a/llvm/test/CodeGen/X86/shl-i64.ll
+++ b/llvm/test/CodeGen/X86/shl-i64.ll
@@ -7,9 +7,9 @@
 define void @test_cl(<4 x i64>*  %dst, <4 x i64>* %src, i32 %idx) {
 entry:
   %arrayidx = getelementptr inbounds <4 x i64>, <4 x i64> * %src, i32 %idx
-  %0 = load <4 x i64> * %arrayidx, align 32
+  %0 = load <4 x i64>, <4 x i64> * %arrayidx, align 32
   %arrayidx1 = getelementptr inbounds <4 x i64>, <4 x i64> * %dst, i32 %idx
-  %1 = load <4 x i64> * %arrayidx1, align 32
+  %1 = load <4 x i64> , <4 x i64> * %arrayidx1, align 32
   %2 = extractelement <4 x i64> %1, i32 0
   %and = and i64 %2, 63
   %3 = insertelement <4 x i64> undef, i64 %and, i32 0    
diff --git a/llvm/test/CodeGen/X86/shl_undef.ll b/llvm/test/CodeGen/X86/shl_undef.ll
index 705af5b..f59d014 100644
--- a/llvm/test/CodeGen/X86/shl_undef.ll
+++ b/llvm/test/CodeGen/X86/shl_undef.ll
@@ -18,7 +18,7 @@
   %tmp0 = alloca i8
   %tmp1 = alloca i32
   store i8 1, i8* %tmp0
-  %tmp921.i7845 = load i8* %a0, align 1
+  %tmp921.i7845 = load i8, i8* %a0, align 1
   %tmp309 = xor i8 %tmp921.i7845, 104
   %tmp592 = zext i8 %tmp309 to i32
   %tmp862 = xor i32 1293461297, %tmp592
@@ -49,7 +49,7 @@
 ; shl undef, x -> 0
 define i32 @foo1_undef(i32* %a0) nounwind {
 entry:
-  %tmp1 = load i32* %a0, align 1
+  %tmp1 = load i32, i32* %a0, align 1
   %tmp2 = shl i32 undef, %tmp1;
   ret i32 %tmp2
 }
diff --git a/llvm/test/CodeGen/X86/shrink-compare.ll b/llvm/test/CodeGen/X86/shrink-compare.ll
index 4ddef4c..0efa073 100644
--- a/llvm/test/CodeGen/X86/shrink-compare.ll
+++ b/llvm/test/CodeGen/X86/shrink-compare.ll
@@ -4,7 +4,7 @@
 
 define void @test1(i32* nocapture %X) nounwind minsize {
 entry:
-  %tmp1 = load i32* %X, align 4
+  %tmp1 = load i32, i32* %X, align 4
   %and = and i32 %tmp1, 255
   %cmp = icmp eq i32 %and, 47
   br i1 %cmp, label %if.then, label %if.end
@@ -72,7 +72,7 @@
 ; PR16551
 define void @test5(i32 %X) nounwind minsize {
 entry:
-  %bf.load = load i56* bitcast ({ i8, i8, i8, i8, i8, i8, i8, i8 }* @x to i56*), align 4
+  %bf.load = load i56, i56* bitcast ({ i8, i8, i8, i8, i8, i8, i8, i8 }* @x to i56*), align 4
   %bf.lshr = lshr i56 %bf.load, 32
   %bf.cast = trunc i56 %bf.lshr to i32
   %cmp = icmp ne i32 %bf.cast, 1
diff --git a/llvm/test/CodeGen/X86/shuffle-combine-crash.ll b/llvm/test/CodeGen/X86/shuffle-combine-crash.ll
index 6ab7b97..06fcaa9 100644
--- a/llvm/test/CodeGen/X86/shuffle-combine-crash.ll
+++ b/llvm/test/CodeGen/X86/shuffle-combine-crash.ll
@@ -18,7 +18,7 @@
   br i1 undef, label %5, label %1
 
 ; <label>:1                                       ; preds = %0
-  %2 = load <4 x i8>* undef
+  %2 = load <4 x i8>, <4 x i8>* undef
   %3 = shufflevector <4 x i8> %2, <4 x i8> undef, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
   %4 = shufflevector <4 x i8> %3, <4 x i8> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
   store <4 x i8> %4, <4 x i8>* undef
diff --git a/llvm/test/CodeGen/X86/sibcall-4.ll b/llvm/test/CodeGen/X86/sibcall-4.ll
index a1a9feb..23b73c0 100644
--- a/llvm/test/CodeGen/X86/sibcall-4.ll
+++ b/llvm/test/CodeGen/X86/sibcall-4.ll
@@ -6,7 +6,7 @@
 ; CHECK-LABEL: t:
 ; CHECK: jmpl *%eax
   %nm3 = getelementptr i32, i32* %Sp_Arg, i32 1
-  %nm9 = load i32* %Sp_Arg
+  %nm9 = load i32, i32* %Sp_Arg
   %nma = inttoptr i32 %nm9 to void (i32*, i32*, i32*, i32)*
   tail call ghccc void %nma(i32* %Base_Arg, i32* %nm3, i32* %Hp_Arg, i32 %R1_Arg) nounwind
   ret void
diff --git a/llvm/test/CodeGen/X86/sibcall-5.ll b/llvm/test/CodeGen/X86/sibcall-5.ll
index b065cce..aab028b 100644
--- a/llvm/test/CodeGen/X86/sibcall-5.ll
+++ b/llvm/test/CodeGen/X86/sibcall-5.ll
@@ -46,7 +46,7 @@
 ; X64_BAD: call
 ; X64_BAD: call
 ; X64_BAD: call
-  %1 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_2", align 8, !invariant.load !0
+  %1 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_2", align 8, !invariant.load !0
   %2 = bitcast %0* %self to i8*
   %3 = tail call { double, double } bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to { double, double } (i8*, i8*)*)(i8* %2, i8* %1) optsize
   %4 = extractvalue { double, double } %3, 0
diff --git a/llvm/test/CodeGen/X86/sibcall.ll b/llvm/test/CodeGen/X86/sibcall.ll
index 2a70d01..d32e567 100644
--- a/llvm/test/CodeGen/X86/sibcall.ll
+++ b/llvm/test/CodeGen/X86/sibcall.ll
@@ -288,10 +288,10 @@
 ; X32ABI-NEXT: movl 12(%edi), %eax
 ; X32ABI-NEXT: jmpq *%rax
   %0 = getelementptr inbounds %struct.__block_literal_2, %struct.__block_literal_2* %.block_descriptor, i64 0, i32 5 ; <void ()**> [#uses=1]
-  %1 = load void ()** %0, align 8                 ; <void ()*> [#uses=2]
+  %1 = load void ()*, void ()** %0, align 8                 ; <void ()*> [#uses=2]
   %2 = bitcast void ()* %1 to %struct.__block_literal_1* ; <%struct.__block_literal_1*> [#uses=1]
   %3 = getelementptr inbounds %struct.__block_literal_1, %struct.__block_literal_1* %2, i64 0, i32 3 ; <i8**> [#uses=1]
-  %4 = load i8** %3, align 8                      ; <i8*> [#uses=1]
+  %4 = load i8*, i8** %3, align 8                      ; <i8*> [#uses=1]
   %5 = bitcast i8* %4 to void (i8*)*              ; <void (i8*)*> [#uses=1]
   %6 = bitcast void ()* %1 to i8*                 ; <i8*> [#uses=1]
   tail call void %5(i8* %6) nounwind
diff --git a/llvm/test/CodeGen/X86/simple-zext.ll b/llvm/test/CodeGen/X86/simple-zext.ll
index ccd8292..b80c0bc 100644
--- a/llvm/test/CodeGen/X86/simple-zext.ll
+++ b/llvm/test/CodeGen/X86/simple-zext.ll
@@ -6,7 +6,7 @@
 
 define void @load_zext(i32* nocapture %p){
 entry:
-  %0 = load i32* %p, align 4
+  %0 = load i32, i32* %p, align 4
   %and = and i32 %0, 255
   tail call void @use(i32 %and)
   ret void
diff --git a/llvm/test/CodeGen/X86/sink-hoist.ll b/llvm/test/CodeGen/X86/sink-hoist.ll
index 5e5bac5..2f70a83f 100644
--- a/llvm/test/CodeGen/X86/sink-hoist.ll
+++ b/llvm/test/CodeGen/X86/sink-hoist.ll
@@ -50,7 +50,7 @@
 bb:
   %i.03 = phi i64 [ 0, %entry ], [ %3, %bb ]
   %scevgep = getelementptr double, double* %p, i64 %i.03
-  %1 = load double* %scevgep, align 8
+  %1 = load double, double* %scevgep, align 8
   %2 = fdiv double 3.200000e+00, %1
   store double %2, double* %scevgep, align 8
   %3 = add nsw i64 %i.03, 1
@@ -104,7 +104,7 @@
 bb:                                               ; preds = %bb60
   %i.0 = phi i32 [ 0, %bb60 ]                    ; <i32> [#uses=2]
   %0 = bitcast float* %x_addr.0 to <4 x float>*   ; <<4 x float>*> [#uses=1]
-  %1 = load <4 x float>* %0, align 16             ; <<4 x float>> [#uses=4]
+  %1 = load <4 x float>, <4 x float>* %0, align 16             ; <<4 x float>> [#uses=4]
   %tmp20 = bitcast <4 x float> %1 to <4 x i32>    ; <<4 x i32>> [#uses=1]
   %tmp22 = and <4 x i32> %tmp20, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> ; <<4 x i32>> [#uses=1]
   %tmp23 = bitcast <4 x i32> %tmp22 to <4 x float> ; <<4 x float>> [#uses=1]
@@ -130,7 +130,7 @@
   %5 = getelementptr float, float* %x_addr.0, i64 4      ; <float*> [#uses=1]
   %6 = getelementptr float, float* %y_addr.0, i64 4      ; <float*> [#uses=1]
   %7 = add i32 %i.0, 4                            ; <i32> [#uses=1]
-  %8 = load i32* %n, align 4                      ; <i32> [#uses=1]
+  %8 = load i32, i32* %n, align 4                      ; <i32> [#uses=1]
   %9 = icmp sgt i32 %8, %7                        ; <i1> [#uses=1]
   br i1 %9, label %bb60, label %return
 
@@ -157,14 +157,14 @@
 
 define void @default_get_pch_validity() nounwind {
 entry:
-  %tmp4 = load i32* @cl_options_count, align 4    ; <i32> [#uses=1]
+  %tmp4 = load i32, i32* @cl_options_count, align 4    ; <i32> [#uses=1]
   %tmp5 = icmp eq i32 %tmp4, 0                    ; <i1> [#uses=1]
   br i1 %tmp5, label %bb6, label %bb2
 
 bb2:                                              ; preds = %bb2, %entry
   %i.019 = phi i64 [ 0, %entry ], [ %tmp25, %bb2 ] ; <i64> [#uses=1]
   %tmp25 = add i64 %i.019, 1                      ; <i64> [#uses=2]
-  %tmp11 = load i32* @cl_options_count, align 4   ; <i32> [#uses=1]
+  %tmp11 = load i32, i32* @cl_options_count, align 4   ; <i32> [#uses=1]
   %tmp12 = zext i32 %tmp11 to i64                 ; <i64> [#uses=1]
   %tmp13 = icmp ugt i64 %tmp12, %tmp25            ; <i1> [#uses=1]
   br i1 %tmp13, label %bb2, label %bb6
diff --git a/llvm/test/CodeGen/X86/slow-incdec.ll b/llvm/test/CodeGen/X86/slow-incdec.ll
index 965dd8b..1857f61 100644
--- a/llvm/test/CodeGen/X86/slow-incdec.ll
+++ b/llvm/test/CodeGen/X86/slow-incdec.ll
@@ -29,7 +29,7 @@
 for.body:                                         ; preds = %for.body.preheader, %for.cond
   %i.06 = phi i32 [ %dec, %for.cond ], [ %s, %for.body.preheader ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.06
-  %0 = load i32* %arrayidx, align 4, !tbaa !1
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !1
   %cmp1 = icmp eq i32 %0, 0
 ;
   %dec = add nsw i32 %i.06, -1
@@ -60,7 +60,7 @@
 for.body:                                         ; preds = %for.body.preheader, %for.cond
   %i.06 = phi i32 [ %inc, %for.cond ], [ %s, %for.body.preheader ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.06
-  %0 = load i32* %arrayidx, align 4, !tbaa !1
+  %0 = load i32, i32* %arrayidx, align 4, !tbaa !1
   %cmp1 = icmp eq i32 %0, 0
   %inc = add nsw i32 %i.06, 1
   br i1 %cmp1, label %for.end.loopexit, label %for.cond
diff --git a/llvm/test/CodeGen/X86/split-vector-bitcast.ll b/llvm/test/CodeGen/X86/split-vector-bitcast.ll
index fae15cf..8d80754 100644
--- a/llvm/test/CodeGen/X86/split-vector-bitcast.ll
+++ b/llvm/test/CodeGen/X86/split-vector-bitcast.ll
@@ -3,7 +3,7 @@
 ; PR10497 + another isel issue with sse2 disabled
 ; (This is primarily checking that this construct doesn't crash.)
 define void @a(<2 x float>* %a, <2 x i32>* %b) {
-  %cc = load <2 x float>* %a
+  %cc = load <2 x float>, <2 x float>* %a
   %c = fadd <2 x float> %cc, %cc
   %dd = bitcast <2 x float> %c to <2 x i32>
   %d = add <2 x i32> %dd, %dd
diff --git a/llvm/test/CodeGen/X86/sse-align-0.ll b/llvm/test/CodeGen/X86/sse-align-0.ll
index 8ffd312..54c89ea 100644
--- a/llvm/test/CodeGen/X86/sse-align-0.ll
+++ b/llvm/test/CodeGen/X86/sse-align-0.ll
@@ -2,12 +2,12 @@
 ; CHECK-NOT:     mov
 
 define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
-  %t = load <4 x float>* %p
+  %t = load <4 x float>, <4 x float>* %p
   %z = fmul <4 x float> %t, %x
   ret <4 x float> %z
 }
 define <2 x double> @bar(<2 x double>* %p, <2 x double> %x) nounwind {
-  %t = load <2 x double>* %p
+  %t = load <2 x double>, <2 x double>* %p
   %z = fmul <2 x double> %t, %x
   ret <2 x double> %z
 }
diff --git a/llvm/test/CodeGen/X86/sse-align-1.ll b/llvm/test/CodeGen/X86/sse-align-1.ll
index c7a5cd5..1a6058c 100644
--- a/llvm/test/CodeGen/X86/sse-align-1.ll
+++ b/llvm/test/CodeGen/X86/sse-align-1.ll
@@ -1,10 +1,10 @@
 ; RUN: llc < %s -march=x86-64 | grep movap | count 2
 
 define <4 x float> @foo(<4 x float>* %p) nounwind {
-  %t = load <4 x float>* %p
+  %t = load <4 x float>, <4 x float>* %p
   ret <4 x float> %t
 }
 define <2 x double> @bar(<2 x double>* %p) nounwind {
-  %t = load <2 x double>* %p
+  %t = load <2 x double>, <2 x double>* %p
   ret <2 x double> %t
 }
diff --git a/llvm/test/CodeGen/X86/sse-align-10.ll b/llvm/test/CodeGen/X86/sse-align-10.ll
index 0f91697..81bf553 100644
--- a/llvm/test/CodeGen/X86/sse-align-10.ll
+++ b/llvm/test/CodeGen/X86/sse-align-10.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86-64 | grep movups | count 1
 
 define <2 x i64> @bar(<2 x i64>* %p) nounwind {
-  %t = load <2 x i64>* %p, align 8
+  %t = load <2 x i64>, <2 x i64>* %p, align 8
   ret <2 x i64> %t
 }
diff --git a/llvm/test/CodeGen/X86/sse-align-12.ll b/llvm/test/CodeGen/X86/sse-align-12.ll
index 396da0f..9441cc0 100644
--- a/llvm/test/CodeGen/X86/sse-align-12.ll
+++ b/llvm/test/CodeGen/X86/sse-align-12.ll
@@ -6,7 +6,7 @@
 ; CHECK-NEXT:    movups (%rdi), %xmm0
 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
 ; CHECK-NEXT:    retq
-  %x = load <4 x float>* %y, align 4
+  %x = load <4 x float>, <4 x float>* %y, align 4
   %a = extractelement <4 x float> %x, i32 0
   %b = extractelement <4 x float> %x, i32 1
   %c = extractelement <4 x float> %x, i32 2
@@ -24,7 +24,7 @@
 ; CHECK-NEXT:    movups (%rdi), %xmm1
 ; CHECK-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; CHECK-NEXT:    retq
-  %x = load <4 x float>* %y, align 4
+  %x = load <4 x float>, <4 x float>* %y, align 4
   %a = extractelement <4 x float> %x, i32 2
   %b = extractelement <4 x float> %x, i32 3
   %c = extractelement <4 x float> %z, i32 2
@@ -42,7 +42,7 @@
 ; CHECK-NEXT:    movupd (%rdi), %xmm0
 ; CHECK-NEXT:    shufpd {{.*#+}} xmm0 = xmm0[1,0]
 ; CHECK-NEXT:    retq
-  %x = load <2 x double>* %y, align 8
+  %x = load <2 x double>, <2 x double>* %y, align 8
   %a = extractelement <2 x double> %x, i32 0
   %c = extractelement <2 x double> %x, i32 1
   %p = insertelement <2 x double> undef, double %c, i32 0
@@ -56,7 +56,7 @@
 ; CHECK-NEXT:    movupd (%rdi), %xmm1
 ; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; CHECK-NEXT:    retq
-  %x = load <2 x double>* %y, align 8
+  %x = load <2 x double>, <2 x double>* %y, align 8
   %a = extractelement <2 x double> %x, i32 1
   %c = extractelement <2 x double> %z, i32 1
   %p = insertelement <2 x double> undef, double %c, i32 0
diff --git a/llvm/test/CodeGen/X86/sse-align-2.ll b/llvm/test/CodeGen/X86/sse-align-2.ll
index 98e75b5..063cc9d 100644
--- a/llvm/test/CodeGen/X86/sse-align-2.ll
+++ b/llvm/test/CodeGen/X86/sse-align-2.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86-64 -mcpu=penryn | FileCheck %s
 
 define <4 x float> @foo(<4 x float>* %p, <4 x float> %x) nounwind {
-  %t = load <4 x float>* %p, align 4
+  %t = load <4 x float>, <4 x float>* %p, align 4
   %z = fmul <4 x float> %t, %x
   ret <4 x float> %z
 }
@@ -11,7 +11,7 @@
 ; CHECK: ret
 
 define <2 x double> @bar(<2 x double>* %p, <2 x double> %x) nounwind {
-  %t = load <2 x double>* %p, align 8
+  %t = load <2 x double>, <2 x double>* %p, align 8
   %z = fmul <2 x double> %t, %x
   ret <2 x double> %z
 }
diff --git a/llvm/test/CodeGen/X86/sse-align-5.ll b/llvm/test/CodeGen/X86/sse-align-5.ll
index 21cd231..a64b953 100644
--- a/llvm/test/CodeGen/X86/sse-align-5.ll
+++ b/llvm/test/CodeGen/X86/sse-align-5.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=x86-64 | grep movaps | count 1
 
 define <2 x i64> @bar(<2 x i64>* %p) nounwind {
-  %t = load <2 x i64>* %p
+  %t = load <2 x i64>, <2 x i64>* %p
   ret <2 x i64> %t
 }
diff --git a/llvm/test/CodeGen/X86/sse-align-6.ll b/llvm/test/CodeGen/X86/sse-align-6.ll
index fcea1b1..01f2251 100644
--- a/llvm/test/CodeGen/X86/sse-align-6.ll
+++ b/llvm/test/CodeGen/X86/sse-align-6.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86-64 | grep movdqu | count 1
 
 define <2 x i64> @bar(<2 x i64>* %p, <2 x i64> %x) nounwind {
-  %t = load <2 x i64>* %p, align 8
+  %t = load <2 x i64>, <2 x i64>* %p, align 8
   %z = mul <2 x i64> %t, %x
   ret <2 x i64> %z
 }
diff --git a/llvm/test/CodeGen/X86/sse-align-9.ll b/llvm/test/CodeGen/X86/sse-align-9.ll
index cb26b95..182c91c 100644
--- a/llvm/test/CodeGen/X86/sse-align-9.ll
+++ b/llvm/test/CodeGen/X86/sse-align-9.ll
@@ -1,10 +1,10 @@
 ; RUN: llc < %s -march=x86-64 | grep movup | count 2
 
 define <4 x float> @foo(<4 x float>* %p) nounwind {
-  %t = load <4 x float>* %p, align 4
+  %t = load <4 x float>, <4 x float>* %p, align 4
   ret <4 x float> %t
 }
 define <2 x double> @bar(<2 x double>* %p) nounwind {
-  %t = load <2 x double>* %p, align 8
+  %t = load <2 x double>, <2 x double>* %p, align 8
   ret <2 x double> %t
 }
diff --git a/llvm/test/CodeGen/X86/sse-domains.ll b/llvm/test/CodeGen/X86/sse-domains.ll
index 5b5d0f9..8016a24 100644
--- a/llvm/test/CodeGen/X86/sse-domains.ll
+++ b/llvm/test/CodeGen/X86/sse-domains.ll
@@ -35,7 +35,7 @@
   %and = and <4 x i32> %x.02, <i32 127, i32 127, i32 127, i32 127>
   %incdec.ptr = getelementptr inbounds <4 x i32>, <4 x i32>* %p.addr.04, i64 1
   store <4 x i32> %and, <4 x i32>* %p.addr.04, align 16
-  %0 = load <4 x i32>* %incdec.ptr, align 16
+  %0 = load <4 x i32>, <4 x i32>* %incdec.ptr, align 16
   %add = shl <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
   %tobool = icmp eq i32 %dec, 0
   br i1 %tobool, label %while.end, label %while.body
diff --git a/llvm/test/CodeGen/X86/sse-intel-ocl.ll b/llvm/test/CodeGen/X86/sse-intel-ocl.ll
index 1885050..b96ecc5 100644
--- a/llvm/test/CodeGen/X86/sse-intel-ocl.ll
+++ b/llvm/test/CodeGen/X86/sse-intel-ocl.ll
@@ -36,7 +36,7 @@
   %y = alloca <16 x float>, align 16
   %x = fadd <16 x float> %a, %b
   %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) 
-  %2 = load <16 x float>* %y, align 16
+  %2 = load <16 x float>, <16 x float>* %y, align 16
   %3 = fadd <16 x float> %2, %1
   ret <16 x float> %3
 }
@@ -63,7 +63,7 @@
   %y = alloca <16 x float>, align 16
   %x = fadd <16 x float> %a, %b
   %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) 
-  %2 = load <16 x float>* %y, align 16
+  %2 = load <16 x float>, <16 x float>* %y, align 16
   %3 = fadd <16 x float> %1, %b
   %4 = fadd <16 x float> %2, %3
   ret <16 x float> %4
diff --git a/llvm/test/CodeGen/X86/sse-load-ret.ll b/llvm/test/CodeGen/X86/sse-load-ret.ll
index 1ebcb1a..8da45a7 100644
--- a/llvm/test/CodeGen/X86/sse-load-ret.ll
+++ b/llvm/test/CodeGen/X86/sse-load-ret.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -march=x86 -mcpu=yonah | not grep xmm
 
 define double @test1(double* %P) {
-        %X = load double* %P            ; <double> [#uses=1]
+        %X = load double, double* %P            ; <double> [#uses=1]
         ret double %X
 }
 
diff --git a/llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll b/llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll
index bb55829..1c61a51 100644
--- a/llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll
+++ b/llvm/test/CodeGen/X86/sse-unaligned-mem-feature.ll
@@ -4,7 +4,7 @@
 target triple = "x86_64-unknown-linux-gnu"
 
 define <4 x float> @foo(<4 x float>* %P, <4 x float> %In) nounwind {
-	%A = load <4 x float>* %P, align 4
+	%A = load <4 x float>, <4 x float>* %P, align 4
 	%B = fadd <4 x float> %A, %In
 	ret <4 x float> %B
 
diff --git a/llvm/test/CodeGen/X86/sse2.ll b/llvm/test/CodeGen/X86/sse2.ll
index 37efa51..ce98606 100644
--- a/llvm/test/CodeGen/X86/sse2.ll
+++ b/llvm/test/CodeGen/X86/sse2.ll
@@ -10,7 +10,7 @@
 ; CHECK-NEXT:    movlpd {{[0-9]+}}(%esp), %xmm0
 ; CHECK-NEXT:    movapd %xmm0, (%eax)
 ; CHECK-NEXT:    retl
-	%tmp3 = load <2 x double>* %A, align 16
+	%tmp3 = load <2 x double>, <2 x double>* %A, align 16
 	%tmp7 = insertelement <2 x double> undef, double %B, i32 0
 	%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
 	store <2 x double> %tmp9, <2 x double>* %r, align 16
@@ -26,7 +26,7 @@
 ; CHECK-NEXT:    movhpd {{[0-9]+}}(%esp), %xmm0
 ; CHECK-NEXT:    movapd %xmm0, (%eax)
 ; CHECK-NEXT:    retl
-	%tmp3 = load <2 x double>* %A, align 16
+	%tmp3 = load <2 x double>, <2 x double>* %A, align 16
 	%tmp7 = insertelement <2 x double> undef, double %B, i32 0
 	%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
 	store <2 x double> %tmp9, <2 x double>* %r, align 16
@@ -44,8 +44,8 @@
 ; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
 ; CHECK-NEXT:    movaps %xmm0, (%eax)
 ; CHECK-NEXT:    retl
-	%tmp = load <4 x float>* %B		; <<4 x float>> [#uses=2]
-	%tmp3 = load <4 x float>* %A		; <<4 x float>> [#uses=2]
+	%tmp = load <4 x float>, <4 x float>* %B		; <<4 x float>> [#uses=2]
+	%tmp3 = load <4 x float>, <4 x float>* %A		; <<4 x float>> [#uses=2]
 	%tmp.upgrd.1 = extractelement <4 x float> %tmp3, i32 0		; <float> [#uses=1]
 	%tmp7 = extractelement <4 x float> %tmp, i32 0		; <float> [#uses=1]
 	%tmp8 = extractelement <4 x float> %tmp3, i32 1		; <float> [#uses=1]
@@ -80,9 +80,9 @@
 ; CHECK-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; CHECK-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; CHECK-NEXT:    retl
-	%tmp = load i8** %ptr		; <i8*> [#uses=1]
+	%tmp = load i8*, i8** %ptr		; <i8*> [#uses=1]
 	%tmp.upgrd.1 = bitcast i8* %tmp to float*		; <float*> [#uses=1]
-	%tmp.upgrd.2 = load float* %tmp.upgrd.1		; <float> [#uses=1]
+	%tmp.upgrd.2 = load float, float* %tmp.upgrd.1		; <float> [#uses=1]
 	%tmp.upgrd.3 = insertelement <4 x float> undef, float %tmp.upgrd.2, i32 0		; <<4 x float>> [#uses=1]
 	%tmp9 = insertelement <4 x float> %tmp.upgrd.3, float 0.000000e+00, i32 1		; <<4 x float>> [#uses=1]
 	%tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 2		; <<4 x float>> [#uses=1]
@@ -103,7 +103,7 @@
 ; CHECK-NEXT:    movaps (%ecx), %xmm0
 ; CHECK-NEXT:    movaps %xmm0, (%eax)
 ; CHECK-NEXT:    retl
-  %tmp1 = load <4 x float>* %A            ; <<4 x float>> [#uses=1]
+  %tmp1 = load <4 x float>, <4 x float>* %A            ; <<4 x float>> [#uses=1]
   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 >          ; <<4 x float>> [#uses=1]
   store <4 x float> %tmp2, <4 x float>* %res
   ret void
@@ -129,10 +129,10 @@
 ; CHECK-NEXT:    movl L_x$non_lazy_ptr, %eax
 ; CHECK-NEXT:    movups (%eax), %xmm0
 ; CHECK-NEXT:    retl
-	%tmp = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 0)		; <i32> [#uses=1]
-	%tmp3 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 1)		; <i32> [#uses=1]
-	%tmp5 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 2)		; <i32> [#uses=1]
-	%tmp7 = load i32* getelementptr ([4 x i32]* @x, i32 0, i32 3)		; <i32> [#uses=1]
+	%tmp = load i32, i32* getelementptr ([4 x i32]* @x, i32 0, i32 0)		; <i32> [#uses=1]
+	%tmp3 = load i32, i32* getelementptr ([4 x i32]* @x, i32 0, i32 1)		; <i32> [#uses=1]
+	%tmp5 = load i32, i32* getelementptr ([4 x i32]* @x, i32 0, i32 2)		; <i32> [#uses=1]
+	%tmp7 = load i32, i32* getelementptr ([4 x i32]* @x, i32 0, i32 3)		; <i32> [#uses=1]
 	%tmp.upgrd.1 = insertelement <4 x i32> undef, i32 %tmp, i32 0		; <<4 x i32>> [#uses=1]
 	%tmp13 = insertelement <4 x i32> %tmp.upgrd.1, i32 %tmp3, i32 1		; <<4 x i32>> [#uses=1]
 	%tmp14 = insertelement <4 x i32> %tmp13, i32 %tmp5, i32 2		; <<4 x i32>> [#uses=1]
@@ -186,7 +186,7 @@
 ; CHECK-NEXT:    addps %xmm1, %xmm0
 ; CHECK-NEXT:    movaps %xmm0, 0
 ; CHECK-NEXT:    retl
-  %tmp1 = load <4 x float>* null          ; <<4 x float>> [#uses=2]
+  %tmp1 = load <4 x float>, <4 x float>* null          ; <<4 x float>> [#uses=2]
   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >, <4 x i32> < i32 0, i32 1, i32 6, i32 7 >             ; <<4 x float>> [#uses=1]
   %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >                ; <<4 x float>> [#uses=1]
   %tmp4 = fadd <4 x float> %tmp2, %tmp3            ; <<4 x float>> [#uses=1]
@@ -205,8 +205,8 @@
 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
 ; CHECK-NEXT:    movaps %xmm0, (%eax)
 ; CHECK-NEXT:    retl
-  %tmp3 = load <4 x float>* %B            ; <<4 x float>> [#uses=1]
-  %tmp5 = load <4 x float>* %C            ; <<4 x float>> [#uses=1]
+  %tmp3 = load <4 x float>, <4 x float>* %B            ; <<4 x float>> [#uses=1]
+  %tmp5 = load <4 x float>, <4 x float>* %C            ; <<4 x float>> [#uses=1]
   %tmp11 = shufflevector <4 x float> %tmp3, <4 x float> %tmp5, <4 x i32> < i32 1, i32 4, i32 1, i32 5 >         ; <<4 x float>> [#uses=1]
   store <4 x float> %tmp11, <4 x float>* %res
   ret void
@@ -224,8 +224,8 @@
 ; CHECK-NEXT:    subps %xmm1, %xmm2
 ; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; CHECK-NEXT:    retl
-  %tmp = load <4 x float>* %y             ; <<4 x float>> [#uses=2]
-  %tmp5 = load <4 x float>* %x            ; <<4 x float>> [#uses=2]
+  %tmp = load <4 x float>, <4 x float>* %y             ; <<4 x float>> [#uses=2]
+  %tmp5 = load <4 x float>, <4 x float>* %x            ; <<4 x float>> [#uses=2]
   %tmp9 = fadd <4 x float> %tmp5, %tmp             ; <<4 x float>> [#uses=1]
   %tmp21 = fsub <4 x float> %tmp5, %tmp            ; <<4 x float>> [#uses=1]
   %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 >                ; <<4 x float>> [#uses=1]
@@ -241,8 +241,8 @@
 ; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
 ; CHECK-NEXT:    retl
 entry:
-  %tmp = load <4 x float>* %y             ; <<4 x float>> [#uses=1]
-  %tmp3 = load <4 x float>* %x            ; <<4 x float>> [#uses=1]
+  %tmp = load <4 x float>, <4 x float>* %y             ; <<4 x float>> [#uses=1]
+  %tmp3 = load <4 x float>, <4 x float>* %x            ; <<4 x float>> [#uses=1]
   %tmp4 = shufflevector <4 x float> %tmp3, <4 x float> %tmp, <4 x i32> < i32 2, i32 3, i32 6, i32 7 >           ; <<4 x float>> [#uses=1]
   ret <4 x float> %tmp4
 }
@@ -257,7 +257,7 @@
 ; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
 ; CHECK-NEXT:    retl
   %i5 = getelementptr inbounds <4 x double>, <4 x double>* %srcA, i32 3
-  %i6 = load <4 x double>* %i5, align 32
+  %i6 = load <4 x double>, <4 x double>* %i5, align 32
   %i7 = shufflevector <4 x double> %i6, <4 x double> undef, <2 x i32> <i32 0, i32 2>
   ret <2 x double> %i7
 }
diff --git a/llvm/test/CodeGen/X86/sse3-avx-addsub.ll b/llvm/test/CodeGen/X86/sse3-avx-addsub.ll
index 431588f..76141fc 100644
--- a/llvm/test/CodeGen/X86/sse3-avx-addsub.ll
+++ b/llvm/test/CodeGen/X86/sse3-avx-addsub.ll
@@ -87,7 +87,7 @@
 
 
 define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {
-  %1 = load <4 x float>* %B
+  %1 = load <4 x float>, <4 x float>* %B
   %add = fadd <4 x float> %A, %1
   %sub = fsub <4 x float> %A, %1
   %vecinit6 = shufflevector <4 x float> %sub, <4 x float> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
@@ -100,7 +100,7 @@
 
 
 define <8 x float> @test2b(<8 x float> %A, <8 x float>* %B) {
-  %1 = load <8 x float>* %B
+  %1 = load <8 x float>, <8 x float>* %B
   %add = fadd <8 x float> %A, %1
   %sub = fsub <8 x float> %A, %1
   %vecinit14 = shufflevector <8 x float> %sub, <8 x float> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
@@ -115,7 +115,7 @@
 
 
 define <4 x double> @test3b(<4 x double> %A, <4 x double>* %B) {
-  %1 = load <4 x double>* %B
+  %1 = load <4 x double>, <4 x double>* %B
   %add = fadd <4 x double> %A, %1
   %sub = fsub <4 x double> %A, %1
   %vecinit6 = shufflevector <4 x double> %sub, <4 x double> %add, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
@@ -130,7 +130,7 @@
 
 
 define <2 x double> @test4b(<2 x double> %A, <2 x double>* %B) {
-  %1 = load <2 x double>* %B
+  %1 = load <2 x double>, <2 x double>* %B
   %sub = fsub <2 x double> %A, %1
   %add = fadd <2 x double> %A, %1
   %vecinit2 = shufflevector <2 x double> %sub, <2 x double> %add, <2 x i32> <i32 0, i32 3>
diff --git a/llvm/test/CodeGen/X86/sse3.ll b/llvm/test/CodeGen/X86/sse3.ll
index 6c0b701..c1cd91b 100644
--- a/llvm/test/CodeGen/X86/sse3.ll
+++ b/llvm/test/CodeGen/X86/sse3.ll
@@ -14,7 +14,7 @@
 ; X64-NEXT:    movdqa %xmm0, (%rdi)
 ; X64-NEXT:    retq
 entry:
-	%tmp3 = load <8 x i16>* %old
+	%tmp3 = load <8 x i16>, <8 x i16>* %old
 	%tmp6 = shufflevector <8 x i16> %tmp3,
                 <8 x i16> < i16 1, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef >,
                 <8 x i32> < i32 8, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef  >
@@ -31,8 +31,8 @@
 ; X64-NEXT:    andps (%rdi), %xmm0
 ; X64-NEXT:    orps %xmm1, %xmm0
 ; X64-NEXT:    retq
-	%tmp1 = load <8 x i16>* %A
-	%tmp2 = load <8 x i16>* %B
+	%tmp1 = load <8 x i16>, <8 x i16>* %A
+	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
 	ret <8 x i16> %tmp3
 
@@ -112,7 +112,7 @@
 ; X64-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
 ; X64-NEXT:    movdqa %xmm0, (%rdi)
 ; X64-NEXT:    retq
-	%tmp = load <2 x i64>* %A
+	%tmp = load <2 x i64>, <2 x i64>* %A
 	%tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16>
 	%tmp0 = extractelement <8 x i16> %tmp.upgrd.1, i32 0
 	%tmp1 = extractelement <8 x i16> %tmp.upgrd.1, i32 1
@@ -142,9 +142,9 @@
 ; X64-NEXT:    movhpd (%rsi), %xmm0
 ; X64-NEXT:    movapd %xmm0, (%rdi)
 ; X64-NEXT:    retq
-	%tmp = load <4 x float>* %r
+	%tmp = load <4 x float>, <4 x float>* %r
 	%tmp.upgrd.3 = bitcast <2 x i32>* %A to double*
-	%tmp.upgrd.4 = load double* %tmp.upgrd.3
+	%tmp.upgrd.4 = load double, double* %tmp.upgrd.3
 	%tmp.upgrd.5 = insertelement <2 x double> undef, double %tmp.upgrd.4, i32 0
 	%tmp5 = insertelement <2 x double> %tmp.upgrd.5, double undef, i32 1
 	%tmp6 = bitcast <2 x double> %tmp5 to <4 x float>
@@ -178,7 +178,7 @@
 ; X64-NEXT:    movq _g2@{{.*}}(%rip), %rax
 ; X64-NEXT:    movq %xmm0, (%rax)
 ; X64-NEXT:    retq
-  load <4 x i32>* @g1, align 16
+  load <4 x i32>, <4 x i32>* @g1, align 16
   bitcast <4 x i32> %1 to <8 x i16>
   shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> < i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef >
   bitcast <8 x i16> %3 to <2 x i64>
@@ -273,9 +273,9 @@
 ; X64-NEXT:    andpd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
 entry:
-  %tmp1 = load <4 x float>* undef, align 16
+  %tmp1 = load <4 x float>, <4 x float>* undef, align 16
   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
-  %tmp3 = load <4 x float>* undef, align 16
+  %tmp3 = load <4 x float>, <4 x float>* undef, align 16
   %tmp4 = shufflevector <4 x float> %tmp2, <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
   %tmp5 = bitcast <4 x float> %tmp3 to <4 x i32>
   %tmp6 = shufflevector <4 x i32> %tmp5, <4 x i32> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
diff --git a/llvm/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll b/llvm/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll
index 55faf4d..a16e792 100644
--- a/llvm/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL: test_llvm_x86_sse41_pmovsxbw
 ; SSE41: pmovsxbw (%rdi), %xmm0
 ; AVX:  vpmovsxbw (%rdi), %xmm0
-  %1 = load <16 x i8>* %a, align 1
+  %1 = load <16 x i8>, <16 x i8>* %a, align 1
   %2 = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %1)
   ret <8 x i16> %2
 }
@@ -14,7 +14,7 @@
 ; CHECK-LABEL: test_llvm_x86_sse41_pmovsxbd
 ; SSE41: pmovsxbd (%rdi), %xmm0
 ; AVX:  vpmovsxbd (%rdi), %xmm0
-  %1 = load <16 x i8>* %a, align 1
+  %1 = load <16 x i8>, <16 x i8>* %a, align 1
   %2 = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %1)
   ret <4 x i32> %2
 }
@@ -23,7 +23,7 @@
 ; CHECK-LABEL: test_llvm_x86_sse41_pmovsxbq
 ; SSE41: pmovsxbq (%rdi), %xmm0
 ; AVX:  vpmovsxbq (%rdi), %xmm0
-  %1 = load <16 x i8>* %a, align 1
+  %1 = load <16 x i8>, <16 x i8>* %a, align 1
   %2 = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %1)
   ret <2 x i64> %2
 }
@@ -32,7 +32,7 @@
 ; CHECK-LABEL: test_llvm_x86_sse41_pmovsxwd
 ; SSE41: pmovsxwd (%rdi), %xmm0
 ; AVX:  vpmovsxwd (%rdi), %xmm0
-  %1 = load <8 x i16>* %a, align 1
+  %1 = load <8 x i16>, <8 x i16>* %a, align 1
   %2 = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1)
   ret <4 x i32> %2
 }
@@ -41,7 +41,7 @@
 ; CHECK-LABEL: test_llvm_x86_sse41_pmovsxwq
 ; SSE41: pmovsxwq (%rdi), %xmm0
 ; AVX:  vpmovsxwq (%rdi), %xmm0
-  %1 = load <8 x i16>* %a, align 1
+  %1 = load <8 x i16>, <8 x i16>* %a, align 1
   %2 = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %1)
   ret <2 x i64> %2
 }
@@ -50,7 +50,7 @@
 ; CHECK-LABEL: test_llvm_x86_sse41_pmovsxdq
 ; SSE41: pmovsxdq (%rdi), %xmm0
 ; AVX:  vpmovsxdq (%rdi), %xmm0
-  %1 = load <4 x i32>* %a, align 1
+  %1 = load <4 x i32>, <4 x i32>* %a, align 1
   %2 = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %1)
   ret <2 x i64> %2
 }
@@ -59,7 +59,7 @@
 ; CHECK-LABEL: test_llvm_x86_sse41_pmovzxbw
 ; SSE41: pmovzxbw (%rdi), %xmm0
 ; AVX:  vpmovzxbw (%rdi), %xmm0
-  %1 = load <16 x i8>* %a, align 1
+  %1 = load <16 x i8>, <16 x i8>* %a, align 1
   %2 = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %1)
   ret <8 x i16> %2
 }
@@ -68,7 +68,7 @@
 ; CHECK-LABEL: test_llvm_x86_sse41_pmovzxbd
 ; SSE41: pmovzxbd (%rdi), %xmm0
 ; AVX:  vpmovzxbd (%rdi), %xmm0
-  %1 = load <16 x i8>* %a, align 1
+  %1 = load <16 x i8>, <16 x i8>* %a, align 1
   %2 = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %1)
   ret <4 x i32> %2
 }
@@ -77,7 +77,7 @@
 ; CHECK-LABEL: test_llvm_x86_sse41_pmovzxbq
 ; SSE41: pmovzxbq (%rdi), %xmm0
 ; AVX:  vpmovzxbq (%rdi), %xmm0
-  %1 = load <16 x i8>* %a, align 1
+  %1 = load <16 x i8>, <16 x i8>* %a, align 1
   %2 = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %1)
   ret <2 x i64> %2
 }
@@ -86,7 +86,7 @@
 ; CHECK-LABEL: test_llvm_x86_sse41_pmovzxwd
 ; SSE41: pmovzxwd (%rdi), %xmm0
 ; AVX:  vpmovzxwd (%rdi), %xmm0
-  %1 = load <8 x i16>* %a, align 1
+  %1 = load <8 x i16>, <8 x i16>* %a, align 1
   %2 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %1)
   ret <4 x i32> %2
 }
@@ -95,7 +95,7 @@
 ; CHECK-LABEL: test_llvm_x86_sse41_pmovzxwq
 ; SSE41: pmovzxwq (%rdi), %xmm0
 ; AVX:  vpmovzxwq (%rdi), %xmm0
-  %1 = load <8 x i16>* %a, align 1
+  %1 = load <8 x i16>, <8 x i16>* %a, align 1
   %2 = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %1)
   ret <2 x i64> %2
 }
@@ -104,7 +104,7 @@
 ; CHECK-LABEL: test_llvm_x86_sse41_pmovzxdq
 ; SSE41: pmovzxdq (%rdi), %xmm0
 ; AVX:  vpmovzxdq (%rdi), %xmm0
-  %1 = load <4 x i32>* %a, align 1
+  %1 = load <4 x i32>, <4 x i32>* %a, align 1
   %2 = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %1)
   ret <2 x i64> %2
 }
diff --git a/llvm/test/CodeGen/X86/sse41.ll b/llvm/test/CodeGen/X86/sse41.ll
index 1a76539..f13cd0f 100644
--- a/llvm/test/CodeGen/X86/sse41.ll
+++ b/llvm/test/CodeGen/X86/sse41.ll
@@ -43,7 +43,7 @@
 ; X64-NEXT:    pmovsxbd (%rdi), %xmm0
 ; X64-NEXT:    retq
 entry:
-	%0 = load i32* %p, align 4
+	%0 = load i32, i32* %p, align 4
 	%1 = insertelement <4 x i32> undef, i32 %0, i32 0
 	%2 = insertelement <4 x i32> %1, i32 0, i32 1
 	%3 = insertelement <4 x i32> %2, i32 0, i32 2
@@ -66,7 +66,7 @@
 ; X64-NEXT:    pmovsxwd (%rdi), %xmm0
 ; X64-NEXT:    retq
 entry:
-	%0 = load i64* %p		; <i64> [#uses=1]
+	%0 = load i64, i64* %p		; <i64> [#uses=1]
 	%tmp2 = insertelement <2 x i64> zeroinitializer, i64 %0, i32 0		; <<2 x i64>> [#uses=1]
 	%1 = bitcast <2 x i64> %tmp2 to <8 x i16>		; <<8 x i16>> [#uses=1]
 	%2 = tail call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %1) nounwind readnone		; <<4 x i32>> [#uses=1]
@@ -87,7 +87,7 @@
 ; X64-NEXT:    pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
 ; X64-NEXT:    retq
 entry:
-	%0 = load i16* @g16, align 2		; <i16> [#uses=1]
+	%0 = load i16, i16* @g16, align 2		; <i16> [#uses=1]
 	%1 = insertelement <8 x i16> undef, i16 %0, i32 0		; <<8 x i16>> [#uses=1]
 	%2 = bitcast <8 x i16> %1 to <16 x i8>		; <<16 x i8>> [#uses=1]
 	%3 = tail call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %2) nounwind readnone		; <<2 x i64>> [#uses=1]
@@ -330,7 +330,7 @@
 ; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
 ; X64-NEXT:    retq
 entry:
-  %0 = load <4 x float>* %pb, align 16
+  %0 = load <4 x float>, <4 x float>* %pb, align 16
   %vecinit6 = shufflevector <4 x float> %a, <4 x float> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
   ret <4 x float> %vecinit6
 }
@@ -366,7 +366,7 @@
 ; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5],xmm1[6,7]
 ; X64-NEXT:    retq
 entry:
-  %0 = load <4 x i32>* %pb, align 16
+  %0 = load <4 x i32>, <4 x i32>* %pb, align 16
   %vecinit6 = shufflevector <4 x i32> %a, <4 x i32> %0, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
   ret <4 x i32> %vecinit6
 }
@@ -399,7 +399,7 @@
 ; X64:       ## BB#0:
 ; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
 ; X64-NEXT:    retq
-  %1 = load float* %b, align 4
+  %1 = load float, float* %b, align 4
   %2 = insertelement <4 x float> undef, float %1, i32 0
   %result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
   ret <4 x float> %result
@@ -421,7 +421,7 @@
 ; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
 ; X64-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
 ; X64-NEXT:    retq
-  %1 = load i32* %b, align 4
+  %1 = load i32, i32* %b, align 4
   %2 = insertelement <4 x i32> undef, i32 %1, i32 0
   %result = shufflevector <4 x i32> %a, <4 x i32> %2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
   ret <4 x i32> %result
@@ -823,7 +823,7 @@
 ; X64:       ## BB#0:
 ; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
 ; X64-NEXT:    retq
-  %1 = load <4 x float>* %pb, align 16
+  %1 = load <4 x float>, <4 x float>* %pb, align 16
   %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 48)
   ret <4 x float> %2
 }
@@ -841,7 +841,7 @@
 ; X64:       ## BB#0:
 ; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[1],xmm0[3]
 ; X64-NEXT:    retq
-  %1 = load <4 x float>* %pb, align 16
+  %1 = load <4 x float>, <4 x float>* %pb, align 16
   %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 96)
   ret <4 x float> %2
 }
@@ -862,7 +862,7 @@
 ; X64-NEXT:    insertps {{.*#+}} xmm0 = mem[3],xmm0[1,2,3]
 ; X64-NEXT:    retq
   %1 = getelementptr inbounds <4 x float>, <4 x float>* %pb, i64 %index
-  %2 = load <4 x float>* %1, align 16
+  %2 = load <4 x float>, <4 x float>* %1, align 16
   %3 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %2, i32 192)
   ret <4 x float> %3
 }
@@ -884,7 +884,7 @@
 ; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
 ; X64-NEXT:    retq
   %1 = getelementptr inbounds float, float* %fb, i64 %index
-  %2 = load float* %1, align 4
+  %2 = load float, float* %1, align 4
   %3 = insertelement <4 x float> undef, float %2, i32 0
   %4 = insertelement <4 x float> %3, float %2, i32 1
   %5 = insertelement <4 x float> %4, float %2, i32 2
@@ -908,7 +908,7 @@
 ; X64-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
 ; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
 ; X64-NEXT:    retq
-  %1 = load <4 x float>* %b, align 4
+  %1 = load <4 x float>, <4 x float>* %b, align 4
   %2 = extractelement <4 x float> %1, i32 0
   %3 = insertelement <4 x float> undef, float %2, i32 0
   %4 = insertelement <4 x float> %3, float %2, i32 1
@@ -948,7 +948,7 @@
 ; X64-NEXT:    addps %xmm3, %xmm0
 ; X64-NEXT:    retq
   %1 = getelementptr inbounds float, float* %fb, i64 %index
-  %2 = load float* %1, align 4
+  %2 = load float, float* %1, align 4
   %3 = insertelement <4 x float> undef, float %2, i32 0
   %4 = insertelement <4 x float> %3, float %2, i32 1
   %5 = insertelement <4 x float> %4, float %2, i32 2
@@ -978,7 +978,7 @@
 ; X64-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
 ; X64-NEXT:    movapd %xmm1, %xmm0
 ; X64-NEXT:    retq
-  %1 = load float* %b, align 4
+  %1 = load float, float* %b, align 4
   %2 = insertelement <4 x float> undef, float %1, i32 0
   %result = shufflevector <4 x float> %a, <4 x float> %2, <4 x i32> <i32 4, i32 undef, i32 0, i32 7>
   ret <4 x float> %result
@@ -997,7 +997,7 @@
 ; X64:       ## BB#0:
 ; X64-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],mem[2]
 ; X64-NEXT:    retq
-  %load = load <4 x float> *%ptr
+  %load = load <4 x float> , <4 x float> *%ptr
   %ret = shufflevector <4 x float> %load, <4 x float> %a, <4 x i32> <i32 4, i32 undef, i32 6, i32 2>
   ret <4 x float> %ret
 }
diff --git a/llvm/test/CodeGen/X86/sse42-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse42-intrinsics-x86.ll
index 5ca8009..706c86b 100644
--- a/llvm/test/CodeGen/X86/sse42-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/sse42-intrinsics-x86.ll
@@ -16,8 +16,8 @@
   ; CHECK: movl $7
   ; CHECK: pcmpestri $7, (
   ; CHECK: movl
-  %1 = load <16 x i8>* %a0
-  %2 = load <16 x i8>* %a2
+  %1 = load <16 x i8>, <16 x i8>* %a0
+  %2 = load <16 x i8>, <16 x i8>* %a2
   %res = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -94,7 +94,7 @@
   ; CHECK: movl $7
   ; CHECK: pcmpestrm $7,
   ; CHECK-NOT: vmov
-  %1 = load <16 x i8>* %a2
+  %1 = load <16 x i8>, <16 x i8>* %a2
   %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %1, i32 7, i8 7) ; <<16 x i8>> [#uses=1]
   ret <16 x i8> %res
 }
@@ -112,8 +112,8 @@
 define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) {
   ; CHECK: pcmpistri $7, (
   ; CHECK: movl
-  %1 = load <16 x i8>* %a0
-  %2 = load <16 x i8>* %a1
+  %1 = load <16 x i8>, <16 x i8>* %a0
+  %2 = load <16 x i8>, <16 x i8>* %a1
   %res = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %1, <16 x i8> %2, i8 7) ; <i32> [#uses=1]
   ret i32 %res
 }
@@ -176,7 +176,7 @@
 define <16 x i8> @test_x86_sse42_pcmpistrm128_load(<16 x i8> %a0, <16 x i8>* %a1) {
   ; CHECK: pcmpistrm $7, (
   ; CHECK-NOT: vmov
-  %1 = load <16 x i8>* %a1
+  %1 = load <16 x i8>, <16 x i8>* %a1
   %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %1, i8 7) ; <<16 x i8>> [#uses=1]
   ret <16 x i8> %res
 }
diff --git a/llvm/test/CodeGen/X86/ssp-data-layout.ll b/llvm/test/CodeGen/X86/ssp-data-layout.ll
index c3f6c18..4a63ace 100644
--- a/llvm/test/CodeGen/X86/ssp-data-layout.ll
+++ b/llvm/test/CodeGen/X86/ssp-data-layout.ll
@@ -153,18 +153,18 @@
   %arraydecay22 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i32 0
   %arraydecay23 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i32 0
   %arraydecay24 = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i32 0
-  %0 = load i32* %x, align 4
-  %1 = load i32* %y, align 4
-  %2 = load i32* %z, align 4
+  %0 = load i32, i32* %x, align 4
+  %1 = load i32, i32* %y, align 4
+  %2 = load i32, i32* %z, align 4
   %coerce.dive = getelementptr %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0
   %3 = bitcast [8 x i8]* %coerce.dive to i64*
-  %4 = load i64* %3, align 1
+  %4 = load i64, i64* %3, align 1
   %coerce.dive25 = getelementptr %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0
   %5 = bitcast [2 x i8]* %coerce.dive25 to i16*
-  %6 = load i16* %5, align 1
+  %6 = load i16, i16* %5, align 1
   %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
   %7 = bitcast [2 x i16]* %coerce.dive26 to i32*
-  %8 = load i32* %7, align 1
+  %8 = load i32, i32* %7, align 1
   call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
   ret void
 }
@@ -297,18 +297,18 @@
   %arraydecay22 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i32 0
   %arraydecay23 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i32 0
   %arraydecay24 = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i32 0
-  %0 = load i32* %x, align 4
-  %1 = load i32* %y, align 4
-  %2 = load i32* %z, align 4
+  %0 = load i32, i32* %x, align 4
+  %1 = load i32, i32* %y, align 4
+  %2 = load i32, i32* %z, align 4
   %coerce.dive = getelementptr %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0
   %3 = bitcast [8 x i8]* %coerce.dive to i64*
-  %4 = load i64* %3, align 1
+  %4 = load i64, i64* %3, align 1
   %coerce.dive25 = getelementptr %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0
   %5 = bitcast [2 x i8]* %coerce.dive25 to i16*
-  %6 = load i16* %5, align 1
+  %6 = load i16, i16* %5, align 1
   %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
   %7 = bitcast [2 x i16]* %coerce.dive26 to i32*
-  %8 = load i32* %7, align 1
+  %8 = load i32, i32* %7, align 1
   call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
   ret void
 }
@@ -429,18 +429,18 @@
   %arraydecay22 = getelementptr inbounds [2 x i8], [2 x i8]* %small, i32 0, i32 0
   %arraydecay23 = getelementptr inbounds [8 x i32], [8 x i32]* %large2, i32 0, i32 0
   %arraydecay24 = getelementptr inbounds [2 x i16], [2 x i16]* %small2, i32 0, i32 0
-  %0 = load i32* %x, align 4
-  %1 = load i32* %y, align 4
-  %2 = load i32* %z, align 4
+  %0 = load i32, i32* %x, align 4
+  %1 = load i32, i32* %y, align 4
+  %2 = load i32, i32* %z, align 4
   %coerce.dive = getelementptr %struct.struct_large_char, %struct.struct_large_char* %a, i32 0, i32 0
   %3 = bitcast [8 x i8]* %coerce.dive to i64*
-  %4 = load i64* %3, align 1
+  %4 = load i64, i64* %3, align 1
   %coerce.dive25 = getelementptr %struct.struct_small_char, %struct.struct_small_char* %b, i32 0, i32 0
   %5 = bitcast [2 x i8]* %coerce.dive25 to i16*
-  %6 = load i16* %5, align 1
+  %6 = load i16, i16* %5, align 1
   %coerce.dive26 = getelementptr %struct.struct_small_nonchar, %struct.struct_small_nonchar* %d, i32 0, i32 0
   %7 = bitcast [2 x i16]* %coerce.dive26 to i32*
-  %8 = load i32* %7, align 1
+  %8 = load i32, i32* %7, align 1
   call void @takes_all(i64 %4, i16 %6, %struct.struct_large_nonchar* byval align 8 %c, i32 %8, i8* %arraydecay, i8* %arraydecay22, i32* %arraydecay23, i16* %arraydecay24, i32* %ptr, i32 %0, i32 %1, i32 %2)
   ret void
 }
@@ -464,7 +464,7 @@
   %arrayidx = getelementptr inbounds [8 x i8], [8 x i8]* %large, i32 0, i64 0
   store i8 %call1, i8* %arrayidx, align 1
   call void @end_large_char()
-  %0 = load i32* %x, align 4
+  %0 = load i32, i32* %x, align 4
   %arraydecay = getelementptr inbounds [8 x i8], [8 x i8]* %large, i32 0, i32 0
   call void @takes_two(i32 %0, i8* %arraydecay)
   ret void
diff --git a/llvm/test/CodeGen/X86/stack-align.ll b/llvm/test/CodeGen/X86/stack-align.ll
index 059a356..0cff95f 100644
--- a/llvm/test/CodeGen/X86/stack-align.ll
+++ b/llvm/test/CodeGen/X86/stack-align.ll
@@ -12,11 +12,11 @@
 
 define void @test({ double, double }* byval  %z, double* %P) nounwind {
 entry:
-	%tmp3 = load double* @G, align 16		; <double> [#uses=1]
+	%tmp3 = load double, double* @G, align 16		; <double> [#uses=1]
 	%tmp4 = tail call double @fabs( double %tmp3 ) readnone	; <double> [#uses=1]
         store volatile double %tmp4, double* %P
 	%tmp = getelementptr { double, double }, { double, double }* %z, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp1 = load volatile double* %tmp, align 8		; <double> [#uses=1]
+	%tmp1 = load volatile double, double* %tmp, align 8		; <double> [#uses=1]
 	%tmp2 = tail call double @fabs( double %tmp1 ) readnone	; <double> [#uses=1]
 	%tmp6 = fadd double %tmp4, %tmp2		; <double> [#uses=1]
 	store volatile double %tmp6, double* %P, align 8
diff --git a/llvm/test/CodeGen/X86/stack-protector-dbginfo.ll b/llvm/test/CodeGen/X86/stack-protector-dbginfo.ll
index a84b77e..e8ded42 100644
--- a/llvm/test/CodeGen/X86/stack-protector-dbginfo.ll
+++ b/llvm/test/CodeGen/X86/stack-protector-dbginfo.ll
@@ -11,7 +11,7 @@
 define i32 @_Z18read_response_sizev() #0 {
 entry:
   tail call void @llvm.dbg.value(metadata !22, i64 0, metadata !23, metadata !{!"0x102"}), !dbg !39
-  %0 = load i64* getelementptr inbounds ({ i64, [56 x i8] }* @a, i32 0, i32 0), align 8, !dbg !40
+  %0 = load i64, i64* getelementptr inbounds ({ i64, [56 x i8] }* @a, i32 0, i32 0), align 8, !dbg !40
   tail call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !64, metadata !{!"0x102"}), !dbg !71
   %1 = trunc i64 %0 to i32
   ret i32 %1
diff --git a/llvm/test/CodeGen/X86/stack-protector-vreg-to-vreg-copy.ll b/llvm/test/CodeGen/X86/stack-protector-vreg-to-vreg-copy.ll
index 1db4a1d..f3f9eeb 100644
--- a/llvm/test/CodeGen/X86/stack-protector-vreg-to-vreg-copy.ll
+++ b/llvm/test/CodeGen/X86/stack-protector-vreg-to-vreg-copy.ll
@@ -28,7 +28,7 @@
 define void @do_something(i32 %i) #0 {
 entry:
   %data = alloca [8 x i8], align 1
-  %0 = load i32* @state, align 4
+  %0 = load i32, i32* @state, align 4
   %cmp = icmp eq i32 %0, 0
   br i1 %cmp, label %if.then, label %if.else
 
diff --git a/llvm/test/CodeGen/X86/stack-protector-weight.ll b/llvm/test/CodeGen/X86/stack-protector-weight.ll
index 1976bfb..4220a4c 100644
--- a/llvm/test/CodeGen/X86/stack-protector-weight.ll
+++ b/llvm/test/CodeGen/X86/stack-protector-weight.ll
@@ -22,7 +22,7 @@
   call void @foo2(i32* %arraydecay)
   %idxprom = sext i32 %n to i64
   %arrayidx = getelementptr inbounds [128 x i32], [128 x i32]* %a, i64 0, i64 %idxprom
-  %1 = load i32* %arrayidx, align 4
+  %1 = load i32, i32* %arrayidx, align 4
   call void @llvm.lifetime.end(i64 512, i8* %0)
   ret i32 %1
 }
diff --git a/llvm/test/CodeGen/X86/stack-protector.ll b/llvm/test/CodeGen/X86/stack-protector.ll
index 4598431..eab5efe 100644
--- a/llvm/test/CodeGen/X86/stack-protector.ll
+++ b/llvm/test/CodeGen/X86/stack-protector.ll
@@ -44,7 +44,7 @@
   %buf = alloca [16 x i8], align 16
   store i8* %a, i8** %a.addr, align 8
   %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
   %arraydecay1 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
   %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
@@ -80,7 +80,7 @@
   %buf = alloca [16 x i8], align 16
   store i8* %a, i8** %a.addr, align 8
   %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
   %arraydecay1 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
   %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
@@ -112,7 +112,7 @@
   %buf = alloca [16 x i8], align 16
   store i8* %a, i8** %a.addr, align 8
   %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
   %arraydecay1 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
   %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
@@ -144,7 +144,7 @@
   %buf = alloca [16 x i8], align 16
   store i8* %a, i8** %a.addr, align 8
   %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
   %arraydecay1 = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
   %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
@@ -176,7 +176,7 @@
   store i8* %a, i8** %a.addr, align 8
   %buf = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
   %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
   %buf1 = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
   %arraydecay2 = getelementptr inbounds [16 x i8], [16 x i8]* %buf1, i32 0, i32 0
@@ -210,7 +210,7 @@
   store i8* %a, i8** %a.addr, align 8
   %buf = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
   %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
   %buf1 = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
   %arraydecay2 = getelementptr inbounds [16 x i8], [16 x i8]* %buf1, i32 0, i32 0
@@ -244,7 +244,7 @@
   store i8* %a, i8** %a.addr, align 8
   %buf = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
   %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
   %buf1 = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
   %arraydecay2 = getelementptr inbounds [16 x i8], [16 x i8]* %buf1, i32 0, i32 0
@@ -278,7 +278,7 @@
   store i8* %a, i8** %a.addr, align 8
   %buf = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
   %arraydecay = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
   %buf1 = getelementptr inbounds %struct.foo, %struct.foo* %b, i32 0, i32 0
   %arraydecay2 = getelementptr inbounds [16 x i8], [16 x i8]* %buf1, i32 0, i32 0
@@ -310,7 +310,7 @@
   %buf = alloca [4 x i8], align 1
   store i8* %a, i8** %a.addr, align 8
   %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
   %arraydecay1 = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
   %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
@@ -342,7 +342,7 @@
   %buf = alloca [4 x i8], align 1
   store i8* %a, i8** %a.addr, align 8
   %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
   %arraydecay1 = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
   %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
@@ -374,7 +374,7 @@
   %buf = alloca [4 x i8], align 1
   store i8* %a, i8** %a.addr, align 8
   %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
   %arraydecay1 = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
   %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
@@ -406,7 +406,7 @@
   %buf = alloca [4 x i8], align 1
   store i8* %a, i8** %a.addr, align 8
   %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
   %arraydecay1 = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
   %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %arraydecay1)
@@ -438,7 +438,7 @@
   store i8* %a, i8** %a.addr, align 8
   %buf = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
   %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
   %buf1 = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
   %arraydecay2 = getelementptr inbounds [4 x i8], [4 x i8]* %buf1, i32 0, i32 0
@@ -472,7 +472,7 @@
   store i8* %a, i8** %a.addr, align 8
   %buf = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
   %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
   %buf1 = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
   %arraydecay2 = getelementptr inbounds [4 x i8], [4 x i8]* %buf1, i32 0, i32 0
@@ -506,7 +506,7 @@
   store i8* %a, i8** %a.addr, align 8
   %buf = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
   %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
   %buf1 = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
   %arraydecay2 = getelementptr inbounds [4 x i8], [4 x i8]* %buf1, i32 0, i32 0
@@ -540,7 +540,7 @@
   store i8* %a, i8** %a.addr, align 8
   %buf = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
   %arraydecay = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i8* @strcpy(i8* %arraydecay, i8* %0)
   %buf1 = getelementptr inbounds %struct.foo.0, %struct.foo.0* %b, i32 0, i32 0
   %arraydecay2 = getelementptr inbounds [4 x i8], [4 x i8]* %buf1, i32 0, i32 0
@@ -570,7 +570,7 @@
 ; DARWIN-X64: .cfi_endproc
   %a.addr = alloca i8*, align 8
   store i8* %a, i8** %a.addr, align 8
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0)
   ret void
 }
@@ -598,7 +598,7 @@
 ; DARWIN-X64: .cfi_endproc
   %a.addr = alloca i8*, align 8
   store i8* %a, i8** %a.addr, align 8
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0)
   ret void
 }
@@ -626,7 +626,7 @@
 ; DARWIN-X64: .cfi_endproc
   %a.addr = alloca i8*, align 8
   store i8* %a, i8** %a.addr, align 8
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0)
   ret void
 }
@@ -654,7 +654,7 @@
 ; DARWIN-X64: callq ___stack_chk_fail
   %a.addr = alloca i8*, align 8
   store i8* %a, i8** %a.addr, align 8
-  %0 = load i8** %a.addr, align 8
+  %0 = load i8*, i8** %a.addr, align 8
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %0)
   ret void
 }
@@ -683,7 +683,7 @@
   %a = alloca i32, align 4
   %j = alloca i32*, align 8
   store i32 0, i32* %retval
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %add = add nsw i32 %0, 1
   store i32 %add, i32* %a, align 4
   store i32* %a, i32** %j, align 8
@@ -715,7 +715,7 @@
   %a = alloca i32, align 4
   %j = alloca i32*, align 8
   store i32 0, i32* %retval
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %add = add nsw i32 %0, 1
   store i32 %add, i32* %a, align 4
   store i32* %a, i32** %j, align 8
@@ -747,7 +747,7 @@
   %a = alloca i32, align 4
   %j = alloca i32*, align 8
   store i32 0, i32* %retval
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %add = add nsw i32 %0, 1
   store i32 %add, i32* %a, align 4
   store i32* %a, i32** %j, align 8
@@ -779,7 +779,7 @@
   %a = alloca i32, align 4
   %j = alloca i32*, align 8
   store i32 0, i32* %retval
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %add = add nsw i32 %0, 1
   store i32 %add, i32* %a, align 4
   store i32* %a, i32** %j, align 8
@@ -1318,7 +1318,7 @@
   %b = alloca i32*, align 8
   %y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
   store i32* %y, i32** %b, align 8
-  %0 = load i32** %b, align 8
+  %0 = load i32*, i32** %b, align 8
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0)
   ret void
 }
@@ -1348,7 +1348,7 @@
   %b = alloca i32*, align 8
   %y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
   store i32* %y, i32** %b, align 8
-  %0 = load i32** %b, align 8
+  %0 = load i32*, i32** %b, align 8
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0)
   ret void
 }
@@ -1378,7 +1378,7 @@
   %b = alloca i32*, align 8
   %y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
   store i32* %y, i32** %b, align 8
-  %0 = load i32** %b, align 8
+  %0 = load i32*, i32** %b, align 8
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0)
   ret void
 }
@@ -1408,7 +1408,7 @@
   %b = alloca i32*, align 8
   %y = getelementptr inbounds %struct.pair, %struct.pair* %c, i32 0, i32 1
   store i32* %y, i32** %b, align 8
-  %0 = load i32** %b, align 8
+  %0 = load i32*, i32** %b, align 8
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32* %0)
   ret void
 }
@@ -1767,7 +1767,7 @@
   store i32 0, i32* %a, align 4
   %0 = bitcast i32* %a to float*
   store float* %0, float** %b, align 8
-  %1 = load float** %b, align 8
+  %1 = load float*, float** %b, align 8
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1)
   ret void
 }
@@ -1799,7 +1799,7 @@
   store i32 0, i32* %a, align 4
   %0 = bitcast i32* %a to float*
   store float* %0, float** %b, align 8
-  %1 = load float** %b, align 8
+  %1 = load float*, float** %b, align 8
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1)
   ret void
 }
@@ -1831,7 +1831,7 @@
   store i32 0, i32* %a, align 4
   %0 = bitcast i32* %a to float*
   store float* %0, float** %b, align 8
-  %1 = load float** %b, align 8
+  %1 = load float*, float** %b, align 8
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1)
   ret void
 }
@@ -1863,7 +1863,7 @@
   store i32 0, i32* %a, align 4
   %0 = bitcast i32* %a to float*
   store float* %0, float** %b, align 8
-  %1 = load float** %b, align 8
+  %1 = load float*, float** %b, align 8
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), float* %1)
   ret void
 }
@@ -2428,7 +2428,7 @@
   %call = call i32* @getp()
   store i32* %call, i32** %a, align 8
   store i32** %a, i32*** %b, align 8
-  %0 = load i32*** %b, align 8
+  %0 = load i32**, i32*** %b, align 8
   call void @funcall2(i32** %0)
   ret void
 }
@@ -2459,7 +2459,7 @@
   %call = call i32* @getp()
   store i32* %call, i32** %a, align 8
   store i32** %a, i32*** %b, align 8
-  %0 = load i32*** %b, align 8
+  %0 = load i32**, i32*** %b, align 8
   call void @funcall2(i32** %0)
   ret void
 }
@@ -2490,7 +2490,7 @@
   %call = call i32* @getp()
   store i32* %call, i32** %a, align 8
   store i32** %a, i32*** %b, align 8
-  %0 = load i32*** %b, align 8
+  %0 = load i32**, i32*** %b, align 8
   call void @funcall2(i32** %0)
   ret void
 }
@@ -2521,7 +2521,7 @@
   %call = call i32* @getp()
   store i32* %call, i32** %a, align 8
   store i32** %a, i32*** %b, align 8
-  %0 = load i32*** %b, align 8
+  %0 = load i32**, i32*** %b, align 8
   call void @funcall2(i32** %0)
   ret void
 }
@@ -2552,7 +2552,7 @@
   store i32* %call, i32** %a, align 8
   %0 = bitcast i32** %a to float**
   store float** %0, float*** %b, align 8
-  %1 = load float*** %b, align 8
+  %1 = load float**, float*** %b, align 8
   call void @funfloat2(float** %1)
   ret void
 }
@@ -2584,7 +2584,7 @@
   store i32* %call, i32** %a, align 8
   %0 = bitcast i32** %a to float**
   store float** %0, float*** %b, align 8
-  %1 = load float*** %b, align 8
+  %1 = load float**, float*** %b, align 8
   call void @funfloat2(float** %1)
   ret void
 }
@@ -2616,7 +2616,7 @@
   store i32* %call, i32** %a, align 8
   %0 = bitcast i32** %a to float**
   store float** %0, float*** %b, align 8
-  %1 = load float*** %b, align 8
+  %1 = load float**, float*** %b, align 8
   call void @funfloat2(float** %1)
   ret void
 }
@@ -2648,7 +2648,7 @@
   store i32* %call, i32** %a, align 8
   %0 = bitcast i32** %a to float**
   store float** %0, float*** %b, align 8
-  %1 = load float*** %b, align 8
+  %1 = load float**, float*** %b, align 8
   call void @funfloat2(float** %1)
   ret void
 }
@@ -2676,7 +2676,7 @@
   %a = alloca %class.A, align 1
   %array = getelementptr inbounds %class.A, %class.A* %a, i32 0, i32 0
   %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %array, i32 0, i64 0
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   ret i8 %0
 }
 
@@ -2704,7 +2704,7 @@
   %a = alloca %class.A, align 1
   %array = getelementptr inbounds %class.A, %class.A* %a, i32 0, i32 0
   %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %array, i32 0, i64 0
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   ret i8 %0
 }
 
@@ -2732,7 +2732,7 @@
   %a = alloca %class.A, align 1
   %array = getelementptr inbounds %class.A, %class.A* %a, i32 0, i32 0
   %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %array, i32 0, i64 0
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   ret i8 %0
 }
 
@@ -2760,7 +2760,7 @@
   %a = alloca %class.A, align 1
   %array = getelementptr inbounds %class.A, %class.A* %a, i32 0, i32 0
   %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %array, i32 0, i64 0
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   ret i8 %0
 }
 
@@ -2791,7 +2791,7 @@
   %e = getelementptr inbounds %struct.anon.0, %struct.anon.0* %d, i32 0, i32 0
   %array = bitcast %union.anon.1* %e to [2 x i8]*
   %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %array, i32 0, i64 0
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   ret i8 %0
 }
 
@@ -2823,7 +2823,7 @@
   %e = getelementptr inbounds %struct.anon.0, %struct.anon.0* %d, i32 0, i32 0
   %array = bitcast %union.anon.1* %e to [2 x i8]*
   %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %array, i32 0, i64 0
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   ret i8 %0
 }
 
@@ -2855,7 +2855,7 @@
   %e = getelementptr inbounds %struct.anon.0, %struct.anon.0* %d, i32 0, i32 0
   %array = bitcast %union.anon.1* %e to [2 x i8]*
   %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %array, i32 0, i64 0
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   ret i8 %0
 }
 
@@ -2887,7 +2887,7 @@
   %e = getelementptr inbounds %struct.anon.0, %struct.anon.0* %d, i32 0, i32 0
   %array = bitcast %union.anon.1* %e to [2 x i8]*
   %arrayidx = getelementptr inbounds [2 x i8], [2 x i8]* %array, i32 0, i64 0
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   ret i8 %0
 }
 
@@ -2914,7 +2914,7 @@
   %n.addr = alloca i32, align 4
   %a = alloca i32*, align 8
   store i32 %n, i32* %n.addr, align 4
-  %0 = load i32* %n.addr, align 4
+  %0 = load i32, i32* %n.addr, align 4
   %conv = sext i32 %0 to i64
   %1 = alloca i8, i64 %conv
   %2 = bitcast i8* %1 to i32*
@@ -2946,7 +2946,7 @@
   %n.addr = alloca i32, align 4
   %a = alloca i32*, align 8
   store i32 %n, i32* %n.addr, align 4
-  %0 = load i32* %n.addr, align 4
+  %0 = load i32, i32* %n.addr, align 4
   %conv = sext i32 %0 to i64
   %1 = alloca i8, i64 %conv
   %2 = bitcast i8* %1 to i32*
@@ -2978,7 +2978,7 @@
   %n.addr = alloca i32, align 4
   %a = alloca i32*, align 8
   store i32 %n, i32* %n.addr, align 4
-  %0 = load i32* %n.addr, align 4
+  %0 = load i32, i32* %n.addr, align 4
   %conv = sext i32 %0 to i64
   %1 = alloca i8, i64 %conv
   %2 = bitcast i8* %1 to i32*
@@ -3010,7 +3010,7 @@
   %n.addr = alloca i32, align 4
   %a = alloca i32*, align 8
   store i32 %n, i32* %n.addr, align 4
-  %0 = load i32* %n.addr, align 4
+  %0 = load i32, i32* %n.addr, align 4
   %conv = sext i32 %0 to i64
   %1 = alloca i8, i64 %conv
   %2 = bitcast i8* %1 to i32*
@@ -3040,7 +3040,7 @@
 ; DARWIN-X64: .cfi_endproc
   %a = alloca [4 x i32], align 16
   %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i64 0
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   ret i32 %0
 }
 
@@ -3067,7 +3067,7 @@
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca [4 x i32], align 16
   %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i64 0
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   ret i32 %0
 }
 
@@ -3094,7 +3094,7 @@
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca [4 x i32], align 16
   %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i64 0
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   ret i32 %0
 }
 
@@ -3121,7 +3121,7 @@
 ; DARWIN-X64: callq ___stack_chk_fail
   %a = alloca [4 x i32], align 16
   %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i64 0
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   ret i32 %0
 }
 
@@ -3151,7 +3151,7 @@
   %c = alloca %struct.nest, align 4
   %b = getelementptr inbounds %struct.nest, %struct.nest* %c, i32 0, i32 1
   %_a = getelementptr inbounds %struct.pair, %struct.pair* %b, i32 0, i32 0
-  %0 = load i32* %_a, align 4
+  %0 = load i32, i32* %_a, align 4
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %0)
   ret void
 }
@@ -3182,7 +3182,7 @@
 ; DARWIN-X64: callq ___stack_chk_fail
   %tmp = alloca %struct.small*, align 8
   %tmp1 = call i32 (...)* @dummy(%struct.small** %tmp)
-  %tmp2 = load %struct.small** %tmp, align 8
+  %tmp2 = load %struct.small*, %struct.small** %tmp, align 8
   %tmp3 = ptrtoint %struct.small* %tmp2 to i64
   %tmp4 = trunc i64 %tmp3 to i32
   %tmp5 = icmp sgt i32 %tmp4, 0
@@ -3193,7 +3193,7 @@
   %tmp8 = phi i64 [ %tmp20, %bb17 ], [ 1, %bb ]
   %tmp9 = phi i32 [ %tmp14, %bb17 ], [ %tmp1, %bb ]
   %tmp10 = getelementptr inbounds %struct.small, %struct.small* %tmp7, i64 0, i32 0
-  %tmp11 = load i8* %tmp10, align 1
+  %tmp11 = load i8, i8* %tmp10, align 1
   %tmp12 = icmp eq i8 %tmp11, 1
   %tmp13 = add nsw i32 %tmp9, 8
   %tmp14 = select i1 %tmp12, i32 %tmp13, i32 %tmp9
@@ -3203,7 +3203,7 @@
 
 bb17:                                             ; preds = %bb6
   %tmp18 = getelementptr inbounds %struct.small*, %struct.small** %tmp, i64 %tmp8
-  %tmp19 = load %struct.small** %tmp18, align 8
+  %tmp19 = load %struct.small*, %struct.small** %tmp18, align 8
   %tmp20 = add i64 %tmp8, 1
   br label %bb6
 
@@ -3344,9 +3344,9 @@
   %1 = bitcast %struct.small_char* %test to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 12, i32 0, i1 false)
   %2 = getelementptr { i64, i8 }, { i64, i8 }* %test.coerce, i32 0, i32 0
-  %3 = load i64* %2, align 1
+  %3 = load i64, i64* %2, align 1
   %4 = getelementptr { i64, i8 }, { i64, i8 }* %test.coerce, i32 0, i32 1
-  %5 = load i8* %4, align 1
+  %5 = load i8, i8* %4, align 1
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %3, i8 %5)
   ret i32 %call
 }
@@ -3378,9 +3378,9 @@
   %1 = bitcast %struct.small_char* %test to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 12, i32 0, i1 false)
   %2 = getelementptr { i64, i8 }, { i64, i8 }* %test.coerce, i32 0, i32 0
-  %3 = load i64* %2, align 1
+  %3 = load i64, i64* %2, align 1
   %4 = getelementptr { i64, i8 }, { i64, i8 }* %test.coerce, i32 0, i32 1
-  %5 = load i8* %4, align 1
+  %5 = load i8, i8* %4, align 1
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i64 %3, i8 %5)
   ret i32 %call
 }
@@ -3409,7 +3409,7 @@
   %test = alloca i8*, align 8
   %0 = alloca i8, i64 4
   store i8* %0, i8** %test, align 8
-  %1 = load i8** %test, align 8
+  %1 = load i8*, i8** %test, align 8
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %1)
   ret i32 %call
 }
@@ -3437,7 +3437,7 @@
   %test = alloca i8*, align 8
   %0 = alloca i8, i64 5
   store i8* %0, i8** %test, align 8
-  %1 = load i8** %test, align 8
+  %1 = load i8*, i8** %test, align 8
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i8* %1)
   ret i32 %call
 }
diff --git a/llvm/test/CodeGen/X86/stackmap.ll b/llvm/test/CodeGen/X86/stackmap.ll
index 5e356f3..fc958ec 100644
--- a/llvm/test/CodeGen/X86/stackmap.ll
+++ b/llvm/test/CodeGen/X86/stackmap.ll
@@ -321,7 +321,7 @@
   unreachable
 
 bb2:
-  %tmp = load i64* inttoptr (i64 140685446136880 to i64*)
+  %tmp = load i64, i64* inttoptr (i64 140685446136880 to i64*)
   br i1 undef, label %bb16, label %bb17
 
 bb16:
diff --git a/llvm/test/CodeGen/X86/statepoint-forward.ll b/llvm/test/CodeGen/X86/statepoint-forward.ll
index 12a6ac2..5a1b18af 100644
--- a/llvm/test/CodeGen/X86/statepoint-forward.ll
+++ b/llvm/test/CodeGen/X86/statepoint-forward.ll
@@ -22,12 +22,12 @@
 ;; be valid, but is not currently implemented.
 define i1 @test_load_forward(i32 addrspace(1)* addrspace(1)* %p) gc "statepoint-example" {
 entry:
-  %before = load i32 addrspace(1)* addrspace(1)* %p
+  %before = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %p
   %cmp1 = call i1 @f(i32 addrspace(1)* %before)
   call void @llvm.assume(i1 %cmp1)
   %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* addrspace(1)* %p)
   %pnew = call i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(i32 %safepoint_token, i32 4, i32 4)
-  %after = load i32 addrspace(1)* addrspace(1)* %pnew
+  %after = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %pnew
   %cmp2 = call i1 @f(i32 addrspace(1)* %after)
   ret i1 %cmp2
 
@@ -46,7 +46,7 @@
   store i32 addrspace(1)* %v, i32 addrspace(1)* addrspace(1)* %p
   %safepoint_token = tail call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0, i32 addrspace(1)* addrspace(1)* %p)
   %pnew = call i32 addrspace(1)* addrspace(1)* @llvm.experimental.gc.relocate.p1p1i32(i32 %safepoint_token, i32 4, i32 4)
-  %after = load i32 addrspace(1)* addrspace(1)* %pnew
+  %after = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %pnew
   %cmp2 = call i1 @f(i32 addrspace(1)* %after)
   ret i1 %cmp2
 
@@ -69,11 +69,11 @@
 ; statepoint does not provide the collector with this root.
 define i1 @test_load_forward_nongc_heap(i32 addrspace(1)** %p) gc "statepoint-example" {
 entry:
-  %before = load i32 addrspace(1)** %p
+  %before = load i32 addrspace(1)*, i32 addrspace(1)** %p
   %cmp1 = call i1 @f(i32 addrspace(1)* %before)
   call void @llvm.assume(i1 %cmp1)
   call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0)
-  %after = load i32 addrspace(1)** %p
+  %after = load i32 addrspace(1)*, i32 addrspace(1)** %p
   %cmp2 = call i1 @f(i32 addrspace(1)* %after)
   ret i1 %cmp2
 
@@ -91,7 +91,7 @@
   call void @llvm.assume(i1 %cmp1)
   store i32 addrspace(1)* %v, i32 addrspace(1)** %p
   call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @func, i32 0, i32 0, i32 0)
-  %after = load i32 addrspace(1)** %p
+  %after = load i32 addrspace(1)*, i32 addrspace(1)** %p
   %cmp2 = call i1 @f(i32 addrspace(1)* %after)
   ret i1 %cmp2
 
diff --git a/llvm/test/CodeGen/X86/store-narrow.ll b/llvm/test/CodeGen/X86/store-narrow.ll
index e3cc2fa..6c1c56e 100644
--- a/llvm/test/CodeGen/X86/store-narrow.ll
+++ b/llvm/test/CodeGen/X86/store-narrow.ll
@@ -6,7 +6,7 @@
 
 define void @test1(i32* nocapture %a0, i8 zeroext %a1) nounwind ssp {
 entry:
-  %A = load i32* %a0, align 4
+  %A = load i32, i32* %a0, align 4
   %B = and i32 %A, -256     ; 0xFFFFFF00
   %C = zext i8 %a1 to i32
   %D = or i32 %C, %B
@@ -23,7 +23,7 @@
 
 define void @test2(i32* nocapture %a0, i8 zeroext %a1) nounwind ssp {
 entry:
-  %A = load i32* %a0, align 4
+  %A = load i32, i32* %a0, align 4
   %B = and i32 %A, -65281    ; 0xFFFF00FF
   %C = zext i8 %a1 to i32
   %CS = shl i32 %C, 8
@@ -40,7 +40,7 @@
 
 define void @test3(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
 entry:
-  %A = load i32* %a0, align 4
+  %A = load i32, i32* %a0, align 4
   %B = and i32 %A, -65536    ; 0xFFFF0000
   %C = zext i16 %a1 to i32
   %D = or i32 %B, %C
@@ -56,7 +56,7 @@
 
 define void @test4(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
 entry:
-  %A = load i32* %a0, align 4
+  %A = load i32, i32* %a0, align 4
   %B = and i32 %A, 65535    ; 0x0000FFFF
   %C = zext i16 %a1 to i32
   %CS = shl i32 %C, 16
@@ -73,7 +73,7 @@
 
 define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp {
 entry:
-  %A = load i64* %a0, align 4
+  %A = load i64, i64* %a0, align 4
   %B = and i64 %A, -4294901761    ; 0xFFFFFFFF0000FFFF
   %C = zext i16 %a1 to i64
   %CS = shl i64 %C, 16
@@ -90,7 +90,7 @@
 
 define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp {
 entry:
-  %A = load i64* %a0, align 4
+  %A = load i64, i64* %a0, align 4
   %B = and i64 %A, -280375465082881    ; 0xFFFF00FFFFFFFFFF
   %C = zext i8 %a1 to i64
   %CS = shl i64 %C, 40
@@ -108,8 +108,8 @@
 
 define i32 @test7(i64* nocapture %a0, i8 zeroext %a1, i32* %P2) nounwind {
 entry:
-  %OtherLoad = load i32 *%P2
-  %A = load i64* %a0, align 4
+  %OtherLoad = load i32 , i32 *%P2
+  %A = load i64, i64* %a0, align 4
   %B = and i64 %A, -280375465082881    ; 0xFFFF00FFFFFFFFFF
   %C = zext i8 %a1 to i64
   %CS = shl i64 %C, 40
@@ -136,7 +136,7 @@
 ; X64-NEXT: movl %eax, _g_16(%rip)
 ; X64-NEXT: ret
 define void @test8() nounwind {
-  %tmp = load i32* @g_16
+  %tmp = load i32, i32* @g_16
   store i32 0, i32* @g_16
   %or = or i32 %tmp, 1
   store i32 %or, i32* @g_16
@@ -147,7 +147,7 @@
 ; X64-NEXT: orb $1, _g_16(%rip)
 ; X64-NEXT: ret
 define void @test9() nounwind {
-  %tmp = load i32* @g_16
+  %tmp = load i32, i32* @g_16
   %or = or i32 %tmp, 1
   store i32 %or, i32* @g_16
   ret void
@@ -160,7 +160,7 @@
 ; X64-NEXT: ret
 define i8 @test10(i8* %P) nounwind ssp {
 entry:
-  %tmp = load i8* %P, align 1
+  %tmp = load i8, i8* %P, align 1
   %conv = sext i8 %tmp to i32
   %shr3 = lshr i32 %conv, 8
   %conv2 = trunc i32 %shr3 to i8
diff --git a/llvm/test/CodeGen/X86/store_op_load_fold.ll b/llvm/test/CodeGen/X86/store_op_load_fold.ll
index bbeb744..49fef93 100644
--- a/llvm/test/CodeGen/X86/store_op_load_fold.ll
+++ b/llvm/test/CodeGen/X86/store_op_load_fold.ll
@@ -9,7 +9,7 @@
 ; CHECK-NOT: mov
 ; CHECK: add
 ; CHECK-NEXT: ret
-        %tmp.0 = load i16* @X           ; <i16> [#uses=1]
+        %tmp.0 = load i16, i16* @X           ; <i16> [#uses=1]
         %tmp.3 = add i16 %tmp.0, 329            ; <i16> [#uses=1]
         store i16 %tmp.3, i16* @X
         ret void
@@ -23,7 +23,7 @@
 ; CHECK: mov
 ; CHECK-NEXT: and
 ; CHECK-NEXT: ret
-  %bf.load35 = load i56* bitcast ([7 x i8]* getelementptr inbounds (%struct.S2* @s2, i32 0, i32 5) to i56*), align 16
+  %bf.load35 = load i56, i56* bitcast ([7 x i8]* getelementptr inbounds (%struct.S2* @s2, i32 0, i32 5) to i56*), align 16
   %bf.clear36 = and i56 %bf.load35, -1125895611875329
   store i56 %bf.clear36, i56* bitcast ([7 x i8]* getelementptr inbounds (%struct.S2* @s2, i32 0, i32 5) to i56*), align 16
   ret void
diff --git a/llvm/test/CodeGen/X86/store_op_load_fold2.ll b/llvm/test/CodeGen/X86/store_op_load_fold2.ll
index 9be94f9..f47d87f 100644
--- a/llvm/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/llvm/test/CodeGen/X86/store_op_load_fold2.ll
@@ -8,8 +8,8 @@
 cond_true2732.preheader:                ; preds = %entry
         %tmp2666 = getelementptr %struct.Macroblock, %struct.Macroblock* null, i32 0, i32 13                ; <i64*> [#uses=2]
         %tmp2674 = trunc i32 0 to i8            ; <i8> [#uses=1]
-        %tmp2667.us.us = load i64* %tmp2666             ; <i64> [#uses=1]
-        %tmp2670.us.us = load i64* null         ; <i64> [#uses=1]
+        %tmp2667.us.us = load i64, i64* %tmp2666             ; <i64> [#uses=1]
+        %tmp2670.us.us = load i64, i64* null         ; <i64> [#uses=1]
         %shift.upgrd.1 = zext i8 %tmp2674 to i64                ; <i64> [#uses=1]
         %tmp2675.us.us = shl i64 %tmp2670.us.us, %shift.upgrd.1         ; <i64> [#uses=1]
         %tmp2675not.us.us = xor i64 %tmp2675.us.us, -1          ; <i64> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/stride-nine-with-base-reg.ll b/llvm/test/CodeGen/X86/stride-nine-with-base-reg.ll
index 0190f1d..551bd7c 100644
--- a/llvm/test/CodeGen/X86/stride-nine-with-base-reg.ll
+++ b/llvm/test/CodeGen/X86/stride-nine-with-base-reg.ll
@@ -18,7 +18,7 @@
 bb:
 	%i.019.0 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ]
 	%tmp2 = getelementptr [1000 x i8], [1000 x i8]* @B, i32 0, i32 %i.019.0
-	%tmp3 = load i8* %tmp2, align 4
+	%tmp3 = load i8, i8* %tmp2, align 4
 	%tmp4 = mul i8 %tmp3, 2
 	%tmp5 = getelementptr [1000 x i8], [1000 x i8]* @A, i32 0, i32 %i.019.0
 	store i8 %tmp4, i8* %tmp5, align 4
diff --git a/llvm/test/CodeGen/X86/stride-reuse.ll b/llvm/test/CodeGen/X86/stride-reuse.ll
index c4409e0..af036f3 100644
--- a/llvm/test/CodeGen/X86/stride-reuse.ll
+++ b/llvm/test/CodeGen/X86/stride-reuse.ll
@@ -14,7 +14,7 @@
 bb:
 	%i.019.0 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ]
 	%tmp2 = getelementptr [1000 x float], [1000 x float]* @B, i32 0, i32 %i.019.0
-	%tmp3 = load float* %tmp2, align 4
+	%tmp3 = load float, float* %tmp2, align 4
 	%tmp4 = fmul float %tmp3, 2.000000e+00
 	%tmp5 = getelementptr [1000 x float], [1000 x float]* @A, i32 0, i32 %i.019.0
 	store float %tmp4, float* %tmp5, align 4
diff --git a/llvm/test/CodeGen/X86/subreg-to-reg-0.ll b/llvm/test/CodeGen/X86/subreg-to-reg-0.ll
index d718c85..251a754 100644
--- a/llvm/test/CodeGen/X86/subreg-to-reg-0.ll
+++ b/llvm/test/CodeGen/X86/subreg-to-reg-0.ll
@@ -4,7 +4,7 @@
 ; x86-64's implicit zero-extension!
 
 define i64 @foo(i32* %p) nounwind {
-  %t = load i32* %p
+  %t = load i32, i32* %p
   %n = add i32 %t, 1
   %z = zext i32 %n to i64
   ret i64 %z
diff --git a/llvm/test/CodeGen/X86/subreg-to-reg-2.ll b/llvm/test/CodeGen/X86/subreg-to-reg-2.ll
index 5b71c53..6766b01 100644
--- a/llvm/test/CodeGen/X86/subreg-to-reg-2.ll
+++ b/llvm/test/CodeGen/X86/subreg-to-reg-2.ll
@@ -10,8 +10,8 @@
 
 define internal fastcc %XXValue* @t(i64* %out, %"struct.XXC::ArrayStorage"* %tmp9) nounwind {
 prologue:
-	%array = load %XXValue** inttoptr (i64 11111111 to %XXValue**)		; <%XXValue*> [#uses=0]
-	%index = load %XXValue** inttoptr (i64 22222222 to %XXValue**)		; <%XXValue*> [#uses=1]
+	%array = load %XXValue*, %XXValue** inttoptr (i64 11111111 to %XXValue**)		; <%XXValue*> [#uses=0]
+	%index = load %XXValue*, %XXValue** inttoptr (i64 22222222 to %XXValue**)		; <%XXValue*> [#uses=1]
 	%tmp = ptrtoint %XXValue* %index to i64		; <i64> [#uses=2]
 	store i64 %tmp, i64* %out
 	%tmp6 = trunc i64 %tmp to i32		; <i32> [#uses=1]
@@ -20,6 +20,6 @@
 bb5:		; preds = %prologue
 	%tmp10 = zext i32 %tmp6 to i64		; <i64> [#uses=1]
 	%tmp11 = getelementptr %"struct.XXC::ArrayStorage", %"struct.XXC::ArrayStorage"* %tmp9, i64 0, i32 5, i64 %tmp10		; <%XXValue**> [#uses=1]
-	%tmp12 = load %XXValue** %tmp11, align 8		; <%XXValue*> [#uses=1]
+	%tmp12 = load %XXValue*, %XXValue** %tmp11, align 8		; <%XXValue*> [#uses=1]
 	ret %XXValue* %tmp12
 }
diff --git a/llvm/test/CodeGen/X86/subreg-to-reg-4.ll b/llvm/test/CodeGen/X86/subreg-to-reg-4.ll
index 0693789..8340fc5 100644
--- a/llvm/test/CodeGen/X86/subreg-to-reg-4.ll
+++ b/llvm/test/CodeGen/X86/subreg-to-reg-4.ll
@@ -28,7 +28,7 @@
 }
 define void @cola(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone {
 entry:
-        %p = load i64* %x
+        %p = load i64, i64* %x
 	%t0 = add i64 %p, %y
 	%t1 = and i64 %t0, 4294967295
         %t2 = xor i64 %t1, %u
@@ -37,7 +37,7 @@
 }
 define void @yaks(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone {
 entry:
-        %p = load i64* %x
+        %p = load i64, i64* %x
 	%t0 = add i64 %p, %y
         %t1 = xor i64 %t0, %u
 	%t2 = and i64 %t1, 4294967295
@@ -46,8 +46,8 @@
 }
 define void @foo(i64 *%x, i64 *%y, i64* %z) nounwind readnone {
 entry:
-        %a = load i64* %x
-        %b = load i64* %y
+        %a = load i64, i64* %x
+        %b = load i64, i64* %y
 	%t0 = add i64 %a, %b
 	%t1 = and i64 %t0, 4294967295
         store i64 %t1, i64* %z
@@ -94,7 +94,7 @@
 }
 define void @scola(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone {
 entry:
-        %p = load i64* %x
+        %p = load i64, i64* %x
 	%t0 = sub i64 %p, %y
 	%t1 = and i64 %t0, 4294967295
         %t2 = xor i64 %t1, %u
@@ -103,7 +103,7 @@
 }
 define void @syaks(i64 *%x, i64 %y, i64* %z, i64 %u) nounwind readnone {
 entry:
-        %p = load i64* %x
+        %p = load i64, i64* %x
 	%t0 = sub i64 %p, %y
         %t1 = xor i64 %t0, %u
 	%t2 = and i64 %t1, 4294967295
@@ -112,8 +112,8 @@
 }
 define void @sfoo(i64 *%x, i64 *%y, i64* %z) nounwind readnone {
 entry:
-        %a = load i64* %x
-        %b = load i64* %y
+        %a = load i64, i64* %x
+        %b = load i64, i64* %y
 	%t0 = sub i64 %a, %b
 	%t1 = and i64 %t0, 4294967295
         store i64 %t1, i64* %z
diff --git a/llvm/test/CodeGen/X86/subreg-to-reg-6.ll b/llvm/test/CodeGen/X86/subreg-to-reg-6.ll
index 76430cd..bef09fa 100644
--- a/llvm/test/CodeGen/X86/subreg-to-reg-6.ll
+++ b/llvm/test/CodeGen/X86/subreg-to-reg-6.ll
@@ -2,7 +2,7 @@
 
 define i64 @foo() nounwind {
 entry:
-	%t0 = load i32* null, align 8
+	%t0 = load i32, i32* null, align 8
 	switch i32 %t0, label %bb65 [
 		i32 16, label %bb
 		i32 12, label %bb56
diff --git a/llvm/test/CodeGen/X86/switch-bt.ll b/llvm/test/CodeGen/X86/switch-bt.ll
index 065d8cd..1137821 100644
--- a/llvm/test/CodeGen/X86/switch-bt.ll
+++ b/llvm/test/CodeGen/X86/switch-bt.ll
@@ -16,8 +16,8 @@
 entry:
   %l.addr = alloca i8*, align 8                   ; <i8**> [#uses=2]
   store i8* %l, i8** %l.addr
-  %tmp = load i8** %l.addr                        ; <i8*> [#uses=1]
-  %tmp1 = load i8* %tmp                           ; <i8> [#uses=1]
+  %tmp = load i8*, i8** %l.addr                        ; <i8*> [#uses=1]
+  %tmp1 = load i8, i8* %tmp                           ; <i8> [#uses=1]
   %conv = sext i8 %tmp1 to i32                    ; <i32> [#uses=1]
   switch i32 %conv, label %sw.default [
     i32 62, label %sw.bb
diff --git a/llvm/test/CodeGen/X86/switch-zextload.ll b/llvm/test/CodeGen/X86/switch-zextload.ll
index 55425bc..2dd3f0e 100644
--- a/llvm/test/CodeGen/X86/switch-zextload.ll
+++ b/llvm/test/CodeGen/X86/switch-zextload.ll
@@ -9,7 +9,7 @@
 
 define fastcc void @set_proof_and_disproof_numbers(%struct.node_t* nocapture %node) nounwind {
 entry:
-	%0 = load i8* null, align 1		; <i8> [#uses=1]
+	%0 = load i8, i8* null, align 1		; <i8> [#uses=1]
 	switch i8 %0, label %return [
 		i8 2, label %bb31
 		i8 0, label %bb80
diff --git a/llvm/test/CodeGen/X86/tail-call-win64.ll b/llvm/test/CodeGen/X86/tail-call-win64.ll
index 23e9280..8811b75 100644
--- a/llvm/test/CodeGen/X86/tail-call-win64.ll
+++ b/llvm/test/CodeGen/X86/tail-call-win64.ll
@@ -27,7 +27,7 @@
 @g_fptr = global void ()* @tail_tgt
 
 define void @tail_jmp_mem() {
-  %fptr = load void ()** @g_fptr
+  %fptr = load void ()*, void ()** @g_fptr
   tail call void ()* %fptr()
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/tail-dup-addr.ll b/llvm/test/CodeGen/X86/tail-dup-addr.ll
index c68a8c6..3e5c8c8 100644
--- a/llvm/test/CodeGen/X86/tail-dup-addr.ll
+++ b/llvm/test/CodeGen/X86/tail-dup-addr.ll
@@ -10,7 +10,7 @@
 
 define void @foo() noreturn nounwind uwtable ssp {
 entry:
-  %tmp = load i32* @a, align 4
+  %tmp = load i32, i32* @a, align 4
   %foo = icmp eq i32 0, %tmp
   br i1 %foo, label %sw.bb, label %sw.default
 
diff --git a/llvm/test/CodeGen/X86/tail-opts.ll b/llvm/test/CodeGen/X86/tail-opts.ll
index 4a7f5fe..f590176 100644
--- a/llvm/test/CodeGen/X86/tail-opts.ll
+++ b/llvm/test/CodeGen/X86/tail-opts.ll
@@ -127,11 +127,11 @@
 define i1 @dont_merge_oddly(float* %result) nounwind {
 entry:
   %tmp4 = getelementptr float, float* %result, i32 2
-  %tmp5 = load float* %tmp4, align 4
+  %tmp5 = load float, float* %tmp4, align 4
   %tmp7 = getelementptr float, float* %result, i32 4
-  %tmp8 = load float* %tmp7, align 4
+  %tmp8 = load float, float* %tmp7, align 4
   %tmp10 = getelementptr float, float* %result, i32 6
-  %tmp11 = load float* %tmp10, align 4
+  %tmp11 = load float, float* %tmp10, align 4
   %tmp12 = fcmp olt float %tmp8, %tmp11
   br i1 %tmp12, label %bb, label %bb21
 
@@ -179,7 +179,7 @@
 
 define fastcc void @c_expand_expr_stmt(%union.tree_node* %expr) nounwind {
 entry:
-  %tmp4 = load i8* null, align 8                  ; <i8> [#uses=3]
+  %tmp4 = load i8, i8* null, align 8                  ; <i8> [#uses=3]
   switch i8 %tmp4, label %bb3 [
     i8 18, label %bb
   ]
@@ -199,9 +199,9 @@
   br label %bb3
 
 lvalue_p.exit:                                    ; preds = %bb.i
-  %tmp21 = load %union.tree_node** null, align 8  ; <%union.tree_node*> [#uses=3]
+  %tmp21 = load %union.tree_node*, %union.tree_node** null, align 8  ; <%union.tree_node*> [#uses=3]
   %tmp22 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 0 ; <i8*> [#uses=1]
-  %tmp23 = load i8* %tmp22, align 8               ; <i8> [#uses=1]
+  %tmp23 = load i8, i8* %tmp22, align 8               ; <i8> [#uses=1]
   %tmp24 = zext i8 %tmp23 to i32                  ; <i32> [#uses=1]
   switch i32 %tmp24, label %lvalue_p.exit4 [
     i32 0, label %bb2.i3
@@ -211,9 +211,9 @@
 bb.i1:                                            ; preds = %lvalue_p.exit
   %tmp25 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp21, i64 0, i32 0, i32 2 ; <i32*> [#uses=1]
   %tmp26 = bitcast i32* %tmp25 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
-  %tmp27 = load %union.tree_node** %tmp26, align 8 ; <%union.tree_node*> [#uses=2]
+  %tmp27 = load %union.tree_node*, %union.tree_node** %tmp26, align 8 ; <%union.tree_node*> [#uses=2]
   %tmp28 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
-  %tmp29 = load i8* %tmp28, align 8               ; <i8> [#uses=1]
+  %tmp29 = load i8, i8* %tmp28, align 8               ; <i8> [#uses=1]
   %tmp30 = zext i8 %tmp29 to i32                  ; <i32> [#uses=1]
   switch i32 %tmp30, label %lvalue_p.exit4 [
     i32 0, label %bb2.i.i2
@@ -228,9 +228,9 @@
 bb2.i.i2:                                         ; preds = %bb.i1
   %tmp35 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1]
   %tmp36 = bitcast i8* %tmp35 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
-  %tmp37 = load %union.tree_node** %tmp36, align 8 ; <%union.tree_node*> [#uses=1]
+  %tmp37 = load %union.tree_node*, %union.tree_node** %tmp36, align 8 ; <%union.tree_node*> [#uses=1]
   %tmp38 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp37, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
-  %tmp39 = load i8* %tmp38, align 8               ; <i8> [#uses=1]
+  %tmp39 = load i8, i8* %tmp38, align 8               ; <i8> [#uses=1]
   switch i8 %tmp39, label %bb2 [
     i8 16, label %lvalue_p.exit4
     i8 23, label %lvalue_p.exit4
@@ -239,9 +239,9 @@
 bb2.i3:                                           ; preds = %lvalue_p.exit
   %tmp40 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1]
   %tmp41 = bitcast i8* %tmp40 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
-  %tmp42 = load %union.tree_node** %tmp41, align 8 ; <%union.tree_node*> [#uses=1]
+  %tmp42 = load %union.tree_node*, %union.tree_node** %tmp41, align 8 ; <%union.tree_node*> [#uses=1]
   %tmp43 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp42, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
-  %tmp44 = load i8* %tmp43, align 8               ; <i8> [#uses=1]
+  %tmp44 = load i8, i8* %tmp43, align 8               ; <i8> [#uses=1]
   switch i8 %tmp44, label %bb2 [
     i8 16, label %lvalue_p.exit4
     i8 23, label %lvalue_p.exit4
diff --git a/llvm/test/CodeGen/X86/tailcall-64.ll b/llvm/test/CodeGen/X86/tailcall-64.ll
index 962854a..f4d51c2 100644
--- a/llvm/test/CodeGen/X86/tailcall-64.ll
+++ b/llvm/test/CodeGen/X86/tailcall-64.ll
@@ -188,7 +188,7 @@
 define void @fold_indexed_load(i8* %mbstr, i64 %idxprom) nounwind uwtable ssp {
 entry:
   %dsplen = getelementptr inbounds [0 x %struct.funcs], [0 x %struct.funcs]* @func_table, i64 0, i64 %idxprom, i32 2
-  %x1 = load i32 (i8*)** %dsplen, align 8
+  %x1 = load i32 (i8*)*, i32 (i8*)** %dsplen, align 8
   %call = tail call i32 %x1(i8* %mbstr) nounwind
   ret void
 }
@@ -214,7 +214,7 @@
 entry:
   %idxprom = sext i32 %n to i64
   %arrayidx = getelementptr inbounds [0 x i32 (i8*, ...)*], [0 x i32 (i8*, ...)*]* @funcs, i64 0, i64 %idxprom
-  %0 = load i32 (i8*, ...)** %arrayidx, align 8
+  %0 = load i32 (i8*, ...)*, i32 (i8*, ...)** %arrayidx, align 8
   %call = tail call i32 (i8*, ...)* %0(i8* null, i32 0, i32 0, i32 0, i32 0, i32 0) nounwind
   ret i32 %call
 }
diff --git a/llvm/test/CodeGen/X86/tailcall-returndup-void.ll b/llvm/test/CodeGen/X86/tailcall-returndup-void.ll
index 6be0e6c..d9406ec 100644
--- a/llvm/test/CodeGen/X86/tailcall-returndup-void.ll
+++ b/llvm/test/CodeGen/X86/tailcall-returndup-void.ll
@@ -16,7 +16,7 @@
   br i1 icmp ne (i64 and (i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 7), i64 0), label %c1ZP.i, label %n1ZQ.i
 
 n1ZQ.i:                                           ; preds = %n26p
-  %ln1ZT.i = load i64* getelementptr inbounds ([0 x i64]* @sES_closure, i64 0, i64 0), align 8
+  %ln1ZT.i = load i64, i64* getelementptr inbounds ([0 x i64]* @sES_closure, i64 0, i64 0), align 8
   %ln1ZU.i = inttoptr i64 %ln1ZT.i to void (i64*, i64*, i64*, i64, i64, i64)*
   tail call ghccc void %ln1ZU.i(i64* %Base_Arg, i64* %Sp_Arg, i64* %Hp_Arg, i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 ptrtoint ([0 x i64]* @sES_closure to i64), i64 %R3_Arg) nounwind
   br label %rBL_info.exit
@@ -30,7 +30,7 @@
 
 c26a:                                             ; preds = %c263
   %ln27h = getelementptr inbounds i64, i64* %Base_Arg, i64 -2
-  %ln27j = load i64* %ln27h, align 8
+  %ln27j = load i64, i64* %ln27h, align 8
   %ln27k = inttoptr i64 %ln27j to void (i64*, i64*, i64*, i64, i64, i64)*
   tail call ghccc void %ln27k(i64* %Base_Arg, i64* %Sp_Arg, i64* %Hp_Arg, i64 %R1_Arg, i64 %R2_Arg, i64 %R3_Arg) nounwind
   ret void
diff --git a/llvm/test/CodeGen/X86/tailcall-ri64.ll b/llvm/test/CodeGen/X86/tailcall-ri64.ll
index 4cdf2d8..443d488 100644
--- a/llvm/test/CodeGen/X86/tailcall-ri64.ll
+++ b/llvm/test/CodeGen/X86/tailcall-ri64.ll
@@ -16,9 +16,9 @@
 %this, %vt* %Ty) align 2 {
 entry:
   %0 = bitcast %vt* %Ty to %vt* (%vt*, %class*)***
-  %vtable = load %vt* (%vt*, %class*)*** %0, align 8
+  %vtable = load %vt* (%vt*, %class*)**, %vt* (%vt*, %class*)*** %0, align 8
   %vfn = getelementptr inbounds %vt* (%vt*, %class*)*, %vt* (%vt*, %class*)** %vtable, i64 4
-  %1 = load %vt* (%vt*, %class*)** %vfn, align 8
+  %1 = load %vt* (%vt*, %class*)*, %vt* (%vt*, %class*)** %vfn, align 8
   %call = tail call %vt* %1(%vt* %Ty, %class* %this)
   ret %vt* %call
 }
diff --git a/llvm/test/CodeGen/X86/tailcallbyval.ll b/llvm/test/CodeGen/X86/tailcallbyval.ll
index d3a740c..8a0113a 100644
--- a/llvm/test/CodeGen/X86/tailcallbyval.ll
+++ b/llvm/test/CodeGen/X86/tailcallbyval.ll
@@ -6,7 +6,7 @@
 define  fastcc i32 @tailcallee(%struct.s* byval %a) nounwind {
 entry:
         %tmp2 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 0
-        %tmp3 = load i32* %tmp2
+        %tmp3 = load i32, i32* %tmp2
         ret i32 %tmp3
 ; CHECK: tailcallee
 ; CHECK: movl 4(%esp), %eax
diff --git a/llvm/test/CodeGen/X86/tailcallbyval64.ll b/llvm/test/CodeGen/X86/tailcallbyval64.ll
index db912ca..9df1470 100644
--- a/llvm/test/CodeGen/X86/tailcallbyval64.ll
+++ b/llvm/test/CodeGen/X86/tailcallbyval64.ll
@@ -36,7 +36,7 @@
 define  fastcc i64 @tailcaller(i64 %b, %struct.s* byval %a) {
 entry:
         %tmp2 = getelementptr %struct.s, %struct.s* %a, i32 0, i32 1
-        %tmp3 = load i64* %tmp2, align 8
+        %tmp3 = load i64, i64* %tmp2, align 8
         %tmp4 = tail call fastcc i64 @tailcallee(%struct.s* byval %a , i64 %tmp3, i64 %b, i64 7, i64 13, i64 17)
         ret i64 %tmp4
 }
diff --git a/llvm/test/CodeGen/X86/tbm-intrinsics-x86_64.ll b/llvm/test/CodeGen/X86/tbm-intrinsics-x86_64.ll
index 1beee72..12218cc 100644
--- a/llvm/test/CodeGen/X86/tbm-intrinsics-x86_64.ll
+++ b/llvm/test/CodeGen/X86/tbm-intrinsics-x86_64.ll
@@ -16,7 +16,7 @@
   ; CHECK-LABEL: test_x86_tbm_bextri_u32_m:
   ; CHECK-NOT: mov
   ; CHECK: bextr $
-  %tmp1 = load i32* %a, align 4
+  %tmp1 = load i32, i32* %a, align 4
   %0 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %tmp1, i32 2814)
   ret i32 %0
 }
@@ -37,7 +37,7 @@
   ; CHECK-LABEL: test_x86_tbm_bextri_u64_m:
   ; CHECK-NOT: mov
   ; CHECK: bextr $
-  %tmp1 = load i64* %a, align 8
+  %tmp1 = load i64, i64* %a, align 8
   %0 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %tmp1, i64 2814)
   ret i64 %0
 }
diff --git a/llvm/test/CodeGen/X86/tbm_patterns.ll b/llvm/test/CodeGen/X86/tbm_patterns.ll
index 79eea10..80d36d5 100644
--- a/llvm/test/CodeGen/X86/tbm_patterns.ll
+++ b/llvm/test/CodeGen/X86/tbm_patterns.ll
@@ -15,7 +15,7 @@
   ; CHECK-LABEL: test_x86_tbm_bextri_u32_m:
   ; CHECK-NOT: mov
   ; CHECK: bextr $
-  %0 = load i32* %a
+  %0 = load i32, i32* %a
   %1 = lshr i32 %0, 4
   %2 = and i32 %1, 4095
   ret i32 %2
@@ -36,7 +36,7 @@
   ; CHECK-LABEL: test_x86_tbm_bextri_u64_m:
   ; CHECK-NOT: mov
   ; CHECK: bextr $
-  %0 = load i64* %a
+  %0 = load i64, i64* %a
   %1 = lshr i64 %0, 4
   %2 = and i64 %1, 4095
   ret i64 %2
diff --git a/llvm/test/CodeGen/X86/test-shrink-bug.ll b/llvm/test/CodeGen/X86/test-shrink-bug.ll
index 64631ea..1bb1e63 100644
--- a/llvm/test/CodeGen/X86/test-shrink-bug.ll
+++ b/llvm/test/CodeGen/X86/test-shrink-bug.ll
@@ -14,7 +14,7 @@
 
 define i32 @func_35(i64 %p_38) nounwind ssp {
 entry:
-  %tmp = load i8* @g_14                           ; <i8> [#uses=2]
+  %tmp = load i8, i8* @g_14                           ; <i8> [#uses=2]
   %conv = zext i8 %tmp to i32                     ; <i32> [#uses=1]
   %cmp = icmp sle i32 1, %conv                    ; <i1> [#uses=1]
   %conv2 = zext i1 %cmp to i32                    ; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/testl-commute.ll b/llvm/test/CodeGen/X86/testl-commute.ll
index bf6debf..a9a9e58 100644
--- a/llvm/test/CodeGen/X86/testl-commute.ll
+++ b/llvm/test/CodeGen/X86/testl-commute.ll
@@ -13,8 +13,8 @@
 ; CHECK: ret
 
 entry:
-	%0 = load i32* %P, align 4		; <i32> [#uses=3]
-	%1 = load i32* %G, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* %P, align 4		; <i32> [#uses=3]
+	%1 = load i32, i32* %G, align 4		; <i32> [#uses=1]
 	%2 = and i32 %1, %0		; <i32> [#uses=1]
 	%3 = icmp eq i32 %2, 0		; <i1> [#uses=1]
 	br i1 %3, label %bb1, label %bb
@@ -34,8 +34,8 @@
 ; CHECK: ret
 
 entry:
-	%0 = load i32* %P, align 4		; <i32> [#uses=3]
-	%1 = load i32* %G, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* %P, align 4		; <i32> [#uses=3]
+	%1 = load i32, i32* %G, align 4		; <i32> [#uses=1]
 	%2 = and i32 %0, %1		; <i32> [#uses=1]
 	%3 = icmp eq i32 %2, 0		; <i1> [#uses=1]
 	br i1 %3, label %bb1, label %bb
@@ -55,8 +55,8 @@
 ; CHECK: ret
 
 entry:
-	%0 = load i32* %P, align 4		; <i32> [#uses=3]
-	%1 = load i32* %G, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* %P, align 4		; <i32> [#uses=3]
+	%1 = load i32, i32* %G, align 4		; <i32> [#uses=1]
 	%2 = and i32 %0, %1		; <i32> [#uses=1]
 	%3 = icmp eq i32 %2, 0		; <i1> [#uses=1]
 	br i1 %3, label %bb1, label %bb
diff --git a/llvm/test/CodeGen/X86/tls-addr-non-leaf-function.ll b/llvm/test/CodeGen/X86/tls-addr-non-leaf-function.ll
index ec47232..b9cab65 100644
--- a/llvm/test/CodeGen/X86/tls-addr-non-leaf-function.ll
+++ b/llvm/test/CodeGen/X86/tls-addr-non-leaf-function.ll
@@ -32,6 +32,6 @@
 
 @x = thread_local global i32 0
 define i32 @foo() "no-frame-pointer-elim-non-leaf" {
-  %a = load i32* @x, align 4
+  %a = load i32, i32* @x, align 4
   ret i32 %a
 }
diff --git a/llvm/test/CodeGen/X86/tls-local-dynamic.ll b/llvm/test/CodeGen/X86/tls-local-dynamic.ll
index 4841e52..1f1b41a 100644
--- a/llvm/test/CodeGen/X86/tls-local-dynamic.ll
+++ b/llvm/test/CodeGen/X86/tls-local-dynamic.ll
@@ -32,7 +32,7 @@
 
 
 if.else:
-  %0 = load i32* @x, align 4
+  %0 = load i32, i32* @x, align 4
   %cmp1 = icmp eq i32 %i, 2
   br i1 %cmp1, label %if.then2, label %return
 ; Now we call __tls_get_addr.
@@ -43,7 +43,7 @@
 
 
 if.then2:
-  %1 = load i32* @y, align 4
+  %1 = load i32, i32* @y, align 4
   %add = add nsw i32 %1, %0
   br label %return
 ; This accesses TLS, but is dominated by the previous block,
diff --git a/llvm/test/CodeGen/X86/tls-pic.ll b/llvm/test/CodeGen/X86/tls-pic.ll
index 0c79da6..805bc25 100644
--- a/llvm/test/CodeGen/X86/tls-pic.ll
+++ b/llvm/test/CodeGen/X86/tls-pic.ll
@@ -7,7 +7,7 @@
 
 define i32 @f1() {
 entry:
-	%tmp1 = load i32* @i
+	%tmp1 = load i32, i32* @i
 	ret i32 %tmp1
 }
 
@@ -39,7 +39,7 @@
 
 define i32 @f3() {
 entry:
-	%tmp1 = load i32* @i		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* @i		; <i32> [#uses=1]
 	ret i32 %tmp1
 }
 
@@ -68,8 +68,8 @@
 
 define i32 @f5() nounwind {
 entry:
-	%0 = load i32* @j, align 4
-	%1 = load i32* @k, align 4
+	%0 = load i32, i32* @j, align 4
+	%1 = load i32, i32* @k, align 4
 	%add = add nsw i32 %0, %1
 	ret i32 %add
 }
diff --git a/llvm/test/CodeGen/X86/tls-pie.ll b/llvm/test/CodeGen/X86/tls-pie.ll
index d1e09c2..10fe1e9 100644
--- a/llvm/test/CodeGen/X86/tls-pie.ll
+++ b/llvm/test/CodeGen/X86/tls-pie.ll
@@ -15,7 +15,7 @@
 ; X64-NEXT: ret
 
 entry:
-	%tmp1 = load i32* @i
+	%tmp1 = load i32, i32* @i
 	ret i32 %tmp1
 }
 
@@ -49,7 +49,7 @@
 ; X64-NEXT: ret
 
 entry:
-	%tmp1 = load i32* @i2
+	%tmp1 = load i32, i32* @i2
 	ret i32 %tmp1
 }
 
diff --git a/llvm/test/CodeGen/X86/tls.ll b/llvm/test/CodeGen/X86/tls.ll
index 75e7fc4..93ea833 100644
--- a/llvm/test/CodeGen/X86/tls.ll
+++ b/llvm/test/CodeGen/X86/tls.ll
@@ -40,7 +40,7 @@
 ; MINGW32-NEXT: retl
 
 entry:
-	%tmp1 = load i32* @i1
+	%tmp1 = load i32, i32* @i1
 	ret i32 %tmp1
 }
 
@@ -105,7 +105,7 @@
 ; MINGW32-NEXT: retl
 
 entry:
-	%tmp1 = load i32* @i2
+	%tmp1 = load i32, i32* @i2
 	ret i32 %tmp1
 }
 
@@ -168,7 +168,7 @@
 ; MINGW32-NEXT: retl
 
 entry:
-	%tmp1 = load i32* @i3
+	%tmp1 = load i32, i32* @i3
 	ret i32 %tmp1
 }
 
@@ -219,7 +219,7 @@
 ; MINGW32-NEXT: retl
 
 entry:
-	%tmp1 = load i32* @i4
+	%tmp1 = load i32, i32* @i4
 	ret i32 %tmp1
 }
 
@@ -258,7 +258,7 @@
 ; MINGW32-NEXT: retl
 
 entry:
-	%tmp1 = load i32* @i5
+	%tmp1 = load i32, i32* @i5
 	ret i32 %tmp1
 }
 
@@ -309,7 +309,7 @@
 ; MINGW32: retl
 
 entry:
-	%tmp1 = load i16* @s1
+	%tmp1 = load i16, i16* @s1
 	ret i16 %tmp1
 }
 
@@ -341,7 +341,7 @@
 
 
 entry:
-	%tmp1 = load i16* @s1
+	%tmp1 = load i16, i16* @s1
   %tmp2 = sext i16 %tmp1 to i32
 	ret i32 %tmp2
 }
@@ -373,7 +373,7 @@
 ; MINGW32-NEXT: retl
 
 entry:
-	%tmp1 = load i8* @b1
+	%tmp1 = load i8, i8* @b1
 	ret i8 %tmp1
 }
 
@@ -404,7 +404,7 @@
 ; MINGW32-NEXT: retl
 
 entry:
-	%tmp1 = load i8* @b1
+	%tmp1 = load i8, i8* @b1
   %tmp2 = sext i8 %tmp1 to i32
 	ret i32 %tmp2
 }
diff --git a/llvm/test/CodeGen/X86/tlv-1.ll b/llvm/test/CodeGen/X86/tlv-1.ll
index 66e2f81..f06810c 100644
--- a/llvm/test/CodeGen/X86/tlv-1.ll
+++ b/llvm/test/CodeGen/X86/tlv-1.ll
@@ -25,8 +25,8 @@
 ; CHECK: movq _b@TLVP(%rip),
 ; CHECK: callq *
 ; CHECK: subl (%rax), [[REGISTER]]
-  %0 = load i32* @a, align 4
-  %1 = load i32* @b, align 4
+  %0 = load i32, i32* @a, align 4
+  %1 = load i32, i32* @b, align 4
   %sub = sub nsw i32 %0, %1
   ret i32 %sub
 }
diff --git a/llvm/test/CodeGen/X86/trunc-ext-ld-st.ll b/llvm/test/CodeGen/X86/trunc-ext-ld-st.ll
index 8de6297..65f42bb 100644
--- a/llvm/test/CodeGen/X86/trunc-ext-ld-st.ll
+++ b/llvm/test/CodeGen/X86/trunc-ext-ld-st.ll
@@ -10,7 +10,7 @@
 ;CHECK: ret
 
 define void @load_2_i8(<2 x i8>* %A)  {
-   %T = load <2 x i8>* %A
+   %T = load <2 x i8>, <2 x i8>* %A
    %G = add <2 x i8> %T, <i8 9, i8 7>
    store <2 x i8> %G, <2 x i8>* %A
    ret void
@@ -24,7 +24,7 @@
 ;CHECK: movd
 ;CHECK: ret
 define void @load_2_i16(<2 x i16>* %A)  {
-   %T = load <2 x i16>* %A
+   %T = load <2 x i16>, <2 x i16>* %A
    %G = add <2 x i16> %T, <i16 9, i16 7>
    store <2 x i16> %G, <2 x i16>* %A
    ret void
@@ -36,7 +36,7 @@
 ;CHECK: pshufd
 ;CHECK: ret
 define void @load_2_i32(<2 x i32>* %A)  {
-   %T = load <2 x i32>* %A
+   %T = load <2 x i32>, <2 x i32>* %A
    %G = add <2 x i32> %T, <i32 9, i32 7>
    store <2 x i32> %G, <2 x i32>* %A
    ret void
@@ -48,7 +48,7 @@
 ;CHECK: pshufb
 ;CHECK: ret
 define void @load_4_i8(<4 x i8>* %A)  {
-   %T = load <4 x i8>* %A
+   %T = load <4 x i8>, <4 x i8>* %A
    %G = add <4 x i8> %T, <i8 1, i8 4, i8 9, i8 7>
    store <4 x i8> %G, <4 x i8>* %A
    ret void
@@ -60,7 +60,7 @@
 ;CHECK: pshufb
 ;CHECK: ret
 define void @load_4_i16(<4 x i16>* %A)  {
-   %T = load <4 x i16>* %A
+   %T = load <4 x i16>, <4 x i16>* %A
    %G = add <4 x i16> %T, <i16 1, i16 4, i16 9, i16 7>
    store <4 x i16> %G, <4 x i16>* %A
    ret void
@@ -72,7 +72,7 @@
 ;CHECK: pshufb
 ;CHECK: ret
 define void @load_8_i8(<8 x i8>* %A)  {
-   %T = load <8 x i8>* %A
+   %T = load <8 x i8>, <8 x i8>* %A
    %G = add <8 x i8> %T, %T
    store <8 x i8> %G, <8 x i8>* %A
    ret void
diff --git a/llvm/test/CodeGen/X86/trunc-to-bool.ll b/llvm/test/CodeGen/X86/trunc-to-bool.ll
index 0ed6347..3dd98ee 100644
--- a/llvm/test/CodeGen/X86/trunc-to-bool.ll
+++ b/llvm/test/CodeGen/X86/trunc-to-bool.ll
@@ -25,7 +25,7 @@
 ; CHECK: btl
 
 define i32 @test3(i8* %ptr) nounwind {
-    %val = load i8* %ptr
+    %val = load i8, i8* %ptr
     %tmp = trunc i8 %val to i1
     br i1 %tmp, label %cond_true, label %cond_false
 cond_true:
diff --git a/llvm/test/CodeGen/X86/twoaddr-pass-sink.ll b/llvm/test/CodeGen/X86/twoaddr-pass-sink.ll
index 7564324..9a98e47 100644
--- a/llvm/test/CodeGen/X86/twoaddr-pass-sink.ll
+++ b/llvm/test/CodeGen/X86/twoaddr-pass-sink.ll
@@ -13,15 +13,15 @@
 	%skiplist_addr.0 = getelementptr i8, i8* %skiplist, i32 %skiplist_addr.0.rec		; <i8*> [#uses=1]
 	%vDct_addr.0.sum43 = or i32 %vYp_addr.0.rec, 1		; <i32> [#uses=1]
 	%tmp7 = getelementptr <2 x i64>, <2 x i64>* %vDct, i32 %vDct_addr.0.sum43		; <<2 x i64>*> [#uses=1]
-	%tmp8 = load <2 x i64>* %tmp7, align 16		; <<2 x i64>> [#uses=1]
-	%tmp11 = load <2 x i64>* %vDct_addr.0, align 16		; <<2 x i64>> [#uses=1]
+	%tmp8 = load <2 x i64>, <2 x i64>* %tmp7, align 16		; <<2 x i64>> [#uses=1]
+	%tmp11 = load <2 x i64>, <2 x i64>* %vDct_addr.0, align 16		; <<2 x i64>> [#uses=1]
 	%tmp13 = bitcast <2 x i64> %tmp8 to <8 x i16>		; <<8 x i16>> [#uses=1]
 	%tmp15 = bitcast <2 x i64> %tmp11 to <8 x i16>		; <<8 x i16>> [#uses=1]
 	%tmp16 = shufflevector <8 x i16> %tmp15, <8 x i16> %tmp13, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 >		; <<8 x i16>> [#uses=1]
 	%tmp26 = mul <8 x i16> %tmp25, %tmp16		; <<8 x i16>> [#uses=1]
 	%tmp27 = bitcast <8 x i16> %tmp26 to <2 x i64>		; <<2 x i64>> [#uses=1]
 	store <2 x i64> %tmp27, <2 x i64>* %vYp_addr.0, align 16
-	%tmp37 = load i8* %skiplist_addr.0, align 1		; <i8> [#uses=1]
+	%tmp37 = load i8, i8* %skiplist_addr.0, align 1		; <i8> [#uses=1]
 	%tmp38 = icmp eq i8 %tmp37, 0		; <i1> [#uses=1]
 	%indvar.next = add i32 %skiplist_addr.0.rec, 1		; <i32> [#uses=1]
 	br i1 %tmp38, label %return, label %bb
diff --git a/llvm/test/CodeGen/X86/unaligned-32-byte-memops.ll b/llvm/test/CodeGen/X86/unaligned-32-byte-memops.ll
index 4b73c8c..a44d44d 100644
--- a/llvm/test/CodeGen/X86/unaligned-32-byte-memops.ll
+++ b/llvm/test/CodeGen/X86/unaligned-32-byte-memops.ll
@@ -20,7 +20,7 @@
   ; HASWELL: vmovups
   ; HASWELL: retq
 
-  %A = load <8 x float>* %Ap, align 16
+  %A = load <8 x float>, <8 x float>* %Ap, align 16
   ret <8 x float> %A
 }
 
@@ -67,8 +67,8 @@
 
   %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 1
   %ptr2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 2
-  %v1 = load <4 x float>* %ptr1, align 1
-  %v2 = load <4 x float>* %ptr2, align 1
+  %v1 = load <4 x float>, <4 x float>* %ptr1, align 1
+  %v2 = load <4 x float>, <4 x float>* %ptr2, align 1
   %shuffle = shufflevector <4 x float> %v1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   %v3 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %shuffle, <4 x float> %v2, i8 1)
   ret <8 x float> %v3
@@ -91,8 +91,8 @@
 
   %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 2
   %ptr2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 3
-  %v1 = load <4 x float>* %ptr1, align 1
-  %v2 = load <4 x float>* %ptr2, align 1
+  %v1 = load <4 x float>, <4 x float>* %ptr1, align 1
+  %v2 = load <4 x float>, <4 x float>* %ptr2, align 1
   %shuffle = shufflevector <4 x float> %v2, <4 x float> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3>
   %v3 = tail call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %shuffle, <4 x float> %v1, i8 0)
   ret <8 x float> %v3
@@ -115,8 +115,8 @@
 
   %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 3
   %ptr2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 4
-  %v1 = load <4 x float>* %ptr1, align 1
-  %v2 = load <4 x float>* %ptr2, align 1
+  %v1 = load <4 x float>, <4 x float>* %ptr1, align 1
+  %v2 = load <4 x float>, <4 x float>* %ptr2, align 1
   %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x float> %v3
 }
@@ -138,8 +138,8 @@
 
   %ptr1 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 4
   %ptr2 = getelementptr inbounds <4 x float>, <4 x float>* %ptr, i64 5
-  %v1 = load <4 x float>* %ptr1, align 1
-  %v2 = load <4 x float>* %ptr2, align 1
+  %v1 = load <4 x float>, <4 x float>* %ptr1, align 1
+  %v2 = load <4 x float>, <4 x float>* %ptr2, align 1
   %v3 = shufflevector <4 x float> %v2, <4 x float> %v1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
   ret <8 x float> %v3
 }
@@ -170,8 +170,8 @@
 
   %ptr1 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 5
   %ptr2 = getelementptr inbounds <2 x i64>, <2 x i64>* %ptr, i64 6
-  %v1 = load <2 x i64>* %ptr1, align 1
-  %v2 = load <2 x i64>* %ptr2, align 1
+  %v1 = load <2 x i64>, <2 x i64>* %ptr1, align 1
+  %v2 = load <2 x i64>, <2 x i64>* %ptr2, align 1
   %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %v4 = add <4 x i64> %v3, %x
   ret <4 x i64> %v4
@@ -198,8 +198,8 @@
 
   %ptr1 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 6
   %ptr2 = getelementptr inbounds <4 x i32>, <4 x i32>* %ptr, i64 7
-  %v1 = load <4 x i32>* %ptr1, align 1
-  %v2 = load <4 x i32>* %ptr2, align 1
+  %v1 = load <4 x i32>, <4 x i32>* %ptr1, align 1
+  %v2 = load <4 x i32>, <4 x i32>* %ptr2, align 1
   %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %v4 = add <8 x i32> %v3, %x
   ret <8 x i32> %v4
@@ -226,8 +226,8 @@
 
   %ptr1 = getelementptr inbounds <8 x i16>, <8 x i16>* %ptr, i64 7
   %ptr2 = getelementptr inbounds <8 x i16>, <8 x i16>* %ptr, i64 8
-  %v1 = load <8 x i16>* %ptr1, align 1
-  %v2 = load <8 x i16>* %ptr2, align 1
+  %v1 = load <8 x i16>, <8 x i16>* %ptr1, align 1
+  %v2 = load <8 x i16>, <8 x i16>* %ptr2, align 1
   %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %v4 = add <16 x i16> %v3, %x
   ret <16 x i16> %v4
@@ -254,8 +254,8 @@
 
   %ptr1 = getelementptr inbounds <16 x i8>, <16 x i8>* %ptr, i64 8
   %ptr2 = getelementptr inbounds <16 x i8>, <16 x i8>* %ptr, i64 9
-  %v1 = load <16 x i8>* %ptr1, align 1
-  %v2 = load <16 x i8>* %ptr2, align 1
+  %v1 = load <16 x i8>, <16 x i8>* %ptr1, align 1
+  %v2 = load <16 x i8>, <16 x i8>* %ptr2, align 1
   %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
   %v4 = add <32 x i8> %v3, %x
   ret <32 x i8> %v4
@@ -279,8 +279,8 @@
 
   %ptr1 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 9
   %ptr2 = getelementptr inbounds <2 x double>, <2 x double>* %ptr, i64 10
-  %v1 = load <2 x double>* %ptr1, align 1
-  %v2 = load <2 x double>* %ptr2, align 1
+  %v1 = load <2 x double>, <2 x double>* %ptr1, align 1
+  %v2 = load <2 x double>, <2 x double>* %ptr2, align 1
   %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %v4 = fadd <4 x double> %v3, %x
   ret <4 x double> %v4
diff --git a/llvm/test/CodeGen/X86/unaligned-spill-folding.ll b/llvm/test/CodeGen/X86/unaligned-spill-folding.ll
index 3feeb0d..33e2daf 100644
--- a/llvm/test/CodeGen/X86/unaligned-spill-folding.ll
+++ b/llvm/test/CodeGen/X86/unaligned-spill-folding.ll
@@ -13,7 +13,7 @@
   %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
   %0 = getelementptr inbounds [32 x i32], [32 x i32]* @arr, i32 0, i32 %index
   %1 = bitcast i32* %0 to <4 x i32>*
-  %wide.load = load <4 x i32>* %1, align 16
+  %wide.load = load <4 x i32>, <4 x i32>* %1, align 16
   %2 = add nsw <4 x i32> %wide.load, <i32 10, i32 10, i32 10, i32 10>
   %3 = xor <4 x i32> %2, <i32 123345, i32 123345, i32 123345, i32 123345>
   %4 = add nsw <4 x i32> %3, <i32 112, i32 112, i32 112, i32 112>
diff --git a/llvm/test/CodeGen/X86/unwindraise.ll b/llvm/test/CodeGen/X86/unwindraise.ll
index b9b2626..fb8319b63 100644
--- a/llvm/test/CodeGen/X86/unwindraise.ll
+++ b/llvm/test/CodeGen/X86/unwindraise.ll
@@ -50,12 +50,12 @@
   ]
 
 if.end3:                                          ; preds = %while.body
-  %4 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality, align 8
+  %4 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)*, i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality, align 8
   %tobool = icmp eq i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)* %4, null
   br i1 %tobool, label %if.end13, label %if.then4
 
 if.then4:                                         ; preds = %if.end3
-  %5 = load i64* %exception_class, align 8
+  %5 = load i64, i64* %exception_class, align 8
   %call6 = call i32 %4(i32 1, i32 1, i64 %5, %struct._Unwind_Exception* %exc, %struct._Unwind_Context* %cur_context)
   switch i32 %call6, label %do.end21.loopexit46 [
     i32 6, label %while.end
@@ -64,7 +64,7 @@
 
 if.end13:                                         ; preds = %if.then4, %if.end3
   call fastcc void @uw_update_context_1(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs)
-  %6 = load i64* %retaddr_column.i, align 8
+  %6 = load i64, i64* %retaddr_column.i, align 8
   %conv.i = trunc i64 %6 to i32
   %cmp.i.i.i = icmp slt i32 %conv.i, 18
   br i1 %cmp.i.i.i, label %cond.end.i.i.i, label %cond.true.i.i.i
@@ -77,17 +77,17 @@
   %sext.i = shl i64 %6, 32
   %idxprom.i.i.i = ashr exact i64 %sext.i, 32
   %arrayidx.i.i.i = getelementptr inbounds [18 x i8], [18 x i8]* @dwarf_reg_size_table, i64 0, i64 %idxprom.i.i.i
-  %7 = load i8* %arrayidx.i.i.i, align 1
+  %7 = load i8, i8* %arrayidx.i.i.i, align 1
   %arrayidx2.i.i.i = getelementptr inbounds %struct._Unwind_Context, %struct._Unwind_Context* %cur_context, i64 0, i32 0, i64 %idxprom.i.i.i
-  %8 = load i8** %arrayidx2.i.i.i, align 8
-  %9 = load i64* %flags.i.i.i.i, align 8
+  %8 = load i8*, i8** %arrayidx2.i.i.i, align 8
+  %9 = load i64, i64* %flags.i.i.i.i, align 8
   %and.i.i.i.i = and i64 %9, 4611686018427387904
   %tobool.i.i.i = icmp eq i64 %and.i.i.i.i, 0
   br i1 %tobool.i.i.i, label %if.end.i.i.i, label %land.lhs.true.i.i.i
 
 land.lhs.true.i.i.i:                              ; preds = %cond.end.i.i.i
   %arrayidx4.i.i.i = getelementptr inbounds %struct._Unwind_Context, %struct._Unwind_Context* %cur_context, i64 0, i32 8, i64 %idxprom.i.i.i
-  %10 = load i8* %arrayidx4.i.i.i, align 1
+  %10 = load i8, i8* %arrayidx4.i.i.i, align 1
   %tobool6.i.i.i = icmp eq i8 %10, 0
   br i1 %tobool6.i.i.i, label %if.end.i.i.i, label %if.then.i.i.i
 
@@ -101,7 +101,7 @@
 
 if.then10.i.i.i:                                  ; preds = %if.end.i.i.i
   %12 = bitcast i8* %8 to i64*
-  %13 = load i64* %12, align 8
+  %13 = load i64, i64* %12, align 8
   br label %uw_update_context.exit
 
 cond.true14.i.i.i:                                ; preds = %if.end.i.i.i
@@ -117,7 +117,7 @@
 while.end:                                        ; preds = %if.then4
   %private_1 = getelementptr inbounds %struct._Unwind_Exception, %struct._Unwind_Exception* %exc, i64 0, i32 2
   store i64 0, i64* %private_1, align 8
-  %15 = load i8** %ra.i, align 8
+  %15 = load i8*, i8** %ra.i, align 8
   %16 = ptrtoint i8* %15 to i64
   %private_2 = getelementptr inbounds %struct._Unwind_Exception, %struct._Unwind_Exception* %exc, i64 0, i32 3
   store i64 %16, i64* %private_2, align 8
@@ -130,21 +130,21 @@
 
 while.body.i:                                     ; preds = %uw_update_context.exit44, %while.end
   %call.i = call fastcc i32 @uw_frame_state_for(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs.i)
-  %18 = load i8** %ra.i, align 8
+  %18 = load i8*, i8** %ra.i, align 8
   %19 = ptrtoint i8* %18 to i64
-  %20 = load i64* %private_2, align 8
+  %20 = load i64, i64* %private_2, align 8
   %cmp.i = icmp eq i64 %19, %20
   %cmp2.i = icmp eq i32 %call.i, 0
   br i1 %cmp2.i, label %if.end.i, label %do.end21
 
 if.end.i:                                         ; preds = %while.body.i
-  %21 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality.i, align 8
+  %21 = load i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)*, i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)** %personality.i, align 8
   %tobool.i = icmp eq i32 (i32, i32, i64, %struct._Unwind_Exception*, %struct._Unwind_Context*)* %21, null
   br i1 %tobool.i, label %if.end12.i, label %if.then3.i
 
 if.then3.i:                                       ; preds = %if.end.i
   %or.i = select i1 %cmp.i, i32 6, i32 2
-  %22 = load i64* %exception_class, align 8
+  %22 = load i64, i64* %exception_class, align 8
   %call5.i = call i32 %21(i32 1, i32 %or.i, i64 %22, %struct._Unwind_Exception* %exc, %struct._Unwind_Context* %cur_context)
   switch i32 %call5.i, label %do.end21 [
     i32 7, label %do.body19
@@ -160,7 +160,7 @@
 
 cond.end.i:                                       ; preds = %if.end12.i
   call fastcc void @uw_update_context_1(%struct._Unwind_Context* %cur_context, %struct._Unwind_FrameState* %fs.i)
-  %23 = load i64* %retaddr_column.i22, align 8
+  %23 = load i64, i64* %retaddr_column.i22, align 8
   %conv.i23 = trunc i64 %23 to i32
   %cmp.i.i.i24 = icmp slt i32 %conv.i23, 18
   br i1 %cmp.i.i.i24, label %cond.end.i.i.i33, label %cond.true.i.i.i25
@@ -173,17 +173,17 @@
   %sext.i26 = shl i64 %23, 32
   %idxprom.i.i.i27 = ashr exact i64 %sext.i26, 32
   %arrayidx.i.i.i28 = getelementptr inbounds [18 x i8], [18 x i8]* @dwarf_reg_size_table, i64 0, i64 %idxprom.i.i.i27
-  %24 = load i8* %arrayidx.i.i.i28, align 1
+  %24 = load i8, i8* %arrayidx.i.i.i28, align 1
   %arrayidx2.i.i.i29 = getelementptr inbounds %struct._Unwind_Context, %struct._Unwind_Context* %cur_context, i64 0, i32 0, i64 %idxprom.i.i.i27
-  %25 = load i8** %arrayidx2.i.i.i29, align 8
-  %26 = load i64* %flags.i.i.i.i, align 8
+  %25 = load i8*, i8** %arrayidx2.i.i.i29, align 8
+  %26 = load i64, i64* %flags.i.i.i.i, align 8
   %and.i.i.i.i31 = and i64 %26, 4611686018427387904
   %tobool.i.i.i32 = icmp eq i64 %and.i.i.i.i31, 0
   br i1 %tobool.i.i.i32, label %if.end.i.i.i39, label %land.lhs.true.i.i.i36
 
 land.lhs.true.i.i.i36:                            ; preds = %cond.end.i.i.i33
   %arrayidx4.i.i.i34 = getelementptr inbounds %struct._Unwind_Context, %struct._Unwind_Context* %cur_context, i64 0, i32 8, i64 %idxprom.i.i.i27
-  %27 = load i8* %arrayidx4.i.i.i34, align 1
+  %27 = load i8, i8* %arrayidx4.i.i.i34, align 1
   %tobool6.i.i.i35 = icmp eq i8 %27, 0
   br i1 %tobool6.i.i.i35, label %if.end.i.i.i39, label %if.then.i.i.i37
 
@@ -197,7 +197,7 @@
 
 if.then10.i.i.i40:                                ; preds = %if.end.i.i.i39
   %29 = bitcast i8* %25 to i64*
-  %30 = load i64* %29, align 8
+  %30 = load i64, i64* %29, align 8
   br label %uw_update_context.exit44
 
 cond.true14.i.i.i41:                              ; preds = %if.end.i.i.i39
@@ -213,7 +213,7 @@
 do.body19:                                        ; preds = %if.then3.i
   call void @llvm.lifetime.end(i64 -1, i8* %17)
   %call20 = call fastcc i64 @uw_install_context_1(%struct._Unwind_Context* %this_context, %struct._Unwind_Context* %cur_context)
-  %32 = load i8** %ra.i, align 8
+  %32 = load i8*, i8** %ra.i, align 8
   call void @llvm.eh.return.i64(i64 %call20, i8* %32)
   unreachable
 
diff --git a/llvm/test/CodeGen/X86/use-add-flags.ll b/llvm/test/CodeGen/X86/use-add-flags.ll
index fd57f5c..da0002c 100644
--- a/llvm/test/CodeGen/X86/use-add-flags.ll
+++ b/llvm/test/CodeGen/X86/use-add-flags.ll
@@ -13,7 +13,7 @@
 ;     CHECK: ret
 
 define i32 @test1(i32* %x, i32 %y, i32 %a, i32 %b) nounwind {
-	%tmp2 = load i32* %x, align 4		; <i32> [#uses=1]
+	%tmp2 = load i32, i32* %x, align 4		; <i32> [#uses=1]
 	%tmp4 = add i32 %tmp2, %y		; <i32> [#uses=1]
 	%tmp5 = icmp slt i32 %tmp4, 0		; <i1> [#uses=1]
 	%tmp.0 = select i1 %tmp5, i32 %a, i32 %b		; <i32> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/v4i32load-crash.ll b/llvm/test/CodeGen/X86/v4i32load-crash.ll
index 3e7f9e6..2025a2f 100644
--- a/llvm/test/CodeGen/X86/v4i32load-crash.ll
+++ b/llvm/test/CodeGen/X86/v4i32load-crash.ll
@@ -13,10 +13,10 @@
 ; Function Attrs: nounwind
 define void @fn3(i32 %el) {
 entry:
-  %0 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 0)
-  %1 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 1)
-  %2 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 2)
-  %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 3)
+  %0 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 0)
+  %1 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 1)
+  %2 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 2)
+  %3 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i32 3)
   %4 = insertelement <4 x i32> undef, i32 %0, i32 0
   %5 = insertelement <4 x i32> %4, i32 %1, i32 1
   %6 = insertelement <4 x i32> %5, i32 %2, i32 2
diff --git a/llvm/test/CodeGen/X86/v8i1-masks.ll b/llvm/test/CodeGen/X86/v8i1-masks.ll
index 5da6e96..21fe963 100644
--- a/llvm/test/CodeGen/X86/v8i1-masks.ll
+++ b/llvm/test/CodeGen/X86/v8i1-masks.ll
@@ -10,10 +10,10 @@
 ;CHECK: ret
 
 define void @and_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
-  %v0 = load <8 x float>* %a, align 16
-  %v1 = load <8 x float>* %b, align 16
+  %v0 = load <8 x float>, <8 x float>* %a, align 16
+  %v1 = load <8 x float>, <8 x float>* %b, align 16
   %m0 = fcmp olt <8 x float> %v1, %v0
-  %v2 = load <8 x float>* %c, align 16
+  %v2 = load <8 x float>, <8 x float>* %c, align 16
   %m1 = fcmp olt <8 x float> %v2, %v0
   %mand = and <8 x i1> %m1, %m0
   %r = zext <8 x i1> %mand to <8 x i32>
@@ -28,8 +28,8 @@
 ;CHECK: vmovaps
 ;CHECK: ret
 define void @neg_masks(<8 x float>* %a, <8 x float>* %b, <8 x float>* %c) nounwind uwtable noinline ssp {
-  %v0 = load <8 x float>* %a, align 16
-  %v1 = load <8 x float>* %b, align 16
+  %v0 = load <8 x float>, <8 x float>* %a, align 16
+  %v1 = load <8 x float>, <8 x float>* %b, align 16
   %m0 = fcmp olt <8 x float> %v1, %v0
   %mand = xor <8 x i1> %m0, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
   %r = zext <8 x i1> %mand to <8 x i32>
diff --git a/llvm/test/CodeGen/X86/vaargs.ll b/llvm/test/CodeGen/X86/vaargs.ll
index 8b16b35..3767f41 100644
--- a/llvm/test/CodeGen/X86/vaargs.ll
+++ b/llvm/test/CodeGen/X86/vaargs.ll
@@ -30,7 +30,7 @@
 .lr.ph:                                           ; preds = %0
   %3 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 0
   %4 = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %ap, i64 0, i64 0, i32 2
-  %.pre = load i32* %3, align 16
+  %.pre = load i32, i32* %3, align 16
   br label %5
 
 ; <label>:5                                       ; preds = %.lr.ph, %13
@@ -45,7 +45,7 @@
   br label %13
 
 ; <label>:10                                      ; preds = %5
-  %11 = load i8** %4, align 8
+  %11 = load i8*, i8** %4, align 8
   %12 = getelementptr i8, i8* %11, i64 8
   store i8* %12, i8** %4, align 8
   br label %13
diff --git a/llvm/test/CodeGen/X86/vararg_tailcall.ll b/llvm/test/CodeGen/X86/vararg_tailcall.ll
index eeda5e1..8a90a4d 100644
--- a/llvm/test/CodeGen/X86/vararg_tailcall.ll
+++ b/llvm/test/CodeGen/X86/vararg_tailcall.ll
@@ -39,7 +39,7 @@
 ; WIN64: callq
 define i8* @foo2(i8* %arg) nounwind optsize ssp noredzone {
 entry:
-  %tmp1 = load i8** @sel, align 8
+  %tmp1 = load i8*, i8** @sel, align 8
   %call = tail call i8* (i8*, i8*, ...)* @x2(i8* %arg, i8* %tmp1) nounwind optsize noredzone
   ret i8* %call
 }
@@ -52,10 +52,10 @@
 ; WIN64: callq
 define i8* @foo6(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone {
 entry:
-  %tmp2 = load i8** @sel3, align 8
-  %tmp3 = load i8** @sel4, align 8
-  %tmp4 = load i8** @sel5, align 8
-  %tmp5 = load i8** @sel6, align 8
+  %tmp2 = load i8*, i8** @sel3, align 8
+  %tmp3 = load i8*, i8** @sel4, align 8
+  %tmp4 = load i8*, i8** @sel5, align 8
+  %tmp5 = load i8*, i8** @sel6, align 8
   %call = tail call i8* (i8*, i8*, i8*, ...)* @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5) nounwind optsize noredzone
   ret i8* %call
 }
@@ -68,11 +68,11 @@
 ; WIN64: callq
 define i8* @foo7(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone {
 entry:
-  %tmp2 = load i8** @sel3, align 8
-  %tmp3 = load i8** @sel4, align 8
-  %tmp4 = load i8** @sel5, align 8
-  %tmp5 = load i8** @sel6, align 8
-  %tmp6 = load i8** @sel7, align 8
+  %tmp2 = load i8*, i8** @sel3, align 8
+  %tmp3 = load i8*, i8** @sel4, align 8
+  %tmp4 = load i8*, i8** @sel5, align 8
+  %tmp5 = load i8*, i8** @sel6, align 8
+  %tmp6 = load i8*, i8** @sel7, align 8
   %call = tail call i8* (i8*, i8*, i8*, i8*, i8*, i8*, i8*, ...)* @x7(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i8* %tmp6) nounwind optsize noredzone
   ret i8* %call
 }
@@ -85,10 +85,10 @@
 ; WIN64: callq
 define i8* @foo8(i8* %arg1, i8* %arg2) nounwind optsize ssp noredzone {
 entry:
-  %tmp2 = load i8** @sel3, align 8
-  %tmp3 = load i8** @sel4, align 8
-  %tmp4 = load i8** @sel5, align 8
-  %tmp5 = load i8** @sel6, align 8
+  %tmp2 = load i8*, i8** @sel3, align 8
+  %tmp3 = load i8*, i8** @sel4, align 8
+  %tmp4 = load i8*, i8** @sel5, align 8
+  %tmp5 = load i8*, i8** @sel6, align 8
   %call = tail call i8* (i8*, i8*, i8*, ...)* @x3(i8* %arg1, i8* %arg2, i8* %tmp2, i8* %tmp3, i8* %tmp4, i8* %tmp5, i32 48879, i32 48879) nounwind optsize noredzone
   ret i8* %call
 }
diff --git a/llvm/test/CodeGen/X86/vec-loadsingles-alignment.ll b/llvm/test/CodeGen/X86/vec-loadsingles-alignment.ll
index 6aa2adb..ad99c9f 100644
--- a/llvm/test/CodeGen/X86/vec-loadsingles-alignment.ll
+++ b/llvm/test/CodeGen/X86/vec-loadsingles-alignment.ll
@@ -10,14 +10,14 @@
 ; CHECK-LABEL: subb:
 ; CHECK:  vmovups e(%rip), %ymm
 entry:
-  %0 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 7), align 4
-  %1 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 6), align 8
-  %2 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 5), align 4
-  %3 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 4), align 16
-  %4 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 3), align 4
-  %5 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 2), align 8
-  %6 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 1), align 4
-  %7 = load i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 0), align 16
+  %0 = load i32, i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 7), align 4
+  %1 = load i32, i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 6), align 8
+  %2 = load i32, i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 5), align 4
+  %3 = load i32, i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 4), align 16
+  %4 = load i32, i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 3), align 4
+  %5 = load i32, i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 2), align 8
+  %6 = load i32, i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 1), align 4
+  %7 = load i32, i32* getelementptr inbounds ([8 x i32]* @e, i64 0, i64 0), align 16
   %vecinit.i = insertelement <8 x i32> undef, i32 %7, i32 0
   %vecinit1.i = insertelement <8 x i32> %vecinit.i, i32 %6, i32 1
   %vecinit2.i = insertelement <8 x i32> %vecinit1.i, i32 %5, i32 2
diff --git a/llvm/test/CodeGen/X86/vec-trunc-store.ll b/llvm/test/CodeGen/X86/vec-trunc-store.ll
index 4d665f1..d7897f8 100644
--- a/llvm/test/CodeGen/X86/vec-trunc-store.ll
+++ b/llvm/test/CodeGen/X86/vec-trunc-store.ll
@@ -1,14 +1,14 @@
 ; RUN: llc < %s -march=x86-64
 
 define void @foo(<8 x i32>* %p) nounwind {
-  %t = load <8 x i32>* %p
+  %t = load <8 x i32>, <8 x i32>* %p
   %cti69 = trunc <8 x i32> %t to <8 x i16>     ; <<8 x i16>> [#uses=1]
   store <8 x i16> %cti69, <8 x i16>* undef
   ret void
 }
 
 define void @bar(<4 x i32>* %p) nounwind {
-  %t = load <4 x i32>* %p
+  %t = load <4 x i32>, <4 x i32>* %p
   %cti44 = trunc <4 x i32> %t to <4 x i16>     ; <<4 x i16>> [#uses=1]
   store <4 x i16> %cti44, <4 x i16>* undef
   ret void
diff --git a/llvm/test/CodeGen/X86/vec_align.ll b/llvm/test/CodeGen/X86/vec_align.ll
index af53aa3..558d768 100644
--- a/llvm/test/CodeGen/X86/vec_align.ll
+++ b/llvm/test/CodeGen/X86/vec_align.ll
@@ -21,10 +21,10 @@
 	%Yp = getelementptr { float,float,float,float}, { float,float,float,float}* @G, i32 0, i32 2
 	%Zp = getelementptr { float,float,float,float}, { float,float,float,float}* @G, i32 0, i32 3
 	
-	%W = load float* %Wp
-	%X = load float* %Xp
-	%Y = load float* %Yp
-	%Z = load float* %Zp
+	%W = load float, float* %Wp
+	%X = load float, float* %Xp
+	%Y = load float, float* %Yp
+	%Z = load float, float* %Zp
 
         %tmp = insertelement %f4 undef, float %W, i32 0
         %tmp2 = insertelement %f4 %tmp, float %X, i32 1
diff --git a/llvm/test/CodeGen/X86/vec_anyext.ll b/llvm/test/CodeGen/X86/vec_anyext.ll
index d2a4c7f..c088d7f 100644
--- a/llvm/test/CodeGen/X86/vec_anyext.ll
+++ b/llvm/test/CodeGen/X86/vec_anyext.ll
@@ -2,9 +2,9 @@
 ; PR 9267
 
 define<4 x i16> @func_16_32() {
-  %F = load <4 x i32>* undef
+  %F = load <4 x i32>, <4 x i32>* undef
   %G = trunc <4 x i32> %F to <4 x i16>
-  %H = load <4 x i32>* undef
+  %H = load <4 x i32>, <4 x i32>* undef
   %Y = trunc <4 x i32> %H to <4 x i16>
   %T = add <4 x i16> %Y, %G
   store <4 x i16>%T , <4 x i16>* undef
@@ -12,9 +12,9 @@
 }
 
 define<4 x i16> @func_16_64() {
-  %F = load <4 x i64>* undef
+  %F = load <4 x i64>, <4 x i64>* undef
   %G = trunc <4 x i64> %F to <4 x i16>
-  %H = load <4 x i64>* undef
+  %H = load <4 x i64>, <4 x i64>* undef
   %Y = trunc <4 x i64> %H to <4 x i16>
   %T = xor <4 x i16> %Y, %G
   store <4 x i16>%T , <4 x i16>* undef
@@ -22,36 +22,36 @@
 }
 
 define<4 x i32> @func_32_64() {
-  %F = load <4 x i64>* undef
+  %F = load <4 x i64>, <4 x i64>* undef
   %G = trunc <4 x i64> %F to <4 x i32>
-  %H = load <4 x i64>* undef
+  %H = load <4 x i64>, <4 x i64>* undef
   %Y = trunc <4 x i64> %H to <4 x i32>
   %T = or <4 x i32> %Y, %G
   ret <4 x i32> %T
 }
 
 define<4 x i8> @func_8_16() {
-  %F = load <4 x i16>* undef
+  %F = load <4 x i16>, <4 x i16>* undef
   %G = trunc <4 x i16> %F to <4 x i8>
-  %H = load <4 x i16>* undef
+  %H = load <4 x i16>, <4 x i16>* undef
   %Y = trunc <4 x i16> %H to <4 x i8>
   %T = add <4 x i8> %Y, %G
   ret <4 x i8> %T
 }
 
 define<4 x i8> @func_8_32() {
-  %F = load <4 x i32>* undef
+  %F = load <4 x i32>, <4 x i32>* undef
   %G = trunc <4 x i32> %F to <4 x i8>
-  %H = load <4 x i32>* undef
+  %H = load <4 x i32>, <4 x i32>* undef
   %Y = trunc <4 x i32> %H to <4 x i8>
   %T = sub <4 x i8> %Y, %G
   ret <4 x i8> %T
 }
 
 define<4 x i8> @func_8_64() {
-  %F = load <4 x i64>* undef
+  %F = load <4 x i64>, <4 x i64>* undef
   %G = trunc <4 x i64> %F to <4 x i8>
-  %H = load <4 x i64>* undef
+  %H = load <4 x i64>, <4 x i64>* undef
   %Y = trunc <4 x i64> %H to <4 x i8>
   %T = add <4 x i8> %Y, %G
   ret <4 x i8> %T
diff --git a/llvm/test/CodeGen/X86/vec_extract-mmx.ll b/llvm/test/CodeGen/X86/vec_extract-mmx.ll
index c6c93a1..780066d 100644
--- a/llvm/test/CodeGen/X86/vec_extract-mmx.ll
+++ b/llvm/test/CodeGen/X86/vec_extract-mmx.ll
@@ -8,7 +8,7 @@
 ; CHECK-NEXT:    addl $32, %eax
 ; CHECK-NEXT:    retq
 entry:
-  %v5 = load <1 x i64>* %v4, align 8
+  %v5 = load <1 x i64>, <1 x i64>* %v4, align 8
   %v12 = bitcast <1 x i64> %v5 to <4 x i16>
   %v13 = bitcast <4 x i16> %v12 to x86_mmx
   %v14 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %v13, i8 -18)
@@ -30,7 +30,7 @@
 ; CHECK-NEXT:    emms
 ; CHECK-NEXT:    retq
 entry:
-  %0 = load i32* %ptr, align 4
+  %0 = load i32, i32* %ptr, align 4
   %1 = insertelement <2 x i32> undef, i32 %0, i32 0
   %2 = insertelement <2 x i32> %1, i32 0, i32 1
   %3 = bitcast <2 x i32> %2 to x86_mmx
@@ -56,7 +56,7 @@
 ; CHECK-NEXT:    retq
 entry:
   %0 = bitcast i32* %ptr to x86_mmx*
-  %1 = load x86_mmx* %0, align 8
+  %1 = load x86_mmx, x86_mmx* %0, align 8
   %2 = tail call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 -24)
   %3 = bitcast x86_mmx %2 to <4 x i16>
   %4 = bitcast <4 x i16> %3 to <1 x i64>
diff --git a/llvm/test/CodeGen/X86/vec_extract-sse4.ll b/llvm/test/CodeGen/X86/vec_extract-sse4.ll
index 530911a..9f4210f 100644
--- a/llvm/test/CodeGen/X86/vec_extract-sse4.ll
+++ b/llvm/test/CodeGen/X86/vec_extract-sse4.ll
@@ -9,7 +9,7 @@
 ; CHECK-NEXT:    movss %xmm0, (%eax)
 ; CHECK-NEXT:    retl
 
-	%X = load <4 x float>* %P1
+	%X = load <4 x float>, <4 x float>* %P1
 	%tmp = extractelement <4 x float> %X, i32 3
 	store float %tmp, float* %R
 	ret void
@@ -27,7 +27,7 @@
 ; CHECK-NEXT:    popl %eax
 ; CHECK-NEXT:    retl
 
-	%X = load <4 x float>* %P1
+	%X = load <4 x float>, <4 x float>* %P1
 	%tmp = extractelement <4 x float> %X, i32 2
 	ret float %tmp
 }
@@ -41,7 +41,7 @@
 ; CHECK-NEXT:    movl %ecx, (%eax)
 ; CHECK-NEXT:    retl
 
-	%X = load <4 x i32>* %P1
+	%X = load <4 x i32>, <4 x i32>* %P1
 	%tmp = extractelement <4 x i32> %X, i32 3
 	store i32 %tmp, i32* %R
 	ret void
@@ -54,7 +54,7 @@
 ; CHECK-NEXT:    movl 12(%eax), %eax
 ; CHECK-NEXT:    retl
 
-	%X = load <4 x i32>* %P1
+	%X = load <4 x i32>, <4 x i32>* %P1
 	%tmp = extractelement <4 x i32> %X, i32 3
 	ret i32 %tmp
 }
diff --git a/llvm/test/CodeGen/X86/vec_extract.ll b/llvm/test/CodeGen/X86/vec_extract.ll
index 6df7be7..3b47888 100644
--- a/llvm/test/CodeGen/X86/vec_extract.ll
+++ b/llvm/test/CodeGen/X86/vec_extract.ll
@@ -12,7 +12,7 @@
 ; CHECK-NEXT:    movss %xmm0, (%eax)
 ; CHECK-NEXT:    retl
 entry:
-	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
+	%tmp = load <4 x float>, <4 x float>* %F		; <<4 x float>> [#uses=2]
 	%tmp7 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	%tmp2 = extractelement <4 x float> %tmp7, i32 0		; <float> [#uses=1]
 	store float %tmp2, float* %f
@@ -32,7 +32,7 @@
 ; CHECK-NEXT:    popl %eax
 ; CHECK-NEXT:    retl
 entry:
-	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
+	%tmp = load <4 x float>, <4 x float>* %F		; <<4 x float>> [#uses=2]
 	%tmp7 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	%tmp2 = extractelement <4 x float> %tmp7, i32 2		; <float> [#uses=1]
 	ret float %tmp2
@@ -47,7 +47,7 @@
 ; CHECK-NEXT:    movss %xmm0, (%eax)
 ; CHECK-NEXT:    retl
 entry:
-	%X = load <4 x float>* %P1		; <<4 x float>> [#uses=1]
+	%X = load <4 x float>, <4 x float>* %P1		; <<4 x float>> [#uses=1]
 	%tmp = extractelement <4 x float> %X, i32 3		; <float> [#uses=1]
 	store float %tmp, float* %R
 	ret void
diff --git a/llvm/test/CodeGen/X86/vec_fpext.ll b/llvm/test/CodeGen/X86/vec_fpext.ll
index b882a5e..8488757 100644
--- a/llvm/test/CodeGen/X86/vec_fpext.ll
+++ b/llvm/test/CodeGen/X86/vec_fpext.ll
@@ -8,7 +8,7 @@
 entry:
 ; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
 ; AVX: vcvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
-  %0 = load <2 x float>* %in, align 8
+  %0 = load <2 x float>, <2 x float>* %in, align 8
   %1 = fpext <2 x float> %0 to <2 x double>
   store <2 x double> %1, <2 x double>* %out, align 1
   ret void
@@ -21,7 +21,7 @@
 ; CHECK: cvtps2pd (%{{.+}}), %xmm{{[0-9]+}}
 ; CHECK: cvtps2pd 8(%{{.+}}), %xmm{{[0-9]+}}
 ; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
-  %0 = load <4 x float>* %in
+  %0 = load <4 x float>, <4 x float>* %in
   %1 = fpext <4 x float> %0 to <4 x double>
   store <4 x double> %1, <4 x double>* %out, align 1
   ret void
@@ -37,7 +37,7 @@
 ; CHECK: cvtps2pd 24(%{{.+}}), %xmm{{[0-9]+}}
 ; AVX: vcvtps2pd 16(%{{.+}}), %ymm{{[0-9]+}}
 ; AVX: vcvtps2pd (%{{.+}}), %ymm{{[0-9]+}}
-  %0 = load <8 x float>* %in
+  %0 = load <8 x float>, <8 x float>* %in
   %1 = fpext <8 x float> %0 to <8 x double>
   store <8 x double> %1, <8 x double>* %out, align 1
   ret void
diff --git a/llvm/test/CodeGen/X86/vec_i64.ll b/llvm/test/CodeGen/X86/vec_i64.ll
index 462e16e..48ca1ff 100644
--- a/llvm/test/CodeGen/X86/vec_i64.ll
+++ b/llvm/test/CodeGen/X86/vec_i64.ll
@@ -5,7 +5,7 @@
 
 define <2 x i64> @foo1(i64* %y) nounwind  {
 entry:
-	%tmp1 = load i64* %y, align 8		; <i64> [#uses=1]
+	%tmp1 = load i64, i64* %y, align 8		; <i64> [#uses=1]
 	%s2v = insertelement <2 x i64> undef, i64 %tmp1, i32 0
 	%loadl = shufflevector <2 x i64> zeroinitializer, <2 x i64> %s2v, <2 x i32> <i32 2, i32 1>
 	ret <2 x i64> %loadl
@@ -14,7 +14,7 @@
 
 define <4 x float> @foo2(i64* %p) nounwind {
 entry:
-	%load = load i64* %p
+	%load = load i64, i64* %p
 	%s2v = insertelement <2 x i64> undef, i64 %load, i32 0
 	%loadl = shufflevector <2 x i64> zeroinitializer, <2 x i64> %s2v, <2 x i32> <i32 2, i32 1>
 	%0 = bitcast <2 x i64> %loadl to <4 x float>
diff --git a/llvm/test/CodeGen/X86/vec_ins_extract.ll b/llvm/test/CodeGen/X86/vec_ins_extract.ll
index c65a948..e92f46d 100644
--- a/llvm/test/CodeGen/X86/vec_ins_extract.ll
+++ b/llvm/test/CodeGen/X86/vec_ins_extract.ll
@@ -7,7 +7,7 @@
 
 define void @test(<4 x float>* %F, float %f) {
 entry:
-	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
+	%tmp = load <4 x float>, <4 x float>* %F		; <<4 x float>> [#uses=2]
 	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	%tmp10 = insertelement <4 x float> %tmp3, float %f, i32 0		; <<4 x float>> [#uses=2]
 	%tmp6 = fadd <4 x float> %tmp10, %tmp10		; <<4 x float>> [#uses=1]
@@ -18,12 +18,12 @@
 define void @test2(<4 x float>* %F, float %f) {
 entry:
 	%G = alloca <4 x float>, align 16		; <<4 x float>*> [#uses=3]
-	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
+	%tmp = load <4 x float>, <4 x float>* %F		; <<4 x float>> [#uses=2]
 	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp3, <4 x float>* %G
 	%tmp.upgrd.1 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 2		; <float*> [#uses=1]
 	store float %f, float* %tmp.upgrd.1
-	%tmp4 = load <4 x float>* %G		; <<4 x float>> [#uses=2]
+	%tmp4 = load <4 x float>, <4 x float>* %G		; <<4 x float>> [#uses=2]
 	%tmp6 = fadd <4 x float> %tmp4, %tmp4		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp6, <4 x float>* %F
 	ret void
@@ -32,18 +32,18 @@
 define void @test3(<4 x float>* %F, float* %f) {
 entry:
 	%G = alloca <4 x float>, align 16		; <<4 x float>*> [#uses=2]
-	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
+	%tmp = load <4 x float>, <4 x float>* %F		; <<4 x float>> [#uses=2]
 	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp3, <4 x float>* %G
 	%tmp.upgrd.2 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 2		; <float*> [#uses=1]
-	%tmp.upgrd.3 = load float* %tmp.upgrd.2		; <float> [#uses=1]
+	%tmp.upgrd.3 = load float, float* %tmp.upgrd.2		; <float> [#uses=1]
 	store float %tmp.upgrd.3, float* %f
 	ret void
 }
 
 define void @test4(<4 x float>* %F, float* %f) {
 entry:
-	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
+	%tmp = load <4 x float>, <4 x float>* %F		; <<4 x float>> [#uses=2]
 	%tmp5.lhs = extractelement <4 x float> %tmp, i32 0		; <float> [#uses=1]
 	%tmp5.rhs = extractelement <4 x float> %tmp, i32 0		; <float> [#uses=1]
 	%tmp5 = fadd float %tmp5.lhs, %tmp5.rhs		; <float> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/vec_insert-5.ll b/llvm/test/CodeGen/X86/vec_insert-5.ll
index b77a1b5..0f89515 100644
--- a/llvm/test/CodeGen/X86/vec_insert-5.ll
+++ b/llvm/test/CodeGen/X86/vec_insert-5.ll
@@ -28,7 +28,7 @@
 ; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,0]
 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
 ; CHECK-NEXT:    retl
-  %tmp1 = load <4 x float>* %P
+  %tmp1 = load <4 x float>, <4 x float>* %P
   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 4, i32 4, i32 4, i32 0 >
   ret <4 x float> %tmp2
 }
@@ -41,7 +41,7 @@
 ; CHECK-NEXT:    xorpd %xmm1, %xmm1
 ; CHECK-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; CHECK-NEXT:    retl
-  %tmp1 = load <4 x float>* %P
+  %tmp1 = load <4 x float>, <4 x float>* %P
   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> zeroinitializer, <4 x i32> < i32 2, i32 3, i32 4, i32 4 >
   ret <4 x float> %tmp2
 }
@@ -55,7 +55,7 @@
 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[1,0]
 ; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
 ; CHECK-NEXT:    retl
-  %tmp1 = load <4 x float>* %P
+  %tmp1 = load <4 x float>, <4 x float>* %P
   %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 7, i32 0, i32 0, i32 0 >
   ret <4 x float> %tmp2
 }
diff --git a/llvm/test/CodeGen/X86/vec_insert-mmx.ll b/llvm/test/CodeGen/X86/vec_insert-mmx.ll
index d397d80..447f97a 100644
--- a/llvm/test/CodeGen/X86/vec_insert-mmx.ll
+++ b/llvm/test/CodeGen/X86/vec_insert-mmx.ll
@@ -50,8 +50,8 @@
 ; X86-64-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
 ; X86-64-NEXT:    movq %xmm0
 ; X86-64-NEXT:    retq
-  load i16* @g0
-  load <4 x i16>* @g1
+  load i16, i16* @g0
+  load <4 x i16>, <4 x i16>* @g1
   insertelement <4 x i16> %2, i16 %1, i32 0
   store <4 x i16> %3, <4 x i16>* @g1
   ret void
diff --git a/llvm/test/CodeGen/X86/vec_loadsingles.ll b/llvm/test/CodeGen/X86/vec_loadsingles.ll
index 312b277..ecae5d9 100644
--- a/llvm/test/CodeGen/X86/vec_loadsingles.ll
+++ b/llvm/test/CodeGen/X86/vec_loadsingles.ll
@@ -2,10 +2,10 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+slow-unaligned-mem-32 | FileCheck %s --check-prefix=ALL --check-prefix=SLOW32
 
 define <4 x float> @merge_2_floats(float* nocapture %p) nounwind readonly {
-  %tmp1 = load float* %p
+  %tmp1 = load float, float* %p
   %vecins = insertelement <4 x float> undef, float %tmp1, i32 0
   %add.ptr = getelementptr float, float* %p, i32 1
-  %tmp5 = load float* %add.ptr
+  %tmp5 = load float, float* %add.ptr
   %vecins7 = insertelement <4 x float> %vecins, float %tmp5, i32 1
   ret <4 x float> %vecins7
 
@@ -17,13 +17,13 @@
 ; Test-case generated due to a crash when trying to treat loading the first
 ; two i64s of a <4 x i64> as a load of two i32s.
 define <4 x i64> @merge_2_floats_into_4() {
-  %1 = load i64** undef, align 8
+  %1 = load i64*, i64** undef, align 8
   %2 = getelementptr inbounds i64, i64* %1, i64 0
-  %3 = load i64* %2
+  %3 = load i64, i64* %2
   %4 = insertelement <4 x i64> undef, i64 %3, i32 0
-  %5 = load i64** undef, align 8
+  %5 = load i64*, i64** undef, align 8
   %6 = getelementptr inbounds i64, i64* %5, i64 1
-  %7 = load i64* %6
+  %7 = load i64, i64* %6
   %8 = insertelement <4 x i64> %4, i64 %7, i32 1
   %9 = shufflevector <4 x i64> %8, <4 x i64> undef, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   ret <4 x i64> %9
@@ -34,16 +34,16 @@
 }
 
 define <4 x float> @merge_4_floats(float* %ptr) {
-  %a = load float* %ptr, align 8
+  %a = load float, float* %ptr, align 8
   %vec = insertelement <4 x float> undef, float %a, i32 0
   %idx1 = getelementptr inbounds float, float* %ptr, i64 1
-  %b = load float* %idx1, align 8
+  %b = load float, float* %idx1, align 8
   %vec2 = insertelement <4 x float> %vec, float %b, i32 1
   %idx3 = getelementptr inbounds float, float* %ptr, i64 2
-  %c = load float* %idx3, align 8
+  %c = load float, float* %idx3, align 8
   %vec4 = insertelement <4 x float> %vec2, float %c, i32 2
   %idx5 = getelementptr inbounds float, float* %ptr, i64 3
-  %d = load float* %idx5, align 8
+  %d = load float, float* %idx5, align 8
   %vec6 = insertelement <4 x float> %vec4, float %d, i32 3
   ret <4 x float> %vec6
 
@@ -58,28 +58,28 @@
 ; 16-byte loads.
 
 define <8 x float> @merge_8_floats(float* %ptr) {
-  %a = load float* %ptr, align 4
+  %a = load float, float* %ptr, align 4
   %vec = insertelement <8 x float> undef, float %a, i32 0
   %idx1 = getelementptr inbounds float, float* %ptr, i64 1
-  %b = load float* %idx1, align 4
+  %b = load float, float* %idx1, align 4
   %vec2 = insertelement <8 x float> %vec, float %b, i32 1
   %idx3 = getelementptr inbounds float, float* %ptr, i64 2
-  %c = load float* %idx3, align 4
+  %c = load float, float* %idx3, align 4
   %vec4 = insertelement <8 x float> %vec2, float %c, i32 2
   %idx5 = getelementptr inbounds float, float* %ptr, i64 3
-  %d = load float* %idx5, align 4
+  %d = load float, float* %idx5, align 4
   %vec6 = insertelement <8 x float> %vec4, float %d, i32 3
   %idx7 = getelementptr inbounds float, float* %ptr, i64 4
-  %e = load float* %idx7, align 4
+  %e = load float, float* %idx7, align 4
   %vec8 = insertelement <8 x float> %vec6, float %e, i32 4
   %idx9 = getelementptr inbounds float, float* %ptr, i64 5
-  %f = load float* %idx9, align 4
+  %f = load float, float* %idx9, align 4
   %vec10 = insertelement <8 x float> %vec8, float %f, i32 5
   %idx11 = getelementptr inbounds float, float* %ptr, i64 6
-  %g = load float* %idx11, align 4
+  %g = load float, float* %idx11, align 4
   %vec12 = insertelement <8 x float> %vec10, float %g, i32 6
   %idx13 = getelementptr inbounds float, float* %ptr, i64 7
-  %h = load float* %idx13, align 4
+  %h = load float, float* %idx13, align 4
   %vec14 = insertelement <8 x float> %vec12, float %h, i32 7
   ret <8 x float> %vec14
 
@@ -94,16 +94,16 @@
 }
 
 define <4 x double> @merge_4_doubles(double* %ptr) {
-  %a = load double* %ptr, align 8
+  %a = load double, double* %ptr, align 8
   %vec = insertelement <4 x double> undef, double %a, i32 0
   %idx1 = getelementptr inbounds double, double* %ptr, i64 1
-  %b = load double* %idx1, align 8
+  %b = load double, double* %idx1, align 8
   %vec2 = insertelement <4 x double> %vec, double %b, i32 1
   %idx3 = getelementptr inbounds double, double* %ptr, i64 2
-  %c = load double* %idx3, align 8
+  %c = load double, double* %idx3, align 8
   %vec4 = insertelement <4 x double> %vec2, double %c, i32 2
   %idx5 = getelementptr inbounds double, double* %ptr, i64 3
-  %d = load double* %idx5, align 8
+  %d = load double, double* %idx5, align 8
   %vec6 = insertelement <4 x double> %vec4, double %d, i32 3
   ret <4 x double> %vec6
 
@@ -124,10 +124,10 @@
   %arrayidx5 = getelementptr inbounds double, double* %ptr, i64 5
   %arrayidx6 = getelementptr inbounds double, double* %ptr, i64 6
   %arrayidx7 = getelementptr inbounds double, double* %ptr, i64 7
-  %e = load double* %arrayidx4, align 8
-  %f = load double* %arrayidx5, align 8
-  %g = load double* %arrayidx6, align 8
-  %h = load double* %arrayidx7, align 8
+  %e = load double, double* %arrayidx4, align 8
+  %f = load double, double* %arrayidx5, align 8
+  %g = load double, double* %arrayidx6, align 8
+  %h = load double, double* %arrayidx7, align 8
   %vecinit4 = insertelement <4 x double> undef, double %e, i32 0
   %vecinit5 = insertelement <4 x double> %vecinit4, double %f, i32 1
   %vecinit6 = insertelement <4 x double> %vecinit5, double %g, i32 2
diff --git a/llvm/test/CodeGen/X86/vec_logical.ll b/llvm/test/CodeGen/X86/vec_logical.ll
index 1dc0b16..6ab2d89 100644
--- a/llvm/test/CodeGen/X86/vec_logical.ll
+++ b/llvm/test/CodeGen/X86/vec_logical.ll
@@ -29,7 +29,7 @@
 
 define void @t3(<4 x float> %a, <4 x float> %b, <4 x float>* %c, <4 x float>* %d) {
 entry:
-	%tmp3 = load <4 x float>* %c		; <<4 x float>> [#uses=1]
+	%tmp3 = load <4 x float>, <4 x float>* %c		; <<4 x float>> [#uses=1]
 	%tmp11 = bitcast <4 x float> %a to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp12 = bitcast <4 x float> %b to <4 x i32>		; <<4 x i32>> [#uses=1]
 	%tmp13 = xor <4 x i32> %tmp11, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/vec_set-7.ll b/llvm/test/CodeGen/X86/vec_set-7.ll
index d993178..1701e49 100644
--- a/llvm/test/CodeGen/X86/vec_set-7.ll
+++ b/llvm/test/CodeGen/X86/vec_set-7.ll
@@ -2,7 +2,7 @@
 
 define <2 x i64> @test(<2 x i64>* %p) nounwind {
 	%tmp = bitcast <2 x i64>* %p to double*		
-	%tmp.upgrd.1 = load double* %tmp	
+	%tmp.upgrd.1 = load double, double* %tmp	
 	%tmp.upgrd.2 = insertelement <2 x double> undef, double %tmp.upgrd.1, i32 0
 	%tmp5 = insertelement <2 x double> %tmp.upgrd.2, double 0.0, i32 1
 	%tmp.upgrd.3 = bitcast <2 x double> %tmp5 to <2 x i64>
diff --git a/llvm/test/CodeGen/X86/vec_set-F.ll b/llvm/test/CodeGen/X86/vec_set-F.ll
index 6dd3cb0..aa17f9b 100644
--- a/llvm/test/CodeGen/X86/vec_set-F.ll
+++ b/llvm/test/CodeGen/X86/vec_set-F.ll
@@ -4,7 +4,7 @@
 
 define <2 x i64> @t1(<2 x i64>* %ptr) nounwind  {
 	%tmp45 = bitcast <2 x i64>* %ptr to <2 x i32>*
-	%tmp615 = load <2 x i32>* %tmp45
+	%tmp615 = load <2 x i32>, <2 x i32>* %tmp45
 	%tmp7 = bitcast <2 x i32> %tmp615 to i64
 	%tmp8 = insertelement <2 x i64> zeroinitializer, i64 %tmp7, i32 0
 	ret <2 x i64> %tmp8
diff --git a/llvm/test/CodeGen/X86/vec_setcc-2.ll b/llvm/test/CodeGen/X86/vec_setcc-2.ll
index e2d02c8..e150882 100644
--- a/llvm/test/CodeGen/X86/vec_setcc-2.ll
+++ b/llvm/test/CodeGen/X86/vec_setcc-2.ll
@@ -26,7 +26,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %in, i64 %indvars.iv
-  %arrayidx1.val = load <2 x i64>* %arrayidx1, align 16
+  %arrayidx1.val = load <2 x i64>, <2 x i64>* %arrayidx1, align 16
   %0 = bitcast <2 x i64> %arrayidx1.val to <8 x i16>
   %cmp.i.i = icmp ult <8 x i16> %0, <i16 26, i16 26, i16 26, i16 26, i16 26, i16 26, i16 26, i16 26>
   %sext.i.i = sext <8 x i1> %cmp.i.i to <8 x i16>
@@ -55,7 +55,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %in, i64 %indvars.iv
-  %arrayidx1.val = load <2 x i64>* %arrayidx1, align 16
+  %arrayidx1.val = load <2 x i64>, <2 x i64>* %arrayidx1, align 16
   %0 = bitcast <2 x i64> %arrayidx1.val to <8 x i16>
   %cmp.i.i = icmp ult <8 x i16> %0, <i16 0, i16 26, i16 26, i16 26, i16 26, i16 26, i16 26, i16 26>
   %sext.i.i = sext <8 x i1> %cmp.i.i to <8 x i16>
diff --git a/llvm/test/CodeGen/X86/vec_ss_load_fold.ll b/llvm/test/CodeGen/X86/vec_ss_load_fold.ll
index 80f12a2..ab5031e 100644
--- a/llvm/test/CodeGen/X86/vec_ss_load_fold.ll
+++ b/llvm/test/CodeGen/X86/vec_ss_load_fold.ll
@@ -51,7 +51,7 @@
 declare <4 x float> @f()
 
 define <4 x float> @test3(<4 x float> %A, float *%b, i32 %C) nounwind {
-  %a = load float *%b
+  %a = load float, float *%b
   %B = insertelement <4 x float> undef, float %a, i32 0
   %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %A, <4 x float> %B, i32 4)
   ret <4 x float> %X
@@ -60,7 +60,7 @@
 }
 
 define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind {
-  %a = load float *%b
+  %a = load float, float *%b
   %B = insertelement <4 x float> undef, float %a, i32 0
   %q = call <4 x float> @f()
   %X = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %q, <4 x float> %B, i32 4)
diff --git a/llvm/test/CodeGen/X86/vec_trunc_sext.ll b/llvm/test/CodeGen/X86/vec_trunc_sext.ll
index 3c446bb..dcfe423 100644
--- a/llvm/test/CodeGen/X86/vec_trunc_sext.ll
+++ b/llvm/test/CodeGen/X86/vec_trunc_sext.ll
@@ -9,7 +9,7 @@
 ; but that is beyond our current codegen capabilities.
 
 define <4 x i32> @trunc_sext(<4 x i16>* %in) {
-  %load = load <4 x i16>* %in
+  %load = load <4 x i16>, <4 x i16>* %in
   %trunc = trunc <4 x i16> %load to <4 x i8>
   %sext = sext <4 x i8> %trunc to <4 x i32>
   ret <4 x i32> %sext
diff --git a/llvm/test/CodeGen/X86/vec_zero.ll b/llvm/test/CodeGen/X86/vec_zero.ll
index c3ea0ad..1d900a0 100644
--- a/llvm/test/CodeGen/X86/vec_zero.ll
+++ b/llvm/test/CodeGen/X86/vec_zero.ll
@@ -3,7 +3,7 @@
 ; CHECK: foo
 ; CHECK: xorps
 define void @foo(<4 x float>* %P) {
-        %T = load <4 x float>* %P               ; <<4 x float>> [#uses=1]
+        %T = load <4 x float>, <4 x float>* %P               ; <<4 x float>> [#uses=1]
         %S = fadd <4 x float> zeroinitializer, %T                ; <<4 x float>> [#uses=1]
         store <4 x float> %S, <4 x float>* %P
         ret void
@@ -12,7 +12,7 @@
 ; CHECK: bar
 ; CHECK: pxor
 define void @bar(<4 x i32>* %P) {
-        %T = load <4 x i32>* %P         ; <<4 x i32>> [#uses=1]
+        %T = load <4 x i32>, <4 x i32>* %P         ; <<4 x i32>> [#uses=1]
         %S = sub <4 x i32> zeroinitializer, %T          ; <<4 x i32>> [#uses=1]
         store <4 x i32> %S, <4 x i32>* %P
         ret void
diff --git a/llvm/test/CodeGen/X86/vector-gep.ll b/llvm/test/CodeGen/X86/vector-gep.ll
index fcf985d..ce98e67 100644
--- a/llvm/test/CodeGen/X86/vector-gep.ll
+++ b/llvm/test/CodeGen/X86/vector-gep.ll
@@ -26,7 +26,7 @@
 ;CHECK-NEXT: movl
   %A2 = getelementptr i32, <4 x i32*> %param, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
   %k = extractelement <4 x i32*> %A2, i32 3
-  %v = load i32* %k
+  %v = load i32, i32* %k
   ret i32 %v
 ;CHECK: ret
 }
@@ -39,7 +39,7 @@
 ;CHECK-NEXT: vpadd
   %A2 = getelementptr i32, <4 x i32*> %param, <4 x i32> %off
   %k = extractelement <4 x i32*> %A2, i32 3
-  %v = load i32* %k
+  %v = load i32, i32* %k
   ret i32 %v
 ;CHECK: ret
 }
diff --git a/llvm/test/CodeGen/X86/vector-intrinsics.ll b/llvm/test/CodeGen/X86/vector-intrinsics.ll
index cabacb5..c140468 100644
--- a/llvm/test/CodeGen/X86/vector-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-intrinsics.ll
@@ -32,20 +32,20 @@
 declare <9 x double> @llvm.powi.v9f64(<9 x double> %a, i32)
 
 define void @a(<9 x double>* %p) nounwind {
-  %a = load <9 x double>* %p
+  %a = load <9 x double>, <9 x double>* %p
   %r = call <9 x double> @llvm.exp.v9f64(<9 x double> %a)
   store <9 x double> %r, <9 x double>* %p
   ret void
 }
 define void @b(<9 x double>* %p, <9 x double>* %q) nounwind {
-  %a = load <9 x double>* %p
-  %b = load <9 x double>* %q
+  %a = load <9 x double>, <9 x double>* %p
+  %b = load <9 x double>, <9 x double>* %q
   %r = call <9 x double> @llvm.pow.v9f64(<9 x double> %a, <9 x double> %b)
   store <9 x double> %r, <9 x double>* %p
   ret void
 }
 define void @c(<9 x double>* %p, i32 %n) nounwind {
-  %a = load <9 x double>* %p
+  %a = load <9 x double>, <9 x double>* %p
   %r = call <9 x double> @llvm.powi.v9f64(<9 x double> %a, i32 %n)
   store <9 x double> %r, <9 x double>* %p
   ret void
diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll
index 962d0381..c427371 100644
--- a/llvm/test/CodeGen/X86/vector-sext.ll
+++ b/llvm/test/CodeGen/X86/vector-sext.ll
@@ -206,7 +206,7 @@
 ; X32-SSE41-NEXT:    pmovsxwd (%eax), %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
- %X = load <4 x i16>* %ptr
+ %X = load <4 x i16>, <4 x i16>* %ptr
  %Y = sext <4 x i16> %X to <4 x i32>
  ret <4 x i32>%Y
 }
@@ -244,7 +244,7 @@
 ; X32-SSE41-NEXT:    pmovsxbd (%eax), %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
- %X = load <4 x i8>* %ptr
+ %X = load <4 x i8>, <4 x i8>* %ptr
  %Y = sext <4 x i8> %X to <4 x i32>
  ret <4 x i32>%Y
 }
@@ -284,7 +284,7 @@
 ; X32-SSE41-NEXT:    pmovsxbq (%eax), %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
- %X = load <2 x i8>* %ptr
+ %X = load <2 x i8>, <2 x i8>* %ptr
  %Y = sext <2 x i8> %X to <2 x i64>
  ret <2 x i64>%Y
 }
@@ -324,7 +324,7 @@
 ; X32-SSE41-NEXT:    pmovsxwq (%eax), %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
- %X = load <2 x i16>* %ptr
+ %X = load <2 x i16>, <2 x i16>* %ptr
  %Y = sext <2 x i16> %X to <2 x i64>
  ret <2 x i64>%Y
 }
@@ -364,7 +364,7 @@
 ; X32-SSE41-NEXT:    pmovsxdq (%eax), %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
- %X = load <2 x i32>* %ptr
+ %X = load <2 x i32>, <2 x i32>* %ptr
  %Y = sext <2 x i32> %X to <2 x i64>
  ret <2 x i64>%Y
 }
@@ -400,7 +400,7 @@
 ; X32-SSE41-NEXT:    pmovsxbw (%eax), %xmm0
 ; X32-SSE41-NEXT:    retl
 entry:
- %X = load <8 x i8>* %ptr
+ %X = load <8 x i8>, <8 x i8>* %ptr
  %Y = sext <8 x i8> %X to <8 x i16>
  ret <8 x i16>%Y
 }
@@ -566,7 +566,7 @@
 ; X32-SSE41-NEXT:    pmovsxbw 8(%eax), %xmm1
 ; X32-SSE41-NEXT:    retl
 entry:
- %X = load <16 x i8>* %ptr
+ %X = load <16 x i8>, <16 x i8>* %ptr
  %Y = sext <16 x i8> %X to <16 x i16>
  ret <16 x i16> %Y
 }
@@ -742,7 +742,7 @@
 ; X32-SSE41-NEXT:    pmovsxbq 2(%eax), %xmm1
 ; X32-SSE41-NEXT:    retl
 entry:
- %X = load <4 x i8>* %ptr
+ %X = load <4 x i8>, <4 x i8>* %ptr
  %Y = sext <4 x i8> %X to <4 x i64>
  ret <4 x i64>%Y
 }
@@ -803,7 +803,7 @@
 ; X32-SSE41-NEXT:    pmovsxwq 4(%eax), %xmm1
 ; X32-SSE41-NEXT:    retl
 entry:
- %X = load <4 x i16>* %ptr
+ %X = load <4 x i16>, <4 x i16>* %ptr
  %Y = sext <4 x i16> %X to <4 x i64>
  ret <4 x i64>%Y
 }
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
index 7214803..30eceac 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v2.ll
@@ -835,7 +835,7 @@
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
 ; AVX-NEXT:    retq
-  %a = load i64* %ptr
+  %a = load i64, i64* %ptr
   %v = insertelement <2 x i64> undef, i64 %a, i32 0
   %shuffle = shufflevector <2 x i64> %v, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
   ret <2 x i64> %shuffle
@@ -866,7 +866,7 @@
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; AVX-NEXT:    retq
-  %a = load double* %ptr
+  %a = load double, double* %ptr
   %v = insertelement <2 x double> undef, double %a, i32 0
   %shuffle = shufflevector <2 x double> %v, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
   ret <2 x double> %shuffle
@@ -946,7 +946,7 @@
 ; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 ; AVX2-NEXT:    retq
-  %a = load i64* %ptr
+  %a = load i64, i64* %ptr
   %v = insertelement <2 x i64> undef, i64 %a, i32 0
   %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
   ret <2 x i64> %shuffle
@@ -981,7 +981,7 @@
 ; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
-  %a = load i64* %ptr
+  %a = load i64, i64* %ptr
   %v = insertelement <2 x i64> undef, i64 %a, i32 0
   %shuffle = shufflevector <2 x i64> %v, <2 x i64> %b, <2 x i32> <i32 2, i32 0>
   ret <2 x i64> %shuffle
@@ -1013,7 +1013,7 @@
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vmovlpd (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
-  %a = load double* %ptr
+  %a = load double, double* %ptr
   %v = insertelement <2 x double> undef, double %a, i32 0
   %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
   ret <2 x double> %shuffle
@@ -1045,7 +1045,7 @@
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vmovhpd (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
-  %a = load double* %ptr
+  %a = load double, double* %ptr
   %v = insertelement <2 x double> undef, double %a, i32 0
   %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
   ret <2 x double> %shuffle
@@ -1108,7 +1108,7 @@
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
 ; AVX-NEXT:    retq
-  %a = load double* %ptr
+  %a = load double, double* %ptr
   %v = insertelement <2 x double> undef, double %a, i32 0
   %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
   ret <2 x double> %shuffle
@@ -1125,7 +1125,7 @@
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = mem[1,0]
 ; AVX-NEXT:    retq
-  %a = load <2 x double>* %ptr
+  %a = load <2 x double>, <2 x double>* %ptr
   %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
   ret <2 x double> %shuffle
 }
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
index a684e5e..2021905 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -1599,7 +1599,7 @@
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX-NEXT:    retq
-  %a = load i32* %ptr
+  %a = load i32, i32* %ptr
   %v = insertelement <4 x i32> undef, i32 %a, i32 0
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   ret <4 x i32> %shuffle
@@ -1653,7 +1653,7 @@
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX-NEXT:    retq
-  %a = load float* %ptr
+  %a = load float, float* %ptr
   %v = insertelement <4 x float> undef, float %a, i32 0
   %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   ret <4 x float> %shuffle
@@ -1734,7 +1734,7 @@
 ; AVX2-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
 ; AVX2-NEXT:    retq
-  %a = load <2 x i32>* %ptr
+  %a = load <2 x i32>, <2 x i32>* %ptr
   %v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
   ret <4 x i32> %shuffle
@@ -1770,7 +1770,7 @@
 ; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
-  %a = load <2 x i32>* %ptr
+  %a = load <2 x i32>, <2 x i32>* %ptr
   %v = shufflevector <2 x i32> %a, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   %shuffle = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
   ret <4 x i32> %shuffle
@@ -1803,7 +1803,7 @@
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vmovlpd (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
-  %a = load <2 x float>* %ptr
+  %a = load <2 x float>, <2 x float>* %ptr
   %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
   ret <4 x float> %shuffle
@@ -1836,7 +1836,7 @@
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vmovhpd (%rdi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
-  %a = load <2 x float>* %ptr
+  %a = load <2 x float>, <2 x float>* %ptr
   %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
   ret <4 x float> %shuffle
@@ -1853,7 +1853,7 @@
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
 ; AVX-NEXT:    retq
-  %a = load <4 x float>* %ptr
+  %a = load <4 x float>, <4 x float>* %ptr
   %shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   ret <4 x float> %shuffle
 }
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 3d6ada6..0ac9a2b 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -842,7 +842,7 @@
 ; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
 ; AVX2-NEXT:    retq
-  %a = load i64* %ptr
+  %a = load i64, i64* %ptr
   %v = insertelement <4 x i64> undef, i64 %a, i64 0
   %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   ret <4 x i64> %shuffle
@@ -864,7 +864,7 @@
 ; ALL:       # BB#0:
 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
 ; ALL-NEXT:    retq
-  %a = load double* %ptr
+  %a = load double, double* %ptr
   %v = insertelement <4 x double> undef, double %a, i32 0
   %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   ret <4 x double> %shuffle
@@ -875,7 +875,7 @@
 ; ALL:       # BB#0:
 ; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
 ; ALL-NEXT:    retq
-  %a = load double* %ptr
+  %a = load double, double* %ptr
   %v = insertelement <4 x double> undef, double %a, i32 0
   %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
   ret <4 x double> %shuffle
@@ -891,7 +891,7 @@
 ; AVX2:       # BB#0:
 ; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
 ; AVX2-NEXT:    retq
-  %a = load i64* %ptr
+  %a = load i64, i64* %ptr
   %v = insertelement <4 x i64> undef, i64 %a, i64 0
   %shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
   ret <4 x i64> %shuffle
@@ -902,7 +902,7 @@
 ; ALL:       # BB#0:
 ; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
 ; ALL-NEXT:    retq
-  %1 = load double* %p
+  %1 = load double, double* %p
   %2 = insertelement <2 x double> undef, double %1, i32 0
   %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> zeroinitializer
   ret <4 x double> %3
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
index f4e9a3b..624b3f2 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -1906,7 +1906,7 @@
 ; ALL:       # BB#0:
 ; ALL-NEXT:    vbroadcastss (%rdi), %ymm0
 ; ALL-NEXT:    retq
-  %1 = load float* %p
+  %1 = load float, float* %p
   %2 = insertelement <4 x float> undef, float %1, i32 0
   %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
   ret <8 x float> %3
@@ -2058,8 +2058,8 @@
 ; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
 ; ALL-NEXT:    retq
 entry:
-  %tmp74 = load <2 x float>* %tmp65, align 8
-  %tmp72 = load <2 x float>* %tmp64, align 8
+  %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
+  %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
   %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -2073,8 +2073,8 @@
 ; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
 ; ALL-NEXT:    retq
 entry:
-  %tmp74 = load <2 x float>* %tmp65, align 8
-  %tmp72 = load <2 x float>* %tmp64, align 8
+  %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
+  %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
   %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <8 x float> %tmp76
 }
@@ -2086,8 +2086,8 @@
 ; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
 ; ALL-NEXT:    retq
 entry:
-  %tmp74 = load <2 x float>* %tmp65, align 8
-  %tmp72 = load <2 x float>* %tmp64, align 8
+  %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
+  %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
   %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <8 x float> %res
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index b99946f..92c59e2 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -1791,8 +1791,8 @@
 ; AVX-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ; Current AVX2 lowering of this is still awful, not adding a test case.
-  %1 = load <2 x float>* %a, align 8
-  %2 = load <2 x float>* %b, align 8
+  %1 = load <2 x float>, <2 x float>* %a, align 8
+  %2 = load <2 x float>, <2 x float>* %b, align 8
   %3 = shufflevector <2 x float> %1, <2 x float> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <8 x float> %3
 }
@@ -1933,8 +1933,8 @@
 ; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
 ; AVX2-NEXT:    retq
-  %A = load <4 x i8>* %a
-  %B = load <4 x i8>* %b
+  %A = load <4 x i8>, <4 x i8>* %a
+  %B = load <4 x i8>, <4 x i8>* %b
   %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
   ret <4 x i8> %2
@@ -1976,8 +1976,8 @@
 ; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX-NEXT:    retq
-  %A = load <4 x i8>* %a
-  %B = load <4 x i8>* %b
+  %A = load <4 x i8>, <4 x i8>* %a
+  %B = load <4 x i8>, <4 x i8>* %b
   %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 0, i32 5, i32 1, i32 5>
   %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 2, i32 4, i32 1>
   ret <4 x i8> %2
@@ -2019,8 +2019,8 @@
 ; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; AVX-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm1[1],xmm0[1]
 ; AVX-NEXT:    retq
-  %A = load <4 x i8>* %a
-  %B = load <4 x i8>* %b
+  %A = load <4 x i8>, <4 x i8>* %a
+  %B = load <4 x i8>, <4 x i8>* %b
   %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 2, i32 3, i32 5, i32 5>
   %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
   ret <4 x i8> %2
@@ -2071,8 +2071,8 @@
 ; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
 ; AVX2-NEXT:    retq
-  %A = load <4 x i8>* %a
-  %B = load <4 x i8>* %b
+  %A = load <4 x i8>, <4 x i8>* %a
+  %B = load <4 x i8>, <4 x i8>* %b
   %1 = shufflevector <4 x i8> %A, <4 x i8> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
   %2 = shufflevector <4 x i8> %1, <4 x i8> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
   ret <4 x i8> %2
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll b/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
index 19608bd..c585414 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-mmx.ll
@@ -19,7 +19,7 @@
 ; X64-NEXT:    movq %xmm0, (%rdi)
 ; X64-NEXT:    retq
 entry:
-  %tmp2 = load <1 x i64>* %x
+  %tmp2 = load <1 x i64>, <1 x i64>* %x
   %tmp6 = bitcast <1 x i64> %tmp2 to <2 x i32>
   %tmp9 = shufflevector <2 x i32> %tmp6, <2 x i32> undef, <2 x i32> < i32 1, i32 1 >
   %tmp10 = bitcast <2 x i32> %tmp9 to <1 x i64>
@@ -97,7 +97,7 @@
 ; X64-NEXT:    movq %xmm0, (%rax)
 ; X64-NEXT:    retq
 entry:
-  %0 = load <2 x i32>* @tmp_V2i, align 8
+  %0 = load <2 x i32>, <2 x i32>* @tmp_V2i, align 8
   %1 = shufflevector <2 x i32> %0, <2 x i32> undef, <2 x i32> zeroinitializer
   store <2 x i32> %1, <2 x i32>* @tmp_V2i, align 8
   ret void
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-sse1.ll b/llvm/test/CodeGen/X86/vector-shuffle-sse1.ll
index b4cb0ec..66e53bb 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-sse1.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-sse1.ll
@@ -176,7 +176,7 @@
 ; SSE1:       # BB#0:
 ; SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE1-NEXT:    retq
-  %a = load float* %ptr
+  %a = load float, float* %ptr
   %v = insertelement <4 x float> undef, float %a, i32 0
   %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   ret <4 x float> %shuffle
@@ -197,7 +197,7 @@
 ; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
 ; SSE1-NEXT:    movaps %xmm1, %xmm0
 ; SSE1-NEXT:    retq
-  %a = load <2 x float>* %ptr
+  %a = load <2 x float>, <2 x float>* %ptr
   %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
   ret <4 x float> %shuffle
@@ -217,7 +217,7 @@
 ; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3]
 ; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
 ; SSE1-NEXT:    retq
-  %a = load <2 x float>* %ptr
+  %a = load <2 x float>, <2 x float>* %ptr
   %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
   %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
   ret <4 x float> %shuffle
@@ -229,7 +229,7 @@
 ; SSE1-NEXT:    movaps (%rdi), %xmm0
 ; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
 ; SSE1-NEXT:    retq
-  %a = load <4 x float>* %ptr
+  %a = load <4 x float>, <4 x float>* %ptr
   %shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   ret <4 x float> %shuffle
 }
diff --git a/llvm/test/CodeGen/X86/vector-variable-idx2.ll b/llvm/test/CodeGen/X86/vector-variable-idx2.ll
index 6e8ae2e..df65257 100644
--- a/llvm/test/CodeGen/X86/vector-variable-idx2.ll
+++ b/llvm/test/CodeGen/X86/vector-variable-idx2.ll
@@ -8,8 +8,8 @@
   %2 = alloca i32, align 4
   store <2 x i64> %a, <2 x i64>* %1, align 16
   store i32 %i, i32* %2, align 4
-  %3 = load <2 x i64>* %1, align 16
-  %4 = load i32* %2, align 4
+  %3 = load <2 x i64>, <2 x i64>* %1, align 16
+  %4 = load i32, i32* %2, align 4
   %5 = extractelement <2 x i64> %3, i32 %4
   ret i64 %5
 }
@@ -19,8 +19,8 @@
   %2 = alloca i32, align 4
   store <2 x i64> %a, <2 x i64>* %1, align 16
   store i32 %i, i32* %2, align 4
-  %3 = load <2 x i64>* %1, align 16
-  %4 = load i32* %2, align 4
+  %3 = load <2 x i64>, <2 x i64>* %1, align 16
+  %4 = load i32, i32* %2, align 4
   %5 = insertelement <2 x i64> %3, i64 1, i32 %4
   ret <2 x i64> %5
 }
diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll
index 568687d..055c256 100644
--- a/llvm/test/CodeGen/X86/vector-zext.ll
+++ b/llvm/test/CodeGen/X86/vector-zext.ll
@@ -236,7 +236,7 @@
 ; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
 ; AVX2-NEXT:    retq
 entry:
- %X = load <16 x i8>* %ptr
+ %X = load <16 x i8>, <16 x i8>* %ptr
  %Y = zext <16 x i8> %X to <16 x i16>
  ret <16 x i16> %Y
 }
@@ -280,7 +280,7 @@
 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
 ; AVX2-NEXT:    retq
 entry:
- %X = load <8 x i16>* %ptr
+ %X = load <8 x i16>, <8 x i16>* %ptr
  %Y = zext <8 x i16> %X to <8 x i32>
  ret <8 x i32>%Y
 }
@@ -324,7 +324,7 @@
 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
 ; AVX2-NEXT:    retq
 entry:
- %X = load <4 x i32>* %ptr
+ %X = load <4 x i32>, <4 x i32>* %ptr
  %Y = zext <4 x i32> %X to <4 x i64>
  ret <4 x i64>%Y
 }
diff --git a/llvm/test/CodeGen/X86/vector-zmov.ll b/llvm/test/CodeGen/X86/vector-zmov.ll
index 4de2543..cf592b1 100644
--- a/llvm/test/CodeGen/X86/vector-zmov.ll
+++ b/llvm/test/CodeGen/X86/vector-zmov.ll
@@ -15,7 +15,7 @@
 ; AVX-NEXT:   vmovd (%rdi), %xmm0
 ; AVX-NEXT:   retq
 entry:
-  %X = load <4 x i32>* %ptr
+  %X = load <4 x i32>, <4 x i32>* %ptr
   %Y = shufflevector <4 x i32> %X, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
   ret <4 x i32>%Y
 }
@@ -31,7 +31,7 @@
 ; AVX-NEXT:   vmovq (%rdi), %xmm0
 ; AVX-NEXT:   retq
 entry:
-  %X = load <2 x i64>* %ptr
+  %X = load <2 x i64>, <2 x i64>* %ptr
   %Y = shufflevector <2 x i64> %X, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
   ret <2 x i64>%Y
 }
diff --git a/llvm/test/CodeGen/X86/vector.ll b/llvm/test/CodeGen/X86/vector.ll
index 82d20a2..39e7f0e 100644
--- a/llvm/test/CodeGen/X86/vector.ll
+++ b/llvm/test/CodeGen/X86/vector.ll
@@ -13,48 +13,48 @@
 ;;; TEST HANDLING OF VARIOUS VECTOR SIZES
 
 define void @test_f1(%f1* %P, %f1* %Q, %f1* %S) {
-        %p = load %f1* %P               ; <%f1> [#uses=1]
-        %q = load %f1* %Q               ; <%f1> [#uses=1]
+        %p = load %f1, %f1* %P               ; <%f1> [#uses=1]
+        %q = load %f1, %f1* %Q               ; <%f1> [#uses=1]
         %R = fadd %f1 %p, %q             ; <%f1> [#uses=1]
         store %f1 %R, %f1* %S
         ret void
 }
 
 define void @test_f2(%f2* %P, %f2* %Q, %f2* %S) {
-        %p = load %f2* %P               ; <%f2> [#uses=1]
-        %q = load %f2* %Q               ; <%f2> [#uses=1]
+        %p = load %f2, %f2* %P               ; <%f2> [#uses=1]
+        %q = load %f2, %f2* %Q               ; <%f2> [#uses=1]
         %R = fadd %f2 %p, %q             ; <%f2> [#uses=1]
         store %f2 %R, %f2* %S
         ret void
 }
 
 define void @test_f4(%f4* %P, %f4* %Q, %f4* %S) {
-        %p = load %f4* %P               ; <%f4> [#uses=1]
-        %q = load %f4* %Q               ; <%f4> [#uses=1]
+        %p = load %f4, %f4* %P               ; <%f4> [#uses=1]
+        %q = load %f4, %f4* %Q               ; <%f4> [#uses=1]
         %R = fadd %f4 %p, %q             ; <%f4> [#uses=1]
         store %f4 %R, %f4* %S
         ret void
 }
 
 define void @test_f8(%f8* %P, %f8* %Q, %f8* %S) {
-        %p = load %f8* %P               ; <%f8> [#uses=1]
-        %q = load %f8* %Q               ; <%f8> [#uses=1]
+        %p = load %f8, %f8* %P               ; <%f8> [#uses=1]
+        %q = load %f8, %f8* %Q               ; <%f8> [#uses=1]
         %R = fadd %f8 %p, %q             ; <%f8> [#uses=1]
         store %f8 %R, %f8* %S
         ret void
 }
 
 define void @test_fmul(%f8* %P, %f8* %Q, %f8* %S) {
-        %p = load %f8* %P               ; <%f8> [#uses=1]
-        %q = load %f8* %Q               ; <%f8> [#uses=1]
+        %p = load %f8, %f8* %P               ; <%f8> [#uses=1]
+        %q = load %f8, %f8* %Q               ; <%f8> [#uses=1]
         %R = fmul %f8 %p, %q             ; <%f8> [#uses=1]
         store %f8 %R, %f8* %S
         ret void
 }
 
 define void @test_div(%f8* %P, %f8* %Q, %f8* %S) {
-        %p = load %f8* %P               ; <%f8> [#uses=1]
-        %q = load %f8* %Q               ; <%f8> [#uses=1]
+        %p = load %f8, %f8* %P               ; <%f8> [#uses=1]
+        %q = load %f8, %f8* %Q               ; <%f8> [#uses=1]
         %R = fdiv %f8 %p, %q            ; <%f8> [#uses=1]
         store %f8 %R, %f8* %S
         ret void
@@ -63,21 +63,21 @@
 ;;; TEST VECTOR CONSTRUCTS
 
 define void @test_cst(%f4* %P, %f4* %S) {
-        %p = load %f4* %P               ; <%f4> [#uses=1]
+        %p = load %f4, %f4* %P               ; <%f4> [#uses=1]
         %R = fadd %f4 %p, < float 0x3FB99999A0000000, float 1.000000e+00, float 2.000000e+00, float 4.500000e+00 >             ; <%f4> [#uses=1]
         store %f4 %R, %f4* %S
         ret void
 }
 
 define void @test_zero(%f4* %P, %f4* %S) {
-        %p = load %f4* %P               ; <%f4> [#uses=1]
+        %p = load %f4, %f4* %P               ; <%f4> [#uses=1]
         %R = fadd %f4 %p, zeroinitializer                ; <%f4> [#uses=1]
         store %f4 %R, %f4* %S
         ret void
 }
 
 define void @test_undef(%f4* %P, %f4* %S) {
-        %p = load %f4* %P               ; <%f4> [#uses=1]
+        %p = load %f4, %f4* %P               ; <%f4> [#uses=1]
         %R = fadd %f4 %p, undef          ; <%f4> [#uses=1]
         store %f4 %R, %f4* %S
         ret void
@@ -102,19 +102,19 @@
 }
 
 define float @test_extract_elt(%f8* %P) {
-        %p = load %f8* %P               ; <%f8> [#uses=1]
+        %p = load %f8, %f8* %P               ; <%f8> [#uses=1]
         %R = extractelement %f8 %p, i32 3               ; <float> [#uses=1]
         ret float %R
 }
 
 define double @test_extract_elt2(%d8* %P) {
-        %p = load %d8* %P               ; <%d8> [#uses=1]
+        %p = load %d8, %d8* %P               ; <%d8> [#uses=1]
         %R = extractelement %d8 %p, i32 3               ; <double> [#uses=1]
         ret double %R
 }
 
 define void @test_cast_1(%f4* %b, %i4* %a) {
-        %tmp = load %f4* %b             ; <%f4> [#uses=1]
+        %tmp = load %f4, %f4* %b             ; <%f4> [#uses=1]
         %tmp2 = fadd %f4 %tmp, < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >              ; <%f4> [#uses=1]
         %tmp3 = bitcast %f4 %tmp2 to %i4                ; <%i4> [#uses=1]
         %tmp4 = add %i4 %tmp3, < i32 1, i32 2, i32 3, i32 4 >           ; <%i4> [#uses=1]
@@ -123,7 +123,7 @@
 }
 
 define void @test_cast_2(%f8* %a, <8 x i32>* %b) {
-        %T = load %f8* %a               ; <%f8> [#uses=1]
+        %T = load %f8, %f8* %a               ; <%f8> [#uses=1]
         %T2 = bitcast %f8 %T to <8 x i32>               ; <<8 x i32>> [#uses=1]
         store <8 x i32> %T2, <8 x i32>* %b
         ret void
@@ -137,7 +137,7 @@
         %tmp2 = insertelement %f4 %tmp, float %X, i32 1         ; <%f4> [#uses=1]
         %tmp4 = insertelement %f4 %tmp2, float %X, i32 2                ; <%f4> [#uses=1]
         %tmp6 = insertelement %f4 %tmp4, float %X, i32 3                ; <%f4> [#uses=1]
-        %q = load %f4* %Q               ; <%f4> [#uses=1]
+        %q = load %f4, %f4* %Q               ; <%f4> [#uses=1]
         %R = fadd %f4 %q, %tmp6          ; <%f4> [#uses=1]
         store %f4 %R, %f4* %P
         ret void
@@ -148,7 +148,7 @@
         %tmp2 = insertelement %i4 %tmp, i32 %X, i32 1           ; <%i4> [#uses=1]
         %tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2          ; <%i4> [#uses=1]
         %tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3          ; <%i4> [#uses=1]
-        %q = load %i4* %Q               ; <%i4> [#uses=1]
+        %q = load %i4, %i4* %Q               ; <%i4> [#uses=1]
         %R = add %i4 %q, %tmp6          ; <%i4> [#uses=1]
         store %i4 %R, %i4* %P
         ret void
diff --git a/llvm/test/CodeGen/X86/viabs.ll b/llvm/test/CodeGen/X86/viabs.ll
index c009235..fe528fd 100644
--- a/llvm/test/CodeGen/X86/viabs.ll
+++ b/llvm/test/CodeGen/X86/viabs.ll
@@ -262,7 +262,7 @@
 ; AVX512-LABEL: test13:
 ; AVX512: vpabsq (%
 ; AVX512-NEXT: ret
-        %a = load <8 x i64>* %a.ptr, align 8
+        %a = load <8 x i64>, <8 x i64>* %a.ptr, align 8
         %tmp1neg = sub <8 x i64> zeroinitializer, %a
         %b = icmp sle <8 x i64> %a, zeroinitializer
         %abs = select <8 x i1> %b, <8 x i64> %tmp1neg, <8 x i64> %a
diff --git a/llvm/test/CodeGen/X86/visibility2.ll b/llvm/test/CodeGen/X86/visibility2.ll
index 72ea733..48a0ac6 100644
--- a/llvm/test/CodeGen/X86/visibility2.ll
+++ b/llvm/test/CodeGen/X86/visibility2.ll
@@ -8,7 +8,7 @@
 
 define void @foo1() nounwind ssp {
 entry:
-  %tmp = load i8** @foo_private_extern_str, align 8
+  %tmp = load i8*, i8** @foo_private_extern_str, align 8
   call void @foo3(i8* %tmp)
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/volatile.ll b/llvm/test/CodeGen/X86/volatile.ll
index 1a82014..8d521b4 100644
--- a/llvm/test/CodeGen/X86/volatile.ll
+++ b/llvm/test/CodeGen/X86/volatile.ll
@@ -4,14 +4,14 @@
 @x = external global double
 
 define void @foo() nounwind  {
-  %a = load volatile double* @x
+  %a = load volatile double, double* @x
   store volatile double 0.0, double* @x
   store volatile double 0.0, double* @x
-  %b = load volatile double* @x
+  %b = load volatile double, double* @x
   ret void
 }
 
 define void @bar() nounwind  {
-  %c = load volatile double* @x
+  %c = load volatile double, double* @x
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/vselect-avx.ll b/llvm/test/CodeGen/X86/vselect-avx.ll
index c5e2a6b..ff26aeb 100644
--- a/llvm/test/CodeGen/X86/vselect-avx.ll
+++ b/llvm/test/CodeGen/X86/vselect-avx.ll
@@ -43,7 +43,7 @@
 define void @test2(double** %call1559, i64 %indvars.iv4198, <4 x i1> %tmp1895) {
 bb:
   %arrayidx1928 = getelementptr inbounds double*, double** %call1559, i64 %indvars.iv4198
-  %tmp1888 = load double** %arrayidx1928, align 8
+  %tmp1888 = load double*, double** %arrayidx1928, align 8
   %predphi.v.v = select <4 x i1> %tmp1895, <4 x double> <double -5.000000e-01, double -5.000000e-01, double -5.000000e-01, double -5.000000e-01>, <4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
   %tmp1900 = bitcast double* %tmp1888 to <4 x double>*
   store <4 x double> %predphi.v.v, <4 x double>* %tmp1900, align 8
diff --git a/llvm/test/CodeGen/X86/vselect-minmax.ll b/llvm/test/CodeGen/X86/vselect-minmax.ll
index feacf08..5ed687f 100644
--- a/llvm/test/CodeGen/X86/vselect-minmax.ll
+++ b/llvm/test/CodeGen/X86/vselect-minmax.ll
@@ -15,8 +15,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
-  %load.a = load <16 x i8>* %ptr.a, align 2
-  %load.b = load <16 x i8>* %ptr.b, align 2
+  %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+  %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
   %cmp = icmp slt <16 x i8> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -50,8 +50,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
-  %load.a = load <16 x i8>* %ptr.a, align 2
-  %load.b = load <16 x i8>* %ptr.b, align 2
+  %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+  %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
   %cmp = icmp sle <16 x i8> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -85,8 +85,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
-  %load.a = load <16 x i8>* %ptr.a, align 2
-  %load.b = load <16 x i8>* %ptr.b, align 2
+  %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+  %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
   %cmp = icmp sgt <16 x i8> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -120,8 +120,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
-  %load.a = load <16 x i8>* %ptr.a, align 2
-  %load.b = load <16 x i8>* %ptr.b, align 2
+  %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+  %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
   %cmp = icmp sge <16 x i8> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -155,8 +155,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
-  %load.a = load <16 x i8>* %ptr.a, align 2
-  %load.b = load <16 x i8>* %ptr.b, align 2
+  %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+  %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
   %cmp = icmp ult <16 x i8> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -190,8 +190,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
-  %load.a = load <16 x i8>* %ptr.a, align 2
-  %load.b = load <16 x i8>* %ptr.b, align 2
+  %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+  %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
   %cmp = icmp ule <16 x i8> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -225,8 +225,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
-  %load.a = load <16 x i8>* %ptr.a, align 2
-  %load.b = load <16 x i8>* %ptr.b, align 2
+  %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+  %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
   %cmp = icmp ugt <16 x i8> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -260,8 +260,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
-  %load.a = load <16 x i8>* %ptr.a, align 2
-  %load.b = load <16 x i8>* %ptr.b, align 2
+  %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+  %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
   %cmp = icmp uge <16 x i8> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i8> %load.a, <16 x i8> %load.b
   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -295,8 +295,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
-  %load.a = load <8 x i16>* %ptr.a, align 2
-  %load.b = load <8 x i16>* %ptr.b, align 2
+  %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+  %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
   %cmp = icmp slt <8 x i16> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -330,8 +330,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
-  %load.a = load <8 x i16>* %ptr.a, align 2
-  %load.b = load <8 x i16>* %ptr.b, align 2
+  %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+  %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
   %cmp = icmp sle <8 x i16> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -365,8 +365,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
-  %load.a = load <8 x i16>* %ptr.a, align 2
-  %load.b = load <8 x i16>* %ptr.b, align 2
+  %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+  %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
   %cmp = icmp sgt <8 x i16> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -400,8 +400,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
-  %load.a = load <8 x i16>* %ptr.a, align 2
-  %load.b = load <8 x i16>* %ptr.b, align 2
+  %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+  %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
   %cmp = icmp sge <8 x i16> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -435,8 +435,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
-  %load.a = load <8 x i16>* %ptr.a, align 2
-  %load.b = load <8 x i16>* %ptr.b, align 2
+  %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+  %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
   %cmp = icmp ult <8 x i16> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -470,8 +470,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
-  %load.a = load <8 x i16>* %ptr.a, align 2
-  %load.b = load <8 x i16>* %ptr.b, align 2
+  %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+  %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
   %cmp = icmp ule <8 x i16> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -505,8 +505,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
-  %load.a = load <8 x i16>* %ptr.a, align 2
-  %load.b = load <8 x i16>* %ptr.b, align 2
+  %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+  %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
   %cmp = icmp ugt <8 x i16> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -540,8 +540,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
-  %load.a = load <8 x i16>* %ptr.a, align 2
-  %load.b = load <8 x i16>* %ptr.b, align 2
+  %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+  %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
   %cmp = icmp uge <8 x i16> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i16> %load.a, <8 x i16> %load.b
   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -575,8 +575,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
-  %load.a = load <4 x i32>* %ptr.a, align 2
-  %load.b = load <4 x i32>* %ptr.b, align 2
+  %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+  %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
   %cmp = icmp slt <4 x i32> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -610,8 +610,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
-  %load.a = load <4 x i32>* %ptr.a, align 2
-  %load.b = load <4 x i32>* %ptr.b, align 2
+  %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+  %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
   %cmp = icmp sle <4 x i32> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -645,8 +645,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
-  %load.a = load <4 x i32>* %ptr.a, align 2
-  %load.b = load <4 x i32>* %ptr.b, align 2
+  %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+  %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
   %cmp = icmp sgt <4 x i32> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -680,8 +680,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
-  %load.a = load <4 x i32>* %ptr.a, align 2
-  %load.b = load <4 x i32>* %ptr.b, align 2
+  %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+  %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
   %cmp = icmp sge <4 x i32> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -715,8 +715,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
-  %load.a = load <4 x i32>* %ptr.a, align 2
-  %load.b = load <4 x i32>* %ptr.b, align 2
+  %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+  %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
   %cmp = icmp ult <4 x i32> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -750,8 +750,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
-  %load.a = load <4 x i32>* %ptr.a, align 2
-  %load.b = load <4 x i32>* %ptr.b, align 2
+  %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+  %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
   %cmp = icmp ule <4 x i32> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -785,8 +785,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
-  %load.a = load <4 x i32>* %ptr.a, align 2
-  %load.b = load <4 x i32>* %ptr.b, align 2
+  %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+  %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
   %cmp = icmp ugt <4 x i32> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -820,8 +820,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
-  %load.a = load <4 x i32>* %ptr.a, align 2
-  %load.b = load <4 x i32>* %ptr.b, align 2
+  %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+  %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
   %cmp = icmp uge <4 x i32> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i32> %load.a, <4 x i32> %load.b
   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -855,8 +855,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
-  %load.a = load <32 x i8>* %ptr.a, align 2
-  %load.b = load <32 x i8>* %ptr.b, align 2
+  %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+  %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
   %cmp = icmp slt <32 x i8> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -884,8 +884,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
-  %load.a = load <32 x i8>* %ptr.a, align 2
-  %load.b = load <32 x i8>* %ptr.b, align 2
+  %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+  %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
   %cmp = icmp sle <32 x i8> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -913,8 +913,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
-  %load.a = load <32 x i8>* %ptr.a, align 2
-  %load.b = load <32 x i8>* %ptr.b, align 2
+  %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+  %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
   %cmp = icmp sgt <32 x i8> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -942,8 +942,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
-  %load.a = load <32 x i8>* %ptr.a, align 2
-  %load.b = load <32 x i8>* %ptr.b, align 2
+  %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+  %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
   %cmp = icmp sge <32 x i8> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -971,8 +971,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
-  %load.a = load <32 x i8>* %ptr.a, align 2
-  %load.b = load <32 x i8>* %ptr.b, align 2
+  %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+  %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
   %cmp = icmp ult <32 x i8> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -1000,8 +1000,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
-  %load.a = load <32 x i8>* %ptr.a, align 2
-  %load.b = load <32 x i8>* %ptr.b, align 2
+  %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+  %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
   %cmp = icmp ule <32 x i8> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -1029,8 +1029,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
-  %load.a = load <32 x i8>* %ptr.a, align 2
-  %load.b = load <32 x i8>* %ptr.b, align 2
+  %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+  %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
   %cmp = icmp ugt <32 x i8> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -1058,8 +1058,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
-  %load.a = load <32 x i8>* %ptr.a, align 2
-  %load.b = load <32 x i8>* %ptr.b, align 2
+  %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+  %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
   %cmp = icmp uge <32 x i8> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i8> %load.a, <32 x i8> %load.b
   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -1087,8 +1087,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
-  %load.a = load <16 x i16>* %ptr.a, align 2
-  %load.b = load <16 x i16>* %ptr.b, align 2
+  %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+  %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
   %cmp = icmp slt <16 x i16> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -1116,8 +1116,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
-  %load.a = load <16 x i16>* %ptr.a, align 2
-  %load.b = load <16 x i16>* %ptr.b, align 2
+  %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+  %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
   %cmp = icmp sle <16 x i16> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -1145,8 +1145,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
-  %load.a = load <16 x i16>* %ptr.a, align 2
-  %load.b = load <16 x i16>* %ptr.b, align 2
+  %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+  %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
   %cmp = icmp sgt <16 x i16> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -1174,8 +1174,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
-  %load.a = load <16 x i16>* %ptr.a, align 2
-  %load.b = load <16 x i16>* %ptr.b, align 2
+  %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+  %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
   %cmp = icmp sge <16 x i16> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -1203,8 +1203,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
-  %load.a = load <16 x i16>* %ptr.a, align 2
-  %load.b = load <16 x i16>* %ptr.b, align 2
+  %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+  %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
   %cmp = icmp ult <16 x i16> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -1232,8 +1232,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
-  %load.a = load <16 x i16>* %ptr.a, align 2
-  %load.b = load <16 x i16>* %ptr.b, align 2
+  %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+  %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
   %cmp = icmp ule <16 x i16> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -1261,8 +1261,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
-  %load.a = load <16 x i16>* %ptr.a, align 2
-  %load.b = load <16 x i16>* %ptr.b, align 2
+  %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+  %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
   %cmp = icmp ugt <16 x i16> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -1290,8 +1290,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
-  %load.a = load <16 x i16>* %ptr.a, align 2
-  %load.b = load <16 x i16>* %ptr.b, align 2
+  %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+  %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
   %cmp = icmp uge <16 x i16> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i16> %load.a, <16 x i16> %load.b
   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -1319,8 +1319,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
-  %load.a = load <8 x i32>* %ptr.a, align 2
-  %load.b = load <8 x i32>* %ptr.b, align 2
+  %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+  %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
   %cmp = icmp slt <8 x i32> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -1348,8 +1348,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
-  %load.a = load <8 x i32>* %ptr.a, align 2
-  %load.b = load <8 x i32>* %ptr.b, align 2
+  %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+  %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
   %cmp = icmp sle <8 x i32> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -1377,8 +1377,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
-  %load.a = load <8 x i32>* %ptr.a, align 2
-  %load.b = load <8 x i32>* %ptr.b, align 2
+  %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+  %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
   %cmp = icmp sgt <8 x i32> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -1406,8 +1406,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
-  %load.a = load <8 x i32>* %ptr.a, align 2
-  %load.b = load <8 x i32>* %ptr.b, align 2
+  %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+  %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
   %cmp = icmp sge <8 x i32> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -1435,8 +1435,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
-  %load.a = load <8 x i32>* %ptr.a, align 2
-  %load.b = load <8 x i32>* %ptr.b, align 2
+  %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+  %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
   %cmp = icmp ult <8 x i32> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -1464,8 +1464,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
-  %load.a = load <8 x i32>* %ptr.a, align 2
-  %load.b = load <8 x i32>* %ptr.b, align 2
+  %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+  %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
   %cmp = icmp ule <8 x i32> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -1493,8 +1493,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
-  %load.a = load <8 x i32>* %ptr.a, align 2
-  %load.b = load <8 x i32>* %ptr.b, align 2
+  %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+  %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
   %cmp = icmp ugt <8 x i32> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -1522,8 +1522,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
-  %load.a = load <8 x i32>* %ptr.a, align 2
-  %load.b = load <8 x i32>* %ptr.b, align 2
+  %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+  %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
   %cmp = icmp uge <8 x i32> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i32> %load.a, <8 x i32> %load.b
   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -1551,8 +1551,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
-  %load.a = load <16 x i8>* %ptr.a, align 2
-  %load.b = load <16 x i8>* %ptr.b, align 2
+  %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+  %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
   %cmp = icmp slt <16 x i8> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -1586,8 +1586,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
-  %load.a = load <16 x i8>* %ptr.a, align 2
-  %load.b = load <16 x i8>* %ptr.b, align 2
+  %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+  %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
   %cmp = icmp sle <16 x i8> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -1621,8 +1621,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
-  %load.a = load <16 x i8>* %ptr.a, align 2
-  %load.b = load <16 x i8>* %ptr.b, align 2
+  %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+  %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
   %cmp = icmp sgt <16 x i8> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -1656,8 +1656,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
-  %load.a = load <16 x i8>* %ptr.a, align 2
-  %load.b = load <16 x i8>* %ptr.b, align 2
+  %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+  %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
   %cmp = icmp sge <16 x i8> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -1691,8 +1691,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
-  %load.a = load <16 x i8>* %ptr.a, align 2
-  %load.b = load <16 x i8>* %ptr.b, align 2
+  %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+  %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
   %cmp = icmp ult <16 x i8> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -1726,8 +1726,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
-  %load.a = load <16 x i8>* %ptr.a, align 2
-  %load.b = load <16 x i8>* %ptr.b, align 2
+  %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+  %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
   %cmp = icmp ule <16 x i8> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -1761,8 +1761,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
-  %load.a = load <16 x i8>* %ptr.a, align 2
-  %load.b = load <16 x i8>* %ptr.b, align 2
+  %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+  %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
   %cmp = icmp ugt <16 x i8> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -1796,8 +1796,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <16 x i8>*
   %ptr.b = bitcast i8* %gep.b to <16 x i8>*
-  %load.a = load <16 x i8>* %ptr.a, align 2
-  %load.b = load <16 x i8>* %ptr.b, align 2
+  %load.a = load <16 x i8>, <16 x i8>* %ptr.a, align 2
+  %load.b = load <16 x i8>, <16 x i8>* %ptr.b, align 2
   %cmp = icmp uge <16 x i8> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i8> %load.b, <16 x i8> %load.a
   store <16 x i8> %sel, <16 x i8>* %ptr.a, align 2
@@ -1831,8 +1831,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
-  %load.a = load <8 x i16>* %ptr.a, align 2
-  %load.b = load <8 x i16>* %ptr.b, align 2
+  %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+  %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
   %cmp = icmp slt <8 x i16> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -1866,8 +1866,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
-  %load.a = load <8 x i16>* %ptr.a, align 2
-  %load.b = load <8 x i16>* %ptr.b, align 2
+  %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+  %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
   %cmp = icmp sle <8 x i16> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -1901,8 +1901,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
-  %load.a = load <8 x i16>* %ptr.a, align 2
-  %load.b = load <8 x i16>* %ptr.b, align 2
+  %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+  %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
   %cmp = icmp sgt <8 x i16> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -1936,8 +1936,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
-  %load.a = load <8 x i16>* %ptr.a, align 2
-  %load.b = load <8 x i16>* %ptr.b, align 2
+  %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+  %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
   %cmp = icmp sge <8 x i16> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -1971,8 +1971,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
-  %load.a = load <8 x i16>* %ptr.a, align 2
-  %load.b = load <8 x i16>* %ptr.b, align 2
+  %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+  %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
   %cmp = icmp ult <8 x i16> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -2006,8 +2006,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
-  %load.a = load <8 x i16>* %ptr.a, align 2
-  %load.b = load <8 x i16>* %ptr.b, align 2
+  %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+  %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
   %cmp = icmp ule <8 x i16> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -2041,8 +2041,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
-  %load.a = load <8 x i16>* %ptr.a, align 2
-  %load.b = load <8 x i16>* %ptr.b, align 2
+  %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+  %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
   %cmp = icmp ugt <8 x i16> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -2076,8 +2076,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <8 x i16>*
   %ptr.b = bitcast i16* %gep.b to <8 x i16>*
-  %load.a = load <8 x i16>* %ptr.a, align 2
-  %load.b = load <8 x i16>* %ptr.b, align 2
+  %load.a = load <8 x i16>, <8 x i16>* %ptr.a, align 2
+  %load.b = load <8 x i16>, <8 x i16>* %ptr.b, align 2
   %cmp = icmp uge <8 x i16> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i16> %load.b, <8 x i16> %load.a
   store <8 x i16> %sel, <8 x i16>* %ptr.a, align 2
@@ -2111,8 +2111,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
-  %load.a = load <4 x i32>* %ptr.a, align 2
-  %load.b = load <4 x i32>* %ptr.b, align 2
+  %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+  %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
   %cmp = icmp slt <4 x i32> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -2146,8 +2146,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
-  %load.a = load <4 x i32>* %ptr.a, align 2
-  %load.b = load <4 x i32>* %ptr.b, align 2
+  %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+  %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
   %cmp = icmp sle <4 x i32> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -2181,8 +2181,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
-  %load.a = load <4 x i32>* %ptr.a, align 2
-  %load.b = load <4 x i32>* %ptr.b, align 2
+  %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+  %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
   %cmp = icmp sgt <4 x i32> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -2216,8 +2216,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
-  %load.a = load <4 x i32>* %ptr.a, align 2
-  %load.b = load <4 x i32>* %ptr.b, align 2
+  %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+  %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
   %cmp = icmp sge <4 x i32> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -2251,8 +2251,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
-  %load.a = load <4 x i32>* %ptr.a, align 2
-  %load.b = load <4 x i32>* %ptr.b, align 2
+  %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+  %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
   %cmp = icmp ult <4 x i32> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -2286,8 +2286,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
-  %load.a = load <4 x i32>* %ptr.a, align 2
-  %load.b = load <4 x i32>* %ptr.b, align 2
+  %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+  %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
   %cmp = icmp ule <4 x i32> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -2321,8 +2321,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
-  %load.a = load <4 x i32>* %ptr.a, align 2
-  %load.b = load <4 x i32>* %ptr.b, align 2
+  %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+  %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
   %cmp = icmp ugt <4 x i32> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -2356,8 +2356,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i32>*
   %ptr.b = bitcast i32* %gep.b to <4 x i32>*
-  %load.a = load <4 x i32>* %ptr.a, align 2
-  %load.b = load <4 x i32>* %ptr.b, align 2
+  %load.a = load <4 x i32>, <4 x i32>* %ptr.a, align 2
+  %load.b = load <4 x i32>, <4 x i32>* %ptr.b, align 2
   %cmp = icmp uge <4 x i32> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i32> %load.b, <4 x i32> %load.a
   store <4 x i32> %sel, <4 x i32>* %ptr.a, align 2
@@ -2391,8 +2391,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
-  %load.a = load <32 x i8>* %ptr.a, align 2
-  %load.b = load <32 x i8>* %ptr.b, align 2
+  %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+  %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
   %cmp = icmp slt <32 x i8> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -2420,8 +2420,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
-  %load.a = load <32 x i8>* %ptr.a, align 2
-  %load.b = load <32 x i8>* %ptr.b, align 2
+  %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+  %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
   %cmp = icmp sle <32 x i8> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -2449,8 +2449,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
-  %load.a = load <32 x i8>* %ptr.a, align 2
-  %load.b = load <32 x i8>* %ptr.b, align 2
+  %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+  %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
   %cmp = icmp sgt <32 x i8> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -2478,8 +2478,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
-  %load.a = load <32 x i8>* %ptr.a, align 2
-  %load.b = load <32 x i8>* %ptr.b, align 2
+  %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+  %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
   %cmp = icmp sge <32 x i8> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -2507,8 +2507,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
-  %load.a = load <32 x i8>* %ptr.a, align 2
-  %load.b = load <32 x i8>* %ptr.b, align 2
+  %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+  %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
   %cmp = icmp ult <32 x i8> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -2536,8 +2536,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
-  %load.a = load <32 x i8>* %ptr.a, align 2
-  %load.b = load <32 x i8>* %ptr.b, align 2
+  %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+  %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
   %cmp = icmp ule <32 x i8> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -2565,8 +2565,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
-  %load.a = load <32 x i8>* %ptr.a, align 2
-  %load.b = load <32 x i8>* %ptr.b, align 2
+  %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+  %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
   %cmp = icmp ugt <32 x i8> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -2594,8 +2594,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <32 x i8>*
   %ptr.b = bitcast i8* %gep.b to <32 x i8>*
-  %load.a = load <32 x i8>* %ptr.a, align 2
-  %load.b = load <32 x i8>* %ptr.b, align 2
+  %load.a = load <32 x i8>, <32 x i8>* %ptr.a, align 2
+  %load.b = load <32 x i8>, <32 x i8>* %ptr.b, align 2
   %cmp = icmp uge <32 x i8> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i8> %load.b, <32 x i8> %load.a
   store <32 x i8> %sel, <32 x i8>* %ptr.a, align 2
@@ -2623,8 +2623,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
-  %load.a = load <16 x i16>* %ptr.a, align 2
-  %load.b = load <16 x i16>* %ptr.b, align 2
+  %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+  %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
   %cmp = icmp slt <16 x i16> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -2652,8 +2652,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
-  %load.a = load <16 x i16>* %ptr.a, align 2
-  %load.b = load <16 x i16>* %ptr.b, align 2
+  %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+  %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
   %cmp = icmp sle <16 x i16> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -2681,8 +2681,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
-  %load.a = load <16 x i16>* %ptr.a, align 2
-  %load.b = load <16 x i16>* %ptr.b, align 2
+  %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+  %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
   %cmp = icmp sgt <16 x i16> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -2710,8 +2710,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
-  %load.a = load <16 x i16>* %ptr.a, align 2
-  %load.b = load <16 x i16>* %ptr.b, align 2
+  %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+  %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
   %cmp = icmp sge <16 x i16> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -2739,8 +2739,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
-  %load.a = load <16 x i16>* %ptr.a, align 2
-  %load.b = load <16 x i16>* %ptr.b, align 2
+  %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+  %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
   %cmp = icmp ult <16 x i16> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -2768,8 +2768,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
-  %load.a = load <16 x i16>* %ptr.a, align 2
-  %load.b = load <16 x i16>* %ptr.b, align 2
+  %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+  %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
   %cmp = icmp ule <16 x i16> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -2797,8 +2797,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
-  %load.a = load <16 x i16>* %ptr.a, align 2
-  %load.b = load <16 x i16>* %ptr.b, align 2
+  %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+  %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
   %cmp = icmp ugt <16 x i16> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -2826,8 +2826,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <16 x i16>*
   %ptr.b = bitcast i16* %gep.b to <16 x i16>*
-  %load.a = load <16 x i16>* %ptr.a, align 2
-  %load.b = load <16 x i16>* %ptr.b, align 2
+  %load.a = load <16 x i16>, <16 x i16>* %ptr.a, align 2
+  %load.b = load <16 x i16>, <16 x i16>* %ptr.b, align 2
   %cmp = icmp uge <16 x i16> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i16> %load.b, <16 x i16> %load.a
   store <16 x i16> %sel, <16 x i16>* %ptr.a, align 2
@@ -2855,8 +2855,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
-  %load.a = load <8 x i32>* %ptr.a, align 2
-  %load.b = load <8 x i32>* %ptr.b, align 2
+  %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+  %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
   %cmp = icmp slt <8 x i32> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -2884,8 +2884,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
-  %load.a = load <8 x i32>* %ptr.a, align 2
-  %load.b = load <8 x i32>* %ptr.b, align 2
+  %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+  %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
   %cmp = icmp sle <8 x i32> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -2913,8 +2913,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
-  %load.a = load <8 x i32>* %ptr.a, align 2
-  %load.b = load <8 x i32>* %ptr.b, align 2
+  %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+  %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
   %cmp = icmp sgt <8 x i32> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -2942,8 +2942,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
-  %load.a = load <8 x i32>* %ptr.a, align 2
-  %load.b = load <8 x i32>* %ptr.b, align 2
+  %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+  %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
   %cmp = icmp sge <8 x i32> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -2971,8 +2971,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
-  %load.a = load <8 x i32>* %ptr.a, align 2
-  %load.b = load <8 x i32>* %ptr.b, align 2
+  %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+  %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
   %cmp = icmp ult <8 x i32> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -3000,8 +3000,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
-  %load.a = load <8 x i32>* %ptr.a, align 2
-  %load.b = load <8 x i32>* %ptr.b, align 2
+  %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+  %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
   %cmp = icmp ule <8 x i32> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -3029,8 +3029,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
-  %load.a = load <8 x i32>* %ptr.a, align 2
-  %load.b = load <8 x i32>* %ptr.b, align 2
+  %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+  %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
   %cmp = icmp ugt <8 x i32> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -3058,8 +3058,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i32>*
   %ptr.b = bitcast i32* %gep.b to <8 x i32>*
-  %load.a = load <8 x i32>* %ptr.a, align 2
-  %load.b = load <8 x i32>* %ptr.b, align 2
+  %load.a = load <8 x i32>, <8 x i32>* %ptr.a, align 2
+  %load.b = load <8 x i32>, <8 x i32>* %ptr.b, align 2
   %cmp = icmp uge <8 x i32> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i32> %load.b, <8 x i32> %load.a
   store <8 x i32> %sel, <8 x i32>* %ptr.a, align 2
@@ -3089,8 +3089,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <64 x i8>*
   %ptr.b = bitcast i8* %gep.b to <64 x i8>*
-  %load.a = load <64 x i8>* %ptr.a, align 2
-  %load.b = load <64 x i8>* %ptr.b, align 2
+  %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
   %cmp = icmp slt <64 x i8> %load.a, %load.b
   %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
   store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3115,8 +3115,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <64 x i8>*
   %ptr.b = bitcast i8* %gep.b to <64 x i8>*
-  %load.a = load <64 x i8>* %ptr.a, align 2
-  %load.b = load <64 x i8>* %ptr.b, align 2
+  %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
   %cmp = icmp sle <64 x i8> %load.a, %load.b
   %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
   store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3141,8 +3141,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <64 x i8>*
   %ptr.b = bitcast i8* %gep.b to <64 x i8>*
-  %load.a = load <64 x i8>* %ptr.a, align 2
-  %load.b = load <64 x i8>* %ptr.b, align 2
+  %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
   %cmp = icmp sgt <64 x i8> %load.a, %load.b
   %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
   store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3167,8 +3167,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <64 x i8>*
   %ptr.b = bitcast i8* %gep.b to <64 x i8>*
-  %load.a = load <64 x i8>* %ptr.a, align 2
-  %load.b = load <64 x i8>* %ptr.b, align 2
+  %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
   %cmp = icmp sge <64 x i8> %load.a, %load.b
   %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
   store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3193,8 +3193,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <64 x i8>*
   %ptr.b = bitcast i8* %gep.b to <64 x i8>*
-  %load.a = load <64 x i8>* %ptr.a, align 2
-  %load.b = load <64 x i8>* %ptr.b, align 2
+  %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
   %cmp = icmp ult <64 x i8> %load.a, %load.b
   %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
   store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3219,8 +3219,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <64 x i8>*
   %ptr.b = bitcast i8* %gep.b to <64 x i8>*
-  %load.a = load <64 x i8>* %ptr.a, align 2
-  %load.b = load <64 x i8>* %ptr.b, align 2
+  %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
   %cmp = icmp ule <64 x i8> %load.a, %load.b
   %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
   store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3245,8 +3245,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <64 x i8>*
   %ptr.b = bitcast i8* %gep.b to <64 x i8>*
-  %load.a = load <64 x i8>* %ptr.a, align 2
-  %load.b = load <64 x i8>* %ptr.b, align 2
+  %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
   %cmp = icmp ugt <64 x i8> %load.a, %load.b
   %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
   store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3271,8 +3271,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <64 x i8>*
   %ptr.b = bitcast i8* %gep.b to <64 x i8>*
-  %load.a = load <64 x i8>* %ptr.a, align 2
-  %load.b = load <64 x i8>* %ptr.b, align 2
+  %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
   %cmp = icmp uge <64 x i8> %load.a, %load.b
   %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
   store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3297,8 +3297,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <32 x i16>*
   %ptr.b = bitcast i16* %gep.b to <32 x i16>*
-  %load.a = load <32 x i16>* %ptr.a, align 2
-  %load.b = load <32 x i16>* %ptr.b, align 2
+  %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
   %cmp = icmp slt <32 x i16> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
   store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -3323,8 +3323,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <32 x i16>*
   %ptr.b = bitcast i16* %gep.b to <32 x i16>*
-  %load.a = load <32 x i16>* %ptr.a, align 2
-  %load.b = load <32 x i16>* %ptr.b, align 2
+  %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
   %cmp = icmp sle <32 x i16> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
   store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -3349,8 +3349,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <32 x i16>*
   %ptr.b = bitcast i16* %gep.b to <32 x i16>*
-  %load.a = load <32 x i16>* %ptr.a, align 2
-  %load.b = load <32 x i16>* %ptr.b, align 2
+  %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
   %cmp = icmp sgt <32 x i16> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
   store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -3375,8 +3375,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <32 x i16>*
   %ptr.b = bitcast i16* %gep.b to <32 x i16>*
-  %load.a = load <32 x i16>* %ptr.a, align 2
-  %load.b = load <32 x i16>* %ptr.b, align 2
+  %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
   %cmp = icmp sge <32 x i16> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
   store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -3401,8 +3401,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <32 x i16>*
   %ptr.b = bitcast i16* %gep.b to <32 x i16>*
-  %load.a = load <32 x i16>* %ptr.a, align 2
-  %load.b = load <32 x i16>* %ptr.b, align 2
+  %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
   %cmp = icmp ult <32 x i16> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
   store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -3427,8 +3427,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <32 x i16>*
   %ptr.b = bitcast i16* %gep.b to <32 x i16>*
-  %load.a = load <32 x i16>* %ptr.a, align 2
-  %load.b = load <32 x i16>* %ptr.b, align 2
+  %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
   %cmp = icmp ule <32 x i16> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
   store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -3453,8 +3453,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <32 x i16>*
   %ptr.b = bitcast i16* %gep.b to <32 x i16>*
-  %load.a = load <32 x i16>* %ptr.a, align 2
-  %load.b = load <32 x i16>* %ptr.b, align 2
+  %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
   %cmp = icmp ugt <32 x i16> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
   store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -3479,8 +3479,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <32 x i16>*
   %ptr.b = bitcast i16* %gep.b to <32 x i16>*
-  %load.a = load <32 x i16>* %ptr.a, align 2
-  %load.b = load <32 x i16>* %ptr.b, align 2
+  %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
   %cmp = icmp uge <32 x i16> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
   store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -3505,8 +3505,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <16 x i32>*
   %ptr.b = bitcast i32* %gep.b to <16 x i32>*
-  %load.a = load <16 x i32>* %ptr.a, align 2
-  %load.b = load <16 x i32>* %ptr.b, align 2
+  %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
   %cmp = icmp slt <16 x i32> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
   store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -3531,8 +3531,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <16 x i32>*
   %ptr.b = bitcast i32* %gep.b to <16 x i32>*
-  %load.a = load <16 x i32>* %ptr.a, align 2
-  %load.b = load <16 x i32>* %ptr.b, align 2
+  %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
   %cmp = icmp sle <16 x i32> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
   store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -3557,8 +3557,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <16 x i32>*
   %ptr.b = bitcast i32* %gep.b to <16 x i32>*
-  %load.a = load <16 x i32>* %ptr.a, align 2
-  %load.b = load <16 x i32>* %ptr.b, align 2
+  %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
   %cmp = icmp sgt <16 x i32> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
   store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -3583,8 +3583,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <16 x i32>*
   %ptr.b = bitcast i32* %gep.b to <16 x i32>*
-  %load.a = load <16 x i32>* %ptr.a, align 2
-  %load.b = load <16 x i32>* %ptr.b, align 2
+  %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
   %cmp = icmp sge <16 x i32> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
   store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -3609,8 +3609,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <16 x i32>*
   %ptr.b = bitcast i32* %gep.b to <16 x i32>*
-  %load.a = load <16 x i32>* %ptr.a, align 2
-  %load.b = load <16 x i32>* %ptr.b, align 2
+  %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
   %cmp = icmp ult <16 x i32> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
   store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -3635,8 +3635,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <16 x i32>*
   %ptr.b = bitcast i32* %gep.b to <16 x i32>*
-  %load.a = load <16 x i32>* %ptr.a, align 2
-  %load.b = load <16 x i32>* %ptr.b, align 2
+  %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
   %cmp = icmp ule <16 x i32> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
   store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -3661,8 +3661,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <16 x i32>*
   %ptr.b = bitcast i32* %gep.b to <16 x i32>*
-  %load.a = load <16 x i32>* %ptr.a, align 2
-  %load.b = load <16 x i32>* %ptr.b, align 2
+  %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
   %cmp = icmp ugt <16 x i32> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
   store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -3687,8 +3687,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <16 x i32>*
   %ptr.b = bitcast i32* %gep.b to <16 x i32>*
-  %load.a = load <16 x i32>* %ptr.a, align 2
-  %load.b = load <16 x i32>* %ptr.b, align 2
+  %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
   %cmp = icmp uge <16 x i32> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
   store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -3713,8 +3713,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i64>*
   %ptr.b = bitcast i32* %gep.b to <8 x i64>*
-  %load.a = load <8 x i64>* %ptr.a, align 2
-  %load.b = load <8 x i64>* %ptr.b, align 2
+  %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
   %cmp = icmp slt <8 x i64> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
   store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -3739,8 +3739,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i64>*
   %ptr.b = bitcast i32* %gep.b to <8 x i64>*
-  %load.a = load <8 x i64>* %ptr.a, align 2
-  %load.b = load <8 x i64>* %ptr.b, align 2
+  %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
   %cmp = icmp sle <8 x i64> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
   store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -3765,8 +3765,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i64>*
   %ptr.b = bitcast i32* %gep.b to <8 x i64>*
-  %load.a = load <8 x i64>* %ptr.a, align 2
-  %load.b = load <8 x i64>* %ptr.b, align 2
+  %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
   %cmp = icmp sgt <8 x i64> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
   store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -3791,8 +3791,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i64>*
   %ptr.b = bitcast i32* %gep.b to <8 x i64>*
-  %load.a = load <8 x i64>* %ptr.a, align 2
-  %load.b = load <8 x i64>* %ptr.b, align 2
+  %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
   %cmp = icmp sge <8 x i64> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
   store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -3817,8 +3817,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i64>*
   %ptr.b = bitcast i32* %gep.b to <8 x i64>*
-  %load.a = load <8 x i64>* %ptr.a, align 2
-  %load.b = load <8 x i64>* %ptr.b, align 2
+  %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
   %cmp = icmp ult <8 x i64> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
   store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -3843,8 +3843,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i64>*
   %ptr.b = bitcast i32* %gep.b to <8 x i64>*
-  %load.a = load <8 x i64>* %ptr.a, align 2
-  %load.b = load <8 x i64>* %ptr.b, align 2
+  %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
   %cmp = icmp ule <8 x i64> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
   store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -3869,8 +3869,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i64>*
   %ptr.b = bitcast i32* %gep.b to <8 x i64>*
-  %load.a = load <8 x i64>* %ptr.a, align 2
-  %load.b = load <8 x i64>* %ptr.b, align 2
+  %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
   %cmp = icmp ugt <8 x i64> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
   store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -3895,8 +3895,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i64>*
   %ptr.b = bitcast i32* %gep.b to <8 x i64>*
-  %load.a = load <8 x i64>* %ptr.a, align 2
-  %load.b = load <8 x i64>* %ptr.b, align 2
+  %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
   %cmp = icmp uge <8 x i64> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
   store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -3921,8 +3921,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <64 x i8>*
   %ptr.b = bitcast i8* %gep.b to <64 x i8>*
-  %load.a = load <64 x i8>* %ptr.a, align 2
-  %load.b = load <64 x i8>* %ptr.b, align 2
+  %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
   %cmp = icmp slt <64 x i8> %load.a, %load.b
   %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
   store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3947,8 +3947,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <64 x i8>*
   %ptr.b = bitcast i8* %gep.b to <64 x i8>*
-  %load.a = load <64 x i8>* %ptr.a, align 2
-  %load.b = load <64 x i8>* %ptr.b, align 2
+  %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
   %cmp = icmp sle <64 x i8> %load.a, %load.b
   %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
   store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3973,8 +3973,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <64 x i8>*
   %ptr.b = bitcast i8* %gep.b to <64 x i8>*
-  %load.a = load <64 x i8>* %ptr.a, align 2
-  %load.b = load <64 x i8>* %ptr.b, align 2
+  %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
   %cmp = icmp sgt <64 x i8> %load.a, %load.b
   %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
   store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -3999,8 +3999,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <64 x i8>*
   %ptr.b = bitcast i8* %gep.b to <64 x i8>*
-  %load.a = load <64 x i8>* %ptr.a, align 2
-  %load.b = load <64 x i8>* %ptr.b, align 2
+  %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
   %cmp = icmp sge <64 x i8> %load.a, %load.b
   %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
   store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -4025,8 +4025,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <64 x i8>*
   %ptr.b = bitcast i8* %gep.b to <64 x i8>*
-  %load.a = load <64 x i8>* %ptr.a, align 2
-  %load.b = load <64 x i8>* %ptr.b, align 2
+  %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
   %cmp = icmp ult <64 x i8> %load.a, %load.b
   %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
   store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -4051,8 +4051,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <64 x i8>*
   %ptr.b = bitcast i8* %gep.b to <64 x i8>*
-  %load.a = load <64 x i8>* %ptr.a, align 2
-  %load.b = load <64 x i8>* %ptr.b, align 2
+  %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
   %cmp = icmp ule <64 x i8> %load.a, %load.b
   %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
   store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -4077,8 +4077,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <64 x i8>*
   %ptr.b = bitcast i8* %gep.b to <64 x i8>*
-  %load.a = load <64 x i8>* %ptr.a, align 2
-  %load.b = load <64 x i8>* %ptr.b, align 2
+  %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
   %cmp = icmp ugt <64 x i8> %load.a, %load.b
   %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
   store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -4103,8 +4103,8 @@
   %gep.b = getelementptr inbounds i8, i8* %b, i64 %index
   %ptr.a = bitcast i8* %gep.a to <64 x i8>*
   %ptr.b = bitcast i8* %gep.b to <64 x i8>*
-  %load.a = load <64 x i8>* %ptr.a, align 2
-  %load.b = load <64 x i8>* %ptr.b, align 2
+  %load.a = load <64 x i8>, <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>, <64 x i8>* %ptr.b, align 2
   %cmp = icmp uge <64 x i8> %load.a, %load.b
   %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
   store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
@@ -4129,8 +4129,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <32 x i16>*
   %ptr.b = bitcast i16* %gep.b to <32 x i16>*
-  %load.a = load <32 x i16>* %ptr.a, align 2
-  %load.b = load <32 x i16>* %ptr.b, align 2
+  %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
   %cmp = icmp slt <32 x i16> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
   store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -4155,8 +4155,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <32 x i16>*
   %ptr.b = bitcast i16* %gep.b to <32 x i16>*
-  %load.a = load <32 x i16>* %ptr.a, align 2
-  %load.b = load <32 x i16>* %ptr.b, align 2
+  %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
   %cmp = icmp sle <32 x i16> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
   store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -4181,8 +4181,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <32 x i16>*
   %ptr.b = bitcast i16* %gep.b to <32 x i16>*
-  %load.a = load <32 x i16>* %ptr.a, align 2
-  %load.b = load <32 x i16>* %ptr.b, align 2
+  %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
   %cmp = icmp sgt <32 x i16> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
   store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -4207,8 +4207,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <32 x i16>*
   %ptr.b = bitcast i16* %gep.b to <32 x i16>*
-  %load.a = load <32 x i16>* %ptr.a, align 2
-  %load.b = load <32 x i16>* %ptr.b, align 2
+  %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
   %cmp = icmp sge <32 x i16> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
   store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -4233,8 +4233,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <32 x i16>*
   %ptr.b = bitcast i16* %gep.b to <32 x i16>*
-  %load.a = load <32 x i16>* %ptr.a, align 2
-  %load.b = load <32 x i16>* %ptr.b, align 2
+  %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
   %cmp = icmp ult <32 x i16> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
   store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -4259,8 +4259,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <32 x i16>*
   %ptr.b = bitcast i16* %gep.b to <32 x i16>*
-  %load.a = load <32 x i16>* %ptr.a, align 2
-  %load.b = load <32 x i16>* %ptr.b, align 2
+  %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
   %cmp = icmp ule <32 x i16> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
   store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -4285,8 +4285,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <32 x i16>*
   %ptr.b = bitcast i16* %gep.b to <32 x i16>*
-  %load.a = load <32 x i16>* %ptr.a, align 2
-  %load.b = load <32 x i16>* %ptr.b, align 2
+  %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
   %cmp = icmp ugt <32 x i16> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
   store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -4311,8 +4311,8 @@
   %gep.b = getelementptr inbounds i16, i16* %b, i64 %index
   %ptr.a = bitcast i16* %gep.a to <32 x i16>*
   %ptr.b = bitcast i16* %gep.b to <32 x i16>*
-  %load.a = load <32 x i16>* %ptr.a, align 2
-  %load.b = load <32 x i16>* %ptr.b, align 2
+  %load.a = load <32 x i16>, <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>, <32 x i16>* %ptr.b, align 2
   %cmp = icmp uge <32 x i16> %load.a, %load.b
   %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
   store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
@@ -4337,8 +4337,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <16 x i32>*
   %ptr.b = bitcast i32* %gep.b to <16 x i32>*
-  %load.a = load <16 x i32>* %ptr.a, align 2
-  %load.b = load <16 x i32>* %ptr.b, align 2
+  %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
   %cmp = icmp slt <16 x i32> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
   store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -4363,8 +4363,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <16 x i32>*
   %ptr.b = bitcast i32* %gep.b to <16 x i32>*
-  %load.a = load <16 x i32>* %ptr.a, align 2
-  %load.b = load <16 x i32>* %ptr.b, align 2
+  %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
   %cmp = icmp sle <16 x i32> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
   store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -4389,8 +4389,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <16 x i32>*
   %ptr.b = bitcast i32* %gep.b to <16 x i32>*
-  %load.a = load <16 x i32>* %ptr.a, align 2
-  %load.b = load <16 x i32>* %ptr.b, align 2
+  %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
   %cmp = icmp sgt <16 x i32> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
   store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -4415,8 +4415,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <16 x i32>*
   %ptr.b = bitcast i32* %gep.b to <16 x i32>*
-  %load.a = load <16 x i32>* %ptr.a, align 2
-  %load.b = load <16 x i32>* %ptr.b, align 2
+  %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
   %cmp = icmp sge <16 x i32> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
   store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -4441,8 +4441,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <16 x i32>*
   %ptr.b = bitcast i32* %gep.b to <16 x i32>*
-  %load.a = load <16 x i32>* %ptr.a, align 2
-  %load.b = load <16 x i32>* %ptr.b, align 2
+  %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
   %cmp = icmp ult <16 x i32> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
   store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -4467,8 +4467,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <16 x i32>*
   %ptr.b = bitcast i32* %gep.b to <16 x i32>*
-  %load.a = load <16 x i32>* %ptr.a, align 2
-  %load.b = load <16 x i32>* %ptr.b, align 2
+  %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
   %cmp = icmp ule <16 x i32> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
   store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -4493,8 +4493,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <16 x i32>*
   %ptr.b = bitcast i32* %gep.b to <16 x i32>*
-  %load.a = load <16 x i32>* %ptr.a, align 2
-  %load.b = load <16 x i32>* %ptr.b, align 2
+  %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
   %cmp = icmp ugt <16 x i32> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
   store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -4519,8 +4519,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <16 x i32>*
   %ptr.b = bitcast i32* %gep.b to <16 x i32>*
-  %load.a = load <16 x i32>* %ptr.a, align 2
-  %load.b = load <16 x i32>* %ptr.b, align 2
+  %load.a = load <16 x i32>, <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>, <16 x i32>* %ptr.b, align 2
   %cmp = icmp uge <16 x i32> %load.a, %load.b
   %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
   store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
@@ -4547,8 +4547,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i64>*
   %ptr.b = bitcast i32* %gep.b to <8 x i64>*
-  %load.a = load <8 x i64>* %ptr.a, align 2
-  %load.b = load <8 x i64>* %ptr.b, align 2
+  %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
   %cmp = icmp slt <8 x i64> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
   store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -4573,8 +4573,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i64>*
   %ptr.b = bitcast i32* %gep.b to <8 x i64>*
-  %load.a = load <8 x i64>* %ptr.a, align 2
-  %load.b = load <8 x i64>* %ptr.b, align 2
+  %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
   %cmp = icmp sle <8 x i64> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
   store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -4599,8 +4599,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i64>*
   %ptr.b = bitcast i32* %gep.b to <8 x i64>*
-  %load.a = load <8 x i64>* %ptr.a, align 2
-  %load.b = load <8 x i64>* %ptr.b, align 2
+  %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
   %cmp = icmp sgt <8 x i64> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
   store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -4625,8 +4625,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i64>*
   %ptr.b = bitcast i32* %gep.b to <8 x i64>*
-  %load.a = load <8 x i64>* %ptr.a, align 2
-  %load.b = load <8 x i64>* %ptr.b, align 2
+  %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
   %cmp = icmp sge <8 x i64> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
   store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -4651,8 +4651,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i64>*
   %ptr.b = bitcast i32* %gep.b to <8 x i64>*
-  %load.a = load <8 x i64>* %ptr.a, align 2
-  %load.b = load <8 x i64>* %ptr.b, align 2
+  %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
   %cmp = icmp ult <8 x i64> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
   store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -4677,8 +4677,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i64>*
   %ptr.b = bitcast i32* %gep.b to <8 x i64>*
-  %load.a = load <8 x i64>* %ptr.a, align 2
-  %load.b = load <8 x i64>* %ptr.b, align 2
+  %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
   %cmp = icmp ule <8 x i64> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
   store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -4703,8 +4703,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i64>*
   %ptr.b = bitcast i32* %gep.b to <8 x i64>*
-  %load.a = load <8 x i64>* %ptr.a, align 2
-  %load.b = load <8 x i64>* %ptr.b, align 2
+  %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
   %cmp = icmp ugt <8 x i64> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
   store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -4729,8 +4729,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <8 x i64>*
   %ptr.b = bitcast i32* %gep.b to <8 x i64>*
-  %load.a = load <8 x i64>* %ptr.a, align 2
-  %load.b = load <8 x i64>* %ptr.b, align 2
+  %load.a = load <8 x i64>, <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>, <8 x i64>* %ptr.b, align 2
   %cmp = icmp uge <8 x i64> %load.a, %load.b
   %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
   store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
@@ -4755,8 +4755,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i64>*
   %ptr.b = bitcast i32* %gep.b to <4 x i64>*
-  %load.a = load <4 x i64>* %ptr.a, align 2
-  %load.b = load <4 x i64>* %ptr.b, align 2
+  %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
   %cmp = icmp slt <4 x i64> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
   store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -4781,8 +4781,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i64>*
   %ptr.b = bitcast i32* %gep.b to <4 x i64>*
-  %load.a = load <4 x i64>* %ptr.a, align 2
-  %load.b = load <4 x i64>* %ptr.b, align 2
+  %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
   %cmp = icmp sle <4 x i64> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
   store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -4807,8 +4807,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i64>*
   %ptr.b = bitcast i32* %gep.b to <4 x i64>*
-  %load.a = load <4 x i64>* %ptr.a, align 2
-  %load.b = load <4 x i64>* %ptr.b, align 2
+  %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
   %cmp = icmp sgt <4 x i64> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
   store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -4833,8 +4833,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i64>*
   %ptr.b = bitcast i32* %gep.b to <4 x i64>*
-  %load.a = load <4 x i64>* %ptr.a, align 2
-  %load.b = load <4 x i64>* %ptr.b, align 2
+  %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
   %cmp = icmp sge <4 x i64> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
   store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -4859,8 +4859,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i64>*
   %ptr.b = bitcast i32* %gep.b to <4 x i64>*
-  %load.a = load <4 x i64>* %ptr.a, align 2
-  %load.b = load <4 x i64>* %ptr.b, align 2
+  %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
   %cmp = icmp ult <4 x i64> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
   store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -4885,8 +4885,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i64>*
   %ptr.b = bitcast i32* %gep.b to <4 x i64>*
-  %load.a = load <4 x i64>* %ptr.a, align 2
-  %load.b = load <4 x i64>* %ptr.b, align 2
+  %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
   %cmp = icmp ule <4 x i64> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
   store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -4911,8 +4911,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i64>*
   %ptr.b = bitcast i32* %gep.b to <4 x i64>*
-  %load.a = load <4 x i64>* %ptr.a, align 2
-  %load.b = load <4 x i64>* %ptr.b, align 2
+  %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
   %cmp = icmp ugt <4 x i64> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
   store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -4937,8 +4937,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i64>*
   %ptr.b = bitcast i32* %gep.b to <4 x i64>*
-  %load.a = load <4 x i64>* %ptr.a, align 2
-  %load.b = load <4 x i64>* %ptr.b, align 2
+  %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
   %cmp = icmp uge <4 x i64> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
   store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -4963,8 +4963,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i64>*
   %ptr.b = bitcast i32* %gep.b to <4 x i64>*
-  %load.a = load <4 x i64>* %ptr.a, align 2
-  %load.b = load <4 x i64>* %ptr.b, align 2
+  %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
   %cmp = icmp slt <4 x i64> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
   store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -4989,8 +4989,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i64>*
   %ptr.b = bitcast i32* %gep.b to <4 x i64>*
-  %load.a = load <4 x i64>* %ptr.a, align 2
-  %load.b = load <4 x i64>* %ptr.b, align 2
+  %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
   %cmp = icmp sle <4 x i64> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
   store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -5015,8 +5015,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i64>*
   %ptr.b = bitcast i32* %gep.b to <4 x i64>*
-  %load.a = load <4 x i64>* %ptr.a, align 2
-  %load.b = load <4 x i64>* %ptr.b, align 2
+  %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
   %cmp = icmp sgt <4 x i64> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
   store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -5041,8 +5041,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i64>*
   %ptr.b = bitcast i32* %gep.b to <4 x i64>*
-  %load.a = load <4 x i64>* %ptr.a, align 2
-  %load.b = load <4 x i64>* %ptr.b, align 2
+  %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
   %cmp = icmp sge <4 x i64> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
   store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -5067,8 +5067,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i64>*
   %ptr.b = bitcast i32* %gep.b to <4 x i64>*
-  %load.a = load <4 x i64>* %ptr.a, align 2
-  %load.b = load <4 x i64>* %ptr.b, align 2
+  %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
   %cmp = icmp ult <4 x i64> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
   store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -5093,8 +5093,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i64>*
   %ptr.b = bitcast i32* %gep.b to <4 x i64>*
-  %load.a = load <4 x i64>* %ptr.a, align 2
-  %load.b = load <4 x i64>* %ptr.b, align 2
+  %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
   %cmp = icmp ule <4 x i64> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
   store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -5119,8 +5119,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i64>*
   %ptr.b = bitcast i32* %gep.b to <4 x i64>*
-  %load.a = load <4 x i64>* %ptr.a, align 2
-  %load.b = load <4 x i64>* %ptr.b, align 2
+  %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
   %cmp = icmp ugt <4 x i64> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
   store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -5145,8 +5145,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <4 x i64>*
   %ptr.b = bitcast i32* %gep.b to <4 x i64>*
-  %load.a = load <4 x i64>* %ptr.a, align 2
-  %load.b = load <4 x i64>* %ptr.b, align 2
+  %load.a = load <4 x i64>, <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>, <4 x i64>* %ptr.b, align 2
   %cmp = icmp uge <4 x i64> %load.a, %load.b
   %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
   store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
@@ -5171,8 +5171,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <2 x i64>*
   %ptr.b = bitcast i32* %gep.b to <2 x i64>*
-  %load.a = load <2 x i64>* %ptr.a, align 2
-  %load.b = load <2 x i64>* %ptr.b, align 2
+  %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
   %cmp = icmp slt <2 x i64> %load.a, %load.b
   %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
   store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5197,8 +5197,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <2 x i64>*
   %ptr.b = bitcast i32* %gep.b to <2 x i64>*
-  %load.a = load <2 x i64>* %ptr.a, align 2
-  %load.b = load <2 x i64>* %ptr.b, align 2
+  %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
   %cmp = icmp sle <2 x i64> %load.a, %load.b
   %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
   store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5223,8 +5223,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <2 x i64>*
   %ptr.b = bitcast i32* %gep.b to <2 x i64>*
-  %load.a = load <2 x i64>* %ptr.a, align 2
-  %load.b = load <2 x i64>* %ptr.b, align 2
+  %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
   %cmp = icmp sgt <2 x i64> %load.a, %load.b
   %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
   store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5249,8 +5249,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <2 x i64>*
   %ptr.b = bitcast i32* %gep.b to <2 x i64>*
-  %load.a = load <2 x i64>* %ptr.a, align 2
-  %load.b = load <2 x i64>* %ptr.b, align 2
+  %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
   %cmp = icmp sge <2 x i64> %load.a, %load.b
   %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
   store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5275,8 +5275,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <2 x i64>*
   %ptr.b = bitcast i32* %gep.b to <2 x i64>*
-  %load.a = load <2 x i64>* %ptr.a, align 2
-  %load.b = load <2 x i64>* %ptr.b, align 2
+  %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
   %cmp = icmp ult <2 x i64> %load.a, %load.b
   %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
   store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5301,8 +5301,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <2 x i64>*
   %ptr.b = bitcast i32* %gep.b to <2 x i64>*
-  %load.a = load <2 x i64>* %ptr.a, align 2
-  %load.b = load <2 x i64>* %ptr.b, align 2
+  %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
   %cmp = icmp ule <2 x i64> %load.a, %load.b
   %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
   store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5327,8 +5327,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <2 x i64>*
   %ptr.b = bitcast i32* %gep.b to <2 x i64>*
-  %load.a = load <2 x i64>* %ptr.a, align 2
-  %load.b = load <2 x i64>* %ptr.b, align 2
+  %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
   %cmp = icmp ugt <2 x i64> %load.a, %load.b
   %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
   store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5353,8 +5353,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <2 x i64>*
   %ptr.b = bitcast i32* %gep.b to <2 x i64>*
-  %load.a = load <2 x i64>* %ptr.a, align 2
-  %load.b = load <2 x i64>* %ptr.b, align 2
+  %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
   %cmp = icmp uge <2 x i64> %load.a, %load.b
   %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
   store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5379,8 +5379,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <2 x i64>*
   %ptr.b = bitcast i32* %gep.b to <2 x i64>*
-  %load.a = load <2 x i64>* %ptr.a, align 2
-  %load.b = load <2 x i64>* %ptr.b, align 2
+  %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
   %cmp = icmp slt <2 x i64> %load.a, %load.b
   %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
   store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5405,8 +5405,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <2 x i64>*
   %ptr.b = bitcast i32* %gep.b to <2 x i64>*
-  %load.a = load <2 x i64>* %ptr.a, align 2
-  %load.b = load <2 x i64>* %ptr.b, align 2
+  %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
   %cmp = icmp sle <2 x i64> %load.a, %load.b
   %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
   store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5431,8 +5431,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <2 x i64>*
   %ptr.b = bitcast i32* %gep.b to <2 x i64>*
-  %load.a = load <2 x i64>* %ptr.a, align 2
-  %load.b = load <2 x i64>* %ptr.b, align 2
+  %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
   %cmp = icmp sgt <2 x i64> %load.a, %load.b
   %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
   store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5457,8 +5457,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <2 x i64>*
   %ptr.b = bitcast i32* %gep.b to <2 x i64>*
-  %load.a = load <2 x i64>* %ptr.a, align 2
-  %load.b = load <2 x i64>* %ptr.b, align 2
+  %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
   %cmp = icmp sge <2 x i64> %load.a, %load.b
   %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
   store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5483,8 +5483,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <2 x i64>*
   %ptr.b = bitcast i32* %gep.b to <2 x i64>*
-  %load.a = load <2 x i64>* %ptr.a, align 2
-  %load.b = load <2 x i64>* %ptr.b, align 2
+  %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
   %cmp = icmp ult <2 x i64> %load.a, %load.b
   %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
   store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5509,8 +5509,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <2 x i64>*
   %ptr.b = bitcast i32* %gep.b to <2 x i64>*
-  %load.a = load <2 x i64>* %ptr.a, align 2
-  %load.b = load <2 x i64>* %ptr.b, align 2
+  %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
   %cmp = icmp ule <2 x i64> %load.a, %load.b
   %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
   store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5535,8 +5535,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <2 x i64>*
   %ptr.b = bitcast i32* %gep.b to <2 x i64>*
-  %load.a = load <2 x i64>* %ptr.a, align 2
-  %load.b = load <2 x i64>* %ptr.b, align 2
+  %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
   %cmp = icmp ugt <2 x i64> %load.a, %load.b
   %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
   store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
@@ -5561,8 +5561,8 @@
   %gep.b = getelementptr inbounds i32, i32* %b, i64 %index
   %ptr.a = bitcast i32* %gep.a to <2 x i64>*
   %ptr.b = bitcast i32* %gep.b to <2 x i64>*
-  %load.a = load <2 x i64>* %ptr.a, align 2
-  %load.b = load <2 x i64>* %ptr.b, align 2
+  %load.a = load <2 x i64>, <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>, <2 x i64>* %ptr.b, align 2
   %cmp = icmp uge <2 x i64> %load.a, %load.b
   %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
   store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
diff --git a/llvm/test/CodeGen/X86/vshift-5.ll b/llvm/test/CodeGen/X86/vshift-5.ll
index 562e520..a6ae8d5 100644
--- a/llvm/test/CodeGen/X86/vshift-5.ll
+++ b/llvm/test/CodeGen/X86/vshift-5.ll
@@ -7,7 +7,7 @@
 ; CHECK-LABEL: shift5a:
 ; CHECK: movd
 ; CHECK: pslld
-  %amt = load i32* %pamt 
+  %amt = load i32, i32* %pamt 
   %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
   %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer 
   %shl = shl <4 x i32> %val, %shamt
@@ -21,7 +21,7 @@
 ; CHECK-LABEL: shift5b:
 ; CHECK: movd
 ; CHECK: psrad
-  %amt = load i32* %pamt 
+  %amt = load i32, i32* %pamt 
   %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0
   %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer 
   %shr = ashr <4 x i32> %val, %shamt
diff --git a/llvm/test/CodeGen/X86/vshift-6.ll b/llvm/test/CodeGen/X86/vshift-6.ll
index 175b649..551a138 100644
--- a/llvm/test/CodeGen/X86/vshift-6.ll
+++ b/llvm/test/CodeGen/X86/vshift-6.ll
@@ -25,7 +25,7 @@
 define <16 x i8> @do_not_crash(i8*, i32*, i64*, i32, i64, i8) {
 entry:
   store i8 %5, i8* %0
-  %L5 = load i8* %0
+  %L5 = load i8, i8* %0
   %I8 = insertelement <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, i8 %L5, i32 7
   %B51 = shl <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, %I8
   ret <16 x i8> %B51
diff --git a/llvm/test/CodeGen/X86/weak_def_can_be_hidden.ll b/llvm/test/CodeGen/X86/weak_def_can_be_hidden.ll
index b17f372..8e6d34c 100644
--- a/llvm/test/CodeGen/X86/weak_def_can_be_hidden.ll
+++ b/llvm/test/CodeGen/X86/weak_def_can_be_hidden.ll
@@ -12,7 +12,7 @@
 ; CHECK-D89: .weak_definition _v1
 
 define i32 @f1() {
-  %x = load i32 * @v1
+  %x = load i32 , i32 * @v1
   ret i32 %x
 }
 
@@ -46,6 +46,6 @@
 ; CHECK-D89: .weak_definition _v4
 
 define i32 @f4() {
-  %x = load i32 * @v4
+  %x = load i32 , i32 * @v4
   ret i32 %x
 }
diff --git a/llvm/test/CodeGen/X86/widen_arith-1.ll b/llvm/test/CodeGen/X86/widen_arith-1.ll
index 03459d5..5663b8b 100644
--- a/llvm/test/CodeGen/X86/widen_arith-1.ll
+++ b/llvm/test/CodeGen/X86/widen_arith-1.ll
@@ -16,25 +16,25 @@
 	br label %forcond
 
 forcond:		; preds = %forinc, %entry
-	%tmp = load i32* %i		; <i32> [#uses=1]
-	%tmp1 = load i32* %n.addr		; <i32> [#uses=1]
+	%tmp = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* %n.addr		; <i32> [#uses=1]
 	%cmp = icmp slt i32 %tmp, %tmp1		; <i1> [#uses=1]
 	br i1 %cmp, label %forbody, label %afterfor
 
 forbody:		; preds = %forcond
-	%tmp2 = load i32* %i		; <i32> [#uses=1]
-	%tmp3 = load <3 x i8>** %dst.addr		; <<3 x i8>*> [#uses=1]
+	%tmp2 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp3 = load <3 x i8>*, <3 x i8>** %dst.addr		; <<3 x i8>*> [#uses=1]
 	%arrayidx = getelementptr <3 x i8>, <3 x i8>* %tmp3, i32 %tmp2		; <<3 x i8>*> [#uses=1]
-	%tmp4 = load i32* %i		; <i32> [#uses=1]
-	%tmp5 = load <3 x i8>** %src.addr		; <<3 x i8>*> [#uses=1]
+	%tmp4 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp5 = load <3 x i8>*, <3 x i8>** %src.addr		; <<3 x i8>*> [#uses=1]
 	%arrayidx6 = getelementptr <3 x i8>, <3 x i8>* %tmp5, i32 %tmp4		; <<3 x i8>*> [#uses=1]
-	%tmp7 = load <3 x i8>* %arrayidx6		; <<3 x i8>> [#uses=1]
+	%tmp7 = load <3 x i8>, <3 x i8>* %arrayidx6		; <<3 x i8>> [#uses=1]
 	%add = add <3 x i8> %tmp7, < i8 1, i8 1, i8 1 >		; <<3 x i8>> [#uses=1]
 	store <3 x i8> %add, <3 x i8>* %arrayidx
 	br label %forinc
 
 forinc:		; preds = %forbody
-	%tmp8 = load i32* %i		; <i32> [#uses=1]
+	%tmp8 = load i32, i32* %i		; <i32> [#uses=1]
 	%inc = add i32 %tmp8, 1		; <i32> [#uses=1]
 	store i32 %inc, i32* %i
 	br label %forcond
diff --git a/llvm/test/CodeGen/X86/widen_arith-2.ll b/llvm/test/CodeGen/X86/widen_arith-2.ll
index fb775a5..6c219c1 100644
--- a/llvm/test/CodeGen/X86/widen_arith-2.ll
+++ b/llvm/test/CodeGen/X86/widen_arith-2.ll
@@ -19,36 +19,36 @@
 	br label %forcond
 
 forcond:		; preds = %forinc, %entry
-	%tmp = load i32* %i		; <i32> [#uses=1]
-	%tmp1 = load i32* %n.addr		; <i32> [#uses=1]
+	%tmp = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* %n.addr		; <i32> [#uses=1]
 	%cmp = icmp slt i32 %tmp, %tmp1		; <i1> [#uses=1]
 	br i1 %cmp, label %forbody, label %afterfor
 
 forbody:		; preds = %forcond
-	%tmp2 = load i32* %i		; <i32> [#uses=1]
-	%tmp3 = load i64** %dst_i.addr		; <i64*> [#uses=1]
+	%tmp2 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp3 = load i64*, i64** %dst_i.addr		; <i64*> [#uses=1]
 	%arrayidx = getelementptr i64, i64* %tmp3, i32 %tmp2		; <i64*> [#uses=1]
 	%conv = bitcast i64* %arrayidx to <8 x i8>*		; <<8 x i8>*> [#uses=1]
 	store <8 x i8>* %conv, <8 x i8>** %dst
-	%tmp4 = load i32* %i		; <i32> [#uses=1]
-	%tmp5 = load i64** %src_i.addr		; <i64*> [#uses=1]
+	%tmp4 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp5 = load i64*, i64** %src_i.addr		; <i64*> [#uses=1]
 	%arrayidx6 = getelementptr i64, i64* %tmp5, i32 %tmp4		; <i64*> [#uses=1]
 	%conv7 = bitcast i64* %arrayidx6 to <8 x i8>*		; <<8 x i8>*> [#uses=1]
 	store <8 x i8>* %conv7, <8 x i8>** %src
-	%tmp8 = load i32* %i		; <i32> [#uses=1]
-	%tmp9 = load <8 x i8>** %dst		; <<8 x i8>*> [#uses=1]
+	%tmp8 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp9 = load <8 x i8>*, <8 x i8>** %dst		; <<8 x i8>*> [#uses=1]
 	%arrayidx10 = getelementptr <8 x i8>, <8 x i8>* %tmp9, i32 %tmp8		; <<8 x i8>*> [#uses=1]
-	%tmp11 = load i32* %i		; <i32> [#uses=1]
-	%tmp12 = load <8 x i8>** %src		; <<8 x i8>*> [#uses=1]
+	%tmp11 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp12 = load <8 x i8>*, <8 x i8>** %src		; <<8 x i8>*> [#uses=1]
 	%arrayidx13 = getelementptr <8 x i8>, <8 x i8>* %tmp12, i32 %tmp11		; <<8 x i8>*> [#uses=1]
-	%tmp14 = load <8 x i8>* %arrayidx13		; <<8 x i8>> [#uses=1]
+	%tmp14 = load <8 x i8>, <8 x i8>* %arrayidx13		; <<8 x i8>> [#uses=1]
 	%add = add <8 x i8> %tmp14, < i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1 >		; <<8 x i8>> [#uses=1]
 	%and = and <8 x i8> %add, < i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4 >		; <<8 x i8>> [#uses=1]
 	store <8 x i8> %and, <8 x i8>* %arrayidx10
 	br label %forinc
 
 forinc:		; preds = %forbody
-	%tmp15 = load i32* %i		; <i32> [#uses=1]
+	%tmp15 = load i32, i32* %i		; <i32> [#uses=1]
 	%inc = add i32 %tmp15, 1		; <i32> [#uses=1]
 	store i32 %inc, i32* %i
 	br label %forcond
diff --git a/llvm/test/CodeGen/X86/widen_arith-3.ll b/llvm/test/CodeGen/X86/widen_arith-3.ll
index 1574bc07..aea7975 100644
--- a/llvm/test/CodeGen/X86/widen_arith-3.ll
+++ b/llvm/test/CodeGen/X86/widen_arith-3.ll
@@ -21,25 +21,25 @@
 	br label %forcond
 
 forcond:		; preds = %forinc, %entry
-	%tmp = load i32* %i		; <i32> [#uses=1]
-	%tmp1 = load i32* %n.addr		; <i32> [#uses=1]
+	%tmp = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* %n.addr		; <i32> [#uses=1]
 	%cmp = icmp slt i32 %tmp, %tmp1		; <i1> [#uses=1]
 	br i1 %cmp, label %forbody, label %afterfor
 
 forbody:		; preds = %forcond
-	%tmp2 = load i32* %i		; <i32> [#uses=1]
-	%tmp3 = load <3 x i16>** %dst.addr		; <<3 x i16>*> [#uses=1]
+	%tmp2 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp3 = load <3 x i16>*, <3 x i16>** %dst.addr		; <<3 x i16>*> [#uses=1]
 	%arrayidx = getelementptr <3 x i16>, <3 x i16>* %tmp3, i32 %tmp2		; <<3 x i16>*> [#uses=1]
-	%tmp4 = load i32* %i		; <i32> [#uses=1]
-	%tmp5 = load <3 x i16>** %src.addr		; <<3 x i16>*> [#uses=1]
+	%tmp4 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp5 = load <3 x i16>*, <3 x i16>** %src.addr		; <<3 x i16>*> [#uses=1]
 	%arrayidx6 = getelementptr <3 x i16>, <3 x i16>* %tmp5, i32 %tmp4		; <<3 x i16>*> [#uses=1]
-	%tmp7 = load <3 x i16>* %arrayidx6		; <<3 x i16>> [#uses=1]
+	%tmp7 = load <3 x i16>, <3 x i16>* %arrayidx6		; <<3 x i16>> [#uses=1]
 	%add = add <3 x i16> %tmp7, < i16 1, i16 1, i16 1 >		; <<3 x i16>> [#uses=1]
 	store <3 x i16> %add, <3 x i16>* %arrayidx
 	br label %forinc
 
 forinc:		; preds = %forbody
-	%tmp8 = load i32* %i		; <i32> [#uses=1]
+	%tmp8 = load i32, i32* %i		; <i32> [#uses=1]
 	%inc = add i32 %tmp8, 1		; <i32> [#uses=1]
 	store i32 %inc, i32* %i
 	br label %forcond
diff --git a/llvm/test/CodeGen/X86/widen_arith-4.ll b/llvm/test/CodeGen/X86/widen_arith-4.ll
index f50fcab..5dba063 100644
--- a/llvm/test/CodeGen/X86/widen_arith-4.ll
+++ b/llvm/test/CodeGen/X86/widen_arith-4.ll
@@ -19,26 +19,26 @@
 	br label %forcond
 
 forcond:		; preds = %forinc, %entry
-	%tmp = load i32* %i		; <i32> [#uses=1]
-	%tmp1 = load i32* %n.addr		; <i32> [#uses=1]
+	%tmp = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* %n.addr		; <i32> [#uses=1]
 	%cmp = icmp slt i32 %tmp, %tmp1		; <i1> [#uses=1]
 	br i1 %cmp, label %forbody, label %afterfor
 
 forbody:		; preds = %forcond
-	%tmp2 = load i32* %i		; <i32> [#uses=1]
-	%tmp3 = load <5 x i16>** %dst.addr		; <<5 x i16>*> [#uses=1]
+	%tmp2 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp3 = load <5 x i16>*, <5 x i16>** %dst.addr		; <<5 x i16>*> [#uses=1]
 	%arrayidx = getelementptr <5 x i16>, <5 x i16>* %tmp3, i32 %tmp2		; <<5 x i16>*> [#uses=1]
-	%tmp4 = load i32* %i		; <i32> [#uses=1]
-	%tmp5 = load <5 x i16>** %src.addr		; <<5 x i16>*> [#uses=1]
+	%tmp4 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp5 = load <5 x i16>*, <5 x i16>** %src.addr		; <<5 x i16>*> [#uses=1]
 	%arrayidx6 = getelementptr <5 x i16>, <5 x i16>* %tmp5, i32 %tmp4		; <<5 x i16>*> [#uses=1]
-	%tmp7 = load <5 x i16>* %arrayidx6		; <<5 x i16>> [#uses=1]
+	%tmp7 = load <5 x i16>, <5 x i16>* %arrayidx6		; <<5 x i16>> [#uses=1]
 	%sub = sub <5 x i16> %tmp7, < i16 271, i16 271, i16 271, i16 271, i16 271 >		; <<5 x i16>> [#uses=1]
 	%mul = mul <5 x i16> %sub, < i16 2, i16 4, i16 2, i16 2, i16 2 >		; <<5 x i16>> [#uses=1]
 	store <5 x i16> %mul, <5 x i16>* %arrayidx
 	br label %forinc
 
 forinc:		; preds = %forbody
-	%tmp8 = load i32* %i		; <i32> [#uses=1]
+	%tmp8 = load i32, i32* %i		; <i32> [#uses=1]
 	%inc = add i32 %tmp8, 1		; <i32> [#uses=1]
 	store i32 %inc, i32* %i
 	br label %forcond
diff --git a/llvm/test/CodeGen/X86/widen_arith-5.ll b/llvm/test/CodeGen/X86/widen_arith-5.ll
index cdb0878..04c9ec2 100644
--- a/llvm/test/CodeGen/X86/widen_arith-5.ll
+++ b/llvm/test/CodeGen/X86/widen_arith-5.ll
@@ -20,26 +20,26 @@
 	br label %forcond
 
 forcond:		; preds = %forinc, %entry
-	%tmp = load i32* %i		; <i32> [#uses=1]
-	%tmp1 = load i32* %n.addr		; <i32> [#uses=1]
+	%tmp = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* %n.addr		; <i32> [#uses=1]
 	%cmp = icmp slt i32 %tmp, %tmp1		; <i1> [#uses=1]
 	br i1 %cmp, label %forbody, label %afterfor
 
 forbody:		; preds = %forcond
-	%tmp2 = load i32* %i		; <i32> [#uses=1]
-	%tmp3 = load <3 x i32>** %dst.addr		; <<3 x i32>*> [#uses=1]
+	%tmp2 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp3 = load <3 x i32>*, <3 x i32>** %dst.addr		; <<3 x i32>*> [#uses=1]
 	%arrayidx = getelementptr <3 x i32>, <3 x i32>* %tmp3, i32 %tmp2		; <<3 x i32>*> [#uses=1]
-	%tmp4 = load i32* %i		; <i32> [#uses=1]
-	%tmp5 = load <3 x i32>** %src.addr		; <<3 x i32>*> [#uses=1]
+	%tmp4 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp5 = load <3 x i32>*, <3 x i32>** %src.addr		; <<3 x i32>*> [#uses=1]
 	%arrayidx6 = getelementptr <3 x i32>, <3 x i32>* %tmp5, i32 %tmp4		; <<3 x i32>*> [#uses=1]
-	%tmp7 = load <3 x i32>* %arrayidx6		; <<3 x i32>> [#uses=1]
+	%tmp7 = load <3 x i32>, <3 x i32>* %arrayidx6		; <<3 x i32>> [#uses=1]
 	%mul = mul <3 x i32> %tmp7, < i32 4, i32 4, i32 4 >		; <<3 x i32>> [#uses=1]
 	%sub = sub <3 x i32> %mul, < i32 3, i32 3, i32 3 >		; <<3 x i32>> [#uses=1]
 	store <3 x i32> %sub, <3 x i32>* %arrayidx
 	br label %forinc
 
 forinc:		; preds = %forbody
-	%tmp8 = load i32* %i		; <i32> [#uses=1]
+	%tmp8 = load i32, i32* %i		; <i32> [#uses=1]
 	%inc = add i32 %tmp8, 1		; <i32> [#uses=1]
 	store i32 %inc, i32* %i
 	br label %forcond
diff --git a/llvm/test/CodeGen/X86/widen_arith-6.ll b/llvm/test/CodeGen/X86/widen_arith-6.ll
index 65ffeb5..09998a0 100644
--- a/llvm/test/CodeGen/X86/widen_arith-6.ll
+++ b/llvm/test/CodeGen/X86/widen_arith-6.ll
@@ -19,27 +19,27 @@
 	br label %forcond
 
 forcond:		; preds = %forinc, %entry
-	%tmp = load i32* %i		; <i32> [#uses=1]
-	%tmp1 = load i32* %n.addr		; <i32> [#uses=1]
+	%tmp = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* %n.addr		; <i32> [#uses=1]
 	%cmp = icmp slt i32 %tmp, %tmp1		; <i1> [#uses=1]
 	br i1 %cmp, label %forbody, label %afterfor
 
 forbody:		; preds = %forcond
-	%tmp2 = load i32* %i		; <i32> [#uses=1]
-	%tmp3 = load <3 x float>** %dst.addr		; <<3 x float>*> [#uses=1]
+	%tmp2 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp3 = load <3 x float>*, <3 x float>** %dst.addr		; <<3 x float>*> [#uses=1]
 	%arrayidx = getelementptr <3 x float>, <3 x float>* %tmp3, i32 %tmp2		; <<3 x float>*> [#uses=1]
-	%tmp4 = load i32* %i		; <i32> [#uses=1]
-	%tmp5 = load <3 x float>** %src.addr		; <<3 x float>*> [#uses=1]
+	%tmp4 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp5 = load <3 x float>*, <3 x float>** %src.addr		; <<3 x float>*> [#uses=1]
 	%arrayidx6 = getelementptr <3 x float>, <3 x float>* %tmp5, i32 %tmp4		; <<3 x float>*> [#uses=1]
-	%tmp7 = load <3 x float>* %arrayidx6		; <<3 x float>> [#uses=1]
-	%tmp8 = load <3 x float>* %v		; <<3 x float>> [#uses=1]
+	%tmp7 = load <3 x float>, <3 x float>* %arrayidx6		; <<3 x float>> [#uses=1]
+	%tmp8 = load <3 x float>, <3 x float>* %v		; <<3 x float>> [#uses=1]
 	%mul = fmul <3 x float> %tmp7, %tmp8		; <<3 x float>> [#uses=1]
 	%add = fadd <3 x float> %mul, < float 0x409EE02900000000, float 0x409EE02900000000, float 0x409EE02900000000 >		; <<3 x float>> [#uses=1]
 	store <3 x float> %add, <3 x float>* %arrayidx
 	br label %forinc
 
 forinc:		; preds = %forbody
-	%tmp9 = load i32* %i		; <i32> [#uses=1]
+	%tmp9 = load i32, i32* %i		; <i32> [#uses=1]
 	%inc = add i32 %tmp9, 1		; <i32> [#uses=1]
 	store i32 %inc, i32* %i
 	br label %forcond
diff --git a/llvm/test/CodeGen/X86/widen_cast-1.ll b/llvm/test/CodeGen/X86/widen_cast-1.ll
index 206d5a4..6b7f489 100644
--- a/llvm/test/CodeGen/X86/widen_cast-1.ll
+++ b/llvm/test/CodeGen/X86/widen_cast-1.ll
@@ -23,25 +23,25 @@
 	br label %forcond
 
 forcond:		; preds = %forinc, %entry
-	%tmp = load i32* %i		; <i32> [#uses=1]
+	%tmp = load i32, i32* %i		; <i32> [#uses=1]
 	%cmp = icmp slt i32 %tmp, 4		; <i1> [#uses=1]
 	br i1 %cmp, label %forbody, label %afterfor
 
 forbody:		; preds = %forcond
-	%tmp1 = load i32* %i		; <i32> [#uses=1]
-	%tmp2 = load <2 x i32>** %dst.addr		; <<2 x i32>*> [#uses=1]
+	%tmp1 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp2 = load <2 x i32>*, <2 x i32>** %dst.addr		; <<2 x i32>*> [#uses=1]
 	%arrayidx = getelementptr <2 x i32>, <2 x i32>* %tmp2, i32 %tmp1		; <<2 x i32>*> [#uses=1]
-	%tmp3 = load i32* %i		; <i32> [#uses=1]
-	%tmp4 = load <4 x i16>** %src.addr		; <<4 x i16>*> [#uses=1]
+	%tmp3 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp4 = load <4 x i16>*, <4 x i16>** %src.addr		; <<4 x i16>*> [#uses=1]
 	%arrayidx5 = getelementptr <4 x i16>, <4 x i16>* %tmp4, i32 %tmp3		; <<4 x i16>*> [#uses=1]
-	%tmp6 = load <4 x i16>* %arrayidx5		; <<4 x i16>> [#uses=1]
+	%tmp6 = load <4 x i16>, <4 x i16>* %arrayidx5		; <<4 x i16>> [#uses=1]
 	%add = add <4 x i16> %tmp6, < i16 1, i16 1, i16 1, i16 1 >		; <<4 x i16>> [#uses=1]
 	%conv = bitcast <4 x i16> %add to <2 x i32>		; <<2 x i32>> [#uses=1]
 	store <2 x i32> %conv, <2 x i32>* %arrayidx
 	br label %forinc
 
 forinc:		; preds = %forbody
-	%tmp7 = load i32* %i		; <i32> [#uses=1]
+	%tmp7 = load i32, i32* %i		; <i32> [#uses=1]
 	%inc = add i32 %tmp7, 1		; <i32> [#uses=1]
 	store i32 %inc, i32* %i
 	br label %forcond
diff --git a/llvm/test/CodeGen/X86/widen_cast-2.ll b/llvm/test/CodeGen/X86/widen_cast-2.ll
index 9520250..5a9acbd 100644
--- a/llvm/test/CodeGen/X86/widen_cast-2.ll
+++ b/llvm/test/CodeGen/X86/widen_cast-2.ll
@@ -18,25 +18,25 @@
 	br label %forcond
 
 forcond:		; preds = %forinc, %entry
-	%tmp = load i32* %i		; <i32> [#uses=1]
+	%tmp = load i32, i32* %i		; <i32> [#uses=1]
 	%cmp = icmp slt i32 %tmp, 4		; <i1> [#uses=1]
 	br i1 %cmp, label %forbody, label %afterfor
 
 forbody:		; preds = %forcond
-	%tmp1 = load i32* %i		; <i32> [#uses=1]
-	%tmp2 = load <7 x i32>** %dst.addr		; <<2 x i32>*> [#uses=1]
+	%tmp1 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp2 = load <7 x i32>*, <7 x i32>** %dst.addr		; <<2 x i32>*> [#uses=1]
 	%arrayidx = getelementptr <7 x i32>, <7 x i32>* %tmp2, i32 %tmp1		; <<7 x i32>*> [#uses=1]
-	%tmp3 = load i32* %i		; <i32> [#uses=1]
-	%tmp4 = load <14 x i16>** %src.addr		; <<4 x i16>*> [#uses=1]
+	%tmp3 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp4 = load <14 x i16>*, <14 x i16>** %src.addr		; <<4 x i16>*> [#uses=1]
 	%arrayidx5 = getelementptr <14 x i16>, <14 x i16>* %tmp4, i32 %tmp3		; <<4 x i16>*> [#uses=1]
-	%tmp6 = load <14 x i16>* %arrayidx5		; <<4 x i16>> [#uses=1]
+	%tmp6 = load <14 x i16>, <14 x i16>* %arrayidx5		; <<4 x i16>> [#uses=1]
 	%add = add <14 x i16> %tmp6, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1 >		; <<4 x i16>> [#uses=1]
 	%conv = bitcast <14 x i16> %add to <7 x i32>		; <<7 x i32>> [#uses=1]
 	store <7 x i32> %conv, <7 x i32>* %arrayidx
 	br label %forinc
 
 forinc:		; preds = %forbody
-	%tmp7 = load i32* %i		; <i32> [#uses=1]
+	%tmp7 = load i32, i32* %i		; <i32> [#uses=1]
 	%inc = add i32 %tmp7, 1		; <i32> [#uses=1]
 	store i32 %inc, i32* %i
 	br label %forcond
diff --git a/llvm/test/CodeGen/X86/widen_cast-4.ll b/llvm/test/CodeGen/X86/widen_cast-4.ll
index a899441..060dfb1 100644
--- a/llvm/test/CodeGen/X86/widen_cast-4.ll
+++ b/llvm/test/CodeGen/X86/widen_cast-4.ll
@@ -18,29 +18,29 @@
 	br label %forcond
 
 forcond:		; preds = %forinc, %entry
-	%tmp = load i32* %i		; <i32> [#uses=1]
-	%tmp1 = load i32* %n.addr		; <i32> [#uses=1]
+	%tmp = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* %n.addr		; <i32> [#uses=1]
 	%cmp = icmp slt i32 %tmp, %tmp1		; <i1> [#uses=1]
 	br i1 %cmp, label %forbody, label %afterfor
 
 forbody:		; preds = %forcond
-	%tmp2 = load i32* %i		; <i32> [#uses=1]
-	%tmp3 = load i64** %dst_i.addr		; <i64*> [#uses=1]
+	%tmp2 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp3 = load i64*, i64** %dst_i.addr		; <i64*> [#uses=1]
 	%arrayidx = getelementptr i64, i64* %tmp3, i32 %tmp2		; <i64*> [#uses=1]
 	%conv = bitcast i64* %arrayidx to <8 x i8>*		; <<8 x i8>*> [#uses=1]
 	store <8 x i8>* %conv, <8 x i8>** %dst
-	%tmp4 = load i32* %i		; <i32> [#uses=1]
-	%tmp5 = load i64** %src_i.addr		; <i64*> [#uses=1]
+	%tmp4 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp5 = load i64*, i64** %src_i.addr		; <i64*> [#uses=1]
 	%arrayidx6 = getelementptr i64, i64* %tmp5, i32 %tmp4		; <i64*> [#uses=1]
 	%conv7 = bitcast i64* %arrayidx6 to <8 x i8>*		; <<8 x i8>*> [#uses=1]
 	store <8 x i8>* %conv7, <8 x i8>** %src
-	%tmp8 = load i32* %i		; <i32> [#uses=1]
-	%tmp9 = load <8 x i8>** %dst		; <<8 x i8>*> [#uses=1]
+	%tmp8 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp9 = load <8 x i8>*, <8 x i8>** %dst		; <<8 x i8>*> [#uses=1]
 	%arrayidx10 = getelementptr <8 x i8>, <8 x i8>* %tmp9, i32 %tmp8		; <<8 x i8>*> [#uses=1]
-	%tmp11 = load i32* %i		; <i32> [#uses=1]
-	%tmp12 = load <8 x i8>** %src		; <<8 x i8>*> [#uses=1]
+	%tmp11 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp12 = load <8 x i8>*, <8 x i8>** %src		; <<8 x i8>*> [#uses=1]
 	%arrayidx13 = getelementptr <8 x i8>, <8 x i8>* %tmp12, i32 %tmp11		; <<8 x i8>*> [#uses=1]
-	%tmp14 = load <8 x i8>* %arrayidx13		; <<8 x i8>> [#uses=1]
+	%tmp14 = load <8 x i8>, <8 x i8>* %arrayidx13		; <<8 x i8>> [#uses=1]
 	%add = add <8 x i8> %tmp14, < i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1 >		; <<8 x i8>> [#uses=1]
 	%shr = ashr <8 x i8> %add, < i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2 >		; <<8 x i8>> [#uses=1]
 	store <8 x i8> %shr, <8 x i8>* %arrayidx10
@@ -67,7 +67,7 @@
 ; CHECK-WIDE-NEXT: movd
 
 forinc:		; preds = %forbody
-	%tmp15 = load i32* %i		; <i32> [#uses=1]
+	%tmp15 = load i32, i32* %i		; <i32> [#uses=1]
 	%inc = add i32 %tmp15, 1		; <i32> [#uses=1]
 	store i32 %inc, i32* %i
 	br label %forcond
diff --git a/llvm/test/CodeGen/X86/widen_conversions.ll b/llvm/test/CodeGen/X86/widen_conversions.ll
index fa85400..dd75097 100644
--- a/llvm/test/CodeGen/X86/widen_conversions.ll
+++ b/llvm/test/CodeGen/X86/widen_conversions.ll
@@ -12,7 +12,7 @@
 ; CHECK-NEXT: punpcklwd %[[Z]], %[[X]]
 ; CHECK-NEXT: ret
 
-  %val = load <4 x i8>* %ptr
+  %val = load <4 x i8>, <4 x i8>* %ptr
   %ext = zext <4 x i8> %val to <4 x i32>
   ret <4 x i32> %ext
 }
diff --git a/llvm/test/CodeGen/X86/widen_load-0.ll b/llvm/test/CodeGen/X86/widen_load-0.ll
index 768a1be..edaaa77 100644
--- a/llvm/test/CodeGen/X86/widen_load-0.ll
+++ b/llvm/test/CodeGen/X86/widen_load-0.ll
@@ -10,8 +10,8 @@
 
 define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
 entry:
-  %0 = load <2 x i16>* %b, align 2                ; <<2 x i16>> [#uses=1]
-  %1 = load i32* %c, align 4                      ; <i32> [#uses=1]
+  %0 = load <2 x i16>, <2 x i16>* %b, align 2                ; <<2 x i16>> [#uses=1]
+  %1 = load i32, i32* %c, align 4                      ; <i32> [#uses=1]
   %tmp1 = bitcast i32 %1 to <2 x i16>             ; <<2 x i16>> [#uses=1]
   store <2 x i16> %tmp1, <2 x i16>* %b, align 2
   %tmp5 = bitcast <2 x i16> %0 to <1 x i32>       ; <<1 x i32>> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/widen_load-1.ll b/llvm/test/CodeGen/X86/widen_load-1.ll
index 1910798..849f9b9 100644
--- a/llvm/test/CodeGen/X86/widen_load-1.ll
+++ b/llvm/test/CodeGen/X86/widen_load-1.ll
@@ -26,12 +26,12 @@
   store i32 0, i32* %changed, align 4
   %r2 = getelementptr float, float* bitcast ([20 x i64]* @compl to float*), i64 32 ; <float*> [#uses=1]
   %r3 = bitcast float* %r2 to <2 x float>*        ; <<2 x float>*> [#uses=1]
-  %r4 = load <2 x float>* %r3, align 4            ; <<2 x float>> [#uses=1]
+  %r4 = load <2 x float>, <2 x float>* %r3, align 4            ; <<2 x float>> [#uses=1]
   call void @killcommon(i32* %changed)
   br label %"file complex.c, line 34, bb4"
 
 "file complex.c, line 34, bb4":                   ; preds = %"file complex.c, line 27, bb13"
-  %r5 = load i32* %changed, align 4               ; <i32> [#uses=1]
+  %r5 = load i32, i32* %changed, align 4               ; <i32> [#uses=1]
   %r6 = icmp eq i32 %r5, 0                        ; <i1> [#uses=1]
   %r7 = zext i1 %r6 to i32                        ; <i32> [#uses=1]
   %r8 = icmp ne i32 %r7, 0                        ; <i1> [#uses=1]
diff --git a/llvm/test/CodeGen/X86/widen_load-2.ll b/llvm/test/CodeGen/X86/widen_load-2.ll
index c6bd964..f5ddc0e 100644
--- a/llvm/test/CodeGen/X86/widen_load-2.ll
+++ b/llvm/test/CodeGen/X86/widen_load-2.ll
@@ -10,8 +10,8 @@
 ; CHECK-NEXT:    paddd   (%{{.*}}), %[[R0]]
 ; CHECK-NEXT:    pextrd  $2, %[[R0]], 8(%{{.*}})
 ; CHECK-NEXT:    movq    %[[R0]], (%{{.*}})
-	%a = load %i32vec3* %ap, align 16
-	%b = load %i32vec3* %bp, align 16
+	%a = load %i32vec3, %i32vec3* %ap, align 16
+	%b = load %i32vec3, %i32vec3* %bp, align 16
 	%x = add %i32vec3 %a, %b
 	store %i32vec3 %x, %i32vec3* %ret, align 16
 	ret void
@@ -26,8 +26,8 @@
 ; CHECK-NEXT:    paddd   %[[R0]], %[[R1]]
 ; CHECK-NEXT:    pextrd  $2, %[[R1]], 8(%{{.*}})
 ; CHECK-NEXT:    movq    %[[R1]], (%{{.*}})
-	%a = load %i32vec3* %ap, align 8
-	%b = load %i32vec3* %bp, align 8
+	%a = load %i32vec3, %i32vec3* %ap, align 8
+	%b = load %i32vec3, %i32vec3* %bp, align 8
 	%x = add %i32vec3 %a, %b
 	store %i32vec3 %x, %i32vec3* %ret, align 8
 	ret void
@@ -43,8 +43,8 @@
 ; CHECK-NEXT:    pextrd  $2, %[[R1]], 24(%{{.*}})
 ; CHECK-NEXT:    movq    %[[R1]], 16(%{{.*}})
 ; CHECK-NEXT:    movdqa  %[[R0]], (%{{.*}})
-	%a = load %i32vec7* %ap, align 16
-	%b = load %i32vec7* %bp, align 16
+	%a = load %i32vec7, %i32vec7* %ap, align 16
+	%b = load %i32vec7, %i32vec7* %bp, align 16
 	%x = add %i32vec7 %a, %b
 	store %i32vec7 %x, %i32vec7* %ret, align 16
 	ret void
@@ -62,8 +62,8 @@
 ; CHECK-NEXT:    movdqa  %[[R2]], 32(%{{.*}})
 ; CHECK-NEXT:    movdqa  %[[R1]], 16(%{{.*}})
 ; CHECK-NEXT:    movdqa  %[[R0]], (%{{.*}})
-	%a = load %i32vec12* %ap, align 16
-	%b = load %i32vec12* %bp, align 16
+	%a = load %i32vec12, %i32vec12* %ap, align 16
+	%b = load %i32vec12, %i32vec12* %bp, align 16
 	%x = add %i32vec12 %a, %b
 	store %i32vec12 %x, %i32vec12* %ret, align 16
 	ret void
@@ -80,8 +80,8 @@
 ; CHECK-NEXT:    pshufb   {{.*}}, %[[R1]]
 ; CHECK-NEXT:    pmovzxdq %[[R1]], %[[R0]]
 ; CHECK-NEXT:    movd     %[[R0]], (%{{.*}})
-	%a = load %i16vec3* %ap, align 16
-	%b = load %i16vec3* %bp, align 16
+	%a = load %i16vec3, %i16vec3* %ap, align 16
+	%b = load %i16vec3, %i16vec3* %bp, align 16
 	%x = add %i16vec3 %a, %b
 	store %i16vec3 %x, %i16vec3* %ret, align 16
 	ret void
@@ -94,8 +94,8 @@
 ; CHECK-NEXT:    movq    (%{{.*}}), %[[R1:xmm[0-9]+]]
 ; CHECK-NEXT:    paddw   %[[R0]], %[[R1]]
 ; CHECK-NEXT:    movq    %[[R1]], (%{{.*}})
-	%a = load %i16vec4* %ap, align 16
-	%b = load %i16vec4* %bp, align 16
+	%a = load %i16vec4, %i16vec4* %ap, align 16
+	%b = load %i16vec4, %i16vec4* %bp, align 16
 	%x = add %i16vec4 %a, %b
 	store %i16vec4 %x, %i16vec4* %ret, align 16
 	ret void
@@ -110,8 +110,8 @@
 ; CHECK-NEXT:    paddw   16(%{{.*}}), %[[R1]]
 ; CHECK-NEXT:    movq    %[[R1]], 16(%{{.*}})
 ; CHECK-NEXT:    movdqa  %[[R0]], (%{{.*}})
-	%a = load %i16vec12* %ap, align 16
-	%b = load %i16vec12* %bp, align 16
+	%a = load %i16vec12, %i16vec12* %ap, align 16
+	%b = load %i16vec12, %i16vec12* %bp, align 16
 	%x = add %i16vec12 %a, %b
 	store %i16vec12 %x, %i16vec12* %ret, align 16
 	ret void
@@ -129,8 +129,8 @@
 ; CHECK-NEXT:    movd    %[[R2]], 32(%{{.*}})
 ; CHECK-NEXT:    movdqa  %[[R1]], 16(%{{.*}})
 ; CHECK-NEXT:    movdqa  %[[R0]], (%{{.*}})
-	%a = load %i16vec18* %ap, align 16
-	%b = load %i16vec18* %bp, align 16
+	%a = load %i16vec18, %i16vec18* %ap, align 16
+	%b = load %i16vec18, %i16vec18* %bp, align 16
 	%x = add %i16vec18 %a, %b
 	store %i16vec18 %x, %i16vec18* %ret, align 16
 	ret void
@@ -148,8 +148,8 @@
 ; CHECK-NEXT:    pmovzxwq %[[R1]], %[[R0]]
 ; CHECK-NEXT:    movd     %[[R0]], %e[[R2:[abcd]]]x
 ; CHECK-NEXT:    movw     %[[R2]]x, (%{{.*}})
-	%a = load %i8vec3* %ap, align 16
-	%b = load %i8vec3* %bp, align 16
+	%a = load %i8vec3, %i8vec3* %ap, align 16
+	%b = load %i8vec3, %i8vec3* %bp, align 16
 	%x = add %i8vec3 %a, %b
 	store %i8vec3 %x, %i8vec3* %ret, align 16
 	ret void
@@ -167,8 +167,8 @@
 ; CHECK-NEXT:    pextrd  $2, %[[R1]], 24(%{{.*}})
 ; CHECK-NEXT:    movq    %[[R1]], 16(%{{.*}})
 ; CHECK-NEXT:    movdqa  %[[R0]], (%{{.*}})
-	%a = load %i8vec31* %ap, align 16
-	%b = load %i8vec31* %bp, align 16
+	%a = load %i8vec31, %i8vec31* %ap, align 16
+	%b = load %i8vec31, %i8vec31* %bp, align 16
 	%x = add %i8vec31 %a, %b
 	store %i8vec31 %x, %i8vec31* %ret, align 16
 	ret void
@@ -216,9 +216,9 @@
   store <3 x i8> <i8 -98, i8 -98, i8 -98>, <3 x i8>* %storetmp
   %storetmp1 = bitcast %i8vec3pack* %rot to <3 x i8>*
   store <3 x i8> <i8 1, i8 1, i8 1>, <3 x i8>* %storetmp1
-  %tmp = load %i8vec3pack* %X
+  %tmp = load %i8vec3pack, %i8vec3pack* %X
   %extractVec = extractvalue %i8vec3pack %tmp, 0
-  %tmp2 = load %i8vec3pack* %rot
+  %tmp2 = load %i8vec3pack, %i8vec3pack* %rot
   %extractVec3 = extractvalue %i8vec3pack %tmp2, 0
   %shr = lshr <3 x i8> %extractVec, %extractVec3
   %storetmp4 = bitcast %i8vec3pack* %result to <3 x i8>*
diff --git a/llvm/test/CodeGen/X86/win32_sret.ll b/llvm/test/CodeGen/X86/win32_sret.ll
index 0b10a67..ca01d3b 100644
--- a/llvm/test/CodeGen/X86/win32_sret.ll
+++ b/llvm/test/CodeGen/X86/win32_sret.ll
@@ -106,7 +106,7 @@
 entry:
   %this.addr = alloca %class.C5*, align 4
   store %class.C5* %this, %class.C5** %this.addr, align 4
-  %this1 = load %class.C5** %this.addr
+  %this1 = load %class.C5*, %class.C5** %this.addr
   %x = getelementptr inbounds %struct.S5, %struct.S5* %agg.result, i32 0, i32 0
   store i32 42, i32* %x, align 4
   ret void
@@ -211,7 +211,7 @@
 define x86_thiscallcc void @test7_g(%struct.test7* %in, %struct.test7* sret %out) {
   %s = getelementptr %struct.test7, %struct.test7* %in, i32 0, i32 0
   %d = getelementptr %struct.test7, %struct.test7* %out, i32 0, i32 0
-  %v = load i32* %s
+  %v = load i32, i32* %s
   store i32 %v, i32* %d
   call void @clobber_eax()
   ret void
diff --git a/llvm/test/CodeGen/X86/win64_eh.ll b/llvm/test/CodeGen/X86/win64_eh.ll
index b67ad58..d668f43 100644
--- a/llvm/test/CodeGen/X86/win64_eh.ll
+++ b/llvm/test/CodeGen/X86/win64_eh.ll
@@ -62,21 +62,21 @@
   store i32 %d_arg, i32* %d
   store i32 %e_arg, i32* %e
   store i32 %f_arg, i32* %f
-  %tmp = load i32* %a
+  %tmp = load i32, i32* %a
   %tmp1 = mul i32 %tmp, 2
-  %tmp2 = load i32* %b
+  %tmp2 = load i32, i32* %b
   %tmp3 = mul i32 %tmp2, 3
   %tmp4 = add i32 %tmp1, %tmp3
-  %tmp5 = load i32* %c
+  %tmp5 = load i32, i32* %c
   %tmp6 = mul i32 %tmp5, 5
   %tmp7 = add i32 %tmp4, %tmp6
-  %tmp8 = load i32* %d
+  %tmp8 = load i32, i32* %d
   %tmp9 = mul i32 %tmp8, 7
   %tmp10 = add i32 %tmp7, %tmp9
-  %tmp11 = load i32* %e
+  %tmp11 = load i32, i32* %e
   %tmp12 = mul i32 %tmp11, 11
   %tmp13 = add i32 %tmp10, %tmp12
-  %tmp14 = load i32* %f
+  %tmp14 = load i32, i32* %f
   %tmp15 = mul i32 %tmp14, 13
   %tmp16 = add i32 %tmp13, %tmp15
   ret i32 %tmp16
@@ -105,7 +105,7 @@
 entry:
   %step = alloca i32, align 4
   store i32 0, i32* %step
-  %tmp = load i32* %step
+  %tmp = load i32, i32* %step
 
   %tmp1 = invoke i32 @bar()
           to label %finally unwind label %landingpad
@@ -123,7 +123,7 @@
   unreachable
 
 endtryfinally:
-  %tmp10 = load i32* %step
+  %tmp10 = load i32, i32* %step
   ret i32 %tmp10
 }
 ; WIN64-LABEL: foo4:
diff --git a/llvm/test/CodeGen/X86/win_eh_prepare.ll b/llvm/test/CodeGen/X86/win_eh_prepare.ll
index f96fed5..b457a41 100644
--- a/llvm/test/CodeGen/X86/win_eh_prepare.ll
+++ b/llvm/test/CodeGen/X86/win_eh_prepare.ll
@@ -36,7 +36,7 @@
 }
 
 define internal i32 @filt_g(i8*, i8*) {
-  %g = load i32* @g
+  %g = load i32, i32* @g
   ret i32 %g
 }
 
diff --git a/llvm/test/CodeGen/X86/x32-function_pointer-1.ll b/llvm/test/CodeGen/X86/x32-function_pointer-1.ll
index 2baf92a..952add9 100644
--- a/llvm/test/CodeGen/X86/x32-function_pointer-1.ll
+++ b/llvm/test/CodeGen/X86/x32-function_pointer-1.ll
@@ -8,11 +8,11 @@
 
 define void @bar(i8* %h) nounwind uwtable {
 entry:
-  %0 = load void (i8*)** @foo1, align 4
+  %0 = load void (i8*)*, void (i8*)** @foo1, align 4
 ; CHECK: movl	foo1(%rip), %e{{[^,]*}}
   tail call void %0(i8* %h) nounwind
 ; CHECK: callq	*%r{{[^,]*}}
-  %1 = load void (i8*)** @foo2, align 4
+  %1 = load void (i8*)*, void (i8*)** @foo2, align 4
 ; CHECK: movl	foo2(%rip), %e{{[^,]*}}
   tail call void %1(i8* %h) nounwind
 ; CHECK: jmpq	*%r{{[^,]*}}
diff --git a/llvm/test/CodeGen/X86/x86-64-gv-offset.ll b/llvm/test/CodeGen/X86/x86-64-gv-offset.ll
index 365e4af..e179146 100644
--- a/llvm/test/CodeGen/X86/x86-64-gv-offset.ll
+++ b/llvm/test/CodeGen/X86/x86-64-gv-offset.ll
@@ -5,8 +5,8 @@
 
 define i32 @main() nounwind  {
 entry:
-	%tmp2 = load float* getelementptr (%struct.x* @X, i32 0, i32 0), align 16		; <float> [#uses=1]
-	%tmp4 = load double* getelementptr (%struct.x* @X, i32 0, i32 1), align 8		; <double> [#uses=1]
+	%tmp2 = load float, float* getelementptr (%struct.x* @X, i32 0, i32 0), align 16		; <float> [#uses=1]
+	%tmp4 = load double, double* getelementptr (%struct.x* @X, i32 0, i32 1), align 8		; <double> [#uses=1]
 	tail call void @t( float %tmp2, double %tmp4 ) nounwind 
 	ret i32 0
 }
diff --git a/llvm/test/CodeGen/X86/x86-64-jumps.ll b/llvm/test/CodeGen/X86/x86-64-jumps.ll
index 7e5cd70..846660e 100644
--- a/llvm/test/CodeGen/X86/x86-64-jumps.ll
+++ b/llvm/test/CodeGen/X86/x86-64-jumps.ll
@@ -22,10 +22,10 @@
 entry:
   %i.addr = alloca i32                            ; <i32*> [#uses=2]
   store i32 %i, i32* %i.addr
-  %tmp = load i32* %i.addr                        ; <i32> [#uses=1]
+  %tmp = load i32, i32* %i.addr                        ; <i32> [#uses=1]
   %idxprom = sext i32 %tmp to i64                 ; <i64> [#uses=1]
   %arrayidx = getelementptr inbounds i32, i32* getelementptr inbounds ([3 x i32]* @test.array, i32 0, i32 0), i64 %idxprom ; <i32*> [#uses=1]
-  %tmp1 = load i32* %arrayidx                     ; <i32> [#uses=1]
+  %tmp1 = load i32, i32* %arrayidx                     ; <i32> [#uses=1]
   %idx.ext = sext i32 %tmp1 to i64                ; <i64> [#uses=1]
   %add.ptr = getelementptr i8, i8* blockaddress(@test2, %foo), i64 %idx.ext ; <i8*> [#uses=1]
   br label %indirectgoto
diff --git a/llvm/test/CodeGen/X86/x86-64-mem.ll b/llvm/test/CodeGen/X86/x86-64-mem.ll
index d15f516..c7a298e 100644
--- a/llvm/test/CodeGen/X86/x86-64-mem.ll
+++ b/llvm/test/CodeGen/X86/x86-64-mem.ll
@@ -17,7 +17,7 @@
 @bdst = internal global [500000 x i32] zeroinitializer, align 32		; <[500000 x i32]*> [#uses=0]
 
 define void @test1() nounwind {
-	%tmp = load i32* getelementptr ([0 x i32]* @src, i32 0, i32 0)		; <i32> [#uses=1]
+	%tmp = load i32, i32* getelementptr ([0 x i32]* @src, i32 0, i32 0)		; <i32> [#uses=1]
 	store i32 %tmp, i32* getelementptr ([0 x i32]* @dst, i32 0, i32 0)
 	ret void
 }
diff --git a/llvm/test/CodeGen/X86/x86-64-pic-4.ll b/llvm/test/CodeGen/X86/x86-64-pic-4.ll
index 33b08c4..42d08cc 100644
--- a/llvm/test/CodeGen/X86/x86-64-pic-4.ll
+++ b/llvm/test/CodeGen/X86/x86-64-pic-4.ll
@@ -5,6 +5,6 @@
 
 define i32 @get_a() {
 entry:
-	%tmp1 = load i32* @a, align 4
+	%tmp1 = load i32, i32* @a, align 4
 	ret i32 %tmp1
 }
diff --git a/llvm/test/CodeGen/X86/x86-64-pic-5.ll b/llvm/test/CodeGen/X86/x86-64-pic-5.ll
index 234bc0d..d217a5c 100644
--- a/llvm/test/CodeGen/X86/x86-64-pic-5.ll
+++ b/llvm/test/CodeGen/X86/x86-64-pic-5.ll
@@ -6,6 +6,6 @@
 
 define i32 @get_a() {
 entry:
-	%tmp1 = load i32* @a, align 4
+	%tmp1 = load i32, i32* @a, align 4
 	ret i32 %tmp1
 }
diff --git a/llvm/test/CodeGen/X86/x86-64-pic-6.ll b/llvm/test/CodeGen/X86/x86-64-pic-6.ll
index ae5b583..8671023 100644
--- a/llvm/test/CodeGen/X86/x86-64-pic-6.ll
+++ b/llvm/test/CodeGen/X86/x86-64-pic-6.ll
@@ -6,6 +6,6 @@
 
 define i32 @get_a() nounwind {
 entry:
-	%tmp1 = load i32* @a, align 4
+	%tmp1 = load i32, i32* @a, align 4
 	ret i32 %tmp1
 }
diff --git a/llvm/test/CodeGen/X86/x86-64-ptr-arg-simple.ll b/llvm/test/CodeGen/X86/x86-64-ptr-arg-simple.ll
index 6d46663..11dfc80 100644
--- a/llvm/test/CodeGen/X86/x86-64-ptr-arg-simple.ll
+++ b/llvm/test/CodeGen/X86/x86-64-ptr-arg-simple.ll
@@ -22,7 +22,7 @@
 
 define void @bar(i32* nocapture %pOut, i32* nocapture %pIn) nounwind {
 entry:
-  %0 = load i32* %pIn, align 4
+  %0 = load i32, i32* %pIn, align 4
   store i32 %0, i32* %pOut, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/x86-64-sret-return.ll b/llvm/test/CodeGen/X86/x86-64-sret-return.ll
index bc98fd6..a0c4348 100644
--- a/llvm/test/CodeGen/X86/x86-64-sret-return.ll
+++ b/llvm/test/CodeGen/X86/x86-64-sret-return.ll
@@ -17,42 +17,42 @@
 	%memtmp = alloca %struct.foo, align 8		; <%struct.foo*> [#uses=1]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store %struct.foo* %d, %struct.foo** %d_addr
-	%tmp = load %struct.foo** %d_addr, align 8		; <%struct.foo*> [#uses=1]
+	%tmp = load %struct.foo*, %struct.foo** %d_addr, align 8		; <%struct.foo*> [#uses=1]
 	%tmp1 = getelementptr %struct.foo, %struct.foo* %agg.result, i32 0, i32 0		; <[4 x i64]*> [#uses=4]
 	%tmp2 = getelementptr %struct.foo, %struct.foo* %tmp, i32 0, i32 0		; <[4 x i64]*> [#uses=4]
 	%tmp3 = getelementptr [4 x i64], [4 x i64]* %tmp1, i32 0, i32 0		; <i64*> [#uses=1]
 	%tmp4 = getelementptr [4 x i64], [4 x i64]* %tmp2, i32 0, i32 0		; <i64*> [#uses=1]
-	%tmp5 = load i64* %tmp4, align 8		; <i64> [#uses=1]
+	%tmp5 = load i64, i64* %tmp4, align 8		; <i64> [#uses=1]
 	store i64 %tmp5, i64* %tmp3, align 8
 	%tmp6 = getelementptr [4 x i64], [4 x i64]* %tmp1, i32 0, i32 1		; <i64*> [#uses=1]
 	%tmp7 = getelementptr [4 x i64], [4 x i64]* %tmp2, i32 0, i32 1		; <i64*> [#uses=1]
-	%tmp8 = load i64* %tmp7, align 8		; <i64> [#uses=1]
+	%tmp8 = load i64, i64* %tmp7, align 8		; <i64> [#uses=1]
 	store i64 %tmp8, i64* %tmp6, align 8
 	%tmp9 = getelementptr [4 x i64], [4 x i64]* %tmp1, i32 0, i32 2		; <i64*> [#uses=1]
 	%tmp10 = getelementptr [4 x i64], [4 x i64]* %tmp2, i32 0, i32 2		; <i64*> [#uses=1]
-	%tmp11 = load i64* %tmp10, align 8		; <i64> [#uses=1]
+	%tmp11 = load i64, i64* %tmp10, align 8		; <i64> [#uses=1]
 	store i64 %tmp11, i64* %tmp9, align 8
 	%tmp12 = getelementptr [4 x i64], [4 x i64]* %tmp1, i32 0, i32 3		; <i64*> [#uses=1]
 	%tmp13 = getelementptr [4 x i64], [4 x i64]* %tmp2, i32 0, i32 3		; <i64*> [#uses=1]
-	%tmp14 = load i64* %tmp13, align 8		; <i64> [#uses=1]
+	%tmp14 = load i64, i64* %tmp13, align 8		; <i64> [#uses=1]
 	store i64 %tmp14, i64* %tmp12, align 8
 	%tmp15 = getelementptr %struct.foo, %struct.foo* %memtmp, i32 0, i32 0		; <[4 x i64]*> [#uses=4]
 	%tmp16 = getelementptr %struct.foo, %struct.foo* %agg.result, i32 0, i32 0		; <[4 x i64]*> [#uses=4]
 	%tmp17 = getelementptr [4 x i64], [4 x i64]* %tmp15, i32 0, i32 0		; <i64*> [#uses=1]
 	%tmp18 = getelementptr [4 x i64], [4 x i64]* %tmp16, i32 0, i32 0		; <i64*> [#uses=1]
-	%tmp19 = load i64* %tmp18, align 8		; <i64> [#uses=1]
+	%tmp19 = load i64, i64* %tmp18, align 8		; <i64> [#uses=1]
 	store i64 %tmp19, i64* %tmp17, align 8
 	%tmp20 = getelementptr [4 x i64], [4 x i64]* %tmp15, i32 0, i32 1		; <i64*> [#uses=1]
 	%tmp21 = getelementptr [4 x i64], [4 x i64]* %tmp16, i32 0, i32 1		; <i64*> [#uses=1]
-	%tmp22 = load i64* %tmp21, align 8		; <i64> [#uses=1]
+	%tmp22 = load i64, i64* %tmp21, align 8		; <i64> [#uses=1]
 	store i64 %tmp22, i64* %tmp20, align 8
 	%tmp23 = getelementptr [4 x i64], [4 x i64]* %tmp15, i32 0, i32 2		; <i64*> [#uses=1]
 	%tmp24 = getelementptr [4 x i64], [4 x i64]* %tmp16, i32 0, i32 2		; <i64*> [#uses=1]
-	%tmp25 = load i64* %tmp24, align 8		; <i64> [#uses=1]
+	%tmp25 = load i64, i64* %tmp24, align 8		; <i64> [#uses=1]
 	store i64 %tmp25, i64* %tmp23, align 8
 	%tmp26 = getelementptr [4 x i64], [4 x i64]* %tmp15, i32 0, i32 3		; <i64*> [#uses=1]
 	%tmp27 = getelementptr [4 x i64], [4 x i64]* %tmp16, i32 0, i32 3		; <i64*> [#uses=1]
-	%tmp28 = load i64* %tmp27, align 8		; <i64> [#uses=1]
+	%tmp28 = load i64, i64* %tmp27, align 8		; <i64> [#uses=1]
 	store i64 %tmp28, i64* %tmp26, align 8
 	br label %return
 
diff --git a/llvm/test/CodeGen/X86/x86-64-static-relo-movl.ll b/llvm/test/CodeGen/X86/x86-64-static-relo-movl.ll
index 978abd7..5da3a47 100644
--- a/llvm/test/CodeGen/X86/x86-64-static-relo-movl.ll
+++ b/llvm/test/CodeGen/X86/x86-64-static-relo-movl.ll
@@ -12,7 +12,7 @@
   %t = bitcast %struct.MatchInfo* %pending to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %t, i8* bitcast (%struct.MatchInfo* @NO_MATCH to i8*), i64 512, i32 8, i1 false)
   %u = getelementptr inbounds %struct.MatchInfo, %struct.MatchInfo* %pending, i32 0, i32 2
-  %v = load i64* %u, align 8
+  %v = load i64, i64* %u, align 8
   br label %done
 done:
   ret void
diff --git a/llvm/test/CodeGen/X86/x86-mixed-alignment-dagcombine.ll b/llvm/test/CodeGen/X86/x86-mixed-alignment-dagcombine.ll
index fcf7eae..8892a69 100644
--- a/llvm/test/CodeGen/X86/x86-mixed-alignment-dagcombine.ll
+++ b/llvm/test/CodeGen/X86/x86-mixed-alignment-dagcombine.ll
@@ -10,8 +10,8 @@
   %1 = alloca  <2 x double>, align 16
   %2 = alloca  <2 x double>, align 8
 
-  %val = load <2 x double>* %1, align 16
-  %val2 = load <2 x double>* %2, align 8
+  %val = load <2 x double>, <2 x double>* %1, align 16
+  %val2 = load <2 x double>, <2 x double>* %2, align 8
   %val3 = select i1 %cmp, <2 x double> %val, <2 x double> %val2
   call void @sink(<2 x double> %val3)
   ret void
@@ -24,8 +24,8 @@
   %1 = alloca  <2 x double>, align 16
   %2 = alloca  <2 x double>, align 8
 
-  %val = load <2 x double>* %1, align 16
-  %val2 = load <2 x double>* %2, align 16
+  %val = load <2 x double>, <2 x double>* %1, align 16
+  %val2 = load <2 x double>, <2 x double>* %2, align 16
   %val3 = select i1 %cmp, <2 x double> %val, <2 x double> %val2
   call void @sink(<2 x double> %val3)
   ret void
diff --git a/llvm/test/CodeGen/X86/xop-intrinsics-x86_64.ll b/llvm/test/CodeGen/X86/xop-intrinsics-x86_64.ll
index e154e4a..2516116 100644
--- a/llvm/test/CodeGen/X86/xop-intrinsics-x86_64.ll
+++ b/llvm/test/CodeGen/X86/xop-intrinsics-x86_64.ll
@@ -8,14 +8,14 @@
 define <2 x double> @test_int_x86_xop_vpermil2pd_mr(<2 x double> %a0, <2 x double>* %a1, <2 x double> %a2) {
   ; CHECK-NOT: vmovaps
   ; CHECK: vpermil2pd
-  %vec = load <2 x double>* %a1
+  %vec = load <2 x double>, <2 x double>* %a1
   %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %vec, <2 x double> %a2, i8 1) ;  [#uses=1]
   ret <2 x double> %res
 }
 define <2 x double> @test_int_x86_xop_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x double>* %a2) {
   ; CHECK-NOT: vmovaps
   ; CHECK: vpermil2pd
-  %vec = load <2 x double>* %a2
+  %vec = load <2 x double>, <2 x double>* %a2
   %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %vec, i8 1) ;  [#uses=1]
   ret <2 x double> %res
 }
@@ -31,7 +31,7 @@
   ; CHECK-NOT: vmovaps
   ; CHECK: vpermil2pd
   ; CHECK: ymm
-  %vec = load <4 x double>* %a1
+  %vec = load <4 x double>, <4 x double>* %a1
   %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %vec, <4 x double> %a2, i8 2) ;
   ret <4 x double> %res
 }
@@ -39,7 +39,7 @@
   ; CHECK-NOT: vmovaps
   ; CHECK: vpermil2pd
   ; CHECK: ymm
-  %vec = load <4 x double>* %a2
+  %vec = load <4 x double>, <4 x double>* %a2
   %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %vec, i8 2) ;
   ret <4 x double> %res
 }
@@ -77,7 +77,7 @@
   ; CHECK-NOT: vmovaps
   ; CHECK: vpcmov
   ; CHECK: ymm
-  %vec = load <4 x i64>* %a1
+  %vec = load <4 x i64>, <4 x i64>* %a1
   %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %vec, <4 x i64> %a2) ;
   ret <4 x i64> %res
 }
@@ -85,7 +85,7 @@
   ; CHECK-NOT: vmovaps
   ; CHECK: vpcmov
   ; CHECK: ymm
- %vec = load <4 x i64>* %a2
+ %vec = load <4 x i64>, <4 x i64>* %a2
  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %vec) ;
   ret <4 x i64> %res
 }
@@ -99,7 +99,7 @@
 define <16 x i8> @test_int_x86_xop_vpcomeqb_mem(<16 x i8> %a0, <16 x i8>* %a1) {
   ; CHECK-NOT: vmovaps
   ; CHECK:vpcomeqb
-  %vec = load <16 x i8>* %a1
+  %vec = load <16 x i8>, <16 x i8>* %a1
   %res = call <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8> %a0, <16 x i8> %vec) ;
   ret <16 x i8> %res
 }
@@ -645,7 +645,7 @@
 define <2 x i64> @test_int_x86_xop_vphsubdq_mem(<4 x i32>* %a0) {
   ; CHECK-NOT: vmovaps
   ; CHECK: vphsubdq
-  %vec = load <4 x i32>* %a0
+  %vec = load <4 x i32>, <4 x i32>* %a0
   %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %vec) ;
   ret <2 x i64> %res
 }
@@ -659,7 +659,7 @@
 define <4 x i32> @test_int_x86_xop_vphsubwd_mem(<8 x i16>* %a0) {
   ; CHECK-NOT: vmovaps
   ; CHECK: vphsubwd
-  %vec = load <8 x i16>* %a0
+  %vec = load <8 x i16>, <8 x i16>* %a0
   %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %vec) ;
   ret <4 x i32> %res
 }
@@ -750,7 +750,7 @@
 define <4 x i32> @test_int_x86_xop_vpmadcswd_mem(<8 x i16> %a0, <8 x i16>* %a1, <4 x i32> %a2) {
   ; CHECK-NOT: vmovaps
   ; CHECK: vpmadcswd
-  %vec = load <8 x i16>* %a1
+  %vec = load <8 x i16>, <8 x i16>* %a1
   %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %vec, <4 x i32> %a2) ;
   ret <4 x i32> %res
 }
@@ -764,14 +764,14 @@
 define <16 x i8> @test_int_x86_xop_vpperm_rm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8>* %a2) {
   ; CHECK-NOT: vmovaps
   ; CHECK: vpperm
-  %vec = load <16 x i8>* %a2
+  %vec = load <16 x i8>, <16 x i8>* %a2
   %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %vec) ;
   ret <16 x i8> %res
 }
 define <16 x i8> @test_int_x86_xop_vpperm_mr(<16 x i8> %a0, <16 x i8>* %a1, <16 x i8> %a2) {
   ; CHECK-NOT: vmovaps
   ; CHECK: vpperm
-  %vec = load <16 x i8>* %a1
+  %vec = load <16 x i8>, <16 x i8>* %a1
   %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %vec, <16 x i8> %a2) ;
   ret <16 x i8> %res
 }
@@ -862,14 +862,14 @@
 define <8 x i16> @test_int_x86_xop_vpshlw_rm(<8 x i16> %a0, <8 x i16>* %a1) {
   ; CHECK-NOT: vmovaps
   ; CHECK: vpshlw
-  %vec = load <8 x i16>* %a1
+  %vec = load <8 x i16>, <8 x i16>* %a1
   %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %vec) ;
   ret <8 x i16> %res
 }
 define <8 x i16> @test_int_x86_xop_vpshlw_mr(<8 x i16>* %a0, <8 x i16> %a1) {
   ; CHECK-NOT: vmovaps
   ; CHECK: vpshlw
-  %vec = load <8 x i16>* %a0
+  %vec = load <8 x i16>, <8 x i16>* %a0
   %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %vec, <8 x i16> %a1) ;
   ret <8 x i16> %res
 }
@@ -884,7 +884,7 @@
 define <4 x float> @test_int_x86_xop_vfrcz_ss_mem(float* %a0) {
   ; CHECK-NOT: mov
   ; CHECK: vfrczss
-  %elem = load float* %a0
+  %elem = load float, float* %a0
   %vec = insertelement <4 x float> undef, float %elem, i32 0
   %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %vec) ;
   ret <4 x float> %res
@@ -900,7 +900,7 @@
 define <2 x double> @test_int_x86_xop_vfrcz_sd_mem(double* %a0) {
   ; CHECK-NOT: mov
   ; CHECK: vfrczsd
-  %elem = load double* %a0
+  %elem = load double, double* %a0
   %vec = insertelement <2 x double> undef, double %elem, i32 0
   %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %vec) ;
   ret <2 x double> %res
@@ -915,7 +915,7 @@
 define <2 x double> @test_int_x86_xop_vfrcz_pd_mem(<2 x double>* %a0) {
   ; CHECK-NOT: vmovaps
   ; CHECK: vfrczpd
-  %vec = load <2 x double>* %a0
+  %vec = load <2 x double>, <2 x double>* %a0
   %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %vec) ;
   ret <2 x double> %res
 }
@@ -931,7 +931,7 @@
   ; CHECK-NOT: vmovaps
   ; CHECK: vfrczpd
   ; CHECK: ymm
-  %vec = load <4 x double>* %a0
+  %vec = load <4 x double>, <4 x double>* %a0
   %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %vec) ;
   ret <4 x double> %res
 }
@@ -945,7 +945,7 @@
 define <4 x float> @test_int_x86_xop_vfrcz_ps_mem(<4 x float>* %a0) {
   ; CHECK-NOT: vmovaps
   ; CHECK: vfrczps
-  %vec = load <4 x float>* %a0
+  %vec = load <4 x float>, <4 x float>* %a0
   %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %vec) ;
   ret <4 x float> %res
 }
@@ -961,7 +961,7 @@
   ; CHECK-NOT: vmovaps
   ; CHECK: vfrczps
   ; CHECK: ymm
-  %vec = load <8 x float>* %a0
+  %vec = load <8 x float>, <8 x float>* %a0
   %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %vec) ;
   ret <8 x float> %res
 }
diff --git a/llvm/test/CodeGen/X86/zext-extract_subreg.ll b/llvm/test/CodeGen/X86/zext-extract_subreg.ll
index 43e79c7..9e34abb 100644
--- a/llvm/test/CodeGen/X86/zext-extract_subreg.ll
+++ b/llvm/test/CodeGen/X86/zext-extract_subreg.ll
@@ -6,7 +6,7 @@
   br i1 undef, label %return, label %if.end.i
 
 if.end.i:                                         ; preds = %entry
-  %tmp7.i = load i32* undef, align 4
+  %tmp7.i = load i32, i32* undef, align 4
   br i1 undef, label %return, label %if.end
 
 if.end:                                           ; preds = %if.end.i
diff --git a/llvm/test/CodeGen/X86/zext-sext.ll b/llvm/test/CodeGen/X86/zext-sext.ll
index 7b37d40..2758bff 100644
--- a/llvm/test/CodeGen/X86/zext-sext.ll
+++ b/llvm/test/CodeGen/X86/zext-sext.ll
@@ -9,14 +9,14 @@
 define void @func([40 x i16]* %a, i32* %b, i16** %c, i64* %d) nounwind {
 entry:
   %tmp103 = getelementptr inbounds [40 x i16], [40 x i16]* %a, i64 0, i64 4
-  %tmp104 = load i16* %tmp103, align 2
+  %tmp104 = load i16, i16* %tmp103, align 2
   %tmp105 = sext i16 %tmp104 to i32
-  %tmp106 = load i32* %b, align 4
+  %tmp106 = load i32, i32* %b, align 4
   %tmp107 = sub nsw i32 4, %tmp106
-  %tmp108 = load i16** %c, align 8
+  %tmp108 = load i16*, i16** %c, align 8
   %tmp109 = sext i32 %tmp107 to i64
   %tmp110 = getelementptr inbounds i16, i16* %tmp108, i64 %tmp109
-  %tmp111 = load i16* %tmp110, align 1
+  %tmp111 = load i16, i16* %tmp110, align 1
   %tmp112 = sext i16 %tmp111 to i32
   %tmp = mul i32 355244649, %tmp112
   %tmp1 = mul i32 %tmp, %tmp105
@@ -49,7 +49,7 @@
   %tmp19 = sub i64 %tmp18, 5386586244038704851
   %tmp20 = add i64 %tmp19, -1368057358110947217
   %tmp21 = mul i64 %tmp20, -422037402840850817
-  %tmp115 = load i64* %d, align 8
+  %tmp115 = load i64, i64* %d, align 8
   %alphaX = mul i64 468858157810230901, %tmp21
   %alphaXbetaY = add i64 %alphaX, %tmp115
   %transformed = add i64 %alphaXbetaY, 9040145182981852475
diff --git a/llvm/test/CodeGen/X86/zlib-longest-match.ll b/llvm/test/CodeGen/X86/zlib-longest-match.ll
index 90c9399..bd0b68e 100644
--- a/llvm/test/CodeGen/X86/zlib-longest-match.ll
+++ b/llvm/test/CodeGen/X86/zlib-longest-match.ll
@@ -29,45 +29,45 @@
 define i32 @longest_match(%struct.internal_state* nocapture %s, i32 %cur_match) nounwind {
 entry:
   %max_chain_length = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 31
-  %0 = load i32* %max_chain_length, align 4
+  %0 = load i32, i32* %max_chain_length, align 4
   %window = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 14
-  %1 = load i8** %window, align 8
+  %1 = load i8*, i8** %window, align 8
   %strstart = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 27
-  %2 = load i32* %strstart, align 4
+  %2 = load i32, i32* %strstart, align 4
   %idx.ext = zext i32 %2 to i64
   %add.ptr = getelementptr inbounds i8, i8* %1, i64 %idx.ext
   %prev_length = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 30
-  %3 = load i32* %prev_length, align 4
+  %3 = load i32, i32* %prev_length, align 4
   %nice_match1 = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 36
-  %4 = load i32* %nice_match1, align 4
+  %4 = load i32, i32* %nice_match1, align 4
   %w_size = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 11
-  %5 = load i32* %w_size, align 4
+  %5 = load i32, i32* %w_size, align 4
   %sub = add i32 %5, -262
   %cmp = icmp ugt i32 %2, %sub
   %sub6 = sub i32 %2, %sub
   %sub6. = select i1 %cmp, i32 %sub6, i32 0
   %prev7 = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 16
-  %6 = load i16** %prev7, align 8
+  %6 = load i16*, i16** %prev7, align 8
   %w_mask = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 13
-  %7 = load i32* %w_mask, align 4
+  %7 = load i32, i32* %w_mask, align 4
   %add.ptr11.sum = add i64 %idx.ext, 258
   %add.ptr12 = getelementptr inbounds i8, i8* %1, i64 %add.ptr11.sum
   %sub13 = add nsw i32 %3, -1
   %idxprom = sext i32 %sub13 to i64
   %add.ptr.sum = add i64 %idxprom, %idx.ext
   %arrayidx = getelementptr inbounds i8, i8* %1, i64 %add.ptr.sum
-  %8 = load i8* %arrayidx, align 1
+  %8 = load i8, i8* %arrayidx, align 1
   %idxprom14 = sext i32 %3 to i64
   %add.ptr.sum213 = add i64 %idxprom14, %idx.ext
   %arrayidx15 = getelementptr inbounds i8, i8* %1, i64 %add.ptr.sum213
-  %9 = load i8* %arrayidx15, align 1
+  %9 = load i8, i8* %arrayidx15, align 1
   %good_match = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 35
-  %10 = load i32* %good_match, align 4
+  %10 = load i32, i32* %good_match, align 4
   %cmp17 = icmp ult i32 %3, %10
   %shr = lshr i32 %0, 2
   %chain_length.0 = select i1 %cmp17, i32 %0, i32 %shr
   %lookahead = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 29
-  %11 = load i32* %lookahead, align 4
+  %11 = load i32, i32* %lookahead, align 4
   %cmp18 = icmp ugt i32 %4, %11
   %. = select i1 %cmp18, i32 %11, i32 %4
   %match_start = getelementptr inbounds %struct.internal_state, %struct.internal_state* %s, i64 0, i32 28
@@ -89,7 +89,7 @@
   %idxprom25 = sext i32 %best_len.0 to i64
   %add.ptr24.sum = add i64 %idx.ext23, %idxprom25
   %arrayidx26 = getelementptr inbounds i8, i8* %1, i64 %add.ptr24.sum
-  %12 = load i8* %arrayidx26, align 1
+  %12 = load i8, i8* %arrayidx26, align 1
   %cmp28 = icmp eq i8 %12, %scan_end.0
   br i1 %cmp28, label %lor.lhs.false, label %do.cond125
 
@@ -98,21 +98,21 @@
   %idxprom31 = sext i32 %sub30 to i64
   %add.ptr24.sum214 = add i64 %idx.ext23, %idxprom31
   %arrayidx32 = getelementptr inbounds i8, i8* %1, i64 %add.ptr24.sum214
-  %13 = load i8* %arrayidx32, align 1
+  %13 = load i8, i8* %arrayidx32, align 1
   %cmp35 = icmp eq i8 %13, %scan_end1.0
   br i1 %cmp35, label %lor.lhs.false37, label %do.cond125
 
 lor.lhs.false37:                                  ; preds = %lor.lhs.false
-  %14 = load i8* %add.ptr24, align 1
-  %15 = load i8* %add.ptr, align 1
+  %14 = load i8, i8* %add.ptr24, align 1
+  %15 = load i8, i8* %add.ptr, align 1
   %cmp40 = icmp eq i8 %14, %15
   br i1 %cmp40, label %lor.lhs.false42, label %do.cond125
 
 lor.lhs.false42:                                  ; preds = %lor.lhs.false37
   %add.ptr24.sum215 = add i64 %idx.ext23, 1
   %incdec.ptr = getelementptr inbounds i8, i8* %1, i64 %add.ptr24.sum215
-  %16 = load i8* %incdec.ptr, align 1
-  %17 = load i8* %arrayidx44, align 1
+  %16 = load i8, i8* %incdec.ptr, align 1
+  %17 = load i8, i8* %arrayidx44, align 1
   %cmp46 = icmp eq i8 %16, %17
   br i1 %cmp46, label %if.end49, label %do.cond125
 
@@ -125,65 +125,65 @@
   %match.0 = phi i8* [ %incdec.ptr51, %if.end49 ], [ %incdec.ptr103, %land.lhs.true100 ]
   %scan.1 = phi i8* [ %add.ptr50, %if.end49 ], [ %incdec.ptr101, %land.lhs.true100 ]
   %incdec.ptr53 = getelementptr inbounds i8, i8* %scan.1, i64 1
-  %18 = load i8* %incdec.ptr53, align 1
+  %18 = load i8, i8* %incdec.ptr53, align 1
   %incdec.ptr55 = getelementptr inbounds i8, i8* %match.0, i64 1
-  %19 = load i8* %incdec.ptr55, align 1
+  %19 = load i8, i8* %incdec.ptr55, align 1
   %cmp57 = icmp eq i8 %18, %19
   br i1 %cmp57, label %land.lhs.true, label %do.end
 
 land.lhs.true:                                    ; preds = %do.cond
   %incdec.ptr59 = getelementptr inbounds i8, i8* %scan.1, i64 2
-  %20 = load i8* %incdec.ptr59, align 1
+  %20 = load i8, i8* %incdec.ptr59, align 1
   %incdec.ptr61 = getelementptr inbounds i8, i8* %match.0, i64 2
-  %21 = load i8* %incdec.ptr61, align 1
+  %21 = load i8, i8* %incdec.ptr61, align 1
   %cmp63 = icmp eq i8 %20, %21
   br i1 %cmp63, label %land.lhs.true65, label %do.end
 
 land.lhs.true65:                                  ; preds = %land.lhs.true
   %incdec.ptr66 = getelementptr inbounds i8, i8* %scan.1, i64 3
-  %22 = load i8* %incdec.ptr66, align 1
+  %22 = load i8, i8* %incdec.ptr66, align 1
   %incdec.ptr68 = getelementptr inbounds i8, i8* %match.0, i64 3
-  %23 = load i8* %incdec.ptr68, align 1
+  %23 = load i8, i8* %incdec.ptr68, align 1
   %cmp70 = icmp eq i8 %22, %23
   br i1 %cmp70, label %land.lhs.true72, label %do.end
 
 land.lhs.true72:                                  ; preds = %land.lhs.true65
   %incdec.ptr73 = getelementptr inbounds i8, i8* %scan.1, i64 4
-  %24 = load i8* %incdec.ptr73, align 1
+  %24 = load i8, i8* %incdec.ptr73, align 1
   %incdec.ptr75 = getelementptr inbounds i8, i8* %match.0, i64 4
-  %25 = load i8* %incdec.ptr75, align 1
+  %25 = load i8, i8* %incdec.ptr75, align 1
   %cmp77 = icmp eq i8 %24, %25
   br i1 %cmp77, label %land.lhs.true79, label %do.end
 
 land.lhs.true79:                                  ; preds = %land.lhs.true72
   %incdec.ptr80 = getelementptr inbounds i8, i8* %scan.1, i64 5
-  %26 = load i8* %incdec.ptr80, align 1
+  %26 = load i8, i8* %incdec.ptr80, align 1
   %incdec.ptr82 = getelementptr inbounds i8, i8* %match.0, i64 5
-  %27 = load i8* %incdec.ptr82, align 1
+  %27 = load i8, i8* %incdec.ptr82, align 1
   %cmp84 = icmp eq i8 %26, %27
   br i1 %cmp84, label %land.lhs.true86, label %do.end
 
 land.lhs.true86:                                  ; preds = %land.lhs.true79
   %incdec.ptr87 = getelementptr inbounds i8, i8* %scan.1, i64 6
-  %28 = load i8* %incdec.ptr87, align 1
+  %28 = load i8, i8* %incdec.ptr87, align 1
   %incdec.ptr89 = getelementptr inbounds i8, i8* %match.0, i64 6
-  %29 = load i8* %incdec.ptr89, align 1
+  %29 = load i8, i8* %incdec.ptr89, align 1
   %cmp91 = icmp eq i8 %28, %29
   br i1 %cmp91, label %land.lhs.true93, label %do.end
 
 land.lhs.true93:                                  ; preds = %land.lhs.true86
   %incdec.ptr94 = getelementptr inbounds i8, i8* %scan.1, i64 7
-  %30 = load i8* %incdec.ptr94, align 1
+  %30 = load i8, i8* %incdec.ptr94, align 1
   %incdec.ptr96 = getelementptr inbounds i8, i8* %match.0, i64 7
-  %31 = load i8* %incdec.ptr96, align 1
+  %31 = load i8, i8* %incdec.ptr96, align 1
   %cmp98 = icmp eq i8 %30, %31
   br i1 %cmp98, label %land.lhs.true100, label %do.end
 
 land.lhs.true100:                                 ; preds = %land.lhs.true93
   %incdec.ptr101 = getelementptr inbounds i8, i8* %scan.1, i64 8
-  %32 = load i8* %incdec.ptr101, align 1
+  %32 = load i8, i8* %incdec.ptr101, align 1
   %incdec.ptr103 = getelementptr inbounds i8, i8* %match.0, i64 8
-  %33 = load i8* %incdec.ptr103, align 1
+  %33 = load i8, i8* %incdec.ptr103, align 1
   %cmp105 = icmp eq i8 %32, %33
   %cmp107 = icmp ult i8* %incdec.ptr101, %add.ptr12
   %or.cond = and i1 %cmp105, %cmp107
@@ -208,11 +208,11 @@
   %idxprom120 = sext i32 %sub119 to i64
   %add.ptr111.sum = add i64 %idxprom120, %idx.ext
   %arrayidx121 = getelementptr inbounds i8, i8* %1, i64 %add.ptr111.sum
-  %34 = load i8* %arrayidx121, align 1
+  %34 = load i8, i8* %arrayidx121, align 1
   %idxprom122 = sext i32 %sub110 to i64
   %add.ptr111.sum216 = add i64 %idxprom122, %idx.ext
   %arrayidx123 = getelementptr inbounds i8, i8* %1, i64 %add.ptr111.sum216
-  %35 = load i8* %arrayidx123, align 1
+  %35 = load i8, i8* %arrayidx123, align 1
   br label %do.cond125
 
 do.cond125:                                       ; preds = %if.end118, %do.end, %lor.lhs.false42, %lor.lhs.false37, %lor.lhs.false, %do.body
@@ -222,7 +222,7 @@
   %and = and i32 %cur_match.addr.0, %7
   %idxprom126 = zext i32 %and to i64
   %arrayidx127 = getelementptr inbounds i16, i16* %6, i64 %idxprom126
-  %36 = load i16* %arrayidx127, align 2
+  %36 = load i16, i16* %arrayidx127, align 2
   %conv128 = zext i16 %36 to i32
   %cmp129 = icmp ugt i32 %conv128, %sub6.
   br i1 %cmp129, label %land.rhs131, label %do.end135
diff --git a/llvm/test/CodeGen/XCore/2009-01-08-Crash.ll b/llvm/test/CodeGen/XCore/2009-01-08-Crash.ll
index 6b55e89..5eddbc3 100644
--- a/llvm/test/CodeGen/XCore/2009-01-08-Crash.ll
+++ b/llvm/test/CodeGen/XCore/2009-01-08-Crash.ll
@@ -7,6 +7,6 @@
 entry:
         %bar_addr = alloca i32
         %0 = getelementptr i32, i32* %bar_addr, i32 -1
-        %1 = load i32* %0, align 4
+        %1 = load i32, i32* %0, align 4
         ret i32 %1
 }
diff --git a/llvm/test/CodeGen/XCore/2010-02-25-LSR-Crash.ll b/llvm/test/CodeGen/XCore/2010-02-25-LSR-Crash.ll
index 396b083..693e6f0 100644
--- a/llvm/test/CodeGen/XCore/2010-02-25-LSR-Crash.ll
+++ b/llvm/test/CodeGen/XCore/2010-02-25-LSR-Crash.ll
@@ -16,7 +16,7 @@
   %tmp137 = sub i32 0, %indvar.i.i.i              ; <i32> [#uses=1]
   %scevgep13.i.i.i = getelementptr i32, i32* undef, i32 %tmp137 ; <i32*> [#uses=2]
   %scevgep1314.i.i.i = bitcast i32* %scevgep13.i.i.i to %struct.dwarf_fde** ; <%struct.dwarf_fde**> [#uses=1]
-  %0 = load %struct.dwarf_fde** %scevgep1314.i.i.i, align 4 ; <%struct.dwarf_fde*> [#uses=0]
+  %0 = load %struct.dwarf_fde*, %struct.dwarf_fde** %scevgep1314.i.i.i, align 4 ; <%struct.dwarf_fde*> [#uses=0]
   store i32 undef, i32* %scevgep13.i.i.i
   %indvar.next.i.i.i = add i32 %indvar.i.i.i, 1   ; <i32> [#uses=1]
   br label %bb3.i15.i.i
diff --git a/llvm/test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll b/llvm/test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll
index f8fe0d2..92391de 100644
--- a/llvm/test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll
+++ b/llvm/test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll
@@ -5,6 +5,6 @@
 
 define i32 @test_entry() nounwind {
 entry:
-  %0 = load i32* getelementptr inbounds (%struct.st* @x, i32 0, i32 3), align 2
+  %0 = load i32, i32* getelementptr inbounds (%struct.st* @x, i32 0, i32 3), align 2
   ret i32 %0
 }
diff --git a/llvm/test/CodeGen/XCore/atomic.ll b/llvm/test/CodeGen/XCore/atomic.ll
index 6ca80cf..13579db 100644
--- a/llvm/test/CodeGen/XCore/atomic.ll
+++ b/llvm/test/CodeGen/XCore/atomic.ll
@@ -25,27 +25,27 @@
 ; CHECK-NEXT: ldaw r[[R1:[0-9]+]], dp[pool]
 ; CHECK-NEXT: #MEMBARRIER
 ; CHECK-NEXT: ldc r[[R2:[0-9]+]], 0
-  %0 = load atomic i32* bitcast (i64* @pool to i32*) acquire, align 4
+  %0 = load atomic i32, i32* bitcast (i64* @pool to i32*) acquire, align 4
 
 ; CHECK-NEXT: ld16s r3, r[[R1]][r[[R2]]]
 ; CHECK-NEXT: #MEMBARRIER
-  %1 = load atomic i16* bitcast (i64* @pool to i16*) acquire, align 2
+  %1 = load atomic i16, i16* bitcast (i64* @pool to i16*) acquire, align 2
 
 ; CHECK-NEXT: ld8u r11, r[[R1]][r[[R2]]]
 ; CHECK-NEXT: #MEMBARRIER
-  %2 = load atomic i8* bitcast (i64* @pool to i8*) acquire, align 1
+  %2 = load atomic i8, i8* bitcast (i64* @pool to i8*) acquire, align 1
 
 ; CHECK-NEXT: ldw r4, dp[pool]
 ; CHECK-NEXT: #MEMBARRIER
-  %3 = load atomic i32* bitcast (i64* @pool to i32*) seq_cst, align 4
+  %3 = load atomic i32, i32* bitcast (i64* @pool to i32*) seq_cst, align 4
 
 ; CHECK-NEXT: ld16s r5, r[[R1]][r[[R2]]]
 ; CHECK-NEXT: #MEMBARRIER
-  %4 = load atomic i16* bitcast (i64* @pool to i16*) seq_cst, align 2
+  %4 = load atomic i16, i16* bitcast (i64* @pool to i16*) seq_cst, align 2
 
 ; CHECK-NEXT: ld8u r6, r[[R1]][r[[R2]]]
 ; CHECK-NEXT: #MEMBARRIER
-  %5 = load atomic i8* bitcast (i64* @pool to i8*) seq_cst, align 1
+  %5 = load atomic i8, i8* bitcast (i64* @pool to i8*) seq_cst, align 1
 
 ; CHECK-NEXT: #MEMBARRIER
 ; CHECK-NEXT: stw r[[R0]], dp[pool]
@@ -80,11 +80,11 @@
 ; CHECK-NEXT: st16 r[[R0]], r[[R1]][r[[R2]]]
 ; CHECK-NEXT: ld8u r[[R0]], r[[R1]][r[[R2]]]
 ; CHECK-NEXT: st8 r[[R0]], r[[R1]][r[[R2]]]
-  %6 = load atomic i32* bitcast (i64* @pool to i32*) monotonic, align 4
+  %6 = load atomic i32, i32* bitcast (i64* @pool to i32*) monotonic, align 4
   store atomic i32 %6, i32* bitcast (i64* @pool to i32*) monotonic, align 4
-  %7 = load atomic i16* bitcast (i64* @pool to i16*) monotonic, align 2
+  %7 = load atomic i16, i16* bitcast (i64* @pool to i16*) monotonic, align 2
   store atomic i16 %7, i16* bitcast (i64* @pool to i16*) monotonic, align 2
-  %8 = load atomic i8* bitcast (i64* @pool to i8*) monotonic, align 1
+  %8 = load atomic i8, i8* bitcast (i64* @pool to i8*) monotonic, align 1
   store atomic i8 %8, i8* bitcast (i64* @pool to i8*) monotonic, align 1
 
   ret void
diff --git a/llvm/test/CodeGen/XCore/codemodel.ll b/llvm/test/CodeGen/XCore/codemodel.ll
index d209125..706c380 100644
--- a/llvm/test/CodeGen/XCore/codemodel.ll
+++ b/llvm/test/CodeGen/XCore/codemodel.ll
@@ -97,21 +97,21 @@
 define i32 @f(i32* %i) {
 entry:
   %0 = getelementptr inbounds i32, i32* %i, i32 16383
-  %1 = load i32* %0
+  %1 = load i32, i32* %0
   %2 = getelementptr inbounds i32, i32* %i, i32 16384
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   %4 = add nsw i32 %1, %3
-  %5 = load i32* getelementptr inbounds ([100 x i32]* @l, i32 0, i32 0)
+  %5 = load i32, i32* getelementptr inbounds ([100 x i32]* @l, i32 0, i32 0)
   %6 = add nsw i32 %4, %5
-  %7 = load i32* getelementptr inbounds ([100 x i32]* @l, i32 0, i32 1)
+  %7 = load i32, i32* getelementptr inbounds ([100 x i32]* @l, i32 0, i32 1)
   %8 = add nsw i32 %6, %7
-  %9 = load i32* getelementptr inbounds ([100 x i32]* @l, i32 0, i32 98)
+  %9 = load i32, i32* getelementptr inbounds ([100 x i32]* @l, i32 0, i32 98)
   %10 = add nsw i32 %8, %9
-  %11 = load i32* getelementptr inbounds ([100 x i32]* @l, i32 0, i32 99)
+  %11 = load i32, i32* getelementptr inbounds ([100 x i32]* @l, i32 0, i32 99)
   %12 = add nsw i32 %10, %11
-  %13 = load i32* getelementptr inbounds ([10 x i32]* @s, i32 0, i32 0)
+  %13 = load i32, i32* getelementptr inbounds ([10 x i32]* @s, i32 0, i32 0)
   %14 = add nsw i32 %12, %13
-  %15 = load i32* getelementptr inbounds ([10 x i32]* @s, i32 0, i32 9)
+  %15 = load i32, i32* getelementptr inbounds ([10 x i32]* @s, i32 0, i32 9)
   %16 = add nsw i32 %14, %15
   ret i32 %16
 }
@@ -132,7 +132,7 @@
 @NoSize = external global [0 x i32]
 define i32 @UnknownSize() nounwind {
 entry:
-  %0 = load i32* getelementptr inbounds ([0 x i32]* @NoSize, i32 0, i32 10)
+  %0 = load i32, i32* getelementptr inbounds ([0 x i32]* @NoSize, i32 0, i32 10)
   ret i32 %0
 }
 
diff --git a/llvm/test/CodeGen/XCore/dwarf_debug.ll b/llvm/test/CodeGen/XCore/dwarf_debug.ll
index 8c9c47d..783138b 100644
--- a/llvm/test/CodeGen/XCore/dwarf_debug.ll
+++ b/llvm/test/CodeGen/XCore/dwarf_debug.ll
@@ -14,7 +14,7 @@
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !11, metadata !{!"0x102"}), !dbg !12
-  %0 = load i32* %a.addr, align 4, !dbg !12
+  %0 = load i32, i32* %a.addr, align 4, !dbg !12
   %add = add nsw i32 %0, 1, !dbg !12
   ret i32 %add, !dbg !12
 }
diff --git a/llvm/test/CodeGen/XCore/exception.ll b/llvm/test/CodeGen/XCore/exception.ll
index dcff0d6..6572dc8 100644
--- a/llvm/test/CodeGen/XCore/exception.ll
+++ b/llvm/test/CodeGen/XCore/exception.ll
@@ -84,7 +84,7 @@
   %2 = extractvalue { i8*, i32 } %0, 1
   %3 = call i8* @__cxa_begin_catch(i8* %1) nounwind
   %4 = bitcast i8* %3 to i32*
-  %5 = load i32* %4
+  %5 = load i32, i32* %4
   call void @__cxa_end_catch() nounwind
 
 ; CHECK: eq r0, r6, r5
diff --git a/llvm/test/CodeGen/XCore/indirectbr.ll b/llvm/test/CodeGen/XCore/indirectbr.ll
index 3565b20..9723cdc 100644
--- a/llvm/test/CodeGen/XCore/indirectbr.ll
+++ b/llvm/test/CodeGen/XCore/indirectbr.ll
@@ -6,7 +6,7 @@
 define internal i32 @foo(i32 %i) nounwind {
 ; CHECK-LABEL: foo:
 entry:
-  %0 = load i8** @nextaddr, align 4               ; <i8*> [#uses=2]
+  %0 = load i8*, i8** @nextaddr, align 4               ; <i8*> [#uses=2]
   %1 = icmp eq i8* %0, null                       ; <i1> [#uses=1]
   br i1 %1, label %bb3, label %bb2
 
@@ -17,7 +17,7 @@
 
 bb3:                                              ; preds = %entry
   %2 = getelementptr inbounds [5 x i8*], [5 x i8*]* @C.0.2070, i32 0, i32 %i ; <i8**> [#uses=1]
-  %gotovar.4.0.pre = load i8** %2, align 4        ; <i8*> [#uses=1]
+  %gotovar.4.0.pre = load i8*, i8** %2, align 4        ; <i8*> [#uses=1]
   br label %bb2
 
 L5:                                               ; preds = %bb2
diff --git a/llvm/test/CodeGen/XCore/llvm-intrinsics.ll b/llvm/test/CodeGen/XCore/llvm-intrinsics.ll
index b436282..539bf19 100644
--- a/llvm/test/CodeGen/XCore/llvm-intrinsics.ll
+++ b/llvm/test/CodeGen/XCore/llvm-intrinsics.ll
@@ -145,7 +145,7 @@
 ; CHECK-NEXT: set sp, r2
 ; CHECK-NEXT: bau r3
   call void (...)* @foo()
-  %0 = load i32* @offset
+  %0 = load i32, i32* @offset
   call void @llvm.eh.return.i32(i32 %0, i8* @handler)
   unreachable
 }
diff --git a/llvm/test/CodeGen/XCore/load.ll b/llvm/test/CodeGen/XCore/load.ll
index fc04978..bba7f72 100644
--- a/llvm/test/CodeGen/XCore/load.ll
+++ b/llvm/test/CodeGen/XCore/load.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL: load32:
 ; CHECK: ldw r0, r0[r1]
 	%0 = getelementptr i32, i32* %p, i32 %offset
-	%1 = load i32* %0, align 4
+	%1 = load i32, i32* %0, align 4
 	ret i32 %1
 }
 
@@ -14,7 +14,7 @@
 ; CHECK-LABEL: load32_imm:
 ; CHECK: ldw r0, r0[11]
 	%0 = getelementptr i32, i32* %p, i32 11
-	%1 = load i32* %0, align 4
+	%1 = load i32, i32* %0, align 4
 	ret i32 %1
 }
 
@@ -24,7 +24,7 @@
 ; CHECK: ld16s r0, r0[r1]
 ; CHECK-NOT: sext
 	%0 = getelementptr i16, i16* %p, i32 %offset
-	%1 = load i16* %0, align 2
+	%1 = load i16, i16* %0, align 2
 	%2 = sext i16 %1 to i32
 	ret i32 %2
 }
@@ -35,7 +35,7 @@
 ; CHECK: ld8u r0, r0[r1]
 ; CHECK-NOT: zext
 	%0 = getelementptr i8, i8* %p, i32 %offset
-	%1 = load i8* %0, align 1
+	%1 = load i8, i8* %0, align 1
 	%2 = zext i8 %1 to i32
 	ret i32 %2
 }
@@ -45,6 +45,6 @@
 entry:
 ; CHECK-LABEL: load_cp:
 ; CHECK: ldw r0, cp[GConst]
-  %0 = load i32* @GConst
+  %0 = load i32, i32* @GConst
   ret i32 %0
 }
diff --git a/llvm/test/CodeGen/XCore/private.ll b/llvm/test/CodeGen/XCore/private.ll
index 474448a..a188864 100644
--- a/llvm/test/CodeGen/XCore/private.ll
+++ b/llvm/test/CodeGen/XCore/private.ll
@@ -14,7 +14,7 @@
 ; CHECK: bl .Lfoo
 ; CHECK: ldw r0, dp[.Lbaz]
         call void @foo()
-	%1 = load i32* @baz, align 4
+	%1 = load i32, i32* @baz, align 4
         ret i32 %1
 }
 
diff --git a/llvm/test/CodeGen/XCore/scavenging.ll b/llvm/test/CodeGen/XCore/scavenging.ll
index 2756c12..7b6f54e 100644
--- a/llvm/test/CodeGen/XCore/scavenging.ll
+++ b/llvm/test/CodeGen/XCore/scavenging.ll
@@ -17,20 +17,20 @@
 define void @f() nounwind {
 entry:
 	%x = alloca [100 x i32], align 4		; <[100 x i32]*> [#uses=2]
-	%0 = load i32* @size, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* @size, align 4		; <i32> [#uses=1]
 	%1 = alloca i32, i32 %0, align 4		; <i32*> [#uses=1]
-	%2 = load volatile i32* @g0, align 4		; <i32> [#uses=1]
-	%3 = load volatile i32* @g1, align 4		; <i32> [#uses=1]
-	%4 = load volatile i32* @g2, align 4		; <i32> [#uses=1]
-	%5 = load volatile i32* @g3, align 4		; <i32> [#uses=1]
-	%6 = load volatile i32* @g4, align 4		; <i32> [#uses=1]
-	%7 = load volatile i32* @g5, align 4		; <i32> [#uses=1]
-	%8 = load volatile i32* @g6, align 4		; <i32> [#uses=1]
-	%9 = load volatile i32* @g7, align 4		; <i32> [#uses=1]
-	%10 = load volatile i32* @g8, align 4		; <i32> [#uses=1]
-	%11 = load volatile i32* @g9, align 4		; <i32> [#uses=1]
-	%12 = load volatile i32* @g10, align 4		; <i32> [#uses=1]
-	%13 = load volatile i32* @g11, align 4		; <i32> [#uses=2]
+	%2 = load volatile i32, i32* @g0, align 4		; <i32> [#uses=1]
+	%3 = load volatile i32, i32* @g1, align 4		; <i32> [#uses=1]
+	%4 = load volatile i32, i32* @g2, align 4		; <i32> [#uses=1]
+	%5 = load volatile i32, i32* @g3, align 4		; <i32> [#uses=1]
+	%6 = load volatile i32, i32* @g4, align 4		; <i32> [#uses=1]
+	%7 = load volatile i32, i32* @g5, align 4		; <i32> [#uses=1]
+	%8 = load volatile i32, i32* @g6, align 4		; <i32> [#uses=1]
+	%9 = load volatile i32, i32* @g7, align 4		; <i32> [#uses=1]
+	%10 = load volatile i32, i32* @g8, align 4		; <i32> [#uses=1]
+	%11 = load volatile i32, i32* @g9, align 4		; <i32> [#uses=1]
+	%12 = load volatile i32, i32* @g10, align 4		; <i32> [#uses=1]
+	%13 = load volatile i32, i32* @g11, align 4		; <i32> [#uses=2]
 	%14 = getelementptr [100 x i32], [100 x i32]* %x, i32 0, i32 50		; <i32*> [#uses=1]
 	store i32 %13, i32* %14, align 4
 	store volatile i32 %13, i32* @g11, align 4
diff --git a/llvm/test/CodeGen/XCore/trampoline.ll b/llvm/test/CodeGen/XCore/trampoline.ll
index 45d4bf4..a728000 100644
--- a/llvm/test/CodeGen/XCore/trampoline.ll
+++ b/llvm/test/CodeGen/XCore/trampoline.ll
@@ -29,7 +29,7 @@
 ; CHECK-NEXT: ldw r0, r11[0]
 ; CHECK-NEXT: retsp 0
   %0 = getelementptr inbounds %struct.FRAME.f, %struct.FRAME.f* %CHAIN.1, i32 0, i32 0
-  %1 = load i32* %0, align 4
+  %1 = load i32, i32* %0, align 4
   ret i32 %1
 }
 
diff --git a/llvm/test/CodeGen/XCore/unaligned_load.ll b/llvm/test/CodeGen/XCore/unaligned_load.ll
index b8b8827..4bec151 100644
--- a/llvm/test/CodeGen/XCore/unaligned_load.ll
+++ b/llvm/test/CodeGen/XCore/unaligned_load.ll
@@ -5,7 +5,7 @@
 ; CHECK: bl __misaligned_load
 define i32 @align1(i32* %p) nounwind {
 entry:
-	%0 = load i32* %p, align 1		; <i32> [#uses=1]
+	%0 = load i32, i32* %p, align 1		; <i32> [#uses=1]
 	ret i32 %0
 }
 
@@ -16,7 +16,7 @@
 ; CHECK: or
 define i32 @align2(i32* %p) nounwind {
 entry:
-	%0 = load i32* %p, align 2		; <i32> [#uses=1]
+	%0 = load i32, i32* %p, align 2		; <i32> [#uses=1]
 	ret i32 %0
 }
 
@@ -29,6 +29,6 @@
 ; CHECK: or
 define i32 @align3() nounwind {
 entry:
-	%0 = load i32* bitcast (i8* getelementptr ([5 x i8]* @a, i32 0, i32 1) to i32*), align 1
+	%0 = load i32, i32* bitcast (i8* getelementptr ([5 x i8]* @a, i32 0, i32 1) to i32*), align 1
 	ret i32 %0
 }
diff --git a/llvm/test/CodeGen/XCore/unaligned_store_combine.ll b/llvm/test/CodeGen/XCore/unaligned_store_combine.ll
index d1f4e6c..4b29a05 100644
--- a/llvm/test/CodeGen/XCore/unaligned_store_combine.ll
+++ b/llvm/test/CodeGen/XCore/unaligned_store_combine.ll
@@ -7,7 +7,7 @@
 ; CHECK-LABEL: f:
 ; CHECK: ldc r2, 8
 ; CHECK: bl memmove
-	%0 = load i64* %src, align 1
+	%0 = load i64, i64* %src, align 1
 	store i64 %0, i64* %dst, align 1
 	ret void
 }
diff --git a/llvm/test/CodeGen/XCore/zextfree.ll b/llvm/test/CodeGen/XCore/zextfree.ll
index 48dce88..d1e2b7f 100644
--- a/llvm/test/CodeGen/XCore/zextfree.ll
+++ b/llvm/test/CodeGen/XCore/zextfree.ll
@@ -4,7 +4,7 @@
 ; CHECK-NOT: zext
 define void @test(i8* %s1) {
 entry:
-  %u8 = load i8* %s1, align 1
+  %u8 = load i8, i8* %s1, align 1
   %bool = icmp eq i8 %u8, 0
   br label %BB1
 BB1:
diff --git a/llvm/test/DebugInfo/2010-01-05-DbgScope.ll b/llvm/test/DebugInfo/2010-01-05-DbgScope.ll
index d559720..c500b56 100644
--- a/llvm/test/DebugInfo/2010-01-05-DbgScope.ll
+++ b/llvm/test/DebugInfo/2010-01-05-DbgScope.ll
@@ -2,7 +2,7 @@
 ; PR 5942
 define i8* @foo() nounwind {
 entry:
-  %0 = load i32* undef, align 4, !dbg !0          ; <i32> [#uses=1]
+  %0 = load i32, i32* undef, align 4, !dbg !0          ; <i32> [#uses=1]
   %1 = inttoptr i32 %0 to i8*, !dbg !0            ; <i8*> [#uses=1]
   ret i8* %1, !dbg !10
 
diff --git a/llvm/test/DebugInfo/2010-03-24-MemberFn.ll b/llvm/test/DebugInfo/2010-03-24-MemberFn.ll
index 7b09109..bb14149 100644
--- a/llvm/test/DebugInfo/2010-03-24-MemberFn.ll
+++ b/llvm/test/DebugInfo/2010-03-24-MemberFn.ll
@@ -11,12 +11,12 @@
   call void @llvm.dbg.declare(metadata %struct.S* %s1, metadata !0, metadata !{!"0x102"}), !dbg !16
   %1 = call i32 @_ZN1S3fooEv(%struct.S* %s1) nounwind, !dbg !17 ; <i32> [#uses=1]
   store i32 %1, i32* %0, align 4, !dbg !17
-  %2 = load i32* %0, align 4, !dbg !17            ; <i32> [#uses=1]
+  %2 = load i32, i32* %0, align 4, !dbg !17            ; <i32> [#uses=1]
   store i32 %2, i32* %retval, align 4, !dbg !17
   br label %return, !dbg !17
 
 return:                                           ; preds = %entry
-  %retval1 = load i32* %retval, !dbg !17          ; <i32> [#uses=1]
+  %retval1 = load i32, i32* %retval, !dbg !17          ; <i32> [#uses=1]
   ret i32 %retval1, !dbg !16
 }
 
@@ -30,7 +30,7 @@
   br label %return, !dbg !21
 
 return:                                           ; preds = %entry
-  %retval1 = load i32* %retval, !dbg !21          ; <i32> [#uses=1]
+  %retval1 = load i32, i32* %retval, !dbg !21          ; <i32> [#uses=1]
   ret i32 %retval1, !dbg !22
 }
 
diff --git a/llvm/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll b/llvm/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
index ce52d24..05990e4 100644
--- a/llvm/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
+++ b/llvm/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
@@ -29,7 +29,7 @@
   call void @llvm.dbg.declare(metadata %class.A* %b, metadata !0, metadata !{!"0x102"}), !dbg !14
   %call = call i32 @_ZN1B2fnEv(%class.A* %b), !dbg !15 ; <i32> [#uses=1]
   store i32 %call, i32* %retval, !dbg !15
-  %0 = load i32* %retval, !dbg !16                ; <i32> [#uses=1]
+  %0 = load i32, i32* %retval, !dbg !16                ; <i32> [#uses=1]
   ret i32 %0, !dbg !16
 }
 
@@ -43,14 +43,14 @@
   %i = alloca i32, align 4                        ; <i32*> [#uses=2]
   store %class.A* %this, %class.A** %this.addr
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !17, metadata !{!"0x102"}), !dbg !18
-  %this1 = load %class.A** %this.addr             ; <%class.A*> [#uses=0]
+  %this1 = load %class.A*, %class.A** %this.addr             ; <%class.A*> [#uses=0]
   call void @llvm.dbg.declare(metadata %class.A* %a, metadata !19, metadata !{!"0x102"}), !dbg !27
   call void @llvm.dbg.declare(metadata i32* %i, metadata !28, metadata !{!"0x102"}), !dbg !29
   %call = call i32 @_ZZN1B2fnEvEN1A3fooEv(%class.A* %a), !dbg !30 ; <i32> [#uses=1]
   store i32 %call, i32* %i, !dbg !30
-  %tmp = load i32* %i, !dbg !31                   ; <i32> [#uses=1]
+  %tmp = load i32, i32* %i, !dbg !31                   ; <i32> [#uses=1]
   store i32 %tmp, i32* %retval, !dbg !31
-  %0 = load i32* %retval, !dbg !32                ; <i32> [#uses=1]
+  %0 = load i32, i32* %retval, !dbg !32                ; <i32> [#uses=1]
   ret i32 %0, !dbg !32
 }
 
@@ -60,9 +60,9 @@
   %this.addr = alloca %class.A*, align 8          ; <%class.A**> [#uses=2]
   store %class.A* %this, %class.A** %this.addr
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !33, metadata !{!"0x102"}), !dbg !34
-  %this1 = load %class.A** %this.addr             ; <%class.A*> [#uses=0]
+  %this1 = load %class.A*, %class.A** %this.addr             ; <%class.A*> [#uses=0]
   store i32 42, i32* %retval, !dbg !35
-  %0 = load i32* %retval, !dbg !35                ; <i32> [#uses=1]
+  %0 = load i32, i32* %retval, !dbg !35                ; <i32> [#uses=1]
   ret i32 %0, !dbg !35
 }
 
diff --git a/llvm/test/DebugInfo/2010-04-19-FramePtr.ll b/llvm/test/DebugInfo/2010-04-19-FramePtr.ll
index fe5a1f4..2667c96 100644
--- a/llvm/test/DebugInfo/2010-04-19-FramePtr.ll
+++ b/llvm/test/DebugInfo/2010-04-19-FramePtr.ll
@@ -10,12 +10,12 @@
   %0 = alloca i32                                 ; <i32*> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   store i32 42, i32* %0, align 4, !dbg !0
-  %1 = load i32* %0, align 4, !dbg !0             ; <i32> [#uses=1]
+  %1 = load i32, i32* %0, align 4, !dbg !0             ; <i32> [#uses=1]
   store i32 %1, i32* %retval, align 4, !dbg !0
   br label %return, !dbg !0
 
 return:                                           ; preds = %entry
-  %retval1 = load i32* %retval, !dbg !0           ; <i32> [#uses=1]
+  %retval1 = load i32, i32* %retval, !dbg !0           ; <i32> [#uses=1]
   ret i32 %retval1, !dbg !7
 }
 
diff --git a/llvm/test/DebugInfo/2010-05-03-OriginDIE.ll b/llvm/test/DebugInfo/2010-05-03-OriginDIE.ll
index de45b79..41d7130 100644
--- a/llvm/test/DebugInfo/2010-05-03-OriginDIE.ll
+++ b/llvm/test/DebugInfo/2010-05-03-OriginDIE.ll
@@ -19,17 +19,17 @@
   %0 = getelementptr inbounds %struct.gpm_t, %struct.gpm_t* %gpm, i32 0, i32 2, i32 0 ; <i8*> [#uses=1]
   %1 = getelementptr inbounds %struct.gpt_t, %struct.gpt_t* %gpt, i32 0, i32 9, i32 0 ; <i8*> [#uses=1]
   call void @uuid_LtoB(i8* %0, i8* %1) nounwind, !dbg !0
-  %a9 = load volatile i64* %data_addr.i18, align 8 ; <i64> [#uses=1]
+  %a9 = load volatile i64, i64* %data_addr.i18, align 8 ; <i64> [#uses=1]
   %a10 = call i64 @llvm.bswap.i64(i64 %a9) nounwind ; <i64> [#uses=1]
   %a11 = getelementptr inbounds %struct.gpt_t, %struct.gpt_t* %gpt, i32 0, i32 8, !dbg !7 ; <i64*> [#uses=1]
-  %a12 = load i64* %a11, align 4, !dbg !7         ; <i64> [#uses=1]
+  %a12 = load i64, i64* %a11, align 4, !dbg !7         ; <i64> [#uses=1]
   call void @llvm.dbg.declare(metadata i64* %data_addr.i17, metadata !8, metadata !{!"0x102"}) nounwind, !dbg !14
   store i64 %a12, i64* %data_addr.i17, align 8
   call void @llvm.dbg.value(metadata !6, i64 0, metadata !15, metadata !{!"0x102"}) nounwind
   call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !19, metadata !{!"0x102"}) nounwind
   call void @llvm.dbg.declare(metadata !6, metadata !23, metadata !{!"0x102"}) nounwind
   call void @llvm.dbg.value(metadata i64* %data_addr.i17, i64 0, metadata !34, metadata !{!"0x102"}) nounwind
-  %a13 = load volatile i64* %data_addr.i17, align 8 ; <i64> [#uses=1]
+  %a13 = load volatile i64, i64* %data_addr.i17, align 8 ; <i64> [#uses=1]
   %a14 = call i64 @llvm.bswap.i64(i64 %a13) nounwind ; <i64> [#uses=2]
   %a15 = add i64 %a10, %a14, !dbg !7              ; <i64> [#uses=1]
   %a16 = sub i64 %a15, %a14                       ; <i64> [#uses=1]
diff --git a/llvm/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll b/llvm/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
index 9f0f7c3..d99bcba 100644
--- a/llvm/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
+++ b/llvm/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
@@ -13,7 +13,7 @@
 
 define i32 @bar() nounwind ssp {
 entry:
-  %0 = load i32* @i, align 4, !dbg !17            ; <i32> [#uses=2]
+  %0 = load i32, i32* @i, align 4, !dbg !17            ; <i32> [#uses=2]
   tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !9, metadata !{!"0x102"}), !dbg !19
   tail call void @llvm.dbg.declare(metadata !29, metadata !10, metadata !{!"0x102"}), !dbg !21
   %1 = mul nsw i32 %0, %0, !dbg !22               ; <i32> [#uses=2]
diff --git a/llvm/test/DebugInfo/AArch64/frameindices.ll b/llvm/test/DebugInfo/AArch64/frameindices.ll
index b49afd0..0a237060 100644
--- a/llvm/test/DebugInfo/AArch64/frameindices.ll
+++ b/llvm/test/DebugInfo/AArch64/frameindices.ll
@@ -46,7 +46,7 @@
 entry:
   %agg.tmp = alloca %struct.A, align 8
   tail call void @llvm.dbg.declare(metadata %struct.A* %p1, metadata !30, metadata !46), !dbg !47
-  %0 = load i64* @a, align 8, !dbg !48, !tbaa !49
+  %0 = load i64, i64* @a, align 8, !dbg !48, !tbaa !49
   %call = tail call noalias i8* @_Znwm(i64 %0) #5, !dbg !53
   store i8* %call, i8** bitcast (i32** @b to i8**), align 8, !dbg !54, !tbaa !55
   %1 = getelementptr inbounds %struct.A, %struct.A* %agg.tmp, i64 0, i32 0, !dbg !57
@@ -74,7 +74,7 @@
   %0 = getelementptr inbounds %struct.A, %struct.A* %p1, i64 0, i32 0, !dbg !64
   %1 = getelementptr inbounds %struct.A, %struct.A* %agg.tmp.i, i64 0, i32 0, !dbg !65
   call void @llvm.lifetime.start(i64 24, i8* %1), !dbg !65
-  %2 = load i64* @a, align 8, !dbg !67, !tbaa !49
+  %2 = load i64, i64* @a, align 8, !dbg !67, !tbaa !49
   %call.i = tail call noalias i8* @_Znwm(i64 %2) #5, !dbg !68
   store i8* %call.i, i8** bitcast (i32** @b to i8**), align 8, !dbg !69, !tbaa !55
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %0, i64 24, i32 8, i1 false), !dbg !70
@@ -92,7 +92,7 @@
   tail call void @llvm.dbg.declare(metadata [15 x i8]* %agg.tmp.sroa.2, metadata !74, metadata !76), !dbg !77
   tail call void @llvm.dbg.declare(metadata [7 x i8]* %agg.tmp.sroa.4, metadata !74, metadata !78), !dbg !77
   tail call void @llvm.dbg.declare(metadata %struct.A* undef, metadata !38, metadata !79), !dbg !80
-  %0 = load i64* @a, align 8, !dbg !81, !tbaa !49
+  %0 = load i64, i64* @a, align 8, !dbg !81, !tbaa !49
   tail call void @llvm.dbg.value(metadata %struct.B* %d, i64 0, metadata !39, metadata !79), !dbg !82
   %call = call %struct.B* @_ZN1BC1El(%struct.B* %d, i64 %0), !dbg !82
   call void @llvm.dbg.value(metadata i8 1, i64 0, metadata !38, metadata !83), !dbg !80
@@ -102,7 +102,7 @@
   call void @llvm.dbg.declare(metadata %struct.A* undef, metadata !74, metadata !46), !dbg !77
   %1 = getelementptr inbounds %struct.A, %struct.A* %agg.tmp.i.i, i64 0, i32 0, !dbg !85
   call void @llvm.lifetime.start(i64 24, i8* %1), !dbg !85
-  %2 = load i64* @a, align 8, !dbg !87, !tbaa !49
+  %2 = load i64, i64* @a, align 8, !dbg !87, !tbaa !49
   %call.i.i5 = invoke noalias i8* @_Znwm(i64 %2) #5
           to label %call.i.i.noexc unwind label %lpad, !dbg !88
 
diff --git a/llvm/test/DebugInfo/AArch64/struct_by_value.ll b/llvm/test/DebugInfo/AArch64/struct_by_value.ll
index 0e65bf2..ab99a3d 100644
--- a/llvm/test/DebugInfo/AArch64/struct_by_value.ll
+++ b/llvm/test/DebugInfo/AArch64/struct_by_value.ll
@@ -34,7 +34,7 @@
 entry:
   call void @llvm.dbg.declare(metadata %struct.five* %f, metadata !17, metadata !{!"0x102\006"}), !dbg !18
   %a = getelementptr inbounds %struct.five, %struct.five* %f, i32 0, i32 0, !dbg !19
-  %0 = load i32* %a, align 4, !dbg !19
+  %0 = load i32, i32* %a, align 4, !dbg !19
   ret i32 %0, !dbg !19
 }
 
diff --git a/llvm/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll b/llvm/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
index da2980a..5ac1d42 100644
--- a/llvm/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
+++ b/llvm/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll
@@ -47,7 +47,7 @@
 
 for.body.for.body_crit_edge:                      ; preds = %for.body
   %arrayidx2.phi.trans.insert = getelementptr inbounds float, float* %vla, i32 %inc
-  %.pre = load float* %arrayidx2.phi.trans.insert, align 4, !dbg !31, !tbaa !26
+  %.pre = load float, float* %arrayidx2.phi.trans.insert, align 4, !dbg !31, !tbaa !26
   br label %for.body, !dbg !30
 
 for.end:                                          ; preds = %for.body, %entry
diff --git a/llvm/test/DebugInfo/COFF/cpp-mangling.ll b/llvm/test/DebugInfo/COFF/cpp-mangling.ll
index 85bdd4b..cb4b926c 100644
--- a/llvm/test/DebugInfo/COFF/cpp-mangling.ll
+++ b/llvm/test/DebugInfo/COFF/cpp-mangling.ll
@@ -18,7 +18,7 @@
 entry:
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  %0 = load i32* %x.addr, align 4, !dbg !11
+  %0 = load i32, i32* %x.addr, align 4, !dbg !11
   %mul = mul nsw i32 %0, 2, !dbg !11
   ret i32 %mul, !dbg !11
 }
diff --git a/llvm/test/DebugInfo/Inputs/line.ll b/llvm/test/DebugInfo/Inputs/line.ll
index 1a4a908..9d55152 100644
--- a/llvm/test/DebugInfo/Inputs/line.ll
+++ b/llvm/test/DebugInfo/Inputs/line.ll
@@ -18,12 +18,12 @@
   %b.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
   store i32 %b, i32* %b.addr, align 4
-  %0 = load i32* %a.addr, align 4, !dbg !10
+  %0 = load i32, i32* %a.addr, align 4, !dbg !10
   %tobool = icmp ne i32 %0, 0, !dbg !10
   br i1 %tobool, label %land.rhs, label %land.end, !dbg !11
 
 land.rhs:                                         ; preds = %entry
-  %1 = load i32* %b.addr, align 4, !dbg !12
+  %1 = load i32, i32* %b.addr, align 4, !dbg !12
   %tobool1 = icmp ne i32 %1, 0, !dbg !12
   br label %land.end
 
diff --git a/llvm/test/DebugInfo/PR20038.ll b/llvm/test/DebugInfo/PR20038.ll
index bfee8d2..c684ba4 100644
--- a/llvm/test/DebugInfo/PR20038.ll
+++ b/llvm/test/DebugInfo/PR20038.ll
@@ -58,7 +58,7 @@
   %this.addr.i = alloca %struct.C*, align 8, !dbg !22
   %agg.tmp.ensured = alloca %struct.C, align 1
   %cleanup.cond = alloca i1
-  %0 = load i8* @b, align 1, !dbg !24
+  %0 = load i8, i8* @b, align 1, !dbg !24
   %tobool = trunc i8 %0 to i1, !dbg !24
   store i1 false, i1* %cleanup.cond
   br i1 %tobool, label %land.rhs, label %land.end, !dbg !24
@@ -69,16 +69,16 @@
 
 land.end:                                         ; preds = %land.rhs, %entry
   %1 = phi i1 [ false, %entry ], [ true, %land.rhs ]
-  %cleanup.is_active = load i1* %cleanup.cond, !dbg !27
+  %cleanup.is_active = load i1, i1* %cleanup.cond, !dbg !27
   br i1 %cleanup.is_active, label %cleanup.action, label %cleanup.done, !dbg !27
 
 cleanup.action:                                   ; preds = %land.end
   store %struct.C* %agg.tmp.ensured, %struct.C** %this.addr.i, align 8, !dbg !22
   call void @llvm.dbg.declare(metadata %struct.C** %this.addr.i, metadata !29, metadata !{!"0x102"}), !dbg !31
-  %this1.i = load %struct.C** %this.addr.i, !dbg !22
+  %this1.i = load %struct.C*, %struct.C** %this.addr.i, !dbg !22
   store %struct.C* %this1.i, %struct.C** %this.addr.i.i, align 8, !dbg !21
   call void @llvm.dbg.declare(metadata %struct.C** %this.addr.i.i, metadata !32, metadata !{!"0x102"}), !dbg !33
-  %this1.i.i = load %struct.C** %this.addr.i.i, !dbg !21
+  %this1.i.i = load %struct.C*, %struct.C** %this.addr.i.i, !dbg !21
   br label %cleanup.done, !dbg !22
 
 cleanup.done:                                     ; preds = %cleanup.action, %land.end
@@ -92,10 +92,10 @@
   %this.addr = alloca %struct.C*, align 8
   store %struct.C* %this, %struct.C** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !29, metadata !{!"0x102"}), !dbg !38
-  %this1 = load %struct.C** %this.addr
+  %this1 = load %struct.C*, %struct.C** %this.addr
   store %struct.C* %this1, %struct.C** %this.addr.i, align 8, !dbg !37
   call void @llvm.dbg.declare(metadata %struct.C** %this.addr.i, metadata !32, metadata !{!"0x102"}), !dbg !39
-  %this1.i = load %struct.C** %this.addr.i, !dbg !37
+  %this1.i = load %struct.C*, %struct.C** %this.addr.i, !dbg !37
   ret void, !dbg !37
 }
 
@@ -105,7 +105,7 @@
   %this.addr = alloca %struct.C*, align 8
   store %struct.C* %this, %struct.C** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !32, metadata !{!"0x102"}), !dbg !40
-  %this1 = load %struct.C** %this.addr
+  %this1 = load %struct.C*, %struct.C** %this.addr
   ret void, !dbg !41
 }
 
diff --git a/llvm/test/DebugInfo/SystemZ/variable-loc.ll b/llvm/test/DebugInfo/SystemZ/variable-loc.ll
index 9a4a7bd..75c6288 100644
--- a/llvm/test/DebugInfo/SystemZ/variable-loc.ll
+++ b/llvm/test/DebugInfo/SystemZ/variable-loc.ll
@@ -42,7 +42,7 @@
   %arraydecay1 = getelementptr inbounds [100 x i32], [100 x i32]* %main_arr, i32 0, i32 0, !dbg !26
   %call = call i32 @sum_array(i32* %arraydecay1, i32 100), !dbg !26
   store i32 %call, i32* %val, align 4, !dbg !26
-  %0 = load i32* %val, align 4, !dbg !27
+  %0 = load i32, i32* %val, align 4, !dbg !27
   %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), i32 %0), !dbg !27
   ret i32 0, !dbg !28
 }
diff --git a/llvm/test/DebugInfo/X86/2010-04-13-PubType.ll b/llvm/test/DebugInfo/X86/2010-04-13-PubType.ll
index 0aec036..a3b827b 100644
--- a/llvm/test/DebugInfo/X86/2010-04-13-PubType.ll
+++ b/llvm/test/DebugInfo/X86/2010-04-13-PubType.ll
@@ -17,12 +17,12 @@
   call void @llvm.dbg.declare(metadata %struct.Y** %y_addr, metadata !14, metadata !{!"0x102"}), !dbg !13
   store %struct.Y* %y, %struct.Y** %y_addr
   store i32 0, i32* %0, align 4, !dbg !13
-  %1 = load i32* %0, align 4, !dbg !13            ; <i32> [#uses=1]
+  %1 = load i32, i32* %0, align 4, !dbg !13            ; <i32> [#uses=1]
   store i32 %1, i32* %retval, align 4, !dbg !13
   br label %return, !dbg !13
 
 return:                                           ; preds = %entry
-  %retval1 = load i32* %retval, !dbg !13          ; <i32> [#uses=1]
+  %retval1 = load i32, i32* %retval, !dbg !13          ; <i32> [#uses=1]
   ret i32 %retval1, !dbg !15
 }
 
diff --git a/llvm/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll b/llvm/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
index d1beadc..8f2a66c 100644
--- a/llvm/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
+++ b/llvm/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll
@@ -8,9 +8,9 @@
 define i32 @f() nounwind {
   %LOC = alloca i32, align 4
   call void @llvm.dbg.declare(metadata i32* %LOC, metadata !15, metadata !{!"0x102"}), !dbg !17
-  %1 = load i32* @GLB, align 4, !dbg !18
+  %1 = load i32, i32* @GLB, align 4, !dbg !18
   store i32 %1, i32* %LOC, align 4, !dbg !18
-  %2 = load i32* @GLB, align 4, !dbg !19
+  %2 = load i32, i32* @GLB, align 4, !dbg !19
   ret i32 %2, !dbg !19
 }
 
diff --git a/llvm/test/DebugInfo/X86/2011-12-16-BadStructRef.ll b/llvm/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
index 799829b..d5ecf81 100644
--- a/llvm/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
+++ b/llvm/test/DebugInfo/X86/2011-12-16-BadStructRef.ll
@@ -33,8 +33,8 @@
   call void @llvm.dbg.declare(metadata %struct.bar** %this.addr, metadata !58, metadata !{!"0x102"}), !dbg !59
   store i32 %x, i32* %x.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !60, metadata !{!"0x102"}), !dbg !61
-  %this1 = load %struct.bar** %this.addr
-  %0 = load i32* %x.addr, align 4, !dbg !62
+  %this1 = load %struct.bar*, %struct.bar** %this.addr
+  %0 = load i32, i32* %x.addr, align 4, !dbg !62
   call void @_ZN3barC2Ei(%struct.bar* %this1, i32 %0), !dbg !62
   ret void, !dbg !62
 }
@@ -47,9 +47,9 @@
   call void @llvm.dbg.declare(metadata %struct.bar** %this.addr, metadata !63, metadata !{!"0x102"}), !dbg !64
   store i32 %x, i32* %x.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !65, metadata !{!"0x102"}), !dbg !66
-  %this1 = load %struct.bar** %this.addr
+  %this1 = load %struct.bar*, %struct.bar** %this.addr
   %b = getelementptr inbounds %struct.bar, %struct.bar* %this1, i32 0, i32 0, !dbg !67
-  %0 = load i32* %x.addr, align 4, !dbg !67
+  %0 = load i32, i32* %x.addr, align 4, !dbg !67
   call void @_ZN3bazC1Ei(%struct.baz* %b, i32 %0), !dbg !67
   %1 = getelementptr inbounds %struct.bar, %struct.bar* %this1, i32 0, i32 1, !dbg !67
   %b2 = getelementptr inbounds %struct.bar, %struct.bar* %this1, i32 0, i32 0, !dbg !67
@@ -65,8 +65,8 @@
   call void @llvm.dbg.declare(metadata %struct.baz** %this.addr, metadata !70, metadata !{!"0x102"}), !dbg !71
   store i32 %a, i32* %a.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !72, metadata !{!"0x102"}), !dbg !73
-  %this1 = load %struct.baz** %this.addr
-  %0 = load i32* %a.addr, align 4, !dbg !74
+  %this1 = load %struct.baz*, %struct.baz** %this.addr
+  %0 = load i32, i32* %a.addr, align 4, !dbg !74
   call void @_ZN3bazC2Ei(%struct.baz* %this1, i32 %0), !dbg !74
   ret void, !dbg !74
 }
@@ -79,9 +79,9 @@
   call void @llvm.dbg.declare(metadata %struct.baz** %this.addr, metadata !75, metadata !{!"0x102"}), !dbg !76
   store i32 %a, i32* %a.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !77, metadata !{!"0x102"}), !dbg !78
-  %this1 = load %struct.baz** %this.addr
+  %this1 = load %struct.baz*, %struct.baz** %this.addr
   %h = getelementptr inbounds %struct.baz, %struct.baz* %this1, i32 0, i32 0, !dbg !79
-  %0 = load i32* %a.addr, align 4, !dbg !79
+  %0 = load i32, i32* %a.addr, align 4, !dbg !79
   store i32 %0, i32* %h, align 4, !dbg !79
   ret void, !dbg !80
 }
diff --git a/llvm/test/DebugInfo/X86/DW_AT_byte_size.ll b/llvm/test/DebugInfo/X86/DW_AT_byte_size.ll
index 9cd7f8a..5365fb8 100644
--- a/llvm/test/DebugInfo/X86/DW_AT_byte_size.ll
+++ b/llvm/test/DebugInfo/X86/DW_AT_byte_size.ll
@@ -15,9 +15,9 @@
   %a.addr = alloca %struct.A*, align 8
   store %struct.A* %a, %struct.A** %a.addr, align 8
   call void @llvm.dbg.declare(metadata %struct.A** %a.addr, metadata !16, metadata !{!"0x102"}), !dbg !17
-  %0 = load %struct.A** %a.addr, align 8, !dbg !18
+  %0 = load %struct.A*, %struct.A** %a.addr, align 8, !dbg !18
   %b = getelementptr inbounds %struct.A, %struct.A* %0, i32 0, i32 0, !dbg !18
-  %1 = load i32* %b, align 4, !dbg !18
+  %1 = load i32, i32* %b, align 4, !dbg !18
   ret i32 %1, !dbg !18
 }
 
diff --git a/llvm/test/DebugInfo/X86/DW_AT_linkage_name.ll b/llvm/test/DebugInfo/X86/DW_AT_linkage_name.ll
index e395e06..dfc2f8d 100644
--- a/llvm/test/DebugInfo/X86/DW_AT_linkage_name.ll
+++ b/llvm/test/DebugInfo/X86/DW_AT_linkage_name.ll
@@ -39,7 +39,7 @@
   %this.addr = alloca %struct.A*, align 8
   store %struct.A* %this, %struct.A** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %struct.A** %this.addr, metadata !26, metadata !{!"0x102"}), !dbg !28
-  %this1 = load %struct.A** %this.addr
+  %this1 = load %struct.A*, %struct.A** %this.addr
   ret void, !dbg !29
 }
 
@@ -52,7 +52,7 @@
   %this.addr = alloca %struct.A*, align 8
   store %struct.A* %this, %struct.A** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %struct.A** %this.addr, metadata !30, metadata !{!"0x102"}), !dbg !31
-  %this1 = load %struct.A** %this.addr
+  %this1 = load %struct.A*, %struct.A** %this.addr
   call void @_ZN1AD2Ev(%struct.A* %this1), !dbg !32
   ret void, !dbg !33
 }
diff --git a/llvm/test/DebugInfo/X86/DW_AT_object_pointer.ll b/llvm/test/DebugInfo/X86/DW_AT_object_pointer.ll
index a3c5475..3a199ae 100644
--- a/llvm/test/DebugInfo/X86/DW_AT_object_pointer.ll
+++ b/llvm/test/DebugInfo/X86/DW_AT_object_pointer.ll
@@ -21,7 +21,7 @@
   call void @llvm.dbg.declare(metadata %class.A* %a, metadata !21, metadata !{!"0x102"}), !dbg !23
   call void @_ZN1AC1Ev(%class.A* %a), !dbg !24
   %m_a = getelementptr inbounds %class.A, %class.A* %a, i32 0, i32 0, !dbg !25
-  %1 = load i32* %m_a, align 4, !dbg !25
+  %1 = load i32, i32* %m_a, align 4, !dbg !25
   ret i32 %1, !dbg !25
 }
 
@@ -32,7 +32,7 @@
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !26, metadata !{!"0x102"}), !dbg !28
-  %this1 = load %class.A** %this.addr
+  %this1 = load %class.A*, %class.A** %this.addr
   call void @_ZN1AC2Ev(%class.A* %this1), !dbg !29
   ret void, !dbg !29
 }
@@ -42,7 +42,7 @@
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !30, metadata !{!"0x102"}), !dbg !31
-  %this1 = load %class.A** %this.addr
+  %this1 = load %class.A*, %class.A** %this.addr
   %m_a = getelementptr inbounds %class.A, %class.A* %this1, i32 0, i32 0, !dbg !32
   store i32 0, i32* %m_a, align 4, !dbg !32
   ret void, !dbg !34
diff --git a/llvm/test/DebugInfo/X86/arguments.ll b/llvm/test/DebugInfo/X86/arguments.ll
index b6850e0..68bc773 100644
--- a/llvm/test/DebugInfo/X86/arguments.ll
+++ b/llvm/test/DebugInfo/X86/arguments.ll
@@ -34,7 +34,7 @@
   call void @llvm.dbg.declare(metadata %struct.foo* %f, metadata !19, metadata !{!"0x102"}), !dbg !20
   call void @llvm.dbg.declare(metadata %struct.foo* %g, metadata !21, metadata !{!"0x102"}), !dbg !20
   %i = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 0, i32 0, !dbg !22
-  %0 = load i32* %i, align 4, !dbg !22
+  %0 = load i32, i32* %i, align 4, !dbg !22
   %inc = add nsw i32 %0, 1, !dbg !22
   store i32 %inc, i32* %i, align 4, !dbg !22
   ret void, !dbg !23
diff --git a/llvm/test/DebugInfo/X86/array.ll b/llvm/test/DebugInfo/X86/array.ll
index 3230e77..57d522d 100644
--- a/llvm/test/DebugInfo/X86/array.ll
+++ b/llvm/test/DebugInfo/X86/array.ll
@@ -42,7 +42,7 @@
   %2 = getelementptr inbounds [4 x i32], [4 x i32]* %array, i64 0, i64 0, !dbg !37
   call void @f(i32* %2), !dbg !37
   tail call void @llvm.dbg.value(metadata [4 x i32]* %array, i64 0, metadata !21, metadata !{!"0x102"}), !dbg !36
-  %3 = load i32* %2, align 16, !dbg !38, !tbaa !30
+  %3 = load i32, i32* %2, align 16, !dbg !38, !tbaa !30
   ret i32 %3, !dbg !38
 }
 
diff --git a/llvm/test/DebugInfo/X86/array2.ll b/llvm/test/DebugInfo/X86/array2.ll
index b196f44..b456f42 100644
--- a/llvm/test/DebugInfo/X86/array2.ll
+++ b/llvm/test/DebugInfo/X86/array2.ll
@@ -30,7 +30,7 @@
   %p.addr = alloca i32*, align 8
   store i32* %p, i32** %p.addr, align 8
   call void @llvm.dbg.declare(metadata i32** %p.addr, metadata !19, metadata !{!"0x102"}), !dbg !20
-  %0 = load i32** %p.addr, align 8, !dbg !21
+  %0 = load i32*, i32** %p.addr, align 8, !dbg !21
   %arrayidx = getelementptr inbounds i32, i32* %0, i64 0, !dbg !21
   store i32 42, i32* %arrayidx, align 4, !dbg !21
   ret void, !dbg !22
@@ -57,7 +57,7 @@
   %arraydecay = getelementptr inbounds [4 x i32], [4 x i32]* %array, i32 0, i32 0, !dbg !31
   call void @f(i32* %arraydecay), !dbg !31
   %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %array, i32 0, i64 0, !dbg !32
-  %1 = load i32* %arrayidx, align 4, !dbg !32
+  %1 = load i32, i32* %arrayidx, align 4, !dbg !32
   ret i32 %1, !dbg !32
 }
 
diff --git a/llvm/test/DebugInfo/X86/block-capture.ll b/llvm/test/DebugInfo/X86/block-capture.ll
index bc74840..72ee596 100644
--- a/llvm/test/DebugInfo/X86/block-capture.ll
+++ b/llvm/test/DebugInfo/X86/block-capture.ll
@@ -32,18 +32,18 @@
   %.block_descriptor.addr = alloca i8*, align 8
   %block.addr = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void (...)* }>*, align 8
   store i8* %.block_descriptor, i8** %.block_descriptor.addr, align 8
-  %0 = load i8** %.block_descriptor.addr
+  %0 = load i8*, i8** %.block_descriptor.addr
   call void @llvm.dbg.value(metadata i8* %0, i64 0, metadata !47, metadata !43), !dbg !66
   call void @llvm.dbg.declare(metadata i8* %.block_descriptor, metadata !47, metadata !43), !dbg !66
   %block = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void (...)* }>*, !dbg !67
   store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void (...)* }>* %block, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void (...)* }>** %block.addr, align 8
   call void @llvm.dbg.declare(metadata <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void (...)* }>** %block.addr, metadata !68, metadata !69), !dbg !70
   %block.capture.addr = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void (...)* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, void (...)* }>* %block, i32 0, i32 5, !dbg !71
-  %1 = load void (...)** %block.capture.addr, align 8, !dbg !71
+  %1 = load void (...)*, void (...)** %block.capture.addr, align 8, !dbg !71
   %block.literal = bitcast void (...)* %1 to %struct.__block_literal_generic*, !dbg !71
   %2 = getelementptr inbounds %struct.__block_literal_generic, %struct.__block_literal_generic* %block.literal, i32 0, i32 3, !dbg !71
   %3 = bitcast %struct.__block_literal_generic* %block.literal to i8*, !dbg !71
-  %4 = load i8** %2, !dbg !71
+  %4 = load i8*, i8** %2, !dbg !71
   %5 = bitcast i8* %4 to void (i8*, ...)*, !dbg !71
   call void (i8*, ...)* %5(i8* %3), !dbg !71
   ret void, !dbg !73
diff --git a/llvm/test/DebugInfo/X86/byvalstruct.ll b/llvm/test/DebugInfo/X86/byvalstruct.ll
index d89ba35..794f417 100644
--- a/llvm/test/DebugInfo/X86/byvalstruct.ll
+++ b/llvm/test/DebugInfo/X86/byvalstruct.ll
@@ -74,7 +74,7 @@
   call void @llvm.dbg.declare(metadata %struct.ImageInfo* %info, metadata !33, metadata !{!"0x102"}), !dbg !34
   store i64 %length, i64* %length.addr, align 8
   call void @llvm.dbg.declare(metadata i64* %length.addr, metadata !35, metadata !{!"0x102"}), !dbg !36
-  %0 = load i8** %retval, !dbg !37
+  %0 = load i8*, i8** %retval, !dbg !37
   ret i8* %0, !dbg !37
 }
 
diff --git a/llvm/test/DebugInfo/X86/cu-ranges-odr.ll b/llvm/test/DebugInfo/X86/cu-ranges-odr.ll
index 9c80c75..680e060 100644
--- a/llvm/test/DebugInfo/X86/cu-ranges-odr.ll
+++ b/llvm/test/DebugInfo/X86/cu-ranges-odr.ll
@@ -38,9 +38,9 @@
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !27, metadata !{!"0x102"}), !dbg !29
   store i32 %i, i32* %i.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !30, metadata !{!"0x102"}), !dbg !31
-  %this1 = load %class.A** %this.addr
+  %this1 = load %class.A*, %class.A** %this.addr
   %a = getelementptr inbounds %class.A, %class.A* %this1, i32 0, i32 0, !dbg !31
-  %0 = load i32* %i.addr, align 4, !dbg !31
+  %0 = load i32, i32* %i.addr, align 4, !dbg !31
   store i32 %0, i32* %a, align 4, !dbg !31
   ret void, !dbg !31
 }
diff --git a/llvm/test/DebugInfo/X86/cu-ranges.ll b/llvm/test/DebugInfo/X86/cu-ranges.ll
index 0d872d8..2803f79 100644
--- a/llvm/test/DebugInfo/X86/cu-ranges.ll
+++ b/llvm/test/DebugInfo/X86/cu-ranges.ll
@@ -30,7 +30,7 @@
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !13, metadata !{!"0x102"}), !dbg !14
-  %0 = load i32* %a.addr, align 4, !dbg !14
+  %0 = load i32, i32* %a.addr, align 4, !dbg !14
   %add = add nsw i32 %0, 1, !dbg !14
   ret i32 %add, !dbg !14
 }
@@ -44,7 +44,7 @@
   %b.addr = alloca i32, align 4
   store i32 %b, i32* %b.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %b.addr, metadata !15, metadata !{!"0x102"}), !dbg !16
-  %0 = load i32* %b.addr, align 4, !dbg !16
+  %0 = load i32, i32* %b.addr, align 4, !dbg !16
   %add = add nsw i32 %0, 2, !dbg !16
   ret i32 %add, !dbg !16
 }
diff --git a/llvm/test/DebugInfo/X86/dbg-byval-parameter.ll b/llvm/test/DebugInfo/X86/dbg-byval-parameter.ll
index 1f5232a..ca6d292 100644
--- a/llvm/test/DebugInfo/X86/dbg-byval-parameter.ll
+++ b/llvm/test/DebugInfo/X86/dbg-byval-parameter.ll
@@ -12,14 +12,14 @@
   call void @llvm.dbg.declare(metadata %struct.Rect* %my_r0, metadata !0, metadata !{!"0x102"}), !dbg !15
   %1 = getelementptr inbounds %struct.Rect, %struct.Rect* %my_r0, i32 0, i32 0, !dbg !16 ; <%struct.Pt*> [#uses=1]
   %2 = getelementptr inbounds %struct.Pt, %struct.Pt* %1, i32 0, i32 0, !dbg !16 ; <double*> [#uses=1]
-  %3 = load double* %2, align 8, !dbg !16         ; <double> [#uses=1]
+  %3 = load double, double* %2, align 8, !dbg !16         ; <double> [#uses=1]
   store double %3, double* %0, align 8, !dbg !16
-  %4 = load double* %0, align 8, !dbg !16         ; <double> [#uses=1]
+  %4 = load double, double* %0, align 8, !dbg !16         ; <double> [#uses=1]
   store double %4, double* %retval, align 8, !dbg !16
   br label %return, !dbg !16
 
 return:                                           ; preds = %entry
-  %retval1 = load double* %retval, !dbg !16       ; <double> [#uses=1]
+  %retval1 = load double, double* %retval, !dbg !16       ; <double> [#uses=1]
   ret double %retval1, !dbg !16
 }
 
diff --git a/llvm/test/DebugInfo/X86/dbg-declare-arg.ll b/llvm/test/DebugInfo/X86/dbg-declare-arg.ll
index b270b13..2f683cf 100644
--- a/llvm/test/DebugInfo/X86/dbg-declare-arg.ll
+++ b/llvm/test/DebugInfo/X86/dbg-declare-arg.ll
@@ -17,12 +17,12 @@
   call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !26, metadata !{!"0x102"}), !dbg !27
   call void @llvm.dbg.declare(metadata i32* %j, metadata !28, metadata !{!"0x102"}), !dbg !30
   store i32 0, i32* %j, align 4, !dbg !31
-  %tmp = load i32* %i.addr, align 4, !dbg !32
+  %tmp = load i32, i32* %i.addr, align 4, !dbg !32
   %cmp = icmp eq i32 %tmp, 42, !dbg !32
   br i1 %cmp, label %if.then, label %if.end, !dbg !32
 
 if.then:                                          ; preds = %entry
-  %tmp1 = load i32* %i.addr, align 4, !dbg !33
+  %tmp1 = load i32, i32* %i.addr, align 4, !dbg !33
   %add = add nsw i32 %tmp1, 1, !dbg !33
   store i32 %add, i32* %j, align 4, !dbg !33
   br label %if.end, !dbg !35
@@ -30,12 +30,12 @@
 if.end:                                           ; preds = %if.then, %entry
   store i1 false, i1* %nrvo, !dbg !36
   call void @llvm.dbg.declare(metadata %class.A* %agg.result, metadata !37, metadata !{!"0x102"}), !dbg !39
-  %tmp2 = load i32* %j, align 4, !dbg !40
+  %tmp2 = load i32, i32* %j, align 4, !dbg !40
   %x = getelementptr inbounds %class.A, %class.A* %agg.result, i32 0, i32 0, !dbg !40
   store i32 %tmp2, i32* %x, align 4, !dbg !40
   store i1 true, i1* %nrvo, !dbg !41
   store i32 1, i32* %cleanup.dest.slot
-  %nrvo.val = load i1* %nrvo, !dbg !42
+  %nrvo.val = load i1, i1* %nrvo, !dbg !42
   br i1 %nrvo.val, label %nrvo.skipdtor, label %nrvo.unused, !dbg !42
 
 nrvo.unused:                                      ; preds = %if.end
@@ -53,7 +53,7 @@
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !43, metadata !{!"0x102"}), !dbg !44
-  %this1 = load %class.A** %this.addr
+  %this1 = load %class.A*, %class.A** %this.addr
   call void @_ZN1AD2Ev(%class.A* %this1)
   ret void, !dbg !45
 }
@@ -63,7 +63,7 @@
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !46, metadata !{!"0x102"}), !dbg !47
-  %this1 = load %class.A** %this.addr
+  %this1 = load %class.A*, %class.A** %this.addr
   %x = getelementptr inbounds %class.A, %class.A* %this1, i32 0, i32 0, !dbg !48
   store i32 1, i32* %x, align 4, !dbg !48
   ret void, !dbg !48
diff --git a/llvm/test/DebugInfo/X86/dbg-declare.ll b/llvm/test/DebugInfo/X86/dbg-declare.ll
index 2ede97b..c8f491b 100644
--- a/llvm/test/DebugInfo/X86/dbg-declare.ll
+++ b/llvm/test/DebugInfo/X86/dbg-declare.ll
@@ -8,15 +8,15 @@
   %cleanup.dest.slot = alloca i32
   store i32* %x, i32** %x.addr, align 8
   call void @llvm.dbg.declare(metadata i32** %x.addr, metadata !14, metadata !{!"0x102"}), !dbg !15
-  %0 = load i32** %x.addr, align 8, !dbg !16
-  %1 = load i32* %0, align 4, !dbg !16
+  %0 = load i32*, i32** %x.addr, align 8, !dbg !16
+  %1 = load i32, i32* %0, align 4, !dbg !16
   %2 = zext i32 %1 to i64, !dbg !16
   %3 = call i8* @llvm.stacksave(), !dbg !16
   store i8* %3, i8** %saved_stack, !dbg !16
   %vla = alloca i8, i64 %2, align 16, !dbg !16
   call void @llvm.dbg.declare(metadata i8* %vla, metadata !18, metadata !{!"0x102"}), !dbg !23
   store i32 1, i32* %cleanup.dest.slot
-  %4 = load i8** %saved_stack, !dbg !24
+  %4 = load i8*, i8** %saved_stack, !dbg !24
   call void @llvm.stackrestore(i8* %4), !dbg !24
   ret i32 0, !dbg !25
 }
diff --git a/llvm/test/DebugInfo/X86/dbg-prolog-end.ll b/llvm/test/DebugInfo/X86/dbg-prolog-end.ll
index 4aaaf4a..5008b86 100644
--- a/llvm/test/DebugInfo/X86/dbg-prolog-end.ll
+++ b/llvm/test/DebugInfo/X86/dbg-prolog-end.ll
@@ -11,14 +11,14 @@
   call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !7, metadata !{!"0x102"}), !dbg !8
   call void @llvm.dbg.declare(metadata i32* %j, metadata !9, metadata !{!"0x102"}), !dbg !11
   store i32 2, i32* %j, align 4, !dbg !12
-  %tmp = load i32* %j, align 4, !dbg !13
+  %tmp = load i32, i32* %j, align 4, !dbg !13
   %inc = add nsw i32 %tmp, 1, !dbg !13
   store i32 %inc, i32* %j, align 4, !dbg !13
-  %tmp1 = load i32* %j, align 4, !dbg !14
-  %tmp2 = load i32* %i.addr, align 4, !dbg !14
+  %tmp1 = load i32, i32* %j, align 4, !dbg !14
+  %tmp2 = load i32, i32* %i.addr, align 4, !dbg !14
   %add = add nsw i32 %tmp1, %tmp2, !dbg !14
   store i32 %add, i32* %j, align 4, !dbg !14
-  %tmp3 = load i32* %j, align 4, !dbg !15
+  %tmp3 = load i32, i32* %j, align 4, !dbg !15
   ret i32 %tmp3, !dbg !15
 }
 
diff --git a/llvm/test/DebugInfo/X86/dbg-value-dag-combine.ll b/llvm/test/DebugInfo/X86/dbg-value-dag-combine.ll
index cf6d18a..ea3fca8 100644
--- a/llvm/test/DebugInfo/X86/dbg-value-dag-combine.ll
+++ b/llvm/test/DebugInfo/X86/dbg-value-dag-combine.ll
@@ -13,7 +13,7 @@
   %1 = extractelement <4 x i32> %0, i32 0
   call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !9, metadata !{!"0x102"}), !dbg !11
   call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !14
-  %tmp2 = load i32 addrspace(1)* %ip, align 4, !dbg !15
+  %tmp2 = load i32, i32 addrspace(1)* %ip, align 4, !dbg !15
   %tmp3 = add i32 0, %tmp2, !dbg !15
 ; CHECK:  ##DEBUG_VALUE: idx <- E{{..$}}
   call void @llvm.dbg.value(metadata i32 %tmp3, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !15
diff --git a/llvm/test/DebugInfo/X86/dbg-value-location.ll b/llvm/test/DebugInfo/X86/dbg-value-location.ll
index 015ec89..13f54fe 100644
--- a/llvm/test/DebugInfo/X86/dbg-value-location.ll
+++ b/llvm/test/DebugInfo/X86/dbg-value-location.ll
@@ -19,7 +19,7 @@
 define i32 @foo(i32 %dev, i64 %cmd, i8* %data, i32 %data2) nounwind optsize ssp {
 entry:
   call void @llvm.dbg.value(metadata i32 %dev, i64 0, metadata !12, metadata !{!"0x102"}), !dbg !13
-  %tmp.i = load i32* @dfm, align 4, !dbg !14
+  %tmp.i = load i32, i32* @dfm, align 4, !dbg !14
   %cmp.i = icmp eq i32 %tmp.i, 0, !dbg !14
   br i1 %cmp.i, label %if.else, label %if.end.i, !dbg !14
 
diff --git a/llvm/test/DebugInfo/X86/dbg-value-range.ll b/llvm/test/DebugInfo/X86/dbg-value-range.ll
index afa3b50..fb80a5d 100644
--- a/llvm/test/DebugInfo/X86/dbg-value-range.ll
+++ b/llvm/test/DebugInfo/X86/dbg-value-range.ll
@@ -6,7 +6,7 @@
 entry:
   tail call void @llvm.dbg.value(metadata %struct.a* %b, i64 0, metadata !6, metadata !{!"0x102"}), !dbg !13
   %tmp1 = getelementptr inbounds %struct.a, %struct.a* %b, i64 0, i32 0, !dbg !14
-  %tmp2 = load i32* %tmp1, align 4, !dbg !14
+  %tmp2 = load i32, i32* %tmp1, align 4, !dbg !14
   tail call void @llvm.dbg.value(metadata i32 %tmp2, i64 0, metadata !11, metadata !{!"0x102"}), !dbg !14
   %call = tail call i32 (...)* @foo(i32 %tmp2) nounwind , !dbg !18
   %add = add nsw i32 %tmp2, 1, !dbg !19
diff --git a/llvm/test/DebugInfo/X86/dbg-value-terminator.ll b/llvm/test/DebugInfo/X86/dbg-value-terminator.ll
index 88c3ba2..9150d8b 100644
--- a/llvm/test/DebugInfo/X86/dbg-value-terminator.ll
+++ b/llvm/test/DebugInfo/X86/dbg-value-terminator.ll
@@ -85,7 +85,7 @@
   br i1 undef, label %may_unswitch_on.exit, label %"44.i", !dbg !12
 
 "44.i":                                           ; preds = %"42.i"
-  %2 = load %a** undef, align 8, !dbg !12
+  %2 = load %a*, %a** undef, align 8, !dbg !12
   %3 = bitcast %a* %2 to %a*, !dbg !12
   call void @llvm.dbg.value(metadata %a* %3, i64 0, metadata !6, metadata !{!"0x102"}), !dbg !12
   br label %may_unswitch_on.exit, !dbg !12
diff --git a/llvm/test/DebugInfo/X86/dbg_value_direct.ll b/llvm/test/DebugInfo/X86/dbg_value_direct.ll
index 6723ba5..20034c0 100644
--- a/llvm/test/DebugInfo/X86/dbg_value_direct.ll
+++ b/llvm/test/DebugInfo/X86/dbg_value_direct.ll
@@ -51,7 +51,7 @@
   %17 = lshr i64 %16, 3
   %18 = add i64 %17, 2147450880
   %19 = inttoptr i64 %18 to i8*
-  %20 = load i8* %19
+  %20 = load i8, i8* %19
   %21 = icmp ne i8 %20, 0
   call void @llvm.dbg.declare(metadata i32* %3, metadata !23, metadata !28)
   br i1 %21, label %22, label %28
@@ -91,8 +91,8 @@
 
 define internal void @asan.module_ctor()  "stack-protector-buffer-size"="1" {
   call void @__asan_init_v3()
-  %1 = load volatile i64* @__asan_mapping_offset
-  %2 = load volatile i64* @__asan_mapping_scale
+  %1 = load volatile i64, i64* @__asan_mapping_offset
+  %2 = load volatile i64, i64* @__asan_mapping_scale
   ret void
 }
 
diff --git a/llvm/test/DebugInfo/X86/debug-info-blocks.ll b/llvm/test/DebugInfo/X86/debug-info-blocks.ll
index cbf8ad2..1717e78 100644
--- a/llvm/test/DebugInfo/X86/debug-info-blocks.ll
+++ b/llvm/test/DebugInfo/X86/debug-info-blocks.ll
@@ -104,15 +104,15 @@
   call void @llvm.dbg.declare(metadata %0** %1, metadata !60, metadata !{!"0x102"}), !dbg !62
   store i8* %_cmd, i8** %2, align 8
   call void @llvm.dbg.declare(metadata i8** %2, metadata !63, metadata !{!"0x102"}), !dbg !62
-  %5 = load %0** %1, !dbg !65
+  %5 = load %0*, %0** %1, !dbg !65
   %6 = bitcast %0* %5 to i8*, !dbg !65
   %7 = getelementptr inbounds %struct._objc_super, %struct._objc_super* %3, i32 0, i32 0, !dbg !65
   store i8* %6, i8** %7, !dbg !65
-  %8 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_SUP_REFS_$_", !dbg !65
+  %8 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_SUP_REFS_$_", !dbg !65
   %9 = bitcast %struct._class_t* %8 to i8*, !dbg !65
   %10 = getelementptr inbounds %struct._objc_super, %struct._objc_super* %3, i32 0, i32 1, !dbg !65
   store i8* %9, i8** %10, !dbg !65
-  %11 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", !dbg !65, !invariant.load !67
+  %11 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", !dbg !65, !invariant.load !67
   %12 = call i8* bitcast (i8* (%struct._objc_super*, i8*, ...)* @objc_msgSendSuper2 to i8* (%struct._objc_super*, i8*)*)(%struct._objc_super* %3, i8* %11), !dbg !65
   %13 = bitcast i8* %12 to %0*, !dbg !65
   store %0* %13, %0** %1, align 8, !dbg !65
@@ -131,14 +131,14 @@
   %20 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 4, !dbg !68
   store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8*, i8*, i64 }* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %20, !dbg !68
   %21 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 5, !dbg !68
-  %22 = load %0** %1, align 8, !dbg !68
+  %22 = load %0*, %0** %1, align 8, !dbg !68
   store %0* %22, %0** %21, align 8, !dbg !68
   %23 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4 to void ()*, !dbg !68
   call void @run(void ()* %23), !dbg !68
   br label %24, !dbg !70
 
 ; <label>:24                                      ; preds = %15, %0
-  %25 = load %0** %1, align 8, !dbg !71
+  %25 = load %0*, %0** %1, align 8, !dbg !71
   %26 = bitcast %0* %25 to i8*, !dbg !71
   ret i8* %26, !dbg !71
 }
@@ -151,11 +151,11 @@
   %1 = alloca void ()*, align 8
   store void ()* %block, void ()** %1, align 8
   call void @llvm.dbg.declare(metadata void ()** %1, metadata !72, metadata !{!"0x102"}), !dbg !73
-  %2 = load void ()** %1, align 8, !dbg !74
+  %2 = load void ()*, void ()** %1, align 8, !dbg !74
   %3 = bitcast void ()* %2 to %struct.__block_literal_generic*, !dbg !74
   %4 = getelementptr inbounds %struct.__block_literal_generic, %struct.__block_literal_generic* %3, i32 0, i32 3, !dbg !74
   %5 = bitcast %struct.__block_literal_generic* %3 to i8*, !dbg !74
-  %6 = load i8** %4, !dbg !74
+  %6 = load i8*, i8** %4, !dbg !74
   %7 = bitcast i8* %6 to void (i8*)*, !dbg !74
   call void %7(i8* %5), !dbg !74
   ret void, !dbg !75
@@ -166,7 +166,7 @@
   %2 = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, align 8
   %d = alloca %1*, align 8
   store i8* %.block_descriptor, i8** %1, align 8
-  %3 = load i8** %1
+  %3 = load i8*, i8** %1
   call void @llvm.dbg.value(metadata i8* %3, i64 0, metadata !76, metadata !{!"0x102"}), !dbg !88
   call void @llvm.dbg.declare(metadata i8* %.block_descriptor, metadata !76, metadata !{!"0x102"}), !dbg !88
   %4 = bitcast i8* %.block_descriptor to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !88
@@ -174,25 +174,25 @@
   %5 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 5, !dbg !88
   call void @llvm.dbg.declare(metadata <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>** %2, metadata !89, metadata !111), !dbg !90
   call void @llvm.dbg.declare(metadata %1** %d, metadata !91, metadata !{!"0x102"}), !dbg !100
-  %6 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_", !dbg !100
+  %6 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_", !dbg !100
   %7 = bitcast %struct._class_t* %6 to i8*, !dbg !100
-  %8 = load i8** getelementptr inbounds (%struct._message_ref_t* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to %struct._message_ref_t*), i32 0, i32 0), !dbg !100
+  %8 = load i8*, i8** getelementptr inbounds (%struct._message_ref_t* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to %struct._message_ref_t*), i32 0, i32 0), !dbg !100
   %9 = bitcast i8* %8 to i8* (i8*, i8*)*, !dbg !100
   %10 = call i8* %9(i8* %7, i8* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to i8*)), !dbg !100
   %11 = bitcast i8* %10 to %1*, !dbg !100
-  %12 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", !dbg !100, !invariant.load !67
+  %12 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", !dbg !100, !invariant.load !67
   %13 = bitcast %1* %11 to i8*, !dbg !100
   %14 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %13, i8* %12), !dbg !100
   %15 = bitcast i8* %14 to %1*, !dbg !100
   store %1* %15, %1** %d, align 8, !dbg !100
-  %16 = load %1** %d, align 8, !dbg !101
+  %16 = load %1*, %1** %d, align 8, !dbg !101
   %17 = bitcast %1* %16 to i8*, !dbg !101
-  %18 = load i8** getelementptr inbounds (%struct._message_ref_t* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_count" to %struct._message_ref_t*), i32 0, i32 0), !dbg !101
+  %18 = load i8*, i8** getelementptr inbounds (%struct._message_ref_t* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_count" to %struct._message_ref_t*), i32 0, i32 0), !dbg !101
   %19 = bitcast i8* %18 to i32 (i8*, i8*)*, !dbg !101
   %20 = call i32 %19(i8* %17, i8* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_count" to i8*)), !dbg !101
   %21 = add nsw i32 42, %20, !dbg !101
-  %22 = load %0** %5, align 8, !dbg !101
-  %23 = load i64* @"OBJC_IVAR_$_A.ivar", !dbg !101, !invariant.load !67
+  %22 = load %0*, %0** %5, align 8, !dbg !101
+  %23 = load i64, i64* @"OBJC_IVAR_$_A.ivar", !dbg !101, !invariant.load !67
   %24 = bitcast %0* %22 to i8*, !dbg !101
   %25 = getelementptr inbounds i8, i8* %24, i64 %23, !dbg !101
   %26 = bitcast i8* %25 to i32*, !dbg !101
@@ -213,13 +213,13 @@
   call void @llvm.dbg.declare(metadata i8** %3, metadata !102, metadata !{!"0x102"}), !dbg !103
   store i8* %1, i8** %4, align 8
   call void @llvm.dbg.declare(metadata i8** %4, metadata !104, metadata !{!"0x102"}), !dbg !103
-  %5 = load i8** %4, !dbg !103
+  %5 = load i8*, i8** %4, !dbg !103
   %6 = bitcast i8* %5 to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !103
-  %7 = load i8** %3, !dbg !103
+  %7 = load i8*, i8** %3, !dbg !103
   %8 = bitcast i8* %7 to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !103
   %9 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %6, i32 0, i32 5, !dbg !103
   %10 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %8, i32 0, i32 5, !dbg !103
-  %11 = load %0** %9, !dbg !103
+  %11 = load %0*, %0** %9, !dbg !103
   %12 = bitcast %0* %11 to i8*, !dbg !103
   %13 = bitcast %0** %10 to i8*, !dbg !103
   call void @_Block_object_assign(i8* %13, i8* %12, i32 3) #3, !dbg !103
@@ -232,10 +232,10 @@
   %2 = alloca i8*, align 8
   store i8* %0, i8** %2, align 8
   call void @llvm.dbg.declare(metadata i8** %2, metadata !105, metadata !{!"0x102"}), !dbg !106
-  %3 = load i8** %2, !dbg !106
+  %3 = load i8*, i8** %2, !dbg !106
   %4 = bitcast i8* %3 to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>*, !dbg !106
   %5 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0* }>* %4, i32 0, i32 5, !dbg !106
-  %6 = load %0** %5, !dbg !106
+  %6 = load %0*, %0** %5, !dbg !106
   %7 = bitcast %0* %6 to i8*, !dbg !106
   call void @_Block_object_dispose(i8* %7, i32 3) #3, !dbg !106
   ret void, !dbg !106
@@ -248,13 +248,13 @@
   %a = alloca %0*, align 8
   store i32 0, i32* %1
   call void @llvm.dbg.declare(metadata %0** %a, metadata !107, metadata !{!"0x102"}), !dbg !108
-  %2 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_5", !dbg !108
+  %2 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_5", !dbg !108
   %3 = bitcast %struct._class_t* %2 to i8*, !dbg !108
-  %4 = load i8** getelementptr inbounds (%struct._message_ref_t* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to %struct._message_ref_t*), i32 0, i32 0), !dbg !108
+  %4 = load i8*, i8** getelementptr inbounds (%struct._message_ref_t* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to %struct._message_ref_t*), i32 0, i32 0), !dbg !108
   %5 = bitcast i8* %4 to i8* (i8*, i8*)*, !dbg !108
   %6 = call i8* %5(i8* %3, i8* bitcast ({ i8* (i8*, %struct._message_ref_t*, ...)*, i8* }* @"\01l_objc_msgSend_fixup_alloc" to i8*)), !dbg !108
   %7 = bitcast i8* %6 to %0*, !dbg !108
-  %8 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", !dbg !108, !invariant.load !67
+  %8 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", !dbg !108, !invariant.load !67
   %9 = bitcast %0* %7 to i8*, !dbg !108
   %10 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %9, i8* %8), !dbg !108
   %11 = bitcast i8* %10 to %0*, !dbg !108
diff --git a/llvm/test/DebugInfo/X86/debug-info-static-member.ll b/llvm/test/DebugInfo/X86/debug-info-static-member.ll
index 0b3d3cc..1afb262 100644
--- a/llvm/test/DebugInfo/X86/debug-info-static-member.ll
+++ b/llvm/test/DebugInfo/X86/debug-info-static-member.ll
@@ -50,7 +50,7 @@
   call void @llvm.dbg.declare(metadata %class.C* %instance_C, metadata !29, metadata !{!"0x102"}), !dbg !30
   %d = getelementptr inbounds %class.C, %class.C* %instance_C, i32 0, i32 0, !dbg !31
   store i32 8, i32* %d, align 4, !dbg !31
-  %0 = load i32* @_ZN1C1cE, align 4, !dbg !32
+  %0 = load i32, i32* @_ZN1C1cE, align 4, !dbg !32
   ret i32 %0, !dbg !32
 }
 
diff --git a/llvm/test/DebugInfo/X86/debug-loc-asan.ll b/llvm/test/DebugInfo/X86/debug-loc-asan.ll
index 13e193bd..f880445 100644
--- a/llvm/test/DebugInfo/X86/debug-loc-asan.ll
+++ b/llvm/test/DebugInfo/X86/debug-loc-asan.ll
@@ -46,7 +46,7 @@
 entry:
   %MyAlloca = alloca [64 x i8], align 32
   %0 = ptrtoint [64 x i8]* %MyAlloca to i64
-  %1 = load i32* @__asan_option_detect_stack_use_after_return
+  %1 = load i32, i32* @__asan_option_detect_stack_use_after_return
   %2 = icmp ne i32 %1, 0
   br i1 %2, label %3, label %5
 
@@ -75,7 +75,7 @@
   %19 = lshr i64 %18, 3
   %20 = add i64 %19, 2147450880
   %21 = inttoptr i64 %20 to i8*
-  %22 = load i8* %21
+  %22 = load i8, i8* %21
   %23 = icmp ne i8 %22, 0
   call void @llvm.dbg.declare(metadata i32* %8, metadata !12, metadata !14)
   br i1 %23, label %24, label %30
@@ -98,7 +98,7 @@
   %32 = lshr i64 %31, 3, !dbg !13
   %33 = add i64 %32, 2147450880, !dbg !13
   %34 = inttoptr i64 %33 to i8*, !dbg !13
-  %35 = load i8* %34, !dbg !13
+  %35 = load i8, i8* %34, !dbg !13
   %36 = icmp ne i8 %35, 0, !dbg !13
   br i1 %36, label %37, label %43, !dbg !13
 
@@ -115,7 +115,7 @@
   unreachable
 
 ; <label>:43                                      ; preds = %37, %30
-  %44 = load i32* %8, align 4, !dbg !13
+  %44 = load i32, i32* %8, align 4, !dbg !13
   %add = add nsw i32 %44, 2, !dbg !13
   store i64 1172321806, i64* %9, !dbg !13
   %45 = icmp ne i64 %6, %0, !dbg !13
@@ -127,7 +127,7 @@
   store i64 -723401728380766731, i64* %48, !dbg !13
   %49 = add i64 %6, 56, !dbg !13
   %50 = inttoptr i64 %49 to i64*, !dbg !13
-  %51 = load i64* %50, !dbg !13
+  %51 = load i64, i64* %50, !dbg !13
   %52 = inttoptr i64 %51 to i8*, !dbg !13
   store i8 0, i8* %52, !dbg !13
   br label %56, !dbg !13
diff --git a/llvm/test/DebugInfo/X86/debug-loc-offset.ll b/llvm/test/DebugInfo/X86/debug-loc-offset.ll
index 1a629aa..d2cb274 100644
--- a/llvm/test/DebugInfo/X86/debug-loc-offset.ll
+++ b/llvm/test/DebugInfo/X86/debug-loc-offset.ll
@@ -65,7 +65,7 @@
   %b.addr = alloca i32, align 4
   store i32 %b, i32* %b.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %b.addr, metadata !21, metadata !{!"0x102"}), !dbg !22
-  %0 = load i32* %b.addr, align 4, !dbg !23
+  %0 = load i32, i32* %b.addr, align 4, !dbg !23
   %add = add nsw i32 %0, 4, !dbg !23
   ret i32 %add, !dbg !23
 }
@@ -80,12 +80,12 @@
   call void @llvm.dbg.declare(metadata i32* %z, metadata !26, metadata !{!"0x102"}), !dbg !27
   store i32 2, i32* %z, align 4, !dbg !27
   %var = getelementptr inbounds %struct.A, %struct.A* %a, i32 0, i32 1, !dbg !28
-  %0 = load i32* %var, align 4, !dbg !28
+  %0 = load i32, i32* %var, align 4, !dbg !28
   %cmp = icmp sgt i32 %0, 2, !dbg !28
   br i1 %cmp, label %if.then, label %if.end, !dbg !28
 
 if.then:                                          ; preds = %entry
-  %1 = load i32* %z, align 4, !dbg !30
+  %1 = load i32, i32* %z, align 4, !dbg !30
   %inc = add nsw i32 %1, 1, !dbg !30
   store i32 %inc, i32* %z, align 4, !dbg !30
   br label %if.end, !dbg !30
@@ -97,7 +97,7 @@
   br i1 %cmp1, label %if.then2, label %if.end4, !dbg !31
 
 if.then2:                                         ; preds = %if.end
-  %2 = load i32* %z, align 4, !dbg !33
+  %2 = load i32, i32* %z, align 4, !dbg !33
   %inc3 = add nsw i32 %2, 1, !dbg !33
   store i32 %inc3, i32* %z, align 4, !dbg !33
   br label %if.end4, !dbg !33
diff --git a/llvm/test/DebugInfo/X86/debug-ranges-offset.ll b/llvm/test/DebugInfo/X86/debug-ranges-offset.ll
index fd8fe0e..a43461c 100644
--- a/llvm/test/DebugInfo/X86/debug-ranges-offset.ll
+++ b/llvm/test/DebugInfo/X86/debug-ranges-offset.ll
@@ -29,15 +29,15 @@
   store i64 0, i64* getelementptr inbounds ([1000 x i64]* @__msan_param_tls, i64 0, i64 0), align 8, !dbg !19
   store i64 0, i64* getelementptr inbounds ([8 x i64]* @__msan_retval_tls, i64 0, i64 0), align 8, !dbg !19
   %call = call i8* @_Znwm(i64 4) #4, !dbg !19
-  %_msret = load i64* getelementptr inbounds ([8 x i64]* @__msan_retval_tls, i64 0, i64 0), align 8, !dbg !19
+  %_msret = load i64, i64* getelementptr inbounds ([8 x i64]* @__msan_retval_tls, i64 0, i64 0), align 8, !dbg !19
   %3 = bitcast i8* %call to i32*, !dbg !19
   tail call void @llvm.dbg.value(metadata i32* %3, i64 0, metadata !9, metadata !{!"0x102"}), !dbg !19
   %4 = inttoptr i64 %1 to i64*, !dbg !19
   store i64 %_msret, i64* %4, align 8, !dbg !19
   store volatile i32* %3, i32** %p, align 8, !dbg !19
   tail call void @llvm.dbg.value(metadata i32** %p, i64 0, metadata !9, metadata !{!"0x102"}), !dbg !19
-  %p.0.p.0. = load volatile i32** %p, align 8, !dbg !20
-  %_msld = load i64* %4, align 8, !dbg !20
+  %p.0.p.0. = load volatile i32*, i32** %p, align 8, !dbg !20
+  %_msld = load i64, i64* %4, align 8, !dbg !20
   %_mscmp = icmp eq i64 %_msld, 0, !dbg !20
   br i1 %_mscmp, label %6, label %5, !dbg !20, !prof !22
 
@@ -47,11 +47,11 @@
   unreachable, !dbg !20
 
 ; <label>:6                                       ; preds = %entry
-  %7 = load i32* %p.0.p.0., align 4, !dbg !20, !tbaa !23
+  %7 = load i32, i32* %p.0.p.0., align 4, !dbg !20, !tbaa !23
   %8 = ptrtoint i32* %p.0.p.0. to i64, !dbg !20
   %9 = and i64 %8, -70368744177665, !dbg !20
   %10 = inttoptr i64 %9 to i32*, !dbg !20
-  %_msld2 = load i32* %10, align 4, !dbg !20
+  %_msld2 = load i32, i32* %10, align 4, !dbg !20
   %11 = icmp ne i32 %_msld2, 0, !dbg !20
   %12 = xor i32 %_msld2, -1, !dbg !20
   %13 = and i32 %7, %12, !dbg !20
@@ -94,15 +94,15 @@
   store i64 0, i64* getelementptr inbounds ([1000 x i64]* @__msan_param_tls, i64 0, i64 0), align 8, !dbg !30
   store i64 0, i64* getelementptr inbounds ([8 x i64]* @__msan_retval_tls, i64 0, i64 0), align 8, !dbg !30
   %call.i = call i8* @_Znwm(i64 4) #4, !dbg !30
-  %_msret = load i64* getelementptr inbounds ([8 x i64]* @__msan_retval_tls, i64 0, i64 0), align 8, !dbg !30
+  %_msret = load i64, i64* getelementptr inbounds ([8 x i64]* @__msan_retval_tls, i64 0, i64 0), align 8, !dbg !30
   %3 = bitcast i8* %call.i to i32*, !dbg !30
   tail call void @llvm.dbg.value(metadata i32* %3, i64 0, metadata !32, metadata !{!"0x102"}), !dbg !30
   %4 = inttoptr i64 %1 to i64*, !dbg !30
   store i64 %_msret, i64* %4, align 8, !dbg !30
   store volatile i32* %3, i32** %p.i, align 8, !dbg !30
   tail call void @llvm.dbg.value(metadata i32** %p.i, i64 0, metadata !32, metadata !{!"0x102"}), !dbg !30
-  %p.i.0.p.0.p.0..i = load volatile i32** %p.i, align 8, !dbg !33
-  %_msld = load i64* %4, align 8, !dbg !33
+  %p.i.0.p.0.p.0..i = load volatile i32*, i32** %p.i, align 8, !dbg !33
+  %_msld = load i64, i64* %4, align 8, !dbg !33
   %_mscmp = icmp eq i64 %_msld, 0, !dbg !33
   br i1 %_mscmp, label %6, label %5, !dbg !33, !prof !22
 
@@ -112,11 +112,11 @@
   unreachable, !dbg !33
 
 ; <label>:6                                       ; preds = %entry
-  %7 = load i32* %p.i.0.p.0.p.0..i, align 4, !dbg !33, !tbaa !23
+  %7 = load i32, i32* %p.i.0.p.0.p.0..i, align 4, !dbg !33, !tbaa !23
   %8 = ptrtoint i32* %p.i.0.p.0.p.0..i to i64, !dbg !33
   %9 = and i64 %8, -70368744177665, !dbg !33
   %10 = inttoptr i64 %9 to i32*, !dbg !33
-  %_msld2 = load i32* %10, align 4, !dbg !33
+  %_msld2 = load i32, i32* %10, align 4, !dbg !33
   %11 = icmp ne i32 %_msld2, 0, !dbg !33
   %12 = xor i32 %_msld2, -1, !dbg !33
   %13 = and i32 %7, %12, !dbg !33
diff --git a/llvm/test/DebugInfo/X86/decl-derived-member.ll b/llvm/test/DebugInfo/X86/decl-derived-member.ll
index 8f58a43..0ff8287 100644
--- a/llvm/test/DebugInfo/X86/decl-derived-member.ll
+++ b/llvm/test/DebugInfo/X86/decl-derived-member.ll
@@ -46,7 +46,7 @@
   %this.addr = alloca %struct.foo*, align 8
   store %struct.foo* %this, %struct.foo** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %struct.foo** %this.addr, metadata !34, metadata !36), !dbg !37
-  %this1 = load %struct.foo** %this.addr
+  %this1 = load %struct.foo*, %struct.foo** %this.addr
   %b = getelementptr inbounds %struct.foo, %struct.foo* %this1, i32 0, i32 0, !dbg !38
   call void @_ZN4baseC2Ev(%struct.base* %b) #2, !dbg !38
   ret void, !dbg !38
@@ -58,7 +58,7 @@
   %this.addr = alloca %struct.foo*, align 8
   store %struct.foo* %this, %struct.foo** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %struct.foo** %this.addr, metadata !39, metadata !36), !dbg !40
-  %this1 = load %struct.foo** %this.addr
+  %this1 = load %struct.foo*, %struct.foo** %this.addr
   %b = getelementptr inbounds %struct.foo, %struct.foo* %this1, i32 0, i32 0, !dbg !41
   call void @_ZN4baseD1Ev(%struct.base* %b), !dbg !41
   ret void, !dbg !43
@@ -76,7 +76,7 @@
   %this.addr = alloca %struct.base*, align 8
   store %struct.base* %this, %struct.base** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %struct.base** %this.addr, metadata !44, metadata !36), !dbg !46
-  %this1 = load %struct.base** %this.addr
+  %this1 = load %struct.base*, %struct.base** %this.addr
   %0 = bitcast %struct.base* %this1 to i32 (...)***, !dbg !47
   store i32 (...)** bitcast (i8** getelementptr inbounds ([4 x i8*]* @_ZTV4base, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, !dbg !47
   ret void, !dbg !47
diff --git a/llvm/test/DebugInfo/X86/discriminator.ll b/llvm/test/DebugInfo/X86/discriminator.ll
index 185f7cd..d36c7e1 100644
--- a/llvm/test/DebugInfo/X86/discriminator.ll
+++ b/llvm/test/DebugInfo/X86/discriminator.ll
@@ -16,12 +16,12 @@
   %retval = alloca i32, align 4
   %i.addr = alloca i32, align 4
   store i32 %i, i32* %i.addr, align 4
-  %0 = load i32* %i.addr, align 4, !dbg !10
+  %0 = load i32, i32* %i.addr, align 4, !dbg !10
   %cmp = icmp slt i32 %0, 10, !dbg !10
   br i1 %cmp, label %if.then, label %if.end, !dbg !10
 
 if.then:                                          ; preds = %entry
-  %1 = load i32* %i.addr, align 4, !dbg !14
+  %1 = load i32, i32* %i.addr, align 4, !dbg !14
   %sub = sub nsw i32 %1, 1, !dbg !14
   store i32 %sub, i32* %retval, !dbg !14
   br label %return, !dbg !14
@@ -31,7 +31,7 @@
   br label %return, !dbg !12
 
 return:                                           ; preds = %if.end, %if.then
-  %2 = load i32* %retval, !dbg !13
+  %2 = load i32, i32* %retval, !dbg !13
   ret i32 %2, !dbg !13
 }
 
diff --git a/llvm/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll b/llvm/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll
index 1bda8ec..eff918e 100644
--- a/llvm/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll
+++ b/llvm/test/DebugInfo/X86/dwarf-aranges-no-dwarf-labels.ll
@@ -45,7 +45,7 @@
   %call = tail call i32 @_Z3fooi(i32 2), !dbg !22
   %call1 = tail call i32 @_Z4foo2i(i32 1), !dbg !22
   %add = add nsw i32 %call1, %call, !dbg !22
-  %0 = load i32* @global, align 4, !dbg !22, !tbaa !23
+  %0 = load i32, i32* @global, align 4, !dbg !22, !tbaa !23
   %add2 = add nsw i32 %add, %0, !dbg !22
   ret i32 %add2, !dbg !22
 }
diff --git a/llvm/test/DebugInfo/X86/dwarf-aranges.ll b/llvm/test/DebugInfo/X86/dwarf-aranges.ll
index 6873e58..081dc92 100644
--- a/llvm/test/DebugInfo/X86/dwarf-aranges.ll
+++ b/llvm/test/DebugInfo/X86/dwarf-aranges.ll
@@ -50,10 +50,10 @@
 
 define void @some_code() {
 entry:
-  %0 = load i32* @some_data, align 4, !dbg !14
-  %1 = load i32* @some_other, align 4, !dbg !14
+  %0 = load i32, i32* @some_data, align 4, !dbg !14
+  %1 = load i32, i32* @some_other, align 4, !dbg !14
   %add = add nsw i32 %0, %1, !dbg !14
-  %2 = load i32* @some_bss, align 4, !dbg !14
+  %2 = load i32, i32* @some_bss, align 4, !dbg !14
   %add1 = add nsw i32 %2, %add, !dbg !14
   store i32 %add1, i32* @some_bss, align 4, !dbg !14
   ret void, !dbg !15
diff --git a/llvm/test/DebugInfo/X86/dwarf-public-names.ll b/llvm/test/DebugInfo/X86/dwarf-public-names.ll
index 778738c..77efab9 100644
--- a/llvm/test/DebugInfo/X86/dwarf-public-names.ll
+++ b/llvm/test/DebugInfo/X86/dwarf-public-names.ll
@@ -64,7 +64,7 @@
   %this.addr = alloca %struct.C*, align 8
   store %struct.C* %this, %struct.C** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !28, metadata !{!"0x102"}), !dbg !30
-  %this1 = load %struct.C** %this.addr
+  %this1 = load %struct.C*, %struct.C** %this.addr
   store i32 0, i32* @_ZN1C22static_member_variableE, align 4, !dbg !31
   ret void, !dbg !32
 }
@@ -73,7 +73,7 @@
 
 define i32 @_ZN1C22static_member_functionEv() nounwind uwtable align 2 {
 entry:
-  %0 = load i32* @_ZN1C22static_member_variableE, align 4, !dbg !33
+  %0 = load i32, i32* @_ZN1C22static_member_variableE, align 4, !dbg !33
   ret i32 %0, !dbg !33
 }
 
diff --git a/llvm/test/DebugInfo/X86/elf-names.ll b/llvm/test/DebugInfo/X86/elf-names.ll
index 688f7f0..8a8a3c3 100644
--- a/llvm/test/DebugInfo/X86/elf-names.ll
+++ b/llvm/test/DebugInfo/X86/elf-names.ll
@@ -39,19 +39,19 @@
   tail call void @llvm.dbg.value(metadata %class.D* %this, i64 0, metadata !34, metadata !{!"0x102"}), !dbg !46
   tail call void @llvm.dbg.value(metadata %class.D* %d, i64 0, metadata !35, metadata !{!"0x102"}), !dbg !46
   %c1 = getelementptr inbounds %class.D, %class.D* %d, i64 0, i32 0, !dbg !47
-  %0 = load i32* %c1, align 4, !dbg !47
+  %0 = load i32, i32* %c1, align 4, !dbg !47
   %c12 = getelementptr inbounds %class.D, %class.D* %this, i64 0, i32 0, !dbg !47
   store i32 %0, i32* %c12, align 4, !dbg !47
   %c2 = getelementptr inbounds %class.D, %class.D* %d, i64 0, i32 1, !dbg !49
-  %1 = load i32* %c2, align 4, !dbg !49
+  %1 = load i32, i32* %c2, align 4, !dbg !49
   %c23 = getelementptr inbounds %class.D, %class.D* %this, i64 0, i32 1, !dbg !49
   store i32 %1, i32* %c23, align 4, !dbg !49
   %c3 = getelementptr inbounds %class.D, %class.D* %d, i64 0, i32 2, !dbg !50
-  %2 = load i32* %c3, align 4, !dbg !50
+  %2 = load i32, i32* %c3, align 4, !dbg !50
   %c34 = getelementptr inbounds %class.D, %class.D* %this, i64 0, i32 2, !dbg !50
   store i32 %2, i32* %c34, align 4, !dbg !50
   %c4 = getelementptr inbounds %class.D, %class.D* %d, i64 0, i32 3, !dbg !51
-  %3 = load i32* %c4, align 4, !dbg !51
+  %3 = load i32, i32* %c4, align 4, !dbg !51
   %c45 = getelementptr inbounds %class.D, %class.D* %this, i64 0, i32 3, !dbg !51
   store i32 %3, i32* %c45, align 4, !dbg !51
   ret void, !dbg !52
diff --git a/llvm/test/DebugInfo/X86/empty-and-one-elem-array.ll b/llvm/test/DebugInfo/X86/empty-and-one-elem-array.ll
index bee45d3..c0108e0 100644
--- a/llvm/test/DebugInfo/X86/empty-and-one-elem-array.ll
+++ b/llvm/test/DebugInfo/X86/empty-and-one-elem-array.ll
@@ -16,9 +16,9 @@
   %a1 = getelementptr inbounds %struct.bar, %struct.bar* %my_bar, i32 0, i32 0, !dbg !30
   store i32 5, i32* %a1, align 4, !dbg !30
   %a2 = getelementptr inbounds %struct.foo, %struct.foo* %my_foo, i32 0, i32 0, !dbg !31
-  %0 = load i32* %a2, align 4, !dbg !31
+  %0 = load i32, i32* %a2, align 4, !dbg !31
   %a3 = getelementptr inbounds %struct.bar, %struct.bar* %my_bar, i32 0, i32 0, !dbg !31
-  %1 = load i32* %a3, align 4, !dbg !31
+  %1 = load i32, i32* %a3, align 4, !dbg !31
   %add = add nsw i32 %0, %1, !dbg !31
   ret i32 %add, !dbg !31
 }
diff --git a/llvm/test/DebugInfo/X86/ending-run.ll b/llvm/test/DebugInfo/X86/ending-run.ll
index 0b5c77f..f407616 100644
--- a/llvm/test/DebugInfo/X86/ending-run.ll
+++ b/llvm/test/DebugInfo/X86/ending-run.ll
@@ -15,11 +15,11 @@
   store i32 %x, i32* %x.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !12, metadata !{!"0x102"}), !dbg !13
   call void @llvm.dbg.declare(metadata i32* %y, metadata !14, metadata !{!"0x102"}), !dbg !16
-  %0 = load i32* %x.addr, align 4, !dbg !17
-  %1 = load i32* %x.addr, align 4, !dbg !17
+  %0 = load i32, i32* %x.addr, align 4, !dbg !17
+  %1 = load i32, i32* %x.addr, align 4, !dbg !17
   %mul = mul nsw i32 %0, %1, !dbg !17
   store i32 %mul, i32* %y, align 4, !dbg !17
-  %2 = load i32* %y, align 4, !dbg !18
+  %2 = load i32, i32* %y, align 4, !dbg !18
   %sub = sub nsw i32 %2, 2, !dbg !18
   ret i32 %sub, !dbg !18
 }
diff --git a/llvm/test/DebugInfo/X86/fission-ranges.ll b/llvm/test/DebugInfo/X86/fission-ranges.ll
index 400998e..466bc36 100644
--- a/llvm/test/DebugInfo/X86/fission-ranges.ll
+++ b/llvm/test/DebugInfo/X86/fission-ranges.ll
@@ -93,7 +93,7 @@
 entry:
   tail call void @llvm.dbg.value(metadata i32 1, i64 0, metadata !13, metadata !{!"0x102"}), !dbg !30
   tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !14, metadata !{!"0x102"}), !dbg !31
-  %c.promoted9 = load i32* @c, align 4, !dbg !32, !tbaa !33
+  %c.promoted9 = load i32, i32* @c, align 4, !dbg !32, !tbaa !33
   br label %for.cond1.preheader, !dbg !31
 
 for.cond1.preheader:                              ; preds = %for.inc16, %entry
diff --git a/llvm/test/DebugInfo/X86/formal_parameter.ll b/llvm/test/DebugInfo/X86/formal_parameter.ll
index 9077c74..9fd2bf4 100644
--- a/llvm/test/DebugInfo/X86/formal_parameter.ll
+++ b/llvm/test/DebugInfo/X86/formal_parameter.ll
@@ -36,7 +36,7 @@
   ; LOWERING: call void @llvm.dbg.value{{.*}}, !dbg ![[LOC:.*]]
   ; LOWERING: call void @llvm.dbg.value{{.*}}, !dbg ![[LOC]]
   ; LOWERING: call void @llvm.dbg.value{{.*}}, !dbg ![[LOC]]
-%0 = load i32* %map.addr, align 4, !dbg !20, !tbaa !15
+%0 = load i32, i32* %map.addr, align 4, !dbg !20, !tbaa !15
   %call1 = call i32 (i32, ...)* bitcast (i32 (...)* @verify to i32 (i32, ...)*)(i32 %0) #3, !dbg !20
   ret void, !dbg !22
 }
diff --git a/llvm/test/DebugInfo/X86/generate-odr-hash.ll b/llvm/test/DebugInfo/X86/generate-odr-hash.ll
index 6d4fa86..e81077e 100644
--- a/llvm/test/DebugInfo/X86/generate-odr-hash.ll
+++ b/llvm/test/DebugInfo/X86/generate-odr-hash.ll
@@ -202,7 +202,7 @@
   %this.addr = alloca %"struct.<anonymous namespace>::walrus"*, align 8
   store %"struct.<anonymous namespace>::walrus"* %this, %"struct.<anonymous namespace>::walrus"** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %"struct.<anonymous namespace>::walrus"** %this.addr, metadata !51, metadata !{!"0x102"}), !dbg !53
-  %this1 = load %"struct.<anonymous namespace>::walrus"** %this.addr
+  %this1 = load %"struct.<anonymous namespace>::walrus"*, %"struct.<anonymous namespace>::walrus"** %this.addr
   ret void, !dbg !54
 }
 
diff --git a/llvm/test/DebugInfo/X86/gnu-public-names.ll b/llvm/test/DebugInfo/X86/gnu-public-names.ll
index 7e92b53..cef4368 100644
--- a/llvm/test/DebugInfo/X86/gnu-public-names.ll
+++ b/llvm/test/DebugInfo/X86/gnu-public-names.ll
@@ -215,7 +215,7 @@
   %this.addr = alloca %struct.C*, align 8
   store %struct.C* %this, %struct.C** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !50, metadata !{!"0x102"}), !dbg !52
-  %this1 = load %struct.C** %this.addr
+  %this1 = load %struct.C*, %struct.C** %this.addr
   store i32 0, i32* @_ZN1C22static_member_variableE, align 4, !dbg !53
   ret void, !dbg !54
 }
@@ -226,7 +226,7 @@
 ; Function Attrs: nounwind uwtable
 define i32 @_ZN1C22static_member_functionEv() #0 align 2 {
 entry:
-  %0 = load i32* @_ZN1C22static_member_variableE, align 4, !dbg !55
+  %0 = load i32, i32* @_ZN1C22static_member_variableE, align 4, !dbg !55
   ret i32 %0, !dbg !55
 }
 
@@ -252,13 +252,13 @@
 ; Function Attrs: nounwind uwtable
 define i32 @_Z2f7v() #0 {
 entry:
-  %0 = load i32* @_ZN12_GLOBAL__N_11iE, align 4, !dbg !60
+  %0 = load i32, i32* @_ZN12_GLOBAL__N_11iE, align 4, !dbg !60
   %call = call i32* @_Z2f3v(), !dbg !60
-  %1 = load i32* %call, align 4, !dbg !60
+  %1 = load i32, i32* %call, align 4, !dbg !60
   %add = add nsw i32 %0, %1, !dbg !60
-  %2 = load i32* @_ZN12_GLOBAL__N_15inner1bE, align 4, !dbg !60
+  %2 = load i32, i32* @_ZN12_GLOBAL__N_15inner1bE, align 4, !dbg !60
   %add1 = add nsw i32 %add, %2, !dbg !60
-  %3 = load i32* @_ZN5outer12_GLOBAL__N_11cE, align 4, !dbg !60
+  %3 = load i32, i32* @_ZN5outer12_GLOBAL__N_11cE, align 4, !dbg !60
   %add2 = add nsw i32 %add1, %3, !dbg !60
   ret i32 %add2, !dbg !60
 }
diff --git a/llvm/test/DebugInfo/X86/inline-member-function.ll b/llvm/test/DebugInfo/X86/inline-member-function.ll
index 68a211f..49c5a02 100644
--- a/llvm/test/DebugInfo/X86/inline-member-function.ll
+++ b/llvm/test/DebugInfo/X86/inline-member-function.ll
@@ -44,13 +44,13 @@
   %retval = alloca i32, align 4
   %tmp = alloca %struct.foo, align 1
   store i32 0, i32* %retval
-  %0 = load i32* @i, align 4, !dbg !23
+  %0 = load i32, i32* @i, align 4, !dbg !23
   store %struct.foo* %tmp, %struct.foo** %this.addr.i, align 8
   call void @llvm.dbg.declare(metadata %struct.foo** %this.addr.i, metadata !24, metadata !{!"0x102"}), !dbg !26
   store i32 %0, i32* %x.addr.i, align 4
   call void @llvm.dbg.declare(metadata i32* %x.addr.i, metadata !27, metadata !{!"0x102"}), !dbg !28
-  %this1.i = load %struct.foo** %this.addr.i
-  %1 = load i32* %x.addr.i, align 4, !dbg !28
+  %this1.i = load %struct.foo*, %struct.foo** %this.addr.i
+  %1 = load i32, i32* %x.addr.i, align 4, !dbg !28
   %add.i = add nsw i32 %1, 2, !dbg !28
   ret i32 %add.i, !dbg !23
 }
diff --git a/llvm/test/DebugInfo/X86/inline-seldag-test.ll b/llvm/test/DebugInfo/X86/inline-seldag-test.ll
index 8c10e3a..3b1bc8e 100644
--- a/llvm/test/DebugInfo/X86/inline-seldag-test.ll
+++ b/llvm/test/DebugInfo/X86/inline-seldag-test.ll
@@ -28,10 +28,10 @@
   %y.addr.i = alloca i32, align 4
   %x = alloca i32, align 4
   call void @llvm.dbg.declare(metadata i32* %x, metadata !15, metadata !{!"0x102"}), !dbg !17
-  %0 = load volatile i32* %x, align 4, !dbg !18
+  %0 = load volatile i32, i32* %x, align 4, !dbg !18
   store i32 %0, i32* %y.addr.i, align 4
   call void @llvm.dbg.declare(metadata i32* %y.addr.i, metadata !19, metadata !{!"0x102"}), !dbg !20
-  %1 = load i32* %y.addr.i, align 4, !dbg !21
+  %1 = load i32, i32* %y.addr.i, align 4, !dbg !21
   %tobool.i = icmp ne i32 %1, 0, !dbg !21
   %cond.i = select i1 %tobool.i, i32 4, i32 7, !dbg !21
   store volatile i32 %cond.i, i32* %x, align 4, !dbg !18
diff --git a/llvm/test/DebugInfo/X86/instcombine-instrinsics.ll b/llvm/test/DebugInfo/X86/instcombine-instrinsics.ll
index 942865a..a797731 100644
--- a/llvm/test/DebugInfo/X86/instcombine-instrinsics.ll
+++ b/llvm/test/DebugInfo/X86/instcombine-instrinsics.ll
@@ -33,9 +33,9 @@
   call void @llvm.dbg.declare(metadata %struct.i14** %p, metadata !11, metadata !{!"0x102"}), !dbg !18
   store %struct.i14* null, %struct.i14** %p, align 8, !dbg !18
   %1 = call i32 @foo(%struct.i14** %p), !dbg !19
-  %2 = load %struct.i14** %p, align 8, !dbg !20
+  %2 = load %struct.i14*, %struct.i14** %p, align 8, !dbg !20
   %3 = getelementptr inbounds %struct.i14, %struct.i14* %2, i32 0, i32 0, !dbg !20
-  %4 = load i64* %3, align 8, !dbg !20
+  %4 = load i64, i64* %3, align 8, !dbg !20
   %5 = or i64 %4, 4, !dbg !20
   store i64 %5, i64* %3, align 8, !dbg !20
   %6 = call i32 @foo(%struct.i14** %p), !dbg !21
diff --git a/llvm/test/DebugInfo/X86/lexical_block.ll b/llvm/test/DebugInfo/X86/lexical_block.ll
index a9e377a..06d217a 100644
--- a/llvm/test/DebugInfo/X86/lexical_block.ll
+++ b/llvm/test/DebugInfo/X86/lexical_block.ll
@@ -27,7 +27,7 @@
   %i = alloca i32, align 4
   call void @llvm.dbg.declare(metadata i32* %i, metadata !11, metadata !{!"0x102"}), !dbg !14
   store i32 3, i32* %i, align 4, !dbg !14
-  %0 = load i32* %i, align 4, !dbg !14
+  %0 = load i32, i32* %i, align 4, !dbg !14
   %tobool = icmp ne i32 %0, 0, !dbg !14
   br i1 %tobool, label %if.then, label %if.end, !dbg !14
 
diff --git a/llvm/test/DebugInfo/X86/line-info.ll b/llvm/test/DebugInfo/X86/line-info.ll
index e436426..0ee3c1d 100644
--- a/llvm/test/DebugInfo/X86/line-info.ll
+++ b/llvm/test/DebugInfo/X86/line-info.ll
@@ -19,7 +19,7 @@
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !14, metadata !{!"0x102"}), !dbg !15
-  %0 = load i32* %x.addr, align 4, !dbg !16
+  %0 = load i32, i32* %x.addr, align 4, !dbg !16
   %inc = add nsw i32 %0, 1, !dbg !16
   store i32 %inc, i32* %x.addr, align 4, !dbg !16
   ret i32 %inc, !dbg !16
diff --git a/llvm/test/DebugInfo/X86/linkage-name.ll b/llvm/test/DebugInfo/X86/linkage-name.ll
index 187ff8b..ab10ef5 100644
--- a/llvm/test/DebugInfo/X86/linkage-name.ll
+++ b/llvm/test/DebugInfo/X86/linkage-name.ll
@@ -17,8 +17,8 @@
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !21, metadata !{!"0x102"}), !dbg !23
   store i32 %b, i32* %b.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %b.addr, metadata !24, metadata !{!"0x102"}), !dbg !25
-  %this1 = load %class.A** %this.addr
-  %0 = load i32* %b.addr, align 4, !dbg !26
+  %this1 = load %class.A*, %class.A** %this.addr
+  %0 = load i32, i32* %b.addr, align 4, !dbg !26
   ret i32 %0, !dbg !26
 }
 
diff --git a/llvm/test/DebugInfo/X86/misched-dbg-value.ll b/llvm/test/DebugInfo/X86/misched-dbg-value.ll
index 1a867e8..c1dabcf 100644
--- a/llvm/test/DebugInfo/X86/misched-dbg-value.ll
+++ b/llvm/test/DebugInfo/X86/misched-dbg-value.ll
@@ -83,10 +83,10 @@
   %sub = add nsw i32 %IntParI1, 4, !dbg !78
   %idxprom14 = sext i32 %sub to i64, !dbg !78
   %arrayidx17 = getelementptr inbounds [51 x i32], [51 x i32]* %Array2Par, i64 %idxprom, i64 %idxprom14, !dbg !78
-  %0 = load i32* %arrayidx17, align 4, !dbg !78
+  %0 = load i32, i32* %arrayidx17, align 4, !dbg !78
   %inc18 = add nsw i32 %0, 1, !dbg !78
   store i32 %inc18, i32* %arrayidx17, align 4, !dbg !78
-  %1 = load i32* %arrayidx, align 4, !dbg !79
+  %1 = load i32, i32* %arrayidx, align 4, !dbg !79
   %add22 = add nsw i32 %IntParI1, 25, !dbg !79
   %idxprom23 = sext i32 %add22 to i64, !dbg !79
   %arrayidx25 = getelementptr inbounds [51 x i32], [51 x i32]* %Array2Par, i64 %idxprom23, i64 %idxprom, !dbg !79
diff --git a/llvm/test/DebugInfo/X86/nodebug_with_debug_loc.ll b/llvm/test/DebugInfo/X86/nodebug_with_debug_loc.ll
index 71eefbe..775dbd3 100644
--- a/llvm/test/DebugInfo/X86/nodebug_with_debug_loc.ll
+++ b/llvm/test/DebugInfo/X86/nodebug_with_debug_loc.ll
@@ -51,7 +51,7 @@
 entry:
   %str2.i = alloca %struct.string, align 4
   %0 = bitcast %struct.string* %str2.i to i8*, !dbg !26
-  %1 = load %struct.string** @str, align 4
+  %1 = load %struct.string*, %struct.string** @str, align 4
   %mem = getelementptr inbounds %struct.string, %struct.string* %1, i32 0, i32 0
   br label %for.body
 
@@ -63,10 +63,10 @@
   call void @_Z4sinkPKv(i8* undef) #3, !dbg !29
   call void @_Z4sinkPKv(i8* %0) #3, !dbg !30
   call void @llvm.lifetime.end(i64 4, i8* %0), !dbg !31
-  %2 = load i32** %mem, align 4, !tbaa !32
+  %2 = load i32*, i32** %mem, align 4, !tbaa !32
   %3 = bitcast i32* %2 to i8*
   call void @_Z4sinkPKv(i8* %3) #3
-  %4 = load i8* @b, align 1, !tbaa !37, !range !39
+  %4 = load i8, i8* @b, align 1, !tbaa !37, !range !39
   %tobool = icmp ne i8 %4, 0
   %inc = add nsw i32 %iter.02, 1
   %cmp = icmp eq i32 %inc, 2
diff --git a/llvm/test/DebugInfo/X86/op_deref.ll b/llvm/test/DebugInfo/X86/op_deref.ll
index ef564f0..236c526 100644
--- a/llvm/test/DebugInfo/X86/op_deref.ll
+++ b/llvm/test/DebugInfo/X86/op_deref.ll
@@ -33,7 +33,7 @@
   %i = alloca i32, align 4
   store i32 %s, i32* %s.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %s.addr, metadata !10, metadata !{!"0x102"}), !dbg !11
-  %0 = load i32* %s.addr, align 4, !dbg !12
+  %0 = load i32, i32* %s.addr, align 4, !dbg !12
   %1 = zext i32 %0 to i64, !dbg !12
   %2 = call i8* @llvm.stacksave(), !dbg !12
   store i8* %2, i8** %saved_stack, !dbg !12
@@ -44,29 +44,29 @@
   br label %for.cond, !dbg !21
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %3 = load i32* %i, align 4, !dbg !21
-  %4 = load i32* %s.addr, align 4, !dbg !21
+  %3 = load i32, i32* %i, align 4, !dbg !21
+  %4 = load i32, i32* %s.addr, align 4, !dbg !21
   %cmp = icmp slt i32 %3, %4, !dbg !21
   br i1 %cmp, label %for.body, label %for.end, !dbg !21
 
 for.body:                                         ; preds = %for.cond
-  %5 = load i32* %i, align 4, !dbg !23
-  %6 = load i32* %i, align 4, !dbg !23
+  %5 = load i32, i32* %i, align 4, !dbg !23
+  %6 = load i32, i32* %i, align 4, !dbg !23
   %mul = mul nsw i32 %5, %6, !dbg !23
-  %7 = load i32* %i, align 4, !dbg !23
+  %7 = load i32, i32* %i, align 4, !dbg !23
   %idxprom = sext i32 %7 to i64, !dbg !23
   %arrayidx = getelementptr inbounds i32, i32* %vla, i64 %idxprom, !dbg !23
   store i32 %mul, i32* %arrayidx, align 4, !dbg !23
   br label %for.inc, !dbg !25
 
 for.inc:                                          ; preds = %for.body
-  %8 = load i32* %i, align 4, !dbg !26
+  %8 = load i32, i32* %i, align 4, !dbg !26
   %inc = add nsw i32 %8, 1, !dbg !26
   store i32 %inc, i32* %i, align 4, !dbg !26
   br label %for.cond, !dbg !26
 
 for.end:                                          ; preds = %for.cond
-  %9 = load i8** %saved_stack, !dbg !27
+  %9 = load i8*, i8** %saved_stack, !dbg !27
   call void @llvm.stackrestore(i8* %9), !dbg !27
   ret void, !dbg !27
 }
diff --git a/llvm/test/DebugInfo/X86/parameters.ll b/llvm/test/DebugInfo/X86/parameters.ll
index 9e6ee4a..8d6b2f3 100644
--- a/llvm/test/DebugInfo/X86/parameters.ll
+++ b/llvm/test/DebugInfo/X86/parameters.ll
@@ -60,7 +60,7 @@
   store i8 %frombool, i8* %b.addr, align 1
   call void @llvm.dbg.declare(metadata i8* %b.addr, metadata !26, metadata !{!"0x102"}), !dbg !27
   call void @llvm.dbg.declare(metadata %"struct.pr14763::foo"* %g, metadata !28, metadata !{!"0x102\006"}), !dbg !27
-  %0 = load i8* %b.addr, align 1, !dbg !29
+  %0 = load i8, i8* %b.addr, align 1, !dbg !29
   %tobool = trunc i8 %0 to i1, !dbg !29
   br i1 %tobool, label %if.then, label %if.end, !dbg !29
 
diff --git a/llvm/test/DebugInfo/X86/pieces-2.ll b/llvm/test/DebugInfo/X86/pieces-2.ll
index 31f6941..8788a06 100644
--- a/llvm/test/DebugInfo/X86/pieces-2.ll
+++ b/llvm/test/DebugInfo/X86/pieces-2.ll
@@ -33,7 +33,7 @@
 entry:
   call void @llvm.dbg.declare(metadata %struct.Outer* %outer, metadata !25, metadata !{!"0x102"}), !dbg !26
   %i1.sroa.0.0..sroa_idx = getelementptr inbounds %struct.Outer, %struct.Outer* %outer, i64 0, i32 0, i64 1, i32 0, !dbg !27
-  %i1.sroa.0.0.copyload = load i32* %i1.sroa.0.0..sroa_idx, align 8, !dbg !27
+  %i1.sroa.0.0.copyload = load i32, i32* %i1.sroa.0.0..sroa_idx, align 8, !dbg !27
   call void @llvm.dbg.value(metadata i32 %i1.sroa.0.0.copyload, i64 0, metadata !28, metadata !29), !dbg !27
   %i1.sroa.2.0..sroa_raw_cast = bitcast %struct.Outer* %outer to i8*, !dbg !27
   %i1.sroa.2.0..sroa_raw_idx = getelementptr inbounds i8, i8* %i1.sroa.2.0..sroa_raw_cast, i64 20, !dbg !27
diff --git a/llvm/test/DebugInfo/X86/pr11300.ll b/llvm/test/DebugInfo/X86/pr11300.ll
index 53e85ac..99e3803 100644
--- a/llvm/test/DebugInfo/X86/pr11300.ll
+++ b/llvm/test/DebugInfo/X86/pr11300.ll
@@ -19,7 +19,7 @@
   %x.addr = alloca %struct.foo*, align 8
   store %struct.foo* %x, %struct.foo** %x.addr, align 8
   call void @llvm.dbg.declare(metadata %struct.foo** %x.addr, metadata !23, metadata !{!"0x102"}), !dbg !24
-  %0 = load %struct.foo** %x.addr, align 8, !dbg !25
+  %0 = load %struct.foo*, %struct.foo** %x.addr, align 8, !dbg !25
   call void @_ZN3foo3barEv(%struct.foo* %0), !dbg !25
   ret void, !dbg !27
 }
@@ -31,7 +31,7 @@
   %this.addr = alloca %struct.foo*, align 8
   store %struct.foo* %this, %struct.foo** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %struct.foo** %this.addr, metadata !28, metadata !{!"0x102"}), !dbg !29
-  %this1 = load %struct.foo** %this.addr
+  %this1 = load %struct.foo*, %struct.foo** %this.addr
   ret void, !dbg !30
 }
 
diff --git a/llvm/test/DebugInfo/X86/pr12831.ll b/llvm/test/DebugInfo/X86/pr12831.ll
index b1412ed..794877d 100644
--- a/llvm/test/DebugInfo/X86/pr12831.ll
+++ b/llvm/test/DebugInfo/X86/pr12831.ll
@@ -21,13 +21,13 @@
   %agg.tmp5 = alloca %class.anon.0, align 1
   store %class.BPLFunctionWriter* %this, %class.BPLFunctionWriter** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.BPLFunctionWriter** %this.addr, metadata !133, metadata !{!"0x102"}), !dbg !135
-  %this1 = load %class.BPLFunctionWriter** %this.addr
+  %this1 = load %class.BPLFunctionWriter*, %class.BPLFunctionWriter** %this.addr
   %MW = getelementptr inbounds %class.BPLFunctionWriter, %class.BPLFunctionWriter* %this1, i32 0, i32 0, !dbg !136
-  %0 = load %struct.BPLModuleWriter** %MW, align 8, !dbg !136
+  %0 = load %struct.BPLModuleWriter*, %struct.BPLModuleWriter** %MW, align 8, !dbg !136
   call void @"_ZN8functionIFvvEEC1IZN17BPLFunctionWriter9writeExprEvE3$_0EET_"(%class.function* %agg.tmp), !dbg !136
   call void @_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE(%struct.BPLModuleWriter* %0), !dbg !136
   %MW3 = getelementptr inbounds %class.BPLFunctionWriter, %class.BPLFunctionWriter* %this1, i32 0, i32 0, !dbg !138
-  %1 = load %struct.BPLModuleWriter** %MW3, align 8, !dbg !138
+  %1 = load %struct.BPLModuleWriter*, %struct.BPLModuleWriter** %MW3, align 8, !dbg !138
   call void @"_ZN8functionIFvvEEC1IZN17BPLFunctionWriter9writeExprEvE3$_1_0EET_"(%class.function* %agg.tmp4), !dbg !138
   call void @_ZN15BPLModuleWriter14writeIntrinsicE8functionIFvvEE(%struct.BPLModuleWriter* %1), !dbg !138
   ret void, !dbg !139
@@ -44,7 +44,7 @@
   store %class.function* %this, %class.function** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.function** %this.addr, metadata !140, metadata !{!"0x102"}), !dbg !142
   call void @llvm.dbg.declare(metadata %class.anon.0* %__f, metadata !143, metadata !{!"0x102"}), !dbg !144
-  %this1 = load %class.function** %this.addr
+  %this1 = load %class.function*, %class.function** %this.addr
   call void @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_1_0EEvRKT_"(%class.anon.0* %__f), !dbg !145
   ret void, !dbg !147
 }
@@ -63,7 +63,7 @@
   store %class.function* %this, %class.function** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.function** %this.addr, metadata !150, metadata !{!"0x102"}), !dbg !151
   call void @llvm.dbg.declare(metadata %class.anon* %__f, metadata !152, metadata !{!"0x102"}), !dbg !153
-  %this1 = load %class.function** %this.addr
+  %this1 = load %class.function*, %class.function** %this.addr
   call void @"_ZN13_Base_manager21_M_not_empty_functionIZN17BPLFunctionWriter9writeExprEvE3$_0EEvRKT_"(%class.anon* %__f), !dbg !154
   ret void, !dbg !156
 }
diff --git a/llvm/test/DebugInfo/X86/pr19307.ll b/llvm/test/DebugInfo/X86/pr19307.ll
index 38d8050..43b9219 100644
--- a/llvm/test/DebugInfo/X86/pr19307.ll
+++ b/llvm/test/DebugInfo/X86/pr19307.ll
@@ -50,19 +50,19 @@
 
 lor.lhs.false:                                    ; preds = %entry
   %call1 = call i8* @_ZNSsixEm(%"class.std::basic_string"* %range, i64 6), !dbg !52
-  %0 = load i8* %call1, !dbg !52
+  %0 = load i8, i8* %call1, !dbg !52
   %conv = sext i8 %0 to i32, !dbg !52
   %cmp2 = icmp eq i32 %conv, 45, !dbg !52
   br i1 %cmp2, label %if.then, label %if.end, !dbg !52
 
 if.then:                                          ; preds = %lor.lhs.false, %entry
-  %1 = load i64** %offset.addr, align 8, !dbg !54
+  %1 = load i64*, i64** %offset.addr, align 8, !dbg !54
   store i64 1, i64* %1, align 8, !dbg !54
   br label %if.end, !dbg !54
 
 if.end:                                           ; preds = %if.then, %lor.lhs.false
   %call3 = call %"class.std::basic_string"* @_ZNSs5eraseEmm(%"class.std::basic_string"* %range, i64 0, i64 6), !dbg !55
-  %2 = load i64** %limit.addr, align 8, !dbg !56
+  %2 = load i64*, i64** %limit.addr, align 8, !dbg !56
   store i64 2, i64* %2, align 8, !dbg !56
   ret void, !dbg !57
 }
diff --git a/llvm/test/DebugInfo/X86/recursive_inlining.ll b/llvm/test/DebugInfo/X86/recursive_inlining.ll
index 379d72d..43af92f 100644
--- a/llvm/test/DebugInfo/X86/recursive_inlining.ll
+++ b/llvm/test/DebugInfo/X86/recursive_inlining.ll
@@ -94,11 +94,11 @@
 define void @_Z3fn6v() #0 {
 entry:
   tail call void @_Z3fn8v() #3, !dbg !31
-  %0 = load %struct.C** @x, align 8, !dbg !32, !tbaa !33
+  %0 = load %struct.C*, %struct.C** @x, align 8, !dbg !32, !tbaa !33
   tail call void @llvm.dbg.value(metadata %struct.C* %0, i64 0, metadata !37, metadata !{!"0x102"}) #3, !dbg !38
   tail call void @_Z3fn8v() #3, !dbg !39
   %b.i = getelementptr inbounds %struct.C, %struct.C* %0, i64 0, i32 0, !dbg !40
-  %1 = load i32* %b.i, align 4, !dbg !40, !tbaa !42
+  %1 = load i32, i32* %b.i, align 4, !dbg !40, !tbaa !42
   %tobool.i = icmp eq i32 %1, 0, !dbg !40
   br i1 %tobool.i, label %_ZN1C5m_fn2Ev.exit, label %if.then.i, !dbg !40
 
@@ -119,7 +119,7 @@
   tail call void @llvm.dbg.value(metadata %struct.C* %this, i64 0, metadata !24, metadata !{!"0x102"}), !dbg !49
   tail call void @_Z3fn8v() #3, !dbg !50
   %b = getelementptr inbounds %struct.C, %struct.C* %this, i64 0, i32 0, !dbg !51
-  %0 = load i32* %b, align 4, !dbg !51, !tbaa !42
+  %0 = load i32, i32* %b, align 4, !dbg !51, !tbaa !42
   %tobool = icmp eq i32 %0, 0, !dbg !51
   br i1 %tobool, label %if.end, label %if.then, !dbg !51
 
@@ -129,11 +129,11 @@
 
 if.end:                                           ; preds = %entry, %if.then
   tail call void @_Z3fn8v() #3, !dbg !53
-  %1 = load %struct.C** @x, align 8, !dbg !56, !tbaa !33
+  %1 = load %struct.C*, %struct.C** @x, align 8, !dbg !56, !tbaa !33
   tail call void @llvm.dbg.value(metadata %struct.C* %1, i64 0, metadata !57, metadata !{!"0x102"}) #3, !dbg !58
   tail call void @_Z3fn8v() #3, !dbg !59
   %b.i.i = getelementptr inbounds %struct.C, %struct.C* %1, i64 0, i32 0, !dbg !60
-  %2 = load i32* %b.i.i, align 4, !dbg !60, !tbaa !42
+  %2 = load i32, i32* %b.i.i, align 4, !dbg !60, !tbaa !42
   %tobool.i.i = icmp eq i32 %2, 0, !dbg !60
   br i1 %tobool.i.i, label %_Z3fn6v.exit, label %if.then.i.i, !dbg !60
 
@@ -153,11 +153,11 @@
 
 tailrecurse:                                      ; preds = %tailrecurse.backedge, %entry
   tail call void @_Z3fn8v() #3, !dbg !64
-  %0 = load %struct.C** @x, align 8, !dbg !66, !tbaa !33
+  %0 = load %struct.C*, %struct.C** @x, align 8, !dbg !66, !tbaa !33
   tail call void @llvm.dbg.value(metadata %struct.C* %0, i64 0, metadata !67, metadata !{!"0x102"}) #3, !dbg !68
   tail call void @_Z3fn8v() #3, !dbg !69
   %b.i.i = getelementptr inbounds %struct.C, %struct.C* %0, i64 0, i32 0, !dbg !70
-  %1 = load i32* %b.i.i, align 4, !dbg !70, !tbaa !42
+  %1 = load i32, i32* %b.i.i, align 4, !dbg !70, !tbaa !42
   %tobool.i.i = icmp eq i32 %1, 0, !dbg !70
   br i1 %tobool.i.i, label %tailrecurse.backedge, label %if.then.i.i, !dbg !70
 
@@ -172,7 +172,7 @@
 ; Function Attrs: nounwind
 define void @_Z3fn4v() #0 {
 entry:
-  %0 = load %struct.C** @x, align 8, !dbg !72, !tbaa !33
+  %0 = load %struct.C*, %struct.C** @x, align 8, !dbg !72, !tbaa !33
   tail call void @_ZN1C5m_fn2Ev(%struct.C* %0), !dbg !72
   ret void, !dbg !72
 }
@@ -180,7 +180,7 @@
 ; Function Attrs: nounwind
 define void @_Z3fn5v() #0 {
 entry:
-  %0 = load %struct.C** @x, align 8, !dbg !73, !tbaa !33
+  %0 = load %struct.C*, %struct.C** @x, align 8, !dbg !73, !tbaa !33
   tail call void @_ZN1C5m_fn2Ev(%struct.C* %0), !dbg !73
   ret void, !dbg !73
 }
diff --git a/llvm/test/DebugInfo/X86/reference-argument.ll b/llvm/test/DebugInfo/X86/reference-argument.ll
index 57ff994..962ce29 100644
--- a/llvm/test/DebugInfo/X86/reference-argument.ll
+++ b/llvm/test/DebugInfo/X86/reference-argument.ll
@@ -22,7 +22,7 @@
   store %class.A* %this, %class.A** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !59, metadata !{!"0x102"}), !dbg !61
   call void @llvm.dbg.declare(metadata %class.SVal* %v, metadata !62, metadata !{!"0x102\006"}), !dbg !61
-  %this1 = load %class.A** %this.addr
+  %this1 = load %class.A*, %class.A** %this.addr
   call void @_Z3barR4SVal(%class.SVal* %v), !dbg !61
   ret void, !dbg !61
 }
diff --git a/llvm/test/DebugInfo/X86/rvalue-ref.ll b/llvm/test/DebugInfo/X86/rvalue-ref.ll
index 3829966..56196cf 100644
--- a/llvm/test/DebugInfo/X86/rvalue-ref.ll
+++ b/llvm/test/DebugInfo/X86/rvalue-ref.ll
@@ -10,8 +10,8 @@
   %i.addr = alloca i32*, align 8
   store i32* %i, i32** %i.addr, align 8
   call void @llvm.dbg.declare(metadata i32** %i.addr, metadata !11, metadata !{!"0x102"}), !dbg !12
-  %0 = load i32** %i.addr, align 8, !dbg !13
-  %1 = load i32* %0, align 4, !dbg !13
+  %0 = load i32*, i32** %i.addr, align 8, !dbg !13
+  %1 = load i32, i32* %0, align 4, !dbg !13
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i32 0, i32 0), i32 %1), !dbg !13
   ret void, !dbg !15
 }
diff --git a/llvm/test/DebugInfo/X86/sret.ll b/llvm/test/DebugInfo/X86/sret.ll
index d8352e2..c4211e2 100644
--- a/llvm/test/DebugInfo/X86/sret.ll
+++ b/llvm/test/DebugInfo/X86/sret.ll
@@ -26,11 +26,11 @@
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !67, metadata !{!"0x102"}), !dbg !69
   store i32 %i, i32* %i.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !70, metadata !{!"0x102"}), !dbg !71
-  %this1 = load %class.A** %this.addr
+  %this1 = load %class.A*, %class.A** %this.addr
   %0 = bitcast %class.A* %this1 to i8***, !dbg !72
   store i8** getelementptr inbounds ([4 x i8*]* @_ZTV1A, i64 0, i64 2), i8*** %0, !dbg !72
   %m_int = getelementptr inbounds %class.A, %class.A* %this1, i32 0, i32 1, !dbg !72
-  %1 = load i32* %i.addr, align 4, !dbg !72
+  %1 = load i32, i32* %i.addr, align 4, !dbg !72
   store i32 %1, i32* %m_int, align 4, !dbg !72
   ret void, !dbg !73
 }
@@ -47,13 +47,13 @@
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !74, metadata !{!"0x102"}), !dbg !75
   store %class.A* %rhs, %class.A** %rhs.addr, align 8
   call void @llvm.dbg.declare(metadata %class.A** %rhs.addr, metadata !76, metadata !{!"0x102"}), !dbg !77
-  %this1 = load %class.A** %this.addr
+  %this1 = load %class.A*, %class.A** %this.addr
   %0 = bitcast %class.A* %this1 to i8***, !dbg !78
   store i8** getelementptr inbounds ([4 x i8*]* @_ZTV1A, i64 0, i64 2), i8*** %0, !dbg !78
   %m_int = getelementptr inbounds %class.A, %class.A* %this1, i32 0, i32 1, !dbg !78
-  %1 = load %class.A** %rhs.addr, align 8, !dbg !78
+  %1 = load %class.A*, %class.A** %rhs.addr, align 8, !dbg !78
   %m_int2 = getelementptr inbounds %class.A, %class.A* %1, i32 0, i32 1, !dbg !78
-  %2 = load i32* %m_int2, align 4, !dbg !78
+  %2 = load i32, i32* %m_int2, align 4, !dbg !78
   store i32 %2, i32* %m_int, align 4, !dbg !78
   ret void, !dbg !79
 }
@@ -67,10 +67,10 @@
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !80, metadata !{!"0x102"}), !dbg !81
   store %class.A* %rhs, %class.A** %rhs.addr, align 8
   call void @llvm.dbg.declare(metadata %class.A** %rhs.addr, metadata !82, metadata !{!"0x102"}), !dbg !83
-  %this1 = load %class.A** %this.addr
-  %0 = load %class.A** %rhs.addr, align 8, !dbg !84
+  %this1 = load %class.A*, %class.A** %this.addr
+  %0 = load %class.A*, %class.A** %rhs.addr, align 8, !dbg !84
   %m_int = getelementptr inbounds %class.A, %class.A* %0, i32 0, i32 1, !dbg !84
-  %1 = load i32* %m_int, align 4, !dbg !84
+  %1 = load i32, i32* %m_int, align 4, !dbg !84
   %m_int2 = getelementptr inbounds %class.A, %class.A* %this1, i32 0, i32 1, !dbg !84
   store i32 %1, i32* %m_int2, align 4, !dbg !84
   ret %class.A* %this1, !dbg !85
@@ -82,9 +82,9 @@
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !86, metadata !{!"0x102"}), !dbg !87
-  %this1 = load %class.A** %this.addr
+  %this1 = load %class.A*, %class.A** %this.addr
   %m_int = getelementptr inbounds %class.A, %class.A* %this1, i32 0, i32 1, !dbg !88
-  %0 = load i32* %m_int, align 4, !dbg !88
+  %0 = load i32, i32* %m_int, align 4, !dbg !88
   ret i32 %0, !dbg !88
 }
 
@@ -96,13 +96,13 @@
   %cleanup.dest.slot = alloca i32
   store %class.B* %this, %class.B** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.B** %this.addr, metadata !89, metadata !{!"0x102"}), !dbg !91
-  %this1 = load %class.B** %this.addr
+  %this1 = load %class.B*, %class.B** %this.addr
   store i1 false, i1* %nrvo, !dbg !92
   call void @llvm.dbg.declare(metadata %class.A* %agg.result, metadata !93, metadata !{!"0x102\006"}), !dbg !92
   call void @_ZN1AC1Ei(%class.A* %agg.result, i32 12), !dbg !92
   store i1 true, i1* %nrvo, !dbg !94
   store i32 1, i32* %cleanup.dest.slot
-  %nrvo.val = load i1* %nrvo, !dbg !95
+  %nrvo.val = load i1, i1* %nrvo, !dbg !95
   br i1 %nrvo.val, label %nrvo.skipdtor, label %nrvo.unused, !dbg !95
 
 nrvo.unused:                                      ; preds = %entry
@@ -119,7 +119,7 @@
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !101, metadata !{!"0x102"}), !dbg !102
-  %this1 = load %class.A** %this.addr
+  %this1 = load %class.A*, %class.A** %this.addr
   ret void, !dbg !103
 }
 
@@ -153,11 +153,11 @@
   store i32 %call, i32* %return_val, align 4, !dbg !111
   call void @llvm.dbg.declare(metadata %class.A* %a, metadata !113, metadata !{!"0x102"}), !dbg !114
   call void @_ZN1B9AInstanceEv(%class.A* sret %a, %class.B* %b), !dbg !114
-  %0 = load i32* %return_val, align 4, !dbg !115
+  %0 = load i32, i32* %return_val, align 4, !dbg !115
   store i32 %0, i32* %retval, !dbg !115
   store i32 1, i32* %cleanup.dest.slot
   call void @_ZN1AD2Ev(%class.A* %a), !dbg !116
-  %1 = load i32* %retval, !dbg !116
+  %1 = load i32, i32* %retval, !dbg !116
   ret i32 %1, !dbg !116
 
 lpad:                                             ; preds = %entry
@@ -174,8 +174,8 @@
   br label %eh.resume, !dbg !117
 
 eh.resume:                                        ; preds = %invoke.cont1
-  %exn = load i8** %exn.slot, !dbg !119
-  %sel = load i32* %ehselector.slot, !dbg !119
+  %exn = load i8*, i8** %exn.slot, !dbg !119
+  %sel = load i32, i32* %ehselector.slot, !dbg !119
   %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0, !dbg !119
   %lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1, !dbg !119
   resume { i8*, i32 } %lpad.val2, !dbg !119
@@ -194,7 +194,7 @@
   %this.addr = alloca %class.B*, align 8
   store %class.B* %this, %class.B** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.B** %this.addr, metadata !123, metadata !{!"0x102"}), !dbg !124
-  %this1 = load %class.B** %this.addr
+  %this1 = load %class.B*, %class.B** %this.addr
   ret void, !dbg !125
 }
 
@@ -219,7 +219,7 @@
   %ehselector.slot = alloca i32
   store %class.A* %this, %class.A** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !126, metadata !{!"0x102"}), !dbg !127
-  %this1 = load %class.A** %this.addr
+  %this1 = load %class.A*, %class.A** %this.addr
   invoke void @_ZN1AD2Ev(%class.A* %this1)
           to label %invoke.cont unwind label %lpad, !dbg !128
 
@@ -240,8 +240,8 @@
   br label %eh.resume, !dbg !131
 
 eh.resume:                                        ; preds = %lpad
-  %exn = load i8** %exn.slot, !dbg !133
-  %sel = load i32* %ehselector.slot, !dbg !133
+  %exn = load i8*, i8** %exn.slot, !dbg !133
+  %sel = load i32, i32* %ehselector.slot, !dbg !133
   %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0, !dbg !133
   %lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1, !dbg !133
   resume { i8*, i32 } %lpad.val2, !dbg !133
diff --git a/llvm/test/DebugInfo/X86/sroasplit-1.ll b/llvm/test/DebugInfo/X86/sroasplit-1.ll
index 509ab92..5ee8989 100644
--- a/llvm/test/DebugInfo/X86/sroasplit-1.ll
+++ b/llvm/test/DebugInfo/X86/sroasplit-1.ll
@@ -47,7 +47,7 @@
   %1 = bitcast %struct.Inner* %arrayidx to i8*, !dbg !28
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 16, i32 8, i1 false), !dbg !28
   %a = getelementptr inbounds %struct.Inner, %struct.Inner* %i1, i32 0, i32 0, !dbg !29
-  %2 = load i32* %a, align 4, !dbg !29
+  %2 = load i32, i32* %a, align 4, !dbg !29
   ret i32 %2, !dbg !29
 }
 
diff --git a/llvm/test/DebugInfo/X86/sroasplit-2.ll b/llvm/test/DebugInfo/X86/sroasplit-2.ll
index 4b4fabe..2617319 100644
--- a/llvm/test/DebugInfo/X86/sroasplit-2.ll
+++ b/llvm/test/DebugInfo/X86/sroasplit-2.ll
@@ -53,7 +53,7 @@
   %7 = bitcast %struct.Inner* %5 to i8*, !dbg !27
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %6, i8* %7, i64 8, i32 4, i1 false), !dbg !27
   %8 = getelementptr inbounds %struct.Inner, %struct.Inner* %i1, i32 0, i32 0, !dbg !28
-  %9 = load i32* %8, align 4, !dbg !28
+  %9 = load i32, i32* %8, align 4, !dbg !28
   ret i32 %9, !dbg !28
 }
 
diff --git a/llvm/test/DebugInfo/X86/sroasplit-3.ll b/llvm/test/DebugInfo/X86/sroasplit-3.ll
index fa0e6c4..caaec39 100644
--- a/llvm/test/DebugInfo/X86/sroasplit-3.ll
+++ b/llvm/test/DebugInfo/X86/sroasplit-3.ll
@@ -27,7 +27,7 @@
   store float %s.coerce, float* %coerce.dive, align 1
   call void @llvm.dbg.declare(metadata %struct.S* %s, metadata !16, metadata !17), !dbg !18
   %f = getelementptr inbounds %struct.S, %struct.S* %s, i32 0, i32 0, !dbg !19
-  %0 = load float* %f, align 4, !dbg !19
+  %0 = load float, float* %f, align 4, !dbg !19
   ret float %0, !dbg !19
 }
 
diff --git a/llvm/test/DebugInfo/X86/sroasplit-4.ll b/llvm/test/DebugInfo/X86/sroasplit-4.ll
index 58cdb07..f5a46f3 100644
--- a/llvm/test/DebugInfo/X86/sroasplit-4.ll
+++ b/llvm/test/DebugInfo/X86/sroasplit-4.ll
@@ -1,9 +1,9 @@
 ; RUN: opt -sroa < %s -S -o - | FileCheck %s
 ;
 ; Test that recursively splitting an alloca updates the debug info correctly.
-; CHECK: %[[T:.*]] = load i64* @t, align 8
+; CHECK: %[[T:.*]] = load i64, i64* @t, align 8
 ; CHECK: call void @llvm.dbg.value(metadata i64 %[[T]], i64 0, metadata ![[Y:.*]], metadata ![[P1:.*]])
-; CHECK: %[[T1:.*]] = load i64* @t, align 8
+; CHECK: %[[T1:.*]] = load i64, i64* @t, align 8
 ; CHECK: call void @llvm.dbg.value(metadata i64 %[[T1]], i64 0, metadata ![[Y]], metadata ![[P2:.*]])
 ; CHECK: call void @llvm.dbg.value(metadata i64 %[[T]], i64 0, metadata ![[R:.*]], metadata ![[P3:.*]])
 ; CHECK: call void @llvm.dbg.value(metadata i64 %[[T1]], i64 0, metadata ![[R]], metadata ![[P4:.*]])
@@ -62,10 +62,10 @@
 if.end:                                           ; preds = %entry
   call void @llvm.dbg.declare(metadata %struct.p* %y, metadata !28, metadata !29), !dbg !30
   %s = getelementptr inbounds %struct.p, %struct.p* %y, i32 0, i32 0, !dbg !30
-  %0 = load i64* @t, align 8, !dbg !30
+  %0 = load i64, i64* @t, align 8, !dbg !30
   store i64 %0, i64* %s, align 8, !dbg !30
   %t = getelementptr inbounds %struct.p, %struct.p* %y, i32 0, i32 1, !dbg !30
-  %1 = load i64* @t, align 8, !dbg !30
+  %1 = load i64, i64* @t, align 8, !dbg !30
   store i64 %1, i64* %t, align 8, !dbg !30
   call void @llvm.dbg.declare(metadata %struct.r* %r, metadata !31, metadata !29), !dbg !32
   %i = getelementptr inbounds %struct.r, %struct.r* %r, i32 0, i32 0, !dbg !32
@@ -87,7 +87,7 @@
   br label %return, !dbg !33
 
 return:                                           ; preds = %if.end, %if.then
-  %6 = load i32* %retval, !dbg !34
+  %6 = load i32, i32* %retval, !dbg !34
   ret i32 %6, !dbg !34
 }
 
diff --git a/llvm/test/DebugInfo/X86/sroasplit-5.ll b/llvm/test/DebugInfo/X86/sroasplit-5.ll
index 9c05e83..079eaec 100644
--- a/llvm/test/DebugInfo/X86/sroasplit-5.ll
+++ b/llvm/test/DebugInfo/X86/sroasplit-5.ll
@@ -37,7 +37,7 @@
   call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i32 4, i1 false), !dbg !19
   call void @llvm.dbg.declare(metadata i32* %local, metadata !20, metadata !17), !dbg !21
   %1 = bitcast %struct.prog_src_register* %a to i32*, !dbg !21
-  %bf.load = load i32* %1, align 4, !dbg !21
+  %bf.load = load i32, i32* %1, align 4, !dbg !21
   %bf.shl = shl i32 %bf.load, 15, !dbg !21
   %bf.ashr = ashr i32 %bf.shl, 19, !dbg !21
   store i32 %bf.ashr, i32* %local, align 4, !dbg !21
@@ -45,7 +45,7 @@
   %3 = bitcast %struct.prog_src_register* %a to i8*, !dbg !22
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 8, i32 4, i1 false), !dbg !22
   %4 = bitcast %struct.prog_src_register* %retval to i64*, !dbg !22
-  %5 = load i64* %4, align 1, !dbg !22
+  %5 = load i64, i64* %4, align 1, !dbg !22
   ret i64 %5, !dbg !22
 }
 
diff --git a/llvm/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll b/llvm/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
index 0b2c50e..40146b3 100644
--- a/llvm/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
+++ b/llvm/test/DebugInfo/X86/stmt-list-multiple-compile-units.ll
@@ -61,7 +61,7 @@
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !15, metadata !{!"0x102"}), !dbg !16
-  %0 = load i32* %a.addr, align 4, !dbg !17
+  %0 = load i32, i32* %a.addr, align 4, !dbg !17
   %call = call i32 @fn(i32 %0), !dbg !17
   ret i32 %call, !dbg !17
 }
@@ -73,7 +73,7 @@
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !19, metadata !{!"0x102"}), !dbg !20
-  %0 = load i32* %a.addr, align 4, !dbg !21
+  %0 = load i32, i32* %a.addr, align 4, !dbg !21
   ret i32 %0, !dbg !21
 }
 
diff --git a/llvm/test/DebugInfo/X86/subregisters.ll b/llvm/test/DebugInfo/X86/subregisters.ll
index 2e728c2..37a3513 100644
--- a/llvm/test/DebugInfo/X86/subregisters.ll
+++ b/llvm/test/DebugInfo/X86/subregisters.ll
@@ -42,7 +42,7 @@
 entry:
   tail call void @llvm.dbg.value(metadata %struct.bar* %b, i64 0, metadata !15, metadata !{!"0x102"}), !dbg !25
   %a1 = getelementptr inbounds %struct.bar, %struct.bar* %b, i64 0, i32 0, !dbg !26
-  %0 = load i32* %a1, align 4, !dbg !26, !tbaa !27
+  %0 = load i32, i32* %a1, align 4, !dbg !26, !tbaa !27
   tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !16, metadata !{!"0x102"}), !dbg !26
   %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %0) #4, !dbg !32
   ret void, !dbg !33
diff --git a/llvm/test/DebugInfo/X86/vla.ll b/llvm/test/DebugInfo/X86/vla.ll
index 98a3a1c..d73e0c4 100644
--- a/llvm/test/DebugInfo/X86/vla.ll
+++ b/llvm/test/DebugInfo/X86/vla.ll
@@ -28,7 +28,7 @@
   %cleanup.dest.slot = alloca i32
   store i32 %n, i32* %n.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %n.addr, metadata !15, metadata !{!"0x102"}), !dbg !16
-  %0 = load i32* %n.addr, align 4, !dbg !17
+  %0 = load i32, i32* %n.addr, align 4, !dbg !17
   %1 = zext i32 %0 to i64, !dbg !17
   %2 = call i8* @llvm.stacksave(), !dbg !17
   store i8* %2, i8** %saved_stack, !dbg !17
@@ -36,13 +36,13 @@
   call void @llvm.dbg.declare(metadata i32* %vla, metadata !18, metadata !{!"0x102\006"}), !dbg !17
   %arrayidx = getelementptr inbounds i32, i32* %vla, i64 0, !dbg !22
   store i32 42, i32* %arrayidx, align 4, !dbg !22
-  %3 = load i32* %n.addr, align 4, !dbg !23
+  %3 = load i32, i32* %n.addr, align 4, !dbg !23
   %sub = sub nsw i32 %3, 1, !dbg !23
   %idxprom = sext i32 %sub to i64, !dbg !23
   %arrayidx1 = getelementptr inbounds i32, i32* %vla, i64 %idxprom, !dbg !23
-  %4 = load i32* %arrayidx1, align 4, !dbg !23
+  %4 = load i32, i32* %arrayidx1, align 4, !dbg !23
   store i32 1, i32* %cleanup.dest.slot
-  %5 = load i8** %saved_stack, !dbg !24
+  %5 = load i8*, i8** %saved_stack, !dbg !24
   call void @llvm.stackrestore(i8* %5), !dbg !24
   ret i32 %4, !dbg !23
 }
@@ -67,7 +67,7 @@
   call void @llvm.dbg.declare(metadata i32* %argc.addr, metadata !25, metadata !{!"0x102"}), !dbg !26
   store i8** %argv, i8*** %argv.addr, align 8
   call void @llvm.dbg.declare(metadata i8*** %argv.addr, metadata !27, metadata !{!"0x102"}), !dbg !26
-  %0 = load i32* %argc.addr, align 4, !dbg !28
+  %0 = load i32, i32* %argc.addr, align 4, !dbg !28
   %call = call i32 @vla(i32 %0), !dbg !28
   ret i32 %call, !dbg !28
 }
diff --git a/llvm/test/DebugInfo/block-asan.ll b/llvm/test/DebugInfo/block-asan.ll
index 6851b25..4c938d9 100644
--- a/llvm/test/DebugInfo/block-asan.ll
+++ b/llvm/test/DebugInfo/block-asan.ll
@@ -33,9 +33,9 @@
   %byref.size = getelementptr inbounds %struct.__block_byref_x, %struct.__block_byref_x* %x, i32 0, i32 3, !dbg !24
   store i32 32, i32* %byref.size, !dbg !24
   %forwarding = getelementptr inbounds %struct.__block_byref_x, %struct.__block_byref_x* %x, i32 0, i32 1, !dbg !25
-  %0 = load %struct.__block_byref_x** %forwarding, !dbg !25
+  %0 = load %struct.__block_byref_x*, %struct.__block_byref_x** %forwarding, !dbg !25
   %x1 = getelementptr inbounds %struct.__block_byref_x, %struct.__block_byref_x* %0, i32 0, i32 4, !dbg !25
-  %1 = load i32* %x1, align 4, !dbg !25
+  %1 = load i32, i32* %x1, align 4, !dbg !25
   call void @bar(i32 %1), !dbg !25
   %2 = bitcast %struct.__block_byref_x* %x to i8*, !dbg !26
   call void @_Block_object_dispose(i8* %2, i32 8) #3, !dbg !26
diff --git a/llvm/test/DebugInfo/cross-cu-inlining.ll b/llvm/test/DebugInfo/cross-cu-inlining.ll
index fafa3fa..4b2cbf7 100644
--- a/llvm/test/DebugInfo/cross-cu-inlining.ll
+++ b/llvm/test/DebugInfo/cross-cu-inlining.ll
@@ -71,12 +71,12 @@
   %x.addr.i = alloca i32, align 4
   %retval = alloca i32, align 4
   store i32 0, i32* %retval
-  %0 = load i32* @i, align 4, !dbg !19
+  %0 = load i32, i32* @i, align 4, !dbg !19
   %1 = bitcast i32* %x.addr.i to i8*
   call void @llvm.lifetime.start(i64 4, i8* %1)
   store i32 %0, i32* %x.addr.i, align 4
   call void @llvm.dbg.declare(metadata i32* %x.addr.i, metadata !20, metadata !{!"0x102"}), !dbg !21
-  %2 = load i32* %x.addr.i, align 4, !dbg !22
+  %2 = load i32, i32* %x.addr.i, align 4, !dbg !22
   %mul.i = mul nsw i32 %2, 2, !dbg !22
   %3 = bitcast i32* %x.addr.i to i8*, !dbg !22
   call void @llvm.lifetime.end(i64 4, i8* %3), !dbg !22
@@ -89,7 +89,7 @@
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !20, metadata !{!"0x102"}), !dbg !23
-  %0 = load i32* %x.addr, align 4, !dbg !24
+  %0 = load i32, i32* %x.addr, align 4, !dbg !24
   %mul = mul nsw i32 %0, 2, !dbg !24
   ret i32 %mul, !dbg !24
 }
diff --git a/llvm/test/DebugInfo/cross-cu-linkonce-distinct.ll b/llvm/test/DebugInfo/cross-cu-linkonce-distinct.ll
index 2bd7c47..2ab9add 100644
--- a/llvm/test/DebugInfo/cross-cu-linkonce-distinct.ll
+++ b/llvm/test/DebugInfo/cross-cu-linkonce-distinct.ll
@@ -53,7 +53,7 @@
   %1 = alloca i32, align 4
   store i32 %i, i32* %1, align 4
   call void @llvm.dbg.declare(metadata i32* %1, metadata !22, metadata !{!"0x102"}), !dbg !23
-  %2 = load i32* %1, align 4, !dbg !24
+  %2 = load i32, i32* %1, align 4, !dbg !24
   %3 = mul nsw i32 %2, 2, !dbg !24
   ret i32 %3, !dbg !24
 }
diff --git a/llvm/test/DebugInfo/cross-cu-linkonce.ll b/llvm/test/DebugInfo/cross-cu-linkonce.ll
index aaae4c1..61dbcbf 100644
--- a/llvm/test/DebugInfo/cross-cu-linkonce.ll
+++ b/llvm/test/DebugInfo/cross-cu-linkonce.ll
@@ -33,7 +33,7 @@
   %1 = alloca i32, align 4
   store i32 %i, i32* %1, align 4
   call void @llvm.dbg.declare(metadata i32* %1, metadata !20, metadata !{!"0x102"}), !dbg !21
-  %2 = load i32* %1, align 4, !dbg !22
+  %2 = load i32, i32* %1, align 4, !dbg !22
   %3 = mul nsw i32 %2, 2, !dbg !22
   ret i32 %3, !dbg !22
 }
diff --git a/llvm/test/DebugInfo/cu-range-hole.ll b/llvm/test/DebugInfo/cu-range-hole.ll
index aa489b6..454c235 100644
--- a/llvm/test/DebugInfo/cu-range-hole.ll
+++ b/llvm/test/DebugInfo/cu-range-hole.ll
@@ -19,7 +19,7 @@
   %c.addr = alloca i32, align 4
   store i32 %c, i32* %c.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %c.addr, metadata !13, metadata !{!"0x102"}), !dbg !14
-  %0 = load i32* %c.addr, align 4, !dbg !14
+  %0 = load i32, i32* %c.addr, align 4, !dbg !14
   %add = add nsw i32 %0, 1, !dbg !14
   ret i32 %add, !dbg !14
 }
@@ -29,7 +29,7 @@
 entry:
   %b.addr = alloca i32, align 4
   store i32 %b, i32* %b.addr, align 4
-  %0 = load i32* %b.addr, align 4
+  %0 = load i32, i32* %b.addr, align 4
   %add = add nsw i32 %0, 1
   ret i32 %add
 }
@@ -43,7 +43,7 @@
   %e.addr = alloca i32, align 4
   store i32 %e, i32* %e.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %e.addr, metadata !15, metadata !{!"0x102"}), !dbg !16
-  %0 = load i32* %e.addr, align 4, !dbg !16
+  %0 = load i32, i32* %e.addr, align 4, !dbg !16
   %add = add nsw i32 %0, 1, !dbg !16
   ret i32 %add, !dbg !16
 }
diff --git a/llvm/test/DebugInfo/cu-ranges.ll b/llvm/test/DebugInfo/cu-ranges.ll
index 6296b93..763622d 100644
--- a/llvm/test/DebugInfo/cu-ranges.ll
+++ b/llvm/test/DebugInfo/cu-ranges.ll
@@ -23,7 +23,7 @@
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !13, metadata !{!"0x102"}), !dbg !14
-  %0 = load i32* %a.addr, align 4, !dbg !15
+  %0 = load i32, i32* %a.addr, align 4, !dbg !15
   %add = add nsw i32 %0, 5, !dbg !15
   ret i32 %add, !dbg !15
 }
@@ -37,7 +37,7 @@
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !16, metadata !{!"0x102"}), !dbg !17
-  %0 = load i32* %a.addr, align 4, !dbg !18
+  %0 = load i32, i32* %a.addr, align 4, !dbg !18
   %add = add nsw i32 %0, 5, !dbg !18
   ret i32 %add, !dbg !18
 }
diff --git a/llvm/test/DebugInfo/debug-info-always-inline.ll b/llvm/test/DebugInfo/debug-info-always-inline.ll
index 2f00bac..cfa046d 100644
--- a/llvm/test/DebugInfo/debug-info-always-inline.ll
+++ b/llvm/test/DebugInfo/debug-info-always-inline.ll
@@ -82,7 +82,7 @@
   store i32 5, i32* %arrayidx, align 4, !dbg !19
   call void @llvm.dbg.declare(metadata i32* %sum, metadata !20), !dbg !21
   store i32 4, i32* %sum, align 4, !dbg !21
-  %0 = load i32* %sum, align 4, !dbg !22
+  %0 = load i32, i32* %sum, align 4, !dbg !22
   ret i32 %0, !dbg !22
 }
 
@@ -99,7 +99,7 @@
   call void @llvm.dbg.declare(metadata i32* %i, metadata !24), !dbg !25
   %call = call i32 @_Z3foov(), !dbg !25
   store i32 %call, i32* %i, align 4, !dbg !25
-  %0 = load i32* %i, align 4, !dbg !26
+  %0 = load i32, i32* %i, align 4, !dbg !26
   ret i32 %0, !dbg !26
 }
 
diff --git a/llvm/test/DebugInfo/dwarf-public-names.ll b/llvm/test/DebugInfo/dwarf-public-names.ll
index d2b8664..702203f 100644
--- a/llvm/test/DebugInfo/dwarf-public-names.ll
+++ b/llvm/test/DebugInfo/dwarf-public-names.ll
@@ -60,7 +60,7 @@
   %this.addr = alloca %struct.C*, align 8
   store %struct.C* %this, %struct.C** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !28, metadata !{!"0x102"}), !dbg !30
-  %this1 = load %struct.C** %this.addr
+  %this1 = load %struct.C*, %struct.C** %this.addr
   store i32 0, i32* @_ZN1C22static_member_variableE, align 4, !dbg !31
   ret void, !dbg !32
 }
@@ -69,7 +69,7 @@
 
 define i32 @_ZN1C22static_member_functionEv() nounwind uwtable align 2 {
 entry:
-  %0 = load i32* @_ZN1C22static_member_variableE, align 4, !dbg !33
+  %0 = load i32, i32* @_ZN1C22static_member_variableE, align 4, !dbg !33
   ret i32 %0, !dbg !33
 }
 
diff --git a/llvm/test/DebugInfo/incorrect-variable-debugloc.ll b/llvm/test/DebugInfo/incorrect-variable-debugloc.ll
index 04ff603..4b6c3ad 100644
--- a/llvm/test/DebugInfo/incorrect-variable-debugloc.ll
+++ b/llvm/test/DebugInfo/incorrect-variable-debugloc.ll
@@ -62,7 +62,7 @@
 entry:
   %MyAlloca = alloca [64 x i8], align 32, !dbg !39
   %0 = ptrtoint [64 x i8]* %MyAlloca to i64, !dbg !39
-  %1 = load i32* @__asan_option_detect_stack_use_after_return, !dbg !39
+  %1 = load i32, i32* @__asan_option_detect_stack_use_after_return, !dbg !39
   %2 = icmp ne i32 %1, 0, !dbg !39
   br i1 %2, label %3, label %5
 
@@ -92,7 +92,7 @@
   %19 = lshr i64 %18, 3, !dbg !39
   %20 = add i64 %19, 2147450880, !dbg !39
   %21 = inttoptr i64 %20 to i8*, !dbg !39
-  %22 = load i8* %21, !dbg !39
+  %22 = load i8, i8* %21, !dbg !39
   %23 = icmp ne i8 %22, 0, !dbg !39
   br i1 %23, label %24, label %30, !dbg !39
 
@@ -120,7 +120,7 @@
 entry:
   %MyAlloca = alloca [64 x i8], align 32, !dbg !48
   %0 = ptrtoint [64 x i8]* %MyAlloca to i64, !dbg !48
-  %1 = load i32* @__asan_option_detect_stack_use_after_return, !dbg !48
+  %1 = load i32, i32* @__asan_option_detect_stack_use_after_return, !dbg !48
   %2 = icmp ne i32 %1, 0, !dbg !48
   br i1 %2, label %3, label %5
 
@@ -152,7 +152,7 @@
   %19 = lshr i64 %18, 3, !dbg !50
   %20 = add i64 %19, 2147450880, !dbg !50
   %21 = inttoptr i64 %20 to i8*, !dbg !50
-  %22 = load i8* %21, !dbg !50
+  %22 = load i8, i8* %21, !dbg !50
   %23 = icmp ne i8 %22, 0, !dbg !50
   br i1 %23, label %24, label %30, !dbg !50
 
@@ -180,7 +180,7 @@
   store i64 -723401728380766731, i64* %34, !dbg !52
   %35 = add i64 %6, 56, !dbg !52
   %36 = inttoptr i64 %35 to i64*, !dbg !52
-  %37 = load i64* %36, !dbg !52
+  %37 = load i64, i64* %36, !dbg !52
   %38 = inttoptr i64 %37 to i8*, !dbg !52
   store i8 0, i8* %38, !dbg !52
   br label %42, !dbg !52
diff --git a/llvm/test/DebugInfo/incorrect-variable-debugloc1.ll b/llvm/test/DebugInfo/incorrect-variable-debugloc1.ll
index 18f2dc7..d53eff3 100644
--- a/llvm/test/DebugInfo/incorrect-variable-debugloc1.ll
+++ b/llvm/test/DebugInfo/incorrect-variable-debugloc1.ll
@@ -36,7 +36,7 @@
   tail call void @llvm.dbg.value(metadata i32 %call, i64 0, metadata !10, metadata !16), !dbg !17
   store volatile i32 %call, i32* %c, align 4, !dbg !19
   tail call void @llvm.dbg.value(metadata i32* %c, i64 0, metadata !10, metadata !16), !dbg !17
-  %c.0.c.0. = load volatile i32* %c, align 4, !dbg !20
+  %c.0.c.0. = load volatile i32, i32* %c, align 4, !dbg !20
   ret i32 %c.0.c.0., !dbg !20
 }
 
diff --git a/llvm/test/DebugInfo/inheritance.ll b/llvm/test/DebugInfo/inheritance.ll
index f33edac..5562c40 100644
--- a/llvm/test/DebugInfo/inheritance.ll
+++ b/llvm/test/DebugInfo/inheritance.ll
@@ -19,12 +19,12 @@
   call void @llvm.dbg.declare(metadata %struct.test1* %tst, metadata !0, metadata !{!"0x102"}), !dbg !21
   call void @_ZN5test1C1Ev(%struct.test1* %tst) nounwind, !dbg !22
   store i32 0, i32* %0, align 4, !dbg !23
-  %1 = load i32* %0, align 4, !dbg !23            ; <i32> [#uses=1]
+  %1 = load i32, i32* %0, align 4, !dbg !23            ; <i32> [#uses=1]
   store i32 %1, i32* %retval, align 4, !dbg !23
   br label %return, !dbg !23
 
 return:                                           ; preds = %entry
-  %retval1 = load i32* %retval, !dbg !23          ; <i32> [#uses=1]
+  %retval1 = load i32, i32* %retval, !dbg !23          ; <i32> [#uses=1]
   ret i32 %retval1, !dbg !23
 }
 
@@ -34,7 +34,7 @@
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   call void @llvm.dbg.declare(metadata %struct.test1** %this_addr, metadata !24, metadata !{!"0x102"}), !dbg !28
   store %struct.test1* %this, %struct.test1** %this_addr
-  %0 = load %struct.test1** %this_addr, align 8, !dbg !28 ; <%struct.test1*> [#uses=1]
+  %0 = load %struct.test1*, %struct.test1** %this_addr, align 8, !dbg !28 ; <%struct.test1*> [#uses=1]
   %1 = getelementptr inbounds %struct.test1, %struct.test1* %0, i32 0, i32 0, !dbg !28 ; <i32 (...)***> [#uses=1]
   store i32 (...)** getelementptr inbounds ([4 x i32 (...)*]* @_ZTV5test1, i64 0, i64 2), i32 (...)*** %1, align 8, !dbg !28
   br label %return, !dbg !28
@@ -51,7 +51,7 @@
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   call void @llvm.dbg.declare(metadata %struct.test1** %this_addr, metadata !32, metadata !{!"0x102"}), !dbg !34
   store %struct.test1* %this, %struct.test1** %this_addr
-  %0 = load %struct.test1** %this_addr, align 8, !dbg !35 ; <%struct.test1*> [#uses=1]
+  %0 = load %struct.test1*, %struct.test1** %this_addr, align 8, !dbg !35 ; <%struct.test1*> [#uses=1]
   %1 = getelementptr inbounds %struct.test1, %struct.test1* %0, i32 0, i32 0, !dbg !35 ; <i32 (...)***> [#uses=1]
   store i32 (...)** getelementptr inbounds ([4 x i32 (...)*]* @_ZTV5test1, i64 0, i64 2), i32 (...)*** %1, align 8, !dbg !35
   br label %bb, !dbg !37
@@ -62,7 +62,7 @@
   br i1 %toBool, label %bb1, label %bb2, !dbg !37
 
 bb1:                                              ; preds = %bb
-  %3 = load %struct.test1** %this_addr, align 8, !dbg !37 ; <%struct.test1*> [#uses=1]
+  %3 = load %struct.test1*, %struct.test1** %this_addr, align 8, !dbg !37 ; <%struct.test1*> [#uses=1]
   %4 = bitcast %struct.test1* %3 to i8*, !dbg !37 ; <i8*> [#uses=1]
   call void @_ZdlPv(i8* %4) nounwind, !dbg !37
   br label %bb2, !dbg !37
@@ -80,7 +80,7 @@
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   call void @llvm.dbg.declare(metadata %struct.test1** %this_addr, metadata !38, metadata !{!"0x102"}), !dbg !40
   store %struct.test1* %this, %struct.test1** %this_addr
-  %0 = load %struct.test1** %this_addr, align 8, !dbg !41 ; <%struct.test1*> [#uses=1]
+  %0 = load %struct.test1*, %struct.test1** %this_addr, align 8, !dbg !41 ; <%struct.test1*> [#uses=1]
   %1 = getelementptr inbounds %struct.test1, %struct.test1* %0, i32 0, i32 0, !dbg !41 ; <i32 (...)***> [#uses=1]
   store i32 (...)** getelementptr inbounds ([4 x i32 (...)*]* @_ZTV5test1, i64 0, i64 2), i32 (...)*** %1, align 8, !dbg !41
   br label %bb, !dbg !43
@@ -91,7 +91,7 @@
   br i1 %toBool, label %bb1, label %bb2, !dbg !43
 
 bb1:                                              ; preds = %bb
-  %3 = load %struct.test1** %this_addr, align 8, !dbg !43 ; <%struct.test1*> [#uses=1]
+  %3 = load %struct.test1*, %struct.test1** %this_addr, align 8, !dbg !43 ; <%struct.test1*> [#uses=1]
   %4 = bitcast %struct.test1* %3 to i8*, !dbg !43 ; <i8*> [#uses=1]
   call void @_ZdlPv(i8* %4) nounwind, !dbg !43
   br label %bb2, !dbg !43
diff --git a/llvm/test/DebugInfo/inline-debug-info-multiret.ll b/llvm/test/DebugInfo/inline-debug-info-multiret.ll
index 71f29ec..464fc18 100644
--- a/llvm/test/DebugInfo/inline-debug-info-multiret.ll
+++ b/llvm/test/DebugInfo/inline-debug-info-multiret.ll
@@ -29,25 +29,25 @@
   store i32 %k, i32* %k.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %k.addr, metadata !13, metadata !{!"0x102"}), !dbg !14
   call void @llvm.dbg.declare(metadata i32* %k2, metadata !15, metadata !{!"0x102"}), !dbg !16
-  %0 = load i32* %k.addr, align 4, !dbg !16
+  %0 = load i32, i32* %k.addr, align 4, !dbg !16
   %call = call i32 @_Z8test_exti(i32 %0), !dbg !16
   store i32 %call, i32* %k2, align 4, !dbg !16
-  %1 = load i32* %k2, align 4, !dbg !17
+  %1 = load i32, i32* %k2, align 4, !dbg !17
   %cmp = icmp sgt i32 %1, 100, !dbg !17
   br i1 %cmp, label %if.then, label %if.end, !dbg !17
 
 if.then:                                          ; preds = %entry
-  %2 = load i32* %k2, align 4, !dbg !18
+  %2 = load i32, i32* %k2, align 4, !dbg !18
   store i32 %2, i32* %retval, !dbg !18
   br label %return, !dbg !18
 
 if.end:                                           ; preds = %entry
   store i32 0, i32* %retval, !dbg !19
-  %3 = load i32* %retval, !dbg !20                ; hand-edited
+  %3 = load i32, i32* %retval, !dbg !20                ; hand-edited
   ret i32 %3, !dbg !20                            ; hand-edited
 
 return:                                           ; preds = %if.end, %if.then
-  %4 = load i32* %retval, !dbg !20
+  %4 = load i32, i32* %retval, !dbg !20
   ret i32 %4, !dbg !20
 }
 
@@ -62,7 +62,7 @@
   %exn.slot = alloca i8*
   %ehselector.slot = alloca i32
   %e = alloca i32, align 4
-  %0 = load i32* @global_var, align 4, !dbg !21
+  %0 = load i32, i32* @global_var, align 4, !dbg !21
   %call = invoke i32 @_Z4testi(i32 %0)
           to label %invoke.cont unwind label %lpad, !dbg !21
 
@@ -79,17 +79,17 @@
   br label %catch.dispatch, !dbg !21
 
 catch.dispatch:                                   ; preds = %lpad
-  %sel = load i32* %ehselector.slot, !dbg !23
+  %sel = load i32, i32* %ehselector.slot, !dbg !23
   %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #2, !dbg !23
   %matches = icmp eq i32 %sel, %4, !dbg !23
   br i1 %matches, label %catch, label %eh.resume, !dbg !23
 
 catch:                                            ; preds = %catch.dispatch
   call void @llvm.dbg.declare(metadata i32* %e, metadata !24, metadata !{!"0x102"}), !dbg !25
-  %exn = load i8** %exn.slot, !dbg !23
+  %exn = load i8*, i8** %exn.slot, !dbg !23
   %5 = call i8* @__cxa_begin_catch(i8* %exn) #2, !dbg !23
   %6 = bitcast i8* %5 to i32*, !dbg !23
-  %7 = load i32* %6, align 4, !dbg !23
+  %7 = load i32, i32* %6, align 4, !dbg !23
   store i32 %7, i32* %e, align 4, !dbg !23
   store i32 0, i32* @global_var, align 4, !dbg !26
   call void @__cxa_end_catch() #2, !dbg !28
@@ -100,8 +100,8 @@
   ret i32 0, !dbg !30
 
 eh.resume:                                        ; preds = %catch.dispatch
-  %exn1 = load i8** %exn.slot, !dbg !23
-  %sel2 = load i32* %ehselector.slot, !dbg !23
+  %exn1 = load i8*, i8** %exn.slot, !dbg !23
+  %sel2 = load i32, i32* %ehselector.slot, !dbg !23
   %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn1, 0, !dbg !23
   %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %sel2, 1, !dbg !23
   resume { i8*, i32 } %lpad.val3, !dbg !23
diff --git a/llvm/test/DebugInfo/inline-debug-info.ll b/llvm/test/DebugInfo/inline-debug-info.ll
index 9b9439b..52299e2 100644
--- a/llvm/test/DebugInfo/inline-debug-info.ll
+++ b/llvm/test/DebugInfo/inline-debug-info.ll
@@ -49,15 +49,15 @@
   store i32 %k, i32* %k.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %k.addr, metadata !13, metadata !{!"0x102"}), !dbg !14
   call void @llvm.dbg.declare(metadata i32* %k2, metadata !15, metadata !{!"0x102"}), !dbg !16
-  %0 = load i32* %k.addr, align 4, !dbg !16
+  %0 = load i32, i32* %k.addr, align 4, !dbg !16
   %call = call i32 @_Z8test_exti(i32 %0), !dbg !16
   store i32 %call, i32* %k2, align 4, !dbg !16
-  %1 = load i32* %k2, align 4, !dbg !17
+  %1 = load i32, i32* %k2, align 4, !dbg !17
   %cmp = icmp sgt i32 %1, 100, !dbg !17
   br i1 %cmp, label %if.then, label %if.end, !dbg !17
 
 if.then:                                          ; preds = %entry
-  %2 = load i32* %k2, align 4, !dbg !18
+  %2 = load i32, i32* %k2, align 4, !dbg !18
   store i32 %2, i32* %retval, !dbg !18
   br label %return, !dbg !18
 
@@ -66,7 +66,7 @@
   br label %return, !dbg !19
 
 return:                                           ; preds = %if.end, %if.then
-  %3 = load i32* %retval, !dbg !20
+  %3 = load i32, i32* %retval, !dbg !20
   ret i32 %3, !dbg !20
 }
 
@@ -80,7 +80,7 @@
   %exn.slot = alloca i8*
   %ehselector.slot = alloca i32
   %e = alloca i32, align 4
-  %0 = load i32* @global_var, align 4, !dbg !21
+  %0 = load i32, i32* @global_var, align 4, !dbg !21
   %call = invoke i32 @_Z4testi(i32 %0)
           to label %invoke.cont unwind label %lpad, !dbg !21
 
@@ -97,17 +97,17 @@
   br label %catch.dispatch, !dbg !21
 
 catch.dispatch:                                   ; preds = %lpad
-  %sel = load i32* %ehselector.slot, !dbg !23
+  %sel = load i32, i32* %ehselector.slot, !dbg !23
   %4 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) #2, !dbg !23
   %matches = icmp eq i32 %sel, %4, !dbg !23
   br i1 %matches, label %catch, label %eh.resume, !dbg !23
 
 catch:                                            ; preds = %catch.dispatch
   call void @llvm.dbg.declare(metadata i32* %e, metadata !24, metadata !{!"0x102"}), !dbg !25
-  %exn = load i8** %exn.slot, !dbg !23
+  %exn = load i8*, i8** %exn.slot, !dbg !23
   %5 = call i8* @__cxa_begin_catch(i8* %exn) #2, !dbg !23
   %6 = bitcast i8* %5 to i32*, !dbg !23
-  %7 = load i32* %6, align 4, !dbg !23
+  %7 = load i32, i32* %6, align 4, !dbg !23
   store i32 %7, i32* %e, align 4, !dbg !23
   store i32 0, i32* @global_var, align 4, !dbg !26
   call void @__cxa_end_catch() #2, !dbg !28
@@ -118,8 +118,8 @@
   ret i32 0, !dbg !30
 
 eh.resume:                                        ; preds = %catch.dispatch
-  %exn1 = load i8** %exn.slot, !dbg !23
-  %sel2 = load i32* %ehselector.slot, !dbg !23
+  %exn1 = load i8*, i8** %exn.slot, !dbg !23
+  %sel2 = load i32, i32* %ehselector.slot, !dbg !23
   %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn1, 0, !dbg !23
   %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %sel2, 1, !dbg !23
   resume { i8*, i32 } %lpad.val3, !dbg !23
diff --git a/llvm/test/DebugInfo/inline-scopes.ll b/llvm/test/DebugInfo/inline-scopes.ll
index ec36a2f..76e6312 100644
--- a/llvm/test/DebugInfo/inline-scopes.ll
+++ b/llvm/test/DebugInfo/inline-scopes.ll
@@ -47,7 +47,7 @@
   %call.i = call zeroext i1 @_Z1fv(), !dbg !19
   %frombool.i = zext i1 %call.i to i8, !dbg !19
   store i8 %frombool.i, i8* %b.i, align 1, !dbg !19
-  %0 = load i8* %b.i, align 1, !dbg !19
+  %0 = load i8, i8* %b.i, align 1, !dbg !19
   %tobool.i = trunc i8 %0 to i1, !dbg !19
   br i1 %tobool.i, label %if.then.i, label %if.end.i, !dbg !19
 
@@ -60,12 +60,12 @@
   br label %_Z2f1v.exit, !dbg !22
 
 _Z2f1v.exit:                                      ; preds = %if.then.i, %if.end.i
-  %1 = load i32* %retval.i, !dbg !23
+  %1 = load i32, i32* %retval.i, !dbg !23
   call void @llvm.dbg.declare(metadata i8* %b.i3, metadata !24, metadata !{!"0x102"}), !dbg !27
   %call.i4 = call zeroext i1 @_Z1fv(), !dbg !27
   %frombool.i5 = zext i1 %call.i4 to i8, !dbg !27
   store i8 %frombool.i5, i8* %b.i3, align 1, !dbg !27
-  %2 = load i8* %b.i3, align 1, !dbg !27
+  %2 = load i8, i8* %b.i3, align 1, !dbg !27
   %tobool.i6 = trunc i8 %2 to i1, !dbg !27
   br i1 %tobool.i6, label %if.then.i7, label %if.end.i8, !dbg !27
 
@@ -78,7 +78,7 @@
   br label %_Z2f2v.exit, !dbg !30
 
 _Z2f2v.exit:                                      ; preds = %if.then.i7, %if.end.i8
-  %3 = load i32* %retval.i2, !dbg !31
+  %3 = load i32, i32* %retval.i2, !dbg !31
   ret i32 0, !dbg !32
 }
 
diff --git a/llvm/test/DebugInfo/member-order.ll b/llvm/test/DebugInfo/member-order.ll
index ae84571..451533d 100644
--- a/llvm/test/DebugInfo/member-order.ll
+++ b/llvm/test/DebugInfo/member-order.ll
@@ -30,7 +30,7 @@
   %this.addr = alloca %struct.foo*, align 8
   store %struct.foo* %this, %struct.foo** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %struct.foo** %this.addr, metadata !16, metadata !{!"0x102"}), !dbg !18
-  %this1 = load %struct.foo** %this.addr
+  %this1 = load %struct.foo*, %struct.foo** %this.addr
   ret void, !dbg !19
 }
 
diff --git a/llvm/test/DebugInfo/missing-abstract-variable.ll b/llvm/test/DebugInfo/missing-abstract-variable.ll
index dcaa2db..214bf43 100644
--- a/llvm/test/DebugInfo/missing-abstract-variable.ll
+++ b/llvm/test/DebugInfo/missing-abstract-variable.ll
@@ -112,7 +112,7 @@
   br i1 %u, label %if.then.i, label %_Z1xb.exit, !dbg !34
 
 if.then.i:                                        ; preds = %entry
-  %0 = load i32* @t, align 4, !dbg !35, !tbaa !36
+  %0 = load i32, i32* @t, align 4, !dbg !35, !tbaa !36
   tail call void @llvm.dbg.value(metadata i32 %0, i64 0, metadata !40, metadata !{!"0x102"}), !dbg !35
   tail call void @_Z1fi(i32 %0), !dbg !41
   br label %_Z1xb.exit, !dbg !42
diff --git a/llvm/test/DebugInfo/namespace.ll b/llvm/test/DebugInfo/namespace.ll
index a4fdbd2..7125434 100644
--- a/llvm/test/DebugInfo/namespace.ll
+++ b/llvm/test/DebugInfo/namespace.ll
@@ -238,32 +238,32 @@
   %frombool = zext i1 %b to i8
   store i8 %frombool, i8* %b.addr, align 1
   call void @llvm.dbg.declare(metadata i8* %b.addr, metadata !66, metadata !62), !dbg !67
-  %0 = load i8* %b.addr, align 1, !dbg !68
+  %0 = load i8, i8* %b.addr, align 1, !dbg !68
   %tobool = trunc i8 %0 to i1, !dbg !68
   br i1 %tobool, label %if.then, label %if.end, !dbg !68
 
 if.then:                                          ; preds = %entry
-  %1 = load i32* @_ZN1A1B1iE, align 4, !dbg !69
+  %1 = load i32, i32* @_ZN1A1B1iE, align 4, !dbg !69
   store i32 %1, i32* %retval, !dbg !69
   br label %return, !dbg !69
 
 if.end:                                           ; preds = %entry
-  %2 = load i32* @_ZN1A1B1iE, align 4, !dbg !70
-  %3 = load i32* @_ZN1A1B1iE, align 4, !dbg !70
+  %2 = load i32, i32* @_ZN1A1B1iE, align 4, !dbg !70
+  %3 = load i32, i32* @_ZN1A1B1iE, align 4, !dbg !70
   %add = add nsw i32 %2, %3, !dbg !70
-  %4 = load i32* @_ZN1A1B1iE, align 4, !dbg !70
+  %4 = load i32, i32* @_ZN1A1B1iE, align 4, !dbg !70
   %add1 = add nsw i32 %add, %4, !dbg !70
   store i32 %add1, i32* %retval, !dbg !70
   br label %return, !dbg !70
 
 return:                                           ; preds = %if.end, %if.then
-  %5 = load i32* %retval, !dbg !71
+  %5 = load i32, i32* %retval, !dbg !71
   ret i32 %5, !dbg !71
 }
 
 define internal void @__cxx_global_var_init1() section "__TEXT,__StaticInit,regular,pure_instructions" {
 entry:
-  %0 = load i32* @_ZN1A1B1iE, align 4, !dbg !72
+  %0 = load i32, i32* @_ZN1A1B1iE, align 4, !dbg !72
   store i32 %0, i32* @_ZN1A1B7var_fwdE, align 4, !dbg !72
   ret void, !dbg !72
 }
diff --git a/llvm/test/DebugInfo/namespace_inline_function_definition.ll b/llvm/test/DebugInfo/namespace_inline_function_definition.ll
index b6f1b5f..f326a74 100644
--- a/llvm/test/DebugInfo/namespace_inline_function_definition.ll
+++ b/llvm/test/DebugInfo/namespace_inline_function_definition.ll
@@ -40,10 +40,10 @@
   %i.addr.i = alloca i32, align 4
   %retval = alloca i32, align 4
   store i32 0, i32* %retval
-  %0 = load i32* @x, align 4, !dbg !16
+  %0 = load i32, i32* @x, align 4, !dbg !16
   store i32 %0, i32* %i.addr.i, align 4
   call void @llvm.dbg.declare(metadata i32* %i.addr.i, metadata !17, metadata !{!"0x102"}), !dbg !18
-  %1 = load i32* %i.addr.i, align 4, !dbg !18
+  %1 = load i32, i32* %i.addr.i, align 4, !dbg !18
   %mul.i = mul nsw i32 %1, 2, !dbg !18
   ret i32 %mul.i, !dbg !16
 }
@@ -54,7 +54,7 @@
   %i.addr = alloca i32, align 4
   store i32 %i, i32* %i.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !17, metadata !{!"0x102"}), !dbg !19
-  %0 = load i32* %i.addr, align 4, !dbg !19
+  %0 = load i32, i32* %i.addr, align 4, !dbg !19
   %mul = mul nsw i32 %0, 2, !dbg !19
   ret i32 %mul, !dbg !19
 }
diff --git a/llvm/test/DebugInfo/tu-composite.ll b/llvm/test/DebugInfo/tu-composite.ll
index 6f052ee..dd77819 100644
--- a/llvm/test/DebugInfo/tu-composite.ll
+++ b/llvm/test/DebugInfo/tu-composite.ll
@@ -92,7 +92,7 @@
   %this.addr = alloca %struct.C*, align 8
   store %struct.C* %this, %struct.C** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %struct.C** %this.addr, metadata !36, metadata !{!"0x102"}), !dbg !38
-  %this1 = load %struct.C** %this.addr
+  %this1 = load %struct.C*, %struct.C** %this.addr
   ret void, !dbg !39
 }
 
diff --git a/llvm/test/DebugInfo/unconditional-branch.ll b/llvm/test/DebugInfo/unconditional-branch.ll
index c82f1ba..87654e7 100644
--- a/llvm/test/DebugInfo/unconditional-branch.ll
+++ b/llvm/test/DebugInfo/unconditional-branch.ll
@@ -23,7 +23,7 @@
   %i.addr = alloca i32, align 4
   store i32 %i, i32* %i.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !12, metadata !{!"0x102"}), !dbg !13
-  %0 = load i32* %i.addr, align 4, !dbg !14
+  %0 = load i32, i32* %i.addr, align 4, !dbg !14
   switch i32 %0, label %sw.default [
   ], !dbg !14
 
diff --git a/llvm/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll b/llvm/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll
index 8a71f59..52b04a0 100644
--- a/llvm/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/2002-12-16-ArgTest.ll
@@ -24,9 +24,9 @@
 	%add1-indvar = add i32 %cann-indvar, 1		; <i32> [#uses=2]
 	%cann-indvar-idxcast = sext i32 %cann-indvar to i64		; <i64> [#uses=1]
 	%CT = bitcast i8** %local to i8***		; <i8***> [#uses=1]
-	%reg115 = load i8*** %CT		; <i8**> [#uses=1]
+	%reg115 = load i8**, i8*** %CT		; <i8**> [#uses=1]
 	%cast235 = getelementptr i8*, i8** %reg115, i64 %cann-indvar-idxcast		; <i8**> [#uses=1]
-	%reg117 = load i8** %cast235		; <i8*> [#uses=1]
+	%reg117 = load i8*, i8** %cast235		; <i8*> [#uses=1]
 	%reg236 = call i32 @puts( i8* %reg117 )		; <i32> [#uses=0]
 	%cond239 = icmp slt i32 %add1-indvar, %argc		; <i1> [#uses=1]
 	br i1 %cond239, label %bb2, label %bb3
diff --git a/llvm/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll b/llvm/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll
index 576ef7c..d714bf7 100644
--- a/llvm/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/2003-05-06-LivenessClobber.ll
@@ -5,7 +5,7 @@
 @test = global i64 0		; <i64*> [#uses=1]
 
 define internal i64 @test.upgrd.1() {
-	%tmp.0 = load i64* @test		; <i64> [#uses=1]
+	%tmp.0 = load i64, i64* @test		; <i64> [#uses=1]
 	%tmp.1 = add i64 %tmp.0, 1		; <i64> [#uses=1]
 	ret i64 %tmp.1
 }
diff --git a/llvm/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll b/llvm/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll
index e6bc1d1..0538201 100644
--- a/llvm/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/2003-05-07-ArgumentTest.ll
@@ -4,7 +4,7 @@
 
 define i32 @main(i32 %argc.1, i8** %argv.1) {
 	%tmp.5 = getelementptr i8*, i8** %argv.1, i64 1		; <i8**> [#uses=1]
-	%tmp.6 = load i8** %tmp.5		; <i8*> [#uses=1]
+	%tmp.6 = load i8*, i8** %tmp.5		; <i8*> [#uses=1]
 	%tmp.0 = call i32 @puts( i8* %tmp.6 )		; <i32> [#uses=0]
 	ret i32 0
 }
diff --git a/llvm/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll b/llvm/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll
index 63303fc..e2c52b4 100644
--- a/llvm/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/2003-08-21-EnvironmentTest.ll
@@ -12,7 +12,7 @@
 declare i32 @strlen(i8*)
 
 define i32 @main(i32 %argc.1, i8** %argv.1, i8** %envp.1) {
-	%tmp.2 = load i8** %envp.1		; <i8*> [#uses=1]
+	%tmp.2 = load i8*, i8** %envp.1		; <i8*> [#uses=1]
 	%tmp.3 = call i32 @strlen( i8* %tmp.2 )		; <i32> [#uses=1]
 	%T = icmp eq i32 %tmp.3, 0		; <i1> [#uses=1]
 	%R = zext i1 %T to i32		; <i32> [#uses=1]
diff --git a/llvm/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll b/llvm/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll
index 4183611..d557f57 100644
--- a/llvm/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/2007-12-10-APIntLoadStore.ll
@@ -9,11 +9,11 @@
     %"alloca point" = bitcast i32 0 to i32      ; <i32> [#uses=0]
     store i75 999, i75* %x, align 16
     store i32 0, i32* %tmp, align 4
-    %tmp1 = load i32* %tmp, align 4     ; <i32> [#uses=1]
+    %tmp1 = load i32, i32* %tmp, align 4     ; <i32> [#uses=1]
     store i32 %tmp1, i32* %retval, align 4
     br label %return
 
 return:     ; preds = %entry
-    %retval2 = load i32* %retval        ; <i32> [#uses=1]
+    %retval2 = load i32, i32* %retval        ; <i32> [#uses=1]
     ret i32 %retval2
 }
diff --git a/llvm/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll b/llvm/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
index ae79058..4e4ad2a 100644
--- a/llvm/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/2008-06-05-APInt-OverAShr.ll
@@ -12,15 +12,15 @@
 	%tmp = alloca i65		; <i65*> [#uses=2]
 	%"alloca point" = bitcast i65 0 to i65		; <i65> [#uses=0]
 	store i65 %x, i65* %x_addr
-	%tmp1 = load i65* %x_addr, align 4		; <i65> [#uses=1]
+	%tmp1 = load i65, i65* %x_addr, align 4		; <i65> [#uses=1]
 	%tmp2 = ashr i65 %tmp1, 65		; <i65> [#uses=1]
 	store i65 %tmp2, i65* %tmp, align 4
-	%tmp3 = load i65* %tmp, align 4		; <i65> [#uses=1]
+	%tmp3 = load i65, i65* %tmp, align 4		; <i65> [#uses=1]
 	store i65 %tmp3, i65* %retval, align 4
 	br label %return
 
 return:		; preds = %entry
-	%retval4 = load i65* %retval		; <i65> [#uses=1]
+	%retval4 = load i65, i65* %retval		; <i65> [#uses=1]
 	ret i65 %retval4
 }
 
@@ -47,13 +47,13 @@
 
 cond_next:		; preds = %cond_false, %cond_true
 	%tmp5 = getelementptr [10 x i8], [10 x i8]* @.str, i32 0, i32 0		; <i8*> [#uses=1]
-	%tmp6 = load i32* %iftmp.0, align 4		; <i32> [#uses=1]
+	%tmp6 = load i32, i32* %iftmp.0, align 4		; <i32> [#uses=1]
 	%tmp7 = call i32 (i8*, ...)* @printf( i8* noalias  %tmp5, i32 %tmp6 ) nounwind 		; <i32> [#uses=0]
 	br label %return
 
 return:		; preds = %cond_next
     store i32 0, i32* %retval, align 4
-	%retval8 = load i32* %retval		; <i32> [#uses=1]
+	%retval8 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval8
 }
 
diff --git a/llvm/test/ExecutionEngine/MCJIT/2013-04-04-RelocAddend.ll b/llvm/test/ExecutionEngine/MCJIT/2013-04-04-RelocAddend.ll
index d9ff347..a329e98 100644
--- a/llvm/test/ExecutionEngine/MCJIT/2013-04-04-RelocAddend.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/2013-04-04-RelocAddend.ll
@@ -18,8 +18,8 @@
 
 define i32 @main() {
 entry:
-  %0 = load i32** @p, align 8
-  %1 = load i32* %0, align 4
+  %0 = load i32*, i32** @p, align 8
+  %1 = load i32, i32* %0, align 4
   ret i32 %1
 }
 
diff --git a/llvm/test/ExecutionEngine/MCJIT/pr13727.ll b/llvm/test/ExecutionEngine/MCJIT/pr13727.ll
index 443c499..df57759 100644
--- a/llvm/test/ExecutionEngine/MCJIT/pr13727.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/pr13727.ll
@@ -31,17 +31,17 @@
   %retval = alloca i32, align 4
   %i = alloca i32, align 4
   store i32 0, i32* %retval
-  %0 = load i32* @zero_int, align 4
+  %0 = load i32, i32* @zero_int, align 4
   %add = add nsw i32 %0, 5
   %idxprom = sext i32 %add to i64
   %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom
   store i32 40, i32* %arrayidx, align 4
-  %1 = load double* @zero_double, align 8
+  %1 = load double, double* @zero_double, align 8
   %cmp = fcmp olt double %1, 1.100000e+00
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
-  %2 = load i32* @zero_int, align 4
+  %2 = load i32, i32* @zero_int, align 4
   %add1 = add nsw i32 %2, 2
   %idxprom2 = sext i32 %add1 to i64
   %arrayidx3 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom2
@@ -53,35 +53,35 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %if.end
-  %3 = load i32* %i, align 4
+  %3 = load i32, i32* %i, align 4
   %cmp4 = icmp slt i32 %3, 10
   br i1 %cmp4, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %sub = sub nsw i32 %4, 1
   %idxprom5 = sext i32 %sub to i64
   %arrayidx6 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom5
-  %5 = load i32* %arrayidx6, align 4
-  %6 = load i32* %i, align 4
+  %5 = load i32, i32* %arrayidx6, align 4
+  %6 = load i32, i32* %i, align 4
   %idxprom7 = sext i32 %6 to i64
   %arrayidx8 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom7
-  %7 = load i32* %arrayidx8, align 4
+  %7 = load i32, i32* %arrayidx8, align 4
   %add9 = add nsw i32 %5, %7
-  %8 = load i32* %i, align 4
+  %8 = load i32, i32* %i, align 4
   %idxprom10 = sext i32 %8 to i64
   %arrayidx11 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom10
   store i32 %add9, i32* %arrayidx11, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %9 = load i32* %i, align 4
+  %9 = load i32, i32* %i, align 4
   %inc = add nsw i32 %9, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %10 = load i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
+  %10 = load i32, i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
   %cmp12 = icmp eq i32 %10, 110
   %cond = select i1 %cmp12, i32 0, i32 -1
   ret i32 %cond
diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll b/llvm/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll
index da4ddc6..03e6bb3 100644
--- a/llvm/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll
@@ -28,7 +28,7 @@
 
 define i1 @test() nounwind {
 entry:
-	%tmp = load i1 ()** @funcPtr
+	%tmp = load i1 ()*, i1 ()** @funcPtr
 	%eq = icmp eq i1 ()* %tmp, @test
 	ret i1 %eq
 }
diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/stubs-sm-pic.ll b/llvm/test/ExecutionEngine/MCJIT/remote/stubs-sm-pic.ll
index f6a1607..8f9b05b 100644
--- a/llvm/test/ExecutionEngine/MCJIT/remote/stubs-sm-pic.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/remote/stubs-sm-pic.ll
@@ -28,7 +28,7 @@
 
 define i1 @test() nounwind {
 entry:
-	%tmp = load i1 ()** @funcPtr
+	%tmp = load i1 ()*, i1 ()** @funcPtr
 	%eq = icmp eq i1 ()* %tmp, @test
 	ret i1 %eq
 }
diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll b/llvm/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll
index ac2c235..5cc304a 100644
--- a/llvm/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll
@@ -31,17 +31,17 @@
   %retval = alloca i32, align 4
   %i = alloca i32, align 4
   store i32 0, i32* %retval
-  %0 = load i32* @zero_int, align 4
+  %0 = load i32, i32* @zero_int, align 4
   %add = add nsw i32 %0, 5
   %idxprom = sext i32 %add to i64
   %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom
   store i32 40, i32* %arrayidx, align 4
-  %1 = load double* @zero_double, align 8
+  %1 = load double, double* @zero_double, align 8
   %cmp = fcmp olt double %1, 1.000000e+00
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
-  %2 = load i32* @zero_int, align 4
+  %2 = load i32, i32* @zero_int, align 4
   %add1 = add nsw i32 %2, 2
   %idxprom2 = sext i32 %add1 to i64
   %arrayidx3 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom2
@@ -53,35 +53,35 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %if.end
-  %3 = load i32* %i, align 4
+  %3 = load i32, i32* %i, align 4
   %cmp4 = icmp slt i32 %3, 10
   br i1 %cmp4, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %sub = sub nsw i32 %4, 1
   %idxprom5 = sext i32 %sub to i64
   %arrayidx6 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom5
-  %5 = load i32* %arrayidx6, align 4
-  %6 = load i32* %i, align 4
+  %5 = load i32, i32* %arrayidx6, align 4
+  %6 = load i32, i32* %i, align 4
   %idxprom7 = sext i32 %6 to i64
   %arrayidx8 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom7
-  %7 = load i32* %arrayidx8, align 4
+  %7 = load i32, i32* %arrayidx8, align 4
   %add9 = add nsw i32 %5, %7
-  %8 = load i32* %i, align 4
+  %8 = load i32, i32* %i, align 4
   %idxprom10 = sext i32 %8 to i64
   %arrayidx11 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom10
   store i32 %add9, i32* %arrayidx11, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %9 = load i32* %i, align 4
+  %9 = load i32, i32* %i, align 4
   %inc = add nsw i32 %9, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %10 = load i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
+  %10 = load i32, i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
   %cmp12 = icmp eq i32 %10, 110
   %cond = select i1 %cmp12, i32 0, i32 -1
   ret i32 %cond
diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll b/llvm/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll
index 9d11415..6134b8b 100644
--- a/llvm/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll
@@ -1,7 +1,7 @@
 ; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
 
 define double @test(double* %DP, double %Arg) nounwind {
-	%D = load double* %DP		; <double> [#uses=1]
+	%D = load double, double* %DP		; <double> [#uses=1]
 	%V = fadd double %D, 1.000000e+00		; <double> [#uses=2]
 	%W = fsub double %V, %V		; <double> [#uses=3]
 	%X = fmul double %W, %W		; <double> [#uses=2]
diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll b/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll
index 40b514f..5b7999c 100644
--- a/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll
@@ -11,24 +11,24 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 49
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* @count, align 4
+  %1 = load i32, i32* @count, align 4
   %inc = add nsw i32 %1, 1
   store i32 %inc, i32* @count, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %2 = load i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
   %inc1 = add nsw i32 %2, 1
   store i32 %inc1, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %3 = load i32* @count, align 4
+  %3 = load i32, i32* @count, align 4
   %sub = sub nsw i32 %3, 50
   ret i32 %sub
 }
diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll b/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll
index 5119b72..f9184b4 100644
--- a/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll
@@ -12,24 +12,24 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 49
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* @count, align 4
+  %1 = load i32, i32* @count, align 4
   %inc = add nsw i32 %1, 1
   store i32 %inc, i32* @count, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %2 = load i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
   %inc1 = add nsw i32 %2, 1
   store i32 %inc1, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %3 = load i32* @count, align 4
+  %3 = load i32, i32* @count, align 4
   %sub = sub nsw i32 %3, 50
   ret i32 %sub
 }
diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll b/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll
index ba3ffff..19303cc 100644
--- a/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll
@@ -7,8 +7,8 @@
 
 define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly {
 entry:
-  %0 = load i8** @ptr, align 4
-  %1 = load i8** @ptr2, align 4
+  %0 = load i8*, i8** @ptr, align 4
+  %1 = load i8*, i8** @ptr2, align 4
   %cmp = icmp eq i8* %0, %1
   %. = zext i1 %cmp to i32
   ret i32 %.
diff --git a/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll b/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll
index bbc71af..ac52e73 100644
--- a/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll
@@ -8,8 +8,8 @@
 
 define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly {
 entry:
-  %0 = load i8** @ptr, align 4
-  %1 = load i8** @ptr2, align 4
+  %0 = load i8*, i8** @ptr, align 4
+  %1 = load i8*, i8** @ptr2, align 4
   %cmp = icmp eq i8* %0, %1
   %. = zext i1 %cmp to i32
   ret i32 %.
diff --git a/llvm/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll b/llvm/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll
index 9b83ed2..134a091 100644
--- a/llvm/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/stubs-sm-pic.ll
@@ -27,7 +27,7 @@
 
 define i1 @test() nounwind {
 entry:
-	%tmp = load i1 ()** @funcPtr
+	%tmp = load i1 ()*, i1 ()** @funcPtr
 	%eq = icmp eq i1 ()* %tmp, @test
 	ret i1 %eq
 }
diff --git a/llvm/test/ExecutionEngine/MCJIT/stubs.ll b/llvm/test/ExecutionEngine/MCJIT/stubs.ll
index b7d922f..a23e6bc 100644
--- a/llvm/test/ExecutionEngine/MCJIT/stubs.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/stubs.ll
@@ -26,7 +26,7 @@
 
 define i1 @test() nounwind {
 entry:
-	%tmp = load i1 ()** @funcPtr
+	%tmp = load i1 ()*, i1 ()** @funcPtr
 	%eq = icmp eq i1 ()* %tmp, @test
 	ret i1 %eq
 }
diff --git a/llvm/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll b/llvm/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll
index a425b5c..1bb0744 100644
--- a/llvm/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/test-common-symbols-alignment.ll
@@ -13,7 +13,7 @@
     %ptr = alloca i32, align 4
     store i32 0, i32* %retval
     store i32 ptrtoint (i32* @CS3 to i32), i32* %ptr, align 4
-    %0 = load i32* %ptr, align 4
+    %0 = load i32, i32* %ptr, align 4
     %and = and i32 %0, 15
     %tobool = icmp ne i32 %and, 0
     br i1 %tobool, label %if.then, label %if.else
@@ -27,6 +27,6 @@
     br label %return
 
 return:                                           ; preds = %if.else, %if.then
-    %1 = load i32* %retval
+    %1 = load i32, i32* %retval
     ret i32 %1
 }
diff --git a/llvm/test/ExecutionEngine/MCJIT/test-common-symbols.ll b/llvm/test/ExecutionEngine/MCJIT/test-common-symbols.ll
index eba9279..62ada27 100644
--- a/llvm/test/ExecutionEngine/MCJIT/test-common-symbols.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/test-common-symbols.ll
@@ -31,17 +31,17 @@
   %retval = alloca i32, align 4
   %i = alloca i32, align 4
   store i32 0, i32* %retval
-  %0 = load i32* @zero_int, align 4
+  %0 = load i32, i32* @zero_int, align 4
   %add = add nsw i32 %0, 5
   %idxprom = sext i32 %add to i64
   %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom
   store i32 40, i32* %arrayidx, align 4
-  %1 = load double* @zero_double, align 8
+  %1 = load double, double* @zero_double, align 8
   %cmp = fcmp olt double %1, 1.000000e+00
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
-  %2 = load i32* @zero_int, align 4
+  %2 = load i32, i32* @zero_int, align 4
   %add1 = add nsw i32 %2, 2
   %idxprom2 = sext i32 %add1 to i64
   %arrayidx3 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom2
@@ -53,35 +53,35 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %if.end
-  %3 = load i32* %i, align 4
+  %3 = load i32, i32* %i, align 4
   %cmp4 = icmp slt i32 %3, 10
   br i1 %cmp4, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %sub = sub nsw i32 %4, 1
   %idxprom5 = sext i32 %sub to i64
   %arrayidx6 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom5
-  %5 = load i32* %arrayidx6, align 4
-  %6 = load i32* %i, align 4
+  %5 = load i32, i32* %arrayidx6, align 4
+  %6 = load i32, i32* %i, align 4
   %idxprom7 = sext i32 %6 to i64
   %arrayidx8 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom7
-  %7 = load i32* %arrayidx8, align 4
+  %7 = load i32, i32* %arrayidx8, align 4
   %add9 = add nsw i32 %5, %7
-  %8 = load i32* %i, align 4
+  %8 = load i32, i32* %i, align 4
   %idxprom10 = sext i32 %8 to i64
   %arrayidx11 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom10
   store i32 %add9, i32* %arrayidx11, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %9 = load i32* %i, align 4
+  %9 = load i32, i32* %i, align 4
   %inc = add nsw i32 %9, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %10 = load i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
+  %10 = load i32, i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
   %cmp12 = icmp eq i32 %10, 110
   %cond = select i1 %cmp12, i32 0, i32 -1
   ret i32 %cond
diff --git a/llvm/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll b/llvm/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll
index adb0550..c549a44 100644
--- a/llvm/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/test-fp-no-external-funcs.ll
@@ -1,7 +1,7 @@
 ; RUN: %lli %s > /dev/null
 
 define double @test(double* %DP, double %Arg) {
-	%D = load double* %DP		; <double> [#uses=1]
+	%D = load double, double* %DP		; <double> [#uses=1]
 	%V = fadd double %D, 1.000000e+00		; <double> [#uses=2]
 	%W = fsub double %V, %V		; <double> [#uses=3]
 	%X = fmul double %W, %W		; <double> [#uses=2]
diff --git a/llvm/test/ExecutionEngine/MCJIT/test-fp.ll b/llvm/test/ExecutionEngine/MCJIT/test-fp.ll
index 2bf0210d..c65b5a6 100644
--- a/llvm/test/ExecutionEngine/MCJIT/test-fp.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/test-fp.ll
@@ -1,7 +1,7 @@
 ; RUN: %lli %s > /dev/null
 
 define double @test(double* %DP, double %Arg) {
-	%D = load double* %DP		; <double> [#uses=1]
+	%D = load double, double* %DP		; <double> [#uses=1]
 	%V = fadd double %D, 1.000000e+00		; <double> [#uses=2]
 	%W = fsub double %V, %V		; <double> [#uses=3]
 	%X = fmul double %W, %W		; <double> [#uses=2]
diff --git a/llvm/test/ExecutionEngine/MCJIT/test-global-ctors.ll b/llvm/test/ExecutionEngine/MCJIT/test-global-ctors.ll
index ec87d15..6d13271 100644
--- a/llvm/test/ExecutionEngine/MCJIT/test-global-ctors.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/test-global-ctors.ll
@@ -6,7 +6,7 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @var, align 4
+  %0 = load i32, i32* @var, align 4
   ret i32 %0
 }
 
diff --git a/llvm/test/ExecutionEngine/MCJIT/test-global-init-nonzero-sm-pic.ll b/llvm/test/ExecutionEngine/MCJIT/test-global-init-nonzero-sm-pic.ll
index 26bd838..8ae496d 100644
--- a/llvm/test/ExecutionEngine/MCJIT/test-global-init-nonzero-sm-pic.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/test-global-init-nonzero-sm-pic.ll
@@ -12,24 +12,24 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 49
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* @count, align 4
+  %1 = load i32, i32* @count, align 4
   %inc = add nsw i32 %1, 1
   store i32 %inc, i32* @count, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %2 = load i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
   %inc1 = add nsw i32 %2, 1
   store i32 %inc1, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %3 = load i32* @count, align 4
+  %3 = load i32, i32* @count, align 4
   %sub = sub nsw i32 %3, 50
   ret i32 %sub
 }
diff --git a/llvm/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll b/llvm/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll
index 3877e9a..4595219 100644
--- a/llvm/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/test-global-init-nonzero.ll
@@ -11,24 +11,24 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 49
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* @count, align 4
+  %1 = load i32, i32* @count, align 4
   %inc = add nsw i32 %1, 1
   store i32 %inc, i32* @count, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %2 = load i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
   %inc1 = add nsw i32 %2, 1
   store i32 %inc1, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %3 = load i32* @count, align 4
+  %3 = load i32, i32* @count, align 4
   %sub = sub nsw i32 %3, 50
   ret i32 %sub
 }
diff --git a/llvm/test/ExecutionEngine/MCJIT/test-global.ll b/llvm/test/ExecutionEngine/MCJIT/test-global.ll
index 69e5455..2760ecd 100644
--- a/llvm/test/ExecutionEngine/MCJIT/test-global.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/test-global.ll
@@ -11,24 +11,24 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 50
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* @count, align 4
+  %1 = load i32, i32* @count, align 4
   %inc = add nsw i32 %1, 1
   store i32 %inc, i32* @count, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %2 = load i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
   %inc1 = add nsw i32 %2, 1
   store i32 %inc1, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %3 = load i32* @count, align 4
+  %3 = load i32, i32* @count, align 4
   %sub = sub nsw i32 %3, 50
   ret i32 %sub
 }
diff --git a/llvm/test/ExecutionEngine/MCJIT/test-loadstore.ll b/llvm/test/ExecutionEngine/MCJIT/test-loadstore.ll
index 1797599..68c8505 100644
--- a/llvm/test/ExecutionEngine/MCJIT/test-loadstore.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/test-loadstore.ll
@@ -1,13 +1,13 @@
 ; RUN: %lli %s > /dev/null
 
 define void @test(i8* %P, i16* %P.upgrd.1, i32* %P.upgrd.2, i64* %P.upgrd.3) {
-	%V = load i8* %P		; <i8> [#uses=1]
+	%V = load i8, i8* %P		; <i8> [#uses=1]
 	store i8 %V, i8* %P
-	%V.upgrd.4 = load i16* %P.upgrd.1		; <i16> [#uses=1]
+	%V.upgrd.4 = load i16, i16* %P.upgrd.1		; <i16> [#uses=1]
 	store i16 %V.upgrd.4, i16* %P.upgrd.1
-	%V.upgrd.5 = load i32* %P.upgrd.2		; <i32> [#uses=1]
+	%V.upgrd.5 = load i32, i32* %P.upgrd.2		; <i32> [#uses=1]
 	store i32 %V.upgrd.5, i32* %P.upgrd.2
-	%V.upgrd.6 = load i64* %P.upgrd.3		; <i64> [#uses=1]
+	%V.upgrd.6 = load i64, i64* %P.upgrd.3		; <i64> [#uses=1]
 	store i64 %V.upgrd.6, i64* %P.upgrd.3
 	ret void
 }
@@ -16,7 +16,7 @@
         ;; Variable sized alloca
 	%X = alloca i32, i32 %Size		; <i32*> [#uses=2]
 	store i32 %Size, i32* %X
-	%Y = load i32* %X		; <i32> [#uses=1]
+	%Y = load i32, i32* %X		; <i32> [#uses=1]
 	ret i32 %Y
 }
 
diff --git a/llvm/test/ExecutionEngine/MCJIT/test-local.ll b/llvm/test/ExecutionEngine/MCJIT/test-local.ll
index ec5ba16..832e178 100644
--- a/llvm/test/ExecutionEngine/MCJIT/test-local.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/test-local.ll
@@ -11,24 +11,24 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 50
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %count, align 4
+  %1 = load i32, i32* %count, align 4
   %inc = add nsw i32 %1, 1
   store i32 %inc, i32* %count, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %2 = load i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
   %inc1 = add nsw i32 %2, 1
   store i32 %inc1, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %3 = load i32* %count, align 4
+  %3 = load i32, i32* %count, align 4
   %sub = sub nsw i32 %3, 50
   ret i32 %sub
 }
diff --git a/llvm/test/ExecutionEngine/MCJIT/test-ptr-reloc-sm-pic.ll b/llvm/test/ExecutionEngine/MCJIT/test-ptr-reloc-sm-pic.ll
index 21bcaef..d198acf 100644
--- a/llvm/test/ExecutionEngine/MCJIT/test-ptr-reloc-sm-pic.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/test-ptr-reloc-sm-pic.ll
@@ -8,8 +8,8 @@
 
 define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly {
 entry:
-  %0 = load i8** @ptr, align 4
-  %1 = load i8** @ptr2, align 4
+  %0 = load i8*, i8** @ptr, align 4
+  %1 = load i8*, i8** @ptr2, align 4
   %cmp = icmp eq i8* %0, %1
   %. = zext i1 %cmp to i32
   ret i32 %.
diff --git a/llvm/test/ExecutionEngine/MCJIT/test-ptr-reloc.ll b/llvm/test/ExecutionEngine/MCJIT/test-ptr-reloc.ll
index f139ddf..dc9aaf2 100644
--- a/llvm/test/ExecutionEngine/MCJIT/test-ptr-reloc.ll
+++ b/llvm/test/ExecutionEngine/MCJIT/test-ptr-reloc.ll
@@ -7,8 +7,8 @@
 
 define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly {
 entry:
-  %0 = load i8** @ptr, align 4
-  %1 = load i8** @ptr2, align 4
+  %0 = load i8*, i8** @ptr, align 4
+  %1 = load i8*, i8** @ptr2, align 4
   %cmp = icmp eq i8* %0, %1
   %. = zext i1 %cmp to i32
   ret i32 %.
diff --git a/llvm/test/ExecutionEngine/OrcJIT/2002-12-16-ArgTest.ll b/llvm/test/ExecutionEngine/OrcJIT/2002-12-16-ArgTest.ll
index d8c0cad..12236cf 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/2002-12-16-ArgTest.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/2002-12-16-ArgTest.ll
@@ -24,9 +24,9 @@
 	%add1-indvar = add i32 %cann-indvar, 1		; <i32> [#uses=2]
 	%cann-indvar-idxcast = sext i32 %cann-indvar to i64		; <i64> [#uses=1]
 	%CT = bitcast i8** %local to i8***		; <i8***> [#uses=1]
-	%reg115 = load i8*** %CT		; <i8**> [#uses=1]
+	%reg115 = load i8**, i8*** %CT		; <i8**> [#uses=1]
 	%cast235 = getelementptr i8*, i8** %reg115, i64 %cann-indvar-idxcast		; <i8**> [#uses=1]
-	%reg117 = load i8** %cast235		; <i8*> [#uses=1]
+	%reg117 = load i8*, i8** %cast235		; <i8*> [#uses=1]
 	%reg236 = call i32 @puts( i8* %reg117 )		; <i32> [#uses=0]
 	%cond239 = icmp slt i32 %add1-indvar, %argc		; <i1> [#uses=1]
 	br i1 %cond239, label %bb2, label %bb3
diff --git a/llvm/test/ExecutionEngine/OrcJIT/2003-05-06-LivenessClobber.ll b/llvm/test/ExecutionEngine/OrcJIT/2003-05-06-LivenessClobber.ll
index 576ef7c..d714bf7 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/2003-05-06-LivenessClobber.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/2003-05-06-LivenessClobber.ll
@@ -5,7 +5,7 @@
 @test = global i64 0		; <i64*> [#uses=1]
 
 define internal i64 @test.upgrd.1() {
-	%tmp.0 = load i64* @test		; <i64> [#uses=1]
+	%tmp.0 = load i64, i64* @test		; <i64> [#uses=1]
 	%tmp.1 = add i64 %tmp.0, 1		; <i64> [#uses=1]
 	ret i64 %tmp.1
 }
diff --git a/llvm/test/ExecutionEngine/OrcJIT/2003-05-07-ArgumentTest.ll b/llvm/test/ExecutionEngine/OrcJIT/2003-05-07-ArgumentTest.ll
index 473d94b..44ead0c 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/2003-05-07-ArgumentTest.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/2003-05-07-ArgumentTest.ll
@@ -4,7 +4,7 @@
 
 define i32 @main(i32 %argc.1, i8** %argv.1) {
 	%tmp.5 = getelementptr i8*, i8** %argv.1, i64 1		; <i8**> [#uses=1]
-	%tmp.6 = load i8** %tmp.5		; <i8*> [#uses=1]
+	%tmp.6 = load i8*, i8** %tmp.5		; <i8*> [#uses=1]
 	%tmp.0 = call i32 @puts( i8* %tmp.6 )		; <i32> [#uses=0]
 	ret i32 0
 }
diff --git a/llvm/test/ExecutionEngine/OrcJIT/2003-08-21-EnvironmentTest.ll b/llvm/test/ExecutionEngine/OrcJIT/2003-08-21-EnvironmentTest.ll
index f73f10e..ea4b145 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/2003-08-21-EnvironmentTest.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/2003-08-21-EnvironmentTest.ll
@@ -12,7 +12,7 @@
 declare i32 @strlen(i8*)
 
 define i32 @main(i32 %argc.1, i8** %argv.1, i8** %envp.1) {
-	%tmp.2 = load i8** %envp.1		; <i8*> [#uses=1]
+	%tmp.2 = load i8*, i8** %envp.1		; <i8*> [#uses=1]
 	%tmp.3 = call i32 @strlen( i8* %tmp.2 )		; <i32> [#uses=1]
 	%T = icmp eq i32 %tmp.3, 0		; <i1> [#uses=1]
 	%R = zext i1 %T to i32		; <i32> [#uses=1]
diff --git a/llvm/test/ExecutionEngine/OrcJIT/2007-12-10-APIntLoadStore.ll b/llvm/test/ExecutionEngine/OrcJIT/2007-12-10-APIntLoadStore.ll
index efe5d83..454ae70 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/2007-12-10-APIntLoadStore.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/2007-12-10-APIntLoadStore.ll
@@ -9,11 +9,11 @@
     %"alloca point" = bitcast i32 0 to i32      ; <i32> [#uses=0]
     store i75 999, i75* %x, align 16
     store i32 0, i32* %tmp, align 4
-    %tmp1 = load i32* %tmp, align 4     ; <i32> [#uses=1]
+    %tmp1 = load i32, i32* %tmp, align 4     ; <i32> [#uses=1]
     store i32 %tmp1, i32* %retval, align 4
     br label %return
 
 return:     ; preds = %entry
-    %retval2 = load i32* %retval        ; <i32> [#uses=1]
+    %retval2 = load i32, i32* %retval        ; <i32> [#uses=1]
     ret i32 %retval2
 }
diff --git a/llvm/test/ExecutionEngine/OrcJIT/2008-06-05-APInt-OverAShr.ll b/llvm/test/ExecutionEngine/OrcJIT/2008-06-05-APInt-OverAShr.ll
index 196d669..e29843a 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/2008-06-05-APInt-OverAShr.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/2008-06-05-APInt-OverAShr.ll
@@ -12,15 +12,15 @@
 	%tmp = alloca i65		; <i65*> [#uses=2]
 	%"alloca point" = bitcast i65 0 to i65		; <i65> [#uses=0]
 	store i65 %x, i65* %x_addr
-	%tmp1 = load i65* %x_addr, align 4		; <i65> [#uses=1]
+	%tmp1 = load i65, i65* %x_addr, align 4		; <i65> [#uses=1]
 	%tmp2 = ashr i65 %tmp1, 65		; <i65> [#uses=1]
 	store i65 %tmp2, i65* %tmp, align 4
-	%tmp3 = load i65* %tmp, align 4		; <i65> [#uses=1]
+	%tmp3 = load i65, i65* %tmp, align 4		; <i65> [#uses=1]
 	store i65 %tmp3, i65* %retval, align 4
 	br label %return
 
 return:		; preds = %entry
-	%retval4 = load i65* %retval		; <i65> [#uses=1]
+	%retval4 = load i65, i65* %retval		; <i65> [#uses=1]
 	ret i65 %retval4
 }
 
@@ -47,13 +47,13 @@
 
 cond_next:		; preds = %cond_false, %cond_true
 	%tmp5 = getelementptr [10 x i8], [10 x i8]* @.str, i32 0, i32 0		; <i8*> [#uses=1]
-	%tmp6 = load i32* %iftmp.0, align 4		; <i32> [#uses=1]
+	%tmp6 = load i32, i32* %iftmp.0, align 4		; <i32> [#uses=1]
 	%tmp7 = call i32 (i8*, ...)* @printf( i8* noalias  %tmp5, i32 %tmp6 ) nounwind 		; <i32> [#uses=0]
 	br label %return
 
 return:		; preds = %cond_next
     store i32 0, i32* %retval, align 4
-	%retval8 = load i32* %retval		; <i32> [#uses=1]
+	%retval8 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval8
 }
 
diff --git a/llvm/test/ExecutionEngine/OrcJIT/2013-04-04-RelocAddend.ll b/llvm/test/ExecutionEngine/OrcJIT/2013-04-04-RelocAddend.ll
index 199e948..0df5f1a 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/2013-04-04-RelocAddend.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/2013-04-04-RelocAddend.ll
@@ -18,8 +18,8 @@
 
 define i32 @main() {
 entry:
-  %0 = load i32** @p, align 8
-  %1 = load i32* %0, align 4
+  %0 = load i32*, i32** @p, align 8
+  %1 = load i32, i32* %0, align 4
   ret i32 %1
 }
 
diff --git a/llvm/test/ExecutionEngine/OrcJIT/pr13727.ll b/llvm/test/ExecutionEngine/OrcJIT/pr13727.ll
index 7f8bc62..35d84e1 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/pr13727.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/pr13727.ll
@@ -31,17 +31,17 @@
   %retval = alloca i32, align 4
   %i = alloca i32, align 4
   store i32 0, i32* %retval
-  %0 = load i32* @zero_int, align 4
+  %0 = load i32, i32* @zero_int, align 4
   %add = add nsw i32 %0, 5
   %idxprom = sext i32 %add to i64
   %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom
   store i32 40, i32* %arrayidx, align 4
-  %1 = load double* @zero_double, align 8
+  %1 = load double, double* @zero_double, align 8
   %cmp = fcmp olt double %1, 1.100000e+00
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
-  %2 = load i32* @zero_int, align 4
+  %2 = load i32, i32* @zero_int, align 4
   %add1 = add nsw i32 %2, 2
   %idxprom2 = sext i32 %add1 to i64
   %arrayidx3 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom2
@@ -53,35 +53,35 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %if.end
-  %3 = load i32* %i, align 4
+  %3 = load i32, i32* %i, align 4
   %cmp4 = icmp slt i32 %3, 10
   br i1 %cmp4, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %sub = sub nsw i32 %4, 1
   %idxprom5 = sext i32 %sub to i64
   %arrayidx6 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom5
-  %5 = load i32* %arrayidx6, align 4
-  %6 = load i32* %i, align 4
+  %5 = load i32, i32* %arrayidx6, align 4
+  %6 = load i32, i32* %i, align 4
   %idxprom7 = sext i32 %6 to i64
   %arrayidx8 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom7
-  %7 = load i32* %arrayidx8, align 4
+  %7 = load i32, i32* %arrayidx8, align 4
   %add9 = add nsw i32 %5, %7
-  %8 = load i32* %i, align 4
+  %8 = load i32, i32* %i, align 4
   %idxprom10 = sext i32 %8 to i64
   %arrayidx11 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom10
   store i32 %add9, i32* %arrayidx11, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %9 = load i32* %i, align 4
+  %9 = load i32, i32* %i, align 4
   %inc = add nsw i32 %9, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %10 = load i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
+  %10 = load i32, i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
   %cmp12 = icmp eq i32 %10, 110
   %cond = select i1 %cmp12, i32 0, i32 -1
   ret i32 %cond
diff --git a/llvm/test/ExecutionEngine/OrcJIT/remote/stubs-remote.ll b/llvm/test/ExecutionEngine/OrcJIT/remote/stubs-remote.ll
index da4ddc6..03e6bb3 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/remote/stubs-remote.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/remote/stubs-remote.ll
@@ -28,7 +28,7 @@
 
 define i1 @test() nounwind {
 entry:
-	%tmp = load i1 ()** @funcPtr
+	%tmp = load i1 ()*, i1 ()** @funcPtr
 	%eq = icmp eq i1 ()* %tmp, @test
 	ret i1 %eq
 }
diff --git a/llvm/test/ExecutionEngine/OrcJIT/remote/stubs-sm-pic.ll b/llvm/test/ExecutionEngine/OrcJIT/remote/stubs-sm-pic.ll
index f6a1607..8f9b05b 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/remote/stubs-sm-pic.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/remote/stubs-sm-pic.ll
@@ -28,7 +28,7 @@
 
 define i1 @test() nounwind {
 entry:
-	%tmp = load i1 ()** @funcPtr
+	%tmp = load i1 ()*, i1 ()** @funcPtr
 	%eq = icmp eq i1 ()* %tmp, @test
 	ret i1 %eq
 }
diff --git a/llvm/test/ExecutionEngine/OrcJIT/remote/test-common-symbols-remote.ll b/llvm/test/ExecutionEngine/OrcJIT/remote/test-common-symbols-remote.ll
index ac2c235..5cc304a 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/remote/test-common-symbols-remote.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/remote/test-common-symbols-remote.ll
@@ -31,17 +31,17 @@
   %retval = alloca i32, align 4
   %i = alloca i32, align 4
   store i32 0, i32* %retval
-  %0 = load i32* @zero_int, align 4
+  %0 = load i32, i32* @zero_int, align 4
   %add = add nsw i32 %0, 5
   %idxprom = sext i32 %add to i64
   %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom
   store i32 40, i32* %arrayidx, align 4
-  %1 = load double* @zero_double, align 8
+  %1 = load double, double* @zero_double, align 8
   %cmp = fcmp olt double %1, 1.000000e+00
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
-  %2 = load i32* @zero_int, align 4
+  %2 = load i32, i32* @zero_int, align 4
   %add1 = add nsw i32 %2, 2
   %idxprom2 = sext i32 %add1 to i64
   %arrayidx3 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom2
@@ -53,35 +53,35 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %if.end
-  %3 = load i32* %i, align 4
+  %3 = load i32, i32* %i, align 4
   %cmp4 = icmp slt i32 %3, 10
   br i1 %cmp4, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %sub = sub nsw i32 %4, 1
   %idxprom5 = sext i32 %sub to i64
   %arrayidx6 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom5
-  %5 = load i32* %arrayidx6, align 4
-  %6 = load i32* %i, align 4
+  %5 = load i32, i32* %arrayidx6, align 4
+  %6 = load i32, i32* %i, align 4
   %idxprom7 = sext i32 %6 to i64
   %arrayidx8 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom7
-  %7 = load i32* %arrayidx8, align 4
+  %7 = load i32, i32* %arrayidx8, align 4
   %add9 = add nsw i32 %5, %7
-  %8 = load i32* %i, align 4
+  %8 = load i32, i32* %i, align 4
   %idxprom10 = sext i32 %8 to i64
   %arrayidx11 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom10
   store i32 %add9, i32* %arrayidx11, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %9 = load i32* %i, align 4
+  %9 = load i32, i32* %i, align 4
   %inc = add nsw i32 %9, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %10 = load i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
+  %10 = load i32, i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
   %cmp12 = icmp eq i32 %10, 110
   %cond = select i1 %cmp12, i32 0, i32 -1
   ret i32 %cond
diff --git a/llvm/test/ExecutionEngine/OrcJIT/remote/test-fp-no-external-funcs-remote.ll b/llvm/test/ExecutionEngine/OrcJIT/remote/test-fp-no-external-funcs-remote.ll
index 9d11415..6134b8b 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/remote/test-fp-no-external-funcs-remote.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/remote/test-fp-no-external-funcs-remote.ll
@@ -1,7 +1,7 @@
 ; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
 
 define double @test(double* %DP, double %Arg) nounwind {
-	%D = load double* %DP		; <double> [#uses=1]
+	%D = load double, double* %DP		; <double> [#uses=1]
 	%V = fadd double %D, 1.000000e+00		; <double> [#uses=2]
 	%W = fsub double %V, %V		; <double> [#uses=3]
 	%X = fmul double %W, %W		; <double> [#uses=2]
diff --git a/llvm/test/ExecutionEngine/OrcJIT/remote/test-global-init-nonzero-remote.ll b/llvm/test/ExecutionEngine/OrcJIT/remote/test-global-init-nonzero-remote.ll
index 40b514f..5b7999c 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/remote/test-global-init-nonzero-remote.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/remote/test-global-init-nonzero-remote.ll
@@ -11,24 +11,24 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 49
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* @count, align 4
+  %1 = load i32, i32* @count, align 4
   %inc = add nsw i32 %1, 1
   store i32 %inc, i32* @count, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %2 = load i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
   %inc1 = add nsw i32 %2, 1
   store i32 %inc1, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %3 = load i32* @count, align 4
+  %3 = load i32, i32* @count, align 4
   %sub = sub nsw i32 %3, 50
   ret i32 %sub
 }
diff --git a/llvm/test/ExecutionEngine/OrcJIT/remote/test-global-init-nonzero-sm-pic.ll b/llvm/test/ExecutionEngine/OrcJIT/remote/test-global-init-nonzero-sm-pic.ll
index 5119b72..f9184b4 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/remote/test-global-init-nonzero-sm-pic.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/remote/test-global-init-nonzero-sm-pic.ll
@@ -12,24 +12,24 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 49
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* @count, align 4
+  %1 = load i32, i32* @count, align 4
   %inc = add nsw i32 %1, 1
   store i32 %inc, i32* @count, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %2 = load i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
   %inc1 = add nsw i32 %2, 1
   store i32 %inc1, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %3 = load i32* @count, align 4
+  %3 = load i32, i32* @count, align 4
   %sub = sub nsw i32 %3, 50
   ret i32 %sub
 }
diff --git a/llvm/test/ExecutionEngine/OrcJIT/remote/test-ptr-reloc-remote.ll b/llvm/test/ExecutionEngine/OrcJIT/remote/test-ptr-reloc-remote.ll
index ba3ffff..19303cc 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/remote/test-ptr-reloc-remote.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/remote/test-ptr-reloc-remote.ll
@@ -7,8 +7,8 @@
 
 define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly {
 entry:
-  %0 = load i8** @ptr, align 4
-  %1 = load i8** @ptr2, align 4
+  %0 = load i8*, i8** @ptr, align 4
+  %1 = load i8*, i8** @ptr2, align 4
   %cmp = icmp eq i8* %0, %1
   %. = zext i1 %cmp to i32
   ret i32 %.
diff --git a/llvm/test/ExecutionEngine/OrcJIT/remote/test-ptr-reloc-sm-pic.ll b/llvm/test/ExecutionEngine/OrcJIT/remote/test-ptr-reloc-sm-pic.ll
index bbc71af..ac52e73 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/remote/test-ptr-reloc-sm-pic.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/remote/test-ptr-reloc-sm-pic.ll
@@ -8,8 +8,8 @@
 
 define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly {
 entry:
-  %0 = load i8** @ptr, align 4
-  %1 = load i8** @ptr2, align 4
+  %0 = load i8*, i8** @ptr, align 4
+  %1 = load i8*, i8** @ptr2, align 4
   %cmp = icmp eq i8* %0, %1
   %. = zext i1 %cmp to i32
   ret i32 %.
diff --git a/llvm/test/ExecutionEngine/OrcJIT/stubs-sm-pic.ll b/llvm/test/ExecutionEngine/OrcJIT/stubs-sm-pic.ll
index 28f8a76..81098ce 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/stubs-sm-pic.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/stubs-sm-pic.ll
@@ -27,7 +27,7 @@
 
 define i1 @test() nounwind {
 entry:
-	%tmp = load i1 ()** @funcPtr
+	%tmp = load i1 ()*, i1 ()** @funcPtr
 	%eq = icmp eq i1 ()* %tmp, @test
 	ret i1 %eq
 }
diff --git a/llvm/test/ExecutionEngine/OrcJIT/stubs.ll b/llvm/test/ExecutionEngine/OrcJIT/stubs.ll
index ec3c458..5007c49 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/stubs.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/stubs.ll
@@ -26,7 +26,7 @@
 
 define i1 @test() nounwind {
 entry:
-	%tmp = load i1 ()** @funcPtr
+	%tmp = load i1 ()*, i1 ()** @funcPtr
 	%eq = icmp eq i1 ()* %tmp, @test
 	ret i1 %eq
 }
diff --git a/llvm/test/ExecutionEngine/OrcJIT/test-common-symbols-alignment.ll b/llvm/test/ExecutionEngine/OrcJIT/test-common-symbols-alignment.ll
index 35349e3..d48e057 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/test-common-symbols-alignment.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/test-common-symbols-alignment.ll
@@ -13,7 +13,7 @@
     %ptr = alloca i32, align 4
     store i32 0, i32* %retval
     store i32 ptrtoint (i32* @CS3 to i32), i32* %ptr, align 4
-    %0 = load i32* %ptr, align 4
+    %0 = load i32, i32* %ptr, align 4
     %and = and i32 %0, 15
     %tobool = icmp ne i32 %and, 0
     br i1 %tobool, label %if.then, label %if.else
@@ -27,6 +27,6 @@
     br label %return
 
 return:                                           ; preds = %if.else, %if.then
-    %1 = load i32* %retval
+    %1 = load i32, i32* %retval
     ret i32 %1
 }
diff --git a/llvm/test/ExecutionEngine/OrcJIT/test-common-symbols.ll b/llvm/test/ExecutionEngine/OrcJIT/test-common-symbols.ll
index d2784a9..a6e7dde 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/test-common-symbols.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/test-common-symbols.ll
@@ -31,17 +31,17 @@
   %retval = alloca i32, align 4
   %i = alloca i32, align 4
   store i32 0, i32* %retval
-  %0 = load i32* @zero_int, align 4
+  %0 = load i32, i32* @zero_int, align 4
   %add = add nsw i32 %0, 5
   %idxprom = sext i32 %add to i64
   %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom
   store i32 40, i32* %arrayidx, align 4
-  %1 = load double* @zero_double, align 8
+  %1 = load double, double* @zero_double, align 8
   %cmp = fcmp olt double %1, 1.000000e+00
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
-  %2 = load i32* @zero_int, align 4
+  %2 = load i32, i32* @zero_int, align 4
   %add1 = add nsw i32 %2, 2
   %idxprom2 = sext i32 %add1 to i64
   %arrayidx3 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom2
@@ -53,35 +53,35 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %if.end
-  %3 = load i32* %i, align 4
+  %3 = load i32, i32* %i, align 4
   %cmp4 = icmp slt i32 %3, 10
   br i1 %cmp4, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %sub = sub nsw i32 %4, 1
   %idxprom5 = sext i32 %sub to i64
   %arrayidx6 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom5
-  %5 = load i32* %arrayidx6, align 4
-  %6 = load i32* %i, align 4
+  %5 = load i32, i32* %arrayidx6, align 4
+  %6 = load i32, i32* %i, align 4
   %idxprom7 = sext i32 %6 to i64
   %arrayidx8 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom7
-  %7 = load i32* %arrayidx8, align 4
+  %7 = load i32, i32* %arrayidx8, align 4
   %add9 = add nsw i32 %5, %7
-  %8 = load i32* %i, align 4
+  %8 = load i32, i32* %i, align 4
   %idxprom10 = sext i32 %8 to i64
   %arrayidx11 = getelementptr inbounds [10 x i32], [10 x i32]* @zero_arr, i32 0, i64 %idxprom10
   store i32 %add9, i32* %arrayidx11, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %9 = load i32* %i, align 4
+  %9 = load i32, i32* %i, align 4
   %inc = add nsw i32 %9, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %10 = load i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
+  %10 = load i32, i32* getelementptr inbounds ([10 x i32]* @zero_arr, i32 0, i64 9), align 4
   %cmp12 = icmp eq i32 %10, 110
   %cond = select i1 %cmp12, i32 0, i32 -1
   ret i32 %cond
diff --git a/llvm/test/ExecutionEngine/OrcJIT/test-fp-no-external-funcs.ll b/llvm/test/ExecutionEngine/OrcJIT/test-fp-no-external-funcs.ll
index cf8db4c..3a88a7d 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/test-fp-no-external-funcs.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/test-fp-no-external-funcs.ll
@@ -1,7 +1,7 @@
 ; RUN: %lli -use-orcmcjit %s > /dev/null
 
 define double @test(double* %DP, double %Arg) {
-	%D = load double* %DP		; <double> [#uses=1]
+	%D = load double, double* %DP		; <double> [#uses=1]
 	%V = fadd double %D, 1.000000e+00		; <double> [#uses=2]
 	%W = fsub double %V, %V		; <double> [#uses=3]
 	%X = fmul double %W, %W		; <double> [#uses=2]
diff --git a/llvm/test/ExecutionEngine/OrcJIT/test-fp.ll b/llvm/test/ExecutionEngine/OrcJIT/test-fp.ll
index 77a4c7e..6129f7e 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/test-fp.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/test-fp.ll
@@ -1,7 +1,7 @@
 ; RUN: %lli -use-orcmcjit %s > /dev/null
 
 define double @test(double* %DP, double %Arg) {
-	%D = load double* %DP		; <double> [#uses=1]
+	%D = load double, double* %DP		; <double> [#uses=1]
 	%V = fadd double %D, 1.000000e+00		; <double> [#uses=2]
 	%W = fsub double %V, %V		; <double> [#uses=3]
 	%X = fmul double %W, %W		; <double> [#uses=2]
diff --git a/llvm/test/ExecutionEngine/OrcJIT/test-global-ctors.ll b/llvm/test/ExecutionEngine/OrcJIT/test-global-ctors.ll
index bb00af6..da911d9 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/test-global-ctors.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/test-global-ctors.ll
@@ -6,7 +6,7 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @var, align 4
+  %0 = load i32, i32* @var, align 4
   ret i32 %0
 }
 
diff --git a/llvm/test/ExecutionEngine/OrcJIT/test-global-init-nonzero-sm-pic.ll b/llvm/test/ExecutionEngine/OrcJIT/test-global-init-nonzero-sm-pic.ll
index c8ef597..158d64f 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/test-global-init-nonzero-sm-pic.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/test-global-init-nonzero-sm-pic.ll
@@ -12,24 +12,24 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 49
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* @count, align 4
+  %1 = load i32, i32* @count, align 4
   %inc = add nsw i32 %1, 1
   store i32 %inc, i32* @count, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %2 = load i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
   %inc1 = add nsw i32 %2, 1
   store i32 %inc1, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %3 = load i32* @count, align 4
+  %3 = load i32, i32* @count, align 4
   %sub = sub nsw i32 %3, 50
   ret i32 %sub
 }
diff --git a/llvm/test/ExecutionEngine/OrcJIT/test-global-init-nonzero.ll b/llvm/test/ExecutionEngine/OrcJIT/test-global-init-nonzero.ll
index 46b721d..0fcf8f9 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/test-global-init-nonzero.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/test-global-init-nonzero.ll
@@ -11,24 +11,24 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 49
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* @count, align 4
+  %1 = load i32, i32* @count, align 4
   %inc = add nsw i32 %1, 1
   store i32 %inc, i32* @count, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %2 = load i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
   %inc1 = add nsw i32 %2, 1
   store i32 %inc1, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %3 = load i32* @count, align 4
+  %3 = load i32, i32* @count, align 4
   %sub = sub nsw i32 %3, 50
   ret i32 %sub
 }
diff --git a/llvm/test/ExecutionEngine/OrcJIT/test-global.ll b/llvm/test/ExecutionEngine/OrcJIT/test-global.ll
index 5ece354..96036e0 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/test-global.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/test-global.ll
@@ -11,24 +11,24 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 50
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* @count, align 4
+  %1 = load i32, i32* @count, align 4
   %inc = add nsw i32 %1, 1
   store i32 %inc, i32* @count, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %2 = load i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
   %inc1 = add nsw i32 %2, 1
   store i32 %inc1, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %3 = load i32* @count, align 4
+  %3 = load i32, i32* @count, align 4
   %sub = sub nsw i32 %3, 50
   ret i32 %sub
 }
diff --git a/llvm/test/ExecutionEngine/OrcJIT/test-loadstore.ll b/llvm/test/ExecutionEngine/OrcJIT/test-loadstore.ll
index 24ddd7a..9c298f4 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/test-loadstore.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/test-loadstore.ll
@@ -1,13 +1,13 @@
 ; RUN: %lli -use-orcmcjit %s > /dev/null
 
 define void @test(i8* %P, i16* %P.upgrd.1, i32* %P.upgrd.2, i64* %P.upgrd.3) {
-	%V = load i8* %P		; <i8> [#uses=1]
+	%V = load i8, i8* %P		; <i8> [#uses=1]
 	store i8 %V, i8* %P
-	%V.upgrd.4 = load i16* %P.upgrd.1		; <i16> [#uses=1]
+	%V.upgrd.4 = load i16, i16* %P.upgrd.1		; <i16> [#uses=1]
 	store i16 %V.upgrd.4, i16* %P.upgrd.1
-	%V.upgrd.5 = load i32* %P.upgrd.2		; <i32> [#uses=1]
+	%V.upgrd.5 = load i32, i32* %P.upgrd.2		; <i32> [#uses=1]
 	store i32 %V.upgrd.5, i32* %P.upgrd.2
-	%V.upgrd.6 = load i64* %P.upgrd.3		; <i64> [#uses=1]
+	%V.upgrd.6 = load i64, i64* %P.upgrd.3		; <i64> [#uses=1]
 	store i64 %V.upgrd.6, i64* %P.upgrd.3
 	ret void
 }
@@ -16,7 +16,7 @@
         ;; Variable sized alloca
 	%X = alloca i32, i32 %Size		; <i32*> [#uses=2]
 	store i32 %Size, i32* %X
-	%Y = load i32* %X		; <i32> [#uses=1]
+	%Y = load i32, i32* %X		; <i32> [#uses=1]
 	ret i32 %Y
 }
 
diff --git a/llvm/test/ExecutionEngine/OrcJIT/test-local.ll b/llvm/test/ExecutionEngine/OrcJIT/test-local.ll
index b541650..5a08e47 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/test-local.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/test-local.ll
@@ -11,24 +11,24 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 50
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %count, align 4
+  %1 = load i32, i32* %count, align 4
   %inc = add nsw i32 %1, 1
   store i32 %inc, i32* %count, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %2 = load i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
   %inc1 = add nsw i32 %2, 1
   store i32 %inc1, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %3 = load i32* %count, align 4
+  %3 = load i32, i32* %count, align 4
   %sub = sub nsw i32 %3, 50
   ret i32 %sub
 }
diff --git a/llvm/test/ExecutionEngine/OrcJIT/test-ptr-reloc-sm-pic.ll b/llvm/test/ExecutionEngine/OrcJIT/test-ptr-reloc-sm-pic.ll
index d940adc..a787116 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/test-ptr-reloc-sm-pic.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/test-ptr-reloc-sm-pic.ll
@@ -8,8 +8,8 @@
 
 define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly {
 entry:
-  %0 = load i8** @ptr, align 4
-  %1 = load i8** @ptr2, align 4
+  %0 = load i8*, i8** @ptr, align 4
+  %1 = load i8*, i8** @ptr2, align 4
   %cmp = icmp eq i8* %0, %1
   %. = zext i1 %cmp to i32
   ret i32 %.
diff --git a/llvm/test/ExecutionEngine/OrcJIT/test-ptr-reloc.ll b/llvm/test/ExecutionEngine/OrcJIT/test-ptr-reloc.ll
index 95fa106..1be6fed 100644
--- a/llvm/test/ExecutionEngine/OrcJIT/test-ptr-reloc.ll
+++ b/llvm/test/ExecutionEngine/OrcJIT/test-ptr-reloc.ll
@@ -7,8 +7,8 @@
 
 define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly {
 entry:
-  %0 = load i8** @ptr, align 4
-  %1 = load i8** @ptr2, align 4
+  %0 = load i8*, i8** @ptr, align 4
+  %1 = load i8*, i8** @ptr2, align 4
   %cmp = icmp eq i8* %0, %1
   %. = zext i1 %cmp to i32
   ret i32 %.
diff --git a/llvm/test/ExecutionEngine/frem.ll b/llvm/test/ExecutionEngine/frem.ll
index ce83d20..cde4e1b 100644
--- a/llvm/test/ExecutionEngine/frem.ll
+++ b/llvm/test/ExecutionEngine/frem.ll
@@ -11,7 +11,7 @@
 declare i32 @fflush(i8*) nounwind
 
 define i32 @main() {
-  %flt = load float* @flt
+  %flt = load float, float* @flt
   %float2 = frem float %flt, 5.0
   %double1 = fpext float %float2 to double
   call i32 (i8*, ...)* @printf(i8* getelementptr ([18 x i8]* @str, i32 0, i64 0), double %double1)
diff --git a/llvm/test/ExecutionEngine/test-interp-vec-loadstore.ll b/llvm/test/ExecutionEngine/test-interp-vec-loadstore.ll
index cadeedc..6819724 100644
--- a/llvm/test/ExecutionEngine/test-interp-vec-loadstore.ll
+++ b/llvm/test/ExecutionEngine/test-interp-vec-loadstore.ll
@@ -35,13 +35,13 @@
   store i32 4, i32* %pint_3
   
   ; load stored scalars
-  %val_int0 = load i32* %pint_0
-  %val_int1 = load i32* %pint_1
-  %val_int2 = load i32* %pint_2
-  %val_int3 = load i32* %pint_3
+  %val_int0 = load i32, i32* %pint_0
+  %val_int1 = load i32, i32* %pint_1
+  %val_int2 = load i32, i32* %pint_2
+  %val_int3 = load i32, i32* %pint_3
 
   ; load stored vector
-  %val0 = load <4 x i32> *%a, align 16
+  %val0 = load <4 x i32> , <4 x i32> *%a, align 16
 
   ; extract integers from the loaded vector
   %res_i32_0 = extractelement <4 x i32> %val0, i32 0
@@ -84,12 +84,12 @@
   store double 8.0, double* %pdouble_3
 
   ; load stored vector
-  %val1 = load <4 x double> *%b, align 16
+  %val1 = load <4 x double> , <4 x double> *%b, align 16
   ; load stored scalars
-  %val_double0 = load double* %pdouble_0
-  %val_double1 = load double* %pdouble_1
-  %val_double2 = load double* %pdouble_2
-  %val_double3 = load double* %pdouble_3
+  %val_double0 = load double, double* %pdouble_0
+  %val_double1 = load double, double* %pdouble_1
+  %val_double2 = load double, double* %pdouble_2
+  %val_double3 = load double, double* %pdouble_3
 
   %res_double_0 = extractelement <4 x double> %val1, i32 0
   %res_double_1 = extractelement <4 x double> %val1, i32 1
@@ -128,12 +128,12 @@
   store float 12.0, float* %pfloat_3
 
   ; load stored vector
-  %val2 = load <4 x float> *%c, align 16
+  %val2 = load <4 x float> , <4 x float> *%c, align 16
   ; load stored scalars
-  %val_float0 = load float* %pfloat_0
-  %val_float1 = load float* %pfloat_1
-  %val_float2 = load float* %pfloat_2
-  %val_float3 = load float* %pfloat_3
+  %val_float0 = load float, float* %pfloat_0
+  %val_float1 = load float, float* %pfloat_1
+  %val_float2 = load float, float* %pfloat_2
+  %val_float3 = load float, float* %pfloat_3
 
   %res_float_0 = extractelement <4 x float> %val2, i32 0
   %res_float_1 = extractelement <4 x float> %val2, i32 1
diff --git a/llvm/test/Feature/aliases.ll b/llvm/test/Feature/aliases.ll
index c11fc47..f03e3cf 100644
--- a/llvm/test/Feature/aliases.ll
+++ b/llvm/test/Feature/aliases.ll
@@ -30,9 +30,9 @@
 
 define i32 @test() {
 entry:
-   %tmp = load i32* @foo1
-   %tmp1 = load i32* @foo2
-   %tmp0 = load i32* @bar_i
+   %tmp = load i32, i32* @foo1
+   %tmp1 = load i32, i32* @foo2
+   %tmp0 = load i32, i32* @bar_i
    %tmp2 = call i32 @foo_f()
    %tmp3 = add i32 %tmp, %tmp2
    %tmp4 = call %FunTy* @bar_f()
diff --git a/llvm/test/Feature/md_on_instruction.ll b/llvm/test/Feature/md_on_instruction.ll
index 511cc85..0785538 100644
--- a/llvm/test/Feature/md_on_instruction.ll
+++ b/llvm/test/Feature/md_on_instruction.ll
@@ -8,7 +8,7 @@
 
 ; <label>:0                                       ; preds = %entry
   call void @llvm.dbg.region.end(metadata !0)
-  %1 = load i32* %retval, !dbg !3                  ; <i32> [#uses=1]
+  %1 = load i32, i32* %retval, !dbg !3                  ; <i32> [#uses=1]
   ret i32 %1, !dbg !3
 }
 
diff --git a/llvm/test/Feature/memorymarkers.ll b/llvm/test/Feature/memorymarkers.ll
index 6643448..47dd1ee 100644
--- a/llvm/test/Feature/memorymarkers.ll
+++ b/llvm/test/Feature/memorymarkers.ll
@@ -26,7 +26,7 @@
   ;; Constructor has finished here.
   %inv = call {}* @llvm.invariant.start(i64 8, i8* %y)
   call void @_Z3barRKi(i32* %0) nounwind
-  %2 = load i32* %0, align 8
+  %2 = load i32, i32* %0, align 8
 
   ;; Destructor is run here.
   call void @llvm.invariant.end({}* %inv, i64 8, i8* %y)
diff --git a/llvm/test/Feature/optnone-llc.ll b/llvm/test/Feature/optnone-llc.ll
index f19fa88f..b848b19 100644
--- a/llvm/test/Feature/optnone-llc.ll
+++ b/llvm/test/Feature/optnone-llc.ll
@@ -17,7 +17,7 @@
   br label %while.cond
 
 while.cond:                                       ; preds = %while.body, %entry
-  %0 = load i32* %x.addr, align 4
+  %0 = load i32, i32* %x.addr, align 4
   %dec = add nsw i32 %0, -1
   store i32 %dec, i32* %x.addr, align 4
   %tobool = icmp ne i32 %0, 0
diff --git a/llvm/test/Feature/optnone-opt.ll b/llvm/test/Feature/optnone-opt.ll
index f83e68c..a17f977 100644
--- a/llvm/test/Feature/optnone-opt.ll
+++ b/llvm/test/Feature/optnone-opt.ll
@@ -18,7 +18,7 @@
   br label %while.cond
 
 while.cond:                                       ; preds = %while.body, %entry
-  %0 = load i32* %x.addr, align 4
+  %0 = load i32, i32* %x.addr, align 4
   %dec = add nsw i32 %0, -1
   store i32 %dec, i32* %x.addr, align 4
   %tobool = icmp ne i32 %0, 0
diff --git a/llvm/test/Feature/packed.ll b/llvm/test/Feature/packed.ll
index b86a227..fa29535 100644
--- a/llvm/test/Feature/packed.ll
+++ b/llvm/test/Feature/packed.ll
@@ -8,8 +8,8 @@
 define void @main() {
         store <4 x float> < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >, <4 x float>* @foo1
         store <2 x i32> < i32 4, i32 4 >, <2 x i32>* @foo2
-        %l1 = load <4 x float>* @foo1           ; <<4 x float>> [#uses=0]
-        %l2 = load <2 x i32>* @foo2             ; <<2 x i32>> [#uses=0]
+        %l1 = load <4 x float>, <4 x float>* @foo1           ; <<4 x float>> [#uses=0]
+        %l2 = load <2 x i32>, <2 x i32>* @foo2             ; <<2 x i32>> [#uses=0]
         ret void
 }
 
diff --git a/llvm/test/Feature/packed_struct.ll b/llvm/test/Feature/packed_struct.ll
index 0766649..5909f99 100644
--- a/llvm/test/Feature/packed_struct.ll
+++ b/llvm/test/Feature/packed_struct.ll
@@ -16,9 +16,9 @@
 
 define i32 @main() 
 {
-        %tmp = load i32*  getelementptr (%struct.anon* @foos, i32 0, i32 1)            ; <i32> [#uses=1]
-        %tmp3 = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 2)            ; <i32> [#uses=1]
-        %tmp6 = load i32* getelementptr (%struct.anon* @foos, i32 0, i32 3)            ; <i32> [#uses=1]
+        %tmp = load i32, i32*  getelementptr (%struct.anon* @foos, i32 0, i32 1)            ; <i32> [#uses=1]
+        %tmp3 = load i32, i32* getelementptr (%struct.anon* @foos, i32 0, i32 2)            ; <i32> [#uses=1]
+        %tmp6 = load i32, i32* getelementptr (%struct.anon* @foos, i32 0, i32 3)            ; <i32> [#uses=1]
         %tmp4 = add i32 %tmp3, %tmp             ; <i32> [#uses=1]
         %tmp7 = add i32 %tmp4, %tmp6            ; <i32> [#uses=1]
         ret i32 %tmp7
@@ -26,8 +26,8 @@
 
 define i32 @bar() {
 entry:
-        %tmp = load i32* getelementptr([2 x <{ i32, i8 }>]* @bara, i32 0, i32 0, i32 0 )            ; <i32> [#uses=1]
-        %tmp4 = load i32* getelementptr ([2 x <{ i32, i8 }>]* @bara, i32 0, i32 1, i32 0)           ; <i32> [#uses=1]
+        %tmp = load i32, i32* getelementptr([2 x <{ i32, i8 }>]* @bara, i32 0, i32 0, i32 0 )            ; <i32> [#uses=1]
+        %tmp4 = load i32, i32* getelementptr ([2 x <{ i32, i8 }>]* @bara, i32 0, i32 1, i32 0)           ; <i32> [#uses=1]
         %tmp5 = add i32 %tmp4, %tmp             ; <i32> [#uses=1]
         ret i32 %tmp5
 }
diff --git a/llvm/test/Feature/ppcld.ll b/llvm/test/Feature/ppcld.ll
index 393a491..6f56619 100644
--- a/llvm/test/Feature/ppcld.ll
+++ b/llvm/test/Feature/ppcld.ll
@@ -12,15 +12,15 @@
 entry:
 	%retval = alloca i32, align 4		; <i32*> [#uses=1]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	%tmp = load float* @f		; <float> [#uses=1]
+	%tmp = load float, float* @f		; <float> [#uses=1]
 	%tmp1 = fpext float %tmp to double		; <double> [#uses=1]
-	%tmp2 = load double* @d		; <double> [#uses=1]
+	%tmp2 = load double, double* @d		; <double> [#uses=1]
 	%tmp3 = fmul double %tmp1, %tmp2		; <double> [#uses=1]
 	%tmp4 = fpext double %tmp3 to ppc_fp128		; <ppc_fp128> [#uses=1]
 	store ppc_fp128 %tmp4, ppc_fp128* @ld
 	br label %return
 
 return:		; preds = %entry
-	%retval4 = load i32* %retval		; <i32> [#uses=1]
+	%retval4 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval4
 }
diff --git a/llvm/test/Feature/recursivetype.ll b/llvm/test/Feature/recursivetype.ll
index 133704c..b040696 100644
--- a/llvm/test/Feature/recursivetype.ll
+++ b/llvm/test/Feature/recursivetype.ll
@@ -43,7 +43,7 @@
 
 define void @InsertIntoListTail(%list** %L, i32 %Data) {
 bb1:
-        %reg116 = load %list** %L               ; <%list*> [#uses=1]
+        %reg116 = load %list*, %list** %L               ; <%list*> [#uses=1]
         %cast1004 = inttoptr i64 0 to %list*            ; <%list*> [#uses=1]
         %cond1000 = icmp eq %list* %reg116, %cast1004           ; <i1> [#uses=1]
         br i1 %cond1000, label %bb3, label %bb2
@@ -51,8 +51,8 @@
 bb2:            ; preds = %bb2, %bb1
         %reg117 = phi %list** [ %reg118, %bb2 ], [ %L, %bb1 ]           ; <%list**> [#uses=1]
         %cast1010 = bitcast %list** %reg117 to %list***         ; <%list***> [#uses=1]
-        %reg118 = load %list*** %cast1010               ; <%list**> [#uses=3]
-        %reg109 = load %list** %reg118          ; <%list*> [#uses=1]
+        %reg118 = load %list**, %list*** %cast1010               ; <%list**> [#uses=3]
+        %reg109 = load %list*, %list** %reg118          ; <%list*> [#uses=1]
         %cast1005 = inttoptr i64 0 to %list*            ; <%list*> [#uses=1]
         %cond1001 = icmp ne %list* %reg109, %cast1005           ; <i1> [#uses=1]
         br i1 %cond1001, label %bb2, label %bb3
@@ -88,7 +88,7 @@
 
 bb4:            ; preds = %bb2
         %idx = getelementptr %list, %list* %reg115, i64 0, i32 1               ; <i32*> [#uses=1]
-        %reg111 = load i32* %idx                ; <i32> [#uses=1]
+        %reg111 = load i32, i32* %idx                ; <i32> [#uses=1]
         %cond1013 = icmp ne i32 %reg111, %Data          ; <i1> [#uses=1]
         br i1 %cond1013, label %bb6, label %bb5
 
@@ -97,7 +97,7 @@
 
 bb6:            ; preds = %bb4
         %idx2 = getelementptr %list, %list* %reg115, i64 0, i32 0              ; <%list**> [#uses=1]
-        %reg116 = load %list** %idx2            ; <%list*> [#uses=1]
+        %reg116 = load %list*, %list** %idx2            ; <%list*> [#uses=1]
         br label %bb2
 }
 
diff --git a/llvm/test/Feature/sparcld.ll b/llvm/test/Feature/sparcld.ll
index 095f6f6..f5a24f5 100644
--- a/llvm/test/Feature/sparcld.ll
+++ b/llvm/test/Feature/sparcld.ll
@@ -10,15 +10,15 @@
 entry:
 	%retval = alloca i32, align 4		; <i32*> [#uses=1]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	%tmp = load float* @f		; <float> [#uses=1]
+	%tmp = load float, float* @f		; <float> [#uses=1]
 	%tmp1 = fpext float %tmp to double		; <double> [#uses=1]
-	%tmp2 = load double* @d		; <double> [#uses=1]
+	%tmp2 = load double, double* @d		; <double> [#uses=1]
 	%tmp3 = fmul double %tmp1, %tmp2		; <double> [#uses=1]
 	%tmp4 = fpext double %tmp3 to fp128		; <fp128> [#uses=1]
 	store fp128 %tmp4, fp128* @ld
 	br label %return
 
 return:		; preds = %entry
-	%retval4 = load i32* %retval		; <i32> [#uses=1]
+	%retval4 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval4
 }
diff --git a/llvm/test/Feature/testalloca.ll b/llvm/test/Feature/testalloca.ll
index ba2eed8..0405320 100644
--- a/llvm/test/Feature/testalloca.ll
+++ b/llvm/test/Feature/testalloca.ll
@@ -9,7 +9,7 @@
         alloca i8, i32 5                ; <i8*>:1 [#uses=0]
         %ptr = alloca i32               ; <i32*> [#uses=2]
         store i32 3, i32* %ptr
-        %val = load i32* %ptr           ; <i32> [#uses=0]
+        %val = load i32, i32* %ptr           ; <i32> [#uses=0]
         %sptr = alloca %struct          ; <%struct*> [#uses=2]
         %nsptr = getelementptr %struct, %struct* %sptr, i64 0, i32 1             ; <%inners*> [#uses=1]
         %ubsptr = getelementptr %inners, %inners* %nsptr, i64 0, i32 1           ; <{ i8 }*> [#uses=1]
diff --git a/llvm/test/Feature/varargs_new.ll b/llvm/test/Feature/varargs_new.ll
index a46f270..ca5599d 100644
--- a/llvm/test/Feature/varargs_new.ll
+++ b/llvm/test/Feature/varargs_new.ll
@@ -24,7 +24,7 @@
         %tmp = va_arg i8** %ap, i32             ; <i32> [#uses=1]
 
         ; Demonstrate usage of llvm.va_copy and llvm_va_end
-        %apv = load i8** %ap            ; <i8*> [#uses=1]
+        %apv = load i8*, i8** %ap            ; <i8*> [#uses=1]
         %va0.upgrd.2 = bitcast i8** %aq to i8*          ; <i8*> [#uses=1]
         %va1.upgrd.3 = bitcast i8* %apv to i8*          ; <i8*> [#uses=1]
         call void @llvm.va_copy( i8* %va0.upgrd.2, i8* %va1.upgrd.3 )
diff --git a/llvm/test/Feature/weak_constant.ll b/llvm/test/Feature/weak_constant.ll
index fa254e4..4ac2e7e 100644
--- a/llvm/test/Feature/weak_constant.ll
+++ b/llvm/test/Feature/weak_constant.ll
@@ -10,20 +10,20 @@
 @c = weak constant %0 { i32 7, i32 9 }		; <%0*> [#uses=1]
 
 define i32 @la() {
-	%v = load i32* @a		; <i32> [#uses=1]
+	%v = load i32, i32* @a		; <i32> [#uses=1]
 	ret i32 %v
 }
 
 define i32 @lb() {
-	%v = load i32* @b		; <i32> [#uses=1]
+	%v = load i32, i32* @b		; <i32> [#uses=1]
 	ret i32 %v
 }
 
 define i32 @lc() {
 	%g = getelementptr %0, %0* @c, i32 0, i32 0		; <i32*> [#uses=1]
-	%u = load i32* %g		; <i32> [#uses=1]
+	%u = load i32, i32* %g		; <i32> [#uses=1]
 	%h = getelementptr %0, %0* @c, i32 0, i32 1		; <i32*> [#uses=1]
-	%v = load i32* %h		; <i32> [#uses=1]
+	%v = load i32, i32* %h		; <i32> [#uses=1]
 	%r = add i32 %u, %v
 	ret i32 %r
 }
diff --git a/llvm/test/Feature/x86ld.ll b/llvm/test/Feature/x86ld.ll
index 32005ae..7f91abd 100644
--- a/llvm/test/Feature/x86ld.ll
+++ b/llvm/test/Feature/x86ld.ll
@@ -12,15 +12,15 @@
 entry:
 	%retval = alloca i32, align 4		; <i32*> [#uses=1]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	%tmp = load float* @f		; <float> [#uses=1]
+	%tmp = load float, float* @f		; <float> [#uses=1]
 	%tmp1 = fpext float %tmp to double		; <double> [#uses=1]
-	%tmp2 = load double* @d		; <double> [#uses=1]
+	%tmp2 = load double, double* @d		; <double> [#uses=1]
 	%tmp3 = fmul double %tmp1, %tmp2		; <double> [#uses=1]
 	%tmp4 = fpext double %tmp3 to x86_fp80		; <x86_fp80> [#uses=1]
 	store x86_fp80 %tmp4, x86_fp80* @ld
 	br label %return
 
 return:		; preds = %entry
-	%retval4 = load i32* %retval		; <i32> [#uses=1]
+	%retval4 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval4
 }
diff --git a/llvm/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll b/llvm/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
index afc0e1c..dc943fe 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll
@@ -53,10 +53,10 @@
   store i32 %filter_shift, i32* %filter_shift.addr, align 4, !tbaa !3
   store i32 %mask, i32* %mask.addr, align 4, !tbaa !3
   %arrayidx = getelementptr inbounds [9 x i8*], [9 x i8*]* @firtable, i32 0, i32 %firorder
-  %0 = load i8** %arrayidx, align 4, !tbaa !0
+  %0 = load i8*, i8** %arrayidx, align 4, !tbaa !0
   store i8* %0, i8** %firjump, align 4, !tbaa !0
   %arrayidx1 = getelementptr inbounds [5 x i8*], [5 x i8*]* @iirtable, i32 0, i32 %iirorder
-  %1 = load i8** %arrayidx1, align 4, !tbaa !0
+  %1 = load i8*, i8** %arrayidx1, align 4, !tbaa !0
   store i8* %1, i8** %iirjump, align 4, !tbaa !0
   %sub = sub nsw i32 0, %blocksize
   store i32 %sub, i32* %blocksize.addr, align 4, !tbaa !3
diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll
index 75adf40..64adf46 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll
@@ -15,9 +15,9 @@
 
 define i32 @test_widening_bad(i8* %P) nounwind ssp noredzone sanitize_address {
 entry:
-  %tmp = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 1), align 4
+  %tmp = load i8, i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 1), align 4
   %conv = zext i8 %tmp to i32
-  %tmp1 = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 3), align 1
+  %tmp1 = load i8, i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 3), align 1
   %conv2 = zext i8 %tmp1 to i32
   %add = add nsw i32 %conv, %conv2
   ret i32 %add
@@ -38,9 +38,9 @@
 
 define i32 @test_widening_ok(i8* %P) nounwind ssp noredzone sanitize_address {
 entry:
-  %tmp = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 1), align 4
+  %tmp = load i8, i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 1), align 4
   %conv = zext i8 %tmp to i32
-  %tmp1 = load i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 2), align 1
+  %tmp1 = load i8, i8* getelementptr inbounds (%struct_of_7_bytes_4_aligned* @f, i64 0, i32 2), align 1
   %conv2 = zext i8 %tmp1 to i32
   %add = add nsw i32 %conv, %conv2
   ret i32 %add
diff --git a/llvm/test/Instrumentation/AddressSanitizer/basic.ll b/llvm/test/Instrumentation/AddressSanitizer/basic.ll
index 8020660..21153ae 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/basic.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/basic.ll
@@ -12,7 +12,7 @@
 ; CHECK:   lshr i64 %[[LOAD_ADDR]], 3
 ; CHECK:   {{or|add}}
 ; CHECK:   %[[LOAD_SHADOW_PTR:[^ ]*]] = inttoptr
-; CHECK:   %[[LOAD_SHADOW:[^ ]*]] = load i8* %[[LOAD_SHADOW_PTR]]
+; CHECK:   %[[LOAD_SHADOW:[^ ]*]] = load i8, i8* %[[LOAD_SHADOW_PTR]]
 ; CHECK:   icmp ne i8
 ; CHECK:   br i1 %{{.*}}, label %{{.*}}, label %{{.*}}!prof ![[PROF:[0-9]+]]
 ;
@@ -28,13 +28,13 @@
 ; CHECK:   unreachable
 ;
 ; The actual load.
-; CHECK:   %tmp1 = load i32* %a
+; CHECK:   %tmp1 = load i32, i32* %a
 ; CHECK:   ret i32 %tmp1
 
 
 
 entry:
-  %tmp1 = load i32* %a, align 4
+  %tmp1 = load i32, i32* %a, align 4
   ret i32 %tmp1
 }
 
@@ -45,7 +45,7 @@
 ; CHECK:   lshr i64 %[[STORE_ADDR]], 3
 ; CHECK:   {{or|add}}
 ; CHECK:   %[[STORE_SHADOW_PTR:[^ ]*]] = inttoptr
-; CHECK:   %[[STORE_SHADOW:[^ ]*]] = load i8* %[[STORE_SHADOW_PTR]]
+; CHECK:   %[[STORE_SHADOW:[^ ]*]] = load i8, i8* %[[STORE_SHADOW_PTR]]
 ; CHECK:   icmp ne i8
 ; CHECK:   br i1 %{{.*}}, label %{{.*}}, label %{{.*}}
 ;
@@ -103,7 +103,7 @@
 
 define void @i40test(i40* %a, i40* %b) nounwind uwtable sanitize_address {
   entry:
-  %t = load i40* %a
+  %t = load i40, i40* %a
   store i40 %t, i40* %b, align 8
   ret void
 }
@@ -129,7 +129,7 @@
 
 define void @i80test(i80* %a, i80* %b) nounwind uwtable sanitize_address {
   entry:
-  %t = load i80* %a
+  %t = load i80, i80* %a
   store i80 %t, i80* %b, align 8
   ret void
 }
@@ -144,7 +144,7 @@
 ; asan should not instrument functions with available_externally linkage.
 define available_externally i32 @f_available_externally(i32* %a) sanitize_address  {
 entry:
-  %tmp1 = load i32* %a
+  %tmp1 = load i32, i32* %a
   ret i32 %tmp1
 }
 ; CHECK-LABEL: @f_available_externally
diff --git a/llvm/test/Instrumentation/AddressSanitizer/debug_info.ll b/llvm/test/Instrumentation/AddressSanitizer/debug_info.ll
index 9bfa8c1..b95283b 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/debug_info.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/debug_info.ll
@@ -13,10 +13,10 @@
   store volatile i32 %p, i32* %p.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %p.addr, metadata !10, metadata !{!"0x102"}), !dbg !11
   call void @llvm.dbg.declare(metadata i32* %r, metadata !12, metadata !{!"0x102"}), !dbg !14
-  %0 = load i32* %p.addr, align 4, !dbg !14
+  %0 = load i32, i32* %p.addr, align 4, !dbg !14
   %add = add nsw i32 %0, 1, !dbg !14
   store volatile i32 %add, i32* %r, align 4, !dbg !14
-  %1 = load i32* %r, align 4, !dbg !15
+  %1 = load i32, i32* %r, align 4, !dbg !15
   ret i32 %1, !dbg !15
 }
 
diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-promotable-allocas.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-promotable-allocas.ll
index be73248..9b1e241 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-promotable-allocas.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-promotable-allocas.ll
@@ -7,7 +7,7 @@
 entry:
 ; CHECK: %0 = alloca i32, align 4
 ; CHECK: store i32 0, i32* %0, align 4
-; CHECK: %1 = load i32* %0, align 4
+; CHECK: %1 = load i32, i32* %0, align 4
 ; CHECK: ret i32 %1
 
 ; CHECK-NOT: __asan_stack_malloc_0
@@ -16,6 +16,6 @@
 
   %0 = alloca i32, align 4
   store i32 0, i32* %0, align 4
-  %1 = load i32* %0, align 4
+  %1 = load i32, i32* %0, align 4
   ret i32 %1
 }
diff --git a/llvm/test/Instrumentation/AddressSanitizer/freebsd.ll b/llvm/test/Instrumentation/AddressSanitizer/freebsd.ll
index 359529f..5178432 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/freebsd.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/freebsd.ll
@@ -10,7 +10,7 @@
 
 define i32 @read_4_bytes(i32* %a) sanitize_address {
 entry:
-  %tmp1 = load i32* %a, align 4
+  %tmp1 = load i32, i32* %a, align 4
   ret i32 %tmp1
 }
 
diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll b/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll
index 3901745..2fab178 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll
@@ -27,7 +27,7 @@
 ; Function Attrs: nounwind sanitize_address
 define internal void @__cxx_global_var_init() #0 section ".text.startup" {
 entry:
-  %0 = load i32* @global, align 4
+  %0 = load i32, i32* @global, align 4
   store i32 %0, i32* @dyn_init_global, align 4
   ret void
 }
diff --git a/llvm/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll b/llvm/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll
index 1234bc0..18a86a9 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll
@@ -16,7 +16,7 @@
   %0 = alloca i32, align 4
   %1 = alloca i8*
   store volatile i32 %len, i32* %0, align 4
-  %2 = load i32* %0, align 4
+  %2 = load i32, i32* %0, align 4
   %3 = zext i32 %2 to i64
   %4 = alloca i8, i64 %3, align 32
   store volatile i8 0, i8* %4
diff --git a/llvm/test/Instrumentation/AddressSanitizer/instrument_global.ll b/llvm/test/Instrumentation/AddressSanitizer/instrument_global.ll
index 259c815..a912f22 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/instrument_global.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/instrument_global.ll
@@ -20,7 +20,7 @@
 ; GlobSt is declared here, and has static initializer -- ok to optimize.
 define i32 @AccessGlobSt_0_2() sanitize_address {
 entry:
-    %0 = load i32* getelementptr inbounds ([10 x i32]* @GlobSt, i64 0, i64 2), align 8
+    %0 = load i32, i32* getelementptr inbounds ([10 x i32]* @GlobSt, i64 0, i64 2), align 8
     ret i32 %0
 ; CHECK-LABEL: define i32 @AccessGlobSt_0_2
 ; CHECK-NOT: __asan_report
@@ -30,7 +30,7 @@
 ; GlobSt is accessed out of bounds -- can't optimize
 define i32 @AccessGlobSt_0_12() sanitize_address {
 entry:
-    %0 = load i32* getelementptr inbounds ([10 x i32]* @GlobSt, i64 0, i64 12), align 8
+    %0 = load i32, i32* getelementptr inbounds ([10 x i32]* @GlobSt, i64 0, i64 12), align 8
     ret i32 %0
 ; CHECK-LABEL: define i32 @AccessGlobSt_0_12
 ; CHECK: __asan_report
@@ -40,7 +40,7 @@
 ; GlobSt is accessed with Gep that has non-0 first index -- can't optimize.
 define i32 @AccessGlobSt_1_2() sanitize_address {
 entry:
-    %0 = load i32* getelementptr inbounds ([10 x i32]* @GlobSt, i64 1, i64 2), align 8
+    %0 = load i32, i32* getelementptr inbounds ([10 x i32]* @GlobSt, i64 1, i64 2), align 8
     ret i32 %0
 ; CHECK-LABEL: define i32 @AccessGlobSt_1_2
 ; CHECK: __asan_report
@@ -50,7 +50,7 @@
 ; GlobDy is declared with dynamic initializer -- can't optimize.
 define i32 @AccessGlobDy_0_2() sanitize_address {
 entry:
-    %0 = load i32* getelementptr inbounds ([10 x i32]* @GlobDy, i64 0, i64 2), align 8
+    %0 = load i32, i32* getelementptr inbounds ([10 x i32]* @GlobDy, i64 0, i64 2), align 8
     ret i32 %0
 ; CHECK-LABEL: define i32 @AccessGlobDy_0_2
 ; CHECK: __asan_report
@@ -60,7 +60,7 @@
 ; GlobEx is an external global -- can't optimize.
 define i32 @AccessGlobEx_0_2() sanitize_address {
 entry:
-    %0 = load i32* getelementptr inbounds ([10 x i32]* @GlobEx, i64 0, i64 2), align 8
+    %0 = load i32, i32* getelementptr inbounds ([10 x i32]* @GlobEx, i64 0, i64 2), align 8
     ret i32 %0
 ; CHECK-LABEL: define i32 @AccessGlobEx_0_2
 ; CHECK: __asan_report
diff --git a/llvm/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll b/llvm/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll
index 195785f..01a7a66 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll
@@ -6,7 +6,7 @@
 target triple = "x86_64-unknown-linux-gnu"
 define void @IncrementMe(i32* %a) sanitize_address {
 entry:
-  %tmp1 = load i32* %a, align 4
+  %tmp1 = load i32, i32* %a, align 4
   %tmp2 = add i32 %tmp1,  1
   store i32 %tmp2, i32* %a, align 4
   ret void
diff --git a/llvm/test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll b/llvm/test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll
index adb4341..8e0275d 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll
@@ -20,10 +20,10 @@
 ; CHECK-CUSTOM-PREFIX: call void @__foo_load8
 ; CHECK-CUSTOM-PREFIX: call void @__foo_loadN
 ; CHECK-INLINE-NOT: call void @__asan_load
-  %tmp1 = load i32* %a, align 4
-  %tmp2 = load i64* %b, align 8
-  %tmp3 = load i512* %c, align 32
-  %tmp4 = load i80* %d, align 8
+  %tmp1 = load i32, i32* %a, align 4
+  %tmp2 = load i64, i64* %b, align 8
+  %tmp3 = load i512, i512* %c, align 32
+  %tmp4 = load i80, i80* %d, align 8
   ret void
 }
 
diff --git a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning.ll b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning.ll
index b7e24f5..0c9ffe2 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning.ll
@@ -12,7 +12,7 @@
 ; CHECK-PLAIN: ret void
 
 ; CHECK-UAR-LABEL: Bar
-; CHECK-UAR: load i32* @__asan_option_detect_stack_use_after_return
+; CHECK-UAR: load i32, i32* @__asan_option_detect_stack_use_after_return
 ; CHECK-UAR: label
 ; CHECK-UAR: call i64 @__asan_stack_malloc_1
 ; CHECK-UAR: label
diff --git a/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll b/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll
index a738f72..9c05874 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL: Func1
 
 ; CHECK: entry:
-; CHECK: load i32* @__asan_option_detect_stack_use_after_return
+; CHECK: load i32, i32* @__asan_option_detect_stack_use_after_return
 
 ; CHECK: <label>:[[UAR_ENABLED_BB:[0-9]+]]
 ; CHECK: [[FAKE_STACK_RT:%[0-9]+]] = call i64 @__asan_stack_malloc_
diff --git a/llvm/test/Instrumentation/AddressSanitizer/test64.ll b/llvm/test/Instrumentation/AddressSanitizer/test64.ll
index fd93f45..85a29e6 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/test64.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/test64.ll
@@ -3,7 +3,7 @@
 target triple = "x86_64-unknown-linux-gnu"
 define i32 @read_4_bytes(i32* %a) sanitize_address {
 entry:
-  %tmp1 = load i32* %a, align 4
+  %tmp1 = load i32, i32* %a, align 4
   ret i32 %tmp1
 }
 ; CHECK-LABEL: @read_4_bytes
diff --git a/llvm/test/Instrumentation/AddressSanitizer/ubsan.ll b/llvm/test/Instrumentation/AddressSanitizer/ubsan.ll
index feeb8ac..23b7ef4 100644
--- a/llvm/test/Instrumentation/AddressSanitizer/ubsan.ll
+++ b/llvm/test/Instrumentation/AddressSanitizer/ubsan.ll
@@ -17,9 +17,9 @@
 ; CHECK-LABEL: define void @_Z3BarP1A
 entry:
   %0 = bitcast %struct.A* %a to void (%struct.A*)***
-  %vtable = load void (%struct.A*)*** %0, align 8
+  %vtable = load void (%struct.A*)**, void (%struct.A*)*** %0, align 8
 ; CHECK: __asan_report_load8
-  %1 = load void (%struct.A*)** %vtable, align 8
+  %1 = load void (%struct.A*)*, void (%struct.A*)** %vtable, align 8
 ; CHECK: __asan_report_load8
   %2 = ptrtoint void (%struct.A*)** %vtable to i64
   %3 = xor i64 %2, -303164226014115343, !nosanitize !0
@@ -34,7 +34,7 @@
   %12 = and i64 %11, 127, !nosanitize !0
   %13 = getelementptr inbounds [128 x i64], [128 x i64]* @__ubsan_vptr_type_cache, i64 0, i64 %12, !nosanitize !0
 ; CHECK-NOT: __asan_report_load8
-  %14 = load i64* %13, align 8, !nosanitize !0
+  %14 = load i64, i64* %13, align 8, !nosanitize !0
   %15 = icmp eq i64 %14, %11, !nosanitize !0
   br i1 %15, label %cont, label %handler.dynamic_type_cache_miss, !nosanitize !0
 
diff --git a/llvm/test/Instrumentation/BoundsChecking/many-trap.ll b/llvm/test/Instrumentation/BoundsChecking/many-trap.ll
index 0bbb959..5894487 100644
--- a/llvm/test/Instrumentation/BoundsChecking/many-trap.ll
+++ b/llvm/test/Instrumentation/BoundsChecking/many-trap.ll
@@ -5,8 +5,8 @@
 ; CHECK: @f1
 define void @f1(i64 %x) nounwind {
   %1 = alloca i128, i64 %x
-  %2 = load i128* %1, align 4
-  %3 = load i128* %1, align 4
+  %2 = load i128, i128* %1, align 4
+  %3 = load i128, i128* %1, align 4
   ret void
 ; CHECK: call void @llvm.trap()
 ; CHECK: call void @llvm.trap()
diff --git a/llvm/test/Instrumentation/BoundsChecking/phi.ll b/llvm/test/Instrumentation/BoundsChecking/phi.ll
index 428501e..15361b6 100644
--- a/llvm/test/Instrumentation/BoundsChecking/phi.ll
+++ b/llvm/test/Instrumentation/BoundsChecking/phi.ll
@@ -8,7 +8,7 @@
 ; CHECK-NOT: trap
 define void @f1(i8* nocapture %c) {
 entry:
-  %0 = load i8* %c, align 1
+  %0 = load i8, i8* %c, align 1
   %tobool1 = icmp eq i8 %0, 0
   br i1 %tobool1, label %while.end, label %while.body
 
@@ -16,7 +16,7 @@
   %c.addr.02 = phi i8* [ %incdec.ptr, %while.body ], [ %c, %entry ]
   %incdec.ptr = getelementptr inbounds i8, i8* %c.addr.02, i64 -1
   store i8 100, i8* %c.addr.02, align 1
-  %1 = load i8* %incdec.ptr, align 1
+  %1 = load i8, i8* %incdec.ptr, align 1
   %tobool = icmp eq i8 %1, 0
   br i1 %tobool, label %while.end, label %while.body
 
@@ -43,7 +43,7 @@
 ; CHECK-NEXT: or i1
 ; CHECK-NEXT: br {{.*}}, label %trap
   store i8 100, i8* %c.addr.02.i, align 1
-  %0 = load i8* %incdec.ptr.i, align 1
+  %0 = load i8, i8* %incdec.ptr.i, align 1
   %tobool.i = icmp eq i8 %0, 0
   br i1 %tobool.i, label %fn.exit, label %while.body.i
 
@@ -61,7 +61,7 @@
 ; CHECK: add i16 undef, -1
 ; CHECK-NOT: trap
 entry:
-  %0 = load i8 addrspace(1)* %c, align 1
+  %0 = load i8, i8 addrspace(1)* %c, align 1
   %tobool1 = icmp eq i8 %0, 0
   br i1 %tobool1, label %while.end, label %while.body
 
@@ -69,7 +69,7 @@
   %c.addr.02 = phi i8 addrspace(1)* [ %incdec.ptr, %while.body ], [ %c, %entry ]
   %incdec.ptr = getelementptr inbounds i8, i8 addrspace(1)* %c.addr.02, i64 -1
   store i8 100, i8 addrspace(1)* %c.addr.02, align 1
-  %1 = load i8 addrspace(1)* %incdec.ptr, align 1
+  %1 = load i8, i8 addrspace(1)* %incdec.ptr, align 1
   %tobool = icmp eq i8 %1, 0
   br i1 %tobool, label %while.end, label %while.body
 
@@ -96,7 +96,7 @@
 ; CHECK-NEXT: or i1
 ; CHECK-NEXT: br {{.*}}, label %trap
   store i8 100, i8 addrspace(1)* %c.addr.02.i, align 1
-  %0 = load i8 addrspace(1)* %incdec.ptr.i, align 1
+  %0 = load i8, i8 addrspace(1)* %incdec.ptr.i, align 1
   %tobool.i = icmp eq i8 %0, 0
   br i1 %tobool.i, label %fn.exit, label %while.body.i
 
diff --git a/llvm/test/Instrumentation/BoundsChecking/simple-32.ll b/llvm/test/Instrumentation/BoundsChecking/simple-32.ll
index 0fd3c62..0fdb0a4 100644
--- a/llvm/test/Instrumentation/BoundsChecking/simple-32.ll
+++ b/llvm/test/Instrumentation/BoundsChecking/simple-32.ll
@@ -11,7 +11,7 @@
   %packed1 = alloca %struct.s2_packed, align 8
   %gep = getelementptr inbounds %struct.s2_packed, %struct.s2_packed* %packed1, i32 0, i32 4
   %ptr = bitcast i16* %gep to i32*
-  %val = load i32* %ptr, align 4
+  %val = load i32, i32* %ptr, align 4
   %valt = trunc i32 %val to i16
   ret i16 %valt
 }
@@ -23,7 +23,7 @@
   %packed1 = alloca %struct.s2_packed, align 8
   %gep = getelementptr inbounds %struct.s2_packed, %struct.s2_packed* %packed1, i32 0, i32 4
   %ptr = bitcast i16* %gep to i48*
-  %val = load i48* %ptr, align 4
+  %val = load i48, i48* %ptr, align 4
   %valt = trunc i48 %val to i16
   ret i16 %valt
 }
diff --git a/llvm/test/Instrumentation/BoundsChecking/simple.ll b/llvm/test/Instrumentation/BoundsChecking/simple.ll
index d37654f..7afc4e7 100644
--- a/llvm/test/Instrumentation/BoundsChecking/simple.ll
+++ b/llvm/test/Instrumentation/BoundsChecking/simple.ll
@@ -51,7 +51,7 @@
   %2 = bitcast i8* %1 to i32*
   %idx = getelementptr inbounds i32, i32* %2, i64 8
 ; CHECK: trap
-  %3 = load i32* %idx, align 4
+  %3 = load i32, i32* %idx, align 4
   ret void
 }
 
@@ -59,7 +59,7 @@
 define void @f5(i64 %x) nounwind {
   %idx = getelementptr inbounds [8 x i8], [8 x i8]* @.str, i64 0, i64 %x
 ; CHECK: trap
-  %1 = load i8* %idx, align 4
+  %1 = load i8, i8* %idx, align 4
   ret void
 }
 
@@ -69,7 +69,7 @@
   ; CHECK: sub i16
  ; CHECK: icmp ult i16
 ; CHECK: trap
-  %1 = load i8 addrspace(1)* %idx, align 4
+  %1 = load i8, i8 addrspace(1)* %idx, align 4
   ret void
 }
 
@@ -77,7 +77,7 @@
 define void @f6(i64 %x) nounwind {
   %1 = alloca i128
 ; CHECK-NOT: trap
-  %2 = load i128* %1, align 4
+  %2 = load i128, i128* %1, align 4
   ret void
 }
 
@@ -86,7 +86,7 @@
   %1 = alloca i128, i64 %x
 ; CHECK: mul i64 16,
 ; CHECK: trap
-  %2 = load i128* %1, align 4
+  %2 = load i128, i128* %1, align 4
   ret void
 }
 
@@ -96,7 +96,7 @@
   %2 = alloca i128
   %3 = select i1 undef, i128* %1, i128* %2
 ; CHECK-NOT: trap
-  %4 = load i128* %3, align 4
+  %4 = load i128, i128* %3, align 4
   ret void
 }
 
@@ -105,7 +105,7 @@
   %1 = alloca i128
   %2 = select i1 undef, i128* %arg, i128* %1
 ; CHECK-NOT: trap
-  %3 = load i128* %2, align 4
+  %3 = load i128, i128* %2, align 4
   ret void
 }
 
@@ -117,7 +117,7 @@
 ; CHECK: select
 ; CHECK: select
 ; CHECK: trap
-  %4 = load i128* %3, align 4
+  %4 = load i128, i128* %3, align 4
   ret void
 }
 
@@ -126,7 +126,7 @@
   %1 = bitcast i128* %x to i8*
   %2 = getelementptr inbounds i8, i8* %1, i64 16
 ; CHECK: br label
-  %3 = load i8* %2, align 4
+  %3 = load i8, i8* %2, align 4
   ret void
 }
 
@@ -135,7 +135,7 @@
   %1 = bitcast i128 addrspace(1)* %x to i8 addrspace(1)*
   %2 = getelementptr inbounds i8, i8 addrspace(1)* %1, i16 16
 ; CHECK: br label
-  %3 = load i8 addrspace(1)* %2, align 4
+  %3 = load i8, i8 addrspace(1)* %2, align 4
   ret void
 }
 
@@ -145,7 +145,7 @@
 ; CHECK: mul i64 %y, 8
   %2 = bitcast i8* %1 to i64*
   %3 = getelementptr inbounds i64, i64* %2, i64 %y
-  %4 = load i64* %3, align 8
+  %4 = load i64, i64* %3, align 8
   ret i64 %4
 }
 
@@ -159,7 +159,7 @@
  ; Self-referential GEPs can occur in dead code.
   %incdec.ptr = getelementptr inbounds i32, i32* %incdec.ptr, i64 1
   ; CHECK: %incdec.ptr = getelementptr inbounds i32, i32* %incdec.ptr
-  %l = load i32* %incdec.ptr
+  %l = load i32, i32* %incdec.ptr
   br label %alive
 
 alive:
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/abilist.ll b/llvm/test/Instrumentation/DataFlowSanitizer/abilist.ll
index 2294731..7998513 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/abilist.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/abilist.ll
@@ -24,7 +24,7 @@
 ; CHECK: define linkonce_odr { i32, i16 } @"dfsw$custom2"(i32, i32, i16, i16)
 ; CHECK: %[[LABELRETURN2:.*]] = alloca i16
 ; CHECK: %[[RV:.*]] = call i32 @__dfsw_custom2
-; CHECK: %[[RVSHADOW:.*]] = load i16* %[[LABELRETURN2]]
+; CHECK: %[[RVSHADOW:.*]] = load i16, i16* %[[LABELRETURN2]]
 ; CHECK: insertvalue {{.*}}[[RV]], 0
 ; CHECK: insertvalue {{.*}}[[RVSHADOW]], 1
 ; CHECK: ret { i32, i16 }
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/debug-nonzero-labels.ll b/llvm/test/Instrumentation/DataFlowSanitizer/debug-nonzero-labels.ll
index 16de9cc..6632eb3 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/debug-nonzero-labels.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/debug-nonzero-labels.ll
@@ -19,9 +19,9 @@
   ; CHECK: [[CALLCMP:%.*]] = icmp ne i16 [[CALLLABEL]], 0
   ; CHECK: br i1 [[CALLCMP]]
   %call = call i32 @g()
-  ; CHECK: [[LOCALLABEL:%.*]] = load i16* [[LOCALLABELALLOCA]]
+  ; CHECK: [[LOCALLABEL:%.*]] = load i16, i16* [[LOCALLABELALLOCA]]
   ; CHECK: [[LOCALCMP:%.*]] = icmp ne i16 [[LOCALLABEL]], 0
   ; CHECK: br i1 [[LOCALCMP]]
-  %load = load i32* %i
+  %load = load i32, i32* %i
   ret i32 %load
 }
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/load.ll b/llvm/test/Instrumentation/DataFlowSanitizer/load.ll
index 1322dae..5bb3984 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/load.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/load.ll
@@ -11,21 +11,21 @@
   ; NO_COMBINE_PTR_LABEL: @"dfs$load0"
   ; NO_COMBINE_PTR_LABEL: load
   ; NO_COMBINE_PTR_LABEL-NOT: load
-  %a = load {}* %p
+  %a = load {}, {}* %p
   ret {} %a
 }
 
 define i8 @load8(i8* %p) {
   ; COMBINE_PTR_LABEL: @"dfs$load8"
-  ; COMBINE_PTR_LABEL: load i16*
+  ; COMBINE_PTR_LABEL: load i16, i16*
   ; COMBINE_PTR_LABEL: ptrtoint i8* {{.*}} to i64
   ; COMBINE_PTR_LABEL: and i64
   ; COMBINE_PTR_LABEL: mul i64
   ; COMBINE_PTR_LABEL: inttoptr i64
-  ; COMBINE_PTR_LABEL: load i16*
+  ; COMBINE_PTR_LABEL: load i16, i16*
   ; COMBINE_PTR_LABEL: icmp ne i16
   ; COMBINE_PTR_LABEL: call zeroext i16 @__dfsan_union
-  ; COMBINE_PTR_LABEL: load i8*
+  ; COMBINE_PTR_LABEL: load i8, i8*
   ; COMBINE_PTR_LABEL: store i16 {{.*}} @__dfsan_retval_tls
   ; COMBINE_PTR_LABEL: ret i8
 
@@ -34,12 +34,12 @@
   ; NO_COMBINE_PTR_LABEL: and i64
   ; NO_COMBINE_PTR_LABEL: mul i64
   ; NO_COMBINE_PTR_LABEL: inttoptr i64 {{.*}} to i16*
-  ; NO_COMBINE_PTR_LABEL: load i16*
-  ; NO_COMBINE_PTR_LABEL: load i8*
+  ; NO_COMBINE_PTR_LABEL: load i16, i16*
+  ; NO_COMBINE_PTR_LABEL: load i8, i8*
   ; NO_COMBINE_PTR_LABEL: store i16 {{.*}} @__dfsan_retval_tls
   ; NO_COMBINE_PTR_LABEL: ret i8
 
-  %a = load i8* %p
+  %a = load i8, i8* %p
   ret i8 %a
 }
 
@@ -50,13 +50,13 @@
   ; COMBINE_PTR_LABEL: mul i64
   ; COMBINE_PTR_LABEL: inttoptr i64 {{.*}} i16*
   ; COMBINE_PTR_LABEL: getelementptr i16
-  ; COMBINE_PTR_LABEL: load i16*
-  ; COMBINE_PTR_LABEL: load i16*
+  ; COMBINE_PTR_LABEL: load i16, i16*
+  ; COMBINE_PTR_LABEL: load i16, i16*
   ; COMBINE_PTR_LABEL: icmp ne
   ; COMBINE_PTR_LABEL: call {{.*}} @__dfsan_union
   ; COMBINE_PTR_LABEL: icmp ne i16
   ; COMBINE_PTR_LABEL: call {{.*}} @__dfsan_union
-  ; COMBINE_PTR_LABEL: load i16*
+  ; COMBINE_PTR_LABEL: load i16, i16*
   ; COMBINE_PTR_LABEL: store {{.*}} @__dfsan_retval_tls
   ; COMBINE_PTR_LABEL: ret i16
 
@@ -66,15 +66,15 @@
   ; NO_COMBINE_PTR_LABEL: mul i64
   ; NO_COMBINE_PTR_LABEL: inttoptr i64 {{.*}} i16*
   ; NO_COMBINE_PTR_LABEL: getelementptr i16, i16*
-  ; NO_COMBINE_PTR_LABEL: load i16*
-  ; NO_COMBINE_PTR_LABEL: load i16*
+  ; NO_COMBINE_PTR_LABEL: load i16, i16*
+  ; NO_COMBINE_PTR_LABEL: load i16, i16*
   ; NO_COMBINE_PTR_LABEL: icmp ne i16
   ; NO_COMBINE_PTR_LABEL: call {{.*}} @__dfsan_union
-  ; NO_COMBINE_PTR_LABEL: load i16*
+  ; NO_COMBINE_PTR_LABEL: load i16, i16*
   ; NO_COMBINE_PTR_LABEL: store i16 {{.*}} @__dfsan_retval_tls
   ; NO_COMBINE_PTR_LABEL: ret i16
 
-  %a = load i16* %p
+  %a = load i16, i16* %p
   ret i16 %a
 }
 
@@ -85,7 +85,7 @@
   ; COMBINE_PTR_LABEL: mul i64
   ; COMBINE_PTR_LABEL: inttoptr i64 {{.*}} i16*
   ; COMBINE_PTR_LABEL: bitcast i16* {{.*}} i64*
-  ; COMBINE_PTR_LABEL: load i64*
+  ; COMBINE_PTR_LABEL: load i64, i64*
   ; COMBINE_PTR_LABEL: trunc i64 {{.*}} i16
   ; COMBINE_PTR_LABEL: shl i64
   ; COMBINE_PTR_LABEL: lshr i64
@@ -93,7 +93,7 @@
   ; COMBINE_PTR_LABEL: icmp eq i64
   ; COMBINE_PTR_LABEL: icmp ne i16
   ; COMBINE_PTR_LABEL: call {{.*}} @__dfsan_union
-  ; COMBINE_PTR_LABEL: load i32*
+  ; COMBINE_PTR_LABEL: load i32, i32*
   ; COMBINE_PTR_LABEL: store i16 {{.*}} @__dfsan_retval_tls
   ; COMBINE_PTR_LABEL: ret i32
   ; COMBINE_PTR_LABEL: call {{.*}} @__dfsan_union_load
@@ -104,19 +104,19 @@
   ; NO_COMBINE_PTR_LABEL: mul i64
   ; NO_COMBINE_PTR_LABEL: inttoptr i64 {{.*}} i16*
   ; NO_COMBINE_PTR_LABEL: bitcast i16* {{.*}} i64*
-  ; NO_COMBINE_PTR_LABEL: load i64*
+  ; NO_COMBINE_PTR_LABEL: load i64, i64*
   ; NO_COMBINE_PTR_LABEL: trunc i64 {{.*}} i16
   ; NO_COMBINE_PTR_LABEL: shl i64
   ; NO_COMBINE_PTR_LABEL: lshr i64
   ; NO_COMBINE_PTR_LABEL: or i64
   ; NO_COMBINE_PTR_LABEL: icmp eq i64
-  ; NO_COMBINE_PTR_LABEL: load i32*
+  ; NO_COMBINE_PTR_LABEL: load i32, i32*
   ; NO_COMBINE_PTR_LABEL: store i16 {{.*}} @__dfsan_retval_tls
   ; NO_COMBINE_PTR_LABEL: ret i32
   ; NO_COMBINE_PTR_LABEL: call {{.*}} @__dfsan_union_load
   
 
-  %a = load i32* %p
+  %a = load i32, i32* %p
   ret i32 %a
 }
 
@@ -127,7 +127,7 @@
   ; COMBINE_PTR_LABEL: mul i64
   ; COMBINE_PTR_LABEL: inttoptr i64 {{.*}} i16*
   ; COMBINE_PTR_LABEL: bitcast i16* {{.*}} i64*
-  ; COMBINE_PTR_LABEL: load i64*
+  ; COMBINE_PTR_LABEL: load i64, i64*
   ; COMBINE_PTR_LABEL: trunc i64 {{.*}} i16
   ; COMBINE_PTR_LABEL: shl i64
   ; COMBINE_PTR_LABEL: lshr i64
@@ -135,12 +135,12 @@
   ; COMBINE_PTR_LABEL: icmp eq i64
   ; COMBINE_PTR_LABEL: icmp ne i16
   ; COMBINE_PTR_LABEL: call {{.*}} @__dfsan_union
-  ; COMBINE_PTR_LABEL: load i64*
+  ; COMBINE_PTR_LABEL: load i64, i64*
   ; COMBINE_PTR_LABEL: store i16 {{.*}} @__dfsan_retval_tls
   ; COMBINE_PTR_LABEL: ret i64
   ; COMBINE_PTR_LABEL: call {{.*}} @__dfsan_union_load
   ; COMBINE_PTR_LABEL: getelementptr i64, i64* {{.*}} i64
-  ; COMBINE_PTR_LABEL: load i64*
+  ; COMBINE_PTR_LABEL: load i64, i64*
   ; COMBINE_PTR_LABEL: icmp eq i64
 
   ; NO_COMBINE_PTR_LABEL: @"dfs$load64"
@@ -149,20 +149,20 @@
   ; NO_COMBINE_PTR_LABEL: mul i64
   ; NO_COMBINE_PTR_LABEL: inttoptr i64 {{.*}} i16*
   ; NO_COMBINE_PTR_LABEL: bitcast i16* {{.*}} i64*
-  ; NO_COMBINE_PTR_LABEL: load i64*
+  ; NO_COMBINE_PTR_LABEL: load i64, i64*
   ; NO_COMBINE_PTR_LABEL: trunc i64 {{.*}} i16
   ; NO_COMBINE_PTR_LABEL: shl i64
   ; NO_COMBINE_PTR_LABEL: lshr i64
   ; NO_COMBINE_PTR_LABEL: or i64
   ; NO_COMBINE_PTR_LABEL: icmp eq i64
-  ; NO_COMBINE_PTR_LABEL: load i64*
+  ; NO_COMBINE_PTR_LABEL: load i64, i64*
   ; NO_COMBINE_PTR_LABEL: store i16 {{.*}} @__dfsan_retval_tls
   ; NO_COMBINE_PTR_LABEL: ret i64
   ; NO_COMBINE_PTR_LABEL: call {{.*}} @__dfsan_union_load
   ; NO_COMBINE_PTR_LABEL: getelementptr i64, i64* {{.*}} i64
-  ; NO_COMBINE_PTR_LABEL: load i64*
+  ; NO_COMBINE_PTR_LABEL: load i64, i64*
   ; NO_COMBINE_PTR_LABEL: icmp eq i64
 
-  %a = load i64* %p
+  %a = load i64, i64* %p
   ret i64 %a
 }
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/store.ll b/llvm/test/Instrumentation/DataFlowSanitizer/store.ll
index 0c64574..a66cedf 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/store.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/store.ll
@@ -18,7 +18,7 @@
 
 define void @store8(i8 %v, i8* %p) {
   ; NO_COMBINE_PTR_LABEL: @"dfs$store8"
-  ; NO_COMBINE_PTR_LABEL: load i16* {{.*}} @__dfsan_arg_tls
+  ; NO_COMBINE_PTR_LABEL: load i16, i16* {{.*}} @__dfsan_arg_tls
   ; NO_COMBINE_PTR_LABEL: ptrtoint i8* {{.*}} i64
   ; NO_COMBINE_PTR_LABEL: and i64
   ; NO_COMBINE_PTR_LABEL: mul i64
@@ -28,8 +28,8 @@
   ; NO_COMBINE_PTR_LABEL: store i8
 
   ; COMBINE_PTR_LABEL: @"dfs$store8"
-  ; COMBINE_PTR_LABEL: load i16*
-  ; COMBINE_PTR_LABEL: load i16*
+  ; COMBINE_PTR_LABEL: load i16, i16*
+  ; COMBINE_PTR_LABEL: load i16, i16*
   ; COMBINE_PTR_LABEL: icmp ne i16
   ; COMBINE_PTR_LABEL: call {{.*}} @__dfsan_union
   ; COMBINE_PTR_LABEL: ptrtoint i8* {{.*}} i64
@@ -46,7 +46,7 @@
 
 define void @store16(i16 %v, i16* %p) {
   ; NO_COMBINE_PTR_LABEL: @"dfs$store16"
-  ; NO_COMBINE_PTR_LABEL: load i16* {{.*}} @__dfsan_arg_tls
+  ; NO_COMBINE_PTR_LABEL: load i16, i16* {{.*}} @__dfsan_arg_tls
   ; NO_COMBINE_PTR_LABEL: ptrtoint i16* {{.*}} i64
   ; NO_COMBINE_PTR_LABEL: and i64
   ; NO_COMBINE_PTR_LABEL: mul i64
@@ -58,8 +58,8 @@
   ; NO_COMBINE_PTR_LABEL: store i16
 
   ; COMBINE_PTR_LABEL: @"dfs$store16"
-  ; COMBINE_PTR_LABEL: load i16* {{.*}} @__dfsan_arg_tls
-  ; COMBINE_PTR_LABEL: load i16* {{.*}} @__dfsan_arg_tls
+  ; COMBINE_PTR_LABEL: load i16, i16* {{.*}} @__dfsan_arg_tls
+  ; COMBINE_PTR_LABEL: load i16, i16* {{.*}} @__dfsan_arg_tls
   ; COMBINE_PTR_LABEL: icmp ne i16
   ; COMBINE_PTR_LABEL: call {{.*}} @__dfsan_union
   ; COMBINE_PTR_LABEL: ptrtoint i16* {{.*}} i64
@@ -78,7 +78,7 @@
 
 define void @store32(i32 %v, i32* %p) {
   ; NO_COMBINE_PTR_LABEL: @"dfs$store32"
-  ; NO_COMBINE_PTR_LABEL: load i16* {{.*}} @__dfsan_arg_tls
+  ; NO_COMBINE_PTR_LABEL: load i16, i16* {{.*}} @__dfsan_arg_tls
   ; NO_COMBINE_PTR_LABEL: ptrtoint i32* {{.*}} i64
   ; NO_COMBINE_PTR_LABEL: and i64
   ; NO_COMBINE_PTR_LABEL: mul i64
@@ -94,8 +94,8 @@
   ; NO_COMBINE_PTR_LABEL: store i32
 
   ; COMBINE_PTR_LABEL: @"dfs$store32"
-  ; COMBINE_PTR_LABEL: load i16* {{.*}} @__dfsan_arg_tls
-  ; COMBINE_PTR_LABEL: load i16* {{.*}} @__dfsan_arg_tls
+  ; COMBINE_PTR_LABEL: load i16, i16* {{.*}} @__dfsan_arg_tls
+  ; COMBINE_PTR_LABEL: load i16, i16* {{.*}} @__dfsan_arg_tls
   ; COMBINE_PTR_LABEL: icmp ne i16
   ; COMBINE_PTR_LABEL: call {{.*}} @__dfsan_union
   ; COMBINE_PTR_LABEL: ptrtoint i32* {{.*}} i64
@@ -118,7 +118,7 @@
 
 define void @store64(i64 %v, i64* %p) {
   ; NO_COMBINE_PTR_LABEL: @"dfs$store64"
-  ; NO_COMBINE_PTR_LABEL: load i16* {{.*}} @__dfsan_arg_tls
+  ; NO_COMBINE_PTR_LABEL: load i16, i16* {{.*}} @__dfsan_arg_tls
   ; NO_COMBINE_PTR_LABEL: ptrtoint i64* {{.*}} i64
   ; NO_COMBINE_PTR_LABEL: and i64
   ; NO_COMBINE_PTR_LABEL: mul i64
@@ -135,8 +135,8 @@
   ; NO_COMBINE_PTR_LABEL: store i64
 
   ; COMBINE_PTR_LABEL: @"dfs$store64"
-  ; COMBINE_PTR_LABEL: load i16* {{.*}} @__dfsan_arg_tls
-  ; COMBINE_PTR_LABEL: load i16* {{.*}} @__dfsan_arg_tls
+  ; COMBINE_PTR_LABEL: load i16, i16* {{.*}} @__dfsan_arg_tls
+  ; COMBINE_PTR_LABEL: load i16, i16* {{.*}} @__dfsan_arg_tls
   ; COMBINE_PTR_LABEL: icmp ne i16
   ; COMBINE_PTR_LABEL: call {{.*}} @__dfsan_union
   ; COMBINE_PTR_LABEL: ptrtoint i64* {{.*}} i64
diff --git a/llvm/test/Instrumentation/InstrProfiling/linkage.ll b/llvm/test/Instrumentation/InstrProfiling/linkage.ll
index 0a92d5d..0608f7b 100644
--- a/llvm/test/Instrumentation/InstrProfiling/linkage.ll
+++ b/llvm/test/Instrumentation/InstrProfiling/linkage.ll
@@ -41,6 +41,6 @@
 ; CHECK: @__llvm_profile_runtime = external global i32
 
 ; CHECK: define linkonce_odr hidden i32 @__llvm_profile_runtime_user() {{.*}} {
-; CHECK:   %[[REG:.*]] = load i32* @__llvm_profile_runtime
+; CHECK:   %[[REG:.*]] = load i32, i32* @__llvm_profile_runtime
 ; CHECK:   ret i32 %[[REG]]
 ; CHECK: }
diff --git a/llvm/test/Instrumentation/InstrProfiling/noruntime.ll b/llvm/test/Instrumentation/InstrProfiling/noruntime.ll
index e69445d..5b71f1a 100644
--- a/llvm/test/Instrumentation/InstrProfiling/noruntime.ll
+++ b/llvm/test/Instrumentation/InstrProfiling/noruntime.ll
@@ -2,7 +2,7 @@
 
 ; RUN: opt < %s -instrprof -S | FileCheck %s
 ; CHECK-NOT: define {{.*}} @__llvm_profile_runtime_user()
-; CHECK-NOT: load i32* @__llvm_profile_runtime
+; CHECK-NOT: load i32, i32* @__llvm_profile_runtime
 
 @__llvm_profile_runtime = global i32 0, align 4
 
diff --git a/llvm/test/Instrumentation/MemorySanitizer/array_types.ll b/llvm/test/Instrumentation/MemorySanitizer/array_types.ll
index fa3835f..e96716a 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/array_types.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/array_types.ll
@@ -12,8 +12,8 @@
 }
 
 ; CHECK-LABEL: @InsertValue(
-; CHECK-DAG: [[Sy:%.*]] = load i32* {{.*}}@__msan_param_tls to i64), i64 8) to i32*)
-; CHECK-DAG: [[Sx:%.*]] = load i32* {{.*}}@__msan_param_tls to i32*)
+; CHECK-DAG: [[Sy:%.*]] = load i32, i32* {{.*}}@__msan_param_tls to i64), i64 8) to i32*)
+; CHECK-DAG: [[Sx:%.*]] = load i32, i32* {{.*}}@__msan_param_tls to i32*)
 ; CHECK: [[A:%.*]] = insertvalue [2 x i32] [i32 -1, i32 -1], i32 [[Sx]], 0
 ; CHECK: [[B:%.*]] = insertvalue [2 x i32] [[A]], i32 [[Sy]], 1
 ; CHECK: store [2 x i32] [[B]], [2 x i32]* {{.*}}@__msan_retval_tls
@@ -28,8 +28,8 @@
 }
 
 ; CHECK-LABEL: @InsertValueDouble(
-; CHECK-DAG: [[Sy:%.*]] = load i64* {{.*}}@__msan_param_tls to i64), i64 8) to i64*)
-; CHECK-DAG: [[Sx:%.*]] = load i64* getelementptr {{.*}}@__msan_param_tls, i32 0, i32 0
+; CHECK-DAG: [[Sy:%.*]] = load i64, i64* {{.*}}@__msan_param_tls to i64), i64 8) to i64*)
+; CHECK-DAG: [[Sx:%.*]] = load i64, i64* getelementptr {{.*}}@__msan_param_tls, i32 0, i32 0
 ; CHECK: [[A:%.*]] = insertvalue [2 x i64] [i64 -1, i64 -1], i64 [[Sx]], 0
 ; CHECK: [[B:%.*]] = insertvalue [2 x i64] [[A]], i64 [[Sy]], 1
 ; CHECK: store [2 x i64] [[B]], [2 x i64]* {{.*}}@__msan_retval_tls
@@ -43,7 +43,7 @@
 }
 
 ; CHECK-LABEL: @ExtractValue(
-; CHECK: [[Sa:%.*]] = load [2 x i32]* {{.*}}@__msan_param_tls to [2 x i32]*)
+; CHECK: [[Sa:%.*]] = load [2 x i32], [2 x i32]* {{.*}}@__msan_param_tls to [2 x i32]*)
 ; CHECK: [[Sx:%.*]] = extractvalue [2 x i32] [[Sa]], 1
 ; CHECK: store i32 [[Sx]], i32* {{.*}}@__msan_retval_tls
 ; CHECK: ret i32
@@ -59,7 +59,7 @@
 }
 
 ; CHECK-LABEL: @ArrayInStruct(
-; CHECK: [[Ss:%.*]] = load { i32, i32, [3 x i32] }* {{.*}}@__msan_param_tls to { i32, i32, [3 x i32] }*)
+; CHECK: [[Ss:%.*]] = load { i32, i32, [3 x i32] }, { i32, i32, [3 x i32] }* {{.*}}@__msan_param_tls to { i32, i32, [3 x i32] }*)
 ; CHECK: [[Sx:%.*]] = extractvalue { i32, i32, [3 x i32] } [[Ss]], 2, 1
 ; CHECK: store i32 [[Sx]], i32* {{.*}}@__msan_retval_tls
 ; CHECK: ret i32
@@ -71,7 +71,7 @@
 }
 
 ; CHECK-LABEL: @ArrayOfStructs(
-; CHECK: [[Ss:%.*]] = load [3 x { i32, i32 }]* {{.*}}@__msan_param_tls to [3 x { i32, i32 }]*)
+; CHECK: [[Ss:%.*]] = load [3 x { i32, i32 }], [3 x { i32, i32 }]* {{.*}}@__msan_param_tls to [3 x { i32, i32 }]*)
 ; CHECK: [[Sx:%.*]] = extractvalue [3 x { i32, i32 }] [[Ss]], 2, 1
 ; CHECK: store i32 [[Sx]], i32* {{.*}}@__msan_retval_tls
 ; CHECK: ret i32
@@ -83,7 +83,7 @@
 }
 
 ; CHECK-LABEL: @ArrayOfVectors(
-; CHECK: [[Ss:%.*]] = load [3 x <8 x i16>]* {{.*}}@__msan_param_tls to [3 x <8 x i16>]*)
+; CHECK: [[Ss:%.*]] = load [3 x <8 x i16>], [3 x <8 x i16>]* {{.*}}@__msan_param_tls to [3 x <8 x i16>]*)
 ; CHECK: [[Sx:%.*]] = extractvalue [3 x <8 x i16>] [[Ss]], 1
 ; CHECK: store <8 x i16> [[Sx]], <8 x i16>* {{.*}}@__msan_retval_tls
 ; CHECK: ret <8 x i16>
diff --git a/llvm/test/Instrumentation/MemorySanitizer/atomics.ll b/llvm/test/Instrumentation/MemorySanitizer/atomics.ll
index 28736ad..e896eae 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/atomics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/atomics.ll
@@ -77,13 +77,13 @@
 
 define i32 @AtomicLoad(i32* %p) sanitize_memory {
 entry:
-  %0 = load atomic i32* %p seq_cst, align 16
+  %0 = load atomic i32, i32* %p seq_cst, align 16
   ret i32 %0
 }
 
 ; CHECK: @AtomicLoad
-; CHECK: load atomic i32* {{.*}} seq_cst, align 16
-; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32* {{.*}}, align 16
+; CHECK: load atomic i32, i32* {{.*}} seq_cst, align 16
+; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32, i32* {{.*}}, align 16
 ; CHECK: store i32 {{.*}}[[SHADOW]], {{.*}} @__msan_retval_tls
 ; CHECK: ret i32
 
@@ -92,13 +92,13 @@
 
 define i32 @AtomicLoadAcquire(i32* %p) sanitize_memory {
 entry:
-  %0 = load atomic i32* %p acquire, align 16
+  %0 = load atomic i32, i32* %p acquire, align 16
   ret i32 %0
 }
 
 ; CHECK: @AtomicLoadAcquire
-; CHECK: load atomic i32* {{.*}} acquire, align 16
-; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32* {{.*}}, align 16
+; CHECK: load atomic i32, i32* {{.*}} acquire, align 16
+; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32, i32* {{.*}}, align 16
 ; CHECK: store i32 {{.*}}[[SHADOW]], {{.*}} @__msan_retval_tls
 ; CHECK: ret i32
 
@@ -107,13 +107,13 @@
 
 define i32 @AtomicLoadMonotonic(i32* %p) sanitize_memory {
 entry:
-  %0 = load atomic i32* %p monotonic, align 16
+  %0 = load atomic i32, i32* %p monotonic, align 16
   ret i32 %0
 }
 
 ; CHECK: @AtomicLoadMonotonic
-; CHECK: load atomic i32* {{.*}} acquire, align 16
-; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32* {{.*}}, align 16
+; CHECK: load atomic i32, i32* {{.*}} acquire, align 16
+; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32, i32* {{.*}}, align 16
 ; CHECK: store i32 {{.*}}[[SHADOW]], {{.*}} @__msan_retval_tls
 ; CHECK: ret i32
 
@@ -122,13 +122,13 @@
 
 define i32 @AtomicLoadUnordered(i32* %p) sanitize_memory {
 entry:
-  %0 = load atomic i32* %p unordered, align 16
+  %0 = load atomic i32, i32* %p unordered, align 16
   ret i32 %0
 }
 
 ; CHECK: @AtomicLoadUnordered
-; CHECK: load atomic i32* {{.*}} acquire, align 16
-; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32* {{.*}}, align 16
+; CHECK: load atomic i32, i32* {{.*}} acquire, align 16
+; CHECK: [[SHADOW:%[01-9a-z_]+]] = load i32, i32* {{.*}}, align 16
 ; CHECK: store i32 {{.*}}[[SHADOW]], {{.*}} @__msan_retval_tls
 ; CHECK: ret i32
 
diff --git a/llvm/test/Instrumentation/MemorySanitizer/check_access_address.ll b/llvm/test/Instrumentation/MemorySanitizer/check_access_address.ll
index 5660226..5e1a3f4 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/check_access_address.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/check_access_address.ll
@@ -8,7 +8,7 @@
 
 define <2 x i64> @ByValArgumentShadowLargeAlignment(<2 x i64>* byval %p) sanitize_memory {
 entry:
-  %x = load <2 x i64>* %p
+  %x = load <2 x i64>, <2 x i64>* %p
   ret <2 x i64> %x
 }
 
@@ -19,7 +19,7 @@
 
 define i16 @ByValArgumentShadowSmallAlignment(i16* byval %p) sanitize_memory {
 entry:
-  %x = load i16* %p
+  %x = load i16, i16* %p
   ret i16 %x
 }
 
diff --git a/llvm/test/Instrumentation/MemorySanitizer/instrumentation-with-call-threshold.ll b/llvm/test/Instrumentation/MemorySanitizer/instrumentation-with-call-threshold.ll
index beb3c5f..fb1cdbb 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/instrumentation-with-call-threshold.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/instrumentation-with-call-threshold.ll
@@ -11,7 +11,7 @@
 
 define void @LoadAndCmp(i32* nocapture %a) nounwind uwtable sanitize_memory {
 entry:
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %if.end, label %if.then
 
diff --git a/llvm/test/Instrumentation/MemorySanitizer/missing_origin.ll b/llvm/test/Instrumentation/MemorySanitizer/missing_origin.ll
index f7385b9..08289ab 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/missing_origin.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/missing_origin.ll
@@ -14,7 +14,7 @@
 }
 
 ; CHECK-LABEL: @Shuffle(
-; CHECK: [[A:%.*]] = load i32* {{.*}}@__msan_param_origin_tls,
+; CHECK: [[A:%.*]] = load i32, i32* {{.*}}@__msan_param_origin_tls,
 ; CHECK: store i32 [[A]], i32* @__msan_retval_origin_tls
 ; CHECK: ret <4 x i32>
 
diff --git a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll
index 3165568..7472559 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll
@@ -70,7 +70,7 @@
 ; load followed by cmp: check that we load the shadow and call __msan_warning.
 define void @LoadAndCmp(i32* nocapture %a) nounwind uwtable sanitize_memory {
 entry:
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %if.end, label %if.then
 
@@ -124,11 +124,11 @@
   br i1 %tobool, label %if.else, label %if.then
 
   if.then:                                          ; preds = %entry
-  %0 = load i32* %b, align 4
+  %0 = load i32, i32* %b, align 4
   br label %if.end
 
   if.else:                                          ; preds = %entry
-  %1 = load i32* %c, align 4
+  %1 = load i32, i32* %c, align 4
   br label %if.end
 
   if.end:                                           ; preds = %if.else, %if.then
@@ -147,7 +147,7 @@
 ; Compute shadow for "x << 10"
 define void @ShlConst(i32* nocapture %x) nounwind uwtable sanitize_memory {
 entry:
-  %0 = load i32* %x, align 4
+  %0 = load i32, i32* %x, align 4
   %1 = shl i32 %0, 10
   store i32 %1, i32* %x, align 4
   ret void
@@ -165,7 +165,7 @@
 ; Compute shadow for "10 << x": it should have 'sext i1'.
 define void @ShlNonConst(i32* nocapture %x) nounwind uwtable sanitize_memory {
 entry:
-  %0 = load i32* %x, align 4
+  %0 = load i32, i32* %x, align 4
   %1 = shl i32 10, %0
   store i32 %1, i32* %x, align 4
   ret void
@@ -182,7 +182,7 @@
 ; SExt
 define void @SExt(i32* nocapture %a, i16* nocapture %b) nounwind uwtable sanitize_memory {
 entry:
-  %0 = load i16* %b, align 2
+  %0 = load i16, i16* %b, align 2
   %1 = sext i16 %0 to i32
   store i32 %1, i32* %a, align 4
   ret void
@@ -345,8 +345,8 @@
 }
 
 ; CHECK: @IntToPtr
-; CHECK: load i64*{{.*}}__msan_param_tls
-; CHECK-ORIGINS-NEXT: load i32*{{.*}}__msan_param_origin_tls
+; CHECK: load i64, i64*{{.*}}__msan_param_tls
+; CHECK-ORIGINS-NEXT: load i32, i32*{{.*}}__msan_param_origin_tls
 ; CHECK-NEXT: inttoptr
 ; CHECK-NEXT: store i64{{.*}}__msan_retval_tls
 ; CHECK: ret i8*
@@ -359,7 +359,7 @@
 }
 
 ; CHECK: @IntToPtr_ZExt
-; CHECK: load i16*{{.*}}__msan_param_tls
+; CHECK: load i16, i16*{{.*}}__msan_param_tls
 ; CHECK: zext
 ; CHECK-NEXT: inttoptr
 ; CHECK-NEXT: store i64{{.*}}__msan_retval_tls
@@ -475,25 +475,25 @@
 
 define i32 @ShadowLoadAlignmentLarge() nounwind uwtable sanitize_memory {
   %y = alloca i32, align 64
-  %1 = load volatile i32* %y, align 64
+  %1 = load volatile i32, i32* %y, align 64
   ret i32 %1
 }
 
 ; CHECK: @ShadowLoadAlignmentLarge
-; CHECK: load volatile i32* {{.*}} align 64
-; CHECK: load i32* {{.*}} align 64
+; CHECK: load volatile i32, i32* {{.*}} align 64
+; CHECK: load i32, i32* {{.*}} align 64
 ; CHECK: ret i32
 
 define i32 @ShadowLoadAlignmentSmall() nounwind uwtable sanitize_memory {
   %y = alloca i32, align 2
-  %1 = load volatile i32* %y, align 2
+  %1 = load volatile i32, i32* %y, align 2
   ret i32 %1
 }
 
 ; CHECK: @ShadowLoadAlignmentSmall
-; CHECK: load volatile i32* {{.*}} align 2
-; CHECK: load i32* {{.*}} align 2
-; CHECK-ORIGINS: load i32* {{.*}} align 4
+; CHECK: load volatile i32, i32* {{.*}} align 2
+; CHECK: load i32, i32* {{.*}} align 2
+; CHECK-ORIGINS: load i32, i32* {{.*}} align 4
 ; CHECK: ret i32
 
 
@@ -580,8 +580,8 @@
 declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %p) nounwind
 
 ; CHECK: @LoadIntrinsic
-; CHECK: load <16 x i8>* {{.*}} align 1
-; CHECK-ORIGINS: [[ORIGIN:%[01-9a-z]+]] = load i32* {{.*}}
+; CHECK: load <16 x i8>, <16 x i8>* {{.*}} align 1
+; CHECK-ORIGINS: [[ORIGIN:%[01-9a-z]+]] = load i32, i32* {{.*}}
 ; CHECK-NOT: br
 ; CHECK-NOT: = or
 ; CHECK: call <16 x i8> @llvm.x86.sse3.ldu.dq
@@ -602,10 +602,10 @@
 declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b) nounwind
 
 ; CHECK: @Paddsw128
-; CHECK-NEXT: load <8 x i16>* {{.*}} @__msan_param_tls
-; CHECK-ORIGINS: load i32* {{.*}} @__msan_param_origin_tls
-; CHECK-NEXT: load <8 x i16>* {{.*}} @__msan_param_tls
-; CHECK-ORIGINS: load i32* {{.*}} @__msan_param_origin_tls
+; CHECK-NEXT: load <8 x i16>, <8 x i16>* {{.*}} @__msan_param_tls
+; CHECK-ORIGINS: load i32, i32* {{.*}} @__msan_param_origin_tls
+; CHECK-NEXT: load <8 x i16>, <8 x i16>* {{.*}} @__msan_param_tls
+; CHECK-ORIGINS: load i32, i32* {{.*}} @__msan_param_origin_tls
 ; CHECK-NEXT: = or <8 x i16>
 ; CHECK-ORIGINS: = bitcast <8 x i16> {{.*}} to i128
 ; CHECK-ORIGINS-NEXT: = icmp ne i128 {{.*}}, 0
@@ -620,13 +620,13 @@
 ; Check that shadow of such vector is a vector of integers.
 
 define <8 x i8*> @VectorOfPointers(<8 x i8*>* %p) nounwind uwtable sanitize_memory {
-  %x = load <8 x i8*>* %p
+  %x = load <8 x i8*>, <8 x i8*>* %p
   ret <8 x i8*> %x
 }
 
 ; CHECK: @VectorOfPointers
-; CHECK: load <8 x i8*>*
-; CHECK: load <8 x i64>*
+; CHECK: load <8 x i8*>, <8 x i8*>*
+; CHECK: load <8 x i64>, <8 x i64>*
 ; CHECK: store <8 x i64> {{.*}} @__msan_retval_tls
 ; CHECK: ret <8 x i8*>
 
@@ -772,7 +772,7 @@
 
 define i32 @NoSanitizeMemoryParamTLS(i32* nocapture readonly %x) {
 entry:
-  %0 = load i32* %x, align 4
+  %0 = load i32, i32* %x, align 4
   %call = tail call i32 @NoSanitizeMemoryParamTLSHelper(i32 %0)
   ret i32 %call
 }
@@ -792,7 +792,7 @@
 }
 
 ; CHECK: @ArgumentShadowAlignment
-; CHECK: load <2 x i64>* {{.*}} @__msan_param_tls {{.*}}, align 8
+; CHECK: load <2 x i64>, <2 x i64>* {{.*}} @__msan_param_tls {{.*}}, align 8
 ; CHECK: store <2 x i64> {{.*}} @__msan_retval_tls {{.*}}, align 8
 ; CHECK: ret <2 x i64>
 
@@ -835,10 +835,10 @@
   %agg.tmp2 = alloca %struct.StructByVal, align 8
   %0 = bitcast %struct.StructByVal* %s to i8*
   %agg.tmp.sroa.0.0..sroa_cast = bitcast %struct.StructByVal* %s to i64*
-  %agg.tmp.sroa.0.0.copyload = load i64* %agg.tmp.sroa.0.0..sroa_cast, align 4
+  %agg.tmp.sroa.0.0.copyload = load i64, i64* %agg.tmp.sroa.0.0..sroa_cast, align 4
   %agg.tmp.sroa.2.0..sroa_idx = getelementptr inbounds %struct.StructByVal, %struct.StructByVal* %s, i64 0, i32 2
   %agg.tmp.sroa.2.0..sroa_cast = bitcast i32* %agg.tmp.sroa.2.0..sroa_idx to i64*
-  %agg.tmp.sroa.2.0.copyload = load i64* %agg.tmp.sroa.2.0..sroa_cast, align 4
+  %agg.tmp.sroa.2.0.copyload = load i64, i64* %agg.tmp.sroa.2.0..sroa_cast, align 4
   %1 = bitcast %struct.StructByVal* %agg.tmp2 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %0, i64 16, i32 4, i1 false)
   call void (i32, ...)* @VAArgStructFn(i32 undef, i64 %agg.tmp.sroa.0.0.copyload, i64 %agg.tmp.sroa.2.0.copyload, i64 %agg.tmp.sroa.0.0.copyload, i64 %agg.tmp.sroa.2.0.copyload, %struct.StructByVal* byval align 8 %agg.tmp2)
diff --git a/llvm/test/Instrumentation/MemorySanitizer/unreachable.ll b/llvm/test/Instrumentation/MemorySanitizer/unreachable.ll
index c813071..e9a79ce 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/unreachable.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/unreachable.ll
@@ -10,7 +10,7 @@
   br label %exit
 
 unreachable:
-  %x = load i32* %p
+  %x = load i32, i32* %p
   br label %exit
 
 exit:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector_cvt.ll b/llvm/test/Instrumentation/MemorySanitizer/vector_cvt.ll
index 9425e25..a7d5f21 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/vector_cvt.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/vector_cvt.ll
@@ -34,7 +34,7 @@
 }
 
 ; CHECK: @test_cvtsi2sd
-; CHECK: [[Sa:%[_01-9a-z]+]] = load i32* {{.*}} @__msan_param_tls
+; CHECK: [[Sa:%[_01-9a-z]+]] = load i32, i32* {{.*}} @__msan_param_tls
 ; CHECK: [[Sout0:%[_01-9a-z]+]] = insertelement <2 x i64> <i64 -1, i64 -1>, i64 {{.*}}, i32 1
 ; Clear low half of result shadow
 ; CHECK: [[Sout:%[_01-9a-z]+]] = insertelement <2 x i64> {{.*}}[[Sout0]], i64 0, i32 0
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/coverage-dbg.ll b/llvm/test/Instrumentation/SanitizerCoverage/coverage-dbg.ll
index 929d1cc..9de6a9a 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/coverage-dbg.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/coverage-dbg.ll
@@ -29,7 +29,7 @@
 entry:
   tail call void @llvm.dbg.value(metadata %struct.A* %this, i64 0, metadata !15, metadata !{!"0x102"}), !dbg !20
   %x = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 0, !dbg !21
-  %0 = load i32* %x, align 4, !dbg !21
+  %0 = load i32, i32* %x, align 4, !dbg !21
   ret i32 %0, !dbg !21
 }
 
diff --git a/llvm/test/Instrumentation/SanitizerCoverage/coverage.ll b/llvm/test/Instrumentation/SanitizerCoverage/coverage.ll
index 15957279..dd6b8d8 100644
--- a/llvm/test/Instrumentation/SanitizerCoverage/coverage.ll
+++ b/llvm/test/Instrumentation/SanitizerCoverage/coverage.ll
@@ -37,7 +37,7 @@
 ; CHECK0-NOT: call void @__sanitizer_cov_module_init(
 
 ; CHECK1-LABEL: define void @foo
-; CHECK1: %0 = load atomic i32* {{.*}} monotonic, align 4, !nosanitize
+; CHECK1: %0 = load atomic i32, i32* {{.*}} monotonic, align 4, !nosanitize
 ; CHECK1: %1 = icmp sge i32 0, %0
 ; CHECK1: br i1 %1, label %2, label %3
 ; CHECK1: call void @__sanitizer_cov(i32*{{.*}})
@@ -84,8 +84,8 @@
 define void @CallViaVptr(%struct.StructWithVptr* %foo) uwtable sanitize_address {
 entry:
   %0 = bitcast %struct.StructWithVptr* %foo to void (%struct.StructWithVptr*)***
-  %vtable = load void (%struct.StructWithVptr*)*** %0, align 8
-  %1 = load void (%struct.StructWithVptr*)** %vtable, align 8
+  %vtable = load void (%struct.StructWithVptr*)**, void (%struct.StructWithVptr*)*** %0, align 8
+  %1 = load void (%struct.StructWithVptr*)*, void (%struct.StructWithVptr*)** %vtable, align 8
   tail call void %1(%struct.StructWithVptr* %foo)
   tail call void %1(%struct.StructWithVptr* %foo)
   tail call void asm sideeffect "", ""()
diff --git a/llvm/test/Instrumentation/ThreadSanitizer/atomic.ll b/llvm/test/Instrumentation/ThreadSanitizer/atomic.ll
index e40268f..1d6ac38 100644
--- a/llvm/test/Instrumentation/ThreadSanitizer/atomic.ll
+++ b/llvm/test/Instrumentation/ThreadSanitizer/atomic.ll
@@ -4,7 +4,7 @@
 
 define i8 @atomic8_load_unordered(i8* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i8* %a unordered, align 1
+  %0 = load atomic i8, i8* %a unordered, align 1
   ret i8 %0
 }
 ; CHECK: atomic8_load_unordered
@@ -12,7 +12,7 @@
 
 define i8 @atomic8_load_monotonic(i8* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i8* %a monotonic, align 1
+  %0 = load atomic i8, i8* %a monotonic, align 1
   ret i8 %0
 }
 ; CHECK: atomic8_load_monotonic
@@ -20,7 +20,7 @@
 
 define i8 @atomic8_load_acquire(i8* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i8* %a acquire, align 1
+  %0 = load atomic i8, i8* %a acquire, align 1
   ret i8 %0
 }
 ; CHECK: atomic8_load_acquire
@@ -28,7 +28,7 @@
 
 define i8 @atomic8_load_seq_cst(i8* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i8* %a seq_cst, align 1
+  %0 = load atomic i8, i8* %a seq_cst, align 1
   ret i8 %0
 }
 ; CHECK: atomic8_load_seq_cst
@@ -388,7 +388,7 @@
 
 define i16 @atomic16_load_unordered(i16* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i16* %a unordered, align 2
+  %0 = load atomic i16, i16* %a unordered, align 2
   ret i16 %0
 }
 ; CHECK: atomic16_load_unordered
@@ -396,7 +396,7 @@
 
 define i16 @atomic16_load_monotonic(i16* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i16* %a monotonic, align 2
+  %0 = load atomic i16, i16* %a monotonic, align 2
   ret i16 %0
 }
 ; CHECK: atomic16_load_monotonic
@@ -404,7 +404,7 @@
 
 define i16 @atomic16_load_acquire(i16* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i16* %a acquire, align 2
+  %0 = load atomic i16, i16* %a acquire, align 2
   ret i16 %0
 }
 ; CHECK: atomic16_load_acquire
@@ -412,7 +412,7 @@
 
 define i16 @atomic16_load_seq_cst(i16* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i16* %a seq_cst, align 2
+  %0 = load atomic i16, i16* %a seq_cst, align 2
   ret i16 %0
 }
 ; CHECK: atomic16_load_seq_cst
@@ -772,7 +772,7 @@
 
 define i32 @atomic32_load_unordered(i32* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i32* %a unordered, align 4
+  %0 = load atomic i32, i32* %a unordered, align 4
   ret i32 %0
 }
 ; CHECK: atomic32_load_unordered
@@ -780,7 +780,7 @@
 
 define i32 @atomic32_load_monotonic(i32* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i32* %a monotonic, align 4
+  %0 = load atomic i32, i32* %a monotonic, align 4
   ret i32 %0
 }
 ; CHECK: atomic32_load_monotonic
@@ -788,7 +788,7 @@
 
 define i32 @atomic32_load_acquire(i32* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i32* %a acquire, align 4
+  %0 = load atomic i32, i32* %a acquire, align 4
   ret i32 %0
 }
 ; CHECK: atomic32_load_acquire
@@ -796,7 +796,7 @@
 
 define i32 @atomic32_load_seq_cst(i32* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i32* %a seq_cst, align 4
+  %0 = load atomic i32, i32* %a seq_cst, align 4
   ret i32 %0
 }
 ; CHECK: atomic32_load_seq_cst
@@ -1156,7 +1156,7 @@
 
 define i64 @atomic64_load_unordered(i64* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i64* %a unordered, align 8
+  %0 = load atomic i64, i64* %a unordered, align 8
   ret i64 %0
 }
 ; CHECK: atomic64_load_unordered
@@ -1164,7 +1164,7 @@
 
 define i64 @atomic64_load_monotonic(i64* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i64* %a monotonic, align 8
+  %0 = load atomic i64, i64* %a monotonic, align 8
   ret i64 %0
 }
 ; CHECK: atomic64_load_monotonic
@@ -1172,7 +1172,7 @@
 
 define i64 @atomic64_load_acquire(i64* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i64* %a acquire, align 8
+  %0 = load atomic i64, i64* %a acquire, align 8
   ret i64 %0
 }
 ; CHECK: atomic64_load_acquire
@@ -1180,7 +1180,7 @@
 
 define i64 @atomic64_load_seq_cst(i64* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i64* %a seq_cst, align 8
+  %0 = load atomic i64, i64* %a seq_cst, align 8
   ret i64 %0
 }
 ; CHECK: atomic64_load_seq_cst
@@ -1540,7 +1540,7 @@
 
 define i128 @atomic128_load_unordered(i128* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i128* %a unordered, align 16
+  %0 = load atomic i128, i128* %a unordered, align 16
   ret i128 %0
 }
 ; CHECK: atomic128_load_unordered
@@ -1548,7 +1548,7 @@
 
 define i128 @atomic128_load_monotonic(i128* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i128* %a monotonic, align 16
+  %0 = load atomic i128, i128* %a monotonic, align 16
   ret i128 %0
 }
 ; CHECK: atomic128_load_monotonic
@@ -1556,7 +1556,7 @@
 
 define i128 @atomic128_load_acquire(i128* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i128* %a acquire, align 16
+  %0 = load atomic i128, i128* %a acquire, align 16
   ret i128 %0
 }
 ; CHECK: atomic128_load_acquire
@@ -1564,7 +1564,7 @@
 
 define i128 @atomic128_load_seq_cst(i128* %a) nounwind uwtable {
 entry:
-  %0 = load atomic i128* %a seq_cst, align 16
+  %0 = load atomic i128, i128* %a seq_cst, align 16
   ret i128 %0
 }
 ; CHECK: atomic128_load_seq_cst
diff --git a/llvm/test/Instrumentation/ThreadSanitizer/capture.ll b/llvm/test/Instrumentation/ThreadSanitizer/capture.ll
index d6c62f0..038b8c0 100644
--- a/llvm/test/Instrumentation/ThreadSanitizer/capture.ll
+++ b/llvm/test/Instrumentation/ThreadSanitizer/capture.ll
@@ -37,7 +37,7 @@
   %tmp = alloca i32*, align 8
   ; transitive escape
   store i32* %ptr, i32** %tmp, align 8
-  %0 = load i32** %tmp, align 8
+  %0 = load i32*, i32** %tmp, align 8
   store i32* %0, i32** @sink, align 8
   store i32 42, i32* %ptr, align 4
   ret void
@@ -79,7 +79,7 @@
   store i32 42, i32* %ptr, align 4
   ; transitive escape
   store i32* %ptr, i32** %tmp, align 8
-  %0 = load i32** %tmp, align 8
+  %0 = load i32*, i32** %tmp, align 8
   store i32* %0, i32** @sink, align 8
   ret void
 }
diff --git a/llvm/test/Instrumentation/ThreadSanitizer/no_sanitize_thread.ll b/llvm/test/Instrumentation/ThreadSanitizer/no_sanitize_thread.ll
index 3949fd5..a90a560 100644
--- a/llvm/test/Instrumentation/ThreadSanitizer/no_sanitize_thread.ll
+++ b/llvm/test/Instrumentation/ThreadSanitizer/no_sanitize_thread.ll
@@ -6,20 +6,20 @@
 ; no sanitize_thread attribute here
 define i32 @read_4_bytes(i32* %a) {
 entry:
-  %tmp1 = load i32* %a, align 4
+  %tmp1 = load i32, i32* %a, align 4
   ret i32 %tmp1
 }
 
 ; CHECK: define i32 @read_4_bytes(i32* %a) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %tmp1 = load i32* %a, align 4
+; CHECK-NEXT:   %tmp1 = load i32, i32* %a, align 4
 ; CHECK: ret i32 %tmp1
 
 ; no sanitize_thread attribute here
 define i32 @read_4_bytes_and_call(i32* %a) {
 entry:
   call void @foo()
-  %tmp1 = load i32* %a, align 4
+  %tmp1 = load i32, i32* %a, align 4
   ret i32 %tmp1
 }
 
@@ -28,7 +28,7 @@
 ; CHECK-NEXT:   %0 = call i8* @llvm.returnaddress(i32 0)
 ; CHECK-NEXT:   call void @__tsan_func_entry(i8* %0)
 ; CHECK-NEXT:   call void @foo()
-; CHECK-NEXT:   %tmp1 = load i32* %a, align 4
+; CHECK-NEXT:   %tmp1 = load i32, i32* %a, align 4
 ; CHECK-NEXT:   call void @__tsan_func_exit()
 ; CHECK-NEXT:   ret i32 %tmp1
 
diff --git a/llvm/test/Instrumentation/ThreadSanitizer/read_before_write.ll b/llvm/test/Instrumentation/ThreadSanitizer/read_before_write.ll
index cb6603b..c15ab13 100644
--- a/llvm/test/Instrumentation/ThreadSanitizer/read_before_write.ll
+++ b/llvm/test/Instrumentation/ThreadSanitizer/read_before_write.ll
@@ -4,7 +4,7 @@
 
 define void @IncrementMe(i32* nocapture %ptr) nounwind uwtable sanitize_thread {
 entry:
-  %0 = load i32* %ptr, align 4
+  %0 = load i32, i32* %ptr, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* %ptr, align 4
   ret void
@@ -16,7 +16,7 @@
 
 define void @IncrementMeWithCallInBetween(i32* nocapture %ptr) nounwind uwtable sanitize_thread {
 entry:
-  %0 = load i32* %ptr, align 4
+  %0 = load i32, i32* %ptr, align 4
   %inc = add nsw i32 %0, 1
   call void @foo()
   store i32 %inc, i32* %ptr, align 4
diff --git a/llvm/test/Instrumentation/ThreadSanitizer/read_from_global.ll b/llvm/test/Instrumentation/ThreadSanitizer/read_from_global.ll
index 5d573e8..76ee50c 100644
--- a/llvm/test/Instrumentation/ThreadSanitizer/read_from_global.ll
+++ b/llvm/test/Instrumentation/ThreadSanitizer/read_from_global.ll
@@ -6,7 +6,7 @@
 @const_global = external constant i32
 define i32 @read_from_const_global() nounwind uwtable sanitize_thread readnone {
 entry:
-  %0 = load i32* @const_global, align 4
+  %0 = load i32, i32* @const_global, align 4
   ret i32 %0
 }
 ; CHECK: define i32 @read_from_const_global
@@ -16,7 +16,7 @@
 @non_const_global = global i32 0, align 4
 define i32 @read_from_non_const_global() nounwind uwtable sanitize_thread readonly {
 entry:
-  %0 = load i32* @non_const_global, align 4
+  %0 = load i32, i32* @non_const_global, align 4
   ret i32 %0
 }
 
@@ -29,7 +29,7 @@
 entry:
   %idxprom = sext i32 %idx to i64
   %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @const_global_array, i64 0, i64 %idxprom
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   ret i32 %0
 }
 
@@ -41,8 +41,8 @@
 define void @call_virtual_func(%struct.Foo* %f) uwtable sanitize_thread {
 entry:
   %0 = bitcast %struct.Foo* %f to void (%struct.Foo*)***
-  %vtable = load void (%struct.Foo*)*** %0, align 8, !tbaa !2
-  %1 = load void (%struct.Foo*)** %vtable, align 8
+  %vtable = load void (%struct.Foo*)**, void (%struct.Foo*)*** %0, align 8, !tbaa !2
+  %1 = load void (%struct.Foo*)*, void (%struct.Foo*)** %vtable, align 8
   call void %1(%struct.Foo* %f)
   ret void
 }
diff --git a/llvm/test/Instrumentation/ThreadSanitizer/tsan-vs-gvn.ll b/llvm/test/Instrumentation/ThreadSanitizer/tsan-vs-gvn.ll
index a83a274..d821b1c 100644
--- a/llvm/test/Instrumentation/ThreadSanitizer/tsan-vs-gvn.ll
+++ b/llvm/test/Instrumentation/ThreadSanitizer/tsan-vs-gvn.ll
@@ -12,9 +12,9 @@
 
 define i32 @test_widening_bad(i8* %P) nounwind ssp noredzone sanitize_thread {
 entry:
-  %tmp = load i8* getelementptr inbounds (%struct_of_8_bytes_4_aligned* @f, i64 0, i32 1), align 4
+  %tmp = load i8, i8* getelementptr inbounds (%struct_of_8_bytes_4_aligned* @f, i64 0, i32 1), align 4
   %conv = zext i8 %tmp to i32
-  %tmp1 = load i8* getelementptr inbounds (%struct_of_8_bytes_4_aligned* @f, i64 0, i32 3), align 1
+  %tmp1 = load i8, i8* getelementptr inbounds (%struct_of_8_bytes_4_aligned* @f, i64 0, i32 3), align 1
   %conv2 = zext i8 %tmp1 to i32
   %add = add nsw i32 %conv, %conv2
   ret i32 %add
diff --git a/llvm/test/Instrumentation/ThreadSanitizer/tsan_basic.ll b/llvm/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
index dc6e43e..22582eb 100644
--- a/llvm/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
+++ b/llvm/test/Instrumentation/ThreadSanitizer/tsan_basic.ll
@@ -5,7 +5,7 @@
 
 define i32 @read_4_bytes(i32* %a) sanitize_thread {
 entry:
-  %tmp1 = load i32* %a, align 4
+  %tmp1 = load i32, i32* %a, align 4
   ret i32 %tmp1
 }
 
@@ -15,7 +15,7 @@
 ; CHECK:        call void @__tsan_func_entry(i8* %0)
 ; CHECK-NEXT:   %1 = bitcast i32* %a to i8*
 ; CHECK-NEXT:   call void @__tsan_read4(i8* %1)
-; CHECK-NEXT:   %tmp1 = load i32* %a, align 4
+; CHECK-NEXT:   %tmp1 = load i32, i32* %a, align 4
 ; CHECK-NEXT:   call void @__tsan_func_exit()
 ; CHECK: ret i32
 
diff --git a/llvm/test/Instrumentation/ThreadSanitizer/unaligned.ll b/llvm/test/Instrumentation/ThreadSanitizer/unaligned.ll
index 7a240e3..89461b4 100644
--- a/llvm/test/Instrumentation/ThreadSanitizer/unaligned.ll
+++ b/llvm/test/Instrumentation/ThreadSanitizer/unaligned.ll
@@ -4,7 +4,7 @@
 
 define i16 @test_unaligned_read2(i16* %a) sanitize_thread {
 entry:
-  %tmp1 = load i16* %a, align 1
+  %tmp1 = load i16, i16* %a, align 1
   ret i16 %tmp1
 }
 
@@ -12,13 +12,13 @@
 ; CHECK:        call void @__tsan_func_entry(i8* %0)
 ; CHECK-NEXT:   %1 = bitcast i16* %a to i8*
 ; CHECK-NEXT:   call void @__tsan_unaligned_read2(i8* %1)
-; CHECK-NEXT:   %tmp1 = load i16* %a, align 1
+; CHECK-NEXT:   %tmp1 = load i16, i16* %a, align 1
 ; CHECK-NEXT:   call void @__tsan_func_exit()
 ; CHECK: ret i16
 
 define i32 @test_unaligned_read4(i32* %a) sanitize_thread {
 entry:
-  %tmp1 = load i32* %a, align 2
+  %tmp1 = load i32, i32* %a, align 2
   ret i32 %tmp1
 }
 
@@ -26,13 +26,13 @@
 ; CHECK:        call void @__tsan_func_entry(i8* %0)
 ; CHECK-NEXT:   %1 = bitcast i32* %a to i8*
 ; CHECK-NEXT:   call void @__tsan_unaligned_read4(i8* %1)
-; CHECK-NEXT:   %tmp1 = load i32* %a, align 2
+; CHECK-NEXT:   %tmp1 = load i32, i32* %a, align 2
 ; CHECK-NEXT:   call void @__tsan_func_exit()
 ; CHECK: ret i32
 
 define i64 @test_unaligned_read8(i64* %a) sanitize_thread {
 entry:
-  %tmp1 = load i64* %a, align 4
+  %tmp1 = load i64, i64* %a, align 4
   ret i64 %tmp1
 }
 
@@ -40,13 +40,13 @@
 ; CHECK:        call void @__tsan_func_entry(i8* %0)
 ; CHECK-NEXT:   %1 = bitcast i64* %a to i8*
 ; CHECK-NEXT:   call void @__tsan_unaligned_read8(i8* %1)
-; CHECK-NEXT:   %tmp1 = load i64* %a, align 4
+; CHECK-NEXT:   %tmp1 = load i64, i64* %a, align 4
 ; CHECK-NEXT:   call void @__tsan_func_exit()
 ; CHECK: ret i64
 
 define i128 @test_unaligned_read16(i128* %a) sanitize_thread {
 entry:
-  %tmp1 = load i128* %a, align 1
+  %tmp1 = load i128, i128* %a, align 1
   ret i128 %tmp1
 }
 
@@ -54,13 +54,13 @@
 ; CHECK:        call void @__tsan_func_entry(i8* %0)
 ; CHECK-NEXT:   %1 = bitcast i128* %a to i8*
 ; CHECK-NEXT:   call void @__tsan_unaligned_read16(i8* %1)
-; CHECK-NEXT:   %tmp1 = load i128* %a, align 1
+; CHECK-NEXT:   %tmp1 = load i128, i128* %a, align 1
 ; CHECK-NEXT:   call void @__tsan_func_exit()
 ; CHECK: ret i128
 
 define i128 @test_aligned_read16(i128* %a) sanitize_thread {
 entry:
-  %tmp1 = load i128* %a, align 8
+  %tmp1 = load i128, i128* %a, align 8
   ret i128 %tmp1
 }
 
@@ -68,7 +68,7 @@
 ; CHECK:        call void @__tsan_func_entry(i8* %0)
 ; CHECK-NEXT:   %1 = bitcast i128* %a to i8*
 ; CHECK-NEXT:   call void @__tsan_read16(i8* %1)
-; CHECK-NEXT:   %tmp1 = load i128* %a, align 8
+; CHECK-NEXT:   %tmp1 = load i128, i128* %a, align 8
 ; CHECK-NEXT:   call void @__tsan_func_exit()
 ; CHECK: ret i128
 
diff --git a/llvm/test/Instrumentation/ThreadSanitizer/vptr_read.ll b/llvm/test/Instrumentation/ThreadSanitizer/vptr_read.ll
index cccdeb8..6ed64c6 100644
--- a/llvm/test/Instrumentation/ThreadSanitizer/vptr_read.ll
+++ b/llvm/test/Instrumentation/ThreadSanitizer/vptr_read.ll
@@ -5,7 +5,7 @@
 define i8 @Foo(i8* %a) nounwind uwtable sanitize_thread {
 entry:
 ; CHECK: call void @__tsan_vptr_read
-  %0 = load i8* %a, align 8, !tbaa !0
+  %0 = load i8, i8* %a, align 8, !tbaa !0
   ret i8 %0
 }
 !0 = !{!2, !2, i64 0}
diff --git a/llvm/test/Integer/2007-01-19-TruncSext.ll b/llvm/test/Integer/2007-01-19-TruncSext.ll
index 5ec8a579..a487eb2 100644
--- a/llvm/test/Integer/2007-01-19-TruncSext.ll
+++ b/llvm/test/Integer/2007-01-19-TruncSext.ll
@@ -21,7 +21,7 @@
   %i = bitcast i32 0 to i32
   call void @multiply(i32 %i, i32 -1, i32 255) 
   %P = getelementptr [20 x i17], [20 x i17]* @ARRAY, i32 0, i32 0
-  %X = load i17* %P
+  %X = load i17, i17* %P
   %result = sext i17 %X to i32
   %fmt = getelementptr [4 x i8], [4 x i8]* @FORMAT, i32 0, i32 0
   call i32 (i8*,...)* @printf(i8* %fmt, i32 %result)
diff --git a/llvm/test/Integer/BitPacked.ll b/llvm/test/Integer/BitPacked.ll
index e6e453a..def0a83 100644
--- a/llvm/test/Integer/BitPacked.ll
+++ b/llvm/test/Integer/BitPacked.ll
@@ -10,8 +10,8 @@
 {
         store <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, <4 x float>* @foo1
         store <2 x i10> <i10 4, i10 4>, <2 x i10>* @foo2
-	%l1 = load <4 x float>* @foo1
-        %l2 = load <2 x i10>* @foo2
+	%l1 = load <4 x float>, <4 x float>* @foo1
+        %l2 = load <2 x i10>, <2 x i10>* @foo2
         %r1 = extractelement <2 x i10> %l2, i32 1    
         %r2 = extractelement <2 x i10> %l2, i32 0
         %t = mul i10 %r1, %r2
diff --git a/llvm/test/Integer/packed_bt.ll b/llvm/test/Integer/packed_bt.ll
index f6ea87c..ecaf621 100644
--- a/llvm/test/Integer/packed_bt.ll
+++ b/llvm/test/Integer/packed_bt.ll
@@ -10,7 +10,7 @@
 {
         store <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, <4 x float>* @foo1
         store <2 x i10> <i10 4, i10 4>, <2 x i10>* @foo2
-	%l1 = load <4 x float>* @foo1
-        %l2 = load <2 x i10>* @foo2
+	%l1 = load <4 x float>, <4 x float>* @foo1
+        %l2 = load <2 x i10>, <2 x i10>* @foo2
         ret void
 }
diff --git a/llvm/test/Integer/packed_struct_bt.ll b/llvm/test/Integer/packed_struct_bt.ll
index b8301ba..97bf0ff 100644
--- a/llvm/test/Integer/packed_struct_bt.ll
+++ b/llvm/test/Integer/packed_struct_bt.ll
@@ -16,9 +16,9 @@
 
 define i35 @main() 
 {
-        %tmp = load i35*  getelementptr (%struct.anon* @foos, i32 0, i32 1)            ; <i35> [#uses=1]
-        %tmp3 = load i35* getelementptr (%struct.anon* @foos, i32 0, i32 2)            ; <i35> [#uses=1]
-        %tmp6 = load i35* getelementptr (%struct.anon* @foos, i32 0, i32 3)            ; <i35> [#uses=1]
+        %tmp = load i35, i35*  getelementptr (%struct.anon* @foos, i32 0, i32 1)            ; <i35> [#uses=1]
+        %tmp3 = load i35, i35* getelementptr (%struct.anon* @foos, i32 0, i32 2)            ; <i35> [#uses=1]
+        %tmp6 = load i35, i35* getelementptr (%struct.anon* @foos, i32 0, i32 3)            ; <i35> [#uses=1]
         %tmp4 = add i35 %tmp3, %tmp             ; <i35> [#uses=1]
         %tmp7 = add i35 %tmp4, %tmp6            ; <i35> [#uses=1]
         ret i35 %tmp7
@@ -26,8 +26,8 @@
 
 define i35 @bar() {
 entry:
-        %tmp = load i35* getelementptr([2 x <{ i35, i8 }>]* @bara, i32 0, i32 0, i32 0 )            ; <i35> [#uses=1]
-        %tmp4 = load i35* getelementptr ([2 x <{ i35, i8 }>]* @bara, i32 0, i32 1, i32 0)           ; <i35> [#uses=1]
+        %tmp = load i35, i35* getelementptr([2 x <{ i35, i8 }>]* @bara, i32 0, i32 0, i32 0 )            ; <i35> [#uses=1]
+        %tmp4 = load i35, i35* getelementptr ([2 x <{ i35, i8 }>]* @bara, i32 0, i32 1, i32 0)           ; <i35> [#uses=1]
         %tmp5 = add i35 %tmp4, %tmp             ; <i35> [#uses=1]
         ret i35 %tmp5
 }
diff --git a/llvm/test/JitListener/multiple.ll b/llvm/test/JitListener/multiple.ll
index ae54608..fcd9a2e 100644
--- a/llvm/test/JitListener/multiple.ll
+++ b/llvm/test/JitListener/multiple.ll
@@ -55,7 +55,7 @@
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !15, metadata !16), !dbg !17
-  %0 = load i32* %a.addr, align 4, !dbg !18
+  %0 = load i32, i32* %a.addr, align 4, !dbg !18
   ret i32 %0, !dbg !19
 }
 
@@ -69,7 +69,7 @@
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !20, metadata !16), !dbg !21
-  %0 = load i32* %a.addr, align 4, !dbg !22
+  %0 = load i32, i32* %a.addr, align 4, !dbg !22
   %cmp = icmp eq i32 %0, 0, !dbg !22
   br i1 %cmp, label %if.then, label %if.end, !dbg !24
 
@@ -78,13 +78,13 @@
   br label %return, !dbg !25
 
 if.end:                                           ; preds = %entry
-  %1 = load i32* %a.addr, align 4, !dbg !27
+  %1 = load i32, i32* %a.addr, align 4, !dbg !27
   %div = sdiv i32 100, %1, !dbg !28
   store i32 %div, i32* %retval, !dbg !29
   br label %return, !dbg !29
 
 return:                                           ; preds = %if.end, %if.then
-  %2 = load i32* %retval, !dbg !30
+  %2 = load i32, i32* %retval, !dbg !30
   ret i32 %2, !dbg !30
 }
 
@@ -95,7 +95,7 @@
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !31, metadata !16), !dbg !32
-  %0 = load i32* %a.addr, align 4, !dbg !33
+  %0 = load i32, i32* %a.addr, align 4, !dbg !33
   switch i32 %0, label %sw.default [
     i32 0, label %sw.bb
     i32 1, label %sw.bb1
@@ -114,7 +114,7 @@
   br label %return, !dbg !38
 
 return:                                           ; preds = %sw.default, %sw.bb1, %sw.bb
-  %1 = load i32* %retval, !dbg !39
+  %1 = load i32, i32* %retval, !dbg !39
   ret i32 %1, !dbg !39
 }
 
diff --git a/llvm/test/JitListener/simple.ll b/llvm/test/JitListener/simple.ll
index 1732170..971959c 100644
--- a/llvm/test/JitListener/simple.ll
+++ b/llvm/test/JitListener/simple.ll
@@ -21,7 +21,7 @@
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !12, metadata !13), !dbg !14
-  %0 = load i32* %a.addr, align 4, !dbg !15
+  %0 = load i32, i32* %a.addr, align 4, !dbg !15
   ret i32 %0, !dbg !16
 }
 
diff --git a/llvm/test/LTO/X86/cfi_endproc.ll b/llvm/test/LTO/X86/cfi_endproc.ll
index 1a69bf6..10aedad 100644
--- a/llvm/test/LTO/X86/cfi_endproc.ll
+++ b/llvm/test/LTO/X86/cfi_endproc.ll
@@ -37,6 +37,6 @@
 @zed2 = linkonce_odr unnamed_addr global i32 42
 
 define i32 @useZed2() {
-  %x = load i32* @zed2
+  %x = load i32, i32* @zed2
   ret i32 %x
 }
diff --git a/llvm/test/LTO/X86/linkonce_odr_func.ll b/llvm/test/LTO/X86/linkonce_odr_func.ll
index 48da795..241bc61 100644
--- a/llvm/test/LTO/X86/linkonce_odr_func.ll
+++ b/llvm/test/LTO/X86/linkonce_odr_func.ll
@@ -30,7 +30,7 @@
 @v1 = linkonce_odr constant i32 32
 
 define i32 @useV1() {
-  %x = load i32* @v1
+  %x = load i32, i32* @v1
   ret i32 %x
 }
 
@@ -38,7 +38,7 @@
 @v2 = linkonce_odr global i32 32
 
 define i32 @useV2() {
-  %x = load i32* @v2
+  %x = load i32, i32* @v2
   ret i32 %x
 }
 
diff --git a/llvm/test/LTO/X86/set-merged.ll b/llvm/test/LTO/X86/set-merged.ll
index 0e2e1ea..aaafc5e 100644
--- a/llvm/test/LTO/X86/set-merged.ll
+++ b/llvm/test/LTO/X86/set-merged.ll
@@ -11,8 +11,8 @@
 entry:
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
-  %0 = load i32* %a.addr, align 4
-  %1 = load i32* %a.addr, align 4
+  %0 = load i32, i32* %a.addr, align 4
+  %1 = load i32, i32* %a.addr, align 4
   %call = call i32 @_Z4bar2i(i32 %1)
   %add = add nsw i32 %0, %call
   ret i32 %add
@@ -22,7 +22,7 @@
 entry:
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
-  %0 = load i32* %a.addr, align 4
+  %0 = load i32, i32* %a.addr, align 4
   %mul = mul nsw i32 2, %0
   ret i32 %mul
 }
diff --git a/llvm/test/Linker/2004-05-07-TypeResolution2.ll b/llvm/test/Linker/2004-05-07-TypeResolution2.ll
index 39bfcd4..2b67276 100644
--- a/llvm/test/Linker/2004-05-07-TypeResolution2.ll
+++ b/llvm/test/Linker/2004-05-07-TypeResolution2.ll
@@ -9,7 +9,7 @@
 define internal void @f1(%struct1* %tty) {
 loopentry.preheader:
 	%tmp.2.i.i = getelementptr %struct1, %struct1* %tty, i64 0, i32 1		; <void (%struct2*)**> [#uses=1]
-	%tmp.3.i.i = load volatile void (%struct2*)** %tmp.2.i.i		; <void (%struct2*)*> [#uses=0]
+	%tmp.3.i.i = load volatile void (%struct2*)*, void (%struct2*)** %tmp.2.i.i		; <void (%struct2*)*> [#uses=0]
 	ret void
 }
 
diff --git a/llvm/test/Linker/2008-03-05-AliasReference.ll b/llvm/test/Linker/2008-03-05-AliasReference.ll
index 8ce1ccb..0784794 100644
--- a/llvm/test/Linker/2008-03-05-AliasReference.ll
+++ b/llvm/test/Linker/2008-03-05-AliasReference.ll
@@ -12,6 +12,6 @@
 
 define i32 @baz() nounwind  {
 entry:
-	%tmp1 = load i32* @bar, align 4		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* @bar, align 4		; <i32> [#uses=1]
 	ret i32 %tmp1
 }
diff --git a/llvm/test/Linker/2009-09-03-mdnode.ll b/llvm/test/Linker/2009-09-03-mdnode.ll
index 1f308e7..428d0fa 100644
--- a/llvm/test/Linker/2009-09-03-mdnode.ll
+++ b/llvm/test/Linker/2009-09-03-mdnode.ll
@@ -14,7 +14,7 @@
   br label %return
 
 return:                                           ; preds = %entry
-  %0 = load i32* %retval                          ; <i32> [#uses=1]
+  %0 = load i32, i32* %retval                          ; <i32> [#uses=1]
   call void @llvm.dbg.stoppoint(i32 5, i32 1, metadata !1)
   call void @llvm.dbg.region.end(metadata !0)
   ret i32 %0
diff --git a/llvm/test/Linker/2009-09-03-mdnode2.ll b/llvm/test/Linker/2009-09-03-mdnode2.ll
index 68e3294..78ae5c0 100644
--- a/llvm/test/Linker/2009-09-03-mdnode2.ll
+++ b/llvm/test/Linker/2009-09-03-mdnode2.ll
@@ -9,7 +9,7 @@
   br label %return
 
 return:                                           ; preds = %entry
-  %0 = load i32* %retval                          ; <i32> [#uses=1]
+  %0 = load i32, i32* %retval                          ; <i32> [#uses=1]
   call void @llvm.dbg.stoppoint(i32 3, i32 1, metadata !1)
   call void @llvm.dbg.region.end(metadata !0)
   ret i32 %0
diff --git a/llvm/test/Linker/DbgDeclare.ll b/llvm/test/Linker/DbgDeclare.ll
index de5ac9e..2a934b2 100644
--- a/llvm/test/Linker/DbgDeclare.ll
+++ b/llvm/test/Linker/DbgDeclare.ll
@@ -24,8 +24,8 @@
   call void @llvm.dbg.declare(metadata i32* %argc.addr, metadata !14, metadata !{!"0x102"}), !dbg !15
   store i8** %argv, i8*** %argv.addr, align 8
   call void @llvm.dbg.declare(metadata i8*** %argv.addr, metadata !16, metadata !{!"0x102"}), !dbg !15
-  %0 = load i32* %argc.addr, align 4, !dbg !17
-  %1 = load i8*** %argv.addr, align 8, !dbg !17
+  %0 = load i32, i32* %argc.addr, align 4, !dbg !17
+  %1 = load i8**, i8*** %argv.addr, align 8, !dbg !17
   call void @test(i32 %0, i8** %1), !dbg !17
   ret i32 0, !dbg !19
 }
diff --git a/llvm/test/Linker/DbgDeclare2.ll b/llvm/test/Linker/DbgDeclare2.ll
index 0447cb8..410d1c0 100644
--- a/llvm/test/Linker/DbgDeclare2.ll
+++ b/llvm/test/Linker/DbgDeclare2.ll
@@ -19,22 +19,22 @@
   br label %for.cond, !dbg !20
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4, !dbg !20
-  %1 = load i32* %argc.addr, align 4, !dbg !20
+  %0 = load i32, i32* %i, align 4, !dbg !20
+  %1 = load i32, i32* %argc.addr, align 4, !dbg !20
   %cmp = icmp slt i32 %0, %1, !dbg !20
   br i1 %cmp, label %for.body, label %for.end, !dbg !20
 
 for.body:                                         ; preds = %for.cond
-  %2 = load i32* %i, align 4, !dbg !21
+  %2 = load i32, i32* %i, align 4, !dbg !21
   %idxprom = sext i32 %2 to i64, !dbg !21
-  %3 = load i8*** %argv.addr, align 8, !dbg !21
+  %3 = load i8**, i8*** %argv.addr, align 8, !dbg !21
   %arrayidx = getelementptr inbounds i8*, i8** %3, i64 %idxprom, !dbg !21
-  %4 = load i8** %arrayidx, align 8, !dbg !21
+  %4 = load i8*, i8** %arrayidx, align 8, !dbg !21
   %call = call i32 @puts(i8* %4), !dbg !21
   br label %for.inc, !dbg !23
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32* %i, align 4, !dbg !20
+  %5 = load i32, i32* %i, align 4, !dbg !20
   %inc = add nsw i32 %5, 1, !dbg !20
   store i32 %inc, i32* %i, align 4, !dbg !20
   br label %for.cond, !dbg !20
diff --git a/llvm/test/Linker/Inputs/linkage.b.ll b/llvm/test/Linker/Inputs/linkage.b.ll
index 0ada3f4..9c1d066 100644
--- a/llvm/test/Linker/Inputs/linkage.b.ll
+++ b/llvm/test/Linker/Inputs/linkage.b.ll
@@ -3,7 +3,7 @@
 declare i32 @foo() 
 
 define void @bar() {
-	load i32* @X
+	load i32, i32* @X
 	call i32 @foo()
 	ret void
 }
diff --git a/llvm/test/Linker/Inputs/replaced-function-matches-first-subprogram.ll b/llvm/test/Linker/Inputs/replaced-function-matches-first-subprogram.ll
index a5de89f..dcf4e72 100644
--- a/llvm/test/Linker/Inputs/replaced-function-matches-first-subprogram.ll
+++ b/llvm/test/Linker/Inputs/replaced-function-matches-first-subprogram.ll
@@ -4,7 +4,7 @@
 entry:
   %this.addr = alloca %struct.Class*, align 8
   store %struct.Class* %this, %struct.Class** %this.addr, align 8
-  %this1 = load %struct.Class** %this.addr
+  %this1 = load %struct.Class*, %struct.Class** %this.addr
   ret i32 0, !dbg !12
 }
 
diff --git a/llvm/test/Linker/Inputs/testlink.ll b/llvm/test/Linker/Inputs/testlink.ll
index e095ab1..263d9e7 100644
--- a/llvm/test/Linker/Inputs/testlink.ll
+++ b/llvm/test/Linker/Inputs/testlink.ll
@@ -34,7 +34,7 @@
   store i32 %blah, i32* @MyVar
   %idx = getelementptr %intlist, %intlist* @MyIntList, i64 0, i32 1
   store i32 12, i32* %idx
-  %ack = load i32* @0
+  %ack = load i32, i32* @0
   %fzo = add i32 %ack, %blah
   ret i32 %fzo
 }
diff --git a/llvm/test/Linker/link-global-to-func.ll b/llvm/test/Linker/link-global-to-func.ll
index 4d83fe5..f7adde9 100644
--- a/llvm/test/Linker/link-global-to-func.ll
+++ b/llvm/test/Linker/link-global-to-func.ll
@@ -9,6 +9,6 @@
 @__eprintf = external global i8*		; <i8**> [#uses=1]
 
 define i8* @test() {
-	%A = load i8** @__eprintf		; <i8*> [#uses=1]
+	%A = load i8*, i8** @__eprintf		; <i8*> [#uses=1]
 	ret i8* %A
 }
diff --git a/llvm/test/Linker/partial-type-refinement.ll b/llvm/test/Linker/partial-type-refinement.ll
index b995f11..2588fae 100644
--- a/llvm/test/Linker/partial-type-refinement.ll
+++ b/llvm/test/Linker/partial-type-refinement.ll
@@ -1,7 +1,7 @@
 ; RUN: llvm-link %s %p/partial-type-refinement-link.ll -S | FileCheck %s
 ; PR4954
 
-; CHECK: load %PI** getelementptr inbounds (%"RegisterP<LowerArrayLength>"* @_ZN3mvmL1XE, i64 0, i32 0, i32 6, i32 0, i32 0, i32 0), align 16
+; CHECK: load %PI*, %PI** getelementptr inbounds (%"RegisterP<LowerArrayLength>"* @_ZN3mvmL1XE, i64 0, i32 0, i32 6, i32 0, i32 0, i32 0), align 16
 
 %AnalysisResolver = type { i8, %PMDataManager* }
 %"DenseMap<P*,AU*>" = type { i64, %"pair<P*,AU*>"*, i64, i64 }
@@ -19,6 +19,6 @@
 
 define void @__tcf_0() nounwind {
 entry:
-  %0 = load %PI** getelementptr inbounds (%"RegisterP<LowerArrayLength>"* @_ZN3mvmL1XE, i64 0, i32 0, i32 6, i32 0, i32 0, i32 0), align 16
+  %0 = load %PI*, %PI** getelementptr inbounds (%"RegisterP<LowerArrayLength>"* @_ZN3mvmL1XE, i64 0, i32 0, i32 6, i32 0, i32 0, i32 0), align 16
   ret void
 }
diff --git a/llvm/test/Linker/replaced-function-matches-first-subprogram.ll b/llvm/test/Linker/replaced-function-matches-first-subprogram.ll
index c0ec5f3..3f6e857 100644
--- a/llvm/test/Linker/replaced-function-matches-first-subprogram.ll
+++ b/llvm/test/Linker/replaced-function-matches-first-subprogram.ll
@@ -29,7 +29,7 @@
 entry:
   %this.addr = alloca %struct.Class*, align 8
   store %struct.Class* %this, %struct.Class** %this.addr, align 8
-  %this1 = load %struct.Class** %this.addr
+  %this1 = load %struct.Class*, %struct.Class** %this.addr
   ret i32 0, !dbg !15
 }
 
diff --git a/llvm/test/Linker/testlink.ll b/llvm/test/Linker/testlink.ll
index d928c66..5488fcc 100644
--- a/llvm/test/Linker/testlink.ll
+++ b/llvm/test/Linker/testlink.ll
@@ -76,15 +76,15 @@
 declare void @print(i32)
 
 define void @main() {
-  %v1 = load i32* @MyVar
+  %v1 = load i32, i32* @MyVar
   call void @print(i32 %v1)
   %idx = getelementptr %intlist, %intlist* @MyIntList, i64 0, i32 1
-  %v2 = load i32* %idx
+  %v2 = load i32, i32* %idx
   call void @print(i32 %v2)
   %1 = call i32 @foo(i32 5)
-  %v3 = load i32* @MyVar
+  %v3 = load i32, i32* @MyVar
   call void @print(i32 %v3)
-  %v4 = load i32* %idx
+  %v4 = load i32, i32* %idx
   call void @print(i32 %v4)
   ret void
 }
diff --git a/llvm/test/Linker/type-unique-odr-b.ll b/llvm/test/Linker/type-unique-odr-b.ll
index b262191..bbb70bd 100644
--- a/llvm/test/Linker/type-unique-odr-b.ll
+++ b/llvm/test/Linker/type-unique-odr-b.ll
@@ -27,7 +27,7 @@
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !24, metadata !{!"0x102"}), !dbg !26
-  %this1 = load %class.A** %this.addr
+  %this1 = load %class.A*, %class.A** %this.addr
   ret void, !dbg !27
 }
 
diff --git a/llvm/test/Linker/type-unique-simple2-a.ll b/llvm/test/Linker/type-unique-simple2-a.ll
index 72a776b..7aef609 100644
--- a/llvm/test/Linker/type-unique-simple2-a.ll
+++ b/llvm/test/Linker/type-unique-simple2-a.ll
@@ -49,7 +49,7 @@
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !39, metadata !{!"0x102"}), !dbg !41
-  %this1 = load %class.A** %this.addr
+  %this1 = load %class.A*, %class.A** %this.addr
   call void @_ZN1AC2Ev(%class.A* %this1) #1, !dbg !42
   ret void, !dbg !42
 }
@@ -65,7 +65,7 @@
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !44, metadata !{!"0x102"}), !dbg !45
-  %this1 = load %class.A** %this.addr
+  %this1 = load %class.A*, %class.A** %this.addr
   %0 = bitcast %class.A* %this1 to i8***, !dbg !46
   store i8** getelementptr inbounds ([4 x i8*]* @_ZTV1A, i64 0, i64 2), i8*** %0, !dbg !46
   ret void, !dbg !46
diff --git a/llvm/test/Linker/type-unique-simple2-b.ll b/llvm/test/Linker/type-unique-simple2-b.ll
index 25e67d4..3cbeb2c 100644
--- a/llvm/test/Linker/type-unique-simple2-b.ll
+++ b/llvm/test/Linker/type-unique-simple2-b.ll
@@ -23,7 +23,7 @@
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !32, metadata !{!"0x102"}), !dbg !34
-  %this1 = load %class.A** %this.addr
+  %this1 = load %class.A*, %class.A** %this.addr
   ret void, !dbg !35
 }
 
@@ -36,7 +36,7 @@
   %this.addr = alloca %class.A*, align 8
   store %class.A* %this, %class.A** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !36, metadata !{!"0x102"}), !dbg !37
-  %this1 = load %class.A** %this.addr
+  %this1 = load %class.A*, %class.A** %this.addr
   ret i32 1, !dbg !38
 }
 
diff --git a/llvm/test/Linker/type-unique-type-array-a.ll b/llvm/test/Linker/type-unique-type-array-a.ll
index 29fe9de..edf6dd4 100644
--- a/llvm/test/Linker/type-unique-type-array-a.ll
+++ b/llvm/test/Linker/type-unique-type-array-a.ll
@@ -53,12 +53,12 @@
   store %class.A* %a, %class.A** %a.addr, align 8
   call void @llvm.dbg.declare(metadata %class.A** %a.addr, metadata !24, metadata !{!"0x102"}), !dbg !25
   call void @llvm.dbg.declare(metadata %struct.SA* %sa, metadata !26, metadata !{!"0x102"}), !dbg !27
-  %0 = load %class.A** %a.addr, align 8, !dbg !28
+  %0 = load %class.A*, %class.A** %a.addr, align 8, !dbg !28
   %1 = bitcast %struct.SA* %agg.tmp to i8*, !dbg !28
   %2 = bitcast %struct.SA* %sa to i8*, !dbg !28
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 4, i32 4, i1 false), !dbg !28
   %coerce.dive1 = getelementptr %struct.SA, %struct.SA* %agg.tmp, i32 0, i32 0, !dbg !28
-  %3 = load i32* %coerce.dive1, !dbg !28
+  %3 = load i32, i32* %coerce.dive1, !dbg !28
   call void @_ZN1A5testAE2SA(%class.A* %0, i32 %3), !dbg !28
   ret void, !dbg !29
 }
@@ -76,7 +76,7 @@
   store %class.A* %this, %class.A** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.A** %this.addr, metadata !30, metadata !{!"0x102"}), !dbg !31
   call void @llvm.dbg.declare(metadata %struct.SA* %a, metadata !32, metadata !{!"0x102"}), !dbg !33
-  %this1 = load %class.A** %this.addr
+  %this1 = load %class.A*, %class.A** %this.addr
   ret void, !dbg !34
 }
 
diff --git a/llvm/test/Linker/type-unique-type-array-b.ll b/llvm/test/Linker/type-unique-type-array-b.ll
index 52c09fc..f2b0e39 100644
--- a/llvm/test/Linker/type-unique-type-array-b.ll
+++ b/llvm/test/Linker/type-unique-type-array-b.ll
@@ -32,12 +32,12 @@
   store %class.B* %b, %class.B** %b.addr, align 8
   call void @llvm.dbg.declare(metadata %class.B** %b.addr, metadata !24, metadata !{!"0x102"}), !dbg !25
   call void @llvm.dbg.declare(metadata %struct.SA* %sa, metadata !26, metadata !{!"0x102"}), !dbg !27
-  %0 = load %class.B** %b.addr, align 8, !dbg !28
+  %0 = load %class.B*, %class.B** %b.addr, align 8, !dbg !28
   %1 = bitcast %struct.SA* %agg.tmp to i8*, !dbg !28
   %2 = bitcast %struct.SA* %sa to i8*, !dbg !28
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 4, i32 4, i1 false), !dbg !28
   %coerce.dive1 = getelementptr %struct.SA, %struct.SA* %agg.tmp, i32 0, i32 0, !dbg !28
-  %3 = load i32* %coerce.dive1, !dbg !28
+  %3 = load i32, i32* %coerce.dive1, !dbg !28
   call void @_ZN1B5testBE2SA(%class.B* %0, i32 %3), !dbg !28
   ret void, !dbg !29
 }
@@ -55,7 +55,7 @@
   store %class.B* %this, %class.B** %this.addr, align 8
   call void @llvm.dbg.declare(metadata %class.B** %this.addr, metadata !30, metadata !{!"0x102"}), !dbg !31
   call void @llvm.dbg.declare(metadata %struct.SA* %sa, metadata !32, metadata !{!"0x102"}), !dbg !33
-  %this1 = load %class.B** %this.addr
+  %this1 = load %class.B*, %class.B** %this.addr
   ret void, !dbg !34
 }
 
diff --git a/llvm/test/MC/AArch64/elf-globaladdress.ll b/llvm/test/MC/AArch64/elf-globaladdress.ll
index 7d031e6..8e4ae4c 100644
--- a/llvm/test/MC/AArch64/elf-globaladdress.ll
+++ b/llvm/test/MC/AArch64/elf-globaladdress.ll
@@ -12,16 +12,16 @@
 @var64 = global i64 0
 
 define void @loadstore() {
-    %val8 = load i8* @var8
+    %val8 = load i8, i8* @var8
     store volatile i8 %val8, i8* @var8
 
-    %val16 = load i16* @var16
+    %val16 = load i16, i16* @var16
     store volatile i16 %val16, i16* @var16
 
-    %val32 = load i32* @var32
+    %val32 = load i32, i32* @var32
     store volatile i32 %val32, i32* @var32
 
-    %val64 = load i64* @var64
+    %val64 = load i64, i64* @var64
     store volatile i64 %val64, i64* @var64
 
     ret void
diff --git a/llvm/test/MC/ARM/data-in-code.ll b/llvm/test/MC/ARM/data-in-code.ll
index 50cd5f8..724577b 100644
--- a/llvm/test/MC/ARM/data-in-code.ll
+++ b/llvm/test/MC/ARM/data-in-code.ll
@@ -10,7 +10,7 @@
 ;; marking the data-in-code region.
 
 define void @foo(i32* %ptr) nounwind ssp {
-  %tmp = load i32* %ptr, align 4
+  %tmp = load i32, i32* %ptr, align 4
   switch i32 %tmp, label %default [
     i32 11, label %bb0
     i32 10, label %bb1
diff --git a/llvm/test/MC/ARM/elf-reloc-03.ll b/llvm/test/MC/ARM/elf-reloc-03.ll
index b64d612..a0fdc3e 100644
--- a/llvm/test/MC/ARM/elf-reloc-03.ll
+++ b/llvm/test/MC/ARM/elf-reloc-03.ll
@@ -78,9 +78,9 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @startval, align 4
+  %0 = load i32, i32* @startval, align 4
   %1 = getelementptr inbounds [10 x i32 (...)*], [10 x i32 (...)*]* @vtable, i32 0, i32 %0
-  %2 = load i32 (...)** %1, align 4
+  %2 = load i32 (...)*, i32 (...)** %1, align 4
   %3 = tail call i32 (...)* %2() nounwind
   tail call void @exit(i32 %3) noreturn nounwind
   unreachable
diff --git a/llvm/test/MC/COFF/tricky-names.ll b/llvm/test/MC/COFF/tricky-names.ll
index 458aa41..f34b76a 100644
--- a/llvm/test/MC/COFF/tricky-names.ll
+++ b/llvm/test/MC/COFF/tricky-names.ll
@@ -10,9 +10,9 @@
 @"\01@foo.bar" = global i32 0
 
 define weak i32 @"\01??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51"() section ".text" {
-  %a = load i32* @"\01??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ"
-  %b = load i32* @"\01__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4"
-  %c = load i32* @"\01@foo.bar"
+  %a = load i32, i32* @"\01??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ"
+  %b = load i32, i32* @"\01__ZL16ExceptionHandlerP19_EXCEPTION_POINTERS@4"
+  %c = load i32, i32* @"\01@foo.bar"
   %x = add i32 %a, %b
   %y = add i32 %x, %c
   ret i32 %y
diff --git a/llvm/test/MC/MachO/tlv-bss.ll b/llvm/test/MC/MachO/tlv-bss.ll
index af620f9..3dbf4b0 100644
--- a/llvm/test/MC/MachO/tlv-bss.ll
+++ b/llvm/test/MC/MachO/tlv-bss.ll
@@ -26,7 +26,7 @@
 entry:
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  %0 = load i8** @_ZN3TlsIlE3valE, align 8
+  %0 = load i8*, i8** @_ZN3TlsIlE3valE, align 8
   ret i8* %0
 }
 
diff --git a/llvm/test/MC/MachO/x86-data-in-code.ll b/llvm/test/MC/MachO/x86-data-in-code.ll
index 2410974..c2e136f 100644
--- a/llvm/test/MC/MachO/x86-data-in-code.ll
+++ b/llvm/test/MC/MachO/x86-data-in-code.ll
@@ -6,7 +6,7 @@
 ; CHECK-NOT: (('command', 41)
 
 define void @foo(i32* %ptr) nounwind ssp {
-  %tmp = load i32* %ptr, align 4
+  %tmp = load i32, i32* %ptr, align 4
   switch i32 %tmp, label %default [
     i32 11, label %bb0
     i32 10, label %bb1
diff --git a/llvm/test/MC/Mips/elf-bigendian.ll b/llvm/test/MC/Mips/elf-bigendian.ll
index a92fe33..98b5682 100644
--- a/llvm/test/MC/Mips/elf-bigendian.ll
+++ b/llvm/test/MC/Mips/elf-bigendian.ll
@@ -43,7 +43,7 @@
 
 define i32 @main() nounwind {
 entry:
-  %0 = load i32* @x, align 4
+  %0 = load i32, i32* @x, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %if.end, label %foo
 
diff --git a/llvm/test/Other/2004-08-16-PackedGlobalConstant.ll b/llvm/test/Other/2004-08-16-PackedGlobalConstant.ll
index 9130ccb..4f15c9e 100644
--- a/llvm/test/Other/2004-08-16-PackedGlobalConstant.ll
+++ b/llvm/test/Other/2004-08-16-PackedGlobalConstant.ll
@@ -4,7 +4,7 @@
 @bar = external global <2 x i32>                ; <<2 x i32>*> [#uses=1]
 
 define void @main() {
-        %t0 = load <2 x i32>* @foo              ; <<2 x i32>> [#uses=1]
+        %t0 = load <2 x i32>, <2 x i32>* @foo              ; <<2 x i32>> [#uses=1]
         store <2 x i32> %t0, <2 x i32>* @bar
         ret void
 }
diff --git a/llvm/test/Other/2004-08-16-PackedSelect.ll b/llvm/test/Other/2004-08-16-PackedSelect.ll
index c1d6214..1d0e7aa 100644
--- a/llvm/test/Other/2004-08-16-PackedSelect.ll
+++ b/llvm/test/Other/2004-08-16-PackedSelect.ll
@@ -4,7 +4,7 @@
 @bar = external global <4 x float>              ; <<4 x float>*> [#uses=1]
 
 define void @main() {
-        %t0 = load <4 x float>* @foo            ; <<4 x float>> [#uses=3]
+        %t0 = load <4 x float>, <4 x float>* @foo            ; <<4 x float>> [#uses=3]
         %t1 = fadd <4 x float> %t0, %t0          ; <<4 x float>> [#uses=1]
         %t2 = select i1 true, <4 x float> %t0, <4 x float> %t1          ; <<4 x float>> [#uses=1]
         store <4 x float> %t2, <4 x float>* @bar
diff --git a/llvm/test/Other/2004-08-16-PackedSimple.ll b/llvm/test/Other/2004-08-16-PackedSimple.ll
index 81cecd4..3923c96 100644
--- a/llvm/test/Other/2004-08-16-PackedSimple.ll
+++ b/llvm/test/Other/2004-08-16-PackedSimple.ll
@@ -4,7 +4,7 @@
 @bar = external global <4 x float>              ; <<4 x float>*> [#uses=1]
 
 define void @main() {
-        %t0 = load <4 x float>* @foo            ; <<4 x float>> [#uses=3]
+        %t0 = load <4 x float>, <4 x float>* @foo            ; <<4 x float>> [#uses=3]
         %t2 = fadd <4 x float> %t0, %t0          ; <<4 x float>> [#uses=1]
         %t3 = select i1 false, <4 x float> %t0, <4 x float> %t2         ; <<4 x float>> [#uses=1]
         store <4 x float> %t3, <4 x float>* @bar
diff --git a/llvm/test/Other/2004-08-20-PackedControlFlow.ll b/llvm/test/Other/2004-08-20-PackedControlFlow.ll
index 3943570..64da59d 100644
--- a/llvm/test/Other/2004-08-20-PackedControlFlow.ll
+++ b/llvm/test/Other/2004-08-20-PackedControlFlow.ll
@@ -16,7 +16,7 @@
         br label %C
 
 A:              ; preds = %0
-        %t0 = load %v4f* @foo           ; <%v4f> [#uses=2]
+        %t0 = load %v4f, %v4f* @foo           ; <%v4f> [#uses=2]
         br label %B
 }
 
diff --git a/llvm/test/Other/2007-09-10-PassManager.ll b/llvm/test/Other/2007-09-10-PassManager.ll
index ded15e5..39af0d0 100644
--- a/llvm/test/Other/2007-09-10-PassManager.ll
+++ b/llvm/test/Other/2007-09-10-PassManager.ll
@@ -8,15 +8,15 @@
 	br label %bb3
 
 bb:		; preds = %bb3
-	%tmp = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp = load i32, i32* %i, align 4		; <i32> [#uses=1]
 	call void @bar( i32 %tmp )
-	%tmp1 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* %i, align 4		; <i32> [#uses=1]
 	%tmp2 = add i32 %tmp1, 1		; <i32> [#uses=1]
 	store i32 %tmp2, i32* %i, align 4
 	br label %bb3
 
 bb3:		; preds = %bb, %entry
-	%tmp4 = load i32* %i, align 4		; <i32> [#uses=1]
+	%tmp4 = load i32, i32* %i, align 4		; <i32> [#uses=1]
 	%tmp5 = icmp sle i32 %tmp4, 9		; <i1> [#uses=1]
 	%tmp56 = zext i1 %tmp5 to i8		; <i8> [#uses=1]
 	%toBool = icmp ne i8 %tmp56, 0		; <i1> [#uses=1]
diff --git a/llvm/test/Other/lint.ll b/llvm/test/Other/lint.ll
index 5bd1c75..20d7ff9 100644
--- a/llvm/test/Other/lint.ll
+++ b/llvm/test/Other/lint.ll
@@ -19,11 +19,11 @@
 ; CHECK: Null pointer dereference
   store i32 0, i32* null
 ; CHECK: Null pointer dereference
-  %t = load i32* null
+  %t = load i32, i32* null
 ; CHECK: Undef pointer dereference
   store i32 0, i32* undef
 ; CHECK: Undef pointer dereference
-  %u = load i32* undef
+  %u = load i32, i32* undef
 ; CHECK: All-ones pointer dereference
   store i32 0, i32* inttoptr (i64 -1 to i32*)
 ; CHECK: Address one pointer dereference
@@ -61,7 +61,7 @@
 ; CHECK: Write to text section
   store i32 8, i32* bitcast (i32()* @foo to i32*)
 ; CHECK: Load from block address
-  %lb = load i32* bitcast (i8* blockaddress(@foo, %next) to i32*)
+  %lb = load i32, i32* bitcast (i8* blockaddress(@foo, %next) to i32*)
 ; CHECK: Call to block address
   call void()* bitcast (i8* blockaddress(@foo, %next) to void()*)()
 ; CHECK: Undefined behavior: Null pointer dereference
@@ -152,7 +152,7 @@
   store i32* %x, i32** %retval
   br label %next
 next:
-  %t0 = load i32** %retval
+  %t0 = load i32*, i32** %retval
   %t1 = insertvalue { i32, i32, i32* } zeroinitializer, i32* %t0, 2
   %t2 = extractvalue { i32, i32, i32* } %t1, 2
   br label %exit
@@ -172,7 +172,7 @@
 exit:
   %t3 = phi i32* [ %t4, %exit ]
   %t4 = bitcast i32* %t3 to i32*
-  %x = load volatile i32* %t3
+  %x = load volatile i32, i32* %t3
   br label %exit
 }
 
diff --git a/llvm/test/Other/optimization-remarks-inline.ll b/llvm/test/Other/optimization-remarks-inline.ll
index 566b206..bcd90bc 100644
--- a/llvm/test/Other/optimization-remarks-inline.ll
+++ b/llvm/test/Other/optimization-remarks-inline.ll
@@ -18,8 +18,8 @@
   %y.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
   store i32 %y, i32* %y.addr, align 4
-  %0 = load i32* %x.addr, align 4
-  %1 = load i32* %y.addr, align 4
+  %0 = load i32, i32* %x.addr, align 4
+  %1 = load i32, i32* %y.addr, align 4
   %add = add nsw i32 %0, %1
   ret i32 %add
 }
@@ -28,8 +28,8 @@
 entry:
   %j.addr = alloca i32, align 4
   store i32 %j, i32* %j.addr, align 4
-  %0 = load i32* %j.addr, align 4
-  %1 = load i32* %j.addr, align 4
+  %0 = load i32, i32* %j.addr, align 4
+  %1 = load i32, i32* %j.addr, align 4
   %sub = sub nsw i32 %1, 2
   %call = call i32 @foo(i32 %0, i32 %sub)
 ; CHECK: foo inlined into bar
diff --git a/llvm/test/Transforms/ADCE/2002-05-23-ZeroArgPHITest.ll b/llvm/test/Transforms/ADCE/2002-05-23-ZeroArgPHITest.ll
index 8f12bef..8d1beec 100644
--- a/llvm/test/Transforms/ADCE/2002-05-23-ZeroArgPHITest.ll
+++ b/llvm/test/Transforms/ADCE/2002-05-23-ZeroArgPHITest.ll
@@ -15,14 +15,14 @@
         br label %bb1
 
 bb1:            ; preds = %bb0
-        %reg107 = load %node_t** %nodelist.upgrd.1              ; <%node_t*> [#uses=2]
+        %reg107 = load %node_t*, %node_t** %nodelist.upgrd.1              ; <%node_t*> [#uses=2]
         %cond211 = icmp eq %node_t* %reg107, null               ; <i1> [#uses=1]
         br i1 %cond211, label %bb3, label %bb2
 
 bb2:            ; preds = %bb2, %bb1
         %reg109 = phi %node_t* [ %reg110, %bb2 ], [ %reg107, %bb1 ]             ; <%node_t*> [#uses=1]
         %reg212 = getelementptr %node_t, %node_t* %reg109, i64 0, i32 1          ; <%node_t**> [#uses=1]
-        %reg110 = load %node_t** %reg212                ; <%node_t*> [#uses=2]
+        %reg110 = load %node_t*, %node_t** %reg212                ; <%node_t*> [#uses=2]
         %cond213 = icmp ne %node_t* %reg110, null               ; <i1> [#uses=1]
         br i1 %cond213, label %bb2, label %bb3
 
diff --git a/llvm/test/Transforms/ADCE/2002-05-28-Crash.ll b/llvm/test/Transforms/ADCE/2002-05-28-Crash.ll
index 359c250..d88580a 100644
--- a/llvm/test/Transforms/ADCE/2002-05-28-Crash.ll
+++ b/llvm/test/Transforms/ADCE/2002-05-28-Crash.ll
@@ -15,7 +15,7 @@
 
 define i32 @rx_bitset_empty(i32 %size, i32* %set) {
 bb1:
-        %reg110 = load i32* %set                ; <i32> [#uses=2]
+        %reg110 = load i32, i32* %set                ; <i32> [#uses=2]
         store i32 1, i32* %set
         %cast112 = sext i32 %size to i64                ; <i64> [#uses=1]
         %reg113 = add i64 %cast112, 31          ; <i64> [#uses=1]
@@ -26,7 +26,7 @@
         %reg114-idxcast-offset = add i32 %reg114-idxcast, 1073741823            ; <i32> [#uses=1]
         %reg114-idxcast-offset.upgrd.1 = zext i32 %reg114-idxcast-offset to i64         ; <i64> [#uses=1]
         %reg124 = getelementptr i32, i32* %set, i64 %reg114-idxcast-offset.upgrd.1           ; <i32*> [#uses=1]
-        %reg125 = load i32* %reg124             ; <i32> [#uses=1]
+        %reg125 = load i32, i32* %reg124             ; <i32> [#uses=1]
         %cond232 = icmp ne i32 %reg125, 0               ; <i1> [#uses=1]
         br i1 %cond232, label %bb3, label %bb2
 
@@ -39,7 +39,7 @@
         %reg130-idxcast-offset = add i32 %reg130-idxcast, 1073741823            ; <i32> [#uses=1]
         %reg130-idxcast-offset.upgrd.2 = zext i32 %reg130-idxcast-offset to i64         ; <i64> [#uses=1]
         %reg118 = getelementptr i32, i32* %set, i64 %reg130-idxcast-offset.upgrd.2           ; <i32*> [#uses=1]
-        %reg119 = load i32* %reg118             ; <i32> [#uses=1]
+        %reg119 = load i32, i32* %reg118             ; <i32> [#uses=1]
         %cond233 = icmp eq i32 %reg119, 0               ; <i1> [#uses=1]
         br i1 %cond233, label %bb2, label %bb3
 
diff --git a/llvm/test/Transforms/ADCE/2002-07-17-AssertionFailure.ll b/llvm/test/Transforms/ADCE/2002-07-17-AssertionFailure.ll
index 8f8dadf..ff8bdb3 100644
--- a/llvm/test/Transforms/ADCE/2002-07-17-AssertionFailure.ll
+++ b/llvm/test/Transforms/ADCE/2002-07-17-AssertionFailure.ll
@@ -7,7 +7,7 @@
 
 define void @foo(i8* %reg5481) {
         %cast611 = bitcast i8* %reg5481 to i8**         ; <i8**> [#uses=1]
-        %reg162 = load i8** %cast611            ; <i8*> [#uses=1]
+        %reg162 = load i8*, i8** %cast611            ; <i8*> [#uses=1]
         ptrtoint i8* %reg162 to i32             ; <i32>:1 [#uses=0]
         ret void
 }
diff --git a/llvm/test/Transforms/ADCE/2002-07-17-PHIAssertion.ll b/llvm/test/Transforms/ADCE/2002-07-17-PHIAssertion.ll
index 2f0df67..1bf79e8 100644
--- a/llvm/test/Transforms/ADCE/2002-07-17-PHIAssertion.ll
+++ b/llvm/test/Transforms/ADCE/2002-07-17-PHIAssertion.ll
@@ -17,7 +17,7 @@
         br label %UnifiedExitNode
 
 bb4:            ; preds = %bb2
-        %reg117 = load i32* @hufts              ; <i32> [#uses=2]
+        %reg117 = load i32, i32* @hufts              ; <i32> [#uses=2]
         %cond241 = icmp ule i32 %reg117, %reg128                ; <i1> [#uses=1]
         br i1 %cond241, label %bb6, label %bb5
 
@@ -29,12 +29,12 @@
         br i1 false, label %bb2, label %bb7
 
 bb7:            ; preds = %bb6
-        %reg126 = load i32* @bk         ; <i32> [#uses=1]
+        %reg126 = load i32, i32* @bk         ; <i32> [#uses=1]
         %cond247 = icmp ule i32 %reg126, 7              ; <i1> [#uses=1]
         br i1 %cond247, label %bb9, label %bb8
 
 bb8:            ; preds = %bb8, %bb7
-        %reg119 = load i32* @bk         ; <i32> [#uses=1]
+        %reg119 = load i32, i32* @bk         ; <i32> [#uses=1]
         %cond256 = icmp ugt i32 %reg119, 7              ; <i1> [#uses=1]
         br i1 %cond256, label %bb8, label %bb9
 
diff --git a/llvm/test/Transforms/ADCE/2003-06-11-InvalidCFG.ll b/llvm/test/Transforms/ADCE/2003-06-11-InvalidCFG.ll
index 5206b24..7c7e238 100644
--- a/llvm/test/Transforms/ADCE/2003-06-11-InvalidCFG.ll
+++ b/llvm/test/Transforms/ADCE/2003-06-11-InvalidCFG.ll
@@ -16,7 +16,7 @@
         br label %endif.42
 
 endif.65:               ; preds = %endif.42
-        %tmp.2846 = load i32** @G               ; <i32*> [#uses=1]
+        %tmp.2846 = load i32*, i32** @G               ; <i32*> [#uses=1]
         br i1 false, label %shortcirc_next.12, label %shortcirc_done.12
 
 shortcirc_next.12:              ; preds = %endif.65
diff --git a/llvm/test/Transforms/ADCE/2003-06-24-BadSuccessor.ll b/llvm/test/Transforms/ADCE/2003-06-24-BadSuccessor.ll
index bf3506f..707e14a 100644
--- a/llvm/test/Transforms/ADCE/2003-06-24-BadSuccessor.ll
+++ b/llvm/test/Transforms/ADCE/2003-06-24-BadSuccessor.ll
@@ -49,7 +49,7 @@
 
 no_exit.2:		; preds = %shortcirc_next.4
 	%tmp.897 = getelementptr i32, i32* %SubArrays.10, i64 0		; <i32*> [#uses=1]
-	%tmp.899 = load i32* %tmp.897		; <i32> [#uses=1]
+	%tmp.899 = load i32, i32* %tmp.897		; <i32> [#uses=1]
 	store i32 %tmp.899, i32* null
 	ret i32 0
 
@@ -79,7 +79,7 @@
 
 then.53:		; preds = %shortcirc_next.7, %label.17
 	%SubArrays.8 = phi i32* [ %SubArrays.10, %shortcirc_next.7 ], [ %SubArrays.10, %label.17 ]		; <i32*> [#uses=1]
-	%tmp.1023 = load i32* null		; <i32> [#uses=1]
+	%tmp.1023 = load i32, i32* null		; <i32> [#uses=1]
 	switch i32 %tmp.1023, label %loopentry.1 [
 	]
 
diff --git a/llvm/test/Transforms/ADCE/2003-06-24-BasicFunctionality.ll b/llvm/test/Transforms/ADCE/2003-06-24-BasicFunctionality.ll
index 559b652..f0de431 100644
--- a/llvm/test/Transforms/ADCE/2003-06-24-BasicFunctionality.ll
+++ b/llvm/test/Transforms/ADCE/2003-06-24-BasicFunctionality.ll
@@ -2,7 +2,7 @@
 
 define void @dead_test8(i32* %data.1, i32 %idx.1) {
 entry:
-        %tmp.1 = load i32* %data.1              ; <i32> [#uses=2]
+        %tmp.1 = load i32, i32* %data.1              ; <i32> [#uses=2]
         %tmp.41 = icmp sgt i32 %tmp.1, 0                ; <i1> [#uses=1]
         br i1 %tmp.41, label %no_exit.preheader, label %return
 
@@ -15,7 +15,7 @@
 no_exit:                ; preds = %endif, %no_exit.preheader
         %k.1 = phi i32 [ %k.0, %endif ], [ 0, %no_exit.preheader ]              ; <i32> [#uses=3]
         %i.0 = phi i32 [ %inc.1, %endif ], [ 0, %no_exit.preheader ]            ; <i32> [#uses=1]
-        %tmp.12 = load i32* %tmp.11             ; <i32> [#uses=1]
+        %tmp.12 = load i32, i32* %tmp.11             ; <i32> [#uses=1]
         %tmp.14 = sub i32 0, %tmp.12            ; <i32> [#uses=1]
         %tmp.161 = icmp ne i32 %k.1, %tmp.14            ; <i1> [#uses=1]
         br i1 %tmp.161, label %then, label %else
diff --git a/llvm/test/Transforms/ADCE/basictest1.ll b/llvm/test/Transforms/ADCE/basictest1.ll
index 7e2a786..4d0d386 100644
--- a/llvm/test/Transforms/ADCE/basictest1.ll
+++ b/llvm/test/Transforms/ADCE/basictest1.ll
@@ -22,7 +22,7 @@
 declare void @perror(i8*)
 
 define i32 @spec_getc(i32 %fd) {
-	%reg109 = load i32* @dbglvl		; <i32> [#uses=1]
+	%reg109 = load i32, i32* @dbglvl		; <i32> [#uses=1]
 	%cond266 = icmp sle i32 %reg109, 4		; <i1> [#uses=1]
 	br i1 %cond266, label %bb3, label %bb2
 
@@ -46,14 +46,14 @@
 	%reg107-idxcast2 = sext i32 %fd to i64		; <i64> [#uses=1]
 	%reg1311 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast2		; <%spec_fd_t*> [#uses=1]
 	%idx1 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast1, i32 2		; <i32*> [#uses=1]
-	%reg1321 = load i32* %idx1		; <i32> [#uses=3]
+	%reg1321 = load i32, i32* %idx1		; <i32> [#uses=3]
 	%idx2 = getelementptr %spec_fd_t, %spec_fd_t* %reg1311, i64 0, i32 1		; <i32*> [#uses=1]
-	%reg1331 = load i32* %idx2		; <i32> [#uses=1]
+	%reg1331 = load i32, i32* %idx2		; <i32> [#uses=1]
 	%cond270 = icmp slt i32 %reg1321, %reg1331		; <i1> [#uses=1]
 	br i1 %cond270, label %bb9, label %bb6
 
 bb6:		; preds = %bb5
-	%reg134 = load i32* @dbglvl		; <i32> [#uses=1]
+	%reg134 = load i32, i32* @dbglvl		; <i32> [#uses=1]
 	%cond271 = icmp sle i32 %reg134, 4		; <i1> [#uses=1]
 	br i1 %cond271, label %bb8, label %bb7
 
@@ -67,15 +67,15 @@
 bb9:		; preds = %bb5
 	%reg107-idxcast3 = sext i32 %fd to i64		; <i64> [#uses=1]
 	%idx3 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast3, i32 3		; <i8**> [#uses=1]
-	%reg1601 = load i8** %idx3		; <i8*> [#uses=1]
+	%reg1601 = load i8*, i8** %idx3		; <i8*> [#uses=1]
 	%reg132-idxcast1 = sext i32 %reg1321 to i64		; <i64> [#uses=1]
 	%idx4 = getelementptr i8, i8* %reg1601, i64 %reg132-idxcast1		; <i8*> [#uses=1]
-	%reg1621 = load i8* %idx4		; <i8> [#uses=2]
+	%reg1621 = load i8, i8* %idx4		; <i8> [#uses=2]
 	%cast108 = zext i8 %reg1621 to i64		; <i64> [#uses=0]
 	%reg157 = add i32 %reg1321, 1		; <i32> [#uses=1]
 	%idx5 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast1, i32 2		; <i32*> [#uses=1]
 	store i32 %reg157, i32* %idx5
-	%reg163 = load i32* @dbglvl		; <i32> [#uses=1]
+	%reg163 = load i32, i32* @dbglvl		; <i32> [#uses=1]
 	%cond272 = icmp sle i32 %reg163, 4		; <i1> [#uses=1]
 	br i1 %cond272, label %bb11, label %bb10
 
diff --git a/llvm/test/Transforms/ADCE/basictest2.ll b/llvm/test/Transforms/ADCE/basictest2.ll
index a2d5e73..26b2e85 100644
--- a/llvm/test/Transforms/ADCE/basictest2.ll
+++ b/llvm/test/Transforms/ADCE/basictest2.ll
@@ -22,7 +22,7 @@
 declare void @perror(i8*)
 
 define i32 @spec_getc(i32 %fd) {
-	%reg109 = load i32* @dbglvl		; <i32> [#uses=1]
+	%reg109 = load i32, i32* @dbglvl		; <i32> [#uses=1]
 	%cond266 = icmp sle i32 %reg109, 4		; <i1> [#uses=1]
 	br i1 %cond266, label %bb3, label %bb2
 
@@ -46,14 +46,14 @@
 	%reg107-idxcast2 = sext i32 %fd to i64		; <i64> [#uses=1]
 	%reg1311 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast2		; <%spec_fd_t*> [#uses=1]
 	%idx1 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast1, i32 2		; <i32*> [#uses=1]
-	%reg1321 = load i32* %idx1		; <i32> [#uses=3]
+	%reg1321 = load i32, i32* %idx1		; <i32> [#uses=3]
 	%idx2 = getelementptr %spec_fd_t, %spec_fd_t* %reg1311, i64 0, i32 1		; <i32*> [#uses=1]
-	%reg1331 = load i32* %idx2		; <i32> [#uses=1]
+	%reg1331 = load i32, i32* %idx2		; <i32> [#uses=1]
 	%cond270 = icmp slt i32 %reg1321, %reg1331		; <i1> [#uses=1]
 	br i1 %cond270, label %bb9, label %bb6
 
 bb6:		; preds = %bb5
-	%reg134 = load i32* @dbglvl		; <i32> [#uses=1]
+	%reg134 = load i32, i32* @dbglvl		; <i32> [#uses=1]
 	%cond271 = icmp sle i32 %reg134, 4		; <i1> [#uses=1]
 	br i1 %cond271, label %bb8, label %bb7
 
@@ -67,15 +67,15 @@
 bb9:		; preds = %bb5
 	%reg107-idxcast3 = sext i32 %fd to i64		; <i64> [#uses=1]
 	%idx3 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast3, i32 3		; <i8**> [#uses=1]
-	%reg1601 = load i8** %idx3		; <i8*> [#uses=1]
+	%reg1601 = load i8*, i8** %idx3		; <i8*> [#uses=1]
 	%reg132-idxcast1 = sext i32 %reg1321 to i64		; <i64> [#uses=1]
 	%idx4 = getelementptr i8, i8* %reg1601, i64 %reg132-idxcast1		; <i8*> [#uses=1]
-	%reg1621 = load i8* %idx4		; <i8> [#uses=2]
+	%reg1621 = load i8, i8* %idx4		; <i8> [#uses=2]
 	%cast108 = zext i8 %reg1621 to i64		; <i64> [#uses=0]
 	%reg157 = add i32 %reg1321, 1		; <i32> [#uses=1]
 	%idx5 = getelementptr [3 x %spec_fd_t], [3 x %spec_fd_t]* @spec_fd, i64 0, i64 %reg107-idxcast1, i32 2		; <i32*> [#uses=1]
 	store i32 %reg157, i32* %idx5
-	%reg163 = load i32* @dbglvl		; <i32> [#uses=1]
+	%reg163 = load i32, i32* @dbglvl		; <i32> [#uses=1]
 	%cond272 = icmp sle i32 %reg163, 4		; <i1> [#uses=1]
 	br i1 %cond272, label %bb11, label %bb10
 
diff --git a/llvm/test/Transforms/AddDiscriminators/basic.ll b/llvm/test/Transforms/AddDiscriminators/basic.ll
index 7c8b3d3..464e7e7 100644
--- a/llvm/test/Transforms/AddDiscriminators/basic.ll
+++ b/llvm/test/Transforms/AddDiscriminators/basic.ll
@@ -16,13 +16,13 @@
   %i.addr = alloca i32, align 4
   %x = alloca i32, align 4
   store i32 %i, i32* %i.addr, align 4
-  %0 = load i32* %i.addr, align 4, !dbg !10
+  %0 = load i32, i32* %i.addr, align 4, !dbg !10
   %cmp = icmp slt i32 %0, 10, !dbg !10
   br i1 %cmp, label %if.then, label %if.end, !dbg !10
 
 if.then:                                          ; preds = %entry
-  %1 = load i32* %i.addr, align 4, !dbg !10
-; CHECK:  %1 = load i32* %i.addr, align 4, !dbg !12
+  %1 = load i32, i32* %i.addr, align 4, !dbg !10
+; CHECK:  %1 = load i32, i32* %i.addr, align 4, !dbg !12
 
   store i32 %1, i32* %x, align 4, !dbg !10
 ; CHECK:  store i32 %1, i32* %x, align 4, !dbg !12
diff --git a/llvm/test/Transforms/AddDiscriminators/first-only.ll b/llvm/test/Transforms/AddDiscriminators/first-only.ll
index 153cfc8..f0fff8a 100644
--- a/llvm/test/Transforms/AddDiscriminators/first-only.ll
+++ b/llvm/test/Transforms/AddDiscriminators/first-only.ll
@@ -19,16 +19,16 @@
   %x = alloca i32, align 4
   %y = alloca i32, align 4
   store i32 %i, i32* %i.addr, align 4
-  %0 = load i32* %i.addr, align 4, !dbg !10
+  %0 = load i32, i32* %i.addr, align 4, !dbg !10
   %cmp = icmp slt i32 %0, 10, !dbg !10
   br i1 %cmp, label %if.then, label %if.end, !dbg !10
 
 if.then:                                          ; preds = %entry
-  %1 = load i32* %i.addr, align 4, !dbg !12
+  %1 = load i32, i32* %i.addr, align 4, !dbg !12
   store i32 %1, i32* %x, align 4, !dbg !12
 
-  %2 = load i32* %i.addr, align 4, !dbg !14
-; CHECK:  %2 = load i32* %i.addr, align 4, !dbg !15
+  %2 = load i32, i32* %i.addr, align 4, !dbg !14
+; CHECK:  %2 = load i32, i32* %i.addr, align 4, !dbg !15
 
   %sub = sub nsw i32 0, %2, !dbg !14
 ; CHECK:  %sub = sub nsw i32 0, %2, !dbg !15
diff --git a/llvm/test/Transforms/AddDiscriminators/multiple.ll b/llvm/test/Transforms/AddDiscriminators/multiple.ll
index 5e552a8..4643188 100644
--- a/llvm/test/Transforms/AddDiscriminators/multiple.ll
+++ b/llvm/test/Transforms/AddDiscriminators/multiple.ll
@@ -15,13 +15,13 @@
   %i.addr = alloca i32, align 4
   %x = alloca i32, align 4
   store i32 %i, i32* %i.addr, align 4
-  %0 = load i32* %i.addr, align 4, !dbg !10
+  %0 = load i32, i32* %i.addr, align 4, !dbg !10
   %cmp = icmp slt i32 %0, 10, !dbg !10
   br i1 %cmp, label %if.then, label %if.else, !dbg !10
 
 if.then:                                          ; preds = %entry
-  %1 = load i32* %i.addr, align 4, !dbg !10
-; CHECK:  %1 = load i32* %i.addr, align 4, !dbg !12
+  %1 = load i32, i32* %i.addr, align 4, !dbg !10
+; CHECK:  %1 = load i32, i32* %i.addr, align 4, !dbg !12
 
   store i32 %1, i32* %x, align 4, !dbg !10
 ; CHECK:  store i32 %1, i32* %x, align 4, !dbg !12
@@ -30,8 +30,8 @@
 ; CHECK:  br label %if.end, !dbg !12
 
 if.else:                                          ; preds = %entry
-  %2 = load i32* %i.addr, align 4, !dbg !10
-; CHECK:  %2 = load i32* %i.addr, align 4, !dbg !14
+  %2 = load i32, i32* %i.addr, align 4, !dbg !10
+; CHECK:  %2 = load i32, i32* %i.addr, align 4, !dbg !14
 
   %sub = sub nsw i32 0, %2, !dbg !10
 ; CHECK:  %sub = sub nsw i32 0, %2, !dbg !14
diff --git a/llvm/test/Transforms/AddDiscriminators/no-discriminators.ll b/llvm/test/Transforms/AddDiscriminators/no-discriminators.ll
index dd7faf0..0010498 100644
--- a/llvm/test/Transforms/AddDiscriminators/no-discriminators.ll
+++ b/llvm/test/Transforms/AddDiscriminators/no-discriminators.ll
@@ -18,8 +18,8 @@
   %i.addr = alloca i64, align 8
   store i64 %i, i64* %i.addr, align 8
   call void @llvm.dbg.declare(metadata i64* %i.addr, metadata !13, metadata !{}), !dbg !14
-  %0 = load i64* %i.addr, align 8, !dbg !15
-; CHECK:  %0 = load i64* %i.addr, align 8, !dbg !15
+  %0 = load i64, i64* %i.addr, align 8, !dbg !15
+; CHECK:  %0 = load i64, i64* %i.addr, align 8, !dbg !15
   %cmp = icmp slt i64 %0, 5, !dbg !15
 ; CHECK:  %cmp = icmp slt i64 %0, 5, !dbg !15
   br i1 %cmp, label %if.then, label %if.else, !dbg !15
@@ -34,7 +34,7 @@
   br label %return, !dbg !15
 
 return:                                           ; preds = %if.else, %if.then
-  %1 = load i32* %retval, !dbg !17
+  %1 = load i32, i32* %retval, !dbg !17
   ret i32 %1, !dbg !17
 }
 
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll b/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll
index 887126ddb..851e6dc 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll
@@ -7,11 +7,11 @@
   %maskedptr = and i64 %ptrint, 31
   %maskcond = icmp eq i64 %maskedptr, 0
   tail call void @llvm.assume(i1 %maskcond)
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   ret i32 %0
 
 ; CHECK-LABEL: @foo
-; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: load i32, i32* {{[^,]+}}, align 32
 ; CHECK: ret i32
 }
 
@@ -23,11 +23,11 @@
   %maskcond = icmp eq i64 %maskedptr, 0
   tail call void @llvm.assume(i1 %maskcond)
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 2
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   ret i32 %0
 
 ; CHECK-LABEL: @foo2
-; CHECK: load i32* {{[^,]+}}, align 16
+; CHECK: load i32, i32* {{[^,]+}}, align 16
 ; CHECK: ret i32
 }
 
@@ -39,11 +39,11 @@
   %maskcond = icmp eq i64 %maskedptr, 0
   tail call void @llvm.assume(i1 %maskcond)
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 -1
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   ret i32 %0
 
 ; CHECK-LABEL: @foo2a
-; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: load i32, i32* {{[^,]+}}, align 32
 ; CHECK: ret i32
 }
 
@@ -53,11 +53,11 @@
   %maskedptr = and i64 %ptrint, 31
   %maskcond = icmp eq i64 %maskedptr, 0
   tail call void @llvm.assume(i1 %maskcond)
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   ret i32 %0
 
 ; CHECK-LABEL: @goo
-; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: load i32, i32* {{[^,]+}}, align 32
 ; CHECK: ret i32
 }
 
@@ -73,7 +73,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %r.06
   %indvars.iv.next = add i64 %indvars.iv, 8
   %1 = trunc i64 %indvars.iv.next to i32
@@ -85,7 +85,7 @@
   ret i32 %add.lcssa
 
 ; CHECK-LABEL: @hoo
-; CHECK: load i32* %arrayidx, align 32
+; CHECK: load i32, i32* %arrayidx, align 32
 ; CHECK: ret i32 %add.lcssa
 }
 
@@ -101,7 +101,7 @@
   %indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ]
   %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %r.06
   %indvars.iv.next = add i64 %indvars.iv, 8
   %1 = trunc i64 %indvars.iv.next to i32
@@ -113,7 +113,7 @@
   ret i32 %add.lcssa
 
 ; CHECK-LABEL: @joo
-; CHECK: load i32* %arrayidx, align 16
+; CHECK: load i32, i32* %arrayidx, align 16
 ; CHECK: ret i32 %add.lcssa
 }
 
@@ -129,7 +129,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %r.06
   %indvars.iv.next = add i64 %indvars.iv, 4
   %1 = trunc i64 %indvars.iv.next to i32
@@ -141,7 +141,7 @@
   ret i32 %add.lcssa
 
 ; CHECK-LABEL: @koo
-; CHECK: load i32* %arrayidx, align 16
+; CHECK: load i32, i32* %arrayidx, align 16
 ; CHECK: ret i32 %add.lcssa
 }
 
@@ -157,7 +157,7 @@
   %indvars.iv = phi i64 [ -4, %entry ], [ %indvars.iv.next, %for.body ]
   %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %r.06
   %indvars.iv.next = add i64 %indvars.iv, 4
   %1 = trunc i64 %indvars.iv.next to i32
@@ -169,7 +169,7 @@
   ret i32 %add.lcssa
 
 ; CHECK-LABEL: @koo2
-; CHECK: load i32* %arrayidx, align 16
+; CHECK: load i32, i32* %arrayidx, align 16
 ; CHECK: ret i32 %add.lcssa
 }
 
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll b/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll
index 7a0a6b6..2edc2e9 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll
@@ -7,11 +7,11 @@
   %maskedptr = and i64 %ptrint, 31
   %maskcond = icmp eq i64 %maskedptr, 0
   tail call void @llvm.assume(i1 %maskcond)
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   ret i32 %0
 
 ; CHECK-LABEL: @foo
-; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: load i32, i32* {{[^,]+}}, align 32
 ; CHECK: ret i32
 }
 
@@ -23,11 +23,11 @@
   %maskcond = icmp eq i64 %maskedptr, 0
   tail call void @llvm.assume(i1 %maskcond)
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 2
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   ret i32 %0
 
 ; CHECK-LABEL: @foo2
-; CHECK: load i32* {{[^,]+}}, align 16
+; CHECK: load i32, i32* {{[^,]+}}, align 16
 ; CHECK: ret i32
 }
 
@@ -39,11 +39,11 @@
   %maskcond = icmp eq i64 %maskedptr, 0
   tail call void @llvm.assume(i1 %maskcond)
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 -1
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   ret i32 %0
 
 ; CHECK-LABEL: @foo2a
-; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: load i32, i32* {{[^,]+}}, align 32
 ; CHECK: ret i32
 }
 
@@ -53,11 +53,11 @@
   %maskedptr = and i64 %ptrint, 31
   %maskcond = icmp eq i64 %maskedptr, 0
   tail call void @llvm.assume(i1 %maskcond)
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   ret i32 %0
 
 ; CHECK-LABEL: @goo
-; CHECK: load i32* {{[^,]+}}, align 32
+; CHECK: load i32, i32* {{[^,]+}}, align 32
 ; CHECK: ret i32
 }
 
@@ -73,7 +73,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %r.06
   %indvars.iv.next = add i64 %indvars.iv, 8
   %1 = trunc i64 %indvars.iv.next to i32
@@ -85,7 +85,7 @@
   ret i32 %add.lcssa
 
 ; CHECK-LABEL: @hoo
-; CHECK: load i32* %arrayidx, align 32
+; CHECK: load i32, i32* %arrayidx, align 32
 ; CHECK: ret i32 %add.lcssa
 }
 
@@ -101,7 +101,7 @@
   %indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ]
   %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %r.06
   %indvars.iv.next = add i64 %indvars.iv, 8
   %1 = trunc i64 %indvars.iv.next to i32
@@ -113,7 +113,7 @@
   ret i32 %add.lcssa
 
 ; CHECK-LABEL: @joo
-; CHECK: load i32* %arrayidx, align 16
+; CHECK: load i32, i32* %arrayidx, align 16
 ; CHECK: ret i32 %add.lcssa
 }
 
@@ -129,7 +129,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %r.06
   %indvars.iv.next = add i64 %indvars.iv, 4
   %1 = trunc i64 %indvars.iv.next to i32
@@ -141,7 +141,7 @@
   ret i32 %add.lcssa
 
 ; CHECK-LABEL: @koo
-; CHECK: load i32* %arrayidx, align 16
+; CHECK: load i32, i32* %arrayidx, align 16
 ; CHECK: ret i32 %add.lcssa
 }
 
@@ -157,7 +157,7 @@
   %indvars.iv = phi i64 [ -4, %entry ], [ %indvars.iv.next, %for.body ]
   %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %r.06
   %indvars.iv.next = add i64 %indvars.iv, 4
   %1 = trunc i64 %indvars.iv.next to i32
@@ -169,7 +169,7 @@
   ret i32 %add.lcssa
 
 ; CHECK-LABEL: @koo2
-; CHECK: load i32* %arrayidx, align 16
+; CHECK: load i32, i32* %arrayidx, align 16
 ; CHECK: ret i32 %add.lcssa
 }
 
diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/start-unk.ll b/llvm/test/Transforms/AlignmentFromAssumptions/start-unk.ll
index 754deec..99533cf 100644
--- a/llvm/test/Transforms/AlignmentFromAssumptions/start-unk.ll
+++ b/llvm/test/Transforms/AlignmentFromAssumptions/start-unk.ll
@@ -79,7 +79,7 @@
   %maskcond.i.i187 = icmp eq i64 %maskedptr.i.i186, 0
   tail call void @llvm.assume(i1 %maskcond.i.i187) #0
   %ret.0..sroa_cast.i.i188 = bitcast %type1* undef to i32*
-  %ret.0.copyload.i.i189 = load i32* %ret.0..sroa_cast.i.i188, align 2
+  %ret.0.copyload.i.i189 = load i32, i32* %ret.0..sroa_cast.i.i188, align 2
 
 ; CHECK: load {{.*}} align 2
 
@@ -117,7 +117,7 @@
   br i1 undef, label %cond.false.i70, label %cond.end.i
 
 if.then140:                                       ; preds = %for.body137
-  %ret.0.copyload.i.i102 = load i32* %ret.0..sroa_cast.i.i106, align 2
+  %ret.0.copyload.i.i102 = load i32, i32* %ret.0..sroa_cast.i.i106, align 2
 
 ; CHECK: load {{.*}} align 2
 
diff --git a/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
index 1226b98..c988774 100644
--- a/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
@@ -3,7 +3,7 @@
 ; CHECK: define internal i32 @deref(i32 %x.val) #0 {
 define internal i32 @deref(i32* %x) nounwind {
 entry:
-  %tmp2 = load i32* %x, align 4
+  %tmp2 = load i32, i32* %x, align 4
   ret i32 %tmp2
 }
 
diff --git a/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll b/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll
index 08a1b80..267a6c0 100644
--- a/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll
@@ -7,14 +7,14 @@
 define internal i32 @callee(i1 %C, i32* %A) {
 entry:
         ; Unconditionally load the element at %A
-        %A.0 = load i32* %A
+        %A.0 = load i32, i32* %A
         br i1 %C, label %T, label %F
 T:
         ret i32 %A.0
 F:
         ; Load the element at offset two from %A. This should not be promoted!
         %A.2 = getelementptr i32, i32* %A, i32 2
-        %R = load i32* %A.2
+        %R = load i32, i32* %A.2
         ret i32 %R
 }
 
diff --git a/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll b/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll
index 23ea2e0..3f521ba 100644
--- a/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/aggregate-promote.ll
@@ -11,8 +11,8 @@
 define internal i32 @test(%QuadTy* %P) {
         %A = getelementptr %QuadTy, %QuadTy* %P, i64 0, i32 3            ; <i32*> [#uses=1]
         %B = getelementptr %QuadTy, %QuadTy* %P, i64 0, i32 2            ; <i32*> [#uses=1]
-        %a = load i32* %A               ; <i32> [#uses=1]
-        %b = load i32* %B               ; <i32> [#uses=1]
+        %a = load i32, i32* %A               ; <i32> [#uses=1]
+        %b = load i32, i32* %B               ; <i32> [#uses=1]
         %V = add i32 %a, %b             ; <i32> [#uses=1]
         ret i32 %V
 }
diff --git a/llvm/test/Transforms/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/ArgumentPromotion/attrs.ll
index 6213645..46128f9 100644
--- a/llvm/test/Transforms/ArgumentPromotion/attrs.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/attrs.ll
@@ -5,7 +5,7 @@
 define internal void @f(%struct.ss* byval  %b, i32* byval %X, i32 %i) nounwind  {
 entry:
 	%tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
-	%tmp1 = load i32* %tmp, align 4
+	%tmp1 = load i32, i32* %tmp, align 4
 	%tmp2 = add i32 %tmp1, 1	
 	store i32 %tmp2, i32* %tmp, align 4
 
diff --git a/llvm/test/Transforms/ArgumentPromotion/basictest.ll b/llvm/test/Transforms/ArgumentPromotion/basictest.ll
index 8f78b98..89888bb 100644
--- a/llvm/test/Transforms/ArgumentPromotion/basictest.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/basictest.ll
@@ -3,8 +3,8 @@
 
 define internal i32 @test(i32* %X, i32* %Y) {
 ; CHECK-LABEL: define internal i32 @test(i32 %X.val, i32 %Y.val)
-  %A = load i32* %X
-  %B = load i32* %Y
+  %A = load i32, i32* %X
+  %B = load i32, i32* %Y
   %C = add i32 %A, %B
   ret i32 %C
 }
diff --git a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll
index 17845b8..6c0288f 100644
--- a/llvm/test/Transforms/ArgumentPromotion/byval-2.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/byval-2.ll
@@ -9,7 +9,7 @@
 ; CHECK-LABEL: define internal void @f(i32 %b.0, i64 %b.1, i32* byval %X)
 entry:
   %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
-  %tmp1 = load i32* %tmp, align 4
+  %tmp1 = load i32, i32* %tmp, align 4
   %tmp2 = add i32 %tmp1, 1
   store i32 %tmp2, i32* %tmp, align 4
 
diff --git a/llvm/test/Transforms/ArgumentPromotion/byval.ll b/llvm/test/Transforms/ArgumentPromotion/byval.ll
index f99050b..b091b09 100644
--- a/llvm/test/Transforms/ArgumentPromotion/byval.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/byval.ll
@@ -8,7 +8,7 @@
 ; CHECK-LABEL: define internal void @f(i32 %b.0, i64 %b.1)
 entry:
   %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0		; <i32*> [#uses=2]
-  %tmp1 = load i32* %tmp, align 4		; <i32> [#uses=1]
+  %tmp1 = load i32, i32* %tmp, align 4		; <i32> [#uses=1]
   %tmp2 = add i32 %tmp1, 1		; <i32> [#uses=1]
   store i32 %tmp2, i32* %tmp, align 4
   ret void
diff --git a/llvm/test/Transforms/ArgumentPromotion/chained.ll b/llvm/test/Transforms/ArgumentPromotion/chained.ll
index c9a4538..6ba2e8d 100644
--- a/llvm/test/Transforms/ArgumentPromotion/chained.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/chained.ll
@@ -5,8 +5,8 @@
 @G2 = constant i32* @G1         ; <i32**> [#uses=1]
 
 define internal i32 @test(i32** %X) {
-        %Y = load i32** %X              ; <i32*> [#uses=1]
-        %X.upgrd.1 = load i32* %Y               ; <i32> [#uses=1]
+        %Y = load i32*, i32** %X              ; <i32*> [#uses=1]
+        %X.upgrd.1 = load i32, i32* %Y               ; <i32> [#uses=1]
         ret i32 %X.upgrd.1
 }
 
diff --git a/llvm/test/Transforms/ArgumentPromotion/control-flow.ll b/llvm/test/Transforms/ArgumentPromotion/control-flow.ll
index e4a61da..cdff36e 100644
--- a/llvm/test/Transforms/ArgumentPromotion/control-flow.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/control-flow.ll
@@ -8,7 +8,7 @@
         ret i32 17
 
 F:              ; preds = %0
-        %X = load i32* %P               ; <i32> [#uses=1]
+        %X = load i32, i32* %P               ; <i32> [#uses=1]
         ret i32 %X
 }
 
diff --git a/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll
index db63584..7413f46 100644
--- a/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -argpromotion -S | FileCheck %s
 
-; CHECK: load i32* %A
+; CHECK: load i32, i32* %A
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
 
 define internal i32 @callee(i1 %C, i32* %P) {
@@ -10,7 +10,7 @@
         ret i32 17
 
 F:              ; preds = %0
-        %X = load i32* %P               ; <i32> [#uses=1]
+        %X = load i32, i32* %P               ; <i32> [#uses=1]
         ret i32 %X
 }
 
diff --git a/llvm/test/Transforms/ArgumentPromotion/crash.ll b/llvm/test/Transforms/ArgumentPromotion/crash.ll
index 353d318..dbd343a 100644
--- a/llvm/test/Transforms/ArgumentPromotion/crash.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/crash.ll
@@ -51,9 +51,9 @@
 define internal i32 @"clay_assign(Chain, Chain)"(%0* %c, %0* %d) {
 init:
   %0 = getelementptr %0, %0* %d, i32 0, i32 0
-  %1 = load %0** %0
+  %1 = load %0*, %0** %0
   %2 = getelementptr %0, %0* %c, i32 0, i32 0
-  %3 = load %0** %2
+  %3 = load %0*, %0** %2
   %4 = call i32 @"clay_assign(Chain, Chain)"(%0* %3, %0* %1)
   ret i32 0
 }
diff --git a/llvm/test/Transforms/ArgumentPromotion/dbg.ll b/llvm/test/Transforms/ArgumentPromotion/dbg.ll
index 65cf367..79d4b16 100644
--- a/llvm/test/Transforms/ArgumentPromotion/dbg.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/dbg.ll
@@ -5,8 +5,8 @@
 declare void @sink(i32)
 
 define internal void @test(i32** %X) {
-  %1 = load i32** %X, align 8
-  %2 = load i32* %1, align 8
+  %1 = load i32*, i32** %X, align 8
+  %2 = load i32, i32* %1, align 8
   call void @sink(i32 %2)
   ret void
 }
diff --git a/llvm/test/Transforms/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/ArgumentPromotion/fp80.ll
index e08c3f7..84ef603 100644
--- a/llvm/test/Transforms/ArgumentPromotion/fp80.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/fp80.ll
@@ -25,21 +25,21 @@
 entry:
   %bitcast = bitcast %union.u* %arg to %struct.s*
   %gep = getelementptr inbounds %struct.s, %struct.s* %bitcast, i64 0, i32 2
-  %result = load i8* %gep
+  %result = load i8, i8* %gep
   ret i8 %result
 }
 
 ; CHECK: internal x86_fp80 @UseLongDoubleSafely(x86_fp80 {{%.*}}) {
 define internal x86_fp80 @UseLongDoubleSafely(%union.u* byval align 16 %arg) {
   %gep = getelementptr inbounds %union.u, %union.u* %arg, i64 0, i32 0
-  %fp80 = load x86_fp80* %gep
+  %fp80 = load x86_fp80, x86_fp80* %gep
   ret x86_fp80 %fp80
 }
 
 ; CHECK: define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval %a) {
 define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval %a) {
   %p = bitcast %struct.Foo* %a to i64*
-  %v = load i64* %p
+  %v = load i64, i64* %p
   ret i64 %v
 }
 
diff --git a/llvm/test/Transforms/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/ArgumentPromotion/inalloca.ll
index 5160994..80bd6fd 100644
--- a/llvm/test/Transforms/ArgumentPromotion/inalloca.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/inalloca.ll
@@ -9,8 +9,8 @@
 entry:
   %f0 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 0
   %f1 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 1
-  %a = load i32* %f0, align 4
-  %b = load i32* %f1, align 4
+  %a = load i32, i32* %f0, align 4
+  %b = load i32, i32* %f1, align 4
   %r = add i32 %a, %b
   ret i32 %r
 }
diff --git a/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll b/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll
index db9d70d..3c8ed79 100644
--- a/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/reserve-tbaa.ll
@@ -14,9 +14,9 @@
 
 define internal fastcc void @fn(i32* nocapture readonly %p1, i64* nocapture readonly %p2) {
 entry:
-  %0 = load i64* %p2, align 8, !tbaa !1
+  %0 = load i64, i64* %p2, align 8, !tbaa !1
   %conv = trunc i64 %0 to i32
-  %1 = load i32* %p1, align 4, !tbaa !5
+  %1 = load i32, i32* %p1, align 4, !tbaa !5
   %conv1 = trunc i32 %1 to i8
   store i8 %conv1, i8* @d, align 1, !tbaa !7
   ret void
@@ -26,11 +26,11 @@
 entry:
 ; CHECK-LABEL: main
 ; CHECK: store i32 1, i32* %{{.*}}, align 4, !tbaa ![[I32:[0-9]+]]
-; CHECK: %g.val = load i32* @g, align 4, !tbaa ![[I32]]
-; CHECK: %c.val = load i64* @c, align 8, !tbaa ![[LONG:[0-9]+]]
-  %0 = load i32*** @e, align 8, !tbaa !8
+; CHECK: %g.val = load i32, i32* @g, align 4, !tbaa ![[I32]]
+; CHECK: %c.val = load i64, i64* @c, align 8, !tbaa ![[LONG:[0-9]+]]
+  %0 = load i32**, i32*** @e, align 8, !tbaa !8
   store i32* @g, i32** %0, align 8, !tbaa !8
-  %1 = load i32** @a, align 8, !tbaa !8
+  %1 = load i32*, i32** @a, align 8, !tbaa !8
   store i32 1, i32* %1, align 4, !tbaa !5
   call fastcc void @fn(i32* @g, i64* @c)
 
diff --git a/llvm/test/Transforms/BBVectorize/X86/loop1.ll b/llvm/test/Transforms/BBVectorize/X86/loop1.ll
index 34dc19f..c3c3045 100644
--- a/llvm/test/Transforms/BBVectorize/X86/loop1.ll
+++ b/llvm/test/Transforms/BBVectorize/X86/loop1.ll
@@ -13,9 +13,9 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds double, double* %in1, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %arrayidx2 = getelementptr inbounds double, double* %in2, i64 %indvars.iv
-  %1 = load double* %arrayidx2, align 8
+  %1 = load double, double* %arrayidx2, align 8
   %mul = fmul double %0, %0
   %mul3 = fmul double %0, %1
   %add = fadd double %mul, %mul3
diff --git a/llvm/test/Transforms/BBVectorize/X86/sh-rec2.ll b/llvm/test/Transforms/BBVectorize/X86/sh-rec2.ll
index 29b33a5..d7a004c 100644
--- a/llvm/test/Transforms/BBVectorize/X86/sh-rec2.ll
+++ b/llvm/test/Transforms/BBVectorize/X86/sh-rec2.ll
@@ -13,13 +13,13 @@
   %incdec.ptr157 = getelementptr inbounds i8, i8* %c, i64 11
   store i8 0, i8* %incdec.ptr136, align 1
   %arrayidx162 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 11
-  %0 = load i16* %arrayidx162, align 2
+  %0 = load i16, i16* %arrayidx162, align 2
   %conv1631 = trunc i16 %0 to i8
   %and164 = shl i8 %conv1631, 3
   %shl165 = and i8 %and164, 56
   %incdec.ptr172 = getelementptr inbounds i8, i8* %c, i64 12
   store i8 %shl165, i8* %incdec.ptr157, align 1
-  %1 = load i16* inttoptr (i64 2 to i16*), align 2
+  %1 = load i16, i16* inttoptr (i64 2 to i16*), align 2
   %conv1742 = trunc i16 %1 to i8
   %and175 = shl i8 %conv1742, 1
   %incdec.ptr183 = getelementptr inbounds i8, i8* %c, i64 13
@@ -29,13 +29,13 @@
   %arrayidx214 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 15
   %incdec.ptr220 = getelementptr inbounds i8, i8* %c, i64 15
   store i8 0, i8* %incdec.ptr199, align 1
-  %2 = load i16* %arrayidx214, align 2
+  %2 = load i16, i16* %arrayidx214, align 2
   %conv2223 = trunc i16 %2 to i8
   %and223 = shl i8 %conv2223, 6
   %incdec.ptr235 = getelementptr inbounds i8, i8* %c, i64 16
   store i8 %and223, i8* %incdec.ptr220, align 1
   %arrayidx240 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 19
-  %3 = load i16* %arrayidx240, align 2
+  %3 = load i16, i16* %arrayidx240, align 2
   %conv2414 = trunc i16 %3 to i8
   %and242 = shl i8 %conv2414, 2
   %shl243 = and i8 %and242, 28
@@ -44,7 +44,7 @@
   %incdec.ptr272 = getelementptr inbounds i8, i8* %c, i64 18
   store i8 0, i8* %incdec.ptr251, align 1
   %arrayidx282 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 25
-  %4 = load i16* %arrayidx282, align 2
+  %4 = load i16, i16* %arrayidx282, align 2
   %conv2835 = trunc i16 %4 to i8
   %and284 = and i8 %conv2835, 7
   %incdec.ptr287 = getelementptr inbounds i8, i8* %c, i64 19
@@ -54,14 +54,14 @@
   %incdec.ptr314 = getelementptr inbounds i8, i8* %c, i64 21
   store i8 0, i8* %incdec.ptr298, align 1
   %arrayidx319 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 26
-  %5 = load i16* %arrayidx319, align 4
+  %5 = load i16, i16* %arrayidx319, align 4
   %conv3206 = trunc i16 %5 to i8
   %and321 = shl i8 %conv3206, 4
   %shl322 = and i8 %and321, 112
   %incdec.ptr335 = getelementptr inbounds i8, i8* %c, i64 22
   store i8 %shl322, i8* %incdec.ptr314, align 1
   %arrayidx340 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 29
-  %6 = load i16* %arrayidx340, align 2
+  %6 = load i16, i16* %arrayidx340, align 2
   %conv3417 = trunc i16 %6 to i8
   %and342 = shl i8 %conv3417, 3
   %shl343 = and i8 %and342, 56
@@ -72,7 +72,7 @@
   %arrayidx381 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 36
   %incdec.ptr387 = getelementptr inbounds i8, i8* %c, i64 25
   store i8 0, i8* %incdec.ptr366, align 1
-  %7 = load i16* %arrayidx381, align 8
+  %7 = load i16, i16* %arrayidx381, align 8
   %conv3898 = trunc i16 %7 to i8
   %and390 = shl i8 %conv3898, 6
   store i8 %and390, i8* %incdec.ptr387, align 1
diff --git a/llvm/test/Transforms/BBVectorize/X86/sh-rec3.ll b/llvm/test/Transforms/BBVectorize/X86/sh-rec3.ll
index 61df336..2096deb 100644
--- a/llvm/test/Transforms/BBVectorize/X86/sh-rec3.ll
+++ b/llvm/test/Transforms/BBVectorize/X86/sh-rec3.ll
@@ -19,7 +19,7 @@
   %arraydecay3 = getelementptr inbounds [4 x i16], [4 x i16]* %Mc, i64 0, i64 0
   %arraydecay5 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 0
   call void @Gsm_Coder(%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565* %s, i16* %source, i16* %arraydecay, i16* %arraydecay1, i16* %arraydecay2, i16* %arraydecay3, i16* undef, i16* %arraydecay5) nounwind
-  %0 = load i64* %LARc28.sub, align 16
+  %0 = load i64, i64* %LARc28.sub, align 16
   %1 = trunc i64 %0 to i32
   %conv1 = lshr i32 %1, 2
   %and = and i32 %conv1, 15
@@ -39,21 +39,21 @@
   %incdec.ptr42 = getelementptr inbounds i8, i8* %c, i64 4
   store i8 0, i8* %incdec.ptr26, align 1
   %arrayidx52 = getelementptr inbounds [8 x i16], [8 x i16]* %tmpcast, i64 0, i64 7
-  %3 = load i16* %arrayidx52, align 2
+  %3 = load i16, i16* %arrayidx52, align 2
   %conv537 = trunc i16 %3 to i8
   %and54 = and i8 %conv537, 7
   %incdec.ptr57 = getelementptr inbounds i8, i8* %c, i64 5
   store i8 %and54, i8* %incdec.ptr42, align 1
   %incdec.ptr68 = getelementptr inbounds i8, i8* %c, i64 6
   store i8 0, i8* %incdec.ptr57, align 1
-  %4 = load i16* %arraydecay3, align 2
+  %4 = load i16, i16* %arraydecay3, align 2
   %conv748 = trunc i16 %4 to i8
   %and75 = shl i8 %conv748, 5
   %shl76 = and i8 %and75, 96
   %incdec.ptr84 = getelementptr inbounds i8, i8* %c, i64 7
   store i8 %shl76, i8* %incdec.ptr68, align 1
   %arrayidx94 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 1
-  %5 = load i16* %arrayidx94, align 2
+  %5 = load i16, i16* %arrayidx94, align 2
   %conv959 = trunc i16 %5 to i8
   %and96 = shl i8 %conv959, 1
   %shl97 = and i8 %and96, 14
@@ -62,7 +62,7 @@
   store i8 %or103, i8* %incdec.ptr84, align 1
   %arrayidx115 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 4
   %6 = bitcast i16* %arrayidx115 to i32*
-  %7 = load i32* %6, align 8
+  %7 = load i32, i32* %6, align 8
   %conv11610 = trunc i32 %7 to i8
   %and117 = and i8 %conv11610, 7
   %incdec.ptr120 = getelementptr inbounds i8, i8* %c, i64 9
@@ -77,11 +77,11 @@
   %incdec.ptr172 = getelementptr inbounds i8, i8* %c, i64 12
   store i8 0, i8* %incdec.ptr157, align 1
   %arrayidx173 = getelementptr inbounds [4 x i16], [4 x i16]* %Nc, i64 0, i64 1
-  %9 = load i16* %arrayidx173, align 2
+  %9 = load i16, i16* %arrayidx173, align 2
   %conv17412 = zext i16 %9 to i32
   %and175 = shl nuw nsw i32 %conv17412, 1
   %arrayidx177 = getelementptr inbounds [4 x i16], [4 x i16]* %bc, i64 0, i64 1
-  %10 = load i16* %arrayidx177, align 2
+  %10 = load i16, i16* %arrayidx177, align 2
   %conv17826 = zext i16 %10 to i32
   %shr17913 = lshr i32 %conv17826, 1
   %and180 = and i32 %shr17913, 1
@@ -90,14 +90,14 @@
   %incdec.ptr183 = getelementptr inbounds i8, i8* %c, i64 13
   store i8 %conv182, i8* %incdec.ptr172, align 1
   %arrayidx188 = getelementptr inbounds [4 x i16], [4 x i16]* %Mc, i64 0, i64 1
-  %11 = load i16* %arrayidx188, align 2
+  %11 = load i16, i16* %arrayidx188, align 2
   %conv18914 = trunc i16 %11 to i8
   %and190 = shl i8 %conv18914, 5
   %shl191 = and i8 %and190, 96
   %incdec.ptr199 = getelementptr inbounds i8, i8* %c, i64 14
   store i8 %shl191, i8* %incdec.ptr183, align 1
   %arrayidx209 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 14
-  %12 = load i16* %arrayidx209, align 4
+  %12 = load i16, i16* %arrayidx209, align 4
   %conv21015 = trunc i16 %12 to i8
   %and211 = shl i8 %conv21015, 1
   %shl212 = and i8 %and211, 14
@@ -106,7 +106,7 @@
   store i8 %or218, i8* %incdec.ptr199, align 1
   %arrayidx225 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 16
   %13 = bitcast i16* %arrayidx225 to i64*
-  %14 = load i64* %13, align 16
+  %14 = load i64, i64* %13, align 16
   %conv22616 = trunc i64 %14 to i8
   %and227 = shl i8 %conv22616, 3
   %shl228 = and i8 %and227, 56
@@ -120,17 +120,17 @@
   %arrayidx266 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 23
   %incdec.ptr272 = getelementptr inbounds i8, i8* %c, i64 18
   store i8 0, i8* %incdec.ptr251, align 1
-  %16 = load i16* %arrayidx266, align 2
+  %16 = load i16, i16* %arrayidx266, align 2
   %conv27418 = trunc i16 %16 to i8
   %and275 = shl i8 %conv27418, 6
   %incdec.ptr287 = getelementptr inbounds i8, i8* %c, i64 19
   store i8 %and275, i8* %incdec.ptr272, align 1
   %arrayidx288 = getelementptr inbounds [4 x i16], [4 x i16]* %Nc, i64 0, i64 2
-  %17 = load i16* %arrayidx288, align 2
+  %17 = load i16, i16* %arrayidx288, align 2
   %conv28919 = zext i16 %17 to i32
   %and290 = shl nuw nsw i32 %conv28919, 1
   %arrayidx292 = getelementptr inbounds [4 x i16], [4 x i16]* %bc, i64 0, i64 2
-  %18 = load i16* %arrayidx292, align 2
+  %18 = load i16, i16* %arrayidx292, align 2
   %conv29327 = zext i16 %18 to i32
   %shr29420 = lshr i32 %conv29327, 1
   %and295 = and i32 %shr29420, 1
@@ -145,7 +145,7 @@
   %incdec.ptr335 = getelementptr inbounds i8, i8* %c, i64 22
   store i8 0, i8* %incdec.ptr314, align 1
   %arrayidx340 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 29
-  %19 = load i16* %arrayidx340, align 2
+  %19 = load i16, i16* %arrayidx340, align 2
   %conv34122 = trunc i16 %19 to i8
   %and342 = shl i8 %conv34122, 3
   %shl343 = and i8 %and342, 56
@@ -153,7 +153,7 @@
   store i8 %shl343, i8* %incdec.ptr335, align 1
   %arrayidx355 = getelementptr inbounds [52 x i16], [52 x i16]* %xmc, i64 0, i64 32
   %20 = bitcast i16* %arrayidx355 to i32*
-  %21 = load i32* %20, align 16
+  %21 = load i32, i32* %20, align 16
   %conv35623 = shl i32 %21, 2
   %shl358 = and i32 %conv35623, 28
   %22 = lshr i32 %21, 17
diff --git a/llvm/test/Transforms/BBVectorize/X86/simple-ldstr.ll b/llvm/test/Transforms/BBVectorize/X86/simple-ldstr.ll
index 214ab9f..2c05f30 100644
--- a/llvm/test/Transforms/BBVectorize/X86/simple-ldstr.ll
+++ b/llvm/test/Transforms/BBVectorize/X86/simple-ldstr.ll
@@ -4,13 +4,13 @@
 ; Simple 3-pair chain with loads and stores
 define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
 entry:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   store double %mul, double* %c, align 8
   %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
@@ -19,8 +19,8 @@
 ; CHECK-LABEL: @test1(
 ; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
 ; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
-; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
-; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %i0 = load <2 x double>, <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>, <2 x double>* %i1.v.i0, align 8
 ; CHECK: %mul = fmul <2 x double> %i0, %i1
 ; CHECK: %0 = bitcast double* %c to <2 x double>*
 ; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
diff --git a/llvm/test/Transforms/BBVectorize/X86/wr-aliases.ll b/llvm/test/Transforms/BBVectorize/X86/wr-aliases.ll
index 57f8c92..56448c0 100644
--- a/llvm/test/Transforms/BBVectorize/X86/wr-aliases.ll
+++ b/llvm/test/Transforms/BBVectorize/X86/wr-aliases.ll
@@ -27,7 +27,7 @@
 ; CHECK: <2 x double>
 ; CHECK: @_ZL12printQBezier7QBezier
 ; CHECK: store double %mul8.i, double* %x3.i, align 16
-; CHECK: load double* %x3.i, align 16
+; CHECK: load double, double* %x3.i, align 16
 ; CHECK: ret
 
 arrayctor.cont:                                   ; preds = %newFuncRoot
@@ -58,72 +58,72 @@
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %v0, i8* %v3, i64 64, i32 8, i1 false)
   call fastcc void @_ZL12printQBezier7QBezier(%class.QBezier.15* byval align 8 %agg.tmp.i)
   %x2.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 2
-  %v4 = load double* %x2.i, align 16
+  %v4 = load double, double* %x2.i, align 16
   %x3.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 4
-  %v5 = load double* %x3.i, align 16
+  %v5 = load double, double* %x3.i, align 16
   %add.i = fadd double %v4, %v5
   %mul.i = fmul double 5.000000e-01, %add.i
   %x1.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 0
-  %v6 = load double* %x1.i, align 16
+  %v6 = load double, double* %x1.i, align 16
   %add3.i = fadd double %v4, %v6
   %mul4.i = fmul double 5.000000e-01, %add3.i
   %x25.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 2
   store double %mul4.i, double* %x25.i, align 16
-  %v7 = load double* %x3.i, align 16
+  %v7 = load double, double* %x3.i, align 16
   %x4.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 6
-  %v8 = load double* %x4.i, align 16
+  %v8 = load double, double* %x4.i, align 16
   %add7.i = fadd double %v7, %v8
   %mul8.i = fmul double 5.000000e-01, %add7.i
   store double %mul8.i, double* %x3.i, align 16
-  %v9 = load double* %x1.i, align 16
+  %v9 = load double, double* %x1.i, align 16
   %x111.i = getelementptr inbounds %class.QBezier.15, %class.QBezier.15* %add.ptr, i64 0, i32 0
   store double %v9, double* %x111.i, align 16
-  %v10 = load double* %x25.i, align 16
+  %v10 = load double, double* %x25.i, align 16
   %add15.i = fadd double %mul.i, %v10
   %mul16.i = fmul double 5.000000e-01, %add15.i
   %x317.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 4
   store double %mul16.i, double* %x317.i, align 16
-  %v11 = load double* %x3.i, align 16
+  %v11 = load double, double* %x3.i, align 16
   %add19.i = fadd double %mul.i, %v11
   %mul20.i = fmul double 5.000000e-01, %add19.i
   store double %mul20.i, double* %x2.i, align 16
-  %v12 = load double* %x317.i, align 16
+  %v12 = load double, double* %x317.i, align 16
   %add24.i = fadd double %v12, %mul20.i
   %mul25.i = fmul double 5.000000e-01, %add24.i
   store double %mul25.i, double* %x1.i, align 16
   %x427.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 6
   store double %mul25.i, double* %x427.i, align 16
   %y2.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 3
-  %v13 = load double* %y2.i, align 8
+  %v13 = load double, double* %y2.i, align 8
   %y3.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 5
-  %v14 = load double* %y3.i, align 8
+  %v14 = load double, double* %y3.i, align 8
   %add28.i = fadd double %v13, %v14
   %div.i = fmul double 5.000000e-01, %add28.i
   %y1.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 1
-  %v15 = load double* %y1.i, align 8
+  %v15 = load double, double* %y1.i, align 8
   %add30.i = fadd double %v13, %v15
   %mul31.i = fmul double 5.000000e-01, %add30.i
   %y232.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 3
   store double %mul31.i, double* %y232.i, align 8
-  %v16 = load double* %y3.i, align 8
+  %v16 = load double, double* %y3.i, align 8
   %y4.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 0, i32 7
-  %v17 = load double* %y4.i, align 8
+  %v17 = load double, double* %y4.i, align 8
   %add34.i = fadd double %v16, %v17
   %mul35.i = fmul double 5.000000e-01, %add34.i
   store double %mul35.i, double* %y3.i, align 8
-  %v18 = load double* %y1.i, align 8
+  %v18 = load double, double* %y1.i, align 8
   %y138.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 1
   store double %v18, double* %y138.i, align 8
-  %v19 = load double* %y232.i, align 8
+  %v19 = load double, double* %y232.i, align 8
   %add42.i = fadd double %div.i, %v19
   %mul43.i = fmul double 5.000000e-01, %add42.i
   %y344.i = getelementptr inbounds [10 x %class.QBezier.15], [10 x %class.QBezier.15]* %beziers, i64 0, i64 1, i32 5
   store double %mul43.i, double* %y344.i, align 8
-  %v20 = load double* %y3.i, align 8
+  %v20 = load double, double* %y3.i, align 8
   %add46.i = fadd double %div.i, %v20
   %mul47.i = fmul double 5.000000e-01, %add46.i
   store double %mul47.i, double* %y2.i, align 8
-  %v21 = load double* %y344.i, align 8
+  %v21 = load double, double* %y344.i, align 8
   %add51.i = fadd double %v21, %mul47.i
   %mul52.i = fmul double 5.000000e-01, %add51.i
   store double %mul52.i, double* %y1.i, align 8
diff --git a/llvm/test/Transforms/BBVectorize/func-alias.ll b/llvm/test/Transforms/BBVectorize/func-alias.ll
index cb1ea95..408edca 100644
--- a/llvm/test/Transforms/BBVectorize/func-alias.ll
+++ b/llvm/test/Transforms/BBVectorize/func-alias.ll
@@ -60,82 +60,82 @@
   %tmp134 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
   %tmp135 = getelementptr inbounds %struct.__st_parameter_common, %struct.__st_parameter_common* %tmp134, i32 0, i32 0
   store i32 4096, i32* %tmp135, align 4
-  %iounit.8748_288 = load i32* @__main1_MOD_iounit, align 4
+  %iounit.8748_288 = load i32, i32* @__main1_MOD_iounit, align 4
   %tmp136 = getelementptr inbounds %struct.__st_parameter_dt, %struct.__st_parameter_dt* %memtmp3, i32 0, i32 0
   %tmp137 = getelementptr inbounds %struct.__st_parameter_common, %struct.__st_parameter_common* %tmp136, i32 0, i32 1
   store i32 %iounit.8748_288, i32* %tmp137, align 4
   call void @_gfortran_st_write(%struct.__st_parameter_dt* %memtmp3) nounwind
   call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* @j.4580, i32 4) nounwind
 ; CHECK: @_gfortran_transfer_integer_write
-  %D.75807_289 = load i8** getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 0), align 8
-  %j.8758_290 = load i32* @j.4580, align 4
+  %D.75807_289 = load i8*, i8** getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 0), align 8
+  %j.8758_290 = load i32, i32* @j.4580, align 4
   %D.75760_291 = sext i32 %j.8758_290 to i64
-  %iave.8736_292 = load i32* @__main1_MOD_iave, align 4
+  %iave.8736_292 = load i32, i32* @__main1_MOD_iave, align 4
   %D.75620_293 = sext i32 %iave.8736_292 to i64
-  %D.75808_294 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75808_294 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 2, i32 0), align 8
   %D.75809_295 = mul nsw i64 %D.75620_293, %D.75808_294
-  %igrp.8737_296 = load i32* @__main1_MOD_igrp, align 4
+  %igrp.8737_296 = load i32, i32* @__main1_MOD_igrp, align 4
   %D.75635_297 = sext i32 %igrp.8737_296 to i64
-  %D.75810_298 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75810_298 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 1, i32 0), align 8
   %D.75811_299 = mul nsw i64 %D.75635_297, %D.75810_298
   %D.75812_300 = add nsw i64 %D.75809_295, %D.75811_299
   %D.75813_301 = add nsw i64 %D.75760_291, %D.75812_300
-  %ityp.8750_302 = load i32* @__main1_MOD_ityp, align 4
+  %ityp.8750_302 = load i32, i32* @__main1_MOD_ityp, align 4
   %D.75704_303 = sext i32 %ityp.8750_302 to i64
-  %D.75814_304 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75814_304 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 3, i32 0), align 8
   %D.75815_305 = mul nsw i64 %D.75704_303, %D.75814_304
   %D.75816_306 = add nsw i64 %D.75813_301, %D.75815_305
-  %D.75817_307 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 1), align 8
+  %D.75817_307 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 1), align 8
   %D.75818_308 = add nsw i64 %D.75816_306, %D.75817_307
   %tmp138 = bitcast i8* %D.75807_289 to [0 x float]*
   %tmp139 = bitcast [0 x float]* %tmp138 to float*
   %D.75819_309 = getelementptr inbounds float, float* %tmp139, i64 %D.75818_308
   call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* %D.75819_309, i32 4) nounwind
 ; CHECK: @_gfortran_transfer_real_write
-  %D.75820_310 = load i8** getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 0), align 8
-  %j.8758_311 = load i32* @j.4580, align 4
+  %D.75820_310 = load i8*, i8** getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 0), align 8
+  %j.8758_311 = load i32, i32* @j.4580, align 4
   %D.75760_312 = sext i32 %j.8758_311 to i64
-  %iave.8736_313 = load i32* @__main1_MOD_iave, align 4
+  %iave.8736_313 = load i32, i32* @__main1_MOD_iave, align 4
   %D.75620_314 = sext i32 %iave.8736_313 to i64
-  %D.75821_315 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75821_315 = load i64, i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 2, i32 0), align 8
   %D.75822_316 = mul nsw i64 %D.75620_314, %D.75821_315
-  %igrp.8737_317 = load i32* @__main1_MOD_igrp, align 4
+  %igrp.8737_317 = load i32, i32* @__main1_MOD_igrp, align 4
   %D.75635_318 = sext i32 %igrp.8737_317 to i64
-  %D.75823_319 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75823_319 = load i64, i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 1, i32 0), align 8
   %D.75824_320 = mul nsw i64 %D.75635_318, %D.75823_319
   %D.75825_321 = add nsw i64 %D.75822_316, %D.75824_320
   %D.75826_322 = add nsw i64 %D.75760_312, %D.75825_321
-  %ityp.8750_323 = load i32* @__main1_MOD_ityp, align 4
+  %ityp.8750_323 = load i32, i32* @__main1_MOD_ityp, align 4
   %D.75704_324 = sext i32 %ityp.8750_323 to i64
-  %D.75827_325 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75827_325 = load i64, i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 3, i32 0), align 8
   %D.75828_326 = mul nsw i64 %D.75704_324, %D.75827_325
   %D.75829_327 = add nsw i64 %D.75826_322, %D.75828_326
-  %D.75830_328 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 1), align 8
+  %D.75830_328 = load i64, i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 1), align 8
   %D.75831_329 = add nsw i64 %D.75829_327, %D.75830_328
   %tmp140 = bitcast i8* %D.75820_310 to [0 x [1 x i8]]*
   %tmp141 = bitcast [0 x [1 x i8]]* %tmp140 to [1 x i8]*
   %D.75832_330 = getelementptr inbounds [1 x i8], [1 x i8]* %tmp141, i64 %D.75831_329
   call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [1 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [1 x i8]* %D.75832_330, i32 1) nounwind
 ; CHECK: @_gfortran_transfer_character_write
-  %D.75833_331 = load i8** getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 0), align 8
-  %j.8758_332 = load i32* @j.4580, align 4
+  %D.75833_331 = load i8*, i8** getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 0), align 8
+  %j.8758_332 = load i32, i32* @j.4580, align 4
   %D.75760_333 = sext i32 %j.8758_332 to i64
-  %iave.8736_334 = load i32* @__main1_MOD_iave, align 4
+  %iave.8736_334 = load i32, i32* @__main1_MOD_iave, align 4
   %D.75620_335 = sext i32 %iave.8736_334 to i64
-  %D.75834_336 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75834_336 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 2, i32 0), align 8
   %D.75835_337 = mul nsw i64 %D.75620_335, %D.75834_336
-  %igrp.8737_338 = load i32* @__main1_MOD_igrp, align 4
+  %igrp.8737_338 = load i32, i32* @__main1_MOD_igrp, align 4
   %D.75635_339 = sext i32 %igrp.8737_338 to i64
-  %D.75836_340 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75836_340 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 1, i32 0), align 8
   %D.75837_341 = mul nsw i64 %D.75635_339, %D.75836_340
   %D.75838_342 = add nsw i64 %D.75835_337, %D.75837_341
   %D.75839_343 = add nsw i64 %D.75760_333, %D.75838_342
-  %ityp.8750_344 = load i32* @__main1_MOD_ityp, align 4
+  %ityp.8750_344 = load i32, i32* @__main1_MOD_ityp, align 4
   %D.75704_345 = sext i32 %ityp.8750_344 to i64
-  %D.75840_346 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75840_346 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 3, i32 0), align 8
   %D.75841_347 = mul nsw i64 %D.75704_345, %D.75840_346
   %D.75842_348 = add nsw i64 %D.75839_343, %D.75841_347
-  %D.75843_349 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 1), align 8
+  %D.75843_349 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 1), align 8
   %D.75844_350 = add nsw i64 %D.75842_348, %D.75843_349
   %tmp142 = bitcast i8* %D.75833_331 to [0 x i32]*
   %tmp143 = bitcast [0 x i32]* %tmp142 to i32*
@@ -150,75 +150,75 @@
 ; CHECK: @_gfortran_transfer_character_write
   call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_integer_write to void (%struct.__st_parameter_dt*, i32*, i32)*)(%struct.__st_parameter_dt* %memtmp3, i32* @j1.4581, i32 4) nounwind
 ; CHECK: @_gfortran_transfer_integer_write
-  %D.75807_352 = load i8** getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 0), align 8
-  %j1.8760_353 = load i32* @j1.4581, align 4
+  %D.75807_352 = load i8*, i8** getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 0), align 8
+  %j1.8760_353 = load i32, i32* @j1.4581, align 4
   %D.75773_354 = sext i32 %j1.8760_353 to i64
-  %iave.8736_355 = load i32* @__main1_MOD_iave, align 4
+  %iave.8736_355 = load i32, i32* @__main1_MOD_iave, align 4
   %D.75620_356 = sext i32 %iave.8736_355 to i64
-  %D.75808_357 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75808_357 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 2, i32 0), align 8
   %D.75809_358 = mul nsw i64 %D.75620_356, %D.75808_357
-  %igrp.8737_359 = load i32* @__main1_MOD_igrp, align 4
+  %igrp.8737_359 = load i32, i32* @__main1_MOD_igrp, align 4
   %D.75635_360 = sext i32 %igrp.8737_359 to i64
-  %D.75810_361 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75810_361 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 1, i32 0), align 8
   %D.75811_362 = mul nsw i64 %D.75635_360, %D.75810_361
   %D.75812_363 = add nsw i64 %D.75809_358, %D.75811_362
   %D.75846_364 = add nsw i64 %D.75773_354, %D.75812_363
-  %ityp.8750_365 = load i32* @__main1_MOD_ityp, align 4
+  %ityp.8750_365 = load i32, i32* @__main1_MOD_ityp, align 4
   %D.75704_366 = sext i32 %ityp.8750_365 to i64
-  %D.75814_367 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75814_367 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 3, i64 3, i32 0), align 8
   %D.75815_368 = mul nsw i64 %D.75704_366, %D.75814_367
   %D.75847_369 = add nsw i64 %D.75846_364, %D.75815_368
-  %D.75817_370 = load i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 1), align 8
+  %D.75817_370 = load i64, i64* getelementptr inbounds (%"struct.array4_real(kind=4)"* @__main1_MOD_rmxval, i64 0, i32 1), align 8
   %D.75848_371 = add nsw i64 %D.75847_369, %D.75817_370
   %tmp144 = bitcast i8* %D.75807_352 to [0 x float]*
   %tmp145 = bitcast [0 x float]* %tmp144 to float*
   %D.75849_372 = getelementptr inbounds float, float* %tmp145, i64 %D.75848_371
   call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_real_write to void (%struct.__st_parameter_dt*, float*, i32)*)(%struct.__st_parameter_dt* %memtmp3, float* %D.75849_372, i32 4) nounwind
 ; CHECK: @_gfortran_transfer_real_write
-  %D.75820_373 = load i8** getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 0), align 8
-  %j1.8760_374 = load i32* @j1.4581, align 4
+  %D.75820_373 = load i8*, i8** getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 0), align 8
+  %j1.8760_374 = load i32, i32* @j1.4581, align 4
   %D.75773_375 = sext i32 %j1.8760_374 to i64
-  %iave.8736_376 = load i32* @__main1_MOD_iave, align 4
+  %iave.8736_376 = load i32, i32* @__main1_MOD_iave, align 4
   %D.75620_377 = sext i32 %iave.8736_376 to i64
-  %D.75821_378 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75821_378 = load i64, i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 2, i32 0), align 8
   %D.75822_379 = mul nsw i64 %D.75620_377, %D.75821_378
-  %igrp.8737_380 = load i32* @__main1_MOD_igrp, align 4
+  %igrp.8737_380 = load i32, i32* @__main1_MOD_igrp, align 4
   %D.75635_381 = sext i32 %igrp.8737_380 to i64
-  %D.75823_382 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75823_382 = load i64, i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 1, i32 0), align 8
   %D.75824_383 = mul nsw i64 %D.75635_381, %D.75823_382
   %D.75825_384 = add nsw i64 %D.75822_379, %D.75824_383
   %D.75850_385 = add nsw i64 %D.75773_375, %D.75825_384
-  %ityp.8750_386 = load i32* @__main1_MOD_ityp, align 4
+  %ityp.8750_386 = load i32, i32* @__main1_MOD_ityp, align 4
   %D.75704_387 = sext i32 %ityp.8750_386 to i64
-  %D.75827_388 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75827_388 = load i64, i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 3, i64 3, i32 0), align 8
   %D.75828_389 = mul nsw i64 %D.75704_387, %D.75827_388
   %D.75851_390 = add nsw i64 %D.75850_385, %D.75828_389
-  %D.75830_391 = load i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 1), align 8
+  %D.75830_391 = load i64, i64* getelementptr inbounds (%struct.array4_unknown* @__main1_MOD_mclmsg, i64 0, i32 1), align 8
   %D.75852_392 = add nsw i64 %D.75851_390, %D.75830_391
   %tmp146 = bitcast i8* %D.75820_373 to [0 x [1 x i8]]*
   %tmp147 = bitcast [0 x [1 x i8]]* %tmp146 to [1 x i8]*
   %D.75853_393 = getelementptr inbounds [1 x i8], [1 x i8]* %tmp147, i64 %D.75852_392
   call void bitcast (void (%struct.__st_parameter_dt*, i8*, i32)* @_gfortran_transfer_character_write to void (%struct.__st_parameter_dt*, [1 x i8]*, i32)*)(%struct.__st_parameter_dt* %memtmp3, [1 x i8]* %D.75853_393, i32 1) nounwind
 ; CHECK: @_gfortran_transfer_character_write
-  %D.75833_394 = load i8** getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 0), align 8
-  %j1.8760_395 = load i32* @j1.4581, align 4
+  %D.75833_394 = load i8*, i8** getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 0), align 8
+  %j1.8760_395 = load i32, i32* @j1.4581, align 4
   %D.75773_396 = sext i32 %j1.8760_395 to i64
-  %iave.8736_397 = load i32* @__main1_MOD_iave, align 4
+  %iave.8736_397 = load i32, i32* @__main1_MOD_iave, align 4
   %D.75620_398 = sext i32 %iave.8736_397 to i64
-  %D.75834_399 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 2, i32 0), align 8
+  %D.75834_399 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 2, i32 0), align 8
   %D.75835_400 = mul nsw i64 %D.75620_398, %D.75834_399
-  %igrp.8737_401 = load i32* @__main1_MOD_igrp, align 4
+  %igrp.8737_401 = load i32, i32* @__main1_MOD_igrp, align 4
   %D.75635_402 = sext i32 %igrp.8737_401 to i64
-  %D.75836_403 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 1, i32 0), align 8
+  %D.75836_403 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 1, i32 0), align 8
   %D.75837_404 = mul nsw i64 %D.75635_402, %D.75836_403
   %D.75838_405 = add nsw i64 %D.75835_400, %D.75837_404
   %D.75854_406 = add nsw i64 %D.75773_396, %D.75838_405
-  %ityp.8750_407 = load i32* @__main1_MOD_ityp, align 4
+  %ityp.8750_407 = load i32, i32* @__main1_MOD_ityp, align 4
   %D.75704_408 = sext i32 %ityp.8750_407 to i64
-  %D.75840_409 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 3, i32 0), align 8
+  %D.75840_409 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 3, i64 3, i32 0), align 8
   %D.75841_410 = mul nsw i64 %D.75704_408, %D.75840_409
   %D.75855_411 = add nsw i64 %D.75854_406, %D.75841_410
-  %D.75843_412 = load i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 1), align 8
+  %D.75843_412 = load i64, i64* getelementptr inbounds (%"struct.array4_integer(kind=4).73"* @__main1_MOD_mxdate, i64 0, i32 1), align 8
   %D.75856_413 = add nsw i64 %D.75855_411, %D.75843_412
   %tmp148 = bitcast i8* %D.75833_394 to [0 x i32]*
   %tmp149 = bitcast [0 x i32]* %tmp148 to i32*
@@ -233,9 +233,9 @@
 ; CHECK: @_gfortran_transfer_character_write
   call void @_gfortran_st_write_done(%struct.__st_parameter_dt* %memtmp3) nounwind
 ; CHECK: @_gfortran_st_write_done
-  %j.8758_415 = load i32* @j.4580, align 4
+  %j.8758_415 = load i32, i32* @j.4580, align 4
   %D.4634_416 = icmp eq i32 %j.8758_415, %D.4627_188.reload
-  %j.8758_417 = load i32* @j.4580, align 4
+  %j.8758_417 = load i32, i32* @j.4580, align 4
   %j.8770_418 = add nsw i32 %j.8758_417, 1
   store i32 %j.8770_418, i32* @j.4580, align 4
   %tmp150 = icmp ne i1 %D.4634_416, false
diff --git a/llvm/test/Transforms/BBVectorize/ld1.ll b/llvm/test/Transforms/BBVectorize/ld1.ll
index 65fa49a..368c38a 100644
--- a/llvm/test/Transforms/BBVectorize/ld1.ll
+++ b/llvm/test/Transforms/BBVectorize/ld1.ll
@@ -3,18 +3,18 @@
 
 define double @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
 entry:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
-  %i2 = load double* %c, align 8
+  %i2 = load double, double* %c, align 8
   %add = fadd double %mul, %i2
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   %arrayidx6 = getelementptr inbounds double, double* %c, i64 1
-  %i5 = load double* %arrayidx6, align 8
+  %i5 = load double, double* %arrayidx6, align 8
   %add7 = fadd double %mul5, %i5
   %mul9 = fmul double %add, %i1
   %add11 = fadd double %mul9, %i2
@@ -26,10 +26,10 @@
 ; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
 ; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
 ; CHECK: %i2.v.i0 = bitcast double* %c to <2 x double>*
-; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
-; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %i0 = load <2 x double>, <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>, <2 x double>* %i1.v.i0, align 8
 ; CHECK: %mul = fmul <2 x double> %i0, %i1
-; CHECK: %i2 = load <2 x double>* %i2.v.i0, align 8
+; CHECK: %i2 = load <2 x double>, <2 x double>* %i2.v.i0, align 8
 ; CHECK: %add = fadd <2 x double> %mul, %i2
 ; CHECK: %mul9 = fmul <2 x double> %add, %i1
 ; CHECK: %add11 = fadd <2 x double> %mul9, %i2
diff --git a/llvm/test/Transforms/BBVectorize/loop1.ll b/llvm/test/Transforms/BBVectorize/loop1.ll
index 45e14df..70a5def 100644
--- a/llvm/test/Transforms/BBVectorize/loop1.ll
+++ b/llvm/test/Transforms/BBVectorize/loop1.ll
@@ -13,9 +13,9 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds double, double* %in1, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %arrayidx2 = getelementptr inbounds double, double* %in2, i64 %indvars.iv
-  %1 = load double* %arrayidx2, align 8
+  %1 = load double, double* %arrayidx2, align 8
   %mul = fmul double %0, %0
   %mul3 = fmul double %0, %1
   %add = fadd double %mul, %mul3
@@ -36,9 +36,9 @@
   br i1 %exitcond, label %for.end, label %for.body
 ; CHECK: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
 ; CHECK: %arrayidx = getelementptr inbounds double, double* %in1, i64 %indvars.iv
-; CHECK: %0 = load double* %arrayidx, align 8
+; CHECK: %0 = load double, double* %arrayidx, align 8
 ; CHECK: %arrayidx2 = getelementptr inbounds double, double* %in2, i64 %indvars.iv
-; CHECK: %1 = load double* %arrayidx2, align 8
+; CHECK: %1 = load double, double* %arrayidx2, align 8
 ; CHECK: %mul = fmul double %0, %0
 ; CHECK: %mul3 = fmul double %0, %1
 ; CHECK: %add = fadd double %mul, %mul3
@@ -67,8 +67,8 @@
 ; CHECK-UNRL: %arrayidx2 = getelementptr inbounds double, double* %in2, i64 %indvars.iv
 ; CHECK-UNRL: %1 = bitcast double* %arrayidx2 to <2 x double>*
 ; CHECK-UNRL: %arrayidx14 = getelementptr inbounds double, double* %out, i64 %indvars.iv
-; CHECK-UNRL: %2 = load <2 x double>* %0, align 8
-; CHECK-UNRL: %3 = load <2 x double>* %1, align 8
+; CHECK-UNRL: %2 = load <2 x double>, <2 x double>* %0, align 8
+; CHECK-UNRL: %3 = load <2 x double>, <2 x double>* %1, align 8
 ; CHECK-UNRL: %mul = fmul <2 x double> %2, %2
 ; CHECK-UNRL: %mul3 = fmul <2 x double> %2, %3
 ; CHECK-UNRL: %add = fadd <2 x double> %mul, %mul3
diff --git a/llvm/test/Transforms/BBVectorize/mem-op-depth.ll b/llvm/test/Transforms/BBVectorize/mem-op-depth.ll
index c31d452..c181c68 100644
--- a/llvm/test/Transforms/BBVectorize/mem-op-depth.ll
+++ b/llvm/test/Transforms/BBVectorize/mem-op-depth.ll
@@ -7,11 +7,11 @@
 
 define i32 @test1() nounwind {
 ; CHECK-LABEL: @test1(
-  %V1 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
-  %V2 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 1), align 4
-  %V3= load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 2), align 8
-  %V4 = load float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 3), align 4
-; CHECK:   %V1 = load <4 x float>* bitcast ([1024 x float]* @A to <4 x float>*), align 16
+  %V1 = load float, float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 0), align 16
+  %V2 = load float, float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 1), align 4
+  %V3= load float, float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 2), align 8
+  %V4 = load float, float* getelementptr inbounds ([1024 x float]* @A, i64 0, i64 3), align 4
+; CHECK:   %V1 = load <4 x float>, <4 x float>* bitcast ([1024 x float]* @A to <4 x float>*), align 16
   store float %V1, float* getelementptr inbounds ([1024 x float]* @B, i64 0, i64 0), align 16
   store float %V2, float* getelementptr inbounds ([1024 x float]* @B, i64 0, i64 1), align 4
   store float %V3, float* getelementptr inbounds ([1024 x float]* @B, i64 0, i64 2), align 8
diff --git a/llvm/test/Transforms/BBVectorize/metadata.ll b/llvm/test/Transforms/BBVectorize/metadata.ll
index fade403..f5580a8 100644
--- a/llvm/test/Transforms/BBVectorize/metadata.ll
+++ b/llvm/test/Transforms/BBVectorize/metadata.ll
@@ -4,13 +4,13 @@
 ; Simple 3-pair chain with loads and stores (with fpmath)
 define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
 entry:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1, !fpmath !2
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4, !fpmath !3
   store double %mul, double* %c, align 8
   %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
@@ -24,13 +24,13 @@
 ; Simple 3-pair chain with loads and stores (ints with range)
 define void @test2(i64* %a, i64* %b, i64* %c) nounwind uwtable readonly {
 entry:
-  %i0 = load i64* %a, align 8, !range !0
-  %i1 = load i64* %b, align 8
+  %i0 = load i64, i64* %a, align 8, !range !0
+  %i1 = load i64, i64* %b, align 8
   %mul = mul i64 %i0, %i1
   %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1
-  %i3 = load i64* %arrayidx3, align 8, !range !1
+  %i3 = load i64, i64* %arrayidx3, align 8, !range !1
   %arrayidx4 = getelementptr inbounds i64, i64* %b, i64 1
-  %i4 = load i64* %arrayidx4, align 8
+  %i4 = load i64, i64* %arrayidx4, align 8
   %mul5 = mul i64 %i3, %i4
   store i64 %mul, i64* %c, align 8
   %arrayidx5 = getelementptr inbounds i64, i64* %c, i64 1
diff --git a/llvm/test/Transforms/BBVectorize/no-ldstr-conn.ll b/llvm/test/Transforms/BBVectorize/no-ldstr-conn.ll
index b864fe1..a84cd65 100644
--- a/llvm/test/Transforms/BBVectorize/no-ldstr-conn.ll
+++ b/llvm/test/Transforms/BBVectorize/no-ldstr-conn.ll
@@ -9,8 +9,8 @@
   %a1 = inttoptr i64 %a to i64*
   %a2 = getelementptr i64, i64* %a1, i64 1
   %a3 = getelementptr i64, i64* %a1, i64 2
-  %v2 = load i64* %a2, align 8
-  %v3 = load i64* %a3, align 8
+  %v2 = load i64, i64* %a2, align 8
+  %v3 = load i64, i64* %a3, align 8
   %v2a = add i64 %v2, 5
   %v3a = add i64 %v3, 7
   store i64 %v2a, i64* %a2, align 8
diff --git a/llvm/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll b/llvm/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll
index ff812b1..fcc0236 100644
--- a/llvm/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll
+++ b/llvm/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll
@@ -8,20 +8,20 @@
 ; Simple 3-pair chain also with loads and stores (using ptrs and gep)
 define double @test1(i64* %a, i64* %b, i64* %c) nounwind uwtable readonly {
 entry:
-  %i0 = load i64* %a, align 8
-  %i1 = load i64* %b, align 8
+  %i0 = load i64, i64* %a, align 8
+  %i1 = load i64, i64* %b, align 8
   %mul = mul i64 %i0, %i1
   %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1
-  %i3 = load i64* %arrayidx3, align 8
+  %i3 = load i64, i64* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds i64, i64* %b, i64 1
-  %i4 = load i64* %arrayidx4, align 8
+  %i4 = load i64, i64* %arrayidx4, align 8
   %mul5 = mul i64 %i3, %i4
   %ptr = inttoptr i64 %mul to double*
   %ptr5 = inttoptr i64 %mul5 to double*
   %aptr = getelementptr inbounds double, double* %ptr, i64 2
   %aptr5 = getelementptr inbounds double, double* %ptr5, i64 3
-  %av = load double* %aptr, align 16
-  %av5 = load double* %aptr5, align 16
+  %av = load double, double* %aptr, align 16
+  %av5 = load double, double* %aptr5, align 16
   %r = fmul double %av, %av5
   store i64 %mul, i64* %c, align 8
   %arrayidx5 = getelementptr inbounds i64, i64* %c, i64 1
@@ -30,15 +30,15 @@
 ; CHECK-LABEL: @test1(
 ; CHECK: %i0.v.i0 = bitcast i64* %a to <2 x i64>*
 ; CHECK: %i1.v.i0 = bitcast i64* %b to <2 x i64>*
-; CHECK: %i0 = load <2 x i64>* %i0.v.i0, align 8
-; CHECK: %i1 = load <2 x i64>* %i1.v.i0, align 8
+; CHECK: %i0 = load <2 x i64>, <2 x i64>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x i64>, <2 x i64>* %i1.v.i0, align 8
 ; CHECK: %mul = mul <2 x i64> %i0, %i1
 ; CHECK: %ptr = inttoptr <2 x i64> %mul to <2 x double*>
 ; CHECK: %aptr = getelementptr inbounds double, <2 x double*> %ptr, <2 x i64> <i64 2, i64 3>
 ; CHECK: %aptr.v.r1 = extractelement <2 x double*> %aptr, i32 0
 ; CHECK: %aptr.v.r2 = extractelement <2 x double*> %aptr, i32 1
-; CHECK: %av = load double* %aptr.v.r1, align 16
-; CHECK: %av5 = load double* %aptr.v.r2, align 16
+; CHECK: %av = load double, double* %aptr.v.r1, align 16
+; CHECK: %av5 = load double, double* %aptr.v.r2, align 16
 ; CHECK: %r = fmul double %av, %av5
 ; CHECK: %0 = bitcast i64* %c to <2 x i64>*
 ; CHECK: store <2 x i64> %mul, <2 x i64>* %0, align 8
@@ -50,14 +50,14 @@
 ; Simple 3-pair chain with loads and stores (using ptrs and gep)
 define void @test2(i64** %a, i64** %b, i64** %c) nounwind uwtable readonly {
 entry:
-  %i0 = load i64** %a, align 8
-  %i1 = load i64** %b, align 8
+  %i0 = load i64*, i64** %a, align 8
+  %i1 = load i64*, i64** %b, align 8
   %arrayidx3 = getelementptr inbounds i64*, i64** %a, i64 1
-  %i3 = load i64** %arrayidx3, align 8
+  %i3 = load i64*, i64** %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds i64*, i64** %b, i64 1
-  %i4 = load i64** %arrayidx4, align 8
-  %o1 = load i64* %i1, align 8
-  %o4 = load i64* %i4, align 8
+  %i4 = load i64*, i64** %arrayidx4, align 8
+  %o1 = load i64, i64* %i1, align 8
+  %o4 = load i64, i64* %i4, align 8
   %ptr0 = getelementptr inbounds i64, i64* %i0, i64 %o1
   %ptr3 = getelementptr inbounds i64, i64* %i3, i64 %o4
   store i64* %ptr0, i64** %c, align 8
@@ -66,12 +66,12 @@
   ret void
 ; CHECK-LABEL: @test2(
 ; CHECK: %i0.v.i0 = bitcast i64** %a to <2 x i64*>*
-; CHECK: %i1 = load i64** %b, align 8
-; CHECK: %i0 = load <2 x i64*>* %i0.v.i0, align 8
+; CHECK: %i1 = load i64*, i64** %b, align 8
+; CHECK: %i0 = load <2 x i64*>, <2 x i64*>* %i0.v.i0, align 8
 ; CHECK: %arrayidx4 = getelementptr inbounds i64*, i64** %b, i64 1
-; CHECK: %i4 = load i64** %arrayidx4, align 8
-; CHECK: %o1 = load i64* %i1, align 8
-; CHECK: %o4 = load i64* %i4, align 8
+; CHECK: %i4 = load i64*, i64** %arrayidx4, align 8
+; CHECK: %o1 = load i64, i64* %i1, align 8
+; CHECK: %o4 = load i64, i64* %i4, align 8
 ; CHECK: %ptr0.v.i1.1 = insertelement <2 x i64> undef, i64 %o1, i32 0
 ; CHECK: %ptr0.v.i1.2 = insertelement <2 x i64> %ptr0.v.i1.1, i64 %o4, i32 1
 ; CHECK: %ptr0 = getelementptr inbounds i64, <2 x i64*> %i0, <2 x i64> %ptr0.v.i1.2
@@ -86,16 +86,16 @@
 ; using pointer vectors.
 define void @test3(<2 x i64*>* %a, <2 x i64*>* %b, <2 x i64*>* %c) nounwind uwtable readonly {
 entry:
-  %i0 = load <2 x i64*>* %a, align 8
-  %i1 = load <2 x i64*>* %b, align 8
+  %i0 = load <2 x i64*>, <2 x i64*>* %a, align 8
+  %i1 = load <2 x i64*>, <2 x i64*>* %b, align 8
   %arrayidx3 = getelementptr inbounds <2 x i64*>, <2 x i64*>* %a, i64 1
-  %i3 = load <2 x i64*>* %arrayidx3, align 8
+  %i3 = load <2 x i64*>, <2 x i64*>* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds <2 x i64*>, <2 x i64*>* %b, i64 1
-  %i4 = load <2 x i64*>* %arrayidx4, align 8
+  %i4 = load <2 x i64*>, <2 x i64*>* %arrayidx4, align 8
   %j1 = extractelement <2 x i64*> %i1, i32 0
   %j4 = extractelement <2 x i64*> %i4, i32 0
-  %o1 = load i64* %j1, align 8
-  %o4 = load i64* %j4, align 8
+  %o1 = load i64, i64* %j1, align 8
+  %o4 = load i64, i64* %j4, align 8
   %j0 = extractelement <2 x i64*> %i0, i32 0
   %j3 = extractelement <2 x i64*> %i3, i32 0
   %ptr0 = getelementptr inbounds i64, i64* %j0, i64 %o1
@@ -110,14 +110,14 @@
   ret void
 ; CHECK-LABEL: @test3(
 ; CHECK: %i0.v.i0 = bitcast <2 x i64*>* %a to <4 x i64*>*
-; CHECK: %i1 = load <2 x i64*>* %b, align 8
-; CHECK: %i0 = load <4 x i64*>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x i64*>, <2 x i64*>* %b, align 8
+; CHECK: %i0 = load <4 x i64*>, <4 x i64*>* %i0.v.i0, align 8
 ; CHECK: %arrayidx4 = getelementptr inbounds <2 x i64*>, <2 x i64*>* %b, i64 1
-; CHECK: %i4 = load <2 x i64*>* %arrayidx4, align 8
+; CHECK: %i4 = load <2 x i64*>, <2 x i64*>* %arrayidx4, align 8
 ; CHECK: %j1 = extractelement <2 x i64*> %i1, i32 0
 ; CHECK: %j4 = extractelement <2 x i64*> %i4, i32 0
-; CHECK: %o1 = load i64* %j1, align 8
-; CHECK: %o4 = load i64* %j4, align 8
+; CHECK: %o1 = load i64, i64* %j1, align 8
+; CHECK: %o4 = load i64, i64* %j4, align 8
 ; CHECK: %ptr0.v.i1.1 = insertelement <2 x i64> undef, i64 %o1, i32 0
 ; CHECK: %ptr0.v.i1.2 = insertelement <2 x i64> %ptr0.v.i1.1, i64 %o4, i32 1
 ; CHECK: %ptr0.v.i0 = shufflevector <4 x i64*> %i0, <4 x i64*> undef, <2 x i32> <i32 0, i32 2>
diff --git a/llvm/test/Transforms/BBVectorize/simple-ldstr.ll b/llvm/test/Transforms/BBVectorize/simple-ldstr.ll
index d085817..56c1a06 100644
--- a/llvm/test/Transforms/BBVectorize/simple-ldstr.ll
+++ b/llvm/test/Transforms/BBVectorize/simple-ldstr.ll
@@ -5,13 +5,13 @@
 ; Simple 3-pair chain with loads and stores
 define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
 entry:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   store double %mul, double* %c, align 8
   %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
@@ -20,8 +20,8 @@
 ; CHECK-LABEL: @test1(
 ; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
 ; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
-; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
-; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %i0 = load <2 x double>, <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>, <2 x double>* %i1.v.i0, align 8
 ; CHECK: %mul = fmul <2 x double> %i0, %i1
 ; CHECK: %0 = bitcast double* %c to <2 x double>*
 ; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
@@ -33,16 +33,16 @@
 ; Simple chain with extending loads and stores
 define void @test2(float* %a, float* %b, double* %c) nounwind uwtable readonly {
 entry:
-  %i0f = load float* %a, align 4
+  %i0f = load float, float* %a, align 4
   %i0 = fpext float %i0f to double
-  %i1f = load float* %b, align 4
+  %i1f = load float, float* %b, align 4
   %i1 = fpext float %i1f to double
   %mul = fmul double %i0, %i1
   %arrayidx3 = getelementptr inbounds float, float* %a, i64 1
-  %i3f = load float* %arrayidx3, align 4
+  %i3f = load float, float* %arrayidx3, align 4
   %i3 = fpext float %i3f to double
   %arrayidx4 = getelementptr inbounds float, float* %b, i64 1
-  %i4f = load float* %arrayidx4, align 4
+  %i4f = load float, float* %arrayidx4, align 4
   %i4 = fpext float %i4f to double
   %mul5 = fmul double %i3, %i4
   store double %mul, double* %c, align 8
@@ -52,9 +52,9 @@
 ; CHECK-LABEL: @test2(
 ; CHECK: %i0f.v.i0 = bitcast float* %a to <2 x float>*
 ; CHECK: %i1f.v.i0 = bitcast float* %b to <2 x float>*
-; CHECK: %i0f = load <2 x float>* %i0f.v.i0, align 4
+; CHECK: %i0f = load <2 x float>, <2 x float>* %i0f.v.i0, align 4
 ; CHECK: %i0 = fpext <2 x float> %i0f to <2 x double>
-; CHECK: %i1f = load <2 x float>* %i1f.v.i0, align 4
+; CHECK: %i1f = load <2 x float>, <2 x float>* %i1f.v.i0, align 4
 ; CHECK: %i1 = fpext <2 x float> %i1f to <2 x double>
 ; CHECK: %mul = fmul <2 x double> %i0, %i1
 ; CHECK: %0 = bitcast double* %c to <2 x double>*
@@ -67,14 +67,14 @@
 ; Simple chain with loads and truncating stores
 define void @test3(double* %a, double* %b, float* %c) nounwind uwtable readonly {
 entry:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %mulf = fptrunc double %mul to float
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   %mul5f = fptrunc double %mul5 to float
   store float %mulf, float* %c, align 8
@@ -84,20 +84,20 @@
 ; CHECK-LABEL: @test3(
 ; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
 ; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
-; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
-; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %i0 = load <2 x double>, <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>, <2 x double>* %i1.v.i0, align 8
 ; CHECK: %mul = fmul <2 x double> %i0, %i1
 ; CHECK: %mulf = fptrunc <2 x double> %mul to <2 x float>
 ; CHECK: %0 = bitcast float* %c to <2 x float>*
 ; CHECK: store <2 x float> %mulf, <2 x float>* %0, align 8
 ; CHECK: ret void
 ; CHECK-AO-LABEL: @test3(
-; CHECK-AO: %i0 = load double* %a, align 8
-; CHECK-AO: %i1 = load double* %b, align 8
+; CHECK-AO: %i0 = load double, double* %a, align 8
+; CHECK-AO: %i1 = load double, double* %b, align 8
 ; CHECK-AO: %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-; CHECK-AO: %i3 = load double* %arrayidx3, align 8
+; CHECK-AO: %i3 = load double, double* %arrayidx3, align 8
 ; CHECK-AO: %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-; CHECK-AO: %i4 = load double* %arrayidx4, align 8
+; CHECK-AO: %i4 = load double, double* %arrayidx4, align 8
 ; CHECK-AO: %mul.v.i1.1 = insertelement <2 x double> undef, double %i1, i32 0
 ; CHECK-AO: %mul.v.i1.2 = insertelement <2 x double> %mul.v.i1.1, double %i4, i32 1
 ; CHECK-AO: %mul.v.i0.1 = insertelement <2 x double> undef, double %i0, i32 0
@@ -119,13 +119,13 @@
   br label %if.then
 
 if.then:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   store double %mul, double* %c, align 8
   %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
@@ -143,13 +143,13 @@
 ; Simple 3-pair chain with loads and stores
 define void @test5(double* %a, double* %b, double* %c) nounwind uwtable readonly {
 entry:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
   store double %mul5, double* %arrayidx5, align 8
@@ -158,8 +158,8 @@
 ; CHECK-LABEL: @test5(
 ; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
 ; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
-; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
-; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %i0 = load <2 x double>, <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>, <2 x double>* %i1.v.i0, align 8
 ; CHECK: %mul = fmul <2 x double> %i0, %i1
 ; CHECK: %0 = bitcast double* %c to <2 x double>*
 ; CHECK: store <2 x double> %mul, <2 x double>* %0, align 4
diff --git a/llvm/test/Transforms/CodeExtractor/2004-03-14-DominanceProblem.ll b/llvm/test/Transforms/CodeExtractor/2004-03-14-DominanceProblem.ll
index a6ee63e..2f9c0c73 100644
--- a/llvm/test/Transforms/CodeExtractor/2004-03-14-DominanceProblem.ll
+++ b/llvm/test/Transforms/CodeExtractor/2004-03-14-DominanceProblem.ll
@@ -13,7 +13,7 @@
         br i1 false, label %no_exit.1, label %loopexit.0.loopexit1
 
 no_exit.1:              ; preds = %loopentry.1
-        %tmp.53 = load i32* null                ; <i32> [#uses=1]
+        %tmp.53 = load i32, i32* null                ; <i32> [#uses=1]
         br i1 false, label %shortcirc_next.2, label %loopentry.1
 
 shortcirc_next.2:               ; preds = %no_exit.1
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll b/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll
index 430b992..519e1ee 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll
@@ -9,7 +9,7 @@
 ; CHECK-LABEL: block1:
 ; CHECK-NEXT: load
 block1:
-  %l1 = load i64* %mem1
+  %l1 = load i64, i64* %mem1
   %s1 = sext i64 %l1 to i128
   br label %block2
 
@@ -18,7 +18,7 @@
 ; CHECK-NEXT: load
 ; CHECK-NEXT: sext
 block2:
-  %l2 = load i64* %mem2
+  %l2 = load i64, i64* %mem2
   %s2 = sext i64 %l2 to i128
   %res = mul i128 %s1, %s2
   ret i128 %res
@@ -31,7 +31,7 @@
 ; CHECK-NEXT: load
 ; CHECK-NEXT: sext
 block1:
-  %l1 = load i32* %mem1
+  %l1 = load i32, i32* %mem1
   br label %block2
 
 ; CHECK-LABEL: block2:
@@ -39,7 +39,7 @@
 ; CHECK-NEXT: sext
 block2:
   %s1 = sext i32 %l1 to i64
-  %l2 = load i32* %mem2
+  %l2 = load i32, i32* %mem2
   %s2 = sext i32 %l2 to i64
   %res = mul i64 %s1, %s2
   ret i64 %res
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll
index 097e1f7..c9f49b5 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/sink-addrspacecast.ll
@@ -13,7 +13,7 @@
   br i1 %cond, label %if.then, label %fallthrough
 
 if.then:
-  %v = load i32 addrspace(1)* %casted, align 4
+  %v = load i32, i32 addrspace(1)* %casted, align 4
   br label %fallthrough
 
 fallthrough:
diff --git a/llvm/test/Transforms/CodeGenPrepare/statepoint-relocate.ll b/llvm/test/Transforms/CodeGenPrepare/statepoint-relocate.ll
index 939e813..7aa526f 100644
--- a/llvm/test/Transforms/CodeGenPrepare/statepoint-relocate.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/statepoint-relocate.ll
@@ -13,7 +13,7 @@
        %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
        %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4)
        %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
-       %ret = load i32* %ptr-new
+       %ret = load i32, i32* %ptr-new
        ret i32 %ret
 }
 
@@ -29,7 +29,7 @@
        %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4)
        %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
        %ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 6)
-       %ret = load i32* %ptr-new
+       %ret = load i32, i32* %ptr-new
        ret i32 %ret
 }
 
@@ -41,7 +41,7 @@
        %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
        %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
        %base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4)
-       %ret = load i32* %ptr-new
+       %ret = load i32, i32* %ptr-new
        ret i32 %ret
 }
 
@@ -53,7 +53,7 @@
        %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
        %base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32 %tok, i32 4, i32 4)
        %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
-       %ret = load i32* %ptr-new
+       %ret = load i32, i32* %ptr-new
        ret i32 %ret
 }
 
@@ -65,7 +65,7 @@
        %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
        %base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32 %tok, i32 4, i32 4)
        %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
-       %ret = load i32* %ptr-new
+       %ret = load i32, i32* %ptr-new
        ret i32 %ret
 }
 
@@ -79,7 +79,7 @@
        %tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
        %ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
        %ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 6)
-       %ret = load i32* %ptr-new
+       %ret = load i32, i32* %ptr-new
        ret i32 %ret
 }
 
diff --git a/llvm/test/Transforms/ConstProp/loads.ll b/llvm/test/Transforms/ConstProp/loads.ll
index 5a23dad..dbfd992 100644
--- a/llvm/test/Transforms/ConstProp/loads.ll
+++ b/llvm/test/Transforms/ConstProp/loads.ll
@@ -9,7 +9,7 @@
 
 ; Simple load
 define i32 @test1() {
-  %r = load i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0)
+  %r = load i32, i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0)
   ret i32 %r
 
 ; 0xDEADBEEF
@@ -24,7 +24,7 @@
 ; PR3152
 ; Load of first 16 bits of 32-bit value.
 define i16 @test2() {
-  %r = load i16* bitcast(i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16*)
+  %r = load i16, i16* bitcast(i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16*)
   ret i16 %r
 
 ; 0xBEEF
@@ -37,7 +37,7 @@
 }
 
 define i16 @test2_addrspacecast() {
-  %r = load i16 addrspace(1)* addrspacecast(i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16 addrspace(1)*)
+  %r = load i16, i16 addrspace(1)* addrspacecast(i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16 addrspace(1)*)
   ret i16 %r
 
 ; 0xBEEF
@@ -51,7 +51,7 @@
 
 ; Load of second 16 bits of 32-bit value.
 define i16 @test3() {
-  %r = load i16* getelementptr(i16* bitcast(i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16*), i32 1)
+  %r = load i16, i16* getelementptr(i16* bitcast(i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16*), i32 1)
   ret i16 %r
 
 ; 0xDEAD
@@ -65,7 +65,7 @@
 
 ; Load of 8 bit field + tail padding.
 define i16 @test4() {
-  %r = load i16* getelementptr(i16* bitcast(i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16*), i32 2)
+  %r = load i16, i16* getelementptr(i16* bitcast(i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16*), i32 2)
   ret i16 %r
 
 ; 0x00BA
@@ -79,7 +79,7 @@
 
 ; Load of double bits.
 define i64 @test6() {
-  %r = load i64* bitcast(double* @g2 to i64*)
+  %r = load i64, i64* bitcast(double* @g2 to i64*)
   ret i64 %r
 
 ; 0x3FF_0000000000000
@@ -93,7 +93,7 @@
 
 ; Load of double bits.
 define i16 @test7() {
-  %r = load i16* bitcast(double* @g2 to i16*)
+  %r = load i16, i16* bitcast(double* @g2 to i16*)
   ret i16 %r
 
 ; 0x0000
@@ -107,7 +107,7 @@
 
 ; Double load.
 define double @test8() {
-  %r = load double* bitcast({{i32,i8},i32}* @g1 to double*)
+  %r = load double, double* bitcast({{i32,i8},i32}* @g1 to double*)
   ret double %r
 
 ; LE-LABEL: @test8(
@@ -120,7 +120,7 @@
 
 ; i128 load.
 define i128 @test9() {
-  %r = load i128* bitcast({i64, i64}* @g3 to i128*)
+  %r = load i128, i128* bitcast({i64, i64}* @g3 to i128*)
   ret i128 %r
 
 ; 0x00000000_06B1BFF8_00000000_0000007B
@@ -134,7 +134,7 @@
 
 ; vector load.
 define <2 x i64> @test10() {
-  %r = load <2 x i64>* bitcast({i64, i64}* @g3 to <2 x i64>*)
+  %r = load <2 x i64>, <2 x i64>* bitcast({i64, i64}* @g3 to <2 x i64>*)
   ret <2 x i64> %r
 
 ; LE-LABEL: @test10(
@@ -151,7 +151,7 @@
 
 define i16 @test11() nounwind {
 entry:
-  %a = load i16* bitcast ({ i8, i8 }* @g4 to i16*)
+  %a = load i16, i16* bitcast ({ i8, i8 }* @g4 to i16*)
   ret i16 %a
 
 ; 0x08A1
@@ -168,7 +168,7 @@
 @test12g = private constant [6 x i8] c"a\00b\00\00\00"
 
 define i16 @test12() {
-  %a = load i16* getelementptr inbounds ([3 x i16]* bitcast ([6 x i8]* @test12g to [3 x i16]*), i32 0, i64 1)
+  %a = load i16, i16* getelementptr inbounds ([3 x i16]* bitcast ([6 x i8]* @test12g to [3 x i16]*), i32 0, i64 1)
   ret i16 %a
 
 ; 0x0062
@@ -184,7 +184,7 @@
 ; PR5978
 @g5 = constant i8 4
 define i1 @test13() {
-  %A = load i1* bitcast (i8* @g5 to i1*)
+  %A = load i1, i1* bitcast (i8* @g5 to i1*)
   ret i1 %A
 
 ; LE-LABEL: @test13(
@@ -197,7 +197,7 @@
 @g6 = constant [2 x i8*] [i8* inttoptr (i64 1 to i8*), i8* inttoptr (i64 2 to i8*)]
 define i64 @test14() nounwind {
 entry:
-  %tmp = load i64* bitcast ([2 x i8*]* @g6 to i64*)
+  %tmp = load i64, i64* bitcast ([2 x i8*]* @g6 to i64*)
   ret i64 %tmp
 
 ; LE-LABEL: @test14(
@@ -211,7 +211,7 @@
 @g6_as1 = constant [2 x i8 addrspace(1)*] [i8 addrspace(1)* inttoptr (i16 1 to i8 addrspace(1)*), i8 addrspace(1)* inttoptr (i16 2 to i8 addrspace(1)*)]
 define i16 @test14_as1() nounwind {
 entry:
-  %tmp = load i16* bitcast ([2 x i8 addrspace(1)*]* @g6_as1 to i16*)
+  %tmp = load i16, i16* bitcast ([2 x i8 addrspace(1)*]* @g6_as1 to i16*)
   ret i16 %tmp
 
 ; LE: @test14_as1
@@ -223,7 +223,7 @@
 
 define i64 @test15() nounwind {
 entry:
-  %tmp = load i64* bitcast (i8** getelementptr inbounds ([2 x i8*]* @g6, i32 0, i64 1) to i64*)
+  %tmp = load i64, i64* bitcast (i8** getelementptr inbounds ([2 x i8*]* @g6, i32 0, i64 1) to i64*)
   ret i64 %tmp
 
 ; LE-LABEL: @test15(
@@ -235,7 +235,7 @@
 
 @gv7 = constant [4 x i8*] [i8* null, i8* inttoptr (i64 -14 to i8*), i8* null, i8* null]
 define i64 @test16.1() {
-  %v = load i64* bitcast ([4 x i8*]* @gv7 to i64*), align 8
+  %v = load i64, i64* bitcast ([4 x i8*]* @gv7 to i64*), align 8
   ret i64 %v
 
 ; LE-LABEL: @test16.1(
@@ -246,7 +246,7 @@
 }
 
 define i64 @test16.2() {
-  %v = load i64* bitcast (i8** getelementptr inbounds ([4 x i8*]* @gv7, i64 0, i64 1) to i64*), align 8
+  %v = load i64, i64* bitcast (i8** getelementptr inbounds ([4 x i8*]* @gv7, i64 0, i64 1) to i64*), align 8
   ret i64 %v
 
 ; LE-LABEL: @test16.2(
@@ -257,7 +257,7 @@
 }
 
 define i64 @test16.3() {
-  %v = load i64* bitcast (i8** getelementptr inbounds ([4 x i8*]* @gv7, i64 0, i64 2) to i64*), align 8
+  %v = load i64, i64* bitcast (i8** getelementptr inbounds ([4 x i8*]* @gv7, i64 0, i64 2) to i64*), align 8
   ret i64 %v
 
 ; LE-LABEL: @test16.3(
diff --git a/llvm/test/Transforms/ConstantHoisting/AArch64/const-addr.ll b/llvm/test/Transforms/ConstantHoisting/AArch64/const-addr.ll
index a317e5c..4c36d20 100644
--- a/llvm/test/Transforms/ConstantHoisting/AArch64/const-addr.ll
+++ b/llvm/test/Transforms/ConstantHoisting/AArch64/const-addr.ll
@@ -11,12 +11,12 @@
 ; CHECK: %o3 = getelementptr %T, %T* %1, i32 0, i32 3
   %at = inttoptr i64 68141056 to %T*
   %o1 = getelementptr %T, %T* %at, i32 0, i32 1
-  %t1 = load i32* %o1
+  %t1 = load i32, i32* %o1
   %o2 = getelementptr %T, %T* %at, i32 0, i32 2
-  %t2 = load i32* %o2
+  %t2 = load i32, i32* %o2
   %a1 = add i32 %t1, %t2
   %o3 = getelementptr %T, %T* %at, i32 0, i32 3
-  %t3 = load i32* %o3
+  %t3 = load i32, i32* %o3
   %a2 = add i32 %a1, %t3
   ret i32 %a2
 }
diff --git a/llvm/test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll b/llvm/test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll
index 82ee04b..69b13cf 100644
--- a/llvm/test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll
+++ b/llvm/test/Transforms/ConstantHoisting/PowerPC/const-base-addr.ll
@@ -11,11 +11,11 @@
 ; CHECK:        %1 = inttoptr i32 %const to %T*
 ; CHECK:        %addr1 = getelementptr %T, %T* %1, i32 0, i32 1
   %addr1 = getelementptr %T, %T* inttoptr (i32 12345678 to %T*), i32 0, i32 1
-  %tmp1 = load i32* %addr1
+  %tmp1 = load i32, i32* %addr1
   %addr2 = getelementptr %T, %T* inttoptr (i32 12345678 to %T*), i32 0, i32 2
-  %tmp2 = load i32* %addr2
+  %tmp2 = load i32, i32* %addr2
   %addr3 = getelementptr %T, %T* inttoptr (i32 12345678 to %T*), i32 0, i32 3
-  %tmp3 = load i32* %addr3
+  %tmp3 = load i32, i32* %addr3
   %tmp4 = add i32 %tmp1, %tmp2
   %tmp5 = add i32 %tmp3, %tmp4
   ret i32 %tmp5
diff --git a/llvm/test/Transforms/ConstantHoisting/PowerPC/masks.ll b/llvm/test/Transforms/ConstantHoisting/PowerPC/masks.ll
index d553182..4cc504f 100644
--- a/llvm/test/Transforms/ConstantHoisting/PowerPC/masks.ll
+++ b/llvm/test/Transforms/ConstantHoisting/PowerPC/masks.ll
@@ -19,7 +19,7 @@
 if.end167:
 ; CHECK: and i32 {{.*}}, 32768
   %shl161 = shl nuw nsw i32 %conv121, 15
-  %0 = load i8* undef, align 1
+  %0 = load i8, i8* undef, align 1
   %conv169 = zext i8 %0 to i32
   %shl170 = shl nuw nsw i32 %conv169, 7
   %shl161.masked = and i32 %shl161, 32768
@@ -49,7 +49,7 @@
 if.end167:
 ; CHECK: add i32 {{.*}}, -32758
   %shl161 = shl nuw nsw i32 %conv121, 15
-  %0 = load i8* undef, align 1
+  %0 = load i8, i8* undef, align 1
   %conv169 = zext i8 %0 to i32
   %shl170 = shl nuw nsw i32 %conv169, 7
   %shl161.masked = and i32 %shl161, 32773
diff --git a/llvm/test/Transforms/ConstantHoisting/X86/cast-inst.ll b/llvm/test/Transforms/ConstantHoisting/X86/cast-inst.ll
index f490f4a..bd1e196 100644
--- a/llvm/test/Transforms/ConstantHoisting/X86/cast-inst.ll
+++ b/llvm/test/Transforms/ConstantHoisting/X86/cast-inst.ll
@@ -9,19 +9,19 @@
 ; CHECK-LABEL:  @cast_inst_test
 ; CHECK:        %const = bitcast i64 4646526064 to i64
 ; CHECK:        %1 = inttoptr i64 %const to i32*
-; CHECK:        %v0 = load i32* %1, align 16
+; CHECK:        %v0 = load i32, i32* %1, align 16
 ; CHECK:        %const_mat = add i64 %const, 16
 ; CHECK-NEXT:   %2 = inttoptr i64 %const_mat to i32*
-; CHECK-NEXT:   %v1 = load i32* %2, align 16
+; CHECK-NEXT:   %v1 = load i32, i32* %2, align 16
 ; CHECK:        %const_mat1 = add i64 %const, 32
 ; CHECK-NEXT:   %3 = inttoptr i64 %const_mat1 to i32*
-; CHECK-NEXT:   %v2 = load i32* %3, align 16
+; CHECK-NEXT:   %v2 = load i32, i32* %3, align 16
   %a0 = inttoptr i64 4646526064 to i32*
-  %v0 = load i32* %a0, align 16
+  %v0 = load i32, i32* %a0, align 16
   %a1 = inttoptr i64 4646526080 to i32*
-  %v1 = load i32* %a1, align 16
+  %v1 = load i32, i32* %a1, align 16
   %a2 = inttoptr i64 4646526096 to i32*
-  %v2 = load i32* %a2, align 16
+  %v2 = load i32, i32* %a2, align 16
   %r0 = add i32 %v0, %v1
   %r1 = add i32 %r0, %v2
   ret i32 %r1
diff --git a/llvm/test/Transforms/ConstantHoisting/X86/const-base-addr.ll b/llvm/test/Transforms/ConstantHoisting/X86/const-base-addr.ll
index eabf80d..db5dfdd 100644
--- a/llvm/test/Transforms/ConstantHoisting/X86/const-base-addr.ll
+++ b/llvm/test/Transforms/ConstantHoisting/X86/const-base-addr.ll
@@ -12,11 +12,11 @@
 ; CHECK:        %1 = inttoptr i32 %const to %T*
 ; CHECK:        %addr1 = getelementptr %T, %T* %1, i32 0, i32 1
   %addr1 = getelementptr %T, %T* inttoptr (i32 12345678 to %T*), i32 0, i32 1
-  %tmp1 = load i32* %addr1
+  %tmp1 = load i32, i32* %addr1
   %addr2 = getelementptr %T, %T* inttoptr (i32 12345678 to %T*), i32 0, i32 2
-  %tmp2 = load i32* %addr2
+  %tmp2 = load i32, i32* %addr2
   %addr3 = getelementptr %T, %T* inttoptr (i32 12345678 to %T*), i32 0, i32 3
-  %tmp3 = load i32* %addr3
+  %tmp3 = load i32, i32* %addr3
   %tmp4 = add i32 %tmp1, %tmp2
   %tmp5 = add i32 %tmp3, %tmp4
   ret i32 %tmp5
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll b/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll
index 9a22647..9d1253a 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/basic.ll
@@ -52,8 +52,8 @@
         ret i8 0
 
 bb2:            ; preds = %entry
-; CHECK: %should_be_const = load i8* @gv
-        %should_be_const = load i8* %a
+; CHECK: %should_be_const = load i8, i8* @gv
+        %should_be_const = load i8, i8* %a
         ret i8 %should_be_const
 }
 
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/non-null.ll b/llvm/test/Transforms/CorrelatedValuePropagation/non-null.ll
index b14abd8..6bb8bb0 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/non-null.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/non-null.ll
@@ -2,7 +2,7 @@
 
 define void @test1(i8* %ptr) {
 ; CHECK: test1
-  %A = load i8* %ptr
+  %A = load i8, i8* %ptr
   br label %bb
 bb:
   icmp ne i8* %ptr, null
diff --git a/llvm/test/Transforms/DeadArgElim/aggregates.ll b/llvm/test/Transforms/DeadArgElim/aggregates.ll
index f54c6c9..68d2534 100644
--- a/llvm/test/Transforms/DeadArgElim/aggregates.ll
+++ b/llvm/test/Transforms/DeadArgElim/aggregates.ll
@@ -157,6 +157,6 @@
   %res = call {i8*, i32} @mid()
   %resptr = extractvalue {i8*, i32} %res, 0
 
-  %val = load i8* %resptr
+  %val = load i8, i8* %resptr
   ret i8 %val
 }
\ No newline at end of file
diff --git a/llvm/test/Transforms/DeadArgElim/deadexternal.ll b/llvm/test/Transforms/DeadArgElim/deadexternal.ll
index 665d7db..21cbc37 100644
--- a/llvm/test/Transforms/DeadArgElim/deadexternal.ll
+++ b/llvm/test/Transforms/DeadArgElim/deadexternal.ll
@@ -31,9 +31,9 @@
 entry:
   %i = alloca i32, align 4
   store volatile i32 10, i32* %i, align 4
-; CHECK: %tmp = load volatile i32* %i, align 4
+; CHECK: %tmp = load volatile i32, i32* %i, align 4
 ; CHECK-NEXT: call void @f(i32 undef)
-  %tmp = load volatile i32* %i, align 4
+  %tmp = load volatile i32, i32* %i, align 4
   call void @f(i32 %tmp)
   ret void
 }
diff --git a/llvm/test/Transforms/DeadArgElim/deadretval2.ll b/llvm/test/Transforms/DeadArgElim/deadretval2.ll
index dcdc36e..b0d2428 100644
--- a/llvm/test/Transforms/DeadArgElim/deadretval2.ll
+++ b/llvm/test/Transforms/DeadArgElim/deadretval2.ll
@@ -20,7 +20,7 @@
 }
 
 define internal i32 @foo() {
-        %DEAD = load i32* @P            ; <i32> [#uses=1]
+        %DEAD = load i32, i32* @P            ; <i32> [#uses=1]
         ret i32 %DEAD
 }
 
diff --git a/llvm/test/Transforms/DeadArgElim/keepalive.ll b/llvm/test/Transforms/DeadArgElim/keepalive.ll
index 16569db..d8a0993 100644
--- a/llvm/test/Transforms/DeadArgElim/keepalive.ll
+++ b/llvm/test/Transforms/DeadArgElim/keepalive.ll
@@ -31,7 +31,7 @@
 ; We can't remove 'this' here, as that would put argmem in ecx instead of
 ; memory.
 define internal x86_thiscallcc i32 @unused_this(i32* %this, i32* inalloca %argmem) {
-	%v = load i32* %argmem
+	%v = load i32, i32* %argmem
 	ret i32 %v
 }
 ; CHECK-LABEL: define internal x86_thiscallcc i32 @unused_this(i32* %this, i32* inalloca %argmem)
diff --git a/llvm/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll b/llvm/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll
index d5706c9..7746cce 100644
--- a/llvm/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll
@@ -13,7 +13,7 @@
 ; CHECK: store i32 add (i32 ptrtoint ([0 x i32]* @A to i32), i32 1), i32* %Arg2
   %ln2gz = getelementptr i32, i32* %Arg1, i32 14
   %ln2gA = bitcast i32* %ln2gz to double*
-  %ln2gB = load double* %ln2gA
+  %ln2gB = load double, double* %ln2gA
   %ln2gD = getelementptr i32, i32* %Arg2, i32 -3
   %ln2gE = bitcast i32* %ln2gD to double*
   store double %ln2gB, double* %ln2gE
diff --git a/llvm/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll b/llvm/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll
index cb8da46..7e46d28 100644
--- a/llvm/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/2011-09-06-EndOfFunction.ll
@@ -11,7 +11,7 @@
   %temp.lvalue = alloca %"class.std::auto_ptr", align 8
   call void @_Z3barv(%"class.std::auto_ptr"* sret %temp.lvalue)
   %_M_ptr.i.i = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* %temp.lvalue, i64 0, i32 0
-  %tmp.i.i = load i32** %_M_ptr.i.i, align 8
+  %tmp.i.i = load i32*, i32** %_M_ptr.i.i, align 8
 ; CHECK-NOT: store i32* null
   store i32* null, i32** %_M_ptr.i.i, align 8
   %_M_ptr.i.i4 = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* %agg.result, i64 0, i32 0
diff --git a/llvm/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll b/llvm/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll
index 0baaea5..d30e9a2 100644
--- a/llvm/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/2011-09-06-MemCpy.ll
@@ -70,11 +70,11 @@
   %memtmp = alloca %struct.pair.162, align 8
   %0 = getelementptr inbounds %struct.pair.162, %struct.pair.162* %memtmp, i64 0, i32 0
   %1 = getelementptr inbounds %struct.pair.162, %struct.pair.162* %__a, i64 0, i32 0
-  %2 = load %struct.BasicBlock** %1, align 8
+  %2 = load %struct.BasicBlock*, %struct.BasicBlock** %1, align 8
   store %struct.BasicBlock* %2, %struct.BasicBlock** %0, align 8
   %3 = getelementptr inbounds %struct.pair.162, %struct.pair.162* %memtmp, i64 0, i32 1
   %4 = getelementptr inbounds %struct.pair.162, %struct.pair.162* %__a, i64 0, i32 1
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   store i32 %5, i32* %3, align 8
   %6 = bitcast %struct.pair.162* %__a to i8*
   %7 = bitcast %struct.pair.162* %__b to i8*
diff --git a/llvm/test/Transforms/DeadStoreElimination/PartialStore.ll b/llvm/test/Transforms/DeadStoreElimination/PartialStore.ll
index 4582c84..d85b4de 100644
--- a/llvm/test/Transforms/DeadStoreElimination/PartialStore.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/PartialStore.ll
@@ -31,7 +31,7 @@
   %tmp.1 = bitcast { [3 x i32] }* %__u to double*
   store double %__x, double* %tmp.1
   %tmp.4 = getelementptr { [3 x i32] }, { [3 x i32] }* %__u, i32 0, i32 0, i32 1
-  %tmp.5 = load i32* %tmp.4
+  %tmp.5 = load i32, i32* %tmp.4
   %tmp.6 = icmp slt i32 %tmp.5, 0
   %tmp.7 = zext i1 %tmp.6 to i32
   ret i32 %tmp.7
diff --git a/llvm/test/Transforms/DeadStoreElimination/atomic.ll b/llvm/test/Transforms/DeadStoreElimination/atomic.ll
index af303fa..4d2cb37 100644
--- a/llvm/test/Transforms/DeadStoreElimination/atomic.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/atomic.ll
@@ -29,7 +29,7 @@
 ; CHECK-NOT: store i32 0
 ; CHECK: store i32 1
   store i32 0, i32* @x
-  %x = load atomic i32* @y seq_cst, align 4
+  %x = load atomic i32, i32* @y seq_cst, align 4
   store i32 1, i32* @x
   ret i32 %x
 }
@@ -69,7 +69,7 @@
 ; CHECK-LABEL: test6
 ; CHECK-NOT: store
 ; CHECK: ret void
-  %x = load atomic i32* @x unordered, align 4
+  %x = load atomic i32, i32* @x unordered, align 4
   store atomic i32 %x, i32* @x unordered, align 4
   ret void
 }
@@ -93,7 +93,7 @@
   %a = alloca i32
   call void @randomop(i32* %a)
   store i32 0, i32* %a, align 4
-  %x = load atomic i32* @x seq_cst, align 4
+  %x = load atomic i32, i32* @x seq_cst, align 4
   ret i32 %x
 }
 
@@ -103,7 +103,7 @@
 ; CHECK-NOT: store i32 0
 ; CHECK: store i32 1
   store i32 0, i32* @x
-  %x = load atomic i32* @y monotonic, align 4
+  %x = load atomic i32, i32* @y monotonic, align 4
   store i32 1, i32* @x
   ret i32 %x
 }
@@ -125,7 +125,7 @@
 ; CHECK: store atomic i32 0
 ; CHECK: store atomic i32 1
   store atomic i32 0, i32* @x monotonic, align 4
-  %x = load atomic i32* @y monotonic, align 4
+  %x = load atomic i32, i32* @y monotonic, align 4
   store atomic i32 1, i32* @x monotonic, align 4
   ret i32 %x
 }
@@ -147,7 +147,7 @@
 ; CHECK-NOT: store i32 0
 ; CHECK: store i32 1
   store i32 0, i32* @x
-  %x = load atomic i32* @y seq_cst, align 4
+  %x = load atomic i32, i32* @y seq_cst, align 4
   store atomic i32 %x, i32* @y seq_cst, align 4
   store i32 1, i32* @x
   ret i32 %x
@@ -159,7 +159,7 @@
 ; CHECK-NOT: store i32 0
 ; CHECK: store i32 1
   store i32 0, i32* @x
-  %x = load atomic i32* @y acquire, align 4
+  %x = load atomic i32, i32* @y acquire, align 4
   store atomic i32 %x, i32* @y release, align 4
   store i32 1, i32* @x
   ret i32 %x
@@ -172,7 +172,7 @@
 ; CHECK: store i32 1
   store i32 0, i32* @x
   store atomic i32 0, i32* @y release, align 4
-  %x = load atomic i32* @y acquire, align 4
+  %x = load atomic i32, i32* @y acquire, align 4
   store i32 1, i32* @x
   ret i32 %x
 }
diff --git a/llvm/test/Transforms/DeadStoreElimination/const-pointers.ll b/llvm/test/Transforms/DeadStoreElimination/const-pointers.ll
index f2c5c6a..e4403ed 100644
--- a/llvm/test/Transforms/DeadStoreElimination/const-pointers.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/const-pointers.ll
@@ -9,7 +9,7 @@
   %p = getelementptr inbounds %t, %t* %pp, i32 0, i32 0
 
   store i32 1, i32* %p; <-- This is dead
-  %x = load i32* inttoptr (i32 12345 to i32*)
+  %x = load i32, i32* inttoptr (i32 12345 to i32*)
   store i32 %x, i32* %p
   ret void
 ; CHECK-LABEL: define void @test1(
@@ -30,7 +30,7 @@
 
 define void @test4(i32* %p) {
   store i32 1, i32* %p
-  %x = load i32* @g; <-- %p and @g could alias
+  %x = load i32, i32* @g; <-- %p and @g could alias
   store i32 %x, i32* %p
   ret void
 ; CHECK-LABEL: define void @test4(
diff --git a/llvm/test/Transforms/DeadStoreElimination/crash.ll b/llvm/test/Transforms/DeadStoreElimination/crash.ll
index ab04482..78cb842 100644
--- a/llvm/test/Transforms/DeadStoreElimination/crash.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/crash.ll
@@ -66,8 +66,8 @@
 entry:
 	%t = alloca %struct.f393a00_2__windmill		; <%struct.f393a00_2__windmill*> [#uses=1]
 	%0 = getelementptr %struct.f393a00_2__windmill, %struct.f393a00_2__windmill* %t, i32 0, i32 0, i32 0		; <%struct.ada__tags__dispatch_table**> [#uses=1]
-	%1 = load %struct.ada__tags__dispatch_table** null, align 4		; <%struct.ada__tags__dispatch_table*> [#uses=1]
-	%2 = load %struct.ada__tags__dispatch_table** %0, align 8		; <%struct.ada__tags__dispatch_table*> [#uses=1]
+	%1 = load %struct.ada__tags__dispatch_table*, %struct.ada__tags__dispatch_table** null, align 4		; <%struct.ada__tags__dispatch_table*> [#uses=1]
+	%2 = load %struct.ada__tags__dispatch_table*, %struct.ada__tags__dispatch_table** %0, align 8		; <%struct.ada__tags__dispatch_table*> [#uses=1]
 	store %struct.ada__tags__dispatch_table* %2, %struct.ada__tags__dispatch_table** null, align 4
 	store %struct.ada__tags__dispatch_table* %1, %struct.ada__tags__dispatch_table** null, align 4
 	ret void
diff --git a/llvm/test/Transforms/DeadStoreElimination/free.ll b/llvm/test/Transforms/DeadStoreElimination/free.ll
index bac325e..6b69ec8 100644
--- a/llvm/test/Transforms/DeadStoreElimination/free.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/free.ll
@@ -10,7 +10,7 @@
 ; CHECK-NEXT: @free
 ; CHECK-NEXT: ret void
 define void @test(i32* %Q, i32* %P) {
-        %DEAD = load i32* %Q            ; <i32> [#uses=1]
+        %DEAD = load i32, i32* %Q            ; <i32> [#uses=1]
         store i32 %DEAD, i32* %P
         %1 = bitcast i32* %P to i8*
         tail call void @free(i8* %1)
diff --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/simple.ll
index 3e5f7b8..dd1443e 100644
--- a/llvm/test/Transforms/DeadStoreElimination/simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/simple.ll
@@ -6,7 +6,7 @@
 declare i8* @llvm.init.trampoline(i8*, i8*, i8*)
 
 define void @test1(i32* %Q, i32* %P) {
-        %DEAD = load i32* %Q
+        %DEAD = load i32, i32* %Q
         store i32 %DEAD, i32* %P
         store i32 0, i32* %P
         ret void
@@ -31,17 +31,17 @@
 
 define i32 @test3(i32* %g_addr) nounwind {
 ; CHECK-LABEL: @test3(
-; CHECK: load i32* %g_addr
-  %g_value = load i32* %g_addr, align 4
+; CHECK: load i32, i32* %g_addr
+  %g_value = load i32, i32* %g_addr, align 4
   store i32 -1, i32* @g, align 4
   store i32 %g_value, i32* %g_addr, align 4
-  %tmp3 = load i32* @g, align 4
+  %tmp3 = load i32, i32* @g, align 4
   ret i32 %tmp3
 }
 
 
 define void @test4(i32* %Q) {
-        %a = load i32* %Q
+        %a = load i32, i32* %Q
         store volatile i32 %a, i32* %Q
         ret void
 ; CHECK-LABEL: @test4(
@@ -51,7 +51,7 @@
 }
 
 define void @test5(i32* %Q) {
-        %a = load volatile i32* %Q
+        %a = load volatile i32, i32* %Q
         store i32 %a, i32* %Q
         ret void
 ; CHECK-LABEL: @test5(
@@ -87,7 +87,7 @@
         store i32 1234567, i32* %V
         %V2 = bitcast i32* %V to i8*
         store i8 0, i8* %V2
-        %X = load i32* %V
+        %X = load i32, i32* %V
         ret i32 %X
         
 ; CHECK-LABEL: @test8(
@@ -142,9 +142,9 @@
 ; PR2599 - load -> store to same address.
 define void @test12({ i32, i32 }* %x) nounwind  {
 	%tmp4 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 0
-	%tmp5 = load i32* %tmp4, align 4
+	%tmp5 = load i32, i32* %tmp4, align 4
 	%tmp7 = getelementptr { i32, i32 }, { i32, i32 }* %x, i32 0, i32 1
-	%tmp8 = load i32* %tmp7, align 4
+	%tmp8 = load i32, i32* %tmp7, align 4
 	%tmp17 = sub i32 0, %tmp8
 	store i32 %tmp5, i32* %tmp4, align 4
 	store i32 %tmp17, i32* %tmp7, align 4
@@ -160,7 +160,7 @@
 define i32* @test13() {
         %p = tail call i8* @malloc(i32 4)
         %P = bitcast i8* %p to i32*
-        %DEAD = load i32* %P
+        %DEAD = load i32, i32* %P
         %DEAD2 = add i32 %DEAD, 1
         store i32 %DEAD2, i32* %P
         call void @test13f( )
@@ -176,7 +176,7 @@
   %p = tail call i8* @malloc(i32 4)
   %p.bc = bitcast i8* %p to i32*
   %P = addrspacecast i32* %p.bc to i32 addrspace(1)*
-  %DEAD = load i32 addrspace(1)* %P
+  %DEAD = load i32, i32 addrspace(1)* %P
   %DEAD2 = add i32 %DEAD, 1
   store i32 %DEAD2, i32 addrspace(1)* %P
   call void @test13f( )
@@ -195,7 +195,7 @@
 
 define void @test14(i32* %Q) {
         %P = alloca i32
-        %DEAD = load i32* %Q
+        %DEAD = load i32, i32* %Q
         store i32 %DEAD, i32* %P
         ret void
 
@@ -344,7 +344,7 @@
 ; CHECK: store i8 %tmp
 define i8* @test25(i8* %p) nounwind {
   %p.4 = getelementptr i8, i8* %p, i64 4
-  %tmp = load i8* %p.4, align 1
+  %tmp = load i8, i8* %p.4, align 1
   store i8 0, i8* %p.4, align 1
   %q = call i8* @strdup(i8* %p) nounwind optsize
   store i8 %tmp, i8* %p.4, align 1
diff --git a/llvm/test/Transforms/EarlyCSE/basic.ll b/llvm/test/Transforms/EarlyCSE/basic.ll
index a36a103..43b5e60 100644
--- a/llvm/test/Transforms/EarlyCSE/basic.ll
+++ b/llvm/test/Transforms/EarlyCSE/basic.ll
@@ -37,8 +37,8 @@
 ;; Simple load value numbering.
 ; CHECK-LABEL: @test2(
 define i32 @test2(i32 *%P) {
-  %V1 = load i32* %P
-  %V2 = load i32* %P
+  %V1 = load i32, i32* %P
+  %V2 = load i32, i32* %P
   %Diff = sub i32 %V1, %V2
   ret i32 %Diff
   ; CHECK: ret i32 0
@@ -46,9 +46,9 @@
 
 ; CHECK-LABEL: @test2a(
 define i32 @test2a(i32 *%P, i1 %b) {
-  %V1 = load i32* %P
+  %V1 = load i32, i32* %P
   tail call void @llvm.assume(i1 %b)
-  %V2 = load i32* %P
+  %V2 = load i32, i32* %P
   %Diff = sub i32 %V1, %V2
   ret i32 %Diff
   ; CHECK: ret i32 0
@@ -57,13 +57,13 @@
 ;; Cross block load value numbering.
 ; CHECK-LABEL: @test3(
 define i32 @test3(i32 *%P, i1 %Cond) {
-  %V1 = load i32* %P
+  %V1 = load i32, i32* %P
   br i1 %Cond, label %T, label %F
 T:
   store i32 4, i32* %P
   ret i32 42
 F:
-  %V2 = load i32* %P
+  %V2 = load i32, i32* %P
   %Diff = sub i32 %V1, %V2
   ret i32 %Diff
   ; CHECK: F:
@@ -72,14 +72,14 @@
 
 ; CHECK-LABEL: @test3a(
 define i32 @test3a(i32 *%P, i1 %Cond, i1 %b) {
-  %V1 = load i32* %P
+  %V1 = load i32, i32* %P
   br i1 %Cond, label %T, label %F
 T:
   store i32 4, i32* %P
   ret i32 42
 F:
   tail call void @llvm.assume(i1 %b)
-  %V2 = load i32* %P
+  %V2 = load i32, i32* %P
   %Diff = sub i32 %V1, %V2
   ret i32 %Diff
   ; CHECK: F:
@@ -89,7 +89,7 @@
 ;; Cross block load value numbering stops when stores happen.
 ; CHECK-LABEL: @test4(
 define i32 @test4(i32 *%P, i1 %Cond) {
-  %V1 = load i32* %P
+  %V1 = load i32, i32* %P
   br i1 %Cond, label %T, label %F
 T:
   ret i32 42
@@ -97,7 +97,7 @@
   ; Clobbers V1
   store i32 42, i32* %P
   
-  %V2 = load i32* %P
+  %V2 = load i32, i32* %P
   %Diff = sub i32 %V1, %V2
   ret i32 %Diff
   ; CHECK: F:
@@ -120,7 +120,7 @@
 ; CHECK-LABEL: @test6(
 define i32 @test6(i32 *%P) {
   store i32 42, i32* %P
-  %V1 = load i32* %P
+  %V1 = load i32, i32* %P
   ret i32 %V1
   ; CHECK: ret i32 42
 }
@@ -129,7 +129,7 @@
 define i32 @test6a(i32 *%P, i1 %b) {
   store i32 42, i32* %P
   tail call void @llvm.assume(i1 %b)
-  %V1 = load i32* %P
+  %V1 = load i32, i32* %P
   ret i32 %V1
   ; CHECK: ret i32 42
 }
@@ -195,11 +195,11 @@
 
 ; CHECK-LABEL: @test12(
 define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
-  %load0 = load i32* %P1
-  %1 = load atomic i32* %P2 seq_cst, align 4
-  %load1 = load i32* %P1
+  %load0 = load i32, i32* %P1
+  %1 = load atomic i32, i32* %P2 seq_cst, align 4
+  %load1 = load i32, i32* %P1
   %sel = select i1 %B, i32 %load0, i32 %load1
   ret i32 %sel
-  ; CHECK: load i32* %P1
-  ; CHECK: load i32* %P1
+  ; CHECK: load i32, i32* %P1
+  ; CHECK: load i32, i32* %P1
 }
diff --git a/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll b/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
index 36a7658..ca05d63 100644
--- a/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
+++ b/llvm/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
@@ -17,7 +17,7 @@
 
 ; CHECK: define i32 @h() #0
 define i32 @h() readnone {
-	%tmp = load i32* @x		; <i32> [#uses=1]
+	%tmp = load i32, i32* @x		; <i32> [#uses=1]
 	ret i32 %tmp
 }
 
diff --git a/llvm/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll b/llvm/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll
index b7e4d1f..fef872c 100644
--- a/llvm/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll
+++ b/llvm/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll
@@ -4,6 +4,6 @@
 @g = global i32 0		; <i32*> [#uses=1]
 
 define i32 @f() {
-	%t = load volatile i32* @g		; <i32> [#uses=1]
+	%t = load volatile i32, i32* @g		; <i32> [#uses=1]
 	ret i32 %t
 }
diff --git a/llvm/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll b/llvm/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll
index 9655da4..e3a8f01 100644
--- a/llvm/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll
+++ b/llvm/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll
@@ -3,6 +3,6 @@
 @s = external constant i8		; <i8*> [#uses=1]
 
 define i8 @f() {
-	%tmp = load i8* @s		; <i8> [#uses=1]
+	%tmp = load i8, i8* @s		; <i8> [#uses=1]
 	ret i8 %tmp
 }
diff --git a/llvm/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll b/llvm/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll
index 0cf1cb7c6..ec1db09 100644
--- a/llvm/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll
+++ b/llvm/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll
@@ -2,7 +2,7 @@
 
 ; CHECK: define i32* @a(i32** nocapture readonly %p)
 define i32* @a(i32** %p) {
-	%tmp = load i32** %p
+	%tmp = load i32*, i32** %p
 	ret i32* %tmp
 }
 
diff --git a/llvm/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll b/llvm/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
index 93991d2..1a64a83 100644
--- a/llvm/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
+++ b/llvm/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
@@ -5,6 +5,6 @@
 
 define void @foo() {
 ; CHECK: void @foo() {
-  %tmp = load volatile i32* @g
+  %tmp = load volatile i32, i32* @g
   ret void
 }
diff --git a/llvm/test/Transforms/FunctionAttrs/atomic.ll b/llvm/test/Transforms/FunctionAttrs/atomic.ll
index d5a8db7..bb86701 100644
--- a/llvm/test/Transforms/FunctionAttrs/atomic.ll
+++ b/llvm/test/Transforms/FunctionAttrs/atomic.ll
@@ -7,7 +7,7 @@
 entry:
   %x.addr = alloca i32, align 4
   store atomic i32 %x, i32* %x.addr seq_cst, align 4
-  %r = load atomic i32* %x.addr seq_cst, align 4
+  %r = load atomic i32, i32* %x.addr seq_cst, align 4
   ret i32 %r
 }
 
@@ -15,7 +15,7 @@
 define i32 @test2(i32* %x) uwtable ssp {
 ; CHECK: define i32 @test2(i32* nocapture readonly %x) #1 {
 entry:
-  %r = load atomic i32* %x seq_cst, align 4
+  %r = load atomic i32, i32* %x seq_cst, align 4
   ret i32 %r
 }
 
diff --git a/llvm/test/Transforms/FunctionAttrs/nocapture.ll b/llvm/test/Transforms/FunctionAttrs/nocapture.ll
index f09c98b..23cbc85 100644
--- a/llvm/test/Transforms/FunctionAttrs/nocapture.ll
+++ b/llvm/test/Transforms/FunctionAttrs/nocapture.ll
@@ -40,7 +40,7 @@
 	%bit = and i32 %tmp2, 1
         ; subtle escape mechanism follows
 	%lookup = getelementptr [2 x i1], [2 x i1]* @lookup_table, i32 0, i32 %bit
-	%val = load i1* %lookup
+	%val = load i1, i1* %lookup
 	ret i1 %val
 }
 
@@ -71,7 +71,7 @@
 ; CHECK: define i1 @c7(i32* readonly %q, i32 %bitno)
 define i1 @c7(i32* %q, i32 %bitno) {
 	%ptr = call i1* @lookup_bit(i32* %q, i32 %bitno)
-	%val = load i1* %ptr
+	%val = load i1, i1* %ptr
 	ret i1 %val
 }
 
@@ -85,7 +85,7 @@
 	%y = phi i32* [ %q, %e ]
 	%tmp = bitcast i32* %x to i32*		; <i32*> [#uses=2]
 	%tmp2 = select i1 %b, i32* %tmp, i32* %y
-	%val = load i32* %tmp2		; <i32> [#uses=1]
+	%val = load i32, i32* %tmp2		; <i32> [#uses=1]
 	store i32 0, i32* %tmp
 	store i32* %y, i32** @g
 	ret i32 %val
@@ -100,7 +100,7 @@
 	%y = phi i32* [ %q, %e ]
 	%tmp = addrspacecast i32 addrspace(1)* %x to i32*		; <i32*> [#uses=2]
 	%tmp2 = select i1 %b, i32* %tmp, i32* %y
-	%val = load i32* %tmp2		; <i32> [#uses=1]
+	%val = load i32, i32* %tmp2		; <i32> [#uses=1]
 	store i32 0, i32* %tmp
 	store i32* %y, i32** @g
 	ret i32 %val
diff --git a/llvm/test/Transforms/FunctionAttrs/optnone-simple.ll b/llvm/test/Transforms/FunctionAttrs/optnone-simple.ll
index 9d0f8e3..beaa588 100644
--- a/llvm/test/Transforms/FunctionAttrs/optnone-simple.ll
+++ b/llvm/test/Transforms/FunctionAttrs/optnone-simple.ll
@@ -15,8 +15,8 @@
   %b.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
   store i32 %b, i32* %b.addr, align 4
-  %0 = load i32* %a.addr, align 4
-  %1 = load i32* %b.addr, align 4
+  %0 = load i32, i32* %a.addr, align 4
+  %1 = load i32, i32* %b.addr, align 4
   %add = add nsw i32 %0, %1
   ret i32 %add
 }
@@ -33,8 +33,8 @@
   %b.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
   store i32 %b, i32* %b.addr, align 4
-  %0 = load i32* %a.addr, align 4
-  %1 = load i32* %b.addr, align 4
+  %0 = load i32, i32* %a.addr, align 4
+  %1 = load i32, i32* %b.addr, align 4
   %add = add nsw i32 %0, %1
   ret i32 %add
 }
@@ -57,8 +57,8 @@
   %b.addr = alloca float, align 4
   store float %a, float* %a.addr, align 4
   store float %b, float* %b.addr, align 4
-  %0 = load float* %a.addr, align 4
-  %1 = load float* %b.addr, align 4
+  %0 = load float, float* %a.addr, align 4
+  %1 = load float, float* %b.addr, align 4
   %sub = fsub float %0, %1
   ret float %sub
 }
@@ -75,8 +75,8 @@
   %b.addr = alloca float, align 4
   store float %a, float* %a.addr, align 4
   store float %b, float* %b.addr, align 4
-  %0 = load float* %a.addr, align 4
-  %1 = load float* %b.addr, align 4
+  %0 = load float, float* %a.addr, align 4
+  %1 = load float, float* %b.addr, align 4
   %sub = fsub float %0, %1
   ret float %sub
 }
@@ -100,8 +100,8 @@
   %b.addr = alloca <4 x float>, align 16
   store <4 x float> %a, <4 x float>* %a.addr, align 16
   store <4 x float> %b, <4 x float>* %b.addr, align 16
-  %0 = load <4 x float>* %a.addr, align 16
-  %1 = load <4 x float>* %b.addr, align 16
+  %0 = load <4 x float>, <4 x float>* %a.addr, align 16
+  %1 = load <4 x float>, <4 x float>* %b.addr, align 16
   %mul = fmul <4 x float> %0, %1
   ret <4 x float> %mul
 }
@@ -118,8 +118,8 @@
   %b.addr = alloca <4 x float>, align 16
   store <4 x float> %a, <4 x float>* %a.addr, align 16
   store <4 x float> %b, <4 x float>* %b.addr, align 16
-  %0 = load <4 x float>* %a.addr, align 16
-  %1 = load <4 x float>* %b.addr, align 16
+  %0 = load <4 x float>, <4 x float>* %a.addr, align 16
+  %1 = load <4 x float>, <4 x float>* %b.addr, align 16
   %mul = fmul <4 x float> %0, %1
   ret <4 x float> %mul
 }
diff --git a/llvm/test/Transforms/GCOVProfiling/linezero.ll b/llvm/test/Transforms/GCOVProfiling/linezero.ll
index f0b32ec..c9c07f4 100644
--- a/llvm/test/Transforms/GCOVProfiling/linezero.ll
+++ b/llvm/test/Transforms/GCOVProfiling/linezero.ll
@@ -26,30 +26,30 @@
   call void @_Z13TagFieldSpecsv(), !dbg !31
   store %struct.vector* %ref.tmp, %struct.vector** %__range, align 8, !dbg !31
   call void @llvm.dbg.declare(metadata i8** %__begin, metadata !32, metadata !{}), !dbg !30
-  %1 = load %struct.vector** %__range, align 8, !dbg !31
+  %1 = load %struct.vector*, %struct.vector** %__range, align 8, !dbg !31
   %call = call i8* @_ZN6vector5beginEv(%struct.vector* %1), !dbg !31
   store i8* %call, i8** %__begin, align 8, !dbg !31
   call void @llvm.dbg.declare(metadata i8** %__end, metadata !33, metadata !{}), !dbg !30
-  %2 = load %struct.vector** %__range, align 8, !dbg !31
+  %2 = load %struct.vector*, %struct.vector** %__range, align 8, !dbg !31
   %call1 = call i8* @_ZN6vector3endEv(%struct.vector* %2), !dbg !31
   store i8* %call1, i8** %__end, align 8, !dbg !31
   br label %for.cond, !dbg !31
 
 for.cond:                                         ; preds = %for.inc, %0
-  %3 = load i8** %__begin, align 8, !dbg !34
-  %4 = load i8** %__end, align 8, !dbg !34
+  %3 = load i8*, i8** %__begin, align 8, !dbg !34
+  %4 = load i8*, i8** %__end, align 8, !dbg !34
   %cmp = icmp ne i8* %3, %4, !dbg !34
   br i1 %cmp, label %for.body, label %for.end, !dbg !34
 
 for.body:                                         ; preds = %for.cond
   call void @llvm.dbg.declare(metadata i8* %spec, metadata !37, metadata !{}), !dbg !31
-  %5 = load i8** %__begin, align 8, !dbg !38
-  %6 = load i8* %5, align 1, !dbg !38
+  %5 = load i8*, i8** %__begin, align 8, !dbg !38
+  %6 = load i8, i8* %5, align 1, !dbg !38
   store i8 %6, i8* %spec, align 1, !dbg !38
   br label %for.inc, !dbg !38
 
 for.inc:                                          ; preds = %for.body
-  %7 = load i8** %__begin, align 8, !dbg !40
+  %7 = load i8*, i8** %__begin, align 8, !dbg !40
   %incdec.ptr = getelementptr inbounds i8, i8* %7, i32 1, !dbg !40
   store i8* %incdec.ptr, i8** %__begin, align 8, !dbg !40
   br label %for.cond, !dbg !40
@@ -59,7 +59,7 @@
   unreachable, !dbg !42
 
 return:                                           ; No predecessors!
-  %8 = load i32* %retval, !dbg !44
+  %8 = load i32, i32* %retval, !dbg !44
   ret i32 %8, !dbg !44
 }
 
diff --git a/llvm/test/Transforms/GCOVProfiling/return-block.ll b/llvm/test/Transforms/GCOVProfiling/return-block.ll
index f0be3d2..787a75e 100644
--- a/llvm/test/Transforms/GCOVProfiling/return-block.ll
+++ b/llvm/test/Transforms/GCOVProfiling/return-block.ll
@@ -13,7 +13,7 @@
 define void @test() #0 {
 entry:
   tail call void (...)* @f() #2, !dbg !14
-  %0 = load i32* @A, align 4, !dbg !15
+  %0 = load i32, i32* @A, align 4, !dbg !15
   %tobool = icmp eq i32 %0, 0, !dbg !15
   br i1 %tobool, label %if.end, label %if.then, !dbg !15
 
diff --git a/llvm/test/Transforms/GVN/2007-07-25-DominatedLoop.ll b/llvm/test/Transforms/GVN/2007-07-25-DominatedLoop.ll
index ad580ce..10d1e22 100644
--- a/llvm/test/Transforms/GVN/2007-07-25-DominatedLoop.ll
+++ b/llvm/test/Transforms/GVN/2007-07-25-DominatedLoop.ll
@@ -71,11 +71,11 @@
 	ret void
 
 cond_next150:		; preds = %Perl_safefree.exit68
-	%tmp16092 = load i32* @PL_sv_count, align 4		; <i32> [#uses=0]
+	%tmp16092 = load i32, i32* @PL_sv_count, align 4		; <i32> [#uses=0]
 	br label %cond_next165
 
 bb157:		; preds = %cond_next165
-	%tmp158 = load i32* @PL_sv_count, align 4		; <i32> [#uses=0]
+	%tmp158 = load i32, i32* @PL_sv_count, align 4		; <i32> [#uses=0]
 	br label %cond_next165
 
 cond_next165:		; preds = %bb157, %cond_next150
diff --git a/llvm/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll b/llvm/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll
index 7e9c982..0ffb34c 100644
--- a/llvm/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll
+++ b/llvm/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll
@@ -9,6 +9,6 @@
 	br label %bb
 
 bb:		; preds = %bb, %entry
-	%tmp10 = load %struct.INT2** @blkshifts, align 4		; <%struct.INT2*> [#uses=0]
+	%tmp10 = load %struct.INT2*, %struct.INT2** @blkshifts, align 4		; <%struct.INT2*> [#uses=0]
 	br label %bb
 }
diff --git a/llvm/test/Transforms/GVN/2007-07-25-Loop.ll b/llvm/test/Transforms/GVN/2007-07-25-Loop.ll
index 6a9f58e..54c0d98 100644
--- a/llvm/test/Transforms/GVN/2007-07-25-Loop.ll
+++ b/llvm/test/Transforms/GVN/2007-07-25-Loop.ll
@@ -10,6 +10,6 @@
 	br i1 false, label %bb278, label %bb344
 
 bb344:		; preds = %bb278, %entry
-	%tmp38758 = load i16* null, align 2		; <i16> [#uses=0]
+	%tmp38758 = load i16, i16* null, align 2		; <i16> [#uses=0]
 	ret void
 }
diff --git a/llvm/test/Transforms/GVN/2007-07-25-NestedLoop.ll b/llvm/test/Transforms/GVN/2007-07-25-NestedLoop.ll
index 6a7a409..8f2c182 100644
--- a/llvm/test/Transforms/GVN/2007-07-25-NestedLoop.ll
+++ b/llvm/test/Transforms/GVN/2007-07-25-NestedLoop.ll
@@ -11,21 +11,21 @@
 
 bb556.preheader:		; preds = %entry
 	%tmp56119 = getelementptr %struct.TypHeader, %struct.TypHeader* %hdR, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp56220 = load i32* %tmp56119		; <i32> [#uses=0]
+	%tmp56220 = load i32, i32* %tmp56119		; <i32> [#uses=0]
 	br i1 false, label %bb.nph23, label %bb675.preheader
 
 bb.nph23:		; preds = %bb556.preheader
 	ret %struct.TypHeader* null
 
 bb656:		; preds = %bb675.outer, %bb656
-	%tmp678 = load i32* %tmp677		; <i32> [#uses=0]
+	%tmp678 = load i32, i32* %tmp677		; <i32> [#uses=0]
 	br i1 false, label %bb684, label %bb656
 
 bb684:		; preds = %bb675.outer, %bb656
 	br i1 false, label %bb924.preheader, label %bb675.outer
 
 bb675.outer:		; preds = %bb675.preheader, %bb684
-	%tmp67812 = load i32* %tmp67711		; <i32> [#uses=0]
+	%tmp67812 = load i32, i32* %tmp67711		; <i32> [#uses=0]
 	br i1 false, label %bb684, label %bb656
 
 bb675.preheader:		; preds = %bb556.preheader
diff --git a/llvm/test/Transforms/GVN/2007-07-25-SinglePredecessor.ll b/llvm/test/Transforms/GVN/2007-07-25-SinglePredecessor.ll
index 6f21423..d7e6c20 100644
--- a/llvm/test/Transforms/GVN/2007-07-25-SinglePredecessor.ll
+++ b/llvm/test/Transforms/GVN/2007-07-25-SinglePredecessor.ll
@@ -21,7 +21,7 @@
 
 cond_true:		; preds = %cond_next.i
 	%tmp3.i8 = getelementptr %struct.mrViewingHitRecord, %struct.mrViewingHitRecord* %VHR, i32 0, i32 1, i32 0, i32 0		; <double*> [#uses=1]
-	%tmp46 = load double* %tmp3.i8		; <double> [#uses=0]
+	%tmp46 = load double, double* %tmp3.i8		; <double> [#uses=0]
 	ret i32 1
 
 return:		; preds = %cond_next.i, %entry
diff --git a/llvm/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll b/llvm/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
index 5a15f0e..f88ffcf 100644
--- a/llvm/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
+++ b/llvm/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
@@ -5,7 +5,7 @@
 define i32 @NextRootMove(i32 %wtm, i32 %x, i32 %y, i32 %z) {
 entry:
         %A = alloca i32*
-	%tmp17618 = load i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4
+	%tmp17618 = load i32*, i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4
         store i32* %tmp17618, i32** %A
 ; CHECK: entry:
 ; CHECK-NEXT: alloca i32
@@ -19,7 +19,7 @@
 	br i1 %cmp, label %cond_true128, label %cond_true145
 
 cond_true128:
-	%tmp17625 = load i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4
+	%tmp17625 = load i32*, i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4
         store i32* %tmp17625, i32** %A
    %cmp1 = icmp eq i32 %x, %z
 	br i1 %cmp1 , label %bb98.backedge, label %return.loopexit
@@ -28,7 +28,7 @@
 	br label %cond_true116
 
 cond_true145:
-	%tmp17631 = load i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4
+	%tmp17631 = load i32*, i32** getelementptr ([65 x i32*]* @last, i32 0, i32 1), align 4
         store i32* %tmp17631, i32** %A
 	br i1 false, label %bb98.backedge, label %return.loopexit
 
diff --git a/llvm/test/Transforms/GVN/2007-07-26-NonRedundant.ll b/llvm/test/Transforms/GVN/2007-07-26-NonRedundant.ll
index 7579e8a..211830a 100644
--- a/llvm/test/Transforms/GVN/2007-07-26-NonRedundant.ll
+++ b/llvm/test/Transforms/GVN/2007-07-26-NonRedundant.ll
@@ -11,6 +11,6 @@
 	br label %bb19
 
 bb19:		; preds = %cond_next, %entry
-	%tmp29 = load i32* @bsLive, align 4		; <i32> [#uses=0]
+	%tmp29 = load i32, i32* @bsLive, align 4		; <i32> [#uses=0]
 	ret i32 0
 }
diff --git a/llvm/test/Transforms/GVN/2007-07-26-PhiErasure.ll b/llvm/test/Transforms/GVN/2007-07-26-PhiErasure.ll
index d898ab8..82af521 100644
--- a/llvm/test/Transforms/GVN/2007-07-26-PhiErasure.ll
+++ b/llvm/test/Transforms/GVN/2007-07-26-PhiErasure.ll
@@ -9,7 +9,7 @@
 
 define i32 @reload(%struct.rtx_def* %first, i32 %global, %struct.FILE* %dumpfile) {
 cond_next2835.1:		; preds = %cond_next2861
-	%tmp2922 = load i32* @n_spills, align 4		; <i32> [#uses=0]
+	%tmp2922 = load i32, i32* @n_spills, align 4		; <i32> [#uses=0]
 	br label %bb2928
 
 bb2928:		; preds = %cond_next2835.1, %cond_next2943
@@ -22,7 +22,7 @@
 	br i1 false, label %bb2982.preheader, label %bb2928
 
 bb2982.preheader:		; preds = %cond_next2943
-	%tmp298316 = load i32* @n_spills, align 4		; <i32> [#uses=0]
+	%tmp298316 = load i32, i32* @n_spills, align 4		; <i32> [#uses=0]
 	ret i32 %tmp298316
 
 }
diff --git a/llvm/test/Transforms/GVN/2007-07-30-PredIDom.ll b/llvm/test/Transforms/GVN/2007-07-30-PredIDom.ll
index 5cb6bb3..3a7eec7 100644
--- a/llvm/test/Transforms/GVN/2007-07-30-PredIDom.ll
+++ b/llvm/test/Transforms/GVN/2007-07-30-PredIDom.ll
@@ -269,6 +269,6 @@
 	br label %return
 
 return:		; preds = %0, %cond_next967, %cond_next922, %cond_next879, %cond_next807, %cond_next630, %cond_next415, %cond_next267, %cond_next191, %bb
-	%retval980 = load i8** null		; <i8*> [#uses=1]
+	%retval980 = load i8*, i8** null		; <i8*> [#uses=1]
 	ret i8* %retval980
 }
diff --git a/llvm/test/Transforms/GVN/2007-07-31-NoDomInherit.ll b/llvm/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
index c9acdbb..c30a283 100644
--- a/llvm/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
+++ b/llvm/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
@@ -149,27 +149,27 @@
 	br label %bb91
 
 bb:		; preds = %cond_next97
-	%tmp1 = load i32* @numi		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* @numi		; <i32> [#uses=1]
 	%tmp2 = getelementptr [44 x i8], [44 x i8]* @.str43, i32 0, i32 0		; <i8*> [#uses=1]
 	%tmp3 = call i32 (i8*, ...)* @printf( i8* %tmp2, i32 %tmp1 )		; <i32> [#uses=0]
 	store i32 0, i32* %i
 	br label %bb13
 
 bb4:		; preds = %bb13
-	%tmp5 = load i32* %i		; <i32> [#uses=1]
-	%tmp6 = load i32* %i		; <i32> [#uses=1]
+	%tmp5 = load i32, i32* %i		; <i32> [#uses=1]
+	%tmp6 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp7 = getelementptr [17 x i32], [17 x i32]* @trialx, i32 0, i32 %tmp6		; <i32*> [#uses=1]
-	%tmp8 = load i32* %tmp7		; <i32> [#uses=1]
+	%tmp8 = load i32, i32* %tmp7		; <i32> [#uses=1]
 	%tmp9 = call i32 @userfun( i32 %tmp8 )		; <i32> [#uses=1]
 	%tmp10 = getelementptr [17 x i32], [17 x i32]* @correct_result, i32 0, i32 %tmp5		; <i32*> [#uses=1]
 	store i32 %tmp9, i32* %tmp10
-	%tmp11 = load i32* %i		; <i32> [#uses=1]
+	%tmp11 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp12 = add i32 %tmp11, 1		; <i32> [#uses=1]
 	store i32 %tmp12, i32* %i
 	br label %bb13
 
 bb13:		; preds = %bb4, %bb
-	%tmp14 = load i32* %i		; <i32> [#uses=1]
+	%tmp14 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp15 = icmp sle i32 %tmp14, 16		; <i1> [#uses=1]
 	%tmp1516 = zext i1 %tmp15 to i32		; <i32> [#uses=1]
 	%toBool = icmp ne i32 %tmp1516, 0		; <i1> [#uses=1]
@@ -180,47 +180,47 @@
 	br label %bb49
 
 bb18:		; preds = %bb49
-	%tmp19 = load i32* %i		; <i32> [#uses=1]
+	%tmp19 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp20 = getelementptr [5 x { i32, [3 x i32] }], [5 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %tmp19		; <{ i32, [3 x i32] }*> [#uses=1]
 	%tmp21 = getelementptr { i32, [3 x i32] }, { i32, [3 x i32] }* %tmp20, i32 0, i32 0		; <i32*> [#uses=1]
 	store i32 0, i32* %tmp21
-	%tmp22 = load i32* %i		; <i32> [#uses=1]
+	%tmp22 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp23 = getelementptr [13 x %struct.anon], [13 x %struct.anon]* @isa, i32 0, i32 0		; <%struct.anon*> [#uses=1]
 	%tmp24 = getelementptr %struct.anon, %struct.anon* %tmp23, i32 0, i32 3		; <[3 x i32]*> [#uses=1]
 	%tmp25 = getelementptr [3 x i32], [3 x i32]* %tmp24, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp26 = load i32* %tmp25		; <i32> [#uses=1]
+	%tmp26 = load i32, i32* %tmp25		; <i32> [#uses=1]
 	%tmp27 = getelementptr [5 x { i32, [3 x i32] }], [5 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %tmp22		; <{ i32, [3 x i32] }*> [#uses=1]
 	%tmp28 = getelementptr { i32, [3 x i32] }, { i32, [3 x i32] }* %tmp27, i32 0, i32 1		; <[3 x i32]*> [#uses=1]
 	%tmp29 = getelementptr [3 x i32], [3 x i32]* %tmp28, i32 0, i32 0		; <i32*> [#uses=1]
 	store i32 %tmp26, i32* %tmp29
-	%tmp30 = load i32* %i		; <i32> [#uses=1]
+	%tmp30 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp31 = getelementptr [13 x %struct.anon], [13 x %struct.anon]* @isa, i32 0, i32 0		; <%struct.anon*> [#uses=1]
 	%tmp32 = getelementptr %struct.anon, %struct.anon* %tmp31, i32 0, i32 3		; <[3 x i32]*> [#uses=1]
 	%tmp33 = getelementptr [3 x i32], [3 x i32]* %tmp32, i32 0, i32 1		; <i32*> [#uses=1]
-	%tmp34 = load i32* %tmp33		; <i32> [#uses=1]
+	%tmp34 = load i32, i32* %tmp33		; <i32> [#uses=1]
 	%tmp35 = getelementptr [5 x { i32, [3 x i32] }], [5 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %tmp30		; <{ i32, [3 x i32] }*> [#uses=1]
 	%tmp36 = getelementptr { i32, [3 x i32] }, { i32, [3 x i32] }* %tmp35, i32 0, i32 1		; <[3 x i32]*> [#uses=1]
 	%tmp37 = getelementptr [3 x i32], [3 x i32]* %tmp36, i32 0, i32 1		; <i32*> [#uses=1]
 	store i32 %tmp34, i32* %tmp37
-	%tmp38 = load i32* %i		; <i32> [#uses=1]
+	%tmp38 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp39 = getelementptr [13 x %struct.anon], [13 x %struct.anon]* @isa, i32 0, i32 0		; <%struct.anon*> [#uses=1]
 	%tmp40 = getelementptr %struct.anon, %struct.anon* %tmp39, i32 0, i32 3		; <[3 x i32]*> [#uses=1]
 	%tmp41 = getelementptr [3 x i32], [3 x i32]* %tmp40, i32 0, i32 2		; <i32*> [#uses=1]
-	%tmp42 = load i32* %tmp41		; <i32> [#uses=1]
+	%tmp42 = load i32, i32* %tmp41		; <i32> [#uses=1]
 	%tmp43 = getelementptr [5 x { i32, [3 x i32] }], [5 x { i32, [3 x i32] }]* @pgm, i32 0, i32 %tmp38		; <{ i32, [3 x i32] }*> [#uses=1]
 	%tmp44 = getelementptr { i32, [3 x i32] }, { i32, [3 x i32] }* %tmp43, i32 0, i32 1		; <[3 x i32]*> [#uses=1]
 	%tmp45 = getelementptr [3 x i32], [3 x i32]* %tmp44, i32 0, i32 2		; <i32*> [#uses=1]
 	store i32 %tmp42, i32* %tmp45
-	%tmp46 = load i32* %i		; <i32> [#uses=1]
+	%tmp46 = load i32, i32* %i		; <i32> [#uses=1]
 	call void @fix_operands( i32 %tmp46 )
-	%tmp47 = load i32* %i		; <i32> [#uses=1]
+	%tmp47 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp48 = add i32 %tmp47, 1		; <i32> [#uses=1]
 	store i32 %tmp48, i32* %i
 	br label %bb49
 
 bb49:		; preds = %bb18, %bb17
-	%tmp50 = load i32* @numi		; <i32> [#uses=1]
-	%tmp51 = load i32* %i		; <i32> [#uses=1]
+	%tmp50 = load i32, i32* @numi		; <i32> [#uses=1]
+	%tmp51 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp52 = icmp slt i32 %tmp51, %tmp50		; <i1> [#uses=1]
 	%tmp5253 = zext i1 %tmp52 to i32		; <i32> [#uses=1]
 	%toBool54 = icmp ne i32 %tmp5253, 0		; <i1> [#uses=1]
@@ -230,9 +230,9 @@
 	%tmp56 = call i32 @search( )		; <i32> [#uses=1]
 	store i32 %tmp56, i32* %num_sol
 	%tmp57 = getelementptr [21 x i8], [21 x i8]* @.str44, i32 0, i32 0		; <i8*> [#uses=1]
-	%tmp58 = load i32* %num_sol		; <i32> [#uses=1]
+	%tmp58 = load i32, i32* %num_sol		; <i32> [#uses=1]
 	%tmp59 = call i32 (i8*, ...)* @printf( i8* %tmp57, i32 %tmp58 )		; <i32> [#uses=0]
-	%tmp60 = load i32* @counters		; <i32> [#uses=1]
+	%tmp60 = load i32, i32* @counters		; <i32> [#uses=1]
 	%tmp61 = icmp ne i32 %tmp60, 0		; <i1> [#uses=1]
 	%tmp6162 = zext i1 %tmp61 to i32		; <i32> [#uses=1]
 	%toBool63 = icmp ne i32 %tmp6162, 0		; <i1> [#uses=1]
@@ -246,25 +246,25 @@
 	br label %bb79
 
 bb66:		; preds = %bb79
-	%tmp67 = load i32* %i		; <i32> [#uses=1]
+	%tmp67 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp68 = getelementptr [5 x i32], [5 x i32]* @counter, i32 0, i32 %tmp67		; <i32*> [#uses=1]
-	%tmp69 = load i32* %tmp68		; <i32> [#uses=1]
+	%tmp69 = load i32, i32* %tmp68		; <i32> [#uses=1]
 	%tmp70 = getelementptr [5 x i8], [5 x i8]* @.str46, i32 0, i32 0		; <i8*> [#uses=1]
 	%tmp71 = call i32 (i8*, ...)* @printf( i8* %tmp70, i32 %tmp69 )		; <i32> [#uses=0]
-	%tmp72 = load i32* %i		; <i32> [#uses=1]
+	%tmp72 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp73 = getelementptr [5 x i32], [5 x i32]* @counter, i32 0, i32 %tmp72		; <i32*> [#uses=1]
-	%tmp74 = load i32* %tmp73		; <i32> [#uses=1]
-	%tmp75 = load i32* %total		; <i32> [#uses=1]
+	%tmp74 = load i32, i32* %tmp73		; <i32> [#uses=1]
+	%tmp75 = load i32, i32* %total		; <i32> [#uses=1]
 	%tmp76 = add i32 %tmp74, %tmp75		; <i32> [#uses=1]
 	store i32 %tmp76, i32* %total
-	%tmp77 = load i32* %i		; <i32> [#uses=1]
+	%tmp77 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp78 = add i32 %tmp77, 1		; <i32> [#uses=1]
 	store i32 %tmp78, i32* %i
 	br label %bb79
 
 bb79:		; preds = %bb66, %cond_true
-	%tmp80 = load i32* @numi		; <i32> [#uses=1]
-	%tmp81 = load i32* %i		; <i32> [#uses=1]
+	%tmp80 = load i32, i32* @numi		; <i32> [#uses=1]
+	%tmp81 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp82 = icmp slt i32 %tmp81, %tmp80		; <i1> [#uses=1]
 	%tmp8283 = zext i1 %tmp82 to i32		; <i32> [#uses=1]
 	%toBool84 = icmp ne i32 %tmp8283, 0		; <i1> [#uses=1]
@@ -272,18 +272,18 @@
 
 bb85:		; preds = %bb79
 	%tmp86 = getelementptr [12 x i8], [12 x i8]* @.str47, i32 0, i32 0		; <i8*> [#uses=1]
-	%tmp87 = load i32* %total		; <i32> [#uses=1]
+	%tmp87 = load i32, i32* %total		; <i32> [#uses=1]
 	%tmp88 = call i32 (i8*, ...)* @printf( i8* %tmp86, i32 %tmp87 )		; <i32> [#uses=0]
 	br label %cond_next
 
 cond_next:		; preds = %bb85, %bb55
-	%tmp89 = load i32* @numi		; <i32> [#uses=1]
+	%tmp89 = load i32, i32* @numi		; <i32> [#uses=1]
 	%tmp90 = add i32 %tmp89, 1		; <i32> [#uses=1]
 	store i32 %tmp90, i32* @numi
 	br label %bb91
 
 bb91:		; preds = %cond_next, %entry
-	%tmp92 = load i32* @numi		; <i32> [#uses=1]
+	%tmp92 = load i32, i32* @numi		; <i32> [#uses=1]
 	%tmp93 = icmp sgt i32 %tmp92, 5		; <i1> [#uses=1]
 	%tmp9394 = zext i1 %tmp93 to i32		; <i32> [#uses=1]
 	%toBool95 = icmp ne i32 %tmp9394, 0		; <i1> [#uses=1]
@@ -293,7 +293,7 @@
 	br label %bb102
 
 cond_next97:		; preds = %bb91
-	%tmp98 = load i32* %num_sol		; <i32> [#uses=1]
+	%tmp98 = load i32, i32* %num_sol		; <i32> [#uses=1]
 	%tmp99 = icmp eq i32 %tmp98, 0		; <i1> [#uses=1]
 	%tmp99100 = zext i1 %tmp99 to i32		; <i32> [#uses=1]
 	%toBool101 = icmp ne i32 %tmp99100, 0		; <i1> [#uses=1]
@@ -301,12 +301,12 @@
 
 bb102:		; preds = %cond_next97, %cond_true96
 	store i32 0, i32* %tmp
-	%tmp103 = load i32* %tmp		; <i32> [#uses=1]
+	%tmp103 = load i32, i32* %tmp		; <i32> [#uses=1]
 	store i32 %tmp103, i32* %retval
 	br label %return
 
 return:		; preds = %bb102
-	%retval104 = load i32* %retval		; <i32> [#uses=1]
+	%retval104 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval104
 }
 
diff --git a/llvm/test/Transforms/GVN/2007-07-31-RedundantPhi.ll b/llvm/test/Transforms/GVN/2007-07-31-RedundantPhi.ll
index 13419d1..b285560 100644
--- a/llvm/test/Transforms/GVN/2007-07-31-RedundantPhi.ll
+++ b/llvm/test/Transforms/GVN/2007-07-31-RedundantPhi.ll
@@ -17,6 +17,6 @@
 	br label %cond_next698
 
 cond_next698:		; preds = %cond_true492
-	%tmp701 = load i16* @img_width, align 2		; <i16> [#uses=0]
+	%tmp701 = load i16, i16* @img_width, align 2		; <i16> [#uses=0]
 	ret i32 0
 }
diff --git a/llvm/test/Transforms/GVN/2008-02-12-UndefLoad.ll b/llvm/test/Transforms/GVN/2008-02-12-UndefLoad.ll
index 2a35f10..8ebeb14 100644
--- a/llvm/test/Transforms/GVN/2008-02-12-UndefLoad.ll
+++ b/llvm/test/Transforms/GVN/2008-02-12-UndefLoad.ll
@@ -8,7 +8,7 @@
         %c = alloca %struct.anon                ; <%struct.anon*> [#uses=2]
         %tmp = getelementptr %struct.anon, %struct.anon* %c, i32 0, i32 0             ; <i32*> [#uses=1]
         %tmp1 = getelementptr i32, i32* %tmp, i32 1          ; <i32*> [#uses=2]
-        %tmp2 = load i32* %tmp1, align 4                ; <i32> [#uses=1]
+        %tmp2 = load i32, i32* %tmp1, align 4                ; <i32> [#uses=1]
         %tmp3 = or i32 %tmp2, 11                ; <i32> [#uses=1]
         %tmp4 = and i32 %tmp3, -21              ; <i32> [#uses=1]
         store i32 %tmp4, i32* %tmp1, align 4
diff --git a/llvm/test/Transforms/GVN/2008-02-13-NewPHI.ll b/llvm/test/Transforms/GVN/2008-02-13-NewPHI.ll
index 80b519d..638939b 100644
--- a/llvm/test/Transforms/GVN/2008-02-13-NewPHI.ll
+++ b/llvm/test/Transforms/GVN/2008-02-13-NewPHI.ll
@@ -8,7 +8,7 @@
 	br label %bb33
 
 bb:		; preds = %bb33
-	%tmp27 = load float** %sx_addr, align 4		; <float*> [#uses=1]
+	%tmp27 = load float*, float** %sx_addr, align 4		; <float*> [#uses=1]
 	store float 0.000000e+00, float* %tmp27, align 4
 	store float* null, float** %sx_addr, align 4
 	br label %bb33
@@ -17,6 +17,6 @@
 	br i1 false, label %bb, label %return
 
 return:		; preds = %bb33
-	%retval59 = load i32* null, align 4		; <i32> [#uses=1]
+	%retval59 = load i32, i32* null, align 4		; <i32> [#uses=1]
 	ret i32 %retval59
 }
diff --git a/llvm/test/Transforms/GVN/2008-07-02-Unreachable.ll b/llvm/test/Transforms/GVN/2008-07-02-Unreachable.ll
index ce83fa4..d993264 100644
--- a/llvm/test/Transforms/GVN/2008-07-02-Unreachable.ll
+++ b/llvm/test/Transforms/GVN/2008-07-02-Unreachable.ll
@@ -13,7 +13,7 @@
 	br label %ifend
 
 ifelse:		; preds = %entry
-	%tmp3 = load i8* @g_3		; <i8> [#uses=0]
+	%tmp3 = load i8, i8* @g_3		; <i8> [#uses=0]
         store i8 %tmp3, i8* %A
 	br label %afterfor
 
@@ -27,7 +27,7 @@
 	br label %forcond
 
 afterfor:		; preds = %forcond, %forcond.thread
-	%tmp10 = load i8* @g_3		; <i8> [#uses=0]
+	%tmp10 = load i8, i8* @g_3		; <i8> [#uses=0]
 	ret i8 %tmp10
 
 ifend:		; preds = %afterfor, %ifthen
diff --git a/llvm/test/Transforms/GVN/2008-12-09-SelfRemove.ll b/llvm/test/Transforms/GVN/2008-12-09-SelfRemove.ll
index dacea14..d8ab1ba 100644
--- a/llvm/test/Transforms/GVN/2008-12-09-SelfRemove.ll
+++ b/llvm/test/Transforms/GVN/2008-12-09-SelfRemove.ll
@@ -14,7 +14,7 @@
 	br i1 false, label %return, label %bb
 
 bb:		; preds = %entry
-	%1 = load i8** %0, align 4		; <i8*> [#uses=0]
+	%1 = load i8*, i8** %0, align 4		; <i8*> [#uses=0]
 	%2 = getelementptr %struct.d_print_info, %struct.d_print_info* %dpi, i32 0, i32 1		; <i8**> [#uses=0]
 	br label %bb21
 
diff --git a/llvm/test/Transforms/GVN/2008-12-12-RLE-Crash.ll b/llvm/test/Transforms/GVN/2008-12-12-RLE-Crash.ll
index bcb5427..dabf7fa 100644
--- a/llvm/test/Transforms/GVN/2008-12-12-RLE-Crash.ll
+++ b/llvm/test/Transforms/GVN/2008-12-12-RLE-Crash.ll
@@ -7,7 +7,7 @@
 	br label %bb84
 
 bb41:		; preds = %bb82
-	%tmp = load i8* %opt.0, align 1		; <i8> [#uses=0]
+	%tmp = load i8, i8* %opt.0, align 1		; <i8> [#uses=0]
 	%tmp1 = getelementptr i8, i8* %opt.0, i32 1		; <i8*> [#uses=2]
 	switch i32 0, label %bb81 [
 		i32 102, label %bb82
@@ -26,7 +26,7 @@
 
 bb82:		; preds = %bb84, %bb79, %bb41
 	%opt.0 = phi i8* [ %tmp3, %bb84 ], [ %tmp1, %bb79 ], [ %tmp1, %bb41 ]		; <i8*> [#uses=3]
-	%tmp2 = load i8* %opt.0, align 1		; <i8> [#uses=0]
+	%tmp2 = load i8, i8* %opt.0, align 1		; <i8> [#uses=0]
 	br i1 false, label %bb84, label %bb41
 
 bb84:		; preds = %bb82, %entry
diff --git a/llvm/test/Transforms/GVN/2008-12-14-rle-reanalyze.ll b/llvm/test/Transforms/GVN/2008-12-14-rle-reanalyze.ll
index de2950f..8454022 100644
--- a/llvm/test/Transforms/GVN/2008-12-14-rle-reanalyze.ll
+++ b/llvm/test/Transforms/GVN/2008-12-14-rle-reanalyze.ll
@@ -12,7 +12,7 @@
 
 bb23:		; preds = %bb23, %bb22
 	%sortv.233 = phi i32* [ getelementptr ([256 x i32]* @sort_value, i32 0, i32 0), %bb22 ], [ %sortv.2, %bb23 ]		; <i32*> [#uses=1]
-	%0 = load i32* %sortv.233, align 4		; <i32> [#uses=0]
+	%0 = load i32, i32* %sortv.233, align 4		; <i32> [#uses=0]
 	%sortv.2 = getelementptr [256 x i32], [256 x i32]* @sort_value, i32 0, i32 0		; <i32*> [#uses=1]
 	br i1 false, label %bb23, label %bb22
 }
diff --git a/llvm/test/Transforms/GVN/2008-12-15-CacheVisited.ll b/llvm/test/Transforms/GVN/2008-12-15-CacheVisited.ll
index 6c2e4da..73adacd 100644
--- a/llvm/test/Transforms/GVN/2008-12-15-CacheVisited.ll
+++ b/llvm/test/Transforms/GVN/2008-12-15-CacheVisited.ll
@@ -11,7 +11,7 @@
 
 bb203:		; preds = %entry
 	%tmp = getelementptr i32, i32* %decl, i32 1		; <i32*> [#uses=1]
-	%tmp1 = load i32* %tmp, align 4		; <i32> [#uses=0]
+	%tmp1 = load i32, i32* %tmp, align 4		; <i32> [#uses=0]
 	br i1 false, label %bb207, label %bb204
 
 bb204:		; preds = %bb203
@@ -23,6 +23,6 @@
 
 bb208:		; preds = %bb207, %bb204
 	%iftmp.1374.0.in = phi i32* [ null, %bb207 ], [ %tmp2, %bb204 ]		; <i32*> [#uses=1]
-	%iftmp.1374.0 = load i32* %iftmp.1374.0.in		; <i32> [#uses=0]
+	%iftmp.1374.0 = load i32, i32* %iftmp.1374.0.in		; <i32> [#uses=0]
 	unreachable
 }
diff --git a/llvm/test/Transforms/GVN/2009-01-21-SortInvalidation.ll b/llvm/test/Transforms/GVN/2009-01-21-SortInvalidation.ll
index 3677593..6144697 100644
--- a/llvm/test/Transforms/GVN/2009-01-21-SortInvalidation.ll
+++ b/llvm/test/Transforms/GVN/2009-01-21-SortInvalidation.ll
@@ -37,7 +37,7 @@
 	br i1 false, label %bb554, label %bb552
 
 bb552:		; preds = %bb550
-	%0 = load i8* %d.0, align 8		; <i8> [#uses=0]
+	%0 = load i8, i8* %d.0, align 8		; <i8> [#uses=0]
 	br label %bb554
 
 bb554:		; preds = %bb552, %bb550, %bb549
diff --git a/llvm/test/Transforms/GVN/2009-01-22-SortInvalidation.ll b/llvm/test/Transforms/GVN/2009-01-22-SortInvalidation.ll
index 8a4d48b..89b058a 100644
--- a/llvm/test/Transforms/GVN/2009-01-22-SortInvalidation.ll
+++ b/llvm/test/Transforms/GVN/2009-01-22-SortInvalidation.ll
@@ -79,11 +79,11 @@
 	br label %bb69.loopexit
 
 bb59:		; preds = %bb63.preheader
-	%0 = load %struct..4sPragmaType** %3, align 4		; <%struct..4sPragmaType*> [#uses=0]
+	%0 = load %struct..4sPragmaType*, %struct..4sPragmaType** %3, align 4		; <%struct..4sPragmaType*> [#uses=0]
 	br label %bb65
 
 bb65:		; preds = %bb63.preheader, %bb59
-	%1 = load %struct..4sPragmaType** %4, align 4		; <%struct..4sPragmaType*> [#uses=0]
+	%1 = load %struct..4sPragmaType*, %struct..4sPragmaType** %4, align 4		; <%struct..4sPragmaType*> [#uses=0]
 	br i1 false, label %bb67, label %bb63.preheader
 
 bb67:		; preds = %bb65
diff --git a/llvm/test/Transforms/GVN/2009-02-17-LoadPRECrash.ll b/llvm/test/Transforms/GVN/2009-02-17-LoadPRECrash.ll
index cc2bb1d..378d7e7 100644
--- a/llvm/test/Transforms/GVN/2009-02-17-LoadPRECrash.ll
+++ b/llvm/test/Transforms/GVN/2009-02-17-LoadPRECrash.ll
@@ -24,11 +24,11 @@
 	%addr = alloca %struct.rtx_def*		; <%struct.rtx_def**> [#uses=5]
 	%iftmp.1532 = alloca %struct.rtx_def*		; <%struct.rtx_def**> [#uses=3]
 	store %struct.rtx_def* %orig, %struct.rtx_def** null
-	%0 = load %struct.rtx_def** null, align 4		; <%struct.rtx_def*> [#uses=0]
+	%0 = load %struct.rtx_def*, %struct.rtx_def** null, align 4		; <%struct.rtx_def*> [#uses=0]
 	br i1 false, label %bb96, label %bb59
 
 bb59:		; preds = %entry
-	%1 = load %struct.rtx_def** %addr, align 4		; <%struct.rtx_def*> [#uses=1]
+	%1 = load %struct.rtx_def*, %struct.rtx_def** %addr, align 4		; <%struct.rtx_def*> [#uses=1]
 	%2 = call i32 @local_symbolic_operand(%struct.rtx_def* %1, i32 0) nounwind		; <i32> [#uses=0]
 	br i1 false, label %bb96, label %bb63
 
@@ -89,22 +89,22 @@
 	unreachable
 
 bb96:		; preds = %bb59, %entry
-	%5 = load %struct.rtx_def** %addr, align 4		; <%struct.rtx_def*> [#uses=1]
+	%5 = load %struct.rtx_def*, %struct.rtx_def** %addr, align 4		; <%struct.rtx_def*> [#uses=1]
 	%6 = getelementptr %struct.rtx_def, %struct.rtx_def* %5, i32 0, i32 0		; <i16*> [#uses=1]
-	%7 = load i16* %6, align 2		; <i16> [#uses=0]
+	%7 = load i16, i16* %6, align 2		; <i16> [#uses=0]
 	br i1 false, label %bb147, label %bb97
 
 bb97:		; preds = %bb96
-	%8 = load %struct.rtx_def** %addr, align 4		; <%struct.rtx_def*> [#uses=0]
+	%8 = load %struct.rtx_def*, %struct.rtx_def** %addr, align 4		; <%struct.rtx_def*> [#uses=0]
 	br i1 false, label %bb147, label %bb99
 
 bb99:		; preds = %bb97
 	unreachable
 
 bb147:		; preds = %bb97, %bb96
-	%9 = load %struct.rtx_def** %addr, align 4		; <%struct.rtx_def*> [#uses=1]
+	%9 = load %struct.rtx_def*, %struct.rtx_def** %addr, align 4		; <%struct.rtx_def*> [#uses=1]
 	%10 = getelementptr %struct.rtx_def, %struct.rtx_def* %9, i32 0, i32 0		; <i16*> [#uses=1]
-	%11 = load i16* %10, align 2		; <i16> [#uses=0]
+	%11 = load i16, i16* %10, align 2		; <i16> [#uses=0]
 	br i1 false, label %bb164, label %bb148
 
 bb148:		; preds = %bb147
@@ -167,7 +167,7 @@
 	unreachable
 
 bb211:		; preds = %bb168, %bb167
-	%14 = load %struct.rtx_def** %addr, align 4		; <%struct.rtx_def*> [#uses=0]
+	%14 = load %struct.rtx_def*, %struct.rtx_def** %addr, align 4		; <%struct.rtx_def*> [#uses=0]
 	%15 = getelementptr [1 x %struct.cgraph_rtl_info], [1 x %struct.cgraph_rtl_info]* null, i32 0, i32 0		; <%struct.cgraph_rtl_info*> [#uses=0]
 	store %struct.rtx_def* null, %struct.rtx_def** null, align 4
 	br i1 false, label %bb212, label %bb213
@@ -183,7 +183,7 @@
 bb214:		; preds = %bb213, %bb212
 	%16 = bitcast %struct.block_symbol* null to [1 x %struct.cgraph_rtl_info]*		; <[1 x %struct.cgraph_rtl_info]*> [#uses=1]
 	%17 = getelementptr [1 x %struct.cgraph_rtl_info], [1 x %struct.cgraph_rtl_info]* %16, i32 0, i32 1		; <%struct.cgraph_rtl_info*> [#uses=0]
-	%18 = load %struct.rtx_def** %iftmp.1532, align 4		; <%struct.rtx_def*> [#uses=0]
+	%18 = load %struct.rtx_def*, %struct.rtx_def** %iftmp.1532, align 4		; <%struct.rtx_def*> [#uses=0]
 	%19 = getelementptr %struct.rtx_def, %struct.rtx_def* null, i32 0, i32 3		; <%struct.u*> [#uses=1]
 	%20 = getelementptr %struct.u, %struct.u* %19, i32 0, i32 0		; <%struct.block_symbol*> [#uses=1]
 	%21 = bitcast %struct.block_symbol* %20 to [1 x i64]*		; <[1 x i64]*> [#uses=1]
diff --git a/llvm/test/Transforms/GVN/2009-06-17-InvalidPRE.ll b/llvm/test/Transforms/GVN/2009-06-17-InvalidPRE.ll
index 5b695c5..bf0a234 100644
--- a/llvm/test/Transforms/GVN/2009-06-17-InvalidPRE.ll
+++ b/llvm/test/Transforms/GVN/2009-06-17-InvalidPRE.ll
@@ -21,7 +21,7 @@
   %3 = getelementptr %struct.mbuf, %struct.mbuf* %m.0.ph, i32 0, i32 2    ; <i32*> [#uses=1]
   store i32 0, i32* %3, align 4
   %4 = getelementptr %struct.mbuf, %struct.mbuf* %m.0.ph, i32 0, i32 0    ; <%struct.mbuf**> [#uses=1]
-  %5 = load %struct.mbuf** %4, align 4    ; <%struct.mbuf*> [#uses=1]
+  %5 = load %struct.mbuf*, %struct.mbuf** %4, align 4    ; <%struct.mbuf*> [#uses=1]
   br label %bb4.outer
 
 bb4.outer:    ; preds = %bb4.preheader, %bb2
@@ -41,21 +41,21 @@
   br i1 %12, label %bb1, label %bb7
 
 bb1:    ; preds = %bb4
-  %13 = load i32* %7, align 4    ; <i32> [#uses=3]
+  %13 = load i32, i32* %7, align 4    ; <i32> [#uses=3]
   %14 = icmp sgt i32 %13, %len.0    ; <i1> [#uses=1]
   br i1 %14, label %bb3, label %bb2
 
 bb3:    ; preds = %bb1
   %15 = sub i32 %13, %len.0    ; <i32> [#uses=1]
   store i32 %15, i32* %8, align 4
-  %16 = load i8** %9, align 4    ; <i8*> [#uses=1]
+  %16 = load i8*, i8** %9, align 4    ; <i8*> [#uses=1]
   %17 = getelementptr i8, i8* %16, i32 %len.0   ; <i8*> [#uses=1]
   store i8* %17, i8** %10, align 4
   br label %bb4
 
 bb7:    ; preds = %bb4
   %18 = getelementptr %struct.mbuf, %struct.mbuf* %mp, i32 0, i32 5   ; <i16*> [#uses=1]
-  %19 = load i16* %18, align 2    ; <i16> [#uses=1]
+  %19 = load i16, i16* %18, align 2    ; <i16> [#uses=1]
   %20 = zext i16 %19 to i32   ; <i32> [#uses=1]
   %21 = and i32 %20, 2    ; <i32> [#uses=1]
   %22 = icmp eq i32 %21, 0    ; <i1> [#uses=1]
diff --git a/llvm/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll b/llvm/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll
index f079108..0ed5237 100644
--- a/llvm/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll
+++ b/llvm/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll
@@ -22,11 +22,11 @@
 	br label %bb62
 
 bb9:		; preds = %bb
-	%0 = load i8* %sp.1, align 1		; <i8> [#uses=0]
+	%0 = load i8, i8* %sp.1, align 1		; <i8> [#uses=0]
 	br label %bb62
 
 bb51:		; preds = %bb
-	%1 = load i8* %sp.1, align 1		; <i8> [#uses=0]
+	%1 = load i8, i8* %sp.1, align 1		; <i8> [#uses=0]
 	ret i8* null
 
 bb62:		; preds = %bb9, %bb2, %bb
diff --git a/llvm/test/Transforms/GVN/2009-11-12-MemDepMallocBitCast.ll b/llvm/test/Transforms/GVN/2009-11-12-MemDepMallocBitCast.ll
index b433297..e0dbb4b 100644
--- a/llvm/test/Transforms/GVN/2009-11-12-MemDepMallocBitCast.ll
+++ b/llvm/test/Transforms/GVN/2009-11-12-MemDepMallocBitCast.ll
@@ -6,9 +6,9 @@
   %1 = tail call i8* @malloc(i64 mul (i64 4, i64 ptrtoint (i64* getelementptr (i64* null, i64 1) to i64))) ; <i8*> [#uses=2]
   store i8 42, i8* %1
   %X = bitcast i8* %1 to i64*                     ; <i64*> [#uses=1]
-  %Y = load i64* %X                               ; <i64> [#uses=1]
+  %Y = load i64, i64* %X                               ; <i64> [#uses=1]
   ret i64 %Y
-; CHECK: %Y = load i64* %X
+; CHECK: %Y = load i64, i64* %X
 ; CHECK: ret i64 %Y
 }
 
diff --git a/llvm/test/Transforms/GVN/2010-03-31-RedundantPHIs.ll b/llvm/test/Transforms/GVN/2010-03-31-RedundantPHIs.ll
index d6e1c6b..9d9ad54 100644
--- a/llvm/test/Transforms/GVN/2010-03-31-RedundantPHIs.ll
+++ b/llvm/test/Transforms/GVN/2010-03-31-RedundantPHIs.ll
@@ -24,8 +24,8 @@
   br i1 undef, label %bb9, label %bb10
 
 bb9:                                              ; preds = %bb8
-  %0 = load i8** undef, align 4                   ; <i8*> [#uses=0]
-  %1 = load i8** undef, align 4                   ; <i8*> [#uses=0]
+  %0 = load i8*, i8** undef, align 4                   ; <i8*> [#uses=0]
+  %1 = load i8*, i8** undef, align 4                   ; <i8*> [#uses=0]
   br label %bb11
 
 bb10:                                             ; preds = %bb8
diff --git a/llvm/test/Transforms/GVN/2010-05-08-OneBit.ll b/llvm/test/Transforms/GVN/2010-05-08-OneBit.ll
index 1e75410..0e3fa4b8 100644
--- a/llvm/test/Transforms/GVN/2010-05-08-OneBit.ll
+++ b/llvm/test/Transforms/GVN/2010-05-08-OneBit.ll
@@ -31,7 +31,7 @@
 k133.i.i:                                         ; preds = %k121.i.i
   %2 = getelementptr i8, i8* undef, i64 5             ; <i8*> [#uses=1]
   %3 = bitcast i8* %2 to i1*                      ; <i1*> [#uses=1]
-  %4 = load i1* %3                                ; <i1> [#uses=1]
+  %4 = load i1, i1* %3                                ; <i1> [#uses=1]
   br i1 %4, label %k151.i.i, label %l147.i.i
 
 l147.i.i:                                         ; preds = %k133.i.i
diff --git a/llvm/test/Transforms/GVN/2011-04-27-phioperands.ll b/llvm/test/Transforms/GVN/2011-04-27-phioperands.ll
index 42c4650..05cf6c7 100644
--- a/llvm/test/Transforms/GVN/2011-04-27-phioperands.ll
+++ b/llvm/test/Transforms/GVN/2011-04-27-phioperands.ll
@@ -50,7 +50,7 @@
 
 "<bb 53>.i":
   %wascaret_2.i = phi i32 [ 0, %"<L39>.i" ], [ 0, %"<L29>.i" ], [ 0, %"<L28>.i" ], [ 0, %"<bb 35>.i" ], [ 0, %"<L99>.i" ], [ 0, %"<L98>.i" ], [ 0, %doemit.exit76.i ], [ 1, %doemit.exit51.i ], [ 0, %"<L24>.i" ]
-  %D.5496_84.i = load i8** undef, align 8
+  %D.5496_84.i = load i8*, i8** undef, align 8
   br i1 undef, label %"<bb 54>.i", label %"<bb 5>"
 
 "<bb 54>.i":
diff --git a/llvm/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll b/llvm/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll
index 395c01f..635da27 100644
--- a/llvm/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll
+++ b/llvm/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll
@@ -22,7 +22,7 @@
 ; CHECK: [[TMP:%.*]] = phi i8* [ getelementptr (i8* null, i64 undef), %bb10 ], [ null, %bb ]
 
 ; CHECK: bb1.bb15_crit_edge:
-; CHECK: %tmp17.pre = load i8* [[TMP]], align 1
+; CHECK: %tmp17.pre = load i8, i8* [[TMP]], align 1
 
 bb3:
   call void @isalnum()
@@ -32,22 +32,22 @@
   br i1 undef, label %bb10, label %bb6
 
 bb6:
-  %tmp7 = load i8** %tmp, align 8
-  %tmp8 = load i8* %tmp7, align 1
+  %tmp7 = load i8*, i8** %tmp, align 8
+  %tmp8 = load i8, i8* %tmp7, align 1
   %tmp9 = zext i8 %tmp8 to i64
   br i1 undef, label %bb15, label %bb10
 
 bb10:
-  %tmp11 = load i8** %tmp, align 8
-  %tmp12 = load i8* %tmp11, align 1
+  %tmp11 = load i8*, i8** %tmp, align 8
+  %tmp12 = load i8, i8* %tmp11, align 1
   %tmp13 = zext i8 %tmp12 to i64
   %tmp14 = getelementptr inbounds i8, i8* null, i64 undef
   store i8* %tmp14, i8** %tmp, align 8
   br label %bb1
 
 bb15:
-  %tmp16 = load i8** %tmp, align 8
-  %tmp17 = load i8* %tmp16, align 1
+  %tmp16 = load i8*, i8** %tmp, align 8
+  %tmp17 = load i8, i8* %tmp16, align 1
   %tmp18 = icmp eq i8 %tmp17, 0
   br label %bb19
 
diff --git a/llvm/test/Transforms/GVN/MemdepMiscompile.ll b/llvm/test/Transforms/GVN/MemdepMiscompile.ll
index d420169..0652304 100644
--- a/llvm/test/Transforms/GVN/MemdepMiscompile.ll
+++ b/llvm/test/Transforms/GVN/MemdepMiscompile.ll
@@ -13,14 +13,14 @@
 ; CHECK: call void @RunInMode
 ; CHECK: br i1 %tobool, label %while.cond.backedge, label %if.then
 ; CHECK: while.cond.backedge:
-; CHECK: load i32* %shouldExit
+; CHECK: load i32, i32* %shouldExit
 ; CHECK: br i1 %cmp, label %while.body
   %shouldExit = alloca i32, align 4
   %tasksIdle = alloca i32, align 4
   store i32 0, i32* %shouldExit, align 4
   store i32 0, i32* %tasksIdle, align 4
   call void @CTestInitialize(i32* %tasksIdle) nounwind
-  %0 = load i32* %shouldExit, align 4
+  %0 = load i32, i32* %shouldExit, align 4
   %cmp1 = icmp eq i32 %0, 0
   br i1 %cmp1, label %while.body.lr.ph, label %while.end
 
@@ -29,7 +29,7 @@
 
 while.body:
   call void @RunInMode(i32 100) nounwind
-  %1 = load i32* %tasksIdle, align 4
+  %1 = load i32, i32* %tasksIdle, align 4
   %tobool = icmp eq i32 %1, 0
   br i1 %tobool, label %while.cond.backedge, label %if.then
 
@@ -39,7 +39,7 @@
   br label %while.cond.backedge
 
 while.cond.backedge:
-  %2 = load i32* %shouldExit, align 4
+  %2 = load i32, i32* %shouldExit, align 4
   %cmp = icmp eq i32 %2, 0
   br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge
 
diff --git a/llvm/test/Transforms/GVN/atomic.ll b/llvm/test/Transforms/GVN/atomic.ll
index 8c13d20..aada137 100644
--- a/llvm/test/Transforms/GVN/atomic.ll
+++ b/llvm/test/Transforms/GVN/atomic.ll
@@ -11,9 +11,9 @@
 ; CHECK-LABEL: test1
 ; CHECK: add i32 %x, %x
 entry:
-  %x = load i32* @y
+  %x = load i32, i32* @y
   store atomic i32 %x, i32* @x unordered, align 4
-  %y = load i32* @y
+  %y = load i32, i32* @y
   %z = add i32 %x, %y
   ret i32 %z
 }
@@ -23,9 +23,9 @@
 ; CHECK-LABEL: test2
 ; CHECK: add i32 %x, %x
 entry:
-  %x = load i32* @y
+  %x = load i32, i32* @y
   store atomic i32 %x, i32* @x seq_cst, align 4
-  %y = load i32* @y
+  %y = load i32, i32* @y
   %z = add i32 %x, %y
   ret i32 %z
 }
@@ -35,9 +35,9 @@
 ; CHECK-LABEL: test3
 ; CHECK: add i32 %x, %x
 entry:
-  %x = load i32* @y
-  %y = load atomic i32* @x unordered, align 4
-  %z = load i32* @y
+  %x = load i32, i32* @y
+  %y = load atomic i32, i32* @x unordered, align 4
+  %z = load i32, i32* @y
   %a = add i32 %x, %z
   %b = add i32 %y, %a
   ret i32 %b
@@ -46,12 +46,12 @@
 ; GVN across acquire load (allowed as the original load was not atomic)
 define i32 @test4() nounwind uwtable ssp {
 ; CHECK-LABEL: test4
-; CHECK: load atomic i32* @x
-; CHECK-NOT: load i32* @y
+; CHECK: load atomic i32, i32* @x
+; CHECK-NOT: load i32, i32* @y
 entry:
-  %x = load i32* @y
-  %y = load atomic i32* @x seq_cst, align 4
-  %x2 = load i32* @y
+  %x = load i32, i32* @y
+  %y = load atomic i32, i32* @x seq_cst, align 4
+  %x2 = load i32, i32* @y
   %x3 = add i32 %x, %x2
   %y2 = add i32 %y, %x3
   ret i32 %y2
@@ -62,8 +62,8 @@
 ; CHECK-LABEL: test5
 ; CHECK: add i32 %x, %x
 entry:
-  %x = load atomic i32* @x unordered, align 4
-  %y = load i32* @x
+  %x = load atomic i32, i32* @x unordered, align 4
+  %y = load i32, i32* @x
   %z = add i32 %x, %y
   ret i32 %z
 }
@@ -71,10 +71,10 @@
 ; GVN unordered load to load (unordered load must not be removed)
 define i32 @test6() nounwind uwtable ssp {
 ; CHECK-LABEL: test6
-; CHECK: load atomic i32* @x unordered
+; CHECK: load atomic i32, i32* @x unordered
 entry:
-  %x = load i32* @x
-  %x2 = load atomic i32* @x unordered, align 4
+  %x = load i32, i32* @x
+  %x2 = load atomic i32, i32* @x unordered, align 4
   %x3 = add i32 %x, %x2
   ret i32 %x3
 }
@@ -84,10 +84,10 @@
 ; CHECK-LABEL: test7
 ; CHECK: add i32 %x, %y
 entry:
-  %x = load i32* @y
+  %x = load i32, i32* @y
   store atomic i32 %x, i32* @x release, align 4
-  %w = load atomic i32* @x acquire, align 4
-  %y = load i32* @y
+  %w = load atomic i32, i32* @x acquire, align 4
+  %y = load i32, i32* @y
   %z = add i32 %x, %y
   ret i32 %z
 }
@@ -97,10 +97,10 @@
 ; CHECK-LABEL: test8
 ; CHECK: add i32 %x, %x
 entry:
-  %x = load i32* @y
-  %w = load atomic i32* @x acquire, align 4
+  %x = load i32, i32* @y
+  %w = load atomic i32, i32* @x acquire, align 4
   store atomic i32 %x, i32* @x release, align 4
-  %y = load i32* @y
+  %y = load i32, i32* @y
   %z = add i32 %x, %y
   ret i32 %z
 }
@@ -110,9 +110,9 @@
 ; CHECK-LABEL: test9
 ; CHECK: add i32 %x, %x
 entry:
-  %x = load i32* @y
+  %x = load i32, i32* @y
   store atomic i32 %x, i32* @x monotonic, align 4
-  %y = load i32* @y
+  %y = load i32, i32* @y
   %z = add i32 %x, %y
   ret i32 %z
 }
@@ -122,9 +122,9 @@
 ; CHECK-LABEL: test10
 ; CHECK: add i32 %x, %y
 entry:
-  %x = load atomic i32* @y unordered, align 4
-  %clobber = load atomic i32* @x monotonic, align 4
-  %y = load atomic i32* @y monotonic, align 4
+  %x = load atomic i32, i32* @y unordered, align 4
+  %clobber = load atomic i32, i32* @x monotonic, align 4
+  %y = load atomic i32, i32* @y monotonic, align 4
   %z = add i32 %x, %y
   ret i32 %z
 }
diff --git a/llvm/test/Transforms/GVN/calloc-load-removal.ll b/llvm/test/Transforms/GVN/calloc-load-removal.ll
index 2dde5b7..a51f71f 100644
--- a/llvm/test/Transforms/GVN/calloc-load-removal.ll
+++ b/llvm/test/Transforms/GVN/calloc-load-removal.ll
@@ -9,11 +9,11 @@
   %1 = tail call noalias i8* @calloc(i64 1, i64 4)
   %2 = bitcast i8* %1 to i32*
   ; This load is trivially constant zero
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   ret i32 %3
 
 ; CHECK-LABEL: @test1(
-; CHECK-NOT: %3 = load i32* %2, align 4
+; CHECK-NOT: %3 = load i32, i32* %2, align 4
 ; CHECK: ret i32 0
 
 ; CHECK_NO_LIBCALLS-LABEL: @test1(
diff --git a/llvm/test/Transforms/GVN/cond_br.ll b/llvm/test/Transforms/GVN/cond_br.ll
index 918e7d4..aeb1a6e 100644
--- a/llvm/test/Transforms/GVN/cond_br.ll
+++ b/llvm/test/Transforms/GVN/cond_br.ll
@@ -5,11 +5,11 @@
 ; Function Attrs: nounwind ssp uwtable
 define void @foo(i32 %x) {
 ; CHECK: @foo(i32 %x)
-; CHECK: %.pre = load i32* @y
+; CHECK: %.pre = load i32, i32* @y
 ; CHECK: call void @bar(i32 %.pre)
 
   %t = sub i32 %x, %x
-  %.pre = load i32* @y, align 4
+  %.pre = load i32, i32* @y, align 4
   %cmp = icmp sgt i32 %t, 2
   br i1 %cmp, label %if.then, label %entry.if.end_crit_edge
 
@@ -29,11 +29,11 @@
 
 define void @foo2(i32 %x) {
 ; CHECK: @foo2(i32 %x)
-; CHECK: %.pre = load i32* @y
+; CHECK: %.pre = load i32, i32* @y
 ; CHECK: tail call void @bar(i32 %.pre)
 entry:
   %t = sub i32 %x, %x
-  %.pre = load i32* @y, align 4
+  %.pre = load i32, i32* @y, align 4
   %cmp = icmp sgt i32 %t, 2
   br i1 %cmp, label %if.then, label %if.else
 
diff --git a/llvm/test/Transforms/GVN/cond_br2.ll b/llvm/test/Transforms/GVN/cond_br2.ll
index 10e1a85..02154a7 100644
--- a/llvm/test/Transforms/GVN/cond_br2.ll
+++ b/llvm/test/Transforms/GVN/cond_br2.ll
@@ -30,7 +30,7 @@
   %add.ptr.i.i.i.i.i.i = bitcast %"union.llvm::SmallVectorBase::U"* %add.ptr.i.i.i.i2.i.i to i8*
   store i8* %add.ptr.i.i.i.i.i.i, i8** %CapacityX.i.i.i.i.i.i, align 16, !tbaa !4
   %EndX.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 1
-  %2 = load i8** %EndX.i, align 8, !tbaa !4
+  %2 = load i8*, i8** %EndX.i, align 8, !tbaa !4
   %CapacityX.i = getelementptr inbounds %"class.llvm::SmallVector", %"class.llvm::SmallVector"* %sv, i64 0, i32 0, i32 0, i32 0, i32 0, i32 2
   %cmp.i = icmp ult i8* %2, %add.ptr.i.i.i.i.i.i
   br i1 %cmp.i, label %Retry.i, label %if.end.i
@@ -51,18 +51,18 @@
           to label %.noexc unwind label %lpad
 
 .noexc:                                           ; preds = %if.end.i
-  %.pre.i = load i8** %EndX.i, align 8, !tbaa !4
+  %.pre.i = load i8*, i8** %EndX.i, align 8, !tbaa !4
   br label %Retry.i
 
 invoke.cont:                                      ; preds = %new.notnull.i, %Retry.i
   %add.ptr.i = getelementptr inbounds i8, i8* %3, i64 4
   store i8* %add.ptr.i, i8** %EndX.i, align 8, !tbaa !4
-  %6 = load i8** %CapacityX.i, align 16, !tbaa !4
+  %6 = load i8*, i8** %CapacityX.i, align 16, !tbaa !4
   %cmp.i8 = icmp ult i8* %add.ptr.i, %6
   br i1 %cmp.i8, label %new.notnull.i11, label %if.end.i14
 
 Retry.i10:                                        ; preds = %if.end.i14
-  %.pre.i13 = load i8** %EndX.i, align 8, !tbaa !4
+  %.pre.i13 = load i8*, i8** %EndX.i, align 8, !tbaa !4
   %new.isnull.i9 = icmp eq i8* %.pre.i13, null
   br i1 %new.isnull.i9, label %invoke.cont2, label %new.notnull.i11
 
@@ -85,7 +85,7 @@
           to label %invoke.cont3 unwind label %lpad
 
 invoke.cont3:                                     ; preds = %invoke.cont2
-  %11 = load i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4
+  %11 = load i8*, i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4
   %cmp.i.i.i.i19 = icmp eq i8* %11, %1
   br i1 %cmp.i.i.i.i19, label %_ZN4llvm11SmallVectorIiLj8EED1Ev.exit21, label %if.then.i.i.i20
 
@@ -100,7 +100,7 @@
 lpad:                                             ; preds = %if.end.i14, %if.end.i, %invoke.cont2
   %12 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
           cleanup
-  %13 = load i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4
+  %13 = load i8*, i8** %BeginX.i.i.i.i.i.i, align 16, !tbaa !4
   %cmp.i.i.i.i = icmp eq i8* %13, %1
   br i1 %cmp.i.i.i.i, label %eh.resume, label %if.then.i.i.i
 
diff --git a/llvm/test/Transforms/GVN/condprop.ll b/llvm/test/Transforms/GVN/condprop.ll
index 845f88e..6aa3cb8 100644
--- a/llvm/test/Transforms/GVN/condprop.ll
+++ b/llvm/test/Transforms/GVN/condprop.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL: @test1(
 define i32 @test1() nounwind {
 entry:
-	%0 = load i32* @a, align 4
+	%0 = load i32, i32* @a, align 4
 	%1 = icmp eq i32 %0, 4
 	br i1 %1, label %bb, label %bb1
 
@@ -13,7 +13,7 @@
 	br label %bb8
 
 bb1:		; preds = %entry
-	%2 = load i32* @a, align 4
+	%2 = load i32, i32* @a, align 4
 	%3 = icmp eq i32 %2, 5
 	br i1 %3, label %bb2, label %bb3
 
@@ -21,29 +21,29 @@
 	br label %bb8
 
 bb3:		; preds = %bb1
-	%4 = load i32* @a, align 4
+	%4 = load i32, i32* @a, align 4
 	%5 = icmp eq i32 %4, 4
 ; CHECK: br i1 false, label %bb4, label %bb5
 	br i1 %5, label %bb4, label %bb5
 
 bb4:		; preds = %bb3
-	%6 = load i32* @a, align 4
+	%6 = load i32, i32* @a, align 4
 	%7 = add i32 %6, 5
 	br label %bb8
 
 bb5:		; preds = %bb3
-	%8 = load i32* @a, align 4
+	%8 = load i32, i32* @a, align 4
 	%9 = icmp eq i32 %8, 5
 ; CHECK: br i1 false, label %bb6, label %bb7
 	br i1 %9, label %bb6, label %bb7
 
 bb6:		; preds = %bb5
-	%10 = load i32* @a, align 4
+	%10 = load i32, i32* @a, align 4
 	%11 = add i32 %10, 4
 	br label %bb8
 
 bb7:		; preds = %bb5
-	%12 = load i32* @a, align 4
+	%12 = load i32, i32* @a, align 4
 	br label %bb8
 
 bb8:		; preds = %bb7, %bb6, %bb4, %bb2, %bb
diff --git a/llvm/test/Transforms/GVN/crash-no-aa.ll b/llvm/test/Transforms/GVN/crash-no-aa.ll
index cc30329..f076a8d 100644
--- a/llvm/test/Transforms/GVN/crash-no-aa.ll
+++ b/llvm/test/Transforms/GVN/crash-no-aa.ll
@@ -9,7 +9,7 @@
   store i16 42, i16* %P2
 
   %P3 = getelementptr {i16, i32}, {i16, i32} *%P, i32 0, i32 1
-  %V = load i32* %P3
+  %V = load i32, i32* %P3
   ret i32 %V
 }
 
diff --git a/llvm/test/Transforms/GVN/crash.ll b/llvm/test/Transforms/GVN/crash.ll
index 5efba40..2abb419 100644
--- a/llvm/test/Transforms/GVN/crash.ll
+++ b/llvm/test/Transforms/GVN/crash.ll
@@ -22,7 +22,7 @@
 
 lor.lhs.false:                                    ; preds = %while.body
   %tmp20 = bitcast i32* %o.addr.0 to i32*         ; <i32*> [#uses=1]
-  %tmp22 = load i32* %tmp20                       ; <i32> [#uses=0]
+  %tmp22 = load i32, i32* %tmp20                       ; <i32> [#uses=0]
   br i1 undef, label %land.lhs.true24, label %if.end31
 
 land.lhs.true24:                                  ; preds = %lor.lhs.false
@@ -34,11 +34,11 @@
 
 if.end41:                                         ; preds = %if.end31
   %tmp43 = bitcast i32* %o.addr.0 to i32*         ; <i32*> [#uses=1]
-  %tmp45 = load i32* %tmp43                       ; <i32> [#uses=0]
+  %tmp45 = load i32, i32* %tmp43                       ; <i32> [#uses=0]
   br i1 undef, label %if.then50, label %if.else
 
 if.then50:                                        ; preds = %if.end41
-  %tmp53 = load i32** undef                       ; <i32*> [#uses=1]
+  %tmp53 = load i32*, i32** undef                       ; <i32*> [#uses=1]
   br label %while.body.backedge
 
 if.else:                                          ; preds = %if.end41
@@ -75,14 +75,14 @@
 
 bb69.i:                                           ; preds = %bb57.i.preheader
   %tmp4 = getelementptr inbounds [4 x %struct.attribute_spec*], [4 x %struct.attribute_spec*]* @attribute_tables, i32 0, i32 undef ; <%struct.attribute_spec**> [#uses=1]
-  %tmp3 = load %struct.attribute_spec** %tmp4, align 4 ; <%struct.attribute_spec*> [#uses=1]
+  %tmp3 = load %struct.attribute_spec*, %struct.attribute_spec** %tmp4, align 4 ; <%struct.attribute_spec*> [#uses=1]
   br label %bb65.i
 
 bb65.i:                                           ; preds = %bb65.i.preheader, %bb64.i
   %storemerge6.i = phi i32 [ 1, %bb64.i ], [ 0, %bb69.i ] ; <i32> [#uses=3]
   %scevgep14 = getelementptr inbounds %struct.attribute_spec, %struct.attribute_spec* %tmp3, i32 %storemerge6.i, i32 0 ; <i8**> [#uses=1]
-  %tmp2 = load i8** %scevgep14, align 4           ; <i8*> [#uses=0]
-  %tmp = load %struct.attribute_spec** %tmp4, align 4 ; <%struct.attribute_spec*> [#uses=1]
+  %tmp2 = load i8*, i8** %scevgep14, align 4           ; <i8*> [#uses=0]
+  %tmp = load %struct.attribute_spec*, %struct.attribute_spec** %tmp4, align 4 ; <%struct.attribute_spec*> [#uses=1]
   %scevgep1516 = getelementptr inbounds %struct.attribute_spec, %struct.attribute_spec* %tmp, i32 %storemerge6.i, i32 0 ; <i8**> [#uses=0]
   unreachable
 
@@ -101,7 +101,7 @@
 
 define i32* @test3() {
 do.end17.i:
-  %tmp18.i = load i7** undef
+  %tmp18.i = load i7*, i7** undef
   %tmp1 = bitcast i7* %tmp18.i to i8*
   br i1 undef, label %do.body36.i, label %if.then21.i
 
@@ -110,12 +110,12 @@
   ret i32* undef
 
 do.body36.i:
-  %ivar38.i = load i64* @g 
+  %ivar38.i = load i64, i64* @g 
   %tmp3 = bitcast i7* %tmp18.i to i8*
   %add.ptr39.sum.i = add i64 %ivar38.i, 8
   %tmp40.i = getelementptr inbounds i8, i8* %tmp3, i64 %add.ptr39.sum.i
   %tmp4 = bitcast i8* %tmp40.i to i64*
-  %tmp41.i = load i64* %tmp4
+  %tmp41.i = load i64, i64* %tmp4
   br i1 undef, label %if.then48.i, label %do.body57.i
 
 if.then48.i:
@@ -123,13 +123,13 @@
   br label %do.body57.i
 
 do.body57.i:
-  %tmp58.i = load i7** undef
-  %ivar59.i = load i64* @g
+  %tmp58.i = load i7*, i7** undef
+  %ivar59.i = load i64, i64* @g
   %tmp5 = bitcast i7* %tmp58.i to i8*
   %add.ptr65.sum.i = add i64 %ivar59.i, 8
   %tmp66.i = getelementptr inbounds i8, i8* %tmp5, i64 %add.ptr65.sum.i
   %tmp6 = bitcast i8* %tmp66.i to i64*
-  %tmp67.i = load i64* %tmp6
+  %tmp67.i = load i64, i64* %tmp6
   ret i32* undef
 }
 
@@ -145,7 +145,7 @@
   %P2 = getelementptr i32, i32 *%P2, i32 52
   %Q2 = getelementptr i32, i32 *%Q2, i32 52
   store i32 4, i32* %P2
-  %A = load i32* %Q2
+  %A = load i32, i32* %Q2
   br i1 true, label %dead, label %dead2
   
 dead2:
@@ -156,10 +156,10 @@
 ; PR9841
 define fastcc i8 @test5(i8* %P) nounwind {
 entry:
-  %0 = load i8* %P, align 2
+  %0 = load i8, i8* %P, align 2
 
   %Q = getelementptr i8, i8* %P, i32 1
-  %1 = load i8* %Q, align 1
+  %1 = load i8, i8* %Q, align 1
   ret i8 %1
 }
 
@@ -187,7 +187,7 @@
   br label %unreachable.bb
 
 u2.bb:
-  %0 = load i32* undef, align 4
+  %0 = load i32, i32* undef, align 4
   %conv.i.i.i.i.i = zext i32 %0 to i64
   br label %u2.bb
 
diff --git a/llvm/test/Transforms/GVN/invariant-load.ll b/llvm/test/Transforms/GVN/invariant-load.ll
index 2a83c45..162d498 100644
--- a/llvm/test/Transforms/GVN/invariant-load.ll
+++ b/llvm/test/Transforms/GVN/invariant-load.ll
@@ -3,13 +3,13 @@
 
 define i32 @test1(i32* nocapture %p, i8* nocapture %q) {
 ; CHECK-LABEL: test1
-; CHECK: %x = load i32* %p, align 4, !invariant.load !0
+; CHECK: %x = load i32, i32* %p, align 4, !invariant.load !0
 ; CHECK-NOT: %y = load
 entry:
-  %x = load i32* %p, align 4, !invariant.load !0
+  %x = load i32, i32* %p, align 4, !invariant.load !0
   %conv = trunc i32 %x to i8
   store i8 %conv, i8* %q, align 1
-  %y = load i32* %p, align 4, !invariant.load !0
+  %y = load i32, i32* %p, align 4, !invariant.load !0
   %add = add i32 %y, 1
   ret i32 %add
 }
@@ -19,10 +19,10 @@
 ; CHECK-NOT: !invariant.load
 ; CHECK-NOT: %y = load
 entry:
-  %x = load i32* %p, align 4
+  %x = load i32, i32* %p, align 4
   %conv = trunc i32 %x to i8
   store i8 %conv, i8* %q, align 1
-  %y = load i32* %p, align 4, !invariant.load !0
+  %y = load i32, i32* %p, align 4, !invariant.load !0
   %add = add i32 %y, 1
   ret i32 %add
 }
@@ -33,7 +33,7 @@
 ; CHECK-LABEL: test3
 ; CHECK-NOT: load
 entry:
-  %v1 = load i32* %p
+  %v1 = load i32, i32* %p
   br i1 %cnd, label %bb1, label %bb2
 
 bb1:
@@ -41,7 +41,7 @@
   br label %bb2
 
 bb2:
-  %v2 = load i32* %p, !invariant.load !0
+  %v2 = load i32, i32* %p, !invariant.load !0
   %res = sub i32 %v1, %v2
   ret i32 %res
 }
@@ -52,7 +52,7 @@
 ; CHECK-LABEL: test4
 ; %v2 is redundant, but GVN currently doesn't catch that
 entry:
-  %v1 = load i32* %p, !invariant.load !0
+  %v1 = load i32, i32* %p, !invariant.load !0
   br i1 %cnd, label %bb1, label %bb2
 
 bb1:
@@ -60,7 +60,7 @@
   br label %bb2
 
 bb2:
-  %v2 = load i32* %p
+  %v2 = load i32, i32* %p
   %res = sub i32 %v1, %v2
   ret i32 %res
 }
diff --git a/llvm/test/Transforms/GVN/lifetime-simple.ll b/llvm/test/Transforms/GVN/lifetime-simple.ll
index 02f7bcc..d03b62c 100644
--- a/llvm/test/Transforms/GVN/lifetime-simple.ll
+++ b/llvm/test/Transforms/GVN/lifetime-simple.ll
@@ -9,10 +9,10 @@
 ; CHECK: lifetime.end
 entry:
   call void @llvm.lifetime.start(i64 32, i8* %P)
-  %0 = load i8* %P
+  %0 = load i8, i8* %P
   store i8 1, i8* %P
   call void @llvm.lifetime.end(i64 32, i8* %P)
-  %1 = load i8* %P
+  %1 = load i8, i8* %P
   ret i8 %1
 }
 
diff --git a/llvm/test/Transforms/GVN/load-constant-mem.ll b/llvm/test/Transforms/GVN/load-constant-mem.ll
index 9bcf69c..f870485 100644
--- a/llvm/test/Transforms/GVN/load-constant-mem.ll
+++ b/llvm/test/Transforms/GVN/load-constant-mem.ll
@@ -5,9 +5,9 @@
 define i32 @test(i8* %p, i32 %i) nounwind {
 entry:
 	%P = getelementptr [4 x i32], [4 x i32]* @G, i32 0, i32 %i
-	%A = load i32* %P
+	%A = load i32, i32* %P
 	store i8 4, i8* %p
-	%B = load i32* %P
+	%B = load i32, i32* %P
 	%C = sub i32 %A, %B
 	ret i32 %C
 }
diff --git a/llvm/test/Transforms/GVN/load-from-unreachable-predecessor.ll b/llvm/test/Transforms/GVN/load-from-unreachable-predecessor.ll
index b676d95..29ea14d 100644
--- a/llvm/test/Transforms/GVN/load-from-unreachable-predecessor.ll
+++ b/llvm/test/Transforms/GVN/load-from-unreachable-predecessor.ll
@@ -8,13 +8,13 @@
 ; Load should be removed, since it's ignored.
 ; CHECK-NEXT: br label
 bb0:
-  %bar = load i32** %f
+  %bar = load i32*, i32** %f
   br label %bb2
 bb1:
-  %zed = load i32** %f
+  %zed = load i32*, i32** %f
   br i1 false, label %bb1, label %bb2
 bb2:
   %foo = phi i32* [ null, %bb0 ], [ %zed, %bb1 ]
-  %storemerge = load i32* %foo
+  %storemerge = load i32, i32* %foo
   ret i32 %storemerge
 }
diff --git a/llvm/test/Transforms/GVN/load-pre-align.ll b/llvm/test/Transforms/GVN/load-pre-align.ll
index 4816af2..1198caf 100644
--- a/llvm/test/Transforms/GVN/load-pre-align.ll
+++ b/llvm/test/Transforms/GVN/load-pre-align.ll
@@ -25,7 +25,7 @@
   br label %for.end
 
 for.body:
-  %tmp3 = load i32* @p, align 8
+  %tmp3 = load i32, i32* @p, align 8
   %dec = add i32 %tmp3, -1
   store i32 %dec, i32* @p
   %cmp6 = icmp slt i32 %dec, 0
@@ -39,6 +39,6 @@
   br label %for.cond
 
 for.end:
-  %tmp9 = load i32* @p, align 8
+  %tmp9 = load i32, i32* @p, align 8
   ret i32 %tmp9
 }
diff --git a/llvm/test/Transforms/GVN/load-pre-licm.ll b/llvm/test/Transforms/GVN/load-pre-licm.ll
index 8560853..d14b01c 100644
--- a/llvm/test/Transforms/GVN/load-pre-licm.ll
+++ b/llvm/test/Transforms/GVN/load-pre-licm.ll
@@ -19,8 +19,8 @@
   %arrayidx9 = getelementptr [5001 x i32], [5001 x i32]* @sortlist, i32 0, i32 %tmp5
   %tmp6 = add i32 %indvar, 1
   %arrayidx = getelementptr [5001 x i32], [5001 x i32]* @sortlist, i32 0, i32 %tmp6
-  %tmp7 = load i32* %arrayidx, align 4
-  %tmp10 = load i32* %arrayidx9, align 4
+  %tmp7 = load i32, i32* %arrayidx, align 4
+  %tmp10 = load i32, i32* %arrayidx9, align 4
   %cmp11 = icmp sgt i32 %tmp7, %tmp10
   br i1 %cmp11, label %if.then, label %if.end
 
diff --git a/llvm/test/Transforms/GVN/load-pre-nonlocal.ll b/llvm/test/Transforms/GVN/load-pre-nonlocal.ll
index f8a5b66..91c29bf 100644
--- a/llvm/test/Transforms/GVN/load-pre-nonlocal.ll
+++ b/llvm/test/Transforms/GVN/load-pre-nonlocal.ll
@@ -12,8 +12,8 @@
 
 ; CHECK-LABEL: define i32 @volatile_load
 ; CHECK: for.body:
-; CHECK: %2 = load i32*
-; CHECK: %3 = load volatile i32*
+; CHECK: %2 = load i32, i32*
+; CHECK: %3 = load volatile i32, i32*
 ; CHECK: for.cond.for.end_crit_edge:
 
 define i32 @volatile_load(i32 %n) {
@@ -22,18 +22,18 @@
   br i1 %cmp6, label %for.body.lr.ph, label %for.end
 
 for.body.lr.ph:
-  %0 = load i32** @a2, align 8, !tbaa !1
-  %1 = load i32** @a, align 8, !tbaa !1
+  %0 = load i32*, i32** @a2, align 8, !tbaa !1
+  %1 = load i32*, i32** @a, align 8, !tbaa !1
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
   %s.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
   %p.08 = phi i32* [ %0, %for.body.lr.ph ], [ %incdec.ptr, %for.body ]
-  %2 = load i32* %p.08, align 4, !tbaa !5
+  %2 = load i32, i32* %p.08, align 4, !tbaa !5
   %arrayidx = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
   store i32 %2, i32* %arrayidx, align 4, !tbaa !5
-  %3 = load volatile i32* %p.08, align 4, !tbaa !5
+  %3 = load volatile i32, i32* %p.08, align 4, !tbaa !5
   %add = add nsw i32 %3, %s.09
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %incdec.ptr = getelementptr inbounds i32, i32* %p.08, i64 1
@@ -54,7 +54,7 @@
 
 ; CHECK-LABEL: define i32 @overaligned_load
 ; CHECK: if.end:
-; CHECK-NOT: %1 = load i32*
+; CHECK-NOT: %1 = load i32, i32*
 
 define i32 @overaligned_load(i32 %a, i32* nocapture %b) {
 entry:
@@ -62,7 +62,7 @@
   br i1 %cmp, label %if.then, label %if.else
 
 if.then:
-  %0 = load i32* getelementptr inbounds (%struct.S1* @s1, i64 0, i32 0), align 8, !tbaa !5
+  %0 = load i32, i32* getelementptr inbounds (%struct.S1* @s1, i64 0, i32 0), align 8, !tbaa !5
   br label %if.end
 
 if.else:
@@ -74,7 +74,7 @@
   %i.0 = phi i32 [ %0, %if.then ], [ 0, %if.else ]
   %p.0 = phi i32* [ getelementptr inbounds (%struct.S1* @s1, i64 0, i32 0), %if.then ], [ %b, %if.else ]
   %add.ptr = getelementptr inbounds i32, i32* %p.0, i64 1
-  %1 = load i32* %add.ptr, align 4, !tbaa !5
+  %1 = load i32, i32* %add.ptr, align 4, !tbaa !5
   %add1 = add nsw i32 %1, %i.0
   ret i32 %add1
 }
diff --git a/llvm/test/Transforms/GVN/lpre-call-wrap-2.ll b/llvm/test/Transforms/GVN/lpre-call-wrap-2.ll
index c11c31c..5dc779e 100644
--- a/llvm/test/Transforms/GVN/lpre-call-wrap-2.ll
+++ b/llvm/test/Transforms/GVN/lpre-call-wrap-2.ll
@@ -16,10 +16,10 @@
 define void @bi_windup(i8* %outbuf, i8 zeroext %bi_buf) nounwind {
 entry:
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	%0 = load i32* @outcnt, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* @outcnt, align 4		; <i32> [#uses=1]
 	%1 = getelementptr i8, i8* %outbuf, i32 %0		; <i8*> [#uses=1]
 	store i8 %bi_buf, i8* %1, align 1
-	%2 = load i32* @outcnt, align 4		; <i32> [#uses=1]
+	%2 = load i32, i32* @outcnt, align 4		; <i32> [#uses=1]
 	%3 = icmp eq i32 %2, 16384		; <i1> [#uses=1]
 	br i1 %3, label %bb, label %bb1
 
@@ -31,7 +31,7 @@
 ; CHECK: bb1:
 ; CHECK-NEXT: phi
 ; CHECK-NEXT: getelementptr
-	%4 = load i32* @outcnt, align 4		; <i32> [#uses=1]
+	%4 = load i32, i32* @outcnt, align 4		; <i32> [#uses=1]
 	%5 = getelementptr i8, i8* %outbuf, i32 %4		; <i8*> [#uses=1]
 	store i8 %bi_buf, i8* %5, align 1
 	ret void
diff --git a/llvm/test/Transforms/GVN/lpre-call-wrap.ll b/llvm/test/Transforms/GVN/lpre-call-wrap.ll
index fb41d94..2748305 100644
--- a/llvm/test/Transforms/GVN/lpre-call-wrap.ll
+++ b/llvm/test/Transforms/GVN/lpre-call-wrap.ll
@@ -20,7 +20,7 @@
 define void @_Z12testfunctionR1A(%struct.A* %iter) {
 entry:
 	%0 = getelementptr %struct.A, %struct.A* %iter, i32 0, i32 0		; <i32*> [#uses=3]
-	%1 = load i32* %0, align 4		; <i32> [#uses=2]
+	%1 = load i32, i32* %0, align 4		; <i32> [#uses=2]
 	%2 = icmp eq i32 %1, 0		; <i1> [#uses=1]
 	br i1 %2, label %return, label %bb.nph
 
@@ -32,7 +32,7 @@
 	%.rle = phi i32 [ %1, %bb.nph ], [ %7, %bb3.backedge ]		; <i32> [#uses=1]
 	%4 = add i32 %.rle, 1		; <i32> [#uses=2]
 	store i32 %4, i32* %0, align 4
-	%5 = load i32* %3, align 4		; <i32> [#uses=1]
+	%5 = load i32, i32* %3, align 4		; <i32> [#uses=1]
 	%6 = icmp eq i32 %4, %5		; <i1> [#uses=1]
 	br i1 %6, label %bb1, label %bb3.backedge
 
@@ -44,7 +44,7 @@
 ; CHECK: bb3.backedge:
 ; CHECK-NEXT: phi
 ; CHECK-NEXT: icmp
-	%7 = load i32* %0, align 4		; <i32> [#uses=2]
+	%7 = load i32, i32* %0, align 4		; <i32> [#uses=2]
 	%8 = icmp eq i32 %7, 0		; <i1> [#uses=1]
 	br i1 %8, label %return, label %bb
 
diff --git a/llvm/test/Transforms/GVN/malloc-load-removal.ll b/llvm/test/Transforms/GVN/malloc-load-removal.ll
index d2d2fd7..1d7a2dd 100644
--- a/llvm/test/Transforms/GVN/malloc-load-removal.ll
+++ b/llvm/test/Transforms/GVN/malloc-load-removal.ll
@@ -10,7 +10,7 @@
 define noalias i8* @test1() nounwind uwtable ssp {
 entry:
   %call = tail call i8* @malloc(i64 100) nounwind
-  %0 = load i8* %call, align 1
+  %0 = load i8, i8* %call, align 1
   %tobool = icmp eq i8 %0, 0
   br i1 %tobool, label %if.end, label %if.then
 
@@ -35,7 +35,7 @@
 define noalias i8* @test2() nounwind uwtable ssp {
 entry:
   %call = tail call i8* @_Znwm(i64 100) nounwind
-  %0 = load i8* %call, align 1
+  %0 = load i8, i8* %call, align 1
   %tobool = icmp eq i8 %0, 0
   br i1 %tobool, label %if.end, label %if.then
 
diff --git a/llvm/test/Transforms/GVN/noalias.ll b/llvm/test/Transforms/GVN/noalias.ll
index 6c310fa..cfff096 100644
--- a/llvm/test/Transforms/GVN/noalias.ll
+++ b/llvm/test/Transforms/GVN/noalias.ll
@@ -2,21 +2,21 @@
 
 define i32 @test1(i32* %p, i32* %q) {
 ; CHECK-LABEL: @test1(i32* %p, i32* %q)
-; CHECK: load i32* %p
+; CHECK: load i32, i32* %p
 ; CHECK-NOT: noalias
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32* %p, !noalias !0
-  %b = load i32* %p
+  %a = load i32, i32* %p, !noalias !0
+  %b = load i32, i32* %p
   %c = add i32 %a, %b
   ret i32 %c
 }
 
 define i32 @test2(i32* %p, i32* %q) {
 ; CHECK-LABEL: @test2(i32* %p, i32* %q)
-; CHECK: load i32* %p, !alias.scope !0
+; CHECK: load i32, i32* %p, !alias.scope !0
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32* %p, !alias.scope !0
-  %b = load i32* %p, !alias.scope !0
+  %a = load i32, i32* %p, !alias.scope !0
+  %b = load i32, i32* %p, !alias.scope !0
   %c = add i32 %a, %b
   ret i32 %c
 }
@@ -27,10 +27,10 @@
 ; throw in between.
 define i32 @test3(i32* %p, i32* %q) {
 ; CHECK-LABEL: @test3(i32* %p, i32* %q)
-; CHECK: load i32* %p, !alias.scope !1
+; CHECK: load i32, i32* %p, !alias.scope !1
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32* %p, !alias.scope !1
-  %b = load i32* %p, !alias.scope !2
+  %a = load i32, i32* %p, !alias.scope !1
+  %b = load i32, i32* %p, !alias.scope !2
   %c = add i32 %a, %b
   ret i32 %c
 }
diff --git a/llvm/test/Transforms/GVN/non-local-offset.ll b/llvm/test/Transforms/GVN/non-local-offset.ll
index 36d2f1a..2373ef5 100644
--- a/llvm/test/Transforms/GVN/non-local-offset.ll
+++ b/llvm/test/Transforms/GVN/non-local-offset.ll
@@ -18,7 +18,7 @@
   br i1 %c, label %if.else, label %if.then
 
 if.then:
-  %t = load i32* %p
+  %t = load i32, i32* %p
   store i32 %t, i32* %q
   ret void
 
@@ -35,7 +35,7 @@
 ; CHECK-NEXT: store i32 0, i32* %q
 ; CHECK-NEXT: ret void
 ; CHECK: if.else:
-; CHECK: load i64* %pc
+; CHECK: load i64, i64* %pc
 ; CHECK: store i64
 
 define void @watch_out_for_size_change(i1 %c, i32* %p, i32* %q) nounwind {
@@ -46,14 +46,14 @@
   br i1 %c, label %if.else, label %if.then
 
 if.then:
-  %t = load i32* %p
+  %t = load i32, i32* %p
   store i32 %t, i32* %q
   ret void
 
 if.else:
   %pc = bitcast i32* %p to i64*
   %qc = bitcast i32* %q to i64*
-  %t64 = load i64* %pc
+  %t64 = load i64, i64* %pc
   store i64 %t64, i64* %qc
   ret void
 }
diff --git a/llvm/test/Transforms/GVN/nonescaping-malloc.ll b/llvm/test/Transforms/GVN/nonescaping-malloc.ll
index 1ff9b14..f83b317 100644
--- a/llvm/test/Transforms/GVN/nonescaping-malloc.ll
+++ b/llvm/test/Transforms/GVN/nonescaping-malloc.ll
@@ -40,16 +40,16 @@
 define linkonce_odr %"struct.llvm::StringMapEntry<void*>"* @_ZN4llvm9StringMapIPvNS_15MallocAllocatorEE16GetOrCreateValueERKNS_9StringRefE(%"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, %"struct.llvm::StringRef"* nocapture %Key) ssp align 2 {
 entry:
   %elt = bitcast %"struct.llvm::StringRef"* %Key to i64*
-  %val = load i64* %elt
+  %val = load i64, i64* %elt
   %tmp = getelementptr inbounds %"struct.llvm::StringRef", %"struct.llvm::StringRef"* %Key, i64 0, i32 1
-  %val2 = load i64* %tmp
+  %val2 = load i64, i64* %tmp
   %tmp2.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>", %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0
   %tmp3.i = tail call i32 @_ZN4llvm13StringMapImpl15LookupBucketForENS_9StringRefE(%"struct.llvm::StringMapImpl"* %tmp2.i, i64 %val, i64 %val2)
   %tmp4.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>", %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0, i32 0
-  %tmp5.i = load %"struct.llvm::StringMapImpl::ItemBucket"** %tmp4.i, align 8
+  %tmp5.i = load %"struct.llvm::StringMapImpl::ItemBucket"*, %"struct.llvm::StringMapImpl::ItemBucket"** %tmp4.i, align 8
   %tmp6.i = zext i32 %tmp3.i to i64
   %tmp7.i = getelementptr inbounds %"struct.llvm::StringMapImpl::ItemBucket", %"struct.llvm::StringMapImpl::ItemBucket"* %tmp5.i, i64 %tmp6.i, i32 1
-  %tmp8.i = load %"struct.llvm::StringMapEntryBase"** %tmp7.i, align 8
+  %tmp8.i = load %"struct.llvm::StringMapEntryBase"*, %"struct.llvm::StringMapEntryBase"** %tmp7.i, align 8
   %tmp9.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp8.i, null
   %tmp13.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp8.i, inttoptr (i64 -1 to %"struct.llvm::StringMapEntryBase"*)
   %or.cond.i = or i1 %tmp9.i, %tmp13.i
@@ -87,7 +87,7 @@
   %tmp.i.i.i = getelementptr inbounds i8, i8* %tmp.i20.i.i, i64 8
   %1 = bitcast i8* %tmp.i.i.i to i8**
   store i8* null, i8** %1, align 8
-  %tmp22.i = load %"struct.llvm::StringMapEntryBase"** %tmp7.i, align 8
+  %tmp22.i = load %"struct.llvm::StringMapEntryBase"*, %"struct.llvm::StringMapEntryBase"** %tmp7.i, align 8
   %tmp24.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp22.i, inttoptr (i64 -1 to %"struct.llvm::StringMapEntryBase"*)
   br i1 %tmp24.i, label %bb9.i, label %_ZN4llvm9StringMapIPvNS_15MallocAllocatorEE16GetOrCreateValueIS1_EERNS_14StringMapEntryIS1_EENS_9StringRefET_.exit
 
@@ -97,7 +97,7 @@
 
 bb9.i:                                            ; preds = %_ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i
   %tmp25.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>", %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0, i32 3
-  %tmp26.i = load i32* %tmp25.i, align 8
+  %tmp26.i = load i32, i32* %tmp25.i, align 8
   %tmp27.i = add i32 %tmp26.i, -1
   store i32 %tmp27.i, i32* %tmp25.i, align 8
   ret %"struct.llvm::StringMapEntry<void*>"* %tmp10.i.i
diff --git a/llvm/test/Transforms/GVN/null-aliases-nothing.ll b/llvm/test/Transforms/GVN/null-aliases-nothing.ll
index 0826f9e..0b7c5eb 100644
--- a/llvm/test/Transforms/GVN/null-aliases-nothing.ll
+++ b/llvm/test/Transforms/GVN/null-aliases-nothing.ll
@@ -5,11 +5,11 @@
 
 define void @test1(%t* noalias %stuff ) {
     %p = getelementptr inbounds %t, %t* %stuff, i32 0, i32 0
-    %before = load i32* %p
+    %before = load i32, i32* %p
 
     call void @test1f(i8* null)
 
-    %after = load i32* %p ; <--- This should be a dead load
+    %after = load i32, i32* %p ; <--- This should be a dead load
     %sum = add i32 %before, %after
 
     store i32 %sum, i32* %p
diff --git a/llvm/test/Transforms/GVN/phi-translate-partial-alias.ll b/llvm/test/Transforms/GVN/phi-translate-partial-alias.ll
index 84aeed1..f1cf53e 100644
--- a/llvm/test/Transforms/GVN/phi-translate-partial-alias.ll
+++ b/llvm/test/Transforms/GVN/phi-translate-partial-alias.ll
@@ -8,18 +8,18 @@
 
 ; CHECK: define void @test0(i8* %begin)
 ; CHECK: loop:
-; CHECK:   %l0 = load i8* %phi
+; CHECK:   %l0 = load i8, i8* %phi
 ; CHECK:   call void @bar(i8 %l0)
-; CHECK:   %l1 = load i8* %phi
+; CHECK:   %l1 = load i8, i8* %phi
 define void @test0(i8* %begin) {
 entry:
   br label %loop
 
 loop:
   %phi = phi i8* [ %begin, %entry ], [ %next, %loop ]
-  %l0 = load i8* %phi
+  %l0 = load i8, i8* %phi
   call void @bar(i8 %l0)
-  %l1 = load i8* %phi
+  %l1 = load i8, i8* %phi
   %next = getelementptr inbounds i8, i8* %phi, i8 %l1
   br label %loop
 }
diff --git a/llvm/test/Transforms/GVN/phi-translate.ll b/llvm/test/Transforms/GVN/phi-translate.ll
index 7fe95c6..6068b05 100644
--- a/llvm/test/Transforms/GVN/phi-translate.ll
+++ b/llvm/test/Transforms/GVN/phi-translate.ll
@@ -4,7 +4,7 @@
 
 ; CHECK-LABEL: @foo(
 ; CHECK: entry.end_crit_edge:
-; CHECK:   %n.pre = load i32* %q.phi.trans.insert
+; CHECK:   %n.pre = load i32, i32* %q.phi.trans.insert
 ; CHECK: then:
 ; CHECK:   store i32 %z
 ; CHECK: end:
@@ -26,6 +26,6 @@
 end:
   %j = sext i32 %x to i64
   %q = getelementptr [100 x i32], [100 x i32]* @G, i64 0, i64 %j
-  %n = load i32* %q
+  %n = load i32, i32* %q
   ret i32 %n
 }
diff --git a/llvm/test/Transforms/GVN/pr10820.ll b/llvm/test/Transforms/GVN/pr10820.ll
index 12c1e70..c6a9a93 100644
--- a/llvm/test/Transforms/GVN/pr10820.ll
+++ b/llvm/test/Transforms/GVN/pr10820.ll
@@ -11,7 +11,7 @@
 ; CHECK: store i32
   store i32 402662078, i32* bitcast (i31* @g to i32*), align 8
 ; CHECK-NOT: load i31
-  %0 = load i31* @g, align 8
+  %0 = load i31, i31* @g, align 8
 ; CHECK: store i31
   store i31 %0, i31* undef, align 1
   unreachable
diff --git a/llvm/test/Transforms/GVN/pr14166.ll b/llvm/test/Transforms/GVN/pr14166.ll
index 4d68205..eafe418 100644
--- a/llvm/test/Transforms/GVN/pr14166.ll
+++ b/llvm/test/Transforms/GVN/pr14166.ll
@@ -4,16 +4,16 @@
 define <2 x i32> @test1() {
   %v1 = alloca <2 x i32>
   call void @anything(<2 x i32>* %v1)
-  %v2 = load <2 x i32>* %v1
+  %v2 = load <2 x i32>, <2 x i32>* %v1
   %v3 = inttoptr <2 x i32> %v2 to <2 x i8*>
   %v4 = bitcast <2 x i32>* %v1 to <2 x i8*>*
   store <2 x i8*> %v3, <2 x i8*>* %v4
-  %v5 = load <2 x i32>* %v1
+  %v5 = load <2 x i32>, <2 x i32>* %v1
   ret <2 x i32> %v5
 ; CHECK-LABEL: @test1(
 ; CHECK: %v1 = alloca <2 x i32>
 ; CHECK: call void @anything(<2 x i32>* %v1)
-; CHECK: %v2 = load <2 x i32>* %v1
+; CHECK: %v2 = load <2 x i32>, <2 x i32>* %v1
 ; CHECK: %v3 = inttoptr <2 x i32> %v2 to <2 x i8*>
 ; CHECK: %v4 = bitcast <2 x i32>* %v1 to <2 x i8*>*
 ; CHECK: store <2 x i8*> %v3, <2 x i8*>* %v4
diff --git a/llvm/test/Transforms/GVN/pr17732.ll b/llvm/test/Transforms/GVN/pr17732.ll
index 606a195..bf838c9 100644
--- a/llvm/test/Transforms/GVN/pr17732.ll
+++ b/llvm/test/Transforms/GVN/pr17732.ll
@@ -15,10 +15,10 @@
 define i32 @main() {
 entry:
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds (%struct.with_array* @array_with_zeroinit, i64 0, i32 0, i64 0), i8* getelementptr inbounds ({ [2 x i8], i32, i8, [3 x i8] }* @main.obj_with_array, i64 0, i32 0, i64 0), i64 12, i32 4, i1 false)
-  %0 = load i8* getelementptr inbounds (%struct.with_array* @array_with_zeroinit, i64 0, i32 2), align 4
+  %0 = load i8, i8* getelementptr inbounds (%struct.with_array* @array_with_zeroinit, i64 0, i32 2), align 4
 
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds (%struct.with_vector* @vector_with_zeroinit, i64 0, i32 0, i64 0), i8* getelementptr inbounds ({ <2 x i8>, i32, i8, [3 x i8] }* @main.obj_with_vector, i64 0, i32 0, i64 0), i64 12, i32 4, i1 false)
-  %1 = load i8* getelementptr inbounds (%struct.with_vector* @vector_with_zeroinit, i64 0, i32 2), align 4
+  %1 = load i8, i8* getelementptr inbounds (%struct.with_vector* @vector_with_zeroinit, i64 0, i32 2), align 4
   %conv0 = sext i8 %0 to i32
   %conv1 = sext i8 %1 to i32
   %and = and i32 %conv0, %conv1
diff --git a/llvm/test/Transforms/GVN/pr17852.ll b/llvm/test/Transforms/GVN/pr17852.ll
index 0d5ee30..9a8a709 100644
--- a/llvm/test/Transforms/GVN/pr17852.ll
+++ b/llvm/test/Transforms/GVN/pr17852.ll
@@ -13,7 +13,7 @@
 if.then:                                          ; preds = %for.end
   %f22 = getelementptr inbounds %struct.S0, %struct.S0* %p1, i64 0, i32 2
   %f7 = getelementptr inbounds %struct.S0, %struct.S0* %p1, i64 0, i32 5
-  %tmp7 = load i32* %f7, align 8
+  %tmp7 = load i32, i32* %f7, align 8
   br label %if.end40
 if.else:                                          ; preds = %for.end
   br i1 false, label %for.cond18, label %if.then6
@@ -22,7 +22,7 @@
   %tmp10 = bitcast %struct.S0* %p1 to i16*
   %f5 = getelementptr inbounds %struct.S0, %struct.S0* %p1, i64 0, i32 3
   %tmp11 = bitcast [2 x i8]* %f5 to i16*
-  %bf.load13 = load i16* %tmp11, align 8
+  %bf.load13 = load i16, i16* %tmp11, align 8
   br label %if.end36
 for.cond18:                                       ; preds = %if.else
   call void @fn4()
@@ -32,33 +32,33 @@
   %f925 = getelementptr inbounds %struct.S0, %struct.S0* %p1, i64 0, i32 7
   %f526 = getelementptr inbounds %struct.S0, %struct.S0* %p1, i64 0, i32 3
   %tmp15 = bitcast [2 x i8]* %f526 to i16*
-  %bf.load27 = load i16* %tmp15, align 8
+  %bf.load27 = load i16, i16* %tmp15, align 8
   %tmp16 = bitcast %struct.S0* %p1 to i16*
   br label %if.end36
 if.end36:                                         ; preds = %if.end, %for.cond18, %if.then6
   %f537 = getelementptr inbounds %struct.S0, %struct.S0* %p1, i64 0, i32 3
   %tmp17 = bitcast [2 x i8]* %f537 to i16*
-  %bf.load38 = load i16* %tmp17, align 8
+  %bf.load38 = load i16, i16* %tmp17, align 8
   %bf.clear39 = and i16 %bf.load38, -16384
   br label %if.end40
 if.end40:                                         ; preds = %if.end36, %if.then
   %f6 = getelementptr inbounds %struct.S0, %struct.S0* %p1, i64 0, i32 4
-  %tmp18 = load i32* %f6, align 4
+  %tmp18 = load i32, i32* %f6, align 4
   call void @fn2(i32 %tmp18)
   %f8 = getelementptr inbounds %struct.S0, %struct.S0* %p1, i64 0, i32 6
-  %tmp19 = load i32* %f8, align 4
+  %tmp19 = load i32, i32* %f8, align 4
   %tobool41 = icmp eq i32 %tmp19, 0
   br i1 true, label %if.end50, label %if.then42
 if.then42:                                        ; preds = %if.end40
   %tmp20 = bitcast %struct.S0* %p1 to i16*
   %f547 = getelementptr inbounds %struct.S0, %struct.S0* %p1, i64 0, i32 3
   %tmp21 = bitcast [2 x i8]* %f547 to i16*
-  %bf.load48 = load i16* %tmp21, align 8
+  %bf.load48 = load i16, i16* %tmp21, align 8
   br label %if.end50
 if.end50:                                         ; preds = %if.then42, %if.end40
   %f551 = getelementptr inbounds %struct.S0, %struct.S0* %p1, i64 0, i32 3
   %tmp22 = bitcast [2 x i8]* %f551 to i16*
-  %bf.load52 = load i16* %tmp22, align 8
+  %bf.load52 = load i16, i16* %tmp22, align 8
   %bf.clear53 = and i16 %bf.load52, -16384
   ret void
 }
diff --git a/llvm/test/Transforms/GVN/pre-basic-add.ll b/llvm/test/Transforms/GVN/pre-basic-add.ll
index 4bde05c..460d1f9 100644
--- a/llvm/test/Transforms/GVN/pre-basic-add.ll
+++ b/llvm/test/Transforms/GVN/pre-basic-add.ll
@@ -5,7 +5,7 @@
 
 define i32 @test() nounwind {
 entry:
-	%0 = load i32* @H, align 4		; <i32> [#uses=2]
+	%0 = load i32, i32* @H, align 4		; <i32> [#uses=2]
 	%1 = call i32 (...)* @foo() nounwind		; <i32> [#uses=1]
 	%2 = icmp ne i32 %1, 0		; <i1> [#uses=1]
 	br i1 %2, label %bb, label %bb1
diff --git a/llvm/test/Transforms/GVN/pre-gep-load.ll b/llvm/test/Transforms/GVN/pre-gep-load.ll
index 765866f..291af35 100644
--- a/llvm/test/Transforms/GVN/pre-gep-load.ll
+++ b/llvm/test/Transforms/GVN/pre-gep-load.ll
@@ -14,9 +14,9 @@
 sw.bb:                                            ; preds = %entry, %entry
   %idxprom = sext i32 %i to i64
   %arrayidx = getelementptr inbounds double*, double** %p, i64 0
-  %0 = load double** %arrayidx, align 8
+  %0 = load double*, double** %arrayidx, align 8
   %arrayidx1 = getelementptr inbounds double, double* %0, i64 %idxprom
-  %1 = load double* %arrayidx1, align 8
+  %1 = load double, double* %arrayidx1, align 8
   %sub = fsub double %1, 1.000000e+00
   %cmp = fcmp olt double %sub, 0.000000e+00
   br i1 %cmp, label %if.then, label %if.end
@@ -30,9 +30,9 @@
 sw.bb2:                                           ; preds = %if.end, %entry
   %idxprom3 = sext i32 %i to i64
   %arrayidx4 = getelementptr inbounds double*, double** %p, i64 0
-  %2 = load double** %arrayidx4, align 8
+  %2 = load double*, double** %arrayidx4, align 8
   %arrayidx5 = getelementptr inbounds double, double* %2, i64 %idxprom3
-  %3 = load double* %arrayidx5, align 8
+  %3 = load double, double* %arrayidx5, align 8
 ; CHECK: sw.bb2:
 ; CHECK-NEXT-NOT: sext
 ; CHECK-NEXT: phi double [
diff --git a/llvm/test/Transforms/GVN/pre-load.ll b/llvm/test/Transforms/GVN/pre-load.ll
index 1d75c491..24221d5 100644
--- a/llvm/test/Transforms/GVN/pre-load.ll
+++ b/llvm/test/Transforms/GVN/pre-load.ll
@@ -9,14 +9,14 @@
 block2:
  br label %block4
 ; CHECK: block2:
-; CHECK-NEXT: load i32* %p
+; CHECK-NEXT: load i32, i32* %p
 
 block3:
   store i32 0, i32* %p
   br label %block4
 
 block4:
-  %PRE = load i32* %p
+  %PRE = load i32, i32* %p
   ret i32 %PRE
 ; CHECK: block4:
 ; CHECK-NEXT: phi i32
@@ -32,7 +32,7 @@
 block2:
  br label %block4
 ; CHECK: block2:
-; CHECK-NEXT: load i32* %q
+; CHECK-NEXT: load i32, i32* %q
 
 block3:
   store i32 0, i32* %p
@@ -40,7 +40,7 @@
 
 block4:
   %P2 = phi i32* [%p, %block3], [%q, %block2]
-  %PRE = load i32* %P2
+  %PRE = load i32, i32* %P2
   ret i32 %PRE
 ; CHECK: block4:
 ; CHECK-NEXT: phi i32 [
@@ -59,7 +59,7 @@
 block2:
  br label %block4
 ; CHECK: block2:
-; CHECK-NEXT: load i32* %B
+; CHECK-NEXT: load i32, i32* %B
 
 block3:
   %A = getelementptr i32, i32* %p, i32 1
@@ -69,7 +69,7 @@
 block4:
   %P2 = phi i32* [%p, %block3], [%q, %block2]
   %P3 = getelementptr i32, i32* %P2, i32 1
-  %PRE = load i32* %P3
+  %PRE = load i32, i32* %P3
   ret i32 %PRE
 ; CHECK: block4:
 ; CHECK-NEXT: phi i32 [
@@ -87,7 +87,7 @@
 block2:
  br label %block4
 ; CHECK: block2:
-; CHECK:   load i32*
+; CHECK:   load i32, i32*
 ; CHECK:   br label %block4
 
 block3:
@@ -101,7 +101,7 @@
 block4:
   %P2 = phi i32* [%p, %block3], [%q, %block2]
   %P3 = getelementptr i32, i32* %P2, i32 1
-  %PRE = load i32* %P3
+  %PRE = load i32, i32* %P3
   ret i32 %PRE
 ; CHECK: block4:
 ; CHECK-NEXT: phi i32 [
@@ -127,7 +127,7 @@
   br label %bb
 
 ; CHECK: bb.nph:
-; CHECK: load double*
+; CHECK: load double, double*
 ; CHECK: br label %bb
 
 bb:             
@@ -135,8 +135,8 @@
   %tmp6 = add i64 %indvar, 1                    
   %scevgep = getelementptr double, double* %G, i64 %tmp6
   %scevgep7 = getelementptr double, double* %G, i64 %indvar
-  %2 = load double* %scevgep7, align 8
-  %3 = load double* %scevgep, align 8 
+  %2 = load double, double* %scevgep7, align 8
+  %3 = load double, double* %scevgep, align 8 
   %4 = fadd double %2, %3             
   store double %4, double* %scevgep7, align 8
   %exitcond = icmp eq i64 %tmp6, %tmp 
@@ -144,8 +144,8 @@
 
 ; Should only be one load in the loop.
 ; CHECK: bb:
-; CHECK: load double*
-; CHECK-NOT: load double*
+; CHECK: load double, double*
+; CHECK-NOT: load double, double*
 ; CHECK: br i1 %exitcond
 
 return:                               
@@ -170,7 +170,7 @@
   br label %bb
 
 ; CHECK: bb.nph:
-; CHECK: load double*
+; CHECK: load double, double*
 ; CHECK: br label %bb
 
 bb:             
@@ -178,8 +178,8 @@
   %tmp6 = add i64 %indvar, 1                    
   %scevgep = getelementptr double, double* %G, i64 %tmp6
   %scevgep7 = getelementptr double, double* %G, i64 %indvar
-  %2 = load double* %scevgep7, align 8
-  %3 = load double* %scevgep, align 8 
+  %2 = load double, double* %scevgep7, align 8
+  %3 = load double, double* %scevgep, align 8 
   %4 = fadd double %2, %3             
   store double %4, double* %scevgep, align 8
   %exitcond = icmp eq i64 %tmp6, %tmp 
@@ -187,8 +187,8 @@
 
 ; Should only be one load in the loop.
 ; CHECK: bb:
-; CHECK: load double*
-; CHECK-NOT: load double*
+; CHECK: load double, double*
+; CHECK-NOT: load double, double*
 ; CHECK: br i1 %exitcond
 
 return:                               
@@ -222,8 +222,8 @@
   %scevgep = getelementptr double, double* %G, i64 %tmp8  
   %tmp9 = add i64 %indvar, 1                      
   %scevgep10 = getelementptr double, double* %G, i64 %tmp9 
-  %3 = load double* %scevgep10, align 8           
-  %4 = load double* %scevgep, align 8             
+  %3 = load double, double* %scevgep10, align 8           
+  %4 = load double, double* %scevgep, align 8             
   %5 = fadd double %3, %4                         
   store double %5, double* %scevgep, align 8
   %exitcond = icmp eq i64 %tmp9, %tmp7            
@@ -231,8 +231,8 @@
 
 ; Should only be one load in the loop.
 ; CHECK: bb:
-; CHECK: load double*
-; CHECK-NOT: load double*
+; CHECK: load double, double*
+; CHECK-NOT: load double, double*
 ; CHECK: br i1 %exitcond
 
 return:                                           
@@ -249,7 +249,7 @@
 block2:
  br label %block4
 ; CHECK: block2:
-; CHECK:   load i32*
+; CHECK:   load i32, i32*
 ; CHECK:   br label %block4
 
 block3:
@@ -260,7 +260,7 @@
 block4:
   %P2 = phi i32* [%p, %block3], [%q, %block2]
   %P3 = getelementptr i32, i32* %P2, i32 1
-  %PRE = load i32* %P3
+  %PRE = load i32, i32* %P3
   ret i32 %PRE
 ; CHECK: block4:
 ; CHECK-NEXT: phi i32 [
@@ -288,7 +288,7 @@
   br label %bb
 
 ; CHECK: bb.nph:
-; CHECK:   load double*
+; CHECK:   load double, double*
 ; CHECK:   br label %bb
 
 bb:                                               
@@ -297,8 +297,8 @@
   %scevgep = getelementptr double, double* %G, i64 %tmp8  
   %tmp9 = add i64 %indvar, 1                      
   %scevgep10 = getelementptr double, double* %G, i64 %tmp9 
-  %3 = load double* %scevgep10, align 8           
-  %4 = load double* %scevgep, align 8             
+  %3 = load double, double* %scevgep10, align 8           
+  %4 = load double, double* %scevgep, align 8             
   %5 = fadd double %3, %4                         
   store double %5, double* %scevgep, align 8
   %exitcond = icmp eq i64 %tmp9, %tmp7            
@@ -306,8 +306,8 @@
 
 ; Should only be one load in the loop.
 ; CHECK: bb:
-; CHECK: load double*
-; CHECK-NOT: load double*
+; CHECK: load double, double*
+; CHECK-NOT: load double, double*
 ; CHECK: br i1 %exitcond
 
 return:                                           
@@ -332,8 +332,8 @@
   %tmp8 = add i64 %tmp, -1
   br label %bb
 ; CHECK: bb.nph:
-; CHECK:   load double*
-; CHECK:   load double*
+; CHECK:   load double, double*
+; CHECK:   load double, double*
 ; CHECK:   br label %bb
 
 
@@ -344,10 +344,10 @@
   %scevgep10 = getelementptr double, double* %G, i64 %tmp9
   %tmp11 = add i64 %indvar, 1
   %scevgep12 = getelementptr double, double* %G, i64 %tmp11
-  %2 = load double* %scevgep12, align 8
-  %3 = load double* %scevgep10, align 8
+  %2 = load double, double* %scevgep12, align 8
+  %3 = load double, double* %scevgep10, align 8
   %4 = fadd double %2, %3
-  %5 = load double* %scevgep, align 8
+  %5 = load double, double* %scevgep, align 8
   %6 = fadd double %4, %5
   store double %6, double* %scevgep12, align 8
   %exitcond = icmp eq i64 %tmp11, %tmp8
@@ -355,8 +355,8 @@
 
 ; Should only be one load in the loop.
 ; CHECK: bb:
-; CHECK: load double*
-; CHECK-NOT: load double*
+; CHECK: load double, double*
+; CHECK-NOT: load double, double*
 ; CHECK: br i1 %exitcond
 
 return:
@@ -372,7 +372,7 @@
 block2:
  %cond = icmp sgt i32 %N, 1
  br i1 %cond, label %block4, label %block5
-; CHECK: load i32* %p
+; CHECK: load i32, i32* %p
 ; CHECK-NEXT: br label %block4
 
 block3:
@@ -380,7 +380,7 @@
   br label %block4
 
 block4:
-  %PRE = load i32* %p
+  %PRE = load i32, i32* %p
   br label %block5
 
 block5:
diff --git a/llvm/test/Transforms/GVN/pre-single-pred.ll b/llvm/test/Transforms/GVN/pre-single-pred.ll
index f1f5c71..0df45cf 100644
--- a/llvm/test/Transforms/GVN/pre-single-pred.ll
+++ b/llvm/test/Transforms/GVN/pre-single-pred.ll
@@ -23,9 +23,9 @@
 	br label %for.end
 
 ; CHECK: for.body:
-; CHECK-NEXT: %tmp3 = load i32* @p
+; CHECK-NEXT: %tmp3 = load i32, i32* @p
 for.body:		; preds = %for.cond
-	%tmp3 = load i32* @p		; <i32> [#uses=1]
+	%tmp3 = load i32, i32* @p		; <i32> [#uses=1]
 	%dec = add i32 %tmp3, -1		; <i32> [#uses=2]
 	store i32 %dec, i32* @p
 	%cmp6 = icmp slt i32 %dec, 0		; <i1> [#uses=1]
@@ -40,6 +40,6 @@
 	br label %for.cond
 
 for.end:		; preds = %for.body.for.end_crit_edge, %for.cond.for.end_crit_edge
-	%tmp9 = load i32* @p		; <i32> [#uses=1]
+	%tmp9 = load i32, i32* @p		; <i32> [#uses=1]
 	ret i32 %tmp9
 }
diff --git a/llvm/test/Transforms/GVN/preserve-tbaa.ll b/llvm/test/Transforms/GVN/preserve-tbaa.ll
index 587d463..19467ee 100644
--- a/llvm/test/Transforms/GVN/preserve-tbaa.ll
+++ b/llvm/test/Transforms/GVN/preserve-tbaa.ll
@@ -5,7 +5,7 @@
 ; GVN should preserve the TBAA tag on loads when doing PRE.
 
 ; CHECK-LABEL: @test(
-; CHECK: %tmp33.pre = load i16* %P, align 2, !tbaa !0
+; CHECK: %tmp33.pre = load i16, i16* %P, align 2, !tbaa !0
 ; CHECK: br label %for.body
 define void @test(i16 *%P, i16* %Q) nounwind {
 entry:
@@ -15,7 +15,7 @@
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %bb.nph
-  %tmp33 = load i16* %P, align 2, !tbaa !0
+  %tmp33 = load i16, i16* %P, align 2, !tbaa !0
   store i16 %tmp33, i16* %Q
 
   store i16 0, i16* %P, align 2, !tbaa !0
diff --git a/llvm/test/Transforms/GVN/range.ll b/llvm/test/Transforms/GVN/range.ll
index 3720232..297c6aa 100644
--- a/llvm/test/Transforms/GVN/range.ll
+++ b/llvm/test/Transforms/GVN/range.ll
@@ -2,82 +2,82 @@
 
 define i32 @test1(i32* %p) {
 ; CHECK: @test1(i32* %p)
-; CHECK: %a = load i32* %p, !range !0
+; CHECK: %a = load i32, i32* %p, !range !0
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32* %p, !range !0
-  %b = load i32* %p, !range !0
+  %a = load i32, i32* %p, !range !0
+  %b = load i32, i32* %p, !range !0
   %c = add i32 %a, %b
   ret i32 %c
 }
 
 define i32 @test2(i32* %p) {
 ; CHECK: @test2(i32* %p)
-; CHECK: %a = load i32* %p
+; CHECK: %a = load i32, i32* %p
 ; CHECK-NOT: range
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32* %p, !range !0
-  %b = load i32* %p
+  %a = load i32, i32* %p, !range !0
+  %b = load i32, i32* %p
   %c = add i32 %a, %b
   ret i32 %c
 }
 
 define i32 @test3(i32* %p) {
 ; CHECK: @test3(i32* %p)
-; CHECK: %a = load i32* %p, !range ![[DISJOINT_RANGE:[0-9]+]]
+; CHECK: %a = load i32, i32* %p, !range ![[DISJOINT_RANGE:[0-9]+]]
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32* %p, !range !0
-  %b = load i32* %p, !range !1
+  %a = load i32, i32* %p, !range !0
+  %b = load i32, i32* %p, !range !1
   %c = add i32 %a, %b
   ret i32 %c
 }
 
 define i32 @test4(i32* %p) {
 ; CHECK: @test4(i32* %p)
-; CHECK: %a = load i32* %p, !range ![[MERGED_RANGE:[0-9]+]]
+; CHECK: %a = load i32, i32* %p, !range ![[MERGED_RANGE:[0-9]+]]
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32* %p, !range !0
-  %b = load i32* %p, !range !2
+  %a = load i32, i32* %p, !range !0
+  %b = load i32, i32* %p, !range !2
   %c = add i32 %a, %b
   ret i32 %c
 }
 
 define i32 @test5(i32* %p) {
 ; CHECK: @test5(i32* %p)
-; CHECK: %a = load i32* %p, !range ![[MERGED_SIGNED_RANGE:[0-9]+]]
+; CHECK: %a = load i32, i32* %p, !range ![[MERGED_SIGNED_RANGE:[0-9]+]]
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32* %p, !range !3
-  %b = load i32* %p, !range !4
+  %a = load i32, i32* %p, !range !3
+  %b = load i32, i32* %p, !range !4
   %c = add i32 %a, %b
   ret i32 %c
 }
 
 define i32 @test6(i32* %p) {
 ; CHECK: @test6(i32* %p)
-; CHECK: %a = load i32* %p, !range ![[MERGED_TEST6:[0-9]+]]
+; CHECK: %a = load i32, i32* %p, !range ![[MERGED_TEST6:[0-9]+]]
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32* %p, !range !5
-  %b = load i32* %p, !range !6
+  %a = load i32, i32* %p, !range !5
+  %b = load i32, i32* %p, !range !6
   %c = add i32 %a, %b
   ret i32 %c
 }
 
 define i32 @test7(i32* %p) {
 ; CHECK: @test7(i32* %p)
-; CHECK: %a = load i32* %p, !range ![[MERGED_TEST7:[0-9]+]]
+; CHECK: %a = load i32, i32* %p, !range ![[MERGED_TEST7:[0-9]+]]
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32* %p, !range !7
-  %b = load i32* %p, !range !8
+  %a = load i32, i32* %p, !range !7
+  %b = load i32, i32* %p, !range !8
   %c = add i32 %a, %b
   ret i32 %c
 }
 
 define i32 @test8(i32* %p) {
 ; CHECK: @test8(i32* %p)
-; CHECK: %a = load i32* %p
+; CHECK: %a = load i32, i32* %p
 ; CHECK-NOT: range
 ; CHECK: %c = add i32 %a, %a
-  %a = load i32* %p, !range !9
-  %b = load i32* %p, !range !10
+  %a = load i32, i32* %p, !range !9
+  %b = load i32, i32* %p, !range !10
   %c = add i32 %a, %b
   ret i32 %c
 }
diff --git a/llvm/test/Transforms/GVN/readattrs.ll b/llvm/test/Transforms/GVN/readattrs.ll
index ba624a7..fb36d07c 100644
--- a/llvm/test/Transforms/GVN/readattrs.ll
+++ b/llvm/test/Transforms/GVN/readattrs.ll
@@ -9,7 +9,7 @@
   %a = alloca i8
   store i8 1, i8* %a
   call void @use(i8* %a)
-  %b = load i8* %a
+  %b = load i8, i8* %a
   ret i8 %b
 ; CHECK-LABEL: define i8 @test(
 ; CHECK: call void @use(i8* %a)
diff --git a/llvm/test/Transforms/GVN/rle-must-alias.ll b/llvm/test/Transforms/GVN/rle-must-alias.ll
index fc83c53..0d181dd 100644
--- a/llvm/test/Transforms/GVN/rle-must-alias.ll
+++ b/llvm/test/Transforms/GVN/rle-must-alias.ll
@@ -18,21 +18,21 @@
 bb:		; preds = %entry
 	%2 = tail call i32 (...)* @bar() nounwind		; <i32> [#uses=0]
 	%3 = getelementptr [100 x i32], [100 x i32]* @H, i32 0, i32 %i		; <i32*> [#uses=1]
-	%4 = load i32* %3, align 4		; <i32> [#uses=1]
+	%4 = load i32, i32* %3, align 4		; <i32> [#uses=1]
 	store i32 %4, i32* @G, align 4
 	br label %bb3
 
 bb1:		; preds = %entry
 	%5 = tail call i32 (...)* @baz() nounwind		; <i32> [#uses=0]
 	%6 = getelementptr [100 x i32], [100 x i32]* @H, i32 0, i32 %i		; <i32*> [#uses=1]
-	%7 = load i32* %6, align 4		; <i32> [#uses=2]
+	%7 = load i32, i32* %6, align 4		; <i32> [#uses=2]
 	store i32 %7, i32* @G, align 4
 	%8 = icmp eq i32 %7, 0		; <i1> [#uses=1]
 	br i1 %8, label %bb3, label %bb4
 
 bb3:		; preds = %bb1, %bb
 	%9 = getelementptr [100 x i32], [100 x i32]* @H, i32 0, i32 %i		; <i32*> [#uses=1]
-	%DEAD = load i32* %9, align 4		; <i32> [#uses=1]
+	%DEAD = load i32, i32* %9, align 4		; <i32> [#uses=1]
 	ret i32 %DEAD
 
 bb4:		; preds = %bb1
diff --git a/llvm/test/Transforms/GVN/rle-no-phi-translate.ll b/llvm/test/Transforms/GVN/rle-no-phi-translate.ll
index 96dbf48..c1fd201 100644
--- a/llvm/test/Transforms/GVN/rle-no-phi-translate.ll
+++ b/llvm/test/Transforms/GVN/rle-no-phi-translate.ll
@@ -19,7 +19,7 @@
 
 bb2:		; preds = %bb1, %bb
 	%c_addr.0 = phi i32* [ %b, %entry ], [ %c, %bb ]		; <i32*> [#uses=1]
-	%cv = load i32* %c_addr.0, align 4		; <i32> [#uses=1]
+	%cv = load i32, i32* %c_addr.0, align 4		; <i32> [#uses=1]
 	ret i32 %cv
 ; CHECK: bb2:
 ; CHECK-NOT: load i32
diff --git a/llvm/test/Transforms/GVN/rle-nonlocal.ll b/llvm/test/Transforms/GVN/rle-nonlocal.ll
index 8229aaa..7975462 100644
--- a/llvm/test/Transforms/GVN/rle-nonlocal.ll
+++ b/llvm/test/Transforms/GVN/rle-nonlocal.ll
@@ -6,20 +6,20 @@
 	br i1 %cmp , label %block2, label %block3
 
 block2:
- %a = load i32** %p
+ %a = load i32*, i32** %p
  br label %block4
 
 block3:
-  %b = load i32** %p
+  %b = load i32*, i32** %p
   br label %block4
 
 block4:
 ; CHECK-NOT: %existingPHI = phi
 ; CHECK: %DEAD = phi
   %existingPHI = phi i32* [ %a, %block2 ], [ %b, %block3 ] 
-  %DEAD = load i32** %p
-  %c = load i32* %DEAD
-  %d = load i32* %existingPHI
+  %DEAD = load i32*, i32** %p
+  %c = load i32, i32* %DEAD
+  %d = load i32, i32* %existingPHI
   %e = add i32 %c, %d
   ret i32 %e
 }
diff --git a/llvm/test/Transforms/GVN/rle-phi-translate.ll b/llvm/test/Transforms/GVN/rle-phi-translate.ll
index b2afb70..7402e1a 100644
--- a/llvm/test/Transforms/GVN/rle-phi-translate.ll
+++ b/llvm/test/Transforms/GVN/rle-phi-translate.ll
@@ -11,13 +11,13 @@
 	br i1 %t1, label %bb, label %bb1
 
 bb:
-	%t2 = load i32* %c, align 4
+	%t2 = load i32, i32* %c, align 4
 	%t3 = add i32 %t2, 1
 	store i32 %t3, i32* %g, align 4
 	br label %bb2
 
 bb1:		; preds = %entry
-	%t5 = load i32* %b, align 4
+	%t5 = load i32, i32* %b, align 4
 	%t6 = add i32 %t5, 1
 	store i32 %t6, i32* %g, align 4
 	br label %bb2
@@ -25,8 +25,8 @@
 bb2:		; preds = %bb1, %bb
 	%c_addr.0 = phi i32* [ %g, %bb1 ], [ %c, %bb ]
 	%b_addr.0 = phi i32* [ %b, %bb1 ], [ %g, %bb ]
-	%cv = load i32* %c_addr.0, align 4
-	%bv = load i32* %b_addr.0, align 4
+	%cv = load i32, i32* %c_addr.0, align 4
+	%bv = load i32, i32* %b_addr.0, align 4
 ; CHECK: %bv = phi i32
 ; CHECK: %cv = phi i32
 ; CHECK-NOT: load
@@ -53,7 +53,7 @@
 bb2:
   %d = phi i32* [ %c, %bb1 ], [ %b, %bb ]
   %d1 = bitcast i32* %d to i8*
-  %dv = load i8* %d1
+  %dv = load i8, i8* %d1
 ; CHECK: %dv = phi i8 [ 92, %bb1 ], [ 4, %bb ]
 ; CHECK-NOT: load
 ; CHECK: ret i8 %dv
@@ -79,7 +79,7 @@
   %d = phi i32* [ %c, %bb1 ], [ %b, %bb ]
   %i = phi i32 [ 7, %bb1 ], [ 17, %bb ]
   %d1 = getelementptr i32, i32* %d, i32 %i
-  %dv = load i32* %d1
+  %dv = load i32, i32* %d1
 ; CHECK: %dv = phi i32 [ 82, %bb1 ], [ 4, %bb ]
 ; CHECK-NOT: load
 ; CHECK: ret i32 %dv
@@ -105,7 +105,7 @@
   %d = phi i32* [ %c, %bb1 ], [ %b, %bb ]
   %i = phi i32 [ 7, %bb1 ], [ 0, %bb ]
   %d1 = getelementptr i32, i32* %d, i32 %i
-  %dv = load i32* %d1
+  %dv = load i32, i32* %d1
 ; CHECK: %dv = phi i32 [ 82, %bb1 ], [ 4, %bb ]
 ; CHECK-NOT: load
 ; CHECK: ret i32 %dv
@@ -130,8 +130,8 @@
   %arrayidx6 = getelementptr double, double* %G, i64 %indvar
   %tmp = add i64 %indvar, 1
   %arrayidx = getelementptr double, double* %G, i64 %tmp
-  %tmp3 = load double* %arrayidx
-  %tmp7 = load double* %arrayidx6
+  %tmp3 = load double, double* %arrayidx
+  %tmp7 = load double, double* %arrayidx6
   %add = fadd double %tmp3, %tmp7
   store double %add, double* %arrayidx
   %exitcond = icmp eq i64 %tmp, 999
diff --git a/llvm/test/Transforms/GVN/rle-semidominated.ll b/llvm/test/Transforms/GVN/rle-semidominated.ll
index 923cd03..f80d040 100644
--- a/llvm/test/Transforms/GVN/rle-semidominated.ll
+++ b/llvm/test/Transforms/GVN/rle-semidominated.ll
@@ -2,7 +2,7 @@
 
 define i32 @main(i32* %p, i32 %x, i32 %y) {
 block1:
-  %z = load i32* %p
+  %z = load i32, i32* %p
   %cmp = icmp eq i32 %x, %y
 	br i1 %cmp, label %block2, label %block3
 
@@ -15,6 +15,6 @@
   br label %block4
 
 block4:
-  %DEAD = load i32* %p
+  %DEAD = load i32, i32* %p
   ret i32 %DEAD
 }
diff --git a/llvm/test/Transforms/GVN/rle.ll b/llvm/test/Transforms/GVN/rle.ll
index a19575d..ee68c0c 100644
--- a/llvm/test/Transforms/GVN/rle.ll
+++ b/llvm/test/Transforms/GVN/rle.ll
@@ -5,7 +5,7 @@
 define i32 @test0(i32 %V, i32* %P) {
   store i32 %V, i32* %P
 
-  %A = load i32* %P
+  %A = load i32, i32* %P
   ret i32 %A
 ; CHECK-LABEL: @test0(
 ; CHECK: ret i32 %V
@@ -20,7 +20,7 @@
 define i8 @crash0({i32, i32} %A, {i32, i32}* %P) {
   store {i32, i32} %A, {i32, i32}* %P
   %X = bitcast {i32, i32}* %P to i8*
-  %Y = load i8* %X
+  %Y = load i8, i8* %X
   ret i8 %Y
 }
 
@@ -28,7 +28,7 @@
 declare void @helper()
 define void @crash1() {
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* undef, i8* undef, i64 undef, i32 1, i1 false) nounwind
-  %tmp = load i8* bitcast (void ()* @helper to i8*)
+  %tmp = load i8, i8* bitcast (void ()* @helper to i8*)
   %x = icmp eq i8 %tmp, 15
   ret void
 }
@@ -45,7 +45,7 @@
    
   %P2 = bitcast i32* %P to float*
 
-  %A = load float* %P2
+  %A = load float, float* %P2
   ret float %A
 ; CHECK-LABEL: @coerce_mustalias1(
 ; CHECK-NOT: load
@@ -58,7 +58,7 @@
    
   %P2 = bitcast i32** %P to float*
 
-  %A = load float* %P2
+  %A = load float, float* %P2
   ret float %A
 ; CHECK-LABEL: @coerce_mustalias2(
 ; CHECK-NOT: load
@@ -71,7 +71,7 @@
    
   %P2 = bitcast float* %P to i32**
 
-  %A = load i32** %P2
+  %A = load i32*, i32** %P2
   ret i32* %A
 ; CHECK-LABEL: @coerce_mustalias3(
 ; CHECK-NOT: load
@@ -80,10 +80,10 @@
 
 ;; i32 -> f32 load forwarding.
 define float @coerce_mustalias4(i32* %P, i1 %cond) {
-  %A = load i32* %P
+  %A = load i32, i32* %P
   
   %P2 = bitcast i32* %P to float*
-  %B = load float* %P2
+  %B = load float, float* %P2
   br i1 %cond, label %T, label %F
 T:
   ret float %B
@@ -93,7 +93,7 @@
   ret float %X
 
 ; CHECK-LABEL: @coerce_mustalias4(
-; CHECK: %A = load i32* %P
+; CHECK: %A = load i32, i32* %P
 ; CHECK-NOT: load
 ; CHECK: ret float
 ; CHECK: F:
@@ -105,7 +105,7 @@
    
   %P2 = bitcast i32* %P to i8*
 
-  %A = load i8* %P2
+  %A = load i8, i8* %P2
   ret i8 %A
 ; CHECK-LABEL: @coerce_mustalias5(
 ; CHECK-NOT: load
@@ -118,7 +118,7 @@
    
   %P2 = bitcast i64* %P to float*
 
-  %A = load float* %P2
+  %A = load float, float* %P2
   ret float %A
 ; CHECK-LABEL: @coerce_mustalias6(
 ; CHECK-NOT: load
@@ -131,7 +131,7 @@
    
   %P2 = bitcast i64* %P to i8**
 
-  %A = load i8** %P2
+  %A = load i8*, i8** %P2
   ret i8* %A
 ; CHECK-LABEL: @coerce_mustalias7(
 ; CHECK-NOT: load
@@ -144,7 +144,7 @@
   %conv = bitcast i16* %A to i8* 
   tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i32 1, i1 false)
   %arrayidx = getelementptr inbounds i16, i16* %A, i64 42
-  %tmp2 = load i16* %arrayidx
+  %tmp2 = load i16, i16* %arrayidx
   ret i16 %tmp2
 ; CHECK-LABEL: @memset_to_i16_local(
 ; CHECK-NOT: load
@@ -157,7 +157,7 @@
   %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
   tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 %Val, i64 400, i32 1, i1 false)
   %arrayidx = getelementptr inbounds float, float* %A, i64 42 ; <float*> [#uses=1]
-  %tmp2 = load float* %arrayidx                   ; <float> [#uses=1]
+  %tmp2 = load float, float* %arrayidx                   ; <float> [#uses=1]
   ret float %tmp2
 ; CHECK-LABEL: @memset_to_float_local(
 ; CHECK-NOT: load
@@ -184,7 +184,7 @@
 
 Cont:
   %P2 = getelementptr i16, i16* %P, i32 4
-  %A = load i16* %P2
+  %A = load i16, i16* %P2
   ret i16 %A
 
 ; CHECK-LABEL: @memset_to_i16_nonlocal0(
@@ -203,7 +203,7 @@
   %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
   tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %conv, i8* bitcast ({i32, float, i32 }* @GCst to i8*), i64 12, i32 1, i1 false)
   %arrayidx = getelementptr inbounds float, float* %A, i64 1 ; <float*> [#uses=1]
-  %tmp2 = load float* %arrayidx                   ; <float> [#uses=1]
+  %tmp2 = load float, float* %arrayidx                   ; <float> [#uses=1]
   ret float %tmp2
 ; CHECK-LABEL: @memcpy_to_float_local(
 ; CHECK-NOT: load
@@ -216,7 +216,7 @@
   %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
   tail call void @llvm.memcpy.p0i8.p1i8.i64(i8* %conv, i8 addrspace(1)* bitcast ({i32, float, i32 } addrspace(1)* @GCst_as1 to i8 addrspace(1)*), i64 12, i32 1, i1 false)
   %arrayidx = getelementptr inbounds float, float* %A, i64 1 ; <float*> [#uses=1]
-  %tmp2 = load float* %arrayidx                   ; <float> [#uses=1]
+  %tmp2 = load float, float* %arrayidx                   ; <float> [#uses=1]
   ret float %tmp2
 ; CHECK-LABEL: @memcpy_to_float_local_as1(
 ; CHECK-NOT: load
@@ -237,7 +237,7 @@
   br label %Cont
 
 Cont:
-  %A = load i8* %P3
+  %A = load i8, i8* %P3
   ret i8 %A
 
 ; CHECK-LABEL: @coerce_mustalias_nonlocal0(
@@ -263,7 +263,7 @@
 
 Cont:
   %P3 = bitcast i32* %P to i8*
-  %A = load i8* %P3
+  %A = load i8, i8* %P3
   ret i8 %A
 
 ; CHECK-LABEL: @coerce_mustalias_nonlocal1(
@@ -286,12 +286,12 @@
   br label %Cont
 
 Cont:
-  %A = load i8* %P3
+  %A = load i8, i8* %P3
   ret i8 %A
 
 ; CHECK-LABEL: @coerce_mustalias_pre0(
 ; CHECK: F:
-; CHECK:   load i8* %P3
+; CHECK:   load i8, i8* %P3
 ; CHECK: Cont:
 ; CHECK:   %A = phi i8 [
 ; CHECK-NOT: load
@@ -311,7 +311,7 @@
   %P2 = bitcast i32* %P to i8*
   %P3 = getelementptr i8, i8* %P2, i32 2
 
-  %A = load i8* %P3
+  %A = load i8, i8* %P3
   ret i8 %A
 ; CHECK-LABEL: @coerce_offset0(
 ; CHECK-NOT: load
@@ -324,7 +324,7 @@
   %P2 = addrspacecast i32* %P to i8 addrspace(1)*
   %P3 = getelementptr i8, i8 addrspace(1)* %P2, i32 2
 
-  %A = load i8 addrspace(1)* %P3
+  %A = load i8, i8 addrspace(1)* %P3
   ret i8 %A
 ; CHECK-LABEL: @coerce_offset0_addrspacecast(
 ; CHECK-NOT: load
@@ -346,7 +346,7 @@
   br label %Cont
 
 Cont:
-  %A = load i8* %P4
+  %A = load i8, i8* %P4
   ret i8 %A
 
 ; CHECK-LABEL: @coerce_offset_nonlocal0(
@@ -370,12 +370,12 @@
   br label %Cont
 
 Cont:
-  %A = load i8* %P4
+  %A = load i8, i8* %P4
   ret i8 %A
 
 ; CHECK-LABEL: @coerce_offset_pre0(
 ; CHECK: F:
-; CHECK:   load i8* %P4
+; CHECK:   load i8, i8* %P4
 ; CHECK: Cont:
 ; CHECK:   %A = phi i8 [
 ; CHECK-NOT: load
@@ -386,28 +386,28 @@
 block1:
   %A = alloca i32*
 
-  %z = load i32** %p
+  %z = load i32*, i32** %p
   store i32* %z, i32** %A
   %cmp = icmp eq i32 %x, %y
   br i1 %cmp, label %block2, label %block3
 
 block2:
- %a = load i32** %p
+ %a = load i32*, i32** %p
  br label %block4
 
 block3:
-  %b = load i32** %p
+  %b = load i32*, i32** %p
   br label %block4
 
 block4:
-  %c = load i32** %p
-  %d = load i32* %c
+  %c = load i32*, i32** %p
+  %d = load i32, i32* %c
   ret i32 %d
   
 ; CHECK-LABEL: @chained_load(
-; CHECK: %z = load i32** %p
+; CHECK: %z = load i32*, i32** %p
 ; CHECK-NOT: load
-; CHECK: %d = load i32* %z
+; CHECK: %d = load i32, i32* %z
 ; CHECK-NEXT: ret i32 %d
 }
 
@@ -428,7 +428,7 @@
   
 T1:
   %P2 = getelementptr i32, i32* %P, i32 %A
-  %x = load i32* %P2
+  %x = load i32, i32* %P2
   %cond = call i1 @cond2()
   br i1 %cond, label %TX, label %F
   
@@ -485,7 +485,7 @@
   br i1 %cmpxy, label %block7, label %exit
   
 block7:
-  %D = load i32* %C
+  %D = load i32, i32* %C
   ret i32 %D
   
 ; CHECK: block7:
@@ -502,13 +502,13 @@
   store i8 192, i8* %X3
   
   %X = getelementptr i8, i8* %p, i32 4
-  %Y = load i8* %X
+  %Y = load i8, i8* %X
   br label %loop
 
 loop:
   %i = phi i32 [4, %entry], [192, %loop]
   %X2 = getelementptr i8, i8* %p, i32 %i
-  %Y2 = load i8* %X2
+  %Y2 = load i8, i8* %X2
   
 ; CHECK: loop:
 ; CHECK-NEXT: %Y2 = phi i8 [ %Y, %entry ], [ 0, %loop ]
@@ -533,13 +533,13 @@
   store i8 19, i8* %X4
   
   %X = getelementptr i8, i8* %p, i32 4
-  %Y = load i8* %X
+  %Y = load i8, i8* %X
   br label %loop
 
 loop:
   %i = phi i32 [4, %entry], [3, %cont]
   %X2 = getelementptr i8, i8* %p, i32 %i
-  %Y2 = load i8* %X2  ; Ensure this load is not being incorrectly replaced.
+  %Y2 = load i8, i8* %X2  ; Ensure this load is not being incorrectly replaced.
   %cond = call i1 @cond2()
   br i1 %cond, label %cont, label %out
 
@@ -551,7 +551,7 @@
 
 ; CHECK: store i32
 ; CHECK-NEXT: getelementptr i8, i8* %p, i32 3
-; CHECK-NEXT: load i8*
+; CHECK-NEXT: load i8, i8*
   br label %loop
   
 out:
@@ -567,7 +567,7 @@
   %tmp = bitcast [256 x i32]* %x to i8*           ; <i8*> [#uses=1]
   call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 1024, i32 4, i1 false)
   %arraydecay = getelementptr inbounds [256 x i32], [256 x i32]* %x, i32 0, i32 0 ; <i32*>
-  %tmp1 = load i32* %arraydecay                   ; <i32> [#uses=1]
+  %tmp1 = load i32, i32* %arraydecay                   ; <i32> [#uses=1]
   ret i32 %tmp1
 ; CHECK-LABEL: @memset_to_load(
 ; CHECK: ret i32 0
@@ -581,15 +581,15 @@
 define i32 @load_load_partial_alias(i8* %P) nounwind ssp {
 entry:
   %0 = bitcast i8* %P to i32*
-  %tmp2 = load i32* %0
+  %tmp2 = load i32, i32* %0
   %add.ptr = getelementptr inbounds i8, i8* %P, i64 1
-  %tmp5 = load i8* %add.ptr
+  %tmp5 = load i8, i8* %add.ptr
   %conv = zext i8 %tmp5 to i32
   %add = add nsw i32 %tmp2, %conv
   ret i32 %add
 
 ; TEMPORARILYDISABLED-LABEL: @load_load_partial_alias(
-; TEMPORARILYDISABLED: load i32*
+; TEMPORARILYDISABLED: load i32, i32*
 ; TEMPORARILYDISABLED-NOT: load
 ; TEMPORARILYDISABLED: lshr i32 {{.*}}, 8
 ; TEMPORARILYDISABLED-NOT: load
@@ -603,13 +603,13 @@
 define i32 @load_load_partial_alias_cross_block(i8* %P) nounwind ssp {
 entry:
   %xx = bitcast i8* %P to i32*
-  %x1 = load i32* %xx, align 4
+  %x1 = load i32, i32* %xx, align 4
   %cmp = icmp eq i32 %x1, 127
   br i1 %cmp, label %land.lhs.true, label %if.end
 
 land.lhs.true:                                    ; preds = %entry
   %arrayidx4 = getelementptr inbounds i8, i8* %P, i64 1
-  %tmp5 = load i8* %arrayidx4, align 1
+  %tmp5 = load i8, i8* %arrayidx4, align 1
   %conv6 = zext i8 %tmp5 to i32
   ret i32 %conv6
 
@@ -632,39 +632,39 @@
 
 define i32 @test_widening1(i8* %P) nounwind ssp noredzone {
 entry:
-  %tmp = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 1), align 4
+  %tmp = load i8, i8* getelementptr inbounds (%widening1* @f, i64 0, i32 1), align 4
   %conv = zext i8 %tmp to i32
-  %tmp1 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 2), align 1
+  %tmp1 = load i8, i8* getelementptr inbounds (%widening1* @f, i64 0, i32 2), align 1
   %conv2 = zext i8 %tmp1 to i32
   %add = add nsw i32 %conv, %conv2
   ret i32 %add
 ; CHECK-LABEL: @test_widening1(
 ; CHECK-NOT: load
-; CHECK: load i16*
+; CHECK: load i16, i16*
 ; CHECK-NOT: load
 ; CHECK: ret i32
 }
 
 define i32 @test_widening2() nounwind ssp noredzone {
 entry:
-  %tmp = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 1), align 4
+  %tmp = load i8, i8* getelementptr inbounds (%widening1* @f, i64 0, i32 1), align 4
   %conv = zext i8 %tmp to i32
-  %tmp1 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 2), align 1
+  %tmp1 = load i8, i8* getelementptr inbounds (%widening1* @f, i64 0, i32 2), align 1
   %conv2 = zext i8 %tmp1 to i32
   %add = add nsw i32 %conv, %conv2
 
-  %tmp2 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 3), align 2
+  %tmp2 = load i8, i8* getelementptr inbounds (%widening1* @f, i64 0, i32 3), align 2
   %conv3 = zext i8 %tmp2 to i32
   %add2 = add nsw i32 %add, %conv3
 
-  %tmp3 = load i8* getelementptr inbounds (%widening1* @f, i64 0, i32 4), align 1
+  %tmp3 = load i8, i8* getelementptr inbounds (%widening1* @f, i64 0, i32 4), align 1
   %conv4 = zext i8 %tmp3 to i32
   %add3 = add nsw i32 %add2, %conv3
 
   ret i32 %add3
 ; CHECK-LABEL: @test_widening2(
 ; CHECK-NOT: load
-; CHECK: load i32*
+; CHECK: load i32, i32*
 ; CHECK-NOT: load
 ; CHECK: ret i32
 }
@@ -693,7 +693,7 @@
   %x = alloca i8**, align 8
   store i8** getelementptr inbounds ([5 x i8*]* @_ZTV1X, i64 0, i64 2), i8*** %x, align 8
   call void @use() nounwind
-  %DEAD = load i8*** %x, align 8
+  %DEAD = load i8**, i8*** %x, align 8
   call void @use3(i8*** %x, i8** %DEAD) nounwind
   ret void
 ; CHECK: test_escape1
diff --git a/llvm/test/Transforms/GVN/tbaa.ll b/llvm/test/Transforms/GVN/tbaa.ll
index 71fbed4..b5a717b 100644
--- a/llvm/test/Transforms/GVN/tbaa.ll
+++ b/llvm/test/Transforms/GVN/tbaa.ll
@@ -81,9 +81,9 @@
 ; Since we know the location is invariant, we can forward the
 ; load across the potentially aliasing store.
 
-  %a = load i32* %q, !tbaa !10
+  %a = load i32, i32* %q, !tbaa !10
   store i32 15, i32* %p
-  %b = load i32* %q, !tbaa !10
+  %b = load i32, i32* %q, !tbaa !10
   %c = sub i32 %a, %b
   ret i32 %c
 }
@@ -94,9 +94,9 @@
 ; Since we know the location is invariant, we can forward the
 ; load across the potentially aliasing store (within the call).
 
-  %a = load i32* %q, !tbaa !10
+  %a = load i32, i32* %q, !tbaa !10
   call void @clobber()
-  %b = load i32* %q, !tbaa !10
+  %b = load i32, i32* %q, !tbaa !10
   %c = sub i32 %a, %b
   ret i32 %c
 }
diff --git a/llvm/test/Transforms/GVN/volatile.ll b/llvm/test/Transforms/GVN/volatile.ll
index 5ba03d9..b31058d 100644
--- a/llvm/test/Transforms/GVN/volatile.ll
+++ b/llvm/test/Transforms/GVN/volatile.ll
@@ -6,12 +6,12 @@
 ; for dependencies of a non-volatile load
 define i32 @test1(i32* nocapture %p, i32* nocapture %q) {
 ; CHECK-LABEL: test1
-; CHECK:      %0 = load volatile i32* %q
+; CHECK:      %0 = load volatile i32, i32* %q
 ; CHECK-NEXT: ret i32 0
 entry:
-  %x = load i32* %p
-  load volatile i32* %q
-  %y = load i32* %p
+  %x = load i32, i32* %p
+  load volatile i32, i32* %q
+  %y = load i32, i32* %p
   %add = sub i32 %y, %x
   ret i32 %add
 }
@@ -20,12 +20,12 @@
 ; volatile, this would be (in effect) removing the volatile load
 define i32 @test2(i32* nocapture %p, i32* nocapture %q) {
 ; CHECK-LABEL: test2
-; CHECK:      %x = load i32* %p
-; CHECK-NEXT: %y = load volatile i32* %p
+; CHECK:      %x = load i32, i32* %p
+; CHECK-NEXT: %y = load volatile i32, i32* %p
 ; CHECK-NEXT: %add = sub i32 %y, %x
 entry:
-  %x = load i32* %p
-  %y = load volatile i32* %p
+  %x = load i32, i32* %p
+  %y = load volatile i32, i32* %p
   %add = sub i32 %y, %x
   ret i32 %add
 }
@@ -34,13 +34,13 @@
 ; reorder it even if p and q are noalias
 define i32 @test3(i32* noalias nocapture %p, i32* noalias nocapture %q) {
 ; CHECK-LABEL: test3
-; CHECK:      %x = load i32* %p
-; CHECK-NEXT: %0 = load volatile i32* %q
-; CHECK-NEXT: %y = load volatile i32* %p
+; CHECK:      %x = load i32, i32* %p
+; CHECK-NEXT: %0 = load volatile i32, i32* %q
+; CHECK-NEXT: %y = load volatile i32, i32* %p
 entry:
-  %x = load i32* %p
-  load volatile i32* %q
-  %y = load volatile i32* %p
+  %x = load i32, i32* %p
+  load volatile i32, i32* %q
+  %y = load volatile i32, i32* %p
   %add = sub i32 %y, %x
   ret i32 %add
 }
@@ -50,13 +50,13 @@
 ; case, the ordering prevents forwarding.
 define i32 @test4(i32* noalias nocapture %p, i32* noalias nocapture %q) {
 ; CHECK-LABEL: test4
-; CHECK:      %x = load i32* %p
-; CHECK-NEXT: %0 = load atomic volatile i32* %q seq_cst 
-; CHECK-NEXT: %y = load atomic i32* %p seq_cst
+; CHECK:      %x = load i32, i32* %p
+; CHECK-NEXT: %0 = load atomic volatile i32, i32* %q seq_cst 
+; CHECK-NEXT: %y = load atomic i32, i32* %p seq_cst
 entry:
-  %x = load i32* %p
-  load atomic volatile i32* %q seq_cst, align 4
-  %y = load atomic i32* %p seq_cst, align 4
+  %x = load i32, i32* %p
+  load atomic volatile i32, i32* %q seq_cst, align 4
+  %y = load atomic i32, i32* %p seq_cst, align 4
   %add = sub i32 %y, %x
   ret i32 %add
 }
@@ -64,11 +64,11 @@
 ; Value forwarding from a volatile load is perfectly legal
 define i32 @test5(i32* nocapture %p, i32* nocapture %q) {
 ; CHECK-LABEL: test5
-; CHECK:      %x = load volatile i32* %p
+; CHECK:      %x = load volatile i32, i32* %p
 ; CHECK-NEXT: ret i32 0
 entry:
-  %x = load volatile i32* %p
-  %y = load i32* %p
+  %x = load volatile i32, i32* %p
+  %y = load i32, i32* %p
   %add = sub i32 %y, %x
   ret i32 %add
 }
@@ -76,17 +76,17 @@
 ; Does cross block redundancy elimination work with volatiles?
 define i32 @test6(i32* noalias nocapture %p, i32* noalias nocapture %q) {
 ; CHECK-LABEL: test6
-; CHECK:      %y1 = load i32* %p
+; CHECK:      %y1 = load i32, i32* %p
 ; CHECK-LABEL: header
-; CHECK:      %x = load volatile i32* %q
+; CHECK:      %x = load volatile i32, i32* %q
 ; CHECK-NEXT: %add = sub i32 %y1, %x
 entry:
-  %y1 = load i32* %p
+  %y1 = load i32, i32* %p
   call void @use(i32 %y1)
   br label %header
 header:
-  %x = load volatile i32* %q
-  %y = load i32* %p
+  %x = load volatile i32, i32* %q
+  %y = load i32, i32* %p
   %add = sub i32 %y, %x
   %cnd = icmp eq i32 %add, 0
   br i1 %cnd, label %exit, label %header
@@ -98,22 +98,22 @@
 define i32 @test7(i1 %c, i32* noalias nocapture %p, i32* noalias nocapture %q) {
 ; CHECK-LABEL: test7
 ; CHECK-LABEL: entry.header_crit_edge:
-; CHECK:       %y.pre = load i32* %p
+; CHECK:       %y.pre = load i32, i32* %p
 ; CHECK-LABEL: skip:
-; CHECK:       %y1 = load i32* %p
+; CHECK:       %y1 = load i32, i32* %p
 ; CHECK-LABEL: header:
 ; CHECK:      %y = phi i32
-; CHECK-NEXT: %x = load volatile i32* %q
+; CHECK-NEXT: %x = load volatile i32, i32* %q
 ; CHECK-NEXT: %add = sub i32 %y, %x
 entry:
   br i1 %c, label %header, label %skip
 skip:
-  %y1 = load i32* %p
+  %y1 = load i32, i32* %p
   call void @use(i32 %y1)
   br label %header
 header:
-  %x = load volatile i32* %q
-  %y = load i32* %p
+  %x = load volatile i32, i32* %q
+  %y = load i32, i32* %p
   %add = sub i32 %y, %x
   %cnd = icmp eq i32 %add, 0
   br i1 %cnd, label %exit, label %header
@@ -126,20 +126,20 @@
 define i32 @test8(i1 %b, i1 %c, i32* noalias %p, i32* noalias %q) {
 ; CHECK-LABEL: test8
 ; CHECK-LABEL: entry
-; CHECK:       %y1 = load i32* %p
+; CHECK:       %y1 = load i32, i32* %p
 ; CHECK-LABEL: header:
 ; CHECK:      %y = phi i32
-; CHECK-NEXT: %x = load volatile i32* %q
+; CHECK-NEXT: %x = load volatile i32, i32* %q
 ; CHECK-NOT:  load
 ; CHECK-LABEL: skip.header_crit_edge:
-; CHECK:       %y.pre = load i32* %p
+; CHECK:       %y.pre = load i32, i32* %p
 entry:
-  %y1 = load i32* %p
+  %y1 = load i32, i32* %p
   call void @use(i32 %y1)
   br label %header
 header:
-  %x = load volatile i32* %q
-  %y = load i32* %p
+  %x = load volatile i32, i32* %q
+  %y = load i32, i32* %p
   call void @use(i32 %y)
   br i1 %b, label %skip, label %header
 skip:
diff --git a/llvm/test/Transforms/GlobalDCE/2002-08-17-FunctionDGE.ll b/llvm/test/Transforms/GlobalDCE/2002-08-17-FunctionDGE.ll
index 766c227..49a87d9 100644
--- a/llvm/test/Transforms/GlobalDCE/2002-08-17-FunctionDGE.ll
+++ b/llvm/test/Transforms/GlobalDCE/2002-08-17-FunctionDGE.ll
@@ -11,7 +11,7 @@
 
  ;; dies when %b dies.
 define internal i32 @foo() {
-        %ret = load i32* @foo.upgrd.1           ; <i32> [#uses=1]
+        %ret = load i32, i32* @foo.upgrd.1           ; <i32> [#uses=1]
         ret i32 %ret
 }
 
diff --git a/llvm/test/Transforms/GlobalDCE/2002-09-12-Redeletion.ll b/llvm/test/Transforms/GlobalDCE/2002-09-12-Redeletion.ll
index 6221fa3..afa2629 100644
--- a/llvm/test/Transforms/GlobalDCE/2002-09-12-Redeletion.ll
+++ b/llvm/test/Transforms/GlobalDCE/2002-09-12-Redeletion.ll
@@ -5,7 +5,7 @@
 @bar = internal global [2 x { i32*, i32 }] [ { i32*, i32 } { i32* @foo.upgrd.1, i32 7 }, { i32*, i32 } { i32* @foo.upgrd.1, i32 1 } ]            ; <[2 x { i32*, i32 }]*> [#uses=0]
 
 define internal i32 @foo() {
-        %ret = load i32* @foo.upgrd.1           ; <i32> [#uses=1]
+        %ret = load i32, i32* @foo.upgrd.1           ; <i32> [#uses=1]
         ret i32 %ret
 }
 
diff --git a/llvm/test/Transforms/GlobalDCE/complex-constantexpr.ll b/llvm/test/Transforms/GlobalDCE/complex-constantexpr.ll
index 4bf1aee..b67d0b6 100644
--- a/llvm/test/Transforms/GlobalDCE/complex-constantexpr.ll
+++ b/llvm/test/Transforms/GlobalDCE/complex-constantexpr.ll
@@ -19,7 +19,7 @@
   br label %bb2
 
 bb1:                                              ; preds = %bb11
-  %tmp = load i32* @global5, align 4
+  %tmp = load i32, i32* @global5, align 4
   br label %bb2
 
 bb2:                                              ; preds = %bb1, %bb
@@ -34,12 +34,12 @@
   br label %bb8
 
 bb8:                                              ; preds = %bb6, %bb2
-  %tmp9 = load i32* @global7, align 4
+  %tmp9 = load i32, i32* @global7, align 4
   %tmp10 = icmp eq i32 %tmp9, 0
   br i1 %tmp10, label %bb11, label %bb15
 
 bb11:                                             ; preds = %bb8
-  %tmp12 = load i32* @global6, align 4
+  %tmp12 = load i32, i32* @global6, align 4
   %tmp13 = add nsw i32 %tmp12, 1
   store i32 %tmp13, i32* @global6, align 4
   %tmp14 = icmp slt i32 %tmp13, 42
@@ -71,7 +71,7 @@
 bb:
   store i32 ptrtoint (i32* @global to i32), i32* getelementptr inbounds (%struct.ham* @global8, i64 0, i32 0), align 4
   store i32 0, i32* @global9, align 4
-  %tmp = load i32* getelementptr inbounds (%struct.ham* @global8, i64 0, i32 0), align 4
+  %tmp = load i32, i32* getelementptr inbounds (%struct.ham* @global8, i64 0, i32 0), align 4
   br label %bb1
 
 bb1:                                              ; preds = %bb1, %bb
diff --git a/llvm/test/Transforms/GlobalDCE/global_ctors_integration.ll b/llvm/test/Transforms/GlobalDCE/global_ctors_integration.ll
index 5e6cc79..f7f702a 100644
--- a/llvm/test/Transforms/GlobalDCE/global_ctors_integration.ll
+++ b/llvm/test/Transforms/GlobalDCE/global_ctors_integration.ll
@@ -12,7 +12,7 @@
 @llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
 
 define internal void @__cxx_global_var_init() section "__TEXT,__StaticInit,regular,pure_instructions" {
-  %1 = load i32* @_ZN3Bar18LINKER_INITIALIZEDE, align 4
+  %1 = load i32, i32* @_ZN3Bar18LINKER_INITIALIZEDE, align 4
   call void @_ZN3FooC1E17LinkerInitialized(%class.Foo* @foo, i32 %1)
   ret void
 }
@@ -23,8 +23,8 @@
   %3 = alloca i32, align 4
   store %class.Foo* %this, %class.Foo** %2, align 8
   store i32 %0, i32* %3, align 4
-  %4 = load %class.Foo** %2
-  %5 = load i32* %3, align 4
+  %4 = load %class.Foo*, %class.Foo** %2
+  %5 = load i32, i32* %3, align 4
   call void @_ZN3FooC2E17LinkerInitialized(%class.Foo* %4, i32 %5)
   ret void
 }
@@ -35,7 +35,7 @@
   %3 = alloca i32, align 4
   store %class.Foo* %this, %class.Foo** %2, align 8
   store i32 %0, i32* %3, align 4
-  %4 = load %class.Foo** %2
+  %4 = load %class.Foo*, %class.Foo** %2
   ret void
 }
 
diff --git a/llvm/test/Transforms/GlobalDCE/indirectbr.ll b/llvm/test/Transforms/GlobalDCE/indirectbr.ll
index 048dd7b..5671aea 100644
--- a/llvm/test/Transforms/GlobalDCE/indirectbr.ll
+++ b/llvm/test/Transforms/GlobalDCE/indirectbr.ll
@@ -10,7 +10,7 @@
 
 L1:
   %arrayidx = getelementptr inbounds [3 x i8*], [3 x i8*]* @L, i32 0, i32 %idx
-  %l = load i8** %arrayidx
+  %l = load i8*, i8** %arrayidx
   indirectbr i8* %l, [label %L1, label %L2]
 
 L2:
diff --git a/llvm/test/Transforms/GlobalOpt/2004-10-10-CastStoreOnce.ll b/llvm/test/Transforms/GlobalOpt/2004-10-10-CastStoreOnce.ll
index bdcf1fa..061b9b0 100644
--- a/llvm/test/Transforms/GlobalOpt/2004-10-10-CastStoreOnce.ll
+++ b/llvm/test/Transforms/GlobalOpt/2004-10-10-CastStoreOnce.ll
@@ -4,8 +4,8 @@
 @G = internal global i32* null          ; <i32**> [#uses=2]
 
 define i32 @user() {
-        %P = load i32** @G              ; <i32*> [#uses=1]
-        %Q = load i32* %P               ; <i32> [#uses=1]
+        %P = load i32*, i32** @G              ; <i32*> [#uses=1]
+        %Q = load i32, i32* %P               ; <i32> [#uses=1]
         ret i32 %Q
 }
 
diff --git a/llvm/test/Transforms/GlobalOpt/2005-06-15-LocalizeConstExprCrash.ll b/llvm/test/Transforms/GlobalOpt/2005-06-15-LocalizeConstExprCrash.ll
index 7bcb1d43..f4f361b 100644
--- a/llvm/test/Transforms/GlobalOpt/2005-06-15-LocalizeConstExprCrash.ll
+++ b/llvm/test/Transforms/GlobalOpt/2005-06-15-LocalizeConstExprCrash.ll
@@ -4,7 +4,7 @@
 @g_40507551 = internal global i16 31038         ; <i16*> [#uses=1]
 
 define void @main() {
-        %tmp.4.i.1 = load i8* getelementptr (i8* bitcast (i16* @g_40507551 to i8*), i32 1)              ; <i8> [#uses=0]
+        %tmp.4.i.1 = load i8, i8* getelementptr (i8* bitcast (i16* @g_40507551 to i8*), i32 1)              ; <i8> [#uses=0]
         ret void
 }
 
diff --git a/llvm/test/Transforms/GlobalOpt/2006-07-07-InlineAsmCrash.ll b/llvm/test/Transforms/GlobalOpt/2006-07-07-InlineAsmCrash.ll
index c971219..419ae10 100644
--- a/llvm/test/Transforms/GlobalOpt/2006-07-07-InlineAsmCrash.ll
+++ b/llvm/test/Transforms/GlobalOpt/2006-07-07-InlineAsmCrash.ll
@@ -106,12 +106,12 @@
 	%__priority_addr = alloca i32		; <i32*> [#uses=2]
 	store i32 %__initialize_p, i32* %__initialize_p_addr
 	store i32 %__priority, i32* %__priority_addr
-	%tmp = load i32* %__priority_addr		; <i32> [#uses=1]
+	%tmp = load i32, i32* %__priority_addr		; <i32> [#uses=1]
 	%tmp.upgrd.1 = icmp eq i32 %tmp, 65535		; <i1> [#uses=1]
 	br i1 %tmp.upgrd.1, label %cond_true, label %cond_next14
 
 cond_true:		; preds = %entry
-	%tmp8 = load i32* %__initialize_p_addr		; <i32> [#uses=1]
+	%tmp8 = load i32, i32* %__initialize_p_addr		; <i32> [#uses=1]
 	%tmp9 = icmp eq i32 %tmp8, 1		; <i1> [#uses=1]
 	br i1 %tmp9, label %cond_true10, label %cond_next14
 
diff --git a/llvm/test/Transforms/GlobalOpt/2006-11-01-ShrinkGlobalPhiCrash.ll b/llvm/test/Transforms/GlobalOpt/2006-11-01-ShrinkGlobalPhiCrash.ll
index 352639a..7b62cf0 100644
--- a/llvm/test/Transforms/GlobalOpt/2006-11-01-ShrinkGlobalPhiCrash.ll
+++ b/llvm/test/Transforms/GlobalOpt/2006-11-01-ShrinkGlobalPhiCrash.ll
@@ -10,7 +10,7 @@
 
 cond_true16.i:          ; preds = %cond_true16.i, %entry
         %low.0.in.i.0 = phi i32* [ @nrow, %entry ], [ null, %cond_true16.i ]            ; <i32*> [#uses=1]
-        %low.0.i = load i32* %low.0.in.i.0              ; <i32> [#uses=0]
+        %low.0.i = load i32, i32* %low.0.in.i.0              ; <i32> [#uses=0]
         br label %cond_true16.i
 }
 
diff --git a/llvm/test/Transforms/GlobalOpt/2007-04-05-Crash.ll b/llvm/test/Transforms/GlobalOpt/2007-04-05-Crash.ll
index c7aca62..f312fbb 100644
--- a/llvm/test/Transforms/GlobalOpt/2007-04-05-Crash.ll
+++ b/llvm/test/Transforms/GlobalOpt/2007-04-05-Crash.ll
@@ -9,7 +9,7 @@
 define zeroext i16 @__NSCharToUnicharCFWrapper(i8 zeroext  %ch)   {
 entry:
 	%iftmp.0.0.in.in = select i1 false, i16* @replacementUnichar, i16* null		; <i16*> [#uses=1]
-	%iftmp.0.0.in = load i16* %iftmp.0.0.in.in		; <i16> [#uses=1]
+	%iftmp.0.0.in = load i16, i16* %iftmp.0.0.in.in		; <i16> [#uses=1]
 	ret i16 %iftmp.0.0.in
 }
 
diff --git a/llvm/test/Transforms/GlobalOpt/2007-05-13-Crash.ll b/llvm/test/Transforms/GlobalOpt/2007-05-13-Crash.ll
index 2b7e737..5586355 100644
--- a/llvm/test/Transforms/GlobalOpt/2007-05-13-Crash.ll
+++ b/llvm/test/Transforms/GlobalOpt/2007-05-13-Crash.ll
@@ -19,13 +19,13 @@
 
 define %struct.__CFString* @_Z19SFLGetVisibilityKeyv() {
 entry:
-        %tmp1 = load %struct.__CFString** @_ZZ19SFLGetVisibilityKeyvE19_kSFLLVisibilityKey              ; <%struct.__CFString*> [#uses=1]
+        %tmp1 = load %struct.__CFString*, %struct.__CFString** @_ZZ19SFLGetVisibilityKeyvE19_kSFLLVisibilityKey              ; <%struct.__CFString*> [#uses=1]
         ret %struct.__CFString* %tmp1
 }
 
 define %struct.__CFString* @_Z22SFLGetAlwaysVisibleKeyv() {
 entry:
-        %tmp1 = load %struct.__CFString** @_ZZ22SFLGetAlwaysVisibleKeyvE22_kSFLLAlwaysVisibleKey                ; <%struct.__CFString*> [#uses=1]
+        %tmp1 = load %struct.__CFString*, %struct.__CFString** @_ZZ22SFLGetAlwaysVisibleKeyvE22_kSFLLAlwaysVisibleKey                ; <%struct.__CFString*> [#uses=1]
         %tmp2 = icmp eq %struct.__CFString* %tmp1, null         ; <i1> [#uses=1]
         br i1 %tmp2, label %cond_true, label %cond_next
 
@@ -34,25 +34,25 @@
         br label %cond_next
 
 cond_next:              ; preds = %entry, %cond_true
-        %tmp4 = load %struct.__CFString** @_ZZ22SFLGetAlwaysVisibleKeyvE22_kSFLLAlwaysVisibleKey                ; <%struct.__CFString*> [#uses=1]
+        %tmp4 = load %struct.__CFString*, %struct.__CFString** @_ZZ22SFLGetAlwaysVisibleKeyvE22_kSFLLAlwaysVisibleKey                ; <%struct.__CFString*> [#uses=1]
         ret %struct.__CFString* %tmp4
 }
 
 define %struct.__CFString* @_Z21SFLGetNeverVisibleKeyv() {
 entry:
-        %tmp1 = load %struct.__CFString** @_ZZ21SFLGetNeverVisibleKeyvE21_kSFLLNeverVisibleKey          ; <%struct.__CFString*> [#uses=1]
+        %tmp1 = load %struct.__CFString*, %struct.__CFString** @_ZZ21SFLGetNeverVisibleKeyvE21_kSFLLNeverVisibleKey          ; <%struct.__CFString*> [#uses=1]
         ret %struct.__CFString* %tmp1
 }
 
 define %struct.__CFDictionary* @_ZN18SFLMutableListItem18GetPrefsDictionaryEv(%struct.SFLMutableListItem* %this) {
 entry:
         %tmp4 = getelementptr %struct.SFLMutableListItem, %struct.SFLMutableListItem* %this, i32 0, i32 0  ; <i16*> [#uses=1]
-        %tmp5 = load i16* %tmp4         ; <i16> [#uses=1]
+        %tmp5 = load i16, i16* %tmp4         ; <i16> [#uses=1]
         %tmp6 = icmp eq i16 %tmp5, 0            ; <i1> [#uses=1]
         br i1 %tmp6, label %cond_next22, label %cond_true
 
 cond_true:              ; preds = %entry
-        %tmp9 = load %struct.__CFString** @_ZZ22SFLGetAlwaysVisibleKeyvE22_kSFLLAlwaysVisibleKey                ; <%struct.__CFString*> [#uses=1]
+        %tmp9 = load %struct.__CFString*, %struct.__CFString** @_ZZ22SFLGetAlwaysVisibleKeyvE22_kSFLLAlwaysVisibleKey                ; <%struct.__CFString*> [#uses=1]
         %tmp10 = icmp eq %struct.__CFString* %tmp9, null                ; <i1> [#uses=1]
         br i1 %tmp10, label %cond_true13, label %cond_next22
 
@@ -62,8 +62,8 @@
 
 cond_next22:            ; preds = %entry, %cond_true13, %cond_true
         %iftmp.1.0.in = phi %struct.__CFString** [ @_ZZ22SFLGetAlwaysVisibleKeyvE22_kSFLLAlwaysVisibleKey, %cond_true ], [ @_ZZ22SFLGetAlwaysVisibleKeyvE22_kSFLLAlwaysVisibleKey, %cond_true13 ], [ @_ZZ21SFLGetNeverVisibleKeyvE21_kSFLLNeverVisibleKey, %entry ]             ; <%struct.__CFString**> [#uses=1]
-        %iftmp.1.0 = load %struct.__CFString** %iftmp.1.0.in            ; <%struct.__CFString*> [#uses=1]
-        %tmp24 = load %struct.__CFString** @_ZZ19SFLGetVisibilityKeyvE19_kSFLLVisibilityKey             ; <%struct.__CFString*> [#uses=1]
+        %iftmp.1.0 = load %struct.__CFString*, %struct.__CFString** %iftmp.1.0.in            ; <%struct.__CFString*> [#uses=1]
+        %tmp24 = load %struct.__CFString*, %struct.__CFString** @_ZZ19SFLGetVisibilityKeyvE19_kSFLLVisibilityKey             ; <%struct.__CFString*> [#uses=1]
         %tmp2728 = bitcast %struct.__CFString* %tmp24 to i8*            ; <i8*> [#uses=1]
         %tmp2930 = bitcast %struct.__CFString* %iftmp.1.0 to i8*               ; <i8*> [#uses=1]
         call void @_Z20CFDictionaryAddValuePKvS0_( i8* %tmp2728, i8* %tmp2930 )
diff --git a/llvm/test/Transforms/GlobalOpt/2007-11-09-GEP-GEP-Crash.ll b/llvm/test/Transforms/GlobalOpt/2007-11-09-GEP-GEP-Crash.ll
index ede505b..45ab055 100644
--- a/llvm/test/Transforms/GlobalOpt/2007-11-09-GEP-GEP-Crash.ll
+++ b/llvm/test/Transforms/GlobalOpt/2007-11-09-GEP-GEP-Crash.ll
@@ -10,7 +10,7 @@
 define i8 @func() {
 entry:
         %tmp10 = getelementptr [2 x i32], [2 x i32]* getelementptr ([6 x [2 x i32]]* @aaui1, i32 0, i32 0), i32 5, i32 1           ; <i32*> [#uses=1]
-        %tmp11 = load i32* %tmp10, align 4              ; <i32> [#uses=1]
+        %tmp11 = load i32, i32* %tmp10, align 4              ; <i32> [#uses=1]
         %tmp12 = call i32 (...)* @func3( i32* null, i32 0, i32 %tmp11 )         ; <i32> [#uses=0]
         ret i8 undef
 }
diff --git a/llvm/test/Transforms/GlobalOpt/2008-01-03-Crash.ll b/llvm/test/Transforms/GlobalOpt/2008-01-03-Crash.ll
index 4105ab1..dc41fdb 100644
--- a/llvm/test/Transforms/GlobalOpt/2008-01-03-Crash.ll
+++ b/llvm/test/Transforms/GlobalOpt/2008-01-03-Crash.ll
@@ -20,7 +20,7 @@
 	ret i32 0
 
 cond_next21.i.i23.i:		; preds = %cond_next20.i
-	%tmp6.i4.i = load i32* bitcast (void (i32)** @indirect1 to i32*), align 4		; <i32> [#uses=0]
+	%tmp6.i4.i = load i32, i32* bitcast (void (i32)** @indirect1 to i32*), align 4		; <i32> [#uses=0]
 	ret i32 0
 }
 
diff --git a/llvm/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll b/llvm/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll
index ec246ac..8df7050 100644
--- a/llvm/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll
+++ b/llvm/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll
@@ -11,6 +11,6 @@
 
 define double @get(i32 %X) {
 	%P = getelementptr [16 x [31 x double]], [16 x [31 x double]]* @mm, i32 0, i32 0, i32 %X
-	%V = load double* %P
+	%V = load double, double* %P
 	ret double %V
 }
diff --git a/llvm/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll b/llvm/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
index 08b2cb1..ac66360 100644
--- a/llvm/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
+++ b/llvm/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
@@ -3,7 +3,7 @@
 
 define double @foo() nounwind  {
 entry:
-	%tmp1 = load volatile double* @t0.1441, align 8		; <double> [#uses=2]
+	%tmp1 = load volatile double, double* @t0.1441, align 8		; <double> [#uses=2]
 	%tmp4 = fmul double %tmp1, %tmp1		; <double> [#uses=1]
 	ret double %tmp4
 }
diff --git a/llvm/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll b/llvm/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
index d58becd..2d40877 100644
--- a/llvm/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
+++ b/llvm/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
@@ -23,9 +23,9 @@
 }
 
 define double @test2() {
-  %V1 = load double* getelementptr (%T* @G, i32 0, i32 0), align 16
-  %V2 = load double* getelementptr (%T* @G, i32 0, i32 1), align 8
-  %V3 = load double* getelementptr (%T* @G, i32 0, i32 2), align 16
+  %V1 = load double, double* getelementptr (%T* @G, i32 0, i32 0), align 16
+  %V2 = load double, double* getelementptr (%T* @G, i32 0, i32 1), align 8
+  %V3 = load double, double* getelementptr (%T* @G, i32 0, i32 2), align 16
   %R = fadd double %V1, %V2
   %R2 = fadd double %R, %V3
   ret double %R2
diff --git a/llvm/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll b/llvm/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll
index 0867ca9..da6e660 100644
--- a/llvm/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll
+++ b/llvm/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll
@@ -10,8 +10,8 @@
 @array = internal addrspace(1) global [ 2 x i32 ] zeroinitializer 
 
 define i32 @foo() {
-  %A = load i32 addrspace(1) * getelementptr ({ i32, i32 } addrspace(1) * @struct, i32 0, i32 0)
-  %B = load i32 addrspace(1) * getelementptr ([ 2 x i32 ] addrspace(1) * @array, i32 0, i32 0)
+  %A = load i32, i32 addrspace(1) * getelementptr ({ i32, i32 } addrspace(1) * @struct, i32 0, i32 0)
+  %B = load i32, i32 addrspace(1) * getelementptr ([ 2 x i32 ] addrspace(1) * @array, i32 0, i32 0)
   ; Use the loaded values, so they won't get removed completely
   %R = add i32 %A, %B
   ret i32 %R
diff --git a/llvm/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash-2.ll b/llvm/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash-2.ll
index 6a8e221..628c108 100644
--- a/llvm/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash-2.ll
+++ b/llvm/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash-2.ll
@@ -18,7 +18,7 @@
 
 define i32 @baz() nounwind readonly noinline {
 bb1.thread:
-	%tmpLD1 = load %struct.foo** @X, align 4		; <%struct.foo*> [#uses=2]
+	%tmpLD1 = load %struct.foo*, %struct.foo** @X, align 4		; <%struct.foo*> [#uses=2]
 	br label %bb1
 
 bb1:		; preds = %bb1, %bb1.thread
diff --git a/llvm/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash.ll b/llvm/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash.ll
index b6e9e97..8bcc2d3 100644
--- a/llvm/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash.ll
+++ b/llvm/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash.ll
@@ -17,7 +17,7 @@
 
 define i32 @baz() nounwind readonly noinline {
 bb1.thread:
-	%tmpLD1 = load %struct.foo** @X, align 4		; <%struct.foo*> [#uses=3]
+	%tmpLD1 = load %struct.foo*, %struct.foo** @X, align 4		; <%struct.foo*> [#uses=3]
 	store %struct.foo* %tmpLD1, %struct.foo** null
 	br label %bb1
 
diff --git a/llvm/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll b/llvm/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll
index c127b85..21ec5267 100644
--- a/llvm/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll
+++ b/llvm/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll
@@ -13,14 +13,14 @@
 
 bb:		; preds = %bb1
 	%0 = getelementptr %struct.node, %struct.node* %t.0, i64 0, i32 1		; <i32*> [#uses=1]
-	%1 = load i32* %0, align 4		; <i32> [#uses=1]
+	%1 = load i32, i32* %0, align 4		; <i32> [#uses=1]
 	%2 = getelementptr %struct.node, %struct.node* %t.0, i64 0, i32 0		; <%struct.node**> [#uses=1]
 	br label %bb1
 
 bb1:		; preds = %bb, %entry
 	%value.0 = phi i32 [ undef, %entry ], [ %1, %bb ]		; <i32> [#uses=1]
 	%t.0.in = phi %struct.node** [ @head, %entry ], [ %2, %bb ]		; <%struct.node**> [#uses=1]
-	%t.0 = load %struct.node** %t.0.in		; <%struct.node*> [#uses=3]
+	%t.0 = load %struct.node*, %struct.node** %t.0.in		; <%struct.node*> [#uses=3]
 	%3 = icmp eq %struct.node* %t.0, null		; <i1> [#uses=1]
 	br i1 %3, label %bb2, label %bb
 
diff --git a/llvm/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll b/llvm/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
index d6a565a..a3e9004 100644
--- a/llvm/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
+++ b/llvm/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
@@ -5,6 +5,6 @@
 @a = alias bitcast (i32* @g to i8*)
 
 define void @f() {
-	%tmp = load i8* @a
+	%tmp = load i8, i8* @a
 	ret void
 }
diff --git a/llvm/test/Transforms/GlobalOpt/2009-03-05-dbg.ll b/llvm/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
index 049eef1..c82b830 100644
--- a/llvm/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
+++ b/llvm/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
@@ -33,7 +33,7 @@
 define i32 @bar() nounwind ssp {
 entry:
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
-  %0 = load i32* @Stop, align 4, !dbg !13         ; <i32> [#uses=1]
+  %0 = load i32, i32* @Stop, align 4, !dbg !13         ; <i32> [#uses=1]
   %1 = icmp eq i32 %0, 1, !dbg !13                ; <i1> [#uses=1]
   br i1 %1, label %bb, label %bb1, !dbg !13
 
diff --git a/llvm/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll b/llvm/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll
index 059af1c..469fa76 100644
--- a/llvm/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll
+++ b/llvm/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll
@@ -12,6 +12,6 @@
 
 define i32* @get() nounwind {
 entry:
-	%0 = load i32** @X, align 4		; <i32*> [#uses=1]
+	%0 = load i32*, i32** @X, align 4		; <i32*> [#uses=1]
 	ret i32* %0
 }
diff --git a/llvm/test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll b/llvm/test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll
index d3c3ff5..30e4d42 100644
--- a/llvm/test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll
+++ b/llvm/test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll
@@ -33,7 +33,7 @@
 	unreachable
 
 my_calloc.exit5.i:		; preds = %my_calloc.exit.i
-	%.pre.i58 = load %struct.s_net** @net, align 4		; <%struct.s_net*> [#uses=1]
+	%.pre.i58 = load %struct.s_net*, %struct.s_net** @net, align 4		; <%struct.s_net*> [#uses=1]
 	br label %bb17.i78
 
 bb1.i61:		; preds = %bb4.preheader.i, %bb1.i61
@@ -58,11 +58,11 @@
 	unreachable
 
 my_malloc.exit.i70:		; preds = %bb8.i67
-	%0 = load %struct.s_net** @net, align 4		; <%struct.s_net*> [#uses=1]
+	%0 = load %struct.s_net*, %struct.s_net** @net, align 4		; <%struct.s_net*> [#uses=1]
 	br i1 undef, label %bb9.i71, label %bb16.i77
 
 bb9.i71:		; preds = %bb9.i71, %my_malloc.exit.i70
-	%1 = load %struct.s_net** @net, align 4		; <%struct.s_net*> [#uses=1]
+	%1 = load %struct.s_net*, %struct.s_net** @net, align 4		; <%struct.s_net*> [#uses=1]
 	br i1 undef, label %bb9.i71, label %bb16.i77
 
 bb16.i77:		; preds = %bb9.i71, %my_malloc.exit.i70, %bb15.preheader.i
diff --git a/llvm/test/Transforms/GlobalOpt/2009-11-16-BrokenPerformHeapAllocSRoA.ll b/llvm/test/Transforms/GlobalOpt/2009-11-16-BrokenPerformHeapAllocSRoA.ll
index 40862bd..7fad289 100644
--- a/llvm/test/Transforms/GlobalOpt/2009-11-16-BrokenPerformHeapAllocSRoA.ll
+++ b/llvm/test/Transforms/GlobalOpt/2009-11-16-BrokenPerformHeapAllocSRoA.ll
@@ -13,7 +13,7 @@
 
 ; based on linit in office-ispell
 define void @test() nounwind ssp {
-  %1 = load i32* getelementptr inbounds (%struct.hashheader* @hashheader, i64 0, i32 13), align 8 ; <i32> [#uses=1]
+  %1 = load i32, i32* getelementptr inbounds (%struct.hashheader* @hashheader, i64 0, i32 13), align 8 ; <i32> [#uses=1]
   %2 = sext i32 %1 to i64                         ; <i64> [#uses=1]
   %3 = mul i64 %2, ptrtoint (%struct.strchartype* getelementptr (%struct.strchartype* null, i64 1) to i64) ; <i64> [#uses=1]
   %4 = tail call i8* @malloc(i64 %3)              ; <i8*> [#uses=1]
diff --git a/llvm/test/Transforms/GlobalOpt/2009-11-16-MallocSingleStoreToGlobalVar.ll b/llvm/test/Transforms/GlobalOpt/2009-11-16-MallocSingleStoreToGlobalVar.ll
index 7c5e8e4..25bb976 100644
--- a/llvm/test/Transforms/GlobalOpt/2009-11-16-MallocSingleStoreToGlobalVar.ll
+++ b/llvm/test/Transforms/GlobalOpt/2009-11-16-MallocSingleStoreToGlobalVar.ll
@@ -14,14 +14,14 @@
 ; Derived from @DescribeChannel() in yacr2
 define void @test() nounwind ssp {
   store i64 2335, i64* @channelColumns, align 8
-  %1 = load i64* @channelColumns, align 8         ; <i64> [#uses=1]
+  %1 = load i64, i64* @channelColumns, align 8         ; <i64> [#uses=1]
   %2 = shl i64 %1, 3                              ; <i64> [#uses=1]
   %3 = add i64 %2, 8                              ; <i64> [#uses=1]
   %4 = call noalias i8* @malloc(i64 %3) nounwind  ; <i8*> [#uses=1]
 ; CHECK: call noalias i8* @malloc
   %5 = bitcast i8* %4 to i64*                     ; <i64*> [#uses=1]
   store i64* %5, i64** @TOP, align 8
-  %6 = load i64** @TOP, align 8                   ; <i64*> [#uses=1]
+  %6 = load i64*, i64** @TOP, align 8                   ; <i64*> [#uses=1]
   %7 = getelementptr inbounds i64, i64* %6, i64 13     ; <i64*> [#uses=1]
   store i64 0, i64* %7, align 8
   ret void
diff --git a/llvm/test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll b/llvm/test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll
index 629d57c..9f53ce4 100644
--- a/llvm/test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll
+++ b/llvm/test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll
@@ -12,7 +12,7 @@
   %malloccall.i10 = call i8* @malloc(i32 16) nounwind ; <i8*> [#uses=1]
   %0 = bitcast i8* %malloccall.i10 to i32*        ; <i32*> [#uses=1]
   store i32* %0, i32** @fixLRBT, align 8
-  %1 = load i32** @fixLRBT, align 8               ; <i32*> [#uses=0]
-  %A = load i32* %1
+  %1 = load i32*, i32** @fixLRBT, align 8               ; <i32*> [#uses=0]
+  %A = load i32, i32* %1
   ret i32 %A
 }
diff --git a/llvm/test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll b/llvm/test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll
index ab7721f..12fa341 100644
--- a/llvm/test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll
+++ b/llvm/test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll
@@ -10,7 +10,7 @@
 
 define fastcc void @init_net() nounwind {
 entry:
-  %0 = load i32* @numf2s, align 4                 ; <i32> [#uses=1]
+  %0 = load i32, i32* @numf2s, align 4                 ; <i32> [#uses=1]
   %mallocsize2 = shl i32 %0, 4                    ; <i32> [#uses=1]
   %malloccall3 = tail call i8* @malloc(i32 %mallocsize2) nounwind ; <i8*> [#uses=1]
   %1 = bitcast i8* %malloccall3 to %struct.xyz*   ; <%struct.xyz*> [#uses=1]
@@ -20,7 +20,7 @@
 
 define fastcc void @load_train(i8* %trainfile, i32 %mode, i32 %objects) nounwind {
 entry:
-  %0 = load %struct.xyz** @Y, align 8             ; <%struct.xyz*> [#uses=0]
+  %0 = load %struct.xyz*, %struct.xyz** @Y, align 8             ; <%struct.xyz*> [#uses=0]
   ret void
 }
 
diff --git a/llvm/test/Transforms/GlobalOpt/array-elem-refs.ll b/llvm/test/Transforms/GlobalOpt/array-elem-refs.ll
index ec472b0..eb9a5f4 100644
--- a/llvm/test/Transforms/GlobalOpt/array-elem-refs.ll
+++ b/llvm/test/Transforms/GlobalOpt/array-elem-refs.ll
@@ -11,9 +11,9 @@
 ; Function Attrs: nounwind uwtable
 define signext i8 @foo() #0 {
 entry:
-  %0 = load i8*** @c, align 8
-  %1 = load i8** %0, align 8
-  %2 = load i8* %1, align 1
+  %0 = load i8**, i8*** @c, align 8
+  %1 = load i8*, i8** %0, align 8
+  %2 = load i8, i8* %1, align 1
   ret i8 %2
 
 ; CHECK-LABEL: @foo
diff --git a/llvm/test/Transforms/GlobalOpt/atomic.ll b/llvm/test/Transforms/GlobalOpt/atomic.ll
index ac05bfd..563c1fe 100644
--- a/llvm/test/Transforms/GlobalOpt/atomic.ll
+++ b/llvm/test/Transforms/GlobalOpt/atomic.ll
@@ -8,7 +8,7 @@
 
 define void @test1() {
 entry:
-  %0 = load atomic i8* bitcast (i64* @GV1 to i8*) acquire, align 8
+  %0 = load atomic i8, i8* bitcast (i64* @GV1 to i8*) acquire, align 8
   ret void
 }
 
@@ -20,6 +20,6 @@
 }
 define i32 @test2b() {
 entry:
-  %atomic-load = load atomic i32* @GV2 seq_cst, align 4
+  %atomic-load = load atomic i32, i32* @GV2 seq_cst, align 4
   ret i32 %atomic-load
 }
diff --git a/llvm/test/Transforms/GlobalOpt/basictest.ll b/llvm/test/Transforms/GlobalOpt/basictest.ll
index 4332d3d..4c25e4f 100644
--- a/llvm/test/Transforms/GlobalOpt/basictest.ll
+++ b/llvm/test/Transforms/GlobalOpt/basictest.ll
@@ -3,7 +3,7 @@
 @X = internal global i32 4              ; <i32*> [#uses=1]
 
 define i32 @foo() {
-        %V = load i32* @X               ; <i32> [#uses=1]
+        %V = load i32, i32* @X               ; <i32> [#uses=1]
         ret i32 %V
 }
 
diff --git a/llvm/test/Transforms/GlobalOpt/constantfold-initializers.ll b/llvm/test/Transforms/GlobalOpt/constantfold-initializers.ll
index 871bfbf..2d41dfe 100644
--- a/llvm/test/Transforms/GlobalOpt/constantfold-initializers.ll
+++ b/llvm/test/Transforms/GlobalOpt/constantfold-initializers.ll
@@ -20,7 +20,7 @@
 define internal void @test1() {
 entry:
   store i32 1, i32* getelementptr inbounds ([2 x i32]* @xs, i64 0, i64 0)
-  %0 = load i32* getelementptr inbounds ([2 x i32]* @xs, i32 0, i64 0), align 4
+  %0 = load i32, i32* getelementptr inbounds ([2 x i32]* @xs, i32 0, i64 0), align 4
   store i32 %0, i32* getelementptr inbounds ([2 x i32]* @xs, i64 0, i64 1)
   ret void
 }
@@ -37,7 +37,7 @@
 define internal i32 @test2_helper(%closure* %this, i32 %b) {
 entry:
   %0 = getelementptr inbounds %closure, %closure* %this, i32 0, i32 0
-  %1 = load i32* %0, align 4
+  %1 = load i32, i32* %0, align 4
   %add = add nsw i32 %1, %b
   ret i32 %add
 }
@@ -85,7 +85,7 @@
 @test6_v2 = global i32 0, align 4
 ; CHECK: @test6_v2 = global i32 42, align 4
 define internal void @test6() {
-  %load = load { i32, i32 }* @test6_v1, align 8
+  %load = load { i32, i32 }, { i32, i32 }* @test6_v1, align 8
   %xv0 = extractvalue { i32, i32 } %load, 0
   %iv = insertvalue { i32, i32 } %load, i32 %xv0, 1
   %xv1 = extractvalue { i32, i32 } %iv, 1
diff --git a/llvm/test/Transforms/GlobalOpt/crash-2.ll b/llvm/test/Transforms/GlobalOpt/crash-2.ll
index 684f6ce..ed8b588 100644
--- a/llvm/test/Transforms/GlobalOpt/crash-2.ll
+++ b/llvm/test/Transforms/GlobalOpt/crash-2.ll
@@ -14,6 +14,6 @@
 @g_271 = internal global [7 x [6 x [5 x i8*]]] [[6 x [5 x i8*]] [[5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null, i8* null], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)]], [6 x [5 x i8*]] [[5 x i8*] [i8* @g_25, i8* null, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_25, i8* @g_114, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25], [5 x i8*] [i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25, i8* @g_25, i8* @g_25], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)]], [6 x [5 x i8*]] [[5 x i8*] [i8* null, i8* @g_25, i8* @g_25, i8* @g_25, i8* null], [5 x i8*] [i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* null, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* null, i8* @g_25], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null], [5 x i8*] [i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)]], [6 x [5 x i8*]] [[5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null, i8* @g_25], [5 x i8*] [i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25], [5 x i8*] [i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_114, i8* @g_25, i8* @g_25, i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)]], [6 x [5 x i8*]] [[5 x i8*] [i8* @g_25, i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_114], [5 x i8*] [i8* @g_25, i8* null, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* null], [5 x i8*] [i8* @g_114, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* @g_25]], [6 x [5 x i8*]] [[5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* @g_114, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0)], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25]], [6 x [5 x i8*]] [[5 x i8*] [i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* null], [5 x i8*] [i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* @g_25, i8* @g_25, i8* @g_114], [5 x i8*] [i8* null, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_25, i8* null, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_114, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* @g_114, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1)], [5 x i8*] [i8* @g_25, i8* @g_25, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25], [5 x i8*] [i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25, i8* @g_25, i8* getelementptr (i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), i64 1), i8* @g_25]]], align 4
 
 define i32 @func() {
-  %tmp = load i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), align 1
+  %tmp = load i8, i8* getelementptr inbounds (%struct.S0.1.7.13* @g_71, i32 0, i32 0), align 1
   ret i32 0
 }
diff --git a/llvm/test/Transforms/GlobalOpt/crash.ll b/llvm/test/Transforms/GlobalOpt/crash.ll
index 0bef820..c1458a5 100644
--- a/llvm/test/Transforms/GlobalOpt/crash.ll
+++ b/llvm/test/Transforms/GlobalOpt/crash.ll
@@ -35,7 +35,7 @@
   unreachable
 
 xx:
-  %E = load %T** @switch_inf, align 8 
+  %E = load %T*, %T** @switch_inf, align 8 
   unreachable
 }
 
@@ -46,7 +46,7 @@
 @permute_bitrev.bitrev = internal global i32* null, align 8
 define void @permute_bitrev() nounwind {
 entry:
-  %tmp = load i32** @permute_bitrev.bitrev, align 8
+  %tmp = load i32*, i32** @permute_bitrev.bitrev, align 8
   %conv = sext i32 0 to i64
   %mul = mul i64 %conv, 4
   %call = call i8* @malloc(i64 %mul)
@@ -72,7 +72,7 @@
 
 define void @icmp_user_of_stored_once() nounwind ssp {
 entry:
-  %tmp4 = load i32*** @g_52, align 8
+  %tmp4 = load i32**, i32*** @g_52, align 8
   store i32** @g_90, i32*** @g_52
   %cmp17 = icmp ne i32*** undef, @g_52
   ret void
diff --git a/llvm/test/Transforms/GlobalOpt/ctor-list-opt-inbounds.ll b/llvm/test/Transforms/GlobalOpt/ctor-list-opt-inbounds.ll
index 9b11985..b0db2ce 100644
--- a/llvm/test/Transforms/GlobalOpt/ctor-list-opt-inbounds.ll
+++ b/llvm/test/Transforms/GlobalOpt/ctor-list-opt-inbounds.ll
@@ -15,9 +15,9 @@
 define internal void @CTOR() {
   store i32 1, i32* getelementptr ([6 x [5 x i32]]* @G, i64 0, i64 0, i64 0)
   store i32 2, i32* getelementptr inbounds ([6 x [5 x i32]]* @G, i64 0, i64 0, i64 0)
-  %t = load i32* getelementptr ([6 x [5 x i32]]* @G, i64 0, i64 0, i64 0)
+  %t = load i32, i32* getelementptr ([6 x [5 x i32]]* @G, i64 0, i64 0, i64 0)
   store i32 %t, i32* @H
-  %s = load i32* getelementptr inbounds ([6 x [5 x i32]]* @G, i64 0, i64 0, i64 0)
+  %s = load i32, i32* getelementptr inbounds ([6 x [5 x i32]]* @G, i64 0, i64 0, i64 0)
   store i32 %s, i32* @I
   ret void
 }
diff --git a/llvm/test/Transforms/GlobalOpt/ctor-list-opt.ll b/llvm/test/Transforms/GlobalOpt/ctor-list-opt.ll
index f041472..efeabf3 100644
--- a/llvm/test/Transforms/GlobalOpt/ctor-list-opt.ll
+++ b/llvm/test/Transforms/GlobalOpt/ctor-list-opt.ll
@@ -51,7 +51,7 @@
 }
 
 define internal void @CTOR4() {
-	%X = load i32* @G3		; <i32> [#uses=1]
+	%X = load i32, i32* @G3		; <i32> [#uses=1]
 	%Y = add i32 %X, 123		; <i32> [#uses=1]
 	store i32 %Y, i32* @G3
 	ret void
@@ -59,7 +59,7 @@
 
 define internal void @CTOR5() {
 	%X.2p = getelementptr inbounds { i32, [2 x i32] }, { i32, [2 x i32] }* @X, i32 0, i32 1, i32 0		; <i32*> [#uses=2]
-	%X.2 = load i32* %X.2p		; <i32> [#uses=1]
+	%X.2 = load i32, i32* %X.2p		; <i32> [#uses=1]
 	%X.1p = getelementptr inbounds { i32, [2 x i32] }, { i32, [2 x i32] }* @X, i32 0, i32 0		; <i32*> [#uses=1]
 	store i32 %X.2, i32* %X.1p
 	store i32 42, i32* %X.2p
@@ -68,9 +68,9 @@
 
 define internal void @CTOR6() {
 	%A = alloca i32		; <i32*> [#uses=2]
-	%y = load i32* @Y		; <i32> [#uses=1]
+	%y = load i32, i32* @Y		; <i32> [#uses=1]
 	store i32 %y, i32* %A
-	%Av = load i32* %A		; <i32> [#uses=1]
+	%Av = load i32, i32* %A		; <i32> [#uses=1]
 	%Av1 = add i32 %Av, 1		; <i32> [#uses=1]
 	store i32 %Av1, i32* @Y
 	ret void
@@ -95,7 +95,7 @@
 }
 
 define i1 @accessor() {
-	%V = load i1* @CTORGV		; <i1> [#uses=1]
+	%V = load i1, i1* @CTORGV		; <i1> [#uses=1]
 	ret i1 %V
 }
 
diff --git a/llvm/test/Transforms/GlobalOpt/deadfunction.ll b/llvm/test/Transforms/GlobalOpt/deadfunction.ll
index f9a0e92..5771c4c 100644
--- a/llvm/test/Transforms/GlobalOpt/deadfunction.ll
+++ b/llvm/test/Transforms/GlobalOpt/deadfunction.ll
@@ -11,7 +11,7 @@
 entry:
   %idxprom = sext i32 %n to i64
   %arrayidx = getelementptr inbounds [3 x i8*], [3 x i8*]* @test.x, i64 0, i64 %idxprom
-  %0 = load i8** %arrayidx, align 8
+  %0 = load i8*, i8** %arrayidx, align 8
   indirectbr i8* %0, [label %a, label %b, label %c]
 
 a:
diff --git a/llvm/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll b/llvm/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll
index 675211b..d4bf803 100644
--- a/llvm/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll
+++ b/llvm/test/Transforms/GlobalOpt/externally-initialized-global-ctr.ll
@@ -13,7 +13,7 @@
 @llvm.used = appending global [2 x i8*] [i8* getelementptr inbounds ([7 x i8]* @"\01L_OBJC_METH_VAR_NAME_40", i32 0, i32 0),  i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_41" to i8*)]
 
 define internal void @__cxx_global_var_init() section "__TEXT,__StaticInit,regular,pure_instructions" {
-  %1 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_41", !invariant.load !2009
+  %1 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_41", !invariant.load !2009
   store i8* %1, i8** getelementptr inbounds ([1 x %struct.ButtonInitData]* @_ZL14buttonInitData, i32 0, i32 0, i32 0), align 4
   ret void
 }
@@ -26,8 +26,8 @@
 declare void @test(i8*)
 
 define void @print() {
-; CHECK: %1 = load i8** getelementptr inbounds ([1 x %struct.ButtonInitData]* @_ZL14buttonInitData, i32 0, i32 0, i32 0), align 4
-  %1 = load i8** getelementptr inbounds ([1 x %struct.ButtonInitData]* @_ZL14buttonInitData, i32 0, i32 0, i32 0), align 4
+; CHECK: %1 = load i8*, i8** getelementptr inbounds ([1 x %struct.ButtonInitData]* @_ZL14buttonInitData, i32 0, i32 0, i32 0), align 4
+  %1 = load i8*, i8** getelementptr inbounds ([1 x %struct.ButtonInitData]* @_ZL14buttonInitData, i32 0, i32 0, i32 0), align 4
   call void @test(i8* %1)
   ret void
 }
diff --git a/llvm/test/Transforms/GlobalOpt/fastcc.ll b/llvm/test/Transforms/GlobalOpt/fastcc.ll
index 76122b2..26398d2 100644
--- a/llvm/test/Transforms/GlobalOpt/fastcc.ll
+++ b/llvm/test/Transforms/GlobalOpt/fastcc.ll
@@ -2,13 +2,13 @@
 
 define internal i32 @f(i32* %m) {
 ; CHECK-LABEL: define internal fastcc i32 @f
-  %v = load i32* %m
+  %v = load i32, i32* %m
   ret i32 %v
 }
 
 define internal x86_thiscallcc i32 @g(i32* %m) {
 ; CHECK-LABEL: define internal fastcc i32 @g
-  %v = load i32* %m
+  %v = load i32, i32* %m
   ret i32 %v
 }
 
@@ -16,13 +16,13 @@
 ; convention.
 define internal coldcc i32 @h(i32* %m) {
 ; CHECK-LABEL: define internal coldcc i32 @h
-  %v = load i32* %m
+  %v = load i32, i32* %m
   ret i32 %v
 }
 
 define internal i32 @j(i32* %m) {
 ; CHECK-LABEL: define internal i32 @j
-  %v = load i32* %m
+  %v = load i32, i32* %m
   ret i32 %v
 }
 
diff --git a/llvm/test/Transforms/GlobalOpt/globalsra-partial.ll b/llvm/test/Transforms/GlobalOpt/globalsra-partial.ll
index df9c72f..c37558a 100644
--- a/llvm/test/Transforms/GlobalOpt/globalsra-partial.ll
+++ b/llvm/test/Transforms/GlobalOpt/globalsra-partial.ll
@@ -18,7 +18,7 @@
 
 define float @readval(i32 %i) {
         %Ptr = getelementptr { i32, [4 x float] }, { i32, [4 x float] }* @G, i32 0, i32 1, i32 %i             ; <float*> [#uses=1]
-        %V = load float* %Ptr           ; <float> [#uses=1]
+        %V = load float, float* %Ptr           ; <float> [#uses=1]
         ret float %V
 }
 
diff --git a/llvm/test/Transforms/GlobalOpt/globalsra-unknown-index.ll b/llvm/test/Transforms/GlobalOpt/globalsra-unknown-index.ll
index 296b12c1..9b51fb7 100644
--- a/llvm/test/Transforms/GlobalOpt/globalsra-unknown-index.ll
+++ b/llvm/test/Transforms/GlobalOpt/globalsra-unknown-index.ll
@@ -17,25 +17,25 @@
 }
 define i32 @borf(i64 %i, i64 %j) {
   %p = getelementptr inbounds [3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 0
-  %a = load i32* %p
+  %a = load i32, i32* %p
   %q = getelementptr inbounds [3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 1, i64 0
-  %b = load i32* %q
+  %b = load i32, i32* %q
   %c = add i32 %a, %b
   ret i32 %c
 }
 define i32 @borg(i64 %i, i64 %j) {
   %p = getelementptr inbounds [3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 1
-  %a = load i32* %p
+  %a = load i32, i32* %p
   %q = getelementptr inbounds [3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 1, i64 1
-  %b = load i32* %q
+  %b = load i32, i32* %q
   %c = add i32 %a, %b
   ret i32 %c
 }
 define i32 @borh(i64 %i, i64 %j) {
   %p = getelementptr inbounds [3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 2
-  %a = load i32* %p
+  %a = load i32, i32* %p
   %q = getelementptr inbounds [3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 1, i64 2
-  %b = load i32* %q
+  %b = load i32, i32* %q
   %c = add i32 %a, %b
   ret i32 %c
 }
diff --git a/llvm/test/Transforms/GlobalOpt/globalsra.ll b/llvm/test/Transforms/GlobalOpt/globalsra.ll
index 6d8f220..af6c27c 100644
--- a/llvm/test/Transforms/GlobalOpt/globalsra.ll
+++ b/llvm/test/Transforms/GlobalOpt/globalsra.ll
@@ -13,12 +13,12 @@
 
 define float @storeinit() {
         store float 1.000000e+00, float* getelementptr ({ i32, float, { double } }* @G, i32 0, i32 1)
-        %X = load float* getelementptr ({ i32, float, { double } }* @G, i32 0, i32 1)           ; <float> [#uses=1]
+        %X = load float, float* getelementptr ({ i32, float, { double } }* @G, i32 0, i32 1)           ; <float> [#uses=1]
         ret float %X
 }
 
 define double @constantize() {
-        %X = load double* getelementptr ({ i32, float, { double } }* @G, i32 0, i32 2, i32 0)           ; <double> [#uses=1]
+        %X = load double, double* getelementptr ({ i32, float, { double } }* @G, i32 0, i32 2, i32 0)           ; <double> [#uses=1]
         ret double %X
 }
 
diff --git a/llvm/test/Transforms/GlobalOpt/heap-sra-1.ll b/llvm/test/Transforms/GlobalOpt/heap-sra-1.ll
index 5388401..6035eaa 100644
--- a/llvm/test/Transforms/GlobalOpt/heap-sra-1.ll
+++ b/llvm/test/Transforms/GlobalOpt/heap-sra-1.ll
@@ -19,14 +19,14 @@
 
 define i32 @baz() nounwind readonly noinline {
 bb1.thread:
-	%0 = load %struct.foo** @X, align 4		
+	%0 = load %struct.foo*, %struct.foo** @X, align 4		
 	br label %bb1
 
 bb1:		; preds = %bb1, %bb1.thread
 	%i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ]
 	%sum.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %3, %bb1 ]
 	%1 = getelementptr %struct.foo, %struct.foo* %0, i32 %i.0.reg2mem.0, i32 0
-	%2 = load i32* %1, align 4
+	%2 = load i32, i32* %1, align 4
 	%3 = add i32 %2, %sum.0.reg2mem.0	
 	%indvar.next = add i32 %i.0.reg2mem.0, 1	
 	%exitcond = icmp eq i32 %indvar.next, 1200		
diff --git a/llvm/test/Transforms/GlobalOpt/heap-sra-2.ll b/llvm/test/Transforms/GlobalOpt/heap-sra-2.ll
index feeb709..d66c627 100644
--- a/llvm/test/Transforms/GlobalOpt/heap-sra-2.ll
+++ b/llvm/test/Transforms/GlobalOpt/heap-sra-2.ll
@@ -19,14 +19,14 @@
 
 define i32 @baz() nounwind readonly noinline {
 bb1.thread:
-	%0 = load %struct.foo** @X, align 4		; <%struct.foo*> [#uses=1]
+	%0 = load %struct.foo*, %struct.foo** @X, align 4		; <%struct.foo*> [#uses=1]
 	br label %bb1
 
 bb1:		; preds = %bb1, %bb1.thread
 	%i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ]		; <i32> [#uses=2]
 	%sum.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %3, %bb1 ]		; <i32> [#uses=1]
 	%1 = getelementptr %struct.foo, %struct.foo* %0, i32 %i.0.reg2mem.0, i32 0		; <i32*> [#uses=1]
-	%2 = load i32* %1, align 4		; <i32> [#uses=1]
+	%2 = load i32, i32* %1, align 4		; <i32> [#uses=1]
 	%3 = add i32 %2, %sum.0.reg2mem.0		; <i32> [#uses=2]
 	%indvar.next = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=2]
 	%exitcond = icmp eq i32 %indvar.next, 1200		; <i1> [#uses=1]
diff --git a/llvm/test/Transforms/GlobalOpt/heap-sra-3.ll b/llvm/test/Transforms/GlobalOpt/heap-sra-3.ll
index 4ae9ec0..6a34364b 100644
--- a/llvm/test/Transforms/GlobalOpt/heap-sra-3.ll
+++ b/llvm/test/Transforms/GlobalOpt/heap-sra-3.ll
@@ -20,14 +20,14 @@
 
 define i32 @baz() nounwind readonly noinline {
 bb1.thread:
-	%0 = load %struct.foo** @X, align 4		
+	%0 = load %struct.foo*, %struct.foo** @X, align 4		
 	br label %bb1
 
 bb1:		; preds = %bb1, %bb1.thread
 	%i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ]
 	%sum.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %3, %bb1 ]
 	%1 = getelementptr %struct.foo, %struct.foo* %0, i32 %i.0.reg2mem.0, i32 0
-	%2 = load i32* %1, align 4
+	%2 = load i32, i32* %1, align 4
 	%3 = add i32 %2, %sum.0.reg2mem.0	
 	%indvar.next = add i32 %i.0.reg2mem.0, 1	
 	%exitcond = icmp eq i32 %indvar.next, 1200		
diff --git a/llvm/test/Transforms/GlobalOpt/heap-sra-4.ll b/llvm/test/Transforms/GlobalOpt/heap-sra-4.ll
index a6e7578..2176b9f 100644
--- a/llvm/test/Transforms/GlobalOpt/heap-sra-4.ll
+++ b/llvm/test/Transforms/GlobalOpt/heap-sra-4.ll
@@ -20,14 +20,14 @@
 
 define i32 @baz() nounwind readonly noinline {
 bb1.thread:
-	%0 = load %struct.foo** @X, align 4		
+	%0 = load %struct.foo*, %struct.foo** @X, align 4		
 	br label %bb1
 
 bb1:		; preds = %bb1, %bb1.thread
 	%i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ]
 	%sum.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %3, %bb1 ]
 	%1 = getelementptr %struct.foo, %struct.foo* %0, i32 %i.0.reg2mem.0, i32 0
-	%2 = load i32* %1, align 4
+	%2 = load i32, i32* %1, align 4
 	%3 = add i32 %2, %sum.0.reg2mem.0	
 	%indvar.next = add i32 %i.0.reg2mem.0, 1	
 	%exitcond = icmp eq i32 %indvar.next, 1200		
diff --git a/llvm/test/Transforms/GlobalOpt/heap-sra-phi.ll b/llvm/test/Transforms/GlobalOpt/heap-sra-phi.ll
index 9449827..bf9ca7b 100644
--- a/llvm/test/Transforms/GlobalOpt/heap-sra-phi.ll
+++ b/llvm/test/Transforms/GlobalOpt/heap-sra-phi.ll
@@ -18,7 +18,7 @@
 
 define i32 @baz() nounwind readonly noinline {
 bb1.thread:
-	%tmpLD1 = load %struct.foo** @X, align 4		; <%struct.foo*> [#uses=1]
+	%tmpLD1 = load %struct.foo*, %struct.foo** @X, align 4		; <%struct.foo*> [#uses=1]
 	br label %bb1
 
 bb1:		; preds = %bb1, %bb1.thread
@@ -26,14 +26,14 @@
 	%i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ]		; <i32> [#uses=2]
 	%sum.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %tmp3, %bb1 ]		; <i32> [#uses=1]
 	%tmp1 = getelementptr %struct.foo, %struct.foo* %tmp, i32 %i.0.reg2mem.0, i32 0		; <i32*> [#uses=1]
-	%tmp2 = load i32* %tmp1, align 4		; <i32> [#uses=1]
+	%tmp2 = load i32, i32* %tmp1, align 4		; <i32> [#uses=1]
 	%tmp6 = add i32 %tmp2, %sum.0.reg2mem.0		; <i32> [#uses=2]
 	%tmp4 = getelementptr %struct.foo, %struct.foo* %tmp, i32 %i.0.reg2mem.0, i32 1		; <i32*> [#uses=1]
-        %tmp5 = load i32 * %tmp4
+        %tmp5 = load i32 , i32 * %tmp4
         %tmp3 = add i32 %tmp5, %tmp6
 	%indvar.next = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=2]
         
-      	%tmpLD2 = load %struct.foo** @X, align 4		; <%struct.foo*> [#uses=1]
+      	%tmpLD2 = load %struct.foo*, %struct.foo** @X, align 4		; <%struct.foo*> [#uses=1]
 
 	%exitcond = icmp eq i32 %indvar.next, 1200		; <i1> [#uses=1]
 	br i1 %exitcond, label %bb2, label %bb1
diff --git a/llvm/test/Transforms/GlobalOpt/integer-bool.ll b/llvm/test/Transforms/GlobalOpt/integer-bool.ll
index abf5fdd..617febd 100644
--- a/llvm/test/Transforms/GlobalOpt/integer-bool.ll
+++ b/llvm/test/Transforms/GlobalOpt/integer-bool.ll
@@ -20,7 +20,7 @@
 
 define i1 @get() {
 ; CHECK-LABEL: @get(
-  %A = load i32 addrspace(1) * @G
+  %A = load i32, i32 addrspace(1) * @G
   %C = icmp slt i32 %A, 2
   ret i1 %C
 ; CHECK: ret i1 true
diff --git a/llvm/test/Transforms/GlobalOpt/iterate.ll b/llvm/test/Transforms/GlobalOpt/iterate.ll
index 7466874..056d74e 100644
--- a/llvm/test/Transforms/GlobalOpt/iterate.ll
+++ b/llvm/test/Transforms/GlobalOpt/iterate.ll
@@ -4,8 +4,8 @@
 @H = internal global { i32* } { i32* @G }               ; <{ i32* }*> [#uses=1]
 
 define i32 @loadg() {
-        %G = load i32** getelementptr ({ i32* }* @H, i32 0, i32 0)              ; <i32*> [#uses=1]
-        %GV = load i32* %G              ; <i32> [#uses=1]
+        %G = load i32*, i32** getelementptr ({ i32* }* @H, i32 0, i32 0)              ; <i32*> [#uses=1]
+        %GV = load i32, i32* %G              ; <i32> [#uses=1]
         ret i32 %GV
 }
 
diff --git a/llvm/test/Transforms/GlobalOpt/load-store-global.ll b/llvm/test/Transforms/GlobalOpt/load-store-global.ll
index cbd3cdb..e01358e 100644
--- a/llvm/test/Transforms/GlobalOpt/load-store-global.ll
+++ b/llvm/test/Transforms/GlobalOpt/load-store-global.ll
@@ -4,7 +4,7 @@
 ; CHECK-NOT: @G
 
 define void @foo() {
-        %V = load i32* @G               ; <i32> [#uses=1]
+        %V = load i32, i32* @G               ; <i32> [#uses=1]
         store i32 %V, i32* @G
         ret void
 ; CHECK-LABEL: @foo(
@@ -12,7 +12,7 @@
 }
 
 define i32 @bar() {
-        %X = load i32* @G               ; <i32> [#uses=1]
+        %X = load i32, i32* @G               ; <i32> [#uses=1]
         ret i32 %X
 ; CHECK-LABEL: @bar(
 ; CHECK-NEXT: ret i32 17
@@ -29,7 +29,7 @@
   %cmp2 = icmp eq i8* null, %b
   %cmp3 = icmp eq i64** null, %g
   store i64* inttoptr (i64 1 to i64*), i64** @a, align 8
-  %l = load i64** @a, align 8
+  %l = load i64*, i64** @a, align 8
   ret void
 ; CHECK-LABEL: @qux(
 ; CHECK-NOT: store
diff --git a/llvm/test/Transforms/GlobalOpt/malloc-promote-1.ll b/llvm/test/Transforms/GlobalOpt/malloc-promote-1.ll
index 51ccbbd..d5087de 100644
--- a/llvm/test/Transforms/GlobalOpt/malloc-promote-1.ll
+++ b/llvm/test/Transforms/GlobalOpt/malloc-promote-1.ll
@@ -8,7 +8,7 @@
         %malloccall = tail call i8* @malloc(i64 4)      ; <i8*> [#uses=1]
         %P = bitcast i8* %malloccall to i32*            ; <i32*> [#uses=1]
         store i32* %P, i32** @G
-        %GV = load i32** @G             ; <i32*> [#uses=1]
+        %GV = load i32*, i32** @G             ; <i32*> [#uses=1]
         store i32 0, i32* %GV
         ret void
 }
@@ -16,8 +16,8 @@
 declare noalias i8* @malloc(i64)
 
 define i32 @get() {
-        %GV = load i32** @G             ; <i32*> [#uses=1]
-        %V = load i32* %GV              ; <i32> [#uses=1]
+        %GV = load i32*, i32** @G             ; <i32*> [#uses=1]
+        %V = load i32, i32* %GV              ; <i32> [#uses=1]
         ret i32 %V
 ; CHECK: ret i32 0
 }
diff --git a/llvm/test/Transforms/GlobalOpt/malloc-promote-2.ll b/llvm/test/Transforms/GlobalOpt/malloc-promote-2.ll
index 373a792..335ed82 100644
--- a/llvm/test/Transforms/GlobalOpt/malloc-promote-2.ll
+++ b/llvm/test/Transforms/GlobalOpt/malloc-promote-2.ll
@@ -10,7 +10,7 @@
   %malloccall = tail call i8* @malloc(i64 mul (i64 100, i64 4))
   %P = bitcast i8* %malloccall to i32*
   store i32* %P, i32** @G
-  %GV = load i32** @G
+  %GV = load i32*, i32** @G
   %GVe = getelementptr i32, i32* %GV, i32 40
   store i32 20, i32* %GVe
   ret void
diff --git a/llvm/test/Transforms/GlobalOpt/malloc-promote-3.ll b/llvm/test/Transforms/GlobalOpt/malloc-promote-3.ll
index b4e7986..1e42c3b 100644
--- a/llvm/test/Transforms/GlobalOpt/malloc-promote-3.ll
+++ b/llvm/test/Transforms/GlobalOpt/malloc-promote-3.ll
@@ -9,7 +9,7 @@
   %malloccall = tail call i8* @malloc(i64 mul (i64 100, i64 4)) nobuiltin
   %P = bitcast i8* %malloccall to i32*
   store i32* %P, i32** @G
-  %GV = load i32** @G
+  %GV = load i32*, i32** @G
   %GVe = getelementptr i32, i32* %GV, i32 40
   store i32 20, i32* %GVe
   ret void
diff --git a/llvm/test/Transforms/GlobalOpt/memset-null.ll b/llvm/test/Transforms/GlobalOpt/memset-null.ll
index 53ec755..838ac09 100644
--- a/llvm/test/Transforms/GlobalOpt/memset-null.ll
+++ b/llvm/test/Transforms/GlobalOpt/memset-null.ll
@@ -23,7 +23,7 @@
 
 define internal void @_GLOBAL__I_b() nounwind {
 entry:
-  %tmp.i.i.i = load i8** @y, align 8
+  %tmp.i.i.i = load i8*, i8** @y, align 8
   tail call void @llvm.memset.p0i8.i64(i8* %tmp.i.i.i, i8 0, i64 10, i32 1, i1 false) nounwind
   ret void
 }
diff --git a/llvm/test/Transforms/GlobalOpt/phi-select.ll b/llvm/test/Transforms/GlobalOpt/phi-select.ll
index cd8a7dc..e5cb981 100644
--- a/llvm/test/Transforms/GlobalOpt/phi-select.ll
+++ b/llvm/test/Transforms/GlobalOpt/phi-select.ll
@@ -8,7 +8,7 @@
 
 define i32 @test1(i1 %C) {
         %P = select i1 %C, i32* @X, i32* @Y             ; <i32*> [#uses=1]
-        %V = load i32* %P               ; <i32> [#uses=1]
+        %V = load i32, i32* %P               ; <i32> [#uses=1]
         ret i32 %V
 }
 
@@ -21,7 +21,7 @@
 
 Cont:           ; preds = %T, %0
         %P = phi i32* [ @X, %0 ], [ @Y, %T ]            ; <i32*> [#uses=1]
-        %V = load i32* %P               ; <i32> [#uses=1]
+        %V = load i32, i32* %P               ; <i32> [#uses=1]
         ret i32 %V
 }
 
diff --git a/llvm/test/Transforms/GlobalOpt/storepointer-compare.ll b/llvm/test/Transforms/GlobalOpt/storepointer-compare.ll
index 09e20a8..bcf7caf 100644
--- a/llvm/test/Transforms/GlobalOpt/storepointer-compare.ll
+++ b/llvm/test/Transforms/GlobalOpt/storepointer-compare.ll
@@ -16,7 +16,7 @@
 }
 
 define void @doit() {
-        %FP = load void ()** @G         ; <void ()*> [#uses=2]
+        %FP = load void ()*, void ()** @G         ; <void ()*> [#uses=2]
         %CC = icmp eq void ()* %FP, null                ; <i1> [#uses=1]
         br i1 %CC, label %isNull, label %DoCall
 
diff --git a/llvm/test/Transforms/GlobalOpt/storepointer.ll b/llvm/test/Transforms/GlobalOpt/storepointer.ll
index 8019076..9003004 100644
--- a/llvm/test/Transforms/GlobalOpt/storepointer.ll
+++ b/llvm/test/Transforms/GlobalOpt/storepointer.ll
@@ -12,7 +12,7 @@
 }
 
 define void @doit() {
-        %FP = load void ()** @G         ; <void ()*> [#uses=1]
+        %FP = load void ()*, void ()** @G         ; <void ()*> [#uses=1]
         call void %FP( )
         ret void
 }
diff --git a/llvm/test/Transforms/GlobalOpt/tls.ll b/llvm/test/Transforms/GlobalOpt/tls.ll
index 59ae23a..beea178 100644
--- a/llvm/test/Transforms/GlobalOpt/tls.ll
+++ b/llvm/test/Transforms/GlobalOpt/tls.ll
@@ -24,14 +24,14 @@
   store i32 0, i32* getelementptr inbounds ([100 x i32]* @x, i64 0, i64 1), align 4
 
   ; Read the value of @ip, which now points at x[1] for thread 2.
-  %0 = load i32** @ip, align 8
+  %0 = load i32*, i32** @ip, align 8
 
-  %1 = load i32* %0, align 4
+  %1 = load i32, i32* %0, align 4
   ret i32 %1
 
 ; CHECK-LABEL: @f(
 ; Make sure that the load from @ip hasn't been removed.
-; CHECK: load i32** @ip
+; CHECK: load i32*, i32** @ip
 ; CHECK: ret
 }
 
diff --git a/llvm/test/Transforms/GlobalOpt/trivialstore.ll b/llvm/test/Transforms/GlobalOpt/trivialstore.ll
index 21437f3..f907d83 100644
--- a/llvm/test/Transforms/GlobalOpt/trivialstore.ll
+++ b/llvm/test/Transforms/GlobalOpt/trivialstore.ll
@@ -8,7 +8,7 @@
 }
 
 define i32 @bar() {
-        %X = load i32* @G               ; <i32> [#uses=1]
+        %X = load i32, i32* @G               ; <i32> [#uses=1]
         ret i32 %X
 }
 
diff --git a/llvm/test/Transforms/GlobalOpt/undef-init.ll b/llvm/test/Transforms/GlobalOpt/undef-init.ll
index c149497..d304821 100644
--- a/llvm/test/Transforms/GlobalOpt/undef-init.ll
+++ b/llvm/test/Transforms/GlobalOpt/undef-init.ll
@@ -5,7 +5,7 @@
 
 define i32 @_Z3foov() {
 entry:
-        %tmp.1 = load i32* @X.0         ; <i32> [#uses=1]
+        %tmp.1 = load i32, i32* @X.0         ; <i32> [#uses=1]
         ret i32 %tmp.1
 }
 
diff --git a/llvm/test/Transforms/GlobalOpt/unnamed-addr.ll b/llvm/test/Transforms/GlobalOpt/unnamed-addr.ll
index c2ce0b9..85ed829 100644
--- a/llvm/test/Transforms/GlobalOpt/unnamed-addr.ll
+++ b/llvm/test/Transforms/GlobalOpt/unnamed-addr.ll
@@ -13,7 +13,7 @@
 ; CHECK: @e = linkonce_odr global i32 0
 
 define i32 @get_e() {
-       %t = load i32* @e
+       %t = load i32, i32* @e
        ret i32 %t
 }
 
@@ -25,9 +25,9 @@
 define i1 @bah(i64 %i) nounwind readonly optsize ssp {
 entry:
   %arrayidx4 = getelementptr inbounds [4 x i8], [4 x i8]* @d, i64 0, i64 %i
-  %tmp5 = load i8* %arrayidx4, align 1
+  %tmp5 = load i8, i8* %arrayidx4, align 1
   %array0 = bitcast [4 x i8]* @d to i8*
-  %tmp6 = load i8* %array0, align 1
+  %tmp6 = load i8, i8* %array0, align 1
   %cmp = icmp eq i8 %tmp5, %tmp6
   ret i1 %cmp
 }
@@ -63,6 +63,6 @@
 
 define i32 @zed() {
 entry:
-  %tmp1 = load i32* @c, align 4
+  %tmp1 = load i32, i32* @c, align 4
   ret i32 %tmp1
 }
diff --git a/llvm/test/Transforms/GlobalOpt/zeroinitializer-gep-load.ll b/llvm/test/Transforms/GlobalOpt/zeroinitializer-gep-load.ll
index ad16a64..51feb48 100644
--- a/llvm/test/Transforms/GlobalOpt/zeroinitializer-gep-load.ll
+++ b/llvm/test/Transforms/GlobalOpt/zeroinitializer-gep-load.ll
@@ -4,7 +4,7 @@
 
 define i32 @test1(i64 %idx) nounwind {
   %arrayidx = getelementptr inbounds [10 x i32], [10 x i32]* @zero, i64 0, i64 %idx
-  %l = load i32* %arrayidx
+  %l = load i32, i32* %arrayidx
   ret i32 %l
 ; CHECK-LABEL: @test1(
 ; CHECK: ret i32 0
diff --git a/llvm/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll b/llvm/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll
index 03ebf0b..735597a 100644
--- a/llvm/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll
+++ b/llvm/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll
@@ -20,11 +20,11 @@
 define internal i32 @vfu2(%struct.MYstr* byval align 4 %u) nounwind readonly {
 entry:
   %0 = getelementptr %struct.MYstr, %struct.MYstr* %u, i32 0, i32 1 ; <i32*> [#uses=1]
-  %1 = load i32* %0
-; CHECK: load i32* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 1)
+  %1 = load i32, i32* %0
+; CHECK: load i32, i32* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 1)
   %2 = getelementptr %struct.MYstr, %struct.MYstr* %u, i32 0, i32 0 ; <i8*> [#uses=1]
-  %3 = load i8* %2
-; CHECK: load i8* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 0)
+  %3 = load i8, i8* %2
+; CHECK: load i8, i8* getelementptr inbounds (%struct.MYstr* @mystr, i32 0, i32 0)
   %4 = zext i8 %3 to i32
   %5 = add i32 %4, %1
   ret i32 %5
diff --git a/llvm/test/Transforms/IPConstantProp/dangling-block-address.ll b/llvm/test/Transforms/IPConstantProp/dangling-block-address.ll
index 487375c..abd0b0f 100644
--- a/llvm/test/Transforms/IPConstantProp/dangling-block-address.ll
+++ b/llvm/test/Transforms/IPConstantProp/dangling-block-address.ll
@@ -30,9 +30,9 @@
 indirectgoto:                                     ; preds = %lab0, %entry
   %indvar = phi i32 [ %indvar.next, %lab0 ], [ 0, %entry ] ; <i32> [#uses=2]
   %pc.addr.0 = getelementptr i32, i32* %pc, i32 %indvar ; <i32*> [#uses=1]
-  %tmp1.pn = load i32* %pc.addr.0                 ; <i32> [#uses=1]
+  %tmp1.pn = load i32, i32* %pc.addr.0                 ; <i32> [#uses=1]
   %indirect.goto.dest.in = getelementptr inbounds [2 x i8*], [2 x i8*]* @bar.l, i32 0, i32 %tmp1.pn ; <i8**> [#uses=1]
-  %indirect.goto.dest = load i8** %indirect.goto.dest.in ; <i8*> [#uses=1]
+  %indirect.goto.dest = load i8*, i8** %indirect.goto.dest.in ; <i8*> [#uses=1]
   indirectbr i8* %indirect.goto.dest, [label %lab0, label %end]
 }
 
diff --git a/llvm/test/Transforms/IPConstantProp/global.ll b/llvm/test/Transforms/IPConstantProp/global.ll
index 6715293..d3ba146 100644
--- a/llvm/test/Transforms/IPConstantProp/global.ll
+++ b/llvm/test/Transforms/IPConstantProp/global.ll
@@ -4,7 +4,7 @@
 
 define void @_Z7test1f1v() nounwind {
 entry:
-  %tmp = load i32* @_ZL6test1g, align 4
+  %tmp = load i32, i32* @_ZL6test1g, align 4
   %cmp = icmp eq i32 %tmp, 0
   br i1 %cmp, label %if.then, label %if.end
 
@@ -21,6 +21,6 @@
 ; CHECK-NEXT: ret i32 42
 define i32 @_Z7test1f2v() nounwind {
 entry:
-  %tmp = load i32* @_ZL6test1g, align 4
+  %tmp = load i32, i32* @_ZL6test1g, align 4
   ret i32 %tmp
 }
diff --git a/llvm/test/Transforms/IPConstantProp/return-argument.ll b/llvm/test/Transforms/IPConstantProp/return-argument.ll
index 2a14f05..927531b 100644
--- a/llvm/test/Transforms/IPConstantProp/return-argument.ll
+++ b/llvm/test/Transforms/IPConstantProp/return-argument.ll
@@ -4,7 +4,7 @@
 
 ;; This function returns its second argument on all return statements
 define internal i32* @incdec(i1 %C, i32* %V) {
-        %X = load i32* %V
+        %X = load i32, i32* %V
         br i1 %C, label %T, label %F
 
 T:              ; preds = %0
diff --git a/llvm/test/Transforms/IRCE/decrementing-loop.ll b/llvm/test/Transforms/IRCE/decrementing-loop.ll
index e70dacd..6ff3e76 100644
--- a/llvm/test/Transforms/IRCE/decrementing-loop.ll
+++ b/llvm/test/Transforms/IRCE/decrementing-loop.ll
@@ -2,7 +2,7 @@
 
 define void @decrementing_loop(i32 *%arr, i32 *%a_len_ptr, i32 %n) {
  entry:
-  %len = load i32* %a_len_ptr, !range !0
+  %len = load i32, i32* %a_len_ptr, !range !0
   %first.itr.check = icmp sgt i32 %n, 0
   %start = sub i32 %n, 1
   br i1 %first.itr.check, label %loop, label %exit
diff --git a/llvm/test/Transforms/IRCE/low-becount.ll b/llvm/test/Transforms/IRCE/low-becount.ll
index c2eea08..89b91d6 100644
--- a/llvm/test/Transforms/IRCE/low-becount.ll
+++ b/llvm/test/Transforms/IRCE/low-becount.ll
@@ -4,7 +4,7 @@
 
 define void @low_profiled_be_count(i32 *%arr, i32 *%a_len_ptr, i32 %n) {
  entry:
-  %len = load i32* %a_len_ptr, !range !0
+  %len = load i32, i32* %a_len_ptr, !range !0
   %first.itr.check = icmp sgt i32 %n, 0
   br i1 %first.itr.check, label %loop, label %exit
 
diff --git a/llvm/test/Transforms/IRCE/multiple-access-no-preloop.ll b/llvm/test/Transforms/IRCE/multiple-access-no-preloop.ll
index 721dbe7..9963a57 100644
--- a/llvm/test/Transforms/IRCE/multiple-access-no-preloop.ll
+++ b/llvm/test/Transforms/IRCE/multiple-access-no-preloop.ll
@@ -4,8 +4,8 @@
     i32* %arr_a, i32* %a_len_ptr, i32* %arr_b, i32* %b_len_ptr, i32 %n) {
 
  entry:
-  %len.a = load i32* %a_len_ptr, !range !0
-  %len.b = load i32* %b_len_ptr, !range !0
+  %len.a = load i32, i32* %a_len_ptr, !range !0
+  %len.b = load i32, i32* %b_len_ptr, !range !0
   %first.itr.check = icmp sgt i32 %n, 0
   br i1 %first.itr.check, label %loop, label %exit
 
diff --git a/llvm/test/Transforms/IRCE/not-likely-taken.ll b/llvm/test/Transforms/IRCE/not-likely-taken.ll
index 5f623f5..50a64a2 100644
--- a/llvm/test/Transforms/IRCE/not-likely-taken.ll
+++ b/llvm/test/Transforms/IRCE/not-likely-taken.ll
@@ -6,8 +6,8 @@
     i32* %arr_a, i32* %a_len_ptr, i32* %arr_b, i32* %b_len_ptr, i32 %n) {
 
  entry:
-  %len.a = load i32* %a_len_ptr, !range !0
-  %len.b = load i32* %b_len_ptr, !range !0
+  %len.a = load i32, i32* %a_len_ptr, !range !0
+  %len.b = load i32, i32* %b_len_ptr, !range !0
   %first.itr.check = icmp sgt i32 %n, 0
   br i1 %first.itr.check, label %loop, label %exit
 
diff --git a/llvm/test/Transforms/IRCE/single-access-no-preloop.ll b/llvm/test/Transforms/IRCE/single-access-no-preloop.ll
index 60485c8..4824d95 100644
--- a/llvm/test/Transforms/IRCE/single-access-no-preloop.ll
+++ b/llvm/test/Transforms/IRCE/single-access-no-preloop.ll
@@ -2,7 +2,7 @@
 
 define void @single_access_no_preloop_no_offset(i32 *%arr, i32 *%a_len_ptr, i32 %n) {
  entry:
-  %len = load i32* %a_len_ptr, !range !0
+  %len = load i32, i32* %a_len_ptr, !range !0
   %first.itr.check = icmp sgt i32 %n, 0
   br i1 %first.itr.check, label %loop, label %exit
 
@@ -57,7 +57,7 @@
 
 define void @single_access_no_preloop_with_offset(i32 *%arr, i32 *%a_len_ptr, i32 %n) {
  entry:
-  %len = load i32* %a_len_ptr, !range !0
+  %len = load i32, i32* %a_len_ptr, !range !0
   %first.itr.check = icmp sgt i32 %n, 0
   br i1 %first.itr.check, label %loop, label %exit
 
diff --git a/llvm/test/Transforms/IRCE/single-access-with-preloop.ll b/llvm/test/Transforms/IRCE/single-access-with-preloop.ll
index c0d86f0..e581014 100644
--- a/llvm/test/Transforms/IRCE/single-access-with-preloop.ll
+++ b/llvm/test/Transforms/IRCE/single-access-with-preloop.ll
@@ -2,7 +2,7 @@
 
 define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32 %offset) {
  entry:
-  %len = load i32* %a_len_ptr, !range !0
+  %len = load i32, i32* %a_len_ptr, !range !0
   %first.itr.check = icmp sgt i32 %n, 0
   br i1 %first.itr.check, label %loop, label %exit
 
diff --git a/llvm/test/Transforms/IRCE/unhandled.ll b/llvm/test/Transforms/IRCE/unhandled.ll
index 098642a..0d5ff95 100644
--- a/llvm/test/Transforms/IRCE/unhandled.ll
+++ b/llvm/test/Transforms/IRCE/unhandled.ll
@@ -7,7 +7,7 @@
                                         i32 %scale, i32 %offset) {
 ; CHECK-NOT: constrained Loop at depth
  entry:
-  %len = load i32* %a_len_ptr, !range !0
+  %len = load i32, i32* %a_len_ptr, !range !0
   %first.itr.check = icmp sgt i32 %n, 0
   br i1 %first.itr.check, label %loop, label %exit
 
diff --git a/llvm/test/Transforms/IRCE/with-parent-loops.ll b/llvm/test/Transforms/IRCE/with-parent-loops.ll
index 449a077..00af20b 100644
--- a/llvm/test/Transforms/IRCE/with-parent-loops.ll
+++ b/llvm/test/Transforms/IRCE/with-parent-loops.ll
@@ -8,7 +8,7 @@
 ; CHECK: irce: in function inner_loop: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
 
 entry:
-  %len = load i32* %a_len_ptr, !range !0
+  %len = load i32, i32* %a_len_ptr, !range !0
   %first.itr.check = icmp sgt i32 %n, 0
   br i1 %first.itr.check, label %loop, label %exit
 
@@ -42,7 +42,7 @@
   %idx = phi i32 [ 0, %entry ], [ %idx.next, %inner_loop.exit ]
   %idx.next = add i32 %idx, 1
   %next = icmp ult i32 %idx.next, %parent.count
-  %len.i = load i32* %a_len_ptr, !range !0
+  %len.i = load i32, i32* %a_len_ptr, !range !0
   %first.itr.check.i = icmp sgt i32 %n, 0
   br i1 %first.itr.check.i, label %loop.i, label %exit.i
 
@@ -88,7 +88,7 @@
   %idx.i = phi i32 [ 0, %loop ], [ %idx.next.i, %inner_loop.exit.i ]
   %idx.next.i = add i32 %idx.i, 1
   %next.i = icmp ult i32 %idx.next.i, %parent.count
-  %len.i.i = load i32* %a_len_ptr, !range !0
+  %len.i.i = load i32, i32* %a_len_ptr, !range !0
   %first.itr.check.i.i = icmp sgt i32 %n, 0
   br i1 %first.itr.check.i.i, label %loop.i.i, label %exit.i.i
 
@@ -132,7 +132,7 @@
   %idx = phi i32 [ 0, %entry ], [ %idx.next, %inner_loop.exit12 ]
   %idx.next = add i32 %idx, 1
   %next = icmp ult i32 %idx.next, %parent.count
-  %len.i = load i32* %a_len_ptr, !range !0
+  %len.i = load i32, i32* %a_len_ptr, !range !0
   %first.itr.check.i = icmp sgt i32 %n, 0
   br i1 %first.itr.check.i, label %loop.i, label %exit.i
 
@@ -155,7 +155,7 @@
   br label %inner_loop.exit
 
 inner_loop.exit:                                  ; preds = %exit.i, %out.of.bounds.i
-  %len.i1 = load i32* %a_len_ptr, !range !0
+  %len.i1 = load i32, i32* %a_len_ptr, !range !0
   %first.itr.check.i2 = icmp sgt i32 %n, 0
   br i1 %first.itr.check.i2, label %loop.i6, label %exit.i11
 
@@ -202,7 +202,7 @@
   %idx.i = phi i32 [ 0, %loop ], [ %idx.next.i, %inner_loop.exit.i ]
   %idx.next.i = add i32 %idx.i, 1
   %next.i = icmp ult i32 %idx.next.i, %parent.count
-  %len.i.i = load i32* %a_len_ptr, !range !0
+  %len.i.i = load i32, i32* %a_len_ptr, !range !0
   %first.itr.check.i.i = icmp sgt i32 %n, 0
   br i1 %first.itr.check.i.i, label %loop.i.i, label %exit.i.i
 
@@ -234,7 +234,7 @@
   %idx.i1 = phi i32 [ 0, %with_parent.exit ], [ %idx.next.i2, %inner_loop.exit.i16 ]
   %idx.next.i2 = add i32 %idx.i1, 1
   %next.i3 = icmp ult i32 %idx.next.i2, %parent.count
-  %len.i.i4 = load i32* %a_len_ptr, !range !0
+  %len.i.i4 = load i32, i32* %a_len_ptr, !range !0
   %first.itr.check.i.i5 = icmp sgt i32 %n, 0
   br i1 %first.itr.check.i.i5, label %loop.i.i10, label %exit.i.i15
 
@@ -278,7 +278,7 @@
   %idx = phi i32 [ 0, %entry ], [ %idx.next, %with_parent.exit ]
   %idx.next = add i32 %idx, 1
   %next = icmp ult i32 %idx.next, %grandparent.count
-  %len.i = load i32* %a_len_ptr, !range !0
+  %len.i = load i32, i32* %a_len_ptr, !range !0
   %first.itr.check.i = icmp sgt i32 %n, 0
   br i1 %first.itr.check.i, label %loop.i, label %exit.i
 
@@ -307,7 +307,7 @@
   %idx.i1 = phi i32 [ 0, %inner_loop.exit ], [ %idx.next.i2, %inner_loop.exit.i ]
   %idx.next.i2 = add i32 %idx.i1, 1
   %next.i3 = icmp ult i32 %idx.next.i2, %parent.count
-  %len.i.i = load i32* %a_len_ptr, !range !0
+  %len.i.i = load i32, i32* %a_len_ptr, !range !0
   %first.itr.check.i.i = icmp sgt i32 %n, 0
   br i1 %first.itr.check.i.i, label %loop.i.i, label %exit.i.i
 
diff --git a/llvm/test/Transforms/IndVarSimplify/2005-02-17-TruncateExprCrash.ll b/llvm/test/Transforms/IndVarSimplify/2005-02-17-TruncateExprCrash.ll
index ce043be..16ad635 100644
--- a/llvm/test/Transforms/IndVarSimplify/2005-02-17-TruncateExprCrash.ll
+++ b/llvm/test/Transforms/IndVarSimplify/2005-02-17-TruncateExprCrash.ll
@@ -45,7 +45,7 @@
 	ret void
 
 endif.4:		; preds = %_ZNK7QString2atEi.exit
-	%tmp.115 = load i8* null		; <i8> [#uses=1]
+	%tmp.115 = load i8, i8* null		; <i8> [#uses=1]
 	br i1 false, label %loopexit.1, label %no_exit.0
 
 no_exit.0:		; preds = %no_exit.0, %endif.4
diff --git a/llvm/test/Transforms/IndVarSimplify/2006-06-16-Indvar-LCSSA-Crash.ll b/llvm/test/Transforms/IndVarSimplify/2006-06-16-Indvar-LCSSA-Crash.ll
index 36ec2b8..2d40f88 100644
--- a/llvm/test/Transforms/IndVarSimplify/2006-06-16-Indvar-LCSSA-Crash.ll
+++ b/llvm/test/Transforms/IndVarSimplify/2006-06-16-Indvar-LCSSA-Crash.ll
@@ -6,9 +6,9 @@
 
 no_exit.30:             ; preds = %no_exit.30, %endif.0
         %x.12.0 = phi i32 [ %inc.28, %no_exit.30 ], [ -2, %endif.0 ]            ; <i32> [#uses=1]
-        %tmp.583 = load i16* null               ; <i16> [#uses=1]
+        %tmp.583 = load i16, i16* null               ; <i16> [#uses=1]
         %tmp.584 = zext i16 %tmp.583 to i32             ; <i32> [#uses=1]
-        %tmp.588 = load i32* null               ; <i32> [#uses=1]
+        %tmp.588 = load i32, i32* null               ; <i32> [#uses=1]
         %tmp.589 = mul i32 %tmp.584, %tmp.588           ; <i32> [#uses=1]
         %tmp.591 = add i32 %tmp.589, 0          ; <i32> [#uses=1]
         %inc.28 = add i32 %x.12.0, 1            ; <i32> [#uses=2]
diff --git a/llvm/test/Transforms/IndVarSimplify/2007-01-06-TripCount.ll b/llvm/test/Transforms/IndVarSimplify/2007-01-06-TripCount.ll
index 56e9ca6..3dda657 100644
--- a/llvm/test/Transforms/IndVarSimplify/2007-01-06-TripCount.ll
+++ b/llvm/test/Transforms/IndVarSimplify/2007-01-06-TripCount.ll
@@ -25,7 +25,7 @@
 
 cond_next:		; preds = %bb2
 	%tmp2 = getelementptr [5 x i8], [5 x i8]* @foo, i32 0, i32 %i.0		; <i8*> [#uses=1]
-	%tmp3 = load i8* %tmp2		; <i8> [#uses=1]
+	%tmp3 = load i8, i8* %tmp2		; <i8> [#uses=1]
 	%tmp5 = icmp eq i8 %tmp3, 0		; <i1> [#uses=1]
 	br i1 %tmp5, label %bb6, label %bb
 
diff --git a/llvm/test/Transforms/IndVarSimplify/2008-09-02-IVType.ll b/llvm/test/Transforms/IndVarSimplify/2008-09-02-IVType.ll
index 624567d..4c0c327 100644
--- a/llvm/test/Transforms/IndVarSimplify/2008-09-02-IVType.ll
+++ b/llvm/test/Transforms/IndVarSimplify/2008-09-02-IVType.ll
@@ -19,7 +19,7 @@
 define signext i16 @ExtractBufferedBlocksIgnored(%struct.JPEGGlobals* %globp)  nounwind {
 entry:
 	%tmp4311 = getelementptr %struct.JPEGGlobals, %struct.JPEGGlobals* %globp, i32 0, i32 70		; <i32*> [#uses=1]
-	%tmp4412 = load i32* %tmp4311, align 16		; <i32> [#uses=2]
+	%tmp4412 = load i32, i32* %tmp4311, align 16		; <i32> [#uses=2]
 	%tmp4613 = icmp sgt i32 %tmp4412, 0		; <i1> [#uses=1]
 	br i1 %tmp4613, label %bb, label %bb49
 
@@ -27,9 +27,9 @@
 	%component.09 = phi i16 [ 0, %entry ], [ %tmp37, %bb28 ]		; <i16> [#uses=2]
 	%tmp12 = sext i16 %component.09 to i32		; <i32> [#uses=2]
 	%tmp6 = getelementptr %struct.JPEGGlobals, %struct.JPEGGlobals* %globp, i32 0, i32 77, i32 %tmp12		; <i16**> [#uses=2]
-	%tmp7 = load i16** %tmp6, align 4		; <i16*> [#uses=2]
+	%tmp7 = load i16*, i16** %tmp6, align 4		; <i16*> [#uses=2]
 	%tmp235 = getelementptr %struct.JPEGGlobals, %struct.JPEGGlobals* %globp, i32 0, i32 71, i32 %tmp12		; <i32*> [#uses=1]
-	%tmp246 = load i32* %tmp235, align 4		; <i32> [#uses=2]
+	%tmp246 = load i32, i32* %tmp235, align 4		; <i32> [#uses=2]
 	%tmp267 = icmp sgt i32 %tmp246, 0		; <i1> [#uses=1]
 	br i1 %tmp267, label %bb8, label %bb28
 
diff --git a/llvm/test/Transforms/IndVarSimplify/2008-10-03-CouldNotCompute.ll b/llvm/test/Transforms/IndVarSimplify/2008-10-03-CouldNotCompute.ll
index 23e7884..1248154 100644
--- a/llvm/test/Transforms/IndVarSimplify/2008-10-03-CouldNotCompute.ll
+++ b/llvm/test/Transforms/IndVarSimplify/2008-10-03-CouldNotCompute.ll
@@ -8,7 +8,7 @@
 	br i1 false, label %bb.nph, label %return
 
 bb.nph:		; preds = %entry
-	%0 = load i32* @foo, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* @foo, align 4		; <i32> [#uses=1]
 	%1 = sext i32 %0 to i64		; <i64> [#uses=1]
 	br label %bb
 
diff --git a/llvm/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll b/llvm/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll
index befdcb5..ab2af32 100644
--- a/llvm/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll
+++ b/llvm/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll
@@ -15,92 +15,92 @@
 
 bb1:		; preds = %bb1, %bb1.thread
 	%i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %84, %bb1 ]		; <i32> [#uses=19]
-	%0 = load i32** @a, align 8		; <i32*> [#uses=1]
-	%1 = load i32** @b, align 8		; <i32*> [#uses=1]
+	%0 = load i32*, i32** @a, align 8		; <i32*> [#uses=1]
+	%1 = load i32*, i32** @b, align 8		; <i32*> [#uses=1]
 	%2 = sext i32 %i.0.reg2mem.0 to i64		; <i64> [#uses=1]
 	%3 = getelementptr i32, i32* %1, i64 %2		; <i32*> [#uses=1]
-	%4 = load i32* %3, align 1		; <i32> [#uses=1]
-	%5 = load i32** @c, align 8		; <i32*> [#uses=1]
+	%4 = load i32, i32* %3, align 1		; <i32> [#uses=1]
+	%5 = load i32*, i32** @c, align 8		; <i32*> [#uses=1]
 	%6 = sext i32 %i.0.reg2mem.0 to i64		; <i64> [#uses=1]
 	%7 = getelementptr i32, i32* %5, i64 %6		; <i32*> [#uses=1]
-	%8 = load i32* %7, align 1		; <i32> [#uses=1]
+	%8 = load i32, i32* %7, align 1		; <i32> [#uses=1]
 	%9 = add i32 %8, %4		; <i32> [#uses=1]
 	%10 = sext i32 %i.0.reg2mem.0 to i64		; <i64> [#uses=1]
 	%11 = getelementptr i32, i32* %0, i64 %10		; <i32*> [#uses=1]
 	store i32 %9, i32* %11, align 1
-	%12 = load i32** @a, align 8		; <i32*> [#uses=1]
+	%12 = load i32*, i32** @a, align 8		; <i32*> [#uses=1]
 	%13 = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=1]
-	%14 = load i32** @b, align 8		; <i32*> [#uses=1]
+	%14 = load i32*, i32** @b, align 8		; <i32*> [#uses=1]
 	%15 = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=1]
 	%16 = sext i32 %15 to i64		; <i64> [#uses=1]
 	%17 = getelementptr i32, i32* %14, i64 %16		; <i32*> [#uses=1]
-	%18 = load i32* %17, align 1		; <i32> [#uses=1]
-	%19 = load i32** @c, align 8		; <i32*> [#uses=1]
+	%18 = load i32, i32* %17, align 1		; <i32> [#uses=1]
+	%19 = load i32*, i32** @c, align 8		; <i32*> [#uses=1]
 	%20 = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=1]
 	%21 = sext i32 %20 to i64		; <i64> [#uses=1]
 	%22 = getelementptr i32, i32* %19, i64 %21		; <i32*> [#uses=1]
-	%23 = load i32* %22, align 1		; <i32> [#uses=1]
+	%23 = load i32, i32* %22, align 1		; <i32> [#uses=1]
 	%24 = add i32 %23, %18		; <i32> [#uses=1]
 	%25 = sext i32 %13 to i64		; <i64> [#uses=1]
 	%26 = getelementptr i32, i32* %12, i64 %25		; <i32*> [#uses=1]
 	store i32 %24, i32* %26, align 1
-	%27 = load i32** @a, align 8		; <i32*> [#uses=1]
+	%27 = load i32*, i32** @a, align 8		; <i32*> [#uses=1]
 	%28 = add i32 %i.0.reg2mem.0, 2		; <i32> [#uses=1]
-	%29 = load i32** @b, align 8		; <i32*> [#uses=1]
+	%29 = load i32*, i32** @b, align 8		; <i32*> [#uses=1]
 	%30 = add i32 %i.0.reg2mem.0, 2		; <i32> [#uses=1]
 	%31 = sext i32 %30 to i64		; <i64> [#uses=1]
 	%32 = getelementptr i32, i32* %29, i64 %31		; <i32*> [#uses=1]
-	%33 = load i32* %32, align 1		; <i32> [#uses=1]
-	%34 = load i32** @c, align 8		; <i32*> [#uses=1]
+	%33 = load i32, i32* %32, align 1		; <i32> [#uses=1]
+	%34 = load i32*, i32** @c, align 8		; <i32*> [#uses=1]
 	%35 = add i32 %i.0.reg2mem.0, 2		; <i32> [#uses=1]
 	%36 = sext i32 %35 to i64		; <i64> [#uses=1]
 	%37 = getelementptr i32, i32* %34, i64 %36		; <i32*> [#uses=1]
-	%38 = load i32* %37, align 1		; <i32> [#uses=1]
+	%38 = load i32, i32* %37, align 1		; <i32> [#uses=1]
 	%39 = add i32 %38, %33		; <i32> [#uses=1]
 	%40 = sext i32 %28 to i64		; <i64> [#uses=1]
 	%41 = getelementptr i32, i32* %27, i64 %40		; <i32*> [#uses=1]
 	store i32 %39, i32* %41, align 1
-	%42 = load i32** @d, align 8		; <i32*> [#uses=1]
-	%43 = load i32** @e, align 8		; <i32*> [#uses=1]
+	%42 = load i32*, i32** @d, align 8		; <i32*> [#uses=1]
+	%43 = load i32*, i32** @e, align 8		; <i32*> [#uses=1]
 	%44 = sext i32 %i.0.reg2mem.0 to i64		; <i64> [#uses=1]
 	%45 = getelementptr i32, i32* %43, i64 %44		; <i32*> [#uses=1]
-	%46 = load i32* %45, align 1		; <i32> [#uses=1]
-	%47 = load i32** @f, align 8		; <i32*> [#uses=1]
+	%46 = load i32, i32* %45, align 1		; <i32> [#uses=1]
+	%47 = load i32*, i32** @f, align 8		; <i32*> [#uses=1]
 	%48 = sext i32 %i.0.reg2mem.0 to i64		; <i64> [#uses=1]
 	%49 = getelementptr i32, i32* %47, i64 %48		; <i32*> [#uses=1]
-	%50 = load i32* %49, align 1		; <i32> [#uses=1]
+	%50 = load i32, i32* %49, align 1		; <i32> [#uses=1]
 	%51 = add i32 %50, %46		; <i32> [#uses=1]
 	%52 = sext i32 %i.0.reg2mem.0 to i64		; <i64> [#uses=1]
 	%53 = getelementptr i32, i32* %42, i64 %52		; <i32*> [#uses=1]
 	store i32 %51, i32* %53, align 1
-	%54 = load i32** @d, align 8		; <i32*> [#uses=1]
+	%54 = load i32*, i32** @d, align 8		; <i32*> [#uses=1]
 	%55 = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=1]
-	%56 = load i32** @e, align 8		; <i32*> [#uses=1]
+	%56 = load i32*, i32** @e, align 8		; <i32*> [#uses=1]
 	%57 = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=1]
 	%58 = sext i32 %57 to i64		; <i64> [#uses=1]
 	%59 = getelementptr i32, i32* %56, i64 %58		; <i32*> [#uses=1]
-	%60 = load i32* %59, align 1		; <i32> [#uses=1]
-	%61 = load i32** @f, align 8		; <i32*> [#uses=1]
+	%60 = load i32, i32* %59, align 1		; <i32> [#uses=1]
+	%61 = load i32*, i32** @f, align 8		; <i32*> [#uses=1]
 	%62 = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=1]
 	%63 = sext i32 %62 to i64		; <i64> [#uses=1]
 	%64 = getelementptr i32, i32* %61, i64 %63		; <i32*> [#uses=1]
-	%65 = load i32* %64, align 1		; <i32> [#uses=1]
+	%65 = load i32, i32* %64, align 1		; <i32> [#uses=1]
 	%66 = add i32 %65, %60		; <i32> [#uses=1]
 	%67 = sext i32 %55 to i64		; <i64> [#uses=1]
 	%68 = getelementptr i32, i32* %54, i64 %67		; <i32*> [#uses=1]
 	store i32 %66, i32* %68, align 1
-	%69 = load i32** @d, align 8		; <i32*> [#uses=1]
+	%69 = load i32*, i32** @d, align 8		; <i32*> [#uses=1]
 	%70 = add i32 %i.0.reg2mem.0, 2		; <i32> [#uses=1]
-	%71 = load i32** @e, align 8		; <i32*> [#uses=1]
+	%71 = load i32*, i32** @e, align 8		; <i32*> [#uses=1]
 	%72 = add i32 %i.0.reg2mem.0, 2		; <i32> [#uses=1]
 	%73 = sext i32 %72 to i64		; <i64> [#uses=1]
 	%74 = getelementptr i32, i32* %71, i64 %73		; <i32*> [#uses=1]
-	%75 = load i32* %74, align 1		; <i32> [#uses=1]
-	%76 = load i32** @f, align 8		; <i32*> [#uses=1]
+	%75 = load i32, i32* %74, align 1		; <i32> [#uses=1]
+	%76 = load i32*, i32** @f, align 8		; <i32*> [#uses=1]
 	%77 = add i32 %i.0.reg2mem.0, 2		; <i32> [#uses=1]
 	%78 = sext i32 %77 to i64		; <i64> [#uses=1]
 	%79 = getelementptr i32, i32* %76, i64 %78		; <i32*> [#uses=1]
-	%80 = load i32* %79, align 1		; <i32> [#uses=1]
+	%80 = load i32, i32* %79, align 1		; <i32> [#uses=1]
 	%81 = add i32 %80, %75		; <i32> [#uses=1]
 	%82 = sext i32 %70 to i64		; <i64> [#uses=1]
 	%83 = getelementptr i32, i32* %69, i64 %82		; <i32*> [#uses=1]
diff --git a/llvm/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll b/llvm/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll
index 6815f31..921a393 100644
--- a/llvm/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll
+++ b/llvm/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll
@@ -29,123 +29,123 @@
 
 bb1:		; preds = %bb1, %bb1.thread
 	%i.0.reg2mem.0 = phi i32 [ 0, %bb1.thread ], [ %116, %bb1 ]		; <i32> [#uses=22]
-	%0 = load i32** @a, align 8		; <i32*> [#uses=1]
+	%0 = load i32*, i32** @a, align 8		; <i32*> [#uses=1]
 	%1 = and i32 %i.0.reg2mem.0, 15		; <i32> [#uses=1]
-	%2 = load i32** @b, align 8		; <i32*> [#uses=1]
+	%2 = load i32*, i32** @b, align 8		; <i32*> [#uses=1]
 	%3 = and i32 %i.0.reg2mem.0, 15		; <i32> [#uses=1]
 	%4 = zext i32 %3 to i64		; <i64> [#uses=1]
 	%5 = getelementptr i32, i32* %2, i64 %4		; <i32*> [#uses=1]
-	%6 = load i32* %5, align 1		; <i32> [#uses=1]
-	%7 = load i32** @c, align 8		; <i32*> [#uses=1]
+	%6 = load i32, i32* %5, align 1		; <i32> [#uses=1]
+	%7 = load i32*, i32** @c, align 8		; <i32*> [#uses=1]
 	%8 = and i32 %i.0.reg2mem.0, 15		; <i32> [#uses=1]
 	%9 = zext i32 %8 to i64		; <i64> [#uses=1]
 	%10 = getelementptr i32, i32* %7, i64 %9		; <i32*> [#uses=1]
-	%11 = load i32* %10, align 1		; <i32> [#uses=1]
+	%11 = load i32, i32* %10, align 1		; <i32> [#uses=1]
 	%12 = add i32 %11, %6		; <i32> [#uses=1]
 	%13 = zext i32 %1 to i64		; <i64> [#uses=1]
 	%14 = getelementptr i32, i32* %0, i64 %13		; <i32*> [#uses=1]
 	store i32 %12, i32* %14, align 1
-	%15 = load i32** @a, align 8		; <i32*> [#uses=1]
+	%15 = load i32*, i32** @a, align 8		; <i32*> [#uses=1]
 	%16 = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=1]
 	%17 = and i32 %16, 15		; <i32> [#uses=1]
-	%18 = load i32** @b, align 8		; <i32*> [#uses=1]
+	%18 = load i32*, i32** @b, align 8		; <i32*> [#uses=1]
 	%19 = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=1]
 	%20 = and i32 %19, 15		; <i32> [#uses=1]
 	%21 = zext i32 %20 to i64		; <i64> [#uses=1]
 	%22 = getelementptr i32, i32* %18, i64 %21		; <i32*> [#uses=1]
-	%23 = load i32* %22, align 1		; <i32> [#uses=1]
-	%24 = load i32** @c, align 8		; <i32*> [#uses=1]
+	%23 = load i32, i32* %22, align 1		; <i32> [#uses=1]
+	%24 = load i32*, i32** @c, align 8		; <i32*> [#uses=1]
 	%25 = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=1]
 	%26 = and i32 %25, 15		; <i32> [#uses=1]
 	%27 = zext i32 %26 to i64		; <i64> [#uses=1]
 	%28 = getelementptr i32, i32* %24, i64 %27		; <i32*> [#uses=1]
-	%29 = load i32* %28, align 1		; <i32> [#uses=1]
+	%29 = load i32, i32* %28, align 1		; <i32> [#uses=1]
 	%30 = add i32 %29, %23		; <i32> [#uses=1]
 	%31 = zext i32 %17 to i64		; <i64> [#uses=1]
 	%32 = getelementptr i32, i32* %15, i64 %31		; <i32*> [#uses=1]
 	store i32 %30, i32* %32, align 1
-	%33 = load i32** @a, align 8		; <i32*> [#uses=1]
+	%33 = load i32*, i32** @a, align 8		; <i32*> [#uses=1]
 	%34 = add i32 %i.0.reg2mem.0, 2		; <i32> [#uses=1]
 	%35 = and i32 %34, 15		; <i32> [#uses=1]
-	%36 = load i32** @b, align 8		; <i32*> [#uses=1]
+	%36 = load i32*, i32** @b, align 8		; <i32*> [#uses=1]
 	%37 = add i32 %i.0.reg2mem.0, 2		; <i32> [#uses=1]
 	%38 = and i32 %37, 15		; <i32> [#uses=1]
 	%39 = zext i32 %38 to i64		; <i64> [#uses=1]
 	%40 = getelementptr i32, i32* %36, i64 %39		; <i32*> [#uses=1]
-	%41 = load i32* %40, align 1		; <i32> [#uses=1]
-	%42 = load i32** @c, align 8		; <i32*> [#uses=1]
+	%41 = load i32, i32* %40, align 1		; <i32> [#uses=1]
+	%42 = load i32*, i32** @c, align 8		; <i32*> [#uses=1]
 	%43 = add i32 %i.0.reg2mem.0, 2		; <i32> [#uses=1]
 	%44 = and i32 %43, 15		; <i32> [#uses=1]
 	%45 = zext i32 %44 to i64		; <i64> [#uses=1]
 	%46 = getelementptr i32, i32* %42, i64 %45		; <i32*> [#uses=1]
-	%47 = load i32* %46, align 1		; <i32> [#uses=1]
+	%47 = load i32, i32* %46, align 1		; <i32> [#uses=1]
 	%48 = add i32 %47, %41		; <i32> [#uses=1]
 	%49 = zext i32 %35 to i64		; <i64> [#uses=1]
 	%50 = getelementptr i32, i32* %33, i64 %49		; <i32*> [#uses=1]
 	store i32 %48, i32* %50, align 1
-	%51 = load i32** @d, align 8		; <i32*> [#uses=1]
+	%51 = load i32*, i32** @d, align 8		; <i32*> [#uses=1]
 	%52 = and i32 %i.0.reg2mem.0, 15		; <i32> [#uses=1]
-	%53 = load i32** @e, align 8		; <i32*> [#uses=1]
+	%53 = load i32*, i32** @e, align 8		; <i32*> [#uses=1]
 	%54 = and i32 %i.0.reg2mem.0, 15		; <i32> [#uses=1]
 	%55 = zext i32 %54 to i64		; <i64> [#uses=1]
 	%56 = getelementptr i32, i32* %53, i64 %55		; <i32*> [#uses=1]
-	%57 = load i32* %56, align 1		; <i32> [#uses=1]
-	%58 = load i32** @f, align 8		; <i32*> [#uses=1]
+	%57 = load i32, i32* %56, align 1		; <i32> [#uses=1]
+	%58 = load i32*, i32** @f, align 8		; <i32*> [#uses=1]
 	%59 = and i32 %i.0.reg2mem.0, 15		; <i32> [#uses=1]
 	%60 = zext i32 %59 to i64		; <i64> [#uses=1]
 	%61 = getelementptr i32, i32* %58, i64 %60		; <i32*> [#uses=1]
-	%62 = load i32* %61, align 1		; <i32> [#uses=1]
+	%62 = load i32, i32* %61, align 1		; <i32> [#uses=1]
 	%63 = sext i32 %i.0.reg2mem.0 to i64		; <i64> [#uses=1]
 	%64 = getelementptr [256 x i32], [256 x i32]* @K, i64 0, i64 %63		; <i32*> [#uses=1]
-	%65 = load i32* %64, align 4		; <i32> [#uses=1]
+	%65 = load i32, i32* %64, align 4		; <i32> [#uses=1]
 	%66 = add i32 %62, %57		; <i32> [#uses=1]
 	%67 = add i32 %66, %65		; <i32> [#uses=1]
 	%68 = zext i32 %52 to i64		; <i64> [#uses=1]
 	%69 = getelementptr i32, i32* %51, i64 %68		; <i32*> [#uses=1]
 	store i32 %67, i32* %69, align 1
-	%70 = load i32** @d, align 8		; <i32*> [#uses=1]
+	%70 = load i32*, i32** @d, align 8		; <i32*> [#uses=1]
 	%71 = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=1]
 	%72 = and i32 %71, 15		; <i32> [#uses=1]
-	%73 = load i32** @e, align 8		; <i32*> [#uses=1]
+	%73 = load i32*, i32** @e, align 8		; <i32*> [#uses=1]
 	%74 = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=1]
 	%75 = and i32 %74, 15		; <i32> [#uses=1]
 	%76 = zext i32 %75 to i64		; <i64> [#uses=1]
 	%77 = getelementptr i32, i32* %73, i64 %76		; <i32*> [#uses=1]
-	%78 = load i32* %77, align 1		; <i32> [#uses=1]
-	%79 = load i32** @f, align 8		; <i32*> [#uses=1]
+	%78 = load i32, i32* %77, align 1		; <i32> [#uses=1]
+	%79 = load i32*, i32** @f, align 8		; <i32*> [#uses=1]
 	%80 = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=1]
 	%81 = and i32 %80, 15		; <i32> [#uses=1]
 	%82 = zext i32 %81 to i64		; <i64> [#uses=1]
 	%83 = getelementptr i32, i32* %79, i64 %82		; <i32*> [#uses=1]
-	%84 = load i32* %83, align 1		; <i32> [#uses=1]
+	%84 = load i32, i32* %83, align 1		; <i32> [#uses=1]
 	%85 = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=1]
 	%86 = sext i32 %85 to i64		; <i64> [#uses=1]
 	%87 = getelementptr [256 x i32], [256 x i32]* @K, i64 0, i64 %86		; <i32*> [#uses=1]
-	%88 = load i32* %87, align 4		; <i32> [#uses=1]
+	%88 = load i32, i32* %87, align 4		; <i32> [#uses=1]
 	%89 = add i32 %84, %78		; <i32> [#uses=1]
 	%90 = add i32 %89, %88		; <i32> [#uses=1]
 	%91 = zext i32 %72 to i64		; <i64> [#uses=1]
 	%92 = getelementptr i32, i32* %70, i64 %91		; <i32*> [#uses=1]
 	store i32 %90, i32* %92, align 1
-	%93 = load i32** @d, align 8		; <i32*> [#uses=1]
+	%93 = load i32*, i32** @d, align 8		; <i32*> [#uses=1]
 	%94 = add i32 %i.0.reg2mem.0, 2		; <i32> [#uses=1]
 	%95 = and i32 %94, 15		; <i32> [#uses=1]
-	%96 = load i32** @e, align 8		; <i32*> [#uses=1]
+	%96 = load i32*, i32** @e, align 8		; <i32*> [#uses=1]
 	%97 = add i32 %i.0.reg2mem.0, 2		; <i32> [#uses=1]
 	%98 = and i32 %97, 15		; <i32> [#uses=1]
 	%99 = zext i32 %98 to i64		; <i64> [#uses=1]
 	%100 = getelementptr i32, i32* %96, i64 %99		; <i32*> [#uses=1]
-	%101 = load i32* %100, align 1		; <i32> [#uses=1]
-	%102 = load i32** @f, align 8		; <i32*> [#uses=1]
+	%101 = load i32, i32* %100, align 1		; <i32> [#uses=1]
+	%102 = load i32*, i32** @f, align 8		; <i32*> [#uses=1]
 	%103 = add i32 %i.0.reg2mem.0, 2		; <i32> [#uses=1]
 	%104 = and i32 %103, 15		; <i32> [#uses=1]
 	%105 = zext i32 %104 to i64		; <i64> [#uses=1]
 	%106 = getelementptr i32, i32* %102, i64 %105		; <i32*> [#uses=1]
-	%107 = load i32* %106, align 1		; <i32> [#uses=1]
+	%107 = load i32, i32* %106, align 1		; <i32> [#uses=1]
 	%108 = add i32 %i.0.reg2mem.0, 2		; <i32> [#uses=1]
 	%109 = sext i32 %108 to i64		; <i64> [#uses=1]
 	%110 = getelementptr [256 x i32], [256 x i32]* @K, i64 0, i64 %109		; <i32*> [#uses=1]
-	%111 = load i32* %110, align 4		; <i32> [#uses=1]
+	%111 = load i32, i32* %110, align 4		; <i32> [#uses=1]
 	%112 = add i32 %107, %101		; <i32> [#uses=1]
 	%113 = add i32 %112, %111		; <i32> [#uses=1]
 	%114 = zext i32 %95 to i64		; <i64> [#uses=1]
diff --git a/llvm/test/Transforms/IndVarSimplify/2011-09-27-hoistsext.ll b/llvm/test/Transforms/IndVarSimplify/2011-09-27-hoistsext.ll
index 9716778c..f0765e7 100644
--- a/llvm/test/Transforms/IndVarSimplify/2011-09-27-hoistsext.ll
+++ b/llvm/test/Transforms/IndVarSimplify/2011-09-27-hoistsext.ll
@@ -18,7 +18,7 @@
   %add174 = add nsw i32 %i2.115, %x
   %idxprom177 = sext i32 %add174 to i64
   %arrayidx179 = getelementptr inbounds double, double* %data, i64 %idxprom177
-  %tmp180 = load double* %arrayidx179, align 8
+  %tmp180 = load double, double* %arrayidx179, align 8
   %add249 = add nsw i32 %i2.115, %y
   %cmp168 = icmp sgt i32 %add249, %n
   br i1 %cmp168, label %exit, label %for.body
diff --git a/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll b/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
index 2c628f6..a0b1e84 100644
--- a/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
+++ b/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
@@ -26,7 +26,7 @@
   %p.01.us.us = phi i8* [ null, %preheader ], [ %gep, %loop ]
   %s = phi i8 [0, %preheader], [%snext, %loop]
   %gep = getelementptr inbounds i8, i8* %p.01.us.us, i64 1
-  %snext = load i8* %gep
+  %snext = load i8, i8* %gep
   %cmp = icmp ult i8* %gep, %end
   br i1 %cmp, label %loop, label %exit
 
@@ -51,7 +51,7 @@
   %p.01.us.us = phi i8* [ %buf, %preheader ], [ %gep, %loop ]
   %s = phi i8 [0, %preheader], [%snext, %loop]
   %gep = getelementptr inbounds i8, i8* %p.01.us.us, i64 1
-  %snext = load i8* %gep
+  %snext = load i8, i8* %gep
   %cmp = icmp ult i8* %gep, %end
   br i1 %cmp, label %loop, label %exit
 
@@ -80,7 +80,7 @@
   %iv = phi i32 [ 0, %preheader ], [ %ivnext, %loop ]
   %s = phi i8 [0, %preheader], [%snext, %loop]
   %gep = getelementptr inbounds i8, i8* %p.01.us.us, i64 1
-  %snext = load i8* %gep
+  %snext = load i8, i8* %gep
   %ivnext = add i32 %iv, 1
   %cmp = icmp ult i32 %ivnext, %cnt
   br i1 %cmp, label %loop, label %exit
@@ -110,7 +110,7 @@
   %iv = phi i32 [ %bi, %preheader ], [ %ivnext, %loop ]
   %s = phi i8 [0, %preheader], [%snext, %loop]
   %gep = getelementptr inbounds i8, i8* %p.01.us.us, i64 1
-  %snext = load i8* %gep
+  %snext = load i8, i8* %gep
   %ivnext = add i32 %iv, 1
   %cmp = icmp ult i32 %ivnext, %cnt
   br i1 %cmp, label %loop, label %exit
diff --git a/llvm/test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll b/llvm/test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll
index 2683e90..65b2cf6 100644
--- a/llvm/test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll
+++ b/llvm/test/Transforms/IndVarSimplify/2011-11-15-multiexit.ll
@@ -35,6 +35,6 @@
 
 if.end:
   %tmp = phi i32* [ %first.lcssa, %early.exit ], [ %first, %if.then ], [ %first, %entry ], [ undef, %if.else ]
-  %val = load i32* %tmp
+  %val = load i32, i32* %tmp
   ret i32 %val
 }
diff --git a/llvm/test/Transforms/IndVarSimplify/2014-06-21-congruent-constant.ll b/llvm/test/Transforms/IndVarSimplify/2014-06-21-congruent-constant.ll
index 2c738de..1d80e75 100644
--- a/llvm/test/Transforms/IndVarSimplify/2014-06-21-congruent-constant.ll
+++ b/llvm/test/Transforms/IndVarSimplify/2014-06-21-congruent-constant.ll
@@ -13,9 +13,9 @@
   br i1 undef, label %for.end12, label %for.cond.preheader
 
 for.cond.preheader:                               ; preds = %entry
-  %0 = load i32*** @c, align 8
-  %1 = load i32** %0, align 8
-  %2 = load i32* %1, align 4
+  %0 = load i32**, i32*** @c, align 8
+  %1 = load i32*, i32** %0, align 8
+  %2 = load i32, i32* %1, align 4
   br label %for.body
 
 for.body:                                         ; preds = %for.cond.backedge, %for.body9.us, %for.cond.preheader
@@ -26,7 +26,7 @@
   br i1 %tobool1, label %if.end, label %for.cond.backedge
 
 if.end:                                           ; preds = %for.body
-  %5 = load i32* %3, align 4
+  %5 = load i32, i32* %3, align 4
   %tobool4 = icmp eq i32 %5, 0
   br i1 %tobool4, label %for.cond3, label %for.body9.preheader
 
@@ -35,8 +35,8 @@
   br i1 %tobool8, label %for.body9.us, label %for.body9
 
 for.body9.us:                                     ; preds = %for.body9.preheader
-  %6 = load i32** undef, align 8
-  %7 = load i32* %6, align 4
+  %6 = load i32*, i32** undef, align 8
+  %7 = load i32, i32* %6, align 4
   br label %for.body
 
 for.cond3:                                        ; preds = %for.cond3, %if.end
diff --git a/llvm/test/Transforms/IndVarSimplify/ashr-tripcount.ll b/llvm/test/Transforms/IndVarSimplify/ashr-tripcount.ll
index 04446a1..5f33730 100644
--- a/llvm/test/Transforms/IndVarSimplify/ashr-tripcount.ll
+++ b/llvm/test/Transforms/IndVarSimplify/ashr-tripcount.ll
@@ -31,8 +31,8 @@
 	%.pn2 = zext i32 %.pn2.in to i64		; <i64> [#uses=1]
 	%.pn.in = getelementptr [0 x float], [0 x float]* %pow_2_tab.pn, i64 0, i64 %.pn3		; <float*> [#uses=1]
 	%.pn1.in = getelementptr [0 x float], [0 x float]* %pow_2_025_tab.pn, i64 0, i64 %.pn2		; <float*> [#uses=1]
-	%.pn = load float* %.pn.in		; <float> [#uses=1]
-	%.pn1 = load float* %.pn1.in		; <float> [#uses=1]
+	%.pn = load float, float* %.pn.in		; <float> [#uses=1]
+	%.pn1 = load float, float* %.pn1.in		; <float> [#uses=1]
 	%invQuantizer.0 = fmul float %.pn, %.pn1		; <float> [#uses=4]
 	%t3 = ashr i32 %noOfLines, 2		; <i32> [#uses=1]
 	%t4 = icmp sgt i32 %t3, 0		; <i1> [#uses=1]
@@ -47,31 +47,31 @@
 	%k.04 = phi i32 [ %t48, %bb4 ], [ 0, %bb.nph ]		; <i32> [#uses=1]
 	%t6 = sext i32 %i.05 to i64		; <i64> [#uses=1]
 	%t7 = getelementptr i32, i32* %quaSpectrum, i64 %t6		; <i32*> [#uses=1]
-	%t8 = load i32* %t7, align 4		; <i32> [#uses=1]
+	%t8 = load i32, i32* %t7, align 4		; <i32> [#uses=1]
 	%t9 = zext i32 %t8 to i64		; <i64> [#uses=1]
 	%t10 = getelementptr float, float* %pow4_3_tab_ptr, i64 %t9		; <float*> [#uses=1]
-	%t11 = load float* %t10, align 4		; <float> [#uses=1]
+	%t11 = load float, float* %t10, align 4		; <float> [#uses=1]
 	%t12 = or i32 %i.05, 1		; <i32> [#uses=1]
 	%t13 = sext i32 %t12 to i64		; <i64> [#uses=1]
 	%t14 = getelementptr i32, i32* %quaSpectrum, i64 %t13		; <i32*> [#uses=1]
-	%t15 = load i32* %t14, align 4		; <i32> [#uses=1]
+	%t15 = load i32, i32* %t14, align 4		; <i32> [#uses=1]
 	%t16 = zext i32 %t15 to i64		; <i64> [#uses=1]
 	%t17 = getelementptr float, float* %pow4_3_tab_ptr, i64 %t16		; <float*> [#uses=1]
-	%t18 = load float* %t17, align 4		; <float> [#uses=1]
+	%t18 = load float, float* %t17, align 4		; <float> [#uses=1]
 	%t19 = or i32 %i.05, 2		; <i32> [#uses=1]
 	%t20 = sext i32 %t19 to i64		; <i64> [#uses=1]
 	%t21 = getelementptr i32, i32* %quaSpectrum, i64 %t20		; <i32*> [#uses=1]
-	%t22 = load i32* %t21, align 4		; <i32> [#uses=1]
+	%t22 = load i32, i32* %t21, align 4		; <i32> [#uses=1]
 	%t23 = zext i32 %t22 to i64		; <i64> [#uses=1]
 	%t24 = getelementptr float, float* %pow4_3_tab_ptr, i64 %t23		; <float*> [#uses=1]
-	%t25 = load float* %t24, align 4		; <float> [#uses=1]
+	%t25 = load float, float* %t24, align 4		; <float> [#uses=1]
 	%t26 = or i32 %i.05, 3		; <i32> [#uses=1]
 	%t27 = sext i32 %t26 to i64		; <i64> [#uses=1]
 	%t28 = getelementptr i32, i32* %quaSpectrum, i64 %t27		; <i32*> [#uses=1]
-	%t29 = load i32* %t28, align 4		; <i32> [#uses=1]
+	%t29 = load i32, i32* %t28, align 4		; <i32> [#uses=1]
 	%t30 = zext i32 %t29 to i64		; <i64> [#uses=1]
 	%t31 = getelementptr float, float* %pow4_3_tab_ptr, i64 %t30		; <float*> [#uses=1]
-	%t32 = load float* %t31, align 4		; <float> [#uses=1]
+	%t32 = load float, float* %t31, align 4		; <float> [#uses=1]
 	%t33 = fmul float %t11, %invQuantizer.0		; <float> [#uses=1]
 	%t34 = sext i32 %i.05 to i64		; <i64> [#uses=1]
 	%t35 = getelementptr float, float* %iquaSpectrum, i64 %t34		; <float*> [#uses=1]
diff --git a/llvm/test/Transforms/IndVarSimplify/avoid-i0.ll b/llvm/test/Transforms/IndVarSimplify/avoid-i0.ll
index 22f2e4b..cc38590 100644
--- a/llvm/test/Transforms/IndVarSimplify/avoid-i0.ll
+++ b/llvm/test/Transforms/IndVarSimplify/avoid-i0.ll
@@ -34,25 +34,25 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store i32 %_si1, i32* %_si1_addr
 	store i8 %_si2, i8* %_si2_addr
-	%1 = load i8* %_si2_addr, align 1		; <i8> [#uses=1]
+	%1 = load i8, i8* %_si2_addr, align 1		; <i8> [#uses=1]
 	%2 = sext i8 %1 to i32		; <i32> [#uses=1]
-	%3 = load i32* %_si1_addr, align 4		; <i32> [#uses=1]
+	%3 = load i32, i32* %_si1_addr, align 4		; <i32> [#uses=1]
 	%4 = xor i32 %2, %3		; <i32> [#uses=1]
-	%5 = load i8* %_si2_addr, align 1		; <i8> [#uses=1]
+	%5 = load i8, i8* %_si2_addr, align 1		; <i8> [#uses=1]
 	%6 = sext i8 %5 to i32		; <i32> [#uses=1]
 	%7 = sub i32 7, %6		; <i32> [#uses=1]
-	%8 = load i32* %_si1_addr, align 4		; <i32> [#uses=1]
+	%8 = load i32, i32* %_si1_addr, align 4		; <i32> [#uses=1]
 	%9 = shl i32 %8, %7		; <i32> [#uses=1]
 	%10 = and i32 %4, %9		; <i32> [#uses=1]
 	%11 = icmp slt i32 %10, 0		; <i1> [#uses=1]
 	%12 = zext i1 %11 to i32		; <i32> [#uses=1]
 	store i32 %12, i32* %0, align 4
-	%13 = load i32* %0, align 4		; <i32> [#uses=1]
+	%13 = load i32, i32* %0, align 4		; <i32> [#uses=1]
 	store i32 %13, i32* %retval, align 4
 	br label %return
 
 return:		; preds = %entry
-	%retval1 = load i32* %retval		; <i32> [#uses=1]
+	%retval1 = load i32, i32* %retval		; <i32> [#uses=1]
 	%retval12 = trunc i32 %retval1 to i8		; <i8> [#uses=1]
 	ret i8 %retval12
 }
@@ -66,15 +66,15 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store i32 %_ui1, i32* %_ui1_addr
 	store i32 %_ui2, i32* %_ui2_addr
-	%1 = load i32* %_ui1_addr, align 4		; <i32> [#uses=1]
+	%1 = load i32, i32* %_ui1_addr, align 4		; <i32> [#uses=1]
 	%2 = sub i32 %1, 1		; <i32> [#uses=1]
 	store i32 %2, i32* %0, align 4
-	%3 = load i32* %0, align 4		; <i32> [#uses=1]
+	%3 = load i32, i32* %0, align 4		; <i32> [#uses=1]
 	store i32 %3, i32* %retval, align 4
 	br label %return
 
 return:		; preds = %entry
-	%retval1 = load i32* %retval		; <i32> [#uses=1]
+	%retval1 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval1
 }
 
@@ -90,31 +90,31 @@
 	br label %bb4
 
 bb:		; preds = %bb4
-	%0 = load volatile i32* @x, align 4		; <i32> [#uses=1]
+	%0 = load volatile i32, i32* @x, align 4		; <i32> [#uses=1]
 	store i32 %0, i32* %vol.0, align 4
 	store i32 0, i32* %l_52, align 4
 	br label %bb2
 
 bb1:		; preds = %bb2
-	%1 = load i32* %l_52, align 4		; <i32> [#uses=1]
+	%1 = load i32, i32* %l_52, align 4		; <i32> [#uses=1]
 	%2 = call i32 @safe_sub_func_uint64_t_u_u(i32 %1, i32 1) nounwind		; <i32> [#uses=1]
 	store i32 %2, i32* %l_52, align 4
 	br label %bb2
 
 bb2:		; preds = %bb1, %bb
-	%3 = load i32* %l_52, align 4		; <i32> [#uses=1]
+	%3 = load i32, i32* %l_52, align 4		; <i32> [#uses=1]
 	%4 = icmp eq i32 %3, 0		; <i1> [#uses=1]
 	br i1 %4, label %bb1, label %bb3
 
 bb3:		; preds = %bb2
-	%5 = load i32* %l_52, align 4		; <i32> [#uses=1]
+	%5 = load i32, i32* %l_52, align 4		; <i32> [#uses=1]
 	%6 = call signext i8 @safe_sub_func_int32_t_s_s(i32 %5, i8 signext 1) nounwind		; <i8> [#uses=1]
 	%7 = sext i8 %6 to i32		; <i32> [#uses=1]
 	store i32 %7, i32* %l_52, align 4
 	br label %bb4
 
 bb4:		; preds = %bb3, %entry
-	%8 = load i32* %l_52, align 4		; <i32> [#uses=1]
+	%8 = load i32, i32* %l_52, align 4		; <i32> [#uses=1]
 	%9 = icmp ne i32 %8, 0		; <i1> [#uses=1]
 	br i1 %9, label %bb, label %bb5
 
diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll
index b3655c7..4d14b36 100644
--- a/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll
+++ b/llvm/test/Transforms/IndVarSimplify/eliminate-comparison.ll
@@ -16,7 +16,7 @@
   br i1 %cmp9, label %pre, label %return
 
 pre:
-  %t3 = load i32* %p
+  %t3 = load i32, i32* %p
   %tobool.not = icmp ne i32 %t3, 0
   br label %loop
 
@@ -50,22 +50,22 @@
   br label %bb18
 
 bb13:
-  %tmp66 = load i64** %tmp65, align 4
+  %tmp66 = load i64*, i64** %tmp65, align 4
   %tmp68 = getelementptr inbounds i64, i64* %tmp66, i32 %i
-  %tmp69 = load i64* %tmp68, align 4
-  %tmp74 = load i64** %tmp73, align 4
+  %tmp69 = load i64, i64* %tmp68, align 4
+  %tmp74 = load i64*, i64** %tmp73, align 4
   %tmp76 = getelementptr inbounds i64, i64* %tmp74, i32 %i
-  %tmp77 = load i64* %tmp76, align 4
+  %tmp77 = load i64, i64* %tmp76, align 4
   %tmp78 = icmp ugt i64 %tmp69, %tmp77
   br i1 %tmp78, label %bb20.loopexit, label %bb15
 
 bb15:
-  %tmp83 = load i64** %tmp82, align 4
+  %tmp83 = load i64*, i64** %tmp82, align 4
   %tmp85 = getelementptr inbounds i64, i64* %tmp83, i32 %i
-  %tmp86 = load i64* %tmp85, align 4
-  %tmp91 = load i64** %tmp90, align 4
+  %tmp86 = load i64, i64* %tmp85, align 4
+  %tmp91 = load i64*, i64** %tmp90, align 4
   %tmp93 = getelementptr inbounds i64, i64* %tmp91, i32 %i
-  %tmp94 = load i64* %tmp93, align 4
+  %tmp94 = load i64, i64* %tmp93, align 4
   %tmp95 = icmp ult i64 %tmp86, %tmp94
   br i1 %tmp95, label %bb20.loopexit, label %bb17
 
diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-rem.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-rem.ll
index d10ff1c..6f8e6bb 100644
--- a/llvm/test/Transforms/IndVarSimplify/eliminate-rem.ll
+++ b/llvm/test/Transforms/IndVarSimplify/eliminate-rem.ll
@@ -82,12 +82,12 @@
   %t30 = add nsw i64 %t27, %t22                   ; <i64> [#uses=1]
   %t31 = getelementptr inbounds i64, i64* %arg, i64 %t28 ; <i64*> [#uses=1]
   %t32 = zext i32 %t23 to i64                     ; <i64> [#uses=1]
-  %t33 = load i64* %t29                           ; <i64> [#uses=1]
+  %t33 = load i64, i64* %t29                           ; <i64> [#uses=1]
   %t34 = getelementptr inbounds i64, i64* %arg, i64 %t30 ; <i64*> [#uses=1]
-  %t35 = load i64* %t31                           ; <i64> [#uses=1]
+  %t35 = load i64, i64* %t31                           ; <i64> [#uses=1]
   %t36 = add nsw i64 %t32, %t33                   ; <i64> [#uses=1]
   %t37 = add nsw i64 %t36, %t35                   ; <i64> [#uses=1]
-  %t38 = load i64* %t34                           ; <i64> [#uses=1]
+  %t38 = load i64, i64* %t34                           ; <i64> [#uses=1]
   %t39 = add nsw i64 %t37, %t38                   ; <i64> [#uses=1]
   %t40 = trunc i64 %t39 to i32                    ; <i32> [#uses=2]
   %t41 = add nsw i64 %t22, 1                      ; <i64> [#uses=2]
diff --git a/llvm/test/Transforms/IndVarSimplify/indirectbr.ll b/llvm/test/Transforms/IndVarSimplify/indirectbr.ll
index 3f973a8..d580169 100644
--- a/llvm/test/Transforms/IndVarSimplify/indirectbr.ll
+++ b/llvm/test/Transforms/IndVarSimplify/indirectbr.ll
@@ -33,7 +33,7 @@
   %S.31.0 = phi i64 [ %3, %bb16 ], [ 1, %bb7.preheader ], [ 1, %bb14 ] ; <i64> [#uses=2]
   %0 = add nsw i64 %S.31.0, -1                    ; <i64> [#uses=1]
   %1 = getelementptr inbounds [3 x double], [3 x double]* undef, i64 0, i64 %0 ; <double*> [#uses=1]
-  %2 = load double* %1, align 8                   ; <double> [#uses=0]
+  %2 = load double, double* %1, align 8                   ; <double> [#uses=0]
   %3 = add nsw i64 %S.31.0, 1                     ; <i64> [#uses=1]
   br label %bb16
 }
diff --git a/llvm/test/Transforms/IndVarSimplify/iv-fold.ll b/llvm/test/Transforms/IndVarSimplify/iv-fold.ll
index 26a51ce..af8a33b 100644
--- a/llvm/test/Transforms/IndVarSimplify/iv-fold.ll
+++ b/llvm/test/Transforms/IndVarSimplify/iv-fold.ll
@@ -14,11 +14,11 @@
   %0 = phi i32 [ 0, %entry ], [ %inc.2, %while.body ]
   %shr = lshr i32 %0, 5
   %arrayidx = getelementptr inbounds i32, i32* %bitmap, i32 %shr
-  %tmp6 = load i32* %arrayidx, align 4
+  %tmp6 = load i32, i32* %arrayidx, align 4
   %inc.1 = add i32 %0, 1
   %shr.1 = lshr i32 %inc.1, 5
   %arrayidx.1 = getelementptr inbounds i32, i32* %bitmap, i32 %shr.1
-  %tmp6.1 = load i32* %arrayidx.1, align 4
+  %tmp6.1 = load i32, i32* %arrayidx.1, align 4
   %inc.2 = add i32 %inc.1, 1
   %exitcond.3 = icmp eq i32 %inc.2, 128
   br i1 %exitcond.3, label %while.end, label %while.body
@@ -41,11 +41,11 @@
   %0 = phi i32 [ 0, %entry ], [ %inc.3, %while.body ]
   %shr = lshr i32 %0, 5
   %arrayidx = getelementptr inbounds i32, i32* %bitmap, i32 %shr
-  %tmp6 = load i32* %arrayidx, align 4
+  %tmp6 = load i32, i32* %arrayidx, align 4
   %inc.1 = add i32 %0, 1
   %shr.1 = lshr i32 %inc.1, 5
   %arrayidx.1 = getelementptr inbounds i32, i32* %bitmap, i32 %shr.1
-  %tmp6.1 = load i32* %arrayidx.1, align 4
+  %tmp6.1 = load i32, i32* %arrayidx.1, align 4
   %inc.3 = add i32 %inc.1, 2
   %exitcond.3 = icmp eq i32 %inc.3, 96
   br i1 %exitcond.3, label %while.end, label %while.body
diff --git a/llvm/test/Transforms/IndVarSimplify/iv-sext.ll b/llvm/test/Transforms/IndVarSimplify/iv-sext.ll
index 64062c3..89e21e1 100644
--- a/llvm/test/Transforms/IndVarSimplify/iv-sext.ll
+++ b/llvm/test/Transforms/IndVarSimplify/iv-sext.ll
@@ -8,7 +8,7 @@
 
 define void @t(float* %pTmp1, float* %peakWeight, float* %nrgReducePeakrate, i32 %bandEdgeIndex, float %tmp1) nounwind {
 entry:
-	%tmp = load float* %peakWeight, align 4		; <float> [#uses=1]
+	%tmp = load float, float* %peakWeight, align 4		; <float> [#uses=1]
 	%tmp2 = icmp sgt i32 %bandEdgeIndex, 0		; <i1> [#uses=1]
 	br i1 %tmp2, label %bb.nph22, label %return
 
@@ -34,12 +34,12 @@
 	%tmp5 = add i32 %part.016, -1		; <i32> [#uses=1]
 	%tmp6 = sext i32 %tmp5 to i64		; <i64> [#uses=1]
 	%tmp7 = getelementptr float, float* %pTmp1, i64 %tmp6		; <float*> [#uses=1]
-	%tmp8 = load float* %tmp7, align 4		; <float> [#uses=1]
+	%tmp8 = load float, float* %tmp7, align 4		; <float> [#uses=1]
 	%tmp9 = fadd float %tmp8, %distERBlo.120		; <float> [#uses=1]
 	%tmp10 = add i32 %part.016, -1		; <i32> [#uses=1]
 	%tmp11 = sext i32 %tmp10 to i64		; <i64> [#uses=1]
 	%tmp12 = getelementptr float, float* %pTmp1, i64 %tmp11		; <float*> [#uses=1]
-	%tmp13 = load float* %tmp12, align 4		; <float> [#uses=1]
+	%tmp13 = load float, float* %tmp12, align 4		; <float> [#uses=1]
 	%tmp14 = fsub float %distERBhi.121, %tmp13		; <float> [#uses=1]
 	br label %bb3.preheader
 
@@ -58,11 +58,11 @@
 	%peakCount.01 = phi float [ %tmp23, %bb3 ], [ %peakCount.117, %bb.nph ]		; <float> [#uses=1]
 	%tmp16 = sext i32 %loPart.02 to i64		; <i64> [#uses=1]
 	%tmp17 = getelementptr float, float* %pTmp1, i64 %tmp16		; <float*> [#uses=1]
-	%tmp18 = load float* %tmp17, align 4		; <float> [#uses=1]
+	%tmp18 = load float, float* %tmp17, align 4		; <float> [#uses=1]
 	%tmp19 = fsub float %distERBlo.03, %tmp18		; <float> [#uses=3]
 	%tmp20 = sext i32 %loPart.02 to i64		; <i64> [#uses=1]
 	%tmp21 = getelementptr float, float* %peakWeight, i64 %tmp20		; <float*> [#uses=1]
-	%tmp22 = load float* %tmp21, align 4		; <float> [#uses=1]
+	%tmp22 = load float, float* %tmp21, align 4		; <float> [#uses=1]
 	%tmp23 = fsub float %peakCount.01, %tmp22		; <float> [#uses=2]
 	%tmp24 = add i32 %loPart.02, 1		; <i32> [#uses=2]
 	br label %bb3
@@ -98,12 +98,12 @@
 	%peakCount.27 = phi float [ %tmp35, %bb5 ], [ %peakCount.0.lcssa, %bb.nph12 ]		; <float> [#uses=1]
 	%tmp27 = sext i32 %hiPart.08 to i64		; <i64> [#uses=1]
 	%tmp28 = getelementptr float, float* %pTmp1, i64 %tmp27		; <float*> [#uses=1]
-	%tmp29 = load float* %tmp28, align 4		; <float> [#uses=1]
+	%tmp29 = load float, float* %tmp28, align 4		; <float> [#uses=1]
 	%tmp30 = fadd float %tmp29, %distERBhi.29		; <float> [#uses=3]
 	%tmp31 = add i32 %hiPart.08, 1		; <i32> [#uses=4]
 	%tmp32 = sext i32 %tmp31 to i64		; <i64> [#uses=1]
 	%tmp33 = getelementptr float, float* %peakWeight, i64 %tmp32		; <float*> [#uses=1]
-	%tmp34 = load float* %tmp33, align 4		; <float> [#uses=1]
+	%tmp34 = load float, float* %tmp33, align 4		; <float> [#uses=1]
 	%tmp35 = fadd float %tmp34, %peakCount.27		; <float> [#uses=2]
 	br label %bb5
 
diff --git a/llvm/test/Transforms/IndVarSimplify/iv-widen.ll b/llvm/test/Transforms/IndVarSimplify/iv-widen.ll
index 497f9f9..2b69cb1 100644
--- a/llvm/test/Transforms/IndVarSimplify/iv-widen.ll
+++ b/llvm/test/Transforms/IndVarSimplify/iv-widen.ll
@@ -21,7 +21,7 @@
   %tmp23 = zext i32 %.02 to i64
   %tmp33 = add i32 %.02, 1
   %o = getelementptr i32, i32* %a, i32 %.02
-  %v = load i32* %o
+  %v = load i32, i32* %o
   %t = icmp eq i32 %v, 0
   br i1 %t, label %exit24, label %B24
 
diff --git a/llvm/test/Transforms/IndVarSimplify/iv-zext.ll b/llvm/test/Transforms/IndVarSimplify/iv-zext.ll
index 88d6fa2..629a85e 100644
--- a/llvm/test/Transforms/IndVarSimplify/iv-zext.ll
+++ b/llvm/test/Transforms/IndVarSimplify/iv-zext.ll
@@ -12,16 +12,16 @@
 	%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %loop ]
 	%indvar.i8 = and i64 %indvar, 255
 	%t0 = getelementptr double, double* %d, i64 %indvar.i8
-	%t1 = load double* %t0
+	%t1 = load double, double* %t0
 	%t2 = fmul double %t1, 0.1
 	store double %t2, double* %t0
 	%indvar.i24 = and i64 %indvar, 16777215
 	%t3 = getelementptr double, double* %d, i64 %indvar.i24
-	%t4 = load double* %t3
+	%t4 = load double, double* %t3
 	%t5 = fmul double %t4, 2.3
 	store double %t5, double* %t3
 	%t6 = getelementptr double, double* %d, i64 %indvar
-	%t7 = load double* %t6
+	%t7 = load double, double* %t6
 	%t8 = fmul double %t7, 4.5
 	store double %t8, double* %t6
 	%indvar.next = add i64 %indvar, 1
diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-promote.ll b/llvm/test/Transforms/IndVarSimplify/lftr-promote.ll
index 8f82fbd..4fe3191 100644
--- a/llvm/test/Transforms/IndVarSimplify/lftr-promote.ll
+++ b/llvm/test/Transforms/IndVarSimplify/lftr-promote.ll
@@ -18,7 +18,7 @@
 	%i.01 = phi i32 [ %7, %bb3 ], [ 0, %bb.nph ]		; <i32> [#uses=3]
 	%1 = sext i32 %i.01 to i64		; <i64> [#uses=1]
 	%2 = getelementptr double, double* %p, i64 %1		; <double*> [#uses=1]
-	%3 = load double* %2, align 8		; <double> [#uses=1]
+	%3 = load double, double* %2, align 8		; <double> [#uses=1]
 	%4 = fmul double %3, 1.100000e+00		; <double> [#uses=1]
 	%5 = sext i32 %i.01 to i64		; <i64> [#uses=1]
 	%6 = getelementptr double, double* %p, i64 %5		; <double*> [#uses=1]
diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll b/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll
index fa46250..befbb9e 100644
--- a/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll
+++ b/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll
@@ -109,10 +109,10 @@
   %diagidx = add nsw i32 %rowidx, %i
   %diagidxw = sext i32 %diagidx to i64
   %matrixp = getelementptr inbounds [0 x double], [0 x double]* %matrix, i32 0, i64 %diagidxw
-  %v1 = load double* %matrixp
+  %v1 = load double, double* %matrixp
   %iw = sext i32 %i to i64
   %vectorp = getelementptr inbounds [0 x double], [0 x double]* %vector, i32 0, i64 %iw
-  %v2 = load double* %vectorp
+  %v2 = load double, double* %vectorp
   %row.inc = add nsw i32 %rowidx, %ilead
   %i.inc = add nsw i32 %i, 1
   %cmp196 = icmp slt i32 %i.inc, %irow
@@ -143,10 +143,10 @@
   %diagidx = add nsw i32 %rowidx, %i
   %diagidxw = sext i32 %diagidx to i64
   %matrixp = getelementptr inbounds [0 x double], [0 x double]* %matrix, i32 0, i64 %diagidxw
-  %v1 = load double* %matrixp
+  %v1 = load double, double* %matrixp
   %iw = sext i32 %i to i64
   %vectorp = getelementptr inbounds [0 x double], [0 x double]* %vector, i32 0, i64 %iw
-  %v2 = load double* %vectorp
+  %v2 = load double, double* %vectorp
   %row.inc = add nsw i32 %rowidx, %ilead
   %i.inc = add nsw i32 %i, 1
   %cmp196 = icmp slt i32 %i.inc, %irow
diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-zext.ll b/llvm/test/Transforms/IndVarSimplify/lftr-zext.ll
index a5dda92..f5641bc 100644
--- a/llvm/test/Transforms/IndVarSimplify/lftr-zext.ll
+++ b/llvm/test/Transforms/IndVarSimplify/lftr-zext.ll
@@ -14,7 +14,7 @@
   %p.0 = phi i8* [ getelementptr inbounds ([240 x i8]* @data, i64 0, i64 0), %0 ], [ %4, %1 ]
   %.0 = phi i8* [ %a, %0 ], [ %2, %1 ]
   %2 = getelementptr inbounds i8, i8* %.0, i64 1
-  %3 = load i8* %.0, align 1
+  %3 = load i8, i8* %.0, align 1
   %4 = getelementptr inbounds i8, i8* %p.0, i64 1
   store i8 %3, i8* %p.0, align 1
   %5 = add i8 %i.0, 1
diff --git a/llvm/test/Transforms/IndVarSimplify/loop_evaluate7.ll b/llvm/test/Transforms/IndVarSimplify/loop_evaluate7.ll
index 8f86d7b..333ab7a 100644
--- a/llvm/test/Transforms/IndVarSimplify/loop_evaluate7.ll
+++ b/llvm/test/Transforms/IndVarSimplify/loop_evaluate7.ll
@@ -21,7 +21,7 @@
 	br i1 undef, label %bb20, label %bb29
 
 bb20:		; preds = %bb19
-	%0 = load i32* undef, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* undef, align 4		; <i32> [#uses=1]
 	%1 = sub i32 %0, %n		; <i32> [#uses=1]
 	br label %bb23
 
diff --git a/llvm/test/Transforms/IndVarSimplify/loop_evaluate8.ll b/llvm/test/Transforms/IndVarSimplify/loop_evaluate8.ll
index 0d18ddf..28d05df 100644
--- a/llvm/test/Transforms/IndVarSimplify/loop_evaluate8.ll
+++ b/llvm/test/Transforms/IndVarSimplify/loop_evaluate8.ll
@@ -23,7 +23,7 @@
 	br i1 undef, label %bb20, label %bb29
 
 bb20:		; preds = %bb19
-	%0 = load i32* undef, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* undef, align 4		; <i32> [#uses=1]
 	%1 = sub i32 %0, undef		; <i32> [#uses=1]
 	br label %bb23
 
diff --git a/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
index 8d8a451..ca7b899 100644
--- a/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
+++ b/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
@@ -29,7 +29,7 @@
   %s.01 = phi i32 [ 0, %ph ], [ %sinc, %loop ]
   %ofs = sext i32 %i.02 to i64
   %adr = getelementptr inbounds i32, i32* %arr, i64 %ofs
-  %val = load i32* %adr
+  %val = load i32, i32* %adr
   %sinc = add nsw i32 %s.01, %val
   %iinc = add nsw i32 %i.02, 1
   %cond = icmp slt i32 %iinc, %n
@@ -70,7 +70,7 @@
   %s.01 = phi i64 [ 0, %ph ], [ %sinc, %loop ]
   %ofs = sext i32 %i.02 to i64
   %adr = getelementptr inbounds i32, i32* %arr, i64 %ofs
-  %val = load i32* %adr
+  %val = load i32, i32* %adr
   %vall = sext i32 %val to i64
   %sinc = add nsw i64 %s.01, %vall
   %iinc = add nsw i32 %i.02, 1
@@ -171,7 +171,7 @@
   %max = phi i32 [ 0, %entry ], [ %max.next, %loop.inc ]
   %idxprom = sext i32 %idx to i64
   %adr = getelementptr inbounds i32, i32* %base, i64 %idxprom
-  %val = load i32* %adr
+  %val = load i32, i32* %adr
   %cmp19 = icmp sgt i32 %val, %max
   br i1 %cmp19, label %if.then, label %if.else
 
@@ -240,7 +240,7 @@
   %iv = phi i32 [ 0, %entry], [ %iv.next, %loop ]
   %t1 = sext i32 %iv to i64
   %adr = getelementptr i64, i64* %base, i64 %t1
-  %val = load i64* %adr
+  %val = load i64, i64* %adr
   %t2 = or i32 %iv, 1
   %t3 = sext i32 %t2 to i64
   %iv.next = add i32 %iv, 2
diff --git a/llvm/test/Transforms/IndVarSimplify/overflowcheck.ll b/llvm/test/Transforms/IndVarSimplify/overflowcheck.ll
index 0ced040..c3c033d 100644
--- a/llvm/test/Transforms/IndVarSimplify/overflowcheck.ll
+++ b/llvm/test/Transforms/IndVarSimplify/overflowcheck.ll
@@ -28,7 +28,7 @@
   %zxt = zext i32 %i to i64
   %ofs = shl nuw nsw i64 %zxt, 3
   %gep = getelementptr i64, i64* %a, i64 %zxt
-  %v = load i64* %gep, align 8
+  %v = load i64, i64* %gep, align 8
   %truncv = trunc i64 %v to i32
   %adds = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %s, i32 %truncv)
   %ovflows = extractvalue { i32, i1 } %adds, 1
diff --git a/llvm/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll b/llvm/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll
index dc36b99..519d34d 100644
--- a/llvm/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll
+++ b/llvm/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll
@@ -13,7 +13,7 @@
 	br label %bb38.i
 
 bb14.i27:
-	%t0 = load i64* @ue, align 8
+	%t0 = load i64, i64* @ue, align 8
 	%t1 = sub i64 %t0, %i.0.i35
 	%t2 = add i64 %t1, 1
 	br i1 undef, label %bb15.i28, label %bb19.i31
diff --git a/llvm/test/Transforms/IndVarSimplify/polynomial-expand.ll b/llvm/test/Transforms/IndVarSimplify/polynomial-expand.ll
index 6128c12..5708c64d 100644
--- a/llvm/test/Transforms/IndVarSimplify/polynomial-expand.ll
+++ b/llvm/test/Transforms/IndVarSimplify/polynomial-expand.ll
@@ -21,7 +21,7 @@
   %tmp32 = phi i32 [ %tmp37, %bb30 ], [ %tmp27, %bb24 ] ; <i32> [#uses=2]
   %tmp33 = sext i32 %tmp32 to i64                 ; <i64> [#uses=1]
   %tmp35 = getelementptr float, float* %tmp4, i64 %tmp33 ; <%0*> [#uses=1]
-  %tmp36 = load float* %tmp35, align 4               ; <%0> [#uses=0]
+  %tmp36 = load float, float* %tmp35, align 4               ; <%0> [#uses=0]
   %tmp37 = add nsw i32 %tmp32, -1                 ; <i32> [#uses=1]
   %tmp39 = add nsw i32 %tmp31, -1                 ; <i32> [#uses=1]
   %tmp38 = icmp eq i32 %tmp31, 1                  ; <i1> [#uses=1]
diff --git a/llvm/test/Transforms/IndVarSimplify/pr18223.ll b/llvm/test/Transforms/IndVarSimplify/pr18223.ll
index 738f75c..f922aa4 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr18223.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr18223.ll
@@ -8,7 +8,7 @@
 
 define i32 @main() #0 {
 entry:
-  %0 = load i32* @c, align 4
+  %0 = load i32, i32* @c, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %for.body, label %exit
 
diff --git a/llvm/test/Transforms/IndVarSimplify/pr20680.ll b/llvm/test/Transforms/IndVarSimplify/pr20680.ll
index 716e013..0713f31 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr20680.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr20680.ll
@@ -13,9 +13,9 @@
 ; CHECK-NEXT: %[[indvars_iv:.*]] = phi i32 [ %[[indvars_iv_next:.*]], %[[for_inc13:.*]] ], [ -14, %entry ]
 ; br i1 {{.*}}, label %[[for_inc13]], label %
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %tobool2 = icmp eq i32 %0, 0
-  %1 = load i32* @a, align 4
+  %1 = load i32, i32* @a, align 4
   %tobool = icmp eq i32 %1, 0
   br label %for.cond2.preheader
 
@@ -51,7 +51,7 @@
 
 cond.end.us.us:                                   ; preds = %cond.false.us.us, %for.body3.us.us
   %cond.us.us = phi i32 [ %div, %cond.false.us.us ], [ %conv7, %for.body3.us.us ]
-  %4 = load i32* @b, align 4
+  %4 = load i32, i32* @b, align 4
   %cmp91.us.us = icmp slt i32 %4, 1
   br i1 %cmp91.us.us, label %for.inc.lr.ph.us.us, label %for.cond2.loopexit.us.us
 
@@ -87,7 +87,7 @@
 
 cond.end.us:                                      ; preds = %cond.false.us, %for.body3.us
   %cond.us = phi i32 [ %div, %cond.false.us ], [ %conv7, %for.body3.us ]
-  %6 = load i32* @b, align 4
+  %6 = load i32, i32* @b, align 4
   %cmp91.us = icmp slt i32 %6, 1
   br i1 %cmp91.us, label %for.inc.lr.ph.us, label %for.cond2.loopexit.us
 
@@ -133,7 +133,7 @@
 
 cond.end.us5:                                     ; preds = %cond.false.us4, %for.body3.us3
   %cond.us6 = phi i32 [ %div, %cond.false.us4 ], [ %conv7, %for.body3.us3 ]
-  %8 = load i32* @b, align 4
+  %8 = load i32, i32* @b, align 4
   %cmp91.us7 = icmp slt i32 %8, 1
   br i1 %cmp91.us7, label %for.inc.lr.ph.us12, label %for.cond2.loopexit.us11
 
@@ -177,7 +177,7 @@
 
 cond.end:                                         ; preds = %cond.false, %for.body3
   %cond = phi i32 [ %div, %cond.false ], [ %conv7, %for.body3 ]
-  %10 = load i32* @b, align 4
+  %10 = load i32, i32* @b, align 4
   %cmp91 = icmp slt i32 %10, 1
   br i1 %cmp91, label %for.inc.lr.ph, label %for.cond2.loopexit
 
diff --git a/llvm/test/Transforms/IndVarSimplify/pr22222.ll b/llvm/test/Transforms/IndVarSimplify/pr22222.ll
index ccdfe53..d1f0490 100644
--- a/llvm/test/Transforms/IndVarSimplify/pr22222.ll
+++ b/llvm/test/Transforms/IndVarSimplify/pr22222.ll
@@ -9,7 +9,7 @@
 ; Function Attrs: nounwind ssp uwtable
 define i32 @main() {
 entry:
-  %a.promoted13 = load i32* @a, align 4
+  %a.promoted13 = load i32, i32* @a, align 4
   br label %for.cond1.preheader
 
 for.cond1.preheader:                              ; preds = %entry, %for.end
diff --git a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
index 8396bf5..b676501 100644
--- a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
+++ b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
@@ -14,7 +14,7 @@
 	%n.01 = phi i32 [ %t6, %bb1 ], [ 0, %bb.nph ]		; <i32> [#uses=2]
 	%t1 = sext i32 %n.01 to i64		; <i64> [#uses=1]
 	%t2 = getelementptr i64, i64* %first, i64 %t1		; <i64*> [#uses=1]
-	%t3 = load i64* %t2, align 8		; <i64> [#uses=1]
+	%t3 = load i64, i64* %t2, align 8		; <i64> [#uses=1]
 	%t4 = lshr i64 %t3, 4		; <i64> [#uses=1]
 	%t5 = add i64 %t4, %result.02		; <i64> [#uses=2]
 	%t6 = add i32 %n.01, 1		; <i32> [#uses=2]
diff --git a/llvm/test/Transforms/IndVarSimplify/sharpen-range.ll b/llvm/test/Transforms/IndVarSimplify/sharpen-range.ll
index cb090f0..c103da9 100644
--- a/llvm/test/Transforms/IndVarSimplify/sharpen-range.ll
+++ b/llvm/test/Transforms/IndVarSimplify/sharpen-range.ll
@@ -8,7 +8,7 @@
 define i1 @bounded_below_slt(i32* nocapture readonly %buffer) {
 ; CHECK-LABEL: bounded_below_slt
 entry:
-  %length = load i32* %buffer, !range !0
+  %length = load i32, i32* %buffer, !range !0
   %entry.pred = icmp eq i32 %length, 0
   br i1 %entry.pred, label %abort, label %loop.preheader
 
@@ -42,7 +42,7 @@
 define i1 @bounded_below_sle(i32* nocapture readonly %buffer) {
 ; CHECK-LABEL: bounded_below_sle
 entry:
-  %length = load i32* %buffer, !range !0
+  %length = load i32, i32* %buffer, !range !0
   %entry.pred = icmp eq i32 %length, 0
   br i1 %entry.pred, label %abort, label %loop.preheader
 
diff --git a/llvm/test/Transforms/IndVarSimplify/single-element-range.ll b/llvm/test/Transforms/IndVarSimplify/single-element-range.ll
index 4b035ee..e047a0b 100644
--- a/llvm/test/Transforms/IndVarSimplify/single-element-range.ll
+++ b/llvm/test/Transforms/IndVarSimplify/single-element-range.ll
@@ -8,7 +8,7 @@
 	br i1 undef, label %return, label %bb
 
 bb:		; preds = %entry
-	%0 = load i8** undef, align 4		; <i8*> [#uses=2]
+	%0 = load i8*, i8** undef, align 4		; <i8*> [#uses=2]
 	%1 = ptrtoint i8* %0 to i32		; <i32> [#uses=1]
 	%2 = icmp sgt i8* %0, inttoptr (i32 1 to i8*)		; <i1> [#uses=1]
 	br i1 %2, label %bb1, label %bb5
diff --git a/llvm/test/Transforms/IndVarSimplify/sink-alloca.ll b/llvm/test/Transforms/IndVarSimplify/sink-alloca.ll
index c7bb003..8179470 100644
--- a/llvm/test/Transforms/IndVarSimplify/sink-alloca.ll
+++ b/llvm/test/Transforms/IndVarSimplify/sink-alloca.ll
@@ -19,7 +19,7 @@
 
 while.end:                                        ; preds = %while.cond
   store volatile i32 0, i32* %result.i
-  %tmp.i = load volatile i32* %result.i           ; <i32> [#uses=0]
+  %tmp.i = load volatile i32, i32* %result.i           ; <i32> [#uses=0]
   ret i32 0
 }
 declare i32 @bar()
diff --git a/llvm/test/Transforms/IndVarSimplify/udiv.ll b/llvm/test/Transforms/IndVarSimplify/udiv.ll
index 1925e86..04458ff 100644
--- a/llvm/test/Transforms/IndVarSimplify/udiv.ll
+++ b/llvm/test/Transforms/IndVarSimplify/udiv.ll
@@ -18,7 +18,7 @@
 
 cond.true:                                        ; preds = %entry
   %arrayidx = getelementptr inbounds i8*, i8** %argv, i64 1 ; <i8**> [#uses=1]
-  %tmp2 = load i8** %arrayidx                     ; <i8*> [#uses=1]
+  %tmp2 = load i8*, i8** %arrayidx                     ; <i8*> [#uses=1]
   %call = tail call i32 @atoi(i8* %tmp2) nounwind readonly ; <i32> [#uses=1]
   br label %while.cond.preheader
 
@@ -82,7 +82,7 @@
   %count.212 = phi i32 [ 0, %bb.nph16 ], [ %count.1, %for.cond12 ] ; <i32> [#uses=2]
   %i.17 = phi i64 [ 2, %bb.nph16 ], [ %inc37, %for.cond12 ] ; <i64> [#uses=4]
   %arrayidx17 = getelementptr inbounds [8193 x i8], [8193 x i8]* @main.flags, i64 0, i64 %i.17 ; <i8*> [#uses=1]
-  %tmp18 = load i8* %arrayidx17                   ; <i8> [#uses=1]
+  %tmp18 = load i8, i8* %arrayidx17                   ; <i8> [#uses=1]
   %tobool19 = icmp eq i8 %tmp18, 0                ; <i1> [#uses=1]
   br i1 %tobool19, label %for.inc35, label %if.then
 
diff --git a/llvm/test/Transforms/IndVarSimplify/uglygep.ll b/llvm/test/Transforms/IndVarSimplify/uglygep.ll
index 2993e8d..e434389 100644
--- a/llvm/test/Transforms/IndVarSimplify/uglygep.ll
+++ b/llvm/test/Transforms/IndVarSimplify/uglygep.ll
@@ -27,10 +27,10 @@
   br i1 %tmp8, label %bb1, label %bb3
 
 bb1:                                              ; preds = %bb2
-  %tmp = load double*** @tds, align 8             ; <double**> [#uses=1]
+  %tmp = load double**, double*** @tds, align 8             ; <double**> [#uses=1]
   %tmp1 = sext i32 %i.0 to i64                    ; <i64> [#uses=1]
   %tmp2 = getelementptr inbounds double*, double** %tmp, i64 %tmp1 ; <double**> [#uses=1]
-  %tmp3 = load double** %tmp2, align 1            ; <double*> [#uses=1]
+  %tmp3 = load double*, double** %tmp2, align 1            ; <double*> [#uses=1]
   %tmp6 = add nsw i32 %j.0, 1                     ; <i32> [#uses=1]
   br label %bb2
 
diff --git a/llvm/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll b/llvm/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
index 1901f28..642d1ba 100644
--- a/llvm/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
+++ b/llvm/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
@@ -12,10 +12,10 @@
   %add = add i32 %0, %sample
   %idxprom = zext i32 %add to i64
   %arrayidx = getelementptr inbounds float, float* %data, i64 %idxprom
-  %1 = load float* %arrayidx, align 4
+  %1 = load float, float* %arrayidx, align 4
   %mul = fmul float %1, %d
   %arrayidx2 = getelementptr inbounds float, float* %autoc, i64 %indvars.iv
-  %2 = load float* %arrayidx2, align 4
+  %2 = load float, float* %arrayidx2, align 4
   %add3 = fadd float %2, %mul
   store float %add3, float* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
diff --git a/llvm/test/Transforms/IndVarSimplify/use-range-metadata.ll b/llvm/test/Transforms/IndVarSimplify/use-range-metadata.ll
index ea3b12d..1f01426 100644
--- a/llvm/test/Transforms/IndVarSimplify/use-range-metadata.ll
+++ b/llvm/test/Transforms/IndVarSimplify/use-range-metadata.ll
@@ -6,7 +6,7 @@
 
 define i1 @iterate(i32* nocapture readonly %buffer) {
 entry:
-  %length = load i32* %buffer, !range !0
+  %length = load i32, i32* %buffer, !range !0
   br label %loop.preheader
 
 loop.preheader:
diff --git a/llvm/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll b/llvm/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll
index a622c01..5fa4a17 100644
--- a/llvm/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll
+++ b/llvm/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll
@@ -7,11 +7,11 @@
 define void @vnum_test8(i32* %data) {
 entry:
         %tmp.1 = getelementptr i32, i32* %data, i32 3                ; <i32*> [#uses=1]
-        %tmp.2 = load i32* %tmp.1               ; <i32> [#uses=2]
+        %tmp.2 = load i32, i32* %tmp.1               ; <i32> [#uses=2]
         %tmp.4 = getelementptr i32, i32* %data, i32 4                ; <i32*> [#uses=1]
-        %tmp.5 = load i32* %tmp.4               ; <i32> [#uses=2]
+        %tmp.5 = load i32, i32* %tmp.4               ; <i32> [#uses=2]
         %tmp.8 = getelementptr i32, i32* %data, i32 2                ; <i32*> [#uses=1]
-        %tmp.9 = load i32* %tmp.8               ; <i32> [#uses=3]
+        %tmp.9 = load i32, i32* %tmp.8               ; <i32> [#uses=3]
         %tmp.125 = icmp sgt i32 %tmp.2, 0               ; <i1> [#uses=1]
         br i1 %tmp.125, label %no_exit.preheader, label %return
 
diff --git a/llvm/test/Transforms/IndVarSimplify/verify-scev.ll b/llvm/test/Transforms/IndVarSimplify/verify-scev.ll
index b9ce3d6..ddf2e7f 100644
--- a/llvm/test/Transforms/IndVarSimplify/verify-scev.ll
+++ b/llvm/test/Transforms/IndVarSimplify/verify-scev.ll
@@ -175,7 +175,7 @@
   br i1 undef, label %if.end256, label %for.end562
 
 if.end256:                                        ; preds = %if.end250
-  %0 = load i32* undef, align 4
+  %0 = load i32, i32* undef, align 4
   br i1 undef, label %if.then274, label %for.cond404.preheader
 
 for.cond404.preheader:                            ; preds = %if.end256
@@ -379,7 +379,7 @@
   br i1 undef, label %for.inc221, label %for.body65.lr.ph
 
 for.body65.lr.ph:                                 ; preds = %for.body48
-  %0 = load i32* undef, align 4
+  %0 = load i32, i32* undef, align 4
   %1 = sext i32 %0 to i64
   br label %for.body65.us
 
diff --git a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
index 414cae4..6be2238 100644
--- a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
+++ b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
@@ -19,13 +19,13 @@
 define i32 @test1() {
 entry:
   store i32 -1, i32* @idx, align 4
-  %0 = load i32* @e, align 4
+  %0 = load i32, i32* @e, align 4
   %cmp4 = icmp slt i32 %0, 0
   br i1 %cmp4, label %for.end.loopexit, label %for.body.lr.ph
 
 for.body.lr.ph:
-  %1 = load i32** @ptr, align 8
-  %2 = load i32* @e, align 4
+  %1 = load i32*, i32** @ptr, align 8
+  %2 = load i32, i32* @e, align 4
   br label %for.body
 
 for.cond:
@@ -37,7 +37,7 @@
   %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.cond ]
   %idxprom = sext i32 %i.05 to i64
   %arrayidx = getelementptr inbounds i32, i32* %1, i64 %idxprom
-  %3 = load i32* %arrayidx, align 4
+  %3 = load i32, i32* %arrayidx, align 4
   %tobool = icmp eq i32 %3, 0
   br i1 %tobool, label %if.then, label %for.cond
 
@@ -53,7 +53,7 @@
   br label %for.end
 
 for.end:
-  %4 = load i32* @idx, align 4
+  %4 = load i32, i32* @idx, align 4
   ret i32 %4
 }
 
@@ -82,10 +82,10 @@
   %storemerge14.us = phi i32 [ 0, %for.body4.lr.ph.us ], [ %inc.us, %for.body4.us ]
   %idxprom.us = sext i32 %storemerge14.us to i64
   %arrayidx6.us = getelementptr inbounds [8 x i8], [8 x i8]* %a, i64 %idxprom5.us, i64 %idxprom.us
-  %0 = load i8* %arrayidx6.us, align 1
+  %0 = load i8, i8* %arrayidx6.us, align 1
   %idxprom7.us = zext i8 %0 to i64
   %arrayidx8.us = getelementptr inbounds i8, i8* %b, i64 %idxprom7.us
-  %1 = load i8* %arrayidx8.us, align 1
+  %1 = load i8, i8* %arrayidx8.us, align 1
   store i8 %1, i8* %arrayidx6.us, align 1
   %inc.us = add nsw i32 %storemerge14.us, 1
   %cmp2.us = icmp slt i32 %inc.us, %conv
@@ -127,7 +127,7 @@
 for.body:
   %idxprom = sext i32 %i.0 to i64
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %sum.0, %0
   %inc = add nsw i32 %i.0, 1
   br label %for.cond
@@ -181,7 +181,7 @@
 for.body:
   %idxprom = zext i32 %i.0 to i64
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %sum.0, %0
   %inc = add nsw i32 %i.0, 1
   br label %for.cond
diff --git a/llvm/test/Transforms/IndVarSimplify/widen-nsw.ll b/llvm/test/Transforms/IndVarSimplify/widen-nsw.ll
index 01aa6f9..8dbbb51 100644
--- a/llvm/test/Transforms/IndVarSimplify/widen-nsw.ll
+++ b/llvm/test/Transforms/IndVarSimplify/widen-nsw.ll
@@ -17,7 +17,7 @@
 for.body:                                         ; preds = %for.cond
   %idxprom = sext i32 %i.0 to i64
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %idxprom
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %sum.0, %0
   %inc = add nsw i32 %i.0, 1
   br label %for.cond
diff --git a/llvm/test/Transforms/Inline/2006-07-12-InlinePruneCGUpdate.ll b/llvm/test/Transforms/Inline/2006-07-12-InlinePruneCGUpdate.ll
index 6934562..a0ddacd 100644
--- a/llvm/test/Transforms/Inline/2006-07-12-InlinePruneCGUpdate.ll
+++ b/llvm/test/Transforms/Inline/2006-07-12-InlinePruneCGUpdate.ll
@@ -214,7 +214,7 @@
 	br label %bb3
 
 bb3:		; preds = %bb, %entry
-	%tmp5 = load i8** null		; <i8*> [#uses=1]
+	%tmp5 = load i8*, i8** null		; <i8*> [#uses=1]
 	%tmp = icmp ne i8* null, %tmp5		; <i1> [#uses=1]
 	br i1 %tmp, label %cond_true, label %cond_false
 
diff --git a/llvm/test/Transforms/Inline/2009-01-08-NoInlineDynamicAlloca.ll b/llvm/test/Transforms/Inline/2009-01-08-NoInlineDynamicAlloca.ll
index db2a799..1bfb551 100644
--- a/llvm/test/Transforms/Inline/2009-01-08-NoInlineDynamicAlloca.ll
+++ b/llvm/test/Transforms/Inline/2009-01-08-NoInlineDynamicAlloca.ll
@@ -10,15 +10,15 @@
 	%p = alloca i8*		; <i8**> [#uses=2]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store i32 %i, i32* %i_addr
-	%0 = load i32* %i_addr, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* %i_addr, align 4		; <i32> [#uses=1]
 	%1 = alloca i8, i32 %0		; <i8*> [#uses=1]
 	store i8* %1, i8** %p, align 4
-	%2 = load i8** %p, align 4		; <i8*> [#uses=1]
+	%2 = load i8*, i8** %p, align 4		; <i8*> [#uses=1]
 	store i8* %2, i8** @q, align 4
 	br label %return
 
 return:		; preds = %entry
-	%retval1 = load i8** %retval		; <i8*> [#uses=1]
+	%retval1 = load i8*, i8** %retval		; <i8*> [#uses=1]
 	ret i8* %retval1
 }
 
@@ -27,7 +27,7 @@
 	%i_addr = alloca i32		; <i32*> [#uses=2]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store i32 %i, i32* %i_addr
-	%0 = load i32* %i_addr, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* %i_addr, align 4		; <i32> [#uses=1]
 	%1 = call i8* @a(i32 %0) nounwind		; <i8*> [#uses=0]
 	br label %return
 
diff --git a/llvm/test/Transforms/Inline/2009-01-13-RecursiveInlineCrash.ll b/llvm/test/Transforms/Inline/2009-01-13-RecursiveInlineCrash.ll
index c0fc691..8d8f20f 100644
--- a/llvm/test/Transforms/Inline/2009-01-13-RecursiveInlineCrash.ll
+++ b/llvm/test/Transforms/Inline/2009-01-13-RecursiveInlineCrash.ll
@@ -76,22 +76,22 @@
 
 bb:		; preds = %entry
 	%0 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 3		; <%struct.quad_struct**> [#uses=1]
-	%1 = load %struct.quad_struct** %0, align 4		; <%struct.quad_struct*> [#uses=1]
+	%1 = load %struct.quad_struct*, %struct.quad_struct** %0, align 4		; <%struct.quad_struct*> [#uses=1]
 	ret %struct.quad_struct* %1
 
 bb1:		; preds = %entry
 	%2 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 2		; <%struct.quad_struct**> [#uses=1]
-	%3 = load %struct.quad_struct** %2, align 4		; <%struct.quad_struct*> [#uses=1]
+	%3 = load %struct.quad_struct*, %struct.quad_struct** %2, align 4		; <%struct.quad_struct*> [#uses=1]
 	ret %struct.quad_struct* %3
 
 bb2:		; preds = %entry
 	%4 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 5		; <%struct.quad_struct**> [#uses=1]
-	%5 = load %struct.quad_struct** %4, align 4		; <%struct.quad_struct*> [#uses=1]
+	%5 = load %struct.quad_struct*, %struct.quad_struct** %4, align 4		; <%struct.quad_struct*> [#uses=1]
 	ret %struct.quad_struct* %5
 
 bb3:		; preds = %entry
 	%6 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 4		; <%struct.quad_struct**> [#uses=1]
-	%7 = load %struct.quad_struct** %6, align 4		; <%struct.quad_struct*> [#uses=1]
+	%7 = load %struct.quad_struct*, %struct.quad_struct** %6, align 4		; <%struct.quad_struct*> [#uses=1]
 	ret %struct.quad_struct* %7
 
 bb5:		; preds = %entry
@@ -101,9 +101,9 @@
 define internal fastcc %struct.quad_struct* @gtequal_adj_neighbor(%struct.quad_struct* nocapture %tree, i32 %d) nounwind readonly {
 entry:
 	%0 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 6		; <%struct.quad_struct**> [#uses=1]
-	%1 = load %struct.quad_struct** %0, align 4		; <%struct.quad_struct*> [#uses=4]
+	%1 = load %struct.quad_struct*, %struct.quad_struct** %0, align 4		; <%struct.quad_struct*> [#uses=4]
 	%2 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 1		; <i32*> [#uses=1]
-	%3 = load i32* %2, align 4		; <i32> [#uses=2]
+	%3 = load i32, i32* %2, align 4		; <i32> [#uses=2]
 	%4 = icmp eq %struct.quad_struct* %1, null		; <i1> [#uses=1]
 	br i1 %4, label %bb3, label %bb
 
@@ -123,7 +123,7 @@
 
 bb4:		; preds = %bb3
 	%9 = getelementptr %struct.quad_struct, %struct.quad_struct* %q.0, i32 0, i32 0		; <i32*> [#uses=1]
-	%10 = load i32* %9, align 4		; <i32> [#uses=1]
+	%10 = load i32, i32* %9, align 4		; <i32> [#uses=1]
 	%11 = icmp eq i32 %10, 2		; <i1> [#uses=1]
 	br i1 %11, label %bb5, label %bb7
 
@@ -141,27 +141,27 @@
 define i32 @perimeter(%struct.quad_struct* nocapture %tree, i32 %size) nounwind readonly {
 entry:
 	%0 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 0		; <i32*> [#uses=1]
-	%1 = load i32* %0, align 4		; <i32> [#uses=1]
+	%1 = load i32, i32* %0, align 4		; <i32> [#uses=1]
 	%2 = icmp eq i32 %1, 2		; <i1> [#uses=1]
 	br i1 %2, label %bb, label %bb2
 
 bb:		; preds = %entry
 	%3 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 4		; <%struct.quad_struct**> [#uses=1]
-	%4 = load %struct.quad_struct** %3, align 4		; <%struct.quad_struct*> [#uses=1]
+	%4 = load %struct.quad_struct*, %struct.quad_struct** %3, align 4		; <%struct.quad_struct*> [#uses=1]
 	%5 = sdiv i32 %size, 2		; <i32> [#uses=1]
 	%6 = call i32 @perimeter(%struct.quad_struct* %4, i32 %5) nounwind		; <i32> [#uses=1]
 	%7 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 5		; <%struct.quad_struct**> [#uses=1]
-	%8 = load %struct.quad_struct** %7, align 4		; <%struct.quad_struct*> [#uses=1]
+	%8 = load %struct.quad_struct*, %struct.quad_struct** %7, align 4		; <%struct.quad_struct*> [#uses=1]
 	%9 = sdiv i32 %size, 2		; <i32> [#uses=1]
 	%10 = call i32 @perimeter(%struct.quad_struct* %8, i32 %9) nounwind		; <i32> [#uses=1]
 	%11 = add i32 %10, %6		; <i32> [#uses=1]
 	%12 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 3		; <%struct.quad_struct**> [#uses=1]
-	%13 = load %struct.quad_struct** %12, align 4		; <%struct.quad_struct*> [#uses=1]
+	%13 = load %struct.quad_struct*, %struct.quad_struct** %12, align 4		; <%struct.quad_struct*> [#uses=1]
 	%14 = sdiv i32 %size, 2		; <i32> [#uses=1]
 	%15 = call i32 @perimeter(%struct.quad_struct* %13, i32 %14) nounwind		; <i32> [#uses=1]
 	%16 = add i32 %15, %11		; <i32> [#uses=1]
 	%17 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 2		; <%struct.quad_struct**> [#uses=1]
-	%18 = load %struct.quad_struct** %17, align 4		; <%struct.quad_struct*> [#uses=1]
+	%18 = load %struct.quad_struct*, %struct.quad_struct** %17, align 4		; <%struct.quad_struct*> [#uses=1]
 	%19 = sdiv i32 %size, 2		; <i32> [#uses=1]
 	%20 = call i32 @perimeter(%struct.quad_struct* %18, i32 %19) nounwind		; <i32> [#uses=1]
 	%21 = add i32 %20, %16		; <i32> [#uses=1]
@@ -169,7 +169,7 @@
 
 bb2:		; preds = %entry
 	%22 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 0		; <i32*> [#uses=1]
-	%23 = load i32* %22, align 4		; <i32> [#uses=1]
+	%23 = load i32, i32* %22, align 4		; <i32> [#uses=1]
 	%24 = icmp eq i32 %23, 0		; <i1> [#uses=1]
 	br i1 %24, label %bb3, label %bb23
 
@@ -180,13 +180,13 @@
 
 bb4:		; preds = %bb3
 	%27 = getelementptr %struct.quad_struct, %struct.quad_struct* %25, i32 0, i32 0		; <i32*> [#uses=1]
-	%28 = load i32* %27, align 4		; <i32> [#uses=1]
+	%28 = load i32, i32* %27, align 4		; <i32> [#uses=1]
 	%29 = icmp eq i32 %28, 1		; <i1> [#uses=1]
 	br i1 %29, label %bb8, label %bb6
 
 bb6:		; preds = %bb4
 	%30 = getelementptr %struct.quad_struct, %struct.quad_struct* %25, i32 0, i32 0		; <i32*> [#uses=1]
-	%31 = load i32* %30, align 4		; <i32> [#uses=1]
+	%31 = load i32, i32* %30, align 4		; <i32> [#uses=1]
 	%32 = icmp eq i32 %31, 2		; <i1> [#uses=1]
 	br i1 %32, label %bb7, label %bb8
 
@@ -202,7 +202,7 @@
 
 bb9:		; preds = %bb8
 	%36 = getelementptr %struct.quad_struct, %struct.quad_struct* %34, i32 0, i32 0		; <i32*> [#uses=1]
-	%37 = load i32* %36, align 4		; <i32> [#uses=1]
+	%37 = load i32, i32* %36, align 4		; <i32> [#uses=1]
 	%38 = icmp eq i32 %37, 1		; <i1> [#uses=1]
 	br i1 %38, label %bb10, label %bb11
 
@@ -212,7 +212,7 @@
 
 bb11:		; preds = %bb9
 	%40 = getelementptr %struct.quad_struct, %struct.quad_struct* %34, i32 0, i32 0		; <i32*> [#uses=1]
-	%41 = load i32* %40, align 4		; <i32> [#uses=1]
+	%41 = load i32, i32* %40, align 4		; <i32> [#uses=1]
 	%42 = icmp eq i32 %41, 2		; <i1> [#uses=1]
 	br i1 %42, label %bb12, label %bb13
 
@@ -229,7 +229,7 @@
 
 bb14:		; preds = %bb13
 	%47 = getelementptr %struct.quad_struct, %struct.quad_struct* %45, i32 0, i32 0		; <i32*> [#uses=1]
-	%48 = load i32* %47, align 4		; <i32> [#uses=1]
+	%48 = load i32, i32* %47, align 4		; <i32> [#uses=1]
 	%49 = icmp eq i32 %48, 1		; <i1> [#uses=1]
 	br i1 %49, label %bb15, label %bb16
 
@@ -239,7 +239,7 @@
 
 bb16:		; preds = %bb14
 	%51 = getelementptr %struct.quad_struct, %struct.quad_struct* %45, i32 0, i32 0		; <i32*> [#uses=1]
-	%52 = load i32* %51, align 4		; <i32> [#uses=1]
+	%52 = load i32, i32* %51, align 4		; <i32> [#uses=1]
 	%53 = icmp eq i32 %52, 2		; <i1> [#uses=1]
 	br i1 %53, label %bb17, label %bb18
 
@@ -256,7 +256,7 @@
 
 bb19:		; preds = %bb18
 	%58 = getelementptr %struct.quad_struct, %struct.quad_struct* %56, i32 0, i32 0		; <i32*> [#uses=1]
-	%59 = load i32* %58, align 4		; <i32> [#uses=1]
+	%59 = load i32, i32* %58, align 4		; <i32> [#uses=1]
 	%60 = icmp eq i32 %59, 1		; <i1> [#uses=1]
 	br i1 %60, label %bb20, label %bb21
 
@@ -266,7 +266,7 @@
 
 bb21:		; preds = %bb19
 	%62 = getelementptr %struct.quad_struct, %struct.quad_struct* %56, i32 0, i32 0		; <i32*> [#uses=1]
-	%63 = load i32* %62, align 4		; <i32> [#uses=1]
+	%63 = load i32, i32* %62, align 4		; <i32> [#uses=1]
 	%64 = icmp eq i32 %63, 2		; <i1> [#uses=1]
 	br i1 %64, label %bb22, label %bb23
 
diff --git a/llvm/test/Transforms/Inline/align.ll b/llvm/test/Transforms/Inline/align.ll
index 48e6b1b..c91fe80 100644
--- a/llvm/test/Transforms/Inline/align.ll
+++ b/llvm/test/Transforms/Inline/align.ll
@@ -4,7 +4,7 @@
 
 define void @hello(float* align 128 nocapture %a, float* nocapture readonly %c) #0 {
 entry:
-  %0 = load float* %c, align 4
+  %0 = load float, float* %c, align 4
   %arrayidx = getelementptr inbounds float, float* %a, i64 5
   store float %0, float* %arrayidx, align 4
   ret void
@@ -13,7 +13,7 @@
 define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 {
 entry:
   tail call void @hello(float* %a, float* %c)
-  %0 = load float* %c, align 4
+  %0 = load float, float* %c, align 4
   %arrayidx = getelementptr inbounds float, float* %a, i64 7
   store float %0, float* %arrayidx, align 4
   ret void
@@ -25,10 +25,10 @@
 ; CHECK:   %maskedptr = and i64 %ptrint, 127
 ; CHECK:   %maskcond = icmp eq i64 %maskedptr, 0
 ; CHECK:   call void @llvm.assume(i1 %maskcond)
-; CHECK:   %0 = load float* %c, align 4
+; CHECK:   %0 = load float, float* %c, align 4
 ; CHECK:   %arrayidx.i = getelementptr inbounds float, float* %a, i64 5
 ; CHECK:   store float %0, float* %arrayidx.i, align 4
-; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %1 = load float, float* %c, align 4
 ; CHECK:   %arrayidx = getelementptr inbounds float, float* %a, i64 7
 ; CHECK:   store float %1, float* %arrayidx, align 4
 ; CHECK:   ret void
@@ -37,7 +37,7 @@
 define void @fooa(float* nocapture align 128 %a, float* nocapture readonly %c) #0 {
 entry:
   tail call void @hello(float* %a, float* %c)
-  %0 = load float* %c, align 4
+  %0 = load float, float* %c, align 4
   %arrayidx = getelementptr inbounds float, float* %a, i64 7
   store float %0, float* %arrayidx, align 4
   ret void
@@ -45,10 +45,10 @@
 
 ; CHECK: define void @fooa(float* nocapture align 128 %a, float* nocapture readonly %c) #0 {
 ; CHECK: entry:
-; CHECK:   %0 = load float* %c, align 4
+; CHECK:   %0 = load float, float* %c, align 4
 ; CHECK:   %arrayidx.i = getelementptr inbounds float, float* %a, i64 5
 ; CHECK:   store float %0, float* %arrayidx.i, align 4
-; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %1 = load float, float* %c, align 4
 ; CHECK:   %arrayidx = getelementptr inbounds float, float* %a, i64 7
 ; CHECK:   store float %1, float* %arrayidx, align 4
 ; CHECK:   ret void
@@ -56,7 +56,7 @@
 
 define void @hello2(float* align 128 nocapture %a, float* align 128 nocapture %b, float* nocapture readonly %c) #0 {
 entry:
-  %0 = load float* %c, align 4
+  %0 = load float, float* %c, align 4
   %arrayidx = getelementptr inbounds float, float* %a, i64 5
   store float %0, float* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds float, float* %b, i64 8
@@ -67,7 +67,7 @@
 define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
 entry:
   tail call void @hello2(float* %a, float* %b, float* %c)
-  %0 = load float* %c, align 4
+  %0 = load float, float* %c, align 4
   %arrayidx = getelementptr inbounds float, float* %a, i64 7
   store float %0, float* %arrayidx, align 4
   ret void
@@ -83,12 +83,12 @@
 ; CHECK:   %maskedptr2 = and i64 %ptrint1, 127
 ; CHECK:   %maskcond3 = icmp eq i64 %maskedptr2, 0
 ; CHECK:   call void @llvm.assume(i1 %maskcond3)
-; CHECK:   %0 = load float* %c, align 4
+; CHECK:   %0 = load float, float* %c, align 4
 ; CHECK:   %arrayidx.i = getelementptr inbounds float, float* %a, i64 5
 ; CHECK:   store float %0, float* %arrayidx.i, align 4
 ; CHECK:   %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8
 ; CHECK:   store float %0, float* %arrayidx1.i, align 4
-; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %1 = load float, float* %c, align 4
 ; CHECK:   %arrayidx = getelementptr inbounds float, float* %a, i64 7
 ; CHECK:   store float %1, float* %arrayidx, align 4
 ; CHECK:   ret void
diff --git a/llvm/test/Transforms/Inline/alloca-bonus.ll b/llvm/test/Transforms/Inline/alloca-bonus.ll
index bb654da..649fac9 100644
--- a/llvm/test/Transforms/Inline/alloca-bonus.ll
+++ b/llvm/test/Transforms/Inline/alloca-bonus.ll
@@ -15,7 +15,7 @@
 }
 
 define void @inner1(i32 *%ptr) {
-  %A = load i32* %ptr
+  %A = load i32, i32* %ptr
   store i32 0, i32* %ptr
   %C = getelementptr inbounds i32, i32* %ptr, i32 0
   %D = getelementptr inbounds i32, i32* %ptr, i32 1
@@ -35,7 +35,7 @@
 
 ; %D poisons this call, scalar-repl can't handle that instruction.
 define void @inner2(i32 *%ptr) {
-  %A = load i32* %ptr
+  %A = load i32, i32* %ptr
   store i32 0, i32* %ptr
   %C = getelementptr inbounds i32, i32* %ptr, i32 0
   %D = getelementptr inbounds i32, i32* %ptr, i32 %A
@@ -59,7 +59,7 @@
   br i1 %A, label %bb.true, label %bb.false
 bb.true:
  ; This block mustn't be counted in the inline cost.
-  %t1 = load i32* %ptr
+  %t1 = load i32, i32* %ptr
   %t2 = add i32 %t1, 1
   %t3 = add i32 %t2, 1
   %t4 = add i32 %t3, 1
@@ -100,7 +100,7 @@
   br i1 %C, label %bb.true, label %bb.false
 bb.true:
  ; This block mustn't be counted in the inline cost.
-  %t1 = load i32* %ptr
+  %t1 = load i32, i32* %ptr
   %t2 = add i32 %t1, 1
   %t3 = add i32 %t2, 1
   %t4 = add i32 %t3, 1
@@ -137,7 +137,7 @@
 ; the flag is set appropriately, the poisoning instruction is inside of dead
 ; code, and so shouldn't be counted.
 define void @inner5(i1 %flag, i32 *%ptr) {
-  %A = load i32* %ptr
+  %A = load i32, i32* %ptr
   store i32 0, i32* %ptr
   %C = getelementptr inbounds i32, i32* %ptr, i32 0
   br i1 %flag, label %if.then, label %exit
diff --git a/llvm/test/Transforms/Inline/alloca-dbgdeclare.ll b/llvm/test/Transforms/Inline/alloca-dbgdeclare.ll
index ea69868..03661a7 100644
--- a/llvm/test/Transforms/Inline/alloca-dbgdeclare.ll
+++ b/llvm/test/Transforms/Inline/alloca-dbgdeclare.ll
@@ -43,7 +43,7 @@
 ; CHECK-NEXT:   call void @llvm.dbg.declare(metadata [20 x i8]* %agg.tmp.sroa.3.i,
   %agg.tmp.sroa.3 = alloca [20 x i8], align 4
   tail call void @llvm.dbg.declare(metadata [20 x i8]* %agg.tmp.sroa.3, metadata !46, metadata !48), !dbg !49
-  %agg.tmp.sroa.0.0.copyload = load i32* getelementptr inbounds (%struct.A* @b, i64 0, i32 0), align 8, !dbg !50
+  %agg.tmp.sroa.0.0.copyload = load i32, i32* getelementptr inbounds (%struct.A* @b, i64 0, i32 0), align 8, !dbg !50
   tail call void @llvm.dbg.value(metadata i32 %agg.tmp.sroa.0.0.copyload, i64 0, metadata !46, metadata !51), !dbg !49
   %agg.tmp.sroa.3.0..sroa_idx = getelementptr inbounds [20 x i8], [20 x i8]* %agg.tmp.sroa.3, i64 0, i64 0, !dbg !50
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %agg.tmp.sroa.3.0..sroa_idx, i8* getelementptr (i8* bitcast (%struct.A* @b to i8*), i64 4), i64 20, i32 4, i1 false), !dbg !50
diff --git a/llvm/test/Transforms/Inline/alloca-merge-align-nodl.ll b/llvm/test/Transforms/Inline/alloca-merge-align-nodl.ll
index 85d26ac..d51c562 100644
--- a/llvm/test/Transforms/Inline/alloca-merge-align-nodl.ll
+++ b/llvm/test/Transforms/Inline/alloca-merge-align-nodl.ll
@@ -8,11 +8,11 @@
 entry:
   %x = alloca [2 x i32], align 4
   %a1 = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 0
-  %0 = load i32* %a1, align 4
+  %0 = load i32, i32* %a1, align 4
   %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 0
   store i32 %0, i32* %arrayidx, align 4
   %b = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 1
-  %1 = load i32* %b, align 4
+  %1 = load i32, i32* %b, align 4
   %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 1
   store i32 %1, i32* %arrayidx2, align 4
   call void @bar(i32* %arrayidx) #2
@@ -23,11 +23,11 @@
 entry:
   %x = alloca [2 x i32]
   %a1 = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 0
-  %0 = load i32* %a1, align 4
+  %0 = load i32, i32* %a1, align 4
   %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 0
   store i32 %0, i32* %arrayidx, align 4
   %b = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 1
-  %1 = load i32* %b, align 4
+  %1 = load i32, i32* %b, align 4
   %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 1
   store i32 %1, i32* %arrayidx2, align 4
   call void @bar(i32* %arrayidx) #2
@@ -40,11 +40,11 @@
 entry:
   %x = alloca [2 x i32], align 32
   %a1 = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 0
-  %0 = load i32* %a1, align 4
+  %0 = load i32, i32* %a1, align 4
   %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 0
   store i32 %0, i32* %arrayidx, align 32
   %b = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 1
-  %1 = load i32* %b, align 4
+  %1 = load i32, i32* %b, align 4
   %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 1
   store i32 %1, i32* %arrayidx2, align 4
   call void @bar(i32* %arrayidx) #2
diff --git a/llvm/test/Transforms/Inline/alloca-merge-align.ll b/llvm/test/Transforms/Inline/alloca-merge-align.ll
index 84fcffd..ef053a7 100644
--- a/llvm/test/Transforms/Inline/alloca-merge-align.ll
+++ b/llvm/test/Transforms/Inline/alloca-merge-align.ll
@@ -9,11 +9,11 @@
 entry:
   %x = alloca [2 x i32], align 4
   %a1 = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 0
-  %0 = load i32* %a1, align 4
+  %0 = load i32, i32* %a1, align 4
   %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 0
   store i32 %0, i32* %arrayidx, align 4
   %b = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 1
-  %1 = load i32* %b, align 4
+  %1 = load i32, i32* %b, align 4
   %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 1
   store i32 %1, i32* %arrayidx2, align 4
   call void @bar(i32* %arrayidx) #2
@@ -24,11 +24,11 @@
 entry:
   %x = alloca [2 x i32]
   %a1 = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 0
-  %0 = load i32* %a1, align 4
+  %0 = load i32, i32* %a1, align 4
   %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 0
   store i32 %0, i32* %arrayidx, align 4
   %b = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 1
-  %1 = load i32* %b, align 4
+  %1 = load i32, i32* %b, align 4
   %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 1
   store i32 %1, i32* %arrayidx2, align 4
   call void @bar(i32* %arrayidx) #2
@@ -39,11 +39,11 @@
 entry:
   %x = alloca [2 x i32], align 1
   %a1 = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 0
-  %0 = load i32* %a1, align 4
+  %0 = load i32, i32* %a1, align 4
   %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 0
   store i32 %0, i32* %arrayidx, align 4
   %b = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 1
-  %1 = load i32* %b, align 4
+  %1 = load i32, i32* %b, align 4
   %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 1
   store i32 %1, i32* %arrayidx2, align 4
   call void @bar(i32* %arrayidx) #2
@@ -56,11 +56,11 @@
 entry:
   %x = alloca [2 x i32], align 32
   %a1 = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 0
-  %0 = load i32* %a1, align 4
+  %0 = load i32, i32* %a1, align 4
   %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 0
   store i32 %0, i32* %arrayidx, align 32
   %b = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 1
-  %1 = load i32* %b, align 4
+  %1 = load i32, i32* %b, align 4
   %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 1
   store i32 %1, i32* %arrayidx2, align 4
   call void @bar(i32* %arrayidx) #2
diff --git a/llvm/test/Transforms/Inline/basictest.ll b/llvm/test/Transforms/Inline/basictest.ll
index a8dbf39..8f0b3ea 100644
--- a/llvm/test/Transforms/Inline/basictest.ll
+++ b/llvm/test/Transforms/Inline/basictest.ll
@@ -38,7 +38,7 @@
   
   %B = call %T* @test2f(i1 %cond, %T* %A)
   %C = getelementptr %T, %T* %B, i32 0, i32 0
-  %D = load i32* %C
+  %D = load i32, i32* %C
   ret i32 %D
   
 ; CHECK-LABEL: @test2(
diff --git a/llvm/test/Transforms/Inline/byval-tail-call.ll b/llvm/test/Transforms/Inline/byval-tail-call.ll
index 154f397..1e50463 100644
--- a/llvm/test/Transforms/Inline/byval-tail-call.ll
+++ b/llvm/test/Transforms/Inline/byval-tail-call.ll
@@ -31,7 +31,7 @@
 define void @frob(i32* %x) {
 ; CHECK-LABEL: define void @frob(
 ; CHECK: %[[POS:.*]] = alloca i32
-; CHECK: %[[VAL:.*]] = load i32* %x
+; CHECK: %[[VAL:.*]] = load i32, i32* %x
 ; CHECK: store i32 %[[VAL]], i32* %[[POS]]
 ; CHECK: {{^ *}}call void @ext(i32* %[[POS]]
 ; CHECK: tail call void @ext(i32* null)
diff --git a/llvm/test/Transforms/Inline/byval.ll b/llvm/test/Transforms/Inline/byval.ll
index 39bf1ff..b145566 100644
--- a/llvm/test/Transforms/Inline/byval.ll
+++ b/llvm/test/Transforms/Inline/byval.ll
@@ -8,7 +8,7 @@
 define internal void @f(%struct.ss* byval  %b) nounwind  {
 entry:
 	%tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0		; <i32*> [#uses=2]
-	%tmp1 = load i32* %tmp, align 4		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* %tmp, align 4		; <i32> [#uses=1]
 	%tmp2 = add i32 %tmp1, 1		; <i32> [#uses=1]
 	store i32 %tmp2, i32* %tmp, align 4
 	ret void
@@ -38,7 +38,7 @@
 define internal i32 @f2(%struct.ss* byval  %b) nounwind readonly {
 entry:
 	%tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0		; <i32*> [#uses=2]
-	%tmp1 = load i32* %tmp, align 4		; <i32> [#uses=1]
+	%tmp1 = load i32, i32* %tmp, align 4		; <i32> [#uses=1]
 	%tmp2 = add i32 %tmp1, 1		; <i32> [#uses=1]
 	ret i32 %tmp2
 }
@@ -113,7 +113,7 @@
 entry:
 	store i32 0, i32* getelementptr inbounds (%struct.S0* @b, i64 0, i32 0), align 4
 	%f2 = getelementptr inbounds %struct.S0, %struct.S0* %p, i64 0, i32 0
-	%0 = load i32* %f2, align 4
+	%0 = load i32, i32* %f2, align 4
 	store i32 %0, i32* @a, align 4
 	ret void
 }
@@ -121,9 +121,9 @@
 define i32 @test5() {
 entry:
 	tail call void @f5(%struct.S0* byval align 4 @b)
-	%0 = load i32* @a, align 4
+	%0 = load i32, i32* @a, align 4
 	ret i32 %0
 ; CHECK: @test5()
 ; CHECK: store i32 0, i32* getelementptr inbounds (%struct.S0* @b, i64 0, i32 0), align 4
-; CHECK-NOT: load i32* getelementptr inbounds (%struct.S0* @b, i64 0, i32 0), align 4
+; CHECK-NOT: load i32, i32* getelementptr inbounds (%struct.S0* @b, i64 0, i32 0), align 4
 }
diff --git a/llvm/test/Transforms/Inline/byval_lifetime.ll b/llvm/test/Transforms/Inline/byval_lifetime.ll
index 6006685..b500eae 100644
--- a/llvm/test/Transforms/Inline/byval_lifetime.ll
+++ b/llvm/test/Transforms/Inline/byval_lifetime.ll
@@ -12,7 +12,7 @@
 entry:
   %a1 = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 0, i32 1
   %arrayidx = getelementptr inbounds [16 x i32], [16 x i32]* %a1, i32 0, i32 %a
-  %tmp2 = load i32* %arrayidx, align 1
+  %tmp2 = load i32, i32* %arrayidx, align 1
   ret i32 %tmp2
 }
 
diff --git a/llvm/test/Transforms/Inline/crash2.ll b/llvm/test/Transforms/Inline/crash2.ll
index be634f6..4c0dfae 100644
--- a/llvm/test/Transforms/Inline/crash2.ll
+++ b/llvm/test/Transforms/Inline/crash2.ll
@@ -14,7 +14,7 @@
   %__f_addr = alloca void (i8*)*, align 8
   store void (i8*)* %__f, void (i8*)** %__f_addr
 
-  %0 = load void (i8*)** %__f_addr, align 8
+  %0 = load void (i8*)*, void (i8*)** %__f_addr, align 8
   call void %0(i8* undef)
   call i8* @f1(i8* undef) ssp
   unreachable
diff --git a/llvm/test/Transforms/Inline/devirtualize-3.ll b/llvm/test/Transforms/Inline/devirtualize-3.ll
index b496808..76c8150 100644
--- a/llvm/test/Transforms/Inline/devirtualize-3.ll
+++ b/llvm/test/Transforms/Inline/devirtualize-3.ll
@@ -17,7 +17,7 @@
   %tmp = getelementptr inbounds %struct.cont_t, %struct.cont_t* %cont, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=1]
   %tmp1 = getelementptr inbounds %struct.cont_t, %struct.cont_t* %cont, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=2]
   store void (i8*, i32)* bitcast (void (%struct.cont_t*, i32)* @quit to void (i8*, i32)*), void (i8*, i32)** %tmp1
-  %tmp2 = load void (i8*, i32)** %tmp1            ; <void (i8*, i32)*> [#uses=1]
+  %tmp2 = load void (i8*, i32)*, void (i8*, i32)** %tmp1            ; <void (i8*, i32)*> [#uses=1]
   store void (i8*, i32)* %tmp2, void (i8*, i32)** %tmp
   %tmp3 = getelementptr inbounds %struct.cont_t, %struct.cont_t* %cont, i32 0, i32 1 ; <i8**> [#uses=1]
   store i8* null, i8** %tmp3
@@ -51,14 +51,14 @@
 define internal void @foo2(%struct.foo_sf_t* %sf, i32 %y) nounwind ssp {
 entry:
   %tmp1 = getelementptr inbounds %struct.foo_sf_t, %struct.foo_sf_t* %sf, i32 0, i32 0 ; <%struct.cont_t**> [#uses=1]
-  %tmp2 = load %struct.cont_t** %tmp1             ; <%struct.cont_t*> [#uses=1]
+  %tmp2 = load %struct.cont_t*, %struct.cont_t** %tmp1             ; <%struct.cont_t*> [#uses=1]
   %tmp3 = getelementptr inbounds %struct.cont_t, %struct.cont_t* %tmp2, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=1]
-  %tmp4 = load void (i8*, i32)** %tmp3            ; <void (i8*, i32)*> [#uses=1]
+  %tmp4 = load void (i8*, i32)*, void (i8*, i32)** %tmp3            ; <void (i8*, i32)*> [#uses=1]
   %tmp6 = getelementptr inbounds %struct.foo_sf_t, %struct.foo_sf_t* %sf, i32 0, i32 0 ; <%struct.cont_t**> [#uses=1]
-  %tmp7 = load %struct.cont_t** %tmp6             ; <%struct.cont_t*> [#uses=1]
+  %tmp7 = load %struct.cont_t*, %struct.cont_t** %tmp6             ; <%struct.cont_t*> [#uses=1]
   %conv = bitcast %struct.cont_t* %tmp7 to i8*    ; <i8*> [#uses=1]
   %tmp9 = getelementptr inbounds %struct.foo_sf_t, %struct.foo_sf_t* %sf, i32 0, i32 1 ; <i32*> [#uses=1]
-  %tmp10 = load i32* %tmp9                        ; <i32> [#uses=1]
+  %tmp10 = load i32, i32* %tmp9                        ; <i32> [#uses=1]
   %mul = mul i32 %tmp10, %y                       ; <i32> [#uses=1]
   call void %tmp4(i8* %conv, i32 %mul)
   ret void
@@ -67,9 +67,9 @@
 define internal void @bar(%struct.cont_t* %c, i32 %y) nounwind ssp {
 entry:
   %tmp1 = getelementptr inbounds %struct.cont_t, %struct.cont_t* %c, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=1]
-  %tmp2 = load void (i8*, i32)** %tmp1            ; <void (i8*, i32)*> [#uses=1]
+  %tmp2 = load void (i8*, i32)*, void (i8*, i32)** %tmp1            ; <void (i8*, i32)*> [#uses=1]
   %tmp4 = getelementptr inbounds %struct.cont_t, %struct.cont_t* %c, i32 0, i32 1 ; <i8**> [#uses=1]
-  %tmp5 = load i8** %tmp4                         ; <i8*> [#uses=1]
+  %tmp5 = load i8*, i8** %tmp4                         ; <i8*> [#uses=1]
   %add = add nsw i32 %y, 5                        ; <i32> [#uses=1]
   call void %tmp2(i8* %tmp5, i32 %add)
   ret void
diff --git a/llvm/test/Transforms/Inline/devirtualize.ll b/llvm/test/Transforms/Inline/devirtualize.ll
index c009e02..320071f 100644
--- a/llvm/test/Transforms/Inline/devirtualize.ll
+++ b/llvm/test/Transforms/Inline/devirtualize.ll
@@ -9,7 +9,7 @@
 entry:
   store i32 ()* @bar, i32 ()** %p
   store i64 0, i64* %q
-  %tmp3 = load i32 ()** %p                        ; <i32 ()*> [#uses=1]
+  %tmp3 = load i32 ()*, i32 ()** %p                        ; <i32 ()*> [#uses=1]
   %call = call i32 %tmp3()                        ; <i32> [#uses=1]
   %X = add i32 %call, 4
   ret i32 %X
@@ -85,9 +85,9 @@
 cast.end:                                         ; preds = %entry, %cast.notnull
   %3 = phi %struct.A* [ %2, %cast.notnull ], [ null, %entry ] ; <%struct.A*> [#uses=2]
   %4 = bitcast %struct.A* %3 to i32 (%struct.A*)*** ; <i32 (%struct.A*)***> [#uses=1]
-  %5 = load i32 (%struct.A*)*** %4                ; <i32 (%struct.A*)**> [#uses=1]
+  %5 = load i32 (%struct.A*)**, i32 (%struct.A*)*** %4                ; <i32 (%struct.A*)**> [#uses=1]
   %vfn = getelementptr inbounds i32 (%struct.A*)*, i32 (%struct.A*)** %5, i64 0 ; <i32 (%struct.A*)**> [#uses=1]
-  %6 = load i32 (%struct.A*)** %vfn               ; <i32 (%struct.A*)*> [#uses=1]
+  %6 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vfn               ; <i32 (%struct.A*)*> [#uses=1]
   %call = call i32 %6(%struct.A* %3)              ; <i32> [#uses=1]
   ret i32 %call
 }
diff --git a/llvm/test/Transforms/Inline/ephemeral.ll b/llvm/test/Transforms/Inline/ephemeral.ll
index d1135c6..7e5ae03 100644
--- a/llvm/test/Transforms/Inline/ephemeral.ll
+++ b/llvm/test/Transforms/Inline/ephemeral.ll
@@ -3,7 +3,7 @@
 @a = global i32 4
 
 define i1 @inner() {
-  %a1 = load volatile i32* @a
+  %a1 = load volatile i32, i32* @a
   %x1 = add i32 %a1, %a1
   %c = icmp eq i32 %x1, 0
 
diff --git a/llvm/test/Transforms/Inline/gvn-inline-iteration.ll b/llvm/test/Transforms/Inline/gvn-inline-iteration.ll
index 7916a13..b87c060 100644
--- a/llvm/test/Transforms/Inline/gvn-inline-iteration.ll
+++ b/llvm/test/Transforms/Inline/gvn-inline-iteration.ll
@@ -8,7 +8,7 @@
 entry:
   store i32 ()* @bar, i32 ()** %p
   store i64 0, i64* %q
-  %tmp3 = load i32 ()** %p                        ; <i32 ()*> [#uses=1]
+  %tmp3 = load i32 ()*, i32 ()** %p                        ; <i32 ()*> [#uses=1]
   %call = tail call i32 %tmp3() nounwind          ; <i32> [#uses=1]
   ret i32 %call
 }
diff --git a/llvm/test/Transforms/Inline/inline-byval-bonus.ll b/llvm/test/Transforms/Inline/inline-byval-bonus.ll
index 966bce8..7f0c0e1 100644
--- a/llvm/test/Transforms/Inline/inline-byval-bonus.ll
+++ b/llvm/test/Transforms/Inline/inline-byval-bonus.ll
@@ -30,36 +30,36 @@
 
 define i32 @ray_sphere(%struct.sphere* nocapture %sph, %struct.ray* nocapture byval align 8 %ray, %struct.spoint* %sp) nounwind uwtable ssp {
   %1 = getelementptr inbounds %struct.ray, %struct.ray* %ray, i64 0, i32 1, i32 0
-  %2 = load double* %1, align 8
+  %2 = load double, double* %1, align 8
   %3 = fmul double %2, %2
   %4 = getelementptr inbounds %struct.ray, %struct.ray* %ray, i64 0, i32 1, i32 1
-  %5 = load double* %4, align 8
+  %5 = load double, double* %4, align 8
   %6 = fmul double %5, %5
   %7 = fadd double %3, %6
   %8 = getelementptr inbounds %struct.ray, %struct.ray* %ray, i64 0, i32 1, i32 2
-  %9 = load double* %8, align 8
+  %9 = load double, double* %8, align 8
   %10 = fmul double %9, %9
   %11 = fadd double %7, %10
   %12 = fmul double %2, 2.000000e+00
   %13 = getelementptr inbounds %struct.ray, %struct.ray* %ray, i64 0, i32 0, i32 0
-  %14 = load double* %13, align 8
+  %14 = load double, double* %13, align 8
   %15 = getelementptr inbounds %struct.sphere, %struct.sphere* %sph, i64 0, i32 0, i32 0
-  %16 = load double* %15, align 8
+  %16 = load double, double* %15, align 8
   %17 = fsub double %14, %16
   %18 = fmul double %12, %17
   %19 = fmul double %5, 2.000000e+00
   %20 = getelementptr inbounds %struct.ray, %struct.ray* %ray, i64 0, i32 0, i32 1
-  %21 = load double* %20, align 8
+  %21 = load double, double* %20, align 8
   %22 = getelementptr inbounds %struct.sphere, %struct.sphere* %sph, i64 0, i32 0, i32 1
-  %23 = load double* %22, align 8
+  %23 = load double, double* %22, align 8
   %24 = fsub double %21, %23
   %25 = fmul double %19, %24
   %26 = fadd double %18, %25
   %27 = fmul double %9, 2.000000e+00
   %28 = getelementptr inbounds %struct.ray, %struct.ray* %ray, i64 0, i32 0, i32 2
-  %29 = load double* %28, align 8
+  %29 = load double, double* %28, align 8
   %30 = getelementptr inbounds %struct.sphere, %struct.sphere* %sph, i64 0, i32 0, i32 2
-  %31 = load double* %30, align 8
+  %31 = load double, double* %30, align 8
   %32 = fsub double %29, %31
   %33 = fmul double %27, %32
   %34 = fadd double %26, %33
@@ -83,7 +83,7 @@
   %52 = fmul double %51, 2.000000e+00
   %53 = fadd double %52, %45
   %54 = getelementptr inbounds %struct.sphere, %struct.sphere* %sph, i64 0, i32 1
-  %55 = load double* %54, align 8
+  %55 = load double, double* %54, align 8
   %56 = fmul double %55, %55
   %57 = fsub double %53, %56
   %58 = fmul double %34, %34
@@ -135,21 +135,21 @@
   %89 = fadd double %29, %88
   %90 = getelementptr inbounds %struct.spoint, %struct.spoint* %sp, i64 0, i32 0, i32 2
   store double %89, double* %90, align 8
-  %91 = load double* %15, align 8
+  %91 = load double, double* %15, align 8
   %92 = fsub double %83, %91
-  %93 = load double* %54, align 8
+  %93 = load double, double* %54, align 8
   %94 = fdiv double %92, %93
   %95 = getelementptr inbounds %struct.spoint, %struct.spoint* %sp, i64 0, i32 1, i32 0
   store double %94, double* %95, align 8
-  %96 = load double* %22, align 8
+  %96 = load double, double* %22, align 8
   %97 = fsub double %86, %96
-  %98 = load double* %54, align 8
+  %98 = load double, double* %54, align 8
   %99 = fdiv double %97, %98
   %100 = getelementptr inbounds %struct.spoint, %struct.spoint* %sp, i64 0, i32 1, i32 1
   store double %99, double* %100, align 8
-  %101 = load double* %30, align 8
+  %101 = load double, double* %30, align 8
   %102 = fsub double %89, %101
-  %103 = load double* %54, align 8
+  %103 = load double, double* %54, align 8
   %104 = fdiv double %102, %103
   %105 = getelementptr inbounds %struct.spoint, %struct.spoint* %sp, i64 0, i32 1, i32 2
   store double %104, double* %105, align 8
diff --git a/llvm/test/Transforms/Inline/inline-cold.ll b/llvm/test/Transforms/Inline/inline-cold.ll
index 5743377..b3c1345 100644
--- a/llvm/test/Transforms/Inline/inline-cold.ll
+++ b/llvm/test/Transforms/Inline/inline-cold.ll
@@ -17,29 +17,29 @@
 ; Function Attrs: nounwind readnone uwtable
 define i32 @simpleFunction(i32 %a) #0 {
 entry:
-  %a1 = load volatile i32* @a
+  %a1 = load volatile i32, i32* @a
   %x1 = add i32 %a1,  %a1
-  %a2 = load volatile i32* @a
+  %a2 = load volatile i32, i32* @a
   %x2 = add i32 %x1, %a2
-  %a3 = load volatile i32* @a
+  %a3 = load volatile i32, i32* @a
   %x3 = add i32 %x2, %a3
-  %a4 = load volatile i32* @a
+  %a4 = load volatile i32, i32* @a
   %x4 = add i32 %x3, %a4
-  %a5 = load volatile i32* @a
+  %a5 = load volatile i32, i32* @a
   %x5 = add i32 %x4, %a5
-  %a6 = load volatile i32* @a
+  %a6 = load volatile i32, i32* @a
   %x6 = add i32 %x5, %a6
-  %a7 = load volatile i32* @a
+  %a7 = load volatile i32, i32* @a
   %x7 = add i32 %x6, %a6
-  %a8 = load volatile i32* @a
+  %a8 = load volatile i32, i32* @a
   %x8 = add i32 %x7, %a8
-  %a9 = load volatile i32* @a
+  %a9 = load volatile i32, i32* @a
   %x9 = add i32 %x8, %a9
-  %a10 = load volatile i32* @a
+  %a10 = load volatile i32, i32* @a
   %x10 = add i32 %x9, %a10
-  %a11 = load volatile i32* @a
+  %a11 = load volatile i32, i32* @a
   %x11 = add i32 %x10, %a11
-  %a12 = load volatile i32* @a
+  %a12 = load volatile i32, i32* @a
   %x12 = add i32 %x11, %a12
   %add = add i32 %x12, %a
   ret i32 %add
@@ -54,29 +54,29 @@
 ; DEFAULT-LABEL: @ColdFunction
 ; DEFAULT: ret
 entry:
-  %a1 = load volatile i32* @a
+  %a1 = load volatile i32, i32* @a
   %x1 = add i32 %a1,  %a1
-  %a2 = load volatile i32* @a
+  %a2 = load volatile i32, i32* @a
   %x2 = add i32 %x1, %a2
-  %a3 = load volatile i32* @a
+  %a3 = load volatile i32, i32* @a
   %x3 = add i32 %x2, %a3
-  %a4 = load volatile i32* @a
+  %a4 = load volatile i32, i32* @a
   %x4 = add i32 %x3, %a4
-  %a5 = load volatile i32* @a
+  %a5 = load volatile i32, i32* @a
   %x5 = add i32 %x4, %a5
-  %a6 = load volatile i32* @a
+  %a6 = load volatile i32, i32* @a
   %x6 = add i32 %x5, %a6
-  %a7 = load volatile i32* @a
+  %a7 = load volatile i32, i32* @a
   %x7 = add i32 %x6, %a6
-  %a8 = load volatile i32* @a
+  %a8 = load volatile i32, i32* @a
   %x8 = add i32 %x7, %a8
-  %a9 = load volatile i32* @a
+  %a9 = load volatile i32, i32* @a
   %x9 = add i32 %x8, %a9
-  %a10 = load volatile i32* @a
+  %a10 = load volatile i32, i32* @a
   %x10 = add i32 %x9, %a10
-  %a11 = load volatile i32* @a
+  %a11 = load volatile i32, i32* @a
   %x11 = add i32 %x10, %a11
-  %a12 = load volatile i32* @a
+  %a12 = load volatile i32, i32* @a
   %x12 = add i32 %x11, %a12
   %add = add i32 %x12, %a
   ret i32 %add
@@ -91,79 +91,79 @@
 ; DEFAULT-LABEL: @ColdFunction2
 ; DEFAULT: ret
 entry:
-  %a1 = load volatile i32* @a
+  %a1 = load volatile i32, i32* @a
   %x1 = add i32 %a1,  %a1
-  %a2 = load volatile i32* @a
+  %a2 = load volatile i32, i32* @a
   %x2 = add i32 %x1, %a2
-  %a3 = load volatile i32* @a
+  %a3 = load volatile i32, i32* @a
   %x3 = add i32 %x2, %a3
-  %a4 = load volatile i32* @a
+  %a4 = load volatile i32, i32* @a
   %x4 = add i32 %x3, %a4
-  %a5 = load volatile i32* @a
+  %a5 = load volatile i32, i32* @a
   %x5 = add i32 %x4, %a5
-  %a6 = load volatile i32* @a
+  %a6 = load volatile i32, i32* @a
   %x6 = add i32 %x5, %a6
-  %a7 = load volatile i32* @a
+  %a7 = load volatile i32, i32* @a
   %x7 = add i32 %x6, %a7
-  %a8 = load volatile i32* @a
+  %a8 = load volatile i32, i32* @a
   %x8 = add i32 %x7, %a8
-  %a9 = load volatile i32* @a
+  %a9 = load volatile i32, i32* @a
   %x9 = add i32 %x8, %a9
-  %a10 = load volatile i32* @a
+  %a10 = load volatile i32, i32* @a
   %x10 = add i32 %x9, %a10
-  %a11 = load volatile i32* @a
+  %a11 = load volatile i32, i32* @a
   %x11 = add i32 %x10, %a11
-  %a12 = load volatile i32* @a
+  %a12 = load volatile i32, i32* @a
   %x12 = add i32 %x11, %a12
 
-  %a21 = load volatile i32* @a
+  %a21 = load volatile i32, i32* @a
   %x21 = add i32 %x12, %a21
-  %a22 = load volatile i32* @a
+  %a22 = load volatile i32, i32* @a
   %x22 = add i32 %x21, %a22
-  %a23 = load volatile i32* @a
+  %a23 = load volatile i32, i32* @a
   %x23 = add i32 %x22, %a23
-  %a24 = load volatile i32* @a
+  %a24 = load volatile i32, i32* @a
   %x24 = add i32 %x23, %a24
-  %a25 = load volatile i32* @a
+  %a25 = load volatile i32, i32* @a
   %x25 = add i32 %x24, %a25
-  %a26 = load volatile i32* @a
+  %a26 = load volatile i32, i32* @a
   %x26 = add i32 %x25, %a26
-  %a27 = load volatile i32* @a
+  %a27 = load volatile i32, i32* @a
   %x27 = add i32 %x26, %a27
-  %a28 = load volatile i32* @a
+  %a28 = load volatile i32, i32* @a
   %x28 = add i32 %x27, %a28
-  %a29 = load volatile i32* @a
+  %a29 = load volatile i32, i32* @a
   %x29 = add i32 %x28, %a29
-  %a30 = load volatile i32* @a
+  %a30 = load volatile i32, i32* @a
   %x30 = add i32 %x29, %a30
-  %a31 = load volatile i32* @a
+  %a31 = load volatile i32, i32* @a
   %x31 = add i32 %x30, %a31
-  %a32 = load volatile i32* @a
+  %a32 = load volatile i32, i32* @a
   %x32 = add i32 %x31, %a32
 
-  %a41 = load volatile i32* @a
+  %a41 = load volatile i32, i32* @a
   %x41 = add i32 %x32, %a41
-  %a42 = load volatile i32* @a
+  %a42 = load volatile i32, i32* @a
   %x42 = add i32 %x41, %a42
-  %a43 = load volatile i32* @a
+  %a43 = load volatile i32, i32* @a
   %x43 = add i32 %x42, %a43
-  %a44 = load volatile i32* @a
+  %a44 = load volatile i32, i32* @a
   %x44 = add i32 %x43, %a44
-  %a45 = load volatile i32* @a
+  %a45 = load volatile i32, i32* @a
   %x45 = add i32 %x44, %a45
-  %a46 = load volatile i32* @a
+  %a46 = load volatile i32, i32* @a
   %x46 = add i32 %x45, %a46
-  %a47 = load volatile i32* @a
+  %a47 = load volatile i32, i32* @a
   %x47 = add i32 %x46, %a47
-  %a48 = load volatile i32* @a
+  %a48 = load volatile i32, i32* @a
   %x48 = add i32 %x47, %a48
-  %a49 = load volatile i32* @a
+  %a49 = load volatile i32, i32* @a
   %x49 = add i32 %x48, %a49
-  %a50 = load volatile i32* @a
+  %a50 = load volatile i32, i32* @a
   %x50 = add i32 %x49, %a50
-  %a51 = load volatile i32* @a
+  %a51 = load volatile i32, i32* @a
   %x51 = add i32 %x50, %a51
-  %a52 = load volatile i32* @a
+  %a52 = load volatile i32, i32* @a
   %x52 = add i32 %x51, %a52
 
   %add = add i32 %x52, %a
diff --git a/llvm/test/Transforms/Inline/inline-fast-math-flags.ll b/llvm/test/Transforms/Inline/inline-fast-math-flags.ll
index 01074f5..e5ff8f7 100644
--- a/llvm/test/Transforms/Inline/inline-fast-math-flags.ll
+++ b/llvm/test/Transforms/Inline/inline-fast-math-flags.ll
@@ -8,15 +8,15 @@
 
 define float @foo(float* %a, float %b) {
 entry:
-  %a0 = load float* %a, align 4
+  %a0 = load float, float* %a, align 4
   %mul = fmul fast float %a0, %b
   %tobool = fcmp une float %mul, 0.000000e+00
   br i1 %tobool, label %if.then, label %if.end
 
 if.then:                                          ; preds = %entry
-  %a1 = load float* %a, align 8
+  %a1 = load float, float* %a, align 8
   %arrayidx1 = getelementptr inbounds float, float* %a, i64 1
-  %a2 = load float* %arrayidx1, align 4
+  %a2 = load float, float* %arrayidx1, align 4
   %add = fadd fast float %a1, %a2
   br label %if.end
 
diff --git a/llvm/test/Transforms/Inline/inline-fp.ll b/llvm/test/Transforms/Inline/inline-fp.ll
index 4d18ce8..cf42d82 100644
--- a/llvm/test/Transforms/Inline/inline-fp.ll
+++ b/llvm/test/Transforms/Inline/inline-fp.ll
@@ -18,19 +18,19 @@
   call void @getY(i32* %responseY, i8* %valueY)
   call void @getZ(i32* %responseZ, i8* %valueZ)
 
-  %0 = load i32* %responseX
-  %1 = load i8* %valueX
+  %0 = load i32, i32* %responseX
+  %1 = load i8, i8* %valueX
   %call = call float @f_nofp(i32 %0, i8 zeroext %1)
-  %2 = load i32* %responseZ
-  %3 = load i8* %valueZ
+  %2 = load i32, i32* %responseZ
+  %3 = load i8, i8* %valueZ
   %call2 = call float @f_nofp(i32 %2, i8 zeroext %3)
   %call3 = call float @fabsf(float %call)
   %cmp = fcmp ogt float %call3, 0x3FC1EB8520000000
   br i1 %cmp, label %if.end12, label %if.else
 
 if.else:                                          ; preds = %entry
-  %4 = load i32* %responseY
-  %5 = load i8* %valueY
+  %4 = load i32, i32* %responseY
+  %5 = load i8, i8* %valueY
   %call1 = call float @f_nofp(i32 %4, i8 zeroext %5)
   %call4 = call float @fabsf(float %call1)
   %cmp5 = fcmp ogt float %call4, 0x3FC1EB8520000000
@@ -65,19 +65,19 @@
   call void @getY(i32* %responseY, i8* %valueY)
   call void @getZ(i32* %responseZ, i8* %valueZ)
 
-  %0 = load i32* %responseX
-  %1 = load i8* %valueX
+  %0 = load i32, i32* %responseX
+  %1 = load i8, i8* %valueX
   %call = call float @f_hasfp(i32 %0, i8 zeroext %1)
-  %2 = load i32* %responseZ
-  %3 = load i8* %valueZ
+  %2 = load i32, i32* %responseZ
+  %3 = load i8, i8* %valueZ
   %call2 = call float @f_hasfp(i32 %2, i8 zeroext %3)
   %call3 = call float @fabsf(float %call)
   %cmp = fcmp ogt float %call3, 0x3FC1EB8520000000
   br i1 %cmp, label %if.end12, label %if.else
 
 if.else:                                          ; preds = %entry
-  %4 = load i32* %responseY
-  %5 = load i8* %valueY
+  %4 = load i32, i32* %responseY
+  %5 = load i8, i8* %valueY
   %call1 = call float @f_hasfp(i32 %4, i8 zeroext %5)
   %call4 = call float @fabsf(float %call1)
   %cmp5 = fcmp ogt float %call4, 0x3FC1EB8520000000
diff --git a/llvm/test/Transforms/Inline/inline-invoke-tail.ll b/llvm/test/Transforms/Inline/inline-invoke-tail.ll
index e077523..5fef4ba 100644
--- a/llvm/test/Transforms/Inline/inline-invoke-tail.ll
+++ b/llvm/test/Transforms/Inline/inline-invoke-tail.ll
@@ -19,7 +19,7 @@
 			to label %invcont unwind label %lpad
 
 invcont:
-	%retval = load i32* %a, align 4
+	%retval = load i32, i32* %a, align 4
 	ret i32 %retval
 
 lpad:
diff --git a/llvm/test/Transforms/Inline/inline-optsize.ll b/llvm/test/Transforms/Inline/inline-optsize.ll
index 3ad573a..820e56f 100644
--- a/llvm/test/Transforms/Inline/inline-optsize.ll
+++ b/llvm/test/Transforms/Inline/inline-optsize.ll
@@ -11,15 +11,15 @@
 ; This function should be larger than the inline threshold for -Oz (25), but
 ; smaller than the inline threshold for optsize (75).
 define i32 @inner() {
-  %a1 = load volatile i32* @a
+  %a1 = load volatile i32, i32* @a
   %x1 = add i32 %a1,  %a1
-  %a2 = load volatile i32* @a
+  %a2 = load volatile i32, i32* @a
   %x2 = add i32 %x1, %a2
-  %a3 = load volatile i32* @a
+  %a3 = load volatile i32, i32* @a
   %x3 = add i32 %x2, %a3
-  %a4 = load volatile i32* @a
+  %a4 = load volatile i32, i32* @a
   %x4 = add i32 %x3, %a4
-  %a5 = load volatile i32* @a
+  %a5 = load volatile i32, i32* @a
   %x5 = add i32 %x3, %a5
   ret i32 %x5
 }
diff --git a/llvm/test/Transforms/Inline/inline_constprop.ll b/llvm/test/Transforms/Inline/inline_constprop.ll
index b59a270..de23b61 100644
--- a/llvm/test/Transforms/Inline/inline_constprop.ll
+++ b/llvm/test/Transforms/Inline/inline_constprop.ll
@@ -267,8 +267,8 @@
   br i1 %cmp, label %if.then, label %if.end3
 
 if.then:
-  %0 = load i32* %a
-  %1 = load i32* %b
+  %0 = load i32, i32* %a
+  %1 = load i32, i32* %b
   %cmp1 = icmp eq i32 %0, %1
   br i1 %cmp1, label %return, label %if.end3
 
diff --git a/llvm/test/Transforms/Inline/inline_dbg_declare.ll b/llvm/test/Transforms/Inline/inline_dbg_declare.ll
index c5362fc..2f646e0 100644
--- a/llvm/test/Transforms/Inline/inline_dbg_declare.ll
+++ b/llvm/test/Transforms/Inline/inline_dbg_declare.ll
@@ -28,7 +28,7 @@
   %x.addr = alloca float, align 4
   store float %x, float* %x.addr, align 4
   call void @llvm.dbg.declare(metadata float* %x.addr, metadata !16, metadata !17), !dbg !18
-  %0 = load float* %x.addr, align 4, !dbg !19
+  %0 = load float, float* %x.addr, align 4, !dbg !19
   ret float %0, !dbg !19
 }
 
@@ -46,15 +46,15 @@
   %dst.addr = alloca float*, align 4
   store float* %dst, float** %dst.addr, align 4
   call void @llvm.dbg.declare(metadata float** %dst.addr, metadata !20, metadata !17), !dbg !21
-  %0 = load float** %dst.addr, align 4, !dbg !22
+  %0 = load float*, float** %dst.addr, align 4, !dbg !22
   %arrayidx = getelementptr inbounds float, float* %0, i32 0, !dbg !22
-  %1 = load float* %arrayidx, align 4, !dbg !22
+  %1 = load float, float* %arrayidx, align 4, !dbg !22
   %call = call float @foo(float %1), !dbg !22
 
 ; CHECK-NOT: call float @foo
 ; CHECK: void @llvm.dbg.declare(metadata float* [[x_addr_i]], metadata [[m23:![0-9]+]], metadata !17), !dbg [[m24:![0-9]+]]
 
-  %2 = load float** %dst.addr, align 4, !dbg !22
+  %2 = load float*, float** %dst.addr, align 4, !dbg !22
   %arrayidx1 = getelementptr inbounds float, float* %2, i32 0, !dbg !22
   store float %call, float* %arrayidx1, align 4, !dbg !22
   ret void, !dbg !23
diff --git a/llvm/test/Transforms/Inline/inline_minisize.ll b/llvm/test/Transforms/Inline/inline_minisize.ll
index d266931..0bf75d7 100644
--- a/llvm/test/Transforms/Inline/inline_minisize.ll
+++ b/llvm/test/Transforms/Inline/inline_minisize.ll
@@ -8,17 +8,17 @@
   %res = alloca i32, align 4
   %i = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
-  %tmp = load i32* %a.addr, align 4
+  %tmp = load i32, i32* %a.addr, align 4
   %idxprom = sext i32 %tmp to i64
-  %tmp1 = load i32** @data, align 8
+  %tmp1 = load i32*, i32** @data, align 8
   %arrayidx = getelementptr inbounds i32, i32* %tmp1, i64 %idxprom
-  %tmp2 = load i32* %arrayidx, align 4
-  %tmp3 = load i32* %a.addr, align 4
+  %tmp2 = load i32, i32* %arrayidx, align 4
+  %tmp3 = load i32, i32* %a.addr, align 4
   %add = add nsw i32 %tmp3, 1
   %idxprom1 = sext i32 %add to i64
-  %tmp4 = load i32** @data, align 8
+  %tmp4 = load i32*, i32** @data, align 8
   %arrayidx2 = getelementptr inbounds i32, i32* %tmp4, i64 %idxprom1
-  %tmp5 = load i32* %arrayidx2, align 4
+  %tmp5 = load i32, i32* %arrayidx2, align 4
   %mul = mul nsw i32 %tmp2, %tmp5
   store i32 %mul, i32* %res, align 4
   store i32 0, i32* %i, align 4
@@ -26,21 +26,21 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %tmp6 = load i32* %i, align 4
-  %tmp7 = load i32* %res, align 4
+  %tmp6 = load i32, i32* %i, align 4
+  %tmp7 = load i32, i32* %res, align 4
   %cmp = icmp slt i32 %tmp6, %tmp7
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %tmp8 = load i32* %i, align 4
+  %tmp8 = load i32, i32* %i, align 4
   %idxprom3 = sext i32 %tmp8 to i64
-  %tmp9 = load i32** @data, align 8
+  %tmp9 = load i32*, i32** @data, align 8
   %arrayidx4 = getelementptr inbounds i32, i32* %tmp9, i64 %idxprom3
   call void @fct0(i32* %arrayidx4)
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %tmp10 = load i32* %i, align 4
+  %tmp10 = load i32, i32* %i, align 4
   %inc = add nsw i32 %tmp10, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
@@ -50,21 +50,21 @@
   br label %for.cond5
 
 for.cond5:                                        ; preds = %for.inc10, %for.end
-  %tmp11 = load i32* %i, align 4
-  %tmp12 = load i32* %res, align 4
+  %tmp11 = load i32, i32* %i, align 4
+  %tmp12 = load i32, i32* %res, align 4
   %cmp6 = icmp slt i32 %tmp11, %tmp12
   br i1 %cmp6, label %for.body7, label %for.end12
 
 for.body7:                                        ; preds = %for.cond5
-  %tmp13 = load i32* %i, align 4
+  %tmp13 = load i32, i32* %i, align 4
   %idxprom8 = sext i32 %tmp13 to i64
-  %tmp14 = load i32** @data, align 8
+  %tmp14 = load i32*, i32** @data, align 8
   %arrayidx9 = getelementptr inbounds i32, i32* %tmp14, i64 %idxprom8
   call void @fct0(i32* %arrayidx9)
   br label %for.inc10
 
 for.inc10:                                        ; preds = %for.body7
-  %tmp15 = load i32* %i, align 4
+  %tmp15 = load i32, i32* %i, align 4
   %inc11 = add nsw i32 %tmp15, 1
   store i32 %inc11, i32* %i, align 4
   br label %for.cond5
@@ -74,27 +74,27 @@
   br label %for.cond13
 
 for.cond13:                                       ; preds = %for.inc18, %for.end12
-  %tmp16 = load i32* %i, align 4
-  %tmp17 = load i32* %res, align 4
+  %tmp16 = load i32, i32* %i, align 4
+  %tmp17 = load i32, i32* %res, align 4
   %cmp14 = icmp slt i32 %tmp16, %tmp17
   br i1 %cmp14, label %for.body15, label %for.end20
 
 for.body15:                                       ; preds = %for.cond13
-  %tmp18 = load i32* %i, align 4
+  %tmp18 = load i32, i32* %i, align 4
   %idxprom16 = sext i32 %tmp18 to i64
-  %tmp19 = load i32** @data, align 8
+  %tmp19 = load i32*, i32** @data, align 8
   %arrayidx17 = getelementptr inbounds i32, i32* %tmp19, i64 %idxprom16
   call void @fct0(i32* %arrayidx17)
   br label %for.inc18
 
 for.inc18:                                        ; preds = %for.body15
-  %tmp20 = load i32* %i, align 4
+  %tmp20 = load i32, i32* %i, align 4
   %inc19 = add nsw i32 %tmp20, 1
   store i32 %inc19, i32* %i, align 4
   br label %for.cond13
 
 for.end20:                                        ; preds = %for.cond13
-  %tmp21 = load i32* %res, align 4
+  %tmp21 = load i32, i32* %res, align 4
   ret i32 %tmp21
 }
 
@@ -106,19 +106,19 @@
   %res = alloca i32, align 4
   %i = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
-  %tmp = load i32* %a.addr, align 4
+  %tmp = load i32, i32* %a.addr, align 4
   %shl = shl i32 %tmp, 1
   %idxprom = sext i32 %shl to i64
-  %tmp1 = load i32** @data, align 8
+  %tmp1 = load i32*, i32** @data, align 8
   %arrayidx = getelementptr inbounds i32, i32* %tmp1, i64 %idxprom
-  %tmp2 = load i32* %arrayidx, align 4
-  %tmp3 = load i32* %a.addr, align 4
+  %tmp2 = load i32, i32* %arrayidx, align 4
+  %tmp3 = load i32, i32* %a.addr, align 4
   %shl1 = shl i32 %tmp3, 1
   %add = add nsw i32 %shl1, 13
   %idxprom2 = sext i32 %add to i64
-  %tmp4 = load i32** @data, align 8
+  %tmp4 = load i32*, i32** @data, align 8
   %arrayidx3 = getelementptr inbounds i32, i32* %tmp4, i64 %idxprom2
-  %tmp5 = load i32* %arrayidx3, align 4
+  %tmp5 = load i32, i32* %arrayidx3, align 4
   %mul = mul nsw i32 %tmp2, %tmp5
   store i32 %mul, i32* %res, align 4
   store i32 0, i32* %i, align 4
@@ -126,21 +126,21 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %tmp6 = load i32* %i, align 4
-  %tmp7 = load i32* %res, align 4
+  %tmp6 = load i32, i32* %i, align 4
+  %tmp7 = load i32, i32* %res, align 4
   %cmp = icmp slt i32 %tmp6, %tmp7
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %tmp8 = load i32* %i, align 4
+  %tmp8 = load i32, i32* %i, align 4
   %idxprom4 = sext i32 %tmp8 to i64
-  %tmp9 = load i32** @data, align 8
+  %tmp9 = load i32*, i32** @data, align 8
   %arrayidx5 = getelementptr inbounds i32, i32* %tmp9, i64 %idxprom4
   call void @fct0(i32* %arrayidx5)
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %tmp10 = load i32* %i, align 4
+  %tmp10 = load i32, i32* %i, align 4
   %inc = add nsw i32 %tmp10, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
@@ -150,21 +150,21 @@
   br label %for.cond6
 
 for.cond6:                                        ; preds = %for.inc11, %for.end
-  %tmp11 = load i32* %i, align 4
-  %tmp12 = load i32* %res, align 4
+  %tmp11 = load i32, i32* %i, align 4
+  %tmp12 = load i32, i32* %res, align 4
   %cmp7 = icmp slt i32 %tmp11, %tmp12
   br i1 %cmp7, label %for.body8, label %for.end13
 
 for.body8:                                        ; preds = %for.cond6
-  %tmp13 = load i32* %i, align 4
+  %tmp13 = load i32, i32* %i, align 4
   %idxprom9 = sext i32 %tmp13 to i64
-  %tmp14 = load i32** @data, align 8
+  %tmp14 = load i32*, i32** @data, align 8
   %arrayidx10 = getelementptr inbounds i32, i32* %tmp14, i64 %idxprom9
   call void @fct0(i32* %arrayidx10)
   br label %for.inc11
 
 for.inc11:                                        ; preds = %for.body8
-  %tmp15 = load i32* %i, align 4
+  %tmp15 = load i32, i32* %i, align 4
   %inc12 = add nsw i32 %tmp15, 1
   store i32 %inc12, i32* %i, align 4
   br label %for.cond6
@@ -174,27 +174,27 @@
   br label %for.cond14
 
 for.cond14:                                       ; preds = %for.inc19, %for.end13
-  %tmp16 = load i32* %i, align 4
-  %tmp17 = load i32* %res, align 4
+  %tmp16 = load i32, i32* %i, align 4
+  %tmp17 = load i32, i32* %res, align 4
   %cmp15 = icmp slt i32 %tmp16, %tmp17
   br i1 %cmp15, label %for.body16, label %for.end21
 
 for.body16:                                       ; preds = %for.cond14
-  %tmp18 = load i32* %i, align 4
+  %tmp18 = load i32, i32* %i, align 4
   %idxprom17 = sext i32 %tmp18 to i64
-  %tmp19 = load i32** @data, align 8
+  %tmp19 = load i32*, i32** @data, align 8
   %arrayidx18 = getelementptr inbounds i32, i32* %tmp19, i64 %idxprom17
   call void @fct0(i32* %arrayidx18)
   br label %for.inc19
 
 for.inc19:                                        ; preds = %for.body16
-  %tmp20 = load i32* %i, align 4
+  %tmp20 = load i32, i32* %i, align 4
   %inc20 = add nsw i32 %tmp20, 1
   store i32 %inc20, i32* %i, align 4
   br label %for.cond14
 
 for.end21:                                        ; preds = %for.cond14
-  %tmp21 = load i32* %res, align 4
+  %tmp21 = load i32, i32* %res, align 4
   ret i32 %tmp21
 }
 
@@ -206,9 +206,9 @@
   ;CHECK-NOT: call i32 @fct2
   %c.addr = alloca i32, align 4
   store i32 %c, i32* %c.addr, align 4
-  %tmp = load i32* %c.addr, align 4
+  %tmp = load i32, i32* %c.addr, align 4
   %call = call i32 @fct1(i32 %tmp)
-  %tmp1 = load i32* %c.addr, align 4
+  %tmp1 = load i32, i32* %c.addr, align 4
   %call1 = call i32 @fct2(i32 %tmp1)
   %add = add nsw i32 %call, %call1
   ret i32 %add
@@ -223,9 +223,9 @@
   ;CHECK: call i32 @fct2
   %c.addr = alloca i32, align 4
   store i32 %c, i32* %c.addr, align 4
-  %tmp = load i32* %c.addr, align 4
+  %tmp = load i32, i32* %c.addr, align 4
   %call = call i32 @fct1(i32 %tmp)
-  %tmp1 = load i32* %c.addr, align 4
+  %tmp1 = load i32, i32* %c.addr, align 4
   %call1 = call i32 @fct2(i32 %tmp1)
   %add = add nsw i32 %call, %call1
   ret i32 %add
diff --git a/llvm/test/Transforms/Inline/invoke-combine-clauses.ll b/llvm/test/Transforms/Inline/invoke-combine-clauses.ll
index 5f06039..89a4cc9 100644
--- a/llvm/test/Transforms/Inline/invoke-combine-clauses.ll
+++ b/llvm/test/Transforms/Inline/invoke-combine-clauses.ll
@@ -20,7 +20,7 @@
 lpad:
   %lp = landingpad i32 personality i8* null
       catch i8* @exception_inner
-  %cond = load i1* @condition
+  %cond = load i1, i1* @condition
   br i1 %cond, label %resume1, label %resume2
 resume1:
   resume i32 1
diff --git a/llvm/test/Transforms/Inline/noalias-cs.ll b/llvm/test/Transforms/Inline/noalias-cs.ll
index bf1babd..8528a39 100644
--- a/llvm/test/Transforms/Inline/noalias-cs.ll
+++ b/llvm/test/Transforms/Inline/noalias-cs.ll
@@ -5,12 +5,12 @@
 ; Function Attrs: nounwind uwtable
 define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
 entry:
-  %0 = load float* %c, align 4, !noalias !3
+  %0 = load float, float* %c, align 4, !noalias !3
   %arrayidx.i = getelementptr inbounds float, float* %a, i64 5
   store float %0, float* %arrayidx.i, align 4, !alias.scope !7, !noalias !8
   %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8
   store float %0, float* %arrayidx1.i, align 4, !alias.scope !8, !noalias !7
-  %1 = load float* %c, align 4
+  %1 = load float, float* %c, align 4
   %arrayidx = getelementptr inbounds float, float* %a, i64 7
   store float %1, float* %arrayidx, align 4
   ret void
@@ -25,20 +25,20 @@
 
 ; CHECK: define void @foo(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
 ; CHECK: entry:
-; CHECK:   %0 = load float* %c, align 4, !noalias !6
+; CHECK:   %0 = load float, float* %c, align 4, !noalias !6
 ; CHECK:   %arrayidx.i.i = getelementptr inbounds float, float* %a, i64 5
 ; CHECK:   store float %0, float* %arrayidx.i.i, align 4, !alias.scope !12, !noalias !13
 ; CHECK:   %arrayidx1.i.i = getelementptr inbounds float, float* %b, i64 8
 ; CHECK:   store float %0, float* %arrayidx1.i.i, align 4, !alias.scope !14, !noalias !15
-; CHECK:   %1 = load float* %c, align 4, !noalias !16
+; CHECK:   %1 = load float, float* %c, align 4, !noalias !16
 ; CHECK:   %arrayidx.i = getelementptr inbounds float, float* %a, i64 7
 ; CHECK:   store float %1, float* %arrayidx.i, align 4, !noalias !16
-; CHECK:   %2 = load float* %a, align 4, !alias.scope !16, !noalias !17
+; CHECK:   %2 = load float, float* %a, align 4, !alias.scope !16, !noalias !17
 ; CHECK:   %arrayidx.i.i1 = getelementptr inbounds float, float* %b, i64 5
 ; CHECK:   store float %2, float* %arrayidx.i.i1, align 4, !alias.scope !21, !noalias !22
 ; CHECK:   %arrayidx1.i.i2 = getelementptr inbounds float, float* %b, i64 8
 ; CHECK:   store float %2, float* %arrayidx1.i.i2, align 4, !alias.scope !23, !noalias !24
-; CHECK:   %3 = load float* %a, align 4, !alias.scope !16
+; CHECK:   %3 = load float, float* %a, align 4, !alias.scope !16
 ; CHECK:   %arrayidx.i3 = getelementptr inbounds float, float* %b, i64 7
 ; CHECK:   store float %3, float* %arrayidx.i3, align 4, !alias.scope !16
 ; CHECK:   ret void
diff --git a/llvm/test/Transforms/Inline/noalias.ll b/llvm/test/Transforms/Inline/noalias.ll
index f48aa3d..27e53af 100644
--- a/llvm/test/Transforms/Inline/noalias.ll
+++ b/llvm/test/Transforms/Inline/noalias.ll
@@ -4,7 +4,7 @@
 
 define void @hello(float* noalias nocapture %a, float* nocapture readonly %c) #0 {
 entry:
-  %0 = load float* %c, align 4
+  %0 = load float, float* %c, align 4
   %arrayidx = getelementptr inbounds float, float* %a, i64 5
   store float %0, float* %arrayidx, align 4
   ret void
@@ -13,7 +13,7 @@
 define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 {
 entry:
   tail call void @hello(float* %a, float* %c)
-  %0 = load float* %c, align 4
+  %0 = load float, float* %c, align 4
   %arrayidx = getelementptr inbounds float, float* %a, i64 7
   store float %0, float* %arrayidx, align 4
   ret void
@@ -21,10 +21,10 @@
 
 ; CHECK: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 {
 ; CHECK: entry:
-; CHECK:   %0 = load float* %c, align 4, !noalias !0
+; CHECK:   %0 = load float, float* %c, align 4, !noalias !0
 ; CHECK:   %arrayidx.i = getelementptr inbounds float, float* %a, i64 5
 ; CHECK:   store float %0, float* %arrayidx.i, align 4, !alias.scope !0
-; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %1 = load float, float* %c, align 4
 ; CHECK:   %arrayidx = getelementptr inbounds float, float* %a, i64 7
 ; CHECK:   store float %1, float* %arrayidx, align 4
 ; CHECK:   ret void
@@ -32,7 +32,7 @@
 
 define void @hello2(float* noalias nocapture %a, float* noalias nocapture %b, float* nocapture readonly %c) #0 {
 entry:
-  %0 = load float* %c, align 4
+  %0 = load float, float* %c, align 4
   %arrayidx = getelementptr inbounds float, float* %a, i64 5
   store float %0, float* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds float, float* %b, i64 8
@@ -43,7 +43,7 @@
 define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
 entry:
   tail call void @hello2(float* %a, float* %b, float* %c)
-  %0 = load float* %c, align 4
+  %0 = load float, float* %c, align 4
   %arrayidx = getelementptr inbounds float, float* %a, i64 7
   store float %0, float* %arrayidx, align 4
   ret void
@@ -51,12 +51,12 @@
 
 ; CHECK: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
 ; CHECK: entry:
-; CHECK:   %0 = load float* %c, align 4, !noalias !3
+; CHECK:   %0 = load float, float* %c, align 4, !noalias !3
 ; CHECK:   %arrayidx.i = getelementptr inbounds float, float* %a, i64 5
 ; CHECK:   store float %0, float* %arrayidx.i, align 4, !alias.scope !7, !noalias !8
 ; CHECK:   %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8
 ; CHECK:   store float %0, float* %arrayidx1.i, align 4, !alias.scope !8, !noalias !7
-; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %1 = load float, float* %c, align 4
 ; CHECK:   %arrayidx = getelementptr inbounds float, float* %a, i64 7
 ; CHECK:   store float %1, float* %arrayidx, align 4
 ; CHECK:   ret void
diff --git a/llvm/test/Transforms/Inline/noalias2.ll b/llvm/test/Transforms/Inline/noalias2.ll
index a785329..432fccf 100644
--- a/llvm/test/Transforms/Inline/noalias2.ll
+++ b/llvm/test/Transforms/Inline/noalias2.ll
@@ -4,7 +4,7 @@
 
 define void @hello(float* noalias nocapture %a, float* noalias nocapture readonly %c) #0 {
 entry:
-  %0 = load float* %c, align 4
+  %0 = load float, float* %c, align 4
   %arrayidx = getelementptr inbounds float, float* %a, i64 5
   store float %0, float* %arrayidx, align 4
   ret void
@@ -13,7 +13,7 @@
 define void @foo(float* noalias nocapture %a, float* noalias nocapture readonly %c) #0 {
 entry:
   tail call void @hello(float* %a, float* %c)
-  %0 = load float* %c, align 4
+  %0 = load float, float* %c, align 4
   %arrayidx = getelementptr inbounds float, float* %a, i64 7
   store float %0, float* %arrayidx, align 4
   ret void
@@ -21,10 +21,10 @@
 
 ; CHECK: define void @foo(float* noalias nocapture %a, float* noalias nocapture readonly %c) #0 {
 ; CHECK: entry:
-; CHECK:   %0 = load float* %c, align 4, !alias.scope !0, !noalias !3
+; CHECK:   %0 = load float, float* %c, align 4, !alias.scope !0, !noalias !3
 ; CHECK:   %arrayidx.i = getelementptr inbounds float, float* %a, i64 5
 ; CHECK:   store float %0, float* %arrayidx.i, align 4, !alias.scope !3, !noalias !0
-; CHECK:   %1 = load float* %c, align 4
+; CHECK:   %1 = load float, float* %c, align 4
 ; CHECK:   %arrayidx = getelementptr inbounds float, float* %a, i64 7
 ; CHECK:   store float %1, float* %arrayidx, align 4
 ; CHECK:   ret void
@@ -32,7 +32,7 @@
 
 define void @hello2(float* noalias nocapture %a, float* noalias nocapture %b, float* nocapture readonly %c) #0 {
 entry:
-  %0 = load float* %c, align 4
+  %0 = load float, float* %c, align 4
   %arrayidx = getelementptr inbounds float, float* %a, i64 6
   store float %0, float* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds float, float* %b, i64 8
@@ -46,7 +46,7 @@
 entry:
   tail call void @foo(float* %a, float* %c)
   tail call void @hello2(float* %a, float* %b, float* %c)
-  %0 = load float* %c, align 4
+  %0 = load float, float* %c, align 4
   %arrayidx = getelementptr inbounds float, float* %a, i64 7
   store float %0, float* %arrayidx, align 4
   ret void
@@ -54,18 +54,18 @@
 
 ; CHECK: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 {
 ; CHECK: entry:
-; CHECK:   %0 = load float* %c, align 4, !alias.scope !5, !noalias !10
+; CHECK:   %0 = load float, float* %c, align 4, !alias.scope !5, !noalias !10
 ; CHECK:   %arrayidx.i.i = getelementptr inbounds float, float* %a, i64 5
 ; CHECK:   store float %0, float* %arrayidx.i.i, align 4, !alias.scope !10, !noalias !5
-; CHECK:   %1 = load float* %c, align 4, !alias.scope !13, !noalias !14
+; CHECK:   %1 = load float, float* %c, align 4, !alias.scope !13, !noalias !14
 ; CHECK:   %arrayidx.i = getelementptr inbounds float, float* %a, i64 7
 ; CHECK:   store float %1, float* %arrayidx.i, align 4, !alias.scope !14, !noalias !13
-; CHECK:   %2 = load float* %c, align 4, !noalias !15
+; CHECK:   %2 = load float, float* %c, align 4, !noalias !15
 ; CHECK:   %arrayidx.i1 = getelementptr inbounds float, float* %a, i64 6
 ; CHECK:   store float %2, float* %arrayidx.i1, align 4, !alias.scope !19, !noalias !20
 ; CHECK:   %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8
 ; CHECK:   store float %2, float* %arrayidx1.i, align 4, !alias.scope !20, !noalias !19
-; CHECK:   %3 = load float* %c, align 4
+; CHECK:   %3 = load float, float* %c, align 4
 ; CHECK:   %arrayidx = getelementptr inbounds float, float* %a, i64 7
 ; CHECK:   store float %3, float* %arrayidx, align 4
 ; CHECK:   ret void
diff --git a/llvm/test/Transforms/Inline/optimization-remarks.ll b/llvm/test/Transforms/Inline/optimization-remarks.ll
index fb1b047..8a3e4d1 100644
--- a/llvm/test/Transforms/Inline/optimization-remarks.ll
+++ b/llvm/test/Transforms/Inline/optimization-remarks.ll
@@ -12,8 +12,8 @@
   %y.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
   store i32 %y, i32* %y.addr, align 4
-  %0 = load i32* %x.addr, align 4
-  %1 = load i32* %y.addr, align 4
+  %0 = load i32, i32* %x.addr, align 4
+  %1 = load i32, i32* %y.addr, align 4
   %add = add nsw i32 %0, %1
   ret i32 %add
 }
@@ -25,8 +25,8 @@
   %y.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
   store i32 %y, i32* %y.addr, align 4
-  %0 = load i32* %x.addr, align 4
-  %1 = load i32* %y.addr, align 4
+  %0 = load i32, i32* %x.addr, align 4
+  %1 = load i32, i32* %y.addr, align 4
   %mul = mul nsw i32 %0, %1
   %conv = sitofp i32 %mul to float
   ret float %conv
@@ -37,14 +37,14 @@
 entry:
   %j.addr = alloca i32, align 4
   store i32 %j, i32* %j.addr, align 4
-  %0 = load i32* %j.addr, align 4
-  %1 = load i32* %j.addr, align 4
+  %0 = load i32, i32* %j.addr, align 4
+  %1 = load i32, i32* %j.addr, align 4
   %sub = sub nsw i32 %1, 2
   %call = call i32 @foo(i32 %0, i32 %sub)
   %conv = sitofp i32 %call to float
-  %2 = load i32* %j.addr, align 4
+  %2 = load i32, i32* %j.addr, align 4
   %sub1 = sub nsw i32 %2, 2
-  %3 = load i32* %j.addr, align 4
+  %3 = load i32, i32* %j.addr, align 4
   %call2 = call float @foz(i32 %sub1, i32 %3)
   %mul = fmul float %conv, %call2
   %conv3 = fptosi float %mul to i32
diff --git a/llvm/test/Transforms/Inline/ptr-diff.ll b/llvm/test/Transforms/Inline/ptr-diff.ll
index 8388e4f..728f379 100644
--- a/llvm/test/Transforms/Inline/ptr-diff.ll
+++ b/llvm/test/Transforms/Inline/ptr-diff.ll
@@ -25,7 +25,7 @@
   ret i32 3
 
 else:
-  %t = load i32* %begin
+  %t = load i32, i32* %begin
   ret i32 %t
 }
 
@@ -53,7 +53,7 @@
   ret i32 3
 
 else:
-  %t = load i32* %begin
+  %t = load i32, i32* %begin
   ret i32 %t
 }
 
@@ -63,9 +63,9 @@
   %p1 = inttoptr i32 %a to i32 addrspace(1)*
   %p2 = inttoptr i32 %b to i32 addrspace(1)*
   %p3 = inttoptr i32 %c to i32 addrspace(1)*
-  %t1 = load i32 addrspace(1)* %p1
-  %t2 = load i32 addrspace(1)* %p2
-  %t3 = load i32 addrspace(1)* %p3
+  %t1 = load i32, i32 addrspace(1)* %p1
+  %t2 = load i32, i32 addrspace(1)* %p2
+  %t3 = load i32, i32 addrspace(1)* %p3
   %s = add i32 %t1, %t2
   %s1 = add i32 %s, %t3
   ret i32 %s1
@@ -84,9 +84,9 @@
   %p1 = inttoptr i32 %a to i32 addrspace(2)*
   %p2 = inttoptr i32 %b to i32 addrspace(2)*
   %p3 = inttoptr i32 %c to i32 addrspace(2)*
-  %t1 = load i32 addrspace(2)* %p1
-  %t2 = load i32 addrspace(2)* %p2
-  %t3 = load i32 addrspace(2)* %p3
+  %t1 = load i32, i32 addrspace(2)* %p1
+  %t2 = load i32, i32 addrspace(2)* %p2
+  %t3 = load i32, i32 addrspace(2)* %p3
   %s = add i32 %t1, %t2
   %s1 = add i32 %s, %t3
   ret i32 %s1
diff --git a/llvm/test/Transforms/InstCombine/2003-07-21-ExternalConstant.ll b/llvm/test/Transforms/InstCombine/2003-07-21-ExternalConstant.ll
index f550c83..b381545 100644
--- a/llvm/test/Transforms/InstCombine/2003-07-21-ExternalConstant.ll
+++ b/llvm/test/Transforms/InstCombine/2003-07-21-ExternalConstant.ll
@@ -31,14 +31,14 @@
         %a.0 = alloca i32               ; <i32*> [#uses=2]
         %result = alloca i32            ; <i32*> [#uses=2]
         store i32 %a.1, i32* %a.0
-        %tmp.0 = load i32* %a.0         ; <i32> [#uses=1]
-        %tmp.1 = load i32* @silly               ; <i32> [#uses=1]
+        %tmp.0 = load i32, i32* %a.0         ; <i32> [#uses=1]
+        %tmp.1 = load i32, i32* @silly               ; <i32> [#uses=1]
         %tmp.2 = add i32 %tmp.0, %tmp.1         ; <i32> [#uses=1]
         store i32 %tmp.2, i32* %result
         br label %return
 
 return:         ; preds = %entry
-        %tmp.3 = load i32* %result              ; <i32> [#uses=1]
+        %tmp.3 = load i32, i32* %result              ; <i32> [#uses=1]
         ret i32 %tmp.3
 }
 
diff --git a/llvm/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll b/llvm/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll
index 7f73908..3e0cf12 100644
--- a/llvm/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll
+++ b/llvm/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll
@@ -2,6 +2,6 @@
 
 define void @test(i32* %P) {
         ; Dead but not deletable!
-        %X = load volatile i32* %P              ; <i32> [#uses=0]
+        %X = load volatile i32, i32* %P              ; <i32> [#uses=0]
         ret void
 }
diff --git a/llvm/test/Transforms/InstCombine/2004-01-13-InstCombineInvokePHI.ll b/llvm/test/Transforms/InstCombine/2004-01-13-InstCombineInvokePHI.ll
index 6df30c7..a086c01 100644
--- a/llvm/test/Transforms/InstCombine/2004-01-13-InstCombineInvokePHI.ll
+++ b/llvm/test/Transforms/InstCombine/2004-01-13-InstCombineInvokePHI.ll
@@ -19,7 +19,7 @@
 
 cont:           ; preds = %call, %entry
         %P2 = phi i32* [ %P, %call ], [ null, %entry ]          ; <i32*> [#uses=1]
-        %V = load i32* %P2              ; <i32> [#uses=1]
+        %V = load i32, i32* %P2              ; <i32> [#uses=1]
         ret i32 %V
 
 N:              ; preds = %call
diff --git a/llvm/test/Transforms/InstCombine/2004-05-07-UnsizedCastLoad.ll b/llvm/test/Transforms/InstCombine/2004-05-07-UnsizedCastLoad.ll
index 8b54937..ff855dc 100644
--- a/llvm/test/Transforms/InstCombine/2004-05-07-UnsizedCastLoad.ll
+++ b/llvm/test/Transforms/InstCombine/2004-05-07-UnsizedCastLoad.ll
@@ -4,7 +4,7 @@
 
 define i32 @test(%Ty* %X) {
         %Y = bitcast %Ty* %X to i32*            ; <i32*> [#uses=1]
-        %Z = load i32* %Y               ; <i32> [#uses=1]
+        %Z = load i32, i32* %Y               ; <i32> [#uses=1]
         ret i32 %Z
 }
 
diff --git a/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll b/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll
index d17db8d..10122e4 100644
--- a/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll
+++ b/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll
@@ -12,7 +12,7 @@
         store i32 2, i32* %X2
         %Y = select i1 %C, i32* %X, i32* %X2            ; <i32*> [#uses=1]
         store i32 3, i32* %X
-        %Z = load i32* %Y               ; <i32> [#uses=1]
+        %Z = load i32, i32* %Y               ; <i32> [#uses=1]
         ret i32 %Z
 }
 
diff --git a/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll b/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll
index 0d5fc81..981a4f3 100644
--- a/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll
+++ b/llvm/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll
@@ -18,7 +18,7 @@
 cond_continue.i:                ; preds = %cond_true.i, %entry
         %mem_tmp.i.0 = phi i32* [ %X, %cond_true.i ], [ %X2, %entry ]           ; <i32*> [#uses=1]
         store i32 3, i32* %X
-        %tmp.3 = load i32* %mem_tmp.i.0         ; <i32> [#uses=1]
+        %tmp.3 = load i32, i32* %mem_tmp.i.0         ; <i32> [#uses=1]
         ret i32 %tmp.3
 }
 
diff --git a/llvm/test/Transforms/InstCombine/2005-06-16-SetCCOrSetCCMiscompile.ll b/llvm/test/Transforms/InstCombine/2005-06-16-SetCCOrSetCCMiscompile.ll
index 0a513c6..9c989b9 100644
--- a/llvm/test/Transforms/InstCombine/2005-06-16-SetCCOrSetCCMiscompile.ll
+++ b/llvm/test/Transforms/InstCombine/2005-06-16-SetCCOrSetCCMiscompile.ll
@@ -5,7 +5,7 @@
 @g_07918478 = external global i32               ; <i32*> [#uses=1]
 
 define i1 @test() {
-        %tmp.0 = load i32* @g_07918478          ; <i32> [#uses=2]
+        %tmp.0 = load i32, i32* @g_07918478          ; <i32> [#uses=2]
         %tmp.1 = icmp ne i32 %tmp.0, 0          ; <i1> [#uses=1]
         %tmp.4 = icmp ult i32 %tmp.0, 4111              ; <i1> [#uses=1]
         %bothcond = or i1 %tmp.1, %tmp.4                ; <i1> [#uses=1]
diff --git a/llvm/test/Transforms/InstCombine/2006-09-15-CastToBool.ll b/llvm/test/Transforms/InstCombine/2006-09-15-CastToBool.ll
index ee261ced..2eb28a3f 100644
--- a/llvm/test/Transforms/InstCombine/2006-09-15-CastToBool.ll
+++ b/llvm/test/Transforms/InstCombine/2006-09-15-CastToBool.ll
@@ -2,7 +2,7 @@
 ; PR913
 
 define i32 @test(i32* %tmp1) {
-        %tmp.i = load i32* %tmp1                ; <i32> [#uses=1]
+        %tmp.i = load i32, i32* %tmp1                ; <i32> [#uses=1]
         %tmp = bitcast i32 %tmp.i to i32                ; <i32> [#uses=1]
         %tmp2.ui = lshr i32 %tmp, 5             ; <i32> [#uses=1]
         %tmp2 = bitcast i32 %tmp2.ui to i32             ; <i32> [#uses=1]
diff --git a/llvm/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll b/llvm/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll
index f906e5e..60113fb 100644
--- a/llvm/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll
+++ b/llvm/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll
@@ -23,13 +23,13 @@
 	%tmp.upgrd.2 = icmp eq i32 %direction, 0		; <i1> [#uses=1]
 	%tmp5 = bitcast %struct.point* %p1_addr to { i64 }*		; <{ i64 }*> [#uses=1]
 	%tmp6 = getelementptr { i64 }, { i64 }* %tmp5, i64 0, i32 0		; <i64*> [#uses=1]
-	%tmp.upgrd.3 = load i64* %tmp6		; <i64> [#uses=1]
+	%tmp.upgrd.3 = load i64, i64* %tmp6		; <i64> [#uses=1]
 	%tmp7 = bitcast %struct.point* %p2_addr to { i64 }*		; <{ i64 }*> [#uses=1]
 	%tmp8 = getelementptr { i64 }, { i64 }* %tmp7, i64 0, i32 0		; <i64*> [#uses=1]
-	%tmp9 = load i64* %tmp8		; <i64> [#uses=1]
+	%tmp9 = load i64, i64* %tmp8		; <i64> [#uses=1]
 	%tmp10 = bitcast %struct.point* %p3_addr to { i64 }*		; <{ i64 }*> [#uses=1]
 	%tmp11 = getelementptr { i64 }, { i64 }* %tmp10, i64 0, i32 0		; <i64*> [#uses=1]
-	%tmp12 = load i64* %tmp11		; <i64> [#uses=1]
+	%tmp12 = load i64, i64* %tmp11		; <i64> [#uses=1]
 	%tmp13 = call i32 @determinant( i64 %tmp.upgrd.3, i64 %tmp9, i64 %tmp12 )		; <i32> [#uses=2]
 	br i1 %tmp.upgrd.2, label %cond_true, label %cond_false
 
diff --git a/llvm/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll b/llvm/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll
index 1e5acfd..f54416d 100644
--- a/llvm/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll
+++ b/llvm/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll
@@ -23,13 +23,13 @@
         %tmp.upgrd.2 = icmp eq i32 %direction, 0                ; <i1> [#uses=1]
         %tmp5 = bitcast %struct.point* %p1_addr to { i64 }*             ; <{ i64 }*> [#uses=1]
         %tmp6 = getelementptr { i64 }, { i64 }* %tmp5, i32 0, i32 0              ; <i64*> [#uses=1]
-        %tmp.upgrd.3 = load i64* %tmp6          ; <i64> [#uses=1]
+        %tmp.upgrd.3 = load i64, i64* %tmp6          ; <i64> [#uses=1]
         %tmp7 = bitcast %struct.point* %p2_addr to { i64 }*             ; <{ i64 }*> [#uses=1]
         %tmp8 = getelementptr { i64 }, { i64 }* %tmp7, i32 0, i32 0              ; <i64*> [#uses=1]
-        %tmp9 = load i64* %tmp8         ; <i64> [#uses=1]
+        %tmp9 = load i64, i64* %tmp8         ; <i64> [#uses=1]
         %tmp10 = bitcast %struct.point* %p3_addr to { i64 }*            ; <{ i64 }*> [#uses=1]
         %tmp11 = getelementptr { i64 }, { i64 }* %tmp10, i32 0, i32 0            ; <i64*> [#uses=1]
-        %tmp12 = load i64* %tmp11               ; <i64> [#uses=1]
+        %tmp12 = load i64, i64* %tmp11               ; <i64> [#uses=1]
         %tmp13 = call i32 @determinant( i64 %tmp.upgrd.3, i64 %tmp9, i64 %tmp12 )         ; <i32> [#uses=2]
         %tmp14 = icmp slt i32 %tmp13, 0         ; <i1> [#uses=1]
         %tmp26 = icmp sgt i32 %tmp13, 0         ; <i1> [#uses=1]
diff --git a/llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll b/llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll
index cee6610..784b3e4 100644
--- a/llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll
+++ b/llvm/test/Transforms/InstCombine/2006-12-15-Range-Test.ll
@@ -22,7 +22,7 @@
 
 cond_true:              ; preds = %newFuncRoot
         %tmp15 = getelementptr [17 x i32], [17 x i32]* @r, i32 0, i32 %tmp12.reload         ; <i32*> [#uses=1]
-        %tmp16 = load i32* %tmp15               ; <i32> [#uses=4]
+        %tmp16 = load i32, i32* %tmp15               ; <i32> [#uses=4]
         %tmp18 = icmp slt i32 %tmp16, -31               ; <i1> [#uses=1]
         %tmp21 = icmp sgt i32 %tmp16, 31                ; <i1> [#uses=1]
         %bothcond = or i1 %tmp18, %tmp21                ; <i1> [#uses=1]
diff --git a/llvm/test/Transforms/InstCombine/2006-12-23-Select-Cmp-Cmp.ll b/llvm/test/Transforms/InstCombine/2006-12-23-Select-Cmp-Cmp.ll
index eba1ac1..9251e9b 100644
--- a/llvm/test/Transforms/InstCombine/2006-12-23-Select-Cmp-Cmp.ll
+++ b/llvm/test/Transforms/InstCombine/2006-12-23-Select-Cmp-Cmp.ll
@@ -14,9 +14,9 @@
 
 define void @mng_write_basi() {
 entry:
-	%tmp = load i8* null		; <i8> [#uses=1]
+	%tmp = load i8, i8* null		; <i8> [#uses=1]
 	%tmp.upgrd.1 = icmp ugt i8 %tmp, 8		; <i1> [#uses=1]
-	%tmp.upgrd.2 = load i16* null		; <i16> [#uses=2]
+	%tmp.upgrd.2 = load i16, i16* null		; <i16> [#uses=2]
 	%tmp3 = icmp eq i16 %tmp.upgrd.2, 255		; <i1> [#uses=1]
 	%tmp7 = icmp eq i16 %tmp.upgrd.2, -1		; <i1> [#uses=1]
 	%bOpaque.0.in = select i1 %tmp.upgrd.1, i1 %tmp7, i1 %tmp3		; <i1> [#uses=1]
diff --git a/llvm/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll b/llvm/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll
index e2bebec..20bbd28 100644
--- a/llvm/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll
+++ b/llvm/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll
@@ -11,14 +11,14 @@
 	%A = alloca i32
 	%B = alloca i32
 	%tmp = call i32 (...)* @bar( i32* %A )		; <i32> [#uses=0]
-	%T = load i32* %A		; <i32> [#uses=1]
+	%T = load i32, i32* %A		; <i32> [#uses=1]
 	%tmp2 = icmp eq i32 %C, 0		; <i1> [#uses=1]
 	br i1 %tmp2, label %cond_next, label %cond_true
 
 cond_true:		; preds = %entry
 	store i32 123, i32* %B
 	call i32 @test2( i32 123 )		; <i32>:0 [#uses=0]
-	%T1 = load i32* %B		; <i32> [#uses=1]
+	%T1 = load i32, i32* %B		; <i32> [#uses=1]
 	br label %cond_next
 
 cond_next:		; preds = %cond_true, %entry
diff --git a/llvm/test/Transforms/InstCombine/2007-02-07-PointerCast.ll b/llvm/test/Transforms/InstCombine/2007-02-07-PointerCast.ll
index eedaaff..760b6dc 100644
--- a/llvm/test/Transforms/InstCombine/2007-02-07-PointerCast.ll
+++ b/llvm/test/Transforms/InstCombine/2007-02-07-PointerCast.ll
@@ -13,7 +13,7 @@
 define i32 @main(i32 %x, i8** %a) {
 entry:
         %tmp = getelementptr [6 x i8], [6 x i8]* @str, i32 0, i64 0               ; <i8*> [#uses=1]
-        %tmp1 = load i8** %a            ; <i8*> [#uses=1]
+        %tmp1 = load i8*, i8** %a            ; <i8*> [#uses=1]
         %tmp2 = ptrtoint i8* %tmp1 to i32               ; <i32> [#uses=1]
         %tmp3 = zext i32 %tmp2 to i64           ; <i64> [#uses=1]
         %tmp.upgrd.1 = call i32 (i8*, ...)* @printf( i8* %tmp, i64 %tmp3 )              ; <i32> [#uses=0]
diff --git a/llvm/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll b/llvm/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll
index 93ba6c6..adaaf78 100644
--- a/llvm/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll
+++ b/llvm/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll
@@ -12,7 +12,7 @@
 	%tmp3 = getelementptr %struct..1anon, %struct..1anon* %u, i32 0, i32 0		; <double*> [#uses=1]
 	%tmp34 = bitcast double* %tmp3 to %struct..0anon*		; <%struct..0anon*> [#uses=1]
 	%tmp5 = getelementptr %struct..0anon, %struct..0anon* %tmp34, i32 0, i32 1		; <i32*> [#uses=1]
-	%tmp6 = load i32* %tmp5		; <i32> [#uses=1]
+	%tmp6 = load i32, i32* %tmp5		; <i32> [#uses=1]
 	%tmp7 = shl i32 %tmp6, 1		; <i32> [#uses=1]
 	%tmp8 = lshr i32 %tmp7, 21		; <i32> [#uses=1]
 	%tmp89 = trunc i32 %tmp8 to i16		; <i16> [#uses=1]
diff --git a/llvm/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll b/llvm/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll
index 082b215..2b89a9d 100644
--- a/llvm/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll
+++ b/llvm/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll
@@ -9,13 +9,13 @@
 	ret void
 
 cond_true:		; preds = %newFuncRoot
-	%tmp30 = load i64* %tmp10		; <i64> [#uses=1]
+	%tmp30 = load i64, i64* %tmp10		; <i64> [#uses=1]
 	%.cast = zext i32 63 to i64		; <i64> [#uses=1]
 	%tmp31 = ashr i64 %tmp30, %.cast		; <i64> [#uses=1]
 	%tmp3132 = trunc i64 %tmp31 to i32		; <i32> [#uses=1]
 	%tmp33 = or i32 %tmp3132, 1		; <i32> [#uses=1]
 	store i32 %tmp33, i32* %tmp9
-	%tmp34 = load i32* %tmp9		; <i32> [#uses=1]
+	%tmp34 = load i32, i32* %tmp9		; <i32> [#uses=1]
 	store i32 %tmp34, i32* %retval
 	br label %return.exitStub
 }
diff --git a/llvm/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll b/llvm/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
index 6259893..7f06f00 100644
--- a/llvm/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
+++ b/llvm/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
@@ -9,7 +9,7 @@
 
 define i1 @__gthread_active_p() {
 entry:
-	%tmp1 = load i8** @__gthread_active_ptr.5335, align 4		; <i8*> [#uses=1]
+	%tmp1 = load i8*, i8** @__gthread_active_ptr.5335, align 4		; <i8*> [#uses=1]
 	%tmp2 = icmp ne i8* %tmp1, null		; <i1> [#uses=1]
 	ret i1 %tmp2
 }
diff --git a/llvm/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll b/llvm/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
index f9e8220..67a94e5 100644
--- a/llvm/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
+++ b/llvm/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
@@ -8,7 +8,7 @@
 entry:
   %P_addr = alloca i8*
   store i8* %P, i8** %P_addr
-  %tmp = load i8** %P_addr, align 4
+  %tmp = load i8*, i8** %P_addr, align 4
   %tmp1 = getelementptr [4 x i8], [4 x i8]* @.str, i32 0, i32 0
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %tmp1, i32 4, i32 1, i1 false)
   br label %return
diff --git a/llvm/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll b/llvm/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll
index 8105b4b..8c2cb07 100644
--- a/llvm/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll
+++ b/llvm/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll
@@ -11,7 +11,7 @@
 	br label %bb51.i.i
 
 bb27.i.i:		; preds = %bb51.i.i
-	%tmp31.i.i = load i16* null, align 2		; <i16> [#uses=2]
+	%tmp31.i.i = load i16, i16* null, align 2		; <i16> [#uses=2]
 	%tmp35.i.i = icmp ult i16 %tmp31.i.i, 1		; <i1> [#uses=1]
 	%tmp41.i.i = icmp ugt i16 %tmp31.i.i, -1		; <i1> [#uses=1]
 	%bothcond.i.i = or i1 %tmp35.i.i, %tmp41.i.i		; <i1> [#uses=1]
diff --git a/llvm/test/Transforms/InstCombine/2007-10-31-StringCrash.ll b/llvm/test/Transforms/InstCombine/2007-10-31-StringCrash.ll
index 220f3e2..876cdd5 100644
--- a/llvm/test/Transforms/InstCombine/2007-10-31-StringCrash.ll
+++ b/llvm/test/Transforms/InstCombine/2007-10-31-StringCrash.ll
@@ -7,7 +7,7 @@
 define void @_start(i32 %argc, i8** %argv, i8** %envp) {
 entry:
 	%tmp1 = bitcast void ()* @__darwin_gcc3_preregister_frame_info to i32*		; <i32*> [#uses=1]
-	%tmp2 = load i32* %tmp1, align 4		; <i32> [#uses=1]
+	%tmp2 = load i32, i32* %tmp1, align 4		; <i32> [#uses=1]
 	%tmp3 = icmp ne i32 %tmp2, 0		; <i1> [#uses=1]
 	%tmp34 = zext i1 %tmp3 to i8		; <i8> [#uses=1]
 	%toBool = icmp ne i8 %tmp34, 0		; <i1> [#uses=1]
diff --git a/llvm/test/Transforms/InstCombine/2007-11-07-OpaqueAlignCrash.ll b/llvm/test/Transforms/InstCombine/2007-11-07-OpaqueAlignCrash.ll
index e6c9bcd..ff31072 100644
--- a/llvm/test/Transforms/InstCombine/2007-11-07-OpaqueAlignCrash.ll
+++ b/llvm/test/Transforms/InstCombine/2007-11-07-OpaqueAlignCrash.ll
@@ -13,8 +13,8 @@
 
 define i32 @foo() {
 entry:
-        %x = load i8* bitcast (%opaque_t* @g to i8*)
-        %y = load i32* bitcast (%op_ts* @h to i32*)
+        %x = load i8, i8* bitcast (%opaque_t* @g to i8*)
+        %y = load i32, i32* bitcast (%op_ts* @h to i32*)
 	%z = zext i8 %x to i32
 	%r = add i32 %y, %z
         ret i32 %r
diff --git a/llvm/test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll b/llvm/test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll
index 8721c83..7260c00 100644
--- a/llvm/test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll
+++ b/llvm/test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll
@@ -4,7 +4,7 @@
 entry:
 	%z = alloca i32
 	store i32 0, i32* %z
-	%tmp = load i32* %z
+	%tmp = load i32, i32* %z
 	%sub = sub i32 %tmp, 1
 	%cmp = icmp ule i32 %sub, 0
 	%retval = select i1 %cmp, i32 0, i32 1
@@ -15,7 +15,7 @@
 entry:
 	%z = alloca i32
 	store i32 0, i32* %z
-	%tmp = load i32* %z
+	%tmp = load i32, i32* %z
 	%sub = sub i32 %tmp, 1
 	%cmp = icmp ugt i32 %sub, 0
 	%retval = select i1 %cmp, i32 1, i32 0
@@ -26,7 +26,7 @@
 entry:
 	%z = alloca i32
 	store i32 0, i32* %z
-	%tmp = load i32* %z
+	%tmp = load i32, i32* %z
 	%sub = sub i32 %tmp, 1
 	%cmp = icmp slt i32 %sub, 0
 	%retval = select i1 %cmp, i32 1, i32 0
@@ -37,7 +37,7 @@
 entry:
 	%z = alloca i32
 	store i32 0, i32* %z
-	%tmp = load i32* %z
+	%tmp = load i32, i32* %z
 	%sub = sub i32 %tmp, 1
 	%cmp = icmp sle i32 %sub, 0
 	%retval = select i1 %cmp, i32 1, i32 0
@@ -48,7 +48,7 @@
 entry:
 	%z = alloca i32
 	store i32 0, i32* %z
-	%tmp = load i32* %z
+	%tmp = load i32, i32* %z
 	%sub = sub i32 %tmp, 1
 	%cmp = icmp sge i32 %sub, 0
 	%retval = select i1 %cmp, i32 0, i32 1
@@ -59,7 +59,7 @@
 entry:
 	%z = alloca i32
 	store i32 0, i32* %z
-	%tmp = load i32* %z
+	%tmp = load i32, i32* %z
 	%sub = sub i32 %tmp, 1
 	%cmp = icmp sgt i32 %sub, 0
 	%retval = select i1 %cmp, i32 0, i32 1
@@ -70,7 +70,7 @@
 entry:
 	%z = alloca i32
 	store i32 0, i32* %z
-	%tmp = load i32* %z
+	%tmp = load i32, i32* %z
 	%sub = sub i32 %tmp, 1
 	%cmp = icmp eq i32 %sub, 0
 	%retval = select i1 %cmp, i32 0, i32 1
@@ -81,7 +81,7 @@
 entry:
 	%z = alloca i32
 	store i32 0, i32* %z
-	%tmp = load i32* %z
+	%tmp = load i32, i32* %z
 	%sub = sub i32 %tmp, 1
 	%cmp = icmp ne i32 %sub, 0
 	%retval = select i1 %cmp, i32 1, i32 0
diff --git a/llvm/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll b/llvm/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll
index 0fa4d71..d086f4b 100644
--- a/llvm/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll
+++ b/llvm/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -instcombine -S | grep "16" | count 1
 
 define i8* @bork(i8** %qux) {
-  %tmp275 = load i8** %qux, align 1
+  %tmp275 = load i8*, i8** %qux, align 1
   %tmp275276 = ptrtoint i8* %tmp275 to i32
   %tmp277 = add i32 %tmp275276, 16
   %tmp277278 = inttoptr i32 %tmp277 to i8*
diff --git a/llvm/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll b/llvm/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
index fd0217e..af662bd 100644
--- a/llvm/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
+++ b/llvm/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
@@ -6,7 +6,7 @@
 define i32 @main() nounwind  {
 entry:
 	%tmp93 = icmp slt i32 0, 10		; <i1> [#uses=0]
-	%tmp34 = load volatile i32* @g_1, align 4		; <i32> [#uses=1]
+	%tmp34 = load volatile i32, i32* @g_1, align 4		; <i32> [#uses=1]
 	br label %bb
 
 bb:		; preds = %bb, %entry
@@ -16,7 +16,7 @@
 	store volatile i32 %tmp4, i32* @g_1, align 4
 	%tmp6 = add i32 %b.0.reg2mem.0, 1		; <i32> [#uses=2]
 	%tmp9 = icmp slt i32 %tmp6, 10		; <i1> [#uses=1]
-	%tmp3 = load volatile i32* @g_1, align 4		; <i32> [#uses=1]
+	%tmp3 = load volatile i32, i32* @g_1, align 4		; <i32> [#uses=1]
 	br i1 %tmp9, label %bb, label %bb11
 
 bb11:		; preds = %bb
diff --git a/llvm/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll b/llvm/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
index 8022414..3c67e51 100644
--- a/llvm/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
+++ b/llvm/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
@@ -7,11 +7,11 @@
 define i32 @main(i32 %i) nounwind  {
 entry:
 	%tmp93 = icmp slt i32 %i, 10		; <i1> [#uses=0]
-	%tmp34 = load volatile i32* @g_1, align 4		; <i32> [#uses=1]
+	%tmp34 = load volatile i32, i32* @g_1, align 4		; <i32> [#uses=1]
 	br i1 %tmp93, label %bb11, label %bb
 
 bb:		; preds = %bb, %entry
-	%tmp3 = load volatile i32* @g_1, align 4		; <i32> [#uses=1]
+	%tmp3 = load volatile i32, i32* @g_1, align 4		; <i32> [#uses=1]
 	br label %bb11
 
 bb11:		; preds = %bb
diff --git a/llvm/test/Transforms/InstCombine/2008-05-09-SinkOfInvoke.ll b/llvm/test/Transforms/InstCombine/2008-05-09-SinkOfInvoke.ll
index 823ccb6..722f8f0 100644
--- a/llvm/test/Transforms/InstCombine/2008-05-09-SinkOfInvoke.ll
+++ b/llvm/test/Transforms/InstCombine/2008-05-09-SinkOfInvoke.ll
@@ -23,7 +23,7 @@
 			to label %invcont37 unwind label %lpad		; <i32**> [#uses=1]
 
 invcont37:		; preds = %invcont31
-	%tmp39 = load i32** %tmp38, align 8		; <i32*> [#uses=1]
+	%tmp39 = load i32*, i32** %tmp38, align 8		; <i32*> [#uses=1]
 	%tmp41 = getelementptr %"struct.std::ctype<char>", %"struct.std::ctype<char>"* %this, i32 0, i32 4		; <i32**> [#uses=1]
 	store i32* %tmp39, i32** %tmp41, align 8
 	ret void
diff --git a/llvm/test/Transforms/InstCombine/2008-05-17-InfLoop.ll b/llvm/test/Transforms/InstCombine/2008-05-17-InfLoop.ll
index 2939a48..f0ccc05 100644
--- a/llvm/test/Transforms/InstCombine/2008-05-17-InfLoop.ll
+++ b/llvm/test/Transforms/InstCombine/2008-05-17-InfLoop.ll
@@ -10,9 +10,9 @@
 	%blockSize100k = alloca i32		; <i32*> [#uses=2]
 	store i32 %0, i32* %blockSize100k
 	%n = alloca i32		; <i32*> [#uses=2]
-	load i32* %blockSize100k		; <i32>:1 [#uses=1]
+	load i32, i32* %blockSize100k		; <i32>:1 [#uses=1]
 	store i32 %1, i32* %n
-	load i32* %n		; <i32>:2 [#uses=1]
+	load i32, i32* %n		; <i32>:2 [#uses=1]
 	add i32 %2, 2		; <i32>:3 [#uses=1]
 	mul i32 %3, ptrtoint (i32* getelementptr (i32* null, i32 1) to i32)		; <i32>:4 [#uses=1]
 	call void @BZALLOC( i32 %4 )
diff --git a/llvm/test/Transforms/InstCombine/2008-05-23-CompareFold.ll b/llvm/test/Transforms/InstCombine/2008-05-23-CompareFold.ll
index b729677..b10aac9 100644
--- a/llvm/test/Transforms/InstCombine/2008-05-23-CompareFold.ll
+++ b/llvm/test/Transforms/InstCombine/2008-05-23-CompareFold.ll
@@ -5,7 +5,7 @@
 ; CHECK: ret i1 false
 define i1 @f(i8* %x) {
 entry:
-       %tmp462 = load i8* %x, align 1          ; <i8> [#uses=1]
+       %tmp462 = load i8, i8* %x, align 1          ; <i8> [#uses=1]
        %tmp462463 = sitofp i8 %tmp462 to float         ; <float> [#uses=1]
        %tmp464 = fcmp ugt float %tmp462463, 0x47EFFFFFE0000000         ; <i1>
        ret i1 %tmp464
diff --git a/llvm/test/Transforms/InstCombine/2008-06-19-UncondLoad.ll b/llvm/test/Transforms/InstCombine/2008-06-19-UncondLoad.ll
index 05f1c52..c3aab46 100644
--- a/llvm/test/Transforms/InstCombine/2008-06-19-UncondLoad.ll
+++ b/llvm/test/Transforms/InstCombine/2008-06-19-UncondLoad.ll
@@ -4,13 +4,13 @@
 declare i32 @x(i32*)
 define i32 @b(i32* %a, i32* %b) {
 entry:
-        %tmp1 = load i32* %a            
-        %tmp3 = load i32* %b           
+        %tmp1 = load i32, i32* %a            
+        %tmp3 = load i32, i32* %b           
         %add = add i32 %tmp1, %tmp3   
         %call = call i32 @x( i32* %a )
         %tobool = icmp ne i32 %add, 0
 	; not safe to turn into an uncond load
         %cond = select i1 %tobool, i32* %b, i32* %a             
-        %tmp8 = load i32* %cond       
+        %tmp8 = load i32, i32* %cond       
         ret i32 %tmp8
 }
diff --git a/llvm/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll b/llvm/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
index dcf4bef..17ec9cd 100644
--- a/llvm/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
+++ b/llvm/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
@@ -7,7 +7,7 @@
 define i32 @main() nounwind  {
 entry:
 	%tmp93 = icmp slt i32 0, 10		; <i1> [#uses=0]
-	%tmp34 = load volatile i32* @g_1, align 4		; <i32> [#uses=1]
+	%tmp34 = load volatile i32, i32* @g_1, align 4		; <i32> [#uses=1]
 	br label %bb
 
 bb:		; preds = %bb, %entry
@@ -17,7 +17,7 @@
 	store volatile i32 %tmp4, i32* @g_1, align 4
 	%tmp6 = add i32 %b.0.reg2mem.0, 1		; <i32> [#uses=2]
 	%tmp9 = icmp slt i32 %tmp6, 10		; <i1> [#uses=1]
-	%tmp3 = load volatile i32* @g_1, align 4		; <i32> [#uses=1]
+	%tmp3 = load volatile i32, i32* @g_1, align 4		; <i32> [#uses=1]
 	br i1 %tmp9, label %bb, label %bb11
 
 bb11:		; preds = %bb
diff --git a/llvm/test/Transforms/InstCombine/2008-08-05-And.ll b/llvm/test/Transforms/InstCombine/2008-08-05-And.ll
index 124c214..91f1c0b 100644
--- a/llvm/test/Transforms/InstCombine/2008-08-05-And.ll
+++ b/llvm/test/Transforms/InstCombine/2008-08-05-And.ll
@@ -7,7 +7,7 @@
 
 bb:
 	%g1 = getelementptr i8, i8* %x, i32 0
-        %l1 = load i8* %g1, align 1
+        %l1 = load i8, i8* %g1, align 1
 	%s1 = sub i8 %l1, 6
 	%c1 = icmp ugt i8 %s1, 2
 	%s2 = sub i8 %l1, 10
diff --git a/llvm/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll b/llvm/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll
index 67d56d5..23ed5aa 100644
--- a/llvm/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll
+++ b/llvm/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll
@@ -21,7 +21,7 @@
 	%4 = bitcast { i32, i32 }* %3 to i64*		; <i64*> [#uses=1]
 	store i64 %key_token2, i64* %4, align 4
 	%5 = call i32 (...)* @foo(%struct.Key* byval align 4 %iospec, i32* %ret) nounwind		; <i32> [#uses=0]
-	%6 = load i32* %ret, align 4		; <i32> [#uses=1]
+	%6 = load i32, i32* %ret, align 4		; <i32> [#uses=1]
 	ret i32 %6
 }
 
diff --git a/llvm/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll b/llvm/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll
index 68c51b4..5bff5a8 100644
--- a/llvm/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll
+++ b/llvm/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll
@@ -15,9 +15,9 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store float 0x7FF0000000000000, float* %x, align 4
 	store float 0x7FF8000000000000, float* %y, align 4
-	%0 = load float* %y, align 4		; <float> [#uses=1]
+	%0 = load float, float* %y, align 4		; <float> [#uses=1]
 	%1 = fpext float %0 to double		; <double> [#uses=1]
-	%2 = load float* %x, align 4		; <float> [#uses=1]
+	%2 = load float, float* %x, align 4		; <float> [#uses=1]
 	%3 = fpext float %2 to double		; <double> [#uses=1]
 	%4 = frem double %3, %1		; <double> [#uses=1]
 	%5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind		; <i32> [#uses=0]
@@ -36,9 +36,9 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store float 0x7FF0000000000000, float* %x, align 4
 	store float 0.000000e+00, float* %y, align 4
-	%0 = load float* %y, align 4		; <float> [#uses=1]
+	%0 = load float, float* %y, align 4		; <float> [#uses=1]
 	%1 = fpext float %0 to double		; <double> [#uses=1]
-	%2 = load float* %x, align 4		; <float> [#uses=1]
+	%2 = load float, float* %x, align 4		; <float> [#uses=1]
 	%3 = fpext float %2 to double		; <double> [#uses=1]
 	%4 = frem double %3, %1		; <double> [#uses=1]
 	%5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind		; <i32> [#uses=0]
@@ -55,9 +55,9 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store float 0x7FF0000000000000, float* %x, align 4
 	store float 3.500000e+00, float* %y, align 4
-	%0 = load float* %y, align 4		; <float> [#uses=1]
+	%0 = load float, float* %y, align 4		; <float> [#uses=1]
 	%1 = fpext float %0 to double		; <double> [#uses=1]
-	%2 = load float* %x, align 4		; <float> [#uses=1]
+	%2 = load float, float* %x, align 4		; <float> [#uses=1]
 	%3 = fpext float %2 to double		; <double> [#uses=1]
 	%4 = frem double %3, %1		; <double> [#uses=1]
 	%5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind		; <i32> [#uses=0]
@@ -74,9 +74,9 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store float 0x7FF0000000000000, float* %x, align 4
 	store float 0x7FF0000000000000, float* %y, align 4
-	%0 = load float* %y, align 4		; <float> [#uses=1]
+	%0 = load float, float* %y, align 4		; <float> [#uses=1]
 	%1 = fpext float %0 to double		; <double> [#uses=1]
-	%2 = load float* %x, align 4		; <float> [#uses=1]
+	%2 = load float, float* %x, align 4		; <float> [#uses=1]
 	%3 = fpext float %2 to double		; <double> [#uses=1]
 	%4 = frem double %3, %1		; <double> [#uses=1]
 	%5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind		; <i32> [#uses=0]
@@ -93,9 +93,9 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store float 0x7FF8000000000000, float* %x, align 4
 	store float 0x7FF0000000000000, float* %y, align 4
-	%0 = load float* %y, align 4		; <float> [#uses=1]
+	%0 = load float, float* %y, align 4		; <float> [#uses=1]
 	%1 = fpext float %0 to double		; <double> [#uses=1]
-	%2 = load float* %x, align 4		; <float> [#uses=1]
+	%2 = load float, float* %x, align 4		; <float> [#uses=1]
 	%3 = fpext float %2 to double		; <double> [#uses=1]
 	%4 = frem double %3, %1		; <double> [#uses=1]
 	%5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind		; <i32> [#uses=0]
@@ -112,9 +112,9 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store float 0x7FF8000000000000, float* %x, align 4
 	store float 0.000000e+00, float* %y, align 4
-	%0 = load float* %y, align 4		; <float> [#uses=1]
+	%0 = load float, float* %y, align 4		; <float> [#uses=1]
 	%1 = fpext float %0 to double		; <double> [#uses=1]
-	%2 = load float* %x, align 4		; <float> [#uses=1]
+	%2 = load float, float* %x, align 4		; <float> [#uses=1]
 	%3 = fpext float %2 to double		; <double> [#uses=1]
 	%4 = frem double %3, %1		; <double> [#uses=1]
 	%5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind		; <i32> [#uses=0]
@@ -131,9 +131,9 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store float 0x7FF8000000000000, float* %x, align 4
 	store float 3.500000e+00, float* %y, align 4
-	%0 = load float* %y, align 4		; <float> [#uses=1]
+	%0 = load float, float* %y, align 4		; <float> [#uses=1]
 	%1 = fpext float %0 to double		; <double> [#uses=1]
-	%2 = load float* %x, align 4		; <float> [#uses=1]
+	%2 = load float, float* %x, align 4		; <float> [#uses=1]
 	%3 = fpext float %2 to double		; <double> [#uses=1]
 	%4 = frem double %3, %1		; <double> [#uses=1]
 	%5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind		; <i32> [#uses=0]
@@ -150,9 +150,9 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store float 0x7FF8000000000000, float* %x, align 4
 	store float 0x7FF8000000000000, float* %y, align 4
-	%0 = load float* %y, align 4		; <float> [#uses=1]
+	%0 = load float, float* %y, align 4		; <float> [#uses=1]
 	%1 = fpext float %0 to double		; <double> [#uses=1]
-	%2 = load float* %x, align 4		; <float> [#uses=1]
+	%2 = load float, float* %x, align 4		; <float> [#uses=1]
 	%3 = fpext float %2 to double		; <double> [#uses=1]
 	%4 = frem double %3, %1		; <double> [#uses=1]
 	%5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind		; <i32> [#uses=0]
@@ -169,9 +169,9 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store float 0.000000e+00, float* %x, align 4
 	store float 0x7FF8000000000000, float* %y, align 4
-	%0 = load float* %y, align 4		; <float> [#uses=1]
+	%0 = load float, float* %y, align 4		; <float> [#uses=1]
 	%1 = fpext float %0 to double		; <double> [#uses=1]
-	%2 = load float* %x, align 4		; <float> [#uses=1]
+	%2 = load float, float* %x, align 4		; <float> [#uses=1]
 	%3 = fpext float %2 to double		; <double> [#uses=1]
 	%4 = frem double %3, %1		; <double> [#uses=1]
 	%5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind		; <i32> [#uses=0]
@@ -188,9 +188,9 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store float 0.000000e+00, float* %x, align 4
 	store float 0x7FF0000000000000, float* %y, align 4
-	%0 = load float* %y, align 4		; <float> [#uses=1]
+	%0 = load float, float* %y, align 4		; <float> [#uses=1]
 	%1 = fpext float %0 to double		; <double> [#uses=1]
-	%2 = load float* %x, align 4		; <float> [#uses=1]
+	%2 = load float, float* %x, align 4		; <float> [#uses=1]
 	%3 = fpext float %2 to double		; <double> [#uses=1]
 	%4 = frem double %3, %1		; <double> [#uses=1]
 	%5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind		; <i32> [#uses=0]
@@ -207,9 +207,9 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store float 0.000000e+00, float* %x, align 4
 	store float 0.000000e+00, float* %y, align 4
-	%0 = load float* %y, align 4		; <float> [#uses=1]
+	%0 = load float, float* %y, align 4		; <float> [#uses=1]
 	%1 = fpext float %0 to double		; <double> [#uses=1]
-	%2 = load float* %x, align 4		; <float> [#uses=1]
+	%2 = load float, float* %x, align 4		; <float> [#uses=1]
 	%3 = fpext float %2 to double		; <double> [#uses=1]
 	%4 = frem double %3, %1		; <double> [#uses=1]
 	%5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind		; <i32> [#uses=0]
@@ -226,9 +226,9 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store float 0.000000e+00, float* %x, align 4
 	store float 3.500000e+00, float* %y, align 4
-	%0 = load float* %y, align 4		; <float> [#uses=1]
+	%0 = load float, float* %y, align 4		; <float> [#uses=1]
 	%1 = fpext float %0 to double		; <double> [#uses=1]
-	%2 = load float* %x, align 4		; <float> [#uses=1]
+	%2 = load float, float* %x, align 4		; <float> [#uses=1]
 	%3 = fpext float %2 to double		; <double> [#uses=1]
 	%4 = frem double %3, %1		; <double> [#uses=1]
 	%5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind		; <i32> [#uses=0]
@@ -245,9 +245,9 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store float 3.500000e+00, float* %x, align 4
 	store float 0x7FF8000000000000, float* %y, align 4
-	%0 = load float* %y, align 4		; <float> [#uses=1]
+	%0 = load float, float* %y, align 4		; <float> [#uses=1]
 	%1 = fpext float %0 to double		; <double> [#uses=1]
-	%2 = load float* %x, align 4		; <float> [#uses=1]
+	%2 = load float, float* %x, align 4		; <float> [#uses=1]
 	%3 = fpext float %2 to double		; <double> [#uses=1]
 	%4 = frem double %3, %1		; <double> [#uses=1]
 	%5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind		; <i32> [#uses=0]
@@ -264,9 +264,9 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store float 3.500000e+00, float* %x, align 4
 	store float 0x7FF0000000000000, float* %y, align 4
-	%0 = load float* %y, align 4		; <float> [#uses=1]
+	%0 = load float, float* %y, align 4		; <float> [#uses=1]
 	%1 = fpext float %0 to double		; <double> [#uses=1]
-	%2 = load float* %x, align 4		; <float> [#uses=1]
+	%2 = load float, float* %x, align 4		; <float> [#uses=1]
 	%3 = fpext float %2 to double		; <double> [#uses=1]
 	%4 = frem double %3, %1		; <double> [#uses=1]
 	%5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind		; <i32> [#uses=0]
@@ -283,9 +283,9 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store float 3.500000e+00, float* %x, align 4
 	store float 0.000000e+00, float* %y, align 4
-	%0 = load float* %y, align 4		; <float> [#uses=1]
+	%0 = load float, float* %y, align 4		; <float> [#uses=1]
 	%1 = fpext float %0 to double		; <double> [#uses=1]
-	%2 = load float* %x, align 4		; <float> [#uses=1]
+	%2 = load float, float* %x, align 4		; <float> [#uses=1]
 	%3 = fpext float %2 to double		; <double> [#uses=1]
 	%4 = frem double %3, %1		; <double> [#uses=1]
 	%5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind		; <i32> [#uses=0]
@@ -302,9 +302,9 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store float 3.500000e+00, float* %x, align 4
 	store float 3.500000e+00, float* %y, align 4
-	%0 = load float* %y, align 4		; <float> [#uses=1]
+	%0 = load float, float* %y, align 4		; <float> [#uses=1]
 	%1 = fpext float %0 to double		; <double> [#uses=1]
-	%2 = load float* %x, align 4		; <float> [#uses=1]
+	%2 = load float, float* %x, align 4		; <float> [#uses=1]
 	%3 = fpext float %2 to double		; <double> [#uses=1]
 	%4 = frem double %3, %1		; <double> [#uses=1]
 	%5 = call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), double %4) nounwind		; <i32> [#uses=0]
diff --git a/llvm/test/Transforms/InstCombine/2009-01-19-fmod-constant-float.ll b/llvm/test/Transforms/InstCombine/2009-01-19-fmod-constant-float.ll
index 6bc7ce3..5adcb6b 100644
--- a/llvm/test/Transforms/InstCombine/2009-01-19-fmod-constant-float.ll
+++ b/llvm/test/Transforms/InstCombine/2009-01-19-fmod-constant-float.ll
@@ -14,12 +14,12 @@
 	%1 = frem double 1.000000e-01, 1.000000e+00	; <double> [#uses=1]
 	%2 = fptrunc double %1 to float		; <float> [#uses=1]
 	store float %2, float* %0, align 4
-	%3 = load float* %0, align 4		; <float> [#uses=1]
+	%3 = load float, float* %0, align 4		; <float> [#uses=1]
 	store float %3, float* %retval, align 4
 	br label %return
 
 return:		; preds = %entry
-	%retval1 = load float* %retval		; <float> [#uses=1]
+	%retval1 = load float, float* %retval		; <float> [#uses=1]
 	ret float %retval1
 }
 
@@ -31,12 +31,12 @@
 	%1 = frem double -1.000000e-01, 1.000000e+00	; <double> [#uses=1]
 	%2 = fptrunc double %1 to float		; <float> [#uses=1]
 	store float %2, float* %0, align 4
-	%3 = load float* %0, align 4		; <float> [#uses=1]
+	%3 = load float, float* %0, align 4		; <float> [#uses=1]
 	store float %3, float* %retval, align 4
 	br label %return
 
 return:		; preds = %entry
-	%retval1 = load float* %retval		; <float> [#uses=1]
+	%retval1 = load float, float* %retval		; <float> [#uses=1]
 	ret float %retval1
 }
 
@@ -48,12 +48,12 @@
 	%1 = frem double 1.000000e-01, -1.000000e+00	; <double> [#uses=1]
 	%2 = fptrunc double %1 to float		; <float> [#uses=1]
 	store float %2, float* %0, align 4
-	%3 = load float* %0, align 4		; <float> [#uses=1]
+	%3 = load float, float* %0, align 4		; <float> [#uses=1]
 	store float %3, float* %retval, align 4
 	br label %return
 
 return:		; preds = %entry
-	%retval1 = load float* %retval		; <float> [#uses=1]
+	%retval1 = load float, float* %retval		; <float> [#uses=1]
 	ret float %retval1
 }
 
@@ -65,11 +65,11 @@
 	%1 = frem double -1.000000e-01, -1.000000e+00	; <double> [#uses=1]
 	%2 = fptrunc double %1 to float		; <float> [#uses=1]
 	store float %2, float* %0, align 4
-	%3 = load float* %0, align 4		; <float> [#uses=1]
+	%3 = load float, float* %0, align 4		; <float> [#uses=1]
 	store float %3, float* %retval, align 4
 	br label %return
 
 return:		; preds = %entry
-	%retval1 = load float* %retval		; <float> [#uses=1]
+	%retval1 = load float, float* %retval		; <float> [#uses=1]
 	ret float %retval1
 }
diff --git a/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll b/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
index 9f0851c..0f8b38c 100644
--- a/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
+++ b/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
@@ -30,11 +30,11 @@
   %5 = getelementptr %"struct.std::vector<int,std::allocator<int> >", %"struct.std::vector<int,std::allocator<int> >"* %X, i32 0, i32 0
   %6 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >", %"struct.std::_Vector_base<int,std::allocator<int> >"* %5, i32 0, i32 0
   %7 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl", %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"* %6, i32 0, i32 1
-  %8 = load i32** %7, align 4
+  %8 = load i32*, i32** %7, align 4
   %9 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %3, i32 0, i32 0
   store i32* %8, i32** %9, align 4
   %10 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %3, i32 0, i32 0
-  %11 = load i32** %10, align 4
+  %11 = load i32*, i32** %10, align 4
   %tmp2.i = ptrtoint i32* %11 to i32
   %tmp1.i = inttoptr i32 %tmp2.i to i32*
   %tmp3 = ptrtoint i32* %tmp1.i to i32
@@ -42,11 +42,11 @@
   %12 = getelementptr %"struct.std::vector<int,std::allocator<int> >", %"struct.std::vector<int,std::allocator<int> >"* %X, i32 0, i32 0
   %13 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >", %"struct.std::_Vector_base<int,std::allocator<int> >"* %12, i32 0, i32 0
   %14 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl", %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"* %13, i32 0, i32 0
-  %15 = load i32** %14, align 4
+  %15 = load i32*, i32** %14, align 4
   %16 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %0, i32 0, i32 0
   store i32* %15, i32** %16, align 4
   %17 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %0, i32 0, i32 0
-  %18 = load i32** %17, align 4
+  %18 = load i32*, i32** %17, align 4
   %tmp2.i17 = ptrtoint i32* %18 to i32
   %tmp1.i18 = inttoptr i32 %tmp2.i17 to i32*
   %tmp8 = ptrtoint i32* %tmp1.i18 to i32
@@ -54,14 +54,14 @@
   %19 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i, i32 0, i32 0
   store i32* %tmp6, i32** %19
   %20 = getelementptr %"struct.std::bidirectional_iterator_tag", %"struct.std::bidirectional_iterator_tag"* %1, i32 0, i32 0
-  %21 = load i8* %20, align 1
+  %21 = load i8, i8* %20, align 1
   %22 = or i8 %21, 0
   %23 = or i8 %22, 0
   %24 = or i8 %23, 0
   %25 = getelementptr %"struct.std::bidirectional_iterator_tag", %"struct.std::bidirectional_iterator_tag"* %2, i32 0, i32 0
   store i8 0, i8* %25, align 1
   %elt.i = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i, i32 0, i32 0
-  %val.i = load i32** %elt.i
+  %val.i = load i32*, i32** %elt.i
   %tmp.i = bitcast %"struct.std::bidirectional_iterator_tag"* %unnamed_arg.i to i8*
   %tmp9.i = bitcast %"struct.std::bidirectional_iterator_tag"* %2 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp.i, i8* %tmp9.i, i64 1, i32 1, i1 false)
@@ -70,10 +70,10 @@
   %27 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
   store i32* %tmp2, i32** %27
   %28 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
-  %29 = load i32** %28, align 4
+  %29 = load i32*, i32** %28, align 4
   %30 = ptrtoint i32* %29 to i32
   %31 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %32 = load i32** %31, align 4
+  %32 = load i32*, i32** %31, align 4
   %33 = ptrtoint i32* %32 to i32
   %34 = sub i32 %30, %33
   %35 = ashr i32 %34, 2
@@ -82,9 +82,9 @@
 
 bb.i.i:                                           ; preds = %bb12.i.i
   %37 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %38 = load i32** %37, align 4
-  %39 = load i32* %38, align 4
-  %40 = load i32* %4, align 4
+  %38 = load i32*, i32** %37, align 4
+  %39 = load i32, i32* %38, align 4
+  %40 = load i32, i32* %4, align 4
   %41 = icmp eq i32 %39, %40
   %42 = zext i1 %41 to i8
   %toBool.i.i = icmp ne i8 %42, 0
@@ -92,19 +92,19 @@
 
 bb1.i.i:                                          ; preds = %bb.i.i
   %43 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %44 = load i32** %43, align 4
+  %44 = load i32*, i32** %43, align 4
   br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
 bb2.i.i:                                          ; preds = %bb.i.i
   %45 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %46 = load i32** %45, align 4
+  %46 = load i32*, i32** %45, align 4
   %47 = getelementptr i32, i32* %46, i64 1
   %48 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
   store i32* %47, i32** %48, align 4
   %49 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %50 = load i32** %49, align 4
-  %51 = load i32* %50, align 4
-  %52 = load i32* %4, align 4
+  %50 = load i32*, i32** %49, align 4
+  %51 = load i32, i32* %50, align 4
+  %52 = load i32, i32* %4, align 4
   %53 = icmp eq i32 %51, %52
   %54 = zext i1 %53 to i8
   %toBool3.i.i = icmp ne i8 %54, 0
@@ -112,19 +112,19 @@
 
 bb4.i.i:                                          ; preds = %bb2.i.i
   %55 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %56 = load i32** %55, align 4
+  %56 = load i32*, i32** %55, align 4
   br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
 bb5.i.i:                                          ; preds = %bb2.i.i
   %57 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %58 = load i32** %57, align 4
+  %58 = load i32*, i32** %57, align 4
   %59 = getelementptr i32, i32* %58, i64 1
   %60 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
   store i32* %59, i32** %60, align 4
   %61 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %62 = load i32** %61, align 4
-  %63 = load i32* %62, align 4
-  %64 = load i32* %4, align 4
+  %62 = load i32*, i32** %61, align 4
+  %63 = load i32, i32* %62, align 4
+  %64 = load i32, i32* %4, align 4
   %65 = icmp eq i32 %63, %64
   %66 = zext i1 %65 to i8
   %toBool6.i.i = icmp ne i8 %66, 0
@@ -132,19 +132,19 @@
 
 bb7.i.i:                                          ; preds = %bb5.i.i
   %67 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %68 = load i32** %67, align 4
+  %68 = load i32*, i32** %67, align 4
   br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
 bb8.i.i:                                          ; preds = %bb5.i.i
   %69 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %70 = load i32** %69, align 4
+  %70 = load i32*, i32** %69, align 4
   %71 = getelementptr i32, i32* %70, i64 1
   %72 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
   store i32* %71, i32** %72, align 4
   %73 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %74 = load i32** %73, align 4
-  %75 = load i32* %74, align 4
-  %76 = load i32* %4, align 4
+  %74 = load i32*, i32** %73, align 4
+  %75 = load i32, i32* %74, align 4
+  %76 = load i32, i32* %4, align 4
   %77 = icmp eq i32 %75, %76
   %78 = zext i1 %77 to i8
   %toBool9.i.i = icmp ne i8 %78, 0
@@ -152,12 +152,12 @@
 
 bb10.i.i:                                         ; preds = %bb8.i.i
   %79 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %80 = load i32** %79, align 4
+  %80 = load i32*, i32** %79, align 4
   br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
 bb11.i.i:                                         ; preds = %bb8.i.i
   %81 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %82 = load i32** %81, align 4
+  %82 = load i32*, i32** %81, align 4
   %83 = getelementptr i32, i32* %82, i64 1
   %84 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
   store i32* %83, i32** %84, align 4
@@ -171,10 +171,10 @@
 
 bb13.i.i:                                         ; preds = %bb12.i.i
   %87 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
-  %88 = load i32** %87, align 4
+  %88 = load i32*, i32** %87, align 4
   %89 = ptrtoint i32* %88 to i32
   %90 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %91 = load i32** %90, align 4
+  %91 = load i32*, i32** %90, align 4
   %92 = ptrtoint i32* %91 to i32
   %93 = sub i32 %89, %92
   %94 = ashr i32 %93, 2
@@ -186,9 +186,9 @@
 
 bb14.i.i:                                         ; preds = %bb13.i.i
   %95 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %96 = load i32** %95, align 4
-  %97 = load i32* %96, align 4
-  %98 = load i32* %4, align 4
+  %96 = load i32*, i32** %95, align 4
+  %97 = load i32, i32* %96, align 4
+  %98 = load i32, i32* %4, align 4
   %99 = icmp eq i32 %97, %98
   %100 = zext i1 %99 to i8
   %toBool15.i.i = icmp ne i8 %100, 0
@@ -196,12 +196,12 @@
 
 bb16.i.i:                                         ; preds = %bb14.i.i
   %101 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %102 = load i32** %101, align 4
+  %102 = load i32*, i32** %101, align 4
   br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
 bb17.i.i:                                         ; preds = %bb14.i.i
   %103 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %104 = load i32** %103, align 4
+  %104 = load i32*, i32** %103, align 4
   %105 = getelementptr i32, i32* %104, i64 1
   %106 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
   store i32* %105, i32** %106, align 4
@@ -209,9 +209,9 @@
 
 bb18.i.i:                                         ; preds = %bb17.i.i, %bb13.i.i
   %107 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %108 = load i32** %107, align 4
-  %109 = load i32* %108, align 4
-  %110 = load i32* %4, align 4
+  %108 = load i32*, i32** %107, align 4
+  %109 = load i32, i32* %108, align 4
+  %110 = load i32, i32* %4, align 4
   %111 = icmp eq i32 %109, %110
   %112 = zext i1 %111 to i8
   %toBool19.i.i = icmp ne i8 %112, 0
@@ -219,12 +219,12 @@
 
 bb20.i.i:                                         ; preds = %bb18.i.i
   %113 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %114 = load i32** %113, align 4
+  %114 = load i32*, i32** %113, align 4
   br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
 bb21.i.i:                                         ; preds = %bb18.i.i
   %115 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %116 = load i32** %115, align 4
+  %116 = load i32*, i32** %115, align 4
   %117 = getelementptr i32, i32* %116, i64 1
   %118 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
   store i32* %117, i32** %118, align 4
@@ -232,9 +232,9 @@
 
 bb22.i.i:                                         ; preds = %bb21.i.i, %bb13.i.i
   %119 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %120 = load i32** %119, align 4
-  %121 = load i32* %120, align 4
-  %122 = load i32* %4, align 4
+  %120 = load i32*, i32** %119, align 4
+  %121 = load i32, i32* %120, align 4
+  %122 = load i32, i32* %4, align 4
   %123 = icmp eq i32 %121, %122
   %124 = zext i1 %123 to i8
   %toBool23.i.i = icmp ne i8 %124, 0
@@ -242,12 +242,12 @@
 
 bb24.i.i:                                         ; preds = %bb22.i.i
   %125 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %126 = load i32** %125, align 4
+  %126 = load i32*, i32** %125, align 4
   br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
 bb25.i.i:                                         ; preds = %bb22.i.i
   %127 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
-  %128 = load i32** %127, align 4
+  %128 = load i32*, i32** %127, align 4
   %129 = getelementptr i32, i32* %128, i64 1
   %130 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
   store i32* %129, i32** %130, align 4
@@ -255,7 +255,7 @@
 
 bb26.i.i:                                         ; preds = %bb25.i.i, %bb13.i.i
   %131 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >", %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
-  %132 = load i32** %131, align 4
+  %132 = load i32*, i32** %131, align 4
   br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
 
 _ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit: ; preds = %bb26.i.i, %bb24.i.i, %bb20.i.i, %bb16.i.i, %bb10.i.i, %bb7.i.i, %bb4.i.i, %bb1.i.i
diff --git a/llvm/test/Transforms/InstCombine/2009-02-21-LoadCST.ll b/llvm/test/Transforms/InstCombine/2009-02-21-LoadCST.ll
index cb8a77c..90ec6d5 100644
--- a/llvm/test/Transforms/InstCombine/2009-02-21-LoadCST.ll
+++ b/llvm/test/Transforms/InstCombine/2009-02-21-LoadCST.ll
@@ -7,6 +7,6 @@
 @.str1 = internal constant [4 x i8] c"\B5%8\00"
 
 define i32 @test() {
-  %rhsv = load i32* bitcast ([4 x i8]* @.str1 to i32*), align 1
+  %rhsv = load i32, i32* bitcast ([4 x i8]* @.str1 to i32*), align 1
   ret i32 %rhsv
 }
diff --git a/llvm/test/Transforms/InstCombine/2009-02-25-CrashZeroSizeArray.ll b/llvm/test/Transforms/InstCombine/2009-02-25-CrashZeroSizeArray.ll
index 4880dcb..ef1734b 100644
--- a/llvm/test/Transforms/InstCombine/2009-02-25-CrashZeroSizeArray.ll
+++ b/llvm/test/Transforms/InstCombine/2009-02-25-CrashZeroSizeArray.ll
@@ -9,7 +9,7 @@
 	%tmp6 = mul i32 %max289, 4		; <i32> [#uses=1]
 	%tmp7 = alloca i8, i32 0		; <i8*> [#uses=1]
 	%tmp8 = bitcast i8* %tmp7 to [0 x [0 x i32]]*		; <[0 x [0 x i32]]*> [#uses=1]
-	%tmp11 = load i32* null, align 1		; <i32> [#uses=1]
+	%tmp11 = load i32, i32* null, align 1		; <i32> [#uses=1]
 	%tmp12 = icmp eq i32 %tmp11, 3		; <i1> [#uses=1]
 	%tmp13 = zext i1 %tmp12 to i8		; <i8> [#uses=1]
 	%tmp14 = ashr i32 %tmp6, 2		; <i32> [#uses=1]
@@ -20,7 +20,7 @@
 	%tmp19 = bitcast i8* %tmp18 to [0 x i32]*		; <[0 x i32]*> [#uses=1]
 	%tmp20 = bitcast [0 x i32]* %tmp19 to i32*		; <i32*> [#uses=1]
 	%tmp21 = getelementptr i32, i32* %tmp20, i32 0		; <i32*> [#uses=1]
-	%tmp22 = load i32* %tmp21, align 1		; <i32> [#uses=1]
+	%tmp22 = load i32, i32* %tmp21, align 1		; <i32> [#uses=1]
 	%tmp23 = icmp eq i32 %tmp22, 4		; <i1> [#uses=1]
 	%tmp24 = zext i1 %tmp23 to i8		; <i8> [#uses=1]
 	%toBool709 = icmp ne i8 %tmp13, 0		; <i1> [#uses=1]
diff --git a/llvm/test/Transforms/InstCombine/2009-03-18-vector-ashr-crash.ll b/llvm/test/Transforms/InstCombine/2009-03-18-vector-ashr-crash.ll
index c617ca4..3847abd 100644
--- a/llvm/test/Transforms/InstCombine/2009-03-18-vector-ashr-crash.ll
+++ b/llvm/test/Transforms/InstCombine/2009-03-18-vector-ashr-crash.ll
@@ -3,9 +3,9 @@
 
 define void @0(<4 x i16>*, <4 x i16>*) {
 	%3 = alloca <4 x i16>*		; <<4 x i16>**> [#uses=1]
-	%4 = load <4 x i16>* null, align 1		; <<4 x i16>> [#uses=1]
+	%4 = load <4 x i16>, <4 x i16>* null, align 1		; <<4 x i16>> [#uses=1]
 	%5 = ashr <4 x i16> %4, <i16 5, i16 5, i16 5, i16 5>		; <<4 x i16>> [#uses=1]
-	%6 = load <4 x i16>** %3		; <<4 x i16>*> [#uses=1]
+	%6 = load <4 x i16>*, <4 x i16>** %3		; <<4 x i16>*> [#uses=1]
 	store <4 x i16> %5, <4 x i16>* %6, align 1
 	ret void
 }
diff --git a/llvm/test/Transforms/InstCombine/2009-05-23-FCmpToICmp.ll b/llvm/test/Transforms/InstCombine/2009-05-23-FCmpToICmp.ll
index dd14c6b..ced317c 100644
--- a/llvm/test/Transforms/InstCombine/2009-05-23-FCmpToICmp.ll
+++ b/llvm/test/Transforms/InstCombine/2009-05-23-FCmpToICmp.ll
@@ -2,7 +2,7 @@
 ; rdar://6903175
 
 define i1 @f0(i32 *%a) nounwind {
-       %b = load i32* %a, align 4
+       %b = load i32, i32* %a, align 4
        %c = uitofp i32 %b to double
        %d = fcmp ogt double %c, 0x41EFFFFFFFE00000
        ret i1 %d
diff --git a/llvm/test/Transforms/InstCombine/2010-03-03-ExtElim.ll b/llvm/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
index e0def99..faeffb6 100644
--- a/llvm/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
+++ b/llvm/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
@@ -8,7 +8,7 @@
 
 define i1 @PR6486() nounwind {
 ; CHECK-LABEL: @PR6486(
-  %tmp = load i32*** @g_177                       ; <i32**> [#uses=1]
+  %tmp = load i32**, i32*** @g_177                       ; <i32**> [#uses=1]
   %cmp = icmp ne i32** null, %tmp                 ; <i1> [#uses=1]
   %conv = zext i1 %cmp to i32                     ; <i32> [#uses=1]
   %cmp1 = icmp sle i32 0, %conv                   ; <i1> [#uses=1]
diff --git a/llvm/test/Transforms/InstCombine/2011-05-02-VectorBoolean.ll b/llvm/test/Transforms/InstCombine/2011-05-02-VectorBoolean.ll
index 02b64e3..116c971 100644
--- a/llvm/test/Transforms/InstCombine/2011-05-02-VectorBoolean.ll
+++ b/llvm/test/Transforms/InstCombine/2011-05-02-VectorBoolean.ll
@@ -6,9 +6,9 @@
   %a.addr = alloca <2 x i16>, align 4
   %.compoundliteral = alloca <2 x i16>, align 4
   store <2 x i16> %a, <2 x i16>* %a.addr, align 4
-  %tmp = load <2 x i16>* %a.addr, align 4
+  %tmp = load <2 x i16>, <2 x i16>* %a.addr, align 4
   store <2 x i16> zeroinitializer, <2 x i16>* %.compoundliteral
-  %tmp1 = load <2 x i16>* %.compoundliteral
+  %tmp1 = load <2 x i16>, <2 x i16>* %.compoundliteral
   %cmp = icmp uge <2 x i16> %tmp, %tmp1
   %sext = sext <2 x i1> %cmp to <2 x i16>
   ret <2 x i16> %sext
diff --git a/llvm/test/Transforms/InstCombine/2011-05-28-swapmulsub.ll b/llvm/test/Transforms/InstCombine/2011-05-28-swapmulsub.ll
index b096d1f..639b64a 100644
--- a/llvm/test/Transforms/InstCombine/2011-05-28-swapmulsub.ll
+++ b/llvm/test/Transforms/InstCombine/2011-05-28-swapmulsub.ll
@@ -7,14 +7,14 @@
   %on_off.addr = alloca i32, align 4
   %a = alloca i32, align 4
   store i32 %on_off, i32* %on_off.addr, align 4
-  %tmp = load i32* %on_off.addr, align 4
+  %tmp = load i32, i32* %on_off.addr, align 4
   %sub = sub i32 1, %tmp
 ; CHECK-NOT: mul i32
   %mul = mul i32 %sub, -2
 ; CHECK: shl
 ; CHECK-NEXT: add
   store i32 %mul, i32* %a, align 4
-  %tmp1 = load i32* %a, align 4
+  %tmp1 = load i32, i32* %a, align 4
   %conv = trunc i32 %tmp1 to i16
   ret i16 %conv
 }
@@ -26,15 +26,15 @@
   %a = alloca i32, align 4
   store i32 %on_off, i32* %on_off.addr, align 4
   store i32 %q, i32* %q.addr, align 4
-  %tmp = load i32* %q.addr, align 4
-  %tmp1 = load i32* %on_off.addr, align 4
+  %tmp = load i32, i32* %q.addr, align 4
+  %tmp1 = load i32, i32* %on_off.addr, align 4
   %sub = sub i32 %tmp, %tmp1
 ; CHECK-NOT: mul i32
   %mul = mul i32 %sub, -4
 ; CHECK: sub i32
 ; CHECK-NEXT: shl
   store i32 %mul, i32* %a, align 4
-  %tmp2 = load i32* %a, align 4
+  %tmp2 = load i32, i32* %a, align 4
   %conv = trunc i32 %tmp2 to i16
   ret i16 %conv
 }
@@ -44,14 +44,14 @@
   %on_off.addr = alloca i32, align 4
   %a = alloca i32, align 4
   store i32 %on_off, i32* %on_off.addr, align 4
-  %tmp = load i32* %on_off.addr, align 4
+  %tmp = load i32, i32* %on_off.addr, align 4
   %sub = sub i32 7, %tmp
 ; CHECK-NOT: mul i32
   %mul = mul i32 %sub, -4
 ; CHECK: shl
 ; CHECK-NEXT: add
   store i32 %mul, i32* %a, align 4
-  %tmp1 = load i32* %a, align 4
+  %tmp1 = load i32, i32* %a, align 4
   %conv = trunc i32 %tmp1 to i16
   ret i16 %conv
 }
diff --git a/llvm/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll b/llvm/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
index c8f0351..15c11db 100644
--- a/llvm/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
+++ b/llvm/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
@@ -9,12 +9,12 @@
   %ptr = alloca double*, align 4
   store i32 %parm, i32* %1, align 4
   store double* null, double** %ptr, align 4
-  %2 = load i32* %1, align 4
+  %2 = load i32, i32* %1, align 4
   %3 = icmp ne i32 %2, 0
   br i1 %3, label %4, label %10
 
 ; <label>:4                                       ; preds = %0
-  %5 = load i32* %1, align 4
+  %5 = load i32, i32* %1, align 4
   %6 = shl nsw i32 %5, 3
 ; With "nsw", the alloca and its bitcast can be fused:
   %7 = add nsw i32 %6, 2048
@@ -25,7 +25,7 @@
   store double* %9, double** %ptr, align 4
   br label %10
 ; <label>:10                                      ; preds = %4, %0
-  %11 = load double** %ptr, align 4
+  %11 = load double*, double** %ptr, align 4
   call void @bar(double* %11)
 ; CHECK: ret
   ret void
@@ -39,12 +39,12 @@
   %ptr = alloca double*, align 4
   store i32 %parm, i32* %1, align 4
   store double* null, double** %ptr, align 4
-  %2 = load i32* %1, align 4
+  %2 = load i32, i32* %1, align 4
   %3 = icmp ne i32 %2, 0
   br i1 %3, label %4, label %10
 
 ; <label>:4                                       ; preds = %0
-  %5 = load i32* %1, align 4
+  %5 = load i32, i32* %1, align 4
   %6 = mul nsw i32 %5, 8
 ; Without "nsw", the alloca and its bitcast cannot be fused:
   %7 = add  i32 %6, 2048
@@ -57,7 +57,7 @@
   br label %10
 
 ; <label>:10                                      ; preds = %4, %0
-  %11 = load double** %ptr, align 4
+  %11 = load double*, double** %ptr, align 4
   call void @bar(double* %11)
   ret void
 }
diff --git a/llvm/test/Transforms/InstCombine/2011-10-07-AlignPromotion.ll b/llvm/test/Transforms/InstCombine/2011-10-07-AlignPromotion.ll
index 22061b2..122669e 100644
--- a/llvm/test/Transforms/InstCombine/2011-10-07-AlignPromotion.ll
+++ b/llvm/test/Transforms/InstCombine/2011-10-07-AlignPromotion.ll
@@ -10,7 +10,7 @@
   %Point = alloca %struct.CGPoint, align 4
   %1 = bitcast %struct.CGPoint* %a to i64*
   %2 = bitcast %struct.CGPoint* %Point to i64*
-  %3 = load i64* %1, align 4
+  %3 = load i64, i64* %1, align 4
   store i64 %3, i64* %2, align 4
   call void @foo(i64* %2) nounwind
   ret void
diff --git a/llvm/test/Transforms/InstCombine/2012-05-27-Negative-Shift-Crash.ll b/llvm/test/Transforms/InstCombine/2012-05-27-Negative-Shift-Crash.ll
index ba83fe9..ba57baf 100644
--- a/llvm/test/Transforms/InstCombine/2012-05-27-Negative-Shift-Crash.ll
+++ b/llvm/test/Transforms/InstCombine/2012-05-27-Negative-Shift-Crash.ll
@@ -20,7 +20,7 @@
 
 define void @fn4() nounwind uwtable ssp {
 entry:
-  %0 = load i32* @d, align 4
+  %0 = load i32, i32* @d, align 4
   %cmp = icmp eq i32 %0, 0
   %conv = zext i1 %cmp to i32
   store i32 %conv, i32* @c, align 4
@@ -34,12 +34,12 @@
   store i32 %and, i32* @e, align 4
   %sub = add nsw i32 %and, -1
   store i32 %sub, i32* @f, align 4
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %if.else, label %if.then
 
 if.then:                                          ; preds = %entry
-  %1 = load i32* @b, align 4
+  %1 = load i32, i32* @b, align 4
   %.lobit = lshr i32 %1, 31
   %2 = trunc i32 %.lobit to i8
   %.not = xor i8 %2, 1
diff --git a/llvm/test/Transforms/InstCombine/2012-05-28-select-hang.ll b/llvm/test/Transforms/InstCombine/2012-05-28-select-hang.ll
index db1dbd5..c514dd1 100644
--- a/llvm/test/Transforms/InstCombine/2012-05-28-select-hang.ll
+++ b/llvm/test/Transforms/InstCombine/2012-05-28-select-hang.ll
@@ -6,7 +6,7 @@
 
 define void @func() nounwind uwtable ssp {
 entry:
-  %0 = load i8* @c, align 1
+  %0 = load i8, i8* @c, align 1
   %conv = zext i8 %0 to i32
   %or = or i32 %conv, 1
   %conv1 = trunc i32 %or to i8
@@ -16,7 +16,7 @@
   %and = and i32 1, %neg
   %conv3 = trunc i32 %and to i8
   store i8 %conv3, i8* @b, align 1
-  %1 = load i8* @a, align 1
+  %1 = load i8, i8* @a, align 1
   %conv4 = zext i8 %1 to i32
   %conv5 = zext i8 %conv3 to i32
   %tobool = icmp ne i32 %conv4, 0
diff --git a/llvm/test/Transforms/InstCombine/2012-06-06-LoadOfPHIs.ll b/llvm/test/Transforms/InstCombine/2012-06-06-LoadOfPHIs.ll
index 22466a96..4af1ca8 100644
--- a/llvm/test/Transforms/InstCombine/2012-06-06-LoadOfPHIs.ll
+++ b/llvm/test/Transforms/InstCombine/2012-06-06-LoadOfPHIs.ll
@@ -34,16 +34,16 @@
 
 bb12:                                             ; preds = %bb11, %bb10, %bb7, %bb6
   %max.0 = phi double* [ %tmp, %bb6 ], [ %tmp2, %bb7 ], [ %tmp1, %bb10 ], [ %tmp2, %bb11 ]
-; CHECK: %tmp13 = load double* %tmp, align 8
-; CHECK: %tmp14 = load double* %tmp1, align 8
+; CHECK: %tmp13 = load double, double* %tmp, align 8
+; CHECK: %tmp14 = load double, double* %tmp1, align 8
 ; CHECK: %tmp15 = fcmp olt double %tmp13, %tmp14
-  %tmp13 = load double* %tmp, align 8
-  %tmp14 = load double* %tmp1, align 8
+  %tmp13 = load double, double* %tmp, align 8
+  %tmp14 = load double, double* %tmp1, align 8
   %tmp15 = fcmp olt double %tmp13, %tmp14
   br i1 %tmp15, label %bb16, label %bb21
 
 bb16:                                             ; preds = %bb12
-  %tmp17 = load double* %tmp2, align 8
+  %tmp17 = load double, double* %tmp2, align 8
   %tmp18 = fcmp olt double %tmp13, %tmp17
   br i1 %tmp18, label %bb19, label %bb20
 
@@ -54,7 +54,7 @@
   br label %bb26
 
 bb21:                                             ; preds = %bb12
-  %tmp22 = load double* %tmp2, align 8
+  %tmp22 = load double, double* %tmp2, align 8
   %tmp23 = fcmp olt double %tmp14, %tmp22
   br i1 %tmp23, label %bb24, label %bb25
 
@@ -66,16 +66,16 @@
 
 bb26:                                             ; preds = %bb25, %bb24, %bb20, %bb19
   %min.0 = phi double* [ %tmp, %bb19 ], [ %tmp2, %bb20 ], [ %tmp1, %bb24 ], [ %tmp2, %bb25 ]
-; CHECK: %tmp27 = load double* %min.0, align 8
-; CHECK: %tmp28 = load double* %max.0
+; CHECK: %tmp27 = load double, double* %min.0, align 8
+; CHECK: %tmp28 = load double, double* %max.0
 ; CHECK: %tmp29 = fadd double %tmp27, %tmp28
-  %tmp27 = load double* %min.0, align 8
-  %tmp28 = load double* %max.0
+  %tmp27 = load double, double* %min.0, align 8
+  %tmp28 = load double, double* %max.0
   %tmp29 = fadd double %tmp27, %tmp28
   %tmp30 = fdiv double %tmp29, 2.000000e+00
   store double %tmp30, double* %outL
-  %tmp31 = load double* %min.0
-  %tmp32 = load double* %max.0
+  %tmp31 = load double, double* %min.0
+  %tmp32 = load double, double* %max.0
   %tmp33 = fcmp oeq double %tmp31, %tmp32
   br i1 %tmp33, label %bb34, label %bb35
 
@@ -107,11 +107,11 @@
   br i1 %tmp46, label %bb47, label %bb55
 
 bb47:                                             ; preds = %bb45
-  %tmp48 = load double* %tmp1, align 8
-  %tmp49 = load double* %tmp2, align 8
+  %tmp48 = load double, double* %tmp1, align 8
+  %tmp49 = load double, double* %tmp2, align 8
   %tmp50 = fsub double %tmp48, %tmp49
-  %tmp51 = load double* %max.0
-  %tmp52 = load double* %min.0
+  %tmp51 = load double, double* %max.0
+  %tmp52 = load double, double* %min.0
   %tmp53 = fsub double %tmp51, %tmp52
   %tmp54 = fdiv double %tmp50, %tmp53
   store double %tmp54, double* %outH
@@ -122,11 +122,11 @@
   br i1 %tmp56, label %bb57, label %bb66
 
 bb57:                                             ; preds = %bb55
-  %tmp58 = load double* %tmp2, align 8
-  %tmp59 = load double* %tmp, align 8
+  %tmp58 = load double, double* %tmp2, align 8
+  %tmp59 = load double, double* %tmp, align 8
   %tmp60 = fsub double %tmp58, %tmp59
-  %tmp61 = load double* %max.0
-  %tmp62 = load double* %min.0
+  %tmp61 = load double, double* %max.0
+  %tmp62 = load double, double* %min.0
   %tmp63 = fsub double %tmp61, %tmp62
   %tmp64 = fdiv double %tmp60, %tmp63
   %tmp65 = fadd double 2.000000e+00, %tmp64
@@ -134,11 +134,11 @@
   br label %bb75
 
 bb66:                                             ; preds = %bb55
-  %tmp67 = load double* %tmp, align 8
-  %tmp68 = load double* %tmp1, align 8
+  %tmp67 = load double, double* %tmp, align 8
+  %tmp68 = load double, double* %tmp1, align 8
   %tmp69 = fsub double %tmp67, %tmp68
-  %tmp70 = load double* %max.0
-  %tmp71 = load double* %min.0
+  %tmp70 = load double, double* %max.0
+  %tmp71 = load double, double* %min.0
   %tmp72 = fsub double %tmp70, %tmp71
   %tmp73 = fdiv double %tmp69, %tmp72
   %tmp74 = fadd double 4.000000e+00, %tmp73
@@ -146,7 +146,7 @@
   br label %bb75
 
 bb75:                                             ; preds = %bb66, %bb57, %bb47
-  %tmp76 = load double* %outH
+  %tmp76 = load double, double* %outH
   %tmp77 = fdiv double %tmp76, 6.000000e+00
   store double %tmp77, double* %outH
   %tmp78 = fcmp olt double %tmp77, 0.000000e+00
diff --git a/llvm/test/Transforms/InstCombine/2012-07-25-LoadPart.ll b/llvm/test/Transforms/InstCombine/2012-07-25-LoadPart.ll
index 18aab7f..d4f3130 100644
--- a/llvm/test/Transforms/InstCombine/2012-07-25-LoadPart.ll
+++ b/llvm/test/Transforms/InstCombine/2012-07-25-LoadPart.ll
@@ -5,7 +5,7 @@
 @test = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
 
 define i64 @foo() {
-  %ret = load i64* bitcast (i8* getelementptr (i8* bitcast ([4 x i32]* @test to i8*), i64 2) to i64*), align 1
+  %ret = load i64, i64* bitcast (i8* getelementptr (i8* bitcast ([4 x i32]* @test to i8*), i64 2) to i64*), align 1
   ret i64 %ret
   ; 0x00030000_00020000 in [01 00/00 00 02 00 00 00 03 00/00 00 04 00 00 00]
   ; LE: ret i64 844424930263040
diff --git a/llvm/test/Transforms/InstCombine/2012-10-25-vector-of-pointers.ll b/llvm/test/Transforms/InstCombine/2012-10-25-vector-of-pointers.ll
index 653d619..2321065 100644
--- a/llvm/test/Transforms/InstCombine/2012-10-25-vector-of-pointers.ll
+++ b/llvm/test/Transforms/InstCombine/2012-10-25-vector-of-pointers.ll
@@ -20,7 +20,7 @@
 
 bb3:                                              ; preds = %bb1
   %tmp4 = bitcast double** %tmp to <2 x double*>*
-  %tmp5 = load <2 x double*>* %tmp4, align 8
+  %tmp5 = load <2 x double*>, <2 x double*>* %tmp4, align 8
   %tmp6 = ptrtoint <2 x double*> %tmp5 to <2 x i64>
   %tmp7 = sub <2 x i64> zeroinitializer, %tmp6
   %tmp8 = ashr exact <2 x i64> %tmp7, <i64 3, i64 3>
diff --git a/llvm/test/Transforms/InstCombine/2012-12-14-simp-vgep.ll b/llvm/test/Transforms/InstCombine/2012-12-14-simp-vgep.ll
index fc29b09..46702f8 100644
--- a/llvm/test/Transforms/InstCombine/2012-12-14-simp-vgep.ll
+++ b/llvm/test/Transforms/InstCombine/2012-12-14-simp-vgep.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 
 define <4 x i32> @foo(<4 x i32*>* %in) {
-  %t17 = load <4 x i32*>* %in, align 8
+  %t17 = load <4 x i32*>, <4 x i32*>* %in, align 8
   %t18 = icmp eq <4 x i32*> %t17, zeroinitializer
   %t19 = zext <4 x i1> %t18 to <4 x i32>
   ret <4 x i32> %t19
diff --git a/llvm/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll b/llvm/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll
index 9c50e66..86b5e0a 100644
--- a/llvm/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll
+++ b/llvm/test/Transforms/InstCombine/2013-03-05-Combine-BitcastTy-Into-Alloca.ll
@@ -16,8 +16,8 @@
 ; CHECK-NOT: bitcast i96* %retval to %struct._my_struct*
 entry:
   %retval = alloca %struct._my_struct, align 8
-  %k.sroa.0.0.copyload = load i96* bitcast (%struct._my_struct* @initval to i96*), align 1
-  %k.sroa.1.12.copyload = load i32* bitcast ([4 x i8]* getelementptr inbounds (%struct._my_struct* @initval, i64 0, i32 1) to i32*), align 1
+  %k.sroa.0.0.copyload = load i96, i96* bitcast (%struct._my_struct* @initval to i96*), align 1
+  %k.sroa.1.12.copyload = load i32, i32* bitcast ([4 x i8]* getelementptr inbounds (%struct._my_struct* @initval, i64 0, i32 1) to i32*), align 1
   %0 = zext i32 %x to i96
   %bf.value = shl nuw nsw i96 %0, 6
   %bf.clear = and i96 %k.sroa.0.0.copyload, -288230376151711744
@@ -39,7 +39,7 @@
   %.fca.0.insert = insertvalue { i64, i64 } undef, i64 %trunc, 0
   %retval.8.idx12 = getelementptr inbounds %struct._my_struct, %struct._my_struct* %retval, i64 0, i32 0, i64 8
   %retval.8.cast13 = bitcast i8* %retval.8.idx12 to i64*
-  %retval.8.load14 = load i64* %retval.8.cast13, align 8
+  %retval.8.load14 = load i64, i64* %retval.8.cast13, align 8
   %.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.8.load14, 1
   ret { i64, i64 } %.fca.1.insert
 }
diff --git a/llvm/test/Transforms/InstCombine/CPP_min_max.ll b/llvm/test/Transforms/InstCombine/CPP_min_max.ll
index b3d081b..04bf0ce 100644
--- a/llvm/test/Transforms/InstCombine/CPP_min_max.ll
+++ b/llvm/test/Transforms/InstCombine/CPP_min_max.ll
@@ -10,11 +10,11 @@
 
 define void @_Z5test1RiS_(i32* %x, i32* %y) {
 entry:
-        %tmp.1.i = load i32* %y         ; <i32> [#uses=1]
-        %tmp.3.i = load i32* %x         ; <i32> [#uses=1]
+        %tmp.1.i = load i32, i32* %y         ; <i32> [#uses=1]
+        %tmp.3.i = load i32, i32* %x         ; <i32> [#uses=1]
         %tmp.4.i = icmp slt i32 %tmp.1.i, %tmp.3.i              ; <i1> [#uses=1]
         %retval.i = select i1 %tmp.4.i, i32* %y, i32* %x                ; <i32*> [#uses=1]
-        %tmp.4 = load i32* %retval.i            ; <i32> [#uses=1]
+        %tmp.4 = load i32, i32* %retval.i            ; <i32> [#uses=1]
         store i32 %tmp.4, i32* %x
         ret void
 }
@@ -22,12 +22,12 @@
 define void @_Z5test2RiS_(i32* %x, i32* %y) {
 entry:
         %tmp.0 = alloca i32             ; <i32*> [#uses=2]
-        %tmp.2 = load i32* %x           ; <i32> [#uses=2]
+        %tmp.2 = load i32, i32* %x           ; <i32> [#uses=2]
         store i32 %tmp.2, i32* %tmp.0
-        %tmp.3.i = load i32* %y         ; <i32> [#uses=1]
+        %tmp.3.i = load i32, i32* %y         ; <i32> [#uses=1]
         %tmp.4.i = icmp slt i32 %tmp.2, %tmp.3.i                ; <i1> [#uses=1]
         %retval.i = select i1 %tmp.4.i, i32* %y, i32* %tmp.0            ; <i32*> [#uses=1]
-        %tmp.6 = load i32* %retval.i            ; <i32> [#uses=1]
+        %tmp.6 = load i32, i32* %retval.i            ; <i32> [#uses=1]
         store i32 %tmp.6, i32* %y
         ret void
 }
diff --git a/llvm/test/Transforms/InstCombine/add3.ll b/llvm/test/Transforms/InstCombine/add3.ll
index d8e708a6..9d3842f 100644
--- a/llvm/test/Transforms/InstCombine/add3.ll
+++ b/llvm/test/Transforms/InstCombine/add3.ll
@@ -13,7 +13,7 @@
         add i32 %.val24, -16
         inttoptr i32 %2 to i32*
         getelementptr i32, i32* %3, i32 1
-        load i32* %4
+        load i32, i32* %4
         tail call i32 @callee( i32 %5 )
         ret void
 }
diff --git a/llvm/test/Transforms/InstCombine/addrspacecast.ll b/llvm/test/Transforms/InstCombine/addrspacecast.ll
index b52b680..5a4b6b2 100644
--- a/llvm/test/Transforms/InstCombine/addrspacecast.ll
+++ b/llvm/test/Transforms/InstCombine/addrspacecast.ll
@@ -106,10 +106,10 @@
 ; CHECK-LABEL: @canonicalize_addrspacecast(
 ; CHECK-NEXT: getelementptr inbounds [16 x i32], [16 x i32] addrspace(1)* %arr, i32 0, i32 0
 ; CHECK-NEXT: addrspacecast i32 addrspace(1)* %{{[a-zA-Z0-9]+}} to i32*
-; CHECK-NEXT: load i32*
+; CHECK-NEXT: load i32, i32*
 ; CHECK-NEXT: ret i32
   %p = addrspacecast [16 x i32] addrspace(1)* %arr to i32*
-  %v = load i32* %p
+  %v = load i32, i32* %p
   ret i32 %v
 }
 
@@ -134,7 +134,7 @@
   %i = phi i32 [ 0, %entry ], [ %i.inc, %loop.body ]
   %sum = phi i32 [ 0, %entry ], [ %sum.inc, %loop.body]
   %ptr = getelementptr i8, i8* %alloca, i32 %i
-  %load = load i8* %ptr
+  %load = load i8, i8* %ptr
   %ext = zext i8 %load to i32
   %sum.inc = add i32 %sum, %ext
   %i.inc = add i32 %i, 1
diff --git a/llvm/test/Transforms/InstCombine/alias-recursion.ll b/llvm/test/Transforms/InstCombine/alias-recursion.ll
index fa63726..8e53995 100644
--- a/llvm/test/Transforms/InstCombine/alias-recursion.ll
+++ b/llvm/test/Transforms/InstCombine/alias-recursion.ll
@@ -19,6 +19,6 @@
 
 for.end:                                          ; preds = %for.body, %entry
   %A = phi i32 (%class.A*)** [ bitcast (i8** @vtbl to i32 (%class.A*)**), %for.body ], [ null, %entry ]
-  %B = load i32 (%class.A*)** %A
+  %B = load i32 (%class.A*)*, i32 (%class.A*)** %A
   ret i32 (%class.A*)* %B
 }
diff --git a/llvm/test/Transforms/InstCombine/align-addr.ll b/llvm/test/Transforms/InstCombine/align-addr.ll
index cd4fc73..6286517 100644
--- a/llvm/test/Transforms/InstCombine/align-addr.ll
+++ b/llvm/test/Transforms/InstCombine/align-addr.ll
@@ -43,7 +43,7 @@
 
 define <16 x i8> @test1(<2 x i64> %x) {
 entry:
-	%tmp = load <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*), align 1
+	%tmp = load <16 x i8>, <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*), align 1
 	ret <16 x i8> %tmp
 }
 
@@ -53,7 +53,7 @@
 ; CHECK-LABEL: @test1_as1(
 ; CHECK: tmp = load
 ; CHECK: GLOBAL_as1{{.*}}align 16
-  %tmp = load <16 x i8> addrspace(1)* bitcast ([4 x i32] addrspace(1)* @GLOBAL_as1 to <16 x i8> addrspace(1)*), align 1
+  %tmp = load <16 x i8>, <16 x i8> addrspace(1)* bitcast ([4 x i32] addrspace(1)* @GLOBAL_as1 to <16 x i8> addrspace(1)*), align 1
   ret <16 x i8> %tmp
 }
 
@@ -63,7 +63,7 @@
 ; CHECK-LABEL: @test1_as1_gep(
 ; CHECK: tmp = load
 ; CHECK: GLOBAL_as1_gep{{.*}}align 16
-  %tmp = load <16 x i8> addrspace(1)* bitcast (i32 addrspace(1)* getelementptr ([8 x i32] addrspace(1)* @GLOBAL_as1_gep, i16 0, i16 4) to <16 x i8> addrspace(1)*), align 1
+  %tmp = load <16 x i8>, <16 x i8> addrspace(1)* bitcast (i32 addrspace(1)* getelementptr ([8 x i32] addrspace(1)* @GLOBAL_as1_gep, i16 0, i16 4) to <16 x i8> addrspace(1)*), align 1
   ret <16 x i8> %tmp
 }
 
@@ -71,11 +71,11 @@
 ; When a load or store lacks an explicit alignment, add one.
 
 ; CHECK-LABEL: @test2(
-; CHECK: load double* %p, align 8
+; CHECK: load double, double* %p, align 8
 ; CHECK: store double %n, double* %p, align 8
 
 define double @test2(double* %p, double %n) nounwind {
-  %t = load double* %p
+  %t = load double, double* %p
   store double %n, double* %p
   ret double %t
 }
diff --git a/llvm/test/Transforms/InstCombine/align-attr.ll b/llvm/test/Transforms/InstCombine/align-attr.ll
index 9f366bf..99a17db 100644
--- a/llvm/test/Transforms/InstCombine/align-attr.ll
+++ b/llvm/test/Transforms/InstCombine/align-attr.ll
@@ -5,11 +5,11 @@
 ; Function Attrs: nounwind uwtable
 define i32 @foo1(i32* align 32 %a) #0 {
 entry:
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   ret i32 %0
 
 ; CHECK-LABEL: @foo1
-; CHECK-DAG: load i32* %a, align 32
+; CHECK-DAG: load i32, i32* %a, align 32
 ; CHECK: ret i32
 }
 
diff --git a/llvm/test/Transforms/InstCombine/align-external.ll b/llvm/test/Transforms/InstCombine/align-external.ll
index 66ff9c1..ee98a01 100644
--- a/llvm/test/Transforms/InstCombine/align-external.ll
+++ b/llvm/test/Transforms/InstCombine/align-external.ll
@@ -23,7 +23,7 @@
 
 define i32 @bar() {
 ; CHECK-LABEL: @bar(
-  %r = load i32* @B, align 1
+  %r = load i32, i32* @B, align 1
 ; CHECK: align 1
   ret i32 %r
 }
diff --git a/llvm/test/Transforms/InstCombine/aligned-altivec.ll b/llvm/test/Transforms/InstCombine/aligned-altivec.ll
index 7ca6406..10b4e4d 100644
--- a/llvm/test/Transforms/InstCombine/aligned-altivec.ll
+++ b/llvm/test/Transforms/InstCombine/aligned-altivec.ll
@@ -14,7 +14,7 @@
 ; CHECK: @llvm.ppc.altivec.lvx
 ; CHECK: ret <4 x i32>
 
-  %v0 = load <4 x i32>* %h, align 8
+  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
   %a = add <4 x i32> %v0, %vl
   ret <4 x i32> %a
 }
@@ -29,7 +29,7 @@
 ; CHECK-NOT: @llvm.ppc.altivec.lvx
 ; CHECK: ret <4 x i32>
 
-  %v0 = load <4 x i32>* %h, align 8
+  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
   %a = add <4 x i32> %v0, %vl
   ret <4 x i32> %a
 }
@@ -42,7 +42,7 @@
   %hv = bitcast <4 x i32>* %h1 to i8*
   call void @llvm.ppc.altivec.stvx(<4 x i32> %d, i8* %hv)
 
-  %v0 = load <4 x i32>* %h, align 8
+  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
   ret <4 x i32> %v0
 
 ; CHECK-LABEL: @test2
@@ -56,7 +56,7 @@
   %hv = bitcast <4 x i32>* %h1 to i8*
   call void @llvm.ppc.altivec.stvx(<4 x i32> %d, i8* %hv)
 
-  %v0 = load <4 x i32>* %h, align 8
+  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
   ret <4 x i32> %v0
 
 ; CHECK-LABEL: @test2
@@ -76,7 +76,7 @@
 ; CHECK: @llvm.ppc.altivec.lvxl
 ; CHECK: ret <4 x i32>
 
-  %v0 = load <4 x i32>* %h, align 8
+  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
   %a = add <4 x i32> %v0, %vl
   ret <4 x i32> %a
 }
@@ -91,7 +91,7 @@
 ; CHECK-NOT: @llvm.ppc.altivec.lvxl
 ; CHECK: ret <4 x i32>
 
-  %v0 = load <4 x i32>* %h, align 8
+  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
   %a = add <4 x i32> %v0, %vl
   ret <4 x i32> %a
 }
@@ -104,7 +104,7 @@
   %hv = bitcast <4 x i32>* %h1 to i8*
   call void @llvm.ppc.altivec.stvxl(<4 x i32> %d, i8* %hv)
 
-  %v0 = load <4 x i32>* %h, align 8
+  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
   ret <4 x i32> %v0
 
 ; CHECK-LABEL: @test2l
@@ -118,7 +118,7 @@
   %hv = bitcast <4 x i32>* %h1 to i8*
   call void @llvm.ppc.altivec.stvxl(<4 x i32> %d, i8* %hv)
 
-  %v0 = load <4 x i32>* %h, align 8
+  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
   ret <4 x i32> %v0
 
 ; CHECK-LABEL: @test2l
diff --git a/llvm/test/Transforms/InstCombine/aligned-qpx.ll b/llvm/test/Transforms/InstCombine/aligned-qpx.ll
index a210418..e303ddd 100644
--- a/llvm/test/Transforms/InstCombine/aligned-qpx.ll
+++ b/llvm/test/Transforms/InstCombine/aligned-qpx.ll
@@ -14,7 +14,7 @@
 ; CHECK: @llvm.ppc.qpx.qvlfs
 ; CHECK: ret <4 x double>
 
-  %v0 = load <4 x float>* %h, align 8
+  %v0 = load <4 x float>, <4 x float>* %h, align 8
   %v0e = fpext <4 x float> %v0 to <4 x double>
   %a = fadd <4 x double> %v0e, %vl
   ret <4 x double> %a
@@ -30,7 +30,7 @@
 ; CHECK-NOT: @llvm.ppc.qpx.qvlfs
 ; CHECK: ret <4 x double>
 
-  %v0 = load <4 x float>* %h, align 8
+  %v0 = load <4 x float>, <4 x float>* %h, align 8
   %v0e = fpext <4 x float> %v0 to <4 x double>
   %a = fadd <4 x double> %v0e, %vl
   ret <4 x double> %a
@@ -44,7 +44,7 @@
   %hv = bitcast <4 x float>* %h1 to i8*
   call void @llvm.ppc.qpx.qvstfs(<4 x double> %d, i8* %hv)
 
-  %v0 = load <4 x float>* %h, align 8
+  %v0 = load <4 x float>, <4 x float>* %h, align 8
   ret <4 x float> %v0
 
 ; CHECK-LABEL: @test2
@@ -58,7 +58,7 @@
   %hv = bitcast <4 x float>* %h1 to i8*
   call void @llvm.ppc.qpx.qvstfs(<4 x double> %d, i8* %hv)
 
-  %v0 = load <4 x float>* %h, align 8
+  %v0 = load <4 x float>, <4 x float>* %h, align 8
   ret <4 x float> %v0
 
 ; CHECK-LABEL: @test2
@@ -78,7 +78,7 @@
 ; CHECK: @llvm.ppc.qpx.qvlfd
 ; CHECK: ret <4 x double>
 
-  %v0 = load <4 x double>* %h, align 8
+  %v0 = load <4 x double>, <4 x double>* %h, align 8
   %a = fadd <4 x double> %v0, %vl
   ret <4 x double> %a
 }
@@ -93,7 +93,7 @@
 ; CHECK: @llvm.ppc.qpx.qvlfd
 ; CHECK: ret <4 x double>
 
-  %v0 = load <4 x double>* %h, align 8
+  %v0 = load <4 x double>, <4 x double>* %h, align 8
   %a = fadd <4 x double> %v0, %vl
   ret <4 x double> %a
 }
@@ -108,7 +108,7 @@
 ; CHECK-NOT: @llvm.ppc.qpx.qvlfd
 ; CHECK: ret <4 x double>
 
-  %v0 = load <4 x double>* %h, align 8
+  %v0 = load <4 x double>, <4 x double>* %h, align 8
   %a = fadd <4 x double> %v0, %vl
   ret <4 x double> %a
 }
@@ -121,7 +121,7 @@
   %hv = bitcast <4 x double>* %h1 to i8*
   call void @llvm.ppc.qpx.qvstfd(<4 x double> %d, i8* %hv)
 
-  %v0 = load <4 x double>* %h, align 8
+  %v0 = load <4 x double>, <4 x double>* %h, align 8
   ret <4 x double> %v0
 
 ; CHECK-LABEL: @test2l
@@ -135,7 +135,7 @@
   %hv = bitcast <4 x double>* %h1 to i8*
   call void @llvm.ppc.qpx.qvstfd(<4 x double> %d, i8* %hv)
 
-  %v0 = load <4 x double>* %h, align 8
+  %v0 = load <4 x double>, <4 x double>* %h, align 8
   ret <4 x double> %v0
 
 ; CHECK-LABEL: @test2ln
@@ -149,7 +149,7 @@
   %hv = bitcast <4 x double>* %h1 to i8*
   call void @llvm.ppc.qpx.qvstfd(<4 x double> %d, i8* %hv)
 
-  %v0 = load <4 x double>* %h, align 8
+  %v0 = load <4 x double>, <4 x double>* %h, align 8
   ret <4 x double> %v0
 
 ; CHECK-LABEL: @test2l
diff --git a/llvm/test/Transforms/InstCombine/alloca.ll b/llvm/test/Transforms/InstCombine/alloca.ll
index e275556..105029f 100644
--- a/llvm/test/Transforms/InstCombine/alloca.ll
+++ b/llvm/test/Transforms/InstCombine/alloca.ll
@@ -18,7 +18,7 @@
         call void (...)* @use( i32* %Y )
         %Z = alloca {  }                ; <{  }*> [#uses=1]
         call void (...)* @use( {  }* %Z )
-        %size = load i32* @int
+        %size = load i32, i32* @int
         %A = alloca {{}}, i32 %size
         call void (...)* @use( {{}}* %A )
         ret void
diff --git a/llvm/test/Transforms/InstCombine/assume-loop-align.ll b/llvm/test/Transforms/InstCombine/assume-loop-align.ll
index 9fbc68e..e803ba6 100644
--- a/llvm/test/Transforms/InstCombine/assume-loop-align.ll
+++ b/llvm/test/Transforms/InstCombine/assume-loop-align.ll
@@ -19,14 +19,14 @@
   br label %for.body
 
 ; CHECK-LABEL: @foo
-; CHECK: load i32* {{.*}} align 64
+; CHECK: load i32, i32* {{.*}} align 64
 ; CHECK: store i32 {{.*}}  align 64
 ; CHECK: ret
 
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, 1
   %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
   store i32 %add, i32* %arrayidx5, align 4
diff --git a/llvm/test/Transforms/InstCombine/assume-redundant.ll b/llvm/test/Transforms/InstCombine/assume-redundant.ll
index cb98755..4b869ef 100644
--- a/llvm/test/Transforms/InstCombine/assume-redundant.ll
+++ b/llvm/test/Transforms/InstCombine/assume-redundant.ll
@@ -16,7 +16,7 @@
 
 entry:
   %a = getelementptr inbounds %struct.s, %struct.s* %x, i64 0, i32 0
-  %0 = load double** %a, align 8
+  %0 = load double*, double** %a, align 8
   %ptrint = ptrtoint double* %0 to i64
   %maskedptr = and i64 %ptrint, 31
   %maskcond = icmp eq i64 %maskedptr, 0
@@ -26,7 +26,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next.1, %for.body ]
   tail call void @llvm.assume(i1 %maskcond)
   %arrayidx = getelementptr inbounds double, double* %0, i64 %indvars.iv
-  %1 = load double* %arrayidx, align 16
+  %1 = load double, double* %arrayidx, align 16
   %add = fadd double %1, 1.000000e+00
   tail call void @llvm.assume(i1 %maskcond)
   %mul = fmul double %add, 2.000000e+00
@@ -34,7 +34,7 @@
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   tail call void @llvm.assume(i1 %maskcond)
   %arrayidx.1 = getelementptr inbounds double, double* %0, i64 %indvars.iv.next
-  %2 = load double* %arrayidx.1, align 8
+  %2 = load double, double* %arrayidx.1, align 8
   %add.1 = fadd double %2, 1.000000e+00
   tail call void @llvm.assume(i1 %maskcond)
   %mul.1 = fmul double %add.1, 2.000000e+00
diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll
index 7e45c04..2f92138 100644
--- a/llvm/test/Transforms/InstCombine/assume.ll
+++ b/llvm/test/Transforms/InstCombine/assume.ll
@@ -5,12 +5,12 @@
 ; Function Attrs: nounwind uwtable
 define i32 @foo1(i32* %a) #0 {
 entry:
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
 
 ; Check that the alignment has been upgraded and that the assume has not
 ; been removed:
 ; CHECK-LABEL: @foo1
-; CHECK-DAG: load i32* %a, align 32
+; CHECK-DAG: load i32, i32* %a, align 32
 ; CHECK-DAG: call void @llvm.assume
 ; CHECK: ret i32
 
@@ -27,7 +27,7 @@
 entry:
 ; Same check as in @foo1, but make sure it works if the assume is first too.
 ; CHECK-LABEL: @foo2
-; CHECK-DAG: load i32* %a, align 32
+; CHECK-DAG: load i32, i32* %a, align 32
 ; CHECK-DAG: call void @llvm.assume
 ; CHECK: ret i32
 
@@ -36,7 +36,7 @@
   %maskcond = icmp eq i64 %maskedptr, 0
   tail call void @llvm.assume(i1 %maskcond)
 
-  %0 = load i32* %a, align 4
+  %0 = load i32, i32* %a, align 4
   ret i32 %0
 }
 
@@ -192,7 +192,7 @@
 ; metadata form?
 define i1 @nonnull1(i32** %a) {
 entry:
-  %load = load i32** %a
+  %load = load i32*, i32** %a
   %cmp = icmp ne i32* %load, null
   tail call void @llvm.assume(i1 %cmp)
   tail call void @escape(i32* %load)
@@ -209,7 +209,7 @@
 ; to pointer types.  Doing otherwise would be illegal.
 define i1 @nonnull2(i32* %a) {
 entry:
-  %load = load i32* %a
+  %load = load i32, i32* %a
   %cmp = icmp ne i32 %load, 0
   tail call void @llvm.assume(i1 %cmp)
   %rval = icmp eq i32 %load, 0
@@ -224,7 +224,7 @@
 ; if the assume is control dependent on something else
 define i1 @nonnull3(i32** %a, i1 %control) {
 entry:
-  %load = load i32** %a
+  %load = load i32*, i32** %a
   %cmp = icmp ne i32* %load, null
   br i1 %control, label %taken, label %not_taken
 taken:
@@ -244,7 +244,7 @@
 ; interrupted by an exception being thrown
 define i1 @nonnull4(i32** %a) {
 entry:
-  %load = load i32** %a
+  %load = load i32*, i32** %a
   ;; This call may throw!
   tail call void @escape(i32* %load)
   %cmp = icmp ne i32* %load, null
diff --git a/llvm/test/Transforms/InstCombine/atomic.ll b/llvm/test/Transforms/InstCombine/atomic.ll
index 98cecef..5754a5a 100644
--- a/llvm/test/Transforms/InstCombine/atomic.ll
+++ b/llvm/test/Transforms/InstCombine/atomic.ll
@@ -7,10 +7,10 @@
 
 define i32 @test2(i32* %p) {
 ; CHECK-LABEL: define i32 @test2(
-; CHECK: %x = load atomic i32* %p seq_cst, align 4
+; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4
 ; CHECK: shl i32 %x, 1
-  %x = load atomic i32* %p seq_cst, align 4
-  %y = load i32* %p, align 4
+  %x = load atomic i32, i32* %p seq_cst, align 4
+  %y = load i32, i32* %p, align 4
   %z = add i32 %x, %y
   ret i32 %z
 }
diff --git a/llvm/test/Transforms/InstCombine/bitcast-alias-function.ll b/llvm/test/Transforms/InstCombine/bitcast-alias-function.ll
index cfec092..1a598a5 100644
--- a/llvm/test/Transforms/InstCombine/bitcast-alias-function.ll
+++ b/llvm/test/Transforms/InstCombine/bitcast-alias-function.ll
@@ -91,12 +91,12 @@
 entry:
 ; CHECK-LABEL: @bitcast_alias_scalar
 ; CHECK: bitcast float* %source to i32*
-; CHECK: load i32*
+; CHECK: load i32, i32*
 ; CHECK-NOT: fptoui
 ; CHECK-NOT: uitofp
 ; CHECK: bitcast float* %dest to i32*
 ; CHECK: store i32
-  %tmp = load float* %source, align 8
+  %tmp = load float, float* %source, align 8
   %call = call float @alias_i32_to_f32(float %tmp) nounwind
   store float %call, float* %dest, align 8
   ret void
@@ -107,12 +107,12 @@
 entry:
 ; CHECK-LABEL: @bitcast_alias_vector
 ; CHECK: bitcast <2 x float>* %source to <2 x i32>*
-; CHECK: load <2 x i32>*
+; CHECK: load <2 x i32>, <2 x i32>*
 ; CHECK-NOT: fptoui
 ; CHECK-NOT: uitofp
 ; CHECK: bitcast <2 x float>* %dest to <2 x i32>*
 ; CHECK: store <2 x i32>
-  %tmp = load <2 x float>* %source, align 8
+  %tmp = load <2 x float>, <2 x float>* %source, align 8
   %call = call <2 x float> @alias_v2i32_to_v2f32(<2 x float> %tmp) nounwind
   store <2 x float> %call, <2 x float>* %dest, align 8
   ret void
@@ -123,11 +123,11 @@
 entry:
 ; CHECK-LABEL: @bitcast_alias_vector_scalar_same_size
 ; CHECK: bitcast <2 x float>* %source to i64*
-; CHECK: load i64*
+; CHECK: load i64, i64*
 ; CHECK: %call = call i64 @func_i64
 ; CHECK: bitcast <2 x float>* %dest to i64*
 ; CHECK: store i64
-  %tmp = load <2 x float>* %source, align 8
+  %tmp = load <2 x float>, <2 x float>* %source, align 8
   %call = call <2 x float> @alias_v2f32_to_i64(<2 x float> %tmp) nounwind
   store <2 x float> %call, <2 x float>* %dest, align 8
   ret void
@@ -137,11 +137,11 @@
 entry:
 ; CHECK-LABEL: @bitcast_alias_scalar_vector_same_size
 ; CHECK: bitcast i64* %source to <2 x float>*
-; CHECK: load <2 x float>*
+; CHECK: load <2 x float>, <2 x float>*
 ; CHECK: call <2 x float> @func_v2f32
 ; CHECK: bitcast i64* %dest to <2 x float>*
 ; CHECK: store <2 x float>
-  %tmp = load i64* %source, align 8
+  %tmp = load i64, i64* %source, align 8
   %call = call i64 @alias_i64_to_v2f32(i64 %tmp) nounwind
   store i64 %call, i64* %dest, align 8
   ret void
@@ -151,11 +151,11 @@
 entry:
 ; CHECK-LABEL: @bitcast_alias_vector_ptrs_same_size
 ; CHECK: bitcast <2 x i64*>* %source to <2 x i32*>*
-; CHECK: load <2 x i32*>*
+; CHECK: load <2 x i32*>, <2 x i32*>*
 ; CHECK: call <2 x i32*> @func_v2i32p
 ; CHECK: bitcast <2 x i64*>* %dest to <2 x i32*>*
 ; CHECK: store <2 x i32*>
-  %tmp = load <2 x i64*>* %source, align 8
+  %tmp = load <2 x i64*>, <2 x i64*>* %source, align 8
   %call = call <2 x i64*> @alias_v2i32p_to_v2i64p(<2 x i64*> %tmp) nounwind
   store <2 x i64*> %call, <2 x i64*>* %dest, align 8
   ret void
@@ -169,7 +169,7 @@
 ; CHECK-NOT: fptoui
 ; CHECK: @alias_i64_to_f32
 ; CHECK-NOT: uitofp
-  %tmp = load float* %source, align 8
+  %tmp = load float, float* %source, align 8
   %call = call float @alias_i64_to_f32(float %tmp) nounwind
   store float %call, float* %dest, align 8
   ret void
@@ -181,7 +181,7 @@
 ; CHECK-NOT: fptoui <2 x float> %tmp to <2 x i64>
 ; CHECK: @alias_v2i64_to_v2f32
 ; CHECK-NOT: uitofp <2 x i64> %call to <2 x float>
-  %tmp = load <2 x float>* %source, align 8
+  %tmp = load <2 x float>, <2 x float>* %source, align 8
   %call = call <2 x float> @alias_v2i64_to_v2f32(<2 x float> %tmp) nounwind
   store <2 x float> %call, <2 x float>* %dest, align 8
   ret void
@@ -191,7 +191,7 @@
 entry:
 ; CHECK-LABEL: @bitcast_alias_vector_mismatched_number_elements
 ; CHECK:  %call = call <4 x float> @alias_v2i32_to_v4f32
-  %tmp = load <4 x float>* %source, align 8
+  %tmp = load <4 x float>, <4 x float>* %source, align 8
   %call = call <4 x float> @alias_v2i32_to_v4f32(<4 x float> %tmp) nounwind
   store <4 x float> %call, <4 x float>* %dest, align 8
   ret void
@@ -201,7 +201,7 @@
 entry:
 ; CHECK-LABEL: @bitcast_alias_vector_scalar_mismatched_bit_size
 ; CHECK:  %call = call <4 x float> @alias_v4f32_to_i64
-  %tmp = load <4 x float>* %source, align 8
+  %tmp = load <4 x float>, <4 x float>* %source, align 8
   %call = call <4 x float> @alias_v4f32_to_i64(<4 x float> %tmp) nounwind
   store <4 x float> %call, <4 x float>* %dest, align 8
   ret void
@@ -211,7 +211,7 @@
 entry:
 ; CHECK-LABEL: @bitcast_alias_vector_ptrs_scalar_mismatched_bit_size
 ; CHECK: @alias_v4i32p_to_i64
-  %tmp = load <4 x i32*>* %source, align 8
+  %tmp = load <4 x i32*>, <4 x i32*>* %source, align 8
   %call = call <4 x i32*> @alias_v4i32p_to_i64(<4 x i32*> %tmp) nounwind
   store <4 x i32*> %call, <4 x i32*>* %dest, align 8
   ret void
@@ -221,7 +221,7 @@
 entry:
 ; CHECK-LABEL: @bitcast_alias_scalar_vector_ptrs_same_size
 ; CHECK: @alias_i64_to_v2i32p
-  %tmp = load i64* %source, align 8
+  %tmp = load i64, i64* %source, align 8
   %call = call i64 @alias_i64_to_v2i32p(i64 %tmp) nounwind
   store i64 %call, i64* %dest, align 8
   ret void
@@ -231,7 +231,7 @@
 entry:
 ; CHECK-LABEL: @bitcast_alias_scalar_vector_mismatched_bit_size
 ; CHECK: call i64 @alias_i64_to_v4f32
-  %tmp = load i64* %source, align 8
+  %tmp = load i64, i64* %source, align 8
   %call = call i64 @alias_i64_to_v4f32(i64 %tmp) nounwind
   store i64 %call, i64* %dest, align 8
   ret void
diff --git a/llvm/test/Transforms/InstCombine/bitcast.ll b/llvm/test/Transforms/InstCombine/bitcast.ll
index c7a520b..579839e 100644
--- a/llvm/test/Transforms/InstCombine/bitcast.ll
+++ b/llvm/test/Transforms/InstCombine/bitcast.ll
@@ -148,7 +148,7 @@
 ; PR17293
 define <2 x i64> @test7(<2 x i8*>* %arg) nounwind {
   %cast = bitcast <2 x i8*>* %arg to <2 x i64>*
-  %load = load <2 x i64>* %cast, align 16
+  %load = load <2 x i64>, <2 x i64>* %cast, align 16
   ret <2 x i64> %load
 ; CHECK: @test7
 ; CHECK: bitcast
diff --git a/llvm/test/Transforms/InstCombine/bittest.ll b/llvm/test/Transforms/InstCombine/bittest.ll
index 84ee7dd..edf65d5 100644
--- a/llvm/test/Transforms/InstCombine/bittest.ll
+++ b/llvm/test/Transforms/InstCombine/bittest.ll
@@ -5,7 +5,7 @@
 
 define void @_Z12h000007_testv(i32* %P) {
 entry:
-        %tmp.2 = load i32* @b_rec.0             ; <i32> [#uses=1]
+        %tmp.2 = load i32, i32* @b_rec.0             ; <i32> [#uses=1]
         %tmp.9 = or i32 %tmp.2, -989855744              ; <i32> [#uses=2]
         %tmp.16 = and i32 %tmp.9, -805306369            ; <i32> [#uses=2]
         %tmp.17 = and i32 %tmp.9, -973078529            ; <i32> [#uses=1]
diff --git a/llvm/test/Transforms/InstCombine/call2.ll b/llvm/test/Transforms/InstCombine/call2.ll
index 1f45c7ab7..467eb07 100644
--- a/llvm/test/Transforms/InstCombine/call2.ll
+++ b/llvm/test/Transforms/InstCombine/call2.ll
@@ -8,7 +8,7 @@
 	br label %return
 
 return:		; preds = %entry
-	%retval1 = load i32* %retval		; <i32> [#uses=1]
+	%retval1 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval1
 }
 
@@ -20,6 +20,6 @@
 	br label %return
 
 return:		; preds = %entry
-	%retval1 = load i32* %retval		; <i32> [#uses=1]
+	%retval1 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval1
 }
diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll
index d344699..c577501 100644
--- a/llvm/test/Transforms/InstCombine/cast.ll
+++ b/llvm/test/Transforms/InstCombine/cast.ll
@@ -731,8 +731,8 @@
   %pp = getelementptr inbounds i8, i8* %q, i64 %o
 ; CHECK-NEXT: getelementptr %s, %s*
   %r = bitcast i8* %pp to %s*
-  %l = load %s* %r
-; CHECK-NEXT: load %s*
+  %l = load %s, %s* %r
+; CHECK-NEXT: load %s, %s*
   ret %s %l
 ; CHECK-NEXT: ret %s
 }
@@ -741,13 +741,13 @@
 define %s @test68_addrspacecast(%s* %p, i64 %i) {
 ; CHECK-LABEL: @test68_addrspacecast(
 ; CHECK-NEXT: getelementptr %s, %s*
-; CHECK-NEXT: load %s*
+; CHECK-NEXT: load %s, %s*
 ; CHECK-NEXT: ret %s
   %o = mul i64 %i, 12
   %q = addrspacecast %s* %p to i8 addrspace(2)*
   %pp = getelementptr inbounds i8, i8 addrspace(2)* %q, i64 %o
   %r = addrspacecast i8 addrspace(2)* %pp to %s*
-  %l = load %s* %r
+  %l = load %s, %s* %r
   ret %s %l
 }
 
@@ -755,13 +755,13 @@
 ; CHECK-LABEL: @test68_addrspacecast_2(
 ; CHECK-NEXT: getelementptr %s, %s* %p
 ; CHECK-NEXT: addrspacecast
-; CHECK-NEXT: load %s addrspace(1)*
+; CHECK-NEXT: load %s, %s addrspace(1)*
 ; CHECK-NEXT: ret %s
   %o = mul i64 %i, 12
   %q = addrspacecast %s* %p to i8 addrspace(2)*
   %pp = getelementptr inbounds i8, i8 addrspace(2)* %q, i64 %o
   %r = addrspacecast i8 addrspace(2)* %pp to %s addrspace(1)*
-  %l = load %s addrspace(1)* %r
+  %l = load %s, %s addrspace(1)* %r
   ret %s %l
 }
 
@@ -772,8 +772,8 @@
   %pp = getelementptr inbounds i8, i8 addrspace(1)* %q, i32 %o
 ; CHECK-NEXT: getelementptr %s, %s addrspace(1)*
   %r = bitcast i8 addrspace(1)* %pp to %s addrspace(1)*
-  %l = load %s addrspace(1)* %r
-; CHECK-NEXT: load %s addrspace(1)*
+  %l = load %s, %s addrspace(1)* %r
+; CHECK-NEXT: load %s, %s addrspace(1)*
   ret %s %l
 ; CHECK-NEXT: ret %s
 }
@@ -785,8 +785,8 @@
   %pp = getelementptr inbounds i8, i8* %q, i64 %o
 ; CHECK-NEXT: getelementptr inbounds double, double*
   %r = bitcast i8* %pp to double*
-  %l = load double* %r
-; CHECK-NEXT: load double*
+  %l = load double, double* %r
+; CHECK-NEXT: load double, double*
   ret double %l
 ; CHECK-NEXT: ret double
 }
@@ -799,8 +799,8 @@
   %pp = getelementptr inbounds i8, i8* %q, i64 %o
 ; CHECK-NEXT: getelementptr inbounds %s, %s*
   %r = bitcast i8* %pp to %s*
-  %l = load %s* %r
-; CHECK-NEXT: load %s*
+  %l = load %s, %s* %r
+; CHECK-NEXT: load %s, %s*
   ret %s %l
 ; CHECK-NEXT: ret %s
 }
@@ -813,8 +813,8 @@
   %pp = getelementptr i8, i8* %q, i64 %o
 ; CHECK-NEXT: getelementptr double, double*
   %r = bitcast i8* %pp to double*
-  %l = load double* %r
-; CHECK-NEXT: load double*
+  %l = load double, double* %r
+; CHECK-NEXT: load double, double*
   ret double %l
 ; CHECK-NEXT: ret double
 }
@@ -828,8 +828,8 @@
   %pp = getelementptr inbounds i8, i8* %q, i64 %o
 ; CHECK-NEXT: getelementptr inbounds double, double*
   %r = bitcast i8* %pp to double*
-  %l = load double* %r
-; CHECK-NEXT: load double*
+  %l = load double, double* %r
+; CHECK-NEXT: load double, double*
   ret double %l
 ; CHECK-NEXT: ret double
 }
@@ -843,8 +843,8 @@
   %pp = getelementptr inbounds i8, i8* %q, i64 %o
 ; CHECK-NEXT: getelementptr double, double*
   %r = bitcast i8* %pp to double*
-  %l = load double* %r
-; CHECK-NEXT: load double*
+  %l = load double, double* %r
+; CHECK-NEXT: load double, double*
   ret double %l
 ; CHECK-NEXT: ret double
 }
@@ -855,8 +855,8 @@
   %pp = getelementptr inbounds i64, i64* %q, i64 %i
 ; CHECK-NEXT: getelementptr inbounds double, double*
   %r = bitcast i64* %pp to double*
-  %l = load double* %r
-; CHECK-NEXT: load double*
+  %l = load double, double* %r
+; CHECK-NEXT: load double, double*
   ret double %l
 ; CHECK-NEXT: ret double
 }
@@ -882,8 +882,8 @@
   %pp = getelementptr inbounds i8, i8* %q, i64 %o2
 ; CHECK-NEXT: getelementptr %s, %s* %p, i64 %o2
   %r = bitcast i8* %pp to %s*
-  %l = load %s* %r
-; CHECK-NEXT: load %s*
+  %l = load %s, %s* %r
+; CHECK-NEXT: load %s, %s*
   ret %s %l
 ; CHECK-NEXT: ret %s
 }
@@ -898,8 +898,8 @@
   %pp = getelementptr inbounds i8, i8* %q, i64 %o2
 ; CHECK-NEXT: getelementptr inbounds %s, %s* %p, i64 %o2
   %r = bitcast i8* %pp to %s*
-  %l = load %s* %r
-; CHECK-NEXT: load %s*
+  %l = load %s, %s* %r
+; CHECK-NEXT: load %s, %s*
   ret %s %l
 ; CHECK-NEXT: ret %s
 }
@@ -926,8 +926,8 @@
   %pp = getelementptr inbounds i8, i8* %q, i64 %h
 ; CHECK-NEXT: getelementptr %s, %s* %p, i64 %h
   %r = bitcast i8* %pp to %s*
-  %load = load %s* %r
-; CHECK-NEXT: load %s*
+  %load = load %s, %s* %r
+; CHECK-NEXT: load %s, %s*
   ret %s %load
 ; CHECK-NEXT: ret %s
 }
@@ -942,7 +942,7 @@
 ; CHECK: bitcast
   %pp = getelementptr inbounds i8, i8* %q, i32 %c
   %r = bitcast i8* %pp to %s*
-  %l = load %s* %r
+  %l = load %s, %s* %r
   ret %s %l
 }
 
@@ -954,8 +954,8 @@
   %pp = getelementptr i8, i8* %q, i32 %tmp
 ; CHECK-NEXT: getelementptr [100 x double], [100 x double]*
   %r = bitcast i8* %pp to double*
-  %l = load double* %r
-; CHECK-NEXT: load double*
+  %l = load double, double* %r
+; CHECK-NEXT: load double, double*
   ret double %l
 ; CHECK-NEXT: ret double
 }
@@ -963,13 +963,13 @@
 define double @test80_addrspacecast([100 x double] addrspace(1)* %p, i32 %i) {
 ; CHECK-LABEL: @test80_addrspacecast(
 ; CHECK-NEXT: getelementptr [100 x double], [100 x double] addrspace(1)* %p
-; CHECK-NEXT: load double addrspace(1)*
+; CHECK-NEXT: load double, double addrspace(1)*
 ; CHECK-NEXT: ret double
   %tmp = shl nsw i32 %i, 3
   %q = addrspacecast [100 x double] addrspace(1)* %p to i8 addrspace(2)*
   %pp = getelementptr i8, i8 addrspace(2)* %q, i32 %tmp
   %r = addrspacecast i8 addrspace(2)* %pp to double addrspace(1)*
-  %l = load double addrspace(1)* %r
+  %l = load double, double addrspace(1)* %r
   ret double %l
 }
 
@@ -977,13 +977,13 @@
 ; CHECK-LABEL: @test80_addrspacecast_2(
 ; CHECK-NEXT: getelementptr [100 x double], [100 x double] addrspace(1)*
 ; CHECK-NEXT: addrspacecast double addrspace(1)*
-; CHECK-NEXT: load double addrspace(3)*
+; CHECK-NEXT: load double, double addrspace(3)*
 ; CHECK-NEXT: ret double
   %tmp = shl nsw i32 %i, 3
   %q = addrspacecast [100 x double] addrspace(1)* %p to i8 addrspace(2)*
   %pp = getelementptr i8, i8 addrspace(2)* %q, i32 %tmp
   %r = addrspacecast i8 addrspace(2)* %pp to double addrspace(3)*
-  %l = load double addrspace(3)* %r
+  %l = load double, double addrspace(3)* %r
   ret double %l
 }
 
@@ -995,8 +995,8 @@
   %pp = getelementptr i8, i8 addrspace(1)* %q, i16 %tmp
 ; CHECK-NEXT: getelementptr [100 x double], [100 x double] addrspace(1)*
   %r = bitcast i8 addrspace(1)* %pp to double addrspace(1)*
-  %l = load double addrspace(1)* %r
-; CHECK-NEXT: load double addrspace(1)*
+  %l = load double, double addrspace(1)* %r
+; CHECK-NEXT: load double, double addrspace(1)*
   ret double %l
 ; CHECK-NEXT: ret double
 }
@@ -1006,7 +1006,7 @@
   %q = bitcast double* %p to i8*
   %pp = getelementptr i8, i8* %q, i64 %i
   %r = bitcast i8* %pp to double*
-  %l = load double* %r
+  %l = load double, double* %r
   ret double %l
 }
 
diff --git a/llvm/test/Transforms/InstCombine/cast_ptr.ll b/llvm/test/Transforms/InstCombine/cast_ptr.ll
index cc7a2bf..eaf946e 100644
--- a/llvm/test/Transforms/InstCombine/cast_ptr.ll
+++ b/llvm/test/Transforms/InstCombine/cast_ptr.ll
@@ -107,7 +107,7 @@
 declare %op* @foo(%op* %X)
 
 define %unop* @test5(%op* %O) {
-        %tmp = load %unop* (%op*)** bitcast ([1 x %op* (%op*)*]* @Array to %unop* (%op*)**); <%unop* (%op*)*> [#uses=1]
+        %tmp = load %unop* (%op*)*, %unop* (%op*)** bitcast ([1 x %op* (%op*)*]* @Array to %unop* (%op*)**); <%unop* (%op*)*> [#uses=1]
         %tmp.2 = call %unop* %tmp( %op* %O )            ; <%unop*> [#uses=1]
         ret %unop* %tmp.2
 ; CHECK-LABEL: @test5(
@@ -122,8 +122,8 @@
 define i8 @test6(i8 addrspace(1)* %source) {
 entry:
   %arrayidx223 = addrspacecast i8 addrspace(1)* %source to i8*
-  %tmp4 = load i8* %arrayidx223
+  %tmp4 = load i8, i8* %arrayidx223
   ret i8 %tmp4
 ; CHECK-LABEL: @test6(
-; CHECK: load i8* %arrayidx223
+; CHECK: load i8, i8* %arrayidx223
 }
diff --git a/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll b/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
index bb7ab58..eb16904 100644
--- a/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
+++ b/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
@@ -149,22 +149,22 @@
 
 define float @constant_fold_bitcast_ftoi_load() {
 ; CHECK-LABEL: @constant_fold_bitcast_ftoi_load(
-; CHECK: load float addrspace(3)* bitcast (i32 addrspace(3)* @g to float addrspace(3)*), align 4
-  %a = load float addrspace(3)* bitcast (i32 addrspace(3)* @g to float addrspace(3)*), align 4
+; CHECK: load float, float addrspace(3)* bitcast (i32 addrspace(3)* @g to float addrspace(3)*), align 4
+  %a = load float, float addrspace(3)* bitcast (i32 addrspace(3)* @g to float addrspace(3)*), align 4
   ret float %a
 }
 
 define i32 @constant_fold_bitcast_itof_load() {
 ; CHECK-LABEL: @constant_fold_bitcast_itof_load(
-; CHECK: load i32 addrspace(3)* bitcast (float addrspace(3)* @g_float_as3 to i32 addrspace(3)*), align 4
-  %a = load i32 addrspace(3)* bitcast (float addrspace(3)* @g_float_as3 to i32 addrspace(3)*), align 4
+; CHECK: load i32, i32 addrspace(3)* bitcast (float addrspace(3)* @g_float_as3 to i32 addrspace(3)*), align 4
+  %a = load i32, i32 addrspace(3)* bitcast (float addrspace(3)* @g_float_as3 to i32 addrspace(3)*), align 4
   ret i32 %a
 }
 
 define <4 x float> @constant_fold_bitcast_vector_as() {
 ; CHECK-LABEL: @constant_fold_bitcast_vector_as(
-; CHECK: load <4 x float> addrspace(3)* @g_v4f_as3, align 16
-  %a = load <4 x float> addrspace(3)* bitcast (<4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*) to <4 x float> addrspace(3)*), align 4
+; CHECK: load <4 x float>, <4 x float> addrspace(3)* @g_v4f_as3, align 16
+  %a = load <4 x float>, <4 x float> addrspace(3)* bitcast (<4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*) to <4 x float> addrspace(3)*), align 4
   ret <4 x float> %a
 }
 
@@ -172,9 +172,9 @@
 
 define i32 @test_cast_gep_small_indices_as() {
 ; CHECK-LABEL: @test_cast_gep_small_indices_as(
-; CHECK: load i32 addrspace(3)* getelementptr inbounds ([10 x i32] addrspace(3)* @i32_array_as3, i16 0, i16 0), align 16
+; CHECK: load i32, i32 addrspace(3)* getelementptr inbounds ([10 x i32] addrspace(3)* @i32_array_as3, i16 0, i16 0), align 16
    %p = getelementptr [10 x i32], [10 x i32] addrspace(3)* @i32_array_as3, i7 0, i7 0
-   %x = load i32 addrspace(3)* %p, align 4
+   %x = load i32, i32 addrspace(3)* %p, align 4
    ret i32 %x
 }
 
@@ -189,17 +189,17 @@
 
 define i32 @test_cast_gep_large_indices_as() {
 ; CHECK-LABEL: @test_cast_gep_large_indices_as(
-; CHECK: load i32 addrspace(3)* getelementptr inbounds ([10 x i32] addrspace(3)* @i32_array_as3, i16 0, i16 0), align 16
+; CHECK: load i32, i32 addrspace(3)* getelementptr inbounds ([10 x i32] addrspace(3)* @i32_array_as3, i16 0, i16 0), align 16
    %p = getelementptr [10 x i32], [10 x i32] addrspace(3)* @i32_array_as3, i64 0, i64 0
-   %x = load i32 addrspace(3)* %p, align 4
+   %x = load i32, i32 addrspace(3)* %p, align 4
    ret i32 %x
 }
 
 define i32 @test_constant_cast_gep_struct_indices_as() {
 ; CHECK-LABEL: @test_constant_cast_gep_struct_indices_as(
-; CHECK: load i32 addrspace(3)* getelementptr inbounds (%struct.foo addrspace(3)* @constant_fold_global_ptr, i16 0, i32 2, i16 2), align 8
+; CHECK: load i32, i32 addrspace(3)* getelementptr inbounds (%struct.foo addrspace(3)* @constant_fold_global_ptr, i16 0, i32 2, i16 2), align 8
   %x = getelementptr %struct.foo, %struct.foo addrspace(3)* @constant_fold_global_ptr, i18 0, i32 2, i12 2
-  %y = load i32 addrspace(3)* %x, align 4
+  %y = load i32, i32 addrspace(3)* %x, align 4
   ret i32 %y
 }
 
@@ -209,7 +209,7 @@
 ; CHECK-LABEL: @test_read_data_from_global_as3(
 ; CHECK-NEXT: ret i32 2
   %x = getelementptr [5 x i32], [5 x i32] addrspace(3)* @constant_data_as3, i32 0, i32 1
-  %y = load i32 addrspace(3)* %x, align 4
+  %y = load i32, i32 addrspace(3)* %x, align 4
   ret i32 %y
 }
 
@@ -224,9 +224,9 @@
 define i32 @constant_through_array_as_ptrs() {
 ; CHECK-LABEL: @constant_through_array_as_ptrs(
 ; CHECK-NEXT: ret i32 34
-  %p = load i32 addrspace(1)* addrspace(2)* addrspace(0)* @indirect, align 4
-  %a = load i32 addrspace(1)* addrspace(2)* %p, align 4
-  %b = load i32 addrspace(1)* %a, align 4
+  %p = load i32 addrspace(1)* addrspace(2)*, i32 addrspace(1)* addrspace(2)* addrspace(0)* @indirect, align 4
+  %a = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %p, align 4
+  %b = load i32, i32 addrspace(1)* %a, align 4
   ret i32 %b
 }
 
@@ -236,6 +236,6 @@
 ; CHECK-LABEL: @canonicalize_addrspacecast
 ; CHECK-NEXT: getelementptr inbounds float, float* addrspacecast (float addrspace(3)* bitcast ([0 x i8] addrspace(3)* @shared_mem to float addrspace(3)*) to float*), i32 %i
   %p = getelementptr inbounds float, float* addrspacecast ([0 x i8] addrspace(3)* @shared_mem to float*), i32 %i
-  %v = load float* %p
+  %v = load float, float* %p
   ret float %v
 }
diff --git a/llvm/test/Transforms/InstCombine/crash.ll b/llvm/test/Transforms/InstCombine/crash.ll
index 9fd2aab..76ddd09 100644
--- a/llvm/test/Transforms/InstCombine/crash.ll
+++ b/llvm/test/Transforms/InstCombine/crash.ll
@@ -36,7 +36,7 @@
 define void @test2(<1 x i16>* nocapture %b, i32* nocapture %c) nounwind ssp {
 entry:
   %arrayidx = getelementptr inbounds <1 x i16>, <1 x i16>* %b, i64 undef ; <<1 x i16>*>
-  %tmp2 = load <1 x i16>* %arrayidx               ; <<1 x i16>> [#uses=1]
+  %tmp2 = load <1 x i16>, <1 x i16>* %arrayidx               ; <<1 x i16>> [#uses=1]
   %tmp6 = bitcast <1 x i16> %tmp2 to i16          ; <i16> [#uses=1]
   %tmp7 = zext i16 %tmp6 to i32                   ; <i32> [#uses=1]
   %ins = or i32 0, %tmp7                          ; <i32> [#uses=1]
@@ -60,7 +60,7 @@
 ; <label>:3                                       ; preds = %2, %1
   %4 = phi i8 [ 1, %2 ], [ 0, %1 ]                ; <i8> [#uses=1]
   %5 = icmp eq i8 %4, 0                           ; <i1> [#uses=1]
-  %6 = load i64* @tmp2, align 8                   ; <i64> [#uses=1]
+  %6 = load i64, i64* @tmp2, align 8                   ; <i64> [#uses=1]
   %7 = select i1 %5, i64 0, i64 %6                ; <i64> [#uses=1]
   br label %8
 
@@ -81,7 +81,7 @@
 ; <label>:3                                       ; preds = %2
   %4 = getelementptr inbounds %t0, %t0* null, i64 0, i32 1 ; <i32*> [#uses=0]
   %5 = getelementptr inbounds %t1, %t1* null, i64 0, i32 4 ; <i32**> [#uses=1]
-  %6 = load i32** %5, align 8                     ; <i32*> [#uses=1]
+  %6 = load i32*, i32** %5, align 8                     ; <i32*> [#uses=1]
   %7 = icmp ne i32* %6, null                      ; <i1> [#uses=1]
   %8 = zext i1 %7 to i32                          ; <i32> [#uses=1]
   %9 = add i32 %8, 0                              ; <i32> [#uses=1]
@@ -115,7 +115,7 @@
 
 BB2:
   %v5_ = phi i1 [ true, %BB0], [false, %BB1]
-  %v6 = load i64* %P
+  %v6 = load i64, i64* %P
   br label %l8
 
 l8:
@@ -183,7 +183,7 @@
 
 cond.false:                                       ; preds = %entry
   %tmp4 = getelementptr inbounds %class.RuleBasedBreakIterator, %class.RuleBasedBreakIterator* %this, i32 0, i32 0 ; <i64 ()**> [#uses=1]
-  %tmp5 = load i64 ()** %tmp4                     ; <i64 ()*> [#uses=1]
+  %tmp5 = load i64 ()*, i64 ()** %tmp4                     ; <i64 ()*> [#uses=1]
   %call = invoke i64 %tmp5()
           to label %cond.end unwind label %ehcleanup ; <i64> [#uses=1]
 
@@ -242,10 +242,10 @@
 ; PR6503
 define void @test12(i32* %A) nounwind {
 entry:
-  %tmp1 = load i32* %A
+  %tmp1 = load i32, i32* %A
   %cmp = icmp ugt i32 1, %tmp1                    ; <i1> [#uses=1]
   %conv = zext i1 %cmp to i32                     ; <i32> [#uses=1]
-  %tmp2 = load i32* %A
+  %tmp2 = load i32, i32* %A
   %cmp3 = icmp ne i32 %tmp2, 0                    ; <i1> [#uses=1]
   %conv4 = zext i1 %cmp3 to i32                   ; <i32> [#uses=1]
   %or = or i32 %conv, %conv4                      ; <i32> [#uses=1]
@@ -261,7 +261,7 @@
   %0 = getelementptr inbounds %s1, %s1* null, i64 0, i32 2, i64 0, i32 0
   %1 = bitcast i64* %0 to i32*
   %2 = getelementptr inbounds %s1, %s1* null, i64 0, i32 2, i64 1, i32 0
-  %.pre = load i32* %1, align 8
+  %.pre = load i32, i32* %1, align 8
   %3 = lshr i32 %.pre, 19
   %brmerge = or i1 undef, undef
   %4 = and i32 %3, 3
@@ -269,7 +269,7 @@
   %6 = shl i32 %5, 19
   %7 = add i32 %6, 1572864
   %8 = and i32 %7, 1572864
-  %9 = load i64* %2, align 8
+  %9 = load i64, i64* %2, align 8
   %trunc156 = trunc i64 %9 to i32
   %10 = and i32 %trunc156, -1537
   %11 = and i32 %10, -6145
@@ -304,7 +304,7 @@
 
 define void @test15(i32* %p_92) nounwind {
 entry:
-%0 = load i32* %p_92, align 4
+%0 = load i32, i32* %p_92, align 4
 %1 = icmp ne i32 %0, 0
 %2 = zext i1 %1 to i32
 %3 = call i32 @func_14() nounwind
diff --git a/llvm/test/Transforms/InstCombine/debuginfo.ll b/llvm/test/Transforms/InstCombine/debuginfo.ll
index ae72f70..bf4a846 100644
--- a/llvm/test/Transforms/InstCombine/debuginfo.ll
+++ b/llvm/test/Transforms/InstCombine/debuginfo.ll
@@ -19,10 +19,10 @@
   call void @llvm.dbg.declare(metadata i32* %__val.addr, metadata !7, metadata !{}), !dbg !18
   store i64 %__len, i64* %__len.addr, align 8
   call void @llvm.dbg.declare(metadata i64* %__len.addr, metadata !9, metadata !{}), !dbg !20
-  %tmp = load i8** %__dest.addr, align 8, !dbg !21
-  %tmp1 = load i32* %__val.addr, align 4, !dbg !21
-  %tmp2 = load i64* %__len.addr, align 8, !dbg !21
-  %tmp3 = load i8** %__dest.addr, align 8, !dbg !21
+  %tmp = load i8*, i8** %__dest.addr, align 8, !dbg !21
+  %tmp1 = load i32, i32* %__val.addr, align 4, !dbg !21
+  %tmp2 = load i64, i64* %__len.addr, align 8, !dbg !21
+  %tmp3 = load i8*, i8** %__dest.addr, align 8, !dbg !21
   %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %tmp3, i1 false), !dbg !21
   %call = call i8* @foo(i8* %tmp, i32 %tmp1, i64 %tmp2, i64 %0), !dbg !21
   ret i8* %call, !dbg !21
diff --git a/llvm/test/Transforms/InstCombine/descale-zero.ll b/llvm/test/Transforms/InstCombine/descale-zero.ll
index 8aa8818..4347be4 100644
--- a/llvm/test/Transforms/InstCombine/descale-zero.ll
+++ b/llvm/test/Transforms/InstCombine/descale-zero.ll
@@ -5,10 +5,10 @@
 
 define internal i8* @descale_zero() {
 entry:
-; CHECK: load i8** inttoptr (i64 48 to i8**), align 16
+; CHECK: load i8*, i8** inttoptr (i64 48 to i8**), align 16
 ; CHECK-NEXT: ret i8*
-  %i16_ptr = load i16** inttoptr (i64 48 to i16**), align 16
-  %num = load i64* inttoptr (i64 64 to i64*), align 64
+  %i16_ptr = load i16*, i16** inttoptr (i64 48 to i16**), align 16
+  %num = load i64, i64* inttoptr (i64 64 to i64*), align 64
   %num_times_2 = shl i64 %num, 1
   %num_times_2_plus_4 = add i64 %num_times_2, 4
   %i8_ptr = bitcast i16* %i16_ptr to i8*
diff --git a/llvm/test/Transforms/InstCombine/div-shift-crash.ll b/llvm/test/Transforms/InstCombine/div-shift-crash.ll
index 96c2c22..936173c 100644
--- a/llvm/test/Transforms/InstCombine/div-shift-crash.ll
+++ b/llvm/test/Transforms/InstCombine/div-shift-crash.ll
@@ -39,7 +39,7 @@
   br label %for.cond1.i.i.i.i
 
 func_39.exit.i.i:                                 ; preds = %for.cond1.i.i.i.i
-  %l_8191.sroa.0.0.copyload.i.i = load i64* %0, align 1
+  %l_8191.sroa.0.0.copyload.i.i = load i64, i64* %0, align 1
   br label %for.cond1.i.i.i
 
 for.cond1.i.i.i:                                  ; preds = %safe_div_func_uint32_t_u_u.exit.i.i.i, %func_39.exit.i.i
diff --git a/llvm/test/Transforms/InstCombine/err-rep-cold.ll b/llvm/test/Transforms/InstCombine/err-rep-cold.ll
index 0cbafc4..16a68e5 100644
--- a/llvm/test/Transforms/InstCombine/err-rep-cold.ll
+++ b/llvm/test/Transforms/InstCombine/err-rep-cold.ll
@@ -18,7 +18,7 @@
   br i1 %cmp, label %if.then, label %return
 
 if.then:                                          ; preds = %entry
-  %0 = load %struct._IO_FILE** @stderr, align 8
+  %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
   %call = tail call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([13 x i8]* @.str, i64 0, i64 0), i32 %a) #1
   br label %return
 
@@ -38,7 +38,7 @@
   br i1 %cmp, label %if.then, label %return
 
 if.then:                                          ; preds = %entry
-  %0 = load %struct._IO_FILE** @stderr, align 8
+  %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
   %1 = tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0)
   br label %return
 
@@ -58,7 +58,7 @@
   br i1 %cmp, label %if.then, label %return
 
 if.then:                                          ; preds = %entry
-  %0 = load %struct._IO_FILE** @stdout, align 8
+  %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8
   %1 = tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0)
   br label %return
 
diff --git a/llvm/test/Transforms/InstCombine/extractvalue.ll b/llvm/test/Transforms/InstCombine/extractvalue.ll
index 27c05db..4dc1545 100644
--- a/llvm/test/Transforms/InstCombine/extractvalue.ll
+++ b/llvm/test/Transforms/InstCombine/extractvalue.ll
@@ -41,7 +41,7 @@
 
 ; CHECK-LABEL: define i32 @extract2gep(
 ; CHECK-NEXT: [[GEP:%[a-z0-9]+]] = getelementptr inbounds {{.*}}, {{.*}}* %pair, i32 0, i32 1
-; CHECK-NEXT: [[LOAD:%[A-Za-z0-9]+]] = load i32* [[GEP]]
+; CHECK-NEXT: [[LOAD:%[A-Za-z0-9]+]] = load i32, i32* [[GEP]]
 ; CHECK-NEXT: store
 ; CHECK-NEXT: br label %loop
 ; CHECK-NOT: extractvalue
@@ -52,7 +52,7 @@
         ; The load + extractvalue should be converted
         ; to an inbounds gep + smaller load.
         ; The new load should be in the same spot as the old load.
-        %L = load {i32, i32}* %pair
+        %L = load {i32, i32}, {i32, i32}* %pair
         store i32 0, i32* %P
         br label %loop
 
@@ -69,12 +69,12 @@
 
 ; CHECK-LABEL: define i32 @doubleextract2gep(
 ; CHECK-NEXT: [[GEP:%[a-z0-9]+]] = getelementptr inbounds {{.*}}, {{.*}}* %arg, i32 0, i32 1, i32 1
-; CHECK-NEXT: [[LOAD:%[A-Za-z0-9]+]] = load i32* [[GEP]]
+; CHECK-NEXT: [[LOAD:%[A-Za-z0-9]+]] = load i32, i32* [[GEP]]
 ; CHECK-NEXT: ret i32 [[LOAD]]
 define i32 @doubleextract2gep({i32, {i32, i32}}* %arg) {
         ; The load + extractvalues should be converted
         ; to a 3-index inbounds gep + smaller load.
-        %L = load {i32, {i32, i32}}* %arg
+        %L = load {i32, {i32, i32}}, {i32, {i32, i32}}* %arg
         %E1 = extractvalue {i32, {i32, i32}} %L, 1
         %E2 = extractvalue {i32, i32} %E1, 1
         ret i32 %E2
@@ -88,7 +88,7 @@
 ; CHECK-NEXT: ret
 define i32 @nogep-multiuse({i32, i32}* %pair) {
         ; The load should be left unchanged since both parts are needed.
-        %L = load volatile {i32, i32}* %pair
+        %L = load volatile {i32, i32}, {i32, i32}* %pair
         %LHS = extractvalue {i32, i32} %L, 0
         %RHS = extractvalue {i32, i32} %L, 1
         %R = add i32 %LHS, %RHS
@@ -101,7 +101,7 @@
 ; CHECK-NEXT: ret
 define i32 @nogep-volatile({i32, i32}* %pair) {
         ; The load volatile should be left unchanged.
-        %L = load volatile {i32, i32}* %pair
+        %L = load volatile {i32, i32}, {i32, i32}* %pair
         %E = extractvalue {i32, i32} %L, 1
         ret i32 %E
 }
diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll
index a776765..ac3000f 100644
--- a/llvm/test/Transforms/InstCombine/fmul.ll
+++ b/llvm/test/Transforms/InstCombine/fmul.ll
@@ -74,7 +74,7 @@
 ; Don't crash when attempting to cast a constant FMul to an instruction.
 define void @test8(i32* %inout) {
 entry:
-  %0 = load i32* %inout, align 4
+  %0 = load i32, i32* %inout, align 4
   %conv = uitofp i32 %0 to float
   %vecinit = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, float %conv, i32 3
   %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %vecinit
diff --git a/llvm/test/Transforms/InstCombine/fold-vector-zero.ll b/llvm/test/Transforms/InstCombine/fold-vector-zero.ll
index e1d86b6..bf661df 100644
--- a/llvm/test/Transforms/InstCombine/fold-vector-zero.ll
+++ b/llvm/test/Transforms/InstCombine/fold-vector-zero.ll
@@ -9,7 +9,7 @@
 	%l0 = phi i64 [ -2222, %bb8 ], [ %r23, %bb30 ]
 	%r2 = add i64 %s0, %B
 	%r3 = inttoptr i64 %r2 to <2 x double>*
-	%r4 = load <2 x double>* %r3, align 8
+	%r4 = load <2 x double>, <2 x double>* %r3, align 8
 	%r6 = bitcast <2 x double> %r4 to <2 x i64>
 	%r7 = bitcast <2 x double> zeroinitializer to <2 x i64>
 	%r8 = insertelement <2 x i64> undef, i64 9223372036854775807, i32 0
diff --git a/llvm/test/Transforms/InstCombine/fp-ret-bitcast.ll b/llvm/test/Transforms/InstCombine/fp-ret-bitcast.ll
index 84ba236..7106933 100644
--- a/llvm/test/Transforms/InstCombine/fp-ret-bitcast.ll
+++ b/llvm/test/Transforms/InstCombine/fp-ret-bitcast.ll
@@ -13,11 +13,11 @@
 entry:
 	%color = alloca %struct.NSArray*
 	%color.466 = alloca %struct.NSObject*
-	%tmp103 = load %struct.NSArray** %color, align 4
+	%tmp103 = load %struct.NSArray*, %struct.NSArray** %color, align 4
 	%tmp103104 = getelementptr %struct.NSArray, %struct.NSArray* %tmp103, i32 0, i32 0
 	store %struct.NSObject* %tmp103104, %struct.NSObject** %color.466, align 4
-	%tmp105 = load %struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_81", align 4
-	%tmp106 = load %struct.NSObject** %color.466, align 4
+	%tmp105 = load %struct.objc_selector*, %struct.objc_selector** @"\01L_OBJC_SELECTOR_REFERENCES_81", align 4
+	%tmp106 = load %struct.NSObject*, %struct.NSObject** %color.466, align 4
 	%tmp107 = call float bitcast (void (%struct.NSObject*, ...)* @objc_msgSend_fpret to float (%struct.NSObject*, %struct.objc_selector*)*)( %struct.NSObject* %tmp106, %struct.objc_selector* %tmp105 ) nounwind
 	br label %exit
 
diff --git a/llvm/test/Transforms/InstCombine/fpextend.ll b/llvm/test/Transforms/InstCombine/fpextend.ll
index 8640cd2..775caad 100644
--- a/llvm/test/Transforms/InstCombine/fpextend.ll
+++ b/llvm/test/Transforms/InstCombine/fpextend.ll
@@ -5,7 +5,7 @@
 
 define void @test() nounwind  {
 entry:
-	%tmp = load float* @X, align 4		; <float> [#uses=1]
+	%tmp = load float, float* @X, align 4		; <float> [#uses=1]
 	%tmp1 = fpext float %tmp to double		; <double> [#uses=1]
 	%tmp3 = fadd double %tmp1, 0.000000e+00		; <double> [#uses=1]
 	%tmp34 = fptrunc double %tmp3 to float		; <float> [#uses=1]
@@ -15,9 +15,9 @@
 
 define void @test2() nounwind  {
 entry:
-	%tmp = load float* @X, align 4		; <float> [#uses=1]
+	%tmp = load float, float* @X, align 4		; <float> [#uses=1]
 	%tmp1 = fpext float %tmp to double		; <double> [#uses=1]
-	%tmp2 = load float* @Y, align 4		; <float> [#uses=1]
+	%tmp2 = load float, float* @Y, align 4		; <float> [#uses=1]
 	%tmp23 = fpext float %tmp2 to double		; <double> [#uses=1]
 	%tmp5 = fmul double %tmp1, %tmp23		; <double> [#uses=1]
 	%tmp56 = fptrunc double %tmp5 to float		; <float> [#uses=1]
@@ -27,9 +27,9 @@
 
 define void @test3() nounwind  {
 entry:
-	%tmp = load float* @X, align 4		; <float> [#uses=1]
+	%tmp = load float, float* @X, align 4		; <float> [#uses=1]
 	%tmp1 = fpext float %tmp to double		; <double> [#uses=1]
-	%tmp2 = load float* @Y, align 4		; <float> [#uses=1]
+	%tmp2 = load float, float* @Y, align 4		; <float> [#uses=1]
 	%tmp23 = fpext float %tmp2 to double		; <double> [#uses=1]
 	%tmp5 = fdiv double %tmp1, %tmp23		; <double> [#uses=1]
 	%tmp56 = fptrunc double %tmp5 to float		; <float> [#uses=1]
@@ -39,7 +39,7 @@
 
 define void @test4() nounwind  {
 entry:
-	%tmp = load float* @X, align 4		; <float> [#uses=1]
+	%tmp = load float, float* @X, align 4		; <float> [#uses=1]
 	%tmp1 = fpext float %tmp to double		; <double> [#uses=1]
 	%tmp2 = fsub double -0.000000e+00, %tmp1		; <double> [#uses=1]
 	%tmp34 = fptrunc double %tmp2 to float		; <float> [#uses=1]
diff --git a/llvm/test/Transforms/InstCombine/gc.relocate.ll b/llvm/test/Transforms/InstCombine/gc.relocate.ll
index d10ef5f..8fbb752f 100644
--- a/llvm/test/Transforms/InstCombine/gc.relocate.ll
+++ b/llvm/test/Transforms/InstCombine/gc.relocate.ll
@@ -13,7 +13,7 @@
 ; CHECK-LABEL: @deref
 ; CHECK: call dereferenceable(8)
 entry:
-    %load = load i32 addrspace(1)* %dparam
+    %load = load i32, i32 addrspace(1)* %dparam
     %tok = tail call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
     %relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(i32 %tok, i32 4, i32 4)
     ret i32 addrspace(1)* %relocate
diff --git a/llvm/test/Transforms/InstCombine/gep-addrspace.ll b/llvm/test/Transforms/InstCombine/gep-addrspace.ll
index 093954e..aa46ea6 100644
--- a/llvm/test/Transforms/InstCombine/gep-addrspace.ll
+++ b/llvm/test/Transforms/InstCombine/gep-addrspace.ll
@@ -12,7 +12,7 @@
   %B = addrspacecast %myStruct addrspace(1)* %A to %myStruct*
   %C = getelementptr inbounds %myStruct, %myStruct* %B, i32 0, i32 1
   %D = getelementptr inbounds [3 x float], [3 x float]* %C, i32 0, i32 2
-  %E = load float* %D, align 4
+  %E = load float, float* %D, align 4
   %F = fsub float %E, undef
   ret void
 }
diff --git a/llvm/test/Transforms/InstCombine/gep-sext.ll b/llvm/test/Transforms/InstCombine/gep-sext.ll
index 2715c9d..36e2aef 100644
--- a/llvm/test/Transforms/InstCombine/gep-sext.ll
+++ b/llvm/test/Transforms/InstCombine/gep-sext.ll
@@ -10,7 +10,7 @@
 ; CHECK-NEXT: %1 = sext i32 %index to i64
 ; CHECK-NEXT: %addr = getelementptr i32, i32* %p, i64 %1
   %addr = getelementptr i32, i32* %p, i32 %index
-  %val = load i32* %addr
+  %val = load i32, i32* %addr
   call void @use(i32 %val)
   ret void
 }
@@ -21,7 +21,7 @@
 ; CHECK-NEXT: %addr = getelementptr i32, i32* %p, i64 %i
   %i = zext i32 %index to i64
   %addr = getelementptr i32, i32* %p, i64 %i
-  %val = load i32* %addr
+  %val = load i32, i32* %addr
   call void @use(i32 %val)
   ret void
 }
@@ -33,9 +33,9 @@
 ; CHECK-NOT: sext
   %addr_begin = getelementptr i32, i32* %p, i64 40
   %addr_fixed = getelementptr i32, i32* %addr_begin, i64 48
-  %val_fixed = load i32* %addr_fixed, !range !0
+  %val_fixed = load i32, i32* %addr_fixed, !range !0
   %addr = getelementptr i32, i32* %addr_begin, i32 %val_fixed
-  %val = load i32* %addr
+  %val = load i32, i32* %addr
   call void @use(i32 %val)
   ret void
 }
@@ -46,10 +46,10 @@
 ; CHECK-NOT: sext
   %addr_begin = getelementptr i32, i32* %p, i64 40
   %addr_fixed = getelementptr i32, i32* %addr_begin, i64 48
-  %val_fixed = load i32* %addr_fixed, !range !0
+  %val_fixed = load i32, i32* %addr_fixed, !range !0
   %i = sext i32 %val_fixed to i64
   %addr = getelementptr i32, i32* %addr_begin, i64 %i
-  %val = load i32* %addr
+  %val = load i32, i32* %addr
   call void @use(i32 %val)
   ret void
 }
diff --git a/llvm/test/Transforms/InstCombine/gepphigep.ll b/llvm/test/Transforms/InstCombine/gepphigep.ll
index 7cd22b9..7da1186 100644
--- a/llvm/test/Transforms/InstCombine/gepphigep.ll
+++ b/llvm/test/Transforms/InstCombine/gepphigep.ll
@@ -8,7 +8,7 @@
 define i32 @test1(%struct1* %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19) {
 bb:
   %tmp = getelementptr inbounds %struct1, %struct1* %dm, i64 0, i32 0
-  %tmp1 = load %struct2** %tmp, align 8
+  %tmp1 = load %struct2*, %struct2** %tmp, align 8
   br i1 %tmp4, label %bb1, label %bb2
 
 bb1:
@@ -26,7 +26,7 @@
 bb3:
   %phi = phi %struct2* [ %tmp10, %bb1 ], [ %tmp20, %bb2 ]
   %tmp24 = getelementptr inbounds %struct2, %struct2* %phi, i64 0, i32 1
-  %tmp25 = load i32* %tmp24, align 4
+  %tmp25 = load i32, i32* %tmp24, align 4
   ret i32 %tmp25
 
 ; CHECK-LABEL: @test1(
@@ -40,7 +40,7 @@
 define i32 @test2(%struct1* %dm, i1 %tmp4, i64 %tmp9, i64 %tmp19) {
 bb:
   %tmp = getelementptr inbounds %struct1, %struct1* %dm, i64 0, i32 0
-  %tmp1 = load %struct2** %tmp, align 8
+  %tmp1 = load %struct2*, %struct2** %tmp, align 8
   %tmp10 = getelementptr inbounds %struct2, %struct2* %tmp1, i64 %tmp9
   %tmp11 = getelementptr inbounds %struct2, %struct2* %tmp10, i64 0, i32 0
   store i32 0, i32* %tmp11, align 4
@@ -48,7 +48,7 @@
   %tmp21 = getelementptr inbounds %struct2, %struct2* %tmp20, i64 0, i32 0
   store i32 0, i32* %tmp21, align 4
   %tmp24 = getelementptr inbounds %struct2, %struct2* %tmp10, i64 0, i32 1
-  %tmp25 = load i32* %tmp24, align 4
+  %tmp25 = load i32, i32* %tmp24, align 4
   ret i32 %tmp25
 
 ; CHECK-LABEL: @test2(
@@ -87,7 +87,7 @@
   %tmp27 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) catch i8* bitcast (i8** @_ZTIi to i8*)
   %tmp34 = getelementptr inbounds %struct4, %struct4* %phi, i64 %tmp21, i32 1
   %tmp35 = getelementptr inbounds %struct2, %struct2* %tmp34, i64 0, i32 1
-  %tmp25 = load i32* %tmp35, align 4
+  %tmp25 = load i32, i32* %tmp35, align 4
   ret i32 %tmp25
 
 ; CHECK-LABEL: @test3(
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index 2cc7037..ebc3ffc 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -155,7 +155,7 @@
   %new_a = bitcast %struct.B* %g4 to %struct.A*
 
   %g5 = getelementptr %struct.A, %struct.A* %new_a, i32 0, i32 1
-  %a_a = load i32* %g5, align 4
+  %a_a = load i32, i32* %g5, align 4
   ret i32 %a_a
 ; CHECK-LABEL:      @test12(
 ; CHECK:      getelementptr %struct.A, %struct.A* %a, i64 0, i32 1
@@ -363,7 +363,7 @@
         %pbob1 = alloca %intstruct
         %pbob2 = getelementptr %intstruct, %intstruct* %pbob1
         %pbobel = getelementptr %intstruct, %intstruct* %pbob2, i64 0, i32 0
-        %rval = load i32* %pbobel
+        %rval = load i32, i32* %pbobel
         ret i32 %rval
 ; CHECK-LABEL: @test21(
 ; CHECK: getelementptr %intstruct, %intstruct* %pbob1, i64 0, i32 0
@@ -395,8 +395,8 @@
 define void @test25() {
 entry:
         %tmp = getelementptr { i64, i64, i64, i64 }, { i64, i64, i64, i64 }* null, i32 0, i32 3         ; <i64*> [#uses=1]
-        %tmp.upgrd.1 = load i64* %tmp           ; <i64> [#uses=1]
-        %tmp8.ui = load i64* null               ; <i64> [#uses=1]
+        %tmp.upgrd.1 = load i64, i64* %tmp           ; <i64> [#uses=1]
+        %tmp8.ui = load i64, i64* null               ; <i64> [#uses=1]
         %tmp8 = bitcast i64 %tmp8.ui to i64             ; <i64> [#uses=1]
         %tmp9 = and i64 %tmp8, %tmp.upgrd.1             ; <i64> [#uses=1]
         %sext = trunc i64 %tmp9 to i32          ; <i32> [#uses=1]
@@ -427,14 +427,14 @@
 define i32 @test27(%struct.compat_siginfo* %to, %struct.siginfo_t* %from) {
 entry:
 	%from_addr = alloca %struct.siginfo_t*
-	%tmp344 = load %struct.siginfo_t** %from_addr, align 8
+	%tmp344 = load %struct.siginfo_t*, %struct.siginfo_t** %from_addr, align 8
 	%tmp345 = getelementptr %struct.siginfo_t, %struct.siginfo_t* %tmp344, i32 0, i32 3
 	%tmp346 = getelementptr { { i32, i32, [0 x i8], %struct.sigval_t, i32 }, [88 x i8] }, { { i32, i32, [0 x i8], %struct.sigval_t, i32 }, [88 x i8] }* %tmp345, i32 0, i32 0
 	%tmp346347 = bitcast { i32, i32, [0 x i8], %struct.sigval_t, i32 }* %tmp346 to { i32, i32, %struct.sigval_t }*
 	%tmp348 = getelementptr { i32, i32, %struct.sigval_t }, { i32, i32, %struct.sigval_t }* %tmp346347, i32 0, i32 2
 	%tmp349 = getelementptr %struct.sigval_t, %struct.sigval_t* %tmp348, i32 0, i32 0
 	%tmp349350 = bitcast i8** %tmp349 to i32*
-	%tmp351 = load i32* %tmp349350, align 8
+	%tmp351 = load i32, i32* %tmp349350, align 8
 	%tmp360 = call i32 asm sideeffect "...",
         "=r,ir,*m,i,0,~{dirflag},~{fpsr},~{flags}"( i32 %tmp351,
          %struct.__large_struct* null, i32 -14, i32 0 )
@@ -482,9 +482,9 @@
 	%T = type <{ i64, i64, i64 }>
 define i32 @test29(i8* %start, i32 %X) nounwind {
 entry:
-	%tmp3 = load i64* null
+	%tmp3 = load i64, i64* null
 	%add.ptr = getelementptr i8, i8* %start, i64 %tmp3
-	%tmp158 = load i32* null
+	%tmp158 = load i32, i32* null
 	%add.ptr159 = getelementptr %T, %T* null, i32 %tmp158
 	%add.ptr209 = getelementptr i8, i8* %start, i64 0
 	%add.ptr212 = getelementptr i8, i8* %add.ptr209, i32 %X
@@ -507,7 +507,7 @@
 	%1 = bitcast i32* %0 to [0 x i32]*
 	call void @test30f(i32* %0) nounwind
 	%2 = getelementptr [0 x i32], [0 x i32]* %1, i32 0, i32 %m
-	%3 = load i32* %2, align 4
+	%3 = load i32, i32* %2, align 4
 	ret i32 %3
 ; CHECK-LABEL: @test30(
 ; CHECK: getelementptr i32
@@ -537,7 +537,7 @@
 	%E = bitcast i8* %D to i8**
 	store i8* %v, i8** %E
 	%F = getelementptr [4 x i8*], [4 x i8*]* %A, i32 0, i32 2
-	%G = load i8** %F
+	%G = load i8*, i8** %F
 	ret i8* %G
 ; CHECK-LABEL: @test32(
 ; CHECK: %D = getelementptr [4 x i8*], [4 x i8*]* %A, i64 0, i64 1
@@ -599,7 +599,7 @@
 	%B = getelementptr %T2, %T2* %A, i64 0, i32 0
 
       	store i64 %V, i64* %mrv_gep
-	%C = load i8** %B, align 8
+	%C = load i8*, i8** %B, align 8
 	ret i8* %C
 ; CHECK-LABEL: @test34(
 ; CHECK: %[[C:.*]] = inttoptr i64 %V to i8*
@@ -695,7 +695,7 @@
 
 define void @test39(%struct.ham* %arg, i8 %arg1) nounwind {
   %tmp = getelementptr inbounds %struct.ham, %struct.ham* %arg, i64 0, i32 2
-  %tmp2 = load %struct.zot** %tmp, align 8
+  %tmp2 = load %struct.zot*, %struct.zot** %tmp, align 8
   %tmp3 = bitcast %struct.zot* %tmp2 to i8*
   %tmp4 = getelementptr inbounds i8, i8* %tmp3, i64 -8
   store i8 %arg1, i8* %tmp4, align 8
@@ -723,7 +723,7 @@
   %cast = bitcast i32 addrspace(1)* %arr to i8 addrspace(1)*
   %V = mul i16 %N, 4
   %t = getelementptr i8, i8 addrspace(1)* %cast, i16 %V
-  %x = load i8 addrspace(1)* %t
+  %x = load i8, i8 addrspace(1)* %t
   ret i8 %x
 }
 
@@ -735,7 +735,7 @@
   %cast = bitcast [100 x double]* %arr to i64*
   %V = mul i64 %N, 8
   %t = getelementptr i64, i64* %cast, i64 %V
-  %x = load i64* %t
+  %x = load i64, i64* %t
   ret i64 %x
 }
 
@@ -745,11 +745,11 @@
 ; CHECK: getelementptr [100 x double], [100 x double]* %arr, i64 0, i64 %V
 ; CHECK-NEXT: bitcast double*
 ; CHECK-NEXT: %t = addrspacecast i64*
-; CHECK: load i64 addrspace(3)* %t
+; CHECK: load i64, i64 addrspace(3)* %t
   %cast = addrspacecast [100 x double]* %arr to i64 addrspace(3)*
   %V = mul i64 %N, 8
   %t = getelementptr i64, i64 addrspace(3)* %cast, i64 %V
-  %x = load i64 addrspace(3)* %t
+  %x = load i64, i64 addrspace(3)* %t
   ret i64 %x
 }
 
@@ -761,7 +761,7 @@
   %cast = bitcast [100 x double]* %arr to i8*
   %V = mul i64 %N, 8
   %t = getelementptr i8, i8* %cast, i64 %V
-  %x = load i8* %t
+  %x = load i8, i8* %t
   ret i8 %x
 }
 
@@ -772,7 +772,7 @@
   %cast = bitcast [100 x double] addrspace(1)* %arr to i64 addrspace(1)*
   %V = mul i16 %N, 8
   %t = getelementptr i64, i64 addrspace(1)* %cast, i16 %V
-  %x = load i64 addrspace(1)* %t
+  %x = load i64, i64 addrspace(1)* %t
   ret i64 %x
 }
 
@@ -783,7 +783,7 @@
   %cast = bitcast [100 x double] addrspace(1)* %arr to i8 addrspace(1)*
   %V = mul i16 %N, 8
   %t = getelementptr i8, i8 addrspace(1)* %cast, i16 %V
-  %x = load i8 addrspace(1)* %t
+  %x = load i8, i8 addrspace(1)* %t
   ret i8 %x
 }
 
diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll
index 0911ab0..041adf7 100644
--- a/llvm/test/Transforms/InstCombine/icmp-range.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-range.ll
@@ -6,14 +6,14 @@
 define i1 @test_nonzero(i32* nocapture readonly %arg) {
 ; CHECK-LABEL:test_nonzero
 ; CHECK: ret i1 true
-  %val = load i32* %arg, !range !0
+  %val = load i32, i32* %arg, !range !0
   %rval = icmp ne i32 %val, 0
   ret i1 %rval
 }
 define i1 @test_nonzero2(i32* nocapture readonly %arg) {
 ; CHECK-LABEL:test_nonzero2
 ; CHECK: ret i1 false
-  %val = load i32* %arg, !range !0
+  %val = load i32, i32* %arg, !range !0
   %rval = icmp eq i32 %val, 0
   ret i1 %rval
 }
@@ -23,7 +23,7 @@
 ; CHECK-LABEL: test_nonzero3
 ; Check that this does not trigger - it wouldn't be legal
 ; CHECK: icmp
-  %val = load i32* %arg, !range !1
+  %val = load i32, i32* %arg, !range !1
   %rval = icmp ne i32 %val, 0
   ret i1 %rval
 }
@@ -32,7 +32,7 @@
 define i1 @test_nonzero4(i8* nocapture readonly %arg) {
 ; CHECK-LABEL: test_nonzero4
 ; CHECK: ret i1 false
-  %val = load i8* %arg, !range !2
+  %val = load i8, i8* %arg, !range !2
   %rval = icmp ne i8 %val, 0
   ret i1 %rval
 }
@@ -40,7 +40,7 @@
 define i1 @test_nonzero5(i8* nocapture readonly %arg) {
 ; CHECK-LABEL: test_nonzero5
 ; CHECK: ret i1 false
-  %val = load i8* %arg, !range !2
+  %val = load i8, i8* %arg, !range !2
   %rval = icmp ugt i8 %val, 0
   ret i1 %rval
 }
@@ -49,7 +49,7 @@
 define i1 @test_nonzero6(i8* %argw) {
 ; CHECK-LABEL: test_nonzero6
 ; CHECK: icmp ne i8 %val, 0
-  %val = load i8* %argw, !range !3
+  %val = load i8, i8* %argw, !range !3
   %rval = icmp sgt i8 %val, 0
   ret i1 %rval
 }
diff --git a/llvm/test/Transforms/InstCombine/invariant.ll b/llvm/test/Transforms/InstCombine/invariant.ll
index 3832380..246f9e6 100644
--- a/llvm/test/Transforms/InstCombine/invariant.ll
+++ b/llvm/test/Transforms/InstCombine/invariant.ll
@@ -11,6 +11,6 @@
   %i = call {}* @llvm.invariant.start(i64 1, i8* %a) ; <{}*> [#uses=0]
   ; CHECK: call {}* @llvm.invariant.start
   call void @g(i8* %a)
-  %r = load i8* %a                                ; <i8> [#uses=1]
+  %r = load i8, i8* %a                                ; <i8> [#uses=1]
   ret i8 %r
 }
diff --git a/llvm/test/Transforms/InstCombine/known_align.ll b/llvm/test/Transforms/InstCombine/known_align.ll
index 0249951..670011f 100644
--- a/llvm/test/Transforms/InstCombine/known_align.ll
+++ b/llvm/test/Transforms/InstCombine/known_align.ll
@@ -11,17 +11,17 @@
 	%tmp = alloca i32, align 4		; <i32*> [#uses=2]
 	%tmp1 = alloca i32, align 4		; <i32*> [#uses=3]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	%tmp3 = load i32* getelementptr (%struct.p* @t, i32 0, i32 1), align 1		; <i32> [#uses=1]
+	%tmp3 = load i32, i32* getelementptr (%struct.p* @t, i32 0, i32 1), align 1		; <i32> [#uses=1]
 	store i32 %tmp3, i32* %tmp1, align 4
-	%tmp5 = load i32* %tmp1, align 4		; <i32> [#uses=1]
+	%tmp5 = load i32, i32* %tmp1, align 4		; <i32> [#uses=1]
 	store i32 %tmp5, i32* getelementptr (%struct.p* @u, i32 0, i32 1), align 1
-	%tmp6 = load i32* %tmp1, align 4		; <i32> [#uses=1]
+	%tmp6 = load i32, i32* %tmp1, align 4		; <i32> [#uses=1]
 	store i32 %tmp6, i32* %tmp, align 4
-	%tmp7 = load i32* %tmp, align 4		; <i32> [#uses=1]
+	%tmp7 = load i32, i32* %tmp, align 4		; <i32> [#uses=1]
 	store i32 %tmp7, i32* %retval, align 4
 	br label %return
 
 return:		; preds = %entry
-	%retval8 = load i32* %retval		; <i32> [#uses=1]
+	%retval8 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval8
 }
diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll
index 8b4c63a..7f4e280 100644
--- a/llvm/test/Transforms/InstCombine/load-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -22,7 +22,7 @@
 
 define i1 @test1(i32 %X) {
   %P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
-  %Q = load i16* %P
+  %Q = load i16, i16* %P
   %R = icmp eq i16 %Q, 0
   ret i1 %R
 ; NODL-LABEL: @test1(
@@ -36,7 +36,7 @@
 
 define i1 @test1_noinbounds(i32 %X) {
   %P = getelementptr [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
-  %Q = load i16* %P
+  %Q = load i16, i16* %P
   %R = icmp eq i16 %Q, 0
   ret i1 %R
 ; NODL-LABEL: @test1_noinbounds(
@@ -49,7 +49,7 @@
 
 define i1 @test1_noinbounds_i64(i64 %X) {
   %P = getelementptr [10 x i16], [10 x i16]* @G16, i64 0, i64 %X
-  %Q = load i16* %P
+  %Q = load i16, i16* %P
   %R = icmp eq i16 %Q, 0
   ret i1 %R
 ; NODL-LABEL: @test1_noinbounds_i64(
@@ -62,7 +62,7 @@
 
 define i1 @test1_noinbounds_as1(i32 %x) {
   %p = getelementptr [10 x i16], [10 x i16] addrspace(1)* @G16_as1, i16 0, i32 %x
-  %q = load i16 addrspace(1)* %p
+  %q = load i16, i16 addrspace(1)* %p
   %r = icmp eq i16 %q, 0
   ret i1 %r
 
@@ -74,7 +74,7 @@
 
 define i1 @test2(i32 %X) {
   %P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
-  %Q = load i16* %P
+  %Q = load i16, i16* %P
   %R = icmp slt i16 %Q, 85
   ret i1 %R
 ; NODL-LABEL: @test2(
@@ -84,7 +84,7 @@
 
 define i1 @test3(i32 %X) {
   %P = getelementptr inbounds [6 x double], [6 x double]* @GD, i32 0, i32 %X
-  %Q = load double* %P
+  %Q = load double, double* %P
   %R = fcmp oeq double %Q, 1.0
   ret i1 %R
 ; NODL-LABEL: @test3(
@@ -99,7 +99,7 @@
 
 define i1 @test4(i32 %X) {
   %P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
-  %Q = load i16* %P
+  %Q = load i16, i16* %P
   %R = icmp sle i16 %Q, 73
   ret i1 %R
 ; NODL-LABEL: @test4(
@@ -117,7 +117,7 @@
 
 define i1 @test4_i16(i16 %X) {
   %P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i16 %X
-  %Q = load i16* %P
+  %Q = load i16, i16* %P
   %R = icmp sle i16 %Q, 73
   ret i1 %R
 
@@ -137,7 +137,7 @@
 
 define i1 @test5(i32 %X) {
   %P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
-  %Q = load i16* %P
+  %Q = load i16, i16* %P
   %R = icmp eq i16 %Q, 69
   ret i1 %R
 ; NODL-LABEL: @test5(
@@ -155,7 +155,7 @@
 
 define i1 @test6(i32 %X) {
   %P = getelementptr inbounds [6 x double], [6 x double]* @GD, i32 0, i32 %X
-  %Q = load double* %P
+  %Q = load double, double* %P
   %R = fcmp ogt double %Q, 0.0
   ret i1 %R
 ; NODL-LABEL: @test6(
@@ -171,7 +171,7 @@
 
 define i1 @test7(i32 %X) {
   %P = getelementptr inbounds [6 x double], [6 x double]* @GD, i32 0, i32 %X
-  %Q = load double* %P
+  %Q = load double, double* %P
   %R = fcmp olt double %Q, 0.0
   ret i1 %R
 ; NODL-LABEL: @test7(
@@ -187,7 +187,7 @@
 
 define i1 @test8(i32 %X) {
   %P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
-  %Q = load i16* %P
+  %Q = load i16, i16* %P
   %R = and i16 %Q, 3
   %S = icmp eq i16 %R, 0
   ret i1 %S
@@ -211,7 +211,7 @@
 
 define i1 @test9(i32 %X) {
   %P = getelementptr inbounds [4 x { i32, i32 } ], [4 x { i32, i32 } ]* @GA, i32 0, i32 %X, i32 1
-  %Q = load i32* %P
+  %Q = load i32, i32* %P
   %R = icmp eq i32 %Q, 1
   ret i1 %R
 ; NODL-LABEL: @test9(
@@ -232,7 +232,7 @@
 ; P32-LABEL: @test10_struct(
 ; P32: ret i1 false
   %p = getelementptr inbounds %Foo, %Foo* @GS, i32 %x, i32 0
-  %q = load i32* %p
+  %q = load i32, i32* %p
   %r = icmp eq i32 %q, 9
   ret i1 %r
 }
@@ -244,7 +244,7 @@
 ; P32-LABEL: @test10_struct_noinbounds(
 ; P32: getelementptr %Foo, %Foo* @GS, i32 %x, i32 0
   %p = getelementptr %Foo, %Foo* @GS, i32 %x, i32 0
-  %q = load i32* %p
+  %q = load i32, i32* %p
   %r = icmp eq i32 %q, 9
   ret i1 %r
 }
@@ -258,7 +258,7 @@
 ; P32-LABEL: @test10_struct_i16(
 ; P32: ret i1 false
   %p = getelementptr inbounds %Foo, %Foo* @GS, i16 %x, i32 0
-  %q = load i32* %p
+  %q = load i32, i32* %p
   %r = icmp eq i32 %q, 0
   ret i1 %r
 }
@@ -272,7 +272,7 @@
 ; P32-LABEL: @test10_struct_i64(
 ; P32: ret i1 false
   %p = getelementptr inbounds %Foo, %Foo* @GS, i64 %x, i32 0
-  %q = load i32* %p
+  %q = load i32, i32* %p
   %r = icmp eq i32 %q, 0
   ret i1 %r
 }
@@ -285,7 +285,7 @@
 ; P32: %1 = sext i16 %x to i32
 ; P32: getelementptr %Foo, %Foo* @GS, i32 %1, i32 0
   %p = getelementptr %Foo, %Foo* @GS, i16 %x, i32 0
-  %q = load i32* %p
+  %q = load i32, i32* %p
   %r = icmp eq i32 %q, 0
   ret i1 %r
 }
@@ -299,7 +299,7 @@
 ; P32-NEXT: %r = icmp ne i32 %x, 1
 ; P32-NEXT: ret i1 %r
   %p = getelementptr inbounds [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
-  %q = load i32* %p
+  %q = load i32, i32* %p
   %r = icmp eq i32 %q, 9
   ret i1 %r
 }
@@ -311,7 +311,7 @@
 ; P32-LABEL: @test10_struct_arr_noinbounds(
 ; P32-NEXT  %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
   %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
-  %q = load i32* %p
+  %q = load i32, i32* %p
   %r = icmp eq i32 %q, 9
   ret i1 %r
 }
@@ -325,7 +325,7 @@
 ; P32-NEXT: %r = icmp ne i16 %x, 1
 ; P32-NEXT: ret i1 %r
   %p = getelementptr inbounds [4 x %Foo], [4 x %Foo]* @GStructArr, i16 0, i16 %x, i32 2
-  %q = load i32* %p
+  %q = load i32, i32* %p
   %r = icmp eq i32 %q, 9
   ret i1 %r
 }
@@ -340,7 +340,7 @@
 ; P32-NEXT: %r = icmp ne i32 %1, 1
 ; P32-NEXT: ret i1 %r
   %p = getelementptr inbounds [4 x %Foo], [4 x %Foo]* @GStructArr, i64 0, i64 %x, i32 2
-  %q = load i32* %p
+  %q = load i32, i32* %p
   %r = icmp eq i32 %q, 9
   ret i1 %r
 }
@@ -352,7 +352,7 @@
 ; P32-LABEL: @test10_struct_arr_noinbounds_i16(
 ; P32-NEXT: %r = icmp ne i16 %x, 1
   %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i16 %x, i32 2
-  %q = load i32* %p
+  %q = load i32, i32* %p
   %r = icmp eq i32 %q, 9
   ret i1 %r
 }
@@ -366,7 +366,7 @@
 ; P32: %r = icmp ne i32 %1, 1
 ; P32-NEXT: ret i1 %r
   %p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i64 %x, i32 2
-  %q = load i32* %p
+  %q = load i32, i32* %p
   %r = icmp eq i32 %q, 9
   ret i1 %r
 }
diff --git a/llvm/test/Transforms/InstCombine/load-select.ll b/llvm/test/Transforms/InstCombine/load-select.ll
index e8cbad3..e9dfaa1 100644
--- a/llvm/test/Transforms/InstCombine/load-select.ll
+++ b/llvm/test/Transforms/InstCombine/load-select.ll
@@ -11,6 +11,6 @@
 entry:
   %0 = icmp eq i32 %y, 0                          ; <i1> [#uses=1]
   %storemerge = select i1 %0, i32* getelementptr inbounds ([2 x i32]* @a, i32 0, i32 1), i32* getelementptr inbounds ([2 x i32]* @a, i32 0, i32 0) ; <i32*> [#uses=1]
-  %1 = load i32* %storemerge, align 4             ; <i32> [#uses=1]
+  %1 = load i32, i32* %storemerge, align 4             ; <i32> [#uses=1]
   ret i32 %1
 }
diff --git a/llvm/test/Transforms/InstCombine/load.ll b/llvm/test/Transforms/InstCombine/load.ll
index 3f8ed56..2cc55e9 100644
--- a/llvm/test/Transforms/InstCombine/load.ll
+++ b/llvm/test/Transforms/InstCombine/load.ll
@@ -16,7 +16,7 @@
 ; CHECK-LABEL: @test1(
 ; CHECK-NOT: load
 define i32 @test1() {
-	%B = load i32* @X		; <i32> [#uses=1]
+	%B = load i32, i32* @X		; <i32> [#uses=1]
 	ret i32 %B
 }
 
@@ -24,7 +24,7 @@
 ; CHECK-NOT: load
 define float @test2() {
 	%A = getelementptr [2 x { i32, float }], [2 x { i32, float }]* @Y, i64 0, i64 1, i32 1		; <float*> [#uses=1]
-	%B = load float* %A		; <float> [#uses=1]
+	%B = load float, float* %A		; <float> [#uses=1]
 	ret float %B
 }
 
@@ -32,7 +32,7 @@
 ; CHECK-NOT: load
 define i32 @test3() {
 	%A = getelementptr [2 x { i32, float }], [2 x { i32, float }]* @Y, i64 0, i64 0, i32 0		; <i32*> [#uses=1]
-	%B = load i32* %A		; <i32> [#uses=1]
+	%B = load i32, i32* %A		; <i32> [#uses=1]
 	ret i32 %B
 }
 
@@ -40,7 +40,7 @@
 ; CHECK-NOT: load
 define i32 @test4() {
 	%A = getelementptr [2 x { i32, float }], [2 x { i32, float }]* @Z, i64 0, i64 1, i32 0		; <i32*> [#uses=1]
-	%B = load i32* %A		; <i32> [#uses=1]
+	%B = load i32, i32* %A		; <i32> [#uses=1]
 	ret i32 %B
 }
 
@@ -48,7 +48,7 @@
 ; CHECK-NOT: load
 define i32 @test5(i1 %C) {
 	%Y = select i1 %C, i32* @X, i32* @X2		; <i32*> [#uses=1]
-	%Z = load i32* %Y		; <i32> [#uses=1]
+	%Z = load i32, i32* %Y		; <i32> [#uses=1]
 	ret i32 %Z
 }
 
@@ -56,7 +56,7 @@
 ; CHECK-NOT: load
 define i32 @test7(i32 %X) {
 	%V = getelementptr i32, i32* null, i32 %X		; <i32*> [#uses=1]
-	%R = load i32* %V		; <i32> [#uses=1]
+	%R = load i32, i32* %V		; <i32> [#uses=1]
 	ret i32 %R
 }
 
@@ -64,15 +64,15 @@
 ; CHECK-NOT: load
 define i32 @test8(i32* %P) {
 	store i32 1, i32* %P
-	%X = load i32* %P		; <i32> [#uses=1]
+	%X = load i32, i32* %P		; <i32> [#uses=1]
 	ret i32 %X
 }
 
 ; CHECK-LABEL: @test9(
 ; CHECK-NOT: load
 define i32 @test9(i32* %P) {
-	%X = load i32* %P		; <i32> [#uses=1]
-	%Y = load i32* %P		; <i32> [#uses=1]
+	%X = load i32, i32* %P		; <i32> [#uses=1]
+	%Y = load i32, i32* %P		; <i32> [#uses=1]
 	%Z = sub i32 %X, %Y		; <i32> [#uses=1]
 	ret i32 %Z
 }
@@ -89,7 +89,7 @@
 	store i32 0, i32* %P
 	br label %C
 C:		; preds = %F, %T
-	%V = load i32* %P		; <i32> [#uses=1]
+	%V = load i32, i32* %P		; <i32> [#uses=1]
 	ret i32 %V
 }
 
@@ -99,7 +99,7 @@
   %t0 = getelementptr double, double* %p, i32 1
   store double 2.0, double* %t0
   %t1 = getelementptr double, double* %p, i32 1
-  %x = load double* %t1
+  %x = load double, double* %t1
   ret double %x
 }
 
@@ -110,14 +110,14 @@
   store i32 123, i32* %A
   ; Cast the result of the load not the source
   %Q = bitcast i32* %A to i32*
-  %V = load i32* %Q
+  %V = load i32, i32* %Q
   ret i32 %V
 }
 
 ; CHECK-LABEL: @test13(
 ; CHECK-NOT: load
 define <16 x i8> @test13(<2 x i64> %x) {
-  %tmp = load <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*)
+  %tmp = load <16 x i8>, <16 x i8>* bitcast ([4 x i32]* @GLOBAL to <16 x i8>*)
   ret <16 x i8> %tmp
 }
 
@@ -128,13 +128,13 @@
 ; those confuse the analysis into thinking that the second store does not alias
 ; the first.
 ; CHECK-LABEL: @test14(
-; CHECK:         %[[R:.*]] = load i8*
+; CHECK:         %[[R:.*]] = load i8, i8*
 ; CHECK-NEXT:    ret i8 %[[R]]
   %a = alloca i32
   %a.i8 = bitcast i32* %a to i8*
   store i8 %x, i8* %a.i8
   store i32 %y, i32* %a
-  %r = load i8* %a.i8
+  %r = load i8, i8* %a.i8
   ret i8 %r
 }
 
@@ -143,12 +143,12 @@
 define i8 @test15(i8 %x, i32 %y) {
 ; Same test as @test14 essentially, but using a global instead of an alloca.
 ; CHECK-LABEL: @test15(
-; CHECK:         %[[R:.*]] = load i8*
+; CHECK:         %[[R:.*]] = load i8, i8*
 ; CHECK-NEXT:    ret i8 %[[R]]
   %g.i8 = bitcast i32* @test15_global to i8*
   store i8 %x, i8* %g.i8
   store i32 %y, i32* @test15_global
-  %r = load i8* %g.i8
+  %r = load i8, i8* %g.i8
   ret i8 %r
 }
 
@@ -156,12 +156,12 @@
 ; Check that we canonicalize loads which are only stored to use integer types
 ; when there is a valid integer type.
 ; CHECK-LABEL: @test16(
-; CHECK: %[[L1:.*]] = load i32*
+; CHECK: %[[L1:.*]] = load i32, i32*
 ; CHECK-NOT: load
 ; CHECK: store i32 %[[L1]], i32*
 ; CHECK: store i32 %[[L1]], i32*
 ; CHECK-NOT: store
-; CHECK: %[[L1:.*]] = load i32*
+; CHECK: %[[L1:.*]] = load i32, i32*
 ; CHECK-NOT: load
 ; CHECK: store i32 %[[L1]], i32*
 ; CHECK: store i32 %[[L1]], i32*
@@ -174,11 +174,11 @@
   %b.cast = bitcast i8* %b to float*
   %c.cast = bitcast i8* %c to i32*
 
-  %x1 = load float* %x.cast
+  %x1 = load float, float* %x.cast
   store float %x1, float* %a.cast
   store float %x1, float* %b.cast
 
-  %x2 = load float* %x.cast
+  %x2 = load float, float* %x.cast
   store float %x2, float* %b.cast
   %x2.cast = bitcast float %x2 to i32
   store i32 %x2.cast, i32* %c.cast
@@ -192,11 +192,11 @@
 ; than the value.
 ;
 ; CHECK-LABEL: @test17(
-; CHECK: %[[L:.*]] = load i8**
+; CHECK: %[[L:.*]] = load i8*, i8**
 ; CHECK: store i8 %y, i8* %[[L]]
 
 entry:
-  %x.load = load i8** %x
+  %x.load = load i8*, i8** %x
   store i8 %y, i8* %x.load
 
   ret void
diff --git a/llvm/test/Transforms/InstCombine/load3.ll b/llvm/test/Transforms/InstCombine/load3.ll
index bcec75f..00f4eaf 100644
--- a/llvm/test/Transforms/InstCombine/load3.ll
+++ b/llvm/test/Transforms/InstCombine/load3.ll
@@ -6,9 +6,9 @@
 
 define i32 @test1(i32* %p) {
   %t0 = getelementptr i32, i32* %p, i32 1
-  %y = load i32* %t0
+  %y = load i32, i32* %t0
   %t1 = getelementptr i32, i32* %p, i32 1
-  %x = load i32* %t1
+  %x = load i32, i32* %t1
   %a = sub i32 %y, %x
   ret i32 %a
 ; CHECK-LABEL: @test1(
@@ -19,7 +19,7 @@
 ; PR7429
 @.str = private constant [4 x i8] c"XYZ\00"
 define float @test2() {
-  %tmp = load float* bitcast ([4 x i8]* @.str to float*), align 1
+  %tmp = load float, float* bitcast ([4 x i8]* @.str to float*), align 1
   ret float %tmp
   
 ; CHECK-LABEL: @test2(
@@ -37,7 +37,7 @@
 ; PR14986
 define void @test3() nounwind {
 ; This is a weird way of computing zero.
-  %l = load i32* getelementptr ([36 x i32]* @expect32, i32 29826161, i32 28), align 4
+  %l = load i32, i32* getelementptr ([36 x i32]* @expect32, i32 29826161, i32 28), align 4
   store i32 %l, i32* getelementptr ([36 x i32]* @rslts32, i32 29826161, i32 28), align 4
   ret void
 
diff --git a/llvm/test/Transforms/InstCombine/loadstore-alignment.ll b/llvm/test/Transforms/InstCombine/loadstore-alignment.ll
index 66ee4d2..4afa82d 100644
--- a/llvm/test/Transforms/InstCombine/loadstore-alignment.ll
+++ b/llvm/test/Transforms/InstCombine/loadstore-alignment.ll
@@ -10,7 +10,7 @@
 ; CHECK: , align 16
 define <2 x i64> @static_hem() {
   %t = getelementptr <2 x i64>, <2 x i64>* @x, i32 7
-  %tmp1 = load <2 x i64>* %t, align 1
+  %tmp1 = load <2 x i64>, <2 x i64>* %t, align 1
   ret <2 x i64> %tmp1
 }
 
@@ -19,7 +19,7 @@
 define <2 x i64> @static_hem_addrspacecast() {
   %t = getelementptr <2 x i64>, <2 x i64>* @x, i32 7
   %t.asc = addrspacecast <2 x i64>* %t to <2 x i64> addrspace(1)*
-  %tmp1 = load <2 x i64> addrspace(1)* %t.asc, align 1
+  %tmp1 = load <2 x i64>, <2 x i64> addrspace(1)* %t.asc, align 1
   ret <2 x i64> %tmp1
 }
 
@@ -28,7 +28,7 @@
 define <2 x i64> @static_hem_addrspacecast_smaller_ptr() {
   %t = getelementptr <2 x i64>, <2 x i64>* @x, i32 7
   %t.asc = addrspacecast <2 x i64>* %t to <2 x i64> addrspace(2)*
-  %tmp1 = load <2 x i64> addrspace(2)* %t.asc, align 1
+  %tmp1 = load <2 x i64>, <2 x i64> addrspace(2)* %t.asc, align 1
   ret <2 x i64> %tmp1
 }
 
@@ -37,7 +37,7 @@
 define <2 x i64> @static_hem_addrspacecast_larger_ptr() {
   %t = getelementptr <2 x i64>, <2 x i64> addrspace(2)* @x.as2, i32 7
   %t.asc = addrspacecast <2 x i64> addrspace(2)* %t to <2 x i64> addrspace(1)*
-  %tmp1 = load <2 x i64> addrspace(1)* %t.asc, align 1
+  %tmp1 = load <2 x i64>, <2 x i64> addrspace(1)* %t.asc, align 1
   ret <2 x i64> %tmp1
 }
 
@@ -45,7 +45,7 @@
 ; CHECK: , align 16
 define <2 x i64> @hem(i32 %i) {
   %t = getelementptr <2 x i64>, <2 x i64>* @x, i32 %i
-  %tmp1 = load <2 x i64>* %t, align 1
+  %tmp1 = load <2 x i64>, <2 x i64>* %t, align 1
   ret <2 x i64> %tmp1
 }
 
@@ -53,14 +53,14 @@
 ; CHECK: , align 16
 define <2 x i64> @hem_2d(i32 %i, i32 %j) {
   %t = getelementptr [13 x <2 x i64>], [13 x <2 x i64>]* @xx, i32 %i, i32 %j
-  %tmp1 = load <2 x i64>* %t, align 1
+  %tmp1 = load <2 x i64>, <2 x i64>* %t, align 1
   ret <2 x i64> %tmp1
 }
 
 ; CHECK-LABEL: @foo(
 ; CHECK: , align 16
 define <2 x i64> @foo() {
-  %tmp1 = load <2 x i64>* @x, align 1
+  %tmp1 = load <2 x i64>, <2 x i64>* @x, align 1
   ret <2 x i64> %tmp1
 }
 
@@ -70,7 +70,7 @@
 define <2 x i64> @bar() {
   %t = alloca <2 x i64>
   call void @kip(<2 x i64>* %t)
-  %tmp1 = load <2 x i64>* %t, align 1
+  %tmp1 = load <2 x i64>, <2 x i64>* %t, align 1
   ret <2 x i64> %tmp1
 }
 
diff --git a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
index d0ec785..a30c0bc 100644
--- a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
+++ b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -5,9 +5,9 @@
 define i32 @test_load_cast_combine_tbaa(float* %ptr) {
 ; Ensure (cast (load (...))) -> (load (cast (...))) preserves TBAA.
 ; CHECK-LABEL: @test_load_cast_combine_tbaa(
-; CHECK: load i32* %{{.*}}, !tbaa !0
+; CHECK: load i32, i32* %{{.*}}, !tbaa !0
 entry:
-  %l = load float* %ptr, !tbaa !0
+  %l = load float, float* %ptr, !tbaa !0
   %c = bitcast float %l to i32
   ret i32 %c
 }
@@ -15,9 +15,9 @@
 define i32 @test_load_cast_combine_noalias(float* %ptr) {
 ; Ensure (cast (load (...))) -> (load (cast (...))) preserves no-alias metadata.
 ; CHECK-LABEL: @test_load_cast_combine_noalias(
-; CHECK: load i32* %{{.*}}, !alias.scope !2, !noalias !1
+; CHECK: load i32, i32* %{{.*}}, !alias.scope !2, !noalias !1
 entry:
-  %l = load float* %ptr, !alias.scope !2, !noalias !1
+  %l = load float, float* %ptr, !alias.scope !2, !noalias !1
   %c = bitcast float %l to i32
   ret i32 %c
 }
@@ -27,11 +27,11 @@
 ; would be nice to preserve or update it somehow but this is hard when moving
 ; between types.
 ; CHECK-LABEL: @test_load_cast_combine_range(
-; CHECK: load float* %{{.*}}
+; CHECK: load float, float* %{{.*}}
 ; CHECK-NOT: !range
 ; CHECK: ret float
 entry:
-  %l = load i32* %ptr, !range !5
+  %l = load i32, i32* %ptr, !range !5
   %c = bitcast i32 %l to float
   ret float %c
 }
@@ -39,9 +39,9 @@
 define i32 @test_load_cast_combine_invariant(float* %ptr) {
 ; Ensure (cast (load (...))) -> (load (cast (...))) preserves invariant metadata.
 ; CHECK-LABEL: @test_load_cast_combine_invariant(
-; CHECK: load i32* %{{.*}}, !invariant.load !3
+; CHECK: load i32, i32* %{{.*}}, !invariant.load !3
 entry:
-  %l = load float* %ptr, !invariant.load !3
+  %l = load float, float* %ptr, !invariant.load !3
   %c = bitcast float %l to i32
   ret i32 %c
 }
@@ -50,9 +50,9 @@
 ; Ensure (cast (load (...))) -> (load (cast (...))) preserves nontemporal
 ; metadata.
 ; CHECK-LABEL: @test_load_cast_combine_nontemporal(
-; CHECK: load i32* %{{.*}}, !nontemporal !4
+; CHECK: load i32, i32* %{{.*}}, !nontemporal !4
 entry:
-  %l = load float* %ptr, !nontemporal !4
+  %l = load float, float* %ptr, !nontemporal !4
   %c = bitcast float %l to i32
   ret i32 %c
 }
@@ -61,7 +61,7 @@
 ; Ensure (cast (load (...))) -> (load (cast (...))) preserves loop access
 ; metadata.
 ; CHECK-LABEL: @test_load_cast_combine_loop(
-; CHECK: load i32* %{{.*}}, !llvm.mem.parallel_loop_access !1
+; CHECK: load i32, i32* %{{.*}}, !llvm.mem.parallel_loop_access !1
 entry:
   br label %loop
 
@@ -69,7 +69,7 @@
   %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
   %src.gep = getelementptr inbounds float, float* %src, i32 %i
   %dst.gep = getelementptr inbounds i32, i32* %dst, i32 %i
-  %l = load float* %src.gep, !llvm.mem.parallel_loop_access !1
+  %l = load float, float* %src.gep, !llvm.mem.parallel_loop_access !1
   %c = bitcast float %l to i32
   store i32 %c, i32* %dst.gep
   %i.next = add i32 %i, 1
@@ -93,11 +93,11 @@
 ; file, and no LABEL lines are to be added after this point.
 ;
 ; CHECK-LABEL: @test_load_cast_combine_nonnull(
-; CHECK: %[[V:.*]] = load i64* %{{.*}}, !range ![[MD:[0-9]+]]
+; CHECK: %[[V:.*]] = load i64, i64* %{{.*}}, !range ![[MD:[0-9]+]]
 ; CHECK-NOT: !nonnull
 ; CHECK: store i64 %[[V]], i64*
 entry:
-  %p = load float** %ptr, !nonnull !3
+  %p = load float*, float** %ptr, !nonnull !3
   %gep = getelementptr float*, float** %ptr, i32 42
   store float* %p, float** %gep
   ret void
diff --git a/llvm/test/Transforms/InstCombine/lshr-phi.ll b/llvm/test/Transforms/InstCombine/lshr-phi.ll
index ffc5754..79dc8b3 100644
--- a/llvm/test/Transforms/InstCombine/lshr-phi.ll
+++ b/llvm/test/Transforms/InstCombine/lshr-phi.ll
@@ -8,7 +8,7 @@
 
 define i32 @hash_string(i8* nocapture %key) nounwind readonly {
 entry:
-	%t0 = load i8* %key, align 1		; <i8> [#uses=1]
+	%t0 = load i8, i8* %key, align 1		; <i8> [#uses=1]
 	%t1 = icmp eq i8 %t0, 0		; <i1> [#uses=1]
 	br i1 %t1, label %bb2, label %bb
 
@@ -19,13 +19,13 @@
 	%t2 = shl i32 %k.04, 1		; <i32> [#uses=1]
 	%t3 = lshr i32 %k.04, 14		; <i32> [#uses=1]
 	%t4 = add i32 %t2, %t3		; <i32> [#uses=1]
-	%t5 = load i8* %cp.05, align 1		; <i8> [#uses=1]
+	%t5 = load i8, i8* %cp.05, align 1		; <i8> [#uses=1]
 	%t6 = sext i8 %t5 to i32		; <i32> [#uses=1]
 	%t7 = xor i32 %t6, %t4		; <i32> [#uses=1]
 	%t8 = and i32 %t7, 16383		; <i32> [#uses=2]
 	%tmp = add i64 %indvar, 1		; <i64> [#uses=2]
 	%scevgep = getelementptr i8, i8* %key, i64 %tmp		; <i8*> [#uses=1]
-	%t9 = load i8* %scevgep, align 1		; <i8> [#uses=1]
+	%t9 = load i8, i8* %scevgep, align 1		; <i8> [#uses=1]
 	%t10 = icmp eq i8 %t9, 0		; <i1> [#uses=1]
 	br i1 %t10, label %bb2, label %bb
 
diff --git a/llvm/test/Transforms/InstCombine/malloc-free-delete.ll b/llvm/test/Transforms/InstCombine/malloc-free-delete.ll
index 765c8c3..399b474 100644
--- a/llvm/test/Transforms/InstCombine/malloc-free-delete.ll
+++ b/llvm/test/Transforms/InstCombine/malloc-free-delete.ll
@@ -5,7 +5,7 @@
     %c_19 = alloca i8*
     %malloc_206 = tail call i8* @malloc(i32 mul (i32 ptrtoint (i8* getelementptr (i8* null, i32 1) to i32), i32 10))
     store i8* %malloc_206, i8** %c_19
-    %tmp_207 = load i8** %c_19
+    %tmp_207 = load i8*, i8** %c_19
     tail call void @free(i8* %tmp_207)
     ret i32 0
 ; CHECK-NEXT: ret i32 0
diff --git a/llvm/test/Transforms/InstCombine/mem-gep-zidx.ll b/llvm/test/Transforms/InstCombine/mem-gep-zidx.ll
index ddc6be3..cf021b1 100644
--- a/llvm/test/Transforms/InstCombine/mem-gep-zidx.ll
+++ b/llvm/test/Transforms/InstCombine/mem-gep-zidx.ll
@@ -9,7 +9,7 @@
 entry:
   %idxprom = sext i32 %x to i64
   %arrayidx = getelementptr inbounds [1 x i32], [1 x i32]* @f.a, i64 0, i64 %idxprom
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   ret i32 %0
 
 ; CHECK-LABEL: @test1
@@ -37,7 +37,7 @@
   %idxprom = sext i32 %x to i64
   %p = select i1 %y, [1 x i32]* @f.a, [1 x i32]* @f.b
   %arrayidx = getelementptr inbounds [1 x i32], [1 x i32]* %p, i64 0, i64 %idxprom
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   ret i32 %0
 
 ; CHECK-LABEL: @test3
diff --git a/llvm/test/Transforms/InstCombine/memcmp-1.ll b/llvm/test/Transforms/InstCombine/memcmp-1.ll
index 5fcd603..db15bd6 100644
--- a/llvm/test/Transforms/InstCombine/memcmp-1.ll
+++ b/llvm/test/Transforms/InstCombine/memcmp-1.ll
@@ -33,9 +33,9 @@
 define i32 @test_simplify3(i8* %mem1, i8* %mem2) {
 ; CHECK-LABEL: @test_simplify3(
   %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 1)
-; CHECK: [[LOAD1:%[a-z]+]] = load i8* %mem1, align 1
+; CHECK: [[LOAD1:%[a-z]+]] = load i8, i8* %mem1, align 1
 ; CHECK: [[ZEXT1:%[a-z]+]] = zext i8 [[LOAD1]] to i32
-; CHECK: [[LOAD2:%[a-z]+]] = load i8* %mem2, align 1
+; CHECK: [[LOAD2:%[a-z]+]] = load i8, i8* %mem2, align 1
 ; CHECK: [[ZEXT2:%[a-z]+]] = zext i8 [[LOAD2]] to i32
 ; CHECK: [[RET:%[a-z]+]] = sub nsw i32 [[ZEXT1]], [[ZEXT2]]
   ret i32 %ret
diff --git a/llvm/test/Transforms/InstCombine/memcpy-from-global.ll b/llvm/test/Transforms/InstCombine/memcpy-from-global.ll
index 8efb2a3..cfd2398 100644
--- a/llvm/test/Transforms/InstCombine/memcpy-from-global.ll
+++ b/llvm/test/Transforms/InstCombine/memcpy-from-global.ll
@@ -15,22 +15,22 @@
 	%tmp3 = shl i32 %hash, 2		; <i32> [#uses=1]
 	%tmp5 = and i32 %tmp3, 124		; <i32> [#uses=4]
 	%tmp753 = getelementptr [128 x float], [128 x float]* %lookupTable, i32 0, i32 %tmp5		; <float*> [#uses=1]
-	%tmp9 = load float* %tmp753		; <float> [#uses=1]
+	%tmp9 = load float, float* %tmp753		; <float> [#uses=1]
 	%tmp11 = fmul float %tmp9, %x		; <float> [#uses=1]
 	%tmp13 = fadd float %tmp11, 0.000000e+00		; <float> [#uses=1]
 	%tmp17.sum52 = or i32 %tmp5, 1		; <i32> [#uses=1]
 	%tmp1851 = getelementptr [128 x float], [128 x float]* %lookupTable, i32 0, i32 %tmp17.sum52		; <float*> [#uses=1]
-	%tmp19 = load float* %tmp1851		; <float> [#uses=1]
+	%tmp19 = load float, float* %tmp1851		; <float> [#uses=1]
 	%tmp21 = fmul float %tmp19, %y		; <float> [#uses=1]
 	%tmp23 = fadd float %tmp21, %tmp13		; <float> [#uses=1]
 	%tmp27.sum50 = or i32 %tmp5, 2		; <i32> [#uses=1]
 	%tmp2849 = getelementptr [128 x float], [128 x float]* %lookupTable, i32 0, i32 %tmp27.sum50		; <float*> [#uses=1]
-	%tmp29 = load float* %tmp2849		; <float> [#uses=1]
+	%tmp29 = load float, float* %tmp2849		; <float> [#uses=1]
 	%tmp31 = fmul float %tmp29, %z		; <float> [#uses=1]
 	%tmp33 = fadd float %tmp31, %tmp23		; <float> [#uses=1]
 	%tmp37.sum48 = or i32 %tmp5, 3		; <i32> [#uses=1]
 	%tmp3847 = getelementptr [128 x float], [128 x float]* %lookupTable, i32 0, i32 %tmp37.sum48		; <float*> [#uses=1]
-	%tmp39 = load float* %tmp3847		; <float> [#uses=1]
+	%tmp39 = load float, float* %tmp3847		; <float> [#uses=1]
 	%tmp41 = fmul float %tmp39, %w		; <float> [#uses=1]
 	%tmp43 = fadd float %tmp41, %tmp33		; <float> [#uses=1]
 	ret float %tmp43
diff --git a/llvm/test/Transforms/InstCombine/merge-icmp.ll b/llvm/test/Transforms/InstCombine/merge-icmp.ll
index b021fe04..6a65b5b 100644
--- a/llvm/test/Transforms/InstCombine/merge-icmp.ll
+++ b/llvm/test/Transforms/InstCombine/merge-icmp.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -S -instcombine < %s | FileCheck %s
 
 define i1 @test1(i16* %x) {
-  %load = load i16* %x, align 4
+  %load = load i16, i16* %x, align 4
   %trunc = trunc i16 %load to i8
   %cmp1 = icmp eq i8 %trunc, 127
   %and = and i16 %load, -256
@@ -15,7 +15,7 @@
 }
 
 define i1 @test2(i16* %x) {
-  %load = load i16* %x, align 4
+  %load = load i16, i16* %x, align 4
   %and = and i16 %load, -256
   %cmp1 = icmp eq i16 %and, 32512
   %trunc = trunc i16 %load to i8
diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll
index 4d1e6c7..de09f12 100644
--- a/llvm/test/Transforms/InstCombine/mul.ll
+++ b/llvm/test/Transforms/InstCombine/mul.ll
@@ -107,7 +107,7 @@
 ; PR2642
 define internal void @test13(<4 x float>*) {
 ; CHECK-LABEL: @test13(
-	load <4 x float>* %0, align 1
+	load <4 x float>, <4 x float>* %0, align 1
 	fmul <4 x float> %2, < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >
 	store <4 x float> %3, <4 x float>* %0, align 1
 	ret void
diff --git a/llvm/test/Transforms/InstCombine/multi-size-address-space-pointer.ll b/llvm/test/Transforms/InstCombine/multi-size-address-space-pointer.ll
index a249253..4e5b210 100644
--- a/llvm/test/Transforms/InstCombine/multi-size-address-space-pointer.ll
+++ b/llvm/test/Transforms/InstCombine/multi-size-address-space-pointer.ll
@@ -6,7 +6,7 @@
 ; CHECK-LABEL: @test_as0(
 ; CHECK: %arrayidx = getelementptr i32, i32* %a, i32 1
   %arrayidx = getelementptr i32, i32 addrspace(0)* %a, i64 1
-  %y = load i32 addrspace(0)* %arrayidx, align 4
+  %y = load i32, i32 addrspace(0)* %arrayidx, align 4
   ret i32 %y
 }
 
@@ -14,7 +14,7 @@
 ; CHECK-LABEL: @test_as1(
 ; CHECK: %arrayidx = getelementptr i32, i32 addrspace(1)* %a, i64 1
   %arrayidx = getelementptr i32, i32 addrspace(1)* %a, i32 1
-  %y = load i32 addrspace(1)* %arrayidx, align 4
+  %y = load i32, i32 addrspace(1)* %arrayidx, align 4
   ret i32 %y
 }
 
@@ -22,7 +22,7 @@
 ; CHECK-LABEL: @test_as2(
 ; CHECK: %arrayidx = getelementptr i32, i32 addrspace(2)* %a, i8 1
   %arrayidx = getelementptr i32, i32 addrspace(2)* %a, i32 1
-  %y = load i32 addrspace(2)* %arrayidx, align 4
+  %y = load i32, i32 addrspace(2)* %arrayidx, align 4
   ret i32 %y
 }
 
@@ -30,17 +30,17 @@
 ; CHECK-LABEL: @test_as3(
 ; CHECK: %arrayidx = getelementptr i32, i32 addrspace(3)* %a, i16 1
   %arrayidx = getelementptr i32, i32 addrspace(3)* %a, i32 1
-  %y = load i32 addrspace(3)* %arrayidx, align 4
+  %y = load i32, i32 addrspace(3)* %arrayidx, align 4
   ret i32 %y
 }
 
 define i32 @test_combine_ptrtoint(i32 addrspace(2)* %a) {
 ; CHECK-LABEL: @test_combine_ptrtoint(
-; CHECK-NEXT: %y = load i32 addrspace(2)* %a
+; CHECK-NEXT: %y = load i32, i32 addrspace(2)* %a
 ; CHECK-NEXT: ret i32 %y
   %cast = ptrtoint i32 addrspace(2)* %a to i8
   %castback = inttoptr i8 %cast to i32 addrspace(2)*
-  %y = load i32 addrspace(2)* %castback, align 4
+  %y = load i32, i32 addrspace(2)* %castback, align 4
   ret i32 %y
 }
 
@@ -55,12 +55,12 @@
 define i32 @test_combine_vector_ptrtoint(<2 x i32 addrspace(2)*> %a) {
 ; CHECK-LABEL: @test_combine_vector_ptrtoint(
 ; CHECK-NEXT: %p = extractelement <2 x i32 addrspace(2)*> %a, i32 0
-; CHECK-NEXT: %y = load i32 addrspace(2)* %p, align 4
+; CHECK-NEXT: %y = load i32, i32 addrspace(2)* %p, align 4
 ; CHECK-NEXT: ret i32 %y
   %cast = ptrtoint <2 x i32 addrspace(2)*> %a to <2 x i8>
   %castback = inttoptr <2 x i8> %cast to <2 x i32 addrspace(2)*>
   %p = extractelement <2 x i32 addrspace(2)*> %castback, i32 0
-  %y = load i32 addrspace(2)* %p, align 4
+  %y = load i32, i32 addrspace(2)* %p, align 4
   ret i32 %y
 }
 
diff --git a/llvm/test/Transforms/InstCombine/no-negzero.ll b/llvm/test/Transforms/InstCombine/no-negzero.ll
index 4ed2836..07e6825 100644
--- a/llvm/test/Transforms/InstCombine/no-negzero.ll
+++ b/llvm/test/Transforms/InstCombine/no-negzero.ll
@@ -14,17 +14,17 @@
   %0 = alloca double, align 8                     ; <double*> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   store double %x, double* %x_addr
-  %1 = load double* %x_addr, align 8              ; <double> [#uses=1]
+  %1 = load double, double* %x_addr, align 8              ; <double> [#uses=1]
   %2 = call double @fabs(double %1) nounwind readnone ; <double> [#uses=1]
   %3 = call double @sqrt(double %2) nounwind readonly ; <double> [#uses=1]
   %4 = fadd double %3, 0.000000e+00               ; <double> [#uses=1]
   store double %4, double* %0, align 8
-  %5 = load double* %0, align 8                   ; <double> [#uses=1]
+  %5 = load double, double* %0, align 8                   ; <double> [#uses=1]
   store double %5, double* %retval, align 8
   br label %return
 
 return:                                           ; preds = %entry
-  %retval1 = load double* %retval                 ; <double> [#uses=1]
+  %retval1 = load double, double* %retval                 ; <double> [#uses=1]
   ret double %retval1
 }
 
diff --git a/llvm/test/Transforms/InstCombine/obfuscated_splat.ll b/llvm/test/Transforms/InstCombine/obfuscated_splat.ll
index fa9cb42..c37456c 100644
--- a/llvm/test/Transforms/InstCombine/obfuscated_splat.ll
+++ b/llvm/test/Transforms/InstCombine/obfuscated_splat.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -instcombine -S < %s | FileCheck %s
 
 define void @test(<4 x float> *%in_ptr, <4 x float> *%out_ptr) {
-  %A = load <4 x float>* %in_ptr, align 16
+  %A = load <4 x float>, <4 x float>* %in_ptr, align 16
   %B = shufflevector <4 x float> %A, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
   %C = shufflevector <4 x float> %B, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 4, i32 undef>
   %D = shufflevector <4 x float> %C, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
diff --git a/llvm/test/Transforms/InstCombine/objsize.ll b/llvm/test/Transforms/InstCombine/objsize.ll
index 38dd33d..d531ba2 100644
--- a/llvm/test/Transforms/InstCombine/objsize.ll
+++ b/llvm/test/Transforms/InstCombine/objsize.ll
@@ -22,11 +22,11 @@
   br i1 %cmp, label %cond.true, label %cond.false
 
 cond.true:
-  %1 = load i8** %retval
+  %1 = load i8*, i8** %retval
   ret i8* %1
 
 cond.false:
-  %2 = load i8** %retval
+  %2 = load i8*, i8** %retval
   ret i8* %2
 }
 
@@ -126,7 +126,7 @@
 entry:
   %0 = tail call noalias i8* @malloc(i32 20) nounwind
   %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false)
-  %2 = load i8** @s, align 8
+  %2 = load i8*, i8** @s, align 8
 ; CHECK-NOT: @llvm.objectsize
 ; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 10, i32 1, i1 false)
   %3 = tail call i8* @__memcpy_chk(i8* %0, i8* %2, i32 10, i32 %1) nounwind
@@ -138,7 +138,7 @@
 entry:
   %0 = tail call noalias i8* @malloc(i32 20) nounwind
   %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false)
-  %2 = load i8** @s, align 8
+  %2 = load i8*, i8** @s, align 8
 ; CHECK-NOT: @llvm.objectsize
 ; CHECK: @__memcpy_chk(i8* %0, i8* %1, i32 30, i32 20)
   %3 = tail call i8* @__memcpy_chk(i8* %0, i8* %2, i32 30, i32 %1) nounwind
diff --git a/llvm/test/Transforms/InstCombine/odr-linkage.ll b/llvm/test/Transforms/InstCombine/odr-linkage.ll
index 2ce6246..73675ef 100644
--- a/llvm/test/Transforms/InstCombine/odr-linkage.ll
+++ b/llvm/test/Transforms/InstCombine/odr-linkage.ll
@@ -6,10 +6,10 @@
 @g4 = internal constant i32 4
 
 define i32 @test() {
-  %A = load i32* @g1
-  %B = load i32* @g2
-  %C = load i32* @g3
-  %D = load i32* @g4
+  %A = load i32, i32* @g1
+  %B = load i32, i32* @g2
+  %C = load i32, i32* @g3
+  %D = load i32, i32* @g4
   
   %a = add i32 %A, %B
   %b = add i32 %a, %C
diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll
index f604baf..b91a595 100644
--- a/llvm/test/Transforms/InstCombine/or.ll
+++ b/llvm/test/Transforms/InstCombine/or.ll
@@ -394,7 +394,7 @@
 ; CHECK: select i1 %tobool, i32 -1, i32 %x
   %tobool = icmp ne i32 %y, 0
   %sext = sext i1 %tobool to i32
-  %x = load i32* %xp
+  %x = load i32, i32* %xp
   %or = or i32 %sext, %x
   ret i32 %or
 }
@@ -404,7 +404,7 @@
 ; CHECK: select i1 %tobool, i32 -1, i32 %x
   %tobool = icmp ne i32 %y, 0
   %sext = sext i1 %tobool to i32
-  %x = load i32* %xp
+  %x = load i32, i32* %xp
   %or = or i32 %x, %sext
   ret i32 %or
 }
diff --git a/llvm/test/Transforms/InstCombine/phi-merge-gep.ll b/llvm/test/Transforms/InstCombine/phi-merge-gep.ll
index 138e868..b548e51 100644
--- a/llvm/test/Transforms/InstCombine/phi-merge-gep.ll
+++ b/llvm/test/Transforms/InstCombine/phi-merge-gep.ll
@@ -34,12 +34,12 @@
   br label %bb13
 
 bb:                                               ; preds = %bb13
-  %24 = load float* %A0r.0, align 4               ; <float> [#uses=1]
-  %25 = load float* %A0i.0, align 4               ; <float> [#uses=1]
-  %26 = load float* %A1r.0, align 4               ; <float> [#uses=2]
-  %27 = load float* %A1i.0, align 4               ; <float> [#uses=2]
-  %28 = load float* %A2r.0, align 4               ; <float> [#uses=2]
-  %29 = load float* %A2i.0, align 4               ; <float> [#uses=2]
+  %24 = load float, float* %A0r.0, align 4               ; <float> [#uses=1]
+  %25 = load float, float* %A0i.0, align 4               ; <float> [#uses=1]
+  %26 = load float, float* %A1r.0, align 4               ; <float> [#uses=2]
+  %27 = load float, float* %A1i.0, align 4               ; <float> [#uses=2]
+  %28 = load float, float* %A2r.0, align 4               ; <float> [#uses=2]
+  %29 = load float, float* %A2i.0, align 4               ; <float> [#uses=2]
   %30 = fadd float %26, %28                       ; <float> [#uses=2]
   %31 = fadd float %27, %29                       ; <float> [#uses=2]
   %32 = fsub float %26, %28                       ; <float> [#uses=1]
diff --git a/llvm/test/Transforms/InstCombine/phi.ll b/llvm/test/Transforms/InstCombine/phi.ll
index 6574164..54cc4cf 100644
--- a/llvm/test/Transforms/InstCombine/phi.ll
+++ b/llvm/test/Transforms/InstCombine/phi.ll
@@ -149,11 +149,11 @@
   br i1 %c, label %bb1, label %bb
 
 bb:
-  %C = load i32* %B, align 1
+  %C = load i32, i32* %B, align 1
   br label %bb2
 
 bb1:
-  %D = load i32* %A, align 1
+  %D = load i32, i32* %A, align 1
   br label %bb2
 
 bb2:
@@ -162,7 +162,7 @@
 ; CHECK-LABEL: @test9(
 ; CHECK:       bb2:
 ; CHECK-NEXT:        phi i32* [ %B, %bb ], [ %A, %bb1 ]
-; CHECK-NEXT:   %E = load i32* %{{[^,]*}}, align 1
+; CHECK-NEXT:   %E = load i32, i32* %{{[^,]*}}, align 1
 ; CHECK-NEXT:   ret i32 %E
 
 }
@@ -173,11 +173,11 @@
   br i1 %c, label %bb1, label %bb
 
 bb:
-  %C = load i32* %B, align 16
+  %C = load i32, i32* %B, align 16
   br label %bb2
 
 bb1:
-  %D = load i32* %A, align 32
+  %D = load i32, i32* %A, align 32
   br label %bb2
 
 bb2:
@@ -186,7 +186,7 @@
 ; CHECK-LABEL: @test10(
 ; CHECK:       bb2:
 ; CHECK-NEXT:        phi i32* [ %B, %bb ], [ %A, %bb1 ]
-; CHECK-NEXT:   %E = load i32* %{{[^,]*}}, align 16
+; CHECK-NEXT:   %E = load i32, i32* %{{[^,]*}}, align 16
 ; CHECK-NEXT:   ret i32 %E
 }
 
@@ -375,30 +375,30 @@
   store i32 %flag, i32* %flag.addr
   store i32* %pointer2, i32** %pointer2.addr
   store i32 10, i32* %res
-  %tmp = load i32* %flag.addr                     ; <i32> [#uses=1]
+  %tmp = load i32, i32* %flag.addr                     ; <i32> [#uses=1]
   %tobool = icmp ne i32 %tmp, 0                   ; <i1> [#uses=1]
   br i1 %tobool, label %if.then, label %if.else
 
 return:                                           ; preds = %if.end
-  %tmp7 = load i32* %retval                       ; <i32> [#uses=1]
+  %tmp7 = load i32, i32* %retval                       ; <i32> [#uses=1]
   ret i32 %tmp7
 
 if.end:                                           ; preds = %if.else, %if.then
-  %tmp6 = load i32* %res                          ; <i32> [#uses=1]
+  %tmp6 = load i32, i32* %res                          ; <i32> [#uses=1]
   store i32 %tmp6, i32* %retval
   br label %return
 
 if.then:                                          ; preds = %entry
-  %tmp1 = load i32 addrspace(1)** %pointer1.addr  ; <i32 addrspace(1)*>
+  %tmp1 = load i32 addrspace(1)*, i32 addrspace(1)** %pointer1.addr  ; <i32 addrspace(1)*>
   %arrayidx = getelementptr i32, i32 addrspace(1)* %tmp1, i32 0 ; <i32 addrspace(1)*> [#uses=1]
-  %tmp2 = load i32 addrspace(1)* %arrayidx        ; <i32> [#uses=1]
+  %tmp2 = load i32, i32 addrspace(1)* %arrayidx        ; <i32> [#uses=1]
   store i32 %tmp2, i32* %res
   br label %if.end
 
 if.else:                                          ; preds = %entry
-  %tmp3 = load i32** %pointer2.addr               ; <i32*> [#uses=1]
+  %tmp3 = load i32*, i32** %pointer2.addr               ; <i32*> [#uses=1]
   %arrayidx4 = getelementptr i32, i32* %tmp3, i32 0    ; <i32*> [#uses=1]
-  %tmp5 = load i32* %arrayidx4                    ; <i32> [#uses=1]
+  %tmp5 = load i32, i32* %arrayidx4                    ; <i32> [#uses=1]
   store i32 %tmp5, i32* %res
   br label %if.end
 }
diff --git a/llvm/test/Transforms/InstCombine/pr12251.ll b/llvm/test/Transforms/InstCombine/pr12251.ll
index 8c382bb..7197bda 100644
--- a/llvm/test/Transforms/InstCombine/pr12251.ll
+++ b/llvm/test/Transforms/InstCombine/pr12251.ll
@@ -2,13 +2,13 @@
 
 define zeroext i1 @_Z3fooPb(i8* nocapture %x) {
 entry:
-  %a = load i8* %x, align 1, !range !0
+  %a = load i8, i8* %x, align 1, !range !0
   %b = and i8 %a, 1
   %tobool = icmp ne i8 %b, 0
   ret i1 %tobool
 }
 
-; CHECK: %a = load i8* %x, align 1, !range !0
+; CHECK: %a = load i8, i8* %x, align 1, !range !0
 ; CHECK-NEXT: %tobool = icmp ne i8 %a, 0
 ; CHECK-NEXT: ret i1 %tobool
 
diff --git a/llvm/test/Transforms/InstCombine/pr2645-0.ll b/llvm/test/Transforms/InstCombine/pr2645-0.ll
index e2977c6..21bfa64 100644
--- a/llvm/test/Transforms/InstCombine/pr2645-0.ll
+++ b/llvm/test/Transforms/InstCombine/pr2645-0.ll
@@ -6,10 +6,10 @@
 define internal void @""(i8*) {
 ; <label>:1
         bitcast i8* %0 to i32*          ; <i32*>:2 [#uses=1]
-        load i32* %2, align 1           ; <i32>:3 [#uses=1]
+        load i32, i32* %2, align 1           ; <i32>:3 [#uses=1]
         getelementptr i8, i8* %0, i32 4             ; <i8*>:4 [#uses=1]
         bitcast i8* %4 to i32*          ; <i32*>:5 [#uses=1]
-        load i32* %5, align 1           ; <i32>:6 [#uses=1]
+        load i32, i32* %5, align 1           ; <i32>:6 [#uses=1]
         br label %7
 
 ; <label>:7             ; preds = %9, %1
diff --git a/llvm/test/Transforms/InstCombine/pr2645-1.ll b/llvm/test/Transforms/InstCombine/pr2645-1.ll
index 2818c07..2986d21 100644
--- a/llvm/test/Transforms/InstCombine/pr2645-1.ll
+++ b/llvm/test/Transforms/InstCombine/pr2645-1.ll
@@ -15,7 +15,7 @@
 ; <label>:6             ; preds = %4
         %7 = getelementptr i8, i8* %2, i32 %.0              ; <i8*> [#uses=1]
         %8 = bitcast i8* %7 to <4 x i16>*               ; <<4 x i16>*> [#uses=1]
-        %9 = load <4 x i16>* %8, align 1                ; <<4 x i16>> [#uses=1]
+        %9 = load <4 x i16>, <4 x i16>* %8, align 1                ; <<4 x i16>> [#uses=1]
         %10 = bitcast <4 x i16> %9 to <1 x i64>         ; <<1 x i64>> [#uses=1]
         %11 = call <2 x i64> @foo(<1 x i64> %10)
 ; <<2 x i64>> [#uses=1]
diff --git a/llvm/test/Transforms/InstCombine/select-cmp-br.ll b/llvm/test/Transforms/InstCombine/select-cmp-br.ll
index c4086cd..1dc7e15 100644
--- a/llvm/test/Transforms/InstCombine/select-cmp-br.ll
+++ b/llvm/test/Transforms/InstCombine/select-cmp-br.ll
@@ -10,12 +10,12 @@
 define void @test1(%C*) {
 entry:
   %1 = getelementptr inbounds %C, %C* %0, i64 0, i32 0, i32 0
-  %m = load i64** %1, align 8
+  %m = load i64*, i64** %1, align 8
   %2 = getelementptr inbounds %C, %C* %0, i64 1, i32 0, i32 0
-  %n = load i64** %2, align 8
+  %n = load i64*, i64** %2, align 8
   %3 = getelementptr inbounds i64, i64* %m, i64 9
   %4 = bitcast i64* %3 to i64 (%C*)**
-  %5 = load i64 (%C*)** %4, align 8
+  %5 = load i64 (%C*)*, i64 (%C*)** %4, align 8
   %6 = icmp eq i64* %m, %n
   %7 = select i1 %6, %C* %0, %C* null
   %8 = icmp eq %C* %7, null
@@ -41,12 +41,12 @@
 define void @test2(%C*) {
 entry:
   %1 = getelementptr inbounds %C, %C* %0, i64 0, i32 0, i32 0
-  %m = load i64** %1, align 8
+  %m = load i64*, i64** %1, align 8
   %2 = getelementptr inbounds %C, %C* %0, i64 1, i32 0, i32 0
-  %n = load i64** %2, align 8
+  %n = load i64*, i64** %2, align 8
   %3 = getelementptr inbounds i64, i64* %m, i64 9
   %4 = bitcast i64* %3 to i64 (%C*)**
-  %5 = load i64 (%C*)** %4, align 8
+  %5 = load i64 (%C*)*, i64 (%C*)** %4, align 8
   %6 = icmp eq i64* %m, %n
   %7 = select i1 %6, %C* null, %C* %0
   %8 = icmp eq %C* %7, null
@@ -72,12 +72,12 @@
 define void @test3(%C*) {
 entry:
   %1 = getelementptr inbounds %C, %C* %0, i64 0, i32 0, i32 0
-  %m = load i64** %1, align 8
+  %m = load i64*, i64** %1, align 8
   %2 = getelementptr inbounds %C, %C* %0, i64 1, i32 0, i32 0
-  %n = load i64** %2, align 8
+  %n = load i64*, i64** %2, align 8
   %3 = getelementptr inbounds i64, i64* %m, i64 9
   %4 = bitcast i64* %3 to i64 (%C*)**
-  %5 = load i64 (%C*)** %4, align 8
+  %5 = load i64 (%C*)*, i64 (%C*)** %4, align 8
   %6 = icmp eq i64* %m, %n
   %7 = select i1 %6, %C* %0, %C* null
   %8 = icmp ne %C* %7, null
@@ -103,12 +103,12 @@
 define void @test4(%C*) {
 entry:
   %1 = getelementptr inbounds %C, %C* %0, i64 0, i32 0, i32 0
-  %m = load i64** %1, align 8
+  %m = load i64*, i64** %1, align 8
   %2 = getelementptr inbounds %C, %C* %0, i64 1, i32 0, i32 0
-  %n = load i64** %2, align 8
+  %n = load i64*, i64** %2, align 8
   %3 = getelementptr inbounds i64, i64* %m, i64 9
   %4 = bitcast i64* %3 to i64 (%C*)**
-  %5 = load i64 (%C*)** %4, align 8
+  %5 = load i64 (%C*)*, i64 (%C*)** %4, align 8
   %6 = icmp eq i64* %m, %n
   %7 = select i1 %6, %C* null, %C* %0
   %8 = icmp ne %C* %7, null
diff --git a/llvm/test/Transforms/InstCombine/select-load-call.ll b/llvm/test/Transforms/InstCombine/select-load-call.ll
index b63468d..ad0ef4f 100644
--- a/llvm/test/Transforms/InstCombine/select-load-call.ll
+++ b/llvm/test/Transforms/InstCombine/select-load-call.ll
@@ -10,6 +10,6 @@
   call void @test2() readonly
 
   %P2 = select i1 %cond, i32 *%P, i32* %A
-  %V = load i32* %P2
+  %V = load i32, i32* %P2
   ret i32 %V
 }
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index a6a7aa9..e4cc6f5 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -301,30 +301,30 @@
 
 define i32 @test16(i1 %C, i32* %P) {
         %P2 = select i1 %C, i32* %P, i32* null          
-        %V = load i32* %P2              
+        %V = load i32, i32* %P2              
         ret i32 %V
 ; CHECK-LABEL: @test16(
-; CHECK-NEXT: %V = load i32* %P
+; CHECK-NEXT: %V = load i32, i32* %P
 ; CHECK: ret i32 %V
 }
 
 ;; It may be legal to load from a null address in a non-zero address space
 define i32 @test16_neg(i1 %C, i32 addrspace(1)* %P) {
         %P2 = select i1 %C, i32 addrspace(1)* %P, i32 addrspace(1)* null
-        %V = load i32 addrspace(1)* %P2
+        %V = load i32, i32 addrspace(1)* %P2
         ret i32 %V
 ; CHECK-LABEL: @test16_neg
 ; CHECK-NEXT: %P2 = select i1 %C, i32 addrspace(1)* %P, i32 addrspace(1)* null
-; CHECK-NEXT: %V = load i32 addrspace(1)* %P2
+; CHECK-NEXT: %V = load i32, i32 addrspace(1)* %P2
 ; CHECK: ret i32 %V
 }
 define i32 @test16_neg2(i1 %C, i32 addrspace(1)* %P) {
         %P2 = select i1 %C, i32 addrspace(1)* null, i32 addrspace(1)* %P
-        %V = load i32 addrspace(1)* %P2
+        %V = load i32, i32 addrspace(1)* %P2
         ret i32 %V
 ; CHECK-LABEL: @test16_neg2
 ; CHECK-NEXT: %P2 = select i1 %C, i32 addrspace(1)* null, i32 addrspace(1)* %P
-; CHECK-NEXT: %V = load i32 addrspace(1)* %P2
+; CHECK-NEXT: %V = load i32, i32 addrspace(1)* %P2
 ; CHECK: ret i32 %V
 }
 
@@ -813,7 +813,7 @@
 
 define i1 @test60(i32 %x, i1* %y) nounwind {
   %cmp = icmp eq i32 %x, 0
-  %load = load i1* %y, align 1
+  %load = load i1, i1* %y, align 1
   %cmp1 = icmp slt i32 %x, 1
   %sel = select i1 %cmp, i1 %load, i1 %cmp1
   ret i1 %sel
@@ -823,7 +823,7 @@
 
 @glbl = constant i32 10
 define i32 @test61(i32* %ptr) {
-  %A = load i32* %ptr
+  %A = load i32, i32* %ptr
   %B = icmp eq i32* %ptr, @glbl
   %C = select i1 %B, i32 %A, i32 10
   ret i32 %C
@@ -1249,11 +1249,11 @@
 ; CHECK-LABEL: @test76(
 ; CHECK: store i32 0, i32* %x
 ; CHECK: %[[P:.*]] = select i1 %flag, i32* @under_aligned, i32* %x
-; CHECK: load i32* %[[P]]
+; CHECK: load i32, i32* %[[P]]
 
   store i32 0, i32* %x
   %p = select i1 %flag, i32* @under_aligned, i32* %x
-  %v = load i32* %p
+  %v = load i32, i32* %p
   ret i32 %v
 }
 
@@ -1268,13 +1268,13 @@
 ; CHECK: call void @scribble_on_i32(i32* %[[A]])
 ; CHECK: store i32 0, i32* %x
 ; CHECK: %[[P:.*]] = select i1 %flag, i32* %[[A]], i32* %x
-; CHECK: load i32* %[[P]]
+; CHECK: load i32, i32* %[[P]]
 
   %under_aligned = alloca i32, align 1
   call void @scribble_on_i32(i32* %under_aligned)
   store i32 0, i32* %x
   %p = select i1 %flag, i32* %under_aligned, i32* %x
-  %v = load i32* %p
+  %v = load i32, i32* %p
   ret i32 %v
 }
 
@@ -1282,8 +1282,8 @@
 ; Test that we can speculate the loads around the select even when we can't
 ; fold the load completely away.
 ; CHECK-LABEL: @test78(
-; CHECK:         %[[V1:.*]] = load i32* %x
-; CHECK-NEXT:    %[[V2:.*]] = load i32* %y
+; CHECK:         %[[V1:.*]] = load i32, i32* %x
+; CHECK-NEXT:    %[[V2:.*]] = load i32, i32* %y
 ; CHECK-NEXT:    %[[S:.*]] = select i1 %flag, i32 %[[V1]], i32 %[[V2]]
 ; CHECK-NEXT:    ret i32 %[[S]]
 entry:
@@ -1292,7 +1292,7 @@
   ; Block forwarding by storing to %z which could alias either %x or %y.
   store i32 42, i32* %z
   %p = select i1 %flag, i32* %x, i32* %y
-  %v = load i32* %p
+  %v = load i32, i32* %p
   ret i32 %v
 }
 
@@ -1300,8 +1300,8 @@
 ; Test that we can speculate the loads around the select even when we can't
 ; fold the load completely away.
 ; CHECK-LABEL: @test79(
-; CHECK:         %[[V1:.*]] = load float* %x
-; CHECK-NEXT:    %[[V2:.*]] = load float* %y
+; CHECK:         %[[V1:.*]] = load float, float* %x
+; CHECK-NEXT:    %[[V2:.*]] = load float, float* %y
 ; CHECK-NEXT:    %[[S:.*]] = select i1 %flag, float %[[V1]], float %[[V2]]
 ; CHECK-NEXT:    ret float %[[S]]
 entry:
@@ -1312,7 +1312,7 @@
   ; Block forwarding by storing to %z which could alias either %x or %y.
   store i32 42, i32* %z
   %p = select i1 %flag, float* %x, float* %y1
-  %v = load float* %p
+  %v = load float, float* %p
   ret float %v
 }
 
@@ -1322,7 +1322,7 @@
 ; CHECK-LABEL: @test80(
 ; CHECK:         %[[X:.*]] = alloca i32
 ; CHECK-NEXT:    %[[Y:.*]] = alloca i32
-; CHECK:         %[[V:.*]] = load i32* %[[X]]
+; CHECK:         %[[V:.*]] = load i32, i32* %[[X]]
 ; CHECK-NEXT:    store i32 %[[V]], i32* %[[Y]]
 ; CHECK-NEXT:    ret i32 %[[V]]
 entry:
@@ -1330,10 +1330,10 @@
   %y = alloca i32
   call void @scribble_on_i32(i32* %x)
   call void @scribble_on_i32(i32* %y)
-  %tmp = load i32* %x
+  %tmp = load i32, i32* %x
   store i32 %tmp, i32* %y
   %p = select i1 %flag, i32* %x, i32* %y
-  %v = load i32* %p
+  %v = load i32, i32* %p
   ret i32 %v
 }
 
@@ -1343,7 +1343,7 @@
 ; CHECK-LABEL: @test81(
 ; CHECK:         %[[X:.*]] = alloca i32
 ; CHECK-NEXT:    %[[Y:.*]] = alloca i32
-; CHECK:         %[[V:.*]] = load i32* %[[X]]
+; CHECK:         %[[V:.*]] = load i32, i32* %[[X]]
 ; CHECK-NEXT:    store i32 %[[V]], i32* %[[Y]]
 ; CHECK-NEXT:    %[[C:.*]] = bitcast i32 %[[V]] to float
 ; CHECK-NEXT:    ret float %[[C]]
@@ -1354,10 +1354,10 @@
   %y1 = bitcast i32* %y to float*
   call void @scribble_on_i32(i32* %x1)
   call void @scribble_on_i32(i32* %y)
-  %tmp = load i32* %x1
+  %tmp = load i32, i32* %x1
   store i32 %tmp, i32* %y
   %p = select i1 %flag, float* %x, float* %y1
-  %v = load float* %p
+  %v = load float, float* %p
   ret float %v
 }
 
@@ -1369,7 +1369,7 @@
 ; CHECK-NEXT:    %[[Y:.*]] = alloca i32
 ; CHECK-NEXT:    %[[X1:.*]] = bitcast float* %[[X]] to i32*
 ; CHECK-NEXT:    %[[Y1:.*]] = bitcast i32* %[[Y]] to float*
-; CHECK:         %[[V:.*]] = load float* %[[X]]
+; CHECK:         %[[V:.*]] = load float, float* %[[X]]
 ; CHECK-NEXT:    store float %[[V]], float* %[[Y1]]
 ; CHECK-NEXT:    %[[C:.*]] = bitcast float %[[V]] to i32
 ; CHECK-NEXT:    ret i32 %[[C]]
@@ -1380,10 +1380,10 @@
   %y1 = bitcast i32* %y to float*
   call void @scribble_on_i32(i32* %x1)
   call void @scribble_on_i32(i32* %y)
-  %tmp = load float* %x
+  %tmp = load float, float* %x
   store float %tmp, float* %y1
   %p = select i1 %flag, i32* %x1, i32* %y
-  %v = load i32* %p
+  %v = load i32, i32* %p
   ret i32 %v
 }
 
@@ -1398,7 +1398,7 @@
 ; CHECK-NEXT:    %[[Y:.*]] = alloca i8*
 ; CHECK-DAG:     %[[X2:.*]] = bitcast i8** %[[X]] to i64*
 ; CHECK-DAG:     %[[Y2:.*]] = bitcast i8** %[[Y]] to i64*
-; CHECK:         %[[V:.*]] = load i64* %[[X2]]
+; CHECK:         %[[V:.*]] = load i64, i64* %[[X2]]
 ; CHECK-NEXT:    store i64 %[[V]], i64* %[[Y2]]
 ; CHECK-NEXT:    %[[C:.*]] = inttoptr i64 %[[V]] to i8*
 ; CHECK-NEXT:    ret i8* %[[S]]
@@ -1409,10 +1409,10 @@
   %y1 = bitcast i64* %y to i8**
   call void @scribble_on_i64(i64* %x1)
   call void @scribble_on_i64(i64* %y)
-  %tmp = load i64* %x1
+  %tmp = load i64, i64* %x1
   store i64 %tmp, i64* %y
   %p = select i1 %flag, i8** %x, i8** %y1
-  %v = load i8** %p
+  %v = load i8*, i8** %p
   ret i8* %v
 }
 
@@ -1422,7 +1422,7 @@
 ; CHECK-LABEL: @test84(
 ; CHECK:         %[[X:.*]] = alloca i8*
 ; CHECK-NEXT:    %[[Y:.*]] = alloca i8*
-; CHECK:         %[[V:.*]] = load i8** %[[X]]
+; CHECK:         %[[V:.*]] = load i8*, i8** %[[X]]
 ; CHECK-NEXT:    store i8* %[[V]], i8** %[[Y]]
 ; CHECK-NEXT:    %[[C:.*]] = ptrtoint i8* %[[V]] to i64
 ; CHECK-NEXT:    ret i64 %[[C]]
@@ -1433,10 +1433,10 @@
   %y1 = bitcast i64* %y to i8**
   call void @scribble_on_i64(i64* %x1)
   call void @scribble_on_i64(i64* %y)
-  %tmp = load i8** %x
+  %tmp = load i8*, i8** %x
   store i8* %tmp, i8** %y1
   %p = select i1 %flag, i64* %x1, i64* %y
-  %v = load i64* %p
+  %v = load i64, i64* %p
   ret i64 %v
 }
 
@@ -1445,10 +1445,10 @@
 ; pointer doesn't load all of the stored integer bits. We could fix this, but it
 ; would require endianness checks and other nastiness.
 ; CHECK-LABEL: @test85(
-; CHECK:         %[[T:.*]] = load i128*
+; CHECK:         %[[T:.*]] = load i128, i128*
 ; CHECK-NEXT:    store i128 %[[T]], i128*
-; CHECK-NEXT:    %[[X:.*]] = load i8**
-; CHECK-NEXT:    %[[Y:.*]] = load i8**
+; CHECK-NEXT:    %[[X:.*]] = load i8*, i8**
+; CHECK-NEXT:    %[[Y:.*]] = load i8*, i8**
 ; CHECK-NEXT:    %[[V:.*]] = select i1 %flag, i8* %[[X]], i8* %[[Y]]
 ; CHECK-NEXT:    ret i8* %[[V]]
 entry:
@@ -1459,10 +1459,10 @@
   %y1 = bitcast i128* %y to i8**
   call void @scribble_on_i128(i128* %x2)
   call void @scribble_on_i128(i128* %y)
-  %tmp = load i128* %x2
+  %tmp = load i128, i128* %x2
   store i128 %tmp, i128* %y
   %p = select i1 %flag, i8** %x1, i8** %y1
-  %v = load i8** %p
+  %v = load i8*, i8** %p
   ret i8* %v
 }
 
@@ -1472,10 +1472,10 @@
 ; the bits of the integer.
 ;
 ; CHECK-LABEL: @test86(
-; CHECK:         %[[T:.*]] = load i8**
+; CHECK:         %[[T:.*]] = load i8*, i8**
 ; CHECK-NEXT:    store i8* %[[T]], i8**
-; CHECK-NEXT:    %[[X:.*]] = load i128*
-; CHECK-NEXT:    %[[Y:.*]] = load i128*
+; CHECK-NEXT:    %[[X:.*]] = load i128, i128*
+; CHECK-NEXT:    %[[Y:.*]] = load i128, i128*
 ; CHECK-NEXT:    %[[V:.*]] = select i1 %flag, i128 %[[X]], i128 %[[Y]]
 ; CHECK-NEXT:    ret i128 %[[V]]
 entry:
@@ -1486,10 +1486,10 @@
   %y1 = bitcast i128* %y to i8**
   call void @scribble_on_i128(i128* %x2)
   call void @scribble_on_i128(i128* %y)
-  %tmp = load i8** %x1
+  %tmp = load i8*, i8** %x1
   store i8* %tmp, i8** %y1
   %p = select i1 %flag, i128* %x2, i128* %y
-  %v = load i128* %p
+  %v = load i128, i128* %p
   ret i128 %v
 }
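
The select.ll hunks above (@test76 through @test86) all exercise one instcombine rule, stated in the tests' own comments: a load of a select between two pointers is speculated into a load of each pointer plus a select of the loaded values. A minimal hand-written sketch of the input shape, in the new load syntax; the function and value names here are illustrative and do not come from the test suite:

define i32 @speculate_load_sketch(i1 %flag, i32* %x, i32* %y) {
  ; Load through a select of pointers; when both arms are safe to
  ; access, instcombine speculates the load into each arm.
  %p = select i1 %flag, i32* %x, i32* %y
  %v = load i32, i32* %p
  ret i32 %v
}

; Expected shape afterwards (compare the @test78 CHECK lines):
;   %v1 = load i32, i32* %x
;   %v2 = load i32, i32* %y
;   %s = select i1 %flag, i32 %v1, i32 %v2
;   ret i32 %s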
 
diff --git a/llvm/test/Transforms/InstCombine/shufflemask-undef.ll b/llvm/test/Transforms/InstCombine/shufflemask-undef.ll
index 68183b7..10509a9 100644
--- a/llvm/test/Transforms/InstCombine/shufflemask-undef.ll
+++ b/llvm/test/Transforms/InstCombine/shufflemask-undef.ll
@@ -70,7 +70,7 @@
 define i32 @foo(%struct.State* %dst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._VMConstants* %cnstn, %struct.PPStreamToken* %pstrm, %struct.PluginBufferData* %gpctx, %struct.VMTextures* %txtrs, %struct.VMGPStack* %gpstk) nounwind {
 bb266.i:
 	getelementptr <4 x float>, <4 x float>* null, i32 11		; <<4 x float>*>:0 [#uses=1]
-	load <4 x float>* %0, align 16		; <<4 x float>>:1 [#uses=1]
+	load <4 x float>, <4 x float>* %0, align 16		; <<4 x float>>:1 [#uses=1]
 	shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 1, i32 1 >		; <<4 x float>>:2 [#uses=1]
 	shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>>:3 [#uses=1]
 	shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>>:4 [#uses=1]
diff --git a/llvm/test/Transforms/InstCombine/signed-comparison.ll b/llvm/test/Transforms/InstCombine/signed-comparison.ll
index 97d7aff..922f4dc 100644
--- a/llvm/test/Transforms/InstCombine/signed-comparison.ll
+++ b/llvm/test/Transforms/InstCombine/signed-comparison.ll
@@ -13,7 +13,7 @@
 	%indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb ]
 	%t0 = and i64 %indvar, 65535
 	%t1 = getelementptr double, double* %p, i64 %t0
-	%t2 = load double* %t1, align 8
+	%t2 = load double, double* %t1, align 8
 	%t3 = fmul double %t2, 2.2
 	store double %t3, double* %t1, align 8
 	%i.04 = trunc i64 %indvar to i16
diff --git a/llvm/test/Transforms/InstCombine/simplify-demanded-bits-pointer.ll b/llvm/test/Transforms/InstCombine/simplify-demanded-bits-pointer.ll
index 6d2193f..db8f179 100644
--- a/llvm/test/Transforms/InstCombine/simplify-demanded-bits-pointer.ll
+++ b/llvm/test/Transforms/InstCombine/simplify-demanded-bits-pointer.ll
@@ -29,7 +29,7 @@
 	unreachable
 
 bb107:		; preds = %bb88
-	%0 = load i16* undef, align 8		; <i16> [#uses=1]
+	%0 = load i16, i16* undef, align 8		; <i16> [#uses=1]
 	%1 = icmp eq i16 %0, 38		; <i1> [#uses=1]
 	%src_eqv_here.0 = select i1 %1, %struct.rtx_def* null, %struct.rtx_def* null		; <%struct.rtx_def*> [#uses=1]
 	br i1 undef, label %bb127, label %bb125
diff --git a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
index 9059d8d..6eb4744 100644
--- a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
+++ b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll
@@ -47,7 +47,7 @@
 
 define i1 @PR2341(i8** %start_addr) {
 entry:
-	%tmp4 = load i8** %start_addr, align 4		; <i8*> [#uses=1]
+	%tmp4 = load i8*, i8** %start_addr, align 4		; <i8*> [#uses=1]
 	%tmp5 = call i32 @memcmp( i8* %tmp4, i8* getelementptr ([5 x i8]* @_2E_str, i32 0, i32 0), i32 4 ) nounwind readonly 		; <i32> [#uses=1]
 	%tmp6 = icmp eq i32 %tmp5, 0		; <i1> [#uses=1]
 	ret i1 %tmp6
diff --git a/llvm/test/Transforms/InstCombine/sincospi.ll b/llvm/test/Transforms/InstCombine/sincospi.ll
index 739827f..f49fb35 100644
--- a/llvm/test/Transforms/InstCombine/sincospi.ll
+++ b/llvm/test/Transforms/InstCombine/sincospi.ll
@@ -18,17 +18,17 @@
 @var64 = global double 0.0
 
 define float @test_instbased_f32() {
-       %val = load float* @var32
+       %val = load float, float* @var32
        %sin = call float @__sinpif(float %val) #0
        %cos = call float @__cospif(float %val) #0
        %res = fadd float %sin, %cos
        ret float %res
-; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load float* @var32
+; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load float, float* @var32
 ; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call <2 x float> @__sincospif_stret(float [[VAL]])
 ; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 0
 ; CHECK-FLOAT-IN-VEC: extractelement <2 x float> [[SINCOS]], i32 1
 
-; CHECK: [[VAL:%[a-z0-9]+]] = load float* @var32
+; CHECK: [[VAL:%[a-z0-9]+]] = load float, float* @var32
 ; CHECK: [[SINCOS:%[a-z0-9]+]] = call { float, float } @__sincospif_stret(float [[VAL]])
 ; CHECK: extractvalue { float, float } [[SINCOS]], 0
 ; CHECK: extractvalue { float, float } [[SINCOS]], 1
@@ -55,17 +55,17 @@
 }
 
 define double @test_instbased_f64() {
-       %val = load double* @var64
+       %val = load double, double* @var64
        %sin = call double @__sinpi(double %val) #0
        %cos = call double @__cospi(double %val) #0
        %res = fadd double %sin, %cos
        ret double %res
-; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load double* @var64
+; CHECK-FLOAT-IN-VEC: [[VAL:%[a-z0-9]+]] = load double, double* @var64
 ; CHECK-FLOAT-IN-VEC: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double [[VAL]])
 ; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 0
 ; CHECK-FLOAT-IN-VEC: extractvalue { double, double } [[SINCOS]], 1
 
-; CHECK: [[VAL:%[a-z0-9]+]] = load double* @var64
+; CHECK: [[VAL:%[a-z0-9]+]] = load double, double* @var64
 ; CHECK: [[SINCOS:%[a-z0-9]+]] = call { double, double } @__sincospi_stret(double [[VAL]])
 ; CHECK: extractvalue { double, double } [[SINCOS]], 0
 ; CHECK: extractvalue { double, double } [[SINCOS]], 1
diff --git a/llvm/test/Transforms/InstCombine/sqrt.ll b/llvm/test/Transforms/InstCombine/sqrt.ll
index 0c4019d..24c2e00 100644
--- a/llvm/test/Transforms/InstCombine/sqrt.ll
+++ b/llvm/test/Transforms/InstCombine/sqrt.ll
@@ -39,7 +39,7 @@
 ; CHECK-NOT: sqrtf(
 ; CHECK: fptrunc
   %arrayidx13 = getelementptr inbounds float, float* %v, i64 2
-  %tmp14 = load float* %arrayidx13
+  %tmp14 = load float, float* %arrayidx13
   %mul18 = fmul float %tmp14, %tmp14
   %add19 = fadd float undef, %mul18
   %conv = fpext float %add19 to double
diff --git a/llvm/test/Transforms/InstCombine/store.ll b/llvm/test/Transforms/InstCombine/store.ll
index 1d15d89..c087a73 100644
--- a/llvm/test/Transforms/InstCombine/store.ll
+++ b/llvm/test/Transforms/InstCombine/store.ll
@@ -12,7 +12,7 @@
 }
 
 define void @test2(i32* %P) {
-        %X = load i32* %P               ; <i32> [#uses=1]
+        %X = load i32, i32* %P               ; <i32> [#uses=1]
         %Y = add i32 %X, 0              ; <i32> [#uses=1]
         store i32 %Y, i32* %P
         ret void
@@ -36,7 +36,7 @@
 	br label %Cont
 
 Cont:
-	%V = load i32* %A
+	%V = load i32, i32* %A
 	ret i32 %V
 ; CHECK-LABEL: @test3(
 ; CHECK-NOT: alloca
@@ -56,7 +56,7 @@
         br label %Cont
 
 Cont:
-	%V = load i32* %A
+	%V = load i32, i32* %A
 	ret i32 %V
 ; CHECK-LABEL: @test4(
 ; CHECK-NOT: alloca
@@ -92,7 +92,7 @@
 
 for.cond:                                         ; preds = %for.body, %entry
   %storemerge = phi i32 [ 0, %entry ], [ %inc, %for.body ]
-  %0 = load i32* %gi, align 4, !tbaa !0
+  %0 = load i32, i32* %gi, align 4, !tbaa !0
   %cmp = icmp slt i32 %0, %n
   br i1 %cmp, label %for.body, label %for.end
 
@@ -100,7 +100,7 @@
   %idxprom = sext i32 %0 to i64
   %arrayidx = getelementptr inbounds float, float* %a, i64 %idxprom
   store float 0.000000e+00, float* %arrayidx, align 4, !tbaa !3
-  %1 = load i32* %gi, align 4, !tbaa !0
+  %1 = load i32, i32* %gi, align 4, !tbaa !0
   %inc = add nsw i32 %1, 1
   store i32 %inc, i32* %gi, align 4, !tbaa !0
   br label %for.cond
diff --git a/llvm/test/Transforms/InstCombine/strcmp-1.ll b/llvm/test/Transforms/InstCombine/strcmp-1.ll
index 4d80f9b..0de26b1 100644
--- a/llvm/test/Transforms/InstCombine/strcmp-1.ll
+++ b/llvm/test/Transforms/InstCombine/strcmp-1.ll
@@ -13,7 +13,7 @@
 ; strcmp("", x) -> -*x
 define i32 @test1(i8* %str2) {
 ; CHECK-LABEL: @test1(
-; CHECK: %strcmpload = load i8* %str
+; CHECK: %strcmpload = load i8, i8* %str
 ; CHECK: %1 = zext i8 %strcmpload to i32
 ; CHECK: %2 = sub nsw i32 0, %1
 ; CHECK: ret i32 %2
@@ -27,7 +27,7 @@
 ; strcmp(x, "") -> *x
 define i32 @test2(i8* %str1) {
 ; CHECK-LABEL: @test2(
-; CHECK: %strcmpload = load i8* %str
+; CHECK: %strcmpload = load i8, i8* %str
 ; CHECK: %1 = zext i8 %strcmpload to i32
 ; CHECK: ret i32 %1
 
diff --git a/llvm/test/Transforms/InstCombine/strncmp-1.ll b/llvm/test/Transforms/InstCombine/strncmp-1.ll
index 78bcc80..a112182 100644
--- a/llvm/test/Transforms/InstCombine/strncmp-1.ll
+++ b/llvm/test/Transforms/InstCombine/strncmp-1.ll
@@ -13,7 +13,7 @@
 ; strncmp("", x, n) -> -*x
 define i32 @test1(i8* %str2) {
 ; CHECK-LABEL: @test1(
-; CHECK: %strcmpload = load i8* %str
+; CHECK: %strcmpload = load i8, i8* %str
 ; CHECK: %1 = zext i8 %strcmpload to i32
 ; CHECK: %2 = sub nsw i32 0, %1
 ; CHECK: ret i32 %2
@@ -26,7 +26,7 @@
 ; strncmp(x, "", n) -> *x
 define i32 @test2(i8* %str1) {
 ; CHECK-LABEL: @test2(
-; CHECK: %strcmpload = load i8* %str1
+; CHECK: %strcmpload = load i8, i8* %str1
 ; CHECK: %1 = zext i8 %strcmpload to i32
 ; CHECK: ret i32 %1
 
@@ -69,9 +69,9 @@
 ; strncmp(x,y,1) -> memcmp(x,y,1)
 define i32 @test6(i8* %str1, i8* %str2) {
 ; CHECK-LABEL: @test6(
-; CHECK: [[LOAD1:%[a-z]+]] = load i8* %str1, align 1
+; CHECK: [[LOAD1:%[a-z]+]] = load i8, i8* %str1, align 1
 ; CHECK: [[ZEXT1:%[a-z]+]] = zext i8 [[LOAD1]] to i32
-; CHECK: [[LOAD2:%[a-z]+]] = load i8* %str2, align 1
+; CHECK: [[LOAD2:%[a-z]+]] = load i8, i8* %str2, align 1
 ; CHECK: [[ZEXT2:%[a-z]+]] = zext i8 [[LOAD2]] to i32
 ; CHECK: [[RET:%[a-z]+]] = sub nsw i32 [[ZEXT1]], [[ZEXT2]]
 ; CHECK: ret i32 [[RET]]
diff --git a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
index 8d42d9e..c75a839 100644
--- a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
+++ b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll
@@ -10,7 +10,7 @@
 %struct.test1 = type { float }
 
 ; CHECK: @test
-; CHECK: %[[LOAD:.*]] = load i32* %{{.*}}, align 4, !tbaa !0
+; CHECK: %[[LOAD:.*]] = load i32, i32* %{{.*}}, align 4, !tbaa !0
 ; CHECK: store i32 %[[LOAD:.*]], i32* %{{.*}}, align 4, !tbaa !0
 ; CHECK: ret
 define void @test1(%struct.test1* nocapture %a, %struct.test1* nocapture %b) {
@@ -31,7 +31,7 @@
   %tmp1 = bitcast %struct.test2* %tmp to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* undef, i64 8, i32 8, i1 false), !tbaa.struct !4
   %tmp2 = getelementptr %struct.test2, %struct.test2* %tmp, i32 0, i32 0
-  %tmp3 = load i32 (i8*, i32*, double*)*** %tmp2
+  %tmp3 = load i32 (i8*, i32*, double*)**, i32 (i8*, i32*, double*)*** %tmp2
   ret i32 (i8*, i32*, double*)*** %tmp2
 }
 
diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
index 00a029a..8a8b834 100644
--- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -110,7 +110,7 @@
 ; CHECK-NOT: load
 ; CHECK: ret
 entry:
-	%tmp1 = load <4 x float>* %a		; <<4 x float>> [#uses=1]
+	%tmp1 = load <4 x float>, <4 x float>* %a		; <<4 x float>> [#uses=1]
 	%vecins = insertelement <4 x float> %tmp1, float 0.000000e+00, i32 0	; <<4 x float>> [#uses=1]
 	%vecins4 = insertelement <4 x float> %vecins, float 0.000000e+00, i32 1; <<4 x float>> [#uses=1]
 	%vecins6 = insertelement <4 x float> %vecins4, float 0.000000e+00, i32 2; <<4 x float>> [#uses=1]
@@ -141,8 +141,8 @@
 ; <rdar://problem/6945110>
 define <4 x i32> @kernel3_vertical(<4 x i16> * %src, <8 x i16> * %foo) nounwind {
 entry:
-	%tmp = load <4 x i16>* %src
-	%tmp1 = load <8 x i16>* %foo
+	%tmp = load <4 x i16>, <4 x i16>* %src
+	%tmp1 = load <8 x i16>, <8 x i16>* %foo
 ; CHECK: %tmp2 = shufflevector
 	%tmp2 = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
 ; pmovzxwd ignores the upper 64 bits of its input; -instcombine should remove this shuffle:
diff --git a/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll b/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll
index f6f9e01..5c0610f 100644
--- a/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll
+++ b/llvm/test/Transforms/InstCombine/vec_extract_var_elt.ll
@@ -3,7 +3,7 @@
 define void @test (float %b, <8 x float> * %p)  {
 ; CHECK: extractelement
 ; CHECK: fptosi
-  %1 = load <8 x float> * %p
+  %1 = load <8 x float> , <8 x float> * %p
   %2 = bitcast <8 x float> %1 to <8 x i32>
   %3 = bitcast <8 x i32> %2 to <8 x float>
   %a = fptosi <8 x float> %3 to <8 x i32>
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index eb4e9d6..164e315 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -190,11 +190,11 @@
   %tmp = alloca <4 x i16>, align 8
   %vecinit6 = insertelement <4 x i16> undef, i16 23, i32 3
   store <4 x i16> %vecinit6, <4 x i16>* undef
-  %tmp1 = load <4 x i16>* undef
+  %tmp1 = load <4 x i16>, <4 x i16>* undef
   %vecinit11 = insertelement <4 x i16> undef, i16 %conv10, i32 3
   %div = udiv <4 x i16> %tmp1, %vecinit11
   store <4 x i16> %div, <4 x i16>* %tmp
-  %tmp4 = load <4 x i16>* %tmp
+  %tmp4 = load <4 x i16>, <4 x i16>* %tmp
   %tmp5 = shufflevector <4 x i16> %tmp4, <4 x i16> undef, <2 x i32> <i32 2, i32 0>
   %cmp = icmp ule <2 x i16> %tmp5, undef
   %sext = sext <2 x i1> %cmp to <2 x i16>
diff --git a/llvm/test/Transforms/InstCombine/volatile_store.ll b/llvm/test/Transforms/InstCombine/volatile_store.ll
index 7cab199..7377b68 100644
--- a/llvm/test/Transforms/InstCombine/volatile_store.ll
+++ b/llvm/test/Transforms/InstCombine/volatile_store.ll
@@ -5,7 +5,7 @@
 
 define void @self_assign_1() {
 entry:
-	%tmp = load volatile i32* @x		; <i32> [#uses=1]
+	%tmp = load volatile i32, i32* @x		; <i32> [#uses=1]
 	store volatile i32 %tmp, i32* @x
 	br label %return
 
diff --git a/llvm/test/Transforms/InstCombine/vsx-unaligned.ll b/llvm/test/Transforms/InstCombine/vsx-unaligned.ll
index 26e0426..ad264fb 100644
--- a/llvm/test/Transforms/InstCombine/vsx-unaligned.ll
+++ b/llvm/test/Transforms/InstCombine/vsx-unaligned.ll
@@ -14,28 +14,28 @@
   %t1 = alloca <4 x float>*, align 8
   %t2 = alloca <2 x double>*, align 8
   store <4 x float>* @vf, <4 x float>** %t1, align 8
-  %0 = load <4 x float>** %t1, align 8
+  %0 = load <4 x float>*, <4 x float>** %t1, align 8
   %1 = bitcast <4 x float>* %0 to i8*
   %2 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %1)
   store <4 x float>* @res_vf, <4 x float>** %t1, align 8
-  %3 = load <4 x float>** %t1, align 8
+  %3 = load <4 x float>*, <4 x float>** %t1, align 8
   %4 = bitcast <4 x float>* %3 to i8*
   call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %2, i8* %4)
   store <2 x double>* @vd, <2 x double>** %t2, align 8
-  %5 = load <2 x double>** %t2, align 8
+  %5 = load <2 x double>*, <2 x double>** %t2, align 8
   %6 = bitcast <2 x double>* %5 to i8*
   %7 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %6)
   store <2 x double>* @res_vd, <2 x double>** %t2, align 8
-  %8 = load <2 x double>** %t2, align 8
+  %8 = load <2 x double>*, <2 x double>** %t2, align 8
   %9 = bitcast <2 x double>* %8 to i8*
   call void @llvm.ppc.vsx.stxvd2x(<2 x double> %7, i8* %9)
   ret void
 }
 
 ; CHECK-LABEL: @test1
-; CHECK: %0 = load <4 x i32>* bitcast (<4 x float>* @vf to <4 x i32>*), align 1
+; CHECK: %0 = load <4 x i32>, <4 x i32>* bitcast (<4 x float>* @vf to <4 x i32>*), align 1
 ; CHECK: store <4 x i32> %0, <4 x i32>* bitcast (<4 x float>* @res_vf to <4 x i32>*), align 1
-; CHECK: %1 = load <2 x double>* @vd, align 1
+; CHECK: %1 = load <2 x double>, <2 x double>* @vd, align 1
 ; CHECK: store <2 x double> %1, <2 x double>* @res_vd, align 1
 
 declare <4 x i32> @llvm.ppc.vsx.lxvw4x(i8*)
diff --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
index cadf664..3a27f9a 100644
--- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
+++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
@@ -11,18 +11,18 @@
 	%tmp5 = and i32 %blk_i, 1		; <i32> [#uses=1]
 	%tmp6 = or i32 %tmp3, %tmp5		; <i32> [#uses=1]
 	%tmp8 = getelementptr %struct.FooBar, %struct.FooBar* %up, i32 0, i32 7		; <i16*> [#uses=1]
-	%tmp9 = load i16* %tmp8, align 1		; <i16> [#uses=1]
+	%tmp9 = load i16, i16* %tmp8, align 1		; <i16> [#uses=1]
 	%tmp910 = zext i16 %tmp9 to i32		; <i32> [#uses=1]
 	%tmp12 = getelementptr [4 x i8], [4 x i8]* @some_idx, i32 0, i32 %tmp6		; <i8*> [#uses=1]
-	%tmp13 = load i8* %tmp12, align 1		; <i8> [#uses=1]
+	%tmp13 = load i8, i8* %tmp12, align 1		; <i8> [#uses=1]
 	%tmp1314 = zext i8 %tmp13 to i32		; <i32> [#uses=1]
 	%tmp151 = lshr i32 %tmp910, %tmp1314		; <i32> [#uses=1]
 	%tmp1516 = trunc i32 %tmp151 to i8		; <i8> [#uses=1]
 	%tmp18 = getelementptr %struct.FooBar, %struct.FooBar* %up, i32 0, i32 0		; <i8*> [#uses=1]
-	%tmp19 = load i8* %tmp18, align 1		; <i8> [#uses=1]
+	%tmp19 = load i8, i8* %tmp18, align 1		; <i8> [#uses=1]
 	%tmp22 = and i8 %tmp1516, %tmp19		; <i8> [#uses=1]
 	%tmp24 = getelementptr %struct.FooBar, %struct.FooBar* %up, i32 0, i32 0		; <i8*> [#uses=1]
-	%tmp25 = load i8* %tmp24, align 1		; <i8> [#uses=1]
+	%tmp25 = load i8, i8* %tmp24, align 1		; <i8> [#uses=1]
 	%tmp26.mask = and i8 %tmp25, 1		; <i8> [#uses=1]
 	%toBool = icmp eq i8 %tmp26.mask, 0		; <i1> [#uses=1]
 	%toBool.not = xor i1 %toBool, true		; <i1> [#uses=1]
diff --git a/llvm/test/Transforms/InstMerge/ld_hoist1.ll b/llvm/test/Transforms/InstMerge/ld_hoist1.ll
index 571f535..74c8900 100644
--- a/llvm/test/Transforms/InstMerge/ld_hoist1.ll
+++ b/llvm/test/Transforms/InstMerge/ld_hoist1.ll
@@ -16,12 +16,12 @@
 ; CHECK-LABEL: for.body
 ; CHECK: load
 ; CHECK:  %2 = getelementptr inbounds i32, i32* %in, i64 %indvars.iv
-; CHECK:  %3 = load i32* %2, align 4
+; CHECK:  %3 = load i32, i32* %2, align 4
 
 for.body:                                         ; preds = %for.body.lr.ph, %for.inc
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.inc ]
   %arrayidx = getelementptr inbounds i32, i32* %trigger, i64 %indvars.iv
-  %1 = load i32* %arrayidx, align 4
+  %1 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp sgt i32 %1, 0
   br i1 %cmp1, label %if.then, label %if.else
 
@@ -29,7 +29,7 @@
 if.then:                                          ; preds = %for.body
 ; This load should be hoisted
   %arrayidx3 = getelementptr inbounds i32, i32* %in, i64 %indvars.iv
-  %2 = load i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx3, align 4
   %conv = sitofp i32 %2 to float
   %add = fadd float %conv, 5.000000e-01
   %arrayidx5 = getelementptr inbounds float, float* %out, i64 %indvars.iv
@@ -38,12 +38,12 @@
 
 if.else:                                          ; preds = %for.body
   %arrayidx7 = getelementptr inbounds float, float* %out, i64 %indvars.iv
-  %3 = load float* %arrayidx7, align 4
+  %3 = load float, float* %arrayidx7, align 4
   %div = fdiv float %3, 3.000000e+00
   store float %div, float* %arrayidx7, align 4
 ; This load should be hoisted in spite of the store
   %arrayidx9 = getelementptr inbounds i32, i32* %in, i64 %indvars.iv
-  %4 = load i32* %arrayidx9, align 4
+  %4 = load i32, i32* %arrayidx9, align 4
   %conv10 = sitofp i32 %4 to float
   %add13 = fadd float %div, %conv10
   store float %add13, float* %arrayidx7, align 4
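
The ld_hoist1.ll comments above mark which loads should be hoisted: when both arms of a branch load the same address, the load can execute once before the branch instead. A minimal sketch of that shape with hypothetical names (the real test additionally proves the hoist stays legal past the store in the else arm):

define i32 @hoist_sketch(i1 %c, i32* %p) {
entry:
  br i1 %c, label %if.then, label %if.else

if.then:                ; the same address is loaded in both arms...
  %v1 = load i32, i32* %p
  br label %if.end

if.else:
  %v2 = load i32, i32* %p
  br label %if.end

if.end:                 ; ...so the load can be hoisted into %entry,
  %v = phi i32 [ %v1, %if.then ], [ %v2, %if.else ]
  ret i32 %v            ; making this phi unnecessary
}
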
diff --git a/llvm/test/Transforms/InstMerge/ld_hoist_st_sink.ll b/llvm/test/Transforms/InstMerge/ld_hoist_st_sink.ll
index 8397027..1d3f941 100644
--- a/llvm/test/Transforms/InstMerge/ld_hoist_st_sink.ll
+++ b/llvm/test/Transforms/InstMerge/ld_hoist_st_sink.ll
@@ -9,7 +9,7 @@
 define i64 @foo(%struct.node* nocapture readonly %r) nounwind {
 entry:
   %node.0.in16 = getelementptr inbounds %struct.node, %struct.node* %r, i64 0, i32 2
-  %node.017 = load %struct.node** %node.0.in16, align 8
+  %node.017 = load %struct.node*, %struct.node** %node.0.in16, align 8
   %tobool18 = icmp eq %struct.node* %node.017, null
   br i1 %tobool18, label %while.end, label %while.body.preheader
 
@@ -22,23 +22,23 @@
   %node.020 = phi %struct.node* [ %node.0, %if.end ], [ %node.017, %while.body.preheader ]
   %sum.019 = phi i64 [ %inc, %if.end ], [ 0, %while.body.preheader ]
   %orientation = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 4
-  %0 = load i64* %orientation, align 8
+  %0 = load i64, i64* %orientation, align 8
   %cmp = icmp eq i64 %0, 1
   br i1 %cmp, label %if.then, label %if.else
 ; CHECK: if.then
 if.then:                                          ; preds = %while.body
   %a = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 5
 ; CHECK-NOT: load %struct.arc
-  %1 = load %struct.arc** %a, align 8
+  %1 = load %struct.arc*, %struct.arc** %a, align 8
   %cost = getelementptr inbounds %struct.arc, %struct.arc* %1, i64 0, i32 0
-; CHECK-NOT: load i64*
-  %2 = load i64* %cost, align 8
+; CHECK-NOT: load i64, i64*
+  %2 = load i64, i64* %cost, align 8
   %pred = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 1
-; CHECK-NOT: load %struct.node**
-  %3 = load %struct.node** %pred, align 8
+; CHECK-NOT: load %struct.node*, %struct.node**
+  %3 = load %struct.node*, %struct.node** %pred, align 8
   %p = getelementptr inbounds %struct.node, %struct.node* %3, i64 0, i32 6
-; CHECK-NOT: load i64*
-  %4 = load i64* %p, align 8
+; CHECK-NOT: load i64, i64*
+  %4 = load i64, i64* %p, align 8
   %add = add nsw i64 %4, %2
   %p1 = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 6
 ; CHECK-NOT: store i64
@@ -48,17 +48,17 @@
 ; CHECK: if.else
 if.else:                                          ; preds = %while.body
   %pred2 = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 1
-; CHECK-NOT: load %struct.node**
-  %5 = load %struct.node** %pred2, align 8
+; CHECK-NOT: load %struct.node*, %struct.node**
+  %5 = load %struct.node*, %struct.node** %pred2, align 8
   %p3 = getelementptr inbounds %struct.node, %struct.node* %5, i64 0, i32 6
-; CHECK-NOT: load i64*
-  %6 = load i64* %p3, align 8
+; CHECK-NOT: load i64, i64*
+  %6 = load i64, i64* %p3, align 8
   %a4 = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 5
-; CHECK-NOT: load %struct.arc**
-  %7 = load %struct.arc** %a4, align 8
+; CHECK-NOT: load %struct.arc*, %struct.arc**
+  %7 = load %struct.arc*, %struct.arc** %a4, align 8
   %cost5 = getelementptr inbounds %struct.arc, %struct.arc* %7, i64 0, i32 0
-; CHECK-NOT: load i64*
-  %8 = load i64* %cost5, align 8
+; CHECK-NOT: load i64, i64*
+  %8 = load i64, i64* %cost5, align 8
   %sub = sub nsw i64 %6, %8
   %p6 = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 6
 ; CHECK-NOT: store i64
@@ -70,7 +70,7 @@
 ; CHECK: store
   %inc = add nsw i64 %sum.019, 1
   %node.0.in = getelementptr inbounds %struct.node, %struct.node* %node.020, i64 0, i32 2
-  %node.0 = load %struct.node** %node.0.in, align 8
+  %node.0 = load %struct.node*, %struct.node** %node.0.in, align 8
   %tobool = icmp eq %struct.node* %node.0, null
   br i1 %tobool, label %while.end.loopexit, label %while.body
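
ld_hoist_st_sink.ll and the st_sink_* tests that follow exercise the mirror-image transform: stores to the same address in both arms sink to the join block (unless a barrier intervenes, such as the reload of the stored location in st_sink_with_barrier.ll). A minimal sketch, again with hypothetical names:

define void @sink_sketch(i1 %c, i32* %p) {
entry:
  br i1 %c, label %a, label %b

a:
  store i32 1, i32* %p
  br label %end

b:
  store i32 2, i32* %p
  br label %end

end:                    ; both stores can sink here as a single
  ret void              ; "store i32 %phi, i32* %p" fed by a phi
}                       ; of the two stored values
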
 
diff --git a/llvm/test/Transforms/InstMerge/st_sink_barrier_call.ll b/llvm/test/Transforms/InstMerge/st_sink_barrier_call.ll
index d0be479..cdcc346 100644
--- a/llvm/test/Transforms/InstMerge/st_sink_barrier_call.ll
+++ b/llvm/test/Transforms/InstMerge/st_sink_barrier_call.ll
@@ -11,16 +11,16 @@
 define void @sink_store(%struct.node* nocapture %r, i32 %index) {
 entry:
   %node.0.in16 = getelementptr inbounds %struct.node, %struct.node* %r, i64 0, i32 2
-  %node.017 = load %struct.node** %node.0.in16, align 8
+  %node.017 = load %struct.node*, %struct.node** %node.0.in16, align 8
   %index.addr = alloca i32, align 4
   store i32 %index, i32* %index.addr, align 4
-  %0 = load i32* %index.addr, align 4
+  %0 = load i32, i32* %index.addr, align 4
   %cmp = icmp slt i32 %0, 0
   br i1 %cmp, label %if.then, label %if.else
 
 ; CHECK: if.then
 if.then:                                          ; preds = %entry
-  %1 = load i32* %index.addr, align 4
+  %1 = load i32, i32* %index.addr, align 4
   %p1 = getelementptr inbounds %struct.node, %struct.node* %node.017, i32 0, i32 6
   ; CHECK: store i32
   store i32 %1, i32* %p1, align 4
@@ -28,7 +28,7 @@
   
 ; CHECK: if.else
 if.else:                                          ; preds = %entry
-  %2 = load i32* %index.addr, align 4
+  %2 = load i32, i32* %index.addr, align 4
   %add = add nsw i32 %2, 1
   %p3 = getelementptr inbounds %struct.node, %struct.node* %node.017, i32 0, i32 6
   ; CHECK: store i32
diff --git a/llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll b/llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
index 1f54877..1ec95f1 100644
--- a/llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
+++ b/llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
@@ -26,14 +26,14 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc8, %entry
-  %0 = load i32* @d, align 4
+  %0 = load i32, i32* @d, align 4
   %cmp = icmp slt i32 %0, 2
   br i1 %cmp, label %for.body, label %for.end10
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* @d, align 4
+  %1 = load i32, i32* @d, align 4
   %idxprom = sext i32 %1 to i64
-  %2 = load i32* @b, align 4
+  %2 = load i32, i32* @b, align 4
   %idxprom1 = sext i32 %2 to i64
   %arrayidx = getelementptr inbounds [1 x [3 x i8]], [1 x [3 x i8]]* @f, i32 0, i64 %idxprom1
   %arrayidx2 = getelementptr inbounds [3 x i8], [3 x i8]* %arrayidx, i32 0, i64 %idxprom
@@ -42,30 +42,30 @@
   br label %for.cond3
 
 for.cond3:                                        ; preds = %for.inc, %for.body
-  %3 = load i32* @e, align 4
+  %3 = load i32, i32* @e, align 4
   %cmp4 = icmp slt i32 %3, 3
   br i1 %cmp4, label %for.body5, label %for.end
 
 for.body5:                                        ; preds = %for.cond3
-  %4 = load i32* @c, align 4
+  %4 = load i32, i32* @c, align 4
   %tobool = icmp ne i32 %4, 0
   br i1 %tobool, label %if.then, label %if.end
 
 if.then:                                          ; preds = %for.body5
-  %5 = load i32* @a, align 4
+  %5 = load i32, i32* @a, align 4
   %dec = add nsw i32 %5, -1
   store i32 %dec, i32* @a, align 4
   br label %if.end
 
 if.end:                                           ; preds = %if.then, %for.body5
-  %6 = load i32* @e, align 4
+  %6 = load i32, i32* @e, align 4
   %idxprom6 = sext i32 %6 to i64
   %arrayidx7 = getelementptr inbounds [3 x i8], [3 x i8]* getelementptr inbounds ([1 x [3 x i8]]* @f, i32 0, i64 0), i32 0, i64 %idxprom6
   store i8 1, i8* %arrayidx7, align 1
   br label %for.inc
 
 for.inc:                                          ; preds = %if.end
-  %7 = load i32* @e, align 4
+  %7 = load i32, i32* @e, align 4
   %inc = add nsw i32 %7, 1
   store i32 %inc, i32* @e, align 4
   br label %for.cond3
@@ -74,7 +74,7 @@
   br label %for.inc8
 
 for.inc8:                                         ; preds = %for.end
-  %8 = load i32* @d, align 4
+  %8 = load i32, i32* @d, align 4
   %inc9 = add nsw i32 %8, 1
   store i32 %inc9, i32* @d, align 4
   br label %for.cond
@@ -89,7 +89,7 @@
   %retval = alloca i32, align 4
   store i32 0, i32* %retval
   call void @fn1()
-  %0 = load i8* getelementptr inbounds ([1 x [3 x i8]]* @f, i32 0, i64 0, i64 1), align 1
+  %0 = load i8, i8* getelementptr inbounds ([1 x [3 x i8]]* @f, i32 0, i64 0, i64 1), align 1
   %conv = sext i8 %0 to i32
   %cmp = icmp ne i32 %conv, 1
   br i1 %cmp, label %if.then, label %if.end
diff --git a/llvm/test/Transforms/InstMerge/st_sink_no_barrier_call.ll b/llvm/test/Transforms/InstMerge/st_sink_no_barrier_call.ll
index f20bad3e..0ad90f8 100644
--- a/llvm/test/Transforms/InstMerge/st_sink_no_barrier_call.ll
+++ b/llvm/test/Transforms/InstMerge/st_sink_no_barrier_call.ll
@@ -11,16 +11,16 @@
 define void @sink_store(%struct.node* nocapture %r, i32 %index) {
 entry:
   %node.0.in16 = getelementptr inbounds %struct.node, %struct.node* %r, i64 0, i32 2
-  %node.017 = load %struct.node** %node.0.in16, align 8
+  %node.017 = load %struct.node*, %struct.node** %node.0.in16, align 8
   %index.addr = alloca i32, align 4
   store i32 %index, i32* %index.addr, align 4
-  %0 = load i32* %index.addr, align 4
+  %0 = load i32, i32* %index.addr, align 4
   %cmp = icmp slt i32 %0, 0
   br i1 %cmp, label %if.then, label %if.else
 
 ; CHECK: if.then
 if.then:                                          ; preds = %entry
-  %1 = load i32* %index.addr, align 4
+  %1 = load i32, i32* %index.addr, align 4
   %p1 = getelementptr inbounds %struct.node, %struct.node* %node.017, i32 0, i32 6
   ; CHECK-NOT: store i32
   store i32 %1, i32* %p1, align 4
@@ -28,7 +28,7 @@
   
 ; CHECK: if.else
 if.else:                                          ; preds = %entry
-  %2 = load i32* %index.addr, align 4
+  %2 = load i32, i32* %index.addr, align 4
   %add = add nsw i32 %2, 1
   %p3 = getelementptr inbounds %struct.node, %struct.node* %node.017, i32 0, i32 6
   ; CHECK-NOT: store i32
diff --git a/llvm/test/Transforms/InstMerge/st_sink_no_barrier_load.ll b/llvm/test/Transforms/InstMerge/st_sink_no_barrier_load.ll
index 1b9aa96..b7236e4 100644
--- a/llvm/test/Transforms/InstMerge/st_sink_no_barrier_load.ll
+++ b/llvm/test/Transforms/InstMerge/st_sink_no_barrier_load.ll
@@ -9,27 +9,27 @@
 define void @sink_store(%struct.node* nocapture %r, i32 %index) {
 entry:
   %node.0.in16 = getelementptr inbounds %struct.node, %struct.node* %r, i64 0, i32 2
-  %node.017 = load %struct.node** %node.0.in16, align 8
+  %node.017 = load %struct.node*, %struct.node** %node.0.in16, align 8
   %index.addr = alloca i32, align 4
   store i32 %index, i32* %index.addr, align 4
-  %0 = load i32* %index.addr, align 4
+  %0 = load i32, i32* %index.addr, align 4
   %cmp = icmp slt i32 %0, 0
   br i1 %cmp, label %if.then, label %if.else
 
 ; CHECK: if.then
 if.then:                                          ; preds = %entry
-  %1 = load i32* %index.addr, align 4
+  %1 = load i32, i32* %index.addr, align 4
   %p1 = getelementptr inbounds %struct.node, %struct.node* %node.017, i32 0, i32 6
   ; CHECK-NOT: store i32
   store i32 %1, i32* %p1, align 4
   %p2 = getelementptr inbounds %struct.node, %struct.node* %node.017, i32 5, i32 6
-  ; CHECK: load i32*
-  %not_barrier = load i32 * %p2, align 4
+  ; CHECK: load i32, i32*
+  %not_barrier = load i32 , i32 * %p2, align 4
   br label %if.end
 
 ; CHECK: if.else
 if.else:                                          ; preds = %entry
-  %2 = load i32* %index.addr, align 4
+  %2 = load i32, i32* %index.addr, align 4
   %add = add nsw i32 %2, 1
   %p3 = getelementptr inbounds %struct.node, %struct.node* %node.017, i32 0, i32 6
   ; CHECK-NOT: store i32
diff --git a/llvm/test/Transforms/InstMerge/st_sink_no_barrier_store.ll b/llvm/test/Transforms/InstMerge/st_sink_no_barrier_store.ll
index 791ccc4..e13f28a 100644
--- a/llvm/test/Transforms/InstMerge/st_sink_no_barrier_store.ll
+++ b/llvm/test/Transforms/InstMerge/st_sink_no_barrier_store.ll
@@ -9,16 +9,16 @@
 define void @sink_store(%struct.node* nocapture %r, i32 %index) {
 entry:
   %node.0.in16 = getelementptr inbounds %struct.node, %struct.node* %r, i64 0, i32 2
-  %node.017 = load %struct.node** %node.0.in16, align 8
+  %node.017 = load %struct.node*, %struct.node** %node.0.in16, align 8
   %index.addr = alloca i32, align 4
   store i32 %index, i32* %index.addr, align 4
-  %0 = load i32* %index.addr, align 4
+  %0 = load i32, i32* %index.addr, align 4
   %cmp = icmp slt i32 %0, 0
   br i1 %cmp, label %if.then, label %if.else
 
 ; CHECK: if.then
 if.then:                                          ; preds = %entry
-  %1 = load i32* %index.addr, align 4
+  %1 = load i32, i32* %index.addr, align 4
   %p1 = getelementptr inbounds %struct.node, %struct.node* %node.017, i32 0, i32 6
   ; CHECK-NOT: store i32
   store i32 %1, i32* %p1, align 4
@@ -26,7 +26,7 @@
 
 ; CHECK: if.else
 if.else:                                          ; preds = %entry
-  %2 = load i32* %index.addr, align 4
+  %2 = load i32, i32* %index.addr, align 4
   %add = add nsw i32 %2, 1
   %p2 = getelementptr inbounds %struct.node, %struct.node* %node.017, i32 0, i32 6
   store i32 %add, i32* %p2, align 4
diff --git a/llvm/test/Transforms/InstMerge/st_sink_two_stores.ll b/llvm/test/Transforms/InstMerge/st_sink_two_stores.ll
index b8e2f90..5b5582f 100644
--- a/llvm/test/Transforms/InstMerge/st_sink_two_stores.ll
+++ b/llvm/test/Transforms/InstMerge/st_sink_two_stores.ll
@@ -9,16 +9,16 @@
 define void @sink_store(%struct.node* nocapture %r, i32 %index) {
 entry:
   %node.0.in16 = getelementptr inbounds %struct.node, %struct.node* %r, i64 0, i32 2
-  %node.017 = load %struct.node** %node.0.in16, align 8
+  %node.017 = load %struct.node*, %struct.node** %node.0.in16, align 8
   %index.addr = alloca i32, align 4
   store i32 %index, i32* %index.addr, align 4
-  %0 = load i32* %index.addr, align 4
+  %0 = load i32, i32* %index.addr, align 4
   %cmp = icmp slt i32 %0, 0
   br i1 %cmp, label %if.then, label %if.else
 
 ; CHECK: if.then
 if.then:                                          ; preds = %entry
-  %1 = load i32* %index.addr, align 4
+  %1 = load i32, i32* %index.addr, align 4
   %p1 = getelementptr inbounds %struct.node, %struct.node* %node.017, i32 0, i32 6
   ; CHECK-NOT: store i32
   store i32 %1, i32* %p1, align 4
@@ -29,7 +29,7 @@
 
 ; CHECK: if.else
 if.else:                                          ; preds = %entry
-  %2 = load i32* %index.addr, align 4
+  %2 = load i32, i32* %index.addr, align 4
   %add = add nsw i32 %2, 1
   %p3 = getelementptr inbounds %struct.node, %struct.node* %node.017, i32 0, i32 6
   ; CHECK-NOT: store i32
diff --git a/llvm/test/Transforms/InstMerge/st_sink_with_barrier.ll b/llvm/test/Transforms/InstMerge/st_sink_with_barrier.ll
index 4b302ec..a05ae88 100644
--- a/llvm/test/Transforms/InstMerge/st_sink_with_barrier.ll
+++ b/llvm/test/Transforms/InstMerge/st_sink_with_barrier.ll
@@ -8,27 +8,27 @@
 define void @sink_store(%struct.node* nocapture %r, i32 %index) {
 entry:
   %node.0.in16 = getelementptr inbounds %struct.node, %struct.node* %r, i64 0, i32 2
-  %node.017 = load %struct.node** %node.0.in16, align 8
+  %node.017 = load %struct.node*, %struct.node** %node.0.in16, align 8
   %index.addr = alloca i32, align 4
   store i32 %index, i32* %index.addr, align 4
-  %0 = load i32* %index.addr, align 4
+  %0 = load i32, i32* %index.addr, align 4
   %cmp = icmp slt i32 %0, 0
   br i1 %cmp, label %if.then, label %if.else
 
 ; CHECK: if.then
 if.then:                                          ; preds = %entry
-  %1 = load i32* %index.addr, align 4
+  %1 = load i32, i32* %index.addr, align 4
   %p1 = getelementptr inbounds %struct.node, %struct.node* %node.017, i32 0, i32 6
   ; CHECK: store i32
   store i32 %1, i32* %p1, align 4
   %p2 = getelementptr inbounds %struct.node, %struct.node* %node.017, i32 0, i32 6
-  ; CHECK: load i32*
-  %barrier = load i32 * %p2, align 4
+  ; CHECK: load i32, i32*
+  %barrier = load i32 , i32 * %p2, align 4
   br label %if.end
 
 ; CHECK: if.else
 if.else:                                          ; preds = %entry
-  %2 = load i32* %index.addr, align 4
+  %2 = load i32, i32* %index.addr, align 4
   %add = add nsw i32 %2, 1
   %p3 = getelementptr inbounds %struct.node, %struct.node* %node.017, i32 0, i32 6
   ; CHECK: store i32
diff --git a/llvm/test/Transforms/InstSimplify/call-callconv.ll b/llvm/test/Transforms/InstSimplify/call-callconv.ll
index e475be7..7701511 100644
--- a/llvm/test/Transforms/InstSimplify/call-callconv.ll
+++ b/llvm/test/Transforms/InstSimplify/call-callconv.ll
@@ -42,7 +42,7 @@
   %cmp = icmp ne i32 %call, 0
   ret i1 %cmp
 
-; CHECK: %[[STRLENFIRST:.*]] = load i8* %str
+; CHECK: %[[STRLENFIRST:.*]] = load i8, i8* %str
 ; CHECK: %[[CMP:.*]] = icmp ne i8 %[[STRLENFIRST]], 0
 ; CHECK: ret i1 %[[CMP]]
 }
diff --git a/llvm/test/Transforms/InstSimplify/compare.ll b/llvm/test/Transforms/InstSimplify/compare.ll
index cbc0125..376f6ea 100644
--- a/llvm/test/Transforms/InstSimplify/compare.ll
+++ b/llvm/test/Transforms/InstSimplify/compare.ll
@@ -950,7 +950,7 @@
 }
 
 define i1 @nonnull_load(i32** %addr) {
-  %ptr = load i32** %addr, !nonnull !{}
+  %ptr = load i32*, i32** %addr, !nonnull !{}
   %cmp = icmp eq i32* %ptr, null
   ret i1 %cmp
 ; CHECK-LABEL: @nonnull_load
@@ -958,14 +958,14 @@
 }
 
 define i1 @nonnull_load_as_outer(i32* addrspace(1)* %addr) {
-  %ptr = load i32* addrspace(1)* %addr, !nonnull !{}
+  %ptr = load i32*, i32* addrspace(1)* %addr, !nonnull !{}
   %cmp = icmp eq i32* %ptr, null
   ret i1 %cmp
 ; CHECK-LABEL: @nonnull_load_as_outer
 ; CHECK: ret i1 false
 }
 define i1 @nonnull_load_as_inner(i32 addrspace(1)** %addr) {
-  %ptr = load i32 addrspace(1)** %addr, !nonnull !{}
+  %ptr = load i32 addrspace(1)*, i32 addrspace(1)** %addr, !nonnull !{}
   %cmp = icmp eq i32 addrspace(1)* %ptr, null
   ret i1 %cmp
 ; CHECK-LABEL: @nonnull_load_as_inner
diff --git a/llvm/test/Transforms/InstSimplify/load.ll b/llvm/test/Transforms/InstSimplify/load.ll
index 92953cd..ab87d4b 100644
--- a/llvm/test/Transforms/InstSimplify/load.ll
+++ b/llvm/test/Transforms/InstSimplify/load.ll
@@ -6,14 +6,14 @@
 define i32 @crash_on_zeroinit() {
 ; CHECK-LABEL: @crash_on_zeroinit
 ; CHECK: ret i32 0
-  %load = load i32* bitcast ({}* @zeroinit to i32*)
+  %load = load i32, i32* bitcast ({}* @zeroinit to i32*)
   ret i32 %load
 }
 
 define i32 @crash_on_undef() {
 ; CHECK-LABEL: @crash_on_undef
 ; CHECK: ret i32 undef
-  %load = load i32* bitcast ({}* @undef to i32*)
+  %load = load i32, i32* bitcast ({}* @undef to i32*)
   ret i32 %load
 }
 
diff --git a/llvm/test/Transforms/InstSimplify/vector_ptr_bitcast.ll b/llvm/test/Transforms/InstSimplify/vector_ptr_bitcast.ll
index 607892a..97c8343 100644
--- a/llvm/test/Transforms/InstSimplify/vector_ptr_bitcast.ll
+++ b/llvm/test/Transforms/InstSimplify/vector_ptr_bitcast.ll
@@ -14,7 +14,7 @@
                                           align 8
 
 define i64 @fn() {
-  %x = load <2 x i8*>* bitcast (%mst* @a to <2 x i8*>*), align 8
+  %x = load <2 x i8*>, <2 x i8*>* bitcast (%mst* @a to <2 x i8*>*), align 8
   %b = extractelement <2 x i8*> %x, i32 0
   %c = ptrtoint i8* %b to i64
   ; CHECK-LABEL: @fn
@@ -23,7 +23,7 @@
 }
 
 define i64 @fn2() {
-  %x = load <4 x i32*>* bitcast (%mst2* @b to <4 x i32*>*), align 8
+  %x = load <4 x i32*>, <4 x i32*>* bitcast (%mst2* @b to <4 x i32*>*), align 8
   %b = extractelement <4 x i32*> %x, i32 0
   %c = extractelement <4 x i32*> %x, i32 3
   %d = ptrtoint i32* %b to i64
diff --git a/llvm/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll b/llvm/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
index 1652388..c50b6fc 100644
--- a/llvm/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
+++ b/llvm/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
@@ -10,7 +10,7 @@
 ; CHECK: @C = internal alias i32* @A
 
 define i32 @main() {
-	%tmp = load i32* @C
+	%tmp = load i32, i32* @C
 	ret i32 %tmp
 }
 
diff --git a/llvm/test/Transforms/JumpThreading/2010-08-26-and.ll b/llvm/test/Transforms/JumpThreading/2010-08-26-and.ll
index 694bc8f..b6e19cb 100644
--- a/llvm/test/Transforms/JumpThreading/2010-08-26-and.ll
+++ b/llvm/test/Transforms/JumpThreading/2010-08-26-and.ll
@@ -30,7 +30,7 @@
   %indvar = phi i64 [ 0, %bb.nph ], [ %tmp146, %_ZN12StringSwitchI5ColorE4CaseILj7EEERS1_RAT__KcRKS0_.exit134 ] ; <i64> [#uses=1]
   %tmp146 = add i64 %indvar, 1                    ; <i64> [#uses=3]
   %arrayidx = getelementptr i8*, i8** %argv, i64 %tmp146 ; <i8**> [#uses=1]
-  %tmp6 = load i8** %arrayidx, align 8            ; <i8*> [#uses=8]
+  %tmp6 = load i8*, i8** %arrayidx, align 8            ; <i8*> [#uses=8]
   %call.i.i = call i64 @strlen(i8* %tmp6) nounwind ; <i64> [#uses=1]
   %conv.i.i = trunc i64 %call.i.i to i32          ; <i32> [#uses=6]\
 ; CHECK: switch i32 %conv.i.i
diff --git a/llvm/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll b/llvm/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll
index 86a1321..ea9cc7f 100644
--- a/llvm/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll
+++ b/llvm/test/Transforms/JumpThreading/2011-04-14-InfLoop.ll
@@ -15,7 +15,7 @@
   br i1 %cmp1179, label %for.cond1177, label %land.rhs1320
 
 land.rhs1320:
-  %tmp1324 = load volatile i64* getelementptr inbounds (%0* @g_338, i64 0, i32 2), align 1
+  %tmp1324 = load volatile i64, i64* getelementptr inbounds (%0* @g_338, i64 0, i32 2), align 1
   br label %if.end.i
 
 if.end.i:
diff --git a/llvm/test/Transforms/JumpThreading/crash.ll b/llvm/test/Transforms/JumpThreading/crash.ll
index 2fe8746..900a773 100644
--- a/llvm/test/Transforms/JumpThreading/crash.ll
+++ b/llvm/test/Transforms/JumpThreading/crash.ll
@@ -356,7 +356,7 @@
   br label %BrBlock
 
 BrBlock:
-  %L = load i32* %P
+  %L = load i32, i32* %P
   %C = icmp eq i32 %L, 42
   br i1 %C, label %T, label %F
   
diff --git a/llvm/test/Transforms/JumpThreading/landing-pad.ll b/llvm/test/Transforms/JumpThreading/landing-pad.ll
index 0237e74..b25f5fd 100644
--- a/llvm/test/Transforms/JumpThreading/landing-pad.ll
+++ b/llvm/test/Transforms/JumpThreading/landing-pad.ll
@@ -13,8 +13,8 @@
 define void @_ZN15EditCommandImpl5applyEv(%class.E* %this) uwtable align 2 {
 entry:
   %0 = bitcast %class.E* %this to void (%class.E*)***
-  %vtable = load void (%class.E*)*** %0, align 8
-  %1 = load void (%class.E*)** %vtable, align 8
+  %vtable = load void (%class.E*)**, void (%class.E*)*** %0, align 8
+  %1 = load void (%class.E*)*, void (%class.E*)** %vtable, align 8
   call void %1(%class.E* %this)
   ret void
 }
@@ -53,7 +53,7 @@
   store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*]* @_ZTV1D, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
   %_ref.i.i.i = getelementptr inbounds i8, i8* %call, i64 8
   %1 = bitcast i8* %_ref.i.i.i to i32*
-  %2 = load i32* %1, align 4
+  %2 = load i32, i32* %1, align 4
   %inc.i.i.i = add nsw i32 %2, 1
   store i32 %inc.i.i.i, i32* %1, align 4
   %3 = bitcast i8* %call to %class.D*
@@ -76,7 +76,7 @@
 lpad1:                                            ; preds = %_ZN1DC1Ev.exit, %_ZN15EditCommandImpl5applyEv.exit
   %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
           cleanup
-  %6 = load i32* %1, align 4
+  %6 = load i32, i32* %1, align 4
   %tobool.i.i.i = icmp eq i32 %6, 0
   br i1 %tobool.i.i.i, label %_ZN1BI1DED1Ev.exit, label %if.then.i.i.i
 
@@ -101,7 +101,7 @@
   %m_ptr.i = getelementptr inbounds %class.B, %class.B* %this, i64 0, i32 0
   store %class.D* %p1, %class.D** %m_ptr.i, align 8
   %_ref.i.i = getelementptr inbounds %class.D, %class.D* %p1, i64 0, i32 0, i32 0, i32 1, i32 0, i32 0
-  %0 = load i32* %_ref.i.i, align 4
+  %0 = load i32, i32* %_ref.i.i, align 4
   %inc.i.i = add nsw i32 %0, 1
   store i32 %inc.i.i, i32* %_ref.i.i, align 4
   ret void
@@ -116,7 +116,7 @@
 define %class.D* @_ZN1BI1DEptEv(%class.B* nocapture readonly %this) nounwind readonly uwtable align 2 {
 entry:
   %m_ptr = getelementptr inbounds %class.B, %class.B* %this, i64 0, i32 0
-  %0 = load %class.D** %m_ptr, align 8
+  %0 = load %class.D*, %class.D** %m_ptr, align 8
   ret %class.D* %0
 }
 
@@ -125,9 +125,9 @@
 define void @_ZN1BI1DED1Ev(%class.B* nocapture readonly %this) unnamed_addr uwtable align 2 {
 entry:
   %m_ptr.i = getelementptr inbounds %class.B, %class.B* %this, i64 0, i32 0
-  %0 = load %class.D** %m_ptr.i, align 8
+  %0 = load %class.D*, %class.D** %m_ptr.i, align 8
   %_ref.i.i = getelementptr inbounds %class.D, %class.D* %0, i64 0, i32 0, i32 0, i32 1, i32 0, i32 0
-  %1 = load i32* %_ref.i.i, align 4
+  %1 = load i32, i32* %_ref.i.i, align 4
   %tobool.i.i = icmp eq i32 %1, 0
   br i1 %tobool.i.i, label %_ZN1BI1DED2Ev.exit, label %if.then.i.i
 
@@ -147,9 +147,9 @@
 define void @_ZN1BI1DED2Ev(%class.B* nocapture readonly %this) unnamed_addr uwtable align 2 {
 entry:
   %m_ptr = getelementptr inbounds %class.B, %class.B* %this, i64 0, i32 0
-  %0 = load %class.D** %m_ptr, align 8
+  %0 = load %class.D*, %class.D** %m_ptr, align 8
   %_ref.i = getelementptr inbounds %class.D, %class.D* %0, i64 0, i32 0, i32 0, i32 1, i32 0, i32 0
-  %1 = load i32* %_ref.i, align 4
+  %1 = load i32, i32* %_ref.i, align 4
   %tobool.i = icmp eq i32 %1, 0
   br i1 %tobool.i, label %_ZN1AI1CE5derefEv.exit, label %if.then.i
 
@@ -167,7 +167,7 @@
 define void @_ZN1AI1CE5derefEv(%class.A* nocapture readonly %this) nounwind uwtable align 2 {
 entry:
   %_ref = getelementptr inbounds %class.A, %class.A* %this, i64 0, i32 0
-  %0 = load i32* %_ref, align 4
+  %0 = load i32, i32* %_ref, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %if.end, label %if.then
 
@@ -187,7 +187,7 @@
   %m_ptr = getelementptr inbounds %class.B, %class.B* %this, i64 0, i32 0
   store %class.D* %p1, %class.D** %m_ptr, align 8
   %_ref.i = getelementptr inbounds %class.D, %class.D* %p1, i64 0, i32 0, i32 0, i32 1, i32 0, i32 0
-  %0 = load i32* %_ref.i, align 4
+  %0 = load i32, i32* %_ref.i, align 4
   %inc.i = add nsw i32 %0, 1
   store i32 %inc.i, i32* %_ref.i, align 4
   ret void
@@ -196,7 +196,7 @@
 define void @_ZN1AI1CE3refEv(%class.A* nocapture %this) nounwind uwtable align 2 {
 entry:
   %_ref = getelementptr inbounds %class.A, %class.A* %this, i64 0, i32 0
-  %0 = load i32* %_ref, align 4
+  %0 = load i32, i32* %_ref, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* %_ref, align 4
   ret void
diff --git a/llvm/test/Transforms/JumpThreading/lvi-load.ll b/llvm/test/Transforms/JumpThreading/lvi-load.ll
index d2c4fa4..8c993ae 100644
--- a/llvm/test/Transforms/JumpThreading/lvi-load.ll
+++ b/llvm/test/Transforms/JumpThreading/lvi-load.ll
@@ -17,7 +17,7 @@
 define zeroext i8 @_Z3fooPN4llvm5ValueE(%"struct.llvm::Value"* %V) ssp {
 entry:
   %0 = getelementptr inbounds %"struct.llvm::Value", %"struct.llvm::Value"* %V, i64 0, i32 1 ; <i8*> [#uses=1]
-  %1 = load i8* %0, align 8                       ; <i8> [#uses=2]
+  %1 = load i8, i8* %0, align 8                       ; <i8> [#uses=2]
   %2 = icmp ugt i8 %1, 20                         ; <i1> [#uses=1]
   br i1 %2, label %bb.i, label %bb2
 
diff --git a/llvm/test/Transforms/JumpThreading/or-undef.ll b/llvm/test/Transforms/JumpThreading/or-undef.ll
index 6311b6d..b55bddd 100644
--- a/llvm/test/Transforms/JumpThreading/or-undef.ll
+++ b/llvm/test/Transforms/JumpThreading/or-undef.ll
@@ -32,13 +32,13 @@
   br i1 %tmp7, label %bb7, label %bb5
 
 bb5:                                              ; preds = %bb2
-  %tmp8 = load i8** %argv.0, align 8              ; <i8*> [#uses=1]
+  %tmp8 = load i8*, i8** %argv.0, align 8              ; <i8*> [#uses=1]
   %tmp9 = icmp eq i8* %tmp8, null                 ; <i1> [#uses=1]
   br i1 %tmp9, label %bb7, label %bb6
 
 bb6:                                              ; preds = %bb5
-  %tmp10 = load i8** %argv.0, align 8             ; <i8*> [#uses=1]
-  %tmp11 = load i8* %tmp10, align 1               ; <i8> [#uses=1]
+  %tmp10 = load i8*, i8** %argv.0, align 8             ; <i8*> [#uses=1]
+  %tmp11 = load i8, i8* %tmp10, align 1               ; <i8> [#uses=1]
   %tmp12 = icmp eq i8 %tmp11, 0                   ; <i1> [#uses=1]
   br i1 %tmp12, label %bb7, label %bb8
 
@@ -47,7 +47,7 @@
   br label %bb9
 
 bb8:                                              ; preds = %bb6
-  %tmp13 = load i8** %argv.0, align 8             ; <i8*> [#uses=1]
+  %tmp13 = load i8*, i8** %argv.0, align 8             ; <i8*> [#uses=1]
   %tmp14 = call i64 @f5(i8* %tmp13) nounwind      ; <i64> [#uses=0]
   br label %bb9
 
diff --git a/llvm/test/Transforms/JumpThreading/phi-eq.ll b/llvm/test/Transforms/JumpThreading/phi-eq.ll
index 78351ad..f158d7e0 100644
--- a/llvm/test/Transforms/JumpThreading/phi-eq.ll
+++ b/llvm/test/Transforms/JumpThreading/phi-eq.ll
@@ -98,33 +98,33 @@
 get_filter_list.exit6:                            ; preds = %sw.bb3.i4, %sw.bb2.i3, %sw.bb1.i2, %sw.bb.i1
   %1 = phi %struct._GList** [ @display_edited_filters, %sw.bb3.i4 ], [ @capture_edited_filters, %sw.bb2.i3 ], [ @display_filters, %sw.bb1.i2 ], [ @capture_filters, %sw.bb.i1 ]
 ; CHECK: %2 = load
-  %2 = load %struct._GList** %1, align 8
+  %2 = load %struct._GList*, %struct._GList** %1, align 8
 ; We should have jump-threading insert an additional load here for the value
 ; coming out of the first switch, which is picked up by a subsequent phi
-; CHECK: %.pr = load %struct._GList** %0
+; CHECK: %.pr = load %struct._GList*, %struct._GList** %0
 ; CHECK-NEXT:  br label %while.cond
   br label %while.cond
 
 ; CHECK: while.cond
 while.cond:                                       ; preds = %while.body, %get_filter_list.exit6
 ; CHECK: {{= phi .*%.pr}}
-  %3 = load %struct._GList** %0, align 8
+  %3 = load %struct._GList*, %struct._GList** %0, align 8
 ; CHECK: tobool
   %tobool = icmp ne %struct._GList* %3, null
   br i1 %tobool, label %while.body, label %while.end
 
 while.body:                                       ; preds = %while.cond
-  %4 = load %struct._GList** %0, align 8
-  %5 = load %struct._GList** %0, align 8
+  %4 = load %struct._GList*, %struct._GList** %0, align 8
+  %5 = load %struct._GList*, %struct._GList** %0, align 8
   %call2 = call %struct._GList* @g_list_first(%struct._GList* %5)
   %data.i = getelementptr inbounds %struct._GList, %struct._GList* %call2, i32 0, i32 0
-  %6 = load i8** %data.i, align 8
+  %6 = load i8*, i8** %data.i, align 8
   %7 = bitcast i8* %6 to %struct.filter_def*
   %name.i = getelementptr inbounds %struct.filter_def, %struct.filter_def* %7, i32 0, i32 0
-  %8 = load i8** %name.i, align 8
+  %8 = load i8*, i8** %name.i, align 8
   call void @g_free(i8* %8) nounwind
   %strval.i = getelementptr inbounds %struct.filter_def, %struct.filter_def* %7, i32 0, i32 1
-  %9 = load i8** %strval.i, align 8
+  %9 = load i8*, i8** %strval.i, align 8
   call void @g_free(i8* %9) nounwind
   %10 = bitcast %struct.filter_def* %7 to i8*
   call void @g_free(i8* %10) nounwind
@@ -136,7 +136,7 @@
   br label %do.body4
 
 do.body4:                                         ; preds = %while.end
-  %11 = load %struct._GList** %0, align 8
+  %11 = load %struct._GList*, %struct._GList** %0, align 8
   %call5 = call i32 @g_list_length(%struct._GList* %11)
   %cmp6 = icmp eq i32 %call5, 0
   br i1 %cmp6, label %if.then7, label %if.else8
@@ -161,13 +161,13 @@
 
 while.body13:                                     ; preds = %while.cond11
   %data = getelementptr inbounds %struct._GList, %struct._GList* %cond10, i32 0, i32 0
-  %12 = load i8** %data, align 8
+  %12 = load i8*, i8** %data, align 8
   %13 = bitcast i8* %12 to %struct.filter_def*
-  %14 = load %struct._GList** %0, align 8
+  %14 = load %struct._GList*, %struct._GList** %0, align 8
   %name = getelementptr inbounds %struct.filter_def, %struct.filter_def* %13, i32 0, i32 0
-  %15 = load i8** %name, align 8
+  %15 = load i8*, i8** %name, align 8
   %strval = getelementptr inbounds %struct.filter_def, %struct.filter_def* %13, i32 0, i32 1
-  %16 = load i8** %strval, align 8
+  %16 = load i8*, i8** %strval, align 8
   %call.i7 = call noalias i8* @g_malloc(i64 16) nounwind
   %17 = bitcast i8* %call.i7 to %struct.filter_def*
   %call1.i = call noalias i8* @g_strdup(i8* %15) nounwind
@@ -184,7 +184,7 @@
 
 cond.true:                                        ; preds = %while.body13
   %next = getelementptr inbounds %struct._GList, %struct._GList* %cond10, i32 0, i32 1
-  %19 = load %struct._GList** %next, align 8
+  %19 = load %struct._GList*, %struct._GList** %next, align 8
   br label %cond.end
 
 cond.false:                                       ; preds = %while.body13
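
When the loaded value is itself a pointer, the comma ends up separating two pointer types, as in the %struct._GList* loads above. A minimal sketch with a hypothetical type:

%struct.S = type { i32 }

define %struct.S* @example_ptr_load(%struct.S** %pp) {
entry:
  ; The explicit type is the pointee of the operand's type, one level of
  ; indirection less than the operand itself.
  %v = load %struct.S*, %struct.S** %pp, align 8
  ret %struct.S* %v
}
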
diff --git a/llvm/test/Transforms/JumpThreading/select.ll b/llvm/test/Transforms/JumpThreading/select.ll
index 545e86c..d0df772 100644
--- a/llvm/test/Transforms/JumpThreading/select.ll
+++ b/llvm/test/Transforms/JumpThreading/select.ll
@@ -135,7 +135,7 @@
 ; CHECK: icmp
 define void @test_switch_default(i32* nocapture %status) nounwind {
 entry:
-  %0 = load i32* %status, align 4
+  %0 = load i32, i32* %status, align 4
   switch i32 %0, label %L2 [
     i32 5061, label %L1
     i32 0, label %L2
@@ -146,7 +146,7 @@
   br label %L2
 
 L2:
-  %1 = load i32* %status, align 4
+  %1 = load i32, i32* %status, align 4
   %cmp57.i = icmp eq i32 %1, 0
   br i1 %cmp57.i, label %L3, label %L4
 
diff --git a/llvm/test/Transforms/JumpThreading/thread-loads.ll b/llvm/test/Transforms/JumpThreading/thread-loads.ll
index 4351f99..008eac7 100644
--- a/llvm/test/Transforms/JumpThreading/thread-loads.ll
+++ b/llvm/test/Transforms/JumpThreading/thread-loads.ll
@@ -21,7 +21,7 @@
 
 bb1:		; preds = %entry, %bb
 	%res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]		; <i32> [#uses=2]
-	%2 = load i32* %P, align 4		; <i32> [#uses=1]
+	%2 = load i32, i32* %P, align 4		; <i32> [#uses=1]
 	%3 = icmp sgt i32 %2, 36		; <i1> [#uses=1]
 	br i1 %3, label %bb3, label %bb2
 
@@ -60,7 +60,7 @@
 
 bb1:		; preds = %entry, %bb
 	%res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
-	%2 = load i32* %P, align 4, !tbaa !0
+	%2 = load i32, i32* %P, align 4, !tbaa !0
 	%3 = icmp sgt i32 %2, 36
 	br i1 %3, label %bb3, label %bb2
 
@@ -83,16 +83,16 @@
 ; CHECK-LABEL: @test3(
 entry:
   %0 = bitcast i8** %x to i32**
-  %1 = load i32** %0, align 8
+  %1 = load i32*, i32** %0, align 8
   br i1 %f, label %if.end57, label %if.then56
-; CHECK: %[[LOAD:.*]] = load i32**
+; CHECK: %[[LOAD:.*]] = load i32*, i32**
 ; CHECK: %[[CAST:.*]] = bitcast i32* %[[LOAD]] to i8*
 
 if.then56:
   br label %if.end57
 
 if.end57:
-  %2 = load i8** %x, align 8
+  %2 = load i8*, i8** %x, align 8
   %tobool59 = icmp eq i8* %2, null
   br i1 %tobool59, label %return, label %if.then60
 ; CHECK: %[[PHI:.*]] = phi i8* [ %[[CAST]], %[[PRED:[^ ]+]] ], [ %[[CAST]], %[[PRED]] ]
diff --git a/llvm/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll b/llvm/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
index 1e46611..a6abfa5 100644
--- a/llvm/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
+++ b/llvm/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
@@ -13,7 +13,7 @@
         %tmp.20 = getelementptr %struct.SetJmpMapEntry, %struct.SetJmpMapEntry* %SJE.0.0, i32 0, i32 1          ; <i32*> [#uses=0]
         ret void
 endif:          ; preds = %no_exit
-        %tmp.24 = load %struct.SetJmpMapEntry** null            ; <%struct.SetJmpMapEntry*> [#uses=1]
+        %tmp.24 = load %struct.SetJmpMapEntry*, %struct.SetJmpMapEntry** null            ; <%struct.SetJmpMapEntry*> [#uses=1]
         br i1 false, label %UnifiedReturnBlock, label %no_exit
 UnifiedReturnBlock:             ; preds = %endif, %entry
         ret void
diff --git a/llvm/test/Transforms/LCSSA/2006-07-09-NoDominator.ll b/llvm/test/Transforms/LCSSA/2006-07-09-NoDominator.ll
index bffd3de..bc3d150 100644
--- a/llvm/test/Transforms/LCSSA/2006-07-09-NoDominator.ll
+++ b/llvm/test/Transforms/LCSSA/2006-07-09-NoDominator.ll
@@ -14,7 +14,7 @@
 	%tmp.21 = getelementptr %struct.SetJmpMapEntry, %struct.SetJmpMapEntry* %SJE.0, i32 0, i32 1		; <i32*> [#uses=0]
 	br label %return
 endif:		; preds = %no_exit
-	%tmp.25 = load %struct.SetJmpMapEntry** null		; <%struct.SetJmpMapEntry*> [#uses=1]
+	%tmp.25 = load %struct.SetJmpMapEntry*, %struct.SetJmpMapEntry** null		; <%struct.SetJmpMapEntry*> [#uses=1]
 	br label %loopentry
 loopexit:		; preds = %loopentry
 	br label %return
diff --git a/llvm/test/Transforms/LCSSA/2007-07-12-LICM-2.ll b/llvm/test/Transforms/LCSSA/2007-07-12-LICM-2.ll
index 2c5815c..f5d3f7e 100644
--- a/llvm/test/Transforms/LCSSA/2007-07-12-LICM-2.ll
+++ b/llvm/test/Transforms/LCSSA/2007-07-12-LICM-2.ll
@@ -4,7 +4,7 @@
 	br label %bb7
 
 bb7:		; preds = %bb7, %entry
-	%tmp39 = load <4 x float>* null		; <<4 x float>> [#uses=1]
+	%tmp39 = load <4 x float>, <4 x float>* null		; <<4 x float>> [#uses=1]
 	%tmp40 = fadd <4 x float> %tmp39, < float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, float 0.000000e+00 >		; <<4 x float>> [#uses=1]
 	%tmp43 = fadd <4 x float> %tmp40, < float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 2.000000e+00 >		; <<4 x float>> [#uses=1]
 	%tmp46 = fadd <4 x float> %tmp43, < float 3.000000e+00, float 0.000000e+00, float 2.000000e+00, float 4.000000e+00 >		; <<4 x float>> [#uses=1]
diff --git a/llvm/test/Transforms/LCSSA/2007-07-12-LICM-3.ll b/llvm/test/Transforms/LCSSA/2007-07-12-LICM-3.ll
index 7e0d3c6..689b6ea 100644
--- a/llvm/test/Transforms/LCSSA/2007-07-12-LICM-3.ll
+++ b/llvm/test/Transforms/LCSSA/2007-07-12-LICM-3.ll
@@ -8,7 +8,7 @@
         br label %bb7
 
 bb7:            ; preds = %bb7, %bb
-        %tmp39 = load <4 x float>* null         ; <<4 x float>> [#uses=1]
+        %tmp39 = load <4 x float>, <4 x float>* null         ; <<4 x float>> [#uses=1]
         %tmp40 = fadd <4 x float> %tmp39, < float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, float 0.000000e+00 >             ; <<4 x float>> [#uses=1]
         %tmp43 = fadd <4 x float> %tmp40, < float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 2.000000e+00 >             ; <<4 x float>> [#uses=1]
         %tmp46 = fadd <4 x float> %tmp43, < float 3.000000e+00, float 0.000000e+00, float 2.000000e+00, float 4.000000e+00 >             ; <<4 x float>> [#uses=1]
diff --git a/llvm/test/Transforms/LCSSA/2007-07-12-LICM.ll b/llvm/test/Transforms/LCSSA/2007-07-12-LICM.ll
index 8c07aa2..f1785ed 100644
--- a/llvm/test/Transforms/LCSSA/2007-07-12-LICM.ll
+++ b/llvm/test/Transforms/LCSSA/2007-07-12-LICM.ll
@@ -4,7 +4,7 @@
 	br label %bb7
 
 bb7:		; preds = %bb7, %entry
-	%tmp39 = load <4 x float>* null		; <<4 x float>> [#uses=1]
+	%tmp39 = load <4 x float>, <4 x float>* null		; <<4 x float>> [#uses=1]
 	%tmp40 = fadd <4 x float> %tmp39, < float 2.000000e+00, float 3.000000e+00, float 1.000000e+00, float 0.000000e+00 >		; <<4 x float>> [#uses=0]
 	store <4 x float> zeroinitializer, <4 x float>* null
 	br i1 false, label %bb7, label %bb56
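
Vector result types follow the same pattern; the angle-bracketed type is duplicated before the pointer operand, as in the <4 x float> loads above. A minimal sketch (hypothetical function, not part of the patch):

define <4 x float> @example_vector_load(<4 x float>* %p) {
entry:
  %v = load <4 x float>, <4 x float>* %p, align 16
  ret <4 x float> %v
}
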
diff --git a/llvm/test/Transforms/LCSSA/unreachable-use.ll b/llvm/test/Transforms/LCSSA/unreachable-use.ll
index 2ea7aeb..c9e456c 100644
--- a/llvm/test/Transforms/LCSSA/unreachable-use.ll
+++ b/llvm/test/Transforms/LCSSA/unreachable-use.ll
@@ -4,7 +4,7 @@
 ; LCSSA doesn't need to transform uses in blocks not reachable
 ; from the entry block.
 
-; CHECK: %tmp33 = load i1** %tmp
+; CHECK: %tmp33 = load i1*, i1** %tmp
 
 define fastcc void @dfs() nounwind {
 bb:
@@ -21,7 +21,7 @@
   br label %bb44
 
 bb32:
-  %tmp33 = load i1** %tmp, align 8
+  %tmp33 = load i1*, i1** %tmp, align 8
   br label %bb45
 
 bb45:
diff --git a/llvm/test/Transforms/LICM/2003-05-02-LoadHoist.ll b/llvm/test/Transforms/LICM/2003-05-02-LoadHoist.ll
index 71d3e78..2f94dff 100644
--- a/llvm/test/Transforms/LICM/2003-05-02-LoadHoist.ll
+++ b/llvm/test/Transforms/LICM/2003-05-02-LoadHoist.ll
@@ -10,12 +10,12 @@
 declare void @foo()
 
 define i32 @test(i1 %c) {
-	%A = load i32* @X		; <i32> [#uses=1]
+	%A = load i32, i32* @X		; <i32> [#uses=1]
 	br label %Loop
 Loop:		; preds = %Loop, %0
 	call void @foo( )
         ;; Should not hoist this load!
-	%B = load i32* @X		; <i32> [#uses=1]
+	%B = load i32, i32* @X		; <i32> [#uses=1]
 	br i1 %c, label %Loop, label %Out
 Out:		; preds = %Loop
 	%C = sub i32 %A, %B		; <i32> [#uses=1]
diff --git a/llvm/test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll b/llvm/test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll
index 16f4fed..73862db 100644
--- a/llvm/test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll
+++ b/llvm/test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll
@@ -7,8 +7,8 @@
 Outer:		; preds = %Next, %0
 	br label %Inner
 Inner:		; preds = %Inner, %Outer
-	%tmp.114.i.i.i = load i8** @PL_regcomp_parse		; <i8*> [#uses=1]
-	%tmp.115.i.i.i = load i8* %tmp.114.i.i.i		; <i8> [#uses=0]
+	%tmp.114.i.i.i = load i8*, i8** @PL_regcomp_parse		; <i8*> [#uses=1]
+	%tmp.115.i.i.i = load i8, i8* %tmp.114.i.i.i		; <i8> [#uses=0]
 	store i8* null, i8** @PL_regcomp_parse
 	br i1 false, label %Inner, label %Next
 Next:		; preds = %Inner
diff --git a/llvm/test/Transforms/LICM/2007-05-22-VolatileSink.ll b/llvm/test/Transforms/LICM/2007-05-22-VolatileSink.ll
index a05b634..f5ce86b 100644
--- a/llvm/test/Transforms/LICM/2007-05-22-VolatileSink.ll
+++ b/llvm/test/Transforms/LICM/2007-05-22-VolatileSink.ll
@@ -10,7 +10,7 @@
 	br label %bb6
 
 bb:		; preds = %bb6
-	%tmp2 = load volatile i32* %DataIn		; <i32> [#uses=1]
+	%tmp2 = load volatile i32, i32* %DataIn		; <i32> [#uses=1]
 	%tmp3 = getelementptr [64 x i32], [64 x i32]* %buffer, i32 0, i32 %i.0		; <i32*> [#uses=1]
 	store i32 %tmp2, i32* %tmp3
 	%tmp5 = add i32 %i.0, 1		; <i32> [#uses=1]
@@ -27,7 +27,7 @@
 	%tmp14 = mul i32 %j.1, 8		; <i32> [#uses=1]
 	%tmp16 = add i32 %tmp14, %i.1		; <i32> [#uses=1]
 	%tmp17 = getelementptr [64 x i32], [64 x i32]* %buffer, i32 0, i32 %tmp16		; <i32*> [#uses=1]
-	%tmp18 = load i32* %tmp17		; <i32> [#uses=1]
+	%tmp18 = load i32, i32* %tmp17		; <i32> [#uses=1]
 	store volatile i32 %tmp18, i32* %DataOut
 	%tmp21 = add i32 %j.1, 1		; <i32> [#uses=1]
 	br label %bb22
diff --git a/llvm/test/Transforms/LICM/2007-07-30-AliasSet.ll b/llvm/test/Transforms/LICM/2007-07-30-AliasSet.ll
index 7585fb6..3e4fbb0a 100644
--- a/llvm/test/Transforms/LICM/2007-07-30-AliasSet.ll
+++ b/llvm/test/Transforms/LICM/2007-07-30-AliasSet.ll
@@ -18,7 +18,7 @@
 bb35:		; preds = %cond_next60, %bb63.outer
 	%window.34 = phi i32 [ %tmp62, %cond_next60 ], [ 0, %bb63.outer ]		; <i32> [#uses=1]
 	%tmp44 = getelementptr [4 x i32], [4 x i32]* null, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp46 = load i32* %tmp44, align 4		; <i32> [#uses=0]
+	%tmp46 = load i32, i32* %tmp44, align 4		; <i32> [#uses=0]
 	br i1 false, label %cond_true50, label %cond_next60
 
 cond_true50:		; preds = %bb35
diff --git a/llvm/test/Transforms/LICM/2008-07-22-LoadGlobalConstant.ll b/llvm/test/Transforms/LICM/2008-07-22-LoadGlobalConstant.ll
index f6ab77d..a715af0 100644
--- a/llvm/test/Transforms/LICM/2008-07-22-LoadGlobalConstant.ll
+++ b/llvm/test/Transforms/LICM/2008-07-22-LoadGlobalConstant.ll
@@ -6,7 +6,7 @@
 entry:
         br label %forcond
 
-; CHECK:  %tmp3 = load float** @a
+; CHECK:  %tmp3 = load float*, float** @a
 ; CHECK:  br label %forcond
 
 forcond:
@@ -19,7 +19,7 @@
 ; CHECK:  br i1 %cmp, label %forbody, label %afterfor
 
 forbody:
-        %tmp3 = load float** @a
+        %tmp3 = load float*, float** @a
         %arrayidx = getelementptr float, float* %tmp3, i32 %i.0
         %tmp7 = uitofp i32 %i.0 to float
         store float %tmp7, float* %arrayidx
diff --git a/llvm/test/Transforms/LICM/2009-12-10-LICM-Indbr-Crash.ll b/llvm/test/Transforms/LICM/2009-12-10-LICM-Indbr-Crash.ll
index e3cdbb3..1b3ff5b 100644
--- a/llvm/test/Transforms/LICM/2009-12-10-LICM-Indbr-Crash.ll
+++ b/llvm/test/Transforms/LICM/2009-12-10-LICM-Indbr-Crash.ll
@@ -12,7 +12,7 @@
     indirectbr i8* undef, [label %preheader, label %stuff]
 
   stuff:
-    %0 = load i8* undef, align 1
+    %0 = load i8, i8* undef, align 1
     br label %loop
 
   return:
diff --git a/llvm/test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll b/llvm/test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll
index 2bbc6ab..b462885 100644
--- a/llvm/test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll
+++ b/llvm/test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll
@@ -15,11 +15,11 @@
   br label %for.body4
 
 ; CHECK: for.body4:
-; CHECK: load volatile i16* @g_39
+; CHECK: load volatile i16, i16* @g_39
 
 for.body4:
   %l_612.11 = phi i32* [ undef, %for.body4.lr.ph ], [ %call19, %for.body4 ]
-  %tmp7 = load volatile i16* @g_39, align 2
+  %tmp7 = load volatile i16, i16* @g_39, align 2
   %call = call i32** @func_108(i32*** undef)
   %call19 = call i32* @func_84(i32** %call)
   br i1 false, label %for.body4, label %for.cond.loopexit
diff --git a/llvm/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll b/llvm/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll
index 5587142..370491e 100644
--- a/llvm/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll
+++ b/llvm/test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll
@@ -8,7 +8,7 @@
 
 ; CHECK: entry:
 ; CHECK: alloca [9 x i16]
-; CHECK: load i32* @g_58
+; CHECK: load i32, i32* @g_58
 ; CHECK: br label %for.body
 
 entry:
@@ -18,8 +18,8 @@
 for.body:                                         ; preds = %entry, %for.inc
   %inc12 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   store i32* @g_58, i32** @g_116, align 8, !tbaa !0
-  %tmp2 = load i32** @g_116, align 8, !tbaa !0
-  %tmp3 = load i32* %tmp2, !tbaa !4
+  %tmp2 = load i32*, i32** @g_116, align 8, !tbaa !0
+  %tmp3 = load i32, i32* %tmp2, !tbaa !4
   %or = or i32 %tmp3, 10
   store i32 %or, i32* %tmp2, !tbaa !4
   %inc = add nsw i32 %inc12, 1
diff --git a/llvm/test/Transforms/LICM/2011-04-09-RAUW-AST.ll b/llvm/test/Transforms/LICM/2011-04-09-RAUW-AST.ll
index 4285bd1..5011917 100644
--- a/llvm/test/Transforms/LICM/2011-04-09-RAUW-AST.ll
+++ b/llvm/test/Transforms/LICM/2011-04-09-RAUW-AST.ll
@@ -6,7 +6,7 @@
 
 define i32 @main() nounwind {
 entry:
-  %tmp = load i32* @g_3, align 4
+  %tmp = load i32, i32* @g_3, align 4
   %tobool = icmp eq i32 %tmp, 0
   br i1 %tobool, label %for.cond, label %if.then
 
@@ -40,7 +40,7 @@
   br label %for.cond
 
 for.end13:                                        ; preds = %for.cond
-  %tmp14 = load i32* @g_3, align 4
+  %tmp14 = load i32, i32* @g_3, align 4
   %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %tmp14) nounwind
   ret i32 0
 }
diff --git a/llvm/test/Transforms/LICM/PR21582.ll b/llvm/test/Transforms/LICM/PR21582.ll
index c918d1a..5664f2e 100644
--- a/llvm/test/Transforms/LICM/PR21582.ll
+++ b/llvm/test/Transforms/LICM/PR21582.ll
@@ -20,7 +20,7 @@
 for.body:                                         ; preds = %for.body, %for.body.preheader
   %g.15 = phi i32 [ undef, %for.body ], [ 0, %for.body.preheader ]
   %arrayidx2 = getelementptr inbounds i32, i32* @fn3.i, i64 0
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   %call = call i32 @g()
   br i1 false, label %for.body, label %for.end.loopexit
 
diff --git a/llvm/test/Transforms/LICM/atomics.ll b/llvm/test/Transforms/LICM/atomics.ll
index acf605d..4fe197a 100644
--- a/llvm/test/Transforms/LICM/atomics.ll
+++ b/llvm/test/Transforms/LICM/atomics.ll
@@ -7,7 +7,7 @@
 
 loop:
   %i = phi i32 [ %inc, %loop ], [ 0, %entry ]
-  %val = load atomic i32* %y unordered, align 4
+  %val = load atomic i32, i32* %y unordered, align 4
   %inc = add nsw i32 %i, 1
   %exitcond = icmp eq i32 %inc, %val
   br i1 %exitcond, label %end, label %loop
@@ -27,7 +27,7 @@
   br label %loop
 
 loop:
-  %val = load atomic i32* %y monotonic, align 4
+  %val = load atomic i32, i32* %y monotonic, align 4
   %exitcond = icmp ne i32 %val, 0
   br i1 %exitcond, label %end, label %loop
 
@@ -47,15 +47,15 @@
   br label %loop
 
 loop:
-  %vala = load atomic i32* %y monotonic, align 4
-  %valb = load atomic i32* %x unordered, align 4
+  %vala = load atomic i32, i32* %y monotonic, align 4
+  %valb = load atomic i32, i32* %x unordered, align 4
   %exitcond = icmp ne i32 %vala, %valb
   br i1 %exitcond, label %end, label %loop
 
 end:
   ret i32 %vala
 ; CHECK-LABEL: define i32 @test3(
-; CHECK: load atomic i32* %x unordered
+; CHECK: load atomic i32, i32* %x unordered
 ; CHECK-NEXT: br label %loop
 }
 
@@ -66,7 +66,7 @@
   br label %loop
 
 loop:
-  %vala = load atomic i32* %y monotonic, align 4
+  %vala = load atomic i32, i32* %y monotonic, align 4
   store atomic i32 %vala, i32* %x unordered, align 4
   %exitcond = icmp ne i32 %vala, 0
   br i1 %exitcond, label %end, label %loop
@@ -74,6 +74,6 @@
 end:
   ret i32 %vala
 ; CHECK-LABEL: define i32 @test4(
-; CHECK: load atomic i32* %y monotonic
+; CHECK: load atomic i32, i32* %y monotonic
 ; CHECK-NEXT: store atomic
 }
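
For atomic loads the explicit type slots in between the "load atomic" keywords and the pointer operand, while the ordering and alignment keep their trailing positions. A minimal sketch (hypothetical function, not part of the patch):

define i32 @example_atomic_load(i32* %p) {
entry:
  ; Ordering and alignment are unchanged by the migration.
  %v = load atomic i32, i32* %p unordered, align 4
  ret i32 %v
}
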
diff --git a/llvm/test/Transforms/LICM/constexpr.ll b/llvm/test/Transforms/LICM/constexpr.ll
index f788787..506721f 100644
--- a/llvm/test/Transforms/LICM/constexpr.ll
+++ b/llvm/test/Transforms/LICM/constexpr.ll
@@ -9,7 +9,7 @@
 
 ; CHECK-LABEL: @bar
 ; CHECK: entry:
-; CHECK: load i64* bitcast (i32** @in to i64*)
+; CHECK: load i64, i64* bitcast (i32** @in to i64*)
 ; CHECK: do.body:
 ; CHECK-NOT: load
 
@@ -24,11 +24,11 @@
   br i1 %c, label %l1, label %do.body.l2_crit_edge
 
 do.body.l2_crit_edge:                             ; preds = %do.body
-  %inval.pre = load i32** @in, align 8
+  %inval.pre = load i32*, i32** @in, align 8
   br label %l2
 
 l1:                                               ; preds = %do.body
-  %v1 = load i64* bitcast (i32** @in to i64*), align 8
+  %v1 = load i64, i64* bitcast (i32** @in to i64*), align 8
   store i64 %v1, i64* bitcast (i32** @out to i64*), align 8
   %0 = inttoptr i64 %v1 to i32*
   br label %l2
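
Constant-expression pointer operands, such as the bitcast above, migrate the same way: the explicit type is inserted before the whole constant expression. A minimal sketch with a hypothetical global:

@g = global i32* null

define i64 @example_constexpr_load() {
entry:
  %v = load i64, i64* bitcast (i32** @g to i64*), align 8
  ret i64 %v
}
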
diff --git a/llvm/test/Transforms/LICM/crash.ll b/llvm/test/Transforms/LICM/crash.ll
index 8dfa586..7fa4115 100644
--- a/llvm/test/Transforms/LICM/crash.ll
+++ b/llvm/test/Transforms/LICM/crash.ll
@@ -12,7 +12,7 @@
 
 for.body:                                         ; preds = %for.cond, %bb.nph
   store i8 0, i8* @g_12, align 1
-  %tmp6 = load i8* @g_12, align 1
+  %tmp6 = load i8, i8* @g_12, align 1
   br label %for.cond
 
 for.cond:                                         ; preds = %for.body
@@ -34,7 +34,7 @@
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
-  %tmp7 = load i32* @g_8, align 4
+  %tmp7 = load i32, i32* @g_8, align 4
   store i32* @g_8, i32** undef, align 16
   store i32 undef, i32* @g_8, align 4
   br label %for.body
@@ -48,7 +48,7 @@
 
 for.cond:                                         ; preds = %for.cond, %entry
   %tmp1 = getelementptr { i32*}, { i32*}* %__first, i32 0, i32 0
-  %tmp2 = load i32** %tmp1, align 4
+  %tmp2 = load i32*, i32** %tmp1, align 4
   %call = tail call i32* @test3helper(i32* %tmp2)
   %tmp3 = getelementptr { i32*}, { i32*}* %__first, i32 0, i32 0
   store i32* %call, i32** %tmp3, align 4
diff --git a/llvm/test/Transforms/LICM/hoist-bitcast-load.ll b/llvm/test/Transforms/LICM/hoist-bitcast-load.ll
index 4a5874f..47c474c 100644
--- a/llvm/test/Transforms/LICM/hoist-bitcast-load.ll
+++ b/llvm/test/Transforms/LICM/hoist-bitcast-load.ll
@@ -4,7 +4,7 @@
 
 ; Make sure the basic alloca pointer hoisting works:
 ; CHECK-LABEL: @test1
-; CHECK: load i32* %c, align 4
+; CHECK: load i32, i32* %c, align 4
 ; CHECK: for.body:
 
 ; Function Attrs: nounwind uwtable
@@ -17,14 +17,14 @@
 for.body:                                         ; preds = %entry, %for.inc
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 0
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  %1 = load i32* %c, align 4
+  %1 = load i32, i32* %c, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %2 = load i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx3, align 4
   %mul = mul nsw i32 %2, %1
   store i32 %mul, i32* %arrayidx, align 4
   br label %for.inc
@@ -42,7 +42,7 @@
 ; Make sure the basic alloca pointer hoisting works through a bitcast to a
 ; pointer to a smaller type:
 ; CHECK-LABEL: @test2
-; CHECK: load i32* %c, align 4
+; CHECK: load i32, i32* %c, align 4
 ; CHECK: for.body:
 
 ; Function Attrs: nounwind uwtable
@@ -56,14 +56,14 @@
 for.body:                                         ; preds = %entry, %for.inc
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 0
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  %1 = load i32* %c, align 4
+  %1 = load i32, i32* %c, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %2 = load i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx3, align 4
   %mul = mul nsw i32 %2, %1
   store i32 %mul, i32* %arrayidx, align 4
   br label %for.inc
@@ -80,7 +80,7 @@
 
 ; Make sure the basic alloca pointer hoisting works through an addrspacecast
 ; CHECK-LABEL: @test2_addrspacecast
-; CHECK: load i32 addrspace(1)* %c, align 4
+; CHECK: load i32, i32 addrspace(1)* %c, align 4
 ; CHECK: for.body:
 
 ; Function Attrs: nounwind uwtable
@@ -94,14 +94,14 @@
 for.body:                                         ; preds = %entry, %for.inc
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %indvars.iv
-  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 0
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  %1 = load i32 addrspace(1)* %c, align 4
+  %1 = load i32, i32 addrspace(1)* %c, align 4
   %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %indvars.iv
-  %2 = load i32 addrspace(1)* %arrayidx3, align 4
+  %2 = load i32, i32 addrspace(1)* %arrayidx3, align 4
   %mul = mul nsw i32 %2, %1
   store i32 %mul, i32 addrspace(1)* %arrayidx, align 4
   br label %for.inc
@@ -119,7 +119,7 @@
 ; Make sure the basic alloca pointer hoisting works through a bitcast to a
 ; pointer to a smaller type (where the bitcast also needs to be hoisted):
 ; CHECK-LABEL: @test3
-; CHECK: load i32* %c, align 4
+; CHECK: load i32, i32* %c, align 4
 ; CHECK: for.body:
 
 ; Function Attrs: nounwind uwtable
@@ -132,15 +132,15 @@
 for.body:                                         ; preds = %entry, %for.inc
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 0
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
   %c = bitcast i64* %ca to i32*
-  %1 = load i32* %c, align 4
+  %1 = load i32, i32* %c, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %2 = load i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx3, align 4
   %mul = mul nsw i32 %2, %1
   store i32 %mul, i32* %arrayidx, align 4
   br label %for.inc
@@ -159,7 +159,7 @@
 ; to a pointer to a larger type:
 ; CHECK-LABEL: @test4
 ; CHECK: for.body:
-; CHECK: load i32* %c, align 4
+; CHECK: load i32, i32* %c, align 4
 
 ; Function Attrs: nounwind uwtable
 define void @test4(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
@@ -172,14 +172,14 @@
 for.body:                                         ; preds = %entry, %for.inc
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 0
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  %1 = load i32* %c, align 4
+  %1 = load i32, i32* %c, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %2 = load i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx3, align 4
   %mul = mul nsw i32 %2, %1
   store i32 %mul, i32* %arrayidx, align 4
   br label %for.inc
@@ -197,7 +197,7 @@
 ; Don't crash on bitcasts to unsized types.
 ; CHECK-LABEL: @test5
 ; CHECK: for.body:
-; CHECK: load i32* %c, align 4
+; CHECK: load i32, i32* %c, align 4
 
 %atype = type opaque
 
@@ -213,14 +213,14 @@
 for.body:                                         ; preds = %entry, %for.inc
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 0
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  %1 = load i32* %c, align 4
+  %1 = load i32, i32* %c, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %2 = load i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx3, align 4
   %mul = mul nsw i32 %2, %1
   store i32 %mul, i32* %arrayidx, align 4
   br label %for.inc
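
Address-space qualifiers stay attached to the pointer type; only the bare value type is prepended, as in the addrspace(1) loads above. A minimal sketch (hypothetical function, not part of the patch):

define i32 @example_addrspace_load(i32 addrspace(1)* %p) {
entry:
  %v = load i32, i32 addrspace(1)* %p, align 4
  ret i32 %v
}
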
diff --git a/llvm/test/Transforms/LICM/hoist-deref-load.ll b/llvm/test/Transforms/LICM/hoist-deref-load.ll
index fc4efb2..4d0ae45 100644
--- a/llvm/test/Transforms/LICM/hoist-deref-load.ll
+++ b/llvm/test/Transforms/LICM/hoist-deref-load.ll
@@ -12,7 +12,7 @@
 ; because the dereferenceable attribute is on %c.
 
 ; CHECK-LABEL: @test1
-; CHECK: load i32* %c, align 4
+; CHECK: load i32, i32* %c, align 4
 ; CHECK: for.body:
 
 define void @test1(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly nonnull dereferenceable(4) %c, i32 %n) #0 {
@@ -23,14 +23,14 @@
 for.body:                                         ; preds = %entry, %for.inc
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 0
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  %1 = load i32* %c, align 4
+  %1 = load i32, i32* %c, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %2 = load i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx3, align 4
   %mul = mul nsw i32 %2, %1
   store i32 %mul, i32* %arrayidx, align 4
   br label %for.inc
@@ -50,7 +50,7 @@
 
 ; CHECK-LABEL: @test2
 ; CHECK: if.then:
-; CHECK: load i32* %c, align 4
+; CHECK: load i32, i32* %c, align 4
 
 define void @test2(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly nonnull %c, i32 %n) #0 {
 entry:
@@ -60,14 +60,14 @@
 for.body:                                         ; preds = %entry, %for.inc
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 0
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
-  %1 = load i32* %c, align 4
+  %1 = load i32, i32* %c, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %2 = load i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx3, align 4
   %mul = mul nsw i32 %2, %1
   store i32 %mul, i32* %arrayidx, align 4
   br label %for.inc
@@ -92,7 +92,7 @@
 ; because the dereferenceable attribute is on %c.
 
 ; CHECK-LABEL: @test3
-; CHECK: load i32* %c2, align 4
+; CHECK: load i32, i32* %c2, align 4
 ; CHECK: for.body:
 
 define void @test3(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly dereferenceable(12) %c, i32 %n) #0 {
@@ -103,15 +103,15 @@
 for.body:                                         ; preds = %entry, %for.inc
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 0
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
   %c2 = getelementptr inbounds i32, i32* %c, i64 2
-  %1 = load i32* %c2, align 4
+  %1 = load i32, i32* %c2, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %2 = load i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx3, align 4
   %mul = mul nsw i32 %2, %1
   store i32 %mul, i32* %arrayidx, align 4
   br label %for.inc
@@ -131,7 +131,7 @@
 
 ; CHECK-LABEL: @test4
 ; CHECK: if.then:
-; CHECK: load i32* %c2, align 4
+; CHECK: load i32, i32* %c2, align 4
 
 define void @test4(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* nocapture readonly dereferenceable(11) %c, i32 %n) #0 {
 entry:
@@ -141,15 +141,15 @@
 for.body:                                         ; preds = %entry, %for.inc
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 0
   br i1 %cmp1, label %if.then, label %for.inc
 
 if.then:                                          ; preds = %for.body
   %c2 = getelementptr inbounds i32, i32* %c, i64 2
-  %1 = load i32* %c2, align 4
+  %1 = load i32, i32* %c2, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %2 = load i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx3, align 4
   %mul = mul nsw i32 %2, %1
   store i32 %mul, i32* %arrayidx, align 4
   br label %for.inc
diff --git a/llvm/test/Transforms/LICM/hoist-invariant-load.ll b/llvm/test/Transforms/LICM/hoist-invariant-load.ll
index 59904ba..5d96896 100644
--- a/llvm/test/Transforms/LICM/hoist-invariant-load.ll
+++ b/llvm/test/Transforms/LICM/hoist-invariant-load.ll
@@ -15,18 +15,18 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp ult i32 %0, 10000
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i8** %x.addr, align 8
-  %2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", !invariant.load !0
+  %1 = load i8*, i8** %x.addr, align 8
+  %2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", !invariant.load !0
   %call = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %1, i8* %2)
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %3 = load i32* %i, align 4
+  %3 = load i32, i32* %i, align 4
   %inc = add i32 %3, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
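
Trailing metadata attachments such as !invariant.load are untouched by the change. A minimal sketch (hypothetical global and metadata node, assuming the then-current bare !{} metadata syntax):

@g_ptr = global i8* null

define i8* @example_invariant_load() {
entry:
  %v = load i8*, i8** @g_ptr, !invariant.load !0
  ret i8* %v
}

!0 = !{}
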
diff --git a/llvm/test/Transforms/LICM/hoisting.ll b/llvm/test/Transforms/LICM/hoisting.ll
index b4d297a..8609407 100644
--- a/llvm/test/Transforms/LICM/hoisting.ll
+++ b/llvm/test/Transforms/LICM/hoisting.ll
@@ -8,7 +8,7 @@
 ; potentially trapping instructions when they are not guaranteed to execute.
 define i32 @test1(i1 %c) {
 ; CHECK-LABEL: @test1(
-	%A = load i32* @X		; <i32> [#uses=2]
+	%A = load i32, i32* @X		; <i32> [#uses=2]
 	br label %Loop
 Loop:		; preds = %LoopTail, %0
 	call void @foo( )
@@ -35,9 +35,9 @@
 ;; It is ok and desirable to hoist this potentially trapping instruction.
 define i32 @test2(i1 %c) {
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT: load i32* @X
+; CHECK-NEXT: load i32, i32* @X
 ; CHECK-NEXT: %B = sdiv i32 4, %A
-	%A = load i32* @X		; <i32> [#uses=2]
+	%A = load i32, i32* @X		; <i32> [#uses=2]
 	br label %Loop
 Loop:
         ;; Should have hoisted this div!
@@ -54,7 +54,7 @@
 define i32 @test3(i1 %c) {
 ; CHECK-LABEL: define i32 @test3(
 ; CHECK: call void @foo2(i32 6)
-	%A = load i32* @X		; <i32> [#uses=2]
+	%A = load i32, i32* @X		; <i32> [#uses=2]
 	br label %Loop
 Loop:
 	%B = add i32 4, 2		; <i32> [#uses=2]
diff --git a/llvm/test/Transforms/LICM/lcssa-ssa-promoter.ll b/llvm/test/Transforms/LICM/lcssa-ssa-promoter.ll
index 5df3ef1..b0cae87 100644
--- a/llvm/test/Transforms/LICM/lcssa-ssa-promoter.ll
+++ b/llvm/test/Transforms/LICM/lcssa-ssa-promoter.ll
@@ -44,7 +44,7 @@
 ; CHECK-NEXT: br label %inner.latch
 
 inner.latch:
-  %y_val = load i32* @y, align 4
+  %y_val = load i32, i32* @y, align 4
   %icmp = icmp eq i32 %y_val, 0
   br i1 %icmp, label %inner.exit, label %inner.header
 ; CHECK: inner.latch:
diff --git a/llvm/test/Transforms/LICM/scalar-promote-memmodel.ll b/llvm/test/Transforms/LICM/scalar-promote-memmodel.ll
index 23d70f5..3603c25 100644
--- a/llvm/test/Transforms/LICM/scalar-promote-memmodel.ll
+++ b/llvm/test/Transforms/LICM/scalar-promote-memmodel.ll
@@ -19,12 +19,12 @@
   br i1 %tobool, label %for.inc, label %if.then
 
 if.then:                                          ; preds = %for.body
-  %tmp3 = load i32* @g, align 4
+  %tmp3 = load i32, i32* @g, align 4
   %inc = add nsw i32 %tmp3, 1
   store i32 %inc, i32* @g, align 4
   br label %for.inc
 
-; CHECK: load i32*
+; CHECK: load i32, i32*
 ; CHECK-NEXT: add
 ; CHECK-NEXT: store i32
 
diff --git a/llvm/test/Transforms/LICM/scalar_promote.ll b/llvm/test/Transforms/LICM/scalar_promote.ll
index a49b980..584d69a 100644
--- a/llvm/test/Transforms/LICM/scalar_promote.ll
+++ b/llvm/test/Transforms/LICM/scalar_promote.ll
@@ -8,13 +8,13 @@
   br label %Loop
 ; CHECK-LABEL: @test1(
 ; CHECK: Entry:
-; CHECK-NEXT:   load i32* @X
+; CHECK-NEXT:   load i32, i32* @X
 ; CHECK-NEXT:   br label %Loop
 
 
 Loop:   ; preds = %Loop, %0
   %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ]    ; <i32> [#uses=1]
-  %x = load i32* @X   ; <i32> [#uses=1]
+  %x = load i32, i32* @X   ; <i32> [#uses=1]
   %x2 = add i32 %x, 1   ; <i32> [#uses=1]
   store i32 %x2, i32* @X
   %Next = add i32 %j, 1   ; <i32> [#uses=2]
@@ -35,12 +35,12 @@
   br label %Loop
 ; CHECK-LABEL: @test2(
 ; CHECK: Entry:
-; CHECK-NEXT:    %.promoted = load i32* getelementptr inbounds (i32* @X, i64 1)
+; CHECK-NEXT:    %.promoted = load i32, i32* getelementptr inbounds (i32* @X, i64 1)
 ; CHECK-NEXT:    br label %Loop
 
 Loop:   ; preds = %Loop, %0
   %X1 = getelementptr i32, i32* @X, i64 1    ; <i32*> [#uses=1]
-  %A = load i32* %X1    ; <i32> [#uses=1]
+  %A = load i32, i32* %X1    ; <i32> [#uses=1]
   %V = add i32 %A, 1    ; <i32> [#uses=1]
   %X2 = getelementptr i32, i32* @X, i64 1    ; <i32*> [#uses=1]
   store i32 %V, i32* %X2
@@ -61,7 +61,7 @@
   br label %Loop
 Loop:
         ; Should not promote this to a register
-  %x = load volatile i32* @X
+  %x = load volatile i32, i32* @X
   %x2 = add i32 %x, 1
   store i32 %x2, i32* @X
   br i1 true, label %Out, label %Loop
@@ -88,7 +88,7 @@
 
 subloop:
   %count = phi i8 [ 0, %loop ], [ %nextcount, %subloop ]
-  %offsetx2 = load i8** %handle2
+  %offsetx2 = load i8*, i8** %handle2
   store i8 %n, i8* %offsetx2
   %newoffsetx2 = getelementptr i8, i8* %offsetx2, i64 -1
   store i8* %newoffsetx2, i8** %handle2
@@ -105,14 +105,14 @@
 ; CHECK: br i1
 
 innerexit:
-  %offsetx1 = load i8** %handle1
-  %val = load i8* %offsetx1
+  %offsetx1 = load i8*, i8** %handle1
+  %val = load i8, i8* %offsetx1
   %cond = icmp eq i8 %val, %n
   br i1 %cond, label %exit, label %loop
 
 ; Should not have promoted offsetx1 loads.
 ; CHECK: innerexit:
-; CHECK: %val = load i8* %offsetx1
+; CHECK: %val = load i8, i8* %offsetx1
 ; CHECK: %cond = icmp eq i8 %val, %n
 ; CHECK: br i1 %cond, label %exit, label %loop
 
@@ -125,13 +125,13 @@
   br label %Loop
 ; CHECK-LABEL: @test5(
 ; CHECK: Entry:
-; CHECK-NEXT:   load i32* @X
+; CHECK-NEXT:   load i32, i32* @X
 ; CHECK-NEXT:   br label %Loop
 
 
 Loop:   ; preds = %Loop, %0
   %j = phi i32 [ 0, %Entry ], [ %Next, %Loop ]    ; <i32> [#uses=1]
-  %x = load i32* @X   ; <i32> [#uses=1]
+  %x = load i32, i32* @X   ; <i32> [#uses=1]
   %x2 = add i32 %x, 1   ; <i32> [#uses=1]
   store i32 %x2, i32* @X
 
@@ -166,7 +166,7 @@
   %idxprom = sext i32 %storemerge2 to i64
   %arrayidx = getelementptr inbounds float, float* %a, i64 %idxprom
   store float 0.000000e+00, float* %arrayidx, align 4, !tbaa !3
-  %0 = load i32* %gi, align 4, !tbaa !0
+  %0 = load i32, i32* %gi, align 4, !tbaa !0
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* %gi, align 4, !tbaa !0
   %cmp = icmp slt i32 %inc, %n
@@ -179,7 +179,7 @@
   ret void
 
 ; CHECK: for.body.lr.ph:
-; CHECK-NEXT:  %gi.promoted = load i32* %gi, align 4, !tbaa !0
+; CHECK-NEXT:  %gi.promoted = load i32, i32* %gi, align 4, !tbaa !0
 ; CHECK: for.cond.for.end_crit_edge:
 ; CHECK-NEXT:  %[[LCSSAPHI:.*]] = phi i32 [ %inc
 ; CHECK-NEXT:  store i32 %[[LCSSAPHI]], i32* %gi, align 4, !tbaa !0
diff --git a/llvm/test/Transforms/LICM/sinking.ll b/llvm/test/Transforms/LICM/sinking.ll
index a3df819..02bf584 100644
--- a/llvm/test/Transforms/LICM/sinking.ll
+++ b/llvm/test/Transforms/LICM/sinking.ll
@@ -93,7 +93,7 @@
 	br label %Loop
 Loop:		; preds = %Loop, %Entry
 	%N_addr.0.pn = phi i32 [ %dec, %Loop ], [ %N, %Entry ]	
-	%tmp.6 = load i32* @X		; <i32> [#uses=1]
+	%tmp.6 = load i32, i32* @X		; <i32> [#uses=1]
 	%dec = add i32 %N_addr.0.pn, -1		; <i32> [#uses=1]
 	%tmp.1 = icmp ne i32 %N_addr.0.pn, 1		; <i1> [#uses=1]
 	br i1 %tmp.1, label %Loop, label %Out
@@ -101,7 +101,7 @@
 	ret i32 %tmp.6
 ; CHECK-LABEL: @test5(
 ; CHECK:     Out:
-; CHECK-NEXT:  %tmp.6.le = load i32* @X
+; CHECK-NEXT:  %tmp.6.le = load i32, i32* @X
 ; CHECK-NEXT:  ret i32 %tmp.6.le
 }
 
@@ -119,14 +119,14 @@
 	br label %Loop
 Loop:
 	%dead = getelementptr %Ty, %Ty* @X2, i64 0, i32 0
-	%sunk2 = load i32* %dead
+	%sunk2 = load i32, i32* %dead
 	br i1 false, label %Loop, label %Out
 Out:		; preds = %Loop
 	ret i32 %sunk2
 ; CHECK-LABEL: @test6(
 ; CHECK:     Out:
 ; CHECK-NEXT:  %dead.le = getelementptr %Ty, %Ty* @X2, i64 0, i32 0
-; CHECK-NEXT:  %sunk2.le = load i32* %dead.le
+; CHECK-NEXT:  %sunk2.le = load i32, i32* %dead.le
 ; CHECK-NEXT:  ret i32 %sunk2.le
 }
 
@@ -174,7 +174,7 @@
 Loop:		; preds = %Cont, %Entry
 	br i1 %C1, label %Cont, label %exit1
 Cont:		; preds = %Loop
-	%X = load i32* %P		; <i32> [#uses=2]
+	%X = load i32, i32* %P		; <i32> [#uses=2]
 	store i32 %X, i32* %Q
 	%V = add i32 %X, 1		; <i32> [#uses=1]
 	br i1 %C2, label %Loop, label %exit2
@@ -265,32 +265,32 @@
   br label %l2.header
 
 l2.header:
-  %x0 = load i1* %c, align 4
+  %x0 = load i1, i1* %c, align 4
   br i1 %x0, label %l1.latch, label %l3.preheader
 
 l3.preheader:
   br label %l3.header
 
 l3.header:
-  %x1 = load i1* %d, align 4
+  %x1 = load i1, i1* %d, align 4
   br i1 %x1, label %l2.latch, label %l4.preheader
 
 l4.preheader:
   br label %l4.header
 
 l4.header:
-  %x2 = load i1* %a
+  %x2 = load i1, i1* %a
   br i1 %x2, label %l3.latch, label %l4.body
 
 l4.body:
   call void @f(i32* %arrayidx.i)
-  %x3 = load i1* %b
+  %x3 = load i1, i1* %b
   %l = trunc i64 %iv to i32
   br i1 %x3, label %l4.latch, label %exit
 
 l4.latch:
   call void @g()
-  %x4 = load i1* %b, align 4
+  %x4 = load i1, i1* %b, align 4
   br i1 %x4, label %l4.header, label %exit
 
 l3.latch:
diff --git a/llvm/test/Transforms/LICM/speculate.ll b/llvm/test/Transforms/LICM/speculate.ll
index 4e933ad..91b5a25 100644
--- a/llvm/test/Transforms/LICM/speculate.ll
+++ b/llvm/test/Transforms/LICM/speculate.ll
@@ -13,7 +13,7 @@
 for.body:                                         ; preds = %entry, %for.inc
   %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %p, i64 %i.02
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %for.inc, label %if.then
 
@@ -45,7 +45,7 @@
 for.body:                                         ; preds = %entry, %for.inc
   %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %p, i64 %i.02
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %for.inc, label %if.then
 
@@ -79,7 +79,7 @@
 for.body:                                         ; preds = %entry, %for.inc
   %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %p, i64 %i.02
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %for.inc, label %if.then
 
@@ -112,7 +112,7 @@
 for.body:                                         ; preds = %entry, %for.inc
   %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %p, i64 %i.02
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %for.inc, label %if.then
 
@@ -145,7 +145,7 @@
 for.body:                                         ; preds = %entry, %for.inc
   %i.02 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %p, i64 %i.02
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %for.inc, label %if.then
 
diff --git a/llvm/test/Transforms/LICM/volatile-alias.ll b/llvm/test/Transforms/LICM/volatile-alias.ll
index df7f0a9..fda930d 100644
--- a/llvm/test/Transforms/LICM/volatile-alias.ll
+++ b/llvm/test/Transforms/LICM/volatile-alias.ll
@@ -2,9 +2,9 @@
 ; The objects *p and *q are aliased to each other, but even though *q is
 ; volatile, *p can be considered invariant in the loop. Check if it is moved
 ; out of the loop.
-; CHECK: load i32* %p
+; CHECK: load i32, i32* %p
 ; CHECK: for.body:
-; CHECK: load volatile i32* %q
+; CHECK: load volatile i32, i32* %q
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
@@ -24,30 +24,30 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
-  %1 = load i32* %n.addr, align 4
+  %0 = load i32, i32* %i, align 4
+  %1 = load i32, i32* %n.addr, align 4
   %cmp = icmp slt i32 %0, %1
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %2 = load i32** %p.addr, align 8
-  %3 = load i32* %2, align 4
-  %4 = load i32** %q.addr, align 8
-  %5 = load volatile i32* %4, align 4
+  %2 = load i32*, i32** %p.addr, align 8
+  %3 = load i32, i32* %2, align 4
+  %4 = load i32*, i32** %q.addr, align 8
+  %5 = load volatile i32, i32* %4, align 4
   %add = add nsw i32 %3, %5
-  %6 = load i32* %s, align 4
+  %6 = load i32, i32* %s, align 4
   %add1 = add nsw i32 %6, %add
   store i32 %add1, i32* %s, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %7 = load i32* %i, align 4
+  %7 = load i32, i32* %i, align 4
   %inc = add nsw i32 %7, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %8 = load i32* %s, align 4
+  %8 = load i32, i32* %s, align 4
   ret i32 %8
 }
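
As with atomic, the volatile keyword precedes the explicit type, so "load volatile i32* %q" becomes "load volatile i32, i32* %q". A minimal sketch (hypothetical function, not part of the patch):

define i32 @example_volatile_load(i32* %p) {
entry:
  %v = load volatile i32, i32* %p, align 4
  ret i32 %v
}
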
 
diff --git a/llvm/test/Transforms/LoadCombine/load-combine-aa.ll b/llvm/test/Transforms/LoadCombine/load-combine-aa.ll
index 714e13d..fc639c0 100644
--- a/llvm/test/Transforms/LoadCombine/load-combine-aa.ll
+++ b/llvm/test/Transforms/LoadCombine/load-combine-aa.ll
@@ -5,14 +5,14 @@
 define i64 @test1(i32* nocapture readonly noalias %a, i32* nocapture readonly noalias %b) {
 ; CHECK-LABEL: @test1
 
-; CHECK: load i64*
+; CHECK: load i64, i64*
 ; CHECK: ret i64
 
-  %load1 = load i32* %a, align 4
+  %load1 = load i32, i32* %a, align 4
   %conv = zext i32 %load1 to i64
   %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
   store i32 %load1, i32* %b, align 4
-  %load2 = load i32* %arrayidx1, align 4
+  %load2 = load i32, i32* %arrayidx1, align 4
   %conv2 = zext i32 %load2 to i64
   %shl = shl nuw i64 %conv2, 32
   %add = or i64 %shl, %conv
@@ -22,15 +22,15 @@
 define i64 @test2(i32* nocapture readonly %a, i32* nocapture readonly %b) {
 ; CHECK-LABEL: @test2
 
-; CHECK: load i32*
-; CHECK: load i32*
+; CHECK: load i32, i32*
+; CHECK: load i32, i32*
 ; CHECK: ret i64
 
-  %load1 = load i32* %a, align 4
+  %load1 = load i32, i32* %a, align 4
   %conv = zext i32 %load1 to i64
   %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
   store i32 %load1, i32* %b, align 4
-  %load2 = load i32* %arrayidx1, align 4
+  %load2 = load i32, i32* %arrayidx1, align 4
   %conv2 = zext i32 %load2 to i64
   %shl = shl nuw i64 %conv2, 32
   %add = or i64 %shl, %conv
diff --git a/llvm/test/Transforms/LoadCombine/load-combine-assume.ll b/llvm/test/Transforms/LoadCombine/load-combine-assume.ll
index ff0a0d3..2d6d160 100644
--- a/llvm/test/Transforms/LoadCombine/load-combine-assume.ll
+++ b/llvm/test/Transforms/LoadCombine/load-combine-assume.ll
@@ -8,14 +8,14 @@
 define i64 @test1(i32* nocapture readonly %a, i1 %b) {
 ; CHECK-LABEL: @test1
 
-; CHECK-DAG: load i64* %1, align 4
+; CHECK-DAG: load i64, i64* %1, align 4
 ; CHECK-DAG: tail call void @llvm.assume(i1 %b)
 ; CHECK: ret i64
 
-  %load1 = load i32* %a, align 4
+  %load1 = load i32, i32* %a, align 4
   %conv = zext i32 %load1 to i64
   %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
-  %load2 = load i32* %arrayidx1, align 4
+  %load2 = load i32, i32* %arrayidx1, align 4
   tail call void @llvm.assume(i1 %b)
   %conv2 = zext i32 %load2 to i64
   %shl = shl nuw i64 %conv2, 32
@@ -27,15 +27,15 @@
 define i64 @test2(i32* nocapture readonly %a, i1 %b) {
 ; CHECK-LABEL: @test2
 
-; CHECK-DAG: load i64* %1, align 4
+; CHECK-DAG: load i64, i64* %1, align 4
 ; CHECK-DAG: tail call void @llvm.assume(i1 %b)
 ; CHECK: ret i64
 
-  %load1 = load i32* %a, align 4
+  %load1 = load i32, i32* %a, align 4
   %conv = zext i32 %load1 to i64
   %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 1
   tail call void @llvm.assume(i1 %b)
-  %load2 = load i32* %arrayidx1, align 4
+  %load2 = load i32, i32* %arrayidx1, align 4
   %conv2 = zext i32 %load2 to i64
   %shl = shl nuw i64 %conv2, 32
   %add = or i64 %shl, %conv
diff --git a/llvm/test/Transforms/LoadCombine/load-combine.ll b/llvm/test/Transforms/LoadCombine/load-combine.ll
index e0e3c54..d506878 100644
--- a/llvm/test/Transforms/LoadCombine/load-combine.ll
+++ b/llvm/test/Transforms/LoadCombine/load-combine.ll
@@ -6,138 +6,138 @@
 ; Combine read from char* idiom.
 define i64 @LoadU64_x64_0(i64* %pData) {
   %1 = bitcast i64* %pData to i8*
-  %2 = load i8* %1, align 1
+  %2 = load i8, i8* %1, align 1
   %3 = zext i8 %2 to i64
   %4 = shl nuw i64 %3, 56
   %5 = getelementptr inbounds i8, i8* %1, i64 1
-  %6 = load i8* %5, align 1
+  %6 = load i8, i8* %5, align 1
   %7 = zext i8 %6 to i64
   %8 = shl nuw nsw i64 %7, 48
   %9 = or i64 %8, %4
   %10 = getelementptr inbounds i8, i8* %1, i64 2
-  %11 = load i8* %10, align 1
+  %11 = load i8, i8* %10, align 1
   %12 = zext i8 %11 to i64
   %13 = shl nuw nsw i64 %12, 40
   %14 = or i64 %9, %13
   %15 = getelementptr inbounds i8, i8* %1, i64 3
-  %16 = load i8* %15, align 1
+  %16 = load i8, i8* %15, align 1
   %17 = zext i8 %16 to i64
   %18 = shl nuw nsw i64 %17, 32
   %19 = or i64 %14, %18
   %20 = getelementptr inbounds i8, i8* %1, i64 4
-  %21 = load i8* %20, align 1
+  %21 = load i8, i8* %20, align 1
   %22 = zext i8 %21 to i64
   %23 = shl nuw nsw i64 %22, 24
   %24 = or i64 %19, %23
   %25 = getelementptr inbounds i8, i8* %1, i64 5
-  %26 = load i8* %25, align 1
+  %26 = load i8, i8* %25, align 1
   %27 = zext i8 %26 to i64
   %28 = shl nuw nsw i64 %27, 16
   %29 = or i64 %24, %28
   %30 = getelementptr inbounds i8, i8* %1, i64 6
-  %31 = load i8* %30, align 1
+  %31 = load i8, i8* %30, align 1
   %32 = zext i8 %31 to i64
   %33 = shl nuw nsw i64 %32, 8
   %34 = or i64 %29, %33
   %35 = getelementptr inbounds i8, i8* %1, i64 7
-  %36 = load i8* %35, align 1
+  %36 = load i8, i8* %35, align 1
   %37 = zext i8 %36 to i64
   %38 = or i64 %34, %37
   ret i64 %38
 ; CHECK-LABEL: @LoadU64_x64_0(
-; CHECK: load i64* %{{.*}}, align 1
+; CHECK: load i64, i64* %{{.*}}, align 1
 ; CHECK-NOT: load
 }
 
 ; Combine simple adjacent loads.
 define i32 @"2xi16_i32"(i16* %x) {
-  %1 = load i16* %x, align 2
+  %1 = load i16, i16* %x, align 2
   %2 = getelementptr inbounds i16, i16* %x, i64 1
-  %3 = load i16* %2, align 2
+  %3 = load i16, i16* %2, align 2
   %4 = zext i16 %3 to i32
   %5 = shl nuw i32 %4, 16
   %6 = zext i16 %1 to i32
   %7 = or i32 %5, %6
   ret i32 %7
 ; CHECK-LABEL: @"2xi16_i32"(
-; CHECK: load i32* %{{.*}}, align 2
+; CHECK: load i32, i32* %{{.*}}, align 2
 ; CHECK-NOT: load
 }
 
 ; Don't combine loads across stores.
 define i32 @"2xi16_i32_store"(i16* %x, i16* %y) {
-  %1 = load i16* %x, align 2
+  %1 = load i16, i16* %x, align 2
   store i16 0, i16* %y, align 2
   %2 = getelementptr inbounds i16, i16* %x, i64 1
-  %3 = load i16* %2, align 2
+  %3 = load i16, i16* %2, align 2
   %4 = zext i16 %3 to i32
   %5 = shl nuw i32 %4, 16
   %6 = zext i16 %1 to i32
   %7 = or i32 %5, %6
   ret i32 %7
 ; CHECK-LABEL: @"2xi16_i32_store"(
-; CHECK: load i16* %{{.*}}, align 2
+; CHECK: load i16, i16* %{{.*}}, align 2
 ; CHECK: store
-; CHECK: load i16* %{{.*}}, align 2
+; CHECK: load i16, i16* %{{.*}}, align 2
 }
 
 ; Don't combine loads with a gap.
 define i32 @"2xi16_i32_gap"(i16* %x) {
-  %1 = load i16* %x, align 2
+  %1 = load i16, i16* %x, align 2
   %2 = getelementptr inbounds i16, i16* %x, i64 2
-  %3 = load i16* %2, align 2
+  %3 = load i16, i16* %2, align 2
   %4 = zext i16 %3 to i32
   %5 = shl nuw i32 %4, 16
   %6 = zext i16 %1 to i32
   %7 = or i32 %5, %6
   ret i32 %7
 ; CHECK-LABEL: @"2xi16_i32_gap"(
-; CHECK: load i16* %{{.*}}, align 2
-; CHECK: load i16* %{{.*}}, align 2
+; CHECK: load i16, i16* %{{.*}}, align 2
+; CHECK: load i16, i16* %{{.*}}, align 2
 }
 
 ; Combine out of order loads.
 define i32 @"2xi16_i32_order"(i16* %x) {
   %1 = getelementptr inbounds i16, i16* %x, i64 1
-  %2 = load i16* %1, align 2
+  %2 = load i16, i16* %1, align 2
   %3 = zext i16 %2 to i32
-  %4 = load i16* %x, align 2
+  %4 = load i16, i16* %x, align 2
   %5 = shl nuw i32 %3, 16
   %6 = zext i16 %4 to i32
   %7 = or i32 %5, %6
   ret i32 %7
 ; CHECK-LABEL: @"2xi16_i32_order"(
-; CHECK: load i32* %{{.*}}, align 2
+; CHECK: load i32, i32* %{{.*}}, align 2
 ; CHECK-NOT: load
 }
 
 ; Overlapping loads.
 define i32 @"2xi16_i32_overlap"(i8* %x) {
   %1 = bitcast i8* %x to i16*
-  %2 = load i16* %1, align 2
+  %2 = load i16, i16* %1, align 2
   %3 = getelementptr inbounds i8, i8* %x, i64 1
   %4 = bitcast i8* %3 to i16*
-  %5 = load i16* %4, align 2
+  %5 = load i16, i16* %4, align 2
   %6 = zext i16 %5 to i32
   %7 = shl nuw i32 %6, 16
   %8 = zext i16 %2 to i32
   %9 = or i32 %7, %8
   ret i32 %9
 ; CHECK-LABEL: @"2xi16_i32_overlap"(
-; CHECK: load i16* %{{.*}}, align 2
-; CHECK: load i16* %{{.*}}, align 2
+; CHECK: load i16, i16* %{{.*}}, align 2
+; CHECK: load i16, i16* %{{.*}}, align 2
 }
 
 ; Combine valid alignments.
 define i64 @"2xi16_i64_align"(i8* %x) {
   %1 = bitcast i8* %x to i32*
-  %2 = load i32* %1, align 4
+  %2 = load i32, i32* %1, align 4
   %3 = getelementptr inbounds i8, i8* %x, i64 4
   %4 = bitcast i8* %3 to i16*
-  %5 = load i16* %4, align 2
+  %5 = load i16, i16* %4, align 2
   %6 = getelementptr inbounds i8, i8* %x, i64 6
   %7 = bitcast i8* %6 to i16*
-  %8 = load i16* %7, align 2
+  %8 = load i16, i16* %7, align 2
   %9 = zext i16 %8 to i64
   %10 = shl nuw i64 %9, 48
   %11 = zext i16 %5 to i64
@@ -147,44 +147,44 @@
   %15 = or i64 %14, %10
   ret i64 %15
 ; CHECK-LABEL: @"2xi16_i64_align"(
-; CHECK: load i64* %{{.*}}, align 4
+; CHECK: load i64, i64* %{{.*}}, align 4
 }
 
 ; Non power of two.
 define i64 @"2xi16_i64_npo2"(i8* %x) {
-  %1 = load i8* %x, align 1
+  %1 = load i8, i8* %x, align 1
   %2 = zext i8 %1 to i64
   %3 = getelementptr inbounds i8, i8* %x, i64 1
-  %4 = load i8* %3, align 1
+  %4 = load i8, i8* %3, align 1
   %5 = zext i8 %4 to i64
   %6 = shl nuw nsw i64 %5, 8
   %7 = or i64 %6, %2
   %8 = getelementptr inbounds i8, i8* %x, i64 2
-  %9 = load i8* %8, align 1
+  %9 = load i8, i8* %8, align 1
   %10 = zext i8 %9 to i64
   %11 = shl nuw nsw i64 %10, 16
   %12 = or i64 %11, %7
   %13 = getelementptr inbounds i8, i8* %x, i64 3
-  %14 = load i8* %13, align 1
+  %14 = load i8, i8* %13, align 1
   %15 = zext i8 %14 to i64
   %16 = shl nuw nsw i64 %15, 24
   %17 = or i64 %16, %12
   %18 = getelementptr inbounds i8, i8* %x, i64 4
-  %19 = load i8* %18, align 1
+  %19 = load i8, i8* %18, align 1
   %20 = zext i8 %19 to i64
   %21 = shl nuw nsw i64 %20, 32
   %22 = or i64 %21, %17
   %23 = getelementptr inbounds i8, i8* %x, i64 5
-  %24 = load i8* %23, align 1
+  %24 = load i8, i8* %23, align 1
   %25 = zext i8 %24 to i64
   %26 = shl nuw nsw i64 %25, 40
   %27 = or i64 %26, %22
   %28 = getelementptr inbounds i8, i8* %x, i64 6
-  %29 = load i8* %28, align 1
+  %29 = load i8, i8* %28, align 1
   %30 = zext i8 %29 to i64
   %31 = shl nuw nsw i64 %30, 48
   %32 = or i64 %31, %27
   ret i64 %32
 ; CHECK-LABEL: @"2xi16_i64_npo2"(
-; CHECK: load i32* %{{.*}}, align 1
+; CHECK: load i32, i32* %{{.*}}, align 1
 }
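
The FileCheck patterns embedded in these tests need the same comma insertion as the IR they match, including patterns that end in regex placeholders. A minimal sketch of a hypothetical test, not taken from the patch:

; RUN: opt < %s -S | FileCheck %s
; CHECK-LABEL: @example_checked(
; CHECK: load i32, i32* %{{.*}}, align 4
define i32 @example_checked(i32* %p) {
entry:
  %v = load i32, i32* %p, align 4
  ret i32 %v
}
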
diff --git a/llvm/test/Transforms/LoopDeletion/2008-05-06-Phi.ll b/llvm/test/Transforms/LoopDeletion/2008-05-06-Phi.ll
index 32e1eef..fcf5ede 100644
--- a/llvm/test/Transforms/LoopDeletion/2008-05-06-Phi.ll
+++ b/llvm/test/Transforms/LoopDeletion/2008-05-06-Phi.ll
@@ -45,14 +45,14 @@
 define internal fastcc void @encodeMainData(%struct.lame_global_flags* %gfp, [2 x [576 x i32]]* %l3_enc, %struct.III_side_info_t* %si, [2 x %struct.III_scalefac_t]* %scalefac) nounwind  {
 entry:
 	%tmp69 = getelementptr %struct.lame_global_flags, %struct.lame_global_flags* %gfp, i32 0, i32 43		; <i32*> [#uses=1]
-	%tmp70 = load i32* %tmp69, align 4		; <i32> [#uses=1]
+	%tmp70 = load i32, i32* %tmp69, align 4		; <i32> [#uses=1]
 	%tmp71 = icmp eq i32 %tmp70, 1		; <i1> [#uses=1]
 	br i1 %tmp71, label %bb352, label %bb498
 
 bb113:		; preds = %bb132
 	%tmp123 = getelementptr [2 x %struct.III_scalefac_t], [2 x %struct.III_scalefac_t]* %scalefac, i32 0, i32 0, i32 1, i32 %sfb.0, i32 %window.0		; <i32*> [#uses=1]
-	%tmp124 = load i32* %tmp123, align 4		; <i32> [#uses=1]
-	%tmp126 = load %struct.BF_PartHolder** %tmp80, align 4		; <%struct.BF_PartHolder*> [#uses=1]
+	%tmp124 = load i32, i32* %tmp123, align 4		; <i32> [#uses=1]
+	%tmp126 = load %struct.BF_PartHolder*, %struct.BF_PartHolder** %tmp80, align 4		; <%struct.BF_PartHolder*> [#uses=1]
 	%tmp128 = call %struct.BF_PartHolder* @BF_addEntry( %struct.BF_PartHolder* %tmp126, i32 %tmp124, i32 %tmp93 ) nounwind 		; <%struct.BF_PartHolder*> [#uses=1]
 	store %struct.BF_PartHolder* %tmp128, %struct.BF_PartHolder** %tmp80, align 4
 	%tmp131 = add i32 %window.0, 1		; <i32> [#uses=1]
@@ -73,7 +73,7 @@
 	br i1 %tmp142, label %bb132, label %bb174
 
 bb166:		; preds = %bb174
-	%tmp160 = load %struct.BF_PartHolder** %tmp80, align 4		; <%struct.BF_PartHolder*> [#uses=1]
+	%tmp160 = load %struct.BF_PartHolder*, %struct.BF_PartHolder** %tmp80, align 4		; <%struct.BF_PartHolder*> [#uses=1]
 	%tmp162 = call %struct.BF_PartHolder* @BF_addEntry( %struct.BF_PartHolder* %tmp160, i32 0, i32 0 ) nounwind 		; <%struct.BF_PartHolder*> [#uses=0]
 	unreachable
 
@@ -84,7 +84,7 @@
 bb341:		; preds = %bb352, %bb174
 	%tmp80 = getelementptr [2 x [2 x %struct.BF_PartHolder*]], [2 x [2 x %struct.BF_PartHolder*]]* @scaleFactorsPH, i32 0, i32 0, i32 0		; <%struct.BF_PartHolder**> [#uses=3]
 	%tmp92 = getelementptr [16 x i32], [16 x i32]* @slen1_tab, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp93 = load i32* %tmp92, align 4		; <i32> [#uses=1]
+	%tmp93 = load i32, i32* %tmp92, align 4		; <i32> [#uses=1]
 	br label %bb140
 
 bb352:		; preds = %entry
diff --git a/llvm/test/Transforms/LoopIdiom/basic-address-space.ll b/llvm/test/Transforms/LoopIdiom/basic-address-space.ll
index b6caa72..3ab874c 100644
--- a/llvm/test/Transforms/LoopIdiom/basic-address-space.ll
+++ b/llvm/test/Transforms/LoopIdiom/basic-address-space.ll
@@ -75,7 +75,7 @@
   %add = add nsw i32 %tmp5, 4
   %idxprom = sext i32 %add to i64
   %arrayidx = getelementptr inbounds [7 x i32], [7 x i32] addrspace(2)* @g_50, i32 0, i64 %idxprom
-  %tmp2 = load i32 addrspace(2)* %arrayidx, align 4
+  %tmp2 = load i32, i32 addrspace(2)* %arrayidx, align 4
   %add4 = add nsw i32 %tmp5, 5
   %idxprom5 = sext i32 %add4 to i64
   %arrayidx6 = getelementptr inbounds [7 x i32], [7 x i32] addrspace(2)* @g_50, i32 0, i64 %idxprom5
@@ -85,7 +85,7 @@
   br i1 %cmp, label %for.body, label %for.end
 
 for.end:                                          ; preds = %for.inc
-  %tmp8 = load i32 addrspace(2)* getelementptr inbounds ([7 x i32] addrspace(2)* @g_50, i32 0, i64 6), align 4
+  %tmp8 = load i32, i32 addrspace(2)* getelementptr inbounds ([7 x i32] addrspace(2)* @g_50, i32 0, i64 6), align 4
   ret i32 %tmp8
 }
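
The rewrite applies unchanged to loads from non-default address spaces, as in the @g_50 accesses above: the explicit operand is the pointee type alone, and the addrspace qualifier stays attached to the pointer operand's type. A minimal sketch (illustrative names):

  @g = external addrspace(2) global i32

  define i32 @load_as2() {
    %v = load i32, i32 addrspace(2)* @g, align 4
    ret i32 %v
  }
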
 
diff --git a/llvm/test/Transforms/LoopIdiom/basic.ll b/llvm/test/Transforms/LoopIdiom/basic.ll
index 7344f5d..c0aafc2 100644
--- a/llvm/test/Transforms/LoopIdiom/basic.ll
+++ b/llvm/test/Transforms/LoopIdiom/basic.ll
@@ -150,7 +150,7 @@
   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
   %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
   %DestI = getelementptr i8, i8* %Dest, i64 %indvar
-  %V = load i8* %I.0.014, align 1
+  %V = load i8, i8* %I.0.014, align 1
   store i8 %V, i8* %DestI, align 1
   %indvar.next = add i64 %indvar, 1
   %exitcond = icmp eq i64 %indvar.next, %Size
@@ -223,7 +223,7 @@
   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ]
   %I.0.014 = getelementptr i8, i8* %Base, i64 %indvar
   %DestI = getelementptr i8, i8* %Dest, i64 %indvar
-  %V = load i8* %I.0.014, align 1
+  %V = load i8, i8* %I.0.014, align 1
   store i8 %V, i8* %DestI, align 1
 
   ;; This store can clobber the input.
@@ -363,7 +363,7 @@
   %add = add nsw i32 %tmp5, 4
   %idxprom = sext i32 %add to i64
   %arrayidx = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom
-  %tmp2 = load i32* %arrayidx, align 4
+  %tmp2 = load i32, i32* %arrayidx, align 4
   %add4 = add nsw i32 %tmp5, 5
   %idxprom5 = sext i32 %add4 to i64
   %arrayidx6 = getelementptr inbounds [7 x i32], [7 x i32]* @g_50, i32 0, i64 %idxprom5
@@ -373,7 +373,7 @@
   br i1 %cmp, label %for.body, label %for.end
 
 for.end:                                          ; preds = %for.inc
-  %tmp8 = load i32* getelementptr inbounds ([7 x i32]* @g_50, i32 0, i64 6), align 4
+  %tmp8 = load i32, i32* getelementptr inbounds ([7 x i32]* @g_50, i32 0, i64 6), align 4
   ret i32 %tmp8
 ; CHECK-LABEL: @test14(
 ; CHECK: for.body:
@@ -404,7 +404,7 @@
 while.body:
   %phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ]
   %src.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 1
-  %val = load i32* %src.ptr, align 4
+  %val = load i32, i32* %src.ptr, align 4
 ; CHECK: load
   %dst.ptr = getelementptr inbounds i32, i32* %phi.ptr, i64 0
   store i32 %val, i32* %dst.ptr, align 4
diff --git a/llvm/test/Transforms/LoopIdiom/scev-invalidation.ll b/llvm/test/Transforms/LoopIdiom/scev-invalidation.ll
index 1e72e0b..2fe8a30 100644
--- a/llvm/test/Transforms/LoopIdiom/scev-invalidation.ll
+++ b/llvm/test/Transforms/LoopIdiom/scev-invalidation.ll
@@ -11,7 +11,7 @@
   %backslashes.0 = phi i32 [ undef, %entry ], [ %backslashes.2, %for.inc ]
   %p.0 = phi i8* [ undef, %entry ], [ %incdec.ptr3, %for.inc ]
   %q.0 = phi i8* [ undef, %entry ], [ %q.2, %for.inc ]
-  %0 = load i8* %p.0, align 1
+  %0 = load i8, i8* %p.0, align 1
   switch i8 %0, label %while.cond.preheader [
     i8 0, label %for.cond4.preheader
     i8 92, label %for.inc
diff --git a/llvm/test/Transforms/LoopReroll/basic.ll b/llvm/test/Transforms/LoopReroll/basic.ll
index 4ae7827..16a6dc8 100644
--- a/llvm/test/Transforms/LoopReroll/basic.ll
+++ b/llvm/test/Transforms/LoopReroll/basic.ll
@@ -158,42 +158,42 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %mul = fmul float %0, %alpha
   %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %1 = load float* %arrayidx2, align 4
+  %1 = load float, float* %arrayidx2, align 4
   %add = fadd float %1, %mul
   store float %add, float* %arrayidx2, align 4
   %2 = add nsw i64 %indvars.iv, 1
   %arrayidx5 = getelementptr inbounds float, float* %b, i64 %2
-  %3 = load float* %arrayidx5, align 4
+  %3 = load float, float* %arrayidx5, align 4
   %mul6 = fmul float %3, %alpha
   %arrayidx9 = getelementptr inbounds float, float* %a, i64 %2
-  %4 = load float* %arrayidx9, align 4
+  %4 = load float, float* %arrayidx9, align 4
   %add10 = fadd float %4, %mul6
   store float %add10, float* %arrayidx9, align 4
   %5 = add nsw i64 %indvars.iv, 2
   %arrayidx13 = getelementptr inbounds float, float* %b, i64 %5
-  %6 = load float* %arrayidx13, align 4
+  %6 = load float, float* %arrayidx13, align 4
   %mul14 = fmul float %6, %alpha
   %arrayidx17 = getelementptr inbounds float, float* %a, i64 %5
-  %7 = load float* %arrayidx17, align 4
+  %7 = load float, float* %arrayidx17, align 4
   %add18 = fadd float %7, %mul14
   store float %add18, float* %arrayidx17, align 4
   %8 = add nsw i64 %indvars.iv, 3
   %arrayidx21 = getelementptr inbounds float, float* %b, i64 %8
-  %9 = load float* %arrayidx21, align 4
+  %9 = load float, float* %arrayidx21, align 4
   %mul22 = fmul float %9, %alpha
   %arrayidx25 = getelementptr inbounds float, float* %a, i64 %8
-  %10 = load float* %arrayidx25, align 4
+  %10 = load float, float* %arrayidx25, align 4
   %add26 = fadd float %10, %mul22
   store float %add26, float* %arrayidx25, align 4
   %11 = add nsw i64 %indvars.iv, 4
   %arrayidx29 = getelementptr inbounds float, float* %b, i64 %11
-  %12 = load float* %arrayidx29, align 4
+  %12 = load float, float* %arrayidx29, align 4
   %mul30 = fmul float %12, %alpha
   %arrayidx33 = getelementptr inbounds float, float* %a, i64 %11
-  %13 = load float* %arrayidx33, align 4
+  %13 = load float, float* %arrayidx33, align 4
   %add34 = fadd float %13, %mul30
   store float %add34, float* %arrayidx33, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
@@ -206,10 +206,10 @@
 ; CHECK: for.body:
 ; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
 ; CHECK: %arrayidx = getelementptr inbounds float, float* %b, i64 %indvar
-; CHECK: %0 = load float* %arrayidx, align 4
+; CHECK: %0 = load float, float* %arrayidx, align 4
 ; CHECK: %mul = fmul float %0, %alpha
 ; CHECK: %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvar
-; CHECK: %1 = load float* %arrayidx2, align 4
+; CHECK: %1 = load float, float* %arrayidx2, align 4
 ; CHECK: %add = fadd float %1, %mul
 ; CHECK: store float %add, float* %arrayidx2, align 4
 ; CHECK: %indvar.next = add i64 %indvar, 1
@@ -240,57 +240,57 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %ip, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %idxprom1 = sext i32 %0 to i64
   %arrayidx2 = getelementptr inbounds float, float* %b, i64 %idxprom1
-  %1 = load float* %arrayidx2, align 4
+  %1 = load float, float* %arrayidx2, align 4
   %mul = fmul float %1, %alpha
   %arrayidx4 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %2 = load float* %arrayidx4, align 4
+  %2 = load float, float* %arrayidx4, align 4
   %add = fadd float %2, %mul
   store float %add, float* %arrayidx4, align 4
   %3 = add nsw i64 %indvars.iv, 1
   %arrayidx7 = getelementptr inbounds i32, i32* %ip, i64 %3
-  %4 = load i32* %arrayidx7, align 4
+  %4 = load i32, i32* %arrayidx7, align 4
   %idxprom8 = sext i32 %4 to i64
   %arrayidx9 = getelementptr inbounds float, float* %b, i64 %idxprom8
-  %5 = load float* %arrayidx9, align 4
+  %5 = load float, float* %arrayidx9, align 4
   %mul10 = fmul float %5, %alpha
   %arrayidx13 = getelementptr inbounds float, float* %a, i64 %3
-  %6 = load float* %arrayidx13, align 4
+  %6 = load float, float* %arrayidx13, align 4
   %add14 = fadd float %6, %mul10
   store float %add14, float* %arrayidx13, align 4
   %7 = add nsw i64 %indvars.iv, 2
   %arrayidx17 = getelementptr inbounds i32, i32* %ip, i64 %7
-  %8 = load i32* %arrayidx17, align 4
+  %8 = load i32, i32* %arrayidx17, align 4
   %idxprom18 = sext i32 %8 to i64
   %arrayidx19 = getelementptr inbounds float, float* %b, i64 %idxprom18
-  %9 = load float* %arrayidx19, align 4
+  %9 = load float, float* %arrayidx19, align 4
   %mul20 = fmul float %9, %alpha
   %arrayidx23 = getelementptr inbounds float, float* %a, i64 %7
-  %10 = load float* %arrayidx23, align 4
+  %10 = load float, float* %arrayidx23, align 4
   %add24 = fadd float %10, %mul20
   store float %add24, float* %arrayidx23, align 4
   %11 = add nsw i64 %indvars.iv, 3
   %arrayidx27 = getelementptr inbounds i32, i32* %ip, i64 %11
-  %12 = load i32* %arrayidx27, align 4
+  %12 = load i32, i32* %arrayidx27, align 4
   %idxprom28 = sext i32 %12 to i64
   %arrayidx29 = getelementptr inbounds float, float* %b, i64 %idxprom28
-  %13 = load float* %arrayidx29, align 4
+  %13 = load float, float* %arrayidx29, align 4
   %mul30 = fmul float %13, %alpha
   %arrayidx33 = getelementptr inbounds float, float* %a, i64 %11
-  %14 = load float* %arrayidx33, align 4
+  %14 = load float, float* %arrayidx33, align 4
   %add34 = fadd float %14, %mul30
   store float %add34, float* %arrayidx33, align 4
   %15 = add nsw i64 %indvars.iv, 4
   %arrayidx37 = getelementptr inbounds i32, i32* %ip, i64 %15
-  %16 = load i32* %arrayidx37, align 4
+  %16 = load i32, i32* %arrayidx37, align 4
   %idxprom38 = sext i32 %16 to i64
   %arrayidx39 = getelementptr inbounds float, float* %b, i64 %idxprom38
-  %17 = load float* %arrayidx39, align 4
+  %17 = load float, float* %arrayidx39, align 4
   %mul40 = fmul float %17, %alpha
   %arrayidx43 = getelementptr inbounds float, float* %a, i64 %15
-  %18 = load float* %arrayidx43, align 4
+  %18 = load float, float* %arrayidx43, align 4
   %add44 = fadd float %18, %mul40
   store float %add44, float* %arrayidx43, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
@@ -303,13 +303,13 @@
 ; CHECK: for.body:
 ; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
 ; CHECK: %arrayidx = getelementptr inbounds i32, i32* %ip, i64 %indvar
-; CHECK: %0 = load i32* %arrayidx, align 4
+; CHECK: %0 = load i32, i32* %arrayidx, align 4
 ; CHECK: %idxprom1 = sext i32 %0 to i64
 ; CHECK: %arrayidx2 = getelementptr inbounds float, float* %b, i64 %idxprom1
-; CHECK: %1 = load float* %arrayidx2, align 4
+; CHECK: %1 = load float, float* %arrayidx2, align 4
 ; CHECK: %mul = fmul float %1, %alpha
 ; CHECK: %arrayidx4 = getelementptr inbounds float, float* %a, i64 %indvar
-; CHECK: %2 = load float* %arrayidx4, align 4
+; CHECK: %2 = load float, float* %arrayidx4, align 4
 ; CHECK: %add = fadd float %2, %mul
 ; CHECK: store float %add, float* %arrayidx4, align 4
 ; CHECK: %indvar.next = add i64 %indvar, 1
diff --git a/llvm/test/Transforms/LoopReroll/nonconst_lb.ll b/llvm/test/Transforms/LoopReroll/nonconst_lb.ll
index 45a6701..5effa42 100644
--- a/llvm/test/Transforms/LoopReroll/nonconst_lb.ll
+++ b/llvm/test/Transforms/LoopReroll/nonconst_lb.ll
@@ -18,25 +18,25 @@
 for.body:                                         ; preds = %entry, %for.body
   %i.035 = phi i32 [ %add18, %for.body ], [ %m, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %B, i32 %i.035
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %mul = shl nsw i32 %0, 2
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %i.035
   store i32 %mul, i32* %arrayidx2, align 4
   %add3 = add nsw i32 %i.035, 1
   %arrayidx4 = getelementptr inbounds i32, i32* %B, i32 %add3
-  %1 = load i32* %arrayidx4, align 4
+  %1 = load i32, i32* %arrayidx4, align 4
   %mul5 = shl nsw i32 %1, 2
   %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %add3
   store i32 %mul5, i32* %arrayidx7, align 4
   %add8 = add nsw i32 %i.035, 2
   %arrayidx9 = getelementptr inbounds i32, i32* %B, i32 %add8
-  %2 = load i32* %arrayidx9, align 4
+  %2 = load i32, i32* %arrayidx9, align 4
   %mul10 = shl nsw i32 %2, 2
   %arrayidx12 = getelementptr inbounds i32, i32* %A, i32 %add8
   store i32 %mul10, i32* %arrayidx12, align 4
   %add13 = add nsw i32 %i.035, 3
   %arrayidx14 = getelementptr inbounds i32, i32* %B, i32 %add13
-  %3 = load i32* %arrayidx14, align 4
+  %3 = load i32, i32* %arrayidx14, align 4
   %mul15 = shl nsw i32 %3, 2
   %arrayidx17 = getelementptr inbounds i32, i32* %A, i32 %add13
   store i32 %mul15, i32* %arrayidx17, align 4
@@ -61,7 +61,7 @@
 ; CHECK:   %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
 ; CHECK:   %6 = add i32 %m, %indvar
 ; CHECK:   %arrayidx = getelementptr inbounds i32, i32* %B, i32 %6
-; CHECK:   %7 = load i32* %arrayidx, align 4
+; CHECK:   %7 = load i32, i32* %arrayidx, align 4
 ; CHECK:   %mul = shl nsw i32 %7, 2
 ; CHECK:   %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %6
 ; CHECK:   store i32 %mul, i32* %arrayidx2, align 4
@@ -89,33 +89,33 @@
 for.body:                                         ; preds = %entry, %for.body
   %i.056 = phi i32 [ %add27, %for.body ], [ %rem, %entry ]
   %arrayidx = getelementptr inbounds float, float* %dy, i32 %i.056
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds float, float* %dx, i32 %i.056
-  %1 = load float* %arrayidx1, align 4
+  %1 = load float, float* %arrayidx1, align 4
   %mul = fmul float %1, %da
   %add = fadd float %0, %mul
   store float %add, float* %arrayidx, align 4
   %add3 = add nsw i32 %i.056, 1
   %arrayidx4 = getelementptr inbounds float, float* %dy, i32 %add3
-  %2 = load float* %arrayidx4, align 4
+  %2 = load float, float* %arrayidx4, align 4
   %arrayidx6 = getelementptr inbounds float, float* %dx, i32 %add3
-  %3 = load float* %arrayidx6, align 4
+  %3 = load float, float* %arrayidx6, align 4
   %mul7 = fmul float %3, %da
   %add8 = fadd float %2, %mul7
   store float %add8, float* %arrayidx4, align 4
   %add11 = add nsw i32 %i.056, 2
   %arrayidx12 = getelementptr inbounds float, float* %dy, i32 %add11
-  %4 = load float* %arrayidx12, align 4
+  %4 = load float, float* %arrayidx12, align 4
   %arrayidx14 = getelementptr inbounds float, float* %dx, i32 %add11
-  %5 = load float* %arrayidx14, align 4
+  %5 = load float, float* %arrayidx14, align 4
   %mul15 = fmul float %5, %da
   %add16 = fadd float %4, %mul15
   store float %add16, float* %arrayidx12, align 4
   %add19 = add nsw i32 %i.056, 3
   %arrayidx20 = getelementptr inbounds float, float* %dy, i32 %add19
-  %6 = load float* %arrayidx20, align 4
+  %6 = load float, float* %arrayidx20, align 4
   %arrayidx22 = getelementptr inbounds float, float* %dx, i32 %add19
-  %7 = load float* %arrayidx22, align 4
+  %7 = load float, float* %arrayidx22, align 4
   %mul23 = fmul float %7, %da
   %add24 = fadd float %6, %mul23
   store float %add24, float* %arrayidx20, align 4
@@ -141,9 +141,9 @@
 ; CHECK:   %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
 ; CHECK:   %6 = add i32 %rem, %indvar
 ; CHECK:   %arrayidx = getelementptr inbounds float, float* %dy, i32 %6
-; CHECK:   %7 = load float* %arrayidx, align 4
+; CHECK:   %7 = load float, float* %arrayidx, align 4
 ; CHECK:   %arrayidx1 = getelementptr inbounds float, float* %dx, i32 %6
-; CHECK:   %8 = load float* %arrayidx1, align 4
+; CHECK:   %8 = load float, float* %arrayidx1, align 4
 ; CHECK:   %mul = fmul float %8, %da
 ; CHECK:   %add = fadd float %7, %mul
 ; CHECK:   store float %add, float* %arrayidx, align 4
diff --git a/llvm/test/Transforms/LoopReroll/reduction.ll b/llvm/test/Transforms/LoopReroll/reduction.ll
index 559f456..1915185 100644
--- a/llvm/test/Transforms/LoopReroll/reduction.ll
+++ b/llvm/test/Transforms/LoopReroll/reduction.ll
@@ -10,19 +10,19 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %r.029 = phi i32 [ 0, %entry ], [ %add12, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %r.029
   %1 = or i64 %indvars.iv, 1
   %arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %1
-  %2 = load i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx3, align 4
   %add4 = add nsw i32 %add, %2
   %3 = or i64 %indvars.iv, 2
   %arrayidx7 = getelementptr inbounds i32, i32* %x, i64 %3
-  %4 = load i32* %arrayidx7, align 4
+  %4 = load i32, i32* %arrayidx7, align 4
   %add8 = add nsw i32 %add4, %4
   %5 = or i64 %indvars.iv, 3
   %arrayidx11 = getelementptr inbounds i32, i32* %x, i64 %5
-  %6 = load i32* %arrayidx11, align 4
+  %6 = load i32, i32* %arrayidx11, align 4
   %add12 = add nsw i32 %add8, %6
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
   %7 = trunc i64 %indvars.iv.next to i32
@@ -35,7 +35,7 @@
 ; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
 ; CHECK: %r.029 = phi i32 [ 0, %entry ], [ %add, %for.body ]
 ; CHECK: %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvar
-; CHECK: %0 = load i32* %arrayidx, align 4
+; CHECK: %0 = load i32, i32* %arrayidx, align 4
 ; CHECK: %add = add nsw i32 %0, %r.029
 ; CHECK: %indvar.next = add i64 %indvar, 1
 ; CHECK: %exitcond = icmp eq i64 %indvar, 399
@@ -55,19 +55,19 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %r.029 = phi float [ 0.0, %entry ], [ %add12, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %add = fadd float %0, %r.029
   %1 = or i64 %indvars.iv, 1
   %arrayidx3 = getelementptr inbounds float, float* %x, i64 %1
-  %2 = load float* %arrayidx3, align 4
+  %2 = load float, float* %arrayidx3, align 4
   %add4 = fadd float %add, %2
   %3 = or i64 %indvars.iv, 2
   %arrayidx7 = getelementptr inbounds float, float* %x, i64 %3
-  %4 = load float* %arrayidx7, align 4
+  %4 = load float, float* %arrayidx7, align 4
   %add8 = fadd float %add4, %4
   %5 = or i64 %indvars.iv, 3
   %arrayidx11 = getelementptr inbounds float, float* %x, i64 %5
-  %6 = load float* %arrayidx11, align 4
+  %6 = load float, float* %arrayidx11, align 4
   %add12 = fadd float %add8, %6
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
   %7 = trunc i64 %indvars.iv.next to i32
@@ -80,7 +80,7 @@
 ; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
 ; CHECK: %r.029 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
 ; CHECK: %arrayidx = getelementptr inbounds float, float* %x, i64 %indvar
-; CHECK: %0 = load float* %arrayidx, align 4
+; CHECK: %0 = load float, float* %arrayidx, align 4
 ; CHECK: %add = fadd float %0, %r.029
 ; CHECK: %indvar.next = add i64 %indvar, 1
 ; CHECK: %exitcond = icmp eq i64 %indvar, 399
@@ -100,19 +100,19 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %r.029 = phi i32 [ 0, %entry ], [ %add12, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %0
   %1 = or i64 %indvars.iv, 1
   %arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %1
-  %2 = load i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx3, align 4
   %add4 = add nsw i32 %add, %2
   %3 = or i64 %indvars.iv, 2
   %arrayidx7 = getelementptr inbounds i32, i32* %x, i64 %3
-  %4 = load i32* %arrayidx7, align 4
+  %4 = load i32, i32* %arrayidx7, align 4
   %add8 = add nsw i32 %add4, %4
   %5 = or i64 %indvars.iv, 3
   %arrayidx11 = getelementptr inbounds i32, i32* %x, i64 %5
-  %6 = load i32* %arrayidx11, align 4
+  %6 = load i32, i32* %arrayidx11, align 4
   %add12 = add nsw i32 %add8, %6
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
   %7 = trunc i64 %indvars.iv.next to i32
diff --git a/llvm/test/Transforms/LoopRotate/PhiRename-1.ll b/llvm/test/Transforms/LoopRotate/PhiRename-1.ll
index 098fd98..8ec0fbf 100644
--- a/llvm/test/Transforms/LoopRotate/PhiRename-1.ll
+++ b/llvm/test/Transforms/LoopRotate/PhiRename-1.ll
@@ -36,59 +36,59 @@
 	%op = alloca %struct.operator*, align 4		; <%struct.operator**> [#uses=3]
 	store i32 %arity, i32* %arity_addr
 	store i32 0, i32* %c
-	%tmp1 = load %struct.list** @operators		; <%struct.list*> [#uses=1]
+	%tmp1 = load %struct.list*, %struct.list** @operators		; <%struct.list*> [#uses=1]
 	store %struct.list* %tmp1, %struct.list** %l
 	br label %bb21
 
 bb:		; preds = %bb21
 	%tmp3 = getelementptr %struct.list, %struct.list* %tmp22, i32 0, i32 0		; <i8**> [#uses=1]
-	%tmp4 = load i8** %tmp3		; <i8*> [#uses=1]
+	%tmp4 = load i8*, i8** %tmp3		; <i8*> [#uses=1]
 	%tmp45 = bitcast i8* %tmp4 to %struct.operator*		; <%struct.operator*> [#uses=1]
 	store %struct.operator* %tmp45, %struct.operator** %op
-	%tmp6 = load %struct.operator** %op		; <%struct.operator*> [#uses=1]
+	%tmp6 = load %struct.operator*, %struct.operator** %op		; <%struct.operator*> [#uses=1]
 	%tmp7 = getelementptr %struct.operator, %struct.operator* %tmp6, i32 0, i32 5		; <i32*> [#uses=1]
-	%tmp8 = load i32* %tmp7		; <i32> [#uses=1]
-	%tmp9 = load i32* %arity_addr		; <i32> [#uses=1]
+	%tmp8 = load i32, i32* %tmp7		; <i32> [#uses=1]
+	%tmp9 = load i32, i32* %arity_addr		; <i32> [#uses=1]
 	icmp eq i32 %tmp8, %tmp9		; <i1>:0 [#uses=1]
 	zext i1 %0 to i8		; <i8>:1 [#uses=1]
 	icmp ne i8 %1, 0		; <i1>:2 [#uses=1]
 	br i1 %2, label %cond_true, label %cond_next
 
 cond_true:		; preds = %bb
-	%tmp10 = load %struct.operator** %op		; <%struct.operator*> [#uses=1]
+	%tmp10 = load %struct.operator*, %struct.operator** %op		; <%struct.operator*> [#uses=1]
 	%tmp11 = getelementptr %struct.operator, %struct.operator* %tmp10, i32 0, i32 2		; <i32*> [#uses=1]
-	%tmp12 = load i32* %tmp11		; <i32> [#uses=1]
-	%tmp13 = load %struct.FILE** @outfile		; <%struct.FILE*> [#uses=1]
+	%tmp12 = load i32, i32* %tmp11		; <i32> [#uses=1]
+	%tmp13 = load %struct.FILE*, %struct.FILE** @outfile		; <%struct.FILE*> [#uses=1]
 	%tmp14 = getelementptr [11 x i8], [11 x i8]* @str1, i32 0, i32 0		; <i8*> [#uses=1]
 	%tmp15 = call i32 (%struct.FILE*, i8*, ...)* @fprintf( %struct.FILE* %tmp13, i8* %tmp14, i32 %tmp12 )		; <i32> [#uses=0]
-	%tmp16 = load i32* %c		; <i32> [#uses=1]
+	%tmp16 = load i32, i32* %c		; <i32> [#uses=1]
 	%tmp17 = add i32 %tmp16, 1		; <i32> [#uses=1]
 	store i32 %tmp17, i32* %c
 	br label %cond_next
 
 cond_next:		; preds = %cond_true, %bb
 	%tmp19 = getelementptr %struct.list, %struct.list* %tmp22, i32 0, i32 1		; <%struct.list**> [#uses=1]
-	%tmp20 = load %struct.list** %tmp19		; <%struct.list*> [#uses=1]
+	%tmp20 = load %struct.list*, %struct.list** %tmp19		; <%struct.list*> [#uses=1]
 	store %struct.list* %tmp20, %struct.list** %l
 	br label %bb21
 
 bb21:		; preds = %cond_next, %entry
         %l.in = phi %struct.list** [ @operators, %entry ], [ %tmp19, %cond_next ]
-	%tmp22 = load %struct.list** %l.in		; <%struct.list*> [#uses=1]
+	%tmp22 = load %struct.list*, %struct.list** %l.in		; <%struct.list*> [#uses=1]
 	icmp ne %struct.list* %tmp22, null		; <i1>:3 [#uses=1]
 	zext i1 %3 to i8		; <i8>:4 [#uses=1]
 	icmp ne i8 %4, 0		; <i1>:5 [#uses=1]
 	br i1 %5, label %bb, label %bb23
 
 bb23:		; preds = %bb21
-	%tmp24 = load i32* %c		; <i32> [#uses=1]
+	%tmp24 = load i32, i32* %c		; <i32> [#uses=1]
 	store i32 %tmp24, i32* %tmp
-	%tmp25 = load i32* %tmp		; <i32> [#uses=1]
+	%tmp25 = load i32, i32* %tmp		; <i32> [#uses=1]
 	store i32 %tmp25, i32* %retval
 	br label %return
 
 return:		; preds = %bb23
-	%retval26 = load i32* %retval		; <i32> [#uses=1]
+	%retval26 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval26
 }
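
When the loaded value is itself a pointer, as in the %struct.list and %struct.operator loads above, the explicit type makes the result type readable at a glance instead of requiring the reader to strip one level of indirection from the operand. A minimal sketch reusing the @operators global from the test above (otherwise illustrative):

  %struct.list = type opaque
  @operators = external global %struct.list*

  define %struct.list* @head() {
    %l = load %struct.list*, %struct.list** @operators
    ret %struct.list* %l
  }
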
 
diff --git a/llvm/test/Transforms/LoopRotate/alloca.ll b/llvm/test/Transforms/LoopRotate/alloca.ll
index fd217ea..bbcfb39 100644
--- a/llvm/test/Transforms/LoopRotate/alloca.ll
+++ b/llvm/test/Transforms/LoopRotate/alloca.ll
@@ -14,7 +14,7 @@
 
 define void @test() {
 entry:
-  %end = load i16* @e
+  %end = load i16, i16* @e
   br label %loop
 
 loop:
diff --git a/llvm/test/Transforms/LoopRotate/dbgvalue.ll b/llvm/test/Transforms/LoopRotate/dbgvalue.ll
index 1b9e9bf..1d6a864 100644
--- a/llvm/test/Transforms/LoopRotate/dbgvalue.ll
+++ b/llvm/test/Transforms/LoopRotate/dbgvalue.ll
@@ -61,12 +61,12 @@
   br i1 %cmp, label %for.end, label %for.body
 
 for.body:
-  %0 = load i64* @channelColumns, align 8
+  %0 = load i64, i64* @channelColumns, align 8
   %mul = mul i64 %0, %row
   %add = add i64 %mul, %i.0
-  %1 = load i8** @horzPlane, align 8
+  %1 = load i8*, i8** @horzPlane, align 8
   %arrayidx = getelementptr inbounds i8, i8* %1, i64 %add
-  %2 = load i8* %arrayidx, align 1
+  %2 = load i8, i8* %arrayidx, align 1
   %tobool = icmp eq i8 %2, 0
   br i1 %tobool, label %for.inc, label %for.end
 
diff --git a/llvm/test/Transforms/LoopRotate/indirectbr.ll b/llvm/test/Transforms/LoopRotate/indirectbr.ll
index 9c82aa8..2ccc546 100644
--- a/llvm/test/Transforms/LoopRotate/indirectbr.ll
+++ b/llvm/test/Transforms/LoopRotate/indirectbr.ll
@@ -12,7 +12,7 @@
   br label %indirectgoto
 
 run_opcode:                                       ; preds = %indirectgoto
-  %tmp276 = load i8* undef                        ; <i8> [#uses=1]
+  %tmp276 = load i8, i8* undef                        ; <i8> [#uses=1]
   br label %indirectgoto
 
 if.else295:                                       ; preds = %divide_late
diff --git a/llvm/test/Transforms/LoopRotate/multiple-exits.ll b/llvm/test/Transforms/LoopRotate/multiple-exits.ll
index e97d3df..f31ed7f 100644
--- a/llvm/test/Transforms/LoopRotate/multiple-exits.ll
+++ b/llvm/test/Transforms/LoopRotate/multiple-exits.ll
@@ -22,7 +22,7 @@
 land.rhs:                                         ; preds = %for.cond1
   %conv = zext i32 %i.1 to i64
   %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* %a, i64 0, i64 %conv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add i32 %0, %sum.1
   %cmp4 = icmp ugt i32 %add, 1000
   %inc = add i32 %i.1, 1
diff --git a/llvm/test/Transforms/LoopRotate/nosimplifylatch.ll b/llvm/test/Transforms/LoopRotate/nosimplifylatch.ll
index da3ec55..07ff664 100644
--- a/llvm/test/Transforms/LoopRotate/nosimplifylatch.ll
+++ b/llvm/test/Transforms/LoopRotate/nosimplifylatch.ll
@@ -18,8 +18,8 @@
   br i1 %lnot.i, label %for.end, label %for.body
 
 for.body:                                         ; preds = %for.cond
-  %0 = load i32* %coerce.val.ip9, align 4
-  %1 = load i32* %__value_, align 4
+  %0 = load i32, i32* %coerce.val.ip9, align 4
+  %1 = load i32, i32* %__value_, align 4
   %cmp = icmp eq i32 %0, %1
   br i1 %cmp, label %for.end, label %for.inc
 
diff --git a/llvm/test/Transforms/LoopRotate/phi-duplicate.ll b/llvm/test/Transforms/LoopRotate/phi-duplicate.ll
index 88170d6..46ee596 100644
--- a/llvm/test/Transforms/LoopRotate/phi-duplicate.ll
+++ b/llvm/test/Transforms/LoopRotate/phi-duplicate.ll
@@ -14,10 +14,10 @@
 
 for.body:                                         ; preds = %for.cond
   %arrayidx = getelementptr inbounds double, double* %G, i64 %j.0 ; <double*> [#uses=1]
-  %tmp3 = load double* %arrayidx                  ; <double> [#uses=1]
+  %tmp3 = load double, double* %arrayidx                  ; <double> [#uses=1]
   %sub = sub i64 %j.0, 1                          ; <i64> [#uses=1]
   %arrayidx6 = getelementptr inbounds double, double* %G, i64 %sub ; <double*> [#uses=1]
-  %tmp7 = load double* %arrayidx6                 ; <double> [#uses=1]
+  %tmp7 = load double, double* %arrayidx6                 ; <double> [#uses=1]
   %add = fadd double %tmp3, %tmp7                 ; <double> [#uses=1]
   %arrayidx10 = getelementptr inbounds double, double* %G, i64 %j.0 ; <double*> [#uses=1]
   store double %add, double* %arrayidx10
diff --git a/llvm/test/Transforms/LoopRotate/simplifylatch.ll b/llvm/test/Transforms/LoopRotate/simplifylatch.ll
index 4bb3c79..215622f 100644
--- a/llvm/test/Transforms/LoopRotate/simplifylatch.ll
+++ b/llvm/test/Transforms/LoopRotate/simplifylatch.ll
@@ -14,7 +14,7 @@
 	%mode.0 = phi i8 [ 0, %entry ], [ %indvar.next, %bb4 ]		; <i8> [#uses=4]
 	zext i8 %mode.0 to i32		; <i32>:1 [#uses=1]
 	getelementptr [4 x i32], [4 x i32]* @mode_table, i32 0, i32 %1		; <i32*>:2 [#uses=1]
-	load i32* %2, align 4		; <i32>:3 [#uses=1]
+	load i32, i32* %2, align 4		; <i32>:3 [#uses=1]
 	icmp eq i32 %3, %0		; <i1>:4 [#uses=1]
 	br i1 %4, label %bb1, label %bb2
 
@@ -40,7 +40,7 @@
 
 ;CHECK: for.body.lr.ph:
 ;CHECK-NEXT:  %arrayidx1 = getelementptr inbounds i8, i8* %CurPtr, i64 0
-;CHECK-NEXT:  %0 = load i8* %arrayidx1, align 1
+;CHECK-NEXT:  %0 = load i8, i8* %arrayidx1, align 1
 ;CHECK-NEXT:  %conv2 = sext i8 %0 to i32
 ;CHECK-NEXT:  br label %for.body
 
@@ -56,10 +56,10 @@
 for.body:					  ; preds = %for.cond
   %idxprom = zext i32 %i.0 to i64
   %arrayidx = getelementptr inbounds i8, i8* %CurPtr, i64 %idxprom
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = sext i8 %0 to i32
   %arrayidx1 = getelementptr inbounds i8, i8* %CurPtr, i64 0
-  %1 = load i8* %arrayidx1, align 1
+  %1 = load i8, i8* %arrayidx1, align 1
   %conv2 = sext i8 %1 to i32
   %cmp3 = icmp ne i32 %conv, %conv2
   br i1 %cmp3, label %return, label %for.inc
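
Because the migration is purely textual, it also covers the legacy implicitly-numbered form seen in simplifylatch.ll above, where a load appears as a bare statement and its result is referenced by sequence number. A minimal sketch (illustrative; with every other value named, the unnamed load result is %0):

  define i32 @implicit(i32* %p) {
  entry:
    load i32, i32* %p, align 4
    ret i32 %0
  }
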
diff --git a/llvm/test/Transforms/LoopSimplify/2003-08-15-PreheadersFail.ll b/llvm/test/Transforms/LoopSimplify/2003-08-15-PreheadersFail.ll
index 772cbb3..c336528 100644
--- a/llvm/test/Transforms/LoopSimplify/2003-08-15-PreheadersFail.ll
+++ b/llvm/test/Transforms/LoopSimplify/2003-08-15-PreheadersFail.ll
@@ -9,28 +9,28 @@
 	br label %loopentry.0
 loopentry.0:		; preds = %else.26, %0
 	store i32* getelementptr ([16386 x i32]* @yy_state_buf, i64 0, i64 0), i32** @yy_state_ptr
-	%tmp.35 = load i32** @yy_state_ptr		; <i32*> [#uses=2]
+	%tmp.35 = load i32*, i32** @yy_state_ptr		; <i32*> [#uses=2]
 	%inc.0 = getelementptr i32, i32* %tmp.35, i64 1		; <i32*> [#uses=1]
 	store i32* %inc.0, i32** @yy_state_ptr
-	%tmp.36 = load i32* null		; <i32> [#uses=1]
+	%tmp.36 = load i32, i32* null		; <i32> [#uses=1]
 	store i32 %tmp.36, i32* %tmp.35
 	br label %loopexit.2
 loopexit.2:		; preds = %else.26, %loopexit.2, %loopentry.0
 	store i8* null, i8** null
-	%tmp.91 = load i32* null		; <i32> [#uses=1]
+	%tmp.91 = load i32, i32* null		; <i32> [#uses=1]
 	%tmp.92 = sext i32 %tmp.91 to i64		; <i64> [#uses=1]
 	%tmp.93 = getelementptr [787 x i16], [787 x i16]* @yy_base, i64 0, i64 %tmp.92		; <i16*> [#uses=1]
-	%tmp.94 = load i16* %tmp.93		; <i16> [#uses=1]
+	%tmp.94 = load i16, i16* %tmp.93		; <i16> [#uses=1]
 	%tmp.95 = icmp ne i16 %tmp.94, 4394		; <i1> [#uses=1]
 	br i1 %tmp.95, label %loopexit.2, label %yy_find_action
 yy_find_action:		; preds = %else.26, %loopexit.2
 	br label %loopentry.3
 loopentry.3:		; preds = %then.9, %shortcirc_done.0, %yy_find_action
-	%tmp.105 = load i32* @yy_lp		; <i32> [#uses=1]
+	%tmp.105 = load i32, i32* @yy_lp		; <i32> [#uses=1]
 	%tmp.106 = icmp ne i32 %tmp.105, 0		; <i1> [#uses=1]
 	br i1 %tmp.106, label %shortcirc_next.0, label %shortcirc_done.0
 shortcirc_next.0:		; preds = %loopentry.3
-	%tmp.114 = load i16* null		; <i16> [#uses=1]
+	%tmp.114 = load i16, i16* null		; <i16> [#uses=1]
 	%tmp.115 = sext i16 %tmp.114 to i32		; <i32> [#uses=1]
 	%tmp.116 = icmp slt i32 0, %tmp.115		; <i1> [#uses=1]
 	br label %shortcirc_done.0
@@ -38,7 +38,7 @@
 	%shortcirc_val.0 = phi i1 [ false, %loopentry.3 ], [ %tmp.116, %shortcirc_next.0 ]		; <i1> [#uses=1]
 	br i1 %shortcirc_val.0, label %else.0, label %loopentry.3
 else.0:		; preds = %shortcirc_done.0
-	%tmp.144 = load i32* null		; <i32> [#uses=1]
+	%tmp.144 = load i32, i32* null		; <i32> [#uses=1]
 	%tmp.145 = and i32 %tmp.144, 8192		; <i32> [#uses=1]
 	%tmp.146 = icmp ne i32 %tmp.145, 0		; <i1> [#uses=1]
 	br i1 %tmp.146, label %then.9, label %else.26
diff --git a/llvm/test/Transforms/LoopSimplify/2003-12-10-ExitBlocksProblem.ll b/llvm/test/Transforms/LoopSimplify/2003-12-10-ExitBlocksProblem.ll
index fb39f05..32b6322 100644
--- a/llvm/test/Transforms/LoopSimplify/2003-12-10-ExitBlocksProblem.ll
+++ b/llvm/test/Transforms/LoopSimplify/2003-12-10-ExitBlocksProblem.ll
@@ -14,15 +14,15 @@
 	store i32 123, i32* @G
 	br label %loopentry.i
 loopentry.i:		; preds = %endif.1.i, %entry
-	%tmp.0.i = load i32* @G		; <i32> [#uses=1]
+	%tmp.0.i = load i32, i32* @G		; <i32> [#uses=1]
 	%tmp.1.i = icmp eq i32 %tmp.0.i, 123		; <i1> [#uses=1]
 	br i1 %tmp.1.i, label %Out.i, label %endif.0.i
 endif.0.i:		; preds = %loopentry.i
-	%tmp.3.i = load i32* @G		; <i32> [#uses=1]
+	%tmp.3.i = load i32, i32* @G		; <i32> [#uses=1]
 	%tmp.4.i = icmp eq i32 %tmp.3.i, 126		; <i1> [#uses=1]
 	br i1 %tmp.4.i, label %ExitBlock.i, label %endif.1.i
 endif.1.i:		; preds = %endif.0.i
-	%tmp.6.i = load i32* @G		; <i32> [#uses=1]
+	%tmp.6.i = load i32, i32* @G		; <i32> [#uses=1]
 	%inc.i = add i32 %tmp.6.i, 1		; <i32> [#uses=1]
 	store i32 %inc.i, i32* @G
 	br label %loopentry.i
@@ -30,7 +30,7 @@
 	store i32 0, i32* @G
 	br label %ExitBlock.i
 ExitBlock.i:		; preds = %Out.i, %endif.0.i
-	%tmp.7.i = load i32* @G		; <i32> [#uses=1]
+	%tmp.7.i = load i32, i32* @G		; <i32> [#uses=1]
 	ret i32 %tmp.7.i
 }
 
diff --git a/llvm/test/Transforms/LoopSimplify/ashr-crash.ll b/llvm/test/Transforms/LoopSimplify/ashr-crash.ll
index c58903d..b5cc144 100644
--- a/llvm/test/Transforms/LoopSimplify/ashr-crash.ll
+++ b/llvm/test/Transforms/LoopSimplify/ashr-crash.ll
@@ -51,10 +51,10 @@
   br i1 %cmp2, label %for.body3, label %for.inc7
 
 for.body3:                                        ; preds = %for.cond1
-  %0 = load i32* @c, align 4
+  %0 = load i32, i32* @c, align 4
   %cmp4 = icmp sge i32 %storemerge1, %0
   %conv = zext i1 %cmp4 to i32
-  %1 = load i32* @d, align 4
+  %1 = load i32, i32* @d, align 4
   %add = add nsw i32 %conv, %1
   %sext = shl i32 %add, 16
   %conv6 = ashr exact i32 %sext, 16
@@ -63,7 +63,7 @@
   br label %for.cond1
 
 for.inc7:                                         ; preds = %for.cond1
-  %2 = load i32* @d, align 4
+  %2 = load i32, i32* @d, align 4
   %inc8 = add nsw i32 %2, 1
   br label %for.cond
 
diff --git a/llvm/test/Transforms/LoopSimplify/merge-exits.ll b/llvm/test/Transforms/LoopSimplify/merge-exits.ll
index 0e1f0be..5cdf814 100644
--- a/llvm/test/Transforms/LoopSimplify/merge-exits.ll
+++ b/llvm/test/Transforms/LoopSimplify/merge-exits.ll
@@ -15,18 +15,18 @@
 
 define float @test1(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind {
 entry:
-	%t0 = load float* %peakWeight, align 4
+	%t0 = load float, float* %peakWeight, align 4
 	br label %bb1
 
 bb:		; preds = %bb2
 	%t1 = sext i32 %hiPart.0 to i64
 	%t2 = getelementptr float, float* %pTmp1, i64 %t1
-	%t3 = load float* %t2, align 4
+	%t3 = load float, float* %t2, align 4
 	%t4 = fadd float %t3, %distERBhi.0
 	%t5 = add i32 %hiPart.0, 1
 	%t6 = sext i32 %t5 to i64
 	%t7 = getelementptr float, float* %peakWeight, i64 %t6
-	%t8 = load float* %t7, align 4
+	%t8 = load float, float* %t7, align 4
 	%t9 = fadd float %t8, %peakCount.0
 	br label %bb1
 
diff --git a/llvm/test/Transforms/LoopSimplify/phi-node-simplify.ll b/llvm/test/Transforms/LoopSimplify/phi-node-simplify.ll
index 52b7aa5..676f467 100644
--- a/llvm/test/Transforms/LoopSimplify/phi-node-simplify.ll
+++ b/llvm/test/Transforms/LoopSimplify/phi-node-simplify.ll
@@ -28,13 +28,13 @@
 	%b.1.4 = getelementptr i32, i32* %b.1.4.ph, i64 %gep.upgrd.3		; <i32*> [#uses=1]
 	%inc.0.rec = add i32 %b.1.4.rec, 1		; <i32> [#uses=2]
 	%inc.0 = getelementptr i32, i32* %a.0.4.ph, i32 %inc.0.rec		; <i32*> [#uses=2]
-	%tmp.13 = load i32* %a.0.4		; <i32> [#uses=1]
+	%tmp.13 = load i32, i32* %a.0.4		; <i32> [#uses=1]
 	%inc.1 = getelementptr i32, i32* %b.1.4.ph, i32 %inc.0.rec		; <i32*> [#uses=1]
-	%tmp.15 = load i32* %b.1.4		; <i32> [#uses=1]
-	%tmp.18 = load i32* %c.2.4		; <i32> [#uses=1]
+	%tmp.15 = load i32, i32* %b.1.4		; <i32> [#uses=1]
+	%tmp.18 = load i32, i32* %c.2.4		; <i32> [#uses=1]
 	%tmp.16 = mul i32 %tmp.15, %tmp.13		; <i32> [#uses=1]
 	%tmp.19 = mul i32 %tmp.16, %tmp.18		; <i32> [#uses=1]
-	%tmp.20 = load i32* @Z		; <i32> [#uses=1]
+	%tmp.20 = load i32, i32* @Z		; <i32> [#uses=1]
 	%tmp.21 = add i32 %tmp.19, %tmp.20		; <i32> [#uses=1]
 	store i32 %tmp.21, i32* @Z
 	%indvar.next = add i32 %indvar, 1		; <i32> [#uses=2]
diff --git a/llvm/test/Transforms/LoopSimplify/preserve-scev.ll b/llvm/test/Transforms/LoopSimplify/preserve-scev.ll
index 89626b2..bc6d35c 100644
--- a/llvm/test/Transforms/LoopSimplify/preserve-scev.ll
+++ b/llvm/test/Transforms/LoopSimplify/preserve-scev.ll
@@ -15,7 +15,7 @@
   %0 = phi i32 [ 0, %entry ], [ %add, %if.end ], [ %add, %if.then5 ]
   %add = add i32 %0, 1
   %cmp = icmp slt i32 %0, 1
-  %tmp1 = load i32* @maxStat, align 4
+  %tmp1 = load i32, i32* @maxStat, align 4
   br i1 %cmp, label %for.body, label %for.cond14.preheader
 
 for.cond14.preheader:                             ; preds = %for.cond
@@ -39,7 +39,7 @@
   %i13.027 = phi i32 [ %1, %for.body18 ], [ 0, %for.cond14.preheader ]
   call void @foo() nounwind
   %1 = add nsw i32 %i13.027, 1
-  %tmp16 = load i32* @maxStat, align 4
+  %tmp16 = load i32, i32* @maxStat, align 4
   %cmp17 = icmp slt i32 %1, %tmp16
   br i1 %cmp17, label %for.body18, label %return
 
diff --git a/llvm/test/Transforms/LoopStrengthReduce/2005-08-15-AddRecIV.ll b/llvm/test/Transforms/LoopStrengthReduce/2005-08-15-AddRecIV.ll
index f13483c..7ee1e63 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/2005-08-15-AddRecIV.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/2005-08-15-AddRecIV.ll
@@ -48,7 +48,7 @@
 loopentry.4:		; preds = %loopentry.4, %loopexit.3
 	%indvar340 = phi i32 [ 0, %loopexit.3 ], [ %indvar.next341, %loopentry.4 ]		; <i32> [#uses=2]
 	%tmp. = add i32 %indvar340, %indvar342		; <i32> [#uses=1]
-	%tmp.526 = load i32** null		; <i32*> [#uses=1]
+	%tmp.526 = load i32*, i32** null		; <i32*> [#uses=1]
 	%gep.upgrd.1 = zext i32 %tmp. to i64		; <i64> [#uses=1]
 	%tmp.528 = getelementptr i32, i32* %tmp.526, i64 %gep.upgrd.1		; <i32*> [#uses=1]
 	store i32 0, i32* %tmp.528
diff --git a/llvm/test/Transforms/LoopStrengthReduce/2005-08-17-OutOfLoopVariant.ll b/llvm/test/Transforms/LoopStrengthReduce/2005-08-17-OutOfLoopVariant.ll
index f1c523a..3e52dff 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/2005-08-17-OutOfLoopVariant.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/2005-08-17-OutOfLoopVariant.ll
@@ -6,7 +6,7 @@
 	br label %loopentry.1
 loopentry.1:		; preds = %loopentry.1, %loopentry.1.outer
 	%i.3 = phi i32 [ 0, %loopentry.1.outer ], [ %i.3.be, %loopentry.1 ]		; <i32> [#uses=2]
-	%tmp.390 = load i32* null		; <i32> [#uses=1]
+	%tmp.390 = load i32, i32* null		; <i32> [#uses=1]
 	%tmp.392 = mul i32 %tmp.390, %j.2.1.ph		; <i32> [#uses=1]
 	%tmp.394 = add i32 %tmp.392, %i.3		; <i32> [#uses=1]
 	%i.3.be = add i32 %i.3, 1		; <i32> [#uses=1]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll b/llvm/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll
index ce56bd3..11b3171 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll
@@ -22,7 +22,7 @@
 define i32 @main() nounwind {
 entry:
 	tail call void @func_1( ) nounwind
-	load volatile i16* @g_3, align 2		; <i16>:0 [#uses=1]
+	load volatile i16, i16* @g_3, align 2		; <i16>:0 [#uses=1]
 	zext i16 %0 to i32		; <i32>:1 [#uses=1]
 	tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), i32 %1 ) nounwind		; <i32>:2 [#uses=0]
 	ret i32 0
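
The volatile qualifier keeps its place between load and the explicit type, exactly as in the @g_3 load just above; atomic loads follow the same pattern. A minimal sketch (illustrative names):

  @g = global i16 0

  define i16 @load_v() {
    %v = load volatile i16, i16* @g, align 2
    ret i16 %v
  }
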
diff --git a/llvm/test/Transforms/LoopStrengthReduce/2008-09-09-Overflow.ll b/llvm/test/Transforms/LoopStrengthReduce/2008-09-09-Overflow.ll
index 1ee6b5c..5fb157b 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/2008-09-09-Overflow.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/2008-09-09-Overflow.ll
@@ -21,7 +21,7 @@
 	%indvar = phi i16 [ 0, %entry ], [ %indvar.next, %bb ]		; <i16> [#uses=2]
 	%tmp = sub i16 0, %indvar		; <i16> [#uses=1]
 	%tmp27 = trunc i16 %tmp to i8		; <i8> [#uses=1]
-	load i32* @g_19, align 4		; <i32>:0 [#uses=2]
+	load i32, i32* @g_19, align 4		; <i32>:0 [#uses=2]
 	add i32 %0, 1		; <i32>:1 [#uses=1]
 	store i32 %1, i32* @g_19, align 4
 	trunc i32 %0 to i8		; <i8>:2 [#uses=1]
@@ -40,7 +40,7 @@
 define i32 @main() nounwind {
 entry:
 	tail call void @func_1( ) nounwind
-	load i32* @g_19, align 4		; <i32>:0 [#uses=1]
+	load i32, i32* @g_19, align 4		; <i32>:0 [#uses=1]
 	tail call i32 (i8*, ...)* @printf( i8* getelementptr ([4 x i8]* @"\01LC", i32 0, i32 0), i32 %0 ) nounwind		; <i32>:1 [#uses=0]
 	ret i32 0
 }
diff --git a/llvm/test/Transforms/LoopStrengthReduce/2009-01-13-nonconstant-stride-outside-loop.ll b/llvm/test/Transforms/LoopStrengthReduce/2009-01-13-nonconstant-stride-outside-loop.ll
index b2cf818d..69e2b98 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/2009-01-13-nonconstant-stride-outside-loop.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/2009-01-13-nonconstant-stride-outside-loop.ll
@@ -26,12 +26,12 @@
 	br i1 %0, label %bb2.i3, label %nactive_heaps.exit
 
 bb2.i3:		; preds = %bb1.i
-	%1 = load %struct.obj** null, align 4		; <%struct.obj*> [#uses=1]
+	%1 = load %struct.obj*, %struct.obj** null, align 4		; <%struct.obj*> [#uses=1]
 	%2 = icmp eq %struct.obj* %1, null		; <i1> [#uses=1]
 	br i1 %2, label %nactive_heaps.exit, label %bb.i2
 
 nactive_heaps.exit:		; preds = %bb2.i3, %bb1.i
-	%3 = load i32* @heap_size, align 4		; <i32> [#uses=1]
+	%3 = load i32, i32* @heap_size, align 4		; <i32> [#uses=1]
 	%4 = mul i32 %3, %m.0.i		; <i32> [#uses=1]
 	%5 = sub i32 %4, 0		; <i32> [#uses=1]
 	%6 = tail call i32 (i8*, i8*, ...)* @sprintf(i8* null, i8* getelementptr ([39 x i8]* @"\01LC85", i32 0, i32 0), i32 %m.0.i, i32 0, i32 %5, i32 0) nounwind		; <i32> [#uses=0]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll b/llvm/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll
index a0daed5..5b5d2cd 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll
@@ -33,7 +33,7 @@
 	br i1 %exitcond10, label %bb5, label %bb2
 
 bb5:		; preds = %bb4
-	%4 = load i32* getelementptr ([32 x [256 x i32]]* @table, i32 0, i32 9, i32 132), align 16		; <i32> [#uses=1]
+	%4 = load i32, i32* getelementptr ([32 x [256 x i32]]* @table, i32 0, i32 9, i32 132), align 16		; <i32> [#uses=1]
 	%5 = icmp eq i32 %4, -1116		; <i1> [#uses=1]
 	br i1 %5, label %bb7, label %bb6
 
diff --git a/llvm/test/Transforms/LoopStrengthReduce/2011-07-19-CritEdgeBreakCrash.ll b/llvm/test/Transforms/LoopStrengthReduce/2011-07-19-CritEdgeBreakCrash.ll
index e0f6879..cf549fc 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/2011-07-19-CritEdgeBreakCrash.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/2011-07-19-CritEdgeBreakCrash.ll
@@ -29,7 +29,7 @@
   br i1 %boo2, label %indirectgoto, label %while.body.i15795
 
 while.body.i15795:                                ; preds = %while.cond.i
-  %tmp20.i = load i64* %incdec.ptr.i15793, align 8
+  %tmp20.i = load i64, i64* %incdec.ptr.i15793, align 8
   %boo1 = call i1 @foo()
   br i1 %boo1, label %while.cond.i, label %body_failed
 
diff --git a/llvm/test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll b/llvm/test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll
index 950d8e2..4388a33 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/2011-10-06-ReusePhi.ll
@@ -37,8 +37,8 @@
   %B.addr.04 = phi float* [ %B, %while.body.lr.ph ], [ %add.ptr3, %while.body ]
   %N.addr.03 = phi i32 [ %N, %while.body.lr.ph ], [ %sub, %while.body ]
   %Sum0.02 = phi float [ 0.000000e+00, %while.body.lr.ph ], [ %add, %while.body ]
-  %0 = load float* %A.addr.05, align 4
-  %1 = load float* %B.addr.04, align 4
+  %0 = load float, float* %A.addr.05, align 4
+  %1 = load float, float* %B.addr.04, align 4
   %mul = fmul float %0, %1
   %add = fadd float %Sum0.02, %mul
   %add.ptr = getelementptr inbounds float, float* %A.addr.05, i64 %idx.ext
diff --git a/llvm/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll b/llvm/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll
index 8dac982..317b0b0 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll
@@ -33,7 +33,7 @@
   %bf.459 = phi i32 [ %inc44, %for.body43 ], [ %t1, %for.body7 ]
   %inc44 = add nsw i32 %bf.459, 1
   %arrayidx45 = getelementptr inbounds [121 x i32], [121 x i32]* @b, i32 0, i32 %bf.459
-  %t2 = load i32* %arrayidx45, align 4
+  %t2 = load i32, i32* %arrayidx45, align 4
   br label %for.body43
 }
 
diff --git a/llvm/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll b/llvm/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll
index 2a723c2..62064cb 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/2012-03-15-nopreheader.ll
@@ -35,7 +35,7 @@
   %indvar65 = phi i64 [ %indvar.next66, %for.inc ], [ 0, %lor.lhs.false184 ], [ 0, %if.end152 ]
   %tmp128 = add i64 %0, %indvar65
   %s.4 = getelementptr i8, i8* %cmd, i64 %tmp128
-  %tmp195 = load i8* %s.4, align 1
+  %tmp195 = load i8, i8* %s.4, align 1
   indirectbr i8* undef, [label %return, label %land.rhs198]
 
 land.rhs198:                                      ; preds = %for.cond
diff --git a/llvm/test/Transforms/LoopStrengthReduce/2012-03-26-constexpr.ll b/llvm/test/Transforms/LoopStrengthReduce/2012-03-26-constexpr.ll
index 1baf265..ce6161c 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/2012-03-26-constexpr.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/2012-03-26-constexpr.ll
@@ -11,9 +11,9 @@
 define internal fastcc void @someFunction(%struct.this_structure_s.0.5* nocapture %scratch, i32 %stage, i32 %cbSize) nounwind {
 entry:
   %0 = getelementptr inbounds %struct.this_structure_s.0.5, %struct.this_structure_s.0.5* %scratch, i32 0, i32 4, i32 %stage
-  %1 = load i8** %0, align 4
+  %1 = load i8*, i8** %0, align 4
   %2 = getelementptr inbounds %struct.this_structure_s.0.5, %struct.this_structure_s.0.5* %scratch, i32 0, i32 5, i32 %stage
-  %3 = load i8** %2, align 4
+  %3 = load i8*, i8** %2, align 4
   %4 = getelementptr inbounds %struct.this_structure_s.0.5, %struct.this_structure_s.0.5* %scratch, i32 0, i32 2, i32 0, i32 0
   %tmp11 = shl i32 %stage, 1
   %tmp1325 = or i32 %tmp11, 1
@@ -31,9 +31,9 @@
   %scevgep10 = getelementptr i32, i32* %4, i32 %tmp928
   %scevgep12 = getelementptr %struct.this_structure_s.0.5, %struct.this_structure_s.0.5* %scratch, i32 0, i32 9, i32 %tmp11, i32 %i.12
   %scevgep14 = getelementptr %struct.this_structure_s.0.5, %struct.this_structure_s.0.5* %scratch, i32 0, i32 9, i32 %tmp1325, i32 %i.12
-  %5 = load i8* %scevgep12, align 1
+  %5 = load i8, i8* %scevgep12, align 1
   %6 = sext i8 %5 to i32
-  %7 = load i8* %scevgep14, align 1
+  %7 = load i8, i8* %scevgep14, align 1
   %8 = sext i8 %7 to i32
   store i32 0, i32* %lvar_g.13, align 4
   store i32 %8, i32* %scevgep, align 4
diff --git a/llvm/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll b/llvm/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll
index 301c845..80095c3 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll
@@ -19,10 +19,10 @@
   %l_2 = alloca [1 x i32], align 4
   %arrayidx = getelementptr inbounds [1 x i32], [1 x i32]* %l_2, i64 0, i64 0
   store i32 0, i32* %arrayidx, align 4
-  %tmp = load i32* @g_3, align 4
+  %tmp = load i32, i32* @g_3, align 4
   %idxprom = sext i32 %tmp to i64
   %arrayidx1 = getelementptr inbounds [1 x i32], [1 x i32]* %l_2, i64 0, i64 %idxprom
-  %tmp1 = load i32* %arrayidx1, align 4
+  %tmp1 = load i32, i32* %arrayidx1, align 4
   %conv.i.i = and i32 %tmp1, 65535
   %tobool.i.i.i = icmp ne i32 %tmp, 0
   br label %codeRepl
diff --git a/llvm/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll b/llvm/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
index 3030d3d..7cac15f 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/2012-07-18-LimitReassociate.ll
@@ -77,7 +77,7 @@
 
 bb22:                                             ; preds = %bb17
   %tmp23 = getelementptr inbounds %struct.jim, %struct.jim* @global3, i64 0, i32 3, i64 0
-  %tmp24 = load i8* %tmp23, align 1
+  %tmp24 = load i8, i8* %tmp23, align 1
   %tmp25 = icmp eq i8 %tmp24, 58
   br i1 %tmp25, label %bb30, label %bb26
 
@@ -123,7 +123,7 @@
 
 bb48:                                             ; preds = %bb43
   %tmp49 = add i64 %tmp44, %tmp37
-  %tmp50 = load i8* undef, align 1
+  %tmp50 = load i8, i8* undef, align 1
   %tmp51 = icmp eq i8 %tmp50, 58
   br i1 %tmp51, label %bb55, label %bb52
 
@@ -166,11 +166,11 @@
 
 bb69:                                             ; preds = %bb68
   tail call void (...)* @snork(i8* getelementptr inbounds ([52 x i8]* @global1, i64 0, i64 0), i32 2071) nounwind
-  %tmp70 = load i32* getelementptr inbounds (%struct.snork* @global, i64 0, i32 2), align 4
+  %tmp70 = load i32, i32* getelementptr inbounds (%struct.snork* @global, i64 0, i32 2), align 4
   unreachable
 
 bb71:                                             ; preds = %bb68
-  %tmp72 = load i32* getelementptr inbounds (%struct.snork* @global, i64 0, i32 4), align 4
+  %tmp72 = load i32, i32* getelementptr inbounds (%struct.snork* @global, i64 0, i32 4), align 4
   %tmp73 = icmp eq i32 undef, 0
   br i1 %tmp73, label %bb247, label %bb74
 
@@ -462,7 +462,7 @@
 bb226:                                            ; preds = %bb221
   %tmp227 = add i64 %tmp222, %tmp216
   %tmp228 = getelementptr inbounds %struct.jim, %struct.jim* @global3, i64 0, i32 3, i64 %tmp227
-  %tmp229 = load i8* %tmp228, align 1
+  %tmp229 = load i8, i8* %tmp228, align 1
   br i1 false, label %bb233, label %bb230
 
 bb230:                                            ; preds = %bb226
diff --git a/llvm/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll b/llvm/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
index 2c2e0a4..dcd0681 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
@@ -73,7 +73,7 @@
 
 bb63:                                             ; preds = %bb61
   %tmp64 = getelementptr inbounds i8, i8* %tmp3, i64 %i.0.i
-  %tmp65 = load i8* %tmp64, align 1
+  %tmp65 = load i8, i8* %tmp64, align 1
   %tmp67 = add i64 %i.0.i, 1
   br i1 undef, label %_ZNK4llvm9StringRef4findEcm.exit.loopexit, label %bb61
 
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll
index c0ebc97..2120b2a 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-memcpy.ll
@@ -20,7 +20,7 @@
   %pDst.05 = phi i64* [ inttoptr (i64 6442450944 to i64*), %entry ], [ %incdec.ptr1, %while.body ]
   %pSrc.04 = phi i64* [ inttoptr (i64 4294967296 to i64*), %entry ], [ %incdec.ptr, %while.body ]
   %incdec.ptr = getelementptr inbounds i64, i64* %pSrc.04, i64 1
-  %tmp = load volatile i64* %pSrc.04, align 8
+  %tmp = load volatile i64, i64* %pSrc.04, align 8
   %incdec.ptr1 = getelementptr inbounds i64, i64* %pDst.05, i64 1
   store volatile i64 %tmp, i64* %pDst.05, align 8
   %sub = add i64 %len.06, -8
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/req-regs.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/req-regs.ll
index 217896e..c877ace 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/req-regs.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/req-regs.ll
@@ -11,7 +11,7 @@
 define void @do_integer_add(i64 %iterations, i8* nocapture readonly %cookie) {
 entry:
   %N = bitcast i8* %cookie to i32*
-  %0 = load i32* %N, align 4
+  %0 = load i32, i32* %N, align 4
   %add = add nsw i32 %0, 57
   %cmp56 = icmp eq i64 %iterations, 0
   br i1 %cmp56, label %while.end, label %for.cond.preheader.preheader
diff --git a/llvm/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll b/llvm/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
index bc654d3..56ff69c 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/ARM/2012-06-15-lsr-noaddrmode.ll
@@ -50,7 +50,7 @@
 ; CHECK: ldr{{.*}}lsl #2
 define i32 @main() nounwind ssp {
 entry:
-  %v0 = load i32* @ncol, align 4
+  %v0 = load i32, i32* @ncol, align 4
   %v1 = tail call i32* @getptr() nounwind
   %cmp10.i = icmp eq i32 %v0, 0
   br label %while.cond.outer
@@ -64,12 +64,12 @@
   br label %while.body
 
 while.body:
-  %v3 = load i32* @ncol, align 4
+  %v3 = load i32, i32* @ncol, align 4
   br label %end_of_chain
 
 end_of_chain:
   %state.i = getelementptr inbounds %s, %s* %call18, i32 0, i32 0
-  %v4 = load i32** %state.i, align 4
+  %v4 = load i32*, i32** %state.i, align 4
   br label %while.cond.i.i
 
 while.cond.i.i:
@@ -80,9 +80,9 @@
 
 land.rhs.i.i:
   %arrayidx.i.i = getelementptr inbounds i32, i32* %v4, i32 %dec.i.i
-  %v5 = load i32* %arrayidx.i.i, align 4
+  %v5 = load i32, i32* %arrayidx.i.i, align 4
   %arrayidx1.i.i = getelementptr inbounds i32, i32* %v1, i32 %dec.i.i
-  %v6 = load i32* %arrayidx1.i.i, align 4
+  %v6 = load i32, i32* %arrayidx1.i.i, align 4
   %cmp.i.i = icmp eq i32 %v5, %v6
   br i1 %cmp.i.i, label %while.cond.i.i, label %equal_data.exit.i
 
diff --git a/llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll b/llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
index 235394c..2ad6c2e 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll
@@ -18,13 +18,13 @@
 loop:
   %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
   %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
-  %v = load i32* %iv
+  %v = load i32, i32* %iv
   %iv1 = getelementptr inbounds i32, i32* %iv, i32 %x
-  %v1 = load i32* %iv1
+  %v1 = load i32, i32* %iv1
   %iv2 = getelementptr inbounds i32, i32* %iv1, i32 %x
-  %v2 = load i32* %iv2
+  %v2 = load i32, i32* %iv2
   %iv3 = getelementptr inbounds i32, i32* %iv2, i32 %x
-  %v3 = load i32* %iv3
+  %v3 = load i32, i32* %iv3
   %s1 = add i32 %s, %v
   %s2 = add i32 %s1, %v1
   %s3 = add i32 %s2, %v2
@@ -52,13 +52,13 @@
 loop:
   %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
   %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
-  %v = load i32* %iv
+  %v = load i32, i32* %iv
   %iv1 = getelementptr inbounds i32, i32* %iv, i32 %x
-  %v1 = load i32* %iv1
+  %v1 = load i32, i32* %iv1
   %iv2 = getelementptr inbounds i32, i32* %iv1, i32 %x
-  %v2 = load i32* %iv2
+  %v2 = load i32, i32* %iv2
   %iv3 = getelementptr inbounds i32, i32* %iv2, i32 %x
-  %v3 = load i32* %iv3
+  %v3 = load i32, i32* %iv3
   %s1 = add i32 %s, %v
   %s2 = add i32 %s1, %v1
   %s3 = add i32 %s2, %v2
@@ -103,19 +103,19 @@
   %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
   %res.addr.09 = phi i32* [ %res, %for.body.lr.ph ], [ %add.ptr7, %for.body ]
   %0 = bitcast i8* %main.addr.011 to i32*
-  %1 = load i32* %0, align 4
+  %1 = load i32, i32* %0, align 4
   %add.ptr = getelementptr inbounds i8, i8* %main.addr.011, i32 %main_stride
   %2 = bitcast i8* %add.ptr to i32*
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %add.ptr1 = getelementptr inbounds i8, i8* %main.addr.011, i32 %add.ptr.sum
   %4 = bitcast i8* %add.ptr1 to i32*
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %add.ptr2 = getelementptr inbounds i8, i8* %main.addr.011, i32 %add.ptr1.sum
   %6 = bitcast i8* %add.ptr2 to i32*
-  %7 = load i32* %6, align 4
+  %7 = load i32, i32* %6, align 4
   %add.ptr3 = getelementptr inbounds i8, i8* %main.addr.011, i32 %add.ptr4.sum
   %8 = bitcast i8* %add.ptr3 to i32*
-  %9 = load i32* %8, align 4
+  %9 = load i32, i32* %8, align 4
   %add = add i32 %3, %1
   %add4 = add i32 %add, %5
   %add5 = add i32 %add4, %7
@@ -147,10 +147,10 @@
 for.body:                                         ; preds = %for.body, %entry
   %i.07 = phi i32 [ 0, %entry ], [ %inc.3, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.07
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv5 = zext i8 %0 to i32
   %arrayidx1 = getelementptr inbounds i8, i8* %b, i32 %i.07
-  %1 = load i8* %arrayidx1, align 1
+  %1 = load i8, i8* %arrayidx1, align 1
   %conv26 = zext i8 %1 to i32
   %add = add nsw i32 %conv26, %conv5
   %conv3 = trunc i32 %add to i8
@@ -158,10 +158,10 @@
   store i8 %conv3, i8* %arrayidx4, align 1
   %inc1 = or i32 %i.07, 1
   %arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %inc1
-  %2 = load i8* %arrayidx.1, align 1
+  %2 = load i8, i8* %arrayidx.1, align 1
   %conv5.1 = zext i8 %2 to i32
   %arrayidx1.1 = getelementptr inbounds i8, i8* %b, i32 %inc1
-  %3 = load i8* %arrayidx1.1, align 1
+  %3 = load i8, i8* %arrayidx1.1, align 1
   %conv26.1 = zext i8 %3 to i32
   %add.1 = add nsw i32 %conv26.1, %conv5.1
   %conv3.1 = trunc i32 %add.1 to i8
@@ -169,10 +169,10 @@
   store i8 %conv3.1, i8* %arrayidx4.1, align 1
   %inc.12 = or i32 %i.07, 2
   %arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %inc.12
-  %4 = load i8* %arrayidx.2, align 1
+  %4 = load i8, i8* %arrayidx.2, align 1
   %conv5.2 = zext i8 %4 to i32
   %arrayidx1.2 = getelementptr inbounds i8, i8* %b, i32 %inc.12
-  %5 = load i8* %arrayidx1.2, align 1
+  %5 = load i8, i8* %arrayidx1.2, align 1
   %conv26.2 = zext i8 %5 to i32
   %add.2 = add nsw i32 %conv26.2, %conv5.2
   %conv3.2 = trunc i32 %add.2 to i8
@@ -180,10 +180,10 @@
   store i8 %conv3.2, i8* %arrayidx4.2, align 1
   %inc.23 = or i32 %i.07, 3
   %arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %inc.23
-  %6 = load i8* %arrayidx.3, align 1
+  %6 = load i8, i8* %arrayidx.3, align 1
   %conv5.3 = zext i8 %6 to i32
   %arrayidx1.3 = getelementptr inbounds i8, i8* %b, i32 %inc.23
-  %7 = load i8* %arrayidx1.3, align 1
+  %7 = load i8, i8* %arrayidx1.3, align 1
   %conv26.3 = zext i8 %7 to i32
   %add.3 = add nsw i32 %conv26.3, %conv5.3
   %conv3.3 = trunc i32 %add.3 to i8
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
index 091e76f..862fff2 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll
@@ -30,26 +30,26 @@
   %dp.036 = phi i32* [ %add.ptr, %for.body ], [ %destrow, %entry ]
   %p.035 = phi float* [ %incdec.ptr4, %for.body ], [ %srcrow, %entry ]
   %incdec.ptr = getelementptr inbounds float, float* %p.035, i64 1
-  %0 = load float* %incdec.ptr, align 4
+  %0 = load float, float* %incdec.ptr, align 4
   %incdec.ptr2 = getelementptr inbounds float, float* %p.035, i64 2
-  %1 = load float* %incdec.ptr2, align 4
+  %1 = load float, float* %incdec.ptr2, align 4
   %incdec.ptr3 = getelementptr inbounds float, float* %p.035, i64 3
-  %2 = load float* %incdec.ptr3, align 4
+  %2 = load float, float* %incdec.ptr3, align 4
   %incdec.ptr4 = getelementptr inbounds float, float* %p.035, i64 4
-  %3 = load float* %incdec.ptr4, align 4
-  %4 = load i32* %dp.036, align 4
+  %3 = load float, float* %incdec.ptr4, align 4
+  %4 = load i32, i32* %dp.036, align 4
   %conv5 = fptoui float %0 to i32
   %or = or i32 %4, %conv5
   %arrayidx6 = getelementptr inbounds i32, i32* %dp.036, i64 1
-  %5 = load i32* %arrayidx6, align 4
+  %5 = load i32, i32* %arrayidx6, align 4
   %conv7 = fptoui float %1 to i32
   %or8 = or i32 %5, %conv7
   %arrayidx9 = getelementptr inbounds i32, i32* %dp.036, i64 2
-  %6 = load i32* %arrayidx9, align 4
+  %6 = load i32, i32* %arrayidx9, align 4
   %conv10 = fptoui float %2 to i32
   %or11 = or i32 %6, %conv10
   %arrayidx12 = getelementptr inbounds i32, i32* %dp.036, i64 3
-  %7 = load i32* %arrayidx12, align 4
+  %7 = load i32, i32* %arrayidx12, align 4
   %conv13 = fptoui float %3 to i32
   %or14 = or i32 %7, %conv13
   store i32 %or, i32* %dp.036, align 4
@@ -77,8 +77,8 @@
   %dp.132 = phi i32* [ %add.ptr, %for.body23.lr.ph ], [ %incdec.ptr28, %for.body23 ]
   %p.131 = phi float* [ %incdec.ptr4, %for.body23.lr.ph ], [ %incdec.ptr24, %for.body23 ]
   %incdec.ptr24 = getelementptr inbounds float, float* %p.131, i64 1
-  %9 = load float* %incdec.ptr24, align 4
-  %10 = load i32* %dp.132, align 4
+  %9 = load float, float* %incdec.ptr24, align 4
+  %10 = load i32, i32* %dp.132, align 4
   %conv25 = fptoui float %9 to i32
   %or26 = or i32 %10, %conv25
   store i32 %or26, i32* %dp.132, align 4
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
index 33f2a65..b52700f 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
@@ -28,7 +28,7 @@
   %indvars.iv.i = phi i64 [ 0, %while.body.i ], [ %indvars.iv.next.i, %for.body.i ]
   %add.ptr.sum = add i64 %add.ptr.sum.i, %indvars.iv.i
   %arrayidx22.i = getelementptr inbounds i8, i8* %base, i64 %add.ptr.sum
-  %0 = load i8* %arrayidx22.i, align 1
+  %0 = load i8, i8* %arrayidx22.i, align 1
   %indvars.iv.next.i = add i64 %indvars.iv.i, 1
   %cmp = call i1 @check() nounwind
   br i1 %cmp, label %for.end.i, label %for.body.i
@@ -69,14 +69,14 @@
 for.cond468:                                      ; preds = %if.then477, %entry
   %indvars.iv1163 = phi i64 [ %indvars.iv.next1164, %if.then477 ], [ 1, %entry ]
   %k.0.in = phi i32* [ %last, %if.then477 ], [ getelementptr inbounds ([5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 0, i32 2), %entry ]
-  %k.0 = load i32* %k.0.in, align 4
+  %k.0 = load i32, i32* %k.0.in, align 4
   %0 = trunc i64 %indvars.iv1163 to i32
   %cmp469 = icmp slt i32 %0, %n
   br i1 %cmp469, label %for.body471, label %for.inc498
 
 for.body471:                                      ; preds = %for.cond468
   %first = getelementptr inbounds [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771], [5000 x %struct.anon.7.91.199.307.415.475.559.643.751.835.943.1003.1111.1219.1351.1375.1399.1435.1471.1483.1519.1531.1651.1771]* @tags, i64 0, i64 %indvars.iv1163, i32 1
-  %1 = load i32* %first, align 4
+  %1 = load i32, i32* %first, align 4
   br i1 undef, label %if.then477, label %for.inc498
 
 if.then477:                                       ; preds = %for.body471
@@ -119,7 +119,7 @@
   %1 = trunc i64 %0 to i32
   %mul.i.us.i = mul nsw i32 0, %1
   %arrayidx5.us.i = getelementptr inbounds double, double* %u, i64 %indvars.iv.i.SV.phi
-  %2 = load double* %arrayidx5.us.i, align 8
+  %2 = load double, double* %arrayidx5.us.i, align 8
   %indvars.iv.next.i = add i64 %indvars.iv.i.SV.phi, 1
   br i1 undef, label %for.inc8.us.i, label %meshBB
 
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
index c01cab0..c1099b2 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@@ -29,13 +29,13 @@
 loop:
   %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
   %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
-  %v = load i32* %iv
+  %v = load i32, i32* %iv
   %iv1 = getelementptr inbounds i32, i32* %iv, i32 %x
-  %v1 = load i32* %iv1
+  %v1 = load i32, i32* %iv1
   %iv2 = getelementptr inbounds i32, i32* %iv1, i32 %x
-  %v2 = load i32* %iv2
+  %v2 = load i32, i32* %iv2
   %iv3 = getelementptr inbounds i32, i32* %iv2, i32 %x
-  %v3 = load i32* %iv3
+  %v3 = load i32, i32* %iv3
   %s1 = add i32 %s, %v
   %s2 = add i32 %s1, %v1
   %s3 = add i32 %s2, %v2
@@ -71,13 +71,13 @@
 loop:
   %iv = phi i32* [ %a, %entry ], [ %iv4, %loop ]
   %s = phi i32 [ 0, %entry ], [ %s4, %loop ]
-  %v = load i32* %iv
+  %v = load i32, i32* %iv
   %iv1 = getelementptr inbounds i32, i32* %iv, i32 %x
-  %v1 = load i32* %iv1
+  %v1 = load i32, i32* %iv1
   %iv2 = getelementptr inbounds i32, i32* %iv1, i32 %x
-  %v2 = load i32* %iv2
+  %v2 = load i32, i32* %iv2
   %iv3 = getelementptr inbounds i32, i32* %iv2, i32 %x
-  %v3 = load i32* %iv3
+  %v3 = load i32, i32* %iv3
   %s1 = add i32 %s, %v
   %s2 = add i32 %s1, %v1
   %s3 = add i32 %s2, %v2
@@ -126,19 +126,19 @@
   %i.010 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
   %res.addr.09 = phi i32* [ %res, %for.body.lr.ph ], [ %add.ptr7, %for.body ]
   %0 = bitcast i8* %main.addr.011 to i32*
-  %1 = load i32* %0, align 4
+  %1 = load i32, i32* %0, align 4
   %add.ptr = getelementptr inbounds i8, i8* %main.addr.011, i32 %main_stride
   %2 = bitcast i8* %add.ptr to i32*
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %add.ptr1 = getelementptr inbounds i8, i8* %main.addr.011, i32 %add.ptr.sum
   %4 = bitcast i8* %add.ptr1 to i32*
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %add.ptr2 = getelementptr inbounds i8, i8* %main.addr.011, i32 %add.ptr1.sum
   %6 = bitcast i8* %add.ptr2 to i32*
-  %7 = load i32* %6, align 4
+  %7 = load i32, i32* %6, align 4
   %add.ptr3 = getelementptr inbounds i8, i8* %main.addr.011, i32 %add.ptr4.sum
   %8 = bitcast i8* %add.ptr3 to i32*
-  %9 = load i32* %8, align 4
+  %9 = load i32, i32* %8, align 4
   %add = add i32 %3, %1
   %add4 = add i32 %add, %5
   %add5 = add i32 %add4, %7
@@ -173,10 +173,10 @@
 for.body:                                         ; preds = %for.body, %entry
   %i.07 = phi i32 [ 0, %entry ], [ %inc.3, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.07
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv5 = zext i8 %0 to i32
   %arrayidx1 = getelementptr inbounds i8, i8* %b, i32 %i.07
-  %1 = load i8* %arrayidx1, align 1
+  %1 = load i8, i8* %arrayidx1, align 1
   %conv26 = zext i8 %1 to i32
   %add = add nsw i32 %conv26, %conv5
   %conv3 = trunc i32 %add to i8
@@ -184,10 +184,10 @@
   store i8 %conv3, i8* %arrayidx4, align 1
   %inc1 = or i32 %i.07, 1
   %arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %inc1
-  %2 = load i8* %arrayidx.1, align 1
+  %2 = load i8, i8* %arrayidx.1, align 1
   %conv5.1 = zext i8 %2 to i32
   %arrayidx1.1 = getelementptr inbounds i8, i8* %b, i32 %inc1
-  %3 = load i8* %arrayidx1.1, align 1
+  %3 = load i8, i8* %arrayidx1.1, align 1
   %conv26.1 = zext i8 %3 to i32
   %add.1 = add nsw i32 %conv26.1, %conv5.1
   %conv3.1 = trunc i32 %add.1 to i8
@@ -195,10 +195,10 @@
   store i8 %conv3.1, i8* %arrayidx4.1, align 1
   %inc.12 = or i32 %i.07, 2
   %arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %inc.12
-  %4 = load i8* %arrayidx.2, align 1
+  %4 = load i8, i8* %arrayidx.2, align 1
   %conv5.2 = zext i8 %4 to i32
   %arrayidx1.2 = getelementptr inbounds i8, i8* %b, i32 %inc.12
-  %5 = load i8* %arrayidx1.2, align 1
+  %5 = load i8, i8* %arrayidx1.2, align 1
   %conv26.2 = zext i8 %5 to i32
   %add.2 = add nsw i32 %conv26.2, %conv5.2
   %conv3.2 = trunc i32 %add.2 to i8
@@ -206,10 +206,10 @@
   store i8 %conv3.2, i8* %arrayidx4.2, align 1
   %inc.23 = or i32 %i.07, 3
   %arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %inc.23
-  %6 = load i8* %arrayidx.3, align 1
+  %6 = load i8, i8* %arrayidx.3, align 1
   %conv5.3 = zext i8 %6 to i32
   %arrayidx1.3 = getelementptr inbounds i8, i8* %b, i32 %inc.23
-  %7 = load i8* %arrayidx1.3, align 1
+  %7 = load i8, i8* %arrayidx1.3, align 1
   %conv26.3 = zext i8 %7 to i32
   %add.3 = add nsw i32 %conv26.3, %conv5.3
   %conv3.3 = trunc i32 %add.3 to i8
@@ -291,7 +291,7 @@
   %dest = phi i8* [ %dest0, %entry ], [ %incdec.ptr91.us, %for.body82.us ]
   %source = phi i8* [ %source0, %entry ], [ %add.ptr83.us, %for.body82.us ]
   %0 = bitcast i8* %source to i32*
-  %1 = load i32* %0, align 4
+  %1 = load i32, i32* %0, align 4
   %trunc = trunc i32 %1 to i8
   %add.ptr83.us = getelementptr inbounds i8, i8* %source, i32 4
   %incdec.ptr91.us = getelementptr inbounds i8, i8* %dest, i32 1
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
index f4807c5..7925bf0 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-stress-X86.ll
@@ -33,10 +33,10 @@
 for.body:                                         ; preds = %entry, %for.body.3
   %i.09 = phi i32 [ %add5.3, %for.body.3 ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i8, i8* %a, i32 %i.09
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv6 = zext i8 %0 to i32
   %arrayidx1 = getelementptr inbounds i8, i8* %b, i32 %i.09
-  %1 = load i8* %arrayidx1, align 1
+  %1 = load i8, i8* %arrayidx1, align 1
   %conv27 = zext i8 %1 to i32
   %add = add nsw i32 %conv27, %conv6
   %conv3 = trunc i32 %add to i8
@@ -51,10 +51,10 @@
 
 for.body.1:                                       ; preds = %for.body
   %arrayidx.1 = getelementptr inbounds i8, i8* %a, i32 %add5
-  %2 = load i8* %arrayidx.1, align 1
+  %2 = load i8, i8* %arrayidx.1, align 1
   %conv6.1 = zext i8 %2 to i32
   %arrayidx1.1 = getelementptr inbounds i8, i8* %b, i32 %add5
-  %3 = load i8* %arrayidx1.1, align 1
+  %3 = load i8, i8* %arrayidx1.1, align 1
   %conv27.1 = zext i8 %3 to i32
   %add.1 = add nsw i32 %conv27.1, %conv6.1
   %conv3.1 = trunc i32 %add.1 to i8
@@ -66,10 +66,10 @@
 
 for.body.2:                                       ; preds = %for.body.1
   %arrayidx.2 = getelementptr inbounds i8, i8* %a, i32 %add5.1
-  %4 = load i8* %arrayidx.2, align 1
+  %4 = load i8, i8* %arrayidx.2, align 1
   %conv6.2 = zext i8 %4 to i32
   %arrayidx1.2 = getelementptr inbounds i8, i8* %b, i32 %add5.1
-  %5 = load i8* %arrayidx1.2, align 1
+  %5 = load i8, i8* %arrayidx1.2, align 1
   %conv27.2 = zext i8 %5 to i32
   %add.2 = add nsw i32 %conv27.2, %conv6.2
   %conv3.2 = trunc i32 %add.2 to i8
@@ -81,10 +81,10 @@
 
 for.body.3:                                       ; preds = %for.body.2
   %arrayidx.3 = getelementptr inbounds i8, i8* %a, i32 %add5.2
-  %6 = load i8* %arrayidx.3, align 1
+  %6 = load i8, i8* %arrayidx.3, align 1
   %conv6.3 = zext i8 %6 to i32
   %arrayidx1.3 = getelementptr inbounds i8, i8* %b, i32 %add5.2
-  %7 = load i8* %arrayidx1.3, align 1
+  %7 = load i8, i8* %arrayidx1.3, align 1
   %conv27.3 = zext i8 %7 to i32
   %add.3 = add nsw i32 %conv27.3, %conv6.3
   %conv3.3 = trunc i32 %add.3 to i8
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll
index 20af548..a6613c5 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/no_superflous_induction_vars.ll
@@ -40,7 +40,7 @@
   %index = phi i64 [ %index.next, %vector.body ], [ %0, %for.body14.lr.ph ]
   %4 = getelementptr inbounds i8, i8* %rowsptr, i64 %index
   %5 = bitcast i8* %4 to <4 x i8>*
-  %wide.load = load <4 x i8>* %5, align 1
+  %wide.load = load <4 x i8>, <4 x i8>* %5, align 1
   %index.next = add i64 %index, 8
   %6 = icmp eq i64 %index.next, %end.idx.rnd.down
   br i1 %6, label %for.end24, label %vector.body
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/pr17473.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/pr17473.ll
index e7ebaa8..2be2762 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/pr17473.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/pr17473.ll
@@ -25,9 +25,9 @@
 define i32 @main() #0 {
 entry:
   store i8 0, i8* @h, align 1
-  %0 = load i32* @j, align 4
+  %0 = load i32, i32* @j, align 4
   %tobool.i = icmp eq i32 %0, 0
-  %1 = load i32* @d, align 4
+  %1 = load i32, i32* @d, align 4
   %cmp3 = icmp sgt i32 %1, -1
   %.lobit = lshr i32 %1, 31
   %.lobit.not = xor i32 %.lobit, 1
diff --git a/llvm/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll b/llvm/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll
index 834b040..5650f81 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/addrec-gep-address-space.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -loop-reduce -S | FileCheck %s
 ; CHECK: bb1:
-; CHECK: load double addrspace(1)* [[IV:%[^,]+]]
+; CHECK: load double, double addrspace(1)* [[IV:%[^,]+]]
 ; CHECK: store double {{.*}}, double addrspace(1)* [[IV]]
 
 ; CHECK-NOT: cast
@@ -37,7 +37,7 @@
 	%tmp4 = add i64 %j.01, %tmp2		; <i64> [#uses=1]
         %z0 = add i64 %tmp3, 5203
 	%tmp5 = getelementptr double, double addrspace(1)* %p, i64 %z0		; <double addrspace(1)*> [#uses=1]
-	%tmp6 = load double addrspace(1)* %tmp5, align 8		; <double> [#uses=1]
+	%tmp6 = load double, double addrspace(1)* %tmp5, align 8		; <double> [#uses=1]
 	%tmp7 = fdiv double %tmp6, 2.100000e+00		; <double> [#uses=1]
         %z1 = add i64 %tmp4, 5203
 	%tmp8 = getelementptr double, double addrspace(1)* %p, i64 %z1		; <double addrspace(1)*> [#uses=1]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/addrec-gep.ll b/llvm/test/Transforms/LoopStrengthReduce/addrec-gep.ll
index 114a181..6919a33 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/addrec-gep.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/addrec-gep.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -loop-reduce -S | FileCheck %s
 ; CHECK: bb1:
-; CHECK: load double* [[IV:%[^,]+]]
+; CHECK: load double, double* [[IV:%[^,]+]]
 ; CHECK: store double {{.*}}, double* [[IV]]
 ; CHECK: getelementptr double, double*
 ; CHECK-NOT: cast
@@ -31,7 +31,7 @@
 	%tmp4 = add i64 %j.01, %tmp2		; <i64> [#uses=1]
         %z0 = add i64 %tmp3, 5203
 	%tmp5 = getelementptr double, double* %p, i64 %z0		; <double*> [#uses=1]
-	%tmp6 = load double* %tmp5, align 8		; <double> [#uses=1]
+	%tmp6 = load double, double* %tmp5, align 8		; <double> [#uses=1]
 	%tmp7 = fdiv double %tmp6, 2.100000e+00		; <double> [#uses=1]
         %z1 = add i64 %tmp4, 5203
 	%tmp8 = getelementptr double, double* %p, i64 %z1		; <double*> [#uses=1]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/address-space-loop.ll b/llvm/test/Transforms/LoopStrengthReduce/address-space-loop.ll
index 3ae5f32..57ba665 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/address-space-loop.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/address-space-loop.ll
@@ -36,7 +36,7 @@
 
 ; CHECK: bb14:
 ; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[SCEVGEP]]
-; CHECK-NEXT: %t6 = load float addrspace(1)* addrspace(1)* undef
+; CHECK-NEXT: %t6 = load float addrspace(1)*, float addrspace(1)* addrspace(1)* undef
 ; Fold %t3's add within the address.
 ; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float, float addrspace(1)* %t6, i16 4
 ; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float addrspace(1)* [[SCEVGEP1]] to i8 addrspace(1)*
@@ -47,7 +47,7 @@
 bb14:                                             ; preds = %bb14, %bb10
   %t2 = getelementptr inbounds i8, i8 addrspace(1)* undef, i16 %t4 ; <i8*> [#uses=1]
   store i8 undef, i8 addrspace(1)* %t2
-  %t6 = load float addrspace(1)* addrspace(1)* undef
+  %t6 = load float addrspace(1)*, float addrspace(1)* addrspace(1)* undef
   %t8 = bitcast float addrspace(1)* %t6 to i8 addrspace(1)*              ; <i8*> [#uses=1]
   %t9 = getelementptr inbounds i8, i8 addrspace(1)* %t8, i16 %t3 ; <i8*> [#uses=1]
   store i8 undef, i8 addrspace(1)* %t9
diff --git a/llvm/test/Transforms/LoopStrengthReduce/dont_reverse.ll b/llvm/test/Transforms/LoopStrengthReduce/dont_reverse.ll
index d65213d..4809def 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/dont_reverse.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/dont_reverse.ll
@@ -11,7 +11,7 @@
 bb8:
 	%indvar34 = phi i32 [ 0, %entry ], [ %indvar.next35, %bb8 ]
 	%indvar3451 = trunc i32 %indvar34 to i2
-	%xmp4344 = load i2* %p
+	%xmp4344 = load i2, i2* %p
 	%xmp104 = icmp eq i2 %indvar3451, %xmp4344
 	%indvar.next35 = add i32 %indvar34, 1
 	br i1 %xmp104, label %bb10, label %bb8
diff --git a/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll b/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index 3cffa65..092b274 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -35,7 +35,7 @@
   %div = udiv i32 %i.addr.0, 10
   %idxprom = zext i32 %rem to i64
   %arrayidx = getelementptr inbounds [37 x i8], [37 x i8]* @.str, i64 0, i64 %idxprom
-  %tmp5 = load i8* %arrayidx, align 1
+  %tmp5 = load i8, i8* %arrayidx, align 1
   %conv = sext i8 %tmp5 to i16
   store i16 %conv, i16* %incdec.ptr, align 2
   %1 = icmp ugt i32 %i.addr.0, 9
@@ -59,9 +59,9 @@
   br i1 %cmp2740, label %for.end, label %for.body.lr.ph
 
 for.body.lr.ph:                                   ; preds = %do.end
-  %tmp16 = load i32* %mLength, align 4
+  %tmp16 = load i32, i32* %mLength, align 4
   %mBegin = getelementptr inbounds %struct.Vector2, %struct.Vector2* %result, i64 0, i32 0
-  %tmp14 = load i16** %mBegin, align 8
+  %tmp14 = load i16*, i16** %mBegin, align 8
   %tmp48 = zext i32 %tmp16 to i64
   br label %for.body
 
@@ -73,7 +73,7 @@
   %incdec.ptr32 = getelementptr [33 x i16], [33 x i16]* %buffer, i64 1, i64 %tmp47
   %tmp49 = add i64 %tmp48, %indvar
   %dst.041 = getelementptr i16, i16* %tmp14, i64 %tmp49
-  %tmp29 = load i16* %p.042, align 2
+  %tmp29 = load i16, i16* %p.042, align 2
   store i16 %tmp29, i16* %dst.041, align 2
   %cmp27 = icmp eq i16* %incdec.ptr32, %add.ptr22
   %indvar.next = add i64 %indvar, 1
@@ -83,7 +83,7 @@
   br label %for.end
 
 for.end:                                          ; preds = %for.end.loopexit, %do.end
-  %tmp38 = load i32* %mLength, align 4
+  %tmp38 = load i32, i32* %mLength, align 4
   %add = add i32 %tmp38, %conv11
   store i32 %add, i32* %mLength, align 4
   ret void
diff --git a/llvm/test/Transforms/LoopStrengthReduce/pr12691.ll b/llvm/test/Transforms/LoopStrengthReduce/pr12691.ll
index 8399434..dfc1343 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/pr12691.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/pr12691.ll
@@ -13,13 +13,13 @@
   br i1 %tobool, label %for.cond, label %for.end
 
 for.end:
-; CHECK:  %tmp1 = load i32* @d, align 4
-; CHECK-NEXT:  %tmp2 = load i32* @d, align 4
+; CHECK:  %tmp1 = load i32, i32* @d, align 4
+; CHECK-NEXT:  %tmp2 = load i32, i32* @d, align 4
 ; CHECK-NEXT:  %0 = sub i32 %tmp1, %tmp2
 
-  %tmp1 = load i32* @d, align 4
+  %tmp1 = load i32, i32* @d, align 4
   %add = add nsw i32 %tmp1, %g.0
-  %tmp2 = load i32* @d, align 4
+  %tmp2 = load i32, i32* @d, align 4
   %tobool26 = icmp eq i32 %x, 0
   br i1 %tobool26, label %for.end5, label %for.body.lr.ph
 
diff --git a/llvm/test/Transforms/LoopStrengthReduce/pr18165.ll b/llvm/test/Transforms/LoopStrengthReduce/pr18165.ll
index cc878c4..5eb1b98 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/pr18165.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/pr18165.ll
@@ -20,10 +20,10 @@
 ; Function Attrs: nounwind optsize ssp uwtable
 define i32 @main() #0 {
 entry:
-  %0 = load i32* getelementptr inbounds (%struct.anon* @a, i64 0, i32 0), align 4, !tbaa !1
+  %0 = load i32, i32* getelementptr inbounds (%struct.anon* @a, i64 0, i32 0), align 4, !tbaa !1
   %tobool7.i = icmp eq i32 %0, 0
-  %.promoted.i = load i32* getelementptr inbounds (%struct.anon* @a, i64 0, i32 2), align 4, !tbaa !6
-  %f.promoted.i = load i32* @f, align 4, !tbaa !7
+  %.promoted.i = load i32, i32* getelementptr inbounds (%struct.anon* @a, i64 0, i32 2), align 4, !tbaa !6
+  %f.promoted.i = load i32, i32* @f, align 4, !tbaa !7
   br label %for.body6.i.outer
 
 for.body6.i.outer:                                ; preds = %entry, %lor.end.i
@@ -42,7 +42,7 @@
   br i1 %tobool12.i, label %lor.rhs.i, label %lor.end.i
 
 lor.rhs.i:                                        ; preds = %if.end9.i
-  %1 = load i32* @b, align 4, !tbaa !7
+  %1 = load i32, i32* @b, align 4, !tbaa !7
   %dec.i = add nsw i32 %1, -1
   store i32 %dec.i, i32* @b, align 4, !tbaa !7
   %tobool13.i = icmp ne i32 %1, 0
@@ -63,7 +63,7 @@
   store i32 %or15.i, i32* @f, align 4, !tbaa !7
   store i32 %add.i, i32* getelementptr inbounds (%struct.anon* @e, i64 0, i32 1), align 4, !tbaa !8
   store i32 0, i32* @h, align 4, !tbaa !7
-  %3 = load i32* @b, align 4, !tbaa !7
+  %3 = load i32, i32* @b, align 4, !tbaa !7
   %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %3) #2
   ret i32 0
 }
diff --git a/llvm/test/Transforms/LoopStrengthReduce/pr2570.ll b/llvm/test/Transforms/LoopStrengthReduce/pr2570.ll
index 7b56971..671ffde 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/pr2570.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/pr2570.ll
@@ -23,7 +23,7 @@
 entry:
 	tail call i32 @func_116( i8 zeroext  2 ) nounwind 		; <i32>:0 [#uses=0]
 	tail call i32 @func_63( i16 signext  2 ) nounwind 		; <i32>:1 [#uses=1]
-	load i16* @g_39, align 2		; <i16>:2 [#uses=1]
+	load i16, i16* @g_39, align 2		; <i16>:2 [#uses=1]
 	tail call i32 @func_63( i16 signext  %2 ) nounwind 		; <i32>:3 [#uses=1]
 	trunc i32 %3 to i16		; <i16>:4 [#uses=1]
 	and i16 %4, 1		; <i16>:5 [#uses=1]
@@ -32,10 +32,10 @@
 	tail call i32 @func_74( i16 zeroext  %5, i8 zeroext  %6, i16 zeroext  %7, i16 zeroext  0 ) nounwind 		; <i32>:8 [#uses=0]
 	tail call i32 @func_124( i32 544824386 ) nounwind 		; <i32>:9 [#uses=0]
 	zext i8 %p_50 to i32		; <i32>:10 [#uses=1]
-	load i32* @g_43, align 4		; <i32>:11 [#uses=1]
+	load i32, i32* @g_43, align 4		; <i32>:11 [#uses=1]
 	icmp sle i32 %10, %11		; <i1>:12 [#uses=1]
 	zext i1 %12 to i32		; <i32>:13 [#uses=2]
-	load i8* @g_247, align 1		; <i8>:14 [#uses=1]
+	load i8, i8* @g_247, align 1		; <i8>:14 [#uses=1]
 	trunc i32 %p_45 to i16		; <i16>:15 [#uses=1]
 	zext i8 %14 to i16		; <i16>:16 [#uses=1]
 	tail call i32 @func_74( i16 zeroext  %15, i8 zeroext  0, i16 zeroext  %16, i16 zeroext  23618 ) nounwind 		; <i32>:17 [#uses=4]
@@ -72,7 +72,7 @@
 	icmp eq i32 %.0343, 0		; <i1>:30 [#uses=1]
 	%.0341 = select i1 %30, i32 1, i32 %.0343		; <i32> [#uses=1]
 	urem i32 %23, %.0341		; <i32>:31 [#uses=1]
-	load i32* @g_137, align 4		; <i32>:32 [#uses=4]
+	load i32, i32* @g_137, align 4		; <i32>:32 [#uses=4]
 	icmp slt i32 %32, 0		; <i1>:33 [#uses=1]
 	br i1 %33, label %bb202, label %bb198
 
@@ -119,13 +119,13 @@
 
 bb223:		; preds = %bb222, %bb215
 	%iftmp.437.0 = phi i32 [ 0, %bb222 ], [ 1, %bb215 ]		; <i32> [#uses=1]
-	load i32* @g_91, align 4		; <i32>:55 [#uses=3]
+	load i32, i32* @g_91, align 4		; <i32>:55 [#uses=3]
 	tail call i32 @func_103( i16 zeroext  4 ) nounwind 		; <i32>:56 [#uses=0]
 	tail call i32 @func_112( i32 0, i16 zeroext  -31374 ) nounwind 		; <i32>:57 [#uses=0]
-	load i32* @g_197, align 4		; <i32>:58 [#uses=1]
+	load i32, i32* @g_197, align 4		; <i32>:58 [#uses=1]
 	tail call i32 @func_124( i32 28156 ) nounwind 		; <i32>:59 [#uses=1]
-	load i32* @g_260, align 4		; <i32>:60 [#uses=1]
-	load i32* @g_43, align 4		; <i32>:61 [#uses=1]
+	load i32, i32* @g_260, align 4		; <i32>:60 [#uses=1]
+	load i32, i32* @g_43, align 4		; <i32>:61 [#uses=1]
 	xor i32 %61, %60		; <i32>:62 [#uses=1]
 	mul i32 %62, %59		; <i32>:63 [#uses=1]
 	trunc i32 %63 to i8		; <i8>:64 [#uses=1]
@@ -138,7 +138,7 @@
 	%or.cond352 = or i1 %70, %67		; <i1> [#uses=1]
 	select i1 %or.cond352, i32 0, i32 %55		; <i32>:71 [#uses=1]
 	%.353 = ashr i32 %66, %71		; <i32> [#uses=2]
-	load i16* @g_221, align 2		; <i16>:72 [#uses=1]
+	load i16, i16* @g_221, align 2		; <i16>:72 [#uses=1]
 	zext i16 %72 to i32		; <i32>:73 [#uses=1]
 	icmp ugt i32 %.353, 31		; <i1>:74 [#uses=1]
 	select i1 %74, i32 0, i32 %.353		; <i32>:75 [#uses=1]
@@ -146,7 +146,7 @@
 	add i32 %.0323, %iftmp.437.0		; <i32>:76 [#uses=1]
 	and i32 %48, 255		; <i32>:77 [#uses=2]
 	add i32 %77, 2042556439		; <i32>:78 [#uses=1]
-	load i32* @g_207, align 4		; <i32>:79 [#uses=2]
+	load i32, i32* @g_207, align 4		; <i32>:79 [#uses=2]
 	icmp ugt i32 %79, 31		; <i1>:80 [#uses=1]
 	select i1 %80, i32 0, i32 %79		; <i32>:81 [#uses=1]
 	%.0320 = lshr i32 %77, %81		; <i32> [#uses=1]
@@ -154,7 +154,7 @@
 	zext i1 %82 to i8		; <i8>:83 [#uses=1]
 	tail call i32 @func_25( i8 zeroext  %83 ) nounwind 		; <i32>:84 [#uses=1]
 	xor i32 %84, 1		; <i32>:85 [#uses=1]
-	load i32* @g_197, align 4		; <i32>:86 [#uses=1]
+	load i32, i32* @g_197, align 4		; <i32>:86 [#uses=1]
 	add i32 %86, 1		; <i32>:87 [#uses=1]
 	add i32 %87, %85		; <i32>:88 [#uses=1]
 	icmp ugt i32 %76, %88		; <i1>:89 [#uses=1]
@@ -163,22 +163,22 @@
 bb241:		; preds = %bb223
 	store i16 -9, i16* @g_221, align 2
 	udiv i32 %p_52, 1538244727		; <i32>:90 [#uses=1]
-	load i32* @g_207, align 4		; <i32>:91 [#uses=1]
+	load i32, i32* @g_207, align 4		; <i32>:91 [#uses=1]
 	sub i32 %91, %90		; <i32>:92 [#uses=1]
-	load i32* @g_14, align 4		; <i32>:93 [#uses=1]
+	load i32, i32* @g_14, align 4		; <i32>:93 [#uses=1]
 	trunc i32 %93 to i16		; <i16>:94 [#uses=1]
 	trunc i32 %p_46 to i16		; <i16>:95 [#uses=2]
 	sub i16 %94, %95		; <i16>:96 [#uses=1]
-	load i32* @g_197, align 4		; <i32>:97 [#uses=1]
+	load i32, i32* @g_197, align 4		; <i32>:97 [#uses=1]
 	trunc i32 %97 to i16		; <i16>:98 [#uses=1]
 	tail call i32 @func_55( i32 -346178830, i16 zeroext  %98, i16 zeroext  %95 ) nounwind 		; <i32>:99 [#uses=0]
 	zext i16 %p_48 to i32		; <i32>:100 [#uses=1]
-	load i8* @g_247, align 1		; <i8>:101 [#uses=1]
+	load i8, i8* @g_247, align 1		; <i8>:101 [#uses=1]
 	zext i8 %101 to i32		; <i32>:102 [#uses=1]
 	sub i32 %100, %102		; <i32>:103 [#uses=1]
 	tail call i32 @func_55( i32 %103, i16 zeroext  -2972, i16 zeroext  %96 ) nounwind 		; <i32>:104 [#uses=0]
 	xor i32 %92, 2968		; <i32>:105 [#uses=1]
-	load i32* @g_197, align 4		; <i32>:106 [#uses=1]
+	load i32, i32* @g_197, align 4		; <i32>:106 [#uses=1]
 	icmp ugt i32 %105, %106		; <i1>:107 [#uses=1]
 	zext i1 %107 to i32		; <i32>:108 [#uses=1]
 	store i32 %108, i32* @g_33, align 4
@@ -195,12 +195,12 @@
 
 bb272.thread:		; preds = %bb248
 	store i32 1, i32* @g_82
-	load i16* @g_267, align 2		; <i16>:111 [#uses=1]
+	load i16, i16* @g_267, align 2		; <i16>:111 [#uses=1]
 	icmp eq i16 %111, 0		; <i1>:112 [#uses=1]
 	br i1 %112, label %bb311.loopexit.split, label %bb268
 
 bb255.thread:		; preds = %bb248
-	load i32* @g_260, align 4		; <i32>:113 [#uses=1]
+	load i32, i32* @g_260, align 4		; <i32>:113 [#uses=1]
 	sub i32 %113, %p_52		; <i32>:114 [#uses=1]
 	and i32 %114, -20753		; <i32>:115 [#uses=1]
 	icmp ne i32 %115, 0		; <i1>:116 [#uses=1]
@@ -237,7 +237,7 @@
 	%p_49_addr.0 = phi i32 [ %p_49_addr.1.reg2mem.0, %bb279 ], [ %p_49_addr.1.reg2mem.0, %bb276 ], [ 0, %bb255.thread ]		; <i32> [#uses=1]
 	%p_48_addr.1 = phi i16 [ %124, %bb279 ], [ %118, %bb276 ], [ %p_48_addr.2.reg2mem.0, %bb255.thread ]		; <i16> [#uses=1]
 	%p_45_addr.0 = phi i32 [ %p_45_addr.1.reg2mem.0, %bb279 ], [ %p_45_addr.1.reg2mem.0, %bb276 ], [ 8, %bb255.thread ]		; <i32> [#uses=3]
-	load i32* @g_43, align 4		; <i32>:125 [#uses=1]
+	load i32, i32* @g_43, align 4		; <i32>:125 [#uses=1]
 	trunc i32 %125 to i8		; <i8>:126 [#uses=1]
 	tail call i32 @func_116( i8 zeroext  %126 ) nounwind 		; <i32>:127 [#uses=0]
 	lshr i32 65255, %p_45_addr.0		; <i32>:128 [#uses=1]
@@ -245,7 +245,7 @@
 	%.op = lshr i32 %128, 31		; <i32> [#uses=1]
 	%.op.op = xor i32 %.op, 1		; <i32> [#uses=1]
 	%.354..lobit.not = select i1 %129, i32 1, i32 %.op.op		; <i32> [#uses=1]
-	load i16* @g_39, align 2		; <i16>:130 [#uses=1]
+	load i16, i16* @g_39, align 2		; <i16>:130 [#uses=1]
 	zext i16 %130 to i32		; <i32>:131 [#uses=1]
 	icmp slt i32 %.354..lobit.not, %131		; <i1>:132 [#uses=1]
 	zext i1 %132 to i32		; <i32>:133 [#uses=1]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/pr3086.ll b/llvm/test/Transforms/LoopStrengthReduce/pr3086.ll
index 085cbca..187c14f 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/pr3086.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/pr3086.ll
@@ -10,7 +10,7 @@
 	br label %bb11
 
 bb5:		; preds = %bb9
-	%0 = load %struct.Lit** %storemerge, align 8		; <%struct.Lit*> [#uses=0]
+	%0 = load %struct.Lit*, %struct.Lit** %storemerge, align 8		; <%struct.Lit*> [#uses=0]
 	%indvar.next8 = add i64 %storemerge.rec, 1		; <i64> [#uses=1]
 	br label %bb9
 
@@ -21,7 +21,7 @@
 	br i1 %1, label %bb5, label %bb22
 
 bb11:		; preds = %bb22, %entry
-	%2 = load %struct.Cls** null, align 8		; <%struct.Cls*> [#uses=0]
+	%2 = load %struct.Cls*, %struct.Cls** null, align 8		; <%struct.Cls*> [#uses=0]
 	br label %bb22
 
 bb22:		; preds = %bb11, %bb9
diff --git a/llvm/test/Transforms/LoopStrengthReduce/pr3399.ll b/llvm/test/Transforms/LoopStrengthReduce/pr3399.ll
index 26c5002..1037768 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/pr3399.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/pr3399.ll
@@ -13,7 +13,7 @@
 
 bb1:		; preds = %bb
 	%l_2.0.reg2mem.0 = sub i32 0, %indvar		; <i32> [#uses=1]
-	%0 = load volatile i32* @g_53, align 4		; <i32> [#uses=1]
+	%0 = load volatile i32, i32* @g_53, align 4		; <i32> [#uses=1]
 	%1 = trunc i32 %l_2.0.reg2mem.0 to i16		; <i16> [#uses=1]
 	%2 = trunc i32 %0 to i16		; <i16> [#uses=1]
 	%3 = mul i16 %2, %1		; <i16> [#uses=1]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/pr3571.ll b/llvm/test/Transforms/LoopStrengthReduce/pr3571.ll
index a23e4db..1615a81 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/pr3571.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/pr3571.ll
@@ -12,7 +12,7 @@
 
 _ZNK11QModelIndex7isValidEv.exit.i:		; preds = %bb.i, %entry
 	%result.0.i = phi i32 [ 0, %entry ], [ %indvar.next, %bb.i ]		; <i32> [#uses=2]
-	%0 = load i32** null, align 4		; <%struct.QAbstractItemDelegate*> [#uses=0]
+	%0 = load i32*, i32** null, align 4		; <%struct.QAbstractItemDelegate*> [#uses=0]
 	br i1 false, label %_ZN18qdesigner_internalL5levelEP18QAbstractItemModelRK11QModelIndex.exit, label %bb.i
 
 _ZN18qdesigner_internalL5levelEP18QAbstractItemModelRK11QModelIndex.exit:		; preds = %_ZNK11QModelIndex7isValidEv.exit.i
diff --git a/llvm/test/Transforms/LoopStrengthReduce/share_code_in_preheader.ll b/llvm/test/Transforms/LoopStrengthReduce/share_code_in_preheader.ll
index 22f5c50..1035ce1 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/share_code_in_preheader.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/share_code_in_preheader.ll
@@ -13,9 +13,9 @@
 	%N_addr.0.0 = sub i32 %N.s, %indvar		; <i32> [#uses=1]
 	%tmp.8 = add i32 %N_addr.0.0, %tmp.6		; <i32> [#uses=2]
 	%tmp.9 = getelementptr i8, i8* %A, i32 %tmp.8		; <i8*> [#uses=1]
-	%tmp.10 = load i8* %tmp.9		; <i8> [#uses=1]
+	%tmp.10 = load i8, i8* %tmp.9		; <i8> [#uses=1]
 	%tmp.17 = getelementptr i8, i8* %B, i32 %tmp.8		; <i8*> [#uses=1]
-	%tmp.18 = load i8* %tmp.17		; <i8> [#uses=1]
+	%tmp.18 = load i8, i8* %tmp.17		; <i8> [#uses=1]
 	%tmp.19 = sub i8 %tmp.10, %tmp.18		; <i8> [#uses=1]
 	%tmp.21 = add i8 %tmp.19, %Sum.0.0		; <i8> [#uses=2]
 	%indvar.next = add i32 %indvar.ui, 1		; <i32> [#uses=2]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll b/llvm/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll
index 6ac842d..a81e314 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll
@@ -36,7 +36,7 @@
 
 ; CHECK: bb14:
 ; CHECK-NEXT: store i8 undef, i8 addrspace(1)* [[SCEVGEP]]
-; CHECK-NEXT: %t6 = load float addrspace(1)* addrspace(1)* undef
+; CHECK-NEXT: %t6 = load float addrspace(1)*, float addrspace(1)* addrspace(1)* undef
 ; Fold %t3's add within the address.
 ; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float, float addrspace(1)* %t6, i16 4
 ; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float addrspace(1)* [[SCEVGEP1]] to i8 addrspace(1)*
@@ -47,7 +47,7 @@
 bb14:                                             ; preds = %bb14, %bb10
   %t2 = getelementptr inbounds i8, i8 addrspace(1)* undef, i16 %t4 ; <i8*> [#uses=1]
   store i8 undef, i8 addrspace(1)* %t2
-  %t6 = load float addrspace(1)* addrspace(1)* undef
+  %t6 = load float addrspace(1)*, float addrspace(1)* addrspace(1)* undef
   %t8 = bitcast float addrspace(1)* %t6 to i8 addrspace(1)*              ; <i8*> [#uses=1]
   %t9 = getelementptr inbounds i8, i8 addrspace(1)* %t8, i16 %t3 ; <i8*> [#uses=1]
   store i8 undef, i8 addrspace(1)* %t9
diff --git a/llvm/test/Transforms/LoopStrengthReduce/uglygep.ll b/llvm/test/Transforms/LoopStrengthReduce/uglygep.ll
index b1d9d69..430127b3 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/uglygep.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/uglygep.ll
@@ -33,7 +33,7 @@
 
 ; CHECK: bb14:
 ; CHECK-NEXT: store i8 undef, i8* [[SCEVGEP]]
-; CHECK-NEXT: %t6 = load float** undef
+; CHECK-NEXT: %t6 = load float*, float** undef
 ; Fold %t3's add within the address.
 ; CHECK-NEXT: [[SCEVGEP1:%[^ ]+]] = getelementptr float, float* %t6, i64 4
 ; CHECK-NEXT: [[SCEVGEP2:%[^ ]+]] = bitcast float* [[SCEVGEP1]] to i8*
@@ -44,7 +44,7 @@
 bb14:                                             ; preds = %bb14, %bb10
   %t2 = getelementptr inbounds i8, i8* undef, i64 %t4 ; <i8*> [#uses=1]
   store i8 undef, i8* %t2
-  %t6 = load float** undef
+  %t6 = load float*, float** undef
   %t8 = bitcast float* %t6 to i8*              ; <i8*> [#uses=1]
   %t9 = getelementptr inbounds i8, i8* %t8, i64 %t3 ; <i8*> [#uses=1]
   store i8 undef, i8* %t9
diff --git a/llvm/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll b/llvm/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll
index c5a7bec..a87b16a 100644
--- a/llvm/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll
+++ b/llvm/test/Transforms/LoopUnroll/2011-08-08-PhiUpdate.ll
@@ -47,7 +47,7 @@
   %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %bb1 ] ; <i64> [#uses=2]
   %s.01 = phi i32 [ 0, %bb.nph ], [ %2, %bb1 ]    ; <i32> [#uses=1]
   %scevgep = getelementptr i32, i32* %p, i64 %indvar   ; <i32*> [#uses=1]
-  %1 = load i32* %scevgep, align 1                ; <i32> [#uses=1]
+  %1 = load i32, i32* %scevgep, align 1                ; <i32> [#uses=1]
   %2 = add nsw i32 %1, %s.01                      ; <i32> [#uses=2]
   br label %bb1
 
@@ -84,7 +84,7 @@
   br i1 %cond2, label %exit, label %do.cond
 
 exit:                  ; preds = %do.body
-  %tmp7.i = load i32* undef, align 8
+  %tmp7.i = load i32, i32* undef, align 8
   br i1 undef, label %do.cond, label %land.lhs.true
 
 land.lhs.true:                                    ; preds = %exit
diff --git a/llvm/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll b/llvm/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll
index 2e85d0d..0b48409 100644
--- a/llvm/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll
+++ b/llvm/test/Transforms/LoopUnroll/2011-08-09-IVSimplify.ll
@@ -29,7 +29,7 @@
   %rem = and i32 %bit_addr.addr.01, 31
   %shl = shl i32 1, %rem
   %arrayidx = getelementptr inbounds i32, i32* %bitmap, i32 %shr
-  %tmp6 = load i32* %arrayidx, align 4
+  %tmp6 = load i32, i32* %arrayidx, align 4
   %xor = xor i32 %tmp6, %shl
   store i32 %xor, i32* %arrayidx, align 4
   %inc = add i32 %bit_addr.addr.01, 1
diff --git a/llvm/test/Transforms/LoopUnroll/2011-10-01-NoopTrunc.ll b/llvm/test/Transforms/LoopUnroll/2011-10-01-NoopTrunc.ll
index c77832d..5f9eec7 100644
--- a/llvm/test/Transforms/LoopUnroll/2011-10-01-NoopTrunc.ll
+++ b/llvm/test/Transforms/LoopUnroll/2011-10-01-NoopTrunc.ll
@@ -22,7 +22,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %sum.02 = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i8, i8* %arr, i64 %indvars.iv
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = sext i8 %0 to i32
   %add = add nsw i32 %conv, %sum.02
   %indvars.iv.next = add i64 %indvars.iv, 1
diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
index 2329023..e9aa1ac 100644
--- a/llvm/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
+++ b/llvm/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
@@ -28,7 +28,7 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %sum.02
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/llvm/test/Transforms/LoopUnroll/X86/partial.ll b/llvm/test/Transforms/LoopUnroll/X86/partial.ll
index bb8a043..4566f79 100644
--- a/llvm/test/Transforms/LoopUnroll/X86/partial.ll
+++ b/llvm/test/Transforms/LoopUnroll/X86/partial.ll
@@ -11,11 +11,11 @@
   %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
   %0 = getelementptr inbounds double, double* %b, i64 %index
   %1 = bitcast double* %0 to <2 x double>*
-  %wide.load = load <2 x double>* %1, align 8
+  %wide.load = load <2 x double>, <2 x double>* %1, align 8
   %.sum9 = or i64 %index, 2
   %2 = getelementptr double, double* %b, i64 %.sum9
   %3 = bitcast double* %2 to <2 x double>*
-  %wide.load8 = load <2 x double>* %3, align 8
+  %wide.load8 = load <2 x double>, <2 x double>* %3, align 8
   %4 = fadd <2 x double> %wide.load, <double 1.000000e+00, double 1.000000e+00>
   %5 = fadd <2 x double> %wide.load8, <double 1.000000e+00, double 1.000000e+00>
   %6 = getelementptr inbounds double, double* %a, i64 %index
@@ -47,7 +47,7 @@
   %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
   %v0 = getelementptr inbounds double, double* %b, i64 %index
   %v1 = bitcast double* %v0 to <2 x double>*
-  %wide.load = load <2 x double>* %v1, align 8
+  %wide.load = load <2 x double>, <2 x double>* %v1, align 8
   %v4 = fadd <2 x double> %wide.load, <double 1.000000e+00, double 1.000000e+00>
   %v5 = fmul <2 x double> %v4, <double 8.000000e+00, double 8.000000e+00>
   %v6 = getelementptr inbounds double, double* %a, i64 %index
@@ -85,17 +85,17 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %reduction.026 = phi i16 [ %add14, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i16, i16* %arr, i64 %indvars.iv
-  %0 = load i16* %arrayidx, align 2
+  %0 = load i16, i16* %arrayidx, align 2
   %add = add i16 %0, %reduction.026
   %sext = mul i64 %indvars.iv, 12884901888
   %idxprom3 = ashr exact i64 %sext, 32
   %arrayidx4 = getelementptr inbounds i16, i16* %arr, i64 %idxprom3
-  %1 = load i16* %arrayidx4, align 2
+  %1 = load i16, i16* %arrayidx4, align 2
   %add7 = add i16 %add, %1
   %sext28 = mul i64 %indvars.iv, 21474836480
   %idxprom10 = ashr exact i64 %sext28, 32
   %arrayidx11 = getelementptr inbounds i16, i16* %arr, i64 %idxprom10
-  %2 = load i16* %arrayidx11, align 2
+  %2 = load i16, i16* %arrayidx11, align 2
   %add14 = add i16 %add7, %2
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/llvm/test/Transforms/LoopUnroll/ephemeral.ll b/llvm/test/Transforms/LoopUnroll/ephemeral.ll
index 4190520..d16eba7 100644
--- a/llvm/test/Transforms/LoopUnroll/ephemeral.ll
+++ b/llvm/test/Transforms/LoopUnroll/ephemeral.ll
@@ -13,7 +13,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
 
   ; This loop will be completely unrolled, even with these extra instructions,
   ; but only because they're ephemeral (and, thus, free).
diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics.ll
index 458828f..a9104ad 100644
--- a/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics.ll
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-heuristics.ll
@@ -47,9 +47,9 @@
   %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
   %r  = phi i32 [ 0, %entry ], [ %add, %loop ]
   %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
-  %src_element = load i32* %arrayidx, align 4
+  %src_element = load i32, i32* %arrayidx, align 4
   %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv
-  %const_array_element = load i32* %array_const_idx, align 4
+  %const_array_element = load i32, i32* %array_const_idx, align 4
   %mul = mul nsw i32 %src_element, %const_array_element
   %add = add nsw i32 %mul, %r
   %inc = add nuw nsw i64 %iv, 1
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
index ff63d54..3bec939 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -21,7 +21,7 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %sum.02
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -48,7 +48,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %sum.01
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -97,7 +97,7 @@
   %len.addr.04 = phi i32 [ %sub, %for.body ], [ %len, %entry ]
   %res.03 = phi i32 [ %add, %for.body ], [ 0, %entry ]
   %incdec.ptr = getelementptr inbounds i16, i16* %p.addr.05, i64 1
-  %0 = load i16* %p.addr.05, align 2
+  %0 = load i16, i16* %p.addr.05, align 2
   %conv = zext i16 %0 to i32
   %add = add i32 %conv, %res.03
   %sub = add nsw i32 %len.addr.04, -2
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll
index e2fc012..7684e39 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop1.ll
@@ -17,7 +17,7 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %sum.02
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll
index 7dc466b9..7c6bb96 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop2.ll
@@ -17,7 +17,7 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %sum.02
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop3.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop3.ll
index 2bbea1b2..fd13ebfa 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-loop3.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-loop3.ll
@@ -24,7 +24,7 @@
   %sum.19 = phi i32 [ %add4, %for.body3 ], [ %sum.012, %for.cond1.preheader ]
   %0 = add nsw i64 %indvars.iv, %indvars.iv16
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %0
-  %1 = load i32* %arrayidx, align 4
+  %1 = load i32, i32* %arrayidx, align 4
   %add4 = add nsw i32 %1, %sum.19
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/llvm/test/Transforms/LoopUnroll/scevunroll.ll b/llvm/test/Transforms/LoopUnroll/scevunroll.ll
index e018878..a5c9a6e 100644
--- a/llvm/test/Transforms/LoopUnroll/scevunroll.ll
+++ b/llvm/test/Transforms/LoopUnroll/scevunroll.ll
@@ -20,7 +20,7 @@
   %sum = phi i32 [ 0, %entry ], [ %sum.next, %while.body ]
   %iv.next = add i64 %iv, -1
   %adr = getelementptr inbounds i32, i32* %base, i64 %iv.next
-  %tmp = load i32* %adr, align 8
+  %tmp = load i32, i32* %adr, align 8
   %sum.next = add i32 %sum, %tmp
   %iv.narrow = trunc i64 %iv.next to i32
   %cmp.i65 = icmp sgt i32 %iv.narrow, 0
@@ -47,7 +47,7 @@
   %iv = phi i64 [ 0, %entry ], [ %inc, %tail ]
   %s = phi i64 [ 0, %entry ], [ %s.next, %tail ]
   %adr = getelementptr i64, i64* %base, i64 %iv
-  %val = load i64* %adr
+  %val = load i64, i64* %adr
   %s.next = add i64 %s, %val
   %inc = add i64 %iv, 1
   %cmp = icmp ne i64 %inc, 4
@@ -68,7 +68,7 @@
 ;
 ; CHECK-LABEL: @multiExit(
 ; CHECK: getelementptr i32, i32* %base, i32 10
-; CHECK-NEXT: load i32*
+; CHECK-NEXT: load i32, i32*
 ; CHECK: br i1 false, label %l2.10, label %exit1
 ; CHECK: l2.10:
 ; CHECK-NOT: br
@@ -82,7 +82,7 @@
   %inc1 = add i32 %iv1, 1
   %inc2 = add i32 %iv2, 1
   %adr = getelementptr i32, i32* %base, i32 %iv1
-  %val = load i32* %adr
+  %val = load i32, i32* %adr
   %cmp1 = icmp slt i32 %iv1, 5
   br i1 %cmp1, label %l2, label %exit1
 l2:
@@ -113,7 +113,7 @@
   %inc1 = add i32 %iv1, 1
   %inc2 = add i32 %iv2, 1
   %adr = getelementptr i32, i32* %base, i32 %iv1
-  %val = load i32* %adr
+  %val = load i32, i32* %adr
   %cmp1 = icmp slt i32 %iv1, 5
   br i1 %cmp1, label %l2, label %exit1
 l2:
diff --git a/llvm/test/Transforms/LoopUnroll/shifted-tripcount.ll b/llvm/test/Transforms/LoopUnroll/shifted-tripcount.ll
index fb3d857..4c21698 100644
--- a/llvm/test/Transforms/LoopUnroll/shifted-tripcount.ll
+++ b/llvm/test/Transforms/LoopUnroll/shifted-tripcount.ll
@@ -16,8 +16,8 @@
   %arrayidx7 = getelementptr double, double* %p, i64 %i.013 ; <double*> [#uses=2]
   %tmp16 = add i64 %i.013, 1                      ; <i64> [#uses=3]
   %arrayidx = getelementptr double, double* %p, i64 %tmp16 ; <double*> [#uses=1]
-  %tmp4 = load double* %arrayidx                  ; <double> [#uses=1]
-  %tmp8 = load double* %arrayidx7                 ; <double> [#uses=1]
+  %tmp4 = load double, double* %arrayidx                  ; <double> [#uses=1]
+  %tmp8 = load double, double* %arrayidx7                 ; <double> [#uses=1]
   %mul9 = fmul double %tmp8, %tmp4                ; <double> [#uses=1]
   store double %mul9, double* %arrayidx7
   %exitcond = icmp eq i64 %tmp16, %mul10          ; <i1> [#uses=1]
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll b/llvm/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll
index e185ddd..dc812fb 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-pragmas-disabled.ll
@@ -20,7 +20,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
@@ -52,7 +52,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
@@ -79,7 +79,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
@@ -111,7 +111,7 @@
 for.body3:                                        ; preds = %for.body3, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body3 ]
   %arrayidx = getelementptr inbounds i32, i32* %List, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add4 = add nsw i32 %0, 10
   store i32 %add4, i32* %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
@@ -125,7 +125,7 @@
   %indvars.iv.1 = phi i64 [ %1, %for.body3.1 ], [ 0, %for.body3.1.preheader ]
   %1 = add nsw i64 %indvars.iv.1, 1
   %arrayidx.1 = getelementptr inbounds i32, i32* %List, i64 %1
-  %2 = load i32* %arrayidx.1, align 4
+  %2 = load i32, i32* %arrayidx.1, align 4
   %add4.1 = add nsw i32 %2, 10
   store i32 %add4.1, i32* %arrayidx.1, align 4
   %exitcond.1 = icmp eq i64 %1, 4
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll b/llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll
index 3840f0b..1354181 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll
@@ -20,7 +20,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
@@ -44,7 +44,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
@@ -72,7 +72,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
@@ -95,7 +95,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
@@ -125,7 +125,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
@@ -154,7 +154,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
@@ -191,7 +191,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
@@ -218,7 +218,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
@@ -246,7 +246,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %inc = add nsw i32 %0, 1
   store i32 %inc, i32* %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
diff --git a/llvm/test/Transforms/LoopUnswitch/2008-06-17-DomFrontier.ll b/llvm/test/Transforms/LoopUnswitch/2008-06-17-DomFrontier.ll
index f74054a..d606ea9 100644
--- a/llvm/test/Transforms/LoopUnswitch/2008-06-17-DomFrontier.ll
+++ b/llvm/test/Transforms/LoopUnswitch/2008-06-17-DomFrontier.ll
@@ -17,6 +17,6 @@
 	store i16 0, i16* @g_56, align 2
 	br i1 false, label %bb44, label %bb3
 bb44:		; preds = %bb44, %bb36
-	%tmp46 = load i16* @g_56, align 2		; <i16> [#uses=0]
+	%tmp46 = load i16, i16* @g_56, align 2		; <i16> [#uses=0]
 	br i1 false, label %bb, label %bb44
 }
diff --git a/llvm/test/Transforms/LoopUnswitch/2010-11-18-LCSSA.ll b/llvm/test/Transforms/LoopUnswitch/2010-11-18-LCSSA.ll
index a976d18..3b89fa9 100644
--- a/llvm/test/Transforms/LoopUnswitch/2010-11-18-LCSSA.ll
+++ b/llvm/test/Transforms/LoopUnswitch/2010-11-18-LCSSA.ll
@@ -7,7 +7,7 @@
   br i1 true, label %for.end12, label %bb.nph
 
 bb.nph:                                           ; preds = %entry
-  %g_38.promoted = load i32* @g_38
+  %g_38.promoted = load i32, i32* @g_38
   br label %for.body
 
 for.body:                                         ; preds = %for.cond, %bb.nph
diff --git a/llvm/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll b/llvm/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll
index 5717dd8..0b7f91f 100644
--- a/llvm/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll
+++ b/llvm/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll
@@ -24,7 +24,7 @@
   %idxprom = sext i32 %inc1 to i64
   %array_ = getelementptr inbounds %class.MyContainer.1.3.19.29, %class.MyContainer.1.3.19.29* %this, i32 0, i32 0
   %arrayidx = getelementptr inbounds [6 x %class.MyMemVarClass.0.2.18.28*], [6 x %class.MyMemVarClass.0.2.18.28*]* %array_, i32 0, i64 %idxprom
-  %tmp4 = load %class.MyMemVarClass.0.2.18.28** %arrayidx, align 8
+  %tmp4 = load %class.MyMemVarClass.0.2.18.28*, %class.MyMemVarClass.0.2.18.28** %arrayidx, align 8
   %isnull = icmp eq %class.MyMemVarClass.0.2.18.28* %tmp4, null
   br i1 %isnull, label %for.inc, label %delete.notnull
 
diff --git a/llvm/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll b/llvm/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
index a8608b8..a35596a 100644
--- a/llvm/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
+++ b/llvm/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
@@ -15,7 +15,7 @@
 ; CHECK-NEXT:   br label %loop_begin.us
 
 ; CHECK:      loop_begin.us:                                    ; preds = %loop_begin.backedge.us, %.split.us
-; CHECK-NEXT:   %var_val.us = load i32* %var
+; CHECK-NEXT:   %var_val.us = load i32, i32* %var
 ; CHECK-NEXT:   switch i32 1, label %default.us-lcssa.us [
 ; CHECK-NEXT:     i32 1, label %inc.us
 
@@ -34,7 +34,7 @@
 ; CHECK-NEXT:   br label %loop_begin.us1
 
 ; CHECK:      loop_begin.us1:                                   ; preds = %loop_begin.backedge.us5, %.split.split.us
-; CHECK-NEXT:   %var_val.us2 = load i32* %var
+; CHECK-NEXT:   %var_val.us2 = load i32, i32* %var
 ; CHECK-NEXT:   switch i32 2, label %default.us-lcssa.us-lcssa.us [
 ; CHECK-NEXT:     i32 1, label %inc.us4
 ; CHECK-NEXT:     i32 2, label %dec.us3
@@ -48,7 +48,7 @@
 ; CHECK-NEXT:   br label %loop_begin
 
 ; CHECK:      loop_begin:                                       ; preds = %loop_begin.backedge, %.split.split
-; CHECK-NEXT:   %var_val = load i32* %var
+; CHECK-NEXT:   %var_val = load i32, i32* %var
 ; CHECK-NEXT:   switch i32 %c, label %default.us-lcssa.us-lcssa [
 ; CHECK-NEXT:     i32 1, label %inc
 ; CHECK-NEXT:     i32 2, label %dec
@@ -63,13 +63,13 @@
 define i32 @test(i32* %var) {
   %mem = alloca i32
   store i32 2, i32* %mem
-  %c = load i32* %mem
+  %c = load i32, i32* %mem
 
   br label %loop_begin
 
 loop_begin:
 
-  %var_val = load i32* %var
+  %var_val = load i32, i32* %var
 
   switch i32 %c, label %default [
       i32 1, label %inc
diff --git a/llvm/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll b/llvm/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
index 686cedb..393dd5c 100644
--- a/llvm/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
+++ b/llvm/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
@@ -52,14 +52,14 @@
 define i32 @test(i32* %var) {
   %mem = alloca i32
   store i32 2, i32* %mem
-  %c = load i32* %mem
-  %d = load i32* %mem
+  %c = load i32, i32* %mem
+  %d = load i32, i32* %mem
 
   br label %loop_begin
 
 loop_begin:
 
-  %var_val = load i32* %var
+  %var_val = load i32, i32* %var
 
   switch i32 %c, label %second_switch [
       i32 1, label %inc
diff --git a/llvm/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll b/llvm/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
index 3ba9fc2..20f03c9 100644
--- a/llvm/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
+++ b/llvm/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
@@ -22,7 +22,7 @@
 ; CHECK-NEXT:   br label %loop_begin.us.us
 
 ; CHECK:      loop_begin.us.us:                                 ; preds = %loop_begin.backedge.us.us, %.split.us.split.us
-; CHECK-NEXT:   %var_val.us.us = load i32* %var
+; CHECK-NEXT:   %var_val.us.us = load i32, i32* %var
 ; CHECK-NEXT:   switch i32 1, label %second_switch.us.us [
 ; CHECK-NEXT:     i32 1, label %inc.us.us
 
@@ -38,7 +38,7 @@
 ; CHECK-NEXT:   br label %loop_begin.us
 
 ; CHECK:      loop_begin.us:                                    ; preds = %loop_begin.backedge.us, %.split.us.split
-; CHECK-NEXT:   %var_val.us = load i32* %var
+; CHECK-NEXT:   %var_val.us = load i32, i32* %var
 ; CHECK-NEXT:   switch i32 1, label %second_switch.us [
 ; CHECK-NEXT:     i32 1, label %inc.us
 
@@ -65,7 +65,7 @@
 ; CHECK-NEXT:   br label %loop_begin.us1
 
 ; CHECK:      loop_begin.us1:                                   ; preds = %loop_begin.backedge.us6, %.split.split.us
-; CHECK-NEXT:   %var_val.us2 = load i32* %var
+; CHECK-NEXT:   %var_val.us2 = load i32, i32* %var
 ; CHECK-NEXT:   switch i32 %c, label %second_switch.us3 [
 ; CHECK-NEXT:     i32 1, label %loop_begin.inc_crit_edge.us
 ; CHECK-NEXT:   ]
@@ -86,7 +86,7 @@
 ; CHECK-NEXT:   br label %loop_begin
 
 ; CHECK:      loop_begin:                                       ; preds = %loop_begin.backedge, %.split.split
-; CHECK-NEXT:   %var_val = load i32* %var
+; CHECK-NEXT:   %var_val = load i32, i32* %var
 ; CHECK-NEXT:   switch i32 %c, label %second_switch [
 ; CHECK-NEXT:     i32 1, label %loop_begin.inc_crit_edge
 ; CHECK-NEXT:   ]
@@ -106,14 +106,14 @@
 define i32 @test(i32* %var) {
   %mem = alloca i32
   store i32 2, i32* %mem
-  %c = load i32* %mem
-  %d = load i32* %mem
+  %c = load i32, i32* %mem
+  %d = load i32, i32* %mem
 
   br label %loop_begin
 
 loop_begin:
 
-  %var_val = load i32* %var
+  %var_val = load i32, i32* %var
 
   switch i32 %c, label %second_switch [
       i32 1, label %inc
diff --git a/llvm/test/Transforms/LoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll b/llvm/test/Transforms/LoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll
index 80e4d78..223fbf1 100644
--- a/llvm/test/Transforms/LoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll
+++ b/llvm/test/Transforms/LoopUnswitch/2012-04-30-LoopUnswitch-LPad-Crash.ll
@@ -46,9 +46,9 @@
 entry:
   %this.addr = alloca %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379*, align 8
   store %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379** %this.addr, align 8
-  %this1 = load %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379** %this.addr
+  %this1 = load %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379*, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379** %this.addr
   %px = getelementptr inbounds %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379, %class.D.22.42.66.102.138.158.178.198.238.242.246.250.262.294.302.338.346.379* %this1, i32 0, i32 0
-  %0 = load %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376** %px, align 8
+  %0 = load %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376*, %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376** %px, align 8
   %tobool = icmp ne %class.C.23.43.67.103.139.159.179.199.239.243.247.251.263.295.303.339.347.376* %0, null
   br i1 %tobool, label %cond.end, label %cond.false
 
diff --git a/llvm/test/Transforms/LoopUnswitch/2012-05-20-Phi.ll b/llvm/test/Transforms/LoopUnswitch/2012-05-20-Phi.ll
index 4c63a56..96bc28c 100644
--- a/llvm/test/Transforms/LoopUnswitch/2012-05-20-Phi.ll
+++ b/llvm/test/Transforms/LoopUnswitch/2012-05-20-Phi.ll
@@ -9,9 +9,9 @@
 
 define void @func() noreturn nounwind uwtable {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %tobool = icmp eq i32 %0, 0
-  %1 = load i32* @b, align 4
+  %1 = load i32, i32* @b, align 4
   br label %while.body
 
 while.body:                                       ; preds = %while.body, %entry
diff --git a/llvm/test/Transforms/LoopUnswitch/basictest.ll b/llvm/test/Transforms/LoopUnswitch/basictest.ll
index 2a0f5a5..e990144 100644
--- a/llvm/test/Transforms/LoopUnswitch/basictest.ll
+++ b/llvm/test/Transforms/LoopUnswitch/basictest.ll
@@ -7,12 +7,12 @@
 	%i.0.0 = phi i32 [ 0, %entry ], [ %i.0.0.be, %no_exit.backedge ]		; <i32> [#uses=3]
 	%gep.upgrd.1 = zext i32 %i.0.0 to i64		; <i64> [#uses=1]
 	%tmp.7 = getelementptr i32, i32* %A, i64 %gep.upgrd.1		; <i32*> [#uses=4]
-	%tmp.13 = load i32* %tmp.7		; <i32> [#uses=2]
+	%tmp.13 = load i32, i32* %tmp.7		; <i32> [#uses=2]
 	%tmp.14 = add i32 %tmp.13, 1		; <i32> [#uses=1]
 	store i32 %tmp.14, i32* %tmp.7
 	br i1 %C, label %then, label %endif
 then:		; preds = %no_exit
-	%tmp.29 = load i32* %tmp.7		; <i32> [#uses=1]
+	%tmp.29 = load i32, i32* %tmp.7		; <i32> [#uses=1]
 	%tmp.30 = add i32 %tmp.29, 2		; <i32> [#uses=1]
 	store i32 %tmp.30, i32* %tmp.7
 	%inc9 = add i32 %i.0.0, 1		; <i32> [#uses=2]
@@ -36,13 +36,13 @@
 define i32 @test2(i32* %var) {
   %mem = alloca i32
   store i32 2, i32* %mem
-  %c = load i32* %mem
+  %c = load i32, i32* %mem
 
   br label %loop_begin
 
 loop_begin:
 
-  %var_val = load i32* %var
+  %var_val = load i32, i32* %var
 
   switch i32 %c, label %default [
       i32 1, label %inc
diff --git a/llvm/test/Transforms/LoopUnswitch/preserve-analyses.ll b/llvm/test/Transforms/LoopUnswitch/preserve-analyses.ll
index 2725745..e3774a1 100644
--- a/llvm/test/Transforms/LoopUnswitch/preserve-analyses.ll
+++ b/llvm/test/Transforms/LoopUnswitch/preserve-analyses.ll
@@ -11,8 +11,8 @@
 
 define i32 @ineqn(i8* %s, i8* %p) nounwind readonly {
 entry:
-  %0 = load i32* @delim1, align 4                 ; <i32> [#uses=1]
-  %1 = load i32* @delim2, align 4                 ; <i32> [#uses=1]
+  %0 = load i32, i32* @delim1, align 4                 ; <i32> [#uses=1]
+  %1 = load i32, i32* @delim2, align 4                 ; <i32> [#uses=1]
   br label %bb8.outer
 
 bb:                                               ; preds = %bb8
@@ -61,7 +61,7 @@
 
 bb8:                                              ; preds = %bb8.outer, %bb8.backedge
   %p_addr.0 = phi i8* [ %p_addr.0.ph, %bb8.outer ], [ %3, %bb8.backedge ] ; <i8*> [#uses=3]
-  %7 = load i8* %p_addr.0, align 1                ; <i8> [#uses=2]
+  %7 = load i8, i8* %p_addr.0, align 1                ; <i8> [#uses=2]
   %8 = sext i8 %7 to i32                          ; <i32> [#uses=2]
   %9 = icmp eq i8 %7, 0                           ; <i1> [#uses=1]
   br i1 %9, label %bb10, label %bb
diff --git a/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll b/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
index aa2e618..d5e020c 100644
--- a/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
+++ b/llvm/test/Transforms/LoopVectorize/12-12-11-if-conv.ll
@@ -15,7 +15,7 @@
 for.body:                                         ; preds = %entry, %if.end
   %indvars.iv = phi i64 [ %indvars.iv.next, %if.end ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %if.end, label %if.then
 
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll
index 83e39a1..a689f44 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/aarch64-unroll.ll
@@ -23,9 +23,9 @@
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %1 = load i32* %arrayidx2, align 4
+  %1 = load i32, i32* %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
   store i32 %add, i32* %arrayidx4, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll b/llvm/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll
index a7a78c7..4cd703f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/arbitrary-induction-step.ll
@@ -11,8 +11,8 @@
 ;   }
 
 ; CHECK-LABEL: @ind_plus2(
-; CHECK: load <4 x i32>*
-; CHECK: load <4 x i32>*
+; CHECK: load <4 x i32>, <4 x i32>*
+; CHECK: load <4 x i32>, <4 x i32>*
 ; CHECK: mul nsw <4 x i32>
 ; CHECK: mul nsw <4 x i32>
 ; CHECK: add nsw <4 x i32>
@@ -21,7 +21,7 @@
 ; CHECK: icmp eq i64 %index.next, 512
 
 ; FORCE-VEC-LABEL: @ind_plus2(
-; FORCE-VEC: %wide.load = load <2 x i32>*
+; FORCE-VEC: %wide.load = load <2 x i32>, <2 x i32>*
 ; FORCE-VEC: mul nsw <2 x i32>
 ; FORCE-VEC: add nsw <2 x i32>
 ; FORCE-VEC: %index.next = add i64 %index, 2
@@ -35,7 +35,7 @@
   %i = phi i32 [ 0, %entry ], [ %add1, %for.body ]
   %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %inc.ptr = getelementptr inbounds i32, i32* %A.addr, i64 1
-  %0 = load i32* %A.addr, align 4
+  %0 = load i32, i32* %A.addr, align 4
   %mul = mul nsw i32 %0, %i
   %add = add nsw i32 %mul, %sum
   %add1 = add nsw i32 %i, 2
@@ -55,8 +55,8 @@
 ;   }
 
 ; CHECK-LABEL: @ind_minus2(
-; CHECK: load <4 x i32>*
-; CHECK: load <4 x i32>*
+; CHECK: load <4 x i32>, <4 x i32>*
+; CHECK: load <4 x i32>, <4 x i32>*
 ; CHECK: mul nsw <4 x i32>
 ; CHECK: mul nsw <4 x i32>
 ; CHECK: add nsw <4 x i32>
@@ -65,7 +65,7 @@
 ; CHECK: icmp eq i64 %index.next, 512
 
 ; FORCE-VEC-LABEL: @ind_minus2(
-; FORCE-VEC: %wide.load = load <2 x i32>*
+; FORCE-VEC: %wide.load = load <2 x i32>, <2 x i32>*
 ; FORCE-VEC: mul nsw <2 x i32>
 ; FORCE-VEC: add nsw <2 x i32>
 ; FORCE-VEC: %index.next = add i64 %index, 2
@@ -79,7 +79,7 @@
   %i = phi i32 [ 1024, %entry ], [ %sub, %for.body ]
   %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %inc.ptr = getelementptr inbounds i32, i32* %A.addr, i64 1
-  %0 = load i32* %A.addr, align 4
+  %0 = load i32, i32* %A.addr, align 4
   %mul = mul nsw i32 %0, %i
   %add = add nsw i32 %mul, %sum
   %sub = add nsw i32 %i, -2
@@ -102,10 +102,10 @@
 ;   }
 
 ; CHECK-LABEL: @ptr_ind_plus2(
-; CHECK: load i32*
-; CHECK: load i32*
-; CHECK: load i32*
-; CHECK: load i32*
+; CHECK: load i32, i32*
+; CHECK: load i32, i32*
+; CHECK: load i32, i32*
+; CHECK: load i32, i32*
 ; CHECK: mul nsw i32
 ; CHECK: mul nsw i32
 ; CHECK: add nsw i32
@@ -114,13 +114,13 @@
 ; CHECK: %21 = icmp eq i64 %index.next, 1024
 
 ; FORCE-VEC-LABEL: @ptr_ind_plus2(
-; FORCE-VEC: load i32*
+; FORCE-VEC: load i32, i32*
 ; FORCE-VEC: insertelement <2 x i32>
-; FORCE-VEC: load i32*
+; FORCE-VEC: load i32, i32*
 ; FORCE-VEC: insertelement <2 x i32>
-; FORCE-VEC: load i32*
+; FORCE-VEC: load i32, i32*
 ; FORCE-VEC: insertelement <2 x i32>
-; FORCE-VEC: load i32*
+; FORCE-VEC: load i32, i32*
 ; FORCE-VEC: insertelement <2 x i32>
 ; FORCE-VEC: mul nsw <2 x i32>
 ; FORCE-VEC: add nsw <2 x i32>
@@ -135,9 +135,9 @@
   %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %inc.ptr = getelementptr inbounds i32, i32* %A.addr, i64 1
-  %0 = load i32* %A.addr, align 4
+  %0 = load i32, i32* %A.addr, align 4
   %inc.ptr1 = getelementptr inbounds i32, i32* %A.addr, i64 2
-  %1 = load i32* %inc.ptr, align 4
+  %1 = load i32, i32* %inc.ptr, align 4
   %mul = mul nsw i32 %1, %0
   %add = add nsw i32 %mul, %sum
   %inc = add nsw i32 %i, 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll
index 159aaf4..395b468 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/arm64-unroll.ll
@@ -23,9 +23,9 @@
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %1 = load i32* %arrayidx2, align 4
+  %1 = load i32, i32* %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
   store i32 %add, i32* %arrayidx4, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll
index 302ec79..46b8ef1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/gather-cost.ll
@@ -31,23 +31,23 @@
   %add = add i64 %v.055, %offset
   %mul = mul i64 %add, 3
   %arrayidx = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %mul
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds [512 x float], [512 x float]* @kernel, i64 0, i64 %v.055
-  %1 = load float* %arrayidx2, align 4
+  %1 = load float, float* %arrayidx2, align 4
   %mul3 = fmul fast float %0, %1
   %arrayidx4 = getelementptr inbounds [512 x float], [512 x float]* @kernel2, i64 0, i64 %v.055
-  %2 = load float* %arrayidx4, align 4
+  %2 = load float, float* %arrayidx4, align 4
   %mul5 = fmul fast float %mul3, %2
   %arrayidx6 = getelementptr inbounds [512 x float], [512 x float]* @kernel3, i64 0, i64 %v.055
-  %3 = load float* %arrayidx6, align 4
+  %3 = load float, float* %arrayidx6, align 4
   %mul7 = fmul fast float %mul5, %3
   %arrayidx8 = getelementptr inbounds [512 x float], [512 x float]* @kernel4, i64 0, i64 %v.055
-  %4 = load float* %arrayidx8, align 4
+  %4 = load float, float* %arrayidx8, align 4
   %mul9 = fmul fast float %mul7, %4
   %add10 = fadd fast float %r.057, %mul9
   %arrayidx.sum = add i64 %mul, 1
   %arrayidx11 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum
-  %5 = load float* %arrayidx11, align 4
+  %5 = load float, float* %arrayidx11, align 4
   %mul13 = fmul fast float %1, %5
   %mul15 = fmul fast float %2, %mul13
   %mul17 = fmul fast float %3, %mul15
@@ -55,7 +55,7 @@
   %add20 = fadd fast float %g.056, %mul19
   %arrayidx.sum52 = add i64 %mul, 2
   %arrayidx21 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum52
-  %6 = load float* %arrayidx21, align 4
+  %6 = load float, float* %arrayidx21, align 4
   %mul23 = fmul fast float %1, %6
   %mul25 = fmul fast float %2, %mul23
   %mul27 = fmul fast float %3, %mul25
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll
index d1ca199..f3c6548 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sdiv-pow2.ll
@@ -7,7 +7,7 @@
 @Foo = common global %struct.anon zeroinitializer, align 4
 
 ; CHECK-LABEL: @foo(
-; CHECK: load <4 x i32>*
+; CHECK: load <4 x i32>, <4 x i32>*
 ; CHECK: sdiv <4 x i32>
 ; CHECK: store <4 x i32>
 
@@ -18,7 +18,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds %struct.anon, %struct.anon* @Foo, i64 0, i32 2, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %div = sdiv i32 %0, 2
   %arrayidx2 = getelementptr inbounds %struct.anon, %struct.anon* @Foo, i64 0, i32 0, i64 %indvars.iv
   store i32 %div, i32* %arrayidx2, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll b/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
index 96e1384..7b09913 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/arm-unroll.ll
@@ -21,7 +21,7 @@
   %i.02 = phi i32 [ %5, %.lr.ph ], [ 0, %0 ]
   %sum.01 = phi i32 [ %4, %.lr.ph ], [ 0, %0 ]
   %2 = getelementptr inbounds i32, i32* %A, i32 %i.02
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = add nsw i32 %3, %sum.01
   %5 = add nsw i32 %i.02, 1
   %exitcond = icmp eq i32 %5, %n
@@ -49,7 +49,7 @@
   %sum.05 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
   %sum.06 = phi i32 [ %10, %.lr.ph ], [ 0, %0 ]
   %2 = getelementptr inbounds i32, i32* %A, i32 %i.02
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = add nsw i32 %3, %sum.01
   %5 = add nsw i32 %i.02, 1
   %6 = add nsw i32 %3, %sum.02
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll
index 1b5e45e..f14a8cc 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/gather-cost.ll
@@ -34,23 +34,23 @@
   %add = add i32 %v.055, %offset
   %mul = mul i32 %add, 3
   %arrayidx = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i32 0, i32 %mul
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds [512 x float], [512 x float]* @kernel, i32 0, i32 %v.055
-  %1 = load float* %arrayidx2, align 4
+  %1 = load float, float* %arrayidx2, align 4
   %mul3 = fmul fast float %0, %1
   %arrayidx4 = getelementptr inbounds [512 x float], [512 x float]* @kernel2, i32 0, i32 %v.055
-  %2 = load float* %arrayidx4, align 4
+  %2 = load float, float* %arrayidx4, align 4
   %mul5 = fmul fast float %mul3, %2
   %arrayidx6 = getelementptr inbounds [512 x float], [512 x float]* @kernel3, i32 0, i32 %v.055
-  %3 = load float* %arrayidx6, align 4
+  %3 = load float, float* %arrayidx6, align 4
   %mul7 = fmul fast float %mul5, %3
   %arrayidx8 = getelementptr inbounds [512 x float], [512 x float]* @kernel4, i32 0, i32 %v.055
-  %4 = load float* %arrayidx8, align 4
+  %4 = load float, float* %arrayidx8, align 4
   %mul9 = fmul fast float %mul7, %4
   %add10 = fadd fast float %r.057, %mul9
   %arrayidx.sum = add i32 %mul, 1
   %arrayidx11 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i32 0, i32 %arrayidx.sum
-  %5 = load float* %arrayidx11, align 4
+  %5 = load float, float* %arrayidx11, align 4
   %mul13 = fmul fast float %1, %5
   %mul15 = fmul fast float %2, %mul13
   %mul17 = fmul fast float %3, %mul15
@@ -58,7 +58,7 @@
   %add20 = fadd fast float %g.056, %mul19
   %arrayidx.sum52 = add i32 %mul, 2
   %arrayidx21 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i32 0, i32 %arrayidx.sum52
-  %6 = load float* %arrayidx21, align 4
+  %6 = load float, float* %arrayidx21, align 4
   %mul23 = fmul fast float %1, %6
   %mul25 = fmul fast float %2, %mul23
   %mul27 = fmul fast float %3, %mul25
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/gcc-examples.ll b/llvm/test/Transforms/LoopVectorize/ARM/gcc-examples.ll
index ae61da0..783156d 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/gcc-examples.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/gcc-examples.ll
@@ -19,9 +19,9 @@
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %6 = add nsw i32 %5, %3
   %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
   store i32 %6, i32* %7, align 4
@@ -45,7 +45,7 @@
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %2 = getelementptr inbounds i16, i16* %sb, i64 %indvars.iv
-  %3 = load i16* %2, align 2
+  %3 = load i16, i16* %2, align 2
   %4 = sext i16 %3 to i32
   %5 = getelementptr inbounds i32, i32* %ia, i64 %indvars.iv
   store i32 %4, i32* %5, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll b/llvm/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll
index d2e3de2..e88fcca 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mul-cast-vect.ll
@@ -16,9 +16,9 @@
 
 define void @direct(%T432* %loadaddr, %T432* %loadaddr2, %T432* %storeaddr) {
 ; COST: function 'direct':
-  %v0 = load %T432* %loadaddr
+  %v0 = load %T432, %T432* %loadaddr
 ; ASM: vld1.64
-  %v1 = load %T432* %loadaddr2
+  %v1 = load %T432, %T432* %loadaddr2
 ; ASM: vld1.64
   %r3 = mul %T432 %v0, %v1 
 ; COST: cost of 2 for instruction: {{.*}} mul <4 x i32>
@@ -30,9 +30,9 @@
 
 define void @ups1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
 ; COST: function 'ups1632':
-  %v0 = load %T416* %loadaddr
+  %v0 = load %T416, %T416* %loadaddr
 ; ASM: vldr
-  %v1 = load %T416* %loadaddr2
+  %v1 = load %T416, %T416* %loadaddr2
 ; ASM: vldr
   %r1 = sext %T416 %v0 to %T432
   %r2 = sext %T416 %v1 to %T432
@@ -47,9 +47,9 @@
 
 define void @upu1632(%T416* %loadaddr, %T416* %loadaddr2, %T432* %storeaddr) {
 ; COST: function 'upu1632':
-  %v0 = load %T416* %loadaddr
+  %v0 = load %T416, %T416* %loadaddr
 ; ASM: vldr
-  %v1 = load %T416* %loadaddr2
+  %v1 = load %T416, %T416* %loadaddr2
 ; ASM: vldr
   %r1 = zext %T416 %v0 to %T432
   %r2 = zext %T416 %v1 to %T432
@@ -64,9 +64,9 @@
 
 define void @ups3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
 ; COST: function 'ups3264':
-  %v0 = load %T232* %loadaddr
+  %v0 = load %T232, %T232* %loadaddr
 ; ASM: vldr
-  %v1 = load %T232* %loadaddr2
+  %v1 = load %T232, %T232* %loadaddr2
 ; ASM: vldr
   %r3 = mul %T232 %v0, %v1 
 ; ASM: vmul.i32
@@ -81,9 +81,9 @@
 
 define void @upu3264(%T232* %loadaddr, %T232* %loadaddr2, %T264* %storeaddr) {
 ; COST: function 'upu3264':
-  %v0 = load %T232* %loadaddr
+  %v0 = load %T232, %T232* %loadaddr
 ; ASM: vldr
-  %v1 = load %T232* %loadaddr2
+  %v1 = load %T232, %T232* %loadaddr2
 ; ASM: vldr
   %r3 = mul %T232 %v0, %v1 
 ; ASM: vmul.i32
@@ -98,9 +98,9 @@
 
 define void @dn3216(%T432* %loadaddr, %T432* %loadaddr2, %T416* %storeaddr) {
 ; COST: function 'dn3216':
-  %v0 = load %T432* %loadaddr
+  %v0 = load %T432, %T432* %loadaddr
 ; ASM: vld1.64
-  %v1 = load %T432* %loadaddr2
+  %v1 = load %T432, %T432* %loadaddr2
 ; ASM: vld1.64
   %r3 = mul %T432 %v0, %v1 
 ; ASM: vmul.i32
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/width-detect.ll b/llvm/test/Transforms/LoopVectorize/ARM/width-detect.ll
index f970e92..66d2556 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/width-detect.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/width-detect.ll
@@ -14,7 +14,7 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %prod.01 = phi float [ %4, %.lr.ph ], [ 0.000000e+00, %0 ]
   %2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %3 = load float* %2, align 8
+  %3 = load float, float* %2, align 8
   %4 = fmul fast float %prod.01, %3
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -37,7 +37,7 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %red.01 = phi i8 [ %4, %.lr.ph ], [ 0, %0 ]
   %2 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv
-  %3 = load i8* %2, align 1
+  %3 = load i8, i8* %2, align 1
   %4 = xor i8 %3, %red.01
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
index bc04347..2898af2 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
@@ -30,7 +30,7 @@
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
   %redx.05 = phi double [ 0.000000e+00, %for.body.lr.ph ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds double, double* %arr, i64 %indvars.iv
-  %1 = load double* %arrayidx, align 8
+  %1 = load double, double* %arrayidx, align 8
   %add = fadd fast double %1, %redx.05
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv to i32
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
index 27a1102..65b3919 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
@@ -9,7 +9,7 @@
 
 define signext i32 @s173() #0 {
 entry:
-  %0 = load i32* @ntimes, align 4
+  %0 = load i32, i32* @ntimes, align 4
   %cmp21 = icmp sgt i32 %0, 0
   br i1 %cmp21, label %for.cond1.preheader, label %for.end12
 
@@ -20,9 +20,9 @@
 for.body3:                                        ; preds = %for.body3, %for.cond1.preheader
   %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
   %arrayidx = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 0, i64 %indvars.iv
-  %1 = load float* %arrayidx, align 4
+  %1 = load float, float* %arrayidx, align 4
   %arrayidx5 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 3, i64 %indvars.iv
-  %2 = load float* %arrayidx5, align 4
+  %2 = load float, float* %arrayidx5, align 4
   %add = fadd float %1, %2
   %3 = add nsw i64 %indvars.iv, 16000
   %arrayidx8 = getelementptr inbounds %struct.GlobalData, %struct.GlobalData* @global_data, i64 0, i32 0, i64 %3
@@ -33,7 +33,7 @@
 
 for.end:                                          ; preds = %for.body3
   %inc11 = add nsw i32 %nl.022, 1
-  %4 = load i32* @ntimes, align 4
+  %4 = load i32, i32* @ntimes, align 4
   %mul = mul nsw i32 %4, 10
   %cmp = icmp slt i32 %inc11, %mul
   br i1 %cmp, label %for.cond1.preheader, label %for.end12
@@ -42,7 +42,7 @@
   ret i32 0
 
 ; CHECK-LABEL: @s173
-; CHECK: load <4 x float>*
+; CHECK: load <4 x float>, <4 x float>*
 ; CHECK: add i64 %index, 16000
 ; CHECK: ret i32 0
 }
diff --git a/llvm/test/Transforms/LoopVectorize/X86/already-vectorized.ll b/llvm/test/Transforms/LoopVectorize/X86/already-vectorized.ll
index dcf2c6e..248d6dc 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/already-vectorized.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/already-vectorized.ll
@@ -22,7 +22,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds [255 x i32], [255 x i32]* @a, i64 0, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %red.05
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 255
diff --git a/llvm/test/Transforms/LoopVectorize/X86/assume.ll b/llvm/test/Transforms/LoopVectorize/X86/assume.ll
index c036bba..4fd378d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/assume.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/assume.ll
@@ -23,7 +23,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %cmp1 = fcmp ogt float %0, 1.000000e+02
   tail call void @llvm.assume(i1 %cmp1)
   %add = fadd float %0, 1.000000e+00
@@ -49,12 +49,12 @@
 define void @test2(%struct.data* nocapture readonly %d) #0 {
 entry:
   %b = getelementptr inbounds %struct.data, %struct.data* %d, i64 0, i32 1
-  %0 = load float** %b, align 8
+  %0 = load float*, float** %b, align 8
   %ptrint = ptrtoint float* %0 to i64
   %maskedptr = and i64 %ptrint, 31
   %maskcond = icmp eq i64 %maskedptr, 0
   %a = getelementptr inbounds %struct.data, %struct.data* %d, i64 0, i32 0
-  %1 = load float** %a, align 8
+  %1 = load float*, float** %a, align 8
   %ptrint2 = ptrtoint float* %1 to i64
   %maskedptr3 = and i64 %ptrint2, 31
   %maskcond4 = icmp eq i64 %maskedptr3, 0
@@ -85,7 +85,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   tail call void @llvm.assume(i1 %maskcond)
   %arrayidx = getelementptr inbounds float, float* %0, i64 %indvars.iv
-  %2 = load float* %arrayidx, align 4
+  %2 = load float, float* %arrayidx, align 4
   %add = fadd float %2, 1.000000e+00
   tail call void @llvm.assume(i1 %maskcond4)
   %arrayidx5 = getelementptr inbounds float, float* %1, i64 %indvars.iv
diff --git a/llvm/test/Transforms/LoopVectorize/X86/avx1.ll b/llvm/test/Transforms/LoopVectorize/X86/avx1.ll
index 95088df..37977c4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/avx1.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/avx1.ll
@@ -13,7 +13,7 @@
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %3 = load float* %2, align 4
+  %3 = load float, float* %2, align 4
   %4 = fmul float %3, 3.000000e+00
   store float %4, float* %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -36,7 +36,7 @@
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %2 = getelementptr inbounds i64, i64* %a, i64 %indvars.iv
-  %3 = load i64* %2, align 4
+  %3 = load i64, i64* %2, align 4
   %4 = add i64 %3, 3
   store i64 %4, i64* %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll
index 106bd84..d75b1d9 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/constant-vector-operand.ll
@@ -16,7 +16,7 @@
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %shl = ashr i32 %0, 3
   %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
   store i32 %shl, i32* %arrayidx2, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index 190e130..0136571 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -22,12 +22,12 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %0 = shl nsw i64 %indvars.iv, 1
   %arrayidx = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %0
-  %1 = load i32* %arrayidx, align 8
+  %1 = load i32, i32* %arrayidx, align 8
   %idxprom1 = sext i32 %1 to i64
   %arrayidx2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %idxprom1
-  %2 = load i32* %arrayidx2, align 4
+  %2 = load i32, i32* %arrayidx2, align 4
   %arrayidx4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @d, i64 0, i64 %indvars.iv
-  %3 = load i32* %arrayidx4, align 4
+  %3 = load i32, i32* %arrayidx4, align 4
   %idxprom5 = sext i32 %3 to i64
   %arrayidx6 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %idxprom5
   store i32 %2, i32* %arrayidx6, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
index 627ae00..4a56d6b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/fp32_to_uint32-cost-model.ll
@@ -21,7 +21,7 @@
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %arrayidx = getelementptr inbounds [10000 x float], [10000 x float]* @float_array, i64 0, i64 %indvars.iv
-  %1 = load float* %arrayidx, align 4
+  %1 = load float, float* %arrayidx, align 4
   %conv = fptoui float %1 to i32
   %arrayidx2 = getelementptr inbounds [10000 x i32], [10000 x i32]* @unsigned_array, i64 0, i64 %indvars.iv
   store i32 %conv, i32* %arrayidx2, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
index 7c1dfe3..c066afc 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/fp64_to_uint32-cost-model.ll
@@ -13,7 +13,7 @@
 
 define void @convert() {
 entry:
-  %0 = load i32* @n, align 4
+  %0 = load i32, i32* @n, align 4
   %cmp4 = icmp eq i32 %0, 0
   br i1 %cmp4, label %for.end, label %for.body.preheader
 
@@ -23,7 +23,7 @@
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %arrayidx = getelementptr inbounds [10000 x double], [10000 x double]* @double_array, i64 0, i64 %indvars.iv
-  %1 = load double* %arrayidx, align 8
+  %1 = load double, double* %arrayidx, align 8
   %conv = fptoui double %1 to i32
   %arrayidx2 = getelementptr inbounds [10000 x i32], [10000 x i32]* @unsigned_array, i64 0, i64 %indvars.iv
   store i32 %conv, i32* %arrayidx2, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
index 106c9d6..b3a0710 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/fp_to_sint8-cost-model.ll
@@ -12,7 +12,7 @@
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %tmp = load float* %arrayidx, align 4
+  %tmp = load float, float* %arrayidx, align 4
   %conv = fptosi float %tmp to i8
   %arrayidx2 = getelementptr inbounds i8, i8* %a, i64 %indvars.iv
   store i8 %conv, i8* %arrayidx2, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll
index 8c9cb65..f0e6c8f 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gather-cost.ll
@@ -32,23 +32,23 @@
   %add = add i64 %v.055, %offset
   %mul = mul i64 %add, 3
   %arrayidx = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %mul
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds [512 x float], [512 x float]* @kernel, i64 0, i64 %v.055
-  %1 = load float* %arrayidx2, align 4
+  %1 = load float, float* %arrayidx2, align 4
   %mul3 = fmul fast float %0, %1
   %arrayidx4 = getelementptr inbounds [512 x float], [512 x float]* @kernel2, i64 0, i64 %v.055
-  %2 = load float* %arrayidx4, align 4
+  %2 = load float, float* %arrayidx4, align 4
   %mul5 = fmul fast float %mul3, %2
   %arrayidx6 = getelementptr inbounds [512 x float], [512 x float]* @kernel3, i64 0, i64 %v.055
-  %3 = load float* %arrayidx6, align 4
+  %3 = load float, float* %arrayidx6, align 4
   %mul7 = fmul fast float %mul5, %3
   %arrayidx8 = getelementptr inbounds [512 x float], [512 x float]* @kernel4, i64 0, i64 %v.055
-  %4 = load float* %arrayidx8, align 4
+  %4 = load float, float* %arrayidx8, align 4
   %mul9 = fmul fast float %mul7, %4
   %add10 = fadd fast float %r.057, %mul9
   %arrayidx.sum = add i64 %mul, 1
   %arrayidx11 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum
-  %5 = load float* %arrayidx11, align 4
+  %5 = load float, float* %arrayidx11, align 4
   %mul13 = fmul fast float %1, %5
   %mul15 = fmul fast float %2, %mul13
   %mul17 = fmul fast float %3, %mul15
@@ -56,7 +56,7 @@
   %add20 = fadd fast float %g.056, %mul19
   %arrayidx.sum52 = add i64 %mul, 2
   %arrayidx21 = getelementptr inbounds [1536 x float], [1536 x float]* @src_data, i64 0, i64 %arrayidx.sum52
-  %6 = load float* %arrayidx21, align 4
+  %6 = load float, float* %arrayidx21, align 4
   %mul23 = fmul fast float %1, %6
   %mul25 = fmul fast float %2, %mul23
   %mul27 = fmul fast float %3, %mul25
diff --git a/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll
index eb84662..c581f4b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gcc-examples.ll
@@ -29,9 +29,9 @@
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %6 = add nsw i32 %5, %3
   %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
   store i32 %6, i32* %7, align 4
@@ -62,7 +62,7 @@
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %2 = getelementptr inbounds i16, i16* %sb, i64 %indvars.iv
-  %3 = load i16* %2, align 2
+  %3 = load i16, i16* %2, align 2
   %4 = sext i16 %3 to i32
   %5 = getelementptr inbounds i32, i32* %ia, i64 %indvars.iv
   store i32 %4, i32* %5, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
index f2163b0..cbba530 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
@@ -15,7 +15,7 @@
 
 for.end.us:                                       ; preds = %for.body3.us
   %arrayidx9.us = getelementptr inbounds i32, i32* %b, i64 %indvars.iv33
-  %0 = load i32* %arrayidx9.us, align 4, !llvm.mem.parallel_loop_access !3
+  %0 = load i32, i32* %arrayidx9.us, align 4, !llvm.mem.parallel_loop_access !3
   %add10.us = add nsw i32 %0, 3
   store i32 %add10.us, i32* %arrayidx9.us, align 4, !llvm.mem.parallel_loop_access !3
   %indvars.iv.next34 = add i64 %indvars.iv33, 1
@@ -29,7 +29,7 @@
   %add4.us = add i32 %add.us, %1
   %idxprom.us = sext i32 %add4.us to i64
   %arrayidx.us = getelementptr inbounds i32, i32* %a, i64 %idxprom.us
-  %2 = load i32* %arrayidx.us, align 4, !llvm.mem.parallel_loop_access !3
+  %2 = load i32, i32* %arrayidx.us, align 4, !llvm.mem.parallel_loop_access !3
   %add5.us = add nsw i32 %2, 1
   store i32 %add5.us, i32* %arrayidx7.us, align 4, !llvm.mem.parallel_loop_access !3
   %indvars.iv.next30 = add i64 %indvars.iv29, 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
index c3ee6f8..c3175b0 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
@@ -46,34 +46,34 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 10000
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %idxprom = sext i32 %1 to i64
-  %2 = load i32** %trigger.addr, align 8
+  %2 = load i32*, i32** %trigger.addr, align 8
   %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
-  %3 = load i32* %arrayidx, align 4
+  %3 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp slt i32 %3, 100
   br i1 %cmp1, label %if.then, label %if.end
 
 if.then:                                          ; preds = %for.body
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %idxprom2 = sext i32 %4 to i64
-  %5 = load i32** %B.addr, align 8
+  %5 = load i32*, i32** %B.addr, align 8
   %arrayidx3 = getelementptr inbounds i32, i32* %5, i64 %idxprom2
-  %6 = load i32* %arrayidx3, align 4
-  %7 = load i32* %i, align 4
+  %6 = load i32, i32* %arrayidx3, align 4
+  %7 = load i32, i32* %i, align 4
   %idxprom4 = sext i32 %7 to i64
-  %8 = load i32** %trigger.addr, align 8
+  %8 = load i32*, i32** %trigger.addr, align 8
   %arrayidx5 = getelementptr inbounds i32, i32* %8, i64 %idxprom4
-  %9 = load i32* %arrayidx5, align 4
+  %9 = load i32, i32* %arrayidx5, align 4
   %add = add nsw i32 %6, %9
-  %10 = load i32* %i, align 4
+  %10 = load i32, i32* %i, align 4
   %idxprom6 = sext i32 %10 to i64
-  %11 = load i32** %A.addr, align 8
+  %11 = load i32*, i32** %A.addr, align 8
   %arrayidx7 = getelementptr inbounds i32, i32* %11, i64 %idxprom6
   store i32 %add, i32* %arrayidx7, align 4
   br label %if.end
@@ -82,7 +82,7 @@
   br label %for.inc
 
 for.inc:                                          ; preds = %if.end
-  %12 = load i32* %i, align 4
+  %12 = load i32, i32* %i, align 4
   %inc = add nsw i32 %12, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
@@ -130,35 +130,35 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 10000
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %idxprom = sext i32 %1 to i64
-  %2 = load i32** %trigger.addr, align 8
+  %2 = load i32*, i32** %trigger.addr, align 8
   %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
-  %3 = load i32* %arrayidx, align 4
+  %3 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp slt i32 %3, 100
   br i1 %cmp1, label %if.then, label %if.end
 
 if.then:                                          ; preds = %for.body
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %idxprom2 = sext i32 %4 to i64
-  %5 = load float** %B.addr, align 8
+  %5 = load float*, float** %B.addr, align 8
   %arrayidx3 = getelementptr inbounds float, float* %5, i64 %idxprom2
-  %6 = load float* %arrayidx3, align 4
-  %7 = load i32* %i, align 4
+  %6 = load float, float* %arrayidx3, align 4
+  %7 = load i32, i32* %i, align 4
   %idxprom4 = sext i32 %7 to i64
-  %8 = load i32** %trigger.addr, align 8
+  %8 = load i32*, i32** %trigger.addr, align 8
   %arrayidx5 = getelementptr inbounds i32, i32* %8, i64 %idxprom4
-  %9 = load i32* %arrayidx5, align 4
+  %9 = load i32, i32* %arrayidx5, align 4
   %conv = sitofp i32 %9 to float
   %add = fadd float %6, %conv
-  %10 = load i32* %i, align 4
+  %10 = load i32, i32* %i, align 4
   %idxprom6 = sext i32 %10 to i64
-  %11 = load float** %A.addr, align 8
+  %11 = load float*, float** %A.addr, align 8
   %arrayidx7 = getelementptr inbounds float, float* %11, i64 %idxprom6
   store float %add, float* %arrayidx7, align 4
   br label %if.end
@@ -167,7 +167,7 @@
   br label %for.inc
 
 for.inc:                                          ; preds = %if.end
-  %12 = load i32* %i, align 4
+  %12 = load i32, i32* %i, align 4
   %inc = add nsw i32 %12, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
@@ -218,35 +218,35 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 10000
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %idxprom = sext i32 %1 to i64
-  %2 = load i32** %trigger.addr, align 8
+  %2 = load i32*, i32** %trigger.addr, align 8
   %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
-  %3 = load i32* %arrayidx, align 4
+  %3 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp slt i32 %3, 100
   br i1 %cmp1, label %if.then, label %if.end
 
 if.then:                                          ; preds = %for.body
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %idxprom2 = sext i32 %4 to i64
-  %5 = load double** %B.addr, align 8
+  %5 = load double*, double** %B.addr, align 8
   %arrayidx3 = getelementptr inbounds double, double* %5, i64 %idxprom2
-  %6 = load double* %arrayidx3, align 8
-  %7 = load i32* %i, align 4
+  %6 = load double, double* %arrayidx3, align 8
+  %7 = load i32, i32* %i, align 4
   %idxprom4 = sext i32 %7 to i64
-  %8 = load i32** %trigger.addr, align 8
+  %8 = load i32*, i32** %trigger.addr, align 8
   %arrayidx5 = getelementptr inbounds i32, i32* %8, i64 %idxprom4
-  %9 = load i32* %arrayidx5, align 4
+  %9 = load i32, i32* %arrayidx5, align 4
   %conv = sitofp i32 %9 to double
   %add = fadd double %6, %conv
-  %10 = load i32* %i, align 4
+  %10 = load i32, i32* %i, align 4
   %idxprom6 = sext i32 %10 to i64
-  %11 = load double** %A.addr, align 8
+  %11 = load double*, double** %A.addr, align 8
   %arrayidx7 = getelementptr inbounds double, double* %11, i64 %idxprom6
   store double %add, double* %arrayidx7, align 8
   br label %if.end
@@ -255,7 +255,7 @@
   br label %for.inc
 
 for.inc:                                          ; preds = %if.end
-  %12 = load i32* %i, align 4
+  %12 = load i32, i32* %i, align 4
   %inc = add nsw i32 %12, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
@@ -297,36 +297,36 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 10000
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %idxprom = sext i32 %1 to i64
-  %2 = load i32** %trigger.addr, align 8
+  %2 = load i32*, i32** %trigger.addr, align 8
   %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
-  %3 = load i32* %arrayidx, align 4
+  %3 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp slt i32 %3, 100
   br i1 %cmp1, label %if.then, label %if.end
 
 if.then:                                          ; preds = %for.body
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %mul = mul nsw i32 %4, 2
   %idxprom2 = sext i32 %mul to i64
-  %5 = load double** %B.addr, align 8
+  %5 = load double*, double** %B.addr, align 8
   %arrayidx3 = getelementptr inbounds double, double* %5, i64 %idxprom2
-  %6 = load double* %arrayidx3, align 8
-  %7 = load i32* %i, align 4
+  %6 = load double, double* %arrayidx3, align 8
+  %7 = load i32, i32* %i, align 4
   %idxprom4 = sext i32 %7 to i64
-  %8 = load i32** %trigger.addr, align 8
+  %8 = load i32*, i32** %trigger.addr, align 8
   %arrayidx5 = getelementptr inbounds i32, i32* %8, i64 %idxprom4
-  %9 = load i32* %arrayidx5, align 4
+  %9 = load i32, i32* %arrayidx5, align 4
   %conv = sitofp i32 %9 to double
   %add = fadd double %6, %conv
-  %10 = load i32* %i, align 4
+  %10 = load i32, i32* %i, align 4
   %idxprom6 = sext i32 %10 to i64
-  %11 = load double** %A.addr, align 8
+  %11 = load double*, double** %A.addr, align 8
   %arrayidx7 = getelementptr inbounds double, double* %11, i64 %idxprom6
   store double %add, double* %arrayidx7, align 8
   br label %if.end
@@ -335,7 +335,7 @@
   br label %for.inc
 
 for.inc:                                          ; preds = %if.end
-  %12 = load i32* %i, align 4
+  %12 = load i32, i32* %i, align 4
   %inc = add nsw i32 %12, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
@@ -373,34 +373,34 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 10000
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %idxprom = sext i32 %1 to i64
-  %2 = load i32** %trigger.addr, align 8
+  %2 = load i32*, i32** %trigger.addr, align 8
   %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
-  %3 = load i32* %arrayidx, align 4
+  %3 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp slt i32 %3, 100
   br i1 %cmp1, label %if.then, label %if.end
 
 if.then:                                          ; preds = %for.body
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %idxprom2 = sext i32 %4 to i64
-  %5 = load i32** %B.addr, align 8
+  %5 = load i32*, i32** %B.addr, align 8
   %arrayidx3 = getelementptr inbounds i32, i32* %5, i64 %idxprom2
-  %6 = load i32* %arrayidx3, align 4
-  %7 = load i32* %i, align 4
+  %6 = load i32, i32* %arrayidx3, align 4
+  %7 = load i32, i32* %i, align 4
   %idxprom4 = sext i32 %7 to i64
-  %8 = load i32** %trigger.addr, align 8
+  %8 = load i32*, i32** %trigger.addr, align 8
   %arrayidx5 = getelementptr inbounds i32, i32* %8, i64 %idxprom4
-  %9 = load i32* %arrayidx5, align 4
+  %9 = load i32, i32* %arrayidx5, align 4
   %add = add nsw i32 %6, %9
-  %10 = load i32* %i, align 4
+  %10 = load i32, i32* %i, align 4
   %idxprom6 = sext i32 %10 to i64
-  %11 = load i32** %A.addr, align 8
+  %11 = load i32*, i32** %A.addr, align 8
   %arrayidx7 = getelementptr inbounds i32, i32* %11, i64 %idxprom6
   store i32 sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*]* @a, i64 0, i64 1), i32** @c) to i32)), i32* %arrayidx7, align 4
   br label %if.end
@@ -409,7 +409,7 @@
   br label %for.inc
 
 for.inc:                                          ; preds = %if.end
-  %12 = load i32* %i, align 4
+  %12 = load i32, i32* %i, align 4
   %inc = add nsw i32 %12, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
@@ -459,29 +459,29 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp sge i32 %0, 0
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %idxprom = sext i32 %1 to i64
-  %2 = load i32** %trigger.addr, align 8
+  %2 = load i32*, i32** %trigger.addr, align 8
   %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
-  %3 = load i32* %arrayidx, align 4
+  %3 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp sgt i32 %3, 0
   br i1 %cmp1, label %if.then, label %if.end
 
 if.then:                                          ; preds = %for.body
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %idxprom2 = sext i32 %4 to i64
-  %5 = load double** %in.addr, align 8
+  %5 = load double*, double** %in.addr, align 8
   %arrayidx3 = getelementptr inbounds double, double* %5, i64 %idxprom2
-  %6 = load double* %arrayidx3, align 8
+  %6 = load double, double* %arrayidx3, align 8
   %add = fadd double %6, 5.000000e-01
-  %7 = load i32* %i, align 4
+  %7 = load i32, i32* %i, align 4
   %idxprom4 = sext i32 %7 to i64
-  %8 = load double** %out.addr, align 8
+  %8 = load double*, double** %out.addr, align 8
   %arrayidx5 = getelementptr inbounds double, double* %8, i64 %idxprom4
   store double %add, double* %arrayidx5, align 8
   br label %if.end
@@ -490,7 +490,7 @@
   br label %for.inc
 
 for.inc:                                          ; preds = %if.end
-  %9 = load i32* %i, align 4
+  %9 = load i32, i32* %i, align 4
   %dec = add nsw i32 %9, -1
   store i32 %dec, i32* %i, align 4
   br label %for.cond
diff --git a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
index bdce3ad..ba8e11e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -55,7 +55,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %N
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
   store i32 %add, i32* %arrayidx2, align 4
@@ -64,7 +64,7 @@
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
 
 for.end:                                          ; preds = %for.body
-  %1 = load i32* %a, align 4
+  %1 = load i32, i32* %a, align 4
   ret i32 %1
 }
 
@@ -106,7 +106,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %N
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
   store i32 %add, i32* %arrayidx2, align 4
@@ -115,7 +115,7 @@
   br i1 %exitcond, label %for.end, label %for.body
 
 for.end:                                          ; preds = %for.body
-  %1 = load i32* %a, align 4
+  %1 = load i32, i32* %a, align 4
   ret i32 %1
 }
 
@@ -157,7 +157,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %N
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
   store i32 %add, i32* %arrayidx2, align 4
@@ -166,7 +166,7 @@
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
 
 for.end:                                          ; preds = %for.body
-  %1 = load i32* %a, align 4
+  %1 = load i32, i32* %a, align 4
   ret i32 %1
 }
 
diff --git a/llvm/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll b/llvm/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
index 3207025..bb972c4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/min-trip-count-switch.ll
@@ -11,7 +11,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %add = fadd float %0, 1.000000e+00
   store float %add, float* %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/no-vector.ll b/llvm/test/Transforms/LoopVectorize/X86/no-vector.ll
index 1ac18f2..4b464b0 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/no-vector.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/no-vector.ll
@@ -9,7 +9,7 @@
   %i.06 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %r.05 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i8, i8* %s, i32 %i.06
-  %0 = load i8* %arrayidx, align 1
+  %0 = load i8, i8* %arrayidx, align 1
   %conv = sext i8 %0 to i32
   %xor = xor i32 %conv, %r.05
   %inc = add nsw i32 %i.06, 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll b/llvm/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
index a7f6362..631361c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/parallel-loops-after-reg2mem.ll
@@ -17,28 +17,28 @@
   br label %for.body
 
 for.body:                                         ; preds = %for.body.for.body_crit_edge, %entry
-  %indvars.iv.reload = load i64* %indvars.iv.reg2mem
+  %indvars.iv.reload = load i64, i64* %indvars.iv.reg2mem
   %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.reload
-  %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
+  %0 = load i32, i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.reload
-  %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
+  %1 = load i32, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
   %idxprom3 = sext i32 %1 to i64
   %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
   store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !3
   %indvars.iv.next = add i64 %indvars.iv.reload, 1
   ; A new store without the parallel metadata here:
   store i64 %indvars.iv.next, i64* %indvars.iv.next.reg2mem
-  %indvars.iv.next.reload1 = load i64* %indvars.iv.next.reg2mem
+  %indvars.iv.next.reload1 = load i64, i64* %indvars.iv.next.reg2mem
   %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next.reload1
-  %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3
+  %2 = load i32, i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3
   store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
-  %indvars.iv.next.reload = load i64* %indvars.iv.next.reg2mem
+  %indvars.iv.next.reload = load i64, i64* %indvars.iv.next.reg2mem
   %lftr.wideiv = trunc i64 %indvars.iv.next.reload to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 512
   br i1 %exitcond, label %for.end, label %for.body.for.body_crit_edge, !llvm.loop !3
 
 for.body.for.body_crit_edge:                      ; preds = %for.body
-  %indvars.iv.next.reload2 = load i64* %indvars.iv.next.reg2mem
+  %indvars.iv.next.reload2 = load i64, i64* %indvars.iv.next.reg2mem
   store i64 %indvars.iv.next.reload2, i64* %indvars.iv.reg2mem
   br label %for.body
 
diff --git a/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll b/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll
index 83bed43..53061ed 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll
@@ -21,15 +21,15 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %1 = load i32* %arrayidx2, align 4
+  %1 = load i32, i32* %arrayidx2, align 4
   %idxprom3 = sext i32 %1 to i64
   %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
   store i32 %0, i32* %arrayidx4, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
   %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
-  %2 = load i32* %arrayidx6, align 4
+  %2 = load i32, i32* %arrayidx6, align 4
   store i32 %2, i32* %arrayidx2, align 4
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 512
@@ -51,9 +51,9 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
+  %0 = load i32, i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
+  %1 = load i32, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
   %idxprom3 = sext i32 %1 to i64
   %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
   ; This store might have originated from inlining a function with a parallel
@@ -61,7 +61,7 @@
   store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !5
   %indvars.iv.next = add i64 %indvars.iv, 1
   %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
-  %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3
+  %2 = load i32, i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !3
   store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 512
@@ -84,9 +84,9 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !6
+  %0 = load i32, i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !6
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %1 = load i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6
+  %1 = load i32, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6
   %idxprom3 = sext i32 %1 to i64
   %arrayidx4 = getelementptr inbounds i32, i32* %a, i64 %idxprom3
   ; This refers to the loop marked with !7 which we are not in at the moment.
@@ -94,7 +94,7 @@
   store i32 %0, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !7
   %indvars.iv.next = add i64 %indvars.iv, 1
   %arrayidx6 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
-  %2 = load i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !6
+  %2 = load i32, i32* %arrayidx6, align 4, !llvm.mem.parallel_loop_access !6
   store i32 %2, i32* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !6
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp eq i32 %lftr.wideiv, 512
diff --git a/llvm/test/Transforms/LoopVectorize/X86/powof2div.ll b/llvm/test/Transforms/LoopVectorize/X86/powof2div.ll
index af3cfe0..6bc738a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/powof2div.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/powof2div.ll
@@ -7,7 +7,7 @@
 @Foo = common global %struct.anon zeroinitializer, align 4
 
 ;CHECK-LABEL: @foo(
-;CHECK: load <4 x i32>*
+;CHECK: load <4 x i32>, <4 x i32>*
 ;CHECK: sdiv <4 x i32>
 ;CHECK: store <4 x i32>
 
@@ -18,7 +18,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds %struct.anon, %struct.anon* @Foo, i64 0, i32 2, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %div = sdiv i32 %0, 2
   %arrayidx2 = getelementptr inbounds %struct.anon, %struct.anon* @Foo, i64 0, i32 0, i64 %indvars.iv
   store i32 %div, i32* %arrayidx2, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-crash.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-crash.ll
index ce2aa89..3741b95 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/reduction-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-crash.ll
@@ -14,14 +14,14 @@
   br label %bb2
 
 bb2:                                              ; preds = %bb
-  %tmp = load double* null, align 8
+  %tmp = load double, double* null, align 8
   br i1 undef, label %bb3, label %bb12
 
 bb3:                                              ; preds = %bb3, %bb2
   %tmp4 = phi double [ %tmp9, %bb3 ], [ %tmp, %bb2 ]
   %tmp5 = phi i32 [ %tmp8, %bb3 ], [ 0, %bb2 ]
   %tmp6 = getelementptr inbounds [16 x double], [16 x double]* undef, i32 0, i32 %tmp5
-  %tmp7 = load double* %tmp6, align 4
+  %tmp7 = load double, double* %tmp6, align 4
   %tmp8 = add nsw i32 %tmp5, 1
   %tmp9 = fadd fast double %tmp4, undef
   %tmp10 = getelementptr inbounds float, float* %arg, i32 %tmp5
diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
index 38e3c08..47c262b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -31,9 +31,9 @@
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %6 = add nsw i32 %5, %3
   %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
   store i32 %6, i32* %7, align 4
@@ -77,9 +77,9 @@
   %.02 = phi i32 [ %4, %.lr.ph ], [ %n, %.preheader ]
   %4 = add nsw i32 %.02, -1
   %5 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %6 = load i32* %5, align 4
+  %6 = load i32, i32* %5, align 4
   %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %8 = load i32* %7, align 4
+  %8 = load i32, i32* %7, align 4
   %9 = and i32 %8, %6
   %10 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
   store i32 %9, i32* %10, align 4
@@ -105,7 +105,7 @@
   %.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ]
   %2 = add nsw i32 %.05, -1
   %3 = getelementptr inbounds i32, i32* %.023, i64 1
-  %4 = load i32* %.023, align 16
+  %4 = load i32, i32* %.023, align 16
   %5 = getelementptr inbounds i32, i32* %.014, i64 1
   store i32 %4, i32* %.014, align 16
   %6 = icmp eq i32 %2, 0
@@ -129,7 +129,7 @@
   %.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ]
   %2 = add nsw i32 %.05, -1
   %3 = getelementptr inbounds i32, i32* %.023, i64 1
-  %4 = load i32* %.023, align 16
+  %4 = load i32, i32* %.023, align 16
   %5 = getelementptr inbounds i32, i32* %.014, i64 1
   store i32 %4, i32* %.014, align 16
   %6 = icmp eq i32 %2, 0
@@ -153,7 +153,7 @@
   %.013 = phi i32* [ %dst, %0 ], [ %6, %1 ]
   %i.02 = phi i32 [ 0, %0 ], [ %7, %1 ]
   %2 = getelementptr inbounds i16, i16* %.04, i64 1
-  %3 = load i16* %.04, align 2
+  %3 = load i16, i16* %.04, align 2
   %4 = zext i16 %3 to i32
   %5 = shl nuw nsw i32 %4, 7
   %6 = getelementptr inbounds i32, i32* %.013, i64 1
@@ -179,7 +179,7 @@
   %.013 = phi i32* [ %dst, %0 ], [ %6, %1 ]
   %i.02 = phi i32 [ 0, %0 ], [ %7, %1 ]
   %2 = getelementptr inbounds i16, i16* %.04, i64 1
-  %3 = load i16* %.04, align 2
+  %3 = load i16, i16* %.04, align 2
   %4 = zext i16 %3 to i32
   %5 = shl nuw nsw i32 %4, 7
   %6 = getelementptr inbounds i32, i32* %.013, i64 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/tripcount.ll b/llvm/test/Transforms/LoopVectorize/X86/tripcount.ll
index 9984e2d..c0bbb92 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/tripcount.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/tripcount.ll
@@ -23,7 +23,7 @@
 for.body:
   %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
   %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @big, i32 0, i32 %i.07
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %neg = xor i32 %0, -1
   store i32 %neg, i32* %arrayidx, align 4
   %inc = add nsw i32 %i.07, 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
index 40421d4..38af11c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
@@ -13,7 +13,7 @@
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i64, i64* %a, i64 %indvars.iv
-  %tmp = load i64* %arrayidx, align 4
+  %tmp = load i64, i64* %arrayidx, align 4
   %conv = uitofp i64 %tmp to double
   %arrayidx2 = getelementptr inbounds double, double* %b, i64 %indvars.iv
   store double %conv, double* %arrayidx2, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/unroll-pm.ll b/llvm/test/Transforms/LoopVectorize/X86/unroll-pm.ll
index 50fdf18..52914b6 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/unroll-pm.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/unroll-pm.ll
@@ -18,7 +18,7 @@
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = add nsw i32 %3, 6
   store i32 %4, i32* %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll b/llvm/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
index eff6ca4..4411da3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/unroll-small-loops.ll
@@ -16,8 +16,8 @@
 ; CHECK-VECTOR: ret
 ;
 ; CHECK-SCALAR-LABEL: @foo(
-; CHECK-SCALAR: load i32*
-; CHECK-SCALAR-NOT: load i32*
+; CHECK-SCALAR: load i32, i32*
+; CHECK-SCALAR-NOT: load i32, i32*
 ; CHECK-SCALAR: store i32
 ; CHECK-SCALAR-NOT: store i32
 ; CHECK-SCALAR: ret
@@ -27,7 +27,7 @@
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = add nsw i32 %3, 6
   store i32 %4, i32* %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -58,7 +58,7 @@
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = add nsw i32 %3, 6
   store i32 %4, i32* %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -87,7 +87,7 @@
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %mul = fmul float %0, %N
   %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
   store float %mul, float* %arrayidx2, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/unroll_selection.ll b/llvm/test/Transforms/LoopVectorize/X86/unroll_selection.ll
index 360d66d..71b8290 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/unroll_selection.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/unroll_selection.ll
@@ -17,7 +17,7 @@
 ; <label>:2                                       ; preds = %2, %0
   %indvars.iv = phi i64 [ %indvars.iv.next, %2 ], [ %1, %0 ]
   %3 = getelementptr inbounds double, double* %A, i64 %indvars.iv
-  %4 = load double* %3, align 8
+  %4 = load double, double* %3, align 8
   %5 = fadd double %4, 3.000000e+00
   %6 = fmul double %4, 2.000000e+00
   %7 = fadd double %5, %6
@@ -59,7 +59,7 @@
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %i.01 = phi i64 [ %5, %.lr.ph ], [ 0, %0 ]
   %2 = getelementptr inbounds i16, i16* %A, i64 %i.01
-  %3 = load i16* %2, align 2
+  %3 = load i16, i16* %2, align 2
   %4 = xor i16 %3, 3
   store i16 %4, i16* %2, align 2
   %5 = add i64 %i.01, 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
index e6a0d475..1030756 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.ll
@@ -36,7 +36,7 @@
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1
+  %0 = load float, float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1
   %call = tail call float @llvm.sin.f32(float %0)
   %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
   store float %call, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1
@@ -70,7 +70,7 @@
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
+  %0 = load float, float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
   %call = tail call float @llvm.sin.f32(float %0)
   %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
   store float %call, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
index f4bc160..8d139ac 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
@@ -30,9 +30,9 @@
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1
+  %0 = load float, float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1
   %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %1 = load float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1
+  %1 = load float, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1
   %add = fadd fast float %0, %1
   store float %add, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
@@ -56,9 +56,9 @@
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
+  %0 = load float, float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !3
   %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %1 = load float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
+  %1 = load float, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
   %add = fadd fast float %0, %1
   store float %add, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !3
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll
index 86cebf3..5efabe1 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll
@@ -16,9 +16,9 @@
 ; <label>:1
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %6 = add nsw i32 %5, %3
   %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
 
@@ -43,9 +43,9 @@
 ; <label>:1
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %6 = add nsw i32 %5, %3
   %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
   %8 = icmp ult i64 %indvars.iv, 8
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
index 048c260..6cd3c9c 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll
@@ -19,7 +19,7 @@
 ; CHECK: test_consecutive_store
 ; CHECK: The Widest type: 64 bits
 define void @test_consecutive_store(%0**, %0**, %0** nocapture) nounwind ssp uwtable align 2 {
-  %4 = load %0** %2, align 8
+  %4 = load %0*, %0** %2, align 8
   %5 = icmp eq %0** %0, %1
   br i1 %5, label %12, label %6
 
@@ -62,7 +62,7 @@
 ; <label>:3                                       ; preds = %3, %1
   %4 = phi i64 [ 0, %1 ], [ %11, %3 ]
   %5 = getelementptr inbounds [2048 x i16], [2048 x i16]* @q, i64 0, i64 %4
-  %6 = load i16* %5, align 2
+  %6 = load i16, i16* %5, align 2
   %7 = sext i16 %6 to i64
   %8 = add i64 %7, 1
   %9 = inttoptr i64 %8 to i32*
@@ -101,7 +101,7 @@
   %2 = phi i64 [ 0, %0 ], [ %10, %1 ]
   %3 = phi i8 [ 0, %0 ], [ %9, %1 ]
   %4 = getelementptr inbounds [1024 x i32*], [1024 x i32*]* @ia, i32 0, i64 %2
-  %5 = load i32** %4, align 4
+  %5 = load i32*, i32** %4, align 4
   %6 = ptrtoint i32* %5 to i64
   %7 = trunc i64 %6 to i8
   %8 = add i8 %3, 1
@@ -129,7 +129,7 @@
   %4 = phi i64 [ 0, %1 ], [ %10, %3 ]
   %5 = getelementptr inbounds [2048 x [8 x i32*]], [2048 x [8 x i32*]]* @p2, i64 0, i64 %4, i64 %2
   %6 = getelementptr inbounds [2048 x i16], [2048 x i16]* @q2, i64 0, i64 %4
-  %7 = load i32** %5, align 2
+  %7 = load i32*, i32** %5, align 2
   %8 = ptrtoint i32* %7 to i64
   %9 = trunc i64 %8 to i16
   store i16 %9, i16* %6, align 8
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
index 92eba52..2ab0ee3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -98,10 +98,10 @@
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
   %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv, !dbg !35
-  %0 = load i32* %arrayidx, align 4, !dbg !35, !tbaa !18
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !35, !tbaa !18
   %idxprom1 = sext i32 %0 to i64, !dbg !35
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1, !dbg !35
-  %1 = load i32* %arrayidx2, align 4, !dbg !35, !tbaa !18
+  %1 = load i32, i32* %arrayidx2, align 4, !dbg !35, !tbaa !18
   %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !35
   store i32 %1, i32* %arrayidx4, align 4, !dbg !35, !tbaa !18
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !32
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
index cf64283..10e27c1 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
@@ -27,10 +27,10 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %add8 = phi i32 [ 0, %entry ], [ %add, %for.body ], !dbg !19
   %arrayidx = getelementptr inbounds [16 x i8], [16 x i8]* %cb, i64 0, i64 %indvars.iv, !dbg !19
-  %0 = load i8* %arrayidx, align 1, !dbg !19, !tbaa !21
+  %0 = load i8, i8* %arrayidx, align 1, !dbg !19, !tbaa !21
   %conv = sext i8 %0 to i32, !dbg !19
   %arrayidx2 = getelementptr inbounds [16 x i8], [16 x i8]* %cc, i64 0, i64 %indvars.iv, !dbg !19
-  %1 = load i8* %arrayidx2, align 1, !dbg !19, !tbaa !21
+  %1 = load i8, i8* %arrayidx2, align 1, !dbg !19, !tbaa !21
   %conv3 = sext i8 %1 to i32, !dbg !19
   %sub = sub i32 %conv, %conv3, !dbg !19
   %add = add nsw i32 %sub, %add8, !dbg !19
diff --git a/llvm/test/Transforms/LoopVectorize/align.ll b/llvm/test/Transforms/LoopVectorize/align.ll
index f12dbde..7ee401d 100644
--- a/llvm/test/Transforms/LoopVectorize/align.ll
+++ b/llvm/test/Transforms/LoopVectorize/align.ll
@@ -6,8 +6,8 @@
 ; Make sure we output the abi alignment if no alignment is specified.
 
 ;CHECK-LABEL: @align
-;CHECK: load <4 x i32>* {{.*}} align  4
-;CHECK: load <4 x i32>* {{.*}} align  4
+;CHECK: load <4 x i32>, <4 x i32>* {{.*}} align  4
+;CHECK: load <4 x i32>, <4 x i32>* {{.*}} align  4
 ;CHECK: store <4 x i32> {{.*}} align  4
 
 define void @align(i32* %a, i32* %b, i32* %c) nounwind uwtable ssp {
@@ -16,9 +16,9 @@
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %2 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   %4 = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
-  %5 = load i32* %4
+  %5 = load i32, i32* %4
   %6 = add nsw i32 %5, %3
   %7 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
   store i32 %6, i32* %7
diff --git a/llvm/test/Transforms/LoopVectorize/bzip_reverse_loops.ll b/llvm/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
index 707b5b9..f1efb2563 100644
--- a/llvm/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
+++ b/llvm/test/Transforms/LoopVectorize/bzip_reverse_loops.ll
@@ -17,7 +17,7 @@
   %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %cond.end ]
   %p.addr.0 = phi i16* [ %p, %entry ], [ %incdec.ptr, %cond.end ]
   %incdec.ptr = getelementptr inbounds i16, i16* %p.addr.0, i64 -1
-  %0 = load i16* %incdec.ptr, align 2
+  %0 = load i16, i16* %incdec.ptr, align 2
   %conv = zext i16 %0 to i32
   %cmp = icmp ult i32 %conv, %size
   br i1 %cmp, label %cond.end, label %cond.true
@@ -52,7 +52,7 @@
   %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.body ]
   %p.0 = phi i32* [ %a, %entry ], [ %incdec.ptr, %do.body ]
   %incdec.ptr = getelementptr inbounds i32, i32* %p.0, i64 -1
-  %0 = load i32* %incdec.ptr, align 4
+  %0 = load i32, i32* %incdec.ptr, align 4
   %cmp = icmp slt i32 %0, %wsize
   %sub = sub nsw i32 %0, %wsize
   %cond = select i1 %cmp, i32 0, i32 %sub
diff --git a/llvm/test/Transforms/LoopVectorize/calloc.ll b/llvm/test/Transforms/LoopVectorize/calloc.ll
index 3ac3b7b..a41e517 100644
--- a/llvm/test/Transforms/LoopVectorize/calloc.ll
+++ b/llvm/test/Transforms/LoopVectorize/calloc.ll
@@ -23,7 +23,7 @@
   %i.030 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
   %shr = lshr i64 %i.030, 1
   %arrayidx = getelementptr inbounds i8, i8* %bytes, i64 %shr
-  %1 = load i8* %arrayidx, align 1
+  %1 = load i8, i8* %arrayidx, align 1
   %conv = zext i8 %1 to i32
   %and = shl i64 %i.030, 2
   %neg = and i64 %and, 4
diff --git a/llvm/test/Transforms/LoopVectorize/conditional-assignment.ll b/llvm/test/Transforms/LoopVectorize/conditional-assignment.ll
index 7c52320..15750aa 100644
--- a/llvm/test/Transforms/LoopVectorize/conditional-assignment.ll
+++ b/llvm/test/Transforms/LoopVectorize/conditional-assignment.ll
@@ -13,7 +13,7 @@
 for.body:                                         ; preds = %for.inc, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
   %arrayidx = getelementptr inbounds i32, i32* %indices, i64 %indvars.iv, !dbg !12
-  %0 = load i32* %arrayidx, align 4, !dbg !12, !tbaa !14
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !12, !tbaa !14
   %cmp1 = icmp eq i32 %0, 1024, !dbg !12
   br i1 %cmp1, label %if.then, label %for.inc, !dbg !12
 
diff --git a/llvm/test/Transforms/LoopVectorize/control-flow.ll b/llvm/test/Transforms/LoopVectorize/control-flow.ll
index ee73110..c95d55c 100644
--- a/llvm/test/Transforms/LoopVectorize/control-flow.ll
+++ b/llvm/test/Transforms/LoopVectorize/control-flow.ll
@@ -31,7 +31,7 @@
 for.body:                                         ; preds = %for.body.preheader, %if.else
   %indvars.iv = phi i64 [ %indvars.iv.next, %if.else ], [ 0, %for.body.preheader ]
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !12
-  %0 = load i32* %arrayidx, align 4, !dbg !12, !tbaa !15
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !12, !tbaa !15
   %cmp1 = icmp sgt i32 %0, 10, !dbg !12
   br i1 %cmp1, label %end.loopexit, label %if.else, !dbg !12
 
diff --git a/llvm/test/Transforms/LoopVectorize/cpp-new-array.ll b/llvm/test/Transforms/LoopVectorize/cpp-new-array.ll
index cc41e5c..22896d3 100644
--- a/llvm/test/Transforms/LoopVectorize/cpp-new-array.ll
+++ b/llvm/test/Transforms/LoopVectorize/cpp-new-array.ll
@@ -25,10 +25,10 @@
   %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %idxprom = sext i32 %i.01 to i64
   %arrayidx = getelementptr inbounds float, float* %0, i64 %idxprom
-  %3 = load float* %arrayidx, align 4
+  %3 = load float, float* %arrayidx, align 4
   %idxprom5 = sext i32 %i.01 to i64
   %arrayidx6 = getelementptr inbounds float, float* %1, i64 %idxprom5
-  %4 = load float* %arrayidx6, align 4
+  %4 = load float, float* %arrayidx6, align 4
   %add = fadd float %3, %4
   %idxprom7 = sext i32 %i.01 to i64
   %arrayidx8 = getelementptr inbounds float, float* %2, i64 %idxprom7
@@ -38,7 +38,7 @@
   br i1 %cmp, label %for.body, label %for.end
 
 for.end:                                          ; preds = %for.body
-  %5 = load float* %2, align 4
+  %5 = load float, float* %2, align 4
   %conv10 = fptosi float %5 to i32
   ret i32 %conv10
 }
diff --git a/llvm/test/Transforms/LoopVectorize/dbg.value.ll b/llvm/test/Transforms/LoopVectorize/dbg.value.ll
index f3e75d8..bf9a796 100644
--- a/llvm/test/Transforms/LoopVectorize/dbg.value.ll
+++ b/llvm/test/Transforms/LoopVectorize/dbg.value.ll
@@ -18,9 +18,9 @@
   ;CHECK: load <4 x i32>
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv, !dbg !19
-  %0 = load i32* %arrayidx, align 4, !dbg !19
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !19
   %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @C, i64 0, i64 %indvars.iv, !dbg !19
-  %1 = load i32* %arrayidx2, align 4, !dbg !19
+  %1 = load i32, i32* %arrayidx2, align 4, !dbg !19
   %add = add nsw i32 %1, %0, !dbg !19
   %arrayidx4 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv, !dbg !19
   store i32 %add, i32* %arrayidx4, align 4, !dbg !19
diff --git a/llvm/test/Transforms/LoopVectorize/debugloc.ll b/llvm/test/Transforms/LoopVectorize/debugloc.ll
index a30ca62..c443ae4 100644
--- a/llvm/test/Transforms/LoopVectorize/debugloc.ll
+++ b/llvm/test/Transforms/LoopVectorize/debugloc.ll
@@ -9,7 +9,7 @@
 ; CHECK: vector.body
 ; CHECK:   index {{.*}}, !dbg ![[LOC]]
 ; CHECK:   getelementptr inbounds i32, i32* %a, {{.*}}, !dbg ![[LOC2:[0-9]+]]
-; CHECK:   load <2 x i32>* {{.*}}, !dbg ![[LOC2]]
+; CHECK:   load <2 x i32>, <2 x i32>* {{.*}}, !dbg ![[LOC2]]
 ; CHECK:   add <2 x i32> {{.*}}, !dbg ![[LOC2]]
 ; CHECK:   add i64 %index, 2, !dbg ![[LOC]]
 ; CHECK:   icmp eq i64 %index.next, %end.idx.rnd.down, !dbg ![[LOC]]
@@ -33,7 +33,7 @@
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
   %sum.05 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv, !dbg !22
-  %0 = load i32* %arrayidx, align 4, !dbg !22
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !22
   %add = add i32 %0, %sum.05, !dbg !22
   tail call void @llvm.dbg.value(metadata i32 %add.lcssa, i64 0, metadata !15, metadata !{}), !dbg !22
   %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !21
diff --git a/llvm/test/Transforms/LoopVectorize/duplicated-metadata.ll b/llvm/test/Transforms/LoopVectorize/duplicated-metadata.ll
index e82d280..9f7cdef 100644
--- a/llvm/test/Transforms/LoopVectorize/duplicated-metadata.ll
+++ b/llvm/test/Transforms/LoopVectorize/duplicated-metadata.ll
@@ -13,7 +13,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %p = load float* %arrayidx, align 4
+  %p = load float, float* %arrayidx, align 4
   %mul = fmul float %p, 2.000000e+00
   store float %mul, float* %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
diff --git a/llvm/test/Transforms/LoopVectorize/ee-crash.ll b/llvm/test/Transforms/LoopVectorize/ee-crash.ll
index 7ed1c66..0c38734 100644
--- a/llvm/test/Transforms/LoopVectorize/ee-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/ee-crash.ll
@@ -18,7 +18,7 @@
 for.body.i:                                       ; preds = %entry, %for.body.i
   %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ]
   %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
-  %0 = load i32* %__first.addr.04.i, align 4
+  %0 = load i32, i32* %__first.addr.04.i, align 4
   %q1 = extractelement <2 x i32> %q, i32 %n
   %q2 = add nsw i32 %0, %q1
   %add.i = add nsw i32 %q2, %__init.addr.05.i
diff --git a/llvm/test/Transforms/LoopVectorize/exact.ll b/llvm/test/Transforms/LoopVectorize/exact.ll
index 90bad3a..0a4e0dc 100644
--- a/llvm/test/Transforms/LoopVectorize/exact.ll
+++ b/llvm/test/Transforms/LoopVectorize/exact.ll
@@ -12,7 +12,7 @@
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %x, i64 %iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %conv1 = lshr exact i32 %0, 1
   store i32 %conv1, i32* %arrayidx, align 4
   %iv.next = add nuw nsw i64 %iv, 1
diff --git a/llvm/test/Transforms/LoopVectorize/flags.ll b/llvm/test/Transforms/LoopVectorize/flags.ll
index a4e392f..fcbc8741 100644
--- a/llvm/test/Transforms/LoopVectorize/flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/flags.ll
@@ -15,7 +15,7 @@
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ]
   %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = mul nsw i32 %3, 3
   store i32 %4, i32* %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -40,7 +40,7 @@
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ]
   %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = mul i32 %3, 3
   store i32 %4, i32* %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -67,7 +67,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %q.04 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %s, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %add = fadd fast float %q.04, %0
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 256
diff --git a/llvm/test/Transforms/LoopVectorize/float-reduction.ll b/llvm/test/Transforms/LoopVectorize/float-reduction.ll
index 1401bd9..1310b27 100644
--- a/llvm/test/Transforms/LoopVectorize/float-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-reduction.ll
@@ -13,7 +13,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %sum.04 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %add = fadd fast float %sum.04, %0
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -35,7 +35,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %sum.04 = phi float [ 0.000000e+00, %entry ], [ %sub, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %sub = fsub fast float %sum.04, %0
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/llvm/test/Transforms/LoopVectorize/funcall.ll b/llvm/test/Transforms/LoopVectorize/funcall.ll
index 81158ee..35c2dfc 100644
--- a/llvm/test/Transforms/LoopVectorize/funcall.ll
+++ b/llvm/test/Transforms/LoopVectorize/funcall.ll
@@ -17,7 +17,7 @@
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds double, double* %d, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %1 = tail call double @llvm.pow.f64(double %0, double %t)
   store double %1, double* %arrayidx, align 8
   %indvars.iv.next = add i64 %indvars.iv, 1
diff --git a/llvm/test/Transforms/LoopVectorize/gcc-examples.ll b/llvm/test/Transforms/LoopVectorize/gcc-examples.ll
index 9bc5cb7..1880901 100644
--- a/llvm/test/Transforms/LoopVectorize/gcc-examples.ll
+++ b/llvm/test/Transforms/LoopVectorize/gcc-examples.ll
@@ -45,9 +45,9 @@
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %6 = add nsw i32 %5, %3
   %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
   store i32 %6, i32* %7, align 4
@@ -96,9 +96,9 @@
   %.02 = phi i32 [ %4, %.lr.ph ], [ %n, %.preheader ]
   %4 = add nsw i32 %.02, -1
   %5 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %6 = load i32* %5, align 4
+  %6 = load i32, i32* %5, align 4
   %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %8 = load i32* %7, align 4
+  %8 = load i32, i32* %7, align 4
   %9 = and i32 %8, %6
   %10 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
   store i32 %9, i32* %10, align 4
@@ -129,7 +129,7 @@
   %.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ]
   %2 = add nsw i32 %.05, -1
   %3 = getelementptr inbounds i32, i32* %.023, i64 1
-  %4 = load i32* %.023, align 16
+  %4 = load i32, i32* %.023, align 16
   %5 = getelementptr inbounds i32, i32* %.014, i64 1
   store i32 %4, i32* %.014, align 16
   %6 = icmp eq i32 %2, 0
@@ -162,7 +162,7 @@
   %.018 = phi i32* [ %8, %.lr.ph10 ], [ %p, %0 ]
   %.027 = phi i32* [ %5, %.lr.ph10 ], [ %q, %0 ]
   %5 = getelementptr inbounds i32, i32* %.027, i64 1
-  %6 = load i32* %.027, align 16
+  %6 = load i32, i32* %.027, align 16
   %7 = add nsw i32 %6, 5
   %8 = getelementptr inbounds i32, i32* %.018, i64 1
   store i32 %7, i32* %.018, align 16
@@ -177,10 +177,10 @@
   %indvars.iv11 = phi i64 [ %indvars.iv.next12, %.lr.ph6 ], [ 0, %.preheader4 ]
   %indvars.iv.next12 = add i64 %indvars.iv11, 1
   %11 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv.next12
-  %12 = load i32* %11, align 4
+  %12 = load i32, i32* %11, align 4
   %13 = add nsw i64 %indvars.iv11, 3
   %14 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %13
-  %15 = load i32* %14, align 4
+  %15 = load i32, i32* %14, align 4
   %16 = add nsw i32 %15, %12
   %17 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv11
   store i32 %16, i32* %17, align 4
@@ -191,7 +191,7 @@
 .lr.ph:                                           ; preds = %.preheader, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.preheader ]
   %18 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
-  %19 = load i32* %18, align 4
+  %19 = load i32, i32* %18, align 4
   %20 = icmp sgt i32 %19, 4
   %21 = select i1 %20, i32 4, i32 0
   %22 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
@@ -250,9 +250,9 @@
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %diff.01 = phi i32 [ 0, %0 ], [ %7, %1 ]
   %2 = getelementptr inbounds [1024 x i32], [1024 x i32]* @ub, i64 0, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = getelementptr inbounds [1024 x i32], [1024 x i32]* @uc, i64 0, i64 %indvars.iv
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %6 = add i32 %3, %diff.01
   %7 = sub i32 %6, %5
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -277,16 +277,16 @@
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %2 = getelementptr inbounds i32, i32* %ib, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = getelementptr inbounds i32, i32* %ic, i64 %indvars.iv
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %6 = add nsw i32 %5, %3
   %7 = getelementptr inbounds i32, i32* %ia, i64 %indvars.iv
   store i32 %6, i32* %7, align 4
   %8 = getelementptr inbounds i16, i16* %sb, i64 %indvars.iv
-  %9 = load i16* %8, align 2
+  %9 = load i16, i16* %8, align 2
   %10 = getelementptr inbounds i16, i16* %sc, i64 %indvars.iv
-  %11 = load i16* %10, align 2
+  %11 = load i16, i16* %10, align 2
   %12 = add i16 %11, %9
   %13 = getelementptr inbounds i16, i16* %sa, i64 %indvars.iv
   store i16 %12, i16* %13, align 2
@@ -310,7 +310,7 @@
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %2 = getelementptr inbounds i16, i16* %sb, i64 %indvars.iv
-  %3 = load i16* %2, align 2
+  %3 = load i16, i16* %2, align 2
   %4 = sext i16 %3 to i32
   %5 = getelementptr inbounds i32, i32* %ia, i64 %indvars.iv
   store i32 %4, i32* %5, align 4
@@ -341,14 +341,14 @@
   %2 = shl nsw i64 %indvars.iv, 1
   %3 = or i64 %2, 1
   %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %3
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %6 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %3
-  %7 = load i32* %6, align 4
+  %7 = load i32, i32* %6, align 4
   %8 = mul nsw i32 %7, %5
   %9 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %2
-  %10 = load i32* %9, align 8
+  %10 = load i32, i32* %9, align 8
   %11 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %2
-  %12 = load i32* %11, align 8
+  %12 = load i32, i32* %11, align 8
   %13 = mul nsw i32 %12, %10
   %14 = sub nsw i32 %8, %13
   %15 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
@@ -397,18 +397,18 @@
 .preheader:                                       ; preds = %14, %0
   %indvars.iv4 = phi i64 [ 0, %0 ], [ %indvars.iv.next5, %14 ]
   %1 = getelementptr inbounds i32*, i32** %A, i64 %indvars.iv4
-  %2 = load i32** %1, align 8
+  %2 = load i32*, i32** %1, align 8
   %3 = getelementptr inbounds i32*, i32** %B, i64 %indvars.iv4
-  %4 = load i32** %3, align 8
+  %4 = load i32*, i32** %3, align 8
   br label %5
 
 ; <label>:5                                       ; preds = %.preheader, %5
   %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %5 ]
   %diff.02 = phi i32 [ 0, %.preheader ], [ %11, %5 ]
   %6 = getelementptr inbounds i32, i32* %2, i64 %indvars.iv
-  %7 = load i32* %6, align 4
+  %7 = load i32, i32* %6, align 4
   %8 = getelementptr inbounds i32, i32* %4, i64 %indvars.iv
-  %9 = load i32* %8, align 4
+  %9 = load i32, i32* %8, align 4
   %10 = add i32 %7, %diff.02
   %11 = sub i32 %10, %9
   %indvars.iv.next = add i64 %indvars.iv, 8
@@ -445,13 +445,13 @@
   %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %0 ]
   %sum.12 = phi i32 [ %sum.05, %.preheader ], [ %10, %0 ]
   %1 = getelementptr inbounds i32*, i32** %in, i64 %indvars.iv
-  %2 = load i32** %1, align 8
+  %2 = load i32*, i32** %1, align 8
   %3 = getelementptr inbounds i32, i32* %2, i64 %indvars.iv7
-  %4 = load i32* %3, align 4
+  %4 = load i32, i32* %3, align 4
   %5 = getelementptr inbounds i32*, i32** %coeff, i64 %indvars.iv
-  %6 = load i32** %5, align 8
+  %6 = load i32*, i32** %5, align 8
   %7 = getelementptr inbounds i32, i32* %6, i64 %indvars.iv7
-  %8 = load i32* %7, align 4
+  %8 = load i32, i32* %7, align 4
   %9 = mul nsw i32 %8, %4
   %10 = add nsw i32 %9, %sum.12
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -479,13 +479,13 @@
   %sum.12.1 = phi i32 [ %sum.05.1, %.preheader.1 ], [ %23, %12 ]
   %13 = add nsw i64 %indvars.iv.1, 1
   %14 = getelementptr inbounds i32*, i32** %in, i64 %13
-  %15 = load i32** %14, align 8
+  %15 = load i32*, i32** %14, align 8
   %16 = getelementptr inbounds i32, i32* %15, i64 %indvars.iv7.1
-  %17 = load i32* %16, align 4
+  %17 = load i32, i32* %16, align 4
   %18 = getelementptr inbounds i32*, i32** %coeff, i64 %indvars.iv.1
-  %19 = load i32** %18, align 8
+  %19 = load i32*, i32** %18, align 8
   %20 = getelementptr inbounds i32, i32* %19, i64 %indvars.iv7.1
-  %21 = load i32* %20, align 4
+  %21 = load i32, i32* %20, align 4
   %22 = mul nsw i32 %21, %17
   %23 = add nsw i32 %22, %sum.12.1
   %lftr.wideiv.1 = trunc i64 %13 to i32
@@ -513,13 +513,13 @@
   %sum.12.2 = phi i32 [ %sum.05.2, %.preheader.2 ], [ %37, %26 ]
   %27 = add nsw i64 %indvars.iv.2, 2
   %28 = getelementptr inbounds i32*, i32** %in, i64 %27
-  %29 = load i32** %28, align 8
+  %29 = load i32*, i32** %28, align 8
   %30 = getelementptr inbounds i32, i32* %29, i64 %indvars.iv7.2
-  %31 = load i32* %30, align 4
+  %31 = load i32, i32* %30, align 4
   %32 = getelementptr inbounds i32*, i32** %coeff, i64 %indvars.iv.2
-  %33 = load i32** %32, align 8
+  %33 = load i32*, i32** %32, align 8
   %34 = getelementptr inbounds i32, i32* %33, i64 %indvars.iv7.2
-  %35 = load i32* %34, align 4
+  %35 = load i32, i32* %34, align 4
   %36 = mul nsw i32 %35, %31
   %37 = add nsw i32 %36, %sum.12.2
   %indvars.iv.next.2 = add i64 %indvars.iv.2, 1
@@ -548,13 +548,13 @@
   %sum.12.3 = phi i32 [ %sum.05.3, %.preheader.3 ], [ %51, %40 ]
   %41 = add nsw i64 %indvars.iv.3, 3
   %42 = getelementptr inbounds i32*, i32** %in, i64 %41
-  %43 = load i32** %42, align 8
+  %43 = load i32*, i32** %42, align 8
   %44 = getelementptr inbounds i32, i32* %43, i64 %indvars.iv7.3
-  %45 = load i32* %44, align 4
+  %45 = load i32, i32* %44, align 4
   %46 = getelementptr inbounds i32*, i32** %coeff, i64 %indvars.iv.3
-  %47 = load i32** %46, align 8
+  %47 = load i32*, i32** %46, align 8
   %48 = getelementptr inbounds i32, i32* %47, i64 %indvars.iv7.3
-  %49 = load i32* %48, align 4
+  %49 = load i32, i32* %48, align 4
   %50 = mul nsw i32 %49, %45
   %51 = add nsw i32 %50, %sum.12.3
   %indvars.iv.next.3 = add i64 %indvars.iv.3, 1
@@ -591,7 +591,7 @@
   %a.02 = phi i32 [ 0, %.lr.ph ], [ %6, %3 ]
   %indvars.iv.next = add i64 %indvars.iv, -1
   %4 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %6 = add nsw i32 %5, %a.02
   %7 = trunc i64 %indvars.iv.next to i32
   %8 = icmp sgt i32 %7, 0
@@ -613,7 +613,7 @@
   %.013 = phi i32* [ %dst, %0 ], [ %6, %1 ]
   %i.02 = phi i32 [ 0, %0 ], [ %7, %1 ]
   %2 = getelementptr inbounds i16, i16* %.04, i64 1
-  %3 = load i16* %.04, align 2
+  %3 = load i16, i16* %.04, align 2
   %4 = zext i16 %3 to i32
   %5 = shl nuw nsw i32 %4, 7
   %6 = getelementptr inbounds i32, i32* %.013, i64 1
@@ -635,9 +635,9 @@
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %2 = getelementptr inbounds [1024 x float], [1024 x float]* @fa, i64 0, i64 %indvars.iv
-  %3 = load float* %2, align 4
+  %3 = load float, float* %2, align 4
   %4 = getelementptr inbounds [1024 x float], [1024 x float]* @fb, i64 0, i64 %indvars.iv
-  %5 = load float* %4, align 4
+  %5 = load float, float* %4, align 4
   %6 = fcmp olt float %3, %5
   %x.y = select i1 %6, i16 %x, i16 %y
   %7 = sext i16 %x.y to i32
@@ -662,14 +662,14 @@
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %2 = getelementptr inbounds [1024 x float], [1024 x float]* @da, i64 0, i64 %indvars.iv
-  %3 = load float* %2, align 4
+  %3 = load float, float* %2, align 4
   %4 = getelementptr inbounds [1024 x float], [1024 x float]* @db, i64 0, i64 %indvars.iv
-  %5 = load float* %4, align 4
+  %5 = load float, float* %4, align 4
   %6 = fcmp olt float %3, %5
   %7 = getelementptr inbounds [1024 x float], [1024 x float]* @dc, i64 0, i64 %indvars.iv
-  %8 = load float* %7, align 4
+  %8 = load float, float* %7, align 4
   %9 = getelementptr inbounds [1024 x float], [1024 x float]* @dd, i64 0, i64 %indvars.iv
-  %10 = load float* %9, align 4
+  %10 = load float, float* %9, align 4
   %11 = fcmp olt float %8, %10
   %12 = and i1 %6, %11
   %13 = zext i1 %12 to i32
diff --git a/llvm/test/Transforms/LoopVectorize/global_alias.ll b/llvm/test/Transforms/LoopVectorize/global_alias.ll
index ef232669..b2ac4ca 100644
--- a/llvm/test/Transforms/LoopVectorize/global_alias.ll
+++ b/llvm/test/Transforms/LoopVectorize/global_alias.ll
@@ -35,31 +35,31 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
-  %2 = load i32* %arrayidx, align 4
-  %3 = load i32* %a.addr, align 4
+  %2 = load i32, i32* %arrayidx, align 4
+  %3 = load i32, i32* %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
   store i32 %add, i32* %arrayidx1, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32* %i, align 4
+  %5 = load i32, i32* %i, align 4
   %inc = add nsw i32 %5, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32* %a.addr, align 4
+  %6 = load i32, i32* %a.addr, align 4
   %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32* %arrayidx2, align 4
+  %7 = load i32, i32* %arrayidx2, align 4
   ret i32 %7
 }
 
@@ -83,32 +83,32 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 90
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %add = add nsw i32 %1, 10
   %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %add
-  %2 = load i32* %arrayidx, align 4
-  %3 = load i32* %a.addr, align 4
+  %2 = load i32, i32* %arrayidx, align 4
+  %3 = load i32, i32* %a.addr, align 4
   %add1 = add nsw i32 %2, %3
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
   store i32 %add1, i32* %arrayidx2, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32* %i, align 4
+  %5 = load i32, i32* %i, align 4
   %inc = add nsw i32 %5, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32* %a.addr, align 4
+  %6 = load i32, i32* %a.addr, align 4
   %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32* %arrayidx3, align 4
+  %7 = load i32, i32* %arrayidx3, align 4
   ret i32 %7
 }
 
@@ -132,32 +132,32 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
-  %2 = load i32* %arrayidx, align 4
-  %3 = load i32* %a.addr, align 4
+  %2 = load i32, i32* %arrayidx, align 4
+  %3 = load i32, i32* %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %add1 = add nsw i32 %4, 10
   %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add1
   store i32 %add, i32* %arrayidx2, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32* %i, align 4
+  %5 = load i32, i32* %i, align 4
   %inc = add nsw i32 %5, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32* %a.addr, align 4
+  %6 = load i32, i32* %a.addr, align 4
   %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32* %arrayidx3, align 4
+  %7 = load i32, i32* %arrayidx3, align 4
   ret i32 %7
 }
 
@@ -184,34 +184,34 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32** @PB, align 4
-  %2 = load i32* %i, align 4
+  %1 = load i32*, i32** @PB, align 4
+  %2 = load i32, i32* %i, align 4
   %add.ptr = getelementptr inbounds i32, i32* %1, i32 %2
-  %3 = load i32* %add.ptr, align 4
-  %4 = load i32* %a.addr, align 4
+  %3 = load i32, i32* %add.ptr, align 4
+  %4 = load i32, i32* %a.addr, align 4
   %add = add nsw i32 %3, %4
-  %5 = load i32** @PA, align 4
-  %6 = load i32* %i, align 4
+  %5 = load i32*, i32** @PA, align 4
+  %6 = load i32, i32* %i, align 4
   %add.ptr1 = getelementptr inbounds i32, i32* %5, i32 %6
   store i32 %add, i32* %add.ptr1, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %7 = load i32* %i, align 4
+  %7 = load i32, i32* %i, align 4
   %inc = add nsw i32 %7, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %8 = load i32** @PA, align 4
-  %9 = load i32* %a.addr, align 4
+  %8 = load i32*, i32** @PA, align 4
+  %9 = load i32, i32* %a.addr, align 4
   %add.ptr2 = getelementptr inbounds i32, i32* %8, i32 %9
-  %10 = load i32* %add.ptr2, align 4
+  %10 = load i32, i32* %add.ptr2, align 4
   ret i32 %10
 }
 
@@ -237,37 +237,37 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
-  %2 = load i32* %N, align 4
+  %1 = load i32, i32* %i, align 4
+  %2 = load i32, i32* %N, align 4
   %arrayidx = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2
   %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx, i32 0, i32 %1
-  %3 = load i32* %arrayidx1, align 4
-  %4 = load i32* %a.addr, align 4
+  %3 = load i32, i32* %arrayidx1, align 4
+  %4 = load i32, i32* %a.addr, align 4
   %add = add nsw i32 %3, %4
-  %5 = load i32* %i, align 4
-  %6 = load i32* %N, align 4
+  %5 = load i32, i32* %i, align 4
+  %6 = load i32, i32* %N, align 4
   %arrayidx2 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
   %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx2, i32 0, i32 %5
   store i32 %add, i32* %arrayidx3, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %7 = load i32* %i, align 4
+  %7 = load i32, i32* %i, align 4
   %inc = add nsw i32 %7, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %8 = load i32* %a.addr, align 4
-  %9 = load i32* %N, align 4
+  %8 = load i32, i32* %a.addr, align 4
+  %9 = load i32, i32* %N, align 4
   %arrayidx4 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
   %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx4, i32 0, i32 %8
-  %10 = load i32* %arrayidx5, align 4
+  %10 = load i32, i32* %arrayidx5, align 4
   ret i32 %10
 }
 
@@ -293,38 +293,38 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
-  %2 = load i32* %N, align 4
+  %1 = load i32, i32* %i, align 4
+  %2 = load i32, i32* %N, align 4
   %add = add nsw i32 %2, 1
   %arrayidx = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add
   %arrayidx1 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx, i32 0, i32 %1
-  %3 = load i32* %arrayidx1, align 4
-  %4 = load i32* %a.addr, align 4
+  %3 = load i32, i32* %arrayidx1, align 4
+  %4 = load i32, i32* %a.addr, align 4
   %add2 = add nsw i32 %3, %4
-  %5 = load i32* %i, align 4
-  %6 = load i32* %N, align 4
+  %5 = load i32, i32* %i, align 4
+  %6 = load i32, i32* %N, align 4
   %arrayidx3 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
   %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx3, i32 0, i32 %5
   store i32 %add2, i32* %arrayidx4, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %7 = load i32* %i, align 4
+  %7 = load i32, i32* %i, align 4
   %inc = add nsw i32 %7, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %8 = load i32* %a.addr, align 4
-  %9 = load i32* %N, align 4
+  %8 = load i32, i32* %a.addr, align 4
+  %9 = load i32, i32* %N, align 4
   %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
   %arrayidx6 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx5, i32 0, i32 %8
-  %10 = load i32* %arrayidx6, align 4
+  %10 = load i32, i32* %arrayidx6, align 4
   ret i32 %10
 }
 
@@ -347,19 +347,19 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 1
   %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
-  %2 = load i32* %arrayidx, align 4
-  %3 = load i32* %a.addr, align 4
+  %2 = load i32, i32* %arrayidx, align 4
+  %3 = load i32, i32* %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %sub2 = sub nsw i32 100, %4
   %sub3 = sub nsw i32 %sub2, 1
   %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
@@ -367,15 +367,15 @@
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32* %i, align 4
+  %5 = load i32, i32* %i, align 4
   %inc = add nsw i32 %5, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32* %a.addr, align 4
+  %6 = load i32, i32* %a.addr, align 4
   %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32* %arrayidx5, align 4
+  %7 = load i32, i32* %arrayidx5, align 4
   ret i32 %7
 }
 
@@ -399,19 +399,19 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 90
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 10
   %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
-  %2 = load i32* %arrayidx, align 4
-  %3 = load i32* %a.addr, align 4
+  %2 = load i32, i32* %arrayidx, align 4
+  %3 = load i32, i32* %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %sub2 = sub nsw i32 100, %4
   %sub3 = sub nsw i32 %sub2, 1
   %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
@@ -419,15 +419,15 @@
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32* %i, align 4
+  %5 = load i32, i32* %i, align 4
   %inc = add nsw i32 %5, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32* %a.addr, align 4
+  %6 = load i32, i32* %a.addr, align 4
   %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32* %arrayidx5, align 4
+  %7 = load i32, i32* %arrayidx5, align 4
   ret i32 %7
 }
 
@@ -451,19 +451,19 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 1
   %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
-  %2 = load i32* %arrayidx, align 4
-  %3 = load i32* %a.addr, align 4
+  %2 = load i32, i32* %arrayidx, align 4
+  %3 = load i32, i32* %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %sub2 = sub nsw i32 100, %4
   %sub3 = sub nsw i32 %sub2, 10
   %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
@@ -471,15 +471,15 @@
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32* %i, align 4
+  %5 = load i32, i32* %i, align 4
   %inc = add nsw i32 %5, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32* %a.addr, align 4
+  %6 = load i32, i32* %a.addr, align 4
   %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32* %arrayidx5, align 4
+  %7 = load i32, i32* %arrayidx5, align 4
   ret i32 %7
 }
 
@@ -506,23 +506,23 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32** @PB, align 4
+  %1 = load i32*, i32** @PB, align 4
   %add.ptr = getelementptr inbounds i32, i32* %1, i32 100
-  %2 = load i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
   %idx.neg = sub i32 0, %2
   %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %idx.neg
   %add.ptr2 = getelementptr inbounds i32, i32* %add.ptr1, i32 -1
-  %3 = load i32* %add.ptr2, align 4
-  %4 = load i32* %a.addr, align 4
+  %3 = load i32, i32* %add.ptr2, align 4
+  %4 = load i32, i32* %a.addr, align 4
   %add = add nsw i32 %3, %4
-  %5 = load i32** @PA, align 4
+  %5 = load i32*, i32** @PA, align 4
   %add.ptr3 = getelementptr inbounds i32, i32* %5, i32 100
-  %6 = load i32* %i, align 4
+  %6 = load i32, i32* %i, align 4
   %idx.neg4 = sub i32 0, %6
   %add.ptr5 = getelementptr inbounds i32, i32* %add.ptr3, i32 %idx.neg4
   %add.ptr6 = getelementptr inbounds i32, i32* %add.ptr5, i32 -1
@@ -530,16 +530,16 @@
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %7 = load i32* %i, align 4
+  %7 = load i32, i32* %i, align 4
   %inc = add nsw i32 %7, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %8 = load i32** @PA, align 4
-  %9 = load i32* %a.addr, align 4
+  %8 = load i32*, i32** @PA, align 4
+  %9 = load i32, i32* %a.addr, align 4
   %add.ptr7 = getelementptr inbounds i32, i32* %8, i32 %9
-  %10 = load i32* %add.ptr7, align 4
+  %10 = load i32, i32* %add.ptr7, align 4
   ret i32 %10
 }
 
@@ -565,41 +565,41 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 1
-  %2 = load i32* %N, align 4
+  %2 = load i32, i32* %N, align 4
   %arrayidx = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 2), i32 0, i32 %2
   %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx, i32 0, i32 %sub1
-  %3 = load i32* %arrayidx2, align 4
-  %4 = load i32* %a.addr, align 4
+  %3 = load i32, i32* %arrayidx2, align 4
+  %4 = load i32, i32* %a.addr, align 4
   %add = add nsw i32 %3, %4
-  %5 = load i32* %i, align 4
+  %5 = load i32, i32* %i, align 4
   %sub3 = sub nsw i32 100, %5
   %sub4 = sub nsw i32 %sub3, 1
-  %6 = load i32* %N, align 4
+  %6 = load i32, i32* %N, align 4
   %arrayidx5 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
   %arrayidx6 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx5, i32 0, i32 %sub4
   store i32 %add, i32* %arrayidx6, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %7 = load i32* %i, align 4
+  %7 = load i32, i32* %i, align 4
   %inc = add nsw i32 %7, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %8 = load i32* %a.addr, align 4
-  %9 = load i32* %N, align 4
+  %8 = load i32, i32* %a.addr, align 4
+  %9 = load i32, i32* %N, align 4
   %arrayidx7 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
   %arrayidx8 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx7, i32 0, i32 %8
-  %10 = load i32* %arrayidx8, align 4
+  %10 = load i32, i32* %arrayidx8, align 4
   ret i32 %10
 }
 
@@ -625,42 +625,42 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 1
-  %2 = load i32* %N, align 4
+  %2 = load i32, i32* %N, align 4
   %add = add nsw i32 %2, 1
   %arrayidx = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %add
   %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx, i32 0, i32 %sub1
-  %3 = load i32* %arrayidx2, align 4
-  %4 = load i32* %a.addr, align 4
+  %3 = load i32, i32* %arrayidx2, align 4
+  %4 = load i32, i32* %a.addr, align 4
   %add3 = add nsw i32 %3, %4
-  %5 = load i32* %i, align 4
+  %5 = load i32, i32* %i, align 4
   %sub4 = sub nsw i32 100, %5
   %sub5 = sub nsw i32 %sub4, 1
-  %6 = load i32* %N, align 4
+  %6 = load i32, i32* %N, align 4
   %arrayidx6 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %6
   %arrayidx7 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx6, i32 0, i32 %sub5
   store i32 %add3, i32* %arrayidx7, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %7 = load i32* %i, align 4
+  %7 = load i32, i32* %i, align 4
   %inc = add nsw i32 %7, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %8 = load i32* %a.addr, align 4
-  %9 = load i32* %N, align 4
+  %8 = load i32, i32* %a.addr, align 4
+  %9 = load i32, i32* %N, align 4
   %arrayidx8 = getelementptr inbounds [100 x [100 x i32]], [100 x [100 x i32]]* getelementptr inbounds (%struct.anon.0* @Bar, i32 0, i32 0), i32 0, i32 %9
   %arrayidx9 = getelementptr inbounds [100 x i32], [100 x i32]* %arrayidx8, i32 0, i32 %8
-  %10 = load i32* %arrayidx9, align 4
+  %10 = load i32, i32* %arrayidx9, align 4
   ret i32 %10
 }
 
@@ -684,32 +684,32 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %add = add nsw i32 %1, 4
   %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add
-  %2 = load i32* %arrayidx, align 4
-  %3 = load i32* %a.addr, align 4
+  %2 = load i32, i32* %arrayidx, align 4
+  %3 = load i32, i32* %a.addr, align 4
   %add1 = add nsw i32 %2, %3
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
   store i32 %add1, i32* %arrayidx2, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32* %i, align 4
+  %5 = load i32, i32* %i, align 4
   %inc = add nsw i32 %5, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32* %a.addr, align 4
+  %6 = load i32, i32* %a.addr, align 4
   %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32* %arrayidx3, align 4
+  %7 = load i32, i32* %arrayidx3, align 4
   ret i32 %7
 }
 
@@ -733,19 +733,19 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 5
   %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
-  %2 = load i32* %arrayidx, align 4
-  %3 = load i32* %a.addr, align 4
+  %2 = load i32, i32* %arrayidx, align 4
+  %3 = load i32, i32* %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %sub2 = sub nsw i32 100, %4
   %sub3 = sub nsw i32 %sub2, 1
   %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub3
@@ -753,15 +753,15 @@
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32* %i, align 4
+  %5 = load i32, i32* %i, align 4
   %inc = add nsw i32 %5, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32* %a.addr, align 4
+  %6 = load i32, i32* %a.addr, align 4
   %arrayidx5 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32* %arrayidx5, align 4
+  %7 = load i32, i32* %arrayidx5, align 4
   ret i32 %7
 }
 
@@ -789,33 +789,33 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 1
   %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
-  %2 = load i32* %arrayidx, align 4
-  %3 = load i32* %a.addr, align 4
+  %2 = load i32, i32* %arrayidx, align 4
+  %3 = load i32, i32* %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
   store i32 %add, i32* %arrayidx2, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32* %i, align 4
+  %5 = load i32, i32* %i, align 4
   %inc = add nsw i32 %5, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32* %a.addr, align 4
+  %6 = load i32, i32* %a.addr, align 4
   %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32* %arrayidx3, align 4
+  %7 = load i32, i32* %arrayidx3, align 4
   ret i32 %7
 }
 
@@ -839,17 +839,17 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
-  %2 = load i32* %arrayidx, align 4
-  %3 = load i32* %a.addr, align 4
+  %2 = load i32, i32* %arrayidx, align 4
+  %3 = load i32, i32* %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %sub = sub nsw i32 100, %4
   %sub1 = sub nsw i32 %sub, 1
   %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
@@ -857,15 +857,15 @@
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32* %i, align 4
+  %5 = load i32, i32* %i, align 4
   %inc = add nsw i32 %5, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32* %a.addr, align 4
+  %6 = load i32, i32* %a.addr, align 4
   %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32* %arrayidx3, align 4
+  %7 = load i32, i32* %arrayidx3, align 4
   ret i32 %7
 }
 
@@ -889,37 +889,37 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32** @PB, align 4
+  %1 = load i32*, i32** @PB, align 4
   %add.ptr = getelementptr inbounds i32, i32* %1, i32 100
-  %2 = load i32* %i, align 4
+  %2 = load i32, i32* %i, align 4
   %idx.neg = sub i32 0, %2
   %add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %idx.neg
   %add.ptr2 = getelementptr inbounds i32, i32* %add.ptr1, i32 -1
-  %3 = load i32* %add.ptr2, align 4
-  %4 = load i32* %a.addr, align 4
+  %3 = load i32, i32* %add.ptr2, align 4
+  %4 = load i32, i32* %a.addr, align 4
   %add = add nsw i32 %3, %4
-  %5 = load i32** @PA, align 4
-  %6 = load i32* %i, align 4
+  %5 = load i32*, i32** @PA, align 4
+  %6 = load i32, i32* %i, align 4
   %add.ptr3 = getelementptr inbounds i32, i32* %5, i32 %6
   store i32 %add, i32* %add.ptr3, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %7 = load i32* %i, align 4
+  %7 = load i32, i32* %i, align 4
   %inc = add nsw i32 %7, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %8 = load i32** @PA, align 4
-  %9 = load i32* %a.addr, align 4
+  %8 = load i32*, i32** @PA, align 4
+  %9 = load i32, i32* %a.addr, align 4
   %add.ptr4 = getelementptr inbounds i32, i32* %8, i32 %9
-  %10 = load i32* %add.ptr4, align 4
+  %10 = load i32, i32* %add.ptr4, align 4
   ret i32 %10
 }
 
@@ -946,34 +946,34 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 1
   %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
-  %2 = load i32* %arrayidx, align 4
-  %3 = load i32* %a.addr, align 4
+  %2 = load i32, i32* %arrayidx, align 4
+  %3 = load i32, i32* %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %add2 = add nsw i32 %4, 10
   %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
   store i32 %add, i32* %arrayidx3, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32* %i, align 4
+  %5 = load i32, i32* %i, align 4
   %inc = add nsw i32 %5, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32* %a.addr, align 4
+  %6 = load i32, i32* %a.addr, align 4
   %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32* %arrayidx4, align 4
+  %7 = load i32, i32* %arrayidx4, align 4
   ret i32 %7
 }
 
@@ -996,33 +996,33 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 10
   %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
-  %2 = load i32* %arrayidx, align 4
-  %3 = load i32* %a.addr, align 4
+  %2 = load i32, i32* %arrayidx, align 4
+  %3 = load i32, i32* %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
   store i32 %add, i32* %arrayidx2, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32* %i, align 4
+  %5 = load i32, i32* %i, align 4
   %inc = add nsw i32 %5, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32* %a.addr, align 4
+  %6 = load i32, i32* %a.addr, align 4
   %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32* %arrayidx3, align 4
+  %7 = load i32, i32* %arrayidx3, align 4
   ret i32 %7
 }
 
@@ -1045,33 +1045,33 @@
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
-  %0 = load i32* %i, align 4
+  %0 = load i32, i32* %i, align 4
   %cmp = icmp slt i32 %0, 100
   br i1 %cmp, label %for.body, label %for.end
 
 for.body:                                         ; preds = %for.cond
-  %1 = load i32* %i, align 4
+  %1 = load i32, i32* %i, align 4
   %sub = sub nsw i32 100, %1
   %sub1 = sub nsw i32 %sub, 10
   %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
-  %2 = load i32* %arrayidx, align 4
-  %3 = load i32* %a.addr, align 4
+  %2 = load i32, i32* %arrayidx, align 4
+  %3 = load i32, i32* %a.addr, align 4
   %add = add nsw i32 %2, %3
-  %4 = load i32* %i, align 4
+  %4 = load i32, i32* %i, align 4
   %add2 = add nsw i32 %4, 10
   %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
   store i32 %add, i32* %arrayidx3, align 4
   br label %for.inc
 
 for.inc:                                          ; preds = %for.body
-  %5 = load i32* %i, align 4
+  %5 = load i32, i32* %i, align 4
   %inc = add nsw i32 %5, 1
   store i32 %inc, i32* %i, align 4
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
-  %6 = load i32* %a.addr, align 4
+  %6 = load i32, i32* %a.addr, align 4
   %arrayidx4 = getelementptr inbounds [100 x i32], [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
-  %7 = load i32* %arrayidx4, align 4
+  %7 = load i32, i32* %arrayidx4, align 4
   ret i32 %7
 }
diff --git a/llvm/test/Transforms/LoopVectorize/hoist-loads.ll b/llvm/test/Transforms/LoopVectorize/hoist-loads.ll
index ae7f5dc..a20b0f6 100644
--- a/llvm/test/Transforms/LoopVectorize/hoist-loads.ll
+++ b/llvm/test/Transforms/LoopVectorize/hoist-loads.ll
@@ -16,12 +16,12 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @A, i64 0, i64 %indvars.iv
   %arrayidx2 = getelementptr inbounds [1024 x float], [1024 x float]* @B, i64 0, i64 %indvars.iv
-  %0 = load float* %arrayidx2, align 4
+  %0 = load float, float* %arrayidx2, align 4
   %cmp3 = fcmp oeq float %0, 0.000000e+00
   br i1 %cmp3, label %if.end9, label %if.else
 
 if.else:
-  %1 = load float* %arrayidx, align 4
+  %1 = load float, float* %arrayidx, align 4
   br label %if.end9
 
 if.end9:
@@ -48,12 +48,12 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end9 ]
   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @A, i64 0, i64 %indvars.iv
   %arrayidx2 = getelementptr inbounds [1024 x float], [1024 x float]* @B, i64 0, i64 %indvars.iv
-  %0 = load float* %arrayidx2, align 4
+  %0 = load float, float* %arrayidx2, align 4
   %cmp3 = fcmp oeq float %0, 0.000000e+00
   br i1 %cmp3, label %if.end9, label %if.else
 
 if.else:
-  %1 = load float* %arrayidx, align 4
+  %1 = load float, float* %arrayidx, align 4
   br label %if.end9
 
 if.end9:
diff --git a/llvm/test/Transforms/LoopVectorize/i8-induction.ll b/llvm/test/Transforms/LoopVectorize/i8-induction.ll
index 90e3ec0..d9e8a43 100644
--- a/llvm/test/Transforms/LoopVectorize/i8-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/i8-induction.ll
@@ -9,7 +9,7 @@
 define void @f() nounwind uwtable ssp {
 scalar.ph:
   store i8 0, i8* inttoptr (i64 1 to i8*), align 1
-  %0 = load i8* @a, align 1
+  %0 = load i8, i8* @a, align 1
   br label %for.body
 
 for.body:
diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-edgemasks.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-edgemasks.ll
index f2d1f25..8d435f5 100644
--- a/llvm/test/Transforms/LoopVectorize/if-conversion-edgemasks.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-conversion-edgemasks.ll
@@ -20,15 +20,15 @@
   br i1 %cmp88, label %for.body.lr.ph, label %for.end
 
 for.body.lr.ph:
-  %0 = load i32** @b, align 8
-  %1 = load i32** @a, align 8
-  %2 = load i32** @c, align 8
+  %0 = load i32*, i32** @b, align 8
+  %1 = load i32*, i32** @a, align 8
+  %2 = load i32*, i32** @c, align 8
   br label %for.body
 
 for.body:
   %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %_ZL3fn3ii.exit58 ]
   %arrayidx = getelementptr inbounds i32, i32* %0, i64 %indvars.iv
-  %3 = load i32* %arrayidx, align 4  %4 = trunc i64 %indvars.iv to i32
+  %3 = load i32, i32* %arrayidx, align 4  %4 = trunc i64 %indvars.iv to i32
   %and.i = and i32 %4, 1
   %tobool.i.i = icmp eq i32 %and.i, 0
   br i1 %tobool.i.i, label %if.end.i, label %if.then.i
@@ -136,7 +136,7 @@
   %p1.addr.0.i16.i = phi i32 [ %or.i14.i, %if.then.i15.i ], [ %p1.addr.3.i.i, %_Z3fn2iii.exit.i ]
   %arrayidx2 = getelementptr inbounds i32, i32* %1, i64 %indvars.iv
   store i32 %p1.addr.0.i16.i, i32* %arrayidx2, align 4  %arrayidx4 = getelementptr inbounds i32, i32* %0, i64 %indvars.iv
-  %10 = load i32* %arrayidx4, align 4  br i1 %tobool.i.i, label %_Z3fn1ii.exit.i26, label %if.then.i.i21
+  %10 = load i32, i32* %arrayidx4, align 4  br i1 %tobool.i.i, label %_Z3fn1ii.exit.i26, label %if.then.i.i21
 
 if.then.i.i21:
   %and.i.i18 = lshr i32 %10, 2
diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll
index 4a327e8..3a581eb 100644
--- a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll
@@ -20,9 +20,9 @@
 for.body:
   %indvars.iv = phi i64 [ %indvars.iv.next, %if.end14 ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %1 = load i32* %arrayidx2, align 4
+  %1 = load i32, i32* %arrayidx2, align 4
   %cmp3 = icmp sgt i32 %0, %1
   br i1 %cmp3, label %if.then, label %if.end14
 
diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-reduction.ll
index 711ca76..20333b9 100644
--- a/llvm/test/Transforms/LoopVectorize/if-conversion-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-conversion-reduction.ll
@@ -15,7 +15,7 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
   %sum.011 = phi i32 [ %sum.1, %for.inc ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 30
   br i1 %cmp1, label %if.then, label %for.inc
 
diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion.ll b/llvm/test/Transforms/LoopVectorize/if-conversion.ll
index 7a3d825..3a6ac8b1 100644
--- a/llvm/test/Transforms/LoopVectorize/if-conversion.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-conversion.ll
@@ -36,9 +36,9 @@
 for.body:
   %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %if.end ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %1 = load i32* %arrayidx, align 4
+  %1 = load i32, i32* %arrayidx, align 4
   %arrayidx4 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
-  %2 = load i32* %arrayidx4, align 4
+  %2 = load i32, i32* %arrayidx4, align 4
   %cmp5 = icmp sgt i32 %1, %2
   br i1 %cmp5, label %if.then, label %if.end
 
@@ -85,7 +85,7 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
   %sum.011 = phi i32 [ %sum.1, %for.inc ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 30
   br i1 %cmp1, label %if.then, label %for.inc
 
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
index ee5f8bc..991d027 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -41,8 +41,8 @@
 ; UNROLL:   %[[IND1:[a-zA-Z0-9]+]] = add i64 %{{.*}}, 1
 ; UNROLL:   %[[v0:[a-zA-Z0-9]+]] = getelementptr inbounds i32, i32* %f, i64 %[[IND]]
 ; UNROLL:   %[[v1:[a-zA-Z0-9]+]] = getelementptr inbounds i32, i32* %f, i64 %[[IND1]]
-; UNROLL:   %[[v2:[a-zA-Z0-9]+]] = load i32* %[[v0]], align 4
-; UNROLL:   %[[v3:[a-zA-Z0-9]+]] = load i32* %[[v1]], align 4
+; UNROLL:   %[[v2:[a-zA-Z0-9]+]] = load i32, i32* %[[v0]], align 4
+; UNROLL:   %[[v3:[a-zA-Z0-9]+]] = load i32, i32* %[[v1]], align 4
 ; UNROLL:   %[[v4:[a-zA-Z0-9]+]] = icmp sgt i32 %[[v2]], 100
 ; UNROLL:   %[[v5:[a-zA-Z0-9]+]] = icmp sgt i32 %[[v3]], 100
 ; UNROLL:   %[[v6:[a-zA-Z0-9]+]] = add nsw i32 %[[v2]], 20
@@ -67,7 +67,7 @@
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
   %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp1 = icmp sgt i32 %0, 100
   br i1 %cmp1, label %if.then, label %for.inc
 
@@ -105,7 +105,7 @@
   %indvars.iv3 = phi i64 [ %indvars.iv.next4, %for.inc23 ], [ undef, %for.body9 ]
   %iNewChunks.120 = phi i32 [ %iNewChunks.2, %for.inc23 ], [ undef, %for.body9 ]
   %arrayidx16 = getelementptr inbounds [768 x i32], [768 x i32]* undef, i64 0, i64 %indvars.iv3
-  %tmp = load i32* %arrayidx16, align 4
+  %tmp = load i32, i32* %arrayidx16, align 4
   br i1 undef, label %if.then18, label %for.inc23
 
 if.then18:
diff --git a/llvm/test/Transforms/LoopVectorize/incorrect-dom-info.ll b/llvm/test/Transforms/LoopVectorize/incorrect-dom-info.ll
index 12fc13a..798793a 100644
--- a/llvm/test/Transforms/LoopVectorize/incorrect-dom-info.ll
+++ b/llvm/test/Transforms/LoopVectorize/incorrect-dom-info.ll
@@ -59,7 +59,7 @@
 
 ; <label>:11                                      ; preds = %.lr.ph21
   %12 = getelementptr inbounds [0 x i8], [0 x i8]* @PL_utf8skip, i64 0, i64 undef
-  %13 = load i8* %12, align 1
+  %13 = load i8, i8* %12, align 1
   %14 = zext i8 %13 to i64
   %15 = icmp ugt i64 %14, %10
   %. = select i1 %15, i64 %10, i64 %14
@@ -91,7 +91,7 @@
   br label %26
 
 ; <label>:26                                      ; preds = %25, %24, %23, %22
-  %27 = load i64* %len, align 8
+  %27 = load i64, i64* %len, align 8
   %28 = add i64 %27, -1
   br i1 undef, label %thread-pre-split._crit_edge, label %.lr.ph21
 
diff --git a/llvm/test/Transforms/LoopVectorize/increment.ll b/llvm/test/Transforms/LoopVectorize/increment.ll
index 369bd38..d0b2509 100644
--- a/llvm/test/Transforms/LoopVectorize/increment.ll
+++ b/llvm/test/Transforms/LoopVectorize/increment.ll
@@ -21,7 +21,7 @@
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = trunc i64 %indvars.iv to i32
   %5 = add nsw i32 %3, %4
   store i32 %5, i32* %2, align 4
@@ -50,10 +50,10 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %idxprom1 = sext i32 %0 to i64
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1
-  %1 = load i32* %arrayidx2, align 4
+  %1 = load i32, i32* %arrayidx2, align 4
   %inc = add nsw i32 %1, 1
   store i32 %inc, i32* %arrayidx2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 811c492..2fbb2de 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -52,10 +52,10 @@
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %ind.sum = add i64 %iv, %offset
   %arr.idx = getelementptr inbounds float, float* %a, i64 %ind.sum
-  %l1 = load float* %arr.idx, align 4
+  %l1 = load float, float* %arr.idx, align 4
   %ind.sum2 = add i64 %iv, %offset2
   %arr.idx2 = getelementptr inbounds float, float* %a, i64 %ind.sum2
-  %l2 = load float* %arr.idx2, align 4
+  %l2 = load float, float* %arr.idx2, align 4
   %m = fmul fast float %b, %l2
   %ad = fadd fast float %l1, %m
   store float %ad, float* %arr.idx, align 4
@@ -153,9 +153,9 @@
 @c = common global i32 0, align 4
 define i32 @testoverflowcheck() {
 entry:
-  %.pr.i = load i8* @e, align 1
-  %0 = load i32* @d, align 4
-  %c.promoted.i = load i32* @c, align 4
+  %.pr.i = load i8, i8* @e, align 1
+  %0 = load i32, i32* @d, align 4
+  %c.promoted.i = load i32, i32* @c, align 4
   br label %cond.end.i
 
 cond.end.i:
diff --git a/llvm/test/Transforms/LoopVectorize/intrinsic.ll b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
index 029d8b6..fae6a8c 100644
--- a/llvm/test/Transforms/LoopVectorize/intrinsic.ll
+++ b/llvm/test/Transforms/LoopVectorize/intrinsic.ll
@@ -14,7 +14,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %call = tail call float @llvm.sqrt.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %call, float* %arrayidx2, align 4
@@ -40,7 +40,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %call = tail call double @llvm.sqrt.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %call, double* %arrayidx2, align 8
@@ -66,7 +66,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %call = tail call float @llvm.sin.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %call, float* %arrayidx2, align 4
@@ -92,7 +92,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %call = tail call double @llvm.sin.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %call, double* %arrayidx2, align 8
@@ -118,7 +118,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %call = tail call float @llvm.cos.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %call, float* %arrayidx2, align 4
@@ -144,7 +144,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %call = tail call double @llvm.cos.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %call, double* %arrayidx2, align 8
@@ -170,7 +170,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %call = tail call float @llvm.exp.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %call, float* %arrayidx2, align 4
@@ -196,7 +196,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %call = tail call double @llvm.exp.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %call, double* %arrayidx2, align 8
@@ -222,7 +222,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %call = tail call float @llvm.exp2.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %call, float* %arrayidx2, align 4
@@ -248,7 +248,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %call = tail call double @llvm.exp2.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %call, double* %arrayidx2, align 8
@@ -274,7 +274,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %call = tail call float @llvm.log.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %call, float* %arrayidx2, align 4
@@ -300,7 +300,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %call = tail call double @llvm.log.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %call, double* %arrayidx2, align 8
@@ -326,7 +326,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %call = tail call float @llvm.log10.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %call, float* %arrayidx2, align 4
@@ -352,7 +352,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %call = tail call double @llvm.log10.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %call, double* %arrayidx2, align 8
@@ -378,7 +378,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %call = tail call float @llvm.log2.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %call, float* %arrayidx2, align 4
@@ -404,7 +404,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %call = tail call double @llvm.log2.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %call, double* %arrayidx2, align 8
@@ -430,7 +430,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %call = tail call float @llvm.fabs.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %call, float* %arrayidx2, align 4
@@ -453,7 +453,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %call = tail call double @llvm.fabs(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %call, double* %arrayidx2, align 8
@@ -479,9 +479,9 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds float, float* %z, i64 %indvars.iv
-  %1 = load float* %arrayidx1, align 4
+  %1 = load float, float* %arrayidx1, align 4
   %call = tail call float @llvm.copysign.f32(float %0, float %1) nounwind readnone
   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %call, float* %arrayidx2, align 4
@@ -504,9 +504,9 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %arrayidx1 = getelementptr inbounds double, double* %z, i64 %indvars.iv
-  %1 = load double* %arrayidx, align 8
+  %1 = load double, double* %arrayidx, align 8
   %call = tail call double @llvm.copysign(double %0, double %1) nounwind readnone
   %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %call, double* %arrayidx2, align 8
@@ -532,7 +532,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %call = tail call float @llvm.floor.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %call, float* %arrayidx2, align 4
@@ -558,7 +558,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %call = tail call double @llvm.floor.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %call, double* %arrayidx2, align 8
@@ -584,7 +584,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %call = tail call float @llvm.ceil.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %call, float* %arrayidx2, align 4
@@ -610,7 +610,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %call = tail call double @llvm.ceil.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %call, double* %arrayidx2, align 8
@@ -636,7 +636,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %call = tail call float @llvm.trunc.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %call, float* %arrayidx2, align 4
@@ -662,7 +662,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %call = tail call double @llvm.trunc.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %call, double* %arrayidx2, align 8
@@ -688,7 +688,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %call = tail call float @llvm.rint.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %call, float* %arrayidx2, align 4
@@ -714,7 +714,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %call = tail call double @llvm.rint.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %call, double* %arrayidx2, align 8
@@ -740,7 +740,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %call = tail call float @llvm.nearbyint.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %call, float* %arrayidx2, align 4
@@ -766,7 +766,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %call = tail call double @llvm.nearbyint.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %call, double* %arrayidx2, align 8
@@ -792,7 +792,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %call = tail call float @llvm.round.f32(float %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %call, float* %arrayidx2, align 4
@@ -818,7 +818,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %call = tail call double @llvm.round.f64(double %0) nounwind readnone
   %arrayidx2 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %call, double* %arrayidx2, align 8
@@ -844,11 +844,11 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds float, float* %w, i64 %indvars.iv
-  %1 = load float* %arrayidx2, align 4
+  %1 = load float, float* %arrayidx2, align 4
   %arrayidx4 = getelementptr inbounds float, float* %z, i64 %indvars.iv
-  %2 = load float* %arrayidx4, align 4
+  %2 = load float, float* %arrayidx4, align 4
   %3 = tail call float @llvm.fma.f32(float %0, float %2, float %1)
   %arrayidx6 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %3, float* %arrayidx6, align 4
@@ -874,11 +874,11 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %arrayidx2 = getelementptr inbounds double, double* %w, i64 %indvars.iv
-  %1 = load double* %arrayidx2, align 8
+  %1 = load double, double* %arrayidx2, align 8
   %arrayidx4 = getelementptr inbounds double, double* %z, i64 %indvars.iv
-  %2 = load double* %arrayidx4, align 8
+  %2 = load double, double* %arrayidx4, align 8
   %3 = tail call double @llvm.fma.f64(double %0, double %2, double %1)
   %arrayidx6 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %3, double* %arrayidx6, align 8
@@ -904,11 +904,11 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds float, float* %w, i64 %indvars.iv
-  %1 = load float* %arrayidx2, align 4
+  %1 = load float, float* %arrayidx2, align 4
   %arrayidx4 = getelementptr inbounds float, float* %z, i64 %indvars.iv
-  %2 = load float* %arrayidx4, align 4
+  %2 = load float, float* %arrayidx4, align 4
   %3 = tail call float @llvm.fmuladd.f32(float %0, float %2, float %1)
   %arrayidx6 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %3, float* %arrayidx6, align 4
@@ -934,11 +934,11 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %arrayidx2 = getelementptr inbounds double, double* %w, i64 %indvars.iv
-  %1 = load double* %arrayidx2, align 8
+  %1 = load double, double* %arrayidx2, align 8
   %arrayidx4 = getelementptr inbounds double, double* %z, i64 %indvars.iv
-  %2 = load double* %arrayidx4, align 8
+  %2 = load double, double* %arrayidx4, align 8
   %3 = tail call double @llvm.fmuladd.f64(double %0, double %2, double %1)
   %arrayidx6 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %3, double* %arrayidx6, align 8
@@ -964,9 +964,9 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds float, float* %z, i64 %indvars.iv
-  %1 = load float* %arrayidx2, align 4
+  %1 = load float, float* %arrayidx2, align 4
   %call = tail call float @llvm.pow.f32(float %0, float %1) nounwind readnone
   %arrayidx4 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %call, float* %arrayidx4, align 4
@@ -992,9 +992,9 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %arrayidx2 = getelementptr inbounds double, double* %z, i64 %indvars.iv
-  %1 = load double* %arrayidx2, align 8
+  %1 = load double, double* %arrayidx2, align 8
   %call = tail call double @llvm.pow.f64(double %0, double %1) nounwind readnone
   %arrayidx4 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %call, double* %arrayidx4, align 8
@@ -1017,7 +1017,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %call = tail call float @fabsf(float %0) nounwind readnone
   store float %call, float* %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -1051,7 +1051,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %x, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %call = tail call float @roundf(float %0) nounwind readnone
   store float %call, float* %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -1078,7 +1078,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds double, double* %x, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 4
+  %0 = load double, double* %arrayidx, align 4
   store double %0, double* %arrayidx, align 4
   tail call void @round(double %0) nounwind readnone
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -1103,7 +1103,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %call = tail call double @llvm.powi.f64(double %0, i32  %P) nounwind readnone
   %arrayidx4 = getelementptr inbounds double, double* %x, i64 %indvars.iv
   store double %call, double* %arrayidx4, align 8
@@ -1127,7 +1127,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds double, double* %y, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %1 = trunc i64 %indvars.iv to i32
   %call = tail call double @llvm.powi.f64(double %0, i32  %1) nounwind readnone
   %arrayidx4 = getelementptr inbounds double, double* %x, i64 %indvars.iv
@@ -1154,7 +1154,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i64, i64* %y, i64 %indvars.iv
-  %0 = load i64* %arrayidx, align 8
+  %0 = load i64, i64* %arrayidx, align 8
   %call = tail call i64 @llvm.cttz.i64(i64 %0, i1 true) nounwind readnone
   %arrayidx4 = getelementptr inbounds i64, i64* %x, i64 %indvars.iv
   store i64 %call, i64* %arrayidx4, align 8
@@ -1180,7 +1180,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i64, i64* %y, i64 %indvars.iv
-  %0 = load i64* %arrayidx, align 8
+  %0 = load i64, i64* %arrayidx, align 8
   %call = tail call i64 @llvm.ctlz.i64(i64 %0, i1 true) nounwind readnone
   %arrayidx4 = getelementptr inbounds i64, i64* %x, i64 %indvars.iv
   store i64 %call, i64* %arrayidx4, align 8
@@ -1206,9 +1206,9 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds float, float* %z, i64 %indvars.iv
-  %1 = load float* %arrayidx2, align 4
+  %1 = load float, float* %arrayidx2, align 4
   %call = tail call float @llvm.minnum.f32(float %0, float %1) nounwind readnone
   %arrayidx4 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %call, float* %arrayidx4, align 4
@@ -1234,9 +1234,9 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %y, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds float, float* %z, i64 %indvars.iv
-  %1 = load float* %arrayidx2, align 4
+  %1 = load float, float* %arrayidx2, align 4
   %call = tail call float @llvm.maxnum.f32(float %0, float %1) nounwind readnone
   %arrayidx4 = getelementptr inbounds float, float* %x, i64 %indvars.iv
   store float %call, float* %arrayidx4, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/lifetime.ll b/llvm/test/Transforms/LoopVectorize/lifetime.ll
index 63bde5a..6e525ca 100644
--- a/llvm/test/Transforms/LoopVectorize/lifetime.ll
+++ b/llvm/test/Transforms/LoopVectorize/lifetime.ll
@@ -20,7 +20,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   call void @llvm.lifetime.end(i64 4096, i8* %0) #1
   %arrayidx = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
-  %1 = load i32* %arrayidx, align 8
+  %1 = load i32, i32* %arrayidx, align 8
   store i32 100, i32* %arrayidx, align 8
   call void @llvm.lifetime.start(i64 4096, i8* %0) #1
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -50,7 +50,7 @@
   %1 = bitcast [1024 x i32]* %arr to i8*
   call void @llvm.lifetime.end(i64 4096, i8* %1) #1
   %arrayidx = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
-  %2 = load i32* %arrayidx, align 8
+  %2 = load i32, i32* %arrayidx, align 8
   store i32 100, i32* %arrayidx, align 8
   call void @llvm.lifetime.start(i64 4096, i8* %1) #1
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -79,7 +79,7 @@
   %1 = bitcast [1024 x i32]* %arr to i8*
   call void @llvm.lifetime.end(i64 4096, i8* %1) #1
   %arrayidx = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
-  %2 = load i32* %arrayidx, align 8
+  %2 = load i32, i32* %arrayidx, align 8
   store i32 100, i32* %arrayidx, align 8
   call void @llvm.lifetime.start(i64 4096, i8* %1) #1
   %indvars.iv.next = add i64 %indvars.iv, 1
diff --git a/llvm/test/Transforms/LoopVectorize/loop-vect-memdep.ll b/llvm/test/Transforms/LoopVectorize/loop-vect-memdep.ll
index 326c4d4..d9efaa5 100644
--- a/llvm/test/Transforms/LoopVectorize/loop-vect-memdep.ll
+++ b/llvm/test/Transforms/LoopVectorize/loop-vect-memdep.ll
@@ -6,7 +6,7 @@
 
 define void @test_loop_novect(double** %arr, i64 %n) {
 for.body.lr.ph:
-  %t = load double** %arr, align 8
+  %t = load double*, double** %arr, align 8
   br label %for.body
 
 for.body:                                      ; preds = %for.body, %for.body.lr.ph
@@ -14,8 +14,8 @@
   %a = getelementptr inbounds double, double* %t, i64 %i
   %i.next = add nuw nsw i64 %i, 1
   %a.next = getelementptr inbounds double, double* %t, i64 %i.next
-  %t1 = load double* %a, align 8
-  %t2 = load double* %a.next, align 8
+  %t1 = load double, double* %a, align 8
+  %t2 = load double, double* %a.next, align 8
   store double %t1, double* %a.next, align 8
   store double %t2, double* %a, align 8
   %c = icmp eq i64 %i, %n
diff --git a/llvm/test/Transforms/LoopVectorize/memdep.ll b/llvm/test/Transforms/LoopVectorize/memdep.ll
index 43fccb7..fb60883 100644
--- a/llvm/test/Transforms/LoopVectorize/memdep.ll
+++ b/llvm/test/Transforms/LoopVectorize/memdep.ll
@@ -20,7 +20,7 @@
   %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %indvars.iv.next = add i32 %indvars.iv, 1
   %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv.next
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add1 = add nsw i32 %0, 1
   %arrayidx3 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
   store i32 %add1, i32* %arrayidx3, align 4
@@ -45,7 +45,7 @@
 for.body:
   %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %A, i32 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, 1
   %indvars.iv.next = add i32 %indvars.iv, 1
   %arrayidx3 = getelementptr inbounds i32, i32* %A, i32 %indvars.iv.next
@@ -75,7 +75,7 @@
   %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   %idxprom = sext i32 %i.01 to i64
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %idxprom
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, 1
   %add1 = add nsw i32 %i.01, 2
   %idxprom2 = sext i32 %add1 to i64
@@ -106,12 +106,12 @@
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
   store i32 %0, i32* %arrayidx2, align 4
   %indvars.iv.next = add nsw i64 %indvars.iv, 1
   %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv.next
-  %1 = load i32* %arrayidx4, align 4
+  %1 = load i32, i32* %arrayidx4, align 4
   store i32 %1, i32* %arrayidx, align 4
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp ne i32 %lftr.wideiv, 1024
@@ -141,7 +141,7 @@
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
   store i32 %tmp.addr.08, i32* %arrayidx, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx3, align 4
+  %0 = load i32, i32* %arrayidx3, align 4
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
   %exitcond = icmp ne i32 %lftr.wideiv, 1024
   br i1 %exitcond, label %for.body, label %for.end
@@ -170,10 +170,10 @@
   %indvars.iv = phi i64 [ 16, %entry ], [ %indvars.iv.next, %for.body ]
   %0 = add nsw i64 %indvars.iv, -3
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0
-  %1 = load i32* %arrayidx, align 4
+  %1 = load i32, i32* %arrayidx, align 4
   %2 = add nsw i64 %indvars.iv, 4
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %2
-  %3 = load i32* %arrayidx2, align 4
+  %3 = load i32, i32* %arrayidx2, align 4
   %add3 = add nsw i32 %3, %1
   %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
   store i32 %add3, i32* %arrayidx5, align 4
@@ -204,12 +204,12 @@
 for.body:
   %indvars.iv = phi i64 [ 16, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
   store i32 %0, i32* %arrayidx2, align 4
   %1 = add nsw i64 %indvars.iv, -3
   %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %1
-  %2 = load i32* %arrayidx4, align 4
+  %2 = load i32, i32* %arrayidx4, align 4
   %arrayidx6 = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
   store i32 %2, i32* %arrayidx6, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
diff --git a/llvm/test/Transforms/LoopVectorize/metadata-unroll.ll b/llvm/test/Transforms/LoopVectorize/metadata-unroll.ll
index 38df838..3c80ae0 100644
--- a/llvm/test/Transforms/LoopVectorize/metadata-unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/metadata-unroll.ll
@@ -24,7 +24,7 @@
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = trunc i64 %indvars.iv to i32
   %5 = add nsw i32 %3, %4
   store i32 %5, i32* %2, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/metadata.ll b/llvm/test/Transforms/LoopVectorize/metadata.ll
index 009463c..9a791ae 100644
--- a/llvm/test/Transforms/LoopVectorize/metadata.ll
+++ b/llvm/test/Transforms/LoopVectorize/metadata.ll
@@ -10,7 +10,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float, float* %arrayidx, align 4, !tbaa !0
   %conv = fptosi float %0 to i32
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
   store i32 %conv, i32* %arrayidx2, align 4, !tbaa !4
@@ -23,7 +23,7 @@
 }
 
 ; CHECK-LABEL: @test1
-; CHECK: load <4 x float>* %{{.*}}, align 4, !tbaa ![[TFLT:[0-9]+]]
+; CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa ![[TFLT:[0-9]+]]
 ; CHECK: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa ![[TINT:[0-9]+]]
 ; CHECK: ret i32 0
 
diff --git a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
index 858536f..5a0356f 100644
--- a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -27,7 +27,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp3 = icmp sgt i32 %0, %max.red.08
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -56,7 +56,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp3 = icmp slt i32 %max.red.08, %0
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -84,7 +84,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp3 = icmp slt i32 %0, %max.red.08
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -113,7 +113,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp3 = icmp sgt i32 %max.red.08, %0
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -143,7 +143,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp3 = icmp ugt i32 %0, %max.red.08
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -172,7 +172,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp3 = icmp ult i32 %max.red.08, %0
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -200,7 +200,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp3 = icmp ult i32 %0, %max.red.08
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -229,7 +229,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp3 = icmp ugt i32 %max.red.08, %0
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -258,7 +258,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp3 = icmp sge i32 %0, %max.red.08
   %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -287,7 +287,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp3 = icmp sle i32 %0, %max.red.08
   %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -316,7 +316,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp3 = icmp uge i32 %0, %max.red.08
   %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -345,7 +345,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %cmp3 = icmp ule i32 %0, %max.red.08
   %max.red.0 = select i1 %cmp3, i32 %max.red.08, i32 %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -369,8 +369,8 @@
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
   %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 1, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
-  %1 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx, align 4
+  %1 = load i32, i32* %arrayidx1, align 4
   %cmp3 = icmp sgt i32 %0, %1
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -393,8 +393,8 @@
   %max.red.08 = phi i32 [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv
   %arrayidx1 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 1, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
-  %1 = load i32* %arrayidx1, align 4
+  %0 = load i32, i32* %arrayidx, align 4
+  %1 = load i32, i32* %arrayidx1, align 4
   %cmp3 = icmp sgt i32 %0, %max.red.08
   %max.red.0 = select i1 %cmp3, i32 %0, i32 %1
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -426,7 +426,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %cmp3 = fcmp ogt float %0, %max.red.08
   %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -452,7 +452,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %cmp3 = fcmp oge float %0, %max.red.08
   %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -478,7 +478,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %cmp3 = fcmp olt float %0, %max.red.08
   %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -504,7 +504,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %cmp3 = fcmp ole float %0, %max.red.08
   %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -530,7 +530,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %cmp3 = fcmp ugt float %0, %max.red.08
   %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -556,7 +556,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %cmp3 = fcmp uge float %0, %max.red.08
   %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -582,7 +582,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %cmp3 = fcmp ult float %0, %max.red.08
   %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -608,7 +608,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %cmp3 = fcmp ule float %0, %max.red.08
   %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -637,7 +637,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %cmp3 = fcmp olt float %0, %min.red.08
   %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -663,7 +663,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %cmp3 = fcmp ole float %0, %min.red.08
   %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -689,7 +689,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %cmp3 = fcmp ogt float %0, %min.red.08
   %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -715,7 +715,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %cmp3 = fcmp oge float %0, %min.red.08
   %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -741,7 +741,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %cmp3 = fcmp ult float %0, %min.red.08
   %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -767,7 +767,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %cmp3 = fcmp ule float %0, %min.red.08
   %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -793,7 +793,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %cmp3 = fcmp ugt float %0, %min.red.08
   %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -819,7 +819,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %min.red.08 = phi float [ %min, %entry ], [ %min.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %cmp3 = fcmp uge float %0, %min.red.08
   %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -846,7 +846,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %min.red.08 = phi double [ %min, %entry ], [ %min.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x double], [1024 x double]* @dA, i64 0, i64 %indvars.iv
-  %0 = load double* %arrayidx, align 4
+  %0 = load double, double* %arrayidx, align 4
   %cmp3 = fcmp olt double %0, %min.red.08
   %min.red.0 = select i1 %cmp3, double %0, double %min.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -870,7 +870,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %max.red.08 = phi float [ %max, %entry ], [ %max.red.0, %for.body ]
   %arrayidx = getelementptr inbounds [1024 x float], [1024 x float]* @fA, i64 0, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %cmp3 = fcmp ogt float %0, %max.red.08
   %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
   %indvars.iv.next = add i64 %indvars.iv, 1
diff --git a/llvm/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll b/llvm/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
index cd022ad..9f7fb39 100644
--- a/llvm/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
+++ b/llvm/test/Transforms/LoopVectorize/multi-use-reduction-bug.ll
@@ -16,10 +16,10 @@
   %n = alloca i32, align 4
   %k7 = alloca i32, align 4
   %nf = alloca i32, align 4
-  %0 = load i32* %k7, align 4
+  %0 = load i32, i32* %k7, align 4
   %.neg1 = sub i32 0, %0
-  %n.promoted = load i32* %n, align 4
-  %nf.promoted = load i32* %nf, align 4
+  %n.promoted = load i32, i32* %n, align 4
+  %nf.promoted = load i32, i32* %nf, align 4
   br label %for.body
 
 for.body:
diff --git a/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll b/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll
index 1659af6..e79c931 100644
--- a/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll
+++ b/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll
@@ -28,7 +28,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds [40000 x i8], [40000 x i8] addrspace(1)* @Y, i64 0, i64 %indvars.iv
-  %0 = load i8 addrspace(1)* %arrayidx, align 1
+  %0 = load i8, i8 addrspace(1)* %arrayidx, align 1
   %add = add i8 %0, 1
   %arrayidx3 = getelementptr inbounds [40000 x i8], [40000 x i8]* @X, i64 0, i64 %indvars.iv
   store i8 %add, i8* %arrayidx3, align 1
diff --git a/llvm/test/Transforms/LoopVectorize/no_array_bounds.ll b/llvm/test/Transforms/LoopVectorize/no_array_bounds.ll
index 2835c66..865c8da 100644
--- a/llvm/test/Transforms/LoopVectorize/no_array_bounds.ll
+++ b/llvm/test/Transforms/LoopVectorize/no_array_bounds.ll
@@ -34,10 +34,10 @@
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %indvars.iv27 = phi i64 [ %indvars.iv.next28, %for.body ], [ 0, %for.body.preheader ]
   %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv27, !dbg !14
-  %0 = load i32* %arrayidx, align 4, !dbg !14, !tbaa !22
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !14, !tbaa !22
   %idxprom1 = sext i32 %0 to i64, !dbg !14
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1, !dbg !14
-  %1 = load i32* %arrayidx2, align 4, !dbg !14, !tbaa !22
+  %1 = load i32, i32* %arrayidx2, align 4, !dbg !14, !tbaa !22
   %inc = add nsw i32 %1, 1, !dbg !14
   store i32 %inc, i32* %arrayidx2, align 4, !dbg !14, !tbaa !22
   %indvars.iv.next28 = add nuw nsw i64 %indvars.iv27, 1, !dbg !10
@@ -48,10 +48,10 @@
 for.body7:                                        ; preds = %for.body7.preheader, %for.body7
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body7 ], [ 0, %for.body7.preheader ]
   %arrayidx9 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !20
-  %2 = load i32* %arrayidx9, align 4, !dbg !20, !tbaa !22
+  %2 = load i32, i32* %arrayidx9, align 4, !dbg !20, !tbaa !22
   %idxprom10 = sext i32 %2 to i64, !dbg !20
   %arrayidx11 = getelementptr inbounds i32, i32* %B, i64 %idxprom10, !dbg !20
-  %3 = load i32* %arrayidx11, align 4, !dbg !20, !tbaa !22
+  %3 = load i32, i32* %arrayidx11, align 4, !dbg !20, !tbaa !22
   %inc12 = add nsw i32 %3, 1, !dbg !20
   store i32 %inc12, i32* %arrayidx11, align 4, !dbg !20, !tbaa !22
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !16
diff --git a/llvm/test/Transforms/LoopVectorize/no_idiv_reduction.ll b/llvm/test/Transforms/LoopVectorize/no_idiv_reduction.ll
index 326ffc8..bfa48a2 100644
--- a/llvm/test/Transforms/LoopVectorize/no_idiv_reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/no_idiv_reduction.ll
@@ -12,7 +12,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %r.05 = phi i32 [ 80, %entry ], [ %div, %for.body ]
   %arrayidx = getelementptr inbounds [128 x i32], [128 x i32]* @a, i64 0, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %div = sdiv i32 %r.05, %0
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/llvm/test/Transforms/LoopVectorize/no_int_induction.ll b/llvm/test/Transforms/LoopVectorize/no_int_induction.ll
index 02848a0..7e6b26c 100644
--- a/llvm/test/Transforms/LoopVectorize/no_int_induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/no_int_induction.ll
@@ -21,7 +21,7 @@
 .lr.ph.i:                                         ; preds = %0, %.lr.ph.i
   %.03.i = phi i32* [ %6, %.lr.ph.i ], [ %A, %0 ]
   %.012.i = phi i32 [ %5, %.lr.ph.i ], [ 0, %0 ]
-  %4 = load i32* %.03.i, align 4
+  %4 = load i32, i32* %.03.i, align 4
   %5 = add nsw i32 %4, %.012.i
   %6 = getelementptr inbounds i32, i32* %.03.i, i64 1
   %7 = icmp eq i32* %6, %2
@@ -48,7 +48,7 @@
 .lr.ph.i:                                         ; preds = %0, %.lr.ph.i
   %.03.i = phi i32 addrspace(1)* [ %6, %.lr.ph.i ], [ %A, %0 ]
   %.012.i = phi i32 [ %5, %.lr.ph.i ], [ 0, %0 ]
-  %4 = load i32 addrspace(1)* %.03.i, align 4
+  %4 = load i32, i32 addrspace(1)* %.03.i, align 4
   %5 = add nsw i32 %4, %.012.i
   %6 = getelementptr inbounds i32, i32 addrspace(1)* %.03.i, i64 1
   %7 = icmp eq i32 addrspace(1)* %6, %2
diff --git a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll
index bcd29c1..7030b6b 100644
--- a/llvm/test/Transforms/LoopVectorize/no_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/no_outside_user.ll
@@ -20,7 +20,7 @@
 
 define i32 @main()  {
 bb:
-  %b.promoted = load i32* @b, align 4
+  %b.promoted = load i32, i32* @b, align 4
   br label %.lr.ph.i
 
 .lr.ph.i:
@@ -56,7 +56,7 @@
 define i32 @test2()  {
 entry:
   store i32 0, i32* @x1, align 4
-  %0 = load i32* @x0, align 4
+  %0 = load i32, i32* @x0, align 4
   br label %for.cond1.preheader
 
 for.cond1.preheader:
diff --git a/llvm/test/Transforms/LoopVectorize/no_switch.ll b/llvm/test/Transforms/LoopVectorize/no_switch.ll
index e24e91f..76c1c0c 100644
--- a/llvm/test/Transforms/LoopVectorize/no_switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/no_switch.ll
@@ -22,7 +22,7 @@
 for.body:                                         ; preds = %for.body.preheader, %for.inc
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.body.preheader ]
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !14
-  %0 = load i32* %arrayidx, align 4, !dbg !14, !tbaa !16
+  %0 = load i32, i32* %arrayidx, align 4, !dbg !14, !tbaa !16
   switch i32 %0, label %for.inc [
     i32 0, label %sw.bb
     i32 1, label %sw.bb3
diff --git a/llvm/test/Transforms/LoopVectorize/non-const-n.ll b/llvm/test/Transforms/LoopVectorize/non-const-n.ll
index 4b95775..9007b1b 100644
--- a/llvm/test/Transforms/LoopVectorize/non-const-n.ll
+++ b/llvm/test/Transforms/LoopVectorize/non-const-n.ll
@@ -21,9 +21,9 @@
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %6 = add nsw i32 %5, %3
   %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
   store i32 %6, i32* %7, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/opt.ll b/llvm/test/Transforms/LoopVectorize/opt.ll
index 90d8b09..71b7e1f 100644
--- a/llvm/test/Transforms/LoopVectorize/opt.ll
+++ b/llvm/test/Transforms/LoopVectorize/opt.ll
@@ -17,7 +17,7 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %red.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %red.05
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 255
diff --git a/llvm/test/Transforms/LoopVectorize/ptr_loops.ll b/llvm/test/Transforms/LoopVectorize/ptr_loops.ll
index 4d7e315..62fc1d9 100644
--- a/llvm/test/Transforms/LoopVectorize/ptr_loops.ll
+++ b/llvm/test/Transforms/LoopVectorize/ptr_loops.ll
@@ -17,7 +17,7 @@
 ; <label>:1                                       ; preds = %0, %1
   %p.02 = phi i32* [ getelementptr inbounds ([36 x i32]* @A, i64 0, i64 18), %0 ], [ %4, %1 ]
   %b.01 = phi i32* [ getelementptr inbounds ([36 x i32]* @B, i64 0, i64 0), %0 ], [ %5, %1 ]
-  %2 = load i32* %b.01, align 4
+  %2 = load i32, i32* %b.01, align 4
   %3 = shl nsw i32 %2, 1
   store i32 %3, i32* %p.02, align 4
   %4 = getelementptr inbounds i32, i32* %p.02, i64 -1
@@ -40,7 +40,7 @@
 ; <label>:1                                       ; preds = %0, %1
   %p.02 = phi i32* [ getelementptr inbounds ([36 x i32]* @A, i64 0, i64 25), %0 ], [ %3, %1 ]
   %b.01 = phi i32* [ getelementptr inbounds ([36 x i32]* @B, i64 0, i64 2), %0 ], [ %4, %1 ]
-  %2 = load i32* %b.01, align 4
+  %2 = load i32, i32* %b.01, align 4
   store i32 %2, i32* %p.02, align 4
   %3 = getelementptr inbounds i32, i32* %p.02, i64 -1
   %4 = getelementptr inbounds i32, i32* %b.01, i64 1
@@ -62,7 +62,7 @@
 ; <label>:1                                       ; preds = %0, %1
   %p.02 = phi i32* [ getelementptr inbounds ([36 x i32]* @A, i64 0, i64 29), %0 ], [ %3, %1 ]
   %b.01 = phi i32* [ getelementptr inbounds ([36 x i32]* @B, i64 0, i64 5), %0 ], [ %4, %1 ]
-  %2 = load i32* %b.01, align 4
+  %2 = load i32, i32* %b.01, align 4
   store i32 %2, i32* %p.02, align 4
   %3 = getelementptr inbounds i32, i32* %p.02, i64 -1
   %4 = getelementptr inbounds i32, i32* %b.01, i64 1
diff --git a/llvm/test/Transforms/LoopVectorize/read-only.ll b/llvm/test/Transforms/LoopVectorize/read-only.ll
index ef12edb..f81afd6 100644
--- a/llvm/test/Transforms/LoopVectorize/read-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/read-only.ll
@@ -14,10 +14,10 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
   %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = add nsw i64 %indvars.iv, 13
   %5 = getelementptr inbounds i32, i32* %B, i64 %4
-  %6 = load i32* %5, align 4
+  %6 = load i32, i32* %5, align 4
   %7 = shl i32 %6, 1
   %8 = add i32 %3, %sum.02
   %9 = add i32 %8, %7
diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll
index 70c63fe..647e58a 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction.ll
@@ -21,9 +21,9 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
   %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %6 = trunc i64 %indvars.iv to i32
   %7 = add i32 %sum.02, %6
   %8 = add i32 %7, %3
@@ -56,9 +56,9 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %prod.02 = phi i32 [ %9, %.lr.ph ], [ 1, %0 ]
   %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %6 = trunc i64 %indvars.iv to i32
   %7 = mul i32 %prod.02, %6
   %8 = mul i32 %7, %3
@@ -91,9 +91,9 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
   %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %6 = mul nsw i32 %5, %3
   %7 = trunc i64 %indvars.iv to i32
   %8 = add i32 %sum.02, %7
@@ -124,9 +124,9 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %sum.02 = phi i32 [ %9, %.lr.ph ], [ 19, %0 ]
   %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %6 = trunc i64 %indvars.iv to i32
   %7 = add i32 %3, %6
   %8 = add i32 %7, %5
@@ -159,9 +159,9 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %sum.09 = phi i32 [ %add, %for.body ], [ 120, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %in, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds i32, i32* %coeff, i64 %indvars.iv
-  %1 = load i32* %arrayidx2, align 4
+  %1 = load i32, i32* %arrayidx2, align 4
   %mul = mul nsw i32 %1, %0
   %add = add nsw i32 %mul, %sum.09
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -192,9 +192,9 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %1 = load i32* %arrayidx2, align 4
+  %1 = load i32, i32* %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %and = and i32 %add, %result.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -224,9 +224,9 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %1 = load i32* %arrayidx2, align 4
+  %1 = load i32, i32* %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %or = or i32 %add, %result.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -256,9 +256,9 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %1 = load i32* %arrayidx2, align 4
+  %1 = load i32, i32* %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %xor = xor i32 %add, %result.08
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -285,7 +285,7 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %sub = sub nsw i32 %0, %x.05
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -312,7 +312,7 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %sub = sub nsw i32 %x.05, %0
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -336,9 +336,9 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
   %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ]
   %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %1 = load float* %arrayidx2, align 4
+  %1 = load float, float* %arrayidx2, align 4
   %cmp3 = fcmp ogt float %0, %1
   br i1 %cmp3, label %if.then, label %for.inc
 
@@ -381,9 +381,9 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
   %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ]
   %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %1 = load float* %arrayidx2, align 4
+  %1 = load float, float* %arrayidx2, align 4
   %cmp3 = fcmp ogt float %0, %1
   br i1 %cmp3, label %if.then, label %for.inc
 
@@ -428,7 +428,7 @@
   %sum2.09 = phi float [ 0.000000e+00, %entry ], [ %add1, %for.body ]
   %sum.08 = phi float [ %S, %entry ], [ %add, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %add = fadd fast float %sum.08, %0
   %add1 = fadd fast float %sum2.09, %add
   %indvars.iv.next = add i64 %indvars.iv, 1
diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
index 943fb9e..6b63a0d 100644
--- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -19,7 +19,7 @@
   %redux5 = phi i32 [ 0, %entry ], [ %inc.redux, %for.body ]
   %add.i = add i64 %add.i7, -1
   %kind_.i = getelementptr inbounds i32, i32* %ptr, i64 %add.i
-  %tmp.i1 = load i32* %kind_.i, align 4
+  %tmp.i1 = load i32, i32* %kind_.i, align 4
   %inc.redux = add i32 %tmp.i1, %redux5
   %inc4 = add i32 %i.06, 1
   %exitcond = icmp ne i32 %inc4, 1024
@@ -42,7 +42,7 @@
   %redux5 = phi i32 [ 0, %entry ], [ %inc.redux, %for.body ]
   %add.i = add i128 %add.i7, -1
   %kind_.i = getelementptr inbounds i32, i32* %ptr, i128 %add.i
-  %tmp.i1 = load i32* %kind_.i, align 4
+  %tmp.i1 = load i32, i32* %kind_.i, align 4
   %inc.redux = add i32 %tmp.i1, %redux5
   %inc4 = add i32 %i.06, 1
   %exitcond = icmp ne i32 %inc4, 1024
@@ -66,7 +66,7 @@
   %redux5 = phi i32 [ 0, %entry ], [ %inc.redux, %for.body ]
   %add.i = add i16 %add.i7, -1
   %kind_.i = getelementptr inbounds i32, i32* %ptr, i16 %add.i
-  %tmp.i1 = load i32* %kind_.i, align 4
+  %tmp.i1 = load i32, i32* %kind_.i, align 4
   %inc.redux = add i32 %tmp.i1, %redux5
   %inc4 = add i32 %i.06, 1
   %exitcond = icmp ne i32 %inc4, 1024
diff --git a/llvm/test/Transforms/LoopVectorize/reverse_iter.ll b/llvm/test/Transforms/LoopVectorize/reverse_iter.ll
index b0ecb2e..5bbc769 100644
--- a/llvm/test/Transforms/LoopVectorize/reverse_iter.ll
+++ b/llvm/test/Transforms/LoopVectorize/reverse_iter.ll
@@ -31,7 +31,7 @@
   %5 = shl nsw i32 %4, 1
   %6 = sext i32 %5 to i64
   %7 = getelementptr inbounds i32, i32* %A, i64 %6
-  %8 = load i32* %7, align 4
+  %8 = load i32, i32* %7, align 4
   %9 = add nsw i32 %8, %sum.01
   %indvars.iv.next = add i64 %indvars.iv, -1
   %10 = trunc i64 %indvars.iv.next to i32
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-address-space.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-address-space.ll
index 1d33647..8e7ac1f 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check-address-space.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check-address-space.ll
@@ -38,7 +38,7 @@
   %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %idxprom = sext i32 %i.02 to i64
   %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %idxprom
-  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
   %mul = mul nsw i32 %0, 3
   %idxprom1 = sext i32 %i.02 to i64
   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idxprom1
@@ -65,7 +65,7 @@
   %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %idxprom = sext i32 %i.02 to i64
   %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %idxprom
-  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
   %mul = mul nsw i32 %0, 3
   %idxprom1 = sext i32 %i.02 to i64
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %idxprom1
@@ -92,7 +92,7 @@
   %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %idxprom = sext i32 %i.02 to i64
   %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %mul = mul nsw i32 %0, 3
   %idxprom1 = sext i32 %i.02 to i64
   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idxprom1
@@ -120,7 +120,7 @@
   %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %idxprom = sext i32 %i.02 to i64
   %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %idxprom
-  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
   %mul = mul nsw i32 %0, 3
   %idxprom1 = sext i32 %i.02 to i64
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %idxprom1
@@ -148,7 +148,7 @@
   %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %idxprom = sext i32 %i.02 to i64
   %arrayidx = getelementptr inbounds i32, i32* %b, i64 %idxprom
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %mul = mul nsw i32 %0, 3
   %idxprom1 = sext i32 %i.02 to i64
   %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
@@ -177,7 +177,7 @@
   %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %idxprom = sext i32 %i.02 to i64
   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom
-  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
   %mul = mul nsw i32 %0, 3
   %idxprom1 = sext i32 %i.02 to i64
   %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %idxprom1
@@ -205,7 +205,7 @@
   %i.02 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
   %idxprom = sext i32 %i.02 to i64
   %arrayidx = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(2)* @q_as2, i64 0, i64 %idxprom
-  %0 = load i32 addrspace(2)* %arrayidx, align 4
+  %0 = load i32, i32 addrspace(2)* %arrayidx, align 4
   %mul = mul nsw i32 %0, 3
   %idxprom1 = sext i32 %i.02 to i64
   %arrayidx2 = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(1)* @g_as1, i64 0, i64 %idxprom1
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
index bc3345b..6ee983d 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check-readonly-address-space.ll
@@ -15,9 +15,9 @@
 for.body:                                         ; preds = %entry, %for.body
   %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %i.01
-  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %c, i64 %i.01
-  %1 = load i32 addrspace(1)* %arrayidx1, align 4
+  %1 = load i32, i32 addrspace(1)* %arrayidx1, align 4
   %add = add nsw i32 %0, %1
   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %i.01
   store i32 %add, i32 addrspace(1)* %arrayidx2, align 4
@@ -40,9 +40,9 @@
 for.body:                                         ; preds = %entry, %for.body
   %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %b, i64 %i.01
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %c, i64 %i.01
-  %1 = load i32* %arrayidx1, align 4
+  %1 = load i32, i32* %arrayidx1, align 4
   %add = add nsw i32 %0, %1
   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %i.01
   store i32 %add, i32 addrspace(1)* %arrayidx2, align 4
@@ -65,9 +65,9 @@
 for.body:                                         ; preds = %entry, %for.body
   %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %i.01
-  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %c, i64 %i.01
-  %1 = load i32* %arrayidx1, align 4
+  %1 = load i32, i32* %arrayidx1, align 4
   %add = add nsw i32 %0, %1
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %i.01
   store i32 %add, i32* %arrayidx2, align 4
@@ -90,9 +90,9 @@
 for.body:                                         ; preds = %entry, %for.body
   %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %i.01
-  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %c, i64 %i.01
-  %1 = load i32 addrspace(1)* %arrayidx1, align 4
+  %1 = load i32, i32 addrspace(1)* %arrayidx1, align 4
   %add = add nsw i32 %0, %1
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %i.01
   store i32 %add, i32* %arrayidx2, align 4
@@ -115,9 +115,9 @@
 for.body:                                         ; preds = %entry, %for.body
   %i.01 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %b, i64 %i.01
-  %0 = load i32 addrspace(1)* %arrayidx, align 4
+  %0 = load i32, i32 addrspace(1)* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32 addrspace(2)* %c, i64 %i.01
-  %1 = load i32 addrspace(2)* %arrayidx1, align 4
+  %1 = load i32, i32 addrspace(2)* %arrayidx1, align 4
   %add = add nsw i32 %0, %1
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %i.01
   store i32 %add, i32* %arrayidx2, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-readonly.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-readonly.ll
index 75a0d56..a3b5a59 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check-readonly.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check-readonly.ll
@@ -23,9 +23,9 @@
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
-  %1 = load i32* %arrayidx2, align 4
+  %1 = load i32, i32* %arrayidx2, align 4
   %add = add nsw i32 %1, %0
   %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
   store i32 %add, i32* %arrayidx4, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll
index 483d35a..b1c5d40 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll
@@ -22,7 +22,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %mul = fmul float %0, 3.000000e+00
   %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
   store float %mul, float* %arrayidx2, align 4
@@ -48,10 +48,10 @@
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %ind.sum = add i64 %iv, %offset
   %arr.idx = getelementptr inbounds float, float* %a, i64 %ind.sum
-  %l1 = load float* %arr.idx, align 4
+  %l1 = load float, float* %arr.idx, align 4
   %ind.sum2 = add i64 %iv, %offset2
   %arr.idx2 = getelementptr inbounds float, float* %a, i64 %ind.sum2
-  %l2 = load float* %arr.idx2, align 4
+  %l2 = load float, float* %arr.idx2, align 4
   %m = fmul fast float %b, %l2
   %ad = fadd fast float %l1, %m
   store float %ad, float* %arr.idx, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-limit.ll b/llvm/test/Transforms/LoopVectorize/runtime-limit.ll
index 04b44cd..6bc71e1 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-limit.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-limit.ll
@@ -14,18 +14,18 @@
 for.body:                                         ; preds = %for.body, %entry
   %i.016 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.016
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %B, i64 %i.016
-  %1 = load i32* %arrayidx1, align 4
+  %1 = load i32, i32* %arrayidx1, align 4
   %add = add nsw i32 %1, %0
   %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %i.016
-  %2 = load i32* %arrayidx2, align 4
+  %2 = load i32, i32* %arrayidx2, align 4
   %add3 = add nsw i32 %add, %2
   %arrayidx4 = getelementptr inbounds i32, i32* %E, i64 %i.016
-  %3 = load i32* %arrayidx4, align 4
+  %3 = load i32, i32* %arrayidx4, align 4
   %add5 = add nsw i32 %add3, %3
   %arrayidx6 = getelementptr inbounds i32, i32* %F, i64 %i.016
-  %4 = load i32* %arrayidx6, align 4
+  %4 = load i32, i32* %arrayidx6, align 4
   %add7 = add nsw i32 %add5, %4
   %arrayidx8 = getelementptr inbounds i32, i32* %out, i64 %i.016
   store i32 %add7, i32* %arrayidx8, align 4
@@ -48,29 +48,29 @@
 for.body:                                         ; preds = %for.body, %entry
   %i.037 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.037
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %B, i64 %i.037
-  %1 = load i32* %arrayidx1, align 4
+  %1 = load i32, i32* %arrayidx1, align 4
   %add = add nsw i32 %1, %0
   %arrayidx2 = getelementptr inbounds i32, i32* %C, i64 %i.037
-  %2 = load i32* %arrayidx2, align 4
+  %2 = load i32, i32* %arrayidx2, align 4
   %add3 = add nsw i32 %add, %2
   %arrayidx4 = getelementptr inbounds i32, i32* %E, i64 %i.037
-  %3 = load i32* %arrayidx4, align 4
+  %3 = load i32, i32* %arrayidx4, align 4
   %add5 = add nsw i32 %add3, %3
   %arrayidx6 = getelementptr inbounds i32, i32* %F, i64 %i.037
-  %4 = load i32* %arrayidx6, align 4
+  %4 = load i32, i32* %arrayidx6, align 4
   %add7 = add nsw i32 %add5, %4
   %arrayidx8 = getelementptr inbounds i32, i32* %out, i64 %i.037
   store i32 %add7, i32* %arrayidx8, align 4
-  %5 = load i32* %arrayidx, align 4
-  %6 = load i32* %arrayidx1, align 4
+  %5 = load i32, i32* %arrayidx, align 4
+  %6 = load i32, i32* %arrayidx1, align 4
   %add11 = add nsw i32 %6, %5
-  %7 = load i32* %arrayidx2, align 4
+  %7 = load i32, i32* %arrayidx2, align 4
   %add13 = add nsw i32 %add11, %7
-  %8 = load i32* %arrayidx4, align 4
+  %8 = load i32, i32* %arrayidx4, align 4
   %add15 = add nsw i32 %add13, %8
-  %9 = load i32* %arrayidx6, align 4
+  %9 = load i32, i32* %arrayidx6, align 4
   %add17 = add nsw i32 %add15, %9
   %arrayidx18 = getelementptr inbounds i32, i32* %out2, i64 %i.037
   store i32 %add17, i32* %arrayidx18, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/safegep.ll b/llvm/test/Transforms/LoopVectorize/safegep.ll
index 1003759..ecef813 100644
--- a/llvm/test/Transforms/LoopVectorize/safegep.ll
+++ b/llvm/test/Transforms/LoopVectorize/safegep.ll
@@ -18,9 +18,9 @@
 "<bb 3>":
   %i_15 = phi i32 [ 0, %entry ], [ %i_19, %"<bb 3>" ]
   %pp3 = getelementptr float, float* %A, i32 %i_15
-  %D.1396_10 = load float* %pp3, align 4
+  %D.1396_10 = load float, float* %pp3, align 4
   %pp24 = getelementptr float, float* %B, i32 %i_15
-  %D.1398_15 = load float* %pp24, align 4
+  %D.1398_15 = load float, float* %pp24, align 4
   %D.1399_17 = fadd float %D.1398_15, %K
   %D.1400_18 = fmul float %D.1396_10, %D.1399_17
   store float %D.1400_18, float* %pp3, align 4
@@ -44,9 +44,9 @@
 "<bb 3>":
   %i_15 = phi i32 [ 0, %entry ], [ %i_19, %"<bb 3>" ]
   %pp3 = getelementptr float, float addrspace(5) * %A, i32 %i_15
-  %D.1396_10 = load float addrspace(5) * %pp3, align 4
+  %D.1396_10 = load float, float addrspace(5) * %pp3, align 4
   %pp24 = getelementptr float, float* %B, i32 %i_15
-  %D.1398_15 = load float* %pp24, align 4
+  %D.1398_15 = load float, float* %pp24, align 4
   %D.1399_17 = fadd float %D.1398_15, %K
   %D.1400_18 = fmul float %D.1396_10, %D.1399_17
   store float %D.1400_18, float addrspace(5) * %pp3, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/same-base-access.ll b/llvm/test/Transforms/LoopVectorize/same-base-access.ll
index fe94dd1..31cff0e 100644
--- a/llvm/test/Transforms/LoopVectorize/same-base-access.ll
+++ b/llvm/test/Transforms/LoopVectorize/same-base-access.ll
@@ -26,33 +26,33 @@
   br label %4
 
 ; <label>:4                                       ; preds = %25, %0
-  %5 = load i32* %k, align 4
-  %6 = load i32* %3, align 4
+  %5 = load i32, i32* %k, align 4
+  %6 = load i32, i32* %3, align 4
   %7 = icmp slt i32 %5, %6
   br i1 %7, label %8, label %28
 
 ; <label>:8                                       ; preds = %4
-  %9 = load i32* %k, align 4
+  %9 = load i32, i32* %k, align 4
   %10 = sub nsw i32 %9, 1
   %11 = sext i32 %10 to i64
-  %12 = load double** %1, align 8
+  %12 = load double*, double** %1, align 8
   %13 = getelementptr inbounds double, double* %12, i64 %11
-  %14 = load double* %13, align 8
-  %15 = load i32* %k, align 4
+  %14 = load double, double* %13, align 8
+  %15 = load i32, i32* %k, align 4
   %16 = sext i32 %15 to i64
-  %17 = load double** %2, align 8
+  %17 = load double*, double** %2, align 8
   %18 = getelementptr inbounds double, double* %17, i64 %16
-  %19 = load double* %18, align 8
+  %19 = load double, double* %18, align 8
   %20 = fadd double %14, %19
-  %21 = load i32* %k, align 4
+  %21 = load i32, i32* %k, align 4
   %22 = sext i32 %21 to i64
-  %23 = load double** %1, align 8
+  %23 = load double*, double** %1, align 8
   %24 = getelementptr inbounds double, double* %23, i64 %22
   store double %20, double* %24, align 8
   br label %25
 
 ; <label>:25                                      ; preds = %8
-  %26 = load i32* %k, align 4
+  %26 = load i32, i32* %k, align 4
   %27 = add nsw i32 %26, 1
   store i32 %27, i32* %k, align 4
   br label %4
@@ -87,7 +87,7 @@
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %7 ]
   %2 = mul nsw i64 %indvars.iv, 7
   %3 = getelementptr inbounds i32, i32* %a, i64 %2
-  %4 = load i32* %3, align 4
+  %4 = load i32, i32* %3, align 4
   %5 = icmp sgt i32 %4, 3
   br i1 %5, label %6, label %7
 
diff --git a/llvm/test/Transforms/LoopVectorize/scalar-select.ll b/llvm/test/Transforms/LoopVectorize/scalar-select.ll
index b10e80e..b17b202 100644
--- a/llvm/test/Transforms/LoopVectorize/scalar-select.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalar-select.ll
@@ -19,9 +19,9 @@
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %6 = add nsw i32 %5, %3
   %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
   %sel = select i1 %cond, i32 %6, i32 zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/scev-exitlim-crash.ll b/llvm/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
index 58abcb0..cfc1d6d 100644
--- a/llvm/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/scev-exitlim-crash.ll
@@ -29,14 +29,14 @@
   br i1 %cmp514, label %for.cond7.preheader.lr.ph, label %for.end26
 
 for.cond7.preheader.lr.ph:                        ; preds = %for.cond4.preheader
-  %0 = load i32** @e, align 8, !tbaa !4
+  %0 = load i32*, i32** @e, align 8, !tbaa !4
   br label %for.cond7.preheader
 
 for.cond7.preheader:                              ; preds = %for.cond7.preheader.lr.ph, %for.inc23
   %y.017 = phi i32 [ 0, %for.cond7.preheader.lr.ph ], [ %inc24, %for.inc23 ]
   %i.116 = phi i32 [ 0, %for.cond7.preheader.lr.ph ], [ %i.2.lcssa, %for.inc23 ]
   %n.015 = phi i32 [ undef, %for.cond7.preheader.lr.ph ], [ %inc25, %for.inc23 ]
-  %1 = load i32* @b, align 4, !tbaa !5
+  %1 = load i32, i32* @b, align 4, !tbaa !5
   %tobool11 = icmp eq i32 %1, 0
   br i1 %tobool11, label %for.inc23, label %for.body8.lr.ph
 
@@ -63,7 +63,7 @@
   %indvars.iv = phi i64 [ %3, %for.body13.lr.ph ], [ %indvars.iv.next, %for.body13 ]
   %add.ptr.sum = add i64 %idx.ext, %indvars.iv
   %arrayidx = getelementptr inbounds i32, i32* @a, i64 %add.ptr.sum
-  %4 = load i32* %arrayidx, align 4, !tbaa !5
+  %4 = load i32, i32* %arrayidx, align 4, !tbaa !5
   %arrayidx15 = getelementptr inbounds i32, i32* %0, i64 %indvars.iv
   store i32 %4, i32* %arrayidx15, align 4, !tbaa !5
   %indvars.iv.next = add i64 %indvars.iv, 1
@@ -75,11 +75,11 @@
   br label %for.inc19
 
 for.inc19:                                        ; preds = %for.cond11.for.inc19_crit_edge, %for.body8
-  %6 = load i32* @c, align 4, !tbaa !5
+  %6 = load i32, i32* @c, align 4, !tbaa !5
   %inc20 = add nsw i32 %6, 1
   store i32 %inc20, i32* @c, align 4, !tbaa !5
   %indvars.iv.next20 = add i64 %indvars.iv19, 1
-  %7 = load i32* @b, align 4, !tbaa !5
+  %7 = load i32, i32* @b, align 4, !tbaa !5
   %tobool = icmp eq i32 %7, 0
   br i1 %tobool, label %for.cond7.for.inc23_crit_edge, label %for.body8
 
diff --git a/llvm/test/Transforms/LoopVectorize/simple-unroll.ll b/llvm/test/Transforms/LoopVectorize/simple-unroll.ll
index c925a05..fc996ed 100644
--- a/llvm/test/Transforms/LoopVectorize/simple-unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/simple-unroll.ll
@@ -24,7 +24,7 @@
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = trunc i64 %indvars.iv to i32
   %5 = add nsw i32 %3, %4
   store i32 %5, i32* %2, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/small-loop.ll b/llvm/test/Transforms/LoopVectorize/small-loop.ll
index bc6e21f..ce606d1 100644
--- a/llvm/test/Transforms/LoopVectorize/small-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/small-loop.ll
@@ -16,9 +16,9 @@
 ; <label>:1                                       ; preds = %1, %0
   %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
   %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @b, i64 0, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = getelementptr inbounds [2048 x i32], [2048 x i32]* @c, i64 0, i64 %indvars.iv
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %6 = add nsw i32 %5, %3
   %7 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
   store i32 %6, i32* %7, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/start-non-zero.ll b/llvm/test/Transforms/LoopVectorize/start-non-zero.ll
index 12c15a9..b444a69 100644
--- a/llvm/test/Transforms/LoopVectorize/start-non-zero.ll
+++ b/llvm/test/Transforms/LoopVectorize/start-non-zero.ll
@@ -18,7 +18,7 @@
 for.body:                                         ; preds = %for.body.lr.ph, %for.body
   %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %1 = load i32* %arrayidx, align 4
+  %1 = load i32, i32* %arrayidx, align 4
   %mul = mul nuw i32 %1, 333
   store i32 %mul, i32* %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
diff --git a/llvm/test/Transforms/LoopVectorize/store-shuffle-bug.ll b/llvm/test/Transforms/LoopVectorize/store-shuffle-bug.ll
index 2512d4f..4d62df1 100644
--- a/llvm/test/Transforms/LoopVectorize/store-shuffle-bug.ll
+++ b/llvm/test/Transforms/LoopVectorize/store-shuffle-bug.ll
@@ -32,12 +32,12 @@
   %0 = add i64 %indvars.iv, 1
   %arrayidx = getelementptr inbounds [100 x i32], [100 x i32]* @uf, i64 0, i64 %0
   %arrayidx3 = getelementptr inbounds [100 x i32], [100 x i32]* @xi, i64 0, i64 %0
-  %1 = load i32* %arrayidx3, align 4
-  %2 = load i32* %arrayidx, align 4
+  %1 = load i32, i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx, align 4
   %add4 = add nsw i32 %2, %1
   store i32 %add4, i32* %arrayidx, align 4
   %arrayidx7 = getelementptr inbounds [100 x i32], [100 x i32]* @q, i64 0, i64 %0
-  %3 = load i32* %arrayidx7, align 4
+  %3 = load i32, i32* %arrayidx7, align 4
   %add8 = add nsw i32 %add4, %3
   store i32 %add8, i32* %arrayidx, align 4
   %indvars.iv.next = add i64 %indvars.iv, -1
diff --git a/llvm/test/Transforms/LoopVectorize/struct_access.ll b/llvm/test/Transforms/LoopVectorize/struct_access.ll
index 0d68e2d..1e4019a 100644
--- a/llvm/test/Transforms/LoopVectorize/struct_access.ll
+++ b/llvm/test/Transforms/LoopVectorize/struct_access.ll
@@ -33,7 +33,7 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
   %x = getelementptr inbounds %struct.coordinate, %struct.coordinate* %A, i64 %indvars.iv, i32 0
-  %0 = load i32* %x, align 4
+  %0 = load i32, i32* %x, align 4
   %add = add nsw i32 %0, %sum.05
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
@@ -75,7 +75,7 @@
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %sum.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
   %x = getelementptr inbounds %struct.lit, %struct.lit* %A, i64 %indvars.iv, i32 0
-  %0 = load i32* %x, align 4
+  %0 = load i32, i32* %x, align 4
   %add = add nsw i32 %0, %sum.05
   %indvars.iv.next = add i64 %indvars.iv, 1
   %lftr.wideiv = trunc i64 %indvars.iv.next to i32
diff --git a/llvm/test/Transforms/LoopVectorize/tbaa-nodep.ll b/llvm/test/Transforms/LoopVectorize/tbaa-nodep.ll
index 8469a9d..06d0002 100644
--- a/llvm/test/Transforms/LoopVectorize/tbaa-nodep.ll
+++ b/llvm/test/Transforms/LoopVectorize/tbaa-nodep.ll
@@ -11,7 +11,7 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float, float* %arrayidx, align 4, !tbaa !0
   %conv = fptosi float %0 to i32
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
   store i32 %conv, i32* %arrayidx2, align 4, !tbaa !4
@@ -30,7 +30,7 @@
 ; CHECK-NEXT: br label %vector.body
 ; CHECK: vector.body:
 
-; CHECK: load <4 x float>* %{{.*}}, align 4, !tbaa
+; CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
 ; CHECK: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa
 
 ; CHECK: ret i32 0
@@ -38,7 +38,7 @@
 ; CHECK-NOTBAA-LABEL: @test1
 ; CHECK-NOTBAA: icmp uge i32*
 
-; CHECK-NOTBAA: load <4 x float>* %{{.*}}, align 4, !tbaa
+; CHECK-NOTBAA: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
 ; CHECK-NOTBAA: store <4 x i32> %{{.*}}, <4 x i32>* %{{.*}}, align 4, !tbaa
 
 ; CHECK-NOTBAA: ret i32 0
@@ -52,9 +52,9 @@
 for.body:                                         ; preds = %for.body, %entry
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %0 = load float, float* %arrayidx, align 4, !tbaa !0
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
-  %1 = load i32* %arrayidx2, align 4, !tbaa !4
+  %1 = load i32, i32* %arrayidx2, align 4, !tbaa !4
   %conv = sitofp i32 %1 to float
   %mul = fmul float %0, %conv
   %arrayidx4 = getelementptr inbounds float, float* %c, i64 %indvars.iv
@@ -74,7 +74,7 @@
 ; CHECK: icmp uge float*
 ; CHECK-NOT: icmp uge i32*
 
-; CHECK: load <4 x float>* %{{.*}}, align 4, !tbaa
+; CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
 ; CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa
 
 ; CHECK: ret i32 0
@@ -85,7 +85,7 @@
 ; CHECK-NOTBAA-DAG: icmp uge float*
 ; CHECK-NOTBAA-DAG: icmp uge i32*
 
-; CHECK-NOTBAA: load <4 x float>* %{{.*}}, align 4, !tbaa
+; CHECK-NOTBAA: load <4 x float>, <4 x float>* %{{.*}}, align 4, !tbaa
 ; CHECK-NOTBAA: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 4, !tbaa
 
 ; CHECK-NOTBAA: ret i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/unroll_novec.ll b/llvm/test/Transforms/LoopVectorize/unroll_novec.ll
index fc23d3d..c23ad77 100644
--- a/llvm/test/Transforms/LoopVectorize/unroll_novec.ll
+++ b/llvm/test/Transforms/LoopVectorize/unroll_novec.ll
@@ -10,11 +10,11 @@
 ;    a[i] += i;
 ;  }
 ;CHECK-LABEL: @inc(
-;CHECK: load i32*
-;CHECK: load i32*
-;CHECK: load i32*
-;CHECK: load i32*
-;CHECK-NOT: load i32*
+;CHECK: load i32, i32*
+;CHECK: load i32, i32*
+;CHECK: load i32, i32*
+;CHECK: load i32, i32*
+;CHECK-NOT: load i32, i32*
 ;CHECK: add nsw i32
 ;CHECK: add nsw i32
 ;CHECK: add nsw i32
@@ -34,7 +34,7 @@
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %2 = getelementptr inbounds [2048 x i32], [2048 x i32]* @a, i64 0, i64 %indvars.iv
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   %4 = trunc i64 %indvars.iv to i32
   %5 = add nsw i32 %3, %4
   store i32 %5, i32* %2, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/value-ptr-bug.ll b/llvm/test/Transforms/LoopVectorize/value-ptr-bug.ll
index 4fb0318..ce4601f 100644
--- a/llvm/test/Transforms/LoopVectorize/value-ptr-bug.ll
+++ b/llvm/test/Transforms/LoopVectorize/value-ptr-bug.ll
@@ -38,7 +38,7 @@
   %sp.4 = phi i8* [ %tmp30, %block1 ], [ %incdec.ptr273, %do.body272 ]
   %dp.addr.4 = phi i8* [ %tmp29, %block1 ], [ %incdec.ptr274, %do.body272 ]
   %incdec.ptr273 = getelementptr inbounds i8, i8* %sp.4, i64 1
-  %tmp31 = load i8* %sp.4, align 1
+  %tmp31 = load i8, i8* %sp.4, align 1
   %incdec.ptr274 = getelementptr inbounds i8, i8* %dp.addr.4, i64 1
   store i8 %tmp31, i8* %dp.addr.4, align 1
   %dec = add i32 %row_width.5, -1
diff --git a/llvm/test/Transforms/LoopVectorize/vect.omp.persistence.ll b/llvm/test/Transforms/LoopVectorize/vect.omp.persistence.ll
index 4ad34ad..0d7f8b9 100644
--- a/llvm/test/Transforms/LoopVectorize/vect.omp.persistence.ll
+++ b/llvm/test/Transforms/LoopVectorize/vect.omp.persistence.ll
@@ -46,10 +46,10 @@
 
   %0 = add nsw i64 %indvars.iv, -5
   %arrayidx = getelementptr inbounds float, float* %a, i64 %0
-  %1 = load float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1
+  %1 = load float, float* %arrayidx, align 4, !llvm.mem.parallel_loop_access !1
   %2 = add nsw i64 %indvars.iv, 2
   %arrayidx2 = getelementptr inbounds float, float* %a, i64 %2
-  %3 = load float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1
+  %3 = load float, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access !1
   %mul = fmul float %1, %3
   %arrayidx4 = getelementptr inbounds float, float* %a, i64 %indvars.iv
   store float %mul, float* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !1
diff --git a/llvm/test/Transforms/LoopVectorize/vect.stats.ll b/llvm/test/Transforms/LoopVectorize/vect.stats.ll
index 4a748f2..7bf5a60 100644
--- a/llvm/test/Transforms/LoopVectorize/vect.stats.ll
+++ b/llvm/test/Transforms/LoopVectorize/vect.stats.ll
@@ -21,7 +21,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv2 = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv2
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %mul = fmul float %0, %0
   store float %mul, float* %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1
@@ -43,10 +43,10 @@
   %indvars.iv2 = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %0 = add nsw i64 %indvars.iv2, -5
   %arrayidx = getelementptr inbounds float, float* %a, i64 %0
-  %1 = load float* %arrayidx, align 4
+  %1 = load float, float* %arrayidx, align 4
   %2 = add nsw i64 %indvars.iv2, 2
   %arrayidx2 = getelementptr inbounds float, float* %a, i64 %2
-  %3 = load float* %arrayidx2, align 4
+  %3 = load float, float* %arrayidx2, align 4
   %mul = fmul float %1, %3
   %arrayidx4 = getelementptr inbounds float, float* %a, i64 %indvars.iv2
   store float %mul, float* %arrayidx4, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/vectorize-once.ll b/llvm/test/Transforms/LoopVectorize/vectorize-once.ll
index f5f39cc..5d0e96b 100644
--- a/llvm/test/Transforms/LoopVectorize/vectorize-once.ll
+++ b/llvm/test/Transforms/LoopVectorize/vectorize-once.ll
@@ -29,7 +29,7 @@
 for.body.i:                                       ; preds = %entry, %for.body.i
   %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ]
   %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
-  %0 = load i32* %__first.addr.04.i, align 4
+  %0 = load i32, i32* %__first.addr.04.i, align 4
   %add.i = add nsw i32 %0, %__init.addr.05.i
   %incdec.ptr.i = getelementptr inbounds i32, i32* %__first.addr.04.i, i64 1
   %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
@@ -55,7 +55,7 @@
 for.body.i:                                       ; preds = %entry, %for.body.i
   %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ]
   %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ]
-  %0 = load i32* %__first.addr.04.i, align 4
+  %0 = load i32, i32* %__first.addr.04.i, align 4
   %add.i = add nsw i32 %0, %__init.addr.05.i
   %incdec.ptr.i = getelementptr inbounds i32, i32* %__first.addr.04.i, i64 1
   %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr
diff --git a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
index 1a0b81c..a9d319e5 100644
--- a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
+++ b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
@@ -35,10 +35,10 @@
   %mul = mul i32 %iv.trunc, %BStride
   %mul64 = zext i32 %mul to i64
   %arrayidx = getelementptr inbounds i32, i32* %B, i64 %mul64
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %mul2 = mul nsw i64 %indvars.iv, %CStride
   %arrayidx3 = getelementptr inbounds i32, i32* %C, i64 %mul2
-  %1 = load i32* %arrayidx3, align 4
+  %1 = load i32, i32* %arrayidx3, align 4
   %mul4 = mul nsw i32 %1, %0
   %mul3 = mul nsw i64 %indvars.iv, %AStride
   %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %mul3
@@ -77,7 +77,7 @@
   %mul = mul nsw i32 %0, %conv
   %idxprom = sext i32 %mul to i64
   %arrayidx = getelementptr inbounds double, double* %x, i64 %idxprom
-  %1 = load double* %arrayidx, align 8
+  %1 = load double, double* %arrayidx, align 8
   %arrayidx3 = getelementptr inbounds double, double* %c, i64 %indvars.iv
   store double %1, double* %arrayidx3, align 8
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
diff --git a/llvm/test/Transforms/LoopVectorize/write-only.ll b/llvm/test/Transforms/LoopVectorize/write-only.ll
index cd1b330..b2bc045 100644
--- a/llvm/test/Transforms/LoopVectorize/write-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/write-only.ll
@@ -13,7 +13,7 @@
 .lr.ph:                                           ; preds = %0, %.lr.ph
   %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
   %2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
-  %3 = load float* %2, align 4
+  %3 = load float, float* %2, align 4
   %4 = fmul float %3, 3.000000e+00
   store float %4, float* %2, align 4
   %indvars.iv.next = add i64 %indvars.iv, 1
diff --git a/llvm/test/Transforms/LowerAtomic/atomic-swap.ll b/llvm/test/Transforms/LowerAtomic/atomic-swap.ll
index cb11241..7700052 100644
--- a/llvm/test/Transforms/LowerAtomic/atomic-swap.ll
+++ b/llvm/test/Transforms/LowerAtomic/atomic-swap.ll
@@ -5,7 +5,7 @@
   %i = alloca i8
   %pair = cmpxchg i8* %i, i8 0, i8 42 monotonic monotonic
   %j = extractvalue { i8, i1 } %pair, 0
-; CHECK: [[OLDVAL:%[a-z0-9]+]] = load i8* [[ADDR:%[a-z0-9]+]]
+; CHECK: [[OLDVAL:%[a-z0-9]+]] = load i8, i8* [[ADDR:%[a-z0-9]+]]
 ; CHECK-NEXT: [[SAME:%[a-z0-9]+]] = icmp eq i8 [[OLDVAL]], 0
 ; CHECK-NEXT: [[TO_STORE:%[a-z0-9]+]] = select i1 [[SAME]], i8 42, i8 [[OLDVAL]]
 ; CHECK-NEXT: store i8 [[TO_STORE]], i8* [[ADDR]]
diff --git a/llvm/test/Transforms/LowerBitSets/simple.ll b/llvm/test/Transforms/LowerBitSets/simple.ll
index 79e01b9..704ab72 100644
--- a/llvm/test/Transforms/LowerBitSets/simple.ll
+++ b/llvm/test/Transforms/LowerBitSets/simple.ll
@@ -61,7 +61,7 @@
 
   ; CHECK: [[R8:%[^ ]*]] = lshr i32 [[R5]], 5
   ; CHECK: [[R9:%[^ ]*]] = getelementptr i32, i32* bitcast ([9 x i8]* @bitset1.bits to i32*), i32 [[R8]]
-  ; CHECK: [[R10:%[^ ]*]] = load i32* [[R9]]
+  ; CHECK: [[R10:%[^ ]*]] = load i32, i32* [[R9]]
   ; CHECK: [[R11:%[^ ]*]] = and i32 [[R5]], 31
   ; CHECK: [[R12:%[^ ]*]] = shl i32 1, [[R11]]
   ; CHECK: [[R13:%[^ ]*]] = and i32 [[R10]], [[R12]]
@@ -107,7 +107,7 @@
 
   ; CHECK: [[T8:%[^ ]*]] = lshr i32 [[T5]], 5
   ; CHECK: [[T9:%[^ ]*]] = getelementptr i32, i32* bitcast ([9 x i8]* @bitset3.bits to i32*), i32 [[T8]]
-  ; CHECK: [[T10:%[^ ]*]] = load i32* [[T9]]
+  ; CHECK: [[T10:%[^ ]*]] = load i32, i32* [[T9]]
   ; CHECK: [[T11:%[^ ]*]] = and i32 [[T5]], 31
   ; CHECK: [[T12:%[^ ]*]] = shl i32 1, [[T11]]
   ; CHECK: [[T13:%[^ ]*]] = and i32 [[T10]], [[T12]]
diff --git a/llvm/test/Transforms/LowerExpectIntrinsic/basic.ll b/llvm/test/Transforms/LowerExpectIntrinsic/basic.ll
index f4326c8..5d72371 100644
--- a/llvm/test/Transforms/LowerExpectIntrinsic/basic.ll
+++ b/llvm/test/Transforms/LowerExpectIntrinsic/basic.ll
@@ -7,7 +7,7 @@
   %retval = alloca i32, align 4
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  %tmp = load i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
   %cmp = icmp sgt i32 %tmp, 1
   %conv = zext i1 %cmp to i32
   %conv1 = sext i32 %conv to i64
@@ -27,7 +27,7 @@
   br label %return
 
 return:                                           ; preds = %if.end, %if.then
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
@@ -41,7 +41,7 @@
   %retval = alloca i32, align 4
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  %tmp = load i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
   %conv = sext i32 %tmp to i64
   %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
   %tobool = icmp ne i64 %expval, 0
@@ -59,7 +59,7 @@
   br label %return
 
 return:                                           ; preds = %if.end, %if.then
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
@@ -69,7 +69,7 @@
   %retval = alloca i32, align 4
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  %tmp = load i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
   %tobool = icmp ne i32 %tmp, 0
   %lnot = xor i1 %tobool, true
   %lnot.ext = zext i1 %lnot to i32
@@ -90,7 +90,7 @@
   br label %return
 
 return:                                           ; preds = %if.end, %if.then
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
@@ -100,7 +100,7 @@
   %retval = alloca i32, align 4
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  %tmp = load i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
   %tobool = icmp ne i32 %tmp, 0
   %lnot = xor i1 %tobool, true
   %lnot1 = xor i1 %lnot, true
@@ -122,7 +122,7 @@
   br label %return
 
 return:                                           ; preds = %if.end, %if.then
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
@@ -132,7 +132,7 @@
   %retval = alloca i32, align 4
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  %tmp = load i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
   %cmp = icmp slt i32 %tmp, 0
   %conv = zext i1 %cmp to i32
   %conv1 = sext i32 %conv to i64
@@ -152,7 +152,7 @@
   br label %return
 
 return:                                           ; preds = %if.end, %if.then
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
@@ -162,7 +162,7 @@
   %retval = alloca i32, align 4
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  %tmp = load i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
   %conv = sext i32 %tmp to i64
   %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
 ; CHECK: !prof !2
@@ -181,7 +181,7 @@
   br label %return
 
 return:                                           ; preds = %sw.epilog, %sw.bb
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
@@ -191,7 +191,7 @@
   %retval = alloca i32, align 4
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  %tmp = load i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
   %conv = sext i32 %tmp to i64
   %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
 ; CHECK: !prof !3
@@ -202,7 +202,7 @@
   ]
 
 sw.bb:                                            ; preds = %entry, %entry
-  %tmp1 = load i32* %x.addr, align 4
+  %tmp1 = load i32, i32* %x.addr, align 4
   store i32 %tmp1, i32* %retval
   br label %return
 
@@ -211,7 +211,7 @@
   br label %return
 
 return:                                           ; preds = %sw.epilog, %sw.bb
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
@@ -221,7 +221,7 @@
   %retval = alloca i32, align 4
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  %tmp = load i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
   %cmp = icmp sgt i32 %tmp, 1
   %conv = zext i1 %cmp to i32
   %expval = call i32 @llvm.expect.i32(i32 %conv, i32 1)
@@ -240,7 +240,7 @@
   br label %return
 
 return:                                           ; preds = %if.end, %if.then
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
@@ -252,7 +252,7 @@
   %retval = alloca i32, align 4
   %x.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
-  %tmp = load i32* %x.addr, align 4
+  %tmp = load i32, i32* %x.addr, align 4
   %cmp = icmp sgt i32 %tmp, 1
   %expval = call i1 @llvm.expect.i1(i1 %cmp, i1 1)
 ; CHECK: !prof !0
@@ -269,7 +269,7 @@
   br label %return
 
 return:                                           ; preds = %if.end, %if.then
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
diff --git a/llvm/test/Transforms/LowerSwitch/2014-06-10-SwitchContiguousOpt.ll b/llvm/test/Transforms/LowerSwitch/2014-06-10-SwitchContiguousOpt.ll
index 3673c04..22173b4 100644
--- a/llvm/test/Transforms/LowerSwitch/2014-06-10-SwitchContiguousOpt.ll
+++ b/llvm/test/Transforms/LowerSwitch/2014-06-10-SwitchContiguousOpt.ll
@@ -6,7 +6,7 @@
   %retval = alloca i32, align 4
   %a.addr = alloca i32, align 4
   store i32 %a, i32* %a.addr, align 4
-  %0 = load i32* %a.addr, align 4
+  %0 = load i32, i32* %a.addr, align 4
   switch i32 %0, label %sw.default [
     i32 0, label %sw.bb
     i32 1, label %sw.bb1
diff --git a/llvm/test/Transforms/LowerSwitch/2014-06-11-SwitchDefaultUnreachableOpt.ll b/llvm/test/Transforms/LowerSwitch/2014-06-11-SwitchDefaultUnreachableOpt.ll
index ecdd767..2652a6c 100644
--- a/llvm/test/Transforms/LowerSwitch/2014-06-11-SwitchDefaultUnreachableOpt.ll
+++ b/llvm/test/Transforms/LowerSwitch/2014-06-11-SwitchDefaultUnreachableOpt.ll
@@ -21,7 +21,7 @@
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   store i32 %a, i32* %2, align 4
-  %3 = load i32* %2, align 4
+  %3 = load i32, i32* %2, align 4
   switch i32 %3, label %6 [
     i32 0, label %4
     i32 1, label %5
@@ -39,6 +39,6 @@
   unreachable
 
 ; <label>:7
-  %8 = load i32* %1
+  %8 = load i32, i32* %1
   ret i32 %8
 }
diff --git a/llvm/test/Transforms/Mem2Reg/2002-03-28-UninitializedVal.ll b/llvm/test/Transforms/Mem2Reg/2002-03-28-UninitializedVal.ll
index 777f375..49b5605 100644
--- a/llvm/test/Transforms/Mem2Reg/2002-03-28-UninitializedVal.ll
+++ b/llvm/test/Transforms/Mem2Reg/2002-03-28-UninitializedVal.ll
@@ -6,6 +6,6 @@
 define i32 @test() {
         ; To be promoted
 	%X = alloca i32		; <i32*> [#uses=1]
-	%Y = load i32* %X		; <i32> [#uses=1]
+	%Y = load i32, i32* %X		; <i32> [#uses=1]
 	ret i32 %Y
 }
diff --git a/llvm/test/Transforms/Mem2Reg/2003-04-24-MultipleIdenticalSuccessors.ll b/llvm/test/Transforms/Mem2Reg/2003-04-24-MultipleIdenticalSuccessors.ll
index f5f1ee3..a013ff4 100644
--- a/llvm/test/Transforms/Mem2Reg/2003-04-24-MultipleIdenticalSuccessors.ll
+++ b/llvm/test/Transforms/Mem2Reg/2003-04-24-MultipleIdenticalSuccessors.ll
@@ -10,7 +10,7 @@
 	store i32 2, i32* %X
 	br i1 %c2, label %Exit, label %Exit
 Exit:		; preds = %B2, %B2, %0
-	%Y = load i32* %X		; <i32> [#uses=1]
+	%Y = load i32, i32* %X		; <i32> [#uses=1]
 	ret i32 %Y
 }
 
diff --git a/llvm/test/Transforms/Mem2Reg/2003-06-26-IterativePromote.ll b/llvm/test/Transforms/Mem2Reg/2003-06-26-IterativePromote.ll
index e82caa9..de7280e 100644
--- a/llvm/test/Transforms/Mem2Reg/2003-06-26-IterativePromote.ll
+++ b/llvm/test/Transforms/Mem2Reg/2003-06-26-IterativePromote.ll
@@ -7,10 +7,10 @@
 	%p = alloca i32*		; <i32**> [#uses=2]
 	store i32 0, i32* %a
 	store i32* %a, i32** %p
-	%tmp.0 = load i32** %p		; <i32*> [#uses=1]
-	%tmp.1 = load i32* %tmp.0		; <i32> [#uses=1]
+	%tmp.0 = load i32*, i32** %p		; <i32*> [#uses=1]
+	%tmp.1 = load i32, i32* %tmp.0		; <i32> [#uses=1]
 	store i32 %tmp.1, i32* %result
-	%tmp.2 = load i32* %result		; <i32> [#uses=1]
+	%tmp.2 = load i32, i32* %result		; <i32> [#uses=1]
 	ret i32 %tmp.2
 }
 
diff --git a/llvm/test/Transforms/Mem2Reg/2003-10-05-DeadPHIInsertion.ll b/llvm/test/Transforms/Mem2Reg/2003-10-05-DeadPHIInsertion.ll
index 1d38efc..8d55a1d 100644
--- a/llvm/test/Transforms/Mem2Reg/2003-10-05-DeadPHIInsertion.ll
+++ b/llvm/test/Transforms/Mem2Reg/2003-10-05-DeadPHIInsertion.ll
@@ -9,11 +9,11 @@
 	br i1 %C, label %L1, label %L2
 L1:		; preds = %0
 	store i32 %B, i32* %A
-	%D = load i32* %A		; <i32> [#uses=1]
+	%D = load i32, i32* %A		; <i32> [#uses=1]
 	call void @test( i32 %D, i1 false )
 	br label %L3
 L2:		; preds = %0
-	%E = load i32* %A		; <i32> [#uses=1]
+	%E = load i32, i32* %A		; <i32> [#uses=1]
 	call void @test( i32 %E, i1 true )
 	br label %L3
 L3:		; preds = %L2, %L1
diff --git a/llvm/test/Transforms/Mem2Reg/2005-06-30-ReadBeforeWrite.ll b/llvm/test/Transforms/Mem2Reg/2005-06-30-ReadBeforeWrite.ll
index b064b13..f0f1fdc 100644
--- a/llvm/test/Transforms/Mem2Reg/2005-06-30-ReadBeforeWrite.ll
+++ b/llvm/test/Transforms/Mem2Reg/2005-06-30-ReadBeforeWrite.ll
@@ -14,31 +14,31 @@
 	store i32 0, i32* %i
 	br label %loopentry
 loopentry:		; preds = %endif, %entry
-	%tmp.0 = load i32* %n_addr		; <i32> [#uses=1]
+	%tmp.0 = load i32, i32* %n_addr		; <i32> [#uses=1]
 	%tmp.1 = add i32 %tmp.0, 1		; <i32> [#uses=1]
-	%tmp.2 = load i32* %i		; <i32> [#uses=1]
+	%tmp.2 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp.3 = icmp sgt i32 %tmp.1, %tmp.2		; <i1> [#uses=2]
 	%tmp.4 = zext i1 %tmp.3 to i32		; <i32> [#uses=0]
 	br i1 %tmp.3, label %no_exit, label %return
 no_exit:		; preds = %loopentry
-	%tmp.5 = load i32* %undef		; <i32> [#uses=1]
+	%tmp.5 = load i32, i32* %undef		; <i32> [#uses=1]
 	store i32 %tmp.5, i32* %out
 	store i32 0, i32* %undef
-	%tmp.6 = load i32* %i		; <i32> [#uses=1]
+	%tmp.6 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp.7 = icmp sgt i32 %tmp.6, 0		; <i1> [#uses=2]
 	%tmp.8 = zext i1 %tmp.7 to i32		; <i32> [#uses=0]
 	br i1 %tmp.7, label %then, label %endif
 then:		; preds = %no_exit
-	%tmp.9 = load i8** %p_addr		; <i8*> [#uses=1]
-	%tmp.10 = load i32* %i		; <i32> [#uses=1]
+	%tmp.9 = load i8*, i8** %p_addr		; <i8*> [#uses=1]
+	%tmp.10 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp.11 = sub i32 %tmp.10, 1		; <i32> [#uses=1]
 	%tmp.12 = getelementptr i8, i8* %tmp.9, i32 %tmp.11		; <i8*> [#uses=1]
-	%tmp.13 = load i32* %out		; <i32> [#uses=1]
+	%tmp.13 = load i32, i32* %out		; <i32> [#uses=1]
 	%tmp.14 = trunc i32 %tmp.13 to i8		; <i8> [#uses=1]
 	store i8 %tmp.14, i8* %tmp.12
 	br label %endif
 endif:		; preds = %then, %no_exit
-	%tmp.15 = load i32* %i		; <i32> [#uses=1]
+	%tmp.15 = load i32, i32* %i		; <i32> [#uses=1]
 	%inc = add i32 %tmp.15, 1		; <i32> [#uses=1]
 	store i32 %inc, i32* %i
 	br label %loopentry
diff --git a/llvm/test/Transforms/Mem2Reg/2005-11-28-Crash.ll b/llvm/test/Transforms/Mem2Reg/2005-11-28-Crash.ll
index 8fd3351..4b1d7f66 100644
--- a/llvm/test/Transforms/Mem2Reg/2005-11-28-Crash.ll
+++ b/llvm/test/Transforms/Mem2Reg/2005-11-28-Crash.ll
@@ -41,7 +41,7 @@
 loopexit:		; preds = %loopentry
 	br label %endif.4
 then.4:		; No predecessors!
-	%tmp.61 = load i32* %flags		; <i32> [#uses=0]
+	%tmp.61 = load i32, i32* %flags		; <i32> [#uses=0]
 	br label %out
 dead_block_after_goto:		; No predecessors!
 	br label %endif.4
diff --git a/llvm/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll b/llvm/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
index ea581d1..812b8b6 100644
--- a/llvm/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
+++ b/llvm/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll
@@ -22,7 +22,7 @@
 	br i1 %toBool, label %bb, label %bb5
 
 bb:		; preds = %entry
-	%tmp4 = load volatile i32* %v, align 4		; <i32> [#uses=1]
+	%tmp4 = load volatile i32, i32* %v, align 4		; <i32> [#uses=1]
 	store i32 %tmp4, i32* %tmp, align 4
 	br label %bb6
 
@@ -33,12 +33,12 @@
 	br label %bb6
 
 bb6:		; preds = %bb5, %bb
-	%tmp7 = load i32* %tmp, align 4		; <i32> [#uses=1]
+	%tmp7 = load i32, i32* %tmp, align 4		; <i32> [#uses=1]
 	store i32 %tmp7, i32* %retval, align 4
 	br label %return
 
 return:		; preds = %bb6
-	%retval8 = load i32* %retval		; <i32> [#uses=1]
+	%retval8 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval8
 }
 
diff --git a/llvm/test/Transforms/Mem2Reg/ConvertDebugInfo.ll b/llvm/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
index a7369c0..16067f5 100644
--- a/llvm/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
+++ b/llvm/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
@@ -15,18 +15,18 @@
   store i32 %i, i32* %i_addr
   call void @llvm.dbg.declare(metadata double* %j_addr, metadata !9, metadata !{}), !dbg !8
   store double %j, double* %j_addr
-  %1 = load i32* %i_addr, align 4, !dbg !10       ; <i32> [#uses=1]
+  %1 = load i32, i32* %i_addr, align 4, !dbg !10       ; <i32> [#uses=1]
   %2 = add nsw i32 %1, 1, !dbg !10                ; <i32> [#uses=1]
   %3 = sitofp i32 %2 to double, !dbg !10          ; <double> [#uses=1]
-  %4 = load double* %j_addr, align 8, !dbg !10    ; <double> [#uses=1]
+  %4 = load double, double* %j_addr, align 8, !dbg !10    ; <double> [#uses=1]
   %5 = fadd double %3, %4, !dbg !10               ; <double> [#uses=1]
   store double %5, double* %0, align 8, !dbg !10
-  %6 = load double* %0, align 8, !dbg !10         ; <double> [#uses=1]
+  %6 = load double, double* %0, align 8, !dbg !10         ; <double> [#uses=1]
   store double %6, double* %retval, align 8, !dbg !10
   br label %return, !dbg !10
 
 return:                                           ; preds = %entry
-  %retval1 = load double* %retval, !dbg !10       ; <double> [#uses=1]
+  %retval1 = load double, double* %retval, !dbg !10       ; <double> [#uses=1]
   ret double %retval1, !dbg !10
 }
 
diff --git a/llvm/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll b/llvm/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
index 76d2a1a..b8543bc 100644
--- a/llvm/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
+++ b/llvm/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
@@ -13,16 +13,16 @@
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   call void @llvm.dbg.declare(metadata i32* %a_addr, metadata !0, metadata !{}), !dbg !7
   store i32 %a, i32* %a_addr
-  %0 = load i32* %a_addr, align 4, !dbg !8        ; <i32> [#uses=1]
+  %0 = load i32, i32* %a_addr, align 4, !dbg !8        ; <i32> [#uses=1]
   call void @llvm.dbg.declare(metadata i32* %x_addr.i, metadata !9, metadata !{}) nounwind, !dbg !15
   store i32 %0, i32* %x_addr.i
   call void @llvm.dbg.declare(metadata i64* %y_addr.i, metadata !16, metadata !{}) nounwind, !dbg !15
   store i64 55, i64* %y_addr.i
   call void @llvm.dbg.declare(metadata i8** %z_addr.i, metadata !17, metadata !{}) nounwind, !dbg !15
   store i8* bitcast (void (i32)* @baz to i8*), i8** %z_addr.i
-  %1 = load i32* %x_addr.i, align 4, !dbg !18     ; <i32> [#uses=1]
-  %2 = load i64* %y_addr.i, align 8, !dbg !18     ; <i64> [#uses=1]
-  %3 = load i8** %z_addr.i, align 8, !dbg !18     ; <i8*> [#uses=1]
+  %1 = load i32, i32* %x_addr.i, align 4, !dbg !18     ; <i32> [#uses=1]
+  %2 = load i64, i64* %y_addr.i, align 8, !dbg !18     ; <i64> [#uses=1]
+  %3 = load i8*, i8** %z_addr.i, align 8, !dbg !18     ; <i8*> [#uses=1]
   call void @foo(i32 %1, i64 %2, i8* %3) nounwind, !dbg !18
   br label %return, !dbg !19
 
diff --git a/llvm/test/Transforms/Mem2Reg/PromoteMemToRegister.ll b/llvm/test/Transforms/Mem2Reg/PromoteMemToRegister.ll
index 1be6b03..b7f3994 100644
--- a/llvm/test/Transforms/Mem2Reg/PromoteMemToRegister.ll
+++ b/llvm/test/Transforms/Mem2Reg/PromoteMemToRegister.ll
@@ -6,12 +6,12 @@
 	%J = alloca double		; <double*> [#uses=2]
 	store i32 %i, i32* %I
 	store double %j, double* %J
-	%t1 = load i32* %I		; <i32> [#uses=1]
+	%t1 = load i32, i32* %I		; <i32> [#uses=1]
 	%t2 = add i32 %t1, 1		; <i32> [#uses=1]
 	store i32 %t2, i32* %I
-	%t3 = load i32* %I		; <i32> [#uses=1]
+	%t3 = load i32, i32* %I		; <i32> [#uses=1]
 	%t4 = sitofp i32 %t3 to double		; <double> [#uses=1]
-	%t5 = load double* %J		; <double> [#uses=1]
+	%t5 = load double, double* %J		; <double> [#uses=1]
 	%t6 = fmul double %t4, %t5		; <double> [#uses=1]
 	ret double %t6
 }
diff --git a/llvm/test/Transforms/Mem2Reg/UndefValuesMerge.ll b/llvm/test/Transforms/Mem2Reg/UndefValuesMerge.ll
index 5013229..eeeb72f 100644
--- a/llvm/test/Transforms/Mem2Reg/UndefValuesMerge.ll
+++ b/llvm/test/Transforms/Mem2Reg/UndefValuesMerge.ll
@@ -7,7 +7,7 @@
 	store i32 %i, i32* %I
 	br label %Cont
 Cont:		; preds = %T, %0
-	%Y = load i32* %I		; <i32> [#uses=1]
+	%Y = load i32, i32* %I		; <i32> [#uses=1]
 	ret i32 %Y
 }
 
diff --git a/llvm/test/Transforms/Mem2Reg/atomic.ll b/llvm/test/Transforms/Mem2Reg/atomic.ll
index 5bc9e92..f20043d 100644
--- a/llvm/test/Transforms/Mem2Reg/atomic.ll
+++ b/llvm/test/Transforms/Mem2Reg/atomic.ll
@@ -7,6 +7,6 @@
 ; CHECK: ret i32 %x
   %a = alloca i32
   store atomic i32 %x, i32* %a seq_cst, align 4
-  %r = load atomic i32* %a seq_cst, align 4
+  %r = load atomic i32, i32* %a seq_cst, align 4
   ret i32 %r
 }
diff --git a/llvm/test/Transforms/Mem2Reg/crash.ll b/llvm/test/Transforms/Mem2Reg/crash.ll
index 59e2c0b..a4a31b1 100644
--- a/llvm/test/Transforms/Mem2Reg/crash.ll
+++ b/llvm/test/Transforms/Mem2Reg/crash.ll
@@ -14,7 +14,7 @@
   br label %bb15
 
 bb15:
-  %B = load i32* %whichFlag
+  %B = load i32, i32* %whichFlag
   ret i32 %B
 
 lpad86:
@@ -33,11 +33,11 @@
   br label %bb15
 
 bb15:
-  %B = load i32* %whichFlag
+  %B = load i32, i32* %whichFlag
   ret i32 %B
 
 invcont2:
-  %C = load i32* %whichFlag
+  %C = load i32, i32* %whichFlag
   store i32 %C, i32* %whichFlag
   br label %bb15
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
index e28b24a..ffbb299 100644
--- a/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
+++ b/llvm/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
@@ -15,7 +15,7 @@
   %b_i8 = bitcast %b* %b_var to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 1, i1 false)
   %tmp1 = getelementptr %b, %b* %b_var, i32 0, i32 0
-  %tmp2 = load float* %tmp1
+  %tmp2 = load float, float* %tmp1
   ret float %tmp2
 }
 
diff --git a/llvm/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll b/llvm/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll
index bf04814..8e4a023 100644
--- a/llvm/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll
+++ b/llvm/test/Transforms/MemCpyOpt/2011-06-02-CallSlotOverwritten.ll
@@ -24,13 +24,13 @@
 
   %bit1 = bitcast %struct1* %x to i64*
   %bit2 = bitcast %struct2* %y to i64*
-  %load = load i64* %bit1, align 8
+  %load = load i64, i64* %bit1, align 8
   store i64 %load, i64* %bit2, align 8
 
-; CHECK: %load = load i64* %bit1, align 8
+; CHECK: %load = load i64, i64* %bit1, align 8
 ; CHECK: store i64 %load, i64* %bit2, align 8
 
   %gep1 = getelementptr %struct2, %struct2* %y, i32 0, i32 0, i32 0
-  %ret = load i32* %gep1
+  %ret = load i32, i32* %gep1
   ret i32 %ret
 }
diff --git a/llvm/test/Transforms/MemCpyOpt/atomic.ll b/llvm/test/Transforms/MemCpyOpt/atomic.ll
index ee7e548..5be6b15 100644
--- a/llvm/test/Transforms/MemCpyOpt/atomic.ll
+++ b/llvm/test/Transforms/MemCpyOpt/atomic.ll
@@ -33,7 +33,7 @@
   %new = alloca i32
   call void @otherf(i32* nocapture %old)
   store atomic i32 0, i32* @x unordered, align 4
-  %v = load i32* %old
+  %v = load i32, i32* %old
   store i32 %v, i32* %new
   call void @otherf(i32* nocapture %new)  
   ret void
diff --git a/llvm/test/Transforms/MemCpyOpt/loadstore-sret.ll b/llvm/test/Transforms/MemCpyOpt/loadstore-sret.ll
index 888701d..55cbe59 100644
--- a/llvm/test/Transforms/MemCpyOpt/loadstore-sret.ll
+++ b/llvm/test/Transforms/MemCpyOpt/loadstore-sret.ll
@@ -14,7 +14,7 @@
   call void @_Z3barv(%"class.std::auto_ptr"* sret %temp.lvalue)
   %tmp.i.i = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* %temp.lvalue, i64 0, i32 0
 ; CHECK-NOT: load
-  %tmp2.i.i = load i32** %tmp.i.i, align 8
+  %tmp2.i.i = load i32*, i32** %tmp.i.i, align 8
   %tmp.i.i4 = getelementptr inbounds %"class.std::auto_ptr", %"class.std::auto_ptr"* %agg.result, i64 0, i32 0
 ; CHECK-NOT: store
   store i32* %tmp2.i.i, i32** %tmp.i.i4, align 8
diff --git a/llvm/test/Transforms/MemCpyOpt/memcpy.ll b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
index f8c33f0..72445cf 100644
--- a/llvm/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/llvm/test/Transforms/MemCpyOpt/memcpy.ll
@@ -196,7 +196,7 @@
   %a = alloca i32, align 4
   store i32 %y, i32* %a
   call void @foo(i32* noalias nocapture %a)
-  %c = load i32* %a
+  %c = load i32, i32* %a
   %d = bitcast %opaque* %x to i32*
   store i32 %c, i32* %d
   ret void
diff --git a/llvm/test/Transforms/MemCpyOpt/sret.ll b/llvm/test/Transforms/MemCpyOpt/sret.ll
index 3f3c13d..34ba4c4 100644
--- a/llvm/test/Transforms/MemCpyOpt/sret.ll
+++ b/llvm/test/Transforms/MemCpyOpt/sret.ll
@@ -10,12 +10,12 @@
   %iz = alloca %0
   %memtmp = alloca %0, align 16
   %tmp1 = getelementptr %0, %0* %z, i32 0, i32 1
-  %tmp2 = load x86_fp80* %tmp1, align 16
+  %tmp2 = load x86_fp80, x86_fp80* %tmp1, align 16
   %tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2
   %tmp4 = getelementptr %0, %0* %iz, i32 0, i32 1
   %real = getelementptr %0, %0* %iz, i32 0, i32 0
   %tmp7 = getelementptr %0, %0* %z, i32 0, i32 0
-  %tmp8 = load x86_fp80* %tmp7, align 16
+  %tmp8 = load x86_fp80, x86_fp80* %tmp7, align 16
   store x86_fp80 %tmp3, x86_fp80* %real, align 16
   store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16
   call void @ccoshl(%0* noalias sret %memtmp, %0* byval align 8 %iz) nounwind
diff --git a/llvm/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll b/llvm/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
index 689dc22..e684da8 100644
--- a/llvm/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
+++ b/llvm/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll
@@ -36,15 +36,15 @@
   store %"struct.kc::impl_fileline_FileLine"* %this, %"struct.kc::impl_fileline_FileLine"** %this_addr
   store %"struct.kc::impl_casestring__Str"* %_file, %"struct.kc::impl_casestring__Str"** %_file_addr
   store i32 %_line, i32* %_line_addr
-  %0 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %0 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
   %1 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine", %"struct.kc::impl_fileline_FileLine"* %0, i32 0, i32 0
   call void @_ZN2kc13impl_filelineC2Ev() nounwind
-  %2 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %2 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
   %3 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine", %"struct.kc::impl_fileline_FileLine"* %2, i32 0, i32 0
   %4 = getelementptr inbounds %"struct.kc::impl_fileline", %"struct.kc::impl_fileline"* %3, i32 0, i32 0
   %5 = getelementptr inbounds %"struct.kc::impl_abstract_phylum", %"struct.kc::impl_abstract_phylum"* %4, i32 0, i32 0
   store i32 (...)** getelementptr inbounds ([13 x i32 (...)*]* @_ZTVN2kc22impl_fileline_FileLineE, i32 0, i32 2), i32 (...)*** %5, align 4
-  %6 = load %"struct.kc::impl_casestring__Str"** %_file_addr, align 4
+  %6 = load %"struct.kc::impl_casestring__Str"*, %"struct.kc::impl_casestring__Str"** %_file_addr, align 4
   %7 = icmp eq %"struct.kc::impl_casestring__Str"* %6, null
   br i1 %7, label %bb, label %bb1
 
@@ -57,20 +57,20 @@
   br label %bb2
 
 bb1:                                              ; preds = %entry
-  %9 = load %"struct.kc::impl_casestring__Str"** %_file_addr, align 4
+  %9 = load %"struct.kc::impl_casestring__Str"*, %"struct.kc::impl_casestring__Str"** %_file_addr, align 4
   store %"struct.kc::impl_casestring__Str"* %9, %"struct.kc::impl_casestring__Str"** %iftmp.99, align 4
   br label %bb2
 
 bb2:                                              ; preds = %bb1, %invcont
-  %10 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %10 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
   %11 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine", %"struct.kc::impl_fileline_FileLine"* %10, i32 0, i32 0
   %12 = getelementptr inbounds %"struct.kc::impl_fileline", %"struct.kc::impl_fileline"* %11, i32 0, i32 1
-  %13 = load %"struct.kc::impl_casestring__Str"** %iftmp.99, align 4
+  %13 = load %"struct.kc::impl_casestring__Str"*, %"struct.kc::impl_casestring__Str"** %iftmp.99, align 4
   store %"struct.kc::impl_casestring__Str"* %13, %"struct.kc::impl_casestring__Str"** %12, align 4
-  %14 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %14 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
   %15 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine", %"struct.kc::impl_fileline_FileLine"* %14, i32 0, i32 0
   %16 = getelementptr inbounds %"struct.kc::impl_fileline", %"struct.kc::impl_fileline"* %15, i32 0, i32 2
-  %17 = load i32* %_line_addr, align 4
+  %17 = load i32, i32* %_line_addr, align 4
   store i32 %17, i32* %16, align 4
   ret void
 
@@ -79,21 +79,21 @@
               cleanup
   %exn = extractvalue { i8*, i32 } %eh_ptr, 0
   store i8* %exn, i8** %eh_exception
-  %eh_ptr4 = load i8** %eh_exception
+  %eh_ptr4 = load i8*, i8** %eh_exception
   %eh_select5 = extractvalue { i8*, i32 } %eh_ptr, 1
   store i32 %eh_select5, i32* %eh_selector
-  %eh_select = load i32* %eh_selector
+  %eh_select = load i32, i32* %eh_selector
   store i32 %eh_select, i32* %save_filt.150, align 4
-  %eh_value = load i8** %eh_exception
+  %eh_value = load i8*, i8** %eh_exception
   store i8* %eh_value, i8** %save_eptr.149, align 4
-  %18 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %18 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
   %19 = bitcast %"struct.kc::impl_fileline_FileLine"* %18 to %"struct.kc::impl_fileline"*
   call void @_ZN2kc13impl_filelineD2Ev(%"struct.kc::impl_fileline"* %19) nounwind
-  %20 = load i8** %save_eptr.149, align 4
+  %20 = load i8*, i8** %save_eptr.149, align 4
   store i8* %20, i8** %eh_exception, align 4
-  %21 = load i32* %save_filt.150, align 4
+  %21 = load i32, i32* %save_filt.150, align 4
   store i32 %21, i32* %eh_selector, align 4
-  %eh_ptr6 = load i8** %eh_exception
+  %eh_ptr6 = load i8*, i8** %eh_exception
   call void @_Unwind_Resume_or_Rethrow()
   unreachable
 }
@@ -105,7 +105,7 @@
   %this_addr = alloca %"struct.kc::impl_fileline"*, align 4
   %"alloca point" = bitcast i32 0 to i32
   store %"struct.kc::impl_fileline"* %this, %"struct.kc::impl_fileline"** %this_addr
-  %0 = load %"struct.kc::impl_fileline"** %this_addr, align 4
+  %0 = load %"struct.kc::impl_fileline"*, %"struct.kc::impl_fileline"** %this_addr, align 4
   %1 = getelementptr inbounds %"struct.kc::impl_fileline", %"struct.kc::impl_fileline"* %0, i32 0, i32 0
   %2 = getelementptr inbounds %"struct.kc::impl_abstract_phylum", %"struct.kc::impl_abstract_phylum"* %1, i32 0, i32 0
   store i32 (...)** getelementptr inbounds ([13 x i32 (...)*]* @_ZTVN2kc13impl_filelineE, i32 0, i32 2), i32 (...)*** %2, align 4
@@ -114,7 +114,7 @@
   br i1 %toBool, label %bb1, label %return
 
 bb1:                                              ; preds = %entry
-  %4 = load %"struct.kc::impl_fileline"** %this_addr, align 4
+  %4 = load %"struct.kc::impl_fileline"*, %"struct.kc::impl_fileline"** %this_addr, align 4
   %5 = bitcast %"struct.kc::impl_fileline"* %4 to i8*
   call void @_ZdlPv() nounwind
   br label %return
@@ -130,7 +130,7 @@
   %this_addr = alloca %"struct.kc::impl_fileline"*, align 4
   %"alloca point" = bitcast i32 0 to i32
   store %"struct.kc::impl_fileline"* %this, %"struct.kc::impl_fileline"** %this_addr
-  %0 = load %"struct.kc::impl_fileline"** %this_addr, align 4
+  %0 = load %"struct.kc::impl_fileline"*, %"struct.kc::impl_fileline"** %this_addr, align 4
   %1 = getelementptr inbounds %"struct.kc::impl_fileline", %"struct.kc::impl_fileline"* %0, i32 0, i32 0
   %2 = getelementptr inbounds %"struct.kc::impl_abstract_phylum", %"struct.kc::impl_abstract_phylum"* %1, i32 0, i32 0
   store i32 (...)** getelementptr inbounds ([13 x i32 (...)*]* @_ZTVN2kc13impl_filelineE, i32 0, i32 2), i32 (...)*** %2, align 4
@@ -139,7 +139,7 @@
   br i1 %toBool, label %bb1, label %return
 
 bb1:                                              ; preds = %entry
-  %4 = load %"struct.kc::impl_fileline"** %this_addr, align 4
+  %4 = load %"struct.kc::impl_fileline"*, %"struct.kc::impl_fileline"** %this_addr, align 4
   %5 = bitcast %"struct.kc::impl_fileline"* %4 to i8*
   call void @_ZdlPv() nounwind
   br label %return
@@ -162,15 +162,15 @@
   store %"struct.kc::impl_fileline_FileLine"* %this, %"struct.kc::impl_fileline_FileLine"** %this_addr
   store %"struct.kc::impl_casestring__Str"* %_file, %"struct.kc::impl_casestring__Str"** %_file_addr
   store i32 %_line, i32* %_line_addr
-  %0 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %0 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
   %1 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine", %"struct.kc::impl_fileline_FileLine"* %0, i32 0, i32 0
   call void @_ZN2kc13impl_filelineC2Ev() nounwind
-  %2 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %2 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
   %3 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine", %"struct.kc::impl_fileline_FileLine"* %2, i32 0, i32 0
   %4 = getelementptr inbounds %"struct.kc::impl_fileline", %"struct.kc::impl_fileline"* %3, i32 0, i32 0
   %5 = getelementptr inbounds %"struct.kc::impl_abstract_phylum", %"struct.kc::impl_abstract_phylum"* %4, i32 0, i32 0
   store i32 (...)** getelementptr inbounds ([13 x i32 (...)*]* @_ZTVN2kc22impl_fileline_FileLineE, i32 0, i32 2), i32 (...)*** %5, align 4
-  %6 = load %"struct.kc::impl_casestring__Str"** %_file_addr, align 4
+  %6 = load %"struct.kc::impl_casestring__Str"*, %"struct.kc::impl_casestring__Str"** %_file_addr, align 4
   %7 = icmp eq %"struct.kc::impl_casestring__Str"* %6, null
   br i1 %7, label %bb, label %bb1
 
@@ -183,20 +183,20 @@
   br label %bb2
 
 bb1:                                              ; preds = %entry
-  %9 = load %"struct.kc::impl_casestring__Str"** %_file_addr, align 4
+  %9 = load %"struct.kc::impl_casestring__Str"*, %"struct.kc::impl_casestring__Str"** %_file_addr, align 4
   store %"struct.kc::impl_casestring__Str"* %9, %"struct.kc::impl_casestring__Str"** %iftmp.99, align 4
   br label %bb2
 
 bb2:                                              ; preds = %bb1, %invcont
-  %10 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %10 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
   %11 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine", %"struct.kc::impl_fileline_FileLine"* %10, i32 0, i32 0
   %12 = getelementptr inbounds %"struct.kc::impl_fileline", %"struct.kc::impl_fileline"* %11, i32 0, i32 1
-  %13 = load %"struct.kc::impl_casestring__Str"** %iftmp.99, align 4
+  %13 = load %"struct.kc::impl_casestring__Str"*, %"struct.kc::impl_casestring__Str"** %iftmp.99, align 4
   store %"struct.kc::impl_casestring__Str"* %13, %"struct.kc::impl_casestring__Str"** %12, align 4
-  %14 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %14 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
   %15 = getelementptr inbounds %"struct.kc::impl_fileline_FileLine", %"struct.kc::impl_fileline_FileLine"* %14, i32 0, i32 0
   %16 = getelementptr inbounds %"struct.kc::impl_fileline", %"struct.kc::impl_fileline"* %15, i32 0, i32 2
-  %17 = load i32* %_line_addr, align 4
+  %17 = load i32, i32* %_line_addr, align 4
   store i32 %17, i32* %16, align 4
   ret void
 
@@ -205,21 +205,21 @@
               cleanup
   %exn = extractvalue { i8*, i32 } %eh_ptr, 0
   store i8* %exn, i8** %eh_exception
-  %eh_ptr4 = load i8** %eh_exception
+  %eh_ptr4 = load i8*, i8** %eh_exception
   %eh_select5 = extractvalue { i8*, i32 } %eh_ptr, 1
   store i32 %eh_select5, i32* %eh_selector
-  %eh_select = load i32* %eh_selector
+  %eh_select = load i32, i32* %eh_selector
   store i32 %eh_select, i32* %save_filt.148, align 4
-  %eh_value = load i8** %eh_exception
+  %eh_value = load i8*, i8** %eh_exception
   store i8* %eh_value, i8** %save_eptr.147, align 4
-  %18 = load %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
+  %18 = load %"struct.kc::impl_fileline_FileLine"*, %"struct.kc::impl_fileline_FileLine"** %this_addr, align 4
   %19 = bitcast %"struct.kc::impl_fileline_FileLine"* %18 to %"struct.kc::impl_fileline"*
   call void @_ZN2kc13impl_filelineD2Ev(%"struct.kc::impl_fileline"* %19) nounwind
-  %20 = load i8** %save_eptr.147, align 4
+  %20 = load i8*, i8** %save_eptr.147, align 4
   store i8* %20, i8** %eh_exception, align 4
-  %21 = load i32* %save_filt.148, align 4
+  %21 = load i32, i32* %save_filt.148, align 4
   store i32 %21, i32* %eh_selector, align 4
-  %eh_ptr6 = load i8** %eh_exception
+  %eh_ptr6 = load i8*, i8** %eh_exception
   call void @_Unwind_Resume_or_Rethrow()
   unreachable
 }
diff --git a/llvm/test/Transforms/MergeFunc/address-spaces.ll b/llvm/test/Transforms/MergeFunc/address-spaces.ll
index c8b6f6eb..1cfecae 100644
--- a/llvm/test/Transforms/MergeFunc/address-spaces.ll
+++ b/llvm/test/Transforms/MergeFunc/address-spaces.ll
@@ -10,7 +10,7 @@
 ; CHECK-LABEL: @store_as0(
 ; CHECK: call void @foo(
   %gep = getelementptr i32, i32* %x, i32 4
-  %y = load i32* %gep
+  %y = load i32, i32* %gep
   call void @foo(i32 %y) nounwind
   ret i32 %y
 }
@@ -19,7 +19,7 @@
 ; CHECK-LABEL: @store_as1(
 ; CHECK: call void @foo(
   %gep = getelementptr i32, i32 addrspace(1)* %x, i32 4
-  %y = load i32 addrspace(1)* %gep
+  %y = load i32, i32 addrspace(1)* %gep
   call void @foo(i32 %y) nounwind
   ret i32 %y
 }
@@ -28,7 +28,7 @@
 ; CHECK-LABEL: @store_as2(
 ; CHECK: call void @foo(
   %gep = getelementptr i32, i32 addrspace(2)* %x, i32 4
-  %y = load i32 addrspace(2)* %gep
+  %y = load i32, i32 addrspace(2)* %gep
   call void @foo(i32 %y) nounwind
   ret i32 %y
 }
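
As MergeFunc/address-spaces.ll shows, the explicit type is the plain value type; any addrspace qualifier remains part of the pointer operand's type only. A minimal sketch (hypothetical function):

define i32 @as_example(i32 addrspace(1)* %p) {
  %v = load i32, i32 addrspace(1)* %p
  ret i32 %v
}
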
diff --git a/llvm/test/Transforms/MergeFunc/crash.ll b/llvm/test/Transforms/MergeFunc/crash.ll
index bc27380..3319c8b 100644
--- a/llvm/test/Transforms/MergeFunc/crash.ll
+++ b/llvm/test/Transforms/MergeFunc/crash.ll
@@ -22,7 +22,7 @@
 
 define internal i32 @func10(%.qux.2496* nocapture %this) align 2 {
   %1 = getelementptr inbounds %.qux.2496, %.qux.2496* %this, i32 0, i32 1, i32 1
-  %2 = load i32* %1, align 4
+  %2 = load i32, i32* %1, align 4
   ret i32 %2
 }
 
@@ -41,6 +41,6 @@
 
 define internal i8* @func35(%.qux.2585* nocapture %this) align 2 {
   %1 = getelementptr inbounds %.qux.2585, %.qux.2585* %this, i32 0, i32 2
-  %2 = load i8** %1, align 4
+  %2 = load i8*, i8** %1, align 4
   ret i8* %2
 }
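
Loads of pointer values spell out the loaded pointer type as the first operand, as in MergeFunc/crash.ll above. A minimal sketch (hypothetical function):

define i8* @indirect_example(i8** %pp) {
  %p = load i8*, i8** %pp
  ret i8* %p
}
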
diff --git a/llvm/test/Transforms/MergeFunc/inttoptr-address-space.ll b/llvm/test/Transforms/MergeFunc/inttoptr-address-space.ll
index f69dfeb..5f672de 100644
--- a/llvm/test/Transforms/MergeFunc/inttoptr-address-space.ll
+++ b/llvm/test/Transforms/MergeFunc/inttoptr-address-space.ll
@@ -12,7 +12,7 @@
 define internal i32 @func10(%.qux.2496 addrspace(1)* nocapture %this) align 2 {
 bb:
   %tmp = getelementptr inbounds %.qux.2496, %.qux.2496 addrspace(1)* %this, i32 0, i32 1, i32 1
-  %tmp1 = load i32 addrspace(1)* %tmp, align 4
+  %tmp1 = load i32, i32 addrspace(1)* %tmp, align 4
   ret i32 %tmp1
 }
 
@@ -24,6 +24,6 @@
 ; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496 addrspace(1)* %[[V2]])
 ; CHECK: %{{.*}} = inttoptr i32 %[[V3]] to i8*
   %tmp = getelementptr inbounds %.qux.2585, %.qux.2585 addrspace(1)* %this, i32 0, i32 2
-  %tmp1 = load i8* addrspace(1)* %tmp, align 4
+  %tmp1 = load i8*, i8* addrspace(1)* %tmp, align 4
   ret i8* %tmp1
 }
diff --git a/llvm/test/Transforms/MergeFunc/inttoptr.ll b/llvm/test/Transforms/MergeFunc/inttoptr.ll
index be8367f..5e95275 100644
--- a/llvm/test/Transforms/MergeFunc/inttoptr.ll
+++ b/llvm/test/Transforms/MergeFunc/inttoptr.ll
@@ -24,7 +24,7 @@
 define internal i32 @func10(%.qux.2496* nocapture %this) align 2 {
 bb:
   %tmp = getelementptr inbounds %.qux.2496, %.qux.2496* %this, i32 0, i32 1, i32 1
-  %tmp1 = load i32* %tmp, align 4
+  %tmp1 = load i32, i32* %tmp, align 4
   ret i32 %tmp1
 }
 
@@ -51,6 +51,6 @@
 ; CHECK: %[[V3:.+]] = tail call i32 @func10(%.qux.2496* %[[V2]])
 ; CHECK: %{{.*}} = inttoptr i32 %[[V3]] to i8*
   %tmp = getelementptr inbounds %.qux.2585, %.qux.2585* %this, i32 0, i32 2
-  %tmp1 = load i8** %tmp, align 4
+  %tmp1 = load i8*, i8** %tmp, align 4
   ret i8* %tmp1
 }
diff --git a/llvm/test/Transforms/MergeFunc/mergefunc-struct-return.ll b/llvm/test/Transforms/MergeFunc/mergefunc-struct-return.ll
index 5af4421..14db399 100644
--- a/llvm/test/Transforms/MergeFunc/mergefunc-struct-return.ll
+++ b/llvm/test/Transforms/MergeFunc/mergefunc-struct-return.ll
@@ -18,7 +18,7 @@
   %v2 = getelementptr %kv1, %kv1* %tmp, i32 0, i32 0
   store i32* null, i32** %v2
   call void @noop()
-  %v3 = load %kv1* %tmp
+  %v3 = load %kv1, %kv1* %tmp
   ret %kv1 %v3
 }
 
@@ -35,6 +35,6 @@
   store i8* null, i8** %v2
   call void @noop()
 
-  %v3 = load %kv2* %tmp
+  %v3 = load %kv2, %kv2* %tmp
   ret %kv2 %v3
 }
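
mergefunc-struct-return.ll covers first-class aggregate loads: the explicit type can be a named struct type, not just a scalar. A minimal sketch (hypothetical %pair type and function):

%pair = type { i32, float }

define %pair @agg_example(%pair* %p) {
  %v = load %pair, %pair* %p
  ret %pair %v
}
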
diff --git a/llvm/test/Transforms/MergeFunc/ranges.ll b/llvm/test/Transforms/MergeFunc/ranges.ll
index d3e4d94..46a0c76 100644
--- a/llvm/test/Transforms/MergeFunc/ranges.ll
+++ b/llvm/test/Transforms/MergeFunc/ranges.ll
@@ -1,31 +1,31 @@
 ; RUN: opt -mergefunc -S < %s | FileCheck %s
 define i1 @cmp_with_range(i8*, i8*) {
-  %v1 = load i8* %0, !range !0
-  %v2 = load i8* %1, !range !0
+  %v1 = load i8, i8* %0, !range !0
+  %v2 = load i8, i8* %1, !range !0
   %out = icmp eq i8 %v1, %v2
   ret i1 %out
 }
 
 define i1 @cmp_no_range(i8*, i8*) {
 ; CHECK-LABEL: @cmp_no_range
-; CHECK-NEXT  %v1 = load i8* %0
-; CHECK-NEXT  %v2 = load i8* %1
+; CHECK-NEXT:  %v1 = load i8, i8* %0
+; CHECK-NEXT:  %v2 = load i8, i8* %1
-; CHECK-NEXT  %out = icmp eq i8 %v1, %v2
-; CHECK-NEXT  ret i1 %out
+; CHECK-NEXT:  %out = icmp eq i8 %v1, %v2
+; CHECK-NEXT:  ret i1 %out
-  %v1 = load i8* %0
-  %v2 = load i8* %1
+  %v1 = load i8, i8* %0
+  %v2 = load i8, i8* %1
   %out = icmp eq i8 %v1, %v2
   ret i1 %out
 }
 
 define i1 @cmp_different_range(i8*, i8*) {
 ; CHECK-LABEL: @cmp_different_range
-; CHECK-NEXT:  %v1 = load i8* %0, !range !1
-; CHECK-NEXT:  %v2 = load i8* %1, !range !1
+; CHECK-NEXT:  %v1 = load i8, i8* %0, !range !1
+; CHECK-NEXT:  %v2 = load i8, i8* %1, !range !1
 ; CHECK-NEXT:  %out = icmp eq i8 %v1, %v2
 ; CHECK-NEXT:  ret i1 %out
-  %v1 = load i8* %0, !range !1
-  %v2 = load i8* %1, !range !1
+  %v1 = load i8, i8* %0, !range !1
+  %v2 = load i8, i8* %1, !range !1
   %out = icmp eq i8 %v1, %v2
   ret i1 %out
 }
@@ -33,8 +33,8 @@
 define i1 @cmp_with_same_range(i8*, i8*) {
 ; CHECK-LABEL: @cmp_with_same_range
 ; CHECK: tail call i1 @cmp_with_range
-  %v1 = load i8* %0, !range !0
-  %v2 = load i8* %1, !range !0
+  %v1 = load i8, i8* %0, !range !0
+  %v2 = load i8, i8* %1, !range !0
   %out = icmp eq i8 %v1, %v2
   ret i1 %out
 }
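
MergeFunc/ranges.ll shows that trailing metadata such as !range is unaffected; it still follows the operand list after a comma. A minimal sketch (hypothetical function; the !0 bounds here are illustrative, not the ones from ranges.ll):

define i8 @range_example(i8* %p) {
  %v = load i8, i8* %p, !range !0
  ret i8 %v
}

!0 = !{i8 0, i8 2}
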
diff --git a/llvm/test/Transforms/MergeFunc/vector.ll b/llvm/test/Transforms/MergeFunc/vector.ll
index 56f74e6..ef13753 100644
--- a/llvm/test/Transforms/MergeFunc/vector.ll
+++ b/llvm/test/Transforms/MergeFunc/vector.ll
@@ -22,7 +22,7 @@
 define linkonce_odr void @_ZNSt6vectorIlSaIlEED1Ev(%"class.std::vector"* nocapture %this) unnamed_addr align 2 {
 entry:
   %tmp2.i.i = bitcast %"class.std::vector"* %this to i64**
-  %tmp3.i.i = load i64** %tmp2.i.i, align 8
+  %tmp3.i.i = load i64*, i64** %tmp2.i.i, align 8
   %tobool.i.i.i = icmp eq i64* %tmp3.i.i, null
   br i1 %tobool.i.i.i, label %_ZNSt6vectorIlSaIlEED2Ev.exit, label %if.then.i.i.i
 
@@ -40,7 +40,7 @@
 define linkonce_odr void @_ZNSt6vectorIPvSaIS0_EED1Ev(%"class.std::vector"* nocapture %this) unnamed_addr align 2 {
 entry:
   %tmp2.i.i = bitcast %"class.std::vector"* %this to i8***
-  %tmp3.i.i = load i8*** %tmp2.i.i, align 8
+  %tmp3.i.i = load i8**, i8*** %tmp2.i.i, align 8
   %tobool.i.i.i = icmp eq i8** %tmp3.i.i, null
   br i1 %tobool.i.i.i, label %_ZNSt6vectorIPvSaIS0_EED2Ev.exit, label %if.then.i.i.i
 
diff --git a/llvm/test/Transforms/MetaRenamer/metarenamer.ll b/llvm/test/Transforms/MetaRenamer/metarenamer.ll
index d639ee5d..e126bed 100644
--- a/llvm/test/Transforms/MetaRenamer/metarenamer.ll
+++ b/llvm/test/Transforms/MetaRenamer/metarenamer.ll
@@ -59,7 +59,7 @@
   br label %5
 
 ; <label>:5                                       ; preds = %9, %0
-  %6 = load i32* %i, align 4
+  %6 = load i32, i32* %i, align 4
   %7 = icmp slt i32 %6, 10
   br i1 %7, label %8, label %12
 
@@ -67,24 +67,24 @@
   br label %9
 
 ; <label>:9                                       ; preds = %8
-  %10 = load i32* %i, align 4
+  %10 = load i32, i32* %i, align 4
   %11 = add nsw i32 %10, 1
   store i32 %11, i32* %i, align 4
   br label %5
 
 ; <label>:12                                      ; preds = %5
-  %13 = load i32* %local_1_xxx, align 4
-  %14 = load i32* %1, align 4
+  %13 = load i32, i32* %local_1_xxx, align 4
+  %14 = load i32, i32* %1, align 4
   %15 = add nsw i32 %13, %14
-  %16 = load i32* %local_2_xxx, align 4
+  %16 = load i32, i32* %local_2_xxx, align 4
   %17 = add nsw i32 %15, %16
-  %18 = load i32* %2, align 4
+  %18 = load i32, i32* %2, align 4
   %19 = add nsw i32 %17, %18
-  %20 = load i32* @func_5_xxx.static_local_3_xxx, align 4
+  %20 = load i32, i32* @func_5_xxx.static_local_3_xxx, align 4
   %21 = add nsw i32 %19, %20
-  %22 = load i32* %3, align 4
+  %22 = load i32, i32* %3, align 4
   %23 = add nsw i32 %21, %22
-  %24 = load i32* %4, align 4
+  %24 = load i32, i32* %4, align 4
   %25 = add nsw i32 %23, %24
   ret i32 %25
 }
diff --git a/llvm/test/Transforms/ObjCARC/allocas.ll b/llvm/test/Transforms/ObjCARC/allocas.ll
index 1fbb01c..ac26a09 100644
--- a/llvm/test/Transforms/ObjCARC/allocas.ll
+++ b/llvm/test/Transforms/ObjCARC/allocas.ll
@@ -56,7 +56,7 @@
   tail call i8* @objc_retain(i8* %x)
   tail call i8* @objc_retain(i8* %x)
   store i8* %x, i8** %A, align 8
-  %y = load i8** %A
+  %y = load i8*, i8** %A
   call void @use_alloca(i8** %A)
   call void @objc_release(i8* %y), !clang.imprecise_release !0
   call void @use_pointer(i8* %x)
@@ -78,7 +78,7 @@
   tail call i8* @objc_retain(i8* %x)
   tail call i8* @objc_retain(i8* %x)
   store i8* %x, i8** %gep, align 8
-  %y = load i8** %A
+  %y = load i8*, i8** %A
   call void @use_alloca(i8** %A)
   call void @objc_release(i8* %y), !clang.imprecise_release !0
   call void @use_pointer(i8* %x)
@@ -101,7 +101,7 @@
   tail call i8* @objc_retain(i8* %x)
   tail call i8* @objc_retain(i8* %x)
   store i8* %x, i8** %gep, align 8
-  %y = load i8** %gep
+  %y = load i8*, i8** %gep
   call void @use_alloca(i8** %A)
   call void @objc_release(i8* %y), !clang.imprecise_release !0
   call void @use_pointer(i8* %x)
@@ -135,7 +135,7 @@
   tail call i8* @objc_retain(i8* %x)
   tail call i8* @objc_retain(i8* %x)
   store i8* %x, i8** %gep, align 8
-  %y = load i8** %gep
+  %y = load i8*, i8** %gep
   call void @use_alloca(i8** %A)
   call void @objc_release(i8* %y), !clang.imprecise_release !0
   call void @use_pointer(i8* %x)
@@ -168,7 +168,7 @@
   tail call i8* @objc_retain(i8* %x)
   tail call i8* @objc_retain(i8* %x)
   store i8* %x, i8** %gep, align 8
-  %y = load i8** %gep
+  %y = load i8*, i8** %gep
   call void @use_alloca(i8** %A)
   call void @objc_release(i8* %y), !clang.imprecise_release !0
   call void @use_pointer(i8* %x)
@@ -191,7 +191,7 @@
   tail call i8* @objc_retain(i8* %x)
   tail call i8* @objc_retain(i8* %x)
   store i8* %x, i8** %A, align 8
-  %y = load i8** %A
+  %y = load i8*, i8** %A
   call void @use_alloca(i8** %A)
   call void @objc_release(i8* %y), !clang.imprecise_release !0
   call void @use_pointer(i8* %x)
@@ -214,7 +214,7 @@
 entry:
   %A = alloca i8*
   store i8* %x, i8** %A, align 8
-  %y = load i8** %A
+  %y = load i8*, i8** %A
   br label %bb1
 
 bb1:
@@ -246,7 +246,7 @@
   %gep1 = getelementptr i8*, i8** %A, i32 0
   store i8* %x, i8** %gep1, align 8
   %gep2 = getelementptr i8*, i8** %A, i32 0
-  %y = load i8** %gep2
+  %y = load i8*, i8** %gep2
   br label %bb1
 
 bb1:
@@ -278,7 +278,7 @@
   %gep1 = getelementptr i8*, i8** %A, i32 2
   store i8* %x, i8** %gep1, align 8
   %gep2 = getelementptr i8*, i8** %A, i32 2
-  %y = load i8** %gep2
+  %y = load i8*, i8** %gep2
   tail call i8* @objc_retain(i8* %x)
   br label %bb1
 
@@ -314,7 +314,7 @@
   %gepbb11 = getelementptr i8*, i8** %Abb1, i32 2
   store i8* %x, i8** %gepbb11, align 8
   %gepbb12 = getelementptr i8*, i8** %Abb1, i32 2
-  %ybb1 = load i8** %gepbb12
+  %ybb1 = load i8*, i8** %gepbb12
   br label %bb3
 
 bb2:
@@ -322,7 +322,7 @@
   %gepbb21 = getelementptr i8*, i8** %Abb2, i32 2
   store i8* %x, i8** %gepbb21, align 8
   %gepbb22 = getelementptr i8*, i8** %Abb2, i32 2
-  %ybb2 = load i8** %gepbb22
+  %ybb2 = load i8*, i8** %gepbb22
   br label %bb3
 
 bb3:
@@ -391,7 +391,7 @@
 arraydestroy.body:
   %arraydestroy.elementPast = phi i8** [ %gep, %entry ], [ %arraydestroy.element, %arraydestroy.body ]
   %arraydestroy.element = getelementptr inbounds i8*, i8** %arraydestroy.elementPast, i64 -1
-  %destroy_tmp = load i8** %arraydestroy.element, align 8
+  %destroy_tmp = load i8*, i8** %arraydestroy.element, align 8
   call void @objc_release(i8* %destroy_tmp), !clang.imprecise_release !0
   %objs_ptr = getelementptr inbounds [2 x i8*], [2 x i8*]* %objs, i64 0, i64 0
   %arraydestroy.cmp = icmp eq i8** %arraydestroy.element, %objs_ptr
@@ -404,7 +404,7 @@
 arraydestroy.body1:
   %arraydestroy.elementPast1 = phi i8** [ %gep1, %arraydestroy.done ], [ %arraydestroy.element1, %arraydestroy.body1 ]
   %arraydestroy.element1 = getelementptr inbounds i8*, i8** %arraydestroy.elementPast1, i64 -1
-  %destroy_tmp1 = load i8** %arraydestroy.element1, align 8
+  %destroy_tmp1 = load i8*, i8** %arraydestroy.element1, align 8
   call void @objc_release(i8* %destroy_tmp1), !clang.imprecise_release !0
   %keys_ptr = getelementptr inbounds [2 x i8*], [2 x i8*]* %keys, i64 0, i64 0
   %arraydestroy.cmp1 = icmp eq i8** %arraydestroy.element1, %keys_ptr
@@ -470,7 +470,7 @@
 arraydestroy.body:
   %arraydestroy.elementPast = phi i8** [ %gep, %entry ], [ %arraydestroy.element, %arraydestroy.body ]
   %arraydestroy.element = getelementptr inbounds i8*, i8** %arraydestroy.elementPast, i64 -1
-  %destroy_tmp = load i8** %arraydestroy.element, align 8
+  %destroy_tmp = load i8*, i8** %arraydestroy.element, align 8
   call void @objc_release(i8* %destroy_tmp), !clang.imprecise_release !0
   %objs_ptr = getelementptr inbounds [2 x i8*], [2 x i8*]* %objs, i64 0, i64 0
   %arraydestroy.cmp = icmp eq i8** %arraydestroy.element, %objs_ptr
@@ -483,7 +483,7 @@
 arraydestroy.body1:
   %arraydestroy.elementPast1 = phi i8** [ %gep1, %arraydestroy.done ], [ %arraydestroy.element1, %arraydestroy.body1 ]
   %arraydestroy.element1 = getelementptr inbounds i8*, i8** %arraydestroy.elementPast1, i64 -1
-  %destroy_tmp1 = load i8** %arraydestroy.element1, align 8
+  %destroy_tmp1 = load i8*, i8** %arraydestroy.element1, align 8
   call void @objc_release(i8* %destroy_tmp1), !clang.imprecise_release !0
   %keys_ptr = getelementptr inbounds [2 x i8*], [2 x i8*]* %keys, i64 0, i64 0
   %arraydestroy.cmp1 = icmp eq i8** %arraydestroy.element1, %keys_ptr
diff --git a/llvm/test/Transforms/ObjCARC/basic.ll b/llvm/test/Transforms/ObjCARC/basic.ll
index 9c91a05..be75436 100644
--- a/llvm/test/Transforms/ObjCARC/basic.ll
+++ b/llvm/test/Transforms/ObjCARC/basic.ll
@@ -286,7 +286,7 @@
 loop:
   %c = bitcast i32* %x to i8*
   call void @objc_release(i8* %c) nounwind
-  %j = load volatile i1* %q
+  %j = load volatile i1, i1* %q
   br i1 %j, label %loop, label %return
 
 return:
@@ -306,7 +306,7 @@
 loop:
   %c = bitcast i32* %x to i8*
   call void @objc_release(i8* %c) nounwind, !clang.imprecise_release !0
-  %j = load volatile i1* %q
+  %j = load volatile i1, i1* %q
   br i1 %j, label %loop, label %return
 
 return:
@@ -330,7 +330,7 @@
 loop:
   %a = bitcast i32* %x to i8*
   %0 = call i8* @objc_retain(i8* %a) nounwind
-  %j = load volatile i1* %q
+  %j = load volatile i1, i1* %q
   br i1 %j, label %loop, label %return
 
 return:
@@ -350,7 +350,7 @@
 loop:
   %a = bitcast i32* %x to i8*
   %0 = call i8* @objc_retain(i8* %a) nounwind
-  %j = load volatile i1* %q
+  %j = load volatile i1, i1* %q
   br i1 %j, label %loop, label %return
 
 return:
@@ -1366,7 +1366,7 @@
 define void @test24(i8* %r, i8* %a) {
   call i8* @objc_retain(i8* %a)
   call void @use_pointer(i8* %r)
-  %q = load i8* %a
+  %q = load i8, i8* %a
   call void @objc_release(i8* %a)
   ret void
 }
@@ -2005,7 +2005,7 @@
 ; CHECK-NOT: objc_
 ; CHECK: }
 define void @test44(i8** %pp) {
-  %p = load i8** %pp
+  %p = load i8*, i8** %pp
   %q = call i8* @objc_retain(i8* %p)
   call void @objc_release(i8* %q)
   ret void
@@ -2021,8 +2021,8 @@
 ; CHECK: call void @objc_release(i8* %p)
 ; CHECK: }
 define void @test45(i8** %pp, i8** %qq) {
-  %p = load i8** %pp
-  %q = load i8** %qq
+  %p = load i8*, i8** %pp
+  %q = load i8*, i8** %qq
   call i8* @objc_retain(i8* %p)
   call void @objc_release(i8* %q)
   call void @use_pointer(i8* %p)
@@ -2154,10 +2154,10 @@
 ; CHECK: ret void
 ; CHECK: }
 define void @test52a(i8** %zz, i8** %pp) {
-  %p = load i8** %pp
+  %p = load i8*, i8** %pp
   %1 = call i8* @objc_retain(i8* %p)
   call void @callee()
-  %z = load i8** %zz
+  %z = load i8*, i8** %zz
   call void @use_pointer(i8* %z)
   call void @objc_release(i8* %p)
   ret void
@@ -2171,10 +2171,10 @@
 ; CHECK: ret void
 ; CHECK: }
 define void @test52b(i8** %zz, i8** %pp) {
-  %p = load i8** %pp
+  %p = load i8*, i8** %pp
   %1 = call i8* @objc_retain(i8* %p)
   call void @callee()
-  %z = load i8** %zz
+  %z = load i8*, i8** %zz
   call void @use_pointer(i8* %z)
   call void @objc_release(i8* %p), !clang.imprecise_release !0
   ret void
@@ -2189,10 +2189,10 @@
 ; CHECK: @objc_
 ; CHECK: }
 define void @test53(void ()** %zz, i8** %pp) {
-  %p = load i8** %pp
+  %p = load i8*, i8** %pp
   %1 = call i8* @objc_retain(i8* %p)
   call void @callee()
-  %z = load void ()** %zz
+  %z = load void ()*, void ()** %zz
   call void @callee_fnptr(void ()* %z)
   call void @objc_release(i8* %p)
   ret void
@@ -2341,8 +2341,8 @@
 ; CHECK: call void @objc_release
 ; CHECK: }
 define void @test60a() {
-  %t = load i8** @constptr
-  %s = load i8** @something
+  %t = load i8*, i8** @constptr
+  %s = load i8*, i8** @something
   call i8* @objc_retain(i8* %s)
   call void @callee()
   call void @use_pointer(i8* %t)
@@ -2356,8 +2356,8 @@
 ; CHECK-NOT: call i8* @objc_rrelease
 ; CHECK: }
 define void @test60b() {
-  %t = load i8** @constptr
-  %s = load i8** @something
+  %t = load i8*, i8** @constptr
+  %s = load i8*, i8** @something
   call i8* @objc_retain(i8* %s)
   call i8* @objc_retain(i8* %s)
   call void @callee()
@@ -2370,8 +2370,8 @@
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test60c() {
-  %t = load i8** @constptr
-  %s = load i8** @something
+  %t = load i8*, i8** @constptr
+  %s = load i8*, i8** @something
   call i8* @objc_retain(i8* %s)
   call void @callee()
   call void @use_pointer(i8* %t)
@@ -2383,8 +2383,8 @@
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test60d() {
-  %t = load i8** @constptr
-  %s = load i8** @something
+  %t = load i8*, i8** @constptr
+  %s = load i8*, i8** @something
   call i8* @objc_retain(i8* %t)
   call void @callee()
   call void @use_pointer(i8* %s)
@@ -2396,8 +2396,8 @@
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test60e() {
-  %t = load i8** @constptr
-  %s = load i8** @something
+  %t = load i8*, i8** @constptr
+  %s = load i8*, i8** @something
   call i8* @objc_retain(i8* %t)
   call void @callee()
   call void @use_pointer(i8* %s)
@@ -2412,7 +2412,7 @@
 ; CHECK-NOT: @objc_
 ; CHECK: }
 define void @test61() {
-  %t = load i8** @constptr
+  %t = load i8*, i8** @constptr
   call i8* @objc_retain(i8* %t)
   call void @callee()
   call void @use_pointer(i8* %t)
@@ -2432,7 +2432,7 @@
 
 loop:
   call i8* @objc_retain(i8* %x)
-  %q = load i1* %p
+  %q = load i1, i1* %p
   br i1 %q, label %loop.more, label %exit
 
 loop.more:
@@ -2459,7 +2459,7 @@
 
 loop:
   call i8* @objc_retain(i8* %x)
-  %q = load i1* %p
+  %q = load i1, i1* %p
   br i1 %q, label %loop.more, label %exit
 
 loop.more:
@@ -2485,7 +2485,7 @@
 
 loop:
   call i8* @objc_retain(i8* %x)
-  %q = load i1* %p
+  %q = load i1, i1* %p
   br i1 %q, label %loop.more, label %exit
 
 loop.more:
@@ -2681,31 +2681,31 @@
   %1 = tail call i8* @objc_retain(i8* %0) nounwind
   tail call void @llvm.dbg.value(metadata {}* %self, i64 0, metadata !0, metadata !{})
   tail call void @llvm.dbg.value(metadata {}* %self, i64 0, metadata !0, metadata !{})
-  %ivar = load i64* @"OBJC_IVAR_$_A.myZ", align 8
+  %ivar = load i64, i64* @"OBJC_IVAR_$_A.myZ", align 8
   %add.ptr = getelementptr i8, i8* %0, i64 %ivar
   %tmp1 = bitcast i8* %add.ptr to float*
-  %tmp2 = load float* %tmp1, align 4
+  %tmp2 = load float, float* %tmp1, align 4
   %conv = fpext float %tmp2 to double
   %add.ptr.sum = add i64 %ivar, 4
   %tmp6 = getelementptr inbounds i8, i8* %0, i64 %add.ptr.sum
   %2 = bitcast i8* %tmp6 to float*
-  %tmp7 = load float* %2, align 4
+  %tmp7 = load float, float* %2, align 4
   %conv8 = fpext float %tmp7 to double
   %add.ptr.sum36 = add i64 %ivar, 8
   %tmp12 = getelementptr inbounds i8, i8* %0, i64 %add.ptr.sum36
   %arrayidx = bitcast i8* %tmp12 to float*
-  %tmp13 = load float* %arrayidx, align 4
+  %tmp13 = load float, float* %arrayidx, align 4
   %conv14 = fpext float %tmp13 to double
   %tmp12.sum = add i64 %ivar, 12
   %arrayidx19 = getelementptr inbounds i8, i8* %0, i64 %tmp12.sum
   %3 = bitcast i8* %arrayidx19 to float*
-  %tmp20 = load float* %3, align 4
+  %tmp20 = load float, float* %3, align 4
   %conv21 = fpext float %tmp20 to double
   %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([33 x i8]* @.str4, i64 0, i64 0), double %conv, double %conv8, double %conv14, double %conv21)
-  %ivar23 = load i64* @"OBJC_IVAR_$_A.myZ", align 8
+  %ivar23 = load i64, i64* @"OBJC_IVAR_$_A.myZ", align 8
   %add.ptr24 = getelementptr i8, i8* %0, i64 %ivar23
   %4 = bitcast i8* %add.ptr24 to i128*
-  %srcval = load i128* %4, align 4
+  %srcval = load i128, i128* %4, align 4
   tail call void @objc_release(i8* %0) nounwind
   %tmp29 = trunc i128 %srcval to i64
   %tmp30 = bitcast i64 %tmp29 to <2 x float>
@@ -2752,7 +2752,7 @@
 for.body:                                         ; preds = %entry, %for.body
   %i.010 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %1 = tail call i8* @objc_retain(i8* %x) nounwind
-  %tmp5 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp5 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call = tail call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %1, i8* %tmp5)
   tail call void @objc_release(i8* %1) nounwind, !clang.imprecise_release !0
   %inc = add nsw i64 %i.010, 1
@@ -2828,12 +2828,12 @@
   %tmp7 = bitcast %2* %self to i8*
   %tmp8 = call i8* @objc_retain(i8* %tmp7) nounwind
   store %4* null, %4** %err, align 8
-  %tmp1 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_17", align 8
-  %tmp2 = load %struct.__CFString** @kUTTypePlainText, align 8
-  %tmp3 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_19", align 8
+  %tmp1 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_17", align 8
+  %tmp2 = load %struct.__CFString*, %struct.__CFString** @kUTTypePlainText, align 8
+  %tmp3 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_19", align 8
   %tmp4 = bitcast %struct._class_t* %tmp1 to i8*
   %call5 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp4, i8* %tmp3, %struct.__CFString* %tmp2)
-  %tmp5 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_21", align 8
+  %tmp5 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_21", align 8
   %tmp6 = bitcast %3* %pboard to i8*
   %call76 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp6, i8* %tmp5, i8* %call5)
   %tmp9 = call i8* @objc_retain(i8* %call76) nounwind
@@ -2841,7 +2841,7 @@
   br i1 %tobool, label %end, label %land.lhs.true
 
 land.lhs.true:                                    ; preds = %entry
-  %tmp11 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_23", align 8
+  %tmp11 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_23", align 8
   %call137 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp6, i8* %tmp11, i8* %tmp9)
   %tmp = bitcast i8* %call137 to %1*
   %tmp10 = call i8* @objc_retain(i8* %call137) nounwind
@@ -2852,14 +2852,14 @@
   br i1 %tobool16, label %end, label %if.then
 
 if.then:                                          ; preds = %land.lhs.true
-  %tmp19 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_25", align 8
+  %tmp19 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_25", align 8
   %call21 = call signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*)*)(i8* %call137, i8* %tmp19)
   %tobool22 = icmp eq i8 %call21, 0
   br i1 %tobool22, label %if.then44, label %land.lhs.true23
 
 land.lhs.true23:                                  ; preds = %if.then
-  %tmp24 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_26", align 8
-  %tmp26 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_28", align 8
+  %tmp24 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_26", align 8
+  %tmp26 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_28", align 8
   %tmp27 = bitcast %struct._class_t* %tmp24 to i8*
   %call2822 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp27, i8* %tmp26, i8* %call137)
   %tmp13 = bitcast i8* %call2822 to %5*
@@ -2869,38 +2869,38 @@
   br i1 %tobool30, label %if.then44, label %if.end
 
 if.end:                                           ; preds = %land.lhs.true23
-  %tmp32 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_29", align 8
-  %tmp33 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_31", align 8
+  %tmp32 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_29", align 8
+  %tmp33 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_31", align 8
   %tmp34 = bitcast %struct._class_t* %tmp32 to i8*
   %call35 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp34, i8* %tmp33)
-  %tmp37 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_33", align 8
+  %tmp37 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_33", align 8
   %call3923 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call35, i8* %tmp37, i8* %call2822, i32 signext 1, %4** %err)
   %cmp = icmp eq i8* %call3923, null
   br i1 %cmp, label %if.then44, label %end
 
 if.then44:                                        ; preds = %if.end, %land.lhs.true23, %if.then
   %url.025 = phi %5* [ %tmp13, %if.end ], [ %tmp13, %land.lhs.true23 ], [ null, %if.then ]
-  %tmp49 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_35", align 8
+  %tmp49 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_35", align 8
   %call51 = call %struct._NSRange bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %struct._NSRange (i8*, i8*, i64, i64)*)(i8* %call137, i8* %tmp49, i64 0, i64 0)
   %call513 = extractvalue %struct._NSRange %call51, 0
   %call514 = extractvalue %struct._NSRange %call51, 1
-  %tmp52 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_37", align 8
+  %tmp52 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_37", align 8
   %call548 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call137, i8* %tmp52, i64 %call513, i64 %call514)
-  %tmp55 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_38", align 8
-  %tmp56 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_40", align 8
+  %tmp55 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_38", align 8
+  %tmp56 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_40", align 8
   %tmp57 = bitcast %struct._class_t* %tmp55 to i8*
   %call58 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp57, i8* %tmp56)
-  %tmp59 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_42", align 8
+  %tmp59 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_42", align 8
   %call6110 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call548, i8* %tmp59, i8* %call58)
   %tmp15 = call i8* @objc_retain(i8* %call6110) nounwind
   call void @objc_release(i8* %call137) nounwind
-  %tmp64 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_46", align 8
+  %tmp64 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_46", align 8
   %call66 = call signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, %1*)*)(i8* %call6110, i8* %tmp64, %1* bitcast (%struct.NSConstantString* @_unnamed_cfstring_44 to %1*))
   %tobool67 = icmp eq i8 %call66, 0
   br i1 %tobool67, label %if.end74, label %if.then68
 
 if.then68:                                        ; preds = %if.then44
-  %tmp70 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_48", align 8
+  %tmp70 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_48", align 8
   %call7220 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call6110, i8* %tmp70)
   %tmp16 = call i8* @objc_retain(i8* %call7220) nounwind
   call void @objc_release(i8* %call6110) nounwind
@@ -2909,52 +2909,52 @@
 if.end74:                                         ; preds = %if.then68, %if.then44
   %filename.0.in = phi i8* [ %call7220, %if.then68 ], [ %call6110, %if.then44 ]
   %filename.0 = bitcast i8* %filename.0.in to %1*
-  %tmp17 = load i8** bitcast (%0* @"\01l_objc_msgSend_fixup_isEqual_" to i8**), align 16
+  %tmp17 = load i8*, i8** bitcast (%0* @"\01l_objc_msgSend_fixup_isEqual_" to i8**), align 16
   %tmp18 = bitcast i8* %tmp17 to i8 (i8*, %struct._message_ref_t*, i8*, ...)*
   %call78 = call signext i8 (i8*, %struct._message_ref_t*, i8*, ...)* %tmp18(i8* %call137, %struct._message_ref_t* bitcast (%0* @"\01l_objc_msgSend_fixup_isEqual_" to %struct._message_ref_t*), i8* %filename.0.in)
   %tobool79 = icmp eq i8 %call78, 0
   br i1 %tobool79, label %land.lhs.true80, label %if.then109
 
 land.lhs.true80:                                  ; preds = %if.end74
-  %tmp82 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_25", align 8
+  %tmp82 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_25", align 8
   %call84 = call signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*)*)(i8* %filename.0.in, i8* %tmp82)
   %tobool86 = icmp eq i8 %call84, 0
   br i1 %tobool86, label %if.then109, label %if.end106
 
 if.end106:                                        ; preds = %land.lhs.true80
-  %tmp88 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_26", align 8
-  %tmp90 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_28", align 8
+  %tmp88 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_26", align 8
+  %tmp90 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_28", align 8
   %tmp91 = bitcast %struct._class_t* %tmp88 to i8*
   %call9218 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp91, i8* %tmp90, i8* %filename.0.in)
   %tmp20 = bitcast i8* %call9218 to %5*
   %tmp21 = call i8* @objc_retain(i8* %call9218) nounwind
   %tmp22 = bitcast %5* %url.025 to i8*
   call void @objc_release(i8* %tmp22) nounwind
-  %tmp94 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_29", align 8
-  %tmp95 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_31", align 8
+  %tmp94 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_29", align 8
+  %tmp95 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_31", align 8
   %tmp96 = bitcast %struct._class_t* %tmp94 to i8*
   %call97 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp96, i8* %tmp95)
-  %tmp99 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_33", align 8
+  %tmp99 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_33", align 8
   %call10119 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call97, i8* %tmp99, i8* %call9218, i32 signext 1, %4** %err)
   %phitmp = icmp eq i8* %call10119, null
   br i1 %phitmp, label %if.then109, label %end
 
 if.then109:                                       ; preds = %if.end106, %land.lhs.true80, %if.end74
   %url.129 = phi %5* [ %tmp20, %if.end106 ], [ %url.025, %if.end74 ], [ %url.025, %land.lhs.true80 ]
-  %tmp110 = load %4** %err, align 8
+  %tmp110 = load %4*, %4** %err, align 8
   %tobool111 = icmp eq %4* %tmp110, null
   br i1 %tobool111, label %if.then112, label %if.end125
 
 if.then112:                                       ; preds = %if.then109
-  %tmp113 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_50", align 8
-  %tmp114 = load %1** @NSCocoaErrorDomain, align 8
-  %tmp115 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_51", align 8
+  %tmp113 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_50", align 8
+  %tmp114 = load %1*, %1** @NSCocoaErrorDomain, align 8
+  %tmp115 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_51", align 8
   %call117 = call %1* @truncatedString(%1* %filename.0, i64 1034)
-  %tmp118 = load %1** @NSFilePathErrorKey, align 8
-  %tmp119 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_53", align 8
+  %tmp118 = load %1*, %1** @NSFilePathErrorKey, align 8
+  %tmp119 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_53", align 8
   %tmp120 = bitcast %struct._class_t* %tmp115 to i8*
   %call12113 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp120, i8* %tmp119, %1* %call117, %1* %tmp118, i8* null)
-  %tmp122 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_55", align 8
+  %tmp122 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_55", align 8
   %tmp123 = bitcast %struct._class_t* %tmp113 to i8*
   %call12414 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp123, i8* %tmp122, %1* %tmp114, i64 258, i8* %call12113)
   %tmp23 = call i8* @objc_retain(i8* %call12414) nounwind
@@ -2965,11 +2965,11 @@
 
 if.end125:                                        ; preds = %if.then112, %if.then109
   %tmp127 = phi %4* [ %tmp110, %if.then109 ], [ %tmp28, %if.then112 ]
-  %tmp126 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_56", align 8
-  %tmp128 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_58", align 8
+  %tmp126 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_56", align 8
+  %tmp128 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_58", align 8
   %tmp129 = bitcast %struct._class_t* %tmp126 to i8*
   %call13015 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp129, i8* %tmp128, %4* %tmp127)
-  %tmp131 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_60", align 8
+  %tmp131 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_60", align 8
   %call13317 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call13015, i8* %tmp131)
   br label %end
 
diff --git a/llvm/test/Transforms/ObjCARC/cfg-hazards.ll b/llvm/test/Transforms/ObjCARC/cfg-hazards.ll
index 746d56d..8407e446 100644
--- a/llvm/test/Transforms/ObjCARC/cfg-hazards.ll
+++ b/llvm/test/Transforms/ObjCARC/cfg-hazards.ll
@@ -421,7 +421,7 @@
   store i8* %a, i8** %block, align 8
   %casted_block = bitcast i8** %block to void ()*
   call void @block_callee(void ()* %casted_block)
-  %reloaded_a = load i8** %block, align 8
+  %reloaded_a = load i8*, i8** %block, align 8
   call void @objc_release(i8* %reloaded_a) nounwind, !clang.imprecise_release !0
   br i1 undef, label %loop, label %exit
   
diff --git a/llvm/test/Transforms/ObjCARC/contract-storestrong-ivar.ll b/llvm/test/Transforms/ObjCARC/contract-storestrong-ivar.ll
index 078e45f..8b1a02f 100644
--- a/llvm/test/Transforms/ObjCARC/contract-storestrong-ivar.ll
+++ b/llvm/test/Transforms/ObjCARC/contract-storestrong-ivar.ll
@@ -16,11 +16,11 @@
 
 define hidden void @y(%0* nocapture %self, %1* %preferencesController) nounwind {
 entry:
-  %ivar = load i64* @"OBJC_IVAR_$_Controller.preferencesController", align 8
+  %ivar = load i64, i64* @"OBJC_IVAR_$_Controller.preferencesController", align 8
   %tmp = bitcast %0* %self to i8*
   %add.ptr = getelementptr inbounds i8, i8* %tmp, i64 %ivar
   %tmp1 = bitcast i8* %add.ptr to %1**
-  %tmp2 = load %1** %tmp1, align 8
+  %tmp2 = load %1*, %1** %tmp1, align 8
   %tmp3 = bitcast %1* %preferencesController to i8*
   %tmp4 = tail call i8* @objc_retain(i8* %tmp3) nounwind
   %tmp5 = bitcast %1* %tmp2 to i8*
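
Volatile loads, seen throughout the ObjCARC tests above and below, follow suit: the volatile keyword stays put and the value type is inserted after it. A minimal sketch (hypothetical function):

define i1 @volatile_example(i1* %q) {
  %j = load volatile i1, i1* %q
  ret i1 %j
}
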
diff --git a/llvm/test/Transforms/ObjCARC/contract-storestrong.ll b/llvm/test/Transforms/ObjCARC/contract-storestrong.ll
index c218e33..aadc3a2 100644
--- a/llvm/test/Transforms/ObjCARC/contract-storestrong.ll
+++ b/llvm/test/Transforms/ObjCARC/contract-storestrong.ll
@@ -16,7 +16,7 @@
 define void @test0(i8* %p) {
 entry:
   %0 = tail call i8* @objc_retain(i8* %p) nounwind
-  %tmp = load i8** @x, align 8
+  %tmp = load i8*, i8** @x, align 8
   store i8* %0, i8** @x, align 8
   tail call void @objc_release(i8* %tmp) nounwind
   ret void
@@ -27,7 +27,7 @@
 ; CHECK-LABEL: define void @test1(i8* %p) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) [[NUW]]
-; CHECK-NEXT:   %tmp = load volatile i8** @x, align 8
+; CHECK-NEXT:   %tmp = load volatile i8*, i8** @x, align 8
 ; CHECK-NEXT:   store i8* %0, i8** @x, align 8
 ; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) [[NUW]]
 ; CHECK-NEXT:   ret void
@@ -35,7 +35,7 @@
 define void @test1(i8* %p) {
 entry:
   %0 = tail call i8* @objc_retain(i8* %p) nounwind
-  %tmp = load volatile i8** @x, align 8
+  %tmp = load volatile i8*, i8** @x, align 8
   store i8* %0, i8** @x, align 8
   tail call void @objc_release(i8* %tmp) nounwind
   ret void
@@ -46,7 +46,7 @@
 ; CHECK-LABEL: define void @test2(i8* %p) {
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) [[NUW]]
-; CHECK-NEXT:   %tmp = load i8** @x, align 8
+; CHECK-NEXT:   %tmp = load i8*, i8** @x, align 8
 ; CHECK-NEXT:   store volatile i8* %0, i8** @x, align 8
 ; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) [[NUW]]
 ; CHECK-NEXT:   ret void
@@ -54,7 +54,7 @@
 define void @test2(i8* %p) {
 entry:
   %0 = tail call i8* @objc_retain(i8* %p) nounwind
-  %tmp = load i8** @x, align 8
+  %tmp = load i8*, i8** @x, align 8
   store volatile i8* %0, i8** @x, align 8
   tail call void @objc_release(i8* %tmp) nounwind
   ret void
@@ -66,7 +66,7 @@
 ; CHECK-LABEL: define void @test3(i8* %newValue) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    %x0 = tail call i8* @objc_retain(i8* %newValue) [[NUW]]
-; CHECK-NEXT:    %x1 = load i8** @x, align 8
+; CHECK-NEXT:    %x1 = load i8*, i8** @x, align 8
 ; CHECK-NEXT:    store i8* %x0, i8** @x, align 8
 ; CHECK-NEXT:    tail call void @use_pointer(i8* %x1), !clang.arc.no_objc_arc_exceptions !0
 ; CHECK-NEXT:    tail call void @objc_release(i8* %x1) [[NUW]], !clang.imprecise_release !0
@@ -75,7 +75,7 @@
 define void @test3(i8* %newValue) {
 entry:
   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
-  %x1 = load i8** @x, align 8
+  %x1 = load i8*, i8** @x, align 8
   store i8* %newValue, i8** @x, align 8
   tail call void @use_pointer(i8* %x1), !clang.arc.no_objc_arc_exceptions !0
   tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
@@ -87,7 +87,7 @@
 ; CHECK-LABEL:  define i1 @test4(i8* %newValue, i8* %foo) {
 ; CHECK-NEXT:   entry:
 ; CHECK-NEXT:     %x0 = tail call i8* @objc_retain(i8* %newValue) [[NUW]]
-; CHECK-NEXT:     %x1 = load i8** @x, align 8
+; CHECK-NEXT:     %x1 = load i8*, i8** @x, align 8
 ; CHECK-NEXT:     store i8* %x0, i8** @x, align 8
 ; CHECK-NEXT:     %t = icmp eq i8* %x1, %foo
 ; CHECK-NEXT:     tail call void @objc_release(i8* %x1) [[NUW]], !clang.imprecise_release !0
@@ -96,7 +96,7 @@
 define i1 @test4(i8* %newValue, i8* %foo) {
 entry:
   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
-  %x1 = load i8** @x, align 8
+  %x1 = load i8*, i8** @x, align 8
   store i8* %newValue, i8** @x, align 8
   %t = icmp eq i8* %x1, %foo
   tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
@@ -112,7 +112,7 @@
 define i1 @test5(i8* %newValue, i8* %foo) {
 entry:
   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
-  %x1 = load i8** @x, align 8
+  %x1 = load i8*, i8** @x, align 8
   %t = icmp eq i8* %x1, %foo
   store i8* %newValue, i8** @x, align 8
   tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
@@ -128,7 +128,7 @@
 define i1 @test6(i8* %newValue, i8* %foo) {
 entry:
   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
-  %x1 = load i8** @x, align 8
+  %x1 = load i8*, i8** @x, align 8
   tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
   %t = icmp eq i8* %x1, %foo
   store i8* %newValue, i8** @x, align 8
@@ -140,14 +140,14 @@
 ; CHECK-LABEL: define void @test7(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT:   %0 = tail call i8* @objc_retain(i8* %p) [[NUW]]
-; CHECK-NEXT:   %tmp = load i8** @x, align 8
+; CHECK-NEXT:   %tmp = load i8*, i8** @x, align 8
 ; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test7(i8* %p) {
 entry:
   %0 = tail call i8* @objc_retain(i8* %p) nounwind
-  %tmp = load i8** @x, align 8
+  %tmp = load i8*, i8** @x, align 8
   tail call void @objc_release(i8* %tmp) nounwind
   ret void
 }
@@ -156,14 +156,14 @@
 
 ; CHECK-LABEL: define void @test8(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:   %tmp = load i8** @x, align 8
+; CHECK-NEXT:   %tmp = load i8*, i8** @x, align 8
 ; CHECK-NEXT:   store i8* %p, i8** @x, align 8
 ; CHECK-NEXT:   tail call void @objc_release(i8* %tmp) [[NUW]]
 ; CHECK-NEXT:   ret void
 ; CHECK-NEXT: }
 define void @test8(i8* %p) {
 entry:
-  %tmp = load i8** @x, align 8
+  %tmp = load i8*, i8** @x, align 8
   store i8* %p, i8** @x, align 8
   tail call void @objc_release(i8* %tmp) nounwind
   ret void
@@ -181,7 +181,7 @@
 entry:
   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
   tail call void @objc_release(i8* %unrelated_ptr) nounwind, !clang.imprecise_release !0
-  %x1 = load i8** @x, align 8
+  %x1 = load i8*, i8** @x, align 8
   tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
   %t = icmp eq i8* %x1, %foo
   store i8* %newValue, i8** @x, align 8
@@ -196,7 +196,7 @@
 entry:
   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
   call void @use_pointer(i8* %unrelated_ptr)
-  %x1 = load i8** @x, align 8
+  %x1 = load i8*, i8** @x, align 8
   tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
   %t = icmp eq i8* %x1, %foo
   store i8* %newValue, i8** @x, align 8
@@ -211,7 +211,7 @@
 entry:
   %x0 = tail call i8* @objc_retain(i8* %newValue) nounwind
   %t = icmp eq i8* %newValue, %foo
-  %x1 = load i8** @x, align 8
+  %x1 = load i8*, i8** @x, align 8
   tail call void @objc_release(i8* %x1) nounwind, !clang.imprecise_release !0
   store i8* %newValue, i8** @x, align 8
   ret i1 %t
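
CHECK lines match the printed IR, so FileCheck patterns are migrated in lockstep with the code they verify, as in contract-storestrong.ll above. A minimal sketch (hypothetical test, mirroring the @x pattern above):

@x = global i8* null

define i8* @check_example() {
; CHECK: %t = load i8*, i8** @x
  %t = load i8*, i8** @x
  ret i8* %t
}
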
diff --git a/llvm/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/llvm/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
index c72566c..ff9a5ef 100644
--- a/llvm/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
+++ b/llvm/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
@@ -36,8 +36,8 @@
 
 define i32 @main() uwtable ssp {
 entry:
-  %tmp = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_", align 8, !dbg !37
-  %tmp1 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8, !dbg !37, !invariant.load !38
+  %tmp = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_", align 8, !dbg !37
+  %tmp1 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8, !dbg !37, !invariant.load !38
   %tmp2 = bitcast %struct._class_t* %tmp to i8*, !dbg !37
 ; CHECK: call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp2, i8* %tmp1)
   %call = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp2, i8* %tmp1), !dbg !37, !clang.arc.no_objc_arc_exceptions !38
@@ -88,8 +88,8 @@
 entry:
   %tmp = call i8* @objc_retain(i8* %obj) nounwind
   call void @llvm.dbg.value(metadata i8* %obj, i64 0, metadata !32, metadata !{}), !dbg !55
-  %tmp1 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_1", align 8, !dbg !56
-  %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_5", align 8, !dbg !56, !invariant.load !38
+  %tmp1 = load %struct._class_t*, %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_1", align 8, !dbg !56
+  %tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_5", align 8, !dbg !56, !invariant.load !38
   %tmp3 = bitcast %struct._class_t* %tmp1 to i8*, !dbg !56
   call void (i8*, i8*, %0*, %0*, ...)* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %0*, %0*, ...)*)(i8* %tmp3, i8* %tmp2, %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_3 to %0*), %0* bitcast (%struct.NSConstantString* @_unnamed_cfstring_3 to %0*)), !dbg !56, !clang.arc.no_objc_arc_exceptions !38
   call void @objc_release(i8* %obj) nounwind, !dbg !58, !clang.imprecise_release !38
diff --git a/llvm/test/Transforms/ObjCARC/escape.ll b/llvm/test/Transforms/ObjCARC/escape.ll
index fe60099..c7a1b03 100644
--- a/llvm/test/Transforms/ObjCARC/escape.ll
+++ b/llvm/test/Transforms/ObjCARC/escape.ll
@@ -47,13 +47,13 @@
   store i8* %tmp5, i8** %block.captured, align 8
   %tmp6 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8*
   %tmp7 = call i8* @objc_retainBlock(i8* %tmp6) nounwind, !clang.arc.copy_on_escape !0
-  %tmp8 = load %struct.__block_byref_weakLogNTimes** %byref.forwarding, align 8
+  %tmp8 = load %struct.__block_byref_weakLogNTimes*, %struct.__block_byref_weakLogNTimes** %byref.forwarding, align 8
   %weakLogNTimes3 = getelementptr inbounds %struct.__block_byref_weakLogNTimes, %struct.__block_byref_weakLogNTimes* %tmp8, i64 0, i32 6
   %tmp9 = bitcast void (...)** %weakLogNTimes3 to i8**
   %tmp10 = call i8* @objc_storeWeak(i8** %tmp9, i8* %tmp7) nounwind
   %tmp11 = getelementptr inbounds i8, i8* %tmp7, i64 16
   %tmp12 = bitcast i8* %tmp11 to i8**
-  %tmp13 = load i8** %tmp12, align 8
+  %tmp13 = load i8*, i8** %tmp12, align 8
   %tmp14 = bitcast i8* %tmp13 to void (i8*, i32)*
   call void %tmp14(i8* %tmp7, i32 10) nounwind, !clang.arc.no_objc_arc_exceptions !0
   call void @objc_release(i8* %tmp7) nounwind, !clang.imprecise_release !0
@@ -102,13 +102,13 @@
   store i8* %tmp5, i8** %block.captured, align 8
   %tmp6 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>* %block to i8*
   %tmp7 = call i8* @objc_retainBlock(i8* %tmp6) nounwind, !clang.arc.copy_on_escape !0
-  %tmp8 = load %struct.__block_byref_weakLogNTimes** %byref.forwarding, align 8
+  %tmp8 = load %struct.__block_byref_weakLogNTimes*, %struct.__block_byref_weakLogNTimes** %byref.forwarding, align 8
   %weakLogNTimes3 = getelementptr inbounds %struct.__block_byref_weakLogNTimes, %struct.__block_byref_weakLogNTimes* %tmp8, i64 0, i32 6
   %tmp9 = bitcast void (...)** %weakLogNTimes3 to i8**
   %tmp10 = call i8* @not_really_objc_storeWeak(i8** %tmp9, i8* %tmp7) nounwind
   %tmp11 = getelementptr inbounds i8, i8* %tmp7, i64 16
   %tmp12 = bitcast i8* %tmp11 to i8**
-  %tmp13 = load i8** %tmp12, align 8
+  %tmp13 = load i8*, i8** %tmp12, align 8
   %tmp14 = bitcast i8* %tmp13 to void (i8*, i32)*
   call void %tmp14(i8* %tmp7, i32 10) nounwind, !clang.arc.no_objc_arc_exceptions !0
   call void @objc_release(i8* %tmp7) nounwind, !clang.imprecise_release !0
diff --git a/llvm/test/Transforms/ObjCARC/gvn.ll b/llvm/test/Transforms/ObjCARC/gvn.ll
index 2d120e7..6f82854 100644
--- a/llvm/test/Transforms/ObjCARC/gvn.ll
+++ b/llvm/test/Transforms/ObjCARC/gvn.ll
@@ -11,15 +11,15 @@
 
 ; CHECK: define i8* @test0(i32 %n)
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: %s = load i8** @x
+; CHECK-NEXT: %s = load i8*, i8** @x
 ; CHECK-NOT: load
 ; CHECK: ret i8* %s
 ; CHECK-NEXT: }
 define i8* @test0(i32 %n) nounwind {
 entry:
-  %s = load i8** @x
+  %s = load i8*, i8** @x
   %0 = tail call i8* @objc_retain(i8* %s) nounwind
-  %t = load i8** @x
+  %t = load i8*, i8** @x
   ret i8* %t
 }
 
@@ -33,9 +33,9 @@
 ; CHECK: }
 define i8* @test1(i32 %n) nounwind {
 entry:
-  %s = load i8** @x
+  %s = load i8*, i8** @x
   %0 = call i32 @objc_sync_enter(i8* %s)
-  %t = load i8** @x
+  %t = load i8*, i8** @x
   %1 = call i32 @objc_sync_exit(i8* %s)
   ret i8* %t
 }
diff --git a/llvm/test/Transforms/ObjCARC/intrinsic-use.ll b/llvm/test/Transforms/ObjCARC/intrinsic-use.ll
index b1e56c8..d85cb3e 100644
--- a/llvm/test/Transforms/ObjCARC/intrinsic-use.ll
+++ b/llvm/test/Transforms/ObjCARC/intrinsic-use.ll
@@ -22,13 +22,13 @@
 ; CHECK-NEXT:   store i8* %y, i8** %temp0
 ; CHECK-NEXT:   @objc_retain(i8* %y)
 ; CHECK-NEXT:   call void @test0_helper
-; CHECK-NEXT:   [[VAL1:%.*]] = load i8** %temp0
+; CHECK-NEXT:   [[VAL1:%.*]] = load i8*, i8** %temp0
 ; CHECK-NEXT:   call void (...)* @clang.arc.use(i8* %y)
 ; CHECK-NEXT:   @objc_retain(i8* [[VAL1]])
 ; CHECK-NEXT:   @objc_release(i8* %y)
 ; CHECK-NEXT:   store i8* [[VAL1]], i8** %temp1
 ; CHECK-NEXT:   call void @test0_helper
-; CHECK-NEXT:   [[VAL2:%.*]] = load i8** %temp1
+; CHECK-NEXT:   [[VAL2:%.*]] = load i8*, i8** %temp1
 ; CHECK-NEXT:   call void (...)* @clang.arc.use(i8* [[VAL1]])
 ; CHECK-NEXT:   @objc_retain(i8* [[VAL2]])
 ; CHECK-NEXT:   @objc_release(i8* [[VAL1]])
@@ -47,13 +47,13 @@
   %1 = call i8* @objc_retain(i8* %y) nounwind
   store i8* %y, i8** %temp0
   call void @test0_helper(i8* %x, i8** %temp0)
-  %val1 = load i8** %temp0
+  %val1 = load i8*, i8** %temp0
   %2 = call i8* @objc_retain(i8* %val1) nounwind
   call void (...)* @clang.arc.use(i8* %y) nounwind
   call void @objc_release(i8* %y) nounwind
   store i8* %val1, i8** %temp1
   call void @test0_helper(i8* %x, i8** %temp1)
-  %val2 = load i8** %temp1
+  %val2 = load i8*, i8** %temp1
   %3 = call i8* @objc_retain(i8* %val2) nounwind
   call void (...)* @clang.arc.use(i8* %val1) nounwind
   call void @objc_release(i8* %val1) nounwind
@@ -70,13 +70,13 @@
 ; CHECK-NEXT:   store i8* %y, i8** %temp0
 ; CHECK-NEXT:   @objc_retain(i8* %y)
 ; CHECK-NEXT:   call void @test0_helper
-; CHECK-NEXT:   [[VAL1:%.*]] = load i8** %temp0
+; CHECK-NEXT:   [[VAL1:%.*]] = load i8*, i8** %temp0
 ; CHECK-NEXT:   call void (...)* @clang.arc.use(i8* %y)
 ; CHECK-NEXT:   @objc_retain(i8* [[VAL1]])
 ; CHECK-NEXT:   @objc_release(i8* %y)
 ; CHECK-NEXT:   store i8* [[VAL1]], i8** %temp1
 ; CHECK-NEXT:   call void @test0_helper
-; CHECK-NEXT:   [[VAL2:%.*]] = load i8** %temp1
+; CHECK-NEXT:   [[VAL2:%.*]] = load i8*, i8** %temp1
 ; CHECK-NEXT:   call void (...)* @clang.arc.use(i8* [[VAL1]])
 ; CHECK-NEXT:   @objc_retain(i8* [[VAL2]])
 ; CHECK-NEXT:   @objc_release(i8* [[VAL1]])
@@ -93,13 +93,13 @@
   %1 = call i8* @objc_retain(i8* %y) nounwind
   store i8* %y, i8** %temp0
   call void @test0_helper(i8* %x, i8** %temp0)
-  %val1 = load i8** %temp0
+  %val1 = load i8*, i8** %temp0
   %2 = call i8* @objc_retain(i8* %val1) nounwind
   call void (...)* @clang.arc.use(i8* %y) nounwind
   call void @objc_release(i8* %y) nounwind, !clang.imprecise_release !0
   store i8* %val1, i8** %temp1
   call void @test0_helper(i8* %x, i8** %temp1)
-  %val2 = load i8** %temp1
+  %val2 = load i8*, i8** %temp1
   %3 = call i8* @objc_retain(i8* %val2) nounwind
   call void (...)* @clang.arc.use(i8* %val1) nounwind
   call void @objc_release(i8* %val1) nounwind, !clang.imprecise_release !0
diff --git a/llvm/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll b/llvm/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll
index 85fd06b..9894eb4 100644
--- a/llvm/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll
+++ b/llvm/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll
@@ -80,20 +80,20 @@
 
 define hidden %14* @foo(%15* %arg, %16* %arg2) {
 bb:
-  %tmp = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_3725", align 8
+  %tmp = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_3725", align 8
   %tmp4 = bitcast %15* %arg to i8*
   %tmp5 = tail call %18* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %18* (i8*, i8*)*)(i8* %tmp4, i8* %tmp)
   %tmp6 = bitcast %18* %tmp5 to i8*
   %tmp7 = tail call i8* @objc_retain(i8* %tmp6) nounwind
-  %tmp8 = load %2** @"\01L_OBJC_CLASSLIST_REFERENCES_$_40", align 8
-  %tmp9 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_4227", align 8
+  %tmp8 = load %2*, %2** @"\01L_OBJC_CLASSLIST_REFERENCES_$_40", align 8
+  %tmp9 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_4227", align 8
   %tmp10 = bitcast %2* %tmp8 to i8*
   %tmp11 = tail call %19* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %19* (i8*, i8*)*)(i8* %tmp10, i8* %tmp9)
-  %tmp12 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_4631", align 8
+  %tmp12 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_4631", align 8
   %tmp13 = bitcast %19* %tmp11 to i8*
   %tmp14 = tail call signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, %13*)*)(i8* %tmp13, i8* %tmp12, %13* bitcast (%12* @_unnamed_cfstring_386 to %13*))
   %tmp15 = bitcast %16* %arg2 to i8*
-  %tmp16 = load i8** bitcast (%0* @"\01l_objc_msgSend_fixup_count" to i8**), align 16
+  %tmp16 = load i8*, i8** bitcast (%0* @"\01l_objc_msgSend_fixup_count" to i8**), align 16
   %tmp17 = bitcast i8* %tmp16 to i64 (i8*, %1*)*
   %tmp18 = tail call i64 %tmp17(i8* %tmp15, %1* bitcast (%0* @"\01l_objc_msgSend_fixup_count" to %1*))
   %tmp19 = icmp eq i64 %tmp18, 0
@@ -110,104 +110,104 @@
 
 bb25:                                             ; preds = %bb22, %bb20
   %tmp26 = phi i1 [ %tmp21, %bb20 ], [ false, %bb22 ]
-  %tmp27 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_188", align 8
+  %tmp27 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_188", align 8
   %tmp28 = tail call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp7, i8* %tmp27)
   %tmp29 = tail call i8* @objc_explicit_autorelease(i8* %tmp28) nounwind
   %tmp30 = bitcast i8* %tmp29 to %18*
   tail call void @objc_release(i8* %tmp7) nounwind
-  %tmp31 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_389", align 8
+  %tmp31 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_389", align 8
   %tmp32 = tail call %20* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %20* (i8*, i8*)*)(i8* %tmp29, i8* %tmp31)
-  %tmp33 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_391", align 8
+  %tmp33 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_391", align 8
   %tmp34 = bitcast %20* %tmp32 to i8*
   tail call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %16*)*)(i8* %tmp34, i8* %tmp33, %16* %arg2)
   br i1 %tmp26, label %bb46, label %bb35
 
 bb35:                                             ; preds = %bb25
-  %tmp36 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_389", align 8
+  %tmp36 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_389", align 8
   %tmp37 = tail call %20* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %20* (i8*, i8*)*)(i8* %tmp29, i8* %tmp36)
-  %tmp38 = load %2** @"\01L_OBJC_CLASSLIST_REFERENCES_$_70", align 8
-  %tmp39 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_393", align 8
+  %tmp38 = load %2*, %2** @"\01L_OBJC_CLASSLIST_REFERENCES_$_70", align 8
+  %tmp39 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_393", align 8
   %tmp40 = bitcast %2* %tmp38 to i8*
   %tmp41 = tail call %21* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %21* (i8*, i8*, i8)*)(i8* %tmp40, i8* %tmp39, i8 signext 1)
   %tmp42 = bitcast %21* %tmp41 to i8*
-  %tmp43 = load %13** @NSPrintHeaderAndFooter, align 8
-  %tmp44 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_159", align 8
+  %tmp43 = load %13*, %13** @NSPrintHeaderAndFooter, align 8
+  %tmp44 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_159", align 8
   %tmp45 = bitcast %20* %tmp37 to i8*
   tail call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, %13*)*)(i8* %tmp45, i8* %tmp44, i8* %tmp42, %13* %tmp43)
   br label %bb46
 
 bb46:                                             ; preds = %bb35, %bb25, %bb22
   %tmp47 = phi %18* [ %tmp30, %bb35 ], [ %tmp30, %bb25 ], [ %tmp23, %bb22 ]
-  %tmp48 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_328", align 8
+  %tmp48 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_328", align 8
   %tmp49 = tail call %22* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %22* (i8*, i8*)*)(i8* %tmp4, i8* %tmp48)
   %tmp50 = bitcast %22* %tmp49 to i8*
-  %tmp51 = load i8** bitcast (%0* @"\01l_objc_msgSend_fixup_count" to i8**), align 16
+  %tmp51 = load i8*, i8** bitcast (%0* @"\01l_objc_msgSend_fixup_count" to i8**), align 16
   %tmp52 = bitcast i8* %tmp51 to i64 (i8*, %1*)*
   %tmp53 = tail call i64 %tmp52(i8* %tmp50, %1* bitcast (%0* @"\01l_objc_msgSend_fixup_count" to %1*))
   %tmp54 = icmp eq i64 %tmp53, 0
   br i1 %tmp54, label %bb55, label %bb57
 
 bb55:                                             ; preds = %bb46
-  %tmp56 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_395", align 8
+  %tmp56 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_395", align 8
   tail call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*)*)(i8* %tmp4, i8* %tmp56)
   br label %bb57
 
 bb57:                                             ; preds = %bb55, %bb46
-  %tmp58 = load %2** @"\01L_OBJC_CLASSLIST_REFERENCES_$_396", align 8
-  %tmp59 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_328", align 8
+  %tmp58 = load %2*, %2** @"\01L_OBJC_CLASSLIST_REFERENCES_$_396", align 8
+  %tmp59 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_328", align 8
   %tmp60 = tail call %22* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %22* (i8*, i8*)*)(i8* %tmp4, i8* %tmp59)
   %tmp61 = bitcast %22* %tmp60 to i8*
-  %tmp62 = load i8** bitcast (%0* @"\01l_objc_msgSend_fixup_objectAtIndex_" to i8**), align 16
+  %tmp62 = load i8*, i8** bitcast (%0* @"\01l_objc_msgSend_fixup_objectAtIndex_" to i8**), align 16
   %tmp63 = bitcast i8* %tmp62 to i8* (i8*, %1*, i64)*
   %tmp64 = tail call i8* %tmp63(i8* %tmp61, %1* bitcast (%0* @"\01l_objc_msgSend_fixup_objectAtIndex_" to %1*), i64 0)
-  %tmp65 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_398", align 8
+  %tmp65 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_398", align 8
   %tmp66 = tail call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp64, i8* %tmp65)
   %tmp67 = bitcast i8* %tmp66 to %23*
-  %tmp68 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_400", align 8
+  %tmp68 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_400", align 8
   %tmp69 = bitcast %2* %tmp58 to i8*
   %tmp70 = tail call %14* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %14* (i8*, i8*, %23*, %18*)*)(i8* %tmp69, i8* %tmp68, %23* %tmp67, %18* %tmp47)
   %tmp71 = bitcast %14* %tmp70 to i8*
   ; hack to prevent the optimizer from using objc_retainAutoreleasedReturnValue.
   %tmp71x = getelementptr i8, i8* %tmp71, i64 1
   %tmp72 = tail call i8* @objc_retain(i8* %tmp71x) nounwind
-  %tmp73 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_402", align 8
+  %tmp73 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_402", align 8
   tail call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8)*)(i8* %tmp72, i8* %tmp73, i8 signext 1)
-  %tmp74 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_404", align 8
+  %tmp74 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_404", align 8
   tail call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8)*)(i8* %tmp72, i8* %tmp74, i8 signext 1)
-  %tmp75 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_328", align 8
+  %tmp75 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_328", align 8
   %tmp76 = tail call %22* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %22* (i8*, i8*)*)(i8* %tmp4, i8* %tmp75)
   %tmp77 = bitcast %22* %tmp76 to i8*
-  %tmp78 = load i8** bitcast (%0* @"\01l_objc_msgSend_fixup_objectAtIndex_" to i8**), align 16
+  %tmp78 = load i8*, i8** bitcast (%0* @"\01l_objc_msgSend_fixup_objectAtIndex_" to i8**), align 16
   %tmp79 = bitcast i8* %tmp78 to i8* (i8*, %1*, i64)*
   %tmp80 = tail call i8* %tmp79(i8* %tmp77, %1* bitcast (%0* @"\01l_objc_msgSend_fixup_objectAtIndex_" to %1*), i64 0)
-  %tmp81 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_406", align 8
+  %tmp81 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_406", align 8
   tail call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i64)*)(i8* %tmp80, i8* %tmp81, i64 9223372036854775807)
-  %tmp82 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_408", align 8
+  %tmp82 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_408", align 8
   %tmp83 = tail call %24* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %24* (i8*, i8*)*)(i8* %tmp72, i8* %tmp82)
   %tmp84 = bitcast %24* %tmp83 to i8*
   %tmp85 = tail call i8* @objc_retain(i8* %tmp84) nounwind
-  %tmp86 = load %2** @"\01L_OBJC_CLASSLIST_REFERENCES_$_409", align 8
+  %tmp86 = load %2*, %2** @"\01L_OBJC_CLASSLIST_REFERENCES_$_409", align 8
   %tmp87 = bitcast %2* %tmp86 to i8*
-  %tmp88 = load i8** bitcast (%0* @"\01l_objc_msgSend_fixup_alloc" to i8**), align 16
+  %tmp88 = load i8*, i8** bitcast (%0* @"\01l_objc_msgSend_fixup_alloc" to i8**), align 16
   %tmp89 = bitcast i8* %tmp88 to i8* (i8*, %1*)*
   %tmp90 = tail call i8* %tmp89(i8* %tmp87, %1* bitcast (%0* @"\01l_objc_msgSend_fixup_alloc" to %1*))
-  %tmp91 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_8", align 8
+  %tmp91 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_8", align 8
   %tmp92 = tail call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp90, i8* %tmp91)
   %tmp93 = tail call i8* @objc_explicit_autorelease(i8* %tmp92) nounwind
   %tmp94 = bitcast i8* %tmp93 to %25*
-  %tmp95 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_411", align 8
+  %tmp95 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_411", align 8
   tail call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %25*)*)(i8* %tmp85, i8* %tmp95, %25* %tmp94)
   tail call void @objc_release(i8* %tmp93) nounwind
-  %tmp96 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_148", align 8
+  %tmp96 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_148", align 8
   %tmp97 = tail call signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*)*)(i8* %tmp4, i8* %tmp96)
   %tmp98 = icmp eq i8 %tmp97, 0
   br i1 %tmp98, label %bb99, label %bb104
 
 bb99:                                             ; preds = %bb57
-  %tmp100 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_413", align 8
+  %tmp100 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_413", align 8
   %tmp101 = tail call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*)*)(i8* %tmp85, i8* %tmp100)
   %tmp102 = or i64 %tmp101, 12
-  %tmp103 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_415", align 8
+  %tmp103 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_415", align 8
   tail call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i64)*)(i8* %tmp85, i8* %tmp103, i64 %tmp102)
   br label %bb104
 
diff --git a/llvm/test/Transforms/ObjCARC/move-and-merge-autorelease.ll b/llvm/test/Transforms/ObjCARC/move-and-merge-autorelease.ll
index e5d2f07..5d19f35 100644
--- a/llvm/test/Transforms/ObjCARC/move-and-merge-autorelease.ll
+++ b/llvm/test/Transforms/ObjCARC/move-and-merge-autorelease.ll
@@ -35,10 +35,10 @@
 
 define hidden %0* @foo(%1* %arg, %3* %arg3) {
 bb:
-  %tmp16 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_620", align 8
+  %tmp16 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_620", align 8
   %tmp17 = bitcast %3* %arg3 to i8*
   %tmp18 = call %4* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %4* (i8*, i8*)*)(i8* %tmp17, i8* %tmp16)
-  %tmp19 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_622", align 8
+  %tmp19 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_622", align 8
   %tmp20 = bitcast %4* %tmp18 to i8*
   %tmp21 = call %5* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %5* (i8*, i8*)*)(i8* %tmp20, i8* %tmp19)
   %tmp22 = bitcast %5* %tmp21 to i8*
@@ -48,11 +48,11 @@
   br i1 %tmp26, label %bb81, label %bb27
 
 bb27:                                             ; preds = %bb
-  %tmp29 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_11", align 8
+  %tmp29 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_11", align 8
   %tmp30 = bitcast %1* %arg to i8*
   %tmp31 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp30, i8* %tmp29)
   %tmp34 = call i8* @objc_retain(i8* %tmp31) nounwind
-  %tmp37 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_421455", align 8
+  %tmp37 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_421455", align 8
   %tmp39 = call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*)*)(i8* %tmp34, i8* %tmp37)
   %tmp40 = bitcast %0* %tmp39 to i8*
   %tmp41 = call i8* @objc_retain(i8* %tmp40) nounwind
@@ -61,7 +61,7 @@
   br i1 %tmp44, label %bb45, label %bb55
 
 bb45:                                             ; preds = %bb27
-  %tmp47 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_624", align 8
+  %tmp47 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_624", align 8
   %tmp49 = call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*)*)(i8* %tmp34, i8* %tmp47)
   %tmp51 = bitcast %0* %tmp49 to i8*
   %tmp52 = call i8* @objc_retain(i8* %tmp51) nounwind
@@ -74,14 +74,14 @@
   br i1 %tmp57, label %bb76, label %bb58
 
 bb58:                                             ; preds = %bb55
-  %tmp60 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_598", align 8
+  %tmp60 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_598", align 8
   %tmp61 = bitcast %0* %tmp13.0 to i8*
   %tmp62 = call signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*)*)(i8* %tmp61, i8* %tmp60)
   %tmp64 = icmp eq i8 %tmp62, 0
   br i1 %tmp64, label %bb76, label %bb65
 
 bb65:                                             ; preds = %bb58
-  %tmp68 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_626", align 8
+  %tmp68 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_626", align 8
   %tmp69 = bitcast %0* %tmp13.0 to i8*
   %tmp70 = call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %5*)*)(i8* %tmp69, i8* %tmp68, %5* %tmp24)
   %tmp72 = bitcast %0* %tmp70 to i8*
diff --git a/llvm/test/Transforms/ObjCARC/nested.ll b/llvm/test/Transforms/ObjCARC/nested.ll
index ac0e7c7..fdd67f7 100644
--- a/llvm/test/Transforms/ObjCARC/nested.ll
+++ b/llvm/test/Transforms/ObjCARC/nested.ll
@@ -37,15 +37,15 @@
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
   call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
-  %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp2, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %iszero = icmp eq i64 %call, 0
   br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
 
 forcoll.loopinit:
   %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState, %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
-  %mutationsptr = load i64** %mutationsptr.ptr, align 8
-  %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+  %mutationsptr = load i64*, i64** %mutationsptr.ptr, align 8
+  %forcoll.initial-mutations = load i64, i64* %mutationsptr, align 8
   %stateitems.ptr = getelementptr inbounds %struct.__objcFastEnumerationState, %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 1
   br label %forcoll.loopbody.outer
 
@@ -57,8 +57,8 @@
 
 forcoll.loopbody:
   %forcoll.index = phi i64 [ 0, %forcoll.loopbody.outer ], [ %4, %forcoll.notmutated ]
-  %mutationsptr3 = load i64** %mutationsptr.ptr, align 8
-  %statemutations = load i64* %mutationsptr3, align 8
+  %mutationsptr3 = load i64*, i64** %mutationsptr.ptr, align 8
+  %statemutations = load i64, i64* %mutationsptr3, align 8
   %2 = icmp eq i64 %statemutations, %forcoll.initial-mutations
   br i1 %2, label %forcoll.notmutated, label %forcoll.mutated
 
@@ -67,16 +67,16 @@
   br label %forcoll.notmutated
 
 forcoll.notmutated:
-  %stateitems = load i8*** %stateitems.ptr, align 8
+  %stateitems = load i8**, i8*** %stateitems.ptr, align 8
   %currentitem.ptr = getelementptr i8*, i8** %stateitems, i64 %forcoll.index
-  %3 = load i8** %currentitem.ptr, align 8
+  %3 = load i8*, i8** %currentitem.ptr, align 8
   call void @use(i8* %3)
   %4 = add i64 %forcoll.index, 1
   %exitcond = icmp eq i64 %4, %umax
   br i1 %exitcond, label %forcoll.refetch, label %forcoll.loopbody
 
 forcoll.refetch:
-  %tmp5 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp5 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call6 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp5, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %5 = icmp eq i64 %call6, 0
   br i1 %5, label %forcoll.empty, label %forcoll.loopbody.outer
@@ -102,15 +102,15 @@
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
   call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
-  %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call3 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp2, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %iszero = icmp eq i64 %call3, 0
   br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
 
 forcoll.loopinit:
   %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState, %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
-  %mutationsptr = load i64** %mutationsptr.ptr, align 8
-  %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+  %mutationsptr = load i64*, i64** %mutationsptr.ptr, align 8
+  %forcoll.initial-mutations = load i64, i64* %mutationsptr, align 8
   %stateitems.ptr = getelementptr inbounds %struct.__objcFastEnumerationState, %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 1
   br label %forcoll.loopbody.outer
 
@@ -122,8 +122,8 @@
 
 forcoll.loopbody:
   %forcoll.index = phi i64 [ 0, %forcoll.loopbody.outer ], [ %4, %forcoll.notmutated ]
-  %mutationsptr4 = load i64** %mutationsptr.ptr, align 8
-  %statemutations = load i64* %mutationsptr4, align 8
+  %mutationsptr4 = load i64*, i64** %mutationsptr.ptr, align 8
+  %statemutations = load i64, i64* %mutationsptr4, align 8
   %2 = icmp eq i64 %statemutations, %forcoll.initial-mutations
   br i1 %2, label %forcoll.notmutated, label %forcoll.mutated
 
@@ -132,16 +132,16 @@
   br label %forcoll.notmutated
 
 forcoll.notmutated:
-  %stateitems = load i8*** %stateitems.ptr, align 8
+  %stateitems = load i8**, i8*** %stateitems.ptr, align 8
   %currentitem.ptr = getelementptr i8*, i8** %stateitems, i64 %forcoll.index
-  %3 = load i8** %currentitem.ptr, align 8
+  %3 = load i8*, i8** %currentitem.ptr, align 8
   call void @use(i8* %3)
   %4 = add i64 %forcoll.index, 1
   %exitcond = icmp eq i64 %4, %umax
   br i1 %exitcond, label %forcoll.refetch, label %forcoll.loopbody
 
 forcoll.refetch:
-  %tmp6 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp6 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call7 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp6, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %5 = icmp eq i64 %call7, 0
   br i1 %5, label %forcoll.empty, label %forcoll.loopbody.outer
@@ -162,20 +162,20 @@
 entry:
   %state.ptr = alloca %struct.__objcFastEnumerationState, align 8
   %items.ptr = alloca [16 x i8*], align 8
-  %tmp = load i8** @g, align 8
+  %tmp = load i8*, i8** @g, align 8
   %0 = call i8* @objc_retain(i8* %tmp) nounwind
   %tmp2 = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
   call void @llvm.memset.p0i8.i64(i8* %tmp2, i8 0, i64 64, i32 8, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
-  %tmp4 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp4 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp4, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %iszero = icmp eq i64 %call, 0
   br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
 
 forcoll.loopinit:
   %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState, %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
-  %mutationsptr = load i64** %mutationsptr.ptr, align 8
-  %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+  %mutationsptr = load i64*, i64** %mutationsptr.ptr, align 8
+  %forcoll.initial-mutations = load i64, i64* %mutationsptr, align 8
   %stateitems.ptr = getelementptr inbounds %struct.__objcFastEnumerationState, %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 1
   br label %forcoll.loopbody.outer
 
@@ -187,8 +187,8 @@
 
 forcoll.loopbody:
   %forcoll.index = phi i64 [ 0, %forcoll.loopbody.outer ], [ %4, %forcoll.notmutated ]
-  %mutationsptr5 = load i64** %mutationsptr.ptr, align 8
-  %statemutations = load i64* %mutationsptr5, align 8
+  %mutationsptr5 = load i64*, i64** %mutationsptr.ptr, align 8
+  %statemutations = load i64, i64* %mutationsptr5, align 8
   %2 = icmp eq i64 %statemutations, %forcoll.initial-mutations
   br i1 %2, label %forcoll.notmutated, label %forcoll.mutated
 
@@ -197,16 +197,16 @@
   br label %forcoll.notmutated
 
 forcoll.notmutated:
-  %stateitems = load i8*** %stateitems.ptr, align 8
+  %stateitems = load i8**, i8*** %stateitems.ptr, align 8
   %currentitem.ptr = getelementptr i8*, i8** %stateitems, i64 %forcoll.index
-  %3 = load i8** %currentitem.ptr, align 8
+  %3 = load i8*, i8** %currentitem.ptr, align 8
   call void @use(i8* %3)
   %4 = add i64 %forcoll.index, 1
   %exitcond = icmp eq i64 %4, %umax
   br i1 %exitcond, label %forcoll.refetch, label %forcoll.loopbody
 
 forcoll.refetch:
-  %tmp7 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp7 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call8 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp7, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %5 = icmp eq i64 %call8, 0
   br i1 %5, label %forcoll.empty, label %forcoll.loopbody.outer
@@ -233,15 +233,15 @@
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
   call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
-  %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call3 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp2, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %iszero = icmp eq i64 %call3, 0
   br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
 
 forcoll.loopinit:
   %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState, %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
-  %mutationsptr = load i64** %mutationsptr.ptr, align 8
-  %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+  %mutationsptr = load i64*, i64** %mutationsptr.ptr, align 8
+  %forcoll.initial-mutations = load i64, i64* %mutationsptr, align 8
   %stateitems.ptr = getelementptr inbounds %struct.__objcFastEnumerationState, %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 1
   br label %forcoll.loopbody.outer
 
@@ -253,8 +253,8 @@
 
 forcoll.loopbody:
   %forcoll.index = phi i64 [ 0, %forcoll.loopbody.outer ], [ %4, %forcoll.notmutated ]
-  %mutationsptr4 = load i64** %mutationsptr.ptr, align 8
-  %statemutations = load i64* %mutationsptr4, align 8
+  %mutationsptr4 = load i64*, i64** %mutationsptr.ptr, align 8
+  %statemutations = load i64, i64* %mutationsptr4, align 8
   %2 = icmp eq i64 %statemutations, %forcoll.initial-mutations
   br i1 %2, label %forcoll.notmutated, label %forcoll.mutated
 
@@ -263,16 +263,16 @@
   br label %forcoll.notmutated
 
 forcoll.notmutated:
-  %stateitems = load i8*** %stateitems.ptr, align 8
+  %stateitems = load i8**, i8*** %stateitems.ptr, align 8
   %currentitem.ptr = getelementptr i8*, i8** %stateitems, i64 %forcoll.index
-  %3 = load i8** %currentitem.ptr, align 8
+  %3 = load i8*, i8** %currentitem.ptr, align 8
   call void @use(i8* %3)
   %4 = add i64 %forcoll.index, 1
   %exitcond = icmp eq i64 %4, %umax
   br i1 %exitcond, label %forcoll.refetch, label %forcoll.loopbody
 
 forcoll.refetch:
-  %tmp6 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp6 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call7 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp6, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %5 = icmp eq i64 %call7, 0
   br i1 %5, label %forcoll.empty, label %forcoll.loopbody.outer
@@ -300,15 +300,15 @@
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
   call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
-  %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call3 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp2, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %iszero = icmp eq i64 %call3, 0
   br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
 
 forcoll.loopinit:
   %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState, %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
-  %mutationsptr = load i64** %mutationsptr.ptr, align 8
-  %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+  %mutationsptr = load i64*, i64** %mutationsptr.ptr, align 8
+  %forcoll.initial-mutations = load i64, i64* %mutationsptr, align 8
   %stateitems.ptr = getelementptr inbounds %struct.__objcFastEnumerationState, %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 1
   br label %forcoll.loopbody.outer
 
@@ -320,8 +320,8 @@
 
 forcoll.loopbody:
   %forcoll.index = phi i64 [ 0, %forcoll.loopbody.outer ], [ %4, %forcoll.notmutated ]
-  %mutationsptr4 = load i64** %mutationsptr.ptr, align 8
-  %statemutations = load i64* %mutationsptr4, align 8
+  %mutationsptr4 = load i64*, i64** %mutationsptr.ptr, align 8
+  %statemutations = load i64, i64* %mutationsptr4, align 8
   %2 = icmp eq i64 %statemutations, %forcoll.initial-mutations
   br i1 %2, label %forcoll.notmutated, label %forcoll.mutated
 
@@ -330,16 +330,16 @@
   br label %forcoll.notmutated
 
 forcoll.notmutated:
-  %stateitems = load i8*** %stateitems.ptr, align 8
+  %stateitems = load i8**, i8*** %stateitems.ptr, align 8
   %currentitem.ptr = getelementptr i8*, i8** %stateitems, i64 %forcoll.index
-  %3 = load i8** %currentitem.ptr, align 8
+  %3 = load i8*, i8** %currentitem.ptr, align 8
   call void @use(i8* %3)
   %4 = add i64 %forcoll.index, 1
   %exitcond = icmp eq i64 %4, %umax
   br i1 %exitcond, label %forcoll.refetch, label %forcoll.loopbody
 
 forcoll.refetch:
-  %tmp6 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp6 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call7 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp6, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %5 = icmp eq i64 %call7, 0
   br i1 %5, label %forcoll.empty, label %forcoll.loopbody.outer
@@ -369,15 +369,15 @@
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
   call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
-  %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call3 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp2, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %iszero = icmp eq i64 %call3, 0
   br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
 
 forcoll.loopinit:
   %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState, %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
-  %mutationsptr = load i64** %mutationsptr.ptr, align 8
-  %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+  %mutationsptr = load i64*, i64** %mutationsptr.ptr, align 8
+  %forcoll.initial-mutations = load i64, i64* %mutationsptr, align 8
   %stateitems.ptr = getelementptr inbounds %struct.__objcFastEnumerationState, %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 1
   br label %forcoll.loopbody.outer
 
@@ -389,8 +389,8 @@
 
 forcoll.loopbody:
   %forcoll.index = phi i64 [ 0, %forcoll.loopbody.outer ], [ %4, %forcoll.notmutated ]
-  %mutationsptr4 = load i64** %mutationsptr.ptr, align 8
-  %statemutations = load i64* %mutationsptr4, align 8
+  %mutationsptr4 = load i64*, i64** %mutationsptr.ptr, align 8
+  %statemutations = load i64, i64* %mutationsptr4, align 8
   %2 = icmp eq i64 %statemutations, %forcoll.initial-mutations
   br i1 %2, label %forcoll.notmutated, label %forcoll.mutated
 
@@ -399,16 +399,16 @@
   br label %forcoll.notmutated
 
 forcoll.notmutated:
-  %stateitems = load i8*** %stateitems.ptr, align 8
+  %stateitems = load i8**, i8*** %stateitems.ptr, align 8
   %currentitem.ptr = getelementptr i8*, i8** %stateitems, i64 %forcoll.index
-  %3 = load i8** %currentitem.ptr, align 8
+  %3 = load i8*, i8** %currentitem.ptr, align 8
   call void @use(i8* %3)
   %4 = add i64 %forcoll.index, 1
   %exitcond = icmp eq i64 %4, %umax
   br i1 %exitcond, label %forcoll.refetch, label %forcoll.loopbody
 
 forcoll.refetch:
-  %tmp6 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp6 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call7 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp6, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %5 = icmp eq i64 %call7, 0
   br i1 %5, label %forcoll.empty, label %forcoll.loopbody.outer
@@ -435,15 +435,15 @@
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
   call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %1 = call i8* @objc_retain(i8* %0) nounwind
-  %tmp2 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp2 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call3 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp2, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %iszero = icmp eq i64 %call3, 0
   br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
 
 forcoll.loopinit:
   %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState, %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
-  %mutationsptr = load i64** %mutationsptr.ptr, align 8
-  %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+  %mutationsptr = load i64*, i64** %mutationsptr.ptr, align 8
+  %forcoll.initial-mutations = load i64, i64* %mutationsptr, align 8
   %stateitems.ptr = getelementptr inbounds %struct.__objcFastEnumerationState, %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 1
   br label %forcoll.loopbody.outer
 
@@ -455,8 +455,8 @@
 
 forcoll.loopbody:
   %forcoll.index = phi i64 [ 0, %forcoll.loopbody.outer ], [ %4, %forcoll.next ]
-  %mutationsptr4 = load i64** %mutationsptr.ptr, align 8
-  %statemutations = load i64* %mutationsptr4, align 8
+  %mutationsptr4 = load i64*, i64** %mutationsptr.ptr, align 8
+  %statemutations = load i64, i64* %mutationsptr4, align 8
   %2 = icmp eq i64 %statemutations, %forcoll.initial-mutations
   br i1 %2, label %forcoll.notmutated, label %forcoll.mutated
 
@@ -465,9 +465,9 @@
   br label %forcoll.notmutated
 
 forcoll.notmutated:
-  %stateitems = load i8*** %stateitems.ptr, align 8
+  %stateitems = load i8**, i8*** %stateitems.ptr, align 8
   %currentitem.ptr = getelementptr i8*, i8** %stateitems, i64 %forcoll.index
-  %3 = load i8** %currentitem.ptr, align 8
+  %3 = load i8*, i8** %currentitem.ptr, align 8
   %tobool = icmp eq i8* %3, null
   br i1 %tobool, label %forcoll.next, label %if.then
 
@@ -481,7 +481,7 @@
   br i1 %exitcond, label %forcoll.refetch, label %forcoll.loopbody
 
 forcoll.refetch:
-  %tmp6 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp6 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call7 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %1, i8* %tmp6, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %5 = icmp eq i64 %call7, 0
   br i1 %5, label %forcoll.empty, label %forcoll.loopbody.outer
@@ -512,15 +512,15 @@
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
   call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %2 = call i8* @objc_retain(i8* %0) nounwind
-  %tmp3 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp3 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %iszero = icmp eq i64 %call4, 0
   br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
 
 forcoll.loopinit:
   %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState, %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
-  %mutationsptr = load i64** %mutationsptr.ptr, align 8
-  %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+  %mutationsptr = load i64*, i64** %mutationsptr.ptr, align 8
+  %forcoll.initial-mutations = load i64, i64* %mutationsptr, align 8
   br label %forcoll.loopbody.outer
 
 forcoll.loopbody.outer:
@@ -531,8 +531,8 @@
 
 forcoll.loopbody:
   %forcoll.index = phi i64 [ %phitmp, %forcoll.notmutated.forcoll.loopbody_crit_edge ], [ 1, %forcoll.loopbody.outer ]
-  %mutationsptr5 = load i64** %mutationsptr.ptr, align 8
-  %statemutations = load i64* %mutationsptr5, align 8
+  %mutationsptr5 = load i64*, i64** %mutationsptr.ptr, align 8
+  %statemutations = load i64, i64* %mutationsptr5, align 8
   %3 = icmp eq i64 %statemutations, %forcoll.initial-mutations
   br i1 %3, label %forcoll.notmutated, label %forcoll.mutated
 
@@ -549,7 +549,7 @@
   br label %forcoll.loopbody
 
 forcoll.refetch:
-  %tmp6 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp6 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call7 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp6, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %4 = icmp eq i64 %call7, 0
   br i1 %4, label %forcoll.empty, label %forcoll.loopbody.outer
@@ -579,15 +579,15 @@
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
   call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %2 = call i8* @objc_retain(i8* %0) nounwind
-  %tmp3 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp3 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %iszero = icmp eq i64 %call4, 0
   br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
 
 forcoll.loopinit:
   %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState, %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
-  %mutationsptr = load i64** %mutationsptr.ptr, align 8
-  %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+  %mutationsptr = load i64*, i64** %mutationsptr.ptr, align 8
+  %forcoll.initial-mutations = load i64, i64* %mutationsptr, align 8
   br label %forcoll.loopbody.outer
 
 forcoll.loopbody.outer:
@@ -598,8 +598,8 @@
 
 forcoll.loopbody:
   %forcoll.index = phi i64 [ %phitmp, %forcoll.notmutated ], [ 0, %forcoll.loopbody.outer ]
-  %mutationsptr5 = load i64** %mutationsptr.ptr, align 8
-  %statemutations = load i64* %mutationsptr5, align 8
+  %mutationsptr5 = load i64*, i64** %mutationsptr.ptr, align 8
+  %statemutations = load i64, i64* %mutationsptr5, align 8
   %3 = icmp eq i64 %statemutations, %forcoll.initial-mutations
   br i1 %3, label %forcoll.notmutated, label %forcoll.mutated
 
@@ -613,7 +613,7 @@
   br i1 %exitcond, label %forcoll.refetch, label %forcoll.loopbody
 
 forcoll.refetch:
-  %tmp6 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp6 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call7 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp6, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %4 = icmp eq i64 %call7, 0
   br i1 %4, label %forcoll.empty, label %forcoll.loopbody.outer
@@ -646,15 +646,15 @@
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
   call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %2 = call i8* @objc_retain(i8* %0) nounwind
-  %tmp3 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp3 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %iszero = icmp eq i64 %call4, 0
   br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
 
 forcoll.loopinit:
   %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState, %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
-  %mutationsptr = load i64** %mutationsptr.ptr, align 8
-  %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+  %mutationsptr = load i64*, i64** %mutationsptr.ptr, align 8
+  %forcoll.initial-mutations = load i64, i64* %mutationsptr, align 8
   br label %forcoll.loopbody.outer
 
 forcoll.loopbody.outer:
@@ -665,8 +665,8 @@
 
 forcoll.loopbody:
   %forcoll.index = phi i64 [ %phitmp, %forcoll.notmutated.forcoll.loopbody_crit_edge ], [ 1, %forcoll.loopbody.outer ]
-  %mutationsptr5 = load i64** %mutationsptr.ptr, align 8
-  %statemutations = load i64* %mutationsptr5, align 8
+  %mutationsptr5 = load i64*, i64** %mutationsptr.ptr, align 8
+  %statemutations = load i64, i64* %mutationsptr5, align 8
   %3 = icmp eq i64 %statemutations, %forcoll.initial-mutations
   br i1 %3, label %forcoll.notmutated, label %forcoll.mutated
 
@@ -683,7 +683,7 @@
   br label %forcoll.loopbody
 
 forcoll.refetch:
-  %tmp6 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp6 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call7 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp6, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %4 = icmp eq i64 %call7, 0
   br i1 %4, label %forcoll.empty, label %forcoll.loopbody.outer
@@ -714,15 +714,15 @@
   %tmp = bitcast %struct.__objcFastEnumerationState* %state.ptr to i8*
   call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 64, i32 8, i1 false)
   %2 = call i8* @objc_retain(i8* %0) nounwind
-  %tmp3 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp3 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call4 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp3, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %iszero = icmp eq i64 %call4, 0
   br i1 %iszero, label %forcoll.empty, label %forcoll.loopinit
 
 forcoll.loopinit:
   %mutationsptr.ptr = getelementptr inbounds %struct.__objcFastEnumerationState, %struct.__objcFastEnumerationState* %state.ptr, i64 0, i32 2
-  %mutationsptr = load i64** %mutationsptr.ptr, align 8
-  %forcoll.initial-mutations = load i64* %mutationsptr, align 8
+  %mutationsptr = load i64*, i64** %mutationsptr.ptr, align 8
+  %forcoll.initial-mutations = load i64, i64* %mutationsptr, align 8
   br label %forcoll.loopbody.outer
 
 forcoll.loopbody.outer:
@@ -733,8 +733,8 @@
 
 forcoll.loopbody:
   %forcoll.index = phi i64 [ %phitmp, %forcoll.notmutated ], [ 0, %forcoll.loopbody.outer ]
-  %mutationsptr5 = load i64** %mutationsptr.ptr, align 8
-  %statemutations = load i64* %mutationsptr5, align 8
+  %mutationsptr5 = load i64*, i64** %mutationsptr.ptr, align 8
+  %statemutations = load i64, i64* %mutationsptr5, align 8
   %3 = icmp eq i64 %statemutations, %forcoll.initial-mutations
   br i1 %3, label %forcoll.notmutated, label %forcoll.mutated
 
@@ -748,7 +748,7 @@
   br i1 %exitcond, label %forcoll.refetch, label %forcoll.loopbody
 
 forcoll.refetch:
-  %tmp6 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+  %tmp6 = load i8*, i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
   %call7 = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, %struct.__objcFastEnumerationState*, [16 x i8*]*, i64)*)(i8* %2, i8* %tmp6, %struct.__objcFastEnumerationState* %state.ptr, [16 x i8*]* %items.ptr, i64 16)
   %4 = icmp eq i64 %call7, 0
   br i1 %4, label %forcoll.empty, label %forcoll.loopbody.outer
@@ -796,7 +796,7 @@
   %foo5 = call i8* @objc_retainBlock(i8* %foo4) nounwind
   call void @use(i8* %foo5), !clang.arc.no_objc_arc_exceptions !0
   call void @objc_release(i8* %foo5) nounwind
-  %strongdestroy = load i8** %foo, align 8
+  %strongdestroy = load i8*, i8** %foo, align 8
   call void @objc_release(i8* %strongdestroy) nounwind, !clang.imprecise_release !0
   %foo10 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>, <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 5
   %block.isa11 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>, <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 0
@@ -815,7 +815,7 @@
   %foo21 = call i8* @objc_retainBlock(i8* %foo20) nounwind
   call void @use(i8* %foo21), !clang.arc.no_objc_arc_exceptions !0
   call void @objc_release(i8* %foo21) nounwind
-  %strongdestroy25 = load i8** %foo10, align 8
+  %strongdestroy25 = load i8*, i8** %foo10, align 8
   call void @objc_release(i8* %strongdestroy25) nounwind, !clang.imprecise_release !0
   call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0
   ret void
diff --git a/llvm/test/Transforms/ObjCARC/provenance.ll b/llvm/test/Transforms/ObjCARC/provenance.ll
index 937c689..aa5a932 100644
--- a/llvm/test/Transforms/ObjCARC/provenance.ll
+++ b/llvm/test/Transforms/ObjCARC/provenance.ll
@@ -10,28 +10,28 @@
 declare void @g(i8)
 
 define void @f(i8* %a, i8** %b, i8** %c) {
-  %y1 = load i8* %a
+  %y1 = load i8, i8* %a
   call void @g(i8 %y1)
 
-  %y2 = load i8** %b
-  %y3 = load i8** %c
+  %y2 = load i8*, i8** %b
+  %y3 = load i8*, i8** %c
 
-  %x0 = load i8* @"\01l_objc_msgSend_fixup_"
+  %x0 = load i8, i8* @"\01l_objc_msgSend_fixup_"
   call void @g(i8 %x0)
 
-  %x1 = load i8* @g1
+  %x1 = load i8, i8* @g1
   call void @g(i8 %x1)
 
-  %x2 = load i8* @g2
+  %x2 = load i8, i8* @g2
   call void @g(i8 %x2)
 
-  %x3 = load i8* @g3
+  %x3 = load i8, i8* @g3
   call void @g(i8 %x3)
 
-  %x4 = load i8* @g4
+  %x4 = load i8, i8* @g4
   call void @g(i8 %x4)
 
-  %x5 = load i8* @g5
+  %x5 = load i8, i8* @g5
   call void @g(i8 %x5)
   ret void
 }
diff --git a/llvm/test/Transforms/ObjCARC/retain-block-side-effects.ll b/llvm/test/Transforms/ObjCARC/retain-block-side-effects.ll
index b4f4089..5f5def9 100644
--- a/llvm/test/Transforms/ObjCARC/retain-block-side-effects.ll
+++ b/llvm/test/Transforms/ObjCARC/retain-block-side-effects.ll
@@ -6,7 +6,7 @@
 
 ; CHECK: %tmp16 = call i8* @objc_retainBlock(i8* %tmp15) [[NUW:#[0-9]+]]
 ; CHECK: %tmp17 = bitcast i8* %tmp16 to void ()*
-; CHECK: %tmp18 = load %struct.__block_byref_repeater** %byref.forwarding, align 8
+; CHECK: %tmp18 = load %struct.__block_byref_repeater*, %struct.__block_byref_repeater** %byref.forwarding, align 8
 ; CHECK: %repeater12 = getelementptr inbounds %struct.__block_byref_repeater, %struct.__block_byref_repeater* %tmp18, i64 0, i32 6
 ; CHECK: store void ()* %tmp17, void ()** %repeater12, align 8
 
@@ -29,9 +29,9 @@
   %tmp15 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %0*, i8* }>* %block to i8*
   %tmp16 = call i8* @objc_retainBlock(i8* %tmp15) nounwind
   %tmp17 = bitcast i8* %tmp16 to void ()*
-  %tmp18 = load %struct.__block_byref_repeater** %byref.forwarding, align 8
+  %tmp18 = load %struct.__block_byref_repeater*, %struct.__block_byref_repeater** %byref.forwarding, align 8
   %repeater12 = getelementptr inbounds %struct.__block_byref_repeater, %struct.__block_byref_repeater* %tmp18, i64 0, i32 6
-  %tmp13 = load void ()** %repeater12, align 8
+  %tmp13 = load void ()*, void ()** %repeater12, align 8
   store void ()* %tmp17, void ()** %repeater12, align 8
   ret void
 }
diff --git a/llvm/test/Transforms/PhaseOrdering/2010-03-22-empty-baseclass.ll b/llvm/test/Transforms/PhaseOrdering/2010-03-22-empty-baseclass.ll
index 888895f..14a22f5 100644
--- a/llvm/test/Transforms/PhaseOrdering/2010-03-22-empty-baseclass.ll
+++ b/llvm/test/Transforms/PhaseOrdering/2010-03-22-empty-baseclass.ll
@@ -29,11 +29,11 @@
   store %struct.empty_base_t* %4, %struct.empty_base_t** %2, align 8
   call void @_ZN7empty_tC1Ev(%struct.empty_base_t* %1) nounwind
   %5 = call i32* @_ZN5boost15compressed_pairI7empty_tiE6secondEv(%"struct.boost::compressed_pair<empty_t,int>"* %x) ssp ; <i32*> [#uses=1]
-  %6 = load i32* %5, align 4                      ; <i32> [#uses=1]
+  %6 = load i32, i32* %5, align 4                      ; <i32> [#uses=1]
   %7 = icmp ne i32 %6, -3                         ; <i1> [#uses=1]
   %8 = zext i1 %7 to i8                           ; <i8> [#uses=1]
   store i8 %8, i8* %retval.1, align 1
-  %9 = load i8* %retval.1, align 1                ; <i8> [#uses=1]
+  %9 = load i8, i8* %retval.1, align 1                ; <i8> [#uses=1]
   %toBool = icmp ne i8 %9, 0                      ; <i1> [#uses=1]
   br i1 %toBool, label %bb, label %bb1
 
@@ -44,14 +44,14 @@
 
 bb1:                                              ; preds = %entry
   store i32 0, i32* %0, align 4
-  %11 = load i32* %0, align 4                     ; <i32> [#uses=1]
+  %11 = load i32, i32* %0, align 4                     ; <i32> [#uses=1]
   store i32 %11, i32* %retval, align 4
   br label %return
 
 ; CHECK-NOT: x.second() was clobbered
 ; CHECK: ret i32
 return:                                           ; preds = %bb1
-  %retval2 = load i32* %retval                    ; <i32> [#uses=1]
+  %retval2 = load i32, i32* %retval                    ; <i32> [#uses=1]
   ret i32 %retval2
 }
 
@@ -71,7 +71,7 @@
   %this_addr = alloca %struct.empty_base_t*, align 8 ; <%struct.empty_base_t**> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   store %struct.empty_base_t* %this, %struct.empty_base_t** %this_addr
-  %0 = load %struct.empty_base_t** %this_addr, align 8 ; <%struct.empty_base_t*> [#uses=1]
+  %0 = load %struct.empty_base_t*, %struct.empty_base_t** %this_addr, align 8 ; <%struct.empty_base_t*> [#uses=1]
   call void @_ZN12empty_base_tC2Ev(%struct.empty_base_t* %0) nounwind
   br label %return
 
@@ -86,15 +86,15 @@
   %0 = alloca i32*                                ; <i32**> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   store %"struct.boost::details::compressed_pair_imp<empty_t,int,1>"* %this, %"struct.boost::details::compressed_pair_imp<empty_t,int,1>"** %this_addr
-  %1 = load %"struct.boost::details::compressed_pair_imp<empty_t,int,1>"** %this_addr, align 8 ; <%"struct.boost::details::compressed_pair_imp<empty_t,int,1>"*> [#uses=1]
+  %1 = load %"struct.boost::details::compressed_pair_imp<empty_t,int,1>"*, %"struct.boost::details::compressed_pair_imp<empty_t,int,1>"** %this_addr, align 8 ; <%"struct.boost::details::compressed_pair_imp<empty_t,int,1>"*> [#uses=1]
   %2 = getelementptr inbounds %"struct.boost::details::compressed_pair_imp<empty_t,int,1>", %"struct.boost::details::compressed_pair_imp<empty_t,int,1>"* %1, i32 0, i32 0 ; <i32*> [#uses=1]
   store i32* %2, i32** %0, align 8
-  %3 = load i32** %0, align 8                     ; <i32*> [#uses=1]
+  %3 = load i32*, i32** %0, align 8                     ; <i32*> [#uses=1]
   store i32* %3, i32** %retval, align 8
   br label %return
 
 return:                                           ; preds = %entry
-  %retval1 = load i32** %retval                   ; <i32*> [#uses=1]
+  %retval1 = load i32*, i32** %retval                   ; <i32*> [#uses=1]
   ret i32* %retval1
 }
 
@@ -105,16 +105,16 @@
   %0 = alloca i32*                                ; <i32**> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   store %"struct.boost::compressed_pair<empty_t,int>"* %this, %"struct.boost::compressed_pair<empty_t,int>"** %this_addr
-  %1 = load %"struct.boost::compressed_pair<empty_t,int>"** %this_addr, align 8 ; <%"struct.boost::compressed_pair<empty_t,int>"*> [#uses=1]
+  %1 = load %"struct.boost::compressed_pair<empty_t,int>"*, %"struct.boost::compressed_pair<empty_t,int>"** %this_addr, align 8 ; <%"struct.boost::compressed_pair<empty_t,int>"*> [#uses=1]
   %2 = getelementptr inbounds %"struct.boost::compressed_pair<empty_t,int>", %"struct.boost::compressed_pair<empty_t,int>"* %1, i32 0, i32 0 ; <%"struct.boost::details::compressed_pair_imp<empty_t,int,1>"*> [#uses=1]
   %3 = call i32* @_ZN5boost7details19compressed_pair_impI7empty_tiLi1EE6secondEv(%"struct.boost::details::compressed_pair_imp<empty_t,int,1>"* %2) nounwind ; <i32*> [#uses=1]
   store i32* %3, i32** %0, align 8
-  %4 = load i32** %0, align 8                     ; <i32*> [#uses=1]
+  %4 = load i32*, i32** %0, align 8                     ; <i32*> [#uses=1]
   store i32* %4, i32** %retval, align 8
   br label %return
 
 return:                                           ; preds = %entry
-  %retval1 = load i32** %retval                   ; <i32*> [#uses=1]
+  %retval1 = load i32*, i32** %retval                   ; <i32*> [#uses=1]
   ret i32* %retval1
 }
 
@@ -125,15 +125,15 @@
   %0 = alloca %struct.empty_base_t*               ; <%struct.empty_base_t**> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   store %"struct.boost::details::compressed_pair_imp<empty_t,int,1>"* %this, %"struct.boost::details::compressed_pair_imp<empty_t,int,1>"** %this_addr
-  %1 = load %"struct.boost::details::compressed_pair_imp<empty_t,int,1>"** %this_addr, align 8 ; <%"struct.boost::details::compressed_pair_imp<empty_t,int,1>"*> [#uses=1]
+  %1 = load %"struct.boost::details::compressed_pair_imp<empty_t,int,1>"*, %"struct.boost::details::compressed_pair_imp<empty_t,int,1>"** %this_addr, align 8 ; <%"struct.boost::details::compressed_pair_imp<empty_t,int,1>"*> [#uses=1]
   %2 = bitcast %"struct.boost::details::compressed_pair_imp<empty_t,int,1>"* %1 to %struct.empty_base_t* ; <%struct.empty_base_t*> [#uses=1]
   store %struct.empty_base_t* %2, %struct.empty_base_t** %0, align 8
-  %3 = load %struct.empty_base_t** %0, align 8    ; <%struct.empty_base_t*> [#uses=1]
+  %3 = load %struct.empty_base_t*, %struct.empty_base_t** %0, align 8    ; <%struct.empty_base_t*> [#uses=1]
   store %struct.empty_base_t* %3, %struct.empty_base_t** %retval, align 8
   br label %return
 
 return:                                           ; preds = %entry
-  %retval1 = load %struct.empty_base_t** %retval  ; <%struct.empty_base_t*> [#uses=1]
+  %retval1 = load %struct.empty_base_t*, %struct.empty_base_t** %retval  ; <%struct.empty_base_t*> [#uses=1]
   ret %struct.empty_base_t* %retval1
 }
 
@@ -144,16 +144,16 @@
   %0 = alloca %struct.empty_base_t*               ; <%struct.empty_base_t**> [#uses=2]
   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   store %"struct.boost::compressed_pair<empty_t,int>"* %this, %"struct.boost::compressed_pair<empty_t,int>"** %this_addr
-  %1 = load %"struct.boost::compressed_pair<empty_t,int>"** %this_addr, align 8 ; <%"struct.boost::compressed_pair<empty_t,int>"*> [#uses=1]
+  %1 = load %"struct.boost::compressed_pair<empty_t,int>"*, %"struct.boost::compressed_pair<empty_t,int>"** %this_addr, align 8 ; <%"struct.boost::compressed_pair<empty_t,int>"*> [#uses=1]
   %2 = getelementptr inbounds %"struct.boost::compressed_pair<empty_t,int>", %"struct.boost::compressed_pair<empty_t,int>"* %1, i32 0, i32 0 ; <%"struct.boost::details::compressed_pair_imp<empty_t,int,1>"*> [#uses=1]
   %3 = call %struct.empty_base_t* @_ZN5boost7details19compressed_pair_impI7empty_tiLi1EE5firstEv(%"struct.boost::details::compressed_pair_imp<empty_t,int,1>"* %2) nounwind ; <%struct.empty_base_t*> [#uses=1]
   store %struct.empty_base_t* %3, %struct.empty_base_t** %0, align 8
-  %4 = load %struct.empty_base_t** %0, align 8    ; <%struct.empty_base_t*> [#uses=1]
+  %4 = load %struct.empty_base_t*, %struct.empty_base_t** %0, align 8    ; <%struct.empty_base_t*> [#uses=1]
   store %struct.empty_base_t* %4, %struct.empty_base_t** %retval, align 8
   br label %return
 
 return:                                           ; preds = %entry
-  %retval1 = load %struct.empty_base_t** %retval  ; <%struct.empty_base_t*> [#uses=1]
+  %retval1 = load %struct.empty_base_t*, %struct.empty_base_t** %retval  ; <%struct.empty_base_t*> [#uses=1]
   ret %struct.empty_base_t* %retval1
 }
 
diff --git a/llvm/test/Transforms/PhaseOrdering/PR6627.ll b/llvm/test/Transforms/PhaseOrdering/PR6627.ll
index 0f81079..f214955 100644
--- a/llvm/test/Transforms/PhaseOrdering/PR6627.ll
+++ b/llvm/test/Transforms/PhaseOrdering/PR6627.ll
@@ -8,7 +8,7 @@
 define void @test2(i8* %arrayidx) nounwind ssp {
 entry:
   %xx = bitcast i8* %arrayidx to i32*
-  %x1 = load i32* %xx, align 4
+  %x1 = load i32, i32* %xx, align 4
   %tmp = trunc i32 %x1 to i8
   %conv = zext i8 %tmp to i32
   %cmp = icmp eq i32 %conv, 127
@@ -16,21 +16,21 @@
 
 land.lhs.true:                                    ; preds = %entry
   %arrayidx4 = getelementptr inbounds i8, i8* %arrayidx, i64 1
-  %tmp5 = load i8* %arrayidx4, align 1
+  %tmp5 = load i8, i8* %arrayidx4, align 1
   %conv6 = zext i8 %tmp5 to i32
   %cmp7 = icmp eq i32 %conv6, 69
   br i1 %cmp7, label %land.lhs.true9, label %if.end
 
 land.lhs.true9:                                   ; preds = %land.lhs.true
   %arrayidx12 = getelementptr inbounds i8, i8* %arrayidx, i64 2
-  %tmp13 = load i8* %arrayidx12, align 1
+  %tmp13 = load i8, i8* %arrayidx12, align 1
   %conv14 = zext i8 %tmp13 to i32
   %cmp15 = icmp eq i32 %conv14, 76
   br i1 %cmp15, label %land.lhs.true17, label %if.end
 
 land.lhs.true17:                                  ; preds = %land.lhs.true9
   %arrayidx20 = getelementptr inbounds i8, i8* %arrayidx, i64 3
-  %tmp21 = load i8* %arrayidx20, align 1
+  %tmp21 = load i8, i8* %arrayidx20, align 1
   %conv22 = zext i8 %tmp21 to i32
   %cmp23 = icmp eq i32 %conv22, 70
   br i1 %cmp23, label %if.then, label %if.end
@@ -43,7 +43,7 @@
   ret void
 
 ; CHECK-LABEL: @test2(
-; CHECK: %x1 = load i32* %xx, align 4
+; CHECK: %x1 = load i32, i32* %xx, align 4
 ; CHECK-NEXT: icmp eq i32 %x1, 1179403647
 ; CHECK-NEXT: br i1 {{.*}}, label %if.then, label %if.end 
 }
@@ -53,28 +53,28 @@
 ; requiring widening.
 define void @test2a(i8* %arrayidx) nounwind ssp {
 entry:
-  %x1 = load i8* %arrayidx, align 4
+  %x1 = load i8, i8* %arrayidx, align 4
   %conv = zext i8 %x1 to i32
   %cmp = icmp eq i32 %conv, 127
   br i1 %cmp, label %land.lhs.true, label %if.end
 
 land.lhs.true:                                    ; preds = %entry
   %arrayidx4 = getelementptr inbounds i8, i8* %arrayidx, i64 1
-  %tmp5 = load i8* %arrayidx4, align 1
+  %tmp5 = load i8, i8* %arrayidx4, align 1
   %conv6 = zext i8 %tmp5 to i32
   %cmp7 = icmp eq i32 %conv6, 69
   br i1 %cmp7, label %land.lhs.true9, label %if.end
 
 land.lhs.true9:                                   ; preds = %land.lhs.true
   %arrayidx12 = getelementptr inbounds i8, i8* %arrayidx, i64 2
-  %tmp13 = load i8* %arrayidx12, align 1
+  %tmp13 = load i8, i8* %arrayidx12, align 1
   %conv14 = zext i8 %tmp13 to i32
   %cmp15 = icmp eq i32 %conv14, 76
   br i1 %cmp15, label %land.lhs.true17, label %if.end
 
 land.lhs.true17:                                  ; preds = %land.lhs.true9
   %arrayidx20 = getelementptr inbounds i8, i8* %arrayidx, i64 3
-  %tmp21 = load i8* %arrayidx20, align 1
+  %tmp21 = load i8, i8* %arrayidx20, align 1
   %conv22 = zext i8 %tmp21 to i32
   %cmp23 = icmp eq i32 %conv22, 70
   br i1 %cmp23, label %if.then, label %if.end
@@ -87,7 +87,7 @@
   ret void
 
 ; CHECK-LABEL: @test2a(
-; CHECK: %x1 = load i32* {{.*}}, align 4
+; CHECK: %x1 = load i32, i32* {{.*}}, align 4
 ; CHECK-NEXT: icmp eq i32 %x1, 1179403647
 ; CHECK-NEXT: br i1 {{.*}}, label %if.then, label %if.end 
 }
diff --git a/llvm/test/Transforms/PhaseOrdering/basic.ll b/llvm/test/Transforms/PhaseOrdering/basic.ll
index a8d4bb8..ef57e55 100644
--- a/llvm/test/Transforms/PhaseOrdering/basic.ll
+++ b/llvm/test/Transforms/PhaseOrdering/basic.ll
@@ -13,9 +13,9 @@
   %i = alloca i8*, align 8
   %call = call i8* @malloc(i64 1)
   store i8* %call, i8** %i, align 8
-  %tmp = load i8** %i, align 8
+  %tmp = load i8*, i8** %i, align 8
   store i8 1, i8* %tmp
-  %tmp1 = load i8** %i, align 8
+  %tmp1 = load i8*, i8** %i, align 8
   call void @free(i8* %tmp1)
   ret void
 
@@ -37,9 +37,9 @@
   %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 1
   store i32 %add, i32* %arrayidx1, align 4
   %arrayidx2 = getelementptr inbounds i32, i32* %p, i64 1
-  %0 = load i32* %arrayidx2, align 4
+  %0 = load i32, i32* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %p, i64 0
-  %1 = load i32* %arrayidx3, align 4
+  %1 = load i32, i32* %arrayidx3, align 4
   %mul = mul i32 2, %1
   %sub = sub i32 %0, %mul
   ret i32 %sub
diff --git a/llvm/test/Transforms/PhaseOrdering/gdce.ll b/llvm/test/Transforms/PhaseOrdering/gdce.ll
index 95f0675..56d5cbc 100644
--- a/llvm/test/Transforms/PhaseOrdering/gdce.ll
+++ b/llvm/test/Transforms/PhaseOrdering/gdce.ll
@@ -27,7 +27,7 @@
   store i32 0, i32* %retval
   store i32 1, i32* %cleanup.dest.slot
   call void @_ZN4BaseD1Ev(%class.Base* %b)
-  %0 = load i32* %retval
+  %0 = load i32, i32* %retval
   ret i32 %0
 }
 
@@ -35,7 +35,7 @@
 entry:
   %this.addr = alloca %class.Base*, align 8
   store %class.Base* %this, %class.Base** %this.addr, align 8
-  %this1 = load %class.Base** %this.addr
+  %this1 = load %class.Base*, %class.Base** %this.addr
   call void @_ZN4BaseC2Ev(%class.Base* %this1)
   ret void
 }
@@ -44,7 +44,7 @@
 entry:
   %this.addr = alloca %class.Base*, align 8
   store %class.Base* %this, %class.Base** %this.addr, align 8
-  %this1 = load %class.Base** %this.addr
+  %this1 = load %class.Base*, %class.Base** %this.addr
   call void @_ZN4BaseD2Ev(%class.Base* %this1)
   ret void
 }
@@ -53,7 +53,7 @@
 entry:
   %this.addr = alloca %class.Base*, align 8
   store %class.Base* %this, %class.Base** %this.addr, align 8
-  %this1 = load %class.Base** %this.addr
+  %this1 = load %class.Base*, %class.Base** %this.addr
   ret void
 }
 
@@ -61,7 +61,7 @@
 entry:
   %this.addr = alloca %class.Base*, align 8
   store %class.Base* %this, %class.Base** %this.addr, align 8
-  %this1 = load %class.Base** %this.addr
+  %this1 = load %class.Base*, %class.Base** %this.addr
   %0 = bitcast %class.Base* %this1 to i8***
   store i8** getelementptr inbounds ([4 x i8*]* @_ZTV4Base, i64 0, i64 2), i8*** %0
   ret void
@@ -73,7 +73,7 @@
   %exn.slot = alloca i8*
   %ehselector.slot = alloca i32
   store %class.Base* %this, %class.Base** %this.addr, align 8
-  %this1 = load %class.Base** %this.addr
+  %this1 = load %class.Base*, %class.Base** %this.addr
   invoke void @_ZN4BaseD1Ev(%class.Base* %this1)
           to label %invoke.cont unwind label %lpad
 
@@ -94,8 +94,8 @@
   br label %eh.resume
 
 eh.resume:                                        ; preds = %lpad
-  %exn = load i8** %exn.slot
-  %sel = load i32* %ehselector.slot
+  %exn = load i8*, i8** %exn.slot
+  %sel = load i32, i32* %ehselector.slot
   %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
   %lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
   resume { i8*, i32 } %lpad.val2
diff --git a/llvm/test/Transforms/Reassociate/2011-01-26-UseAfterFree.ll b/llvm/test/Transforms/Reassociate/2011-01-26-UseAfterFree.ll
index c4185d2..1c8f0d2 100644
--- a/llvm/test/Transforms/Reassociate/2011-01-26-UseAfterFree.ll
+++ b/llvm/test/Transforms/Reassociate/2011-01-26-UseAfterFree.ll
@@ -5,7 +5,7 @@
 
 define void @exp_averages_intraday__deviation() {
 entry:
-  %0 = load i32* undef, align 4
+  %0 = load i32, i32* undef, align 4
   %1 = shl i32 %0, 2
   %2 = add nsw i32 undef, %1
   %3 = add nsw i32 %2, undef
diff --git a/llvm/test/Transforms/Reassociate/basictest.ll b/llvm/test/Transforms/Reassociate/basictest.ll
index 0194ce2..015d3b0b 100644
--- a/llvm/test/Transforms/Reassociate/basictest.ll
+++ b/llvm/test/Transforms/Reassociate/basictest.ll
@@ -28,9 +28,9 @@
 @f = external global i32
 
 define void @test3() {
-  %A = load i32* @a
-  %B = load i32* @b
-  %C = load i32* @c
+  %A = load i32, i32* @a
+  %B = load i32, i32* @b
+  %C = load i32, i32* @c
   %t1 = add i32 %A, %B
   %t2 = add i32 %t1, %C
   %t3 = add i32 %C, %A
@@ -49,9 +49,9 @@
 }
 
 define void @test4() {
-  %A = load i32* @a
-  %B = load i32* @b
-  %C = load i32* @c
+  %A = load i32, i32* @a
+  %B = load i32, i32* @b
+  %C = load i32, i32* @c
   %t1 = add i32 %A, %B
   %t2 = add i32 %t1, %C
   %t3 = add i32 %C, %A
@@ -70,9 +70,9 @@
 }
 
 define void @test5() {
-  %A = load i32* @a
-  %B = load i32* @b
-  %C = load i32* @c
+  %A = load i32, i32* @a
+  %B = load i32, i32* @b
+  %C = load i32, i32* @c
   %t1 = add i32 %B, %A
   %t2 = add i32 %t1, %C
   %t3 = add i32 %C, %A
@@ -91,11 +91,11 @@
 }
 
 define i32 @test6() {
-  %tmp.0 = load i32* @a
-  %tmp.1 = load i32* @b
+  %tmp.0 = load i32, i32* @a
+  %tmp.1 = load i32, i32* @b
   ; (a+b)
   %tmp.2 = add i32 %tmp.0, %tmp.1
-  %tmp.4 = load i32* @c
+  %tmp.4 = load i32, i32* @c
   ; (a+b)+c
   %tmp.5 = add i32 %tmp.2, %tmp.4
   ; (a+c)
diff --git a/llvm/test/Transforms/Reassociate/crash.ll b/llvm/test/Transforms/Reassociate/crash.ll
index 770f973..f8774ea 100644
--- a/llvm/test/Transforms/Reassociate/crash.ll
+++ b/llvm/test/Transforms/Reassociate/crash.ll
@@ -17,7 +17,7 @@
 ; PR5981
 define i32 @test2() nounwind ssp {
 entry:
-  %0 = load i32* undef, align 4
+  %0 = load i32, i32* undef, align 4
   %1 = mul nsw i32 undef, %0
   %2 = mul nsw i32 undef, %0
   %3 = add nsw i32 undef, %1
@@ -50,7 +50,7 @@
   br label %_33
 
 _33:                                              ; preds = %_33, %_
-  %tmp348 = load i8* %arg, align 1
+  %tmp348 = load i8, i8* %arg, align 1
   %tmp349 = lshr i8 %tmp348, 7
   %tmp350 = or i8 %tmp349, 42
   %tmp351 = add i8 %tmp350, -42
@@ -123,7 +123,7 @@
 ; PR12963
 @a = external global i8
 define i8 @f0(i8 %x) {
-  %t0 = load i8* @a
+  %t0 = load i8, i8* @a
   %t1 = mul i8 %x, %x
   %t2 = mul i8 %t1, %t1
   %t3 = mul i8 %t2, %t2
diff --git a/llvm/test/Transforms/Reassociate/fast-basictest.ll b/llvm/test/Transforms/Reassociate/fast-basictest.ll
index 67b07f4..64b74e3 100644
--- a/llvm/test/Transforms/Reassociate/fast-basictest.ll
+++ b/llvm/test/Transforms/Reassociate/fast-basictest.ll
@@ -48,9 +48,9 @@
 ; CHECK-NOT: fadd fast float
 ; CHECK: ret void
 
-  %A = load float* @fa
-  %B = load float* @fb
-  %C = load float* @fc
+  %A = load float, float* @fa
+  %B = load float, float* @fb
+  %C = load float, float* @fc
   %t1 = fadd fast float %A, %B
   %t2 = fadd fast float %t1, %C
   %t3 = fadd fast float %C, %A
@@ -69,9 +69,9 @@
 ; CHECK-NOT: fadd
 ; CHECK: ret void
 
-  %A = load float* @fa
-  %B = load float* @fb
-  %C = load float* @fc
+  %A = load float, float* @fa
+  %B = load float, float* @fb
+  %C = load float, float* @fc
   %t1 = fadd fast float %A, %B
   %t2 = fadd fast float %t1, %C
   %t3 = fadd fast float %C, %A
@@ -90,9 +90,9 @@
 ; CHECK-NOT: fadd
 ; CHECK: ret void
 
-  %A = load float* @fa
-  %B = load float* @fb
-  %C = load float* @fc
+  %A = load float, float* @fa
+  %B = load float, float* @fb
+  %C = load float, float* @fc
   %t1 = fadd fast float %B, %A
   %t2 = fadd fast float %t1, %C
   %t3 = fadd fast float %C, %A
diff --git a/llvm/test/Transforms/Reassociate/pr12245.ll b/llvm/test/Transforms/Reassociate/pr12245.ll
index e9b5355..0e7152e 100644
--- a/llvm/test/Transforms/Reassociate/pr12245.ll
+++ b/llvm/test/Transforms/Reassociate/pr12245.ll
@@ -6,34 +6,34 @@
 
 define i32 @fn2() nounwind uwtable ssp {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %dec = add nsw i32 %0, -1
   store i32 %dec, i32* @a, align 4
-  %1 = load i32* @d, align 4
+  %1 = load i32, i32* @d, align 4
   %sub = sub nsw i32 %dec, %1
   store i32 %sub, i32* @d, align 4
-  %2 = load i32* @a, align 4
+  %2 = load i32, i32* @a, align 4
   %dec1 = add nsw i32 %2, -1
   store i32 %dec1, i32* @a, align 4
-  %3 = load i32* @d, align 4
+  %3 = load i32, i32* @d, align 4
   %sub2 = sub nsw i32 %dec1, %3
   store i32 %sub2, i32* @d, align 4
-  %4 = load i32* @a, align 4
+  %4 = load i32, i32* @a, align 4
   %dec3 = add nsw i32 %4, -1
   store i32 %dec3, i32* @a, align 4
-  %5 = load i32* @d, align 4
+  %5 = load i32, i32* @d, align 4
   %sub4 = sub nsw i32 %dec3, %5
   store i32 %sub4, i32* @d, align 4
-  %6 = load i32* @a, align 4
+  %6 = load i32, i32* @a, align 4
   %dec5 = add nsw i32 %6, -1
   store i32 %dec5, i32* @a, align 4
-  %7 = load i32* @d, align 4
+  %7 = load i32, i32* @d, align 4
   %sub6 = sub nsw i32 %dec5, %7
   store i32 %sub6, i32* @d, align 4
-  %8 = load i32* @a, align 4
+  %8 = load i32, i32* @a, align 4
   %dec7 = add nsw i32 %8, -1
   store i32 %dec7, i32* @a, align 4
-  %9 = load i32* @d, align 4
+  %9 = load i32, i32* @d, align 4
   %sub8 = sub nsw i32 %dec7, %9
   store i32 %sub8, i32* @d, align 4
   ret i32 0
diff --git a/llvm/test/Transforms/Reassociate/pr21205.ll b/llvm/test/Transforms/Reassociate/pr21205.ll
index fcc7150..0c6fd3a 100644
--- a/llvm/test/Transforms/Reassociate/pr21205.ll
+++ b/llvm/test/Transforms/Reassociate/pr21205.ll
@@ -11,7 +11,7 @@
 
 define i32 @test1() {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %conv = sitofp i32 %0 to float
   %sub = fsub fast float %conv, undef
   %sub1 = fadd fast float %sub, -1.000000e+00
diff --git a/llvm/test/Transforms/RewriteStatepointsForGC/basics.ll b/llvm/test/Transforms/RewriteStatepointsForGC/basics.ll
index 252d5f1..c1a1e4e 100644
--- a/llvm/test/Transforms/RewriteStatepointsForGC/basics.ll
+++ b/llvm/test/Transforms/RewriteStatepointsForGC/basics.ll
@@ -35,14 +35,14 @@
 ; CHECK-NEXT: gc.statepoint
 ; CHECK-NEXT: %derived.relocated = call coldcc i8 addrspace(1)*
 ; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
-; CHECK-NEXT: load i8 addrspace(1)* %derived.relocated
-; CHECK-NEXT: load i8 addrspace(1)* %obj.relocated
+; CHECK-NEXT: load i8, i8 addrspace(1)* %derived.relocated
+; CHECK-NEXT: load i8, i8 addrspace(1)* %obj.relocated
 entry:
   %derived = getelementptr i8, i8 addrspace(1)* %obj, i64 10
   call i32 (void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(void ()* @foo, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
 
-  %a = load i8 addrspace(1)* %derived
-  %b = load i8 addrspace(1)* %obj
+  %a = load i8, i8 addrspace(1)* %derived
+  %b = load i8, i8 addrspace(1)* %obj
   %c = sub i8 %a, %b
   ret i8 %c
 }
diff --git a/llvm/test/Transforms/SCCP/2003-06-24-OverdefinedPHIValue.ll b/llvm/test/Transforms/SCCP/2003-06-24-OverdefinedPHIValue.ll
index 8cdc077..e5a1d679 100644
--- a/llvm/test/Transforms/SCCP/2003-06-24-OverdefinedPHIValue.ll
+++ b/llvm/test/Transforms/SCCP/2003-06-24-OverdefinedPHIValue.ll
@@ -3,7 +3,7 @@
 
 define void @cprop_test11(i32* %data.1) {
 entry:
-	%tmp.1 = load i32* %data.1		; <i32> [#uses=3]
+	%tmp.1 = load i32, i32* %data.1		; <i32> [#uses=3]
 	%tmp.41 = icmp sgt i32 %tmp.1, 1		; <i1> [#uses=1]
 	br i1 %tmp.41, label %no_exit, label %loopexit
 no_exit:		; preds = %endif, %then, %entry
diff --git a/llvm/test/Transforms/SCCP/2006-10-23-IPSCCP-Crash.ll b/llvm/test/Transforms/SCCP/2006-10-23-IPSCCP-Crash.ll
index a353e98..9724e6e 100644
--- a/llvm/test/Transforms/SCCP/2006-10-23-IPSCCP-Crash.ll
+++ b/llvm/test/Transforms/SCCP/2006-10-23-IPSCCP-Crash.ll
@@ -37,7 +37,7 @@
 	%tmp52 = icmp sgt i32 %D, 0		; <i1> [#uses=1]
 	br i1 %tmp52, label %cond_true53, label %cond_next71
 cond_true53:		; preds = %cond_next50
-	%tmp54 = load i32* @JUMP		; <i32> [#uses=1]
+	%tmp54 = load i32, i32* @JUMP		; <i32> [#uses=1]
 	%tmp55 = icmp eq i32 %tmp54, 1		; <i1> [#uses=1]
 	br i1 %tmp55, label %cond_true56, label %cond_next63
 cond_true56:		; preds = %cond_true53
@@ -68,8 +68,8 @@
 cond_next252:		; preds = %cond_next208, %entry
 	%D.0.0 = phi i32 [ 0, %entry ], [ %tmp229, %cond_next208 ]		; <i32> [#uses=1]
 	%tmp254 = getelementptr i8*, i8** null, i32 1		; <i8**> [#uses=1]
-	%tmp256 = load i8** %tmp254		; <i8*> [#uses=1]
-	%tmp258 = load i8* %tmp256		; <i8> [#uses=1]
+	%tmp256 = load i8*, i8** %tmp254		; <i8*> [#uses=1]
+	%tmp258 = load i8, i8* %tmp256		; <i8> [#uses=1]
 	%tmp259 = icmp eq i8 %tmp258, 45		; <i1> [#uses=1]
 	br i1 %tmp259, label %cond_true260, label %bb263
 cond_true260:		; preds = %cond_next252
diff --git a/llvm/test/Transforms/SCCP/2006-12-04-PackedType.ll b/llvm/test/Transforms/SCCP/2006-12-04-PackedType.ll
index 34a5fb2..05cf5ff 100644
--- a/llvm/test/Transforms/SCCP/2006-12-04-PackedType.ll
+++ b/llvm/test/Transforms/SCCP/2006-12-04-PackedType.ll
@@ -104,13 +104,13 @@
 define void @gldLLVMVecPointRender(%struct.GLDContextRec* %ctx) {
 entry:
 	%tmp.uip = getelementptr %struct.GLDContextRec, %struct.GLDContextRec* %ctx, i32 0, i32 22		; <i32*> [#uses=1]
-	%tmp = load i32* %tmp.uip		; <i32> [#uses=3]
+	%tmp = load i32, i32* %tmp.uip		; <i32> [#uses=3]
 	%tmp91 = lshr i32 %tmp, 5		; <i32> [#uses=1]
 	%tmp92 = trunc i32 %tmp91 to i1		; <i1> [#uses=1]
 	br i1 %tmp92, label %cond_true93, label %cond_next116
 cond_true93:		; preds = %entry
 	%tmp.upgrd.1 = getelementptr %struct.GLDContextRec, %struct.GLDContextRec* %ctx, i32 0, i32 31, i32 14		; <i32*> [#uses=1]
-	%tmp95 = load i32* %tmp.upgrd.1		; <i32> [#uses=1]
+	%tmp95 = load i32, i32* %tmp.upgrd.1		; <i32> [#uses=1]
 	%tmp95.upgrd.2 = sitofp i32 %tmp95 to float		; <float> [#uses=1]
 	%tmp108 = fmul float undef, %tmp95.upgrd.2		; <float> [#uses=1]
 	br label %cond_next116
diff --git a/llvm/test/Transforms/SCCP/apint-array.ll b/llvm/test/Transforms/SCCP/apint-array.ll
index 2cb420a..eff6cc9 100644
--- a/llvm/test/Transforms/SCCP/apint-array.ll
+++ b/llvm/test/Transforms/SCCP/apint-array.ll
@@ -8,14 +8,14 @@
 Head:
    %A = getelementptr [6 x i101], [6 x i101]* @Y, i32 0, i32 1
 
-   %B = load i101* %A
+   %B = load i101, i101* %A
    %C = icmp sge i101 %B, 1
    br i1 %C, label %True, label %False
 True:
    %D = and i101 %B, 1
    %E = trunc i101 %D to i32
    %F = getelementptr [6 x i101], [6 x i101]* @Y, i32 0, i32 %E
-   %G = load i101* %F
+   %G = load i101, i101* %F
    br label %False
 False:
    %H = phi i101 [%G, %True], [-1, %Head]
diff --git a/llvm/test/Transforms/SCCP/apint-bigarray.ll b/llvm/test/Transforms/SCCP/apint-bigarray.ll
index 082bd90..e023199 100644
--- a/llvm/test/Transforms/SCCP/apint-bigarray.ll
+++ b/llvm/test/Transforms/SCCP/apint-bigarray.ll
@@ -11,13 +11,13 @@
 define i10000 @caller()
 {
         %Y = call i10000* @test(i10000 -1)
-        %Z = load i10000* %Y
+        %Z = load i10000, i10000* %Y
         ret i10000 %Z 
 }
 
 define i10000 @caller2()
 {
         %Y = call i10000* @test(i10000 1)
-        %Z = load i10000* %Y
+        %Z = load i10000, i10000* %Y
         ret i10000 %Z 
 }
diff --git a/llvm/test/Transforms/SCCP/apint-bigint2.ll b/llvm/test/Transforms/SCCP/apint-bigint2.ll
index 639e07c..f28b966 100644
--- a/llvm/test/Transforms/SCCP/apint-bigint2.ll
+++ b/llvm/test/Transforms/SCCP/apint-bigint2.ll
@@ -7,12 +7,12 @@
 {
 Head:
    %A = getelementptr [6 x i101], [6 x i101]* @Y, i32 0, i32 1
-   %B = load i101* %A
+   %B = load i101, i101* %A
    %D = and i101 %B, 1
    %DD = or i101 %D, 1
    %E = trunc i101 %DD to i32
    %F = getelementptr [6 x i101], [6 x i101]* @Y, i32 0, i32 %E
-   %G = load i101* %F
+   %G = load i101, i101* %F
  
    ret i101 %G
 }
diff --git a/llvm/test/Transforms/SCCP/apint-ipsccp3.ll b/llvm/test/Transforms/SCCP/apint-ipsccp3.ll
index 68987ae..c99ae58 100644
--- a/llvm/test/Transforms/SCCP/apint-ipsccp3.ll
+++ b/llvm/test/Transforms/SCCP/apint-ipsccp3.ll
@@ -5,13 +5,13 @@
 
 
 define void @foo() {
-	%X = load i66* @G
+	%X = load i66, i66* @G
 	store i66 %X, i66* @G
 	ret void
 }
 
 define i66 @bar() {
-	%V = load i66* @G
+	%V = load i66, i66* @G
 	%C = icmp eq i66 %V, 17
 	br i1 %C, label %T, label %F
 T:
diff --git a/llvm/test/Transforms/SCCP/apint-ipsccp4.ll b/llvm/test/Transforms/SCCP/apint-ipsccp4.ll
index 33c5aad..be06d03 100644
--- a/llvm/test/Transforms/SCCP/apint-ipsccp4.ll
+++ b/llvm/test/Transforms/SCCP/apint-ipsccp4.ll
@@ -10,13 +10,13 @@
 
 define internal float @test2() {
 	%A = getelementptr [2 x { i212, float}], [2 x { i212, float}]* @Y, i32 0, i32 1, i32 1
-	%B = load float* %A
+	%B = load float, float* %A
 	ret float %B
 }
 
 define internal float  @test3() {
 	%A = getelementptr [2 x { i212, float}], [2 x { i212, float}]* @Y, i32 0, i32 0, i32 1
-	%B = load float* %A
+	%B = load float, float* %A
 	ret float %B
 }
 
diff --git a/llvm/test/Transforms/SCCP/apint-load.ll b/llvm/test/Transforms/SCCP/apint-load.ll
index 407237b..17506fc 100644
--- a/llvm/test/Transforms/SCCP/apint-load.ll
+++ b/llvm/test/Transforms/SCCP/apint-load.ll
@@ -7,19 +7,19 @@
 @Y = constant [2 x { i212, float }] [ { i212, float } { i212 12, float 1.0 }, 
                                      { i212, float } { i212 37, float 0x3FF3B2FEC0000000 } ]
 define i212 @test1() {
-	%B = load i212* @X
+	%B = load i212, i212* @X
 	ret i212 %B
 }
 
 define internal float @test2() {
 	%A = getelementptr [2 x { i212, float}], [2 x { i212, float}]* @Y, i32 0, i32 1, i32 1
-	%B = load float* %A
+	%B = load float, float* %A
 	ret float %B
 }
 
 define internal i212 @test3() {
 	%A = getelementptr [2 x { i212, float}], [2 x { i212, float}]* @Y, i32 0, i32 0, i32 0
-	%B = load i212* %A
+	%B = load i212, i212* %A
 	ret i212 %B
 }
 
diff --git a/llvm/test/Transforms/SCCP/atomic-load-store.ll b/llvm/test/Transforms/SCCP/atomic-load-store.ll
index 53e4c10..45b5d7c 100644
--- a/llvm/test/Transforms/SCCP/atomic-load-store.ll
+++ b/llvm/test/Transforms/SCCP/atomic-load-store.ll
@@ -6,7 +6,7 @@
 @C = internal constant i32 222
 
 define i32 @test1() {
-	%V = load atomic i32* @G seq_cst, align 4
+	%V = load atomic i32, i32* @G seq_cst, align 4
 	%C = icmp eq i32 %V, 17
 	br i1 %C, label %T, label %F
 T:
@@ -21,7 +21,7 @@
 ; CHECK: ret i32 17
 
 define i32 @test2() {
-	%V = load atomic i32* @C seq_cst, align 4
+	%V = load atomic i32, i32* @C seq_cst, align 4
 	ret i32 %V
 }
 
diff --git a/llvm/test/Transforms/SCCP/ipsccp-basic.ll b/llvm/test/Transforms/SCCP/ipsccp-basic.ll
index 107b7af..c74063f 100644
--- a/llvm/test/Transforms/SCCP/ipsccp-basic.ll
+++ b/llvm/test/Transforms/SCCP/ipsccp-basic.ll
@@ -50,7 +50,7 @@
 @G = internal global i32 undef
 
 define void @test3a() {
-	%X = load i32* @G
+	%X = load i32, i32* @G
 	store i32 %X, i32* @G
 	ret void
 }
@@ -59,7 +59,7 @@
 
 
 define i32 @test3b() {
-	%V = load i32* @G
+	%V = load i32, i32* @G
 	%C = icmp eq i32 %V, 17
 	br i1 %C, label %T, label %F
 T:
@@ -203,7 +203,7 @@
 define void @test9() {
 entry:
         %local_foo = alloca {  }
-        load {  }* @test9g
+        load {  }, {  }* @test9g
         store {  } %0, {  }* %local_foo
         ret void
 }
diff --git a/llvm/test/Transforms/SCCP/loadtest.ll b/llvm/test/Transforms/SCCP/loadtest.ll
index 3a8ffe6..b88b44b 100644
--- a/llvm/test/Transforms/SCCP/loadtest.ll
+++ b/llvm/test/Transforms/SCCP/loadtest.ll
@@ -10,25 +10,25 @@
 @Y = constant [2 x { i32, float }] [ { i32, float } { i32 12, float 1.000000e+00 }, { i32, float } { i32 37, float 0x3FF3B2FEC0000000 } ]		; <[2 x { i32, float }]*> [#uses=2]
 
 define i32 @test1() {
-	%B = load i32* @X		; <i32> [#uses=1]
+	%B = load i32, i32* @X		; <i32> [#uses=1]
 	ret i32 %B
 }
 
 define float @test2() {
 	%A = getelementptr [2 x { i32, float }], [2 x { i32, float }]* @Y, i64 0, i64 1, i32 1		; <float*> [#uses=1]
-	%B = load float* %A		; <float> [#uses=1]
+	%B = load float, float* %A		; <float> [#uses=1]
 	ret float %B
 }
 
 define i32 @test3() {
 	%A = getelementptr [2 x { i32, float }], [2 x { i32, float }]* @Y, i64 0, i64 0, i32 0		; <i32*> [#uses=1]
-	%B = load i32* %A
+	%B = load i32, i32* %A
 	ret i32 %B
 }
 
 define i8 @test4() {
 	%A = bitcast i32* @X to i8*
-	%B = load i8* %A
+	%B = load i8, i8* %A
 	ret i8 %B
 }
 
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll
index cee51fd..1cff73d 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/commute.ll
@@ -9,7 +9,7 @@
 ; CHECK: %arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
 ; CHECK: %arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
 ; CHECK: %3 = bitcast float* %arrayidx4 to <2 x float>*
-; CHECK: %4 = load <2 x float>* %3, align 4
+; CHECK: %4 = load <2 x float>, <2 x float>* %3, align 4
 ; CHECK: %5 = fsub fast <2 x float> %2, %4
 ; CHECK: %6 = fmul fast <2 x float> %5, %5
 ; CHECK: %7 = extractelement <2 x float> %6, i32 0
@@ -24,10 +24,10 @@
   %conv5 = sitofp i32 %ymin to float
   %conv = sitofp i32 %xmin to float
   %arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
-  %0 = load float* %arrayidx4, align 4
+  %0 = load float, float* %arrayidx4, align 4
   %sub = fsub fast float %conv, %0
   %arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
-  %1 = load float* %arrayidx9, align 4
+  %1 = load float, float* %arrayidx9, align 4
   %sub10 = fsub fast float %conv5, %1
   %mul11 = fmul fast float %sub, %sub
   %mul12 = fmul fast float %sub10, %sub10
@@ -44,7 +44,7 @@
 ; CHECK: %arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
 ; CHECK: %arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
 ; CHECK: %3 = bitcast float* %arrayidx4 to <2 x float>*
-; CHECK: %4 = load <2 x float>* %3, align 4
+; CHECK: %4 = load <2 x float>, <2 x float>* %3, align 4
 ; CHECK: %5 = fsub fast <2 x float> %2, %4
 ; CHECK: %6 = fmul fast <2 x float> %5, %5
 ; CHECK: %7 = extractelement <2 x float> %6, i32 0
@@ -59,10 +59,10 @@
   %conv5 = sitofp i32 %ymin to float
   %conv = sitofp i32 %xmin to float
   %arrayidx4 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 0
-  %0 = load float* %arrayidx4, align 4
+  %0 = load float, float* %arrayidx4, align 4
   %sub = fsub fast float %conv, %0
   %arrayidx9 = getelementptr inbounds %structA, %structA* %J, i64 0, i32 0, i64 1
-  %1 = load float* %arrayidx9, align 4
+  %1 = load float, float* %arrayidx9, align 4
   %sub10 = fsub fast float %conv5, %1
   %mul11 = fmul fast float %sub, %sub
   %mul12 = fmul fast float %sub10, %sub10
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll
index d3afc05..6ff1118 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/load-store-q.ll
@@ -15,8 +15,8 @@
 define void @f(double* %p, double* %q) {
   %addr2 = getelementptr double, double* %q, i32 1
   %addr = getelementptr double, double* %p, i32 1
-  %x = load double* %p
-  %y = load double* %addr
+  %x = load double, double* %p
+  %y = load double, double* %addr
   call void @g()
   store double %x, double* %q
   store double %y, double* %addr2
@@ -40,7 +40,7 @@
   store double %p1, double* %q
   store double %p2, double* %addr2
 
-  %x = load double* %p
-  %y = load double* %addr
+  %x = load double, double* %p
+  %y = load double, double* %addr
   br label %loop
 }
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll
index dddcf3c..72c7082 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll
@@ -9,31 +9,31 @@
 
 define void @test1(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c) {
 entry:
-  %0 = load i32* %b, align 4
-  %1 = load i32* %c, align 4
+  %0 = load i32, i32* %b, align 4
+  %1 = load i32, i32* %c, align 4
   %add = add nsw i32 %1, %0
   %div = sdiv i32 %add, 2
   store i32 %div, i32* %a, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 1
-  %2 = load i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx3, align 4
   %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 1
-  %3 = load i32* %arrayidx4, align 4
+  %3 = load i32, i32* %arrayidx4, align 4
   %add5 = add nsw i32 %3, %2
   %div6 = sdiv i32 %add5, 2
   %arrayidx7 = getelementptr inbounds i32, i32* %a, i64 1
   store i32 %div6, i32* %arrayidx7, align 4
   %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 2
-  %4 = load i32* %arrayidx8, align 4
+  %4 = load i32, i32* %arrayidx8, align 4
   %arrayidx9 = getelementptr inbounds i32, i32* %c, i64 2
-  %5 = load i32* %arrayidx9, align 4
+  %5 = load i32, i32* %arrayidx9, align 4
   %add10 = add nsw i32 %5, %4
   %div11 = sdiv i32 %add10, 2
   %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 2
   store i32 %div11, i32* %arrayidx12, align 4
   %arrayidx13 = getelementptr inbounds i32, i32* %b, i64 3
-  %6 = load i32* %arrayidx13, align 4
+  %6 = load i32, i32* %arrayidx13, align 4
   %arrayidx14 = getelementptr inbounds i32, i32* %c, i64 3
-  %7 = load i32* %arrayidx14, align 4
+  %7 = load i32, i32* %arrayidx14, align 4
   %add15 = add nsw i32 %7, %6
   %div16 = sdiv i32 %add15, 2
   %arrayidx17 = getelementptr inbounds i32, i32* %a, i64 3
diff --git a/llvm/test/Transforms/SLPVectorizer/ARM/memory.ll b/llvm/test/Transforms/SLPVectorizer/ARM/memory.ll
index b8e9a03..57d7cce 100644
--- a/llvm/test/Transforms/SLPVectorizer/ARM/memory.ll
+++ b/llvm/test/Transforms/SLPVectorizer/ARM/memory.ll
@@ -10,10 +10,10 @@
 ; CHECK-NOT: store <2 x double>
 define void @expensive_double_store(double* noalias %dst, double* noalias %src, i64 %count) {
 entry:
-  %0 = load double* %src, align 8
+  %0 = load double, double* %src, align 8
   store double %0, double* %dst, align 8
   %arrayidx2 = getelementptr inbounds double, double* %src, i64 1
-  %1 = load double* %arrayidx2, align 8
+  %1 = load double, double* %arrayidx2, align 8
   %arrayidx3 = getelementptr inbounds double, double* %dst, i64 1
   store double %1, double* %arrayidx3, align 8
   ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/R600/simplebb.ll b/llvm/test/Transforms/SLPVectorizer/R600/simplebb.ll
index f88b86d..9ed86f8 100644
--- a/llvm/test/Transforms/SLPVectorizer/R600/simplebb.ll
+++ b/llvm/test/Transforms/SLPVectorizer/R600/simplebb.ll
@@ -6,17 +6,17 @@
 ; Simple 3-pair chain with loads and stores
 define void @test1_as_3_3_3(double addrspace(3)* %a, double addrspace(3)* %b, double addrspace(3)* %c) {
 ; CHECK-LABEL: @test1_as_3_3_3(
-; CHECK: load <2 x double> addrspace(3)*
-; CHECK: load <2 x double> addrspace(3)*
+; CHECK: load <2 x double>, <2 x double> addrspace(3)*
+; CHECK: load <2 x double>, <2 x double> addrspace(3)*
 ; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* %
 ; CHECK: ret
-  %i0 = load double addrspace(3)* %a, align 8
-  %i1 = load double addrspace(3)* %b, align 8
+  %i0 = load double, double addrspace(3)* %a, align 8
+  %i1 = load double, double addrspace(3)* %b, align 8
   %mul = fmul double %i0, %i1
   %arrayidx3 = getelementptr inbounds double, double addrspace(3)* %a, i64 1
-  %i3 = load double addrspace(3)* %arrayidx3, align 8
+  %i3 = load double, double addrspace(3)* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double addrspace(3)* %b, i64 1
-  %i4 = load double addrspace(3)* %arrayidx4, align 8
+  %i4 = load double, double addrspace(3)* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   store double %mul, double addrspace(3)* %c, align 8
   %arrayidx5 = getelementptr inbounds double, double addrspace(3)* %c, i64 1
@@ -26,17 +26,17 @@
 
 define void @test1_as_3_0_0(double addrspace(3)* %a, double* %b, double* %c) {
 ; CHECK-LABEL: @test1_as_3_0_0(
-; CHECK: load <2 x double> addrspace(3)*
-; CHECK: load <2 x double>*
+; CHECK: load <2 x double>, <2 x double> addrspace(3)*
+; CHECK: load <2 x double>, <2 x double>*
 ; CHECK: store <2 x double> %{{.*}}, <2 x double>* %
 ; CHECK: ret
-  %i0 = load double addrspace(3)* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double addrspace(3)* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %arrayidx3 = getelementptr inbounds double, double addrspace(3)* %a, i64 1
-  %i3 = load double addrspace(3)* %arrayidx3, align 8
+  %i3 = load double, double addrspace(3)* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   store double %mul, double* %c, align 8
   %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
@@ -46,17 +46,17 @@
 
 define void @test1_as_0_0_3(double* %a, double* %b, double addrspace(3)* %c) {
 ; CHECK-LABEL: @test1_as_0_0_3(
-; CHECK: load <2 x double>*
-; CHECK: load <2 x double>*
+; CHECK: load <2 x double>, <2 x double>*
+; CHECK: load <2 x double>, <2 x double>*
 ; CHECK: store <2 x double> %{{.*}}, <2 x double> addrspace(3)* %
 ; CHECK: ret
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   store double %mul, double addrspace(3)* %c, align 8
   %arrayidx5 = getelementptr inbounds double, double addrspace(3)* %c, i64 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
index 8c2777a..bc0beec 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/addsub.ll
@@ -21,35 +21,35 @@
 ; Function Attrs: nounwind uwtable
 define void @addsub() #0 {
 entry:
-  %0 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4
-  %1 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4
+  %0 = load i32, i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4
+  %1 = load i32, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4
   %add = add nsw i32 %0, %1
-  %2 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4
-  %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4
+  %2 = load i32, i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4
+  %3 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4
   %add1 = add nsw i32 %2, %3
   %add2 = add nsw i32 %add, %add1
   store i32 %add2, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 0), align 4
-  %4 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4
-  %5 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4
+  %4 = load i32, i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4
+  %5 = load i32, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4
   %add3 = add nsw i32 %4, %5
-  %6 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4
-  %7 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4
+  %6 = load i32, i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4
+  %7 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4
   %add4 = add nsw i32 %6, %7
   %sub = sub nsw i32 %add3, %add4
   store i32 %sub, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 1), align 4
-  %8 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4
-  %9 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4
+  %8 = load i32, i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4
+  %9 = load i32, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4
   %add5 = add nsw i32 %8, %9
-  %10 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4
-  %11 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4
+  %10 = load i32, i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4
+  %11 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4
   %add6 = add nsw i32 %10, %11
   %add7 = add nsw i32 %add5, %add6
   store i32 %add7, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 2), align 4
-  %12 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4
-  %13 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4
+  %12 = load i32, i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4
+  %13 = load i32, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4
   %add8 = add nsw i32 %12, %13
-  %14 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4
-  %15 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 3), align 4
+  %14 = load i32, i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4
+  %15 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 3), align 4
   %add9 = add nsw i32 %14, %15
   %sub10 = sub nsw i32 %add8, %add9
   store i32 %sub10, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 3), align 4
@@ -65,35 +65,35 @@
 ; Function Attrs: nounwind uwtable
 define void @subadd() #0 {
 entry:
-  %0 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4
-  %1 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4
+  %0 = load i32, i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4
+  %1 = load i32, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4
   %add = add nsw i32 %0, %1
-  %2 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4
-  %3 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4
+  %2 = load i32, i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4
+  %3 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 0), align 4
   %add1 = add nsw i32 %2, %3
   %sub = sub nsw i32 %add, %add1
   store i32 %sub, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 0), align 4
-  %4 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4
-  %5 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4
+  %4 = load i32, i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4
+  %5 = load i32, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4
   %add2 = add nsw i32 %4, %5
-  %6 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4
-  %7 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4
+  %6 = load i32, i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4
+  %7 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 1), align 4
   %add3 = add nsw i32 %6, %7
   %add4 = add nsw i32 %add2, %add3
   store i32 %add4, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 1), align 4
-  %8 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4
-  %9 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4
+  %8 = load i32, i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4
+  %9 = load i32, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4
   %add5 = add nsw i32 %8, %9
-  %10 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4
-  %11 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4
+  %10 = load i32, i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4
+  %11 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 2), align 4
   %add6 = add nsw i32 %10, %11
   %sub7 = sub nsw i32 %add5, %add6
   store i32 %sub7, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 2), align 4
-  %12 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4
-  %13 = load i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4
+  %12 = load i32, i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4
+  %13 = load i32, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4
   %add8 = add nsw i32 %12, %13
-  %14 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4
-  %15 = load i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 3), align 4
+  %14 = load i32, i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4
+  %15 = load i32, i32* getelementptr inbounds ([4 x i32]* @e, i32 0, i64 3), align 4
   %add9 = add nsw i32 %14, %15
   %add10 = add nsw i32 %add8, %add9
   store i32 %add10, i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 3), align 4
@@ -107,20 +107,20 @@
 ; Function Attrs: nounwind uwtable
 define void @faddfsub() #0 {
 entry:
-  %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
-  %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4
+  %0 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
+  %1 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4
   %add = fadd float %0, %1
   store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4
-  %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
-  %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4
+  %2 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
+  %3 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4
   %sub = fsub float %2, %3
   store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4
-  %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
-  %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4
+  %4 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
+  %5 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4
   %add1 = fadd float %4, %5
   store float %add1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4
-  %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
-  %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4
+  %6 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
+  %7 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4
   %sub2 = fsub float %6, %7
   store float %sub2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4
   ret void
@@ -133,20 +133,20 @@
 ; Function Attrs: nounwind uwtable
 define void @fsubfadd() #0 {
 entry:
-  %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
-  %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4
+  %0 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
+  %1 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4
   %sub = fsub float %0, %1
   store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4
-  %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
-  %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4
+  %2 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
+  %3 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4
   %add = fadd float %2, %3
   store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4
-  %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
-  %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4
+  %4 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
+  %5 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4
   %sub1 = fsub float %4, %5
   store float %sub1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4
-  %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
-  %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4
+  %6 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
+  %7 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4
   %add2 = fadd float %6, %7
   store float %add2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4
   ret void
@@ -159,20 +159,20 @@
 ; Function Attrs: nounwind uwtable
 define void @No_faddfsub() #0 {
 entry:
-  %0 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
-  %1 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4
+  %0 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
+  %1 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4
   %add = fadd float %0, %1
   store float %add, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4
-  %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
-  %3 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4
+  %2 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
+  %3 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4
   %add1 = fadd float %2, %3
   store float %add1, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4
-  %4 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
-  %5 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4
+  %4 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
+  %5 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4
   %add2 = fadd float %4, %5
   store float %add2, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4
-  %6 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
-  %7 = load float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4
+  %6 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
+  %7 = load float, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4
   %sub = fsub float %6, %7
   store float %sub, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4
   ret void
@@ -189,20 +189,20 @@
 ; CHECK: %4 = fsub <4 x float> %1, %2
 ; CHECK: %5 = shufflevector <4 x float> %3, <4 x float> %4, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
 define void @reorder_alt() #0 {
-  %1 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
-  %2 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4
+  %1 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
+  %2 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4
   %3 = fadd float %1, %2
   store float %3, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4
-  %4 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4
-  %5 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
+  %4 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4
+  %5 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
   %6 = fsub float %4, %5
   store float %6, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4
-  %7 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4
-  %8 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
+  %7 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4
+  %8 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
   %9 = fadd float %7, %8
   store float %9, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4
-  %10 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4
-  %11 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
+  %10 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4
+  %11 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
   %12 = fsub float %10, %11
   store float %12, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4
   ret void
@@ -222,27 +222,27 @@
 ; CHECK: %8 = fsub <4 x float> %1, %6
 ; CHECK: %9 = shufflevector <4 x float> %7, <4 x float> %8, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
 define void @reorder_alt_subTree() #0 {
-  %1 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4
-  %2 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
-  %3 = load float* getelementptr inbounds ([4 x float]* @fd, i32 0, i64 0), align 4
+  %1 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4
+  %2 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
+  %3 = load float, float* getelementptr inbounds ([4 x float]* @fd, i32 0, i64 0), align 4
   %4 = fsub float %2, %3
   %5 = fadd float %1, %4
   store float %5, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4
-  %6 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4
-  %7 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
-  %8 = load float* getelementptr inbounds ([4 x float]* @fd, i32 0, i64 1), align 4
+  %6 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4
+  %7 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
+  %8 = load float, float* getelementptr inbounds ([4 x float]* @fd, i32 0, i64 1), align 4
   %9 = fadd float %7, %8
   %10 = fsub float %6, %9
   store float %10, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4
-  %11 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4
-  %12 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
-  %13 = load float* getelementptr inbounds ([4 x float]* @fd, i32 0, i64 2), align 4
+  %11 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4
+  %12 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
+  %13 = load float, float* getelementptr inbounds ([4 x float]* @fd, i32 0, i64 2), align 4
   %14 = fsub float %12, %13
   %15 = fadd float %11, %14
   store float %15, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4
-  %16 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4
-  %17 = load float* getelementptr inbounds ([4 x float]* @fd, i32 0, i64 3), align 4
-  %18 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
+  %16 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4
+  %17 = load float, float* getelementptr inbounds ([4 x float]* @fd, i32 0, i64 3), align 4
+  %18 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
   %19 = fadd float %17, %18
   %20 = fsub float %16, %19
   store float %20, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4
@@ -258,18 +258,18 @@
 ; CHECK: fsub <2 x double>
 ; CHECK: shufflevector <2 x double> 
 define void @reorder_alt_rightsubTree(double* nocapture %c, double* noalias nocapture readonly %a, double* noalias nocapture readonly %b, double* noalias nocapture readonly %d) {
-  %1 = load double* %a
-  %2 = load double* %b
+  %1 = load double, double* %a
+  %2 = load double, double* %b
   %3 = fadd double %1, %2
-  %4 = load double* %d
+  %4 = load double, double* %d
   %5 = fsub double %3, %4
   store double %5, double* %c
   %6 = getelementptr inbounds double, double* %d, i64 1
-  %7 = load double* %6
+  %7 = load double, double* %6
   %8 = getelementptr inbounds double, double* %a, i64 1
-  %9 = load double* %8
+  %9 = load double, double* %8
   %10 = getelementptr inbounds double, double* %b, i64 1
-  %11 = load double* %10
+  %11 = load double, double* %10
   %12 = fadd double %9, %11
   %13 = fadd double %7, %12
   %14 = getelementptr inbounds double, double* %c, i64 1
@@ -290,20 +290,20 @@
 ; CHECK-NOT: fsub <4 x float>
 ; CHECK-NOT: shufflevector
 define void @no_vec_shuff_reorder() #0 {
-  %1 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
-  %2 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4
+  %1 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 0), align 4
+  %2 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 0), align 4
   %3 = fadd float %1, %2
   store float %3, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 0), align 4
-  %4 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4
-  %5 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
+  %4 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 1), align 4
+  %5 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 1), align 4
   %6 = fsub float %4, %5
   store float %6, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 1), align 4
-  %7 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4
-  %8 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
+  %7 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 2), align 4
+  %8 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 2), align 4
   %9 = fadd float %7, %8
   store float %9, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 2), align 4
-  %10 = load float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
-  %11 = load float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4
+  %10 = load float, float* getelementptr inbounds ([4 x float]* @fb, i32 0, i64 3), align 4
+  %11 = load float, float* getelementptr inbounds ([4 x float]* @fa, i32 0, i64 3), align 4
   %12 = fsub float %10, %11
   store float %12, float* getelementptr inbounds ([4 x float]* @fc, i32 0, i64 3), align 4
   ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/align.ll b/llvm/test/Transforms/SLPVectorizer/X86/align.ll
index 2d6afaf..b74b709 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/align.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/align.ll
@@ -8,16 +8,16 @@
 define void @test1(double* %a, double* %b, double* %c) {
 entry:
   %agg.tmp.i.i.sroa.0 = alloca [3 x double], align 16
-; CHECK: %[[V0:[0-9]+]] = load <2 x double>* %[[V2:[0-9]+]], align 8
-  %i0 = load double* %a 
-  %i1 = load double* %b 
+; CHECK: %[[V0:[0-9]+]] = load <2 x double>, <2 x double>* %[[V2:[0-9]+]], align 8
+  %i0 = load double, double* %a
+  %i1 = load double, double* %b
   %mul = fmul double %i0, %i1
   %store1 = getelementptr inbounds [3 x double], [3 x double]* %agg.tmp.i.i.sroa.0, i64 0, i64 1
   %store2 = getelementptr inbounds [3 x double], [3 x double]* %agg.tmp.i.i.sroa.0, i64 0, i64 2
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
 ; CHECK: store <2 x double> %[[V1:[0-9]+]], <2 x double>* %[[V2:[0-9]+]], align 8
   store double %mul, double* %store1
@@ -37,13 +37,13 @@
 
 define void @test2(float * %a, float * %b) {
 entry:
-  %l0 = load float* %a
+  %l0 = load float, float* %a
   %a1 = getelementptr inbounds float, float* %a, i64 1
-  %l1 = load float* %a1
+  %l1 = load float, float* %a1
   %a2 = getelementptr inbounds float, float* %a, i64 2
-  %l2 = load float* %a2
+  %l2 = load float, float* %a2
   %a3 = getelementptr inbounds float, float* %a, i64 3
-  %l3 = load float* %a3
+  %l3 = load float, float* %a3
   store float %l0, float* %b
   %b1 = getelementptr inbounds float, float* %b, i64 1
   store float %l1, float* %b1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/call.ll b/llvm/test/Transforms/SLPVectorizer/X86/call.ll
index 1bb0382..b76ac2c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/call.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/call.ll
@@ -15,14 +15,14 @@
 ; CHECK: ret void
 define void @sin_libm(double* %a, double* %b, double* %c) {
 entry:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %call = tail call double @sin(double %mul) nounwind readnone
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   %call5 = tail call double @sin(double %mul5) nounwind readnone
   store double %call, double* %c, align 8
@@ -36,14 +36,14 @@
 ; CHECK: ret void
 define void @cos_libm(double* %a, double* %b, double* %c) {
 entry:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %call = tail call double @cos(double %mul) nounwind readnone
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   %call5 = tail call double @cos(double %mul5) nounwind readnone
   store double %call, double* %c, align 8
@@ -57,14 +57,14 @@
 ; CHECK: ret void
 define void @pow_libm(double* %a, double* %b, double* %c) {
 entry:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %call = tail call double @pow(double %mul,double %mul) nounwind readnone
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   %call5 = tail call double @pow(double %mul5,double %mul5) nounwind readnone
   store double %call, double* %c, align 8
@@ -79,14 +79,14 @@
 ; CHECK: ret void
 define void @exp2_libm(double* %a, double* %b, double* %c) {
 entry:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %call = tail call double @exp2(double %mul) nounwind readnone
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   %call5 = tail call double @exp2(double %mul5) nounwind readnone
   store double %call, double* %c, align 8
@@ -102,14 +102,14 @@
 ; CHECK: ret void
 define void @round_custom(i64* %a, i64* %b, i64* %c) {
 entry:
-  %i0 = load i64* %a, align 8
-  %i1 = load i64* %b, align 8
+  %i0 = load i64, i64* %a, align 8
+  %i1 = load i64, i64* %b, align 8
   %mul = mul i64 %i0, %i1
   %call = tail call i64 @round(i64 %mul) nounwind readnone
   %arrayidx3 = getelementptr inbounds i64, i64* %a, i64 1
-  %i3 = load i64* %arrayidx3, align 8
+  %i3 = load i64, i64* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds i64, i64* %b, i64 1
-  %i4 = load i64* %arrayidx4, align 8
+  %i4 = load i64, i64* %arrayidx4, align 8
   %mul5 = mul i64 %i3, %i4
   %call5 = tail call i64 @round(i64 %mul5) nounwind readnone
   store i64 %call, i64* %c, align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cast.ll b/llvm/test/Transforms/SLPVectorizer/X86/cast.ll
index 357efc5..044db5d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cast.ll
@@ -15,21 +15,21 @@
 ;CHECK: store <4 x i32>
 define i32 @foo(i32* noalias nocapture %A, i8* noalias nocapture %B) {
 entry:
-  %0 = load i8* %B, align 1
+  %0 = load i8, i8* %B, align 1
   %conv = sext i8 %0 to i32
   store i32 %conv, i32* %A, align 4
   %arrayidx2 = getelementptr inbounds i8, i8* %B, i64 1
-  %1 = load i8* %arrayidx2, align 1
+  %1 = load i8, i8* %arrayidx2, align 1
   %conv3 = sext i8 %1 to i32
   %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1
   store i32 %conv3, i32* %arrayidx4, align 4
   %arrayidx5 = getelementptr inbounds i8, i8* %B, i64 2
-  %2 = load i8* %arrayidx5, align 1
+  %2 = load i8, i8* %arrayidx5, align 1
   %conv6 = sext i8 %2 to i32
   %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 2
   store i32 %conv6, i32* %arrayidx7, align 4
   %arrayidx8 = getelementptr inbounds i8, i8* %B, i64 3
-  %3 = load i8* %arrayidx8, align 1
+  %3 = load i8, i8* %arrayidx8, align 1
   %conv9 = sext i8 %3 to i32
   %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 3
   store i32 %conv9, i32* %arrayidx10, align 4
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll
index 92efaa1..a3e2b21 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll
@@ -17,12 +17,12 @@
 define i32 @foo(double* noalias nocapture %A, double* noalias nocapture %B, double %G) {
 entry:
   %arrayidx = getelementptr inbounds double, double* %B, i64 10
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %tobool = fcmp une double %0, 0.000000e+00
   %cond = select i1 %tobool, double %G, double 1.000000e+00
   store double %cond, double* %A, align 8
   %arrayidx2 = getelementptr inbounds double, double* %B, i64 11
-  %1 = load double* %arrayidx2, align 8
+  %1 = load double, double* %arrayidx2, align 8
   %tobool3 = fcmp une double %1, 0.000000e+00
   %cond7 = select i1 %tobool3, double %G, double 1.000000e+00
   %arrayidx8 = getelementptr inbounds double, double* %A, i64 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
index 4b78ac3..8555fe0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
@@ -22,13 +22,13 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
   %0 = shl nsw i64 %indvars.iv, 1
   %arrayidx = getelementptr inbounds double, double* %A, i64 %0
-  %1 = load double* %arrayidx, align 8
+  %1 = load double, double* %arrayidx, align 8
   %mul1 = fmul double %conv, %1
   %mul2 = fmul double %mul1, 7.000000e+00
   %add = fadd double %mul2, 5.000000e+00
   %2 = or i64 %0, 1
   %arrayidx6 = getelementptr inbounds double, double* %A, i64 %2
-  %3 = load double* %arrayidx6, align 8
+  %3 = load double, double* %arrayidx6, align 8
   %mul8 = fmul double %conv, %3
   %mul9 = fmul double %mul8, 4.000000e+00
   %add10 = fadd double %mul9, 9.000000e+00
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll b/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll
index ab7380a..1ad4d69 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/consecutive-access.ll
@@ -21,25 +21,25 @@
   %mul = mul nsw i32 %u, 3
   %idxprom = sext i32 %mul to i64
   %arrayidx = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 %idxprom
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %arrayidx4 = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 %idxprom
-  %1 = load double* %arrayidx4, align 8
+  %1 = load double, double* %arrayidx4, align 8
   %add5 = fadd double %0, %1
   store double %add5, double* %arrayidx, align 8
   %add11 = add nsw i32 %mul, 1
   %idxprom12 = sext i32 %add11 to i64
   %arrayidx13 = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 %idxprom12
-  %2 = load double* %arrayidx13, align 8
+  %2 = load double, double* %arrayidx13, align 8
   %arrayidx17 = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 %idxprom12
-  %3 = load double* %arrayidx17, align 8
+  %3 = load double, double* %arrayidx17, align 8
   %add18 = fadd double %2, %3
   store double %add18, double* %arrayidx13, align 8
   %add24 = add nsw i32 %mul, 2
   %idxprom25 = sext i32 %add24 to i64
   %arrayidx26 = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 %idxprom25
-  %4 = load double* %arrayidx26, align 8
+  %4 = load double, double* %arrayidx26, align 8
   %arrayidx30 = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 %idxprom25
-  %5 = load double* %arrayidx30, align 8
+  %5 = load double, double* %arrayidx30, align 8
   %add31 = fadd double %4, %5
   store double %add31, double* %arrayidx26, align 8
   ret void
@@ -58,17 +58,17 @@
   %mul = mul nsw i32 %u, 2
   %idxprom = sext i32 %mul to i64
   %arrayidx = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 %idxprom
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %arrayidx4 = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 %idxprom
-  %1 = load double* %arrayidx4, align 8
+  %1 = load double, double* %arrayidx4, align 8
   %add5 = fadd double %0, %1
   store double %add5, double* %arrayidx, align 8
   %add11 = add nsw i32 %mul, 1
   %idxprom12 = sext i32 %add11 to i64
   %arrayidx13 = getelementptr inbounds [2000 x double], [2000 x double]* @A, i32 0, i64 %idxprom12
-  %2 = load double* %arrayidx13, align 8
+  %2 = load double, double* %arrayidx13, align 8
   %arrayidx17 = getelementptr inbounds [2000 x double], [2000 x double]* @B, i32 0, i64 %idxprom12
-  %3 = load double* %arrayidx17, align 8
+  %3 = load double, double* %arrayidx17, align 8
   %add18 = fadd double %2, %3
   store double %add18, double* %arrayidx13, align 8
   ret void
@@ -85,33 +85,33 @@
   %mul = mul nsw i32 %u, 4
   %idxprom = sext i32 %mul to i64
   %arrayidx = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 %idxprom
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %arrayidx4 = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 %idxprom
-  %1 = load float* %arrayidx4, align 4
+  %1 = load float, float* %arrayidx4, align 4
   %add5 = fadd float %0, %1
   store float %add5, float* %arrayidx, align 4
   %add11 = add nsw i32 %mul, 1
   %idxprom12 = sext i32 %add11 to i64
   %arrayidx13 = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 %idxprom12
-  %2 = load float* %arrayidx13, align 4
+  %2 = load float, float* %arrayidx13, align 4
   %arrayidx17 = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 %idxprom12
-  %3 = load float* %arrayidx17, align 4
+  %3 = load float, float* %arrayidx17, align 4
   %add18 = fadd float %2, %3
   store float %add18, float* %arrayidx13, align 4
   %add24 = add nsw i32 %mul, 2
   %idxprom25 = sext i32 %add24 to i64
   %arrayidx26 = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 %idxprom25
-  %4 = load float* %arrayidx26, align 4
+  %4 = load float, float* %arrayidx26, align 4
   %arrayidx30 = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 %idxprom25
-  %5 = load float* %arrayidx30, align 4
+  %5 = load float, float* %arrayidx30, align 4
   %add31 = fadd float %4, %5
   store float %add31, float* %arrayidx26, align 4
   %add37 = add nsw i32 %mul, 3
   %idxprom38 = sext i32 %add37 to i64
   %arrayidx39 = getelementptr inbounds [2000 x float], [2000 x float]* @C, i32 0, i64 %idxprom38
-  %6 = load float* %arrayidx39, align 4
+  %6 = load float, float* %arrayidx39, align 4
   %arrayidx43 = getelementptr inbounds [2000 x float], [2000 x float]* @D, i32 0, i64 %idxprom38
-  %7 = load float* %arrayidx43, align 4
+  %7 = load float, float* %arrayidx43, align 4
   %add44 = fadd float %6, %7
   store float %add44, float* %arrayidx39, align 4
   ret void
@@ -143,12 +143,12 @@
   %mul = mul nsw i32 %0, 2
   %idxprom = sext i32 %mul to i64
   %arrayidx = getelementptr inbounds double, double* %A, i64 %idxprom
-  %2 = load double* %arrayidx, align 8
+  %2 = load double, double* %arrayidx, align 8
   %mul1 = fmul double 7.000000e+00, %2
   %add = add nsw i32 %mul, 1
   %idxprom3 = sext i32 %add to i64
   %arrayidx4 = getelementptr inbounds double, double* %A, i64 %idxprom3
-  %3 = load double* %arrayidx4, align 8
+  %3 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double 7.000000e+00, %3
   %add6 = fadd double %mul1, %mul5
   %add7 = fadd double %1, %add6
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll b/llvm/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll
index b53169c..ecae70e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/continue_vectorizing.ll
@@ -9,21 +9,21 @@
 ; CHECK: ret
 define void @test1(double* %a, double* %b, double* %c, double* %d) {
 entry:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   store double %mul, double* %c, align 8
   %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
   store double %mul5, double* %arrayidx5, align 8
   %0 = bitcast double* %a to <4 x i32>*
-  %1 = load <4 x i32>* %0, align 8
+  %1 = load <4 x i32>, <4 x i32>* %0, align 8
   %2 = bitcast double* %b to <4 x i32>*
-  %3 = load <4 x i32>* %2, align 8
+  %3 = load <4 x i32>, <4 x i32>* %2, align 8
   %4 = mul <4 x i32> %1, %3
   %5 = bitcast double* %d to <4 x i32>*
   store <4 x i32> %4, <4 x i32>* %5, align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll
index dc99366..9046c35 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_binaryop.ll
@@ -7,7 +7,7 @@
 
 define i32 @fn1() {
 entry:
-  %init = load double* @a, align 8
+  %init = load double, double* @a, align 8
   br label %loop
 
 loop:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll
index b5b2f26..1bad671 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll
@@ -45,7 +45,7 @@
 entry:
   %arrayidx26 = getelementptr inbounds %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332, %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this, i64 0, i32 2, i64 0, i32 0, i64 1
   %arrayidx36 = getelementptr inbounds %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332, %class.GIM_TRIANGLE_CALCULATION_CACHE.9.34.69.94.119.144.179.189.264.284.332* %this, i64 0, i32 2, i64 0, i32 0, i64 2
-  %0 = load float* %arrayidx36, align 4
+  %0 = load float, float* %arrayidx36, align 4
   %add587 = fadd float undef, undef
   %sub600 = fsub float %add587, undef
   store float %sub600, float* undef, align 4
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
index 8ca6394..f10c862 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
@@ -13,7 +13,7 @@
   %s1.055 = phi float [ 0.000000e+00, %entry ], [ %cond.i40, %for.body ]
   %s0.054 = phi float [ 0.000000e+00, %entry ], [ %cond.i44, %for.body ]
   %arrayidx = getelementptr inbounds float, float* %src, i64 %indvars.iv
-  %0 = load float* %arrayidx, align 4
+  %0 = load float, float* %arrayidx, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %arrayidx2 = getelementptr inbounds float, float* %dest, i64 %indvars.iv
   store float %acc1.056, float* %arrayidx2, align 4
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll
index f1ef957..28b7aa3 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_dequeue.ll
@@ -8,12 +8,12 @@
 define void @_ZSt6uniqueISt15_Deque_iteratorIdRdPdEET_S4_S4_(%"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* nocapture %__last) {
 entry:
   %_M_cur2.i.i = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, i64 0, i32 0
-  %0 = load double** %_M_cur2.i.i, align 8
+  %0 = load double*, double** %_M_cur2.i.i, align 8
   %_M_first3.i.i = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__first, i64 0, i32 1
   %_M_cur2.i.i81 = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__last, i64 0, i32 0
-  %1 = load double** %_M_cur2.i.i81, align 8
+  %1 = load double*, double** %_M_cur2.i.i81, align 8
   %_M_first3.i.i83 = getelementptr inbounds %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731", %"struct.std::_Deque_iterator.4.157.174.208.259.276.344.731"* %__last, i64 0, i32 1
-  %2 = load double** %_M_first3.i.i83, align 8
+  %2 = load double*, double** %_M_first3.i.i83, align 8
   br i1 undef, label %_ZSt13adjacent_findISt15_Deque_iteratorIdRdPdEET_S4_S4_.exit, label %while.cond.i.preheader
 
 while.cond.i.preheader:                           ; preds = %entry
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_gep.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_gep.ll
index aa18572..bd1e8f7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_gep.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_gep.ll
@@ -8,7 +8,7 @@
 ; Function Attrs: nounwind uwtable
 define i32 @fn1() {
 entry:
-  %0 = load i64** @a, align 8
+  %0 = load i64*, i64** @a, align 8
   %add.ptr = getelementptr inbounds i64, i64* %0, i64 1
   %1 = ptrtoint i64* %add.ptr to i64
   %arrayidx = getelementptr inbounds i64, i64* %0, i64 2
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
index 4ddb27a..70b13fd 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
@@ -81,10 +81,10 @@
 entry:
   %arrayidx41 = getelementptr inbounds double, double* %inbuf, i64 2
   %arrayidx44 = getelementptr inbounds double, double* %inbuf, i64 1
-  %0 = load double* %arrayidx44, align 8
+  %0 = load double, double* %arrayidx44, align 8
   %add46 = fadd double %0, undef
   store double %add46, double* %arrayidx41, align 8
-  %1 = load double* %inbuf, align 8
+  %1 = load double, double* %inbuf, align 8
   %add49 = fadd double %1, %0
   store double %add49, double* %arrayidx44, align 8
   ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll
index 109c3c9..f82343f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll
@@ -55,9 +55,9 @@
 
 define void @zot(%struct.hoge* %arg) {
 bb:
-  %tmp = load double* undef, align 8
+  %tmp = load double, double* undef, align 8
   %tmp1 = fsub double %tmp, undef
-  %tmp2 = load double* undef, align 8
+  %tmp2 = load double, double* undef, align 8
   %tmp3 = fsub double %tmp2, undef
   %tmp4 = fmul double %tmp3, undef
   %tmp5 = fmul double %tmp3, undef
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
index 8da3c34..9a5eb12 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
@@ -13,14 +13,14 @@
 
 define i32 @fn1() {
 entry:
-  %0 = load i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 0), align 4
-  %1 = load i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 1), align 4
-  %2 = load i32* @d, align 4
+  %0 = load i32, i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 0), align 4
+  %1 = load i32, i32* getelementptr inbounds (%struct.DState* @b, i32 0, i32 1), align 4
+  %2 = load i32, i32* @d, align 4
   %cond = icmp eq i32 %2, 0
   br i1 %cond, label %sw.bb, label %save_state_and_return
 
 sw.bb:                                            ; preds = %entry
-  %3 = load i32* @c, align 4
+  %3 = load i32, i32* @c, align 4
   %and = and i32 %3, 7
   store i32 %and, i32* @a, align 4
   switch i32 %and, label %if.end [
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll
index 8f023f8..45ca99a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll
@@ -30,8 +30,8 @@
   %9 = phi double [ 1.800000e+01, %0 ], [ %10, %18 ], [ %10, %17 ], [ %10, %17 ]
   store double %9, double* %1, align 8
   store double %8, double* %2, align 8
-  %10 = load double* %3, align 8
-  %11 = load double* %4, align 8
+  %10 = load double, double* %3, align 8
+  %11 = load double, double* %4, align 8
   br i1 undef, label %12, label %13
 
 ; <label>:12                                      ; preds = %7
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll b/llvm/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll
index 1f78f92..ea0064d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cross_block_slp.ll
@@ -26,9 +26,9 @@
 ;CHECK: ret
 define i32 @foo(double* nocapture %A, float* nocapture %B, i32 %g) {
 entry:
-  %0 = load float* %B, align 4
+  %0 = load float, float* %B, align 4
   %arrayidx1 = getelementptr inbounds float, float* %B, i64 1
-  %1 = load float* %arrayidx1, align 4
+  %1 = load float, float* %arrayidx1, align 4
   %add = fadd float %0, 5.000000e+00
   %add2 = fadd float %1, 8.000000e+00
   %tobool = icmp eq i32 %g, 0
@@ -40,12 +40,12 @@
 
 if.end:
   %conv = fpext float %add to double
-  %2 = load double* %A, align 8
+  %2 = load double, double* %A, align 8
   %add4 = fadd double %conv, %2
   store double %add4, double* %A, align 8
   %conv5 = fpext float %add2 to double
   %arrayidx6 = getelementptr inbounds double, double* %A, i64 1
-  %3 = load double* %arrayidx6, align 8
+  %3 = load double, double* %arrayidx6, align 8
   %add7 = fadd double %conv5, %3
   store double %add7, double* %arrayidx6, align 8
   ret i32 undef
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll
index a0db886..9f56e21 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll
@@ -22,12 +22,12 @@
 define i32 @test(double* nocapture %G) {
 entry:
   %arrayidx = getelementptr inbounds double, double* %G, i64 5
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %mul = fmul double %0, 4.000000e+00
   %add = fadd double %mul, 1.000000e+00
   store double %add, double* %G, align 8
   %arrayidx2 = getelementptr inbounds double, double* %G, i64 6
-  %1 = load double* %arrayidx2, align 8
+  %1 = load double, double* %arrayidx2, align 8
   %mul3 = fmul double %1, 3.000000e+00
   %add4 = fadd double %mul3, 6.000000e+00
   %arrayidx5 = getelementptr inbounds double, double* %G, i64 1
@@ -55,26 +55,26 @@
 ;CHECK: ret
 define i32 @foo(double* nocapture %A, i32 %n) {
 entry:
-  %0 = load double* %A, align 8
+  %0 = load double, double* %A, align 8
   %mul = fmul double %0, 7.900000e+00
   %conv = sitofp i32 %n to double
   %mul1 = fmul double %conv, %mul
   %add = fadd double %mul1, 6.000000e+00
   store double %add, double* %A, align 8
   %arrayidx3 = getelementptr inbounds double, double* %A, i64 1
-  %1 = load double* %arrayidx3, align 8
+  %1 = load double, double* %arrayidx3, align 8
   %mul4 = fmul double %1, 7.700000e+00
   %mul6 = fmul double %conv, %mul4
   %add7 = fadd double %mul6, 2.000000e+00
   store double %add7, double* %arrayidx3, align 8
   %arrayidx9 = getelementptr inbounds double, double* %A, i64 2
-  %2 = load double* %arrayidx9, align 8
+  %2 = load double, double* %arrayidx9, align 8
   %mul10 = fmul double %2, 7.600000e+00
   %mul12 = fmul double %conv, %mul10
   %add13 = fadd double %mul12, 3.000000e+00
   store double %add13, double* %arrayidx9, align 8
   %arrayidx15 = getelementptr inbounds double, double* %A, i64 3
-  %3 = load double* %arrayidx15, align 8
+  %3 = load double, double* %arrayidx15, align 8
   %mul16 = fmul double %3, 7.400000e+00
   %mul18 = fmul double %conv, %mul16
   %add19 = fadd double %mul18, 4.000000e+00
@@ -102,7 +102,7 @@
 define i32 @test2(double* nocapture %G, i32 %k) {
   %1 = icmp eq i32 %k, 0
   %2 = getelementptr inbounds double, double* %G, i64 5
-  %3 = load double* %2, align 8
+  %3 = load double, double* %2, align 8
   %4 = fmul double %3, 4.000000e+00
   br i1 %1, label %12, label %5
 
@@ -110,7 +110,7 @@
   %6 = fadd double %4, 1.000000e+00
   store double %6, double* %G, align 8
   %7 = getelementptr inbounds double, double* %G, i64 6
-  %8 = load double* %7, align 8
+  %8 = load double, double* %7, align 8
   %9 = fmul double %8, 3.000000e+00
   %10 = fadd double %9, 6.000000e+00
   %11 = getelementptr inbounds double, double* %G, i64 1
@@ -122,7 +122,7 @@
   %14 = getelementptr inbounds double, double* %G, i64 2
   store double %13, double* %14, align 8
   %15 = getelementptr inbounds double, double* %G, i64 6
-  %16 = load double* %15, align 8
+  %16 = load double, double* %15, align 8
   %17 = fmul double %16, 3.000000e+00
   %18 = fadd double %17, 8.000000e+00
   %19 = getelementptr inbounds double, double* %G, i64 3
@@ -147,26 +147,26 @@
 ;CHECK: ret
 define i32 @foo4(double* nocapture %A, i32 %n) {
 entry:
-  %0 = load double* %A, align 8
+  %0 = load double, double* %A, align 8
   %mul = fmul double %0, 7.900000e+00
   %conv = sitofp i32 %n to double
   %mul1 = fmul double %conv, %mul
   %add = fadd double %mul1, 6.000000e+00
   store double %add, double* %A, align 8
   %arrayidx3 = getelementptr inbounds double, double* %A, i64 1
-  %1 = load double* %arrayidx3, align 8
+  %1 = load double, double* %arrayidx3, align 8
   %mul4 = fmul double %1, 7.900000e+00
   %mul6 = fmul double %conv, %mul4
   %add7 = fadd double %mul6, 6.000000e+00
   store double %add7, double* %arrayidx3, align 8
   %arrayidx9 = getelementptr inbounds double, double* %A, i64 2
-  %2 = load double* %arrayidx9, align 8
+  %2 = load double, double* %arrayidx9, align 8
   %mul10 = fmul double %2, 7.900000e+00
   %mul12 = fmul double %conv, %mul10
   %add13 = fadd double %mul12, 6.000000e+00
   store double %add13, double* %arrayidx9, align 8
   %arrayidx15 = getelementptr inbounds double, double* %A, i64 3
-  %3 = load double* %arrayidx15, align 8
+  %3 = load double, double* %arrayidx15, align 8
   %mul16 = fmul double %3, 7.900000e+00
   %mul18 = fmul double %conv, %mul16
   %add19 = fadd double %mul18, 6.000000e+00
@@ -189,12 +189,12 @@
 ;CHECK: ret
 define i32 @partial_mrg(double* nocapture %A, i32 %n) {
 entry:
-  %0 = load double* %A, align 8
+  %0 = load double, double* %A, align 8
   %conv = sitofp i32 %n to double
   %mul = fmul double %conv, %0
   store double %mul, double* %A, align 8
   %arrayidx2 = getelementptr inbounds double, double* %A, i64 1
-  %1 = load double* %arrayidx2, align 8
+  %1 = load double, double* %arrayidx2, align 8
   %mul4 = fmul double %conv, %1
   store double %mul4, double* %arrayidx2, align 8
   %cmp = icmp slt i32 %n, 4
@@ -202,11 +202,11 @@
 
 if.end:                                           ; preds = %entry
   %arrayidx7 = getelementptr inbounds double, double* %A, i64 2
-  %2 = load double* %arrayidx7, align 8
+  %2 = load double, double* %arrayidx7, align 8
   %mul9 = fmul double %conv, %2
   store double %mul9, double* %arrayidx7, align 8
   %arrayidx11 = getelementptr inbounds double, double* %A, i64 3
-  %3 = load double* %arrayidx11, align 8
+  %3 = load double, double* %arrayidx11, align 8
   %add = add nsw i32 %n, 4
   %conv12 = sitofp i32 %add to double
   %mul13 = fmul double %conv12, %3
@@ -228,18 +228,18 @@
 
 sw.epilog7:                                       ; No predecessors!
   %.in = getelementptr inbounds %class.B.53.55, %class.B.53.55* %this, i64 0, i32 0, i32 1
-  %0 = load double* %.in, align 8
+  %0 = load double, double* %.in, align 8
   %add = fadd double undef, 0.000000e+00
   %add6 = fadd double %add, %0
-  %1 = load double* @a, align 8
+  %1 = load double, double* @a, align 8
   %add8 = fadd double %1, 0.000000e+00
   %_dy = getelementptr inbounds %class.B.53.55, %class.B.53.55* %this, i64 0, i32 0, i32 2
-  %2 = load double* %_dy, align 8
+  %2 = load double, double* %_dy, align 8
   %add10 = fadd double %add8, %2
   br i1 undef, label %if.then12, label %if.end13
 
 if.then12:                                        ; preds = %sw.epilog7
-  %3 = load double* undef, align 8
+  %3 = load double, double* undef, align 8
   br label %if.end13
 
 if.end13:                                         ; preds = %if.then12, %sw.epilog7, %entry
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cycle_dup.ll b/llvm/test/Transforms/SLPVectorizer/X86/cycle_dup.ll
index 59f2923..0a4e961 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cycle_dup.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cycle_dup.ll
@@ -23,15 +23,15 @@
 ;CHECK-NEXT:ret i32 undef
 define i32 @foo(i32* nocapture %A) #0 {
 entry:
-  %0 = load i32* %A, align 4
+  %0 = load i32, i32* %A, align 4
   %arrayidx1 = getelementptr inbounds i32, i32* %A, i64 1
-  %1 = load i32* %arrayidx1, align 4
+  %1 = load i32, i32* %arrayidx1, align 4
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 2
-  %2 = load i32* %arrayidx2, align 4
+  %2 = load i32, i32* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 3
-  %3 = load i32* %arrayidx3, align 4
+  %3 = load i32, i32* %arrayidx3, align 4
   %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 13
-  %4 = load i32* %arrayidx4, align 4
+  %4 = load i32, i32* %arrayidx4, align 4
   %cmp24 = icmp sgt i32 %4, 0
   br i1 %cmp24, label %for.body, label %for.end
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll b/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll
index d145a7d..c28ccc5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll
@@ -15,7 +15,7 @@
 ;CHECK: @depth
 ;CHECK: getelementptr inbounds {{.*}}, !dbg ![[LOC:[0-9]+]]
 ;CHECK: bitcast double* {{.*}}, !dbg ![[LOC]]
-;CHECK: load <2 x double>* {{.*}}, !dbg ![[LOC]]
+;CHECK: load <2 x double>, <2 x double>* {{.*}}, !dbg ![[LOC]]
 ;CHECK: store <2 x double> {{.*}}, !dbg ![[LOC2:[0-9]+]]
 ;CHECK: ret
 ;CHECK: ![[LOC]] = !MDLocation(line: 4, scope:
@@ -33,9 +33,9 @@
 
 for.body.lr.ph:                                   ; preds = %entry
   %arrayidx = getelementptr inbounds double, double* %A, i64 4, !dbg !24
-  %0 = load double* %arrayidx, align 8, !dbg !24
+  %0 = load double, double* %arrayidx, align 8, !dbg !24
   %arrayidx1 = getelementptr inbounds double, double* %A, i64 5, !dbg !29
-  %1 = load double* %arrayidx1, align 8, !dbg !29
+  %1 = load double, double* %arrayidx1, align 8, !dbg !29
   br label %for.end, !dbg !23
 
 for.end:                                          ; preds = %for.body.lr.ph, %entry
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll b/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll
index 692c0f6..4e2c02f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/diamond.ll
@@ -18,22 +18,22 @@
 ; CHECK: ret
 define i32 @foo(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) #0 {
 entry:
-  %0 = load i32* %A, align 4
+  %0 = load i32, i32* %A, align 4
   %mul238 = add i32 %m, %n
   %add = mul i32 %0, %mul238
   store i32 %add, i32* %B, align 4
   %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1
-  %1 = load i32* %arrayidx4, align 4
+  %1 = load i32, i32* %arrayidx4, align 4
   %add8 = mul i32 %1, %mul238
   %arrayidx9 = getelementptr inbounds i32, i32* %B, i64 1
   store i32 %add8, i32* %arrayidx9, align 4
   %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 2
-  %2 = load i32* %arrayidx10, align 4
+  %2 = load i32, i32* %arrayidx10, align 4
   %add14 = mul i32 %2, %mul238
   %arrayidx15 = getelementptr inbounds i32, i32* %B, i64 2
   store i32 %add14, i32* %arrayidx15, align 4
   %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 3
-  %3 = load i32* %arrayidx16, align 4
+  %3 = load i32, i32* %arrayidx16, align 4
   %add20 = mul i32 %3, %mul238
   %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 3
   store i32 %add20, i32* %arrayidx21, align 4
@@ -56,22 +56,22 @@
 ; CHECK-NEXT: ret
 define i32 @extr_user(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
 entry:
-  %0 = load i32* %A, align 4
+  %0 = load i32, i32* %A, align 4
   %mul238 = add i32 %m, %n
   %add = mul i32 %0, %mul238
   store i32 %add, i32* %B, align 4
   %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1
-  %1 = load i32* %arrayidx4, align 4
+  %1 = load i32, i32* %arrayidx4, align 4
   %add8 = mul i32 %1, %mul238
   %arrayidx9 = getelementptr inbounds i32, i32* %B, i64 1
   store i32 %add8, i32* %arrayidx9, align 4
   %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 2
-  %2 = load i32* %arrayidx10, align 4
+  %2 = load i32, i32* %arrayidx10, align 4
   %add14 = mul i32 %2, %mul238
   %arrayidx15 = getelementptr inbounds i32, i32* %B, i64 2
   store i32 %add14, i32* %arrayidx15, align 4
   %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 3
-  %3 = load i32* %arrayidx16, align 4
+  %3 = load i32, i32* %arrayidx16, align 4
   %add20 = mul i32 %3, %mul238
   %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 3
   store i32 %add20, i32* %arrayidx21, align 4
@@ -86,22 +86,22 @@
 ; CHECK-NEXT: ret
 define i32 @extr_user1(i32* noalias nocapture %B, i32* noalias nocapture %A, i32 %n, i32 %m) {
 entry:
-  %0 = load i32* %A, align 4
+  %0 = load i32, i32* %A, align 4
   %mul238 = add i32 %m, %n
   %add = mul i32 %0, %mul238
   store i32 %add, i32* %B, align 4
   %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1
-  %1 = load i32* %arrayidx4, align 4
+  %1 = load i32, i32* %arrayidx4, align 4
   %add8 = mul i32 %1, %mul238
   %arrayidx9 = getelementptr inbounds i32, i32* %B, i64 1
   store i32 %add8, i32* %arrayidx9, align 4
   %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 2
-  %2 = load i32* %arrayidx10, align 4
+  %2 = load i32, i32* %arrayidx10, align 4
   %add14 = mul i32 %2, %mul238
   %arrayidx15 = getelementptr inbounds i32, i32* %B, i64 2
   store i32 %add14, i32* %arrayidx15, align 4
   %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 3
-  %3 = load i32* %arrayidx16, align 4
+  %3 = load i32, i32* %arrayidx16, align 4
   %add20 = mul i32 %3, %mul238
   %arrayidx21 = getelementptr inbounds i32, i32* %B, i64 3
   store i32 %add20, i32* %arrayidx21, align 4
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll
index 68cef94..bf2febd 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll
@@ -34,8 +34,8 @@
 define double @ext_user(double* noalias nocapture %B, double* noalias nocapture %A, i32 %n, i32 %m) {
 entry:
   %arrayidx = getelementptr inbounds double, double* %A, i64 1
-  %0 = load double* %arrayidx, align 8
-  %1 = load double* %A, align 8
+  %0 = load double, double* %arrayidx, align 8
+  %1 = load double, double* %A, align 8
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
@@ -69,9 +69,9 @@
 define i32 @needtogather(double *noalias %a, i32 *noalias %b,  float * noalias %c,
                 i32 * noalias %d) {
 entry:
-  %0 = load i32* %d, align 4
+  %0 = load i32, i32* %d, align 4
   %conv = sitofp i32 %0 to float
-  %1 = load float* %c
+  %1 = load float, float* %c
   %sub = fsub float 0.000000e+00, %1
   %mul = fmul float %sub, 0.000000e+00
   %add = fadd float %conv, %mul
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract.ll
index 5ac07a7..9a6ee2a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract.ll
@@ -9,7 +9,7 @@
 ;CHECK: ret void
 define void @fextr(double* %ptr) {
 entry:
-  %LD = load <2 x double>* undef
+  %LD = load <2 x double>, <2 x double>* undef
   %V0 = extractelement <2 x double> %LD, i32 0
   %V1 = extractelement <2 x double> %LD, i32 1
   %P0 = getelementptr inbounds double, double* %ptr, i64 0
@@ -27,7 +27,7 @@
 ;CHECK: ret void
 define void @fextr1(double* %ptr) {
 entry:
-  %LD = load <2 x double>* undef
+  %LD = load <2 x double>, <2 x double>* undef
   %V0 = extractelement <2 x double> %LD, i32 0
   %V1 = extractelement <2 x double> %LD, i32 1
   %P0 = getelementptr inbounds double, double* %ptr, i64 1  ; <--- incorrect order
@@ -45,7 +45,7 @@
 ;CHECK: ret void
 define void @fextr2(double* %ptr) {
 entry:
-  %LD = load <4 x double>* undef
+  %LD = load <4 x double>, <4 x double>* undef
   %V0 = extractelement <4 x double> %LD, i32 0  ; <--- invalid size.
   %V1 = extractelement <4 x double> %LD, i32 1
   %P0 = getelementptr inbounds double, double* %ptr, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
index a68ac7d..6e5415b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
@@ -7,7 +7,7 @@
 ; Function Attrs: nounwind ssp uwtable
 define i32 @fn1() {
 entry:
-  %0 = load i64** @a, align 8
+  %0 = load i64*, i64** @a, align 8
   %add.ptr = getelementptr inbounds i64, i64* %0, i64 11
   %1 = ptrtoint i64* %add.ptr to i64
   store i64 %1, i64* %add.ptr, align 8
@@ -25,32 +25,32 @@
 declare float @llvm.powi.f32(float, i32)
 define void @fn2(i32* %a, i32* %b, float* %c) {
 entry:
-  %i0 = load i32* %a, align 4
-  %i1 = load i32* %b, align 4
+  %i0 = load i32, i32* %a, align 4
+  %i1 = load i32, i32* %b, align 4
   %add1 = add i32 %i0, %i1
   %fp1 = sitofp i32 %add1 to float
   %call1 = tail call float @llvm.powi.f32(float %fp1,i32 %add1) nounwind readnone
 
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
-  %i2 = load i32* %arrayidx2, align 4
+  %i2 = load i32, i32* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
-  %i3 = load i32* %arrayidx3, align 4
+  %i3 = load i32, i32* %arrayidx3, align 4
   %add2 = add i32 %i2, %i3
   %fp2 = sitofp i32 %add2 to float
   %call2 = tail call float @llvm.powi.f32(float %fp2,i32 %add1) nounwind readnone
 
   %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
-  %i4 = load i32* %arrayidx4, align 4
+  %i4 = load i32, i32* %arrayidx4, align 4
   %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
-  %i5 = load i32* %arrayidx5, align 4
+  %i5 = load i32, i32* %arrayidx5, align 4
   %add3 = add i32 %i4, %i5
   %fp3 = sitofp i32 %add3 to float
   %call3 = tail call float @llvm.powi.f32(float %fp3,i32 %add1) nounwind readnone
 
   %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
-  %i6 = load i32* %arrayidx6, align 4
+  %i6 = load i32, i32* %arrayidx6, align 4
   %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
-  %i7 = load i32* %arrayidx7, align 4
+  %i7 = load i32, i32* %arrayidx7, align 4
   %add4 = add i32 %i6, %i7
   %fp4 = sitofp i32 %add4 to float
   %call4 = tail call float @llvm.powi.f32(float %fp4,i32 %add1) nounwind readnone
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/flag.ll b/llvm/test/Transforms/SLPVectorizer/X86/flag.ll
index 2890c9f..7db8d75 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/flag.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/flag.ll
@@ -16,16 +16,16 @@
   %i.019 = phi i64 [ %26, %.lr.ph ], [ 0, %0 ]
   %2 = shl i64 %i.019, 2
   %3 = getelementptr inbounds i32, i32* %in, i64 %2
-  %4 = load i32* %3, align 4
+  %4 = load i32, i32* %3, align 4
   %5 = or i64 %2, 1
   %6 = getelementptr inbounds i32, i32* %in, i64 %5
-  %7 = load i32* %6, align 4
+  %7 = load i32, i32* %6, align 4
   %8 = or i64 %2, 2
   %9 = getelementptr inbounds i32, i32* %in, i64 %8
-  %10 = load i32* %9, align 4
+  %10 = load i32, i32* %9, align 4
   %11 = or i64 %2, 3
   %12 = getelementptr inbounds i32, i32* %in, i64 %11
-  %13 = load i32* %12, align 4
+  %13 = load i32, i32* %12, align 4
   %14 = mul i32 %4, 7
   %15 = add i32 %14, 7
   %16 = mul i32 %7, 7
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gep.ll b/llvm/test/Transforms/SLPVectorizer/X86/gep.ll
index 6c6a7bd..3f952d7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gep.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gep.ll
@@ -10,12 +10,12 @@
 ; CHECK: <2 x i32*>
 define void @foo1 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y) {
   %1 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %y, i64 0, i32 0
-  %2 = load i32** %1, align 8
+  %2 = load i32*, i32** %1, align 8
   %3 = getelementptr inbounds i32, i32* %2, i64 16
   %4 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %x, i64 0, i32 0
   store i32* %3, i32** %4, align 8
   %5 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %y, i64 0, i32 1
-  %6 = load i32** %5, align 8
+  %6 = load i32*, i32** %5, align 8
   %7 = getelementptr inbounds i32, i32* %6, i64 16
   %8 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %x, i64 0, i32 1
   store i32* %7, i32** %8, align 8
@@ -28,12 +28,12 @@
 ; CHECK-NOT: <2 x i32*>
 define void @foo2 ({ i32*, i32* }* noalias %x, { i32*, i32* }* noalias %y, i32 %i) {
   %1 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %y, i64 0, i32 0
-  %2 = load i32** %1, align 8
+  %2 = load i32*, i32** %1, align 8
   %3 = getelementptr inbounds i32, i32* %2, i32 %i
   %4 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %x, i64 0, i32 0
   store i32* %3, i32** %4, align 8
   %5 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %y, i64 0, i32 1
-  %6 = load i32** %5, align 8
+  %6 = load i32*, i32** %5, align 8
   %7 = getelementptr inbounds i32, i32* %6, i32 %i
   %8 = getelementptr inbounds { i32*, i32* }, { i32*, i32* }* %x, i64 0, i32 1
   store i32* %7, i32** %8, align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll b/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll
index c5e5b25..36c939b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hoist.ll
@@ -31,22 +31,22 @@
 for.body:                                         ; preds = %entry, %for.body
   %i.024 = phi i32 [ 0, %entry ], [ %add10, %for.body ]
   %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.024
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %n
   store i32 %add, i32* %arrayidx, align 4
   %add121 = or i32 %i.024, 1
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i32 %add121
-  %1 = load i32* %arrayidx2, align 4
+  %1 = load i32, i32* %arrayidx2, align 4
   %add3 = add nsw i32 %1, %k
   store i32 %add3, i32* %arrayidx2, align 4
   %add422 = or i32 %i.024, 2
   %arrayidx5 = getelementptr inbounds i32, i32* %A, i32 %add422
-  %2 = load i32* %arrayidx5, align 4
+  %2 = load i32, i32* %arrayidx5, align 4
   %add6 = add nsw i32 %2, %n
   store i32 %add6, i32* %arrayidx5, align 4
   %add723 = or i32 %i.024, 3
   %arrayidx8 = getelementptr inbounds i32, i32* %A, i32 %add723
-  %3 = load i32* %arrayidx8, align 4
+  %3 = load i32, i32* %arrayidx8, align 4
   %add9 = add nsw i32 %3, %k
   store i32 %add9, i32* %arrayidx8, align 4
   %add10 = add nsw i32 %i.024, 4
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
index 21d38c4..83b2e01 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
@@ -33,21 +33,21 @@
   %sum.032 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add17, %for.body ]
   %mul = shl nsw i64 %i.033, 2
   %arrayidx = getelementptr inbounds float, float* %A, i64 %mul
-  %1 = load float* %arrayidx, align 4
+  %1 = load float, float* %arrayidx, align 4
   %mul2 = fmul float %1, 7.000000e+00
   %add28 = or i64 %mul, 1
   %arrayidx4 = getelementptr inbounds float, float* %A, i64 %add28
-  %2 = load float* %arrayidx4, align 4
+  %2 = load float, float* %arrayidx4, align 4
   %mul5 = fmul float %2, 7.000000e+00
   %add6 = fadd fast float %mul2, %mul5
   %add829 = or i64 %mul, 2
   %arrayidx9 = getelementptr inbounds float, float* %A, i64 %add829
-  %3 = load float* %arrayidx9, align 4
+  %3 = load float, float* %arrayidx9, align 4
   %mul10 = fmul float %3, 7.000000e+00
   %add11 = fadd fast float %add6, %mul10
   %add1330 = or i64 %mul, 3
   %arrayidx14 = getelementptr inbounds float, float* %A, i64 %add1330
-  %4 = load float* %arrayidx14, align 4
+  %4 = load float, float* %arrayidx14, align 4
   %mul15 = fmul float %4, 7.000000e+00
   %add16 = fadd fast float %add11, %mul15
   %add17 = fadd fast float %sum.032, %add16
@@ -85,13 +85,13 @@
   br i1 %cmp38, label %for.body.lr.ph, label %for.end
 
 for.body.lr.ph:
-  %0 = load float* %B, align 4
+  %0 = load float, float* %B, align 4
   %arrayidx4 = getelementptr inbounds float, float* %B, i64 1
-  %1 = load float* %arrayidx4, align 4
+  %1 = load float, float* %arrayidx4, align 4
   %arrayidx9 = getelementptr inbounds float, float* %B, i64 2
-  %2 = load float* %arrayidx9, align 4
+  %2 = load float, float* %arrayidx9, align 4
   %arrayidx15 = getelementptr inbounds float, float* %B, i64 3
-  %3 = load float* %arrayidx15, align 4
+  %3 = load float, float* %arrayidx15, align 4
   %4 = sext i32 %n to i64
   br label %for.body
 
@@ -100,21 +100,21 @@
   %sum.039 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %mul21, %for.body ]
   %mul = shl nsw i64 %i.040, 2
   %arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul
-  %5 = load float* %arrayidx2, align 4
+  %5 = load float, float* %arrayidx2, align 4
   %mul3 = fmul float %0, %5
   %add35 = or i64 %mul, 1
   %arrayidx6 = getelementptr inbounds float, float* %A, i64 %add35
-  %6 = load float* %arrayidx6, align 4
+  %6 = load float, float* %arrayidx6, align 4
   %mul7 = fmul float %1, %6
   %add8 = fadd fast float %mul3, %mul7
   %add1136 = or i64 %mul, 2
   %arrayidx12 = getelementptr inbounds float, float* %A, i64 %add1136
-  %7 = load float* %arrayidx12, align 4
+  %7 = load float, float* %arrayidx12, align 4
   %mul13 = fmul float %2, %7
   %add14 = fadd fast float %add8, %mul13
   %add1737 = or i64 %mul, 3
   %arrayidx18 = getelementptr inbounds float, float* %A, i64 %add1737
-  %8 = load float* %arrayidx18, align 4
+  %8 = load float, float* %arrayidx18, align 4
   %mul19 = fmul float %3, %8
   %add20 = fadd fast float %add14, %mul19
   %mul21 = fmul float %sum.039, %add20
@@ -157,23 +157,23 @@
   br i1 %cmp81, label %for.body.lr.ph, label %for.end
 
 for.body.lr.ph:
-  %0 = load float* %B, align 4
+  %0 = load float, float* %B, align 4
   %arrayidx4 = getelementptr inbounds float, float* %B, i64 1
-  %1 = load float* %arrayidx4, align 4
+  %1 = load float, float* %arrayidx4, align 4
   %arrayidx9 = getelementptr inbounds float, float* %B, i64 2
-  %2 = load float* %arrayidx9, align 4
+  %2 = load float, float* %arrayidx9, align 4
   %arrayidx15 = getelementptr inbounds float, float* %B, i64 3
-  %3 = load float* %arrayidx15, align 4
+  %3 = load float, float* %arrayidx15, align 4
   %arrayidx21 = getelementptr inbounds float, float* %B, i64 4
-  %4 = load float* %arrayidx21, align 4
+  %4 = load float, float* %arrayidx21, align 4
   %arrayidx27 = getelementptr inbounds float, float* %B, i64 5
-  %5 = load float* %arrayidx27, align 4
+  %5 = load float, float* %arrayidx27, align 4
   %arrayidx33 = getelementptr inbounds float, float* %B, i64 6
-  %6 = load float* %arrayidx33, align 4
+  %6 = load float, float* %arrayidx33, align 4
   %arrayidx39 = getelementptr inbounds float, float* %B, i64 7
-  %7 = load float* %arrayidx39, align 4
+  %7 = load float, float* %arrayidx39, align 4
   %arrayidx45 = getelementptr inbounds float, float* %B, i64 8
-  %8 = load float* %arrayidx45, align 4
+  %8 = load float, float* %arrayidx45, align 4
   %9 = sext i32 %n to i64
   br label %for.body
 
@@ -182,46 +182,46 @@
   %sum.082 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add51, %for.body ]
   %mul = mul nsw i64 %i.083, 6
   %arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul
-  %10 = load float* %arrayidx2, align 4
+  %10 = load float, float* %arrayidx2, align 4
   %mul3 = fmul fast float %0, %10
   %add80 = or i64 %mul, 1
   %arrayidx6 = getelementptr inbounds float, float* %A, i64 %add80
-  %11 = load float* %arrayidx6, align 4
+  %11 = load float, float* %arrayidx6, align 4
   %mul7 = fmul fast float %1, %11
   %add8 = fadd fast float %mul3, %mul7
   %add11 = add nsw i64 %mul, 2
   %arrayidx12 = getelementptr inbounds float, float* %A, i64 %add11
-  %12 = load float* %arrayidx12, align 4
+  %12 = load float, float* %arrayidx12, align 4
   %mul13 = fmul fast float %2, %12
   %add14 = fadd fast float %add8, %mul13
   %add17 = add nsw i64 %mul, 3
   %arrayidx18 = getelementptr inbounds float, float* %A, i64 %add17
-  %13 = load float* %arrayidx18, align 4
+  %13 = load float, float* %arrayidx18, align 4
   %mul19 = fmul fast float %3, %13
   %add20 = fadd fast float %add14, %mul19
   %add23 = add nsw i64 %mul, 4
   %arrayidx24 = getelementptr inbounds float, float* %A, i64 %add23
-  %14 = load float* %arrayidx24, align 4
+  %14 = load float, float* %arrayidx24, align 4
   %mul25 = fmul fast float %4, %14
   %add26 = fadd fast float %add20, %mul25
   %add29 = add nsw i64 %mul, 5
   %arrayidx30 = getelementptr inbounds float, float* %A, i64 %add29
-  %15 = load float* %arrayidx30, align 4
+  %15 = load float, float* %arrayidx30, align 4
   %mul31 = fmul fast float %5, %15
   %add32 = fadd fast float %add26, %mul31
   %add35 = add nsw i64 %mul, 6
   %arrayidx36 = getelementptr inbounds float, float* %A, i64 %add35
-  %16 = load float* %arrayidx36, align 4
+  %16 = load float, float* %arrayidx36, align 4
   %mul37 = fmul fast float %6, %16
   %add38 = fadd fast float %add32, %mul37
   %add41 = add nsw i64 %mul, 7
   %arrayidx42 = getelementptr inbounds float, float* %A, i64 %add41
-  %17 = load float* %arrayidx42, align 4
+  %17 = load float, float* %arrayidx42, align 4
   %mul43 = fmul fast float %7, %17
   %add44 = fadd fast float %add38, %mul43
   %add47 = add nsw i64 %mul, 8
   %arrayidx48 = getelementptr inbounds float, float* %A, i64 %add47
-  %18 = load float* %arrayidx48, align 4
+  %18 = load float, float* %arrayidx48, align 4
   %mul49 = fmul fast float %8, %18
   %add50 = fadd fast float %add44, %mul49
   %add51 = fadd fast float %sum.082, %add50
@@ -259,13 +259,13 @@
   br i1 %cmp41, label %for.body.lr.ph, label %for.end
 
 for.body.lr.ph:
-  %0 = load float* %B, align 4
+  %0 = load float, float* %B, align 4
   %arrayidx4 = getelementptr inbounds float, float* %B, i64 1
-  %1 = load float* %arrayidx4, align 4
+  %1 = load float, float* %arrayidx4, align 4
   %arrayidx10 = getelementptr inbounds float, float* %B, i64 2
-  %2 = load float* %arrayidx10, align 4
+  %2 = load float, float* %arrayidx10, align 4
   %arrayidx16 = getelementptr inbounds float, float* %B, i64 3
-  %3 = load float* %arrayidx16, align 4
+  %3 = load float, float* %arrayidx16, align 4
   %4 = sext i32 %n to i64
   br label %for.body
 
@@ -274,22 +274,22 @@
   %sum.042 = phi float [ 0.000000e+00, %for.body.lr.ph ], [ %add21, %for.body ]
   %mul = shl nsw i64 %i.043, 2
   %arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul
-  %5 = load float* %arrayidx2, align 4
+  %5 = load float, float* %arrayidx2, align 4
   %mul3 = fmul fast float %0, %5
   %add = fadd fast float %sum.042, %mul3
   %add638 = or i64 %mul, 1
   %arrayidx7 = getelementptr inbounds float, float* %A, i64 %add638
-  %6 = load float* %arrayidx7, align 4
+  %6 = load float, float* %arrayidx7, align 4
   %mul8 = fmul fast float %1, %6
   %add9 = fadd fast float %add, %mul8
   %add1239 = or i64 %mul, 2
   %arrayidx13 = getelementptr inbounds float, float* %A, i64 %add1239
-  %7 = load float* %arrayidx13, align 4
+  %7 = load float, float* %arrayidx13, align 4
   %mul14 = fmul fast float %2, %7
   %add15 = fadd fast float %add9, %mul14
   %add1840 = or i64 %mul, 3
   %arrayidx19 = getelementptr inbounds float, float* %A, i64 %add1840
-  %8 = load float* %arrayidx19, align 4
+  %8 = load float, float* %arrayidx19, align 4
   %mul20 = fmul fast float %3, %8
   %add21 = fadd fast float %add15, %mul20
   %inc = add nsw i64 %i.043, 1
@@ -335,27 +335,27 @@
 for.body:
   %i.039 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
   %C.addr.038 = phi float* [ %C, %for.body.lr.ph ], [ %incdec.ptr, %for.body ]
-  %1 = load float* %B, align 4
+  %1 = load float, float* %B, align 4
   %mul = shl nsw i64 %i.039, 2
   %arrayidx2 = getelementptr inbounds float, float* %A, i64 %mul
-  %2 = load float* %arrayidx2, align 4
+  %2 = load float, float* %arrayidx2, align 4
   %mul3 = fmul fast float %1, %2
-  %3 = load float* %arrayidx4, align 4
+  %3 = load float, float* %arrayidx4, align 4
   %add34 = or i64 %mul, 1
   %arrayidx6 = getelementptr inbounds float, float* %A, i64 %add34
-  %4 = load float* %arrayidx6, align 4
+  %4 = load float, float* %arrayidx6, align 4
   %mul7 = fmul fast float %3, %4
   %add8 = fadd fast float %mul3, %mul7
-  %5 = load float* %arrayidx9, align 4
+  %5 = load float, float* %arrayidx9, align 4
   %add1135 = or i64 %mul, 2
   %arrayidx12 = getelementptr inbounds float, float* %A, i64 %add1135
-  %6 = load float* %arrayidx12, align 4
+  %6 = load float, float* %arrayidx12, align 4
   %mul13 = fmul fast float %5, %6
   %add14 = fadd fast float %add8, %mul13
-  %7 = load float* %arrayidx15, align 4
+  %7 = load float, float* %arrayidx15, align 4
   %add1736 = or i64 %mul, 3
   %arrayidx18 = getelementptr inbounds float, float* %A, i64 %add1736
-  %8 = load float* %arrayidx18, align 4
+  %8 = load float, float* %arrayidx18, align 4
   %mul19 = fmul fast float %7, %8
   %add20 = fadd fast float %add14, %mul19
   store float %add20, float* %C.addr.038, align 4
@@ -389,9 +389,9 @@
   br i1 %cmp17, label %for.body.lr.ph, label %for.end
 
 for.body.lr.ph:
-  %0 = load double* %B, align 8
+  %0 = load double, double* %B, align 8
   %arrayidx4 = getelementptr inbounds double, double* %B, i64 1
-  %1 = load double* %arrayidx4, align 8
+  %1 = load double, double* %arrayidx4, align 8
   %2 = sext i32 %n to i64
   br label %for.body
 
@@ -399,11 +399,11 @@
   %i.018 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
   %mul = shl nsw i64 %i.018, 2
   %arrayidx2 = getelementptr inbounds double, double* %A, i64 %mul
-  %3 = load double* %arrayidx2, align 8
+  %3 = load double, double* %arrayidx2, align 8
   %mul3 = fmul fast double %0, %3
   %add16 = or i64 %mul, 1
   %arrayidx6 = getelementptr inbounds double, double* %A, i64 %add16
-  %4 = load double* %arrayidx6, align 8
+  %4 = load double, double* %arrayidx6, align 8
   %mul7 = fmul fast double %1, %4
   %add8 = fadd fast double %mul3, %mul7
   %arrayidx9 = getelementptr inbounds double, double* %C, i64 %i.018
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/implicitfloat.ll b/llvm/test/Transforms/SLPVectorizer/X86/implicitfloat.ll
index 3b80472..f7283f0d0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/implicitfloat.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/implicitfloat.ll
@@ -9,13 +9,13 @@
 ; CHECK: ret
 define void @test1(double* %a, double* %b, double* %c) noimplicitfloat { ; <------ noimplicitfloat attribute here!
 entry:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   store double %mul, double* %c, align 8
   %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
index 9bc44f2..26f0b9b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/in-tree-user.ll
@@ -20,14 +20,14 @@
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
   %0 = shl nsw i64 %indvars.iv, 1
   %arrayidx = getelementptr inbounds double, double* %A, i64 %0
-  %1 = load double* %arrayidx, align 8
+  %1 = load double, double* %arrayidx, align 8
   %mul1 = fmul double %conv, %1
   %mul2 = fmul double %mul1, 7.000000e+00
   %add = fadd double %mul2, 5.000000e+00
   %InTreeUser = fadd double %add, %add    ; <------------------ In tree user.
   %2 = or i64 %0, 1
   %arrayidx6 = getelementptr inbounds double, double* %A, i64 %2
-  %3 = load double* %arrayidx6, align 8
+  %3 = load double, double* %arrayidx6, align 8
   %mul8 = fmul double %conv, %3
   %mul9 = fmul double %mul8, 4.000000e+00
   %add10 = fadd double %mul9, 9.000000e+00
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll b/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll
index 974d7e6..cc5a4af 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/intrinsic.ll
@@ -13,14 +13,14 @@
 ;CHECK: ret
 define void @vec_fabs_f64(double* %a, double* %b, double* %c) {
 entry:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %call = tail call double @llvm.fabs.f64(double %mul) nounwind readnone
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   %call5 = tail call double @llvm.fabs.f64(double %mul5) nounwind readnone
   store double %call, double* %c, align 8
@@ -39,31 +39,31 @@
 ;CHECK: ret
 define void @vec_copysign_f32(float* %a, float* %b, float* noalias %c) {
 entry:
-  %0 = load float* %a, align 4
-  %1 = load float* %b, align 4
+  %0 = load float, float* %a, align 4
+  %1 = load float, float* %b, align 4
   %call0 = tail call float @llvm.copysign.f32(float %0, float %1) nounwind readnone
   store float %call0, float* %c, align 4
 
   %ix2 = getelementptr inbounds float, float* %a, i64 1
-  %2 = load float* %ix2, align 4
+  %2 = load float, float* %ix2, align 4
   %ix3 = getelementptr inbounds float, float* %b, i64 1
-  %3 = load float* %ix3, align 4
+  %3 = load float, float* %ix3, align 4
   %call1 = tail call float @llvm.copysign.f32(float %2, float %3) nounwind readnone
   %c1 = getelementptr inbounds float, float* %c, i64 1
   store float %call1, float* %c1, align 4
 
   %ix4 = getelementptr inbounds float, float* %a, i64 2
-  %4 = load float* %ix4, align 4
+  %4 = load float, float* %ix4, align 4
   %ix5 = getelementptr inbounds float, float* %b, i64 2
-  %5 = load float* %ix5, align 4
+  %5 = load float, float* %ix5, align 4
   %call2 = tail call float @llvm.copysign.f32(float %4, float %5) nounwind readnone
   %c2 = getelementptr inbounds float, float* %c, i64 2
   store float %call2, float* %c2, align 4
 
   %ix6 = getelementptr inbounds float, float* %a, i64 3
-  %6 = load float* %ix6, align 4
+  %6 = load float, float* %ix6, align 4
   %ix7 = getelementptr inbounds float, float* %b, i64 3
-  %7 = load float* %ix7, align 4
+  %7 = load float, float* %ix7, align 4
   %call3 = tail call float @llvm.copysign.f32(float %6, float %7) nounwind readnone
   %c3 = getelementptr inbounds float, float* %c, i64 3
   store float %call3, float* %c3, align 4
@@ -75,29 +75,29 @@
 
 define void @vec_bswap_i32(i32* %a, i32* %b, i32* %c) {
 entry:
-  %i0 = load i32* %a, align 4
-  %i1 = load i32* %b, align 4
+  %i0 = load i32, i32* %a, align 4
+  %i1 = load i32, i32* %b, align 4
   %add1 = add i32 %i0, %i1
   %call1 = tail call i32 @llvm.bswap.i32(i32 %add1) nounwind readnone
 
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
-  %i2 = load i32* %arrayidx2, align 4
+  %i2 = load i32, i32* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
-  %i3 = load i32* %arrayidx3, align 4
+  %i3 = load i32, i32* %arrayidx3, align 4
   %add2 = add i32 %i2, %i3
   %call2 = tail call i32 @llvm.bswap.i32(i32 %add2) nounwind readnone
 
   %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
-  %i4 = load i32* %arrayidx4, align 4
+  %i4 = load i32, i32* %arrayidx4, align 4
   %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
-  %i5 = load i32* %arrayidx5, align 4
+  %i5 = load i32, i32* %arrayidx5, align 4
   %add3 = add i32 %i4, %i5
   %call3 = tail call i32 @llvm.bswap.i32(i32 %add3) nounwind readnone
 
   %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
-  %i6 = load i32* %arrayidx6, align 4
+  %i6 = load i32, i32* %arrayidx6, align 4
   %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
-  %i7 = load i32* %arrayidx7, align 4
+  %i7 = load i32, i32* %arrayidx7, align 4
   %add4 = add i32 %i6, %i7
   %call4 = tail call i32 @llvm.bswap.i32(i32 %add4) nounwind readnone
 
@@ -122,29 +122,29 @@
 
 define void @vec_ctlz_i32(i32* %a, i32* %b, i32* %c, i1) {
 entry:
-  %i0 = load i32* %a, align 4
-  %i1 = load i32* %b, align 4
+  %i0 = load i32, i32* %a, align 4
+  %i1 = load i32, i32* %b, align 4
   %add1 = add i32 %i0, %i1
   %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone
 
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
-  %i2 = load i32* %arrayidx2, align 4
+  %i2 = load i32, i32* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
-  %i3 = load i32* %arrayidx3, align 4
+  %i3 = load i32, i32* %arrayidx3, align 4
   %add2 = add i32 %i2, %i3
   %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 true) nounwind readnone
 
   %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
-  %i4 = load i32* %arrayidx4, align 4
+  %i4 = load i32, i32* %arrayidx4, align 4
   %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
-  %i5 = load i32* %arrayidx5, align 4
+  %i5 = load i32, i32* %arrayidx5, align 4
   %add3 = add i32 %i4, %i5
   %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone
 
   %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
-  %i6 = load i32* %arrayidx6, align 4
+  %i6 = load i32, i32* %arrayidx6, align 4
   %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
-  %i7 = load i32* %arrayidx7, align 4
+  %i7 = load i32, i32* %arrayidx7, align 4
   %add4 = add i32 %i6, %i7
   %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 true) nounwind readnone
 
@@ -167,29 +167,29 @@
 
 define void @vec_ctlz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
 entry:
-  %i0 = load i32* %a, align 4
-  %i1 = load i32* %b, align 4
+  %i0 = load i32, i32* %a, align 4
+  %i1 = load i32, i32* %b, align 4
   %add1 = add i32 %i0, %i1
   %call1 = tail call i32 @llvm.ctlz.i32(i32 %add1,i1 true) nounwind readnone
 
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
-  %i2 = load i32* %arrayidx2, align 4
+  %i2 = load i32, i32* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
-  %i3 = load i32* %arrayidx3, align 4
+  %i3 = load i32, i32* %arrayidx3, align 4
   %add2 = add i32 %i2, %i3
   %call2 = tail call i32 @llvm.ctlz.i32(i32 %add2,i1 false) nounwind readnone
 
   %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
-  %i4 = load i32* %arrayidx4, align 4
+  %i4 = load i32, i32* %arrayidx4, align 4
   %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
-  %i5 = load i32* %arrayidx5, align 4
+  %i5 = load i32, i32* %arrayidx5, align 4
   %add3 = add i32 %i4, %i5
   %call3 = tail call i32 @llvm.ctlz.i32(i32 %add3,i1 true) nounwind readnone
 
   %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
-  %i6 = load i32* %arrayidx6, align 4
+  %i6 = load i32, i32* %arrayidx6, align 4
   %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
-  %i7 = load i32* %arrayidx7, align 4
+  %i7 = load i32, i32* %arrayidx7, align 4
   %add4 = add i32 %i6, %i7
   %call4 = tail call i32 @llvm.ctlz.i32(i32 %add4,i1 false) nounwind readnone
 
@@ -212,29 +212,29 @@
 
 define void @vec_cttz_i32(i32* %a, i32* %b, i32* %c, i1) {
 entry:
-  %i0 = load i32* %a, align 4
-  %i1 = load i32* %b, align 4
+  %i0 = load i32, i32* %a, align 4
+  %i1 = load i32, i32* %b, align 4
   %add1 = add i32 %i0, %i1
   %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone
 
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
-  %i2 = load i32* %arrayidx2, align 4
+  %i2 = load i32, i32* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
-  %i3 = load i32* %arrayidx3, align 4
+  %i3 = load i32, i32* %arrayidx3, align 4
   %add2 = add i32 %i2, %i3
   %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 true) nounwind readnone
 
   %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
-  %i4 = load i32* %arrayidx4, align 4
+  %i4 = load i32, i32* %arrayidx4, align 4
   %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
-  %i5 = load i32* %arrayidx5, align 4
+  %i5 = load i32, i32* %arrayidx5, align 4
   %add3 = add i32 %i4, %i5
   %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone
 
   %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
-  %i6 = load i32* %arrayidx6, align 4
+  %i6 = load i32, i32* %arrayidx6, align 4
   %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
-  %i7 = load i32* %arrayidx7, align 4
+  %i7 = load i32, i32* %arrayidx7, align 4
   %add4 = add i32 %i6, %i7
   %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 true) nounwind readnone
 
@@ -257,29 +257,29 @@
 
 define void @vec_cttz_i32_neg(i32* %a, i32* %b, i32* %c, i1) {
 entry:
-  %i0 = load i32* %a, align 4
-  %i1 = load i32* %b, align 4
+  %i0 = load i32, i32* %a, align 4
+  %i1 = load i32, i32* %b, align 4
   %add1 = add i32 %i0, %i1
   %call1 = tail call i32 @llvm.cttz.i32(i32 %add1,i1 true) nounwind readnone
 
   %arrayidx2 = getelementptr inbounds i32, i32* %a, i32 1
-  %i2 = load i32* %arrayidx2, align 4
+  %i2 = load i32, i32* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %b, i32 1
-  %i3 = load i32* %arrayidx3, align 4
+  %i3 = load i32, i32* %arrayidx3, align 4
   %add2 = add i32 %i2, %i3
   %call2 = tail call i32 @llvm.cttz.i32(i32 %add2,i1 false) nounwind readnone
 
   %arrayidx4 = getelementptr inbounds i32, i32* %a, i32 2
-  %i4 = load i32* %arrayidx4, align 4
+  %i4 = load i32, i32* %arrayidx4, align 4
   %arrayidx5 = getelementptr inbounds i32, i32* %b, i32 2
-  %i5 = load i32* %arrayidx5, align 4
+  %i5 = load i32, i32* %arrayidx5, align 4
   %add3 = add i32 %i4, %i5
   %call3 = tail call i32 @llvm.cttz.i32(i32 %add3,i1 true) nounwind readnone
 
   %arrayidx6 = getelementptr inbounds i32, i32* %a, i32 3
-  %i6 = load i32* %arrayidx6, align 4
+  %i6 = load i32, i32* %arrayidx6, align 4
   %arrayidx7 = getelementptr inbounds i32, i32* %b, i32 3
-  %i7 = load i32* %arrayidx7, align 4
+  %i7 = load i32, i32* %arrayidx7, align 4
   %add4 = add i32 %i6, %i7
   %call4 = tail call i32 @llvm.cttz.i32(i32 %add4,i1 false) nounwind readnone
 
@@ -300,29 +300,29 @@
 declare float @llvm.powi.f32(float, i32)
 define void @vec_powi_f32(float* %a, float* %b, float* %c, i32 %P) {
 entry:
-  %i0 = load float* %a, align 4
-  %i1 = load float* %b, align 4
+  %i0 = load float, float* %a, align 4
+  %i1 = load float, float* %b, align 4
   %add1 = fadd float %i0, %i1
   %call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone
 
   %arrayidx2 = getelementptr inbounds float, float* %a, i32 1
-  %i2 = load float* %arrayidx2, align 4
+  %i2 = load float, float* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds float, float* %b, i32 1
-  %i3 = load float* %arrayidx3, align 4
+  %i3 = load float, float* %arrayidx3, align 4
   %add2 = fadd float %i2, %i3
   %call2 = tail call float @llvm.powi.f32(float %add2,i32 %P) nounwind readnone
 
   %arrayidx4 = getelementptr inbounds float, float* %a, i32 2
-  %i4 = load float* %arrayidx4, align 4
+  %i4 = load float, float* %arrayidx4, align 4
   %arrayidx5 = getelementptr inbounds float, float* %b, i32 2
-  %i5 = load float* %arrayidx5, align 4
+  %i5 = load float, float* %arrayidx5, align 4
   %add3 = fadd float %i4, %i5
   %call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone
 
   %arrayidx6 = getelementptr inbounds float, float* %a, i32 3
-  %i6 = load float* %arrayidx6, align 4
+  %i6 = load float, float* %arrayidx6, align 4
   %arrayidx7 = getelementptr inbounds float, float* %b, i32 3
-  %i7 = load float* %arrayidx7, align 4
+  %i7 = load float, float* %arrayidx7, align 4
   %add4 = fadd float %i6, %i7
   %call4 = tail call float @llvm.powi.f32(float %add4,i32 %P) nounwind readnone
 
@@ -346,29 +346,29 @@
 
 define void @vec_powi_f32_neg(float* %a, float* %b, float* %c, i32 %P, i32 %Q) {
 entry:
-  %i0 = load float* %a, align 4
-  %i1 = load float* %b, align 4
+  %i0 = load float, float* %a, align 4
+  %i1 = load float, float* %b, align 4
   %add1 = fadd float %i0, %i1
   %call1 = tail call float @llvm.powi.f32(float %add1,i32 %P) nounwind readnone
 
   %arrayidx2 = getelementptr inbounds float, float* %a, i32 1
-  %i2 = load float* %arrayidx2, align 4
+  %i2 = load float, float* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds float, float* %b, i32 1
-  %i3 = load float* %arrayidx3, align 4
+  %i3 = load float, float* %arrayidx3, align 4
   %add2 = fadd float %i2, %i3
   %call2 = tail call float @llvm.powi.f32(float %add2,i32 %Q) nounwind readnone
 
   %arrayidx4 = getelementptr inbounds float, float* %a, i32 2
-  %i4 = load float* %arrayidx4, align 4
+  %i4 = load float, float* %arrayidx4, align 4
   %arrayidx5 = getelementptr inbounds float, float* %b, i32 2
-  %i5 = load float* %arrayidx5, align 4
+  %i5 = load float, float* %arrayidx5, align 4
   %add3 = fadd float %i4, %i5
   %call3 = tail call float @llvm.powi.f32(float %add3,i32 %P) nounwind readnone
 
   %arrayidx6 = getelementptr inbounds float, float* %a, i32 3
-  %i6 = load float* %arrayidx6, align 4
+  %i6 = load float, float* %arrayidx6, align 4
   %arrayidx7 = getelementptr inbounds float, float* %b, i32 3
-  %i7 = load float* %arrayidx7, align 4
+  %i7 = load float, float* %arrayidx7, align 4
   %add4 = fadd float %i6, %i7
   %call4 = tail call float @llvm.powi.f32(float %add4,i32 %Q) nounwind readnone
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll b/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll
index cd9d59f..f87dabf 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll
@@ -13,9 +13,9 @@
 ; CHECK: ret
 define i32 @test(double* nocapture %A, i8* nocapture %B) {
 entry:
-  %0 = load i8* %B, align 1
+  %0 = load i8, i8* %B, align 1
   %arrayidx1 = getelementptr inbounds i8, i8* %B, i64 1
-  %1 = load i8* %arrayidx1, align 1
+  %1 = load i8, i8* %arrayidx1, align 1
   %add = add i8 %0, 3
   %add4 = add i8 %1, 3
   %conv6 = sitofp i8 %add to double
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll b/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
index c113d89..0c16c34 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/loopinvariant.ll
@@ -19,42 +19,42 @@
 for.body:                                         ; preds = %entry, %for.body
   %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
   %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add1 = add nsw i32 %0, %n
   store i32 %add1, i32* %arrayidx, align 4
   %1 = or i64 %indvars.iv, 1
   %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %1
-  %2 = load i32* %arrayidx4, align 4
+  %2 = load i32, i32* %arrayidx4, align 4
   %add5 = add nsw i32 %2, %n
   store i32 %add5, i32* %arrayidx4, align 4
   %3 = or i64 %indvars.iv, 2
   %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %3
-  %4 = load i32* %arrayidx8, align 4
+  %4 = load i32, i32* %arrayidx8, align 4
   %add9 = add nsw i32 %4, %n
   store i32 %add9, i32* %arrayidx8, align 4
   %5 = or i64 %indvars.iv, 3
   %arrayidx12 = getelementptr inbounds i32, i32* %A, i64 %5
-  %6 = load i32* %arrayidx12, align 4
+  %6 = load i32, i32* %arrayidx12, align 4
   %add13 = add nsw i32 %6, %n
   store i32 %add13, i32* %arrayidx12, align 4
   %7 = or i64 %indvars.iv, 4
   %arrayidx16 = getelementptr inbounds i32, i32* %A, i64 %7
-  %8 = load i32* %arrayidx16, align 4
+  %8 = load i32, i32* %arrayidx16, align 4
   %add17 = add nsw i32 %8, %n
   store i32 %add17, i32* %arrayidx16, align 4
   %9 = or i64 %indvars.iv, 5
   %arrayidx20 = getelementptr inbounds i32, i32* %A, i64 %9
-  %10 = load i32* %arrayidx20, align 4
+  %10 = load i32, i32* %arrayidx20, align 4
   %add21 = add nsw i32 %10, %n
   store i32 %add21, i32* %arrayidx20, align 4
   %11 = or i64 %indvars.iv, 6
   %arrayidx24 = getelementptr inbounds i32, i32* %A, i64 %11
-  %12 = load i32* %arrayidx24, align 4
+  %12 = load i32, i32* %arrayidx24, align 4
   %add25 = add nsw i32 %12, %n
   store i32 %add25, i32* %arrayidx24, align 4
   %13 = or i64 %indvars.iv, 7
   %arrayidx28 = getelementptr inbounds i32, i32* %A, i64 %13
-  %14 = load i32* %arrayidx28, align 4
+  %14 = load i32, i32* %arrayidx28, align 4
   %add29 = add nsw i32 %14, %n
   store i32 %add29, i32* %arrayidx28, align 4
   %indvars.iv.next = add i64 %indvars.iv, 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll b/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll
index 0aa1d12..ebef6b5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll
@@ -12,13 +12,13 @@
 
 define void @test1(double* %a, double* %b, double* %c) {
 entry:
-  %i0 = load double* %a, align 8, !tbaa !4
-  %i1 = load double* %b, align 8, !tbaa !4
+  %i0 = load double, double* %a, align 8, !tbaa !4
+  %i1 = load double, double* %b, align 8, !tbaa !4
   %mul = fmul double %i0, %i1, !fpmath !0
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8, !tbaa !4
+  %i3 = load double, double* %arrayidx3, align 8, !tbaa !4
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8, !tbaa !4
+  %i4 = load double, double* %arrayidx4, align 8, !tbaa !4
   %mul5 = fmul double %i3, %i4, !fpmath !0
   store double %mul, double* %c, align 8, !tbaa !4
   %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
@@ -35,13 +35,13 @@
 
 define void @test2(double* %a, double* %b, i8* %e) {
 entry:
-  %i0 = load double* %a, align 8, !tbaa !4
-  %i1 = load double* %b, align 8, !tbaa !4
+  %i0 = load double, double* %a, align 8, !tbaa !4
+  %i1 = load double, double* %b, align 8, !tbaa !4
   %mul = fmul double %i0, %i1, !fpmath !1
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8, !tbaa !4
+  %i3 = load double, double* %arrayidx3, align 8, !tbaa !4
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8, !tbaa !4
+  %i4 = load double, double* %arrayidx4, align 8, !tbaa !4
   %mul5 = fmul double %i3, %i4, !fpmath !1
   %c = bitcast i8* %e to double*
   store double %mul, double* %c, align 8, !tbaa !4
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll
index 91f9ad5..993054a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll
@@ -25,9 +25,9 @@
 ;CHECK: store <2 x double>
 ;CHECK: ret
 define i32 @bar(double* nocapture %A, i32 %d) {
-  %1 = load double* %A, align 8
+  %1 = load double, double* %A, align 8
   %2 = getelementptr inbounds double, double* %A, i64 1
-  %3 = load double* %2, align 8
+  %3 = load double, double* %2, align 8
   %4 = fptrunc double %1 to float
   %5 = fptrunc double %3 to float
   %6 = icmp eq i32 %d, 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll
index 93204e9..3197f6d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi_user.ll
@@ -20,27 +20,27 @@
 define i32 @foo(i32* nocapture %A, i32 %n) {
   %1 = mul nsw i32 %n, 5
   %2 = add nsw i32 %1, 7
-  %3 = load i32* %A, align 4
+  %3 = load i32, i32* %A, align 4
   %4 = add nsw i32 %2, %3
   store i32 %4, i32* %A, align 4
   %5 = add nsw i32 %1, 8
   %6 = getelementptr inbounds i32, i32* %A, i64 1
-  %7 = load i32* %6, align 4
+  %7 = load i32, i32* %6, align 4
   %8 = add nsw i32 %5, %7
   store i32 %8, i32* %6, align 4
   %9 = add nsw i32 %1, 9
   %10 = getelementptr inbounds i32, i32* %A, i64 2
-  %11 = load i32* %10, align 4
+  %11 = load i32, i32* %10, align 4
   %12 = add nsw i32 %9, %11
   store i32 %12, i32* %10, align 4
   %13 = add nsw i32 %1, 10
   %14 = getelementptr inbounds i32, i32* %A, i64 3
-  %15 = load i32* %14, align 4
+  %15 = load i32, i32* %14, align 4
   %16 = add nsw i32 %13, %15
   store i32 %16, i32* %14, align 4
   %17 = add nsw i32 %1, 11
   %18 = getelementptr inbounds i32, i32* %A, i64 4
-  %19 = load i32* %18, align 4
+  %19 = load i32, i32* %18, align 4
   %20 = add nsw i32 %17, %19
   store i32 %20, i32* %18, align 4
   ret i32 undef
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll b/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll
index 6c8beb8..25f049a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/odd_store.ll
@@ -19,14 +19,14 @@
 ;CHECK: ret
 define i32 @foo(i8* noalias nocapture %A, float* noalias nocapture %B, float %T) {
   %1 = getelementptr inbounds float, float* %B, i64 10
-  %2 = load float* %1, align 4
+  %2 = load float, float* %1, align 4
   %3 = fmul float %2, %T
   %4 = fpext float %3 to double
   %5 = fadd double %4, 4.000000e+00
   %6 = fptosi double %5 to i8
   store i8 %6, i8* %A, align 1
   %7 = getelementptr inbounds float, float* %B, i64 11
-  %8 = load float* %7, align 4
+  %8 = load float, float* %7, align 4
   %9 = fmul float %8, %T
   %10 = fpext float %9 to double
   %11 = fadd double %10, 5.000000e+00
@@ -34,7 +34,7 @@
   %13 = getelementptr inbounds i8, i8* %A, i64 1
   store i8 %12, i8* %13, align 1
   %14 = getelementptr inbounds float, float* %B, i64 12
-  %15 = load float* %14, align 4
+  %15 = load float, float* %14, align 4
   %16 = fmul float %15, %T
   %17 = fpext float %16 to double
   %18 = fadd double %17, 6.000000e+00
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll
index 929c3c2..4a88dbf 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll
@@ -14,8 +14,8 @@
 define void @shuffle_operands1(double * noalias %from, double * noalias %to,
                                double %v1, double %v2) {
   %from_1 = getelementptr double, double *%from, i64 1
-  %v0_1 = load double * %from
-  %v0_2 = load double * %from_1
+  %v0_1 = load double , double * %from
+  %v0_2 = load double , double * %from_1
   %v1_1 = fadd double %v0_1, %v1
   %v1_2 = fadd double %v2, %v0_2
   %to_2 = getelementptr double, double * %to, i64 1
@@ -36,8 +36,8 @@
 lp:
   %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
   %from_1 = getelementptr double, double *%from, i64 1
-  %v0_1 = load double * %from
-  %v0_2 = load double * %from_1
+  %v0_1 = load double , double * %from
+  %v0_2 = load double , double * %from_1
   %v1_1 = fadd double %v0_1, %p
   %v1_2 = fadd double %v0_1, %v0_2
   %to_2 = getelementptr double, double * %to, i64 1
@@ -61,8 +61,8 @@
 lp:
   %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
   %from_1 = getelementptr double, double *%from, i64 1
-  %v0_1 = load double * %from
-  %v0_2 = load double * %from_1
+  %v0_1 = load double , double * %from
+  %v0_2 = load double , double * %from_1
   %v1_1 = fadd double %p, %v0_1
   %v1_2 = fadd double %v0_2, %v0_1
   %to_2 = getelementptr double, double * %to, i64 1
@@ -86,8 +86,8 @@
 lp:
   %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
   %from_1 = getelementptr double, double *%from, i64 1
-  %v0_1 = load double * %from
-  %v0_2 = load double * %from_1
+  %v0_1 = load double , double * %from
+  %v0_2 = load double , double * %from_1
   %v1_1 = fadd double %p, %v0_1
   %v1_2 = fadd double %v0_1, %v0_2
   %to_2 = getelementptr double, double * %to, i64 1
@@ -112,8 +112,8 @@
 lp:
   %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
   %from_1 = getelementptr double, double *%from, i64 1
-  %v0_1 = load double * %from
-  %v0_2 = load double * %from_1
+  %v0_1 = load double , double * %from
+  %v0_2 = load double , double * %from_1
   %v1_1 = fadd double %v0_2, %v0_1
   %v1_2 = fadd double %p, %v0_1
   %to_2 = getelementptr double, double * %to, i64 1
@@ -137,8 +137,8 @@
 lp:
   %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
   %from_1 = getelementptr double, double *%from, i64 1
-  %v0_1 = load double * %from
-  %v0_2 = load double * %from_1
+  %v0_1 = load double , double * %from
+  %v0_2 = load double , double * %from_1
   %v1_1 = fadd double %v0_1, %v0_2
   %v1_2 = fadd double %p, %v0_1
   %to_2 = getelementptr double, double * %to, i64 1
@@ -163,8 +163,8 @@
 lp:
   %p = phi double [ 1.000000e+00, %lp ], [ 0.000000e+00, %entry ]
   %from_1 = getelementptr double, double *%from, i64 1
-  %v0_1 = load double * %from
-  %v0_2 = load double * %from_1
+  %v0_1 = load double , double * %from
+  %v0_2 = load double , double * %from_1
   %v1_1 = fadd double %v0_1, %v0_2
   %v1_2 = fadd double %v0_1, %p
   %to_2 = getelementptr double, double * %to, i64 1
@@ -181,7 +181,7 @@
 
 ; CHECK-LABEL: good_load_order
 
-; CHECK: %[[V1:[0-9]+]] = load <4 x float>*
+; CHECK: %[[V1:[0-9]+]] = load <4 x float>, <4 x float>*
 ; CHECK: %[[V2:[0-9]+]] = insertelement <4 x float> undef, float %1, i32 0
 ; CHECK: %[[V3:[0-9]+]] = shufflevector <4 x float> %[[V2]], <4 x float> %[[V1]], <4 x i32> <i32 0, i32 4, i32 5, i32 6>
 ; CHECK:                = fmul <4 x float> %[[V1]], %[[V3]]
@@ -193,7 +193,7 @@
   br label %for.cond1.preheader
 
 for.cond1.preheader:
-  %0 = load float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), align 16
+  %0 = load float, float* getelementptr inbounds ([32000 x float]* @a, i64 0, i64 0), align 16
   br label %for.body3
 
 for.body3:
@@ -201,28 +201,28 @@
   %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
   %2 = add nsw i64 %indvars.iv, 1
   %arrayidx = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %2
-  %3 = load float* %arrayidx, align 4
+  %3 = load float, float* %arrayidx, align 4
   %arrayidx5 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv
   %mul6 = fmul float %3, %1
   store float %mul6, float* %arrayidx5, align 4
   %4 = add nsw i64 %indvars.iv, 2
   %arrayidx11 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %4
-  %5 = load float* %arrayidx11, align 4
+  %5 = load float, float* %arrayidx11, align 4
   %mul15 = fmul float %5, %3
   store float %mul15, float* %arrayidx, align 4
   %6 = add nsw i64 %indvars.iv, 3
   %arrayidx21 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %6
-  %7 = load float* %arrayidx21, align 4
+  %7 = load float, float* %arrayidx21, align 4
   %mul25 = fmul float %7, %5
   store float %mul25, float* %arrayidx11, align 4
   %8 = add nsw i64 %indvars.iv, 4
   %arrayidx31 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %8
-  %9 = load float* %arrayidx31, align 4
+  %9 = load float, float* %arrayidx31, align 4
   %mul35 = fmul float %9, %7
   store float %mul35, float* %arrayidx21, align 4
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 5
   %arrayidx41 = getelementptr inbounds [32000 x float], [32000 x float]* @a, i64 0, i64 %indvars.iv.next
-  %10 = load float* %arrayidx41, align 4
+  %10 = load float, float* %arrayidx41, align 4
   %mul45 = fmul float %10, %9
   store float %mul45, float* %arrayidx31, align 4
   %11 = trunc i64 %indvars.iv.next to i32
@@ -238,17 +238,17 @@
 ;  c[1] = b[1]+a[1]; // swapped b[1] and a[1]
 
 ; CHECK-LABEL: load_reorder_double
-; CHECK: load <2 x double>*
+; CHECK: load <2 x double>, <2 x double>*
 ; CHECK: fadd <2 x double>
 define void @load_reorder_double(double* nocapture %c, double* noalias nocapture readonly %a, double* noalias nocapture readonly %b){
-  %1 = load double* %a
-  %2 = load double* %b
+  %1 = load double, double* %a
+  %2 = load double, double* %b
   %3 = fadd double %1, %2
   store double %3, double* %c
   %4 = getelementptr inbounds double, double* %b, i64 1
-  %5 = load double* %4
+  %5 = load double, double* %4
   %6 = getelementptr inbounds double, double* %a, i64 1
-  %7 = load double* %6
+  %7 = load double, double* %6
   %8 = fadd double %5, %7
   %9 = getelementptr inbounds double, double* %c, i64 1
   store double %8, double* %9
@@ -262,31 +262,31 @@
 ;  c[3] = a[3]+b[3];
 
 ; CHECK-LABEL: load_reorder_float
-; CHECK: load <4 x float>*
+; CHECK: load <4 x float>, <4 x float>*
 ; CHECK: fadd <4 x float>
 define void @load_reorder_float(float* nocapture %c, float* noalias nocapture readonly %a, float* noalias nocapture readonly %b){
-  %1 = load float* %a
-  %2 = load float* %b
+  %1 = load float, float* %a
+  %2 = load float, float* %b
   %3 = fadd float %1, %2
   store float %3, float* %c
   %4 = getelementptr inbounds float, float* %b, i64 1
-  %5 = load float* %4
+  %5 = load float, float* %4
   %6 = getelementptr inbounds float, float* %a, i64 1
-  %7 = load float* %6
+  %7 = load float, float* %6
   %8 = fadd float %5, %7
   %9 = getelementptr inbounds float, float* %c, i64 1
   store float %8, float* %9
   %10 = getelementptr inbounds float, float* %a, i64 2
-  %11 = load float* %10
+  %11 = load float, float* %10
   %12 = getelementptr inbounds float, float* %b, i64 2
-  %13 = load float* %12
+  %13 = load float, float* %12
   %14 = fadd float %11, %13
   %15 = getelementptr inbounds float, float* %c, i64 2
   store float %14, float* %15
   %16 = getelementptr inbounds float, float* %a, i64 3
-  %17 = load float* %16
+  %17 = load float, float* %16
   %18 = getelementptr inbounds float, float* %b, i64 3
-  %19 = load float* %18
+  %19 = load float, float* %18
   %20 = fadd float %17, %19
   %21 = getelementptr inbounds float, float* %c, i64 3
   store float %20, float* %21
@@ -300,43 +300,43 @@
 ; a[3] = (b[3]+c[3])+d[3];
 
 ; CHECK-LABEL: opcode_reorder
-; CHECK: load <4 x float>*
+; CHECK: load <4 x float>, <4 x float>*
 ; CHECK: fadd <4 x float>
 define void @opcode_reorder(float* noalias nocapture %a, float* noalias nocapture readonly %b, 
                             float* noalias nocapture readonly %c,float* noalias nocapture readonly %d){
-  %1 = load float* %b
-  %2 = load float* %c
+  %1 = load float, float* %b
+  %2 = load float, float* %c
   %3 = fadd float %1, %2
-  %4 = load float* %d
+  %4 = load float, float* %d
   %5 = fadd float %3, %4
   store float %5, float* %a
   %6 = getelementptr inbounds float, float* %d, i64 1
-  %7 = load float* %6
+  %7 = load float, float* %6
   %8 = getelementptr inbounds float, float* %b, i64 1
-  %9 = load float* %8
+  %9 = load float, float* %8
   %10 = getelementptr inbounds float, float* %c, i64 1
-  %11 = load float* %10
+  %11 = load float, float* %10
   %12 = fadd float %9, %11
   %13 = fadd float %7, %12
   %14 = getelementptr inbounds float, float* %a, i64 1
   store float %13, float* %14
   %15 = getelementptr inbounds float, float* %b, i64 2
-  %16 = load float* %15
+  %16 = load float, float* %15
   %17 = getelementptr inbounds float, float* %c, i64 2
-  %18 = load float* %17
+  %18 = load float, float* %17
   %19 = fadd float %16, %18
   %20 = getelementptr inbounds float, float* %d, i64 2
-  %21 = load float* %20
+  %21 = load float, float* %20
   %22 = fadd float %19, %21
   %23 = getelementptr inbounds float, float* %a, i64 2
   store float %22, float* %23
   %24 = getelementptr inbounds float, float* %b, i64 3
-  %25 = load float* %24
+  %25 = load float, float* %24
   %26 = getelementptr inbounds float, float* %c, i64 3
-  %27 = load float* %26
+  %27 = load float, float* %26
   %28 = fadd float %25, %27
   %29 = getelementptr inbounds float, float* %d, i64 3
-  %30 = load float* %29
+  %30 = load float, float* %29
   %31 = fadd float %28, %30
   %32 = getelementptr inbounds float, float* %a, i64 3
   store float %31, float* %32
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/opt.ll b/llvm/test/Transforms/SLPVectorizer/X86/opt.ll
index d6954caad..824e999 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/opt.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/opt.ll
@@ -15,13 +15,13 @@
 
 define void @test1(double* %a, double* %b, double* %c) {
 entry:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   store double %mul, double* %c, align 8
   %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ordering.ll b/llvm/test/Transforms/SLPVectorizer/X86/ordering.ll
index d2ecd45..0fa72c9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/ordering.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/ordering.ll
@@ -5,7 +5,7 @@
 
 define void @updateModelQPFrame(i32 %m_Bits) {
 entry:
-  %0 = load double* undef, align 8
+  %0 = load double, double* undef, align 8
   %mul = fmul double undef, %0
   %mul2 = fmul double undef, %mul
   %mul4 = fmul double %0, %mul2
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
index 7654577..ef94467 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
@@ -30,9 +30,9 @@
 
 if.else:                                          ; preds = %entry
   %arrayidx = getelementptr inbounds double, double* %A, i64 10
-  %0 = load double* %arrayidx, align 8
+  %0 = load double, double* %arrayidx, align 8
   %arrayidx1 = getelementptr inbounds double, double* %A, i64 11
-  %1 = load double* %arrayidx1, align 8
+  %1 = load double, double* %arrayidx1, align 8
   br label %if.end
 
 if.end:                                           ; preds = %entry, %if.else
@@ -70,8 +70,8 @@
 define i32 @foo2(double* noalias nocapture %B, double* noalias nocapture %A, i32 %n, i32 %m) #0 {
 entry:
   %arrayidx = getelementptr inbounds double, double* %A, i64 1
-  %0 = load double* %arrayidx, align 8
-  %1 = load double* %A, align 8
+  %0 = load double, double* %arrayidx, align 8
+  %1 = load double, double* %A, align 8
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
@@ -123,15 +123,15 @@
 
 define float @foo3(float* nocapture readonly %A) #0 {
 entry:
-  %0 = load float* %A, align 4
+  %0 = load float, float* %A, align 4
   %arrayidx1 = getelementptr inbounds float, float* %A, i64 1
-  %1 = load float* %arrayidx1, align 4
+  %1 = load float, float* %arrayidx1, align 4
   %arrayidx2 = getelementptr inbounds float, float* %A, i64 2
-  %2 = load float* %arrayidx2, align 4
+  %2 = load float, float* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds float, float* %A, i64 3
-  %3 = load float* %arrayidx3, align 4
+  %3 = load float, float* %arrayidx3, align 4
   %arrayidx4 = getelementptr inbounds float, float* %A, i64 4
-  %4 = load float* %arrayidx4, align 4
+  %4 = load float, float* %arrayidx4, align 4
   br label %for.body
 
 for.body:                                         ; preds = %for.body, %entry
@@ -149,17 +149,17 @@
   %add11 = fadd float %G.053, %mul10
   %7 = add nsw i64 %indvars.iv, 2
   %arrayidx14 = getelementptr inbounds float, float* %A, i64 %7
-  %8 = load float* %arrayidx14, align 4
+  %8 = load float, float* %arrayidx14, align 4
   %mul15 = fmul float %8, 9.000000e+00
   %add16 = fadd float %B.054, %mul15
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3
   %arrayidx19 = getelementptr inbounds float, float* %A, i64 %indvars.iv.next
-  %9 = load float* %arrayidx19, align 4
+  %9 = load float, float* %arrayidx19, align 4
   %mul20 = fmul float %9, 1.000000e+01
   %add21 = fadd float %Y.055, %mul20
   %10 = add nsw i64 %indvars.iv, 4
   %arrayidx24 = getelementptr inbounds float, float* %A, i64 %10
-  %11 = load float* %arrayidx24, align 4
+  %11 = load float, float* %arrayidx24, align 4
   %mul25 = fmul float %11, 1.100000e+01
   %add26 = fadd float %P.056, %mul25
   %12 = trunc i64 %indvars.iv.next to i32
@@ -215,22 +215,22 @@
 ; We disable the vectorization of x86_fp80 for now. 
 
 entry:
-  %i1.0 = load x86_fp80* %i1, align 16
+  %i1.0 = load x86_fp80, x86_fp80* %i1, align 16
   %i1.gep1 = getelementptr x86_fp80, x86_fp80* %i1, i64 1
-  %i1.1 = load x86_fp80* %i1.gep1, align 16
-; CHECK: load x86_fp80*
-; CHECK: load x86_fp80*
+  %i1.1 = load x86_fp80, x86_fp80* %i1.gep1, align 16
+; CHECK: load x86_fp80, x86_fp80*
+; CHECK: load x86_fp80, x86_fp80*
 ; CHECK-NOT: insertelement <2 x x86_fp80>
 ; CHECK-NOT: insertelement <2 x x86_fp80>
   br i1 undef, label %then, label %end
 
 then:
   %i2.gep0 = getelementptr inbounds x86_fp80, x86_fp80* %i2, i64 0
-  %i2.0 = load x86_fp80* %i2.gep0, align 16
+  %i2.0 = load x86_fp80, x86_fp80* %i2.gep0, align 16
   %i2.gep1 = getelementptr inbounds x86_fp80, x86_fp80* %i2, i64 1
-  %i2.1 = load x86_fp80* %i2.gep1, align 16
-; CHECK: load x86_fp80*
-; CHECK: load x86_fp80*
+  %i2.1 = load x86_fp80, x86_fp80* %i2.gep1, align 16
+; CHECK: load x86_fp80, x86_fp80*
+; CHECK: load x86_fp80, x86_fp80*
 ; CHECK-NOT: insertelement <2 x x86_fp80>
 ; CHECK-NOT: insertelement <2 x x86_fp80>
   br label %end
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll
index fd8d361..6162830 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll
@@ -12,7 +12,7 @@
 define void @Rf_GReset() {
 entry:
   %sub = fsub double -0.000000e+00, undef
-  %0 = load double* @d, align 8
+  %0 = load double, double* @d, align 8
   %sub1 = fsub double -0.000000e+00, %0
   br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label %if.then, label %if.end7
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll
index 3da83f9..fa08eff 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi_overalignedtype.ll
@@ -12,22 +12,22 @@
 ; size is less than the alignment, and through various different GEP formations.
 
 entry:
-  %i1.0 = load double* %i1, align 16
+  %i1.0 = load double, double* %i1, align 16
   %i1.gep1 = getelementptr double, double* %i1, i64 1
-  %i1.1 = load double* %i1.gep1, align 16
-; CHECK: load double*
-; CHECK: load double*
+  %i1.1 = load double, double* %i1.gep1, align 16
+; CHECK: load double, double*
+; CHECK: load double, double*
 ; CHECK: insertelement <2 x double>
 ; CHECK: insertelement <2 x double>
   br i1 undef, label %then, label %end
 
 then:
   %i2.gep0 = getelementptr inbounds double, double* %i2, i64 0
-  %i2.0 = load double* %i2.gep0, align 16
+  %i2.0 = load double, double* %i2.gep0, align 16
   %i2.gep1 = getelementptr inbounds double, double* %i2, i64 1
-  %i2.1 = load double* %i2.gep1, align 16
-; CHECK: load double*
-; CHECK: load double*
+  %i2.1 = load double, double* %i2.gep1, align 16
+; CHECK: load double, double*
+; CHECK: load double, double*
 ; CHECK: insertelement <2 x double>
 ; CHECK: insertelement <2 x double>
   br label %end
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll b/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll
index 02512b3..a97b870 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll
@@ -4,36 +4,36 @@
 target triple = "x86_64-unknown-linux-gnu"
 
 ;CHECK-LABEL: @powof2div(
-;CHECK: load <4 x i32>*
+;CHECK: load <4 x i32>, <4 x i32>*
 ;CHECK: add nsw <4 x i32>
 ;CHECK: sdiv <4 x i32>
 define void @powof2div(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c){
 entry:
-  %0 = load i32* %b, align 4
-  %1 = load i32* %c, align 4
+  %0 = load i32, i32* %b, align 4
+  %1 = load i32, i32* %c, align 4
   %add = add nsw i32 %1, %0
   %div = sdiv i32 %add, 2
   store i32 %div, i32* %a, align 4
   %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 1
-  %2 = load i32* %arrayidx3, align 4
+  %2 = load i32, i32* %arrayidx3, align 4
   %arrayidx4 = getelementptr inbounds i32, i32* %c, i64 1
-  %3 = load i32* %arrayidx4, align 4
+  %3 = load i32, i32* %arrayidx4, align 4
   %add5 = add nsw i32 %3, %2
   %div6 = sdiv i32 %add5, 2
   %arrayidx7 = getelementptr inbounds i32, i32* %a, i64 1
   store i32 %div6, i32* %arrayidx7, align 4
   %arrayidx8 = getelementptr inbounds i32, i32* %b, i64 2
-  %4 = load i32* %arrayidx8, align 4
+  %4 = load i32, i32* %arrayidx8, align 4
   %arrayidx9 = getelementptr inbounds i32, i32* %c, i64 2
-  %5 = load i32* %arrayidx9, align 4
+  %5 = load i32, i32* %arrayidx9, align 4
   %add10 = add nsw i32 %5, %4
   %div11 = sdiv i32 %add10, 2
   %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 2
   store i32 %div11, i32* %arrayidx12, align 4
   %arrayidx13 = getelementptr inbounds i32, i32* %b, i64 3
-  %6 = load i32* %arrayidx13, align 4
+  %6 = load i32, i32* %arrayidx13, align 4
   %arrayidx14 = getelementptr inbounds i32, i32* %c, i64 3
-  %7 = load i32* %arrayidx14, align 4
+  %7 = load i32, i32* %arrayidx14, align 4
   %add15 = add nsw i32 %7, %6
   %div16 = sdiv i32 %add15, 2
   %arrayidx17 = getelementptr inbounds i32, i32* %a, i64 3
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr16628.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr16628.ll
index 3f9d775..c22ed34 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr16628.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr16628.ll
@@ -11,10 +11,10 @@
 define void @f() {
 entry:
   %call = tail call i32 (...)* @g()
-  %0 = load i32* @c, align 4
+  %0 = load i32, i32* @c, align 4
   %lnot = icmp eq i32 %0, 0
   %lnot.ext = zext i1 %lnot to i32
-  %1 = load i16* @a, align 2
+  %1 = load i16, i16* @a, align 2
   %lnot2 = icmp eq i16 %1, 0
   %lnot.ext3 = zext i1 %lnot2 to i32
   %or = or i32 %lnot.ext3, %lnot.ext
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll
index 5fe038e..0de14ec 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll
@@ -7,10 +7,10 @@
 ; Function Attrs: noreturn nounwind readonly
 define i32 @fn1() #0 {
 entry:
-  %0 = load i32** @a, align 4, !tbaa !4
-  %1 = load i32* %0, align 4, !tbaa !5
+  %0 = load i32*, i32** @a, align 4, !tbaa !4
+  %1 = load i32, i32* %0, align 4, !tbaa !5
   %arrayidx1 = getelementptr inbounds i32, i32* %0, i32 1
-  %2 = load i32* %arrayidx1, align 4, !tbaa !5
+  %2 = load i32, i32* %arrayidx1, align 4, !tbaa !5
   br label %do.body
 
 do.body:                                          ; preds = %do.body, %entry
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll
index f5e2467..a687aec 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr19657.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-;CHECK: load <2 x double>*
+;CHECK: load <2 x double>, <2 x double>*
 ;CHECK: fadd <2 x double>
 ;CHECK: store <2 x double>
 
@@ -11,60 +11,60 @@
 define void @foo(double* %x) #0 {
   %1 = alloca double*, align 8
   store double* %x, double** %1, align 8
-  %2 = load double** %1, align 8
+  %2 = load double*, double** %1, align 8
   %3 = getelementptr inbounds double, double* %2, i64 0
-  %4 = load double* %3, align 8
-  %5 = load double** %1, align 8
+  %4 = load double, double* %3, align 8
+  %5 = load double*, double** %1, align 8
   %6 = getelementptr inbounds double, double* %5, i64 0
-  %7 = load double* %6, align 8
+  %7 = load double, double* %6, align 8
   %8 = fadd double %4, %7
-  %9 = load double** %1, align 8
+  %9 = load double*, double** %1, align 8
   %10 = getelementptr inbounds double, double* %9, i64 0
-  %11 = load double* %10, align 8
+  %11 = load double, double* %10, align 8
   %12 = fadd double %8, %11
-  %13 = load double** %1, align 8
+  %13 = load double*, double** %1, align 8
   %14 = getelementptr inbounds double, double* %13, i64 0
   store double %12, double* %14, align 8
-  %15 = load double** %1, align 8
+  %15 = load double*, double** %1, align 8
   %16 = getelementptr inbounds double, double* %15, i64 1
-  %17 = load double* %16, align 8
-  %18 = load double** %1, align 8
+  %17 = load double, double* %16, align 8
+  %18 = load double*, double** %1, align 8
   %19 = getelementptr inbounds double, double* %18, i64 1
-  %20 = load double* %19, align 8
+  %20 = load double, double* %19, align 8
   %21 = fadd double %17, %20
-  %22 = load double** %1, align 8
+  %22 = load double*, double** %1, align 8
   %23 = getelementptr inbounds double, double* %22, i64 1
-  %24 = load double* %23, align 8
+  %24 = load double, double* %23, align 8
   %25 = fadd double %21, %24
-  %26 = load double** %1, align 8
+  %26 = load double*, double** %1, align 8
   %27 = getelementptr inbounds double, double* %26, i64 1
   store double %25, double* %27, align 8
-  %28 = load double** %1, align 8
+  %28 = load double*, double** %1, align 8
   %29 = getelementptr inbounds double, double* %28, i64 2
-  %30 = load double* %29, align 8
-  %31 = load double** %1, align 8
+  %30 = load double, double* %29, align 8
+  %31 = load double*, double** %1, align 8
   %32 = getelementptr inbounds double, double* %31, i64 2
-  %33 = load double* %32, align 8
+  %33 = load double, double* %32, align 8
   %34 = fadd double %30, %33
-  %35 = load double** %1, align 8
+  %35 = load double*, double** %1, align 8
   %36 = getelementptr inbounds double, double* %35, i64 2
-  %37 = load double* %36, align 8
+  %37 = load double, double* %36, align 8
   %38 = fadd double %34, %37
-  %39 = load double** %1, align 8
+  %39 = load double*, double** %1, align 8
   %40 = getelementptr inbounds double, double* %39, i64 2
   store double %38, double* %40, align 8
-  %41 = load double** %1, align 8
+  %41 = load double*, double** %1, align 8
   %42 = getelementptr inbounds double, double* %41, i64 3
-  %43 = load double* %42, align 8
-  %44 = load double** %1, align 8
+  %43 = load double, double* %42, align 8
+  %44 = load double*, double** %1, align 8
   %45 = getelementptr inbounds double, double* %44, i64 3
-  %46 = load double* %45, align 8
+  %46 = load double, double* %45, align 8
   %47 = fadd double %43, %46
-  %48 = load double** %1, align 8
+  %48 = load double*, double** %1, align 8
   %49 = getelementptr inbounds double, double* %48, i64 3
-  %50 = load double* %49, align 8
+  %50 = load double, double* %49, align 8
   %51 = fadd double %47, %50
-  %52 = load double** %1, align 8
+  %52 = load double*, double** %1, align 8
   %53 = getelementptr inbounds double, double* %52, i64 3
   store double %51, double* %53, align 8
   ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
index 49ddd9b..a3b0c8f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
@@ -15,10 +15,10 @@
   %idx3 = getelementptr inbounds i32, i32* %x, i64 2
   %idx4 = getelementptr inbounds i32, i32* %x, i64 3
 
-  %load1 = load i32* %idx1, align 4
-  %load2 = load i32* %idx2, align 4
-  %load3 = load i32* %idx3, align 4
-  %load4 = load i32* %idx4, align 4
+  %load1 = load i32, i32* %idx1, align 4
+  %load2 = load i32, i32* %idx2, align 4
+  %load3 = load i32, i32* %idx3, align 4
+  %load4 = load i32, i32* %idx4, align 4
 
   %op1 = lshr exact i32 %load1, 1
   %op2 = lshr exact i32 %load2, 1
@@ -41,10 +41,10 @@
   %idx3 = getelementptr inbounds i32, i32* %x, i64 2
   %idx4 = getelementptr inbounds i32, i32* %x, i64 3
 
-  %load1 = load i32* %idx1, align 4
-  %load2 = load i32* %idx2, align 4
-  %load3 = load i32* %idx3, align 4
-  %load4 = load i32* %idx4, align 4
+  %load1 = load i32, i32* %idx1, align 4
+  %load2 = load i32, i32* %idx2, align 4
+  %load3 = load i32, i32* %idx3, align 4
+  %load4 = load i32, i32* %idx4, align 4
 
   %op1 = lshr exact i32 %load1, 1
   %op2 = lshr i32 %load2, 1
@@ -67,10 +67,10 @@
   %idx3 = getelementptr inbounds i32, i32* %x, i64 2
   %idx4 = getelementptr inbounds i32, i32* %x, i64 3
 
-  %load1 = load i32* %idx1, align 4
-  %load2 = load i32* %idx2, align 4
-  %load3 = load i32* %idx3, align 4
-  %load4 = load i32* %idx4, align 4
+  %load1 = load i32, i32* %idx1, align 4
+  %load2 = load i32, i32* %idx2, align 4
+  %load3 = load i32, i32* %idx3, align 4
+  %load4 = load i32, i32* %idx4, align 4
 
   %op1 = add nsw i32 %load1, 1
   %op2 = add nsw i32 %load2, 1
@@ -93,10 +93,10 @@
   %idx3 = getelementptr inbounds i32, i32* %x, i64 2
   %idx4 = getelementptr inbounds i32, i32* %x, i64 3
 
-  %load1 = load i32* %idx1, align 4
-  %load2 = load i32* %idx2, align 4
-  %load3 = load i32* %idx3, align 4
-  %load4 = load i32* %idx4, align 4
+  %load1 = load i32, i32* %idx1, align 4
+  %load2 = load i32, i32* %idx2, align 4
+  %load3 = load i32, i32* %idx3, align 4
+  %load4 = load i32, i32* %idx4, align 4
 
   %op1 = add nsw i32 %load1, 1
   %op2 = add nsw i32 %load2, 1
@@ -119,10 +119,10 @@
   %idx3 = getelementptr inbounds i32, i32* %x, i64 2
   %idx4 = getelementptr inbounds i32, i32* %x, i64 3
 
-  %load1 = load i32* %idx1, align 4
-  %load2 = load i32* %idx2, align 4
-  %load3 = load i32* %idx3, align 4
-  %load4 = load i32* %idx4, align 4
+  %load1 = load i32, i32* %idx1, align 4
+  %load2 = load i32, i32* %idx2, align 4
+  %load3 = load i32, i32* %idx3, align 4
+  %load4 = load i32, i32* %idx4, align 4
 
   %op1 = add nuw i32 %load1, 1
   %op2 = add nuw i32 %load2, 1
@@ -145,10 +145,10 @@
   %idx3 = getelementptr inbounds i32, i32* %x, i64 2
   %idx4 = getelementptr inbounds i32, i32* %x, i64 3
 
-  %load1 = load i32* %idx1, align 4
-  %load2 = load i32* %idx2, align 4
-  %load3 = load i32* %idx3, align 4
-  %load4 = load i32* %idx4, align 4
+  %load1 = load i32, i32* %idx1, align 4
+  %load2 = load i32, i32* %idx2, align 4
+  %load3 = load i32, i32* %idx3, align 4
+  %load4 = load i32, i32* %idx4, align 4
 
   %op1 = add nuw i32 %load1, 1
   %op2 = add i32 %load2, 1
@@ -171,10 +171,10 @@
   %idx3 = getelementptr inbounds float, float* %x, i64 2
   %idx4 = getelementptr inbounds float, float* %x, i64 3
 
-  %load1 = load float* %idx1, align 4
-  %load2 = load float* %idx2, align 4
-  %load3 = load float* %idx3, align 4
-  %load4 = load float* %idx4, align 4
+  %load1 = load float, float* %idx1, align 4
+  %load2 = load float, float* %idx2, align 4
+  %load3 = load float, float* %idx3, align 4
+  %load4 = load float, float* %idx4, align 4
 
   %op1 = fadd fast nnan float %load1, 1.0
   %op2 = fadd nnan ninf float %load2, 1.0
@@ -197,10 +197,10 @@
   %idx3 = getelementptr inbounds float, float* %x, i64 2
   %idx4 = getelementptr inbounds float, float* %x, i64 3
 
-  %load1 = load float* %idx1, align 4
-  %load2 = load float* %idx2, align 4
-  %load3 = load float* %idx3, align 4
-  %load4 = load float* %idx4, align 4
+  %load1 = load float, float* %idx1, align 4
+  %load2 = load float, float* %idx2, align 4
+  %load3 = load float, float* %idx3, align 4
+  %load4 = load float, float* %idx4, align 4
 
   %op1 = fadd nnan float %load1, 1.0
   %op2 = fadd ninf float %load2, 1.0
@@ -223,10 +223,10 @@
   %idx3 = getelementptr inbounds float, float* %x, i64 2
   %idx4 = getelementptr inbounds float, float* %x, i64 3
 
-  %load1 = load float* %idx1, align 4
-  %load2 = load float* %idx2, align 4
-  %load3 = load float* %idx3, align 4
-  %load4 = load float* %idx4, align 4
+  %load1 = load float, float* %idx1, align 4
+  %load2 = load float, float* %idx2, align 4
+  %load3 = load float, float* %idx3, align 4
+  %load4 = load float, float* %idx4, align 4
 
   %op1 = fadd fast nnan float %load1, 1.0
   %op2 = fadd fast nnan ninf float %load2, 1.0
@@ -249,10 +249,10 @@
   %idx3 = getelementptr inbounds float, float* %x, i64 2
   %idx4 = getelementptr inbounds float, float* %x, i64 3
 
-  %load1 = load float* %idx1, align 4
-  %load2 = load float* %idx2, align 4
-  %load3 = load float* %idx3, align 4
-  %load4 = load float* %idx4, align 4
+  %load1 = load float, float* %idx1, align 4
+  %load2 = load float, float* %idx2, align 4
+  %load3 = load float, float* %idx3, align 4
+  %load4 = load float, float* %idx4, align 4
 
   %op1 = fadd fast float %load1, 1.0
   %op2 = fadd fast float %load2, 1.0
@@ -276,10 +276,10 @@
   %idx3 = getelementptr inbounds i32, i32* %x, i64 2
   %idx4 = getelementptr inbounds i32, i32* %x, i64 3
 
-  %load1 = load i32* %idx1, align 4
-  %load2 = load i32* %idx2, align 4
-  %load3 = load i32* %idx3, align 4
-  %load4 = load i32* %idx4, align 4
+  %load1 = load i32, i32* %idx1, align 4
+  %load2 = load i32, i32* %idx2, align 4
+  %load3 = load i32, i32* %idx3, align 4
+  %load4 = load i32, i32* %idx4, align 4
 
   %op1 = add nsw i32 %load1, 1
   %op2 = sub nsw i32 %load2, 1
@@ -303,10 +303,10 @@
   %idx3 = getelementptr inbounds i32, i32* %x, i64 2
   %idx4 = getelementptr inbounds i32, i32* %x, i64 3
 
-  %load1 = load i32* %idx1, align 4
-  %load2 = load i32* %idx2, align 4
-  %load3 = load i32* %idx3, align 4
-  %load4 = load i32* %idx4, align 4
+  %load1 = load i32, i32* %idx1, align 4
+  %load2 = load i32, i32* %idx2, align 4
+  %load3 = load i32, i32* %idx3, align 4
+  %load4 = load i32, i32* %idx4, align 4
 
   %op1 = add nsw i32 %load1, 1
   %op2 = sub nsw i32 %load2, 1
@@ -330,10 +330,10 @@
   %idx3 = getelementptr inbounds i32, i32* %x, i64 2
   %idx4 = getelementptr inbounds i32, i32* %x, i64 3
 
-  %load1 = load i32* %idx1, align 4
-  %load2 = load i32* %idx2, align 4
-  %load3 = load i32* %idx3, align 4
-  %load4 = load i32* %idx4, align 4
+  %load1 = load i32, i32* %idx1, align 4
+  %load2 = load i32, i32* %idx2, align 4
+  %load3 = load i32, i32* %idx3, align 4
+  %load4 = load i32, i32* %idx4, align 4
 
   %op1 = add i32 %load1, 1
   %op2 = sub nsw i32 %load2, 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
index 1dc63563..4c5f126 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
@@ -24,11 +24,11 @@
   %sum.014 = phi double [ %add6, %for.body ], [ 0.000000e+00, %entry ]
   %mul = shl nsw i32 %i.015, 1
   %arrayidx = getelementptr inbounds double, double* %A, i32 %mul
-  %0 = load double* %arrayidx, align 4
+  %0 = load double, double* %arrayidx, align 4
   %mul1 = fmul double %0, 7.000000e+00
   %add12 = or i32 %mul, 1
   %arrayidx3 = getelementptr inbounds double, double* %A, i32 %add12
-  %1 = load double* %arrayidx3, align 4
+  %1 = load double, double* %arrayidx3, align 4
   %mul4 = fmul double %1, 7.000000e+00
   %add5 = fadd double %mul1, %mul4
   %add6 = fadd double %sum.014, %add5
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
index cd3175c..507a61a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
@@ -14,12 +14,12 @@
   %sum.01 = phi double [ 0.000000e+00, %0 ], [ %9, %1 ]
   %2 = shl nsw i32 %i.02, 1
   %3 = getelementptr inbounds double, double* %D, i32 %2
-  %4 = load double* %3, align 4
+  %4 = load double, double* %3, align 4
   %A4 = fmul double %4, %4
   %A42 = fmul double %A4, %A4
   %5 = or i32 %2, 1
   %6 = getelementptr inbounds double, double* %D, i32 %5
-  %7 = load double* %6, align 4
+  %7 = load double, double* %6, align 4
   %A7 = fmul double %7, %7
   %A72 = fmul double %A7, %A7
   %8 = fadd double %A42, %A72
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/return.ll b/llvm/test/Transforms/SLPVectorizer/X86/return.ll
index 13a6cf4..242edf5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/return.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/return.ll
@@ -13,17 +13,17 @@
 ; }
 
 ; CHECK-LABEL: @return1
-; CHECK: %0 = load <2 x double>*
-; CHECK: %1 = load <2 x double>*
+; CHECK: %0 = load <2 x double>, <2 x double>*
+; CHECK: %1 = load <2 x double>, <2 x double>*
 ; CHECK: %2 = fadd <2 x double>
 
 define double @return1() {
 entry:
-  %a0 = load double* getelementptr inbounds ([4 x double]* @a, i32 0, i32 0), align 8
-  %b0 = load double* getelementptr inbounds ([4 x double]* @b, i32 0, i32 0), align 8
+  %a0 = load double, double* getelementptr inbounds ([4 x double]* @a, i32 0, i32 0), align 8
+  %b0 = load double, double* getelementptr inbounds ([4 x double]* @b, i32 0, i32 0), align 8
   %add0 = fadd double %a0, %b0
-  %a1 = load double* getelementptr inbounds ([4 x double]* @a, i32 0, i32 1), align 8
-  %b1 = load double* getelementptr inbounds ([4 x double]* @b, i32 0, i32 1), align 8
+  %a1 = load double, double* getelementptr inbounds ([4 x double]* @a, i32 0, i32 1), align 8
+  %b1 = load double, double* getelementptr inbounds ([4 x double]* @b, i32 0, i32 1), align 8
   %add1 = fadd double %a1, %b1
   %add2 = fadd double %add0, %add1
   ret double %add2
@@ -34,20 +34,20 @@
 ; }
 
 ; CHECK-LABEL: @return2
-; CHECK: %1 = load <2 x double>*
-; CHECK: %3 = load <2 x double>* %2
+; CHECK: %1 = load <2 x double>, <2 x double>*
+; CHECK: %3 = load <2 x double>, <2 x double>* %2
 ; CHECK: %4 = fadd <2 x double> %1, %3
 
 define double @return2(double* nocapture readonly %x) {
 entry:
-  %x0 = load double* %x, align 4
+  %x0 = load double, double* %x, align 4
   %arrayidx1 = getelementptr inbounds double, double* %x, i32 2
-  %x2 = load double* %arrayidx1, align 4
+  %x2 = load double, double* %arrayidx1, align 4
   %add3 = fadd double %x0, %x2
   %arrayidx2 = getelementptr inbounds double, double* %x, i32 1
-  %x1 = load double* %arrayidx2, align 4
+  %x1 = load double, double* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds double, double* %x, i32 3
-  %x3 = load double* %arrayidx3, align 4
+  %x3 = load double, double* %arrayidx3, align 4
   %add4 = fadd double %x1, %x3
   %add5 = fadd double %add3, %add4
   ret double %add5
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/rgb_phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
index 2a3cc6d..0bdb7da 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
@@ -33,11 +33,11 @@
 
 define float @foo(float* nocapture readonly %A) {
 entry:
-  %0 = load float* %A, align 4
+  %0 = load float, float* %A, align 4
   %arrayidx1 = getelementptr inbounds float, float* %A, i64 1
-  %1 = load float* %arrayidx1, align 4
+  %1 = load float, float* %arrayidx1, align 4
   %arrayidx2 = getelementptr inbounds float, float* %A, i64 2
-  %2 = load float* %arrayidx2, align 4
+  %2 = load float, float* %arrayidx2, align 4
   br label %for.body
 
 for.body:                                         ; preds = %for.body.for.body_crit_edge, %entry
@@ -50,12 +50,12 @@
   %add4 = fadd float %R.030, %mul
   %4 = add nsw i64 %indvars.iv, 1
   %arrayidx7 = getelementptr inbounds float, float* %A, i64 %4
-  %5 = load float* %arrayidx7, align 4
+  %5 = load float, float* %arrayidx7, align 4
   %mul8 = fmul float %5, 8.000000e+00
   %add9 = fadd float %G.031, %mul8
   %6 = add nsw i64 %indvars.iv, 2
   %arrayidx12 = getelementptr inbounds float, float* %A, i64 %6
-  %7 = load float* %arrayidx12, align 4
+  %7 = load float, float* %arrayidx12, align 4
   %mul13 = fmul float %7, 9.000000e+00
   %add14 = fadd float %B.032, %mul13
   %indvars.iv.next = add i64 %indvars.iv, 3
@@ -65,7 +65,7 @@
 
 for.body.for.body_crit_edge:                      ; preds = %for.body
   %arrayidx3.phi.trans.insert = getelementptr inbounds float, float* %A, i64 %indvars.iv.next
-  %.pre = load float* %arrayidx3.phi.trans.insert, align 4
+  %.pre = load float, float* %arrayidx3.phi.trans.insert, align 4
   br label %for.body
 
 for.end:                                          ; preds = %for.body
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll b/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll
index da2654a..a9ca093 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/saxpy.ll
@@ -10,34 +10,34 @@
 
 define void @SAXPY(i32* noalias nocapture %x, i32* noalias nocapture %y, i32 %a, i64 %i) {
   %1 = getelementptr inbounds i32, i32* %x, i64 %i
-  %2 = load i32* %1, align 4
+  %2 = load i32, i32* %1, align 4
   %3 = mul nsw i32 %2, %a
   %4 = getelementptr inbounds i32, i32* %y, i64 %i
-  %5 = load i32* %4, align 4
+  %5 = load i32, i32* %4, align 4
   %6 = add nsw i32 %3, %5
   store i32 %6, i32* %1, align 4
   %7 = add i64 %i, 1
   %8 = getelementptr inbounds i32, i32* %x, i64 %7
-  %9 = load i32* %8, align 4
+  %9 = load i32, i32* %8, align 4
   %10 = mul nsw i32 %9, %a
   %11 = getelementptr inbounds i32, i32* %y, i64 %7
-  %12 = load i32* %11, align 4
+  %12 = load i32, i32* %11, align 4
   %13 = add nsw i32 %10, %12
   store i32 %13, i32* %8, align 4
   %14 = add i64 %i, 2
   %15 = getelementptr inbounds i32, i32* %x, i64 %14
-  %16 = load i32* %15, align 4
+  %16 = load i32, i32* %15, align 4
   %17 = mul nsw i32 %16, %a
   %18 = getelementptr inbounds i32, i32* %y, i64 %14
-  %19 = load i32* %18, align 4
+  %19 = load i32, i32* %18, align 4
   %20 = add nsw i32 %17, %19
   store i32 %20, i32* %15, align 4
   %21 = add i64 %i, 3
   %22 = getelementptr inbounds i32, i32* %x, i64 %21
-  %23 = load i32* %22, align 4
+  %23 = load i32, i32* %22, align 4
   %24 = mul nsw i32 %23, %a
   %25 = getelementptr inbounds i32, i32* %y, i64 %21
-  %26 = load i32* %25, align 4
+  %26 = load i32, i32* %25, align 4
   %27 = add nsw i32 %24, %26
   store i32 %27, i32* %22, align 4
   ret void
@@ -48,13 +48,13 @@
   %1 = add i64 %i, 1
   %2 = getelementptr inbounds i32, i32* %x, i64 %1
   %3 = getelementptr inbounds i32, i32* %y, i64 %1
-  %4 = load i32* %3, align 4
+  %4 = load i32, i32* %3, align 4
   %5 = add nsw i32 undef, %4
   store i32 %5, i32* %2, align 4
   %6 = add i64 %i, 2
   %7 = getelementptr inbounds i32, i32* %x, i64 %6
   %8 = getelementptr inbounds i32, i32* %y, i64 %6
-  %9 = load i32* %8, align 4
+  %9 = load i32, i32* %8, align 4
   %10 = add nsw i32 undef, %9
   store i32 %10, i32* %7, align 4
   ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll
index 9e23a6a..33bdc6a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/scheduling.ll
@@ -25,40 +25,40 @@
   %a.088 = phi i32 [ 0, %entry ], [ %add52, %for.body ]
   %1 = shl i64 %indvars.iv, 3
   %arrayidx = getelementptr inbounds i32, i32* %diff, i64 %1
-  %2 = load i32* %arrayidx, align 4
+  %2 = load i32, i32* %arrayidx, align 4
   %3 = or i64 %1, 4
   %arrayidx2 = getelementptr inbounds i32, i32* %diff, i64 %3
-  %4 = load i32* %arrayidx2, align 4
+  %4 = load i32, i32* %arrayidx2, align 4
   %add3 = add nsw i32 %4, %2
   %arrayidx6 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 0
   store i32 %add3, i32* %arrayidx6, align 16
   %add10 = add nsw i32 %add3, %a.088
   %5 = or i64 %1, 1
   %arrayidx13 = getelementptr inbounds i32, i32* %diff, i64 %5
-  %6 = load i32* %arrayidx13, align 4
+  %6 = load i32, i32* %arrayidx13, align 4
   %7 = or i64 %1, 5
   %arrayidx16 = getelementptr inbounds i32, i32* %diff, i64 %7
-  %8 = load i32* %arrayidx16, align 4
+  %8 = load i32, i32* %arrayidx16, align 4
   %add17 = add nsw i32 %8, %6
   %arrayidx20 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 1
   store i32 %add17, i32* %arrayidx20, align 4
   %add24 = add nsw i32 %add10, %add17
   %9 = or i64 %1, 2
   %arrayidx27 = getelementptr inbounds i32, i32* %diff, i64 %9
-  %10 = load i32* %arrayidx27, align 4
+  %10 = load i32, i32* %arrayidx27, align 4
   %11 = or i64 %1, 6
   %arrayidx30 = getelementptr inbounds i32, i32* %diff, i64 %11
-  %12 = load i32* %arrayidx30, align 4
+  %12 = load i32, i32* %arrayidx30, align 4
   %add31 = add nsw i32 %12, %10
   %arrayidx34 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 2
   store i32 %add31, i32* %arrayidx34, align 8
   %add38 = add nsw i32 %add24, %add31
   %13 = or i64 %1, 3
   %arrayidx41 = getelementptr inbounds i32, i32* %diff, i64 %13
-  %14 = load i32* %arrayidx41, align 4
+  %14 = load i32, i32* %arrayidx41, align 4
   %15 = or i64 %1, 7
   %arrayidx44 = getelementptr inbounds i32, i32* %diff, i64 %15
-  %16 = load i32* %arrayidx44, align 4
+  %16 = load i32, i32* %arrayidx44, align 4
   %add45 = add nsw i32 %16, %14
   %arrayidx48 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %m2, i64 0, i64 %indvars.iv, i64 3
   store i32 %add45, i32* %arrayidx48, align 4
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll b/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll
index ccb165f..c9bb884 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll
@@ -13,16 +13,16 @@
   %2 = shl i64 %i.019, 2
   %3 = getelementptr inbounds i32, i32* %in, i64 %2
 ;CHECK:load <4 x i32>
-  %4 = load i32* %3, align 4
+  %4 = load i32, i32* %3, align 4
   %5 = or i64 %2, 1
   %6 = getelementptr inbounds i32, i32* %in, i64 %5
-  %7 = load i32* %6, align 4
+  %7 = load i32, i32* %6, align 4
   %8 = or i64 %2, 2
   %9 = getelementptr inbounds i32, i32* %in, i64 %8
-  %10 = load i32* %9, align 4
+  %10 = load i32, i32* %9, align 4
   %11 = or i64 %2, 3
   %12 = getelementptr inbounds i32, i32* %in, i64 %11
-  %13 = load i32* %12, align 4
+  %13 = load i32, i32* %12, align 4
 ;CHECK:mul <4 x i32>
   %14 = mul i32 %4, 7
 ;CHECK:add <4 x i32>
@@ -62,16 +62,16 @@
   %i.019 = phi i64 [ %26, %.lr.ph ], [ 0, %0 ]
   %2 = shl i64 %i.019, 2
   %3 = getelementptr inbounds i32, i32* %in, i64 %2
-  %4 = load i32* %3, align 4
+  %4 = load i32, i32* %3, align 4
   %5 = or i64 %2, 1
   %6 = getelementptr inbounds i32, i32* %in, i64 %5
-  %7 = load i32* %6, align 4
+  %7 = load i32, i32* %6, align 4
   %8 = or i64 %2, 2
   %9 = getelementptr inbounds i32, i32* %in, i64 %8
-  %10 = load i32* %9, align 4
+  %10 = load i32, i32* %9, align 4
   %11 = or i64 %2, 3
   %12 = getelementptr inbounds i32, i32* %in, i64 %11
-  %13 = load i32* %12, align 4
+  %13 = load i32, i32* %12, align 4
   %14 = mul i32 %4, 7
   %15 = add i32 %14, 7
   %16 = mul i32 %7, 7
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll b/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll
index 83c0e82..a5d9ad9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll
@@ -9,13 +9,13 @@
 ; CHECK: ret
 define void @test1(double* %a, double* %b, double* %c) {
 entry:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   store double %mul, double* %c, align 8
   %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
@@ -29,13 +29,13 @@
 ; CHECK: ret
 define void @test2(double* %a, double* %b, i8* %e) {
 entry:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   %c = bitcast i8* %e to double*
   store double %mul, double* %c, align 8
@@ -52,13 +52,13 @@
 ; CHECK: ret
 define void @test_volatile_load(double* %a, double* %b, double* %c) {
 entry:
-  %i0 = load volatile double* %a, align 8
-  %i1 = load volatile double* %b, align 8
+  %i0 = load volatile double, double* %a, align 8
+  %i1 = load volatile double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   store double %mul, double* %c, align 8
   %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
@@ -72,13 +72,13 @@
 ; CHECK: ret
 define void @test_volatile_store(double* %a, double* %b, double* %c) {
 entry:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   store volatile double %mul, double* %c, align 8
   %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
index cbce687..6c93222 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/tiny-tree.ll
@@ -17,10 +17,10 @@
   %i.015 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ]
   %src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ]
-  %0 = load double* %src.addr.013, align 8
+  %0 = load double, double* %src.addr.013, align 8
   store double %0, double* %dst.addr.014, align 8
   %arrayidx2 = getelementptr inbounds double, double* %src.addr.013, i64 1
-  %1 = load double* %arrayidx2, align 8
+  %1 = load double, double* %arrayidx2, align 8
   %arrayidx3 = getelementptr inbounds double, double* %dst.addr.014, i64 1
   store double %1, double* %arrayidx3, align 8
   %add.ptr = getelementptr inbounds double, double* %src.addr.013, i64 %i.015
@@ -47,18 +47,18 @@
   %i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ]
   %src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ]
-  %0 = load float* %src.addr.021, align 4
+  %0 = load float, float* %src.addr.021, align 4
   store float %0, float* %dst.addr.022, align 4
   %arrayidx2 = getelementptr inbounds float, float* %src.addr.021, i64 1
-  %1 = load float* %arrayidx2, align 4
+  %1 = load float, float* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds float, float* %dst.addr.022, i64 1
   store float %1, float* %arrayidx3, align 4
   %arrayidx4 = getelementptr inbounds float, float* %src.addr.021, i64 2
-  %2 = load float* %arrayidx4, align 4
+  %2 = load float, float* %arrayidx4, align 4
   %arrayidx5 = getelementptr inbounds float, float* %dst.addr.022, i64 2
   store float %2, float* %arrayidx5, align 4
   %arrayidx6 = getelementptr inbounds float, float* %src.addr.021, i64 3
-  %3 = load float* %arrayidx6, align 4
+  %3 = load float, float* %arrayidx6, align 4
   %arrayidx7 = getelementptr inbounds float, float* %dst.addr.022, i64 3
   store float %3, float* %arrayidx7, align 4
   %add.ptr = getelementptr inbounds float, float* %src.addr.021, i64 %i.023
@@ -85,10 +85,10 @@
   %i.015 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %dst.addr.014 = phi double* [ %add.ptr4, %for.body ], [ %dst, %entry ]
   %src.addr.013 = phi double* [ %add.ptr, %for.body ], [ %src, %entry ]
-  %0 = load double* %src.addr.013, align 8
+  %0 = load double, double* %src.addr.013, align 8
   store double %0, double* %dst.addr.014, align 8
   %arrayidx2 = getelementptr inbounds double, double* %src.addr.013, i64 2
-  %1 = load double* %arrayidx2, align 8
+  %1 = load double, double* %arrayidx2, align 8
   %arrayidx3 = getelementptr inbounds double, double* %dst.addr.014, i64 1 
   store double %1, double* %arrayidx3, align 8
   %add.ptr = getelementptr inbounds double, double* %src.addr.013, i64 %i.015
@@ -115,18 +115,18 @@
   %i.023 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
   %dst.addr.022 = phi float* [ %add.ptr8, %for.body ], [ %dst, %entry ]
   %src.addr.021 = phi float* [ %add.ptr, %for.body ], [ %src, %entry ]
-  %0 = load float* %src.addr.021, align 4
+  %0 = load float, float* %src.addr.021, align 4
   store float %0, float* %dst.addr.022, align 4
   %arrayidx2 = getelementptr inbounds float, float* %src.addr.021, i64 4 
-  %1 = load float* %arrayidx2, align 4
+  %1 = load float, float* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds float, float* %dst.addr.022, i64 1
   store float %1, float* %arrayidx3, align 4
   %arrayidx4 = getelementptr inbounds float, float* %src.addr.021, i64 2
-  %2 = load float* %arrayidx4, align 4
+  %2 = load float, float* %arrayidx4, align 4
   %arrayidx5 = getelementptr inbounds float, float* %dst.addr.022, i64 2
   store float %2, float* %arrayidx5, align 4
   %arrayidx6 = getelementptr inbounds float, float* %src.addr.021, i64 3
-  %3 = load float* %arrayidx6, align 4
+  %3 = load float, float* %arrayidx6, align 4
   %arrayidx7 = getelementptr inbounds float, float* %dst.addr.022, i64 3
   store float %3, float* %arrayidx7, align 4
   %add.ptr = getelementptr inbounds float, float* %src.addr.021, i64 %i.023
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll b/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll
index b1d23e9..f29f69d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll
@@ -12,15 +12,15 @@
 
 bb1:                                    ; an unreachable block
   %t3 = getelementptr inbounds i32, i32* %x, i64 4
-  %t4 = load i32* %t3, align 4
+  %t4 = load i32, i32* %t3, align 4
   %t5 = getelementptr inbounds i32, i32* %x, i64 5
-  %t6 = load i32* %t5, align 4
+  %t6 = load i32, i32* %t5, align 4
   %bad = fadd float %bad, 0.000000e+00  ; <- an instruction with self dependency,
                                         ;    but legal in unreachable code
   %t7 = getelementptr inbounds i32, i32* %x, i64 6
-  %t8 = load i32* %t7, align 4
+  %t8 = load i32, i32* %t7, align 4
   %t9 = getelementptr inbounds i32, i32* %x, i64 7
-  %t10 = load i32* %t9, align 4
+  %t10 = load i32, i32* %t9, align 4
   br label %bb2
 
 bb2:
diff --git a/llvm/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll b/llvm/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll
index cca309b..efd5386 100644
--- a/llvm/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll
+++ b/llvm/test/Transforms/SLPVectorizer/XCore/no-vector-registers.ll
@@ -8,13 +8,13 @@
 ; CHECK-NOT: <2 x double>
 define void @test1(double* %a, double* %b, double* %c) {
 entry:
-  %i0 = load double* %a, align 8
-  %i1 = load double* %b, align 8
+  %i0 = load double, double* %a, align 8
+  %i1 = load double, double* %b, align 8
   %mul = fmul double %i0, %i1
   %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
-  %i3 = load double* %arrayidx3, align 8
+  %i3 = load double, double* %arrayidx3, align 8
   %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
-  %i4 = load double* %arrayidx4, align 8
+  %i4 = load double, double* %arrayidx4, align 8
   %mul5 = fmul double %i3, %i4
   store double %mul, double* %c, align 8
   %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
diff --git a/llvm/test/Transforms/SROA/address-spaces.ll b/llvm/test/Transforms/SROA/address-spaces.ll
index c709834..004695d 100644
--- a/llvm/test/Transforms/SROA/address-spaces.ll
+++ b/llvm/test/Transforms/SROA/address-spaces.ll
@@ -10,7 +10,7 @@
 ; Make sure an illegal bitcast isn't introduced
 define void @test_address_space_1_1(<2 x i64> addrspace(1)* %a, i16 addrspace(1)* %b) {
 ; CHECK-LABEL: @test_address_space_1_1(
-; CHECK: load <2 x i64> addrspace(1)* %a, align 2
+; CHECK: load <2 x i64>, <2 x i64> addrspace(1)* %a, align 2
 ; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2
 ; CHECK: ret void
   %aa = alloca <2 x i64>, align 16
@@ -24,7 +24,7 @@
 
 define void @test_address_space_1_0(<2 x i64> addrspace(1)* %a, i16* %b) {
 ; CHECK-LABEL: @test_address_space_1_0(
-; CHECK: load <2 x i64> addrspace(1)* %a, align 2
+; CHECK: load <2 x i64>, <2 x i64> addrspace(1)* %a, align 2
 ; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2
 ; CHECK: ret void
   %aa = alloca <2 x i64>, align 16
@@ -38,7 +38,7 @@
 
 define void @test_address_space_0_1(<2 x i64>* %a, i16 addrspace(1)* %b) {
 ; CHECK-LABEL: @test_address_space_0_1(
-; CHECK: load <2 x i64>* %a, align 2
+; CHECK: load <2 x i64>, <2 x i64>* %a, align 2
 ; CHECK: store <2 x i64> {{.*}}, <2 x i64> addrspace(1)* {{.*}}, align 2
 ; CHECK: ret void
   %aa = alloca <2 x i64>, align 16
diff --git a/llvm/test/Transforms/SROA/alignment.ll b/llvm/test/Transforms/SROA/alignment.ll
index e631bba..455d142 100644
--- a/llvm/test/Transforms/SROA/alignment.ll
+++ b/llvm/test/Transforms/SROA/alignment.ll
@@ -6,9 +6,9 @@
 define void @test1({ i8, i8 }* %a, { i8, i8 }* %b) {
 ; CHECK-LABEL: @test1(
 ; CHECK: %[[gep_a0:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %a, i64 0, i32 0
-; CHECK: %[[a0:.*]] = load i8* %[[gep_a0]], align 16
+; CHECK: %[[a0:.*]] = load i8, i8* %[[gep_a0]], align 16
 ; CHECK: %[[gep_a1:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %a, i64 0, i32 1
-; CHECK: %[[a1:.*]] = load i8* %[[gep_a1]], align 1
+; CHECK: %[[a1:.*]] = load i8, i8* %[[gep_a1]], align 1
 ; CHECK: %[[gep_b0:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %b, i64 0, i32 0
 ; CHECK: store i8 %[[a0]], i8* %[[gep_b0]], align 16
 ; CHECK: %[[gep_b1:.*]] = getelementptr inbounds { i8, i8 }, { i8, i8 }* %b, i64 0, i32 1
@@ -31,7 +31,7 @@
 define void @test2() {
 ; CHECK-LABEL: @test2(
 ; CHECK: alloca i16
-; CHECK: load i8* %{{.*}}
+; CHECK: load i8, i8* %{{.*}}
 ; CHECK: store i8 42, i8* %{{.*}}
 ; CHECK: ret void
 
@@ -41,7 +41,7 @@
   %cast1 = bitcast i8* %gep1 to i16*
   store volatile i16 0, i16* %cast1
   %gep2 = getelementptr { i8, i8, i8, i8 }, { i8, i8, i8, i8 }* %a, i32 0, i32 2
-  %result = load i8* %gep2
+  %result = load i8, i8* %gep2
   store i8 42, i8* %gep2
   ret void
 }
@@ -49,7 +49,7 @@
 define void @PR13920(<2 x i64>* %a, i16* %b) {
 ; Test that alignments on memcpy intrinsics get propagated to loads and stores.
 ; CHECK-LABEL: @PR13920(
-; CHECK: load <2 x i64>* %a, align 2
+; CHECK: load <2 x i64>, <2 x i64>* %a, align 2
 ; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2
 ; CHECK: ret void
 
@@ -93,10 +93,10 @@
 ; CHECK: alloca [9 x i8]
 ; CHECK: alloca [9 x i8]
 ; CHECK: store volatile double 0.0{{.*}}, double* %{{.*}}, align 1
-; CHECK: load volatile i16* %{{.*}}, align 1
-; CHECK: load double* %{{.*}}, align 1
+; CHECK: load volatile i16, i16* %{{.*}}, align 1
+; CHECK: load double, double* %{{.*}}, align 1
 ; CHECK: store volatile double %{{.*}}, double* %{{.*}}, align 1
-; CHECK: load volatile i16* %{{.*}}, align 1
+; CHECK: load volatile i16, i16* %{{.*}}, align 1
 ; CHECK: ret void
 
 entry:
@@ -106,15 +106,15 @@
   store volatile double 0.0, double* %ptr1, align 1
   %weird_gep1 = getelementptr inbounds [18 x i8], [18 x i8]* %a, i32 0, i32 7
   %weird_cast1 = bitcast i8* %weird_gep1 to i16*
-  %weird_load1 = load volatile i16* %weird_cast1, align 1
+  %weird_load1 = load volatile i16, i16* %weird_cast1, align 1
 
   %raw2 = getelementptr inbounds [18 x i8], [18 x i8]* %a, i32 0, i32 9
   %ptr2 = bitcast i8* %raw2 to double*
-  %d1 = load double* %ptr1, align 1
+  %d1 = load double, double* %ptr1, align 1
   store volatile double %d1, double* %ptr2, align 1
   %weird_gep2 = getelementptr inbounds [18 x i8], [18 x i8]* %a, i32 0, i32 16
   %weird_cast2 = bitcast i8* %weird_gep2 to i16*
-  %weird_load2 = load volatile i16* %weird_cast2, align 1
+  %weird_load2 = load volatile i16, i16* %weird_cast2, align 1
 
   ret void
 }
@@ -136,7 +136,7 @@
 
   %raw2 = getelementptr inbounds [16 x i8], [16 x i8]* %a, i32 0, i32 8
   %ptr2 = bitcast i8* %raw2 to double*
-  %val = load double* %ptr1, align 1
+  %val = load double, double* %ptr1, align 1
   store volatile double %val, double* %ptr2, align 1
 
   ret void
@@ -156,11 +156,11 @@
   %ptr2 = bitcast i8* %raw2 to double*
 
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %raw1, i8* %out, i32 16, i32 0, i1 false)
-; CHECK: %[[val2:.*]] = load double* %{{.*}}, align 1
-; CHECK: %[[val1:.*]] = load double* %{{.*}}, align 1
+; CHECK: %[[val2:.*]] = load double, double* %{{.*}}, align 1
+; CHECK: %[[val1:.*]] = load double, double* %{{.*}}, align 1
 
-  %val1 = load double* %ptr2, align 1
-  %val2 = load double* %ptr1, align 1
+  %val1 = load double, double* %ptr2, align 1
+  %val2 = load double, double* %ptr1, align 1
 
   store double %val1, double* %ptr1, align 1
   store double %val2, double* %ptr2, align 1
diff --git a/llvm/test/Transforms/SROA/basictest.ll b/llvm/test/Transforms/SROA/basictest.ll
index f6b1c27..a59192d 100644
--- a/llvm/test/Transforms/SROA/basictest.ll
+++ b/llvm/test/Transforms/SROA/basictest.ll
@@ -19,7 +19,7 @@
   call void @llvm.lifetime.start(i64 4, i8* %a1.i8)
 
   store i32 0, i32* %a1
-  %v1 = load i32* %a1
+  %v1 = load i32, i32* %a1
 
   call void @llvm.lifetime.end(i64 4, i8* %a1.i8)
 
@@ -27,7 +27,7 @@
   call void @llvm.lifetime.start(i64 4, i8* %a2.i8)
 
   store float 0.0, float* %a2
-  %v2 = load float * %a2
+  %v2 = load float, float* %a2
   %v2.int = bitcast float %v2 to i32
   %sum1 = add i32 %v1, %v2.int
 
@@ -45,7 +45,7 @@
   %X = alloca { i32, float }
   %Y = getelementptr { i32, float }, { i32, float }* %X, i64 0, i32 0
   store i32 0, i32* %Y
-  %Z = load i32* %Y
+  %Z = load i32, i32* %Y
   ret i32 %Z
 }
 
@@ -61,7 +61,7 @@
   br label %L2
 
 L2:
-  %Z = load i64* %B
+  %Z = load i64, i64* %B
   ret i64 %Z
 }
 
@@ -84,7 +84,7 @@
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8], [42 x i8]* %[[test3_a1]], i64 0, i64 0
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %src, i32 42
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 42
-; CHECK-NEXT: %[[test3_r1:.*]] = load i8* %[[gep]]
+; CHECK-NEXT: %[[test3_r1:.*]] = load i8, i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 43
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [99 x i8], [99 x i8]* %[[test3_a2]], i64 0, i64 0
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 99
@@ -98,7 +98,7 @@
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a5]], i64 0, i64 0
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 207
-; CHECK-NEXT: %[[test3_r2:.*]] = load i8* %[[gep]]
+; CHECK-NEXT: %[[test3_r2:.*]] = load i8, i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 208
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test3_a6]], i64 0, i64 0
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
@@ -320,9 +320,9 @@
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 20
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 20
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
-; CHECK-NEXT: %[[test4_r1:.*]] = load i16* %[[bitcast]]
+; CHECK-NEXT: %[[test4_r1:.*]] = load i16, i16* %[[bitcast]]
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 22
-; CHECK-NEXT: %[[test4_r2:.*]] = load i8* %[[gep]]
+; CHECK-NEXT: %[[test4_r2:.*]] = load i8, i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 23
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a2]], i64 0, i64 0
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
@@ -331,17 +331,17 @@
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 10
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 40
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
-; CHECK-NEXT: %[[test4_r3:.*]] = load i16* %[[bitcast]]
+; CHECK-NEXT: %[[test4_r3:.*]] = load i16, i16* %[[bitcast]]
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 42
-; CHECK-NEXT: %[[test4_r4:.*]] = load i8* %[[gep]]
+; CHECK-NEXT: %[[test4_r4:.*]] = load i8, i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 43
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a4]], i64 0, i64 0
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 50
 ; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
-; CHECK-NEXT: %[[test4_r5:.*]] = load i16* %[[bitcast]]
+; CHECK-NEXT: %[[test4_r5:.*]] = load i16, i16* %[[bitcast]]
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8, i8* %src, i64 52
-; CHECK-NEXT: %[[test4_r6:.*]] = load i8* %[[gep]]
+; CHECK-NEXT: %[[test4_r6:.*]] = load i8, i8* %[[gep]]
 ; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8, i8* %src, i64 53
 ; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8], [7 x i8]* %[[test4_a5]], i64 0, i64 0
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
@@ -422,7 +422,7 @@
   store float 0.0, float* %fptr
   %ptr = getelementptr [4 x i8], [4 x i8]* %a, i32 0, i32 2
   %iptr = bitcast i8* %ptr to i16*
-  %val = load i16* %iptr
+  %val = load i16, i16* %iptr
   ret i16 %val
 }
 
@@ -430,7 +430,7 @@
 ; CHECK-LABEL: @test6(
 ; CHECK: alloca i32
 ; CHECK-NEXT: store volatile i32
-; CHECK-NEXT: load i32*
+; CHECK-NEXT: load i32, i32*
 ; CHECK-NEXT: ret i32
 
 entry:
@@ -438,7 +438,7 @@
   %ptr = getelementptr [4 x i8], [4 x i8]* %a, i32 0, i32 0
   call void @llvm.memset.p0i8.i32(i8* %ptr, i8 42, i32 4, i32 1, i1 true)
   %iptr = bitcast i8* %ptr to i32*
-  %val = load i32* %iptr
+  %val = load i32, i32* %iptr
   ret i32 %val
 }
 
@@ -446,10 +446,10 @@
 ; CHECK-LABEL: @test7(
 ; CHECK: alloca i32
 ; CHECK-NEXT: bitcast i8* %src to i32*
-; CHECK-NEXT: load volatile i32*
+; CHECK-NEXT: load volatile i32, i32*
 ; CHECK-NEXT: store volatile i32
 ; CHECK-NEXT: bitcast i8* %dst to i32*
-; CHECK-NEXT: load volatile i32*
+; CHECK-NEXT: load volatile i32, i32*
 ; CHECK-NEXT: store volatile i32
 ; CHECK-NEXT: ret
 
@@ -472,27 +472,27 @@
 ; CHECK-NOT: alloca
 
   %s2.next.ptr = getelementptr %S2, %S2* %s2, i64 0, i32 1
-  %s2.next = load %S2** %s2.next.ptr
+  %s2.next = load %S2*, %S2** %s2.next.ptr
 ; CHECK:      %[[gep:.*]] = getelementptr %S2, %S2* %s2, i64 0, i32 1
-; CHECK-NEXT: %[[next:.*]] = load %S2** %[[gep]]
+; CHECK-NEXT: %[[next:.*]] = load %S2*, %S2** %[[gep]]
 
   %s2.next.s1.ptr = getelementptr %S2, %S2* %s2.next, i64 0, i32 0
-  %s2.next.s1 = load %S1** %s2.next.s1.ptr
+  %s2.next.s1 = load %S1*, %S1** %s2.next.s1.ptr
   %new.s1.ptr = getelementptr %S2, %S2* %new, i64 0, i32 0
   store %S1* %s2.next.s1, %S1** %new.s1.ptr
   %s2.next.next.ptr = getelementptr %S2, %S2* %s2.next, i64 0, i32 1
-  %s2.next.next = load %S2** %s2.next.next.ptr
+  %s2.next.next = load %S2*, %S2** %s2.next.next.ptr
   %new.next.ptr = getelementptr %S2, %S2* %new, i64 0, i32 1
   store %S2* %s2.next.next, %S2** %new.next.ptr
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr %S2, %S2* %[[next]], i64 0, i32 0
-; CHECK-NEXT: %[[next_s1:.*]] = load %S1** %[[gep]]
+; CHECK-NEXT: %[[next_s1:.*]] = load %S1*, %S1** %[[gep]]
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr %S2, %S2* %[[next]], i64 0, i32 1
-; CHECK-NEXT: %[[next_next:.*]] = load %S2** %[[gep]]
+; CHECK-NEXT: %[[next_next:.*]] = load %S2*, %S2** %[[gep]]
 
-  %new.s1 = load %S1** %new.s1.ptr
+  %new.s1 = load %S1*, %S1** %new.s1.ptr
   %result1 = insertvalue %S2 undef, %S1* %new.s1, 0
 ; CHECK-NEXT: %[[result1:.*]] = insertvalue %S2 undef, %S1* %[[next_s1]], 0
-  %new.next = load %S2** %new.next.ptr
+  %new.next = load %S2*, %S2** %new.next.ptr
   %result2 = insertvalue %S2 %result1, %S2* %new.next, 1
 ; CHECK-NEXT: %[[result2:.*]] = insertvalue %S2 %[[result1]], %S2* %[[next_next]], 1
   ret %S2 %result2
@@ -530,7 +530,7 @@
   store i8 26, i8* %gep3, align 1
   %cast = bitcast { [3 x i8] }* %a to { i64 }*
   %elt = getelementptr inbounds { i64 }, { i64 }* %cast, i32 0, i32 0
-  %load = load i64* %elt
+  %load = load i64, i64* %elt
   %result = and i64 %load, 16777215
   ret i64 %result
 }
@@ -545,7 +545,7 @@
   %ptr = getelementptr [8 x i8], [8 x i8]* %a, i32 0, i32 0
   call void @llvm.memset.p0i8.i32(i8* %ptr, i8 0, i32 8, i32 1, i1 false)
   %s2ptrptr = bitcast i8* %ptr to %S2**
-  %s2ptr = load %S2** %s2ptrptr
+  %s2ptr = load %S2*, %S2** %s2ptrptr
   ret %S2* %s2ptr
 }
 
@@ -561,13 +561,13 @@
 good:
   %Y = getelementptr i32, i32* %X, i64 0
   store i32 0, i32* %Y
-  %Z = load i32* %Y
+  %Z = load i32, i32* %Y
   ret i32 %Z
 
 bad:
   %Y2 = getelementptr i32, i32* %X, i64 1
   store i32 0, i32* %Y2
-  %Z2 = load i32* %Y2
+  %Z2 = load i32, i32* %Y2
   ret i32 %Z2
 }
 
@@ -589,7 +589,7 @@
   %a2ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 2
   store i8 0, i8* %a2ptr
   %aiptr = bitcast [3 x i8]* %a to i24*
-  %ai = load i24* %aiptr
+  %ai = load i24, i24* %aiptr
 ; CHECK-NOT: store
 ; CHECK-NOT: load
 ; CHECK:      %[[ext2:.*]] = zext i8 0 to i24
@@ -607,11 +607,11 @@
   %biptr = bitcast [3 x i8]* %b to i24*
   store i24 %ai, i24* %biptr
   %b0ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 0
-  %b0 = load i8* %b0ptr
+  %b0 = load i8, i8* %b0ptr
   %b1ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 1
-  %b1 = load i8* %b1ptr
+  %b1 = load i8, i8* %b1ptr
   %b2ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 2
-  %b2 = load i8* %b2ptr
+  %b2 = load i8, i8* %b2ptr
 ; CHECK-NOT: store
 ; CHECK-NOT: load
 ; CHECK:      %[[trunc0:.*]] = trunc i24 %[[insert0]] to i8
@@ -646,7 +646,7 @@
   store i8 0, i8* %b2ptr
   %iptrcast = bitcast [3 x i8]* %a to i16*
   %iptrgep = getelementptr i16, i16* %iptrcast, i64 1
-  %i = load i16* %iptrgep
+  %i = load i16, i16* %iptrgep
   %ret = zext i16 %i to i32
   ret i32 %ret
 }
@@ -672,15 +672,15 @@
   %4 = getelementptr inbounds %test14.struct, %test14.struct* %a, i32 0, i32 0
   %5 = bitcast [3 x i32]* %3 to i32*
   %6 = bitcast [3 x i32]* %4 to i32*
-  %7 = load i32* %6, align 4
+  %7 = load i32, i32* %6, align 4
   store i32 %7, i32* %5, align 4
   %8 = getelementptr inbounds i32, i32* %5, i32 1
   %9 = getelementptr inbounds i32, i32* %6, i32 1
-  %10 = load i32* %9, align 4
+  %10 = load i32, i32* %9, align 4
   store i32 %10, i32* %8, align 4
   %11 = getelementptr inbounds i32, i32* %5, i32 2
   %12 = getelementptr inbounds i32, i32* %6, i32 2
-  %13 = load i32* %12, align 4
+  %13 = load i32, i32* %12, align 4
   store i32 %13, i32* %11, align 4
   ret void
 }
@@ -735,7 +735,7 @@
 ; CHECK-LABEL: @test16(
 ; CHECK-NOT: alloca
 ; CHECK:      %[[srccast:.*]] = bitcast i8* %src to i24*
-; CHECK-NEXT: load i24* %[[srccast]]
+; CHECK-NEXT: load i24, i24* %[[srccast]]
 ; CHECK-NEXT: %[[dstcast:.*]] = bitcast i8* %dst to i24*
 ; CHECK-NEXT: store i24 0, i24* %[[dstcast]]
 ; CHECK-NEXT: ret void
@@ -776,7 +776,7 @@
 ; CHECK:      %[[a:.*]] = alloca [34 x i8]
 ; CHECK:      %[[srcgep1:.*]] = getelementptr inbounds i8, i8* %src, i64 4
 ; CHECK-NEXT: %[[srccast1:.*]] = bitcast i8* %[[srcgep1]] to i32*
-; CHECK-NEXT: %[[srcload:.*]] = load i32* %[[srccast1]]
+; CHECK-NEXT: %[[srcload:.*]] = load i32, i32* %[[srccast1]]
 ; CHECK-NEXT: %[[agep1:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0
 ; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[agep1]], i8* %src, i32 %size,
 ; CHECK-NEXT: %[[agep2:.*]] = getelementptr inbounds [34 x i8], [34 x i8]* %[[a]], i64 0, i64 0
@@ -821,7 +821,7 @@
   %cast2 = bitcast { i64, i8* }* %a to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast2, i8* %cast1, i32 16, i32 1, i1 false)
   %gep = getelementptr inbounds { i64, i8* }, { i64, i8* }* %a, i32 0, i32 0
-  %val = load i64* %gep
+  %val = load i64, i64* %gep
   ret i32 undef
 }
 
@@ -845,9 +845,9 @@
   %gep3.2 = getelementptr i32, i32* %gep3.1, i32 -12
   store i32 3, i32* %gep3.2
 
-  %load1 = load i32* %gep1
-  %load2 = load i32* %gep2.2
-  %load3 = load i32* %gep3.2
+  %load1 = load i32, i32* %gep1
+  %load2 = load i32, i32* %gep2.2
+  %load3 = load i32, i32* %gep3.2
   %sum1 = add i32 %load1, %load2
   %sum2 = add i32 %sum1, %load3
   ret i32 %sum2
@@ -876,9 +876,9 @@
   store i8 255, i8* %gep5
   %cast1 = bitcast i8* %gep4 to i32*
   store i32 0, i32* %cast1
-  %load = load i8* %gep0
+  %load = load i8, i8* %gep0
   %gep6 = getelementptr i8, i8* %gep0, i32 1
-  %load2 = load i8* %gep6
+  %load2 = load i8, i8* %gep6
   %result = or i8 %load, %load2
   ret i8 %result
 }
@@ -895,7 +895,7 @@
 entry:
   %a = alloca i8
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 1, i32 1, i1 false)
-  %tmp2 = load i8* %a
+  %tmp2 = load i8, i8* %a
   ret void
 }
 
@@ -919,7 +919,7 @@
 
 if.end:
   %gep = getelementptr %PR13916.struct, %PR13916.struct* %a, i32 0, i32 0
-  %tmp2 = load i8* %gep
+  %tmp2 = load i8, i8* %gep
   ret void
 }
 
@@ -968,7 +968,7 @@
   store double* %c, double** %b
   store double* %a, double** %b
   store double %x, double* %c
-  %ret = load double* %a
+  %ret = load double, double* %a
 ; CHECK-NOT: store
 ; CHECK-NOT: load
 
@@ -1013,7 +1013,7 @@
 
   %gep2 = getelementptr { { [1 x { i32 }] } }, { { [1 x { i32 }] } }* %a1, i32 0, i32 0
   %ptrcast1 = bitcast { [1 x { i32 }] }* %gep2 to { [1 x { float }] }*
-  %load1 = load { [1 x { float }] }* %ptrcast1
+  %load1 = load { [1 x { float }] }, { [1 x { float }] }* %ptrcast1
   %unwrap1 = extractvalue { [1 x { float }] } %load1, 0, 0
 
   %wrap2 = insertvalue { {}, { float }, [0 x i8] } undef, { float } %unwrap1, 1
@@ -1021,7 +1021,7 @@
 
   %gep3 = getelementptr { {}, { float }, [0 x i8] }, { {}, { float }, [0 x i8] }* %a2, i32 0, i32 1, i32 0
   %ptrcast2 = bitcast float* %gep3 to <4 x i8>*
-  %load3 = load <4 x i8>* %ptrcast2
+  %load3 = load <4 x i8>, <4 x i8>* %ptrcast2
   %valcast1 = bitcast <4 x i8> %load3 to i32
 
   %wrap3 = insertvalue [1 x [1 x i32]] undef, i32 %valcast1, 0, 0
@@ -1032,7 +1032,7 @@
 
   %gep5 = getelementptr { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }, { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }* %a3, i32 0, i32 1, i32 1, i32 0
   %ptrcast4 = bitcast [1 x <4 x i8>]* %gep5 to { {}, float, {} }*
-  %load4 = load { {}, float, {} }* %ptrcast4
+  %load4 = load { {}, float, {} }, { {}, float, {} }* %ptrcast4
   %unwrap2 = extractvalue { {}, float, {} } %load4, 1
   %valcast2 = bitcast float %unwrap2 to i32
 
@@ -1080,8 +1080,8 @@
   store i32 1072693248, i32* %X.sroa.0.4.cast5.i, align 4
 
   ; Do the actual math...
-  %X.sroa.0.0.load1.i = load double* %X.sroa.0.i, align 8
-  %accum.real.i = load double* %d, align 8
+  %X.sroa.0.0.load1.i = load double, double* %X.sroa.0.i, align 8
+  %accum.real.i = load double, double* %d, align 8
   %add.r.i = fadd double %accum.real.i, %X.sroa.0.0.load1.i
   store double %add.r.i, double* %d, align 8
   call void @llvm.lifetime.end(i64 -1, i8* %0)
@@ -1104,13 +1104,13 @@
   ; CHECK-NOT: store
 
   %phi.realp = getelementptr inbounds { float, float }, { float, float }* %phi, i32 0, i32 0
-  %phi.real = load float* %phi.realp
+  %phi.real = load float, float* %phi.realp
   %phi.imagp = getelementptr inbounds { float, float }, { float, float }* %phi, i32 0, i32 1
-  %phi.imag = load float* %phi.imagp
+  %phi.imag = load float, float* %phi.imagp
   ; CHECK:      %[[realp:.*]] = getelementptr inbounds { float, float }, { float, float }* %phi, i32 0, i32 0
-  ; CHECK-NEXT: %[[real:.*]] = load float* %[[realp]]
+  ; CHECK-NEXT: %[[real:.*]] = load float, float* %[[realp]]
   ; CHECK-NEXT: %[[imagp:.*]] = getelementptr inbounds { float, float }, { float, float }* %phi, i32 0, i32 1
-  ; CHECK-NEXT: %[[imag:.*]] = load float* %[[imagp]]
+  ; CHECK-NEXT: %[[imag:.*]] = load float, float* %[[imagp]]
 
   %real = getelementptr inbounds { float, float }, { float, float }* %retval, i32 0, i32 0
   %imag = getelementptr inbounds { float, float }, { float, float }* %retval, i32 0, i32 1
@@ -1126,7 +1126,7 @@
   ; CHECK-NEXT: %[[real_mask:.*]] = and i64 %[[imag_insert]], -4294967296
   ; CHECK-NEXT: %[[real_insert:.*]] = or i64 %[[real_mask]], %[[real_ext]]
 
-  %1 = load i64* %0, align 1
+  %1 = load i64, i64* %0, align 1
   ret i64 %1
   ; CHECK-NEXT: ret i64 %[[real_insert]]
 }
@@ -1199,18 +1199,18 @@
   %b.i1 = bitcast <{ i1 }>* %b to i1*
   store i1 %x, i1* %b.i1, align 8
   %b.i8 = bitcast <{ i1 }>* %b to i8*
-  %foo = load i8* %b.i8, align 1
+  %foo = load i8, i8* %b.i8, align 1
 ; CHECK-NEXT: %[[ext:.*]] = zext i1 %x to i8
 ; CHECK-NEXT: store i8 %[[ext]], i8* %[[a]], align 8
-; CHECK-NEXT: {{.*}} = load i8* %[[a]], align 8
+; CHECK-NEXT: {{.*}} = load i8, i8* %[[a]], align 8
 
   %a.i8 = bitcast <{ i1 }>* %a to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.i8, i8* %b.i8, i32 1, i32 1, i1 false) nounwind
-  %bar = load i8* %a.i8, align 1
+  %bar = load i8, i8* %a.i8, align 1
   %a.i1 = getelementptr inbounds <{ i1 }>, <{ i1 }>* %a, i32 0, i32 0
-  %baz = load i1* %a.i1, align 1
+  %baz = load i1, i1* %a.i1, align 1
 ; CHECK-NEXT: %[[a_cast:.*]] = bitcast i8* %[[a]] to i1*
-; CHECK-NEXT: {{.*}} = load i1* %[[a_cast]], align 8
+; CHECK-NEXT: {{.*}} = load i1, i1* %[[a_cast]], align 8
 
   ret void
 }
@@ -1226,7 +1226,7 @@
 
   %cast = bitcast <3 x i8>* %a to i32*
   store i32 %x, i32* %cast, align 1
-  %y = load <3 x i8>* %a, align 4
+  %y = load <3 x i8>, <3 x i8>* %a, align 4
   ret <3 x i8> %y
 ; CHECK: ret <3 x i8>
 }
@@ -1242,7 +1242,7 @@
 
   store <3 x i8> %x, <3 x i8>* %a, align 1
   %cast = bitcast <3 x i8>* %a to i32*
-  %y = load i32* %cast, align 4
+  %y = load i32, i32* %cast, align 4
   ret i32 %y
 ; CHECK: ret i32
 }
@@ -1258,7 +1258,7 @@
 
   %a.i8 = bitcast i32* %a to i8*
   call void @llvm.memset.p0i8.i32(i8* %a.i8, i8 0, i32 %x, i32 1, i1 false)
-  %v = load i32* %a
+  %v = load i32, i32* %a
   ret i32 %v
 }
 
@@ -1283,7 +1283,7 @@
 
 bb4:
   %src.gep3 = getelementptr inbounds i8, i8* %src, i32 3
-  %src.3 = load i8* %src.gep3
+  %src.3 = load i8, i8* %src.gep3
   %tmp.gep3 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i32 3
   store i8 %src.3, i8* %tmp.gep3
 ; CHECK: store i8
@@ -1292,7 +1292,7 @@
 
 bb3:
   %src.gep2 = getelementptr inbounds i8, i8* %src, i32 2
-  %src.2 = load i8* %src.gep2
+  %src.2 = load i8, i8* %src.gep2
   %tmp.gep2 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i32 2
   store i8 %src.2, i8* %tmp.gep2
 ; CHECK: store i8
@@ -1301,7 +1301,7 @@
 
 bb2:
   %src.gep1 = getelementptr inbounds i8, i8* %src, i32 1
-  %src.1 = load i8* %src.gep1
+  %src.1 = load i8, i8* %src.gep1
   %tmp.gep1 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i32 1
   store i8 %src.1, i8* %tmp.gep1
 ; CHECK: store i8
@@ -1310,7 +1310,7 @@
 
 bb1:
   %src.gep0 = getelementptr inbounds i8, i8* %src, i32 0
-  %src.0 = load i8* %src.gep0
+  %src.0 = load i8, i8* %src.gep0
   %tmp.gep0 = getelementptr inbounds [4 x i8], [4 x i8]* %tmp, i32 0, i32 0
   store i8 %src.0, i8* %tmp.gep0
 ; CHECK: store i8
@@ -1332,7 +1332,7 @@
   %c = alloca i64, align 8
   %p.0.c = select i1 undef, i64* %c, i64* %c
   %cond.in = select i1 undef, i64* %p.0.c, i64* %c
-  %cond = load i64* %cond.in, align 8
+  %cond = load i64, i64* %cond.in, align 8
   ret void
 }
 
@@ -1351,7 +1351,7 @@
 loop:
   %cond.in = select i1 undef, i64* %c, i64* %p.0.c
   %p.0.c = select i1 undef, i64* %c, i64* %c
-  %cond = load i64* %cond.in, align 8
+  %cond = load i64, i64* %cond.in, align 8
   br i1 undef, label %loop, label %exit
 
 exit:
@@ -1374,7 +1374,7 @@
   %b.cast = bitcast i32* %b to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b.cast, i8* %a, i32 4, i32 4, i1 true)
   %b.gep = getelementptr inbounds i8, i8* %b.cast, i32 2
-  load i8* %b.gep, align 2
+  load i8, i8* %b.gep, align 2
   unreachable
 }
 
@@ -1394,7 +1394,7 @@
   store <2 x float> undef, <2 x float>* %0, align 8
   %1 = getelementptr inbounds { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* %tv1, i64 0, i32 1, i64 0
   %cond105.in.i.i = select i1 undef, float* null, float* %1
-  %cond105.i.i = load float* %cond105.in.i.i, align 8
+  %cond105.i.i = load float, float* %cond105.in.i.i, align 8
   ret void
 }
 
@@ -1427,9 +1427,9 @@
 define void @test24(i8* %src, i8* %dst) {
 ; CHECK-LABEL: @test24(
 ; CHECK: alloca i64, align 16
-; CHECK: load volatile i64* %{{[^,]*}}, align 1
+; CHECK: load volatile i64, i64* %{{[^,]*}}, align 1
 ; CHECK: store volatile i64 %{{[^,]*}}, i64* %{{[^,]*}}, align 16
-; CHECK: load volatile i64* %{{[^,]*}}, align 16
+; CHECK: load volatile i64, i64* %{{[^,]*}}, align 16
 ; CHECK: store volatile i64 %{{[^,]*}}, i64* %{{[^,]*}}, align 1
 
 entry:
@@ -1466,10 +1466,10 @@
   %b.gep2 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 1
   store float 0.0, float* %a.gep1
   store float 1.0, float* %a.gep2
-  %v = load i64* %a
+  %v = load i64, i64* %a
   store i64 %v, i64* %b
-  %f1 = load float* %b.gep1
-  %f2 = load float* %b.gep2
+  %f1 = load float, float* %b.gep1
+  %f2 = load float, float* %b.gep2
   %ret = fadd float %f1, %f2
   ret float %ret
 }
@@ -1482,8 +1482,8 @@
 ;
 ; CHECK-LABEL: @test26(
 ; CHECK-NOT: alloca
-; CHECK: %[[L1:.*]] = load i32* bitcast
-; CHECK: %[[L2:.*]] = load i32* bitcast
+; CHECK: %[[L1:.*]] = load i32, i32* bitcast
+; CHECK: %[[L2:.*]] = load i32, i32* bitcast
 ; CHECK: %[[F1:.*]] = bitcast i32 %[[L1]] to float
 ; CHECK: %[[F2:.*]] = bitcast i32 %[[L2]] to float
 ; CHECK: %[[SUM:.*]] = fadd float %[[F1]], %[[F2]]
@@ -1498,14 +1498,14 @@
   %a.cast = bitcast i64* %a to [2 x float]*
   %a.gep1 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 0
   %a.gep2 = getelementptr [2 x float], [2 x float]* %a.cast, i32 0, i32 1
-  %v1 = load i64* bitcast ([2 x float]* @complex1 to i64*)
+  %v1 = load i64, i64* bitcast ([2 x float]* @complex1 to i64*)
   store i64 %v1, i64* %a
-  %f1 = load float* %a.gep1
-  %f2 = load float* %a.gep2
+  %f1 = load float, float* %a.gep1
+  %f2 = load float, float* %a.gep2
   %sum = fadd float %f1, %f2
   store float %sum, float* %a.gep1
   store float %sum, float* %a.gep2
-  %v2 = load i64* %a
+  %v2 = load i64, i64* %a
   store i64 %v2, i64* bitcast ([2 x float]* @complex2 to i64*)
   ret void
 }
@@ -1534,10 +1534,10 @@
   %fptr3 = bitcast i8* %gep3 to float*
   store float 0.0, float* %fptr1
   store float 1.0, float* %fptr2
-  %v = load i64* %iptr1
+  %v = load i64, i64* %iptr1
   store i64 %v, i64* %iptr2
-  %f1 = load float* %fptr2
-  %f2 = load float* %fptr3
+  %f1 = load float, float* %fptr2
+  %f2 = load float, float* %fptr3
   %ret = fadd float %f1, %f2
   ret float %ret
 }
@@ -1560,7 +1560,7 @@
   %a = alloca i32
   %a.cast = bitcast i32* %a to i16*
   store volatile i16 42, i16* %a.cast
-  %load = load i32* %a
+  %load = load i32, i32* %a
   store i32 %load, i32* %a
   ret i32 %load
 }
@@ -1587,7 +1587,7 @@
   %a.cast1 = bitcast i64* %a to i32*
   %a.cast2 = bitcast i64* %a to i16*
   store volatile i16 42, i16* %a.cast2
-  %load = load i32* %a.cast1
+  %load = load i32, i32* %a.cast1
   store i32 %load, i32* %a.cast1
   %a.gep1 = getelementptr i32, i32* %a.cast1, i32 1
   %a.cast3 = bitcast i32* %a.gep1 to i8*
diff --git a/llvm/test/Transforms/SROA/big-endian.ll b/llvm/test/Transforms/SROA/big-endian.ll
index 7ba7bee..b5a04ca 100644
--- a/llvm/test/Transforms/SROA/big-endian.ll
+++ b/llvm/test/Transforms/SROA/big-endian.ll
@@ -23,7 +23,7 @@
   %a2ptr = getelementptr [3 x i8], [3 x i8]* %a, i64 0, i32 2
   store i8 0, i8* %a2ptr
   %aiptr = bitcast [3 x i8]* %a to i24*
-  %ai = load i24* %aiptr
+  %ai = load i24, i24* %aiptr
 ; CHECK-NOT: store
 ; CHECK-NOT: load
 ; CHECK:      %[[ext2:.*]] = zext i8 0 to i24
@@ -41,11 +41,11 @@
   %biptr = bitcast [3 x i8]* %b to i24*
   store i24 %ai, i24* %biptr
   %b0ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 0
-  %b0 = load i8* %b0ptr
+  %b0 = load i8, i8* %b0ptr
   %b1ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 1
-  %b1 = load i8* %b1ptr
+  %b1 = load i8, i8* %b1ptr
   %b2ptr = getelementptr [3 x i8], [3 x i8]* %b, i64 0, i32 2
-  %b2 = load i8* %b2ptr
+  %b2 = load i8, i8* %b2ptr
 ; CHECK-NOT: store
 ; CHECK-NOT: load
 ; CHECK:      %[[shift0:.*]] = lshr i24 %[[insert0]], 16
@@ -102,7 +102,7 @@
 ; CHECK-NOT: load
 
   %aiptr = bitcast [7 x i8]* %a to i56*
-  %ai = load i56* %aiptr
+  %ai = load i56, i56* %aiptr
   %ret = zext i56 %ai to i64
   ret i64 %ret
 ; CHECK-NEXT: %[[ext4:.*]] = zext i16 1 to i56
diff --git a/llvm/test/Transforms/SROA/fca.ll b/llvm/test/Transforms/SROA/fca.ll
index fbadcf8..6eaa73f 100644
--- a/llvm/test/Transforms/SROA/fca.ll
+++ b/llvm/test/Transforms/SROA/fca.ll
@@ -19,7 +19,7 @@
   %gep2 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %a, i32 0, i32 1
   store i32 %y, i32* %gep2
 
-  %result = load { i32, i32 }* %a
+  %result = load { i32, i32 }, { i32, i32 }* %a
   ret { i32, i32 } %result
 }
 
@@ -30,7 +30,7 @@
 ; CHECK-LABEL: @test1(
 ; CHECK: alloca
 ; CHECK: alloca
-; CHECK: load volatile { i32, i32 }*
+; CHECK: load volatile { i32, i32 }, { i32, i32 }*
 ; CHECK: store volatile { i32, i32 }
 ; CHECK: ret { i32, i32 }
 
@@ -43,7 +43,7 @@
   %gep2 = getelementptr inbounds { i32, i32 }, { i32, i32 }* %a, i32 0, i32 1
   store i32 %y, i32* %gep2
 
-  %result = load volatile { i32, i32 }* %a
+  %result = load volatile { i32, i32 }, { i32, i32 }* %a
   store volatile { i32, i32 } %result, { i32, i32 }* %b
   ret { i32, i32 } %result
 }
diff --git a/llvm/test/Transforms/SROA/phi-and-select.ll b/llvm/test/Transforms/SROA/phi-and-select.ll
index 883b2fb..e97bd66 100644
--- a/llvm/test/Transforms/SROA/phi-and-select.ll
+++ b/llvm/test/Transforms/SROA/phi-and-select.ll
@@ -11,8 +11,8 @@
   %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1
 	store i32 0, i32* %a0
 	store i32 1, i32* %a1
-	%v0 = load i32* %a0
-	%v1 = load i32* %a1
+	%v0 = load i32, i32* %a0
+	%v1 = load i32, i32* %a1
 ; CHECK-NOT: store
 ; CHECK-NOT: load
 
@@ -26,7 +26,7 @@
 	%phi = phi i32* [ %a1, %then ], [ %a0, %entry ]
 ; CHECK: phi i32 [ 1, %{{.*}} ], [ 0, %{{.*}} ]
 
-	%result = load i32* %phi
+	%result = load i32, i32* %phi
 	ret i32 %result
 }
 
@@ -40,8 +40,8 @@
   %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1
 	store i32 0, i32* %a0
 	store i32 1, i32* %a1
-	%v0 = load i32* %a0
-	%v1 = load i32* %a1
+	%v0 = load i32, i32* %a0
+	%v1 = load i32, i32* %a1
 ; CHECK-NOT: store
 ; CHECK-NOT: load
 
@@ -49,7 +49,7 @@
 	%select = select i1 %cond, i32* %a1, i32* %a0
 ; CHECK: select i1 %{{.*}}, i32 1, i32 0
 
-	%result = load i32* %select
+	%result = load i32, i32* %select
 	ret i32 %result
 }
 
@@ -100,7 +100,7 @@
                   [ %a1b, %bb4 ], [ %a0b, %bb5 ], [ %a0b, %bb6 ], [ %a1b, %bb7 ]
 ; CHECK: phi i32 [ 1, %{{.*}} ], [ 0, %{{.*}} ], [ 0, %{{.*}} ], [ 1, %{{.*}} ], [ 1, %{{.*}} ], [ 0, %{{.*}} ], [ 0, %{{.*}} ], [ 1, %{{.*}} ]
 
-	%result = load i32* %phi
+	%result = load i32, i32* %phi
 	ret i32 %result
 }
 
@@ -114,8 +114,8 @@
   %a1 = getelementptr [2 x i32], [2 x i32]* %a, i64 0, i32 1
 	store i32 0, i32* %a0
 	store i32 1, i32* %a1
-	%v0 = load i32* %a0
-	%v1 = load i32* %a1
+	%v0 = load i32, i32* %a0
+	%v1 = load i32, i32* %a1
 ; CHECK-NOT: store
 ; CHECK-NOT: load
 
@@ -123,7 +123,7 @@
 	%select = select i1 %cond, i32* %a0, i32* %a0
 ; CHECK-NOT: select
 
-	%result = load i32* %select
+	%result = load i32, i32* %select
 	ret i32 %result
 ; CHECK: ret i32 0
 }
@@ -141,7 +141,7 @@
 	%select = select i1 true, i32* %a1, i32* %b
 ; CHECK-NOT: select
 
-	%result = load i32* %select
+	%result = load i32, i32* %select
 ; CHECK-NOT: load
 
 	ret i32 %result
@@ -172,10 +172,10 @@
 ; CHECK: call void @f(i32* %[[select2]], i32* %[[select3]])
 
 
-	%result = load i32* %select
+	%result = load i32, i32* %select
 ; CHECK-NOT: load
 
-  %dead = load i32* %c
+  %dead = load i32, i32* %c
 
 	ret i32 %result
 ; CHECK: ret i32 1
@@ -202,7 +202,7 @@
 exit:
 	%P = phi i32* [ %Y1, %good ], [ %Y2, %bad ]
 ; CHECK: %[[phi:.*]] = phi i32 [ 0, %good ],
-  %Z2 = load i32* %P
+  %Z2 = load i32, i32* %P
   ret i32 %Z2
 ; CHECK: ret i32 %[[phi]]
 }
@@ -213,7 +213,7 @@
 ; CHECK-LABEL: @test8(
 ; CHECK-NOT: alloca
 ; CHECK-NOT: load
-; CHECK: %[[value:.*]] = load i32* %ptr
+; CHECK: %[[value:.*]] = load i32, i32* %ptr
 ; CHECK-NOT: load
 ; CHECK: %[[result:.*]] = phi i32 [ undef, %else ], [ %[[value]], %then ]
 ; CHECK-NEXT: ret i32 %[[result]]
@@ -232,7 +232,7 @@
 
 exit:
   %phi = phi i32* [ %bitcast, %else ], [ %ptr, %then ]
-  %loaded = load i32* %phi, align 4
+  %loaded = load i32, i32* %phi, align 4
   ret i32 %loaded
 }
 
@@ -241,7 +241,7 @@
 ; CHECK-LABEL: @test9(
 ; CHECK-NOT: alloca
 ; CHECK-NOT: load
-; CHECK: %[[value:.*]] = load i32* %ptr
+; CHECK: %[[value:.*]] = load i32, i32* %ptr
 ; CHECK-NOT: load
 ; CHECK: %[[result:.*]] = select i1 %{{.*}}, i32 undef, i32 %[[value]]
 ; CHECK-NEXT: ret i32 %[[result]]
@@ -252,7 +252,7 @@
   %test = icmp ne i32 %b, 0
   %bitcast = bitcast float* %f to i32*
   %select = select i1 %test, i32* %bitcast, i32* %ptr
-  %loaded = load i32* %select, align 4
+  %loaded = load i32, i32* %select, align 4
   ret i32 %loaded
 }
 
@@ -262,9 +262,9 @@
 ; node.
 ; CHECK-LABEL: @test10(
 ; CHECK: %[[alloca:.*]] = alloca
-; CHECK: %[[argvalue:.*]] = load float* %ptr
+; CHECK: %[[argvalue:.*]] = load float, float* %ptr
 ; CHECK: %[[cast:.*]] = bitcast double* %[[alloca]] to float*
-; CHECK: %[[allocavalue:.*]] = load float* %[[cast]]
+; CHECK: %[[allocavalue:.*]] = load float, float* %[[cast]]
 ; CHECK: %[[result:.*]] = phi float [ %[[allocavalue]], %else ], [ %[[argvalue]], %then ]
 ; CHECK-NEXT: ret float %[[result]]
 
@@ -283,7 +283,7 @@
 
 exit:
   %phi = phi float* [ %bitcast, %else ], [ %ptr, %then ]
-  %loaded = load float* %phi, align 4
+  %loaded = load float, float* %phi, align 4
   ret float %loaded
 }
 
@@ -292,8 +292,8 @@
 ; CHECK-LABEL: @test11(
 ; CHECK: %[[alloca:.*]] = alloca
 ; CHECK: %[[cast:.*]] = bitcast double* %[[alloca]] to float*
-; CHECK: %[[allocavalue:.*]] = load float* %[[cast]]
-; CHECK: %[[argvalue:.*]] = load float* %ptr
+; CHECK: %[[allocavalue:.*]] = load float, float* %[[cast]]
+; CHECK: %[[argvalue:.*]] = load float, float* %ptr
 ; CHECK: %[[result:.*]] = select i1 %{{.*}}, float %[[allocavalue]], float %[[argvalue]]
 ; CHECK-NEXT: ret float %[[result]]
 
@@ -304,7 +304,7 @@
   %test = icmp ne i32 %b, 0
   %bitcast = bitcast double* %f to float*
   %select = select i1 %test, float* %bitcast, float* %ptr
-  %loaded = load float* %select, align 4
+  %loaded = load float, float* %select, align 4
   ret float %loaded
 }
 
@@ -320,7 +320,7 @@
   %a = alloca i32
   store i32 %x, i32* %a
   %dead = select i1 undef, i32* %a, i32* %p
-  %load = load i32* %a
+  %load = load i32, i32* %a
   ret i32 %load
 }
 
@@ -342,7 +342,7 @@
   br i1 undef, label %loop, label %exit
 
 exit:
-  %load = load i32* %a
+  %load = load i32, i32* %a
   ret i32 %load
 }
 
@@ -376,9 +376,9 @@
 exit:
   %f.phi = phi i32* [ %f, %then ], [ %f.select, %else ]
   %g.phi = phi i32* [ %g, %then ], [ %ptr, %else ]
-  %f.loaded = load i32* %f.phi
+  %f.loaded = load i32, i32* %f.phi
   %g.select = select i1 %b1, i32* %g, i32* %g.phi
-  %g.loaded = load i32* %g.select
+  %g.loaded = load i32, i32* %g.select
   %result = add i32 %f.loaded, %g.loaded
   ret i32 %result
 }
@@ -456,8 +456,8 @@
 ; CHECK: %[[ext:.*]] = zext i8 1 to i64
 
 if.end:
-  %tmp = load i64** %ptr
-  %result = load i64* %tmp
+  %tmp = load i64*, i64** %ptr
+  %result = load i64, i64* %tmp
 ; CHECK-NOT: load
 ; CHECK: %[[result:.*]] = phi i64 [ %[[ext]], %if.then ], [ 0, %entry ]
 
@@ -495,7 +495,7 @@
 
 end:
   %a.phi.f = phi float* [ %a.f, %then ], [ %a.raw.4.f, %else ]
-  %f = load float* %a.phi.f
+  %f = load float, float* %a.phi.f
   ret float %f
 ; CHECK: %[[phi:.*]] = phi float [ %[[lo_cast]], %then ], [ %[[hi_cast]], %else ]
 ; CHECK-NOT: load
@@ -528,7 +528,7 @@
 merge:
   %2 = phi float* [ %0, %then ], [ %1, %else ]
   store float 0.000000e+00, float* %temp, align 4
-  %3 = load float* %2, align 4
+  %3 = load float, float* %2, align 4
   ret float %3
 }
 
@@ -563,7 +563,7 @@
 merge:
   %3 = phi float* [ %1, %then2 ], [ %2, %else ]
   store float 0.000000e+00, float* %temp, align 4
-  %4 = load float* %3, align 4
+  %4 = load float, float* %3, align 4
   ret float %4
 }
 
diff --git a/llvm/test/Transforms/SROA/slice-order-independence.ll b/llvm/test/Transforms/SROA/slice-order-independence.ll
index 6b38f4c..7d57be6d 100644
--- a/llvm/test/Transforms/SROA/slice-order-independence.ll
+++ b/llvm/test/Transforms/SROA/slice-order-independence.ll
@@ -15,9 +15,9 @@
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i32 8, i1 false)
   %b = getelementptr inbounds { i16*, i32 }, { i16*, i32 }* %arg, i64 0, i32 0
   %pb0 = bitcast i16** %b to i63*
-  %b0 = load i63* %pb0
+  %b0 = load i63, i63* %pb0
   %pb1 = bitcast i16** %b to i8**
-  %b1 = load i8** %pb1
+  %b1 = load i8*, i8** %pb1
   ret void
 }
 
@@ -30,8 +30,8 @@
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %3, i8* %2, i32 16, i32 8, i1 false)
   %b = getelementptr inbounds { i16*, i32 }, { i16*, i32 }* %arg, i64 0, i32 0
   %pb1 = bitcast i16** %b to i8**
-  %b1 = load i8** %pb1
+  %b1 = load i8*, i8** %pb1
   %pb0 = bitcast i16** %b to i63*
-  %b0 = load i63* %pb0
+  %b0 = load i63, i63* %pb0
   ret void
 }
diff --git a/llvm/test/Transforms/SROA/slice-width.ll b/llvm/test/Transforms/SROA/slice-width.ll
index 4d2a56f..a37a15a 100644
--- a/llvm/test/Transforms/SROA/slice-width.ll
+++ b/llvm/test/Transforms/SROA/slice-width.ll
@@ -14,14 +14,14 @@
 ; CHECK-LABEL: load_i32:
 ; CHECK-NOT: bitcast {{.*}} to i1
 ; CHECK-NOT: zext i1
-  %r0 = load i32* %arg
+  %r0 = load i32, i32* %arg
   br label %load_i1
 
 load_i1:
 ; CHECK-LABEL: load_i1:
 ; CHECK: bitcast {{.*}} to i1
   %p1 = bitcast i32* %arg to i1*
-  %t1 = load i1* %p1
+  %t1 = load i1, i1* %p1
   ret void
 }
 
@@ -43,15 +43,15 @@
 
   ; Access a slice of the alloca to trigger SROA.
   %mid_p = getelementptr %union.Foo, %union.Foo* %x, i32 0, i32 1
-  %elt = load i64* %mid_p
+  %elt = load i64, i64* %mid_p
   store i64 %elt, i64* @i64_sink
   ret void
 }
 ; CHECK-LABEL: define void @memcpy_fp80_padding
 ; CHECK: alloca x86_fp80
 ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32
-; CHECK: load i64* getelementptr inbounds (%union.Foo* @foo_copy_source, i64 0, i32 1)
-; CHECK: load i64* getelementptr inbounds (%union.Foo* @foo_copy_source, i64 0, i32 2)
+; CHECK: load i64, i64* getelementptr inbounds (%union.Foo* @foo_copy_source, i64 0, i32 1)
+; CHECK: load i64, i64* getelementptr inbounds (%union.Foo* @foo_copy_source, i64 0, i32 2)
 
 define void @memset_fp80_padding() {
   %x = alloca %union.Foo
@@ -62,7 +62,7 @@
 
   ; Access a slice of the alloca to trigger SROA.
   %mid_p = getelementptr %union.Foo, %union.Foo* %x, i32 0, i32 1
-  %elt = load i64* %mid_p
+  %elt = load i64, i64* %mid_p
   store i64 %elt, i64* @i64_sink
   ret void
 }
@@ -90,7 +90,7 @@
   ; The following block does nothing; but appears to confuse SROA
   %unused1 = bitcast %S.vec3float* %tmp1 to %U.vec3float*
   %unused2 = getelementptr inbounds %U.vec3float, %U.vec3float* %unused1, i32 0, i32 0
-  %unused3 = load <4 x float>* %unused2, align 1
+  %unused3 = load <4 x float>, <4 x float>* %unused2, align 1
 
   ; Create a second temporary and copy %tmp1 into it
   %tmp2 = alloca %S.vec3float, align 4
diff --git a/llvm/test/Transforms/SROA/vector-conversion.ll b/llvm/test/Transforms/SROA/vector-conversion.ll
index 08d7960..91ae5be 100644
--- a/llvm/test/Transforms/SROA/vector-conversion.ll
+++ b/llvm/test/Transforms/SROA/vector-conversion.ll
@@ -10,7 +10,7 @@
 ; CHECK-NOT: store
 
   %cast = bitcast {<2 x i32*>, <2 x i32*>}* %a to <4 x i64>*
-  %vec = load <4 x i64>* %cast
+  %vec = load <4 x i64>, <4 x i64>* %cast
 ; CHECK-NOT: load
 ; CHECK: ptrtoint
 
@@ -26,7 +26,7 @@
 ; CHECK-NOT: store
 
   %cast = bitcast {<2 x i64>, <2 x i64>}* %a to <4 x i32*>*
-  %vec = load <4 x i32*>* %cast
+  %vec = load <4 x i32*>, <4 x i32*>* %cast
 ; CHECK-NOT: load
 ; CHECK: inttoptr
 
@@ -42,7 +42,7 @@
 ; CHECK-NOT: store
 
   %cast = bitcast {<1 x i32*>, <1 x i32*>}* %a to <2 x i64>*
-  %vec = load <2 x i64>* %cast
+  %vec = load <2 x i64>, <2 x i64>* %cast
 ; CHECK-NOT: load
 ; CHECK: ptrtoint
 ; CHECK: bitcast
diff --git a/llvm/test/Transforms/SROA/vector-lifetime-intrinsic.ll b/llvm/test/Transforms/SROA/vector-lifetime-intrinsic.ll
index 30c93b0..37cf3943 100644
--- a/llvm/test/Transforms/SROA/vector-lifetime-intrinsic.ll
+++ b/llvm/test/Transforms/SROA/vector-lifetime-intrinsic.ll
@@ -18,7 +18,7 @@
   call void @llvm.lifetime.start(i64 16, i8* %tmp8)
   store <4 x float> %arg1, <4 x float>* %tmp, align 16
   %tmp17 = bitcast <4 x float>* %tmp to <3 x float>*
-  %tmp18 = load <3 x float>* %tmp17
+  %tmp18 = load <3 x float>, <3 x float>* %tmp17
   %tmp20 = bitcast <4 x float>* %tmp to i8*
   call void @llvm.lifetime.end(i64 16, i8* %tmp20)
   call void @wombat3(<3 x float> %tmp18)
diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll
index 73f5ba0..2d9b26b 100644
--- a/llvm/test/Transforms/SROA/vector-promotion.ll
+++ b/llvm/test/Transforms/SROA/vector-promotion.ll
@@ -16,11 +16,11 @@
 ; CHECK-NOT: store
 
   %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
-  %tmp1 = load i32* %a.tmp1
+  %tmp1 = load i32, i32* %a.tmp1
   %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
-  %tmp2 = load i32* %a.tmp2
+  %tmp2 = load i32, i32* %a.tmp2
   %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
-  %tmp3 = load i32* %a.tmp3
+  %tmp3 = load i32, i32* %a.tmp3
 ; CHECK-NOT: load
 ; CHECK:      extractelement <4 x i32> %x, i32 2
 ; CHECK-NEXT: extractelement <4 x i32> %y, i32 3
@@ -47,12 +47,12 @@
 ; CHECK-NOT: store
 
   %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
-  %tmp1 = load i32* %a.tmp1
+  %tmp1 = load i32, i32* %a.tmp1
   %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
-  %tmp2 = load i32* %a.tmp2
+  %tmp2 = load i32, i32* %a.tmp2
   %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
   %a.tmp3.cast = bitcast i32* %a.tmp3 to <2 x i32>*
-  %tmp3.vec = load <2 x i32>* %a.tmp3.cast
+  %tmp3.vec = load <2 x i32>, <2 x i32>* %a.tmp3.cast
   %tmp3 = extractelement <2 x i32> %tmp3.vec, i32 0
 ; CHECK-NOT: load
 ; CHECK:      %[[extract1:.*]] = extractelement <4 x i32> %x, i32 2
@@ -87,11 +87,11 @@
   %a.tmp1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
   %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
   call void @llvm.memset.p0i8.i32(i8* %a.tmp1.cast, i8 -1, i32 4, i32 1, i1 false)
-  %tmp1 = load i32* %a.tmp1
+  %tmp1 = load i32, i32* %a.tmp1
   %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
-  %tmp2 = load i32* %a.tmp2
+  %tmp2 = load i32, i32* %a.tmp2
   %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
-  %tmp3 = load i32* %a.tmp3
+  %tmp3 = load i32, i32* %a.tmp3
 ; CHECK-NOT: load
 ; CHECK:      %[[insert:.*]] = insertelement <4 x i32> %x, i32 -1, i32 2
 ; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2
@@ -128,15 +128,15 @@
   %z.tmp1 = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2
   %z.tmp1.cast = bitcast i32* %z.tmp1 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.tmp1.cast, i8* %z.tmp1.cast, i32 4, i32 1, i1 false)
-  %tmp1 = load i32* %a.tmp1
+  %tmp1 = load i32, i32* %a.tmp1
   %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
-  %tmp2 = load i32* %a.tmp2
+  %tmp2 = load i32, i32* %a.tmp2
   %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
-  %tmp3 = load i32* %a.tmp3
+  %tmp3 = load i32, i32* %a.tmp3
 ; CHECK-NOT: memcpy
-; CHECK:      %[[load:.*]] = load <4 x i32>* %z
+; CHECK:      %[[load:.*]] = load <4 x i32>, <4 x i32>* %z
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2
-; CHECK-NEXT: %[[element_load:.*]] = load i32* %[[gep]]
+; CHECK-NEXT: %[[element_load:.*]] = load i32, i32* %[[gep]]
 ; CHECK-NEXT: %[[insert:.*]] = insertelement <4 x i32> %x, i32 %[[element_load]], i32 2
 ; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2
 ; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 3
@@ -175,15 +175,15 @@
   %z.tmp1 = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %z, i16 0, i16 2
   %z.tmp1.cast = bitcast i32 addrspace(1)* %z.tmp1 to i8 addrspace(1)*
   call void @llvm.memcpy.p0i8.p1i8.i32(i8* %a.tmp1.cast, i8 addrspace(1)* %z.tmp1.cast, i32 4, i32 1, i1 false)
-  %tmp1 = load i32* %a.tmp1
+  %tmp1 = load i32, i32* %a.tmp1
   %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
-  %tmp2 = load i32* %a.tmp2
+  %tmp2 = load i32, i32* %a.tmp2
   %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
-  %tmp3 = load i32* %a.tmp3
+  %tmp3 = load i32, i32* %a.tmp3
 ; CHECK-NOT: memcpy
-; CHECK:      %[[load:.*]] = load <4 x i32> addrspace(1)* %z
+; CHECK:      %[[load:.*]] = load <4 x i32>, <4 x i32> addrspace(1)* %z
 ; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* %z, i64 0, i64 2
-; CHECK-NEXT: %[[element_load:.*]] = load i32 addrspace(1)* %[[gep]]
+; CHECK-NEXT: %[[element_load:.*]] = load i32, i32 addrspace(1)* %[[gep]]
 ; CHECK-NEXT: %[[insert:.*]] = insertelement <4 x i32> %x, i32 %[[element_load]], i32 2
 ; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2
 ; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 3
@@ -221,11 +221,11 @@
   %z.tmp1 = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2
   %z.tmp1.cast = bitcast i32* %z.tmp1 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z.tmp1.cast, i8* %a.tmp1.cast, i32 4, i32 1, i1 false)
-  %tmp1 = load i32* %a.tmp1
+  %tmp1 = load i32, i32* %a.tmp1
   %a.tmp2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
-  %tmp2 = load i32* %a.tmp2
+  %tmp2 = load i32, i32* %a.tmp2
   %a.tmp3 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
-  %tmp3 = load i32* %a.tmp3
+  %tmp3 = load i32, i32* %a.tmp3
 ; CHECK-NOT: memcpy
 ; CHECK:      %[[gep:.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* %z, i64 0, i64 2
 ; CHECK-NEXT: %[[extract:.*]] = extractelement <4 x i32> %y, i32 2
@@ -257,7 +257,7 @@
   store <4 x i64> %y, <4 x i64>* %p1
 ; CHECK: store <4 x i64> %y,
   %addr = getelementptr inbounds { <4 x i64>, <4 x i64> }, { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 0, i64 %n
-  %res = load i64* %addr, align 4
+  %res = load i64, i64* %addr, align 4
   ret i64 %res
 }
 
@@ -287,7 +287,7 @@
   store i32 3, i32* %a.gep3
 ; CHECK-NEXT: insertelement <4 x i32>
 
-  %ret = load <4 x i32>* %a
+  %ret = load <4 x i32>, <4 x i32>* %a
 
   ret <4 x i32> %ret
 ; CHECK-NEXT: ret <4 x i32> 
@@ -303,18 +303,18 @@
 
   %a.gep0 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 0
   %a.cast0 = bitcast i32* %a.gep0 to <2 x i32>*
-  %first = load <2 x i32>* %a.cast0
+  %first = load <2 x i32>, <2 x i32>* %a.cast0
 ; CHECK-NOT: load
 ; CHECK:      %[[extract1:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
 
   %a.gep1 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 1
   %a.cast1 = bitcast i32* %a.gep1 to <2 x i32>*
-  %second = load <2 x i32>* %a.cast1
+  %second = load <2 x i32>, <2 x i32>* %a.cast1
 ; CHECK-NEXT: %[[extract2:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 1, i32 2>
 
   %a.gep2 = getelementptr <4 x i32>, <4 x i32>* %a, i32 0, i32 2
   %a.cast2 = bitcast i32* %a.gep2 to <2 x i32>*
-  %third = load <2 x i32>* %a.cast2
+  %third = load <2 x i32>, <2 x i32>* %a.cast2
 ; CHECK-NEXT: %[[extract3:.*]] = shufflevector <4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
 
   %tmp = shufflevector <2 x i32> %first, <2 x i32> %second, <2 x i32> <i32 0, i32 2>
@@ -355,7 +355,7 @@
   call void @llvm.memset.p0i8.i32(i8* %a.cast3, i8 7, i32 4, i32 0, i1 false)
 ; CHECK-NEXT: insertelement <4 x float> 
 
-  %ret = load <4 x float>* %a
+  %ret = load <4 x float>, <4 x float>* %a
 
   ret <4 x float> %ret
 ; CHECK-NEXT: ret <4 x float> 
@@ -371,7 +371,7 @@
   %a.cast0 = bitcast float* %a.gep0 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast0, i8* %x, i32 8, i32 0, i1 false)
 ; CHECK:      %[[xptr:.*]] = bitcast i8* %x to <2 x float>*
-; CHECK-NEXT: %[[x:.*]] = load <2 x float>* %[[xptr]]
+; CHECK-NEXT: %[[x:.*]] = load <2 x float>, <2 x float>* %[[xptr]]
 ; CHECK-NEXT: %[[expand_x:.*]] = shufflevector <2 x float> %[[x]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT: select <4 x i1> <i1 true, i1 true, i1 false, i1 false>  
 
@@ -379,7 +379,7 @@
   %a.cast1 = bitcast float* %a.gep1 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast1, i8* %y, i32 8, i32 0, i1 false)
 ; CHECK-NEXT: %[[yptr:.*]] = bitcast i8* %y to <2 x float>*
-; CHECK-NEXT: %[[y:.*]] = load <2 x float>* %[[yptr]]
+; CHECK-NEXT: %[[y:.*]] = load <2 x float>, <2 x float>* %[[yptr]]
 ; CHECK-NEXT: %[[expand_y:.*]] = shufflevector <2 x float> %[[y]], <2 x float> undef, <4 x i32> <i32 undef, i32 0, i32 1, i32 undef>
 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 true, i1 true, i1 false>
 
@@ -387,7 +387,7 @@
   %a.cast2 = bitcast float* %a.gep2 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast2, i8* %z, i32 8, i32 0, i1 false)
 ; CHECK-NEXT: %[[zptr:.*]] = bitcast i8* %z to <2 x float>*
-; CHECK-NEXT: %[[z:.*]] = load <2 x float>* %[[zptr]]
+; CHECK-NEXT: %[[z:.*]] = load <2 x float>, <2 x float>* %[[zptr]]
 ; CHECK-NEXT: %[[expand_z:.*]] = shufflevector <2 x float> %[[z]], <2 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
 ; CHECK-NEXT: select <4 x i1> <i1 false, i1 false, i1 true, i1 true>
 
@@ -395,7 +395,7 @@
   %a.cast3 = bitcast float* %a.gep3 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.cast3, i8* %f, i32 4, i32 0, i1 false)
 ; CHECK-NEXT: %[[fptr:.*]] = bitcast i8* %f to float*
-; CHECK-NEXT: %[[f:.*]] = load float* %[[fptr]]
+; CHECK-NEXT: %[[f:.*]] = load float, float* %[[fptr]]
 ; CHECK-NEXT: %[[insert_f:.*]] = insertelement <4 x float> 
 
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %a.cast2, i32 8, i32 0, i1 false)
@@ -403,7 +403,7 @@
 ; CHECK-NEXT: %[[extract_out:.*]] = shufflevector <4 x float> %[[insert_f]], <4 x float> undef, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT: store <2 x float> %[[extract_out]], <2 x float>* %[[outptr]]
 
-  %ret = load <4 x float>* %a
+  %ret = load <4 x float>, <4 x float>* %a
 
   ret <4 x float> %ret
 ; CHECK-NEXT: ret <4 x float> %[[insert_f]]
@@ -419,7 +419,7 @@
 
   store <3 x i8> undef, <3 x i8>* %retval, align 4
   %cast = bitcast <3 x i8>* %retval to i32*
-  %load = load i32* %cast, align 4
+  %load = load i32, i32* %cast, align 4
   ret i32 %load
 ; CHECK: ret i32
 }
@@ -437,7 +437,7 @@
 ; CHECK-NOT: store
 
   %cast = bitcast i32* %a to <2 x i8>*
-  %vec = load <2 x i8>* %cast
+  %vec = load <2 x i8>, <2 x i8>* %cast
 ; CHECK-NOT: load
 
   ret <2 x i8> %vec
@@ -459,7 +459,7 @@
   store <2 x i8> %x, <2 x i8>* %cast
 ; CHECK-NOT: store
 
-  %int = load i32* %a
+  %int = load i32, i32* %a
 ; CHECK-NOT: load
 
   ret i32 %int
@@ -484,11 +484,11 @@
 ; CHECK-NOT: store
 
   %a.tmp1 = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 0, i64 1
-  %tmp1 = load i32* %a.tmp1
+  %tmp1 = load i32, i32* %a.tmp1
   %a.tmp2 = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 1, i64 1
-  %tmp2 = load i32* %a.tmp2
+  %tmp2 = load i32, i32* %a.tmp2
   %a.tmp3 = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* %a.cast, i64 0, i64 1, i64 0
-  %tmp3 = load i32* %a.tmp3
+  %tmp3 = load i32, i32* %a.tmp3
 ; CHECK-NOT: load
 ; CHECK:      extractelement <2 x i32> %x, i32 1
 ; CHECK-NEXT: extractelement <2 x i32> %y, i32 1
@@ -515,9 +515,9 @@
   store <2 x i32> %x, <2 x i32>* %a.vec
 ; CHECK-NOT: store
 
-  %tmp1 = load i32* %a.i32
+  %tmp1 = load i32, i32* %a.i32
   %a.tmp2 = getelementptr inbounds i32, i32* %a.i32, i64 1
-  %tmp2 = load i32* %a.tmp2
+  %tmp2 = load i32, i32* %a.tmp2
 ; CHECK-NOT: load
 ; CHECK:      extractelement <2 x i32> %x, i32 0
 ; CHECK-NEXT: extractelement <2 x i32> %x, i32 1
@@ -545,7 +545,7 @@
 ; CHECK:      %[[V1:.*]] = insertelement <2 x i32> undef, i32 %x, i32 0
 ; CHECK-NEXT: %[[V2:.*]] = insertelement <2 x i32> %[[V1]], i32 %y, i32 1
 
-  %result = load <2 x i32>* %a.vec
+  %result = load <2 x i32>, <2 x i32>* %a.vec
 ; CHECK-NOT:  load
 
   ret <2 x i32> %result
@@ -570,7 +570,7 @@
 ; CHECK:      %[[V1:.*]] = bitcast <4 x i16> %x to <2 x i32>
 ; CHECK-NEXT: %[[V2:.*]] = insertelement <2 x i32> %[[V1]], i32 %y, i32 1
 
-  %result = load <2 x i32>* %a.vec1
+  %result = load <2 x i32>, <2 x i32>* %a.vec1
 ; CHECK-NOT:  load
 
   ret <2 x i32> %result
@@ -598,7 +598,7 @@
 ; CHECK-NEXT: %[[V3:.*]] = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i16> %[[V2]], <4 x i16> %x
 ; CHECK-NEXT: %[[V4:.*]] = bitcast <4 x i16> %[[V3]] to <2 x float>
 
-  %result = load <2 x float>* %a.vec1
+  %result = load <2 x float>, <2 x float>* %a.vec1
 ; CHECK-NOT:  load
 
   ret <2 x float> %result
@@ -616,7 +616,7 @@
 
   %cast2 = bitcast <3 x i32>* %a to <3 x float>*
   %cast3 = bitcast <3 x float>* %cast2 to <4 x float>*
-  %vec = load <4 x float>* %cast3
+  %vec = load <4 x float>, <4 x float>* %cast3
 ; CHECK-NOT: load
 
 ; CHECK:      %[[ret:.*]] = bitcast <4 x i32> undef to <4 x float>
diff --git a/llvm/test/Transforms/SROA/vectors-of-pointers.ll b/llvm/test/Transforms/SROA/vectors-of-pointers.ll
index 7e995b9..ff09e95 100644
--- a/llvm/test/Transforms/SROA/vectors-of-pointers.ll
+++ b/llvm/test/Transforms/SROA/vectors-of-pointers.ll
@@ -20,6 +20,6 @@
   unreachable
 
 bb0.exit257:
-  %0 = load <2 x i32*>* %Args.i, align 16
+  %0 = load <2 x i32*>, <2 x i32*>* %Args.i, align 16
   unreachable
 }
diff --git a/llvm/test/Transforms/SampleProfile/branch.ll b/llvm/test/Transforms/SampleProfile/branch.ll
index ef39093..13e362d 100644
--- a/llvm/test/Transforms/SampleProfile/branch.ll
+++ b/llvm/test/Transforms/SampleProfile/branch.ll
@@ -41,7 +41,7 @@
 
 if.end:                                           ; preds = %entry
   %arrayidx = getelementptr inbounds i8*, i8** %argv, i64 1, !dbg !30
-  %0 = load i8** %arrayidx, align 8, !dbg !30, !tbaa !31
+  %0 = load i8*, i8** %arrayidx, align 8, !dbg !30, !tbaa !31
   %call = tail call i32 @atoi(i8* %0) #4, !dbg !30
   tail call void @llvm.dbg.value(metadata i32 %call, i64 0, metadata !17, metadata !{}), !dbg !30
   %cmp1 = icmp sgt i32 %call, 100, !dbg !35
diff --git a/llvm/test/Transforms/SampleProfile/calls.ll b/llvm/test/Transforms/SampleProfile/calls.ll
index d566609..3194e62 100644
--- a/llvm/test/Transforms/SampleProfile/calls.ll
+++ b/llvm/test/Transforms/SampleProfile/calls.ll
@@ -30,8 +30,8 @@
   %y.addr = alloca i32, align 4
   store i32 %x, i32* %x.addr, align 4
   store i32 %y, i32* %y.addr, align 4
-  %0 = load i32* %x.addr, align 4, !dbg !11
-  %1 = load i32* %y.addr, align 4, !dbg !11
+  %0 = load i32, i32* %x.addr, align 4, !dbg !11
+  %1 = load i32, i32* %y.addr, align 4, !dbg !11
   %add = add nsw i32 %0, %1, !dbg !11
   ret i32 %add, !dbg !11
 }
@@ -47,7 +47,7 @@
   br label %while.cond, !dbg !13
 
 while.cond:                                       ; preds = %if.end, %entry
-  %0 = load i32* %i, align 4, !dbg !14
+  %0 = load i32, i32* %i, align 4, !dbg !14
   %inc = add nsw i32 %0, 1, !dbg !14
   store i32 %inc, i32* %i, align 4, !dbg !14
   %cmp = icmp slt i32 %0, 400000000, !dbg !14
@@ -56,7 +56,7 @@
 ; CHECK: edge while.cond -> while.end probability is 1 / 5392 = 0.018546%
 
 while.body:                                       ; preds = %while.cond
-  %1 = load i32* %i, align 4, !dbg !16
+  %1 = load i32, i32* %i, align 4, !dbg !16
   %cmp1 = icmp ne i32 %1, 100, !dbg !16
   br i1 %cmp1, label %if.then, label %if.else, !dbg !16
 ; Without discriminator information, the profiler used to think that
@@ -68,8 +68,8 @@
 
 
 if.then:                                          ; preds = %while.body
-  %2 = load i32* %i, align 4, !dbg !18
-  %3 = load i32* %s, align 4, !dbg !18
+  %2 = load i32, i32* %i, align 4, !dbg !18
+  %3 = load i32, i32* %s, align 4, !dbg !18
   %call = call i32 @_Z3sumii(i32 %2, i32 %3), !dbg !18
   store i32 %call, i32* %s, align 4, !dbg !18
   br label %if.end, !dbg !18
@@ -82,7 +82,7 @@
   br label %while.cond, !dbg !22
 
 while.end:                                        ; preds = %while.cond
-  %4 = load i32* %s, align 4, !dbg !24
+  %4 = load i32, i32* %s, align 4, !dbg !24
   %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i32 %4), !dbg !24
   ret i32 0, !dbg !25
 }
diff --git a/llvm/test/Transforms/SampleProfile/discriminator.ll b/llvm/test/Transforms/SampleProfile/discriminator.ll
index cafc69d..704a407 100644
--- a/llvm/test/Transforms/SampleProfile/discriminator.ll
+++ b/llvm/test/Transforms/SampleProfile/discriminator.ll
@@ -31,33 +31,33 @@
   br label %while.cond, !dbg !11
 
 while.cond:                                       ; preds = %if.end, %entry
-  %0 = load i32* %i.addr, align 4, !dbg !12
+  %0 = load i32, i32* %i.addr, align 4, !dbg !12
   %cmp = icmp slt i32 %0, 100, !dbg !12
   br i1 %cmp, label %while.body, label %while.end, !dbg !12
 ; CHECK: edge while.cond -> while.body probability is 100 / 101 = 99.0099% [HOT edge]
 ; CHECK: edge while.cond -> while.end probability is 1 / 101 = 0.990099%
 
 while.body:                                       ; preds = %while.cond
-  %1 = load i32* %i.addr, align 4, !dbg !14
+  %1 = load i32, i32* %i.addr, align 4, !dbg !14
   %cmp1 = icmp slt i32 %1, 50, !dbg !14
   br i1 %cmp1, label %if.then, label %if.end, !dbg !14
 ; CHECK: edge while.body -> if.then probability is 5 / 100 = 5%
 ; CHECK: edge while.body -> if.end probability is 95 / 100 = 95% [HOT edge]
 
 if.then:                                          ; preds = %while.body
-  %2 = load i32* %x, align 4, !dbg !17
+  %2 = load i32, i32* %x, align 4, !dbg !17
   %dec = add nsw i32 %2, -1, !dbg !17
   store i32 %dec, i32* %x, align 4, !dbg !17
   br label %if.end, !dbg !17
 
 if.end:                                           ; preds = %if.then, %while.body
-  %3 = load i32* %i.addr, align 4, !dbg !19
+  %3 = load i32, i32* %i.addr, align 4, !dbg !19
   %inc = add nsw i32 %3, 1, !dbg !19
   store i32 %inc, i32* %i.addr, align 4, !dbg !19
   br label %while.cond, !dbg !20
 
 while.end:                                        ; preds = %while.cond
-  %4 = load i32* %x, align 4, !dbg !21
+  %4 = load i32, i32* %x, align 4, !dbg !21
   ret i32 %4, !dbg !21
 }
 
diff --git a/llvm/test/Transforms/SampleProfile/propagate.ll b/llvm/test/Transforms/SampleProfile/propagate.ll
index 594645f..520ab04 100644
--- a/llvm/test/Transforms/SampleProfile/propagate.ll
+++ b/llvm/test/Transforms/SampleProfile/propagate.ll
@@ -51,14 +51,14 @@
   store i32 %x, i32* %x.addr, align 4
   store i32 %y, i32* %y.addr, align 4
   store i64 %N, i64* %N.addr, align 8
-  %0 = load i32* %x.addr, align 4, !dbg !11
-  %1 = load i32* %y.addr, align 4, !dbg !11
+  %0 = load i32, i32* %x.addr, align 4, !dbg !11
+  %1 = load i32, i32* %y.addr, align 4, !dbg !11
   %cmp = icmp slt i32 %0, %1, !dbg !11
   br i1 %cmp, label %if.then, label %if.else, !dbg !11
 
 if.then:                                          ; preds = %entry
-  %2 = load i32* %y.addr, align 4, !dbg !13
-  %3 = load i32* %x.addr, align 4, !dbg !13
+  %2 = load i32, i32* %y.addr, align 4, !dbg !13
+  %3 = load i32, i32* %x.addr, align 4, !dbg !13
   %sub = sub nsw i32 %2, %3, !dbg !13
   %conv = sext i32 %sub to i64, !dbg !13
   store i64 %conv, i64* %retval, !dbg !13
@@ -69,16 +69,16 @@
   br label %for.cond, !dbg !15
 
 for.cond:                                         ; preds = %for.inc16, %if.else
-  %4 = load i64* %i, align 8, !dbg !15
-  %5 = load i64* %N.addr, align 8, !dbg !15
+  %4 = load i64, i64* %i, align 8, !dbg !15
+  %5 = load i64, i64* %N.addr, align 8, !dbg !15
   %cmp1 = icmp slt i64 %4, %5, !dbg !15
   br i1 %cmp1, label %for.body, label %for.end18, !dbg !15
 ; CHECK: edge for.cond -> for.body probability is 10 / 11 = 90.9091% [HOT edge]
 ; CHECK: edge for.cond -> for.end18 probability is 1 / 11 = 9.09091%
 
 for.body:                                         ; preds = %for.cond
-  %6 = load i64* %i, align 8, !dbg !18
-  %7 = load i64* %N.addr, align 8, !dbg !18
+  %6 = load i64, i64* %i, align 8, !dbg !18
+  %7 = load i64, i64* %N.addr, align 8, !dbg !18
   %div = sdiv i64 %7, 3, !dbg !18
   %cmp2 = icmp sgt i64 %6, %div, !dbg !18
   br i1 %cmp2, label %if.then3, label %if.end, !dbg !18
@@ -86,14 +86,14 @@
 ; CHECK: edge for.body -> if.end probability is 4 / 5 = 80%
 
 if.then3:                                         ; preds = %for.body
-  %8 = load i32* %x.addr, align 4, !dbg !21
+  %8 = load i32, i32* %x.addr, align 4, !dbg !21
   %dec = add nsw i32 %8, -1, !dbg !21
   store i32 %dec, i32* %x.addr, align 4, !dbg !21
   br label %if.end, !dbg !21
 
 if.end:                                           ; preds = %if.then3, %for.body
-  %9 = load i64* %i, align 8, !dbg !22
-  %10 = load i64* %N.addr, align 8, !dbg !22
+  %9 = load i64, i64* %i, align 8, !dbg !22
+  %10 = load i64, i64* %N.addr, align 8, !dbg !22
   %div4 = sdiv i64 %10, 4, !dbg !22
   %cmp5 = icmp sgt i64 %9, %div4, !dbg !22
   br i1 %cmp5, label %if.then6, label %if.else7, !dbg !22
@@ -101,10 +101,10 @@
 ; CHECK: edge if.end -> if.else7 probability is 6339 / 6342 = 99.9527% [HOT edge]
 
 if.then6:                                         ; preds = %if.end
-  %11 = load i32* %y.addr, align 4, !dbg !24
+  %11 = load i32, i32* %y.addr, align 4, !dbg !24
   %inc = add nsw i32 %11, 1, !dbg !24
   store i32 %inc, i32* %y.addr, align 4, !dbg !24
-  %12 = load i32* %x.addr, align 4, !dbg !26
+  %12 = load i32, i32* %x.addr, align 4, !dbg !26
   %add = add nsw i32 %12, 3, !dbg !26
   store i32 %add, i32* %x.addr, align 4, !dbg !26
   br label %if.end15, !dbg !27
@@ -114,26 +114,26 @@
   br label %for.cond8, !dbg !28
 
 for.cond8:                                        ; preds = %for.inc, %if.else7
-  %13 = load i32* %j, align 4, !dbg !28
+  %13 = load i32, i32* %j, align 4, !dbg !28
   %conv9 = zext i32 %13 to i64, !dbg !28
-  %14 = load i64* %i, align 8, !dbg !28
+  %14 = load i64, i64* %i, align 8, !dbg !28
   %cmp10 = icmp slt i64 %conv9, %14, !dbg !28
   br i1 %cmp10, label %for.body11, label %for.end, !dbg !28
 ; CHECK: edge for.cond8 -> for.body11 probability is 16191 / 16192 = 99.9938% [HOT edge]
 ; CHECK: edge for.cond8 -> for.end probability is 1 / 16192 = 0.00617589%
 
 for.body11:                                       ; preds = %for.cond8
-  %15 = load i32* %j, align 4, !dbg !31
-  %16 = load i32* %x.addr, align 4, !dbg !31
+  %15 = load i32, i32* %j, align 4, !dbg !31
+  %16 = load i32, i32* %x.addr, align 4, !dbg !31
   %add12 = add i32 %16, %15, !dbg !31
   store i32 %add12, i32* %x.addr, align 4, !dbg !31
-  %17 = load i32* %y.addr, align 4, !dbg !33
+  %17 = load i32, i32* %y.addr, align 4, !dbg !33
   %sub13 = sub nsw i32 %17, 3, !dbg !33
   store i32 %sub13, i32* %y.addr, align 4, !dbg !33
   br label %for.inc, !dbg !34
 
 for.inc:                                          ; preds = %for.body11
-  %18 = load i32* %j, align 4, !dbg !28
+  %18 = load i32, i32* %j, align 4, !dbg !28
   %inc14 = add i32 %18, 1, !dbg !28
   store i32 %inc14, i32* %j, align 4, !dbg !28
   br label %for.cond8, !dbg !28
@@ -145,7 +145,7 @@
   br label %for.inc16, !dbg !35
 
 for.inc16:                                        ; preds = %if.end15
-  %19 = load i64* %i, align 8, !dbg !15
+  %19 = load i64, i64* %i, align 8, !dbg !15
   %inc17 = add nsw i64 %19, 1, !dbg !15
   store i64 %inc17, i64* %i, align 8, !dbg !15
   br label %for.cond, !dbg !15
@@ -154,15 +154,15 @@
   br label %if.end19
 
 if.end19:                                         ; preds = %for.end18
-  %20 = load i32* %y.addr, align 4, !dbg !36
-  %21 = load i32* %x.addr, align 4, !dbg !36
+  %20 = load i32, i32* %y.addr, align 4, !dbg !36
+  %21 = load i32, i32* %x.addr, align 4, !dbg !36
   %mul = mul nsw i32 %20, %21, !dbg !36
   %conv20 = sext i32 %mul to i64, !dbg !36
   store i64 %conv20, i64* %retval, !dbg !36
   br label %return, !dbg !36
 
 return:                                           ; preds = %if.end19, %if.then
-  %22 = load i64* %retval, !dbg !37
+  %22 = load i64, i64* %retval, !dbg !37
   ret i64 %22, !dbg !37
 }
 
@@ -177,12 +177,12 @@
   store i32 5678, i32* %x, align 4, !dbg !38
   store i32 1234, i32* %y, align 4, !dbg !39
   store i64 999999, i64* %N, align 8, !dbg !40
-  %0 = load i32* %x, align 4, !dbg !41
-  %1 = load i32* %y, align 4, !dbg !41
-  %2 = load i64* %N, align 8, !dbg !41
-  %3 = load i32* %x, align 4, !dbg !41
-  %4 = load i32* %y, align 4, !dbg !41
-  %5 = load i64* %N, align 8, !dbg !41
+  %0 = load i32, i32* %x, align 4, !dbg !41
+  %1 = load i32, i32* %y, align 4, !dbg !41
+  %2 = load i64, i64* %N, align 8, !dbg !41
+  %3 = load i32, i32* %x, align 4, !dbg !41
+  %4 = load i32, i32* %y, align 4, !dbg !41
+  %5 = load i64, i64* %N, align 8, !dbg !41
   %call = call i64 @_Z3fooiil(i32 %3, i32 %4, i64 %5), !dbg !41
   %call1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([24 x i8]* @.str, i32 0, i32 0), i32 %0, i32 %1, i64 %2, i64 %call), !dbg !41
   ret i32 0, !dbg !42
diff --git a/llvm/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll b/llvm/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll
index e5a6be9..336c0a9 100644
--- a/llvm/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll
+++ b/llvm/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll
@@ -8,6 +8,6 @@
 	%Y = getelementptr [4 x i32], [4 x i32]* %X, i64 0, i64 0		; <i32*> [#uses=1]
         ; Must preserve arrayness!
 	%Z = getelementptr i32, i32* %Y, i64 1		; <i32*> [#uses=1]
-	%A = load i32* %Z		; <i32> [#uses=1]
+	%A = load i32, i32* %Z		; <i32> [#uses=1]
 	ret i32 %A
 }
diff --git a/llvm/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll b/llvm/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll
index a455e4f..2701fda 100644
--- a/llvm/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll
+++ b/llvm/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll
@@ -12,7 +12,7 @@
 	%tmp14 = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %vsiidx, i32 0, i32 1		; <<4 x i32>*> [#uses=1]
 	store <4 x i32> %tmp10, <4 x i32>* %tmp14
 	%tmp15 = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %vsiidx, i32 0, i32 0, i32 4		; <i32*> [#uses=1]
-	%tmp.upgrd.4 = load i32* %tmp15		; <i32> [#uses=1]
+	%tmp.upgrd.4 = load i32, i32* %tmp15		; <i32> [#uses=1]
 	ret i32 %tmp.upgrd.4
 }
 
diff --git a/llvm/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll b/llvm/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
index 9e73452..966b179 100644
--- a/llvm/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
+++ b/llvm/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
@@ -14,7 +14,7 @@
   %tmp2 = getelementptr %struct.UnionType, %struct.UnionType* %tmp, i32 0, i32 0, i32 0
   %tmp13 = getelementptr %struct.UnionType, %struct.UnionType* %p, i32 0, i32 0, i32 0
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp13, i32 8, i32 0, i1 false)
-  %tmp5 = load %struct.UnionType** %pointerToUnion
+  %tmp5 = load %struct.UnionType*, %struct.UnionType** %pointerToUnion
   %tmp56 = getelementptr %struct.UnionType, %struct.UnionType* %tmp5, i32 0, i32 0, i32 0
   %tmp7 = getelementptr %struct.UnionType, %struct.UnionType* %tmp, i32 0, i32 0, i32 0
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp56, i8* %tmp7, i32 8, i32 0, i1 false)
diff --git a/llvm/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll b/llvm/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll
index 52bd2c7..2de2f67 100644
--- a/llvm/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll
+++ b/llvm/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll
@@ -11,20 +11,20 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	store i16 %b, i16* %b_addr
 	%tmp1 = getelementptr %struct.S, %struct.S* %s, i32 0, i32 0		; <i16*> [#uses=1]
-	%tmp2 = load i16* %b_addr, align 2		; <i16> [#uses=1]
+	%tmp2 = load i16, i16* %b_addr, align 2		; <i16> [#uses=1]
 	store i16 %tmp2, i16* %tmp1, align 2
 	%tmp3 = getelementptr %struct.S, %struct.S* %s, i32 0, i32 0		; <i16*> [#uses=1]
 	%tmp34 = bitcast i16* %tmp3 to [2 x i1]*		; <[2 x i1]*> [#uses=1]
 	%tmp5 = getelementptr [2 x i1], [2 x i1]* %tmp34, i32 0, i32 1		; <i1*> [#uses=1]
-	%tmp6 = load i1* %tmp5, align 1		; <i1> [#uses=1]
+	%tmp6 = load i1, i1* %tmp5, align 1		; <i1> [#uses=1]
 	%tmp67 = zext i1 %tmp6 to i32		; <i32> [#uses=1]
 	store i32 %tmp67, i32* %tmp, align 4
-	%tmp8 = load i32* %tmp, align 4		; <i32> [#uses=1]
+	%tmp8 = load i32, i32* %tmp, align 4		; <i32> [#uses=1]
 	store i32 %tmp8, i32* %retval, align 4
 	br label %return
 
 return:		; preds = %entry
-	%retval9 = load i32* %retval		; <i32> [#uses=1]
+	%retval9 = load i32, i32* %retval		; <i32> [#uses=1]
 	%retval910 = trunc i32 %retval9 to i1		; <i1> [#uses=1]
 	ret i1 %retval910
 }
diff --git a/llvm/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll b/llvm/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll
index 343fa1d..99366b3 100644
--- a/llvm/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll
+++ b/llvm/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll
@@ -13,7 +13,7 @@
 	%tmp16 = getelementptr [1 x %struct.T], [1 x %struct.T]* %s, i32 0, i32 0		; <%struct.T*> [#uses=1]
 	%tmp17 = getelementptr %struct.T, %struct.T* %tmp16, i32 0, i32 1		; <[3 x i8]*> [#uses=1]
 	%tmp1718 = bitcast [3 x i8]* %tmp17 to i32*		; <i32*> [#uses=1]
-	%tmp19 = load i32* %tmp1718, align 4		; <i32> [#uses=1]
+	%tmp19 = load i32, i32* %tmp1718, align 4		; <i32> [#uses=1]
 	%mask = and i32 %tmp19, 16777215		; <i32> [#uses=2]
 	%mask2324 = trunc i32 %mask to i8		; <i8> [#uses=1]
 	ret i8 %mask2324
diff --git a/llvm/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll b/llvm/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll
index d66070b..f37b6529 100644
--- a/llvm/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll
+++ b/llvm/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll
@@ -11,6 +11,6 @@
 	%tmp7 = getelementptr %struct..0anon, %struct..0anon* %c, i32 0, i32 0		; <<1 x i64>*> [#uses=1]
 	%tmp78 = bitcast <1 x i64>* %tmp7 to [2 x i32]*		; <[2 x i32]*> [#uses=1]
 	%tmp9 = getelementptr [2 x i32], [2 x i32]* %tmp78, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp10 = load i32* %tmp9, align 4		; <i32> [#uses=0]
+	%tmp10 = load i32, i32* %tmp9, align 4		; <i32> [#uses=0]
 	unreachable
 }
diff --git a/llvm/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll b/llvm/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll
index ad4918d..d1f3312 100644
--- a/llvm/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll
+++ b/llvm/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll
@@ -15,7 +15,7 @@
 	store { i32, i32 } %res2, { i32, i32 }* %target
         ; Actually use %target, so it doesn't get removed altogether
         %ptr = getelementptr { i32, i32 }, { i32, i32 }* %target, i32 0, i32 0
-        %val = load i32* %ptr
+        %val = load i32, i32* %ptr
 	ret i32 %val
 }
 
@@ -28,6 +28,6 @@
 	store [ 2 x i32 ] %res2, [ 2 x i32 ]* %target
         ; Actually use %target, so it doesn't get removed altogether
         %ptr = getelementptr [ 2 x i32 ], [ 2 x i32 ]* %target, i32 0, i32 0
-        %val = load i32* %ptr
+        %val = load i32, i32* %ptr
 	ret i32 %val
 }
diff --git a/llvm/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll b/llvm/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
index 3ddb67d..c0ff25f 100644
--- a/llvm/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
+++ b/llvm/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
@@ -14,7 +14,7 @@
   %s2 = bitcast %struct.x* %s to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %r1, i8* %s2, i32 12, i32 8, i1 false)
   %1 = getelementptr %struct.x, %struct.x* %r, i32 0, i32 0, i32 1
-  %2 = load i32* %1, align 4
+  %2 = load i32, i32* %1, align 4
   ret i32 %2
 }
 
diff --git a/llvm/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll b/llvm/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll
index e60a2d0..f0af1ca 100644
--- a/llvm/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll
+++ b/llvm/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll
@@ -11,6 +11,6 @@
        store i32 %x, i32* %cast
        %second = getelementptr %pair, %pair* %instance, i32 0, i32 1
        store i32 %y, i32* %second
-       %v = load i32* %cast
+       %v = load i32, i32* %cast
        ret i32 %v
 }
diff --git a/llvm/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll b/llvm/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll
index 67228a7..56375ff 100644
--- a/llvm/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll
+++ b/llvm/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll
@@ -9,7 +9,7 @@
 	%tmp = alloca { i64, i64 }, align 8		; <{ i64, i64 }*> [#uses=2]
 	store { i64, i64 } %0, { i64, i64 }* %tmp
 	%1 = bitcast { i64, i64 }* %tmp to %struct.anon*		; <%struct.anon*> [#uses=1]
-	%2 = load %struct.anon* %1, align 8		; <%struct.anon> [#uses=1]
+	%2 = load %struct.anon, %struct.anon* %1, align 8		; <%struct.anon> [#uses=1]
         %tmp3 = extractvalue %struct.anon %2, 0
 	ret i32 %tmp3
 }
diff --git a/llvm/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll b/llvm/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
index 218d3d5..d1cc424 100644
--- a/llvm/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
+++ b/llvm/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
@@ -30,16 +30,16 @@
   store %struct.int16x8x2_t* %dst, %struct.int16x8x2_t** %dst_addr
   %2 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__ax, i32 0, i32 0
   %3 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %tmp_addr, i32 0, i32 0
-  %4 = load <8 x i16>* %3, align 16
+  %4 = load <8 x i16>, <8 x i16>* %3, align 16
   store <8 x i16> %4, <8 x i16>* %2, align 16
   %5 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__bx, i32 0, i32 0
   %6 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %tmp_addr, i32 0, i32 0
-  %7 = load <8 x i16>* %6, align 16
+  %7 = load <8 x i16>, <8 x i16>* %6, align 16
   store <8 x i16> %7, <8 x i16>* %5, align 16
   %8 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__ax, i32 0, i32 0
-  %9 = load <8 x i16>* %8, align 16
+  %9 = load <8 x i16>, <8 x i16>* %8, align 16
   %10 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__bx, i32 0, i32 0
-  %11 = load <8 x i16>* %10, align 16
+  %11 = load <8 x i16>, <8 x i16>* %10, align 16
   %12 = getelementptr inbounds %union..0anon, %union..0anon* %__rv, i32 0, i32 0
   %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t*
   %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -55,7 +55,7 @@
   %tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8*
   %21 = bitcast %struct.int16x8x2_t* %0 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp21, i8* %21, i32 32, i32 16, i1 false)
-  %22 = load %struct.int16x8x2_t** %dst_addr, align 4
+  %22 = load %struct.int16x8x2_t*, %struct.int16x8x2_t** %dst_addr, align 4
   %23 = bitcast %struct.int16x8x2_t* %22 to i8*
   %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %23, i8* %tmp22, i32 32, i32 16, i1 false)
diff --git a/llvm/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll b/llvm/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
index 1f5a2d8..dee27f8 100644
--- a/llvm/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
+++ b/llvm/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
@@ -21,7 +21,7 @@
   %1 = getelementptr inbounds %struct.Point_3, %struct.Point_3* %tmpcast, i64 0, i32 0
   %base.i.i.i = getelementptr inbounds %struct.PointC3, %struct.PointC3* %1, i64 0, i32 0
   %arrayidx.i.i.i.i = getelementptr inbounds %struct.array, %struct.array* %base.i.i.i, i64 0, i32 0, i64 0
-  %tmp5.i.i = load float* %arrayidx.i.i.i.i, align 4
+  %tmp5.i.i = load float, float* %arrayidx.i.i.i.i, align 4
   ret void
 }
 
@@ -35,7 +35,7 @@
   %tmpcast = bitcast {<2 x float>, float}* %ref.tmp2 to float*
   %0 = getelementptr {<2 x float>, float}, {<2 x float>, float}* %ref.tmp2, i64 0, i32 0
   store <2 x float> zeroinitializer, <2 x float>* %0, align 16
-  %tmp5.i.i = load float* %tmpcast, align 4
+  %tmp5.i.i = load float, float* %tmpcast, align 4
   ret void
 }
 
@@ -54,8 +54,8 @@
   %0 = getelementptr {<2 x float>, float}, {<2 x float>, float}* %ref.tmp2, i64 0, i32 0
   store <2 x float> zeroinitializer, <2 x float>* %0, align 16
   store float 1.0, float* %tmpcast2, align 4
-  %r1 = load float* %tmpcast, align 4
-  %r2 = load float* %tmpcast2, align 4
+  %r1 = load float, float* %tmpcast, align 4
+  %r2 = load float, float* %tmpcast2, align 4
   %r = fadd float %r1, %r2
   ret float %r
 }
@@ -70,6 +70,6 @@
   store { <2 x float>, <2 x float> } {<2 x float> <float 0.0, float 1.0>, <2 x float> <float 2.0, float 3.0>}, { <2 x float>, <2 x float> }* %ai, align 8
   %tmpcast = bitcast { <2 x float>, <2 x float> }* %ai to [4 x float]*
   %arrayidx = getelementptr inbounds [4 x float], [4 x float]* %tmpcast, i64 0, i64 3
-  %f = load float* %arrayidx, align 4
+  %f = load float, float* %arrayidx, align 4
   ret float %f
 }
diff --git a/llvm/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll b/llvm/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll
index 9e53163..af6d1f3 100644
--- a/llvm/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll
+++ b/llvm/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll
@@ -14,7 +14,7 @@
   %a = alloca <4 x float>, align 16
   %p = bitcast <4 x float>* %a to i8*
   call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 3, i32 16, i1 false)
-  %vec = load <4 x float>* %a, align 8
+  %vec = load <4 x float>, <4 x float>* %a, align 8
   %val = extractelement <4 x float> %vec, i32 0
   ret float %val
 }
diff --git a/llvm/test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll b/llvm/test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll
index 5f4d0fc..9a24662 100644
--- a/llvm/test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll
+++ b/llvm/test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll
@@ -25,7 +25,7 @@
 
 join:                                             ; preds = %then, %else
   %storemerge.in = phi i32* [ %retptr2, %else ], [ %retptr1, %then ]
-  %storemerge = load i32* %storemerge.in
+  %storemerge = load i32, i32* %storemerge.in
   %x3 = call i32 @extern_fn2(i32 %storemerge)
   ret void
 
diff --git a/llvm/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll b/llvm/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll
index af8d55c..51d1d14 100644
--- a/llvm/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll
+++ b/llvm/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll
@@ -19,7 +19,7 @@
   %1 = bitcast %struct.S* %ret to i8*
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 8, i32 8, i1 false)
   %2 = bitcast %struct.S* %retval to double*
-  %3 = load double* %2, align 1
+  %3 = load double, double* %2, align 1
   ret double %3
 }
 
diff --git a/llvm/test/Transforms/ScalarRepl/AggregatePromote.ll b/llvm/test/Transforms/ScalarRepl/AggregatePromote.ll
index a3386f6..f6dfdf5 100644
--- a/llvm/test/Transforms/ScalarRepl/AggregatePromote.ll
+++ b/llvm/test/Transforms/ScalarRepl/AggregatePromote.ll
@@ -10,7 +10,7 @@
 	%B = bitcast i64* %A to i32*		; <i32*> [#uses=1]
 	%C = bitcast i32* %B to i8*		; <i8*> [#uses=1]
 	store i8 0, i8* %C
-	%Y = load i64* %A		; <i64> [#uses=1]
+	%Y = load i64, i64* %A		; <i64> [#uses=1]
 	ret i64 %Y
 }
 
@@ -21,7 +21,7 @@
 	%tmp.1 = getelementptr i32, i32* %tmp.0, i32 1		; <i32*> [#uses=1]
 	%tmp.2 = bitcast i32* %tmp.1 to i8*		; <i8*> [#uses=1]
 	%tmp.3 = getelementptr i8, i8* %tmp.2, i32 3		; <i8*> [#uses=1]
-	%tmp.2.upgrd.1 = load i8* %tmp.3		; <i8> [#uses=1]
+	%tmp.2.upgrd.1 = load i8, i8* %tmp.3		; <i8> [#uses=1]
 	ret i8 %tmp.2.upgrd.1
 }
 
@@ -31,9 +31,9 @@
 	store i64 %X, i64* %tmp.0
 	%tmp.3 = bitcast { i64 }* %a to [4 x i16]*		; <[4 x i16]*> [#uses=2]
 	%tmp.4 = getelementptr [4 x i16], [4 x i16]* %tmp.3, i32 0, i32 3		; <i16*> [#uses=1]
-	%tmp.5 = load i16* %tmp.4		; <i16> [#uses=1]
+	%tmp.5 = load i16, i16* %tmp.4		; <i16> [#uses=1]
 	%tmp.8 = getelementptr [4 x i16], [4 x i16]* %tmp.3, i32 0, i32 2		; <i16*> [#uses=1]
-	%tmp.9 = load i16* %tmp.8		; <i16> [#uses=1]
+	%tmp.9 = load i16, i16* %tmp.8		; <i16> [#uses=1]
 	%tmp.10 = or i16 %tmp.9, %tmp.5		; <i16> [#uses=1]
 	ret i16 %tmp.10
 }
@@ -43,9 +43,9 @@
 	store i64 %X, i64* %a
 	%tmp.3 = bitcast i64* %a to [4 x i16]*		; <[4 x i16]*> [#uses=2]
 	%tmp.4 = getelementptr [4 x i16], [4 x i16]* %tmp.3, i32 0, i32 3		; <i16*> [#uses=1]
-	%tmp.5 = load i16* %tmp.4		; <i16> [#uses=1]
+	%tmp.5 = load i16, i16* %tmp.4		; <i16> [#uses=1]
 	%tmp.8 = getelementptr [4 x i16], [4 x i16]* %tmp.3, i32 0, i32 2		; <i16*> [#uses=1]
-	%tmp.9 = load i16* %tmp.8		; <i16> [#uses=1]
+	%tmp.9 = load i16, i16* %tmp.8		; <i16> [#uses=1]
 	%tmp.10 = or i16 %tmp.9, %tmp.5		; <i16> [#uses=1]
 	ret i16 %tmp.10
 }
diff --git a/llvm/test/Transforms/ScalarRepl/DifferingTypes.ll b/llvm/test/Transforms/ScalarRepl/DifferingTypes.ll
index 933c47f..3860f6c 100644
--- a/llvm/test/Transforms/ScalarRepl/DifferingTypes.ll
+++ b/llvm/test/Transforms/ScalarRepl/DifferingTypes.ll
@@ -10,7 +10,7 @@
 	store i32 %i, i32* %I
 	%P = bitcast i32* %I to i8*		; <i8*> [#uses=1]
 	store i8 %j, i8* %P
-	%t = load i32* %I		; <i32> [#uses=1]
+	%t = load i32, i32* %I		; <i32> [#uses=1]
 	ret i32 %t
 }
 
diff --git a/llvm/test/Transforms/ScalarRepl/address-space.ll b/llvm/test/Transforms/ScalarRepl/address-space.ll
index d6e3b74..b8b90ef 100644
--- a/llvm/test/Transforms/ScalarRepl/address-space.ll
+++ b/llvm/test/Transforms/ScalarRepl/address-space.ll
@@ -7,7 +7,7 @@
 %struct.anon = type { [1 x float] }
 
 ; CHECK-LABEL: define void @Test(
-; CHECK: load float addrspace(2)*
+; CHECK: load float, float addrspace(2)*
 ; CHECK-NEXT: fsub float
 ; CHECK: store float {{.*}}, float addrspace(2)* 
 define void @Test(%struct.anon addrspace(2)* %pPtr) nounwind {
@@ -19,7 +19,7 @@
   call void @llvm.memcpy.p0i8.p2i8.i64(i8* %tmp1, i8 addrspace(2)* %tmp2, i64 4, i32 4, i1 false)
   %tmp3 = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0 ; <[1 x float]*> [#uses=1]
   %arrayidx4 = getelementptr inbounds [1 x float], [1 x float]* %tmp3, i32 0, i64 0 ; <float*> [#uses=2]
-  %tmp5 = load float* %arrayidx4                  ; <float> [#uses=1]
+  %tmp5 = load float, float* %arrayidx4                  ; <float> [#uses=1]
   %sub = fsub float %tmp5, 5.000000e+00           ; <float> [#uses=1]
   store float %sub, float* %arrayidx4
   %arrayidx7 = getelementptr inbounds %struct.anon, %struct.anon addrspace(2)* %pPtr, i64 0 ; <%struct.anon addrspace(2)*> [#uses=1]
diff --git a/llvm/test/Transforms/ScalarRepl/arraytest.ll b/llvm/test/Transforms/ScalarRepl/arraytest.ll
index d95d5a1..486e725 100644
--- a/llvm/test/Transforms/ScalarRepl/arraytest.ll
+++ b/llvm/test/Transforms/ScalarRepl/arraytest.ll
@@ -5,7 +5,7 @@
 	%X = alloca [4 x i32]		; <[4 x i32]*> [#uses=1]
 	%Y = getelementptr [4 x i32], [4 x i32]* %X, i64 0, i64 0		; <i32*> [#uses=2]
 	store i32 0, i32* %Y
-	%Z = load i32* %Y		; <i32> [#uses=1]
+	%Z = load i32, i32* %Y		; <i32> [#uses=1]
 	ret i32 %Z
 }
 
diff --git a/llvm/test/Transforms/ScalarRepl/badarray.ll b/llvm/test/Transforms/ScalarRepl/badarray.ll
index a0017c4..6f5bc95 100644
--- a/llvm/test/Transforms/ScalarRepl/badarray.ll
+++ b/llvm/test/Transforms/ScalarRepl/badarray.ll
@@ -12,7 +12,7 @@
 	%X = alloca [4 x i32]
 	%Y = getelementptr [4 x i32], [4 x i32]* %X, i64 0, i64 6		; <i32*> [#uses=2]
 	store i32 0, i32* %Y
-	%Z = load i32* %Y		; <i32> [#uses=1]
+	%Z = load i32, i32* %Y		; <i32> [#uses=1]
 	ret i32 %Z
 }
 
@@ -24,7 +24,7 @@
 ; CHECK-NOT: = alloca
         %yx2.i = alloca float, align 4          ; <float*> [#uses=1]            
         %yx26.i = bitcast float* %yx2.i to i64*         ; <i64*> [#uses=1]      
-        %0 = load i64* %yx26.i, align 8         ; <i64> [#uses=0]               
+        %0 = load i64, i64* %yx26.i, align 8         ; <i64> [#uses=0]               
         unreachable
 }
 
diff --git a/llvm/test/Transforms/ScalarRepl/basictest.ll b/llvm/test/Transforms/ScalarRepl/basictest.ll
index 7f5d235..35d4d3b 100644
--- a/llvm/test/Transforms/ScalarRepl/basictest.ll
+++ b/llvm/test/Transforms/ScalarRepl/basictest.ll
@@ -5,7 +5,7 @@
 	%X = alloca { i32, float }		; <{ i32, float }*> [#uses=1]
 	%Y = getelementptr { i32, float }, { i32, float }* %X, i64 0, i32 0		; <i32*> [#uses=2]
 	store i32 0, i32* %Y
-	%Z = load i32* %Y		; <i32> [#uses=1]
+	%Z = load i32, i32* %Y		; <i32> [#uses=1]
 	ret i32 %Z
 ; CHECK-LABEL: @test1(
 ; CHECK-NOT: alloca
@@ -21,7 +21,7 @@
         br label %L2
         
 L2:
-	%Z = load i64* %B		; <i32> [#uses=1]
+	%Z = load i64, i64* %B		; <i32> [#uses=1]
 	ret i64 %Z
 ; CHECK-LABEL: @test2(
 ; CHECK-NOT: alloca
diff --git a/llvm/test/Transforms/ScalarRepl/bitfield-sroa.ll b/llvm/test/Transforms/ScalarRepl/bitfield-sroa.ll
index 07b522b..52986b0 100644
--- a/llvm/test/Transforms/ScalarRepl/bitfield-sroa.ll
+++ b/llvm/test/Transforms/ScalarRepl/bitfield-sroa.ll
@@ -9,9 +9,9 @@
         store i64 %A, i64* %tmp59172, align 8
         %C = getelementptr %t, %t* %ALL, i32 0, i32 0, i32 1             
         %D = bitcast i16* %C to i32*    
-        %E = load i32* %D, align 4     
+        %E = load i32, i32* %D, align 4     
         %F = bitcast %t* %ALL to i8* 
-        %G = load i8* %F, align 8 
+        %G = load i8, i8* %F, align 8 
 	ret i8 %G
 }
 
diff --git a/llvm/test/Transforms/ScalarRepl/copy-aggregate.ll b/llvm/test/Transforms/ScalarRepl/copy-aggregate.ll
index b464947..97977db 100644
--- a/llvm/test/Transforms/ScalarRepl/copy-aggregate.ll
+++ b/llvm/test/Transforms/ScalarRepl/copy-aggregate.ll
@@ -12,8 +12,8 @@
 
 	%A = getelementptr {{i32,i32}}, {{i32,i32}}* %X, i32 0, i32 0, i32 0
 	%B = getelementptr {{i32,i32}}, {{i32,i32}}* %X, i32 0, i32 0, i32 1
-	%a = load i32* %A
-	%b = load i32* %B
+	%a = load i32, i32* %A
+	%b = load i32, i32* %B
 	%c = add i32 %a, %b
 	ret i32 %c
 }
@@ -28,8 +28,8 @@
 
 	%A = getelementptr {[4 x float]}, {[4 x float]}* %X, i32 0, i32 0, i32 0
 	%B = getelementptr {[4 x float]}, {[4 x float]}* %X, i32 0, i32 0, i32 3
-	%a = load float* %A
-	%b = load float* %B
+	%a = load float, float* %A
+	%b = load float, float* %B
 	%c = fadd float %a, %b
 	ret float %c
 }
@@ -46,7 +46,7 @@
         store i32 %b, i32* %B
 
 	%Y = bitcast {{i32,i32}}* %X to i64*
-        %Z = load i64* %Y
+        %Z = load i64, i64* %Y
 	ret i64 %Z
 }
 
@@ -61,7 +61,7 @@
 	store float %b, float* %B
         
       	%Y = bitcast {[4 x float]}* %X to i128*
-	%V = load i128* %Y
+	%V = load i128, i128* %Y
 	ret i128 %V
 }
 
diff --git a/llvm/test/Transforms/ScalarRepl/crash.ll b/llvm/test/Transforms/ScalarRepl/crash.ll
index f0a50f8..72e9f09 100644
--- a/llvm/test/Transforms/ScalarRepl/crash.ll
+++ b/llvm/test/Transforms/ScalarRepl/crash.ll
@@ -11,7 +11,7 @@
   unreachable
 
 for.cond:                                         ; preds = %for.cond
-  %tmp1.i = load i32** %l_72, align 8
+  %tmp1.i = load i32*, i32** %l_72, align 8
   store i32* %tmp1.i, i32** %l_72, align 8
   br label %for.cond
 
@@ -30,7 +30,7 @@
         %X = alloca { [4 x i32] }               ; <{ [4 x i32] }*> [#uses=1]
         %Y = getelementptr { [4 x i32] }, { [4 x i32] }* %X, i64 0, i32 0, i64 2               ; <i32*> [#uses=2]
         store i32 4, i32* %Y
-        %Z = load i32* %Y               ; <i32> [#uses=1]
+        %Z = load i32, i32* %Y               ; <i32> [#uses=1]
         ret i32 %Z
 }
 
@@ -102,11 +102,11 @@
         ret void
 bb9875:         ; preds = %bb1365
         %source_ptr9884 = bitcast i8** %source_ptr to i8**              ; <i8**> [#uses=1]
-        %tmp9885 = load i8** %source_ptr9884            ; <i8*> [#uses=0]
+        %tmp9885 = load i8*, i8** %source_ptr9884            ; <i8*> [#uses=0]
         ret void
 bb10249:                ; preds = %bb1365
         %source_ptr10257 = bitcast i8** %source_ptr to i16**            ; <i16**> [#uses=1]
-        %tmp10258 = load i16** %source_ptr10257         ; <i16*> [#uses=0]
+        %tmp10258 = load i16*, i16** %source_ptr10257         ; <i16*> [#uses=0]
         ret void
 cond_next10377:         ; preds = %bb1365
         ret void
@@ -125,9 +125,9 @@
         %this_addr.i = alloca %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*                ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"**> [#uses=3]
         %tmp = alloca %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>", align 4                ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*> [#uses=1]
         store %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"* %tmp, %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"** %this_addr.i
-        %tmp.i = load %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"** %this_addr.i          ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*> [#uses=1]
+        %tmp.i = load %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*, %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"** %this_addr.i          ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*> [#uses=1]
         %tmp.i.upgrd.1 = bitcast %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"* %tmp.i to %"struct.__gnu_cxx::bitmap_allocator<char>"*              ; <%"struct.__gnu_cxx::bitmap_allocator<char>"*> [#uses=0]
-        %tmp1.i = load %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"** %this_addr.i         ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*> [#uses=1]
+        %tmp1.i = load %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*, %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"** %this_addr.i         ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*> [#uses=1]
         %tmp.i.upgrd.2 = getelementptr %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>", %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"* %tmp1.i, i32 0, i32 0         ; <%"struct.__gnu_cxx::bitmap_allocator<char>::_Alloc_block"**> [#uses=0]
         unreachable
 }
@@ -161,7 +161,7 @@
         br i1 false, label %cond_next34, label %cond_next79
 
 cond_next34:            ; preds = %cond_next
-        %i.2.reload22 = load i32* null          ; <i32> [#uses=1]
+        %i.2.reload22 = load i32, i32* null          ; <i32> [#uses=1]
         %tmp51 = getelementptr %struct.aal_spanbucket_t, %struct.aal_spanbucket_t* %SB, i32 0, i32 2, i32 0, i32 0, i32 %i.2.reload22, i32 1      
         ; <i16*> [#uses=0]
         ret void
@@ -221,7 +221,7 @@
         %storetmp.i = bitcast %struct.singlebool* %a to i1*             ; <i1*> [#uses=1]
         store i1 true, i1* %storetmp.i
         %tmp = getelementptr %struct.singlebool, %struct.singlebool* %a, i64 0, i32 0               ; <i8*> [#uses=1]
-        %tmp1 = load i8* %tmp           ; <i8> [#uses=1]
+        %tmp1 = load i8, i8* %tmp           ; <i8> [#uses=1]
         ret i8 %tmp1
 }
 
diff --git a/llvm/test/Transforms/ScalarRepl/debuginfo-preserved.ll b/llvm/test/Transforms/ScalarRepl/debuginfo-preserved.ll
index b0c459e2..bb83185 100644
--- a/llvm/test/Transforms/ScalarRepl/debuginfo-preserved.ll
+++ b/llvm/test/Transforms/ScalarRepl/debuginfo-preserved.ll
@@ -21,18 +21,18 @@
   store i32 %b, i32* %b.addr, align 4
   call void @llvm.dbg.declare(metadata i32* %b.addr, metadata !8, metadata !{}), !dbg !9
   call void @llvm.dbg.declare(metadata i32* %c, metadata !10, metadata !{}), !dbg !12
-  %tmp = load i32* %a.addr, align 4, !dbg !13
+  %tmp = load i32, i32* %a.addr, align 4, !dbg !13
   store i32 %tmp, i32* %c, align 4, !dbg !13
-  %tmp1 = load i32* %a.addr, align 4, !dbg !14
-  %tmp2 = load i32* %b.addr, align 4, !dbg !14
+  %tmp1 = load i32, i32* %a.addr, align 4, !dbg !14
+  %tmp2 = load i32, i32* %b.addr, align 4, !dbg !14
   %add = add nsw i32 %tmp1, %tmp2, !dbg !14
   store i32 %add, i32* %a.addr, align 4, !dbg !14
-  %tmp3 = load i32* %c, align 4, !dbg !15
-  %tmp4 = load i32* %b.addr, align 4, !dbg !15
+  %tmp3 = load i32, i32* %c, align 4, !dbg !15
+  %tmp4 = load i32, i32* %b.addr, align 4, !dbg !15
   %sub = sub nsw i32 %tmp3, %tmp4, !dbg !15
   store i32 %sub, i32* %b.addr, align 4, !dbg !15
-  %tmp5 = load i32* %a.addr, align 4, !dbg !16
-  %tmp6 = load i32* %b.addr, align 4, !dbg !16
+  %tmp5 = load i32, i32* %a.addr, align 4, !dbg !16
+  %tmp6 = load i32, i32* %b.addr, align 4, !dbg !16
   %add7 = add nsw i32 %tmp5, %tmp6, !dbg !16
   ret i32 %add7, !dbg !16
 }
diff --git a/llvm/test/Transforms/ScalarRepl/inline-vector.ll b/llvm/test/Transforms/ScalarRepl/inline-vector.ll
index d118be0..5d856c6 100644
--- a/llvm/test/Transforms/ScalarRepl/inline-vector.ll
+++ b/llvm/test/Transforms/ScalarRepl/inline-vector.ll
@@ -30,20 +30,20 @@
   %tmp3 = bitcast %struct.Vector4* %vector to i8*
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16, i1 false)
   %0 = bitcast %struct.Vector4* %agg.tmp to [2 x i64]*
-  %1 = load [2 x i64]* %0, align 16
+  %1 = load [2 x i64], [2 x i64]* %0, align 16
   %tmp2.i = extractvalue [2 x i64] %1, 0
   %tmp3.i = zext i64 %tmp2.i to i128
   %tmp10.i = bitcast i128 %tmp3.i to <4 x float>
   %sub.i.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp10.i
   %2 = bitcast %struct.Vector4* %vector to <4 x float>*
   store <4 x float> %sub.i.i, <4 x float>* %2, align 16
-  %tmp4 = load i32* %i, align 4
+  %tmp4 = load i32, i32* %i, align 4
   %inc = add nsw i32 %tmp4, 1
   br label %for.cond
 
 for.end:                                          ; preds = %for.cond
   %x = getelementptr inbounds %struct.Vector4, %struct.Vector4* %vector, i32 0, i32 0
-  %tmp5 = load float* %x, align 16
+  %tmp5 = load float, float* %x, align 16
   %conv = fpext float %tmp5 to double
   %call = call i32 (...)* @printf(double %conv) nounwind
   ret void
diff --git a/llvm/test/Transforms/ScalarRepl/lifetime.ll b/llvm/test/Transforms/ScalarRepl/lifetime.ll
index b6627dd..c0ddfb5 100644
--- a/llvm/test/Transforms/ScalarRepl/lifetime.ll
+++ b/llvm/test/Transforms/ScalarRepl/lifetime.ll
@@ -30,7 +30,7 @@
   %B = bitcast i32* %A2 to i8*
   store i32 0, i32* %A2
   call void @llvm.lifetime.start(i64 -1, i8* %B)
-  %C = load i32* %A2
+  %C = load i32, i32* %A2
   ret void
 ; CHECK: ret void
 }
@@ -44,7 +44,7 @@
   %B = bitcast i32* %A2 to i8*
   store i32 0, i32* %A2
   call void @llvm.lifetime.start(i64 6, i8* %B)
-  %C = load i32* %A2
+  %C = load i32, i32* %A2
   ret void
 ; CHECK-NEXT: ret void
 }
@@ -58,7 +58,7 @@
   %B = bitcast i32* %A2 to i8*
   store i32 0, i32* %A2
   call void @llvm.lifetime.start(i64 1, i8* %B)
-  %C = load i32* %A2
+  %C = load i32, i32* %A2
   ret void
 ; CHECK-NEXT: ret void
 }
@@ -90,7 +90,7 @@
 ; CHECK: llvm.lifetime{{.*}}i64 1
 ; CHECK: llvm.lifetime{{.*}}i64 1
 ; CHECK: llvm.lifetime{{.*}}i64 1
-  %C = load i8* %A2
+  %C = load i8, i8* %A2
   ret void
 }
 
diff --git a/llvm/test/Transforms/ScalarRepl/load-store-aggregate.ll b/llvm/test/Transforms/ScalarRepl/load-store-aggregate.ll
index f414234..88299f3 100644
--- a/llvm/test/Transforms/ScalarRepl/load-store-aggregate.ll
+++ b/llvm/test/Transforms/ScalarRepl/load-store-aggregate.ll
@@ -11,11 +11,11 @@
 define i32 @test(%struct.foo* %P) {
 entry:
 	%L = alloca %struct.foo, align 8		; <%struct.foo*> [#uses=2]
-        %V = load %struct.foo* %P
+        %V = load %struct.foo, %struct.foo* %P
         store %struct.foo %V, %struct.foo* %L
 
 	%tmp4 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp5 = load i32* %tmp4		; <i32> [#uses=1]
+	%tmp5 = load i32, i32* %tmp4		; <i32> [#uses=1]
 	ret i32 %tmp5
 }
 
@@ -26,6 +26,6 @@
         store i32 %A, i32* %L.0
         %L.1 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 1
         store i32 %B, i32* %L.1
-        %V = load %struct.foo* %L
+        %V = load %struct.foo, %struct.foo* %L
         ret %struct.foo %V
 }
diff --git a/llvm/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll b/llvm/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
index 7ab4666..e8088c1 100644
--- a/llvm/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
+++ b/llvm/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
@@ -15,7 +15,7 @@
 	%L2 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 0		; <i8*> [#uses=2]
 	%tmp13 = getelementptr %struct.foo, %struct.foo* %P, i32 0, i32 0		; <i8*> [#uses=1]
 	call void @llvm.memcpy.p0i8.p0i8.i32( i8* %L2, i8* %tmp13, i32 2, i32 1, i1 false)
-	%tmp5 = load i8* %L2		; <i8> [#uses=1]
+	%tmp5 = load i8, i8* %L2		; <i8> [#uses=1]
 	%tmp56 = sext i8 %tmp5 to i32		; <i32> [#uses=1]
 	ret i32 %tmp56
 }
diff --git a/llvm/test/Transforms/ScalarRepl/memset-aggregate.ll b/llvm/test/Transforms/ScalarRepl/memset-aggregate.ll
index 981ace6..98e2ddd 100644
--- a/llvm/test/Transforms/ScalarRepl/memset-aggregate.ll
+++ b/llvm/test/Transforms/ScalarRepl/memset-aggregate.ll
@@ -16,7 +16,7 @@
 	%tmp13 = bitcast %struct.foo* %P to i8*		; <i8*> [#uses=1]
         call void @llvm.memcpy.p0i8.p0i8.i32(i8* %L2, i8* %tmp13, i32 8, i32 4, i1 false)
 	%tmp4 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp5 = load i32* %tmp4		; <i32> [#uses=1]
+	%tmp5 = load i32, i32* %tmp4		; <i32> [#uses=1]
 	ret i32 %tmp5
 }
 
@@ -27,7 +27,7 @@
 	%L12 = bitcast [4 x %struct.foo]* %L to i8*		; <i8*> [#uses=1]
         call void @llvm.memset.p0i8.i32(i8* %L12, i8 0, i32 32, i32 16, i1 false)
 	%tmp4 = getelementptr [4 x %struct.foo], [4 x %struct.foo]* %L, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
-	%tmp5 = load i32* %tmp4		; <i32> [#uses=1]
+	%tmp5 = load i32, i32* %tmp4		; <i32> [#uses=1]
 	ret i32 %tmp5
 }
 
@@ -42,7 +42,7 @@
 	%tmp4 = getelementptr %struct.bar, %struct.bar* %B, i32 0, i32 2		; <double*> [#uses=1]
 	store double 1.000000e+01, double* %tmp4
 	%tmp6 = getelementptr %struct.bar, %struct.bar* %B, i32 0, i32 0, i32 1		; <i32*> [#uses=1]
-	%tmp7 = load i32* %tmp6		; <i32> [#uses=1]
+	%tmp7 = load i32, i32* %tmp6		; <i32> [#uses=1]
 	ret i32 %tmp7
 }
 
@@ -58,7 +58,7 @@
 	%2 = bitcast i32* %1 to i8*		; <i8*> [#uses=1]
 	call void @llvm.memset.p0i8.i32(i8* %2, i8 2, i32 12, i32 4, i1 false)
 	%3 = getelementptr %struct.f, %struct.f* %A, i32 0, i32 2		; <i32*> [#uses=1]
-	%4 = load i32* %3, align 8		; <i32> [#uses=1]
+	%4 = load i32, i32* %3, align 8		; <i32> [#uses=1]
 	%retval12 = trunc i32 %4 to i16		; <i16> [#uses=1]
 	ret i16 %retval12
 }
diff --git a/llvm/test/Transforms/ScalarRepl/nonzero-first-index.ll b/llvm/test/Transforms/ScalarRepl/nonzero-first-index.ll
index 5de6eca..da757b0 100644
--- a/llvm/test/Transforms/ScalarRepl/nonzero-first-index.ll
+++ b/llvm/test/Transforms/ScalarRepl/nonzero-first-index.ll
@@ -14,7 +14,7 @@
 	%A = alloca %nested
 	%B = getelementptr %nested, %nested* %A, i32 0, i32 1, i32 0
 	%C = getelementptr i32, i32* %B, i32 2
-	%D = load i32* %C
+	%D = load i32, i32* %C
 	ret i32 %D
 }
 
@@ -25,7 +25,7 @@
 	%A = alloca %nested
 	%B = getelementptr %nested, %nested* %A, i32 0, i32 1, i32 0
 	%C = getelementptr i32, i32* %B, i32 4
-	%D = load i32* %C
+	%D = load i32, i32* %C
 	ret i32 %D
 }
 
@@ -37,7 +37,7 @@
 	%A = alloca %nested
 	%B = bitcast %nested* %A to i32*
 	%C = getelementptr i32, i32* %B, i32 2
-	%D = load i32* %C
+	%D = load i32, i32* %C
 	ret i32 %D
 }
 
@@ -48,6 +48,6 @@
 	%A = alloca %nested
 	%B = bitcast %nested* %A to i32*
 	%C = getelementptr i32, i32* %B, i32 -1
-	%D = load i32* %C
+	%D = load i32, i32* %C
 	ret i32 %D
 }
diff --git a/llvm/test/Transforms/ScalarRepl/not-a-vector.ll b/llvm/test/Transforms/ScalarRepl/not-a-vector.ll
index 8a4f2d4..04c1f93 100644
--- a/llvm/test/Transforms/ScalarRepl/not-a-vector.ll
+++ b/llvm/test/Transforms/ScalarRepl/not-a-vector.ll
@@ -13,7 +13,7 @@
 	store double %B, double* %E
 
 	%F = getelementptr double, double* %C, i32 4
-	%G = load double* %F
+	%G = load double, double* %F
 	ret double %G
 }
 
diff --git a/llvm/test/Transforms/ScalarRepl/phi-cycle.ll b/llvm/test/Transforms/ScalarRepl/phi-cycle.ll
index 13b7d4e..dd451b7 100644
--- a/llvm/test/Transforms/ScalarRepl/phi-cycle.ll
+++ b/llvm/test/Transforms/ScalarRepl/phi-cycle.ll
@@ -69,7 +69,7 @@
 ; CHECK-NOT: load
 ; CHECK: %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp) [[NUW:#[0-9]+]]
 func.exit:                                        ; preds = %while.body.i.func.exit_crit_edge, %while.cond.i.func.exit_crit_edge
-  %tmp3 = load i32* %x.i, align 4
+  %tmp3 = load i32, i32* %x.i, align 4
   %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0), i32 %tmp3) nounwind
   ret i32 0
 }
diff --git a/llvm/test/Transforms/ScalarRepl/phi-select.ll b/llvm/test/Transforms/ScalarRepl/phi-select.ll
index a17a5b1..a6c7135 100644
--- a/llvm/test/Transforms/ScalarRepl/phi-select.ll
+++ b/llvm/test/Transforms/ScalarRepl/phi-select.ll
@@ -20,7 +20,7 @@
   %2 = icmp eq i32 %x, 0                          ; <i1> [#uses=1]
   %p.0 = select i1 %2, %struct.X* %b, %struct.X* %a ; <%struct.X*> [#uses=1]
   %3 = getelementptr inbounds %struct.X, %struct.X* %p.0, i64 0, i32 0 ; <i32*> [#uses=1]
-  %4 = load i32* %3, align 8                      ; <i32> [#uses=1]
+  %4 = load i32, i32* %3, align 8                      ; <i32> [#uses=1]
   ret i32 %4
 }
 
@@ -39,7 +39,7 @@
   br label %F
 F:
   %X = phi i32* [%B, %entry], [%C, %T]
-  %Q = load i32* %X
+  %Q = load i32, i32* %X
   ret i32 %Q
 }
 
@@ -55,7 +55,7 @@
   store i32 2, i32* %C
   
   %X = select i1 %c, i32* %B, i32* %C
-  %Q = load i32* %X
+  %Q = load i32, i32* %X
   ret i32 %Q
 }
 
@@ -72,7 +72,7 @@
   
   %X = select i1 %c, i32* %B, i32* %C
   %Y = bitcast i32* %X to i64*
-  %Q = load i64* %Y
+  %Q = load i64, i64* %Y
   ret i64 %Q
 }
 
@@ -91,7 +91,7 @@
   %p.0 = select i1 false, i32* %b, i32* %P
   store i32 123, i32* %p.0
   
-  %r = load i32* %b, align 8
+  %r = load i32, i32* %b, align 8
   ret i32 %r
   
 ; CHECK-LABEL: @test5(
@@ -105,7 +105,7 @@
   store i32 1, i32* %a, align 8
   store i32 2, i32* %b, align 8
   %p.0 = select i1 %c, i32* %b, i32* %a
-  %r = load i32* %p.0, align 8
+  %r = load i32, i32* %p.0, align 8
   ret i32 %r
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT: %r = select i1 %c, i32 2, i32 1
@@ -122,7 +122,7 @@
   
   store i32 0, i32* %a
   
-  %r = load i32* %p.0, align 8
+  %r = load i32, i32* %p.0, align 8
   ret i32 %r
 ; CHECK-LABEL: @test7(
 ; CHECK-NOT: alloca i32
@@ -148,6 +148,6 @@
   br label %Cont
 Cont:
   %p.0 = phi i32* [%b, %entry],[%a, %T]
-  %r = load i32* %p.0, align 8
+  %r = load i32, i32* %p.0, align 8
   ret i32 %r
 }
diff --git a/llvm/test/Transforms/ScalarRepl/phinodepromote.ll b/llvm/test/Transforms/ScalarRepl/phinodepromote.ll
index 9c6e8b9..c3af624 100644
--- a/llvm/test/Transforms/ScalarRepl/phinodepromote.ll
+++ b/llvm/test/Transforms/ScalarRepl/phinodepromote.ll
@@ -21,14 +21,14 @@
 	%mem_tmp.1 = alloca i32		; <i32*> [#uses=3]
 	store i32 0, i32* %mem_tmp.0
 	store i32 1, i32* %mem_tmp.1
-	%tmp.1.i = load i32* %mem_tmp.1		; <i32> [#uses=1]
-	%tmp.3.i = load i32* %mem_tmp.0		; <i32> [#uses=1]
+	%tmp.1.i = load i32, i32* %mem_tmp.1		; <i32> [#uses=1]
+	%tmp.3.i = load i32, i32* %mem_tmp.0		; <i32> [#uses=1]
 	%tmp.4.i = icmp sle i32 %tmp.1.i, %tmp.3.i		; <i1> [#uses=1]
 	br i1 %tmp.4.i, label %cond_true.i, label %cond_continue.i
 cond_true.i:		; preds = %entry
 	br label %cond_continue.i
 cond_continue.i:		; preds = %cond_true.i, %entry
 	%mem_tmp.i.0 = phi i32* [ %mem_tmp.1, %cond_true.i ], [ %mem_tmp.0, %entry ]		; <i32*> [#uses=1]
-	%tmp.3 = load i32* %mem_tmp.i.0		; <i32> [#uses=1]
+	%tmp.3 = load i32, i32* %mem_tmp.i.0		; <i32> [#uses=1]
 	ret i32 %tmp.3
 }
diff --git a/llvm/test/Transforms/ScalarRepl/select_promote.ll b/llvm/test/Transforms/ScalarRepl/select_promote.ll
index d6b2b75..b4ef8c4 100644
--- a/llvm/test/Transforms/ScalarRepl/select_promote.ll
+++ b/llvm/test/Transforms/ScalarRepl/select_promote.ll
@@ -8,11 +8,11 @@
 	%mem_tmp.1 = alloca i32		; <i32*> [#uses=3]
 	store i32 0, i32* %mem_tmp.0
 	store i32 1, i32* %mem_tmp.1
-	%tmp.1.i = load i32* %mem_tmp.1		; <i32> [#uses=1]
-	%tmp.3.i = load i32* %mem_tmp.0		; <i32> [#uses=1]
+	%tmp.1.i = load i32, i32* %mem_tmp.1		; <i32> [#uses=1]
+	%tmp.3.i = load i32, i32* %mem_tmp.0		; <i32> [#uses=1]
 	%tmp.4.i = icmp sle i32 %tmp.1.i, %tmp.3.i		; <i1> [#uses=1]
 	%mem_tmp.i.0 = select i1 %tmp.4.i, i32* %mem_tmp.1, i32* %mem_tmp.0		; <i32*> [#uses=1]
-	%tmp.3 = load i32* %mem_tmp.i.0		; <i32> [#uses=1]
+	%tmp.3 = load i32, i32* %mem_tmp.i.0		; <i32> [#uses=1]
 	ret i32 %tmp.3
 }
 
diff --git a/llvm/test/Transforms/ScalarRepl/sroa-fca.ll b/llvm/test/Transforms/ScalarRepl/sroa-fca.ll
index 2df3b9b..c6e7c23 100644
--- a/llvm/test/Transforms/ScalarRepl/sroa-fca.ll
+++ b/llvm/test/Transforms/ScalarRepl/sroa-fca.ll
@@ -6,7 +6,7 @@
 	%Y = bitcast i64* %X to {i32,i32}*
 	store {i32,i32} %A, {i32,i32}* %Y
 	
-	%Q = load i64* %X
+	%Q = load i64, i64* %X
 	ret i64 %Q
 }
 
@@ -15,7 +15,7 @@
 	%Y = bitcast i64* %X to {i32,i32}*
 	store i64 %A, i64* %X
 	
-	%Q = load {i32,i32}* %Y
+	%Q = load {i32,i32}, {i32,i32}* %Y
 	ret {i32,i32} %Q
 }
 
diff --git a/llvm/test/Transforms/ScalarRepl/sroa_two.ll b/llvm/test/Transforms/ScalarRepl/sroa_two.ll
index 0ede5d2..f2285ef 100644
--- a/llvm/test/Transforms/ScalarRepl/sroa_two.ll
+++ b/llvm/test/Transforms/ScalarRepl/sroa_two.ll
@@ -7,7 +7,7 @@
 	%tmp.1 = getelementptr [2 x i32], [2 x i32]* %Arr, i32 0, i32 1		; <i32*> [#uses=1]
 	store i32 2, i32* %tmp.1
 	%tmp.3 = getelementptr [2 x i32], [2 x i32]* %Arr, i32 0, i32 %X		; <i32*> [#uses=1]
-	%tmp.4 = load i32* %tmp.3		; <i32> [#uses=1]
+	%tmp.4 = load i32, i32* %tmp.3		; <i32> [#uses=1]
 	ret i32 %tmp.4
 }
 
diff --git a/llvm/test/Transforms/ScalarRepl/union-fp-int.ll b/llvm/test/Transforms/ScalarRepl/union-fp-int.ll
index 6a49918..fa64b60 100644
--- a/llvm/test/Transforms/ScalarRepl/union-fp-int.ll
+++ b/llvm/test/Transforms/ScalarRepl/union-fp-int.ll
@@ -8,7 +8,7 @@
 	%X_addr = alloca float		; <float*> [#uses=2]
 	store float %X, float* %X_addr
 	%X_addr.upgrd.1 = bitcast float* %X_addr to i32*		; <i32*> [#uses=1]
-	%tmp = load i32* %X_addr.upgrd.1		; <i32> [#uses=1]
+	%tmp = load i32, i32* %X_addr.upgrd.1		; <i32> [#uses=1]
 	ret i32 %tmp
 }
 
diff --git a/llvm/test/Transforms/ScalarRepl/union-packed.ll b/llvm/test/Transforms/ScalarRepl/union-packed.ll
index b272abf..741de76 100644
--- a/llvm/test/Transforms/ScalarRepl/union-packed.ll
+++ b/llvm/test/Transforms/ScalarRepl/union-packed.ll
@@ -8,7 +8,7 @@
 	%X_addr = alloca <4 x float>		; <<4 x float>*> [#uses=2]
 	store <4 x float> %X, <4 x float>* %X_addr
 	%X_addr.upgrd.1 = bitcast <4 x float>* %X_addr to <4 x i32>*		; <<4 x i32>*> [#uses=1]
-	%tmp = load <4 x i32>* %X_addr.upgrd.1		; <<4 x i32>> [#uses=1]
+	%tmp = load <4 x i32>, <4 x i32>* %X_addr.upgrd.1		; <<4 x i32>> [#uses=1]
 	ret <4 x i32> %tmp
 }
 
diff --git a/llvm/test/Transforms/ScalarRepl/union-pointer.ll b/llvm/test/Transforms/ScalarRepl/union-pointer.ll
index 82a2c3b..6a5db1c 100644
--- a/llvm/test/Transforms/ScalarRepl/union-pointer.ll
+++ b/llvm/test/Transforms/ScalarRepl/union-pointer.ll
@@ -14,7 +14,7 @@
 	%X_addr = alloca i16*		; <i16**> [#uses=2]
 	store i16* %X, i16** %X_addr
 	%X_addr.upgrd.1 = bitcast i16** %X_addr to i8**		; <i8**> [#uses=1]
-	%tmp = load i8** %X_addr.upgrd.1		; <i8*> [#uses=1]
+	%tmp = load i8*, i8** %X_addr.upgrd.1		; <i8*> [#uses=1]
 	ret i8* %tmp
 }
 
@@ -26,7 +26,7 @@
     %x_addr = alloca i16 addrspace(1)*
 	store i16 addrspace(1)* %x, i16 addrspace(1)** %x_addr
 	%x_addr.upgrd.1 = bitcast i16 addrspace(1)** %x_addr to i8 addrspace(1)**
-	%tmp = load i8 addrspace(1)** %x_addr.upgrd.1
+	%tmp = load i8 addrspace(1)*, i8 addrspace(1)** %x_addr.upgrd.1
 	ret i8 addrspace(1)* %tmp
 }
 
@@ -39,7 +39,7 @@
   %elem1 = getelementptr [4 x i16 addrspace(1)*], [4 x i16 addrspace(1)*]* %as_ptr_array, i32 0, i32 1
   store i16 addrspace(1)* %x, i16 addrspace(1)** %elem1
   %elem1.cast = bitcast i16 addrspace(1)** %elem1 to i8 addrspace(1)**
-  %tmp = load i8 addrspace(1)** %elem1.cast
+  %tmp = load i8 addrspace(1)*, i8 addrspace(1)** %elem1.cast
   ret i8 addrspace(1)* %tmp
 }
 
@@ -56,15 +56,15 @@
 	store i64 %tmp.upgrd.2, i64* %tmp1.upgrd.3
 	%tmp.upgrd.4 = getelementptr %struct.Val, %struct.Val* %tmp, i32 0, i32 0		; <i32**> [#uses=1]
 	%tmp2 = getelementptr %struct.Val, %struct.Val* %tmp1, i32 0, i32 0		; <i32**> [#uses=1]
-	%tmp.upgrd.5 = load i32** %tmp2		; <i32*> [#uses=1]
+	%tmp.upgrd.5 = load i32*, i32** %tmp2		; <i32*> [#uses=1]
 	store i32* %tmp.upgrd.5, i32** %tmp.upgrd.4
 	%tmp3 = getelementptr %struct.Val, %struct.Val* %tmp, i32 0, i32 1		; <i32*> [#uses=1]
 	%tmp4 = getelementptr %struct.Val, %struct.Val* %tmp1, i32 0, i32 1		; <i32*> [#uses=1]
-	%tmp.upgrd.6 = load i32* %tmp4		; <i32> [#uses=1]
+	%tmp.upgrd.6 = load i32, i32* %tmp4		; <i32> [#uses=1]
 	store i32 %tmp.upgrd.6, i32* %tmp3
 	%tmp7 = bitcast %struct.Val* %tmp to { i64 }*		; <{ i64 }*> [#uses=1]
 	%tmp8 = getelementptr { i64 }, { i64 }* %tmp7, i32 0, i32 0		; <i64*> [#uses=1]
-	%tmp9 = load i64* %tmp8		; <i64> [#uses=1]
+	%tmp9 = load i64, i64* %tmp8		; <i64> [#uses=1]
 	call void @_Z3bar3ValS_( i64 %Op.0, i64 %tmp9 )
 	ret void
 }
diff --git a/llvm/test/Transforms/ScalarRepl/vector_memcpy.ll b/llvm/test/Transforms/ScalarRepl/vector_memcpy.ll
index dfba9e2..031ad5e 100644
--- a/llvm/test/Transforms/ScalarRepl/vector_memcpy.ll
+++ b/llvm/test/Transforms/ScalarRepl/vector_memcpy.ll
@@ -10,7 +10,7 @@
 	%s = bitcast <16 x float>* %tmp to i8*
 	%s2 = bitcast <16 x float>* %tmp2 to i8*
 	call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s2, i8* %s, i64 64, i32 16, i1 false)
-	%R = load <16 x float>* %tmp2
+	%R = load <16 x float>, <16 x float>* %tmp2
 	ret <16 x float> %R
 }
 
@@ -20,7 +20,7 @@
 	%s2 = bitcast <16 x float>* %tmp2 to i8*
 	call void @llvm.memset.p0i8.i64(i8* %s2, i8 0, i64 64, i32 16, i1 false)
 	
-	%R = load <16 x float>* %tmp2
+	%R = load <16 x float>, <16 x float>* %tmp2
 	ret <16 x float> %R
 }
 
diff --git a/llvm/test/Transforms/ScalarRepl/vector_promote.ll b/llvm/test/Transforms/ScalarRepl/vector_promote.ll
index 3ee57b3..3c2377f 100644
--- a/llvm/test/Transforms/ScalarRepl/vector_promote.ll
+++ b/llvm/test/Transforms/ScalarRepl/vector_promote.ll
@@ -5,18 +5,18 @@
 define void @test1(<4 x float>* %F, float %f) {
 entry:
 	%G = alloca <4 x float>, align 16		; <<4 x float>*> [#uses=3]
-	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
+	%tmp = load <4 x float>, <4 x float>* %F		; <<4 x float>> [#uses=2]
 	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp3, <4 x float>* %G
 	%G.upgrd.1 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 0		; <float*> [#uses=1]
 	store float %f, float* %G.upgrd.1
-	%tmp4 = load <4 x float>* %G		; <<4 x float>> [#uses=2]
+	%tmp4 = load <4 x float>, <4 x float>* %G		; <<4 x float>> [#uses=2]
 	%tmp6 = fadd <4 x float> %tmp4, %tmp4		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp6, <4 x float>* %F
 	ret void
 ; CHECK-LABEL: @test1(
 ; CHECK-NOT: alloca
-; CHECK: %tmp = load <4 x float>* %F
+; CHECK: %tmp = load <4 x float>, <4 x float>* %F
 ; CHECK: fadd <4 x float> %tmp, %tmp
 ; CHECK-NEXT: insertelement <4 x float> %tmp3, float %f, i32 0
 }
@@ -24,18 +24,18 @@
 define void @test2(<4 x float>* %F, float %f) {
 entry:
 	%G = alloca <4 x float>, align 16		; <<4 x float>*> [#uses=3]
-	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
+	%tmp = load <4 x float>, <4 x float>* %F		; <<4 x float>> [#uses=2]
 	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp3, <4 x float>* %G
 	%tmp.upgrd.2 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 2		; <float*> [#uses=1]
 	store float %f, float* %tmp.upgrd.2
-	%tmp4 = load <4 x float>* %G		; <<4 x float>> [#uses=2]
+	%tmp4 = load <4 x float>, <4 x float>* %G		; <<4 x float>> [#uses=2]
 	%tmp6 = fadd <4 x float> %tmp4, %tmp4		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp6, <4 x float>* %F
 	ret void
 ; CHECK-LABEL: @test2(
 ; CHECK-NOT: alloca
-; CHECK: %tmp = load <4 x float>* %F
+; CHECK: %tmp = load <4 x float>, <4 x float>* %F
 ; CHECK: fadd <4 x float> %tmp, %tmp
 ; CHECK-NEXT: insertelement <4 x float> %tmp3, float %f, i32 2
 }
@@ -43,16 +43,16 @@
 define void @test3(<4 x float>* %F, float* %f) {
 entry:
 	%G = alloca <4 x float>, align 16		; <<4 x float>*> [#uses=2]
-	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
+	%tmp = load <4 x float>, <4 x float>* %F		; <<4 x float>> [#uses=2]
 	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp3, <4 x float>* %G
 	%tmp.upgrd.3 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 2		; <float*> [#uses=1]
-	%tmp.upgrd.4 = load float* %tmp.upgrd.3		; <float> [#uses=1]
+	%tmp.upgrd.4 = load float, float* %tmp.upgrd.3		; <float> [#uses=1]
 	store float %tmp.upgrd.4, float* %f
 	ret void
 ; CHECK-LABEL: @test3(
 ; CHECK-NOT: alloca
-; CHECK: %tmp = load <4 x float>* %F
+; CHECK: %tmp = load <4 x float>, <4 x float>* %F
 ; CHECK: fadd <4 x float> %tmp, %tmp
 ; CHECK-NEXT: extractelement <4 x float> %tmp3, i32 2
 }
@@ -60,16 +60,16 @@
 define void @test4(<4 x float>* %F, float* %f) {
 entry:
 	%G = alloca <4 x float>, align 16		; <<4 x float>*> [#uses=2]
-	%tmp = load <4 x float>* %F		; <<4 x float>> [#uses=2]
+	%tmp = load <4 x float>, <4 x float>* %F		; <<4 x float>> [#uses=2]
 	%tmp3 = fadd <4 x float> %tmp, %tmp		; <<4 x float>> [#uses=1]
 	store <4 x float> %tmp3, <4 x float>* %G
 	%G.upgrd.5 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 0		; <float*> [#uses=1]
-	%tmp.upgrd.6 = load float* %G.upgrd.5		; <float> [#uses=1]
+	%tmp.upgrd.6 = load float, float* %G.upgrd.5		; <float> [#uses=1]
 	store float %tmp.upgrd.6, float* %f
 	ret void
 ; CHECK-LABEL: @test4(
 ; CHECK-NOT: alloca
-; CHECK: %tmp = load <4 x float>* %F
+; CHECK: %tmp = load <4 x float>, <4 x float>* %F
 ; CHECK: fadd <4 x float> %tmp, %tmp
 ; CHECK-NEXT: extractelement <4 x float> %tmp3, i32 0
 }
@@ -79,7 +79,7 @@
         %X1 = getelementptr [4 x float], [4 x float]* %X_addr, i32 0, i32 2
 	store float %X, float* %X1
 	%a = bitcast float* %X1 to i32*
-	%tmp = load i32* %a
+	%tmp = load i32, i32* %a
 	ret i32 %tmp
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT: bitcast float %X to i32
@@ -90,7 +90,7 @@
 	%X_addr = alloca <2 x float>
         store <2 x float> %X, <2 x float>* %X_addr
 	%P = bitcast <2 x float>* %X_addr to i64*
-	%tmp = load i64* %P
+	%tmp = load i64, i64* %P
 	ret i64 %tmp
 ; CHECK-LABEL: @test6(
 ; CHECK: bitcast <2 x float> %X to i64
@@ -121,14 +121,14 @@
   %__a = alloca <1 x i64>, align 8
   %tmp = alloca <1 x i64>, align 8
   store <1 x i64> %a, <1 x i64>* %a.addr, align 8
-  %0 = load <1 x i64>* %a.addr, align 8
+  %0 = load <1 x i64>, <1 x i64>* %a.addr, align 8
   store <1 x i64> %0, <1 x i64>* %__a, align 8
-  %1 = load <1 x i64>* %__a, align 8
+  %1 = load <1 x i64>, <1 x i64>* %__a, align 8
   %2 = bitcast <1 x i64> %1 to <8 x i8>
   %3 = bitcast <8 x i8> %2 to <1 x i64>
   %vshl_n = shl <1 x i64> %3, <i64 4>
   store <1 x i64> %vshl_n, <1 x i64>* %tmp
-  %4 = load <1 x i64>* %tmp
+  %4 = load <1 x i64>, <1 x i64>* %tmp
   ret <1 x i64> %4
 ; CHECK-LABEL: @test8(
 ; CHECK-NOT: alloca
diff --git a/llvm/test/Transforms/ScalarRepl/vectors-with-mismatched-elements.ll b/llvm/test/Transforms/ScalarRepl/vectors-with-mismatched-elements.ll
index c3fbdf5..1548831 100644
--- a/llvm/test/Transforms/ScalarRepl/vectors-with-mismatched-elements.ll
+++ b/llvm/test/Transforms/ScalarRepl/vectors-with-mismatched-elements.ll
@@ -18,10 +18,10 @@
 entry:
   %retval = alloca <3 x i32>, align 16
   %z = alloca <4 x i32>, align 16
-  %tmp = load <4 x i32>* %z
+  %tmp = load <4 x i32>, <4 x i32>* %z
   %tmp1 = shufflevector <4 x i32> %tmp, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
   store <3 x i32> %tmp1, <3 x i32>* %retval
   %0 = bitcast <3 x i32>* %retval to <2 x i64>*
-  %1 = load <2 x i64>* %0, align 1
+  %1 = load <2 x i64>, <2 x i64>* %0, align 1
   ret <2 x i64> %1
 }
diff --git a/llvm/test/Transforms/ScalarRepl/volatile.ll b/llvm/test/Transforms/ScalarRepl/volatile.ll
index 370a6d3..2a600b3 100644
--- a/llvm/test/Transforms/ScalarRepl/volatile.ll
+++ b/llvm/test/Transforms/ScalarRepl/volatile.ll
@@ -7,7 +7,7 @@
 ; CHECK: store volatile
 
 	%C = getelementptr {i32,i32}, {i32,i32}* %A, i32 0, i32 1
-	%X = load volatile i32* %C
+	%X = load volatile i32, i32* %C
 ; CHECK: load volatile
 	ret i32 %X
 }
diff --git a/llvm/test/Transforms/Scalarizer/basic.ll b/llvm/test/Transforms/Scalarizer/basic.ll
index 3214346..150eb7d 100644
--- a/llvm/test/Transforms/Scalarizer/basic.ll
+++ b/llvm/test/Transforms/Scalarizer/basic.ll
@@ -21,13 +21,13 @@
 ; CHECK:   %nexti = sub i32 %i, 1
 ; CHECK:   %ptr = getelementptr <4 x float>, <4 x float>* %base, i32 %i
 ; CHECK:   %ptr.i0 = bitcast <4 x float>* %ptr to float*
-; CHECK:   %val.i0 = load float* %ptr.i0, align 16
+; CHECK:   %val.i0 = load float, float* %ptr.i0, align 16
 ; CHECK:   %ptr.i1 = getelementptr float, float* %ptr.i0, i32 1
-; CHECK:   %val.i1 = load float* %ptr.i1, align 4
+; CHECK:   %val.i1 = load float, float* %ptr.i1, align 4
 ; CHECK:   %ptr.i2 = getelementptr float, float* %ptr.i0, i32 2
-; CHECK:   %val.i2 = load float* %ptr.i2, align 8
+; CHECK:   %val.i2 = load float, float* %ptr.i2, align 8
 ; CHECK:   %ptr.i3 = getelementptr float, float* %ptr.i0, i32 3
-; CHECK:   %val.i3 = load float* %ptr.i3, align 4
+; CHECK:   %val.i3 = load float, float* %ptr.i3, align 4
 ; CHECK:   %add.i0 = fadd float %val.i0, %val.i2
 ; CHECK:   %add.i1 = fadd float %val.i1, %val.i3
 ; CHECK:   %add.i2 = fadd float %acc.i0, %acc.i2
@@ -66,7 +66,7 @@
   %nexti = sub i32 %i, 1
 
   %ptr = getelementptr <4 x float>, <4 x float> *%base, i32 %i
-  %val = load <4 x float> *%ptr
+  %val = load <4 x float> , <4 x float> *%ptr
   %dval = bitcast <4 x float> %val to <2 x double>
   %dacc = bitcast <4 x float> %acc to <2 x double>
   %shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
@@ -107,13 +107,13 @@
 ; CHECK:   %nexti = sub i32 %i, 1
 ; CHECK:   %ptr = getelementptr <4 x i8>, <4 x i8>* %base, i32 %i
 ; CHECK:   %ptr.i0 = bitcast <4 x i8>* %ptr to i8*
-; CHECK:   %val.i0 = load i8* %ptr.i0, align 4
+; CHECK:   %val.i0 = load i8, i8* %ptr.i0, align 4
 ; CHECK:   %ptr.i1 = getelementptr i8, i8* %ptr.i0, i32 1
-; CHECK:   %val.i1 = load i8* %ptr.i1, align 1
+; CHECK:   %val.i1 = load i8, i8* %ptr.i1, align 1
 ; CHECK:   %ptr.i2 = getelementptr i8, i8* %ptr.i0, i32 2
-; CHECK:   %val.i2 = load i8* %ptr.i2, align 2
+; CHECK:   %val.i2 = load i8, i8* %ptr.i2, align 2
 ; CHECK:   %ptr.i3 = getelementptr i8, i8* %ptr.i0, i32 3
-; CHECK:   %val.i3 = load i8* %ptr.i3, align 1
+; CHECK:   %val.i3 = load i8, i8* %ptr.i3, align 1
 ; CHECK:   %ext.i0 = sext i8 %val.i0 to i32
 ; CHECK:   %ext.i1 = sext i8 %val.i1 to i32
 ; CHECK:   %ext.i2 = sext i8 %val.i2 to i32
@@ -151,7 +151,7 @@
   %nexti = sub i32 %i, 1
 
   %ptr = getelementptr <4 x i8>, <4 x i8> *%base, i32 %i
-  %val = load <4 x i8> *%ptr
+  %val = load <4 x i8> , <4 x i8> *%ptr
   %ext = sext <4 x i8> %val to <4 x i32>
   %add = add <4 x i32> %ext, %acc
   %cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
@@ -172,16 +172,16 @@
 ; Check that !tbaa information is preserved.
 define void @f3(<4 x i32> *%src, <4 x i32> *%dst) {
 ; CHECK-LABEL: @f3(
-; CHECK: %val.i0 = load i32* %src.i0, align 16, !tbaa ![[TAG:[0-9]*]]
-; CHECK: %val.i1 = load i32* %src.i1, align 4, !tbaa ![[TAG]]
-; CHECK: %val.i2 = load i32* %src.i2, align 8, !tbaa ![[TAG]]
-; CHECK: %val.i3 = load i32* %src.i3, align 4, !tbaa ![[TAG]]
+; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa ![[TAG:[0-9]*]]
+; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa ![[TAG]]
+; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa ![[TAG]]
+; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa ![[TAG]]
 ; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa ![[TAG:[0-9]*]]
 ; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa ![[TAG]]
 ; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa ![[TAG]]
 ; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa ![[TAG]]
 ; CHECK: ret void
-  %val = load <4 x i32> *%src, !tbaa !1
+  %val = load <4 x i32> , <4 x i32> *%src, !tbaa !1
   %add = add <4 x i32> %val, %val
   store <4 x i32> %add, <4 x i32> *%dst, !tbaa !2
   ret void
@@ -190,16 +190,16 @@
 ; Check that !tbaa.struct information is preserved.
 define void @f4(<4 x i32> *%src, <4 x i32> *%dst) {
 ; CHECK-LABEL: @f4(
-; CHECK: %val.i0 = load i32* %src.i0, align 16, !tbaa.struct ![[TAG:[0-9]*]]
-; CHECK: %val.i1 = load i32* %src.i1, align 4, !tbaa.struct ![[TAG]]
-; CHECK: %val.i2 = load i32* %src.i2, align 8, !tbaa.struct ![[TAG]]
-; CHECK: %val.i3 = load i32* %src.i3, align 4, !tbaa.struct ![[TAG]]
+; CHECK: %val.i0 = load i32, i32* %src.i0, align 16, !tbaa.struct ![[TAG:[0-9]*]]
+; CHECK: %val.i1 = load i32, i32* %src.i1, align 4, !tbaa.struct ![[TAG]]
+; CHECK: %val.i2 = load i32, i32* %src.i2, align 8, !tbaa.struct ![[TAG]]
+; CHECK: %val.i3 = load i32, i32* %src.i3, align 4, !tbaa.struct ![[TAG]]
 ; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa.struct ![[TAG]]
 ; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa.struct ![[TAG]]
 ; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa.struct ![[TAG]]
 ; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa.struct ![[TAG]]
 ; CHECK: ret void
-  %val = load <4 x i32> *%src, !tbaa.struct !5
+  %val = load <4 x i32> , <4 x i32> *%src, !tbaa.struct !5
   %add = add <4 x i32> %val, %val
   store <4 x i32> %add, <4 x i32> *%dst, !tbaa.struct !5
   ret void
@@ -208,10 +208,10 @@
 ; Check that llvm.mem.parallel_loop_access information is preserved.
 define void @f5(i32 %count, <4 x i32> *%src, <4 x i32> *%dst) {
 ; CHECK-LABEL: @f5(
-; CHECK: %val.i0 = load i32* %this_src.i0, align 16, !llvm.mem.parallel_loop_access ![[TAG:[0-9]*]]
-; CHECK: %val.i1 = load i32* %this_src.i1, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
-; CHECK: %val.i2 = load i32* %this_src.i2, align 8, !llvm.mem.parallel_loop_access ![[TAG]]
-; CHECK: %val.i3 = load i32* %this_src.i3, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
+; CHECK: %val.i0 = load i32, i32* %this_src.i0, align 16, !llvm.mem.parallel_loop_access ![[TAG:[0-9]*]]
+; CHECK: %val.i1 = load i32, i32* %this_src.i1, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
+; CHECK: %val.i2 = load i32, i32* %this_src.i2, align 8, !llvm.mem.parallel_loop_access ![[TAG]]
+; CHECK: %val.i3 = load i32, i32* %this_src.i3, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
 ; CHECK: store i32 %add.i0, i32* %this_dst.i0, align 16, !llvm.mem.parallel_loop_access ![[TAG]]
 ; CHECK: store i32 %add.i1, i32* %this_dst.i1, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
 ; CHECK: store i32 %add.i2, i32* %this_dst.i2, align 8, !llvm.mem.parallel_loop_access ![[TAG]]
@@ -224,7 +224,7 @@
   %index = phi i32 [ 0, %entry ], [ %next_index, %loop ]
   %this_src = getelementptr <4 x i32>, <4 x i32> *%src, i32 %index
   %this_dst = getelementptr <4 x i32>, <4 x i32> *%dst, i32 %index
-  %val = load <4 x i32> *%this_src, !llvm.mem.parallel_loop_access !3
+  %val = load <4 x i32> , <4 x i32> *%this_src, !llvm.mem.parallel_loop_access !3
   %add = add <4 x i32> %val, %val
   store <4 x i32> %add, <4 x i32> *%this_dst, !llvm.mem.parallel_loop_access !3
   %next_index = add i32 %index, -1
@@ -261,7 +261,7 @@
 ; CHECK-LABEL: @f7(
 ; CHECK-NOT: !foo
 ; CHECK: ret void
-  %val = load <4 x i32> *%src, !foo !5
+  %val = load <4 x i32> , <4 x i32> *%src, !foo !5
   %add = add <4 x i32> %val, %val
   store <4 x i32> %add, <4 x i32> *%dst, !foo !5
   ret void
@@ -305,19 +305,19 @@
 ; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2
 ; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3
 ; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
-; CHECK: %val.i0 = load float* %src.i0, align 4
+; CHECK: %val.i0 = load float, float* %src.i0, align 4
 ; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1
-; CHECK: %val.i1 = load float* %src.i1, align 4
+; CHECK: %val.i1 = load float, float* %src.i1, align 4
 ; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2
-; CHECK: %val.i2 = load float* %src.i2, align 4
+; CHECK: %val.i2 = load float, float* %src.i2, align 4
 ; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3
-; CHECK: %val.i3 = load float* %src.i3, align 4
+; CHECK: %val.i3 = load float, float* %src.i3, align 4
 ; CHECK: store float %val.i0, float* %dest.i0, align 8
 ; CHECK: store float %val.i1, float* %dest.i1, align 4
 ; CHECK: store float %val.i2, float* %dest.i2, align 8
 ; CHECK: store float %val.i3, float* %dest.i3, align 4
 ; CHECK: ret void
-  %val = load <4 x float> *%src, align 4
+  %val = load <4 x float> , <4 x float> *%src, align 4
   store <4 x float> %val, <4 x float> *%dest, align 8
   ret void
 }
@@ -330,19 +330,19 @@
 ; CHECK: %dest.i2 = getelementptr float, float* %dest.i0, i32 2
 ; CHECK: %dest.i3 = getelementptr float, float* %dest.i0, i32 3
 ; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
-; CHECK: %val.i0 = load float* %src.i0, align 1
+; CHECK: %val.i0 = load float, float* %src.i0, align 1
 ; CHECK: %src.i1 = getelementptr float, float* %src.i0, i32 1
-; CHECK: %val.i1 = load float* %src.i1, align 1
+; CHECK: %val.i1 = load float, float* %src.i1, align 1
 ; CHECK: %src.i2 = getelementptr float, float* %src.i0, i32 2
-; CHECK: %val.i2 = load float* %src.i2, align 1
+; CHECK: %val.i2 = load float, float* %src.i2, align 1
 ; CHECK: %src.i3 = getelementptr float, float* %src.i0, i32 3
-; CHECK: %val.i3 = load float* %src.i3, align 1
+; CHECK: %val.i3 = load float, float* %src.i3, align 1
 ; CHECK: store float %val.i0, float* %dest.i0, align 2
 ; CHECK: store float %val.i1, float* %dest.i1, align 2
 ; CHECK: store float %val.i2, float* %dest.i2, align 2
 ; CHECK: store float %val.i3, float* %dest.i3, align 2
 ; CHECK: ret void
-  %val = load <4 x float> *%src, align 1
+  %val = load <4 x float> , <4 x float> *%src, align 1
   store <4 x float> %val, <4 x float> *%dest, align 2
   ret void
 }
@@ -350,13 +350,13 @@
 ; Test that sub-byte loads aren't scalarized.
 define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) {
 ; CHECK: @f11(
-; CHECK: %val0 = load <32 x i1>* %src0
-; CHECK: %val1 = load <32 x i1>* %src1
+; CHECK: %val0 = load <32 x i1>, <32 x i1>* %src0
+; CHECK: %val1 = load <32 x i1>, <32 x i1>* %src1
 ; CHECK: store <32 x i1> %and, <32 x i1>* %dest
 ; CHECK: ret void
   %src1 = getelementptr <32 x i1>, <32 x i1> *%src0, i32 1
-  %val0 = load <32 x i1> *%src0
-  %val1 = load <32 x i1> *%src1
+  %val0 = load <32 x i1> , <32 x i1> *%src0
+  %val1 = load <32 x i1> , <32 x i1> *%src1
   %and = and <32 x i1> %val0, %val1
   store <32 x i1> %and, <32 x i1> *%dest
   ret void
@@ -375,7 +375,7 @@
 ; CHECK-DAG: %val2.i2 = shl i32 3, %val1.i2
 ; CHECK-DAG: %val2.i3 = shl i32 4, %val1.i3
 ; CHECK: ret void
-  %val0 = load <4 x i32> *%src
+  %val0 = load <4 x i32> , <4 x i32> *%src
   %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
   %val2 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %val1
   store <4 x i32> %val2, <4 x i32> *%dest
diff --git a/llvm/test/Transforms/Scalarizer/dbginfo.ll b/llvm/test/Transforms/Scalarizer/dbginfo.ll
index 892947d..0460309 100644
--- a/llvm/test/Transforms/Scalarizer/dbginfo.ll
+++ b/llvm/test/Transforms/Scalarizer/dbginfo.ll
@@ -19,14 +19,14 @@
 ; CHECK: tail call void @llvm.dbg.value(metadata <4 x i32>* %a, i64 0, metadata !{{[0-9]+}}, metadata {{.*}}), !dbg !{{[0-9]+}}
 ; CHECK: tail call void @llvm.dbg.value(metadata <4 x i32>* %b, i64 0, metadata !{{[0-9]+}}, metadata {{.*}}), !dbg !{{[0-9]+}}
 ; CHECK: tail call void @llvm.dbg.value(metadata <4 x i32>* %c, i64 0, metadata !{{[0-9]+}}, metadata {{.*}}), !dbg !{{[0-9]+}}
-; CHECK: %bval.i0 = load i32* %b.i0, align 16, !dbg ![[TAG1:[0-9]+]], !tbaa ![[TAG2:[0-9]+]]
-; CHECK: %bval.i1 = load i32* %b.i1, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
-; CHECK: %bval.i2 = load i32* %b.i2, align 8, !dbg ![[TAG1]], !tbaa ![[TAG2]]
-; CHECK: %bval.i3 = load i32* %b.i3, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
-; CHECK: %cval.i0 = load i32* %c.i0, align 16, !dbg ![[TAG1]], !tbaa ![[TAG2]]
-; CHECK: %cval.i1 = load i32* %c.i1, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
-; CHECK: %cval.i2 = load i32* %c.i2, align 8, !dbg ![[TAG1]], !tbaa ![[TAG2]]
-; CHECK: %cval.i3 = load i32* %c.i3, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %bval.i0 = load i32, i32* %b.i0, align 16, !dbg ![[TAG1:[0-9]+]], !tbaa ![[TAG2:[0-9]+]]
+; CHECK: %bval.i1 = load i32, i32* %b.i1, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %bval.i2 = load i32, i32* %b.i2, align 8, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %bval.i3 = load i32, i32* %b.i3, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %cval.i0 = load i32, i32* %c.i0, align 16, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %cval.i1 = load i32, i32* %c.i1, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %cval.i2 = load i32, i32* %c.i2, align 8, !dbg ![[TAG1]], !tbaa ![[TAG2]]
+; CHECK: %cval.i3 = load i32, i32* %c.i3, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
 ; CHECK: %add.i0 = add i32 %bval.i0, %cval.i0, !dbg ![[TAG1]]
 ; CHECK: %add.i1 = add i32 %bval.i1, %cval.i1, !dbg ![[TAG1]]
 ; CHECK: %add.i2 = add i32 %bval.i2, %cval.i2, !dbg ![[TAG1]]
@@ -40,8 +40,8 @@
   tail call void @llvm.dbg.value(metadata <4 x i32>* %a, i64 0, metadata !15, metadata !{}), !dbg !20
   tail call void @llvm.dbg.value(metadata <4 x i32>* %b, i64 0, metadata !16, metadata !{}), !dbg !20
   tail call void @llvm.dbg.value(metadata <4 x i32>* %c, i64 0, metadata !17, metadata !{}), !dbg !20
-  %bval = load <4 x i32>* %b, align 16, !dbg !21, !tbaa !22
-  %cval = load <4 x i32>* %c, align 16, !dbg !21, !tbaa !22
+  %bval = load <4 x i32>, <4 x i32>* %b, align 16, !dbg !21, !tbaa !22
+  %cval = load <4 x i32>, <4 x i32>* %c, align 16, !dbg !21, !tbaa !22
   %add = add <4 x i32> %bval, %cval, !dbg !21
   store <4 x i32> %add, <4 x i32>* %a, align 16, !dbg !21, !tbaa !22
   ret void, !dbg !25
diff --git a/llvm/test/Transforms/Scalarizer/no-data-layout.ll b/llvm/test/Transforms/Scalarizer/no-data-layout.ll
index 3eaf669..c89c786 100644
--- a/llvm/test/Transforms/Scalarizer/no-data-layout.ll
+++ b/llvm/test/Transforms/Scalarizer/no-data-layout.ll
@@ -3,7 +3,7 @@
 ; Test the handling of loads and stores when no data layout is available.
 define void @f1(<4 x float> *%dest, <4 x float> *%src) {
 ; CHECK: @f1(
-; CHECK: %val = load <4 x float>* %src, align 4
+; CHECK: %val = load <4 x float>, <4 x float>* %src, align 4
 ; CHECK: %val.i0 = extractelement <4 x float> %val, i32 0
 ; CHECK: %add.i0 = fadd float %val.i0, %val.i0
 ; CHECK: %val.i1 = extractelement <4 x float> %val, i32 1
@@ -18,7 +18,7 @@
 ; CHECK: %add = insertelement <4 x float> %add.upto2, float %add.i3, i32 3
 ; CHECK: store <4 x float> %add, <4 x float>* %dest, align 8
 ; CHECK: ret void
-  %val = load <4 x float> *%src, align 4
+  %val = load <4 x float> , <4 x float> *%src, align 4
   %add = fadd <4 x float> %val, %val
   store <4 x float> %add, <4 x float> *%dest, align 8
   ret void
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
index 58e2d3b..9ee492d 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
@@ -23,23 +23,23 @@
   %1 = sext i32 %x to i64
   %2 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %0
   %3 = addrspacecast float addrspace(3)* %2 to float*
-  %4 = load float* %3, align 4
+  %4 = load float, float* %3, align 4
   %5 = fadd float %4, 0.000000e+00
   %6 = add i32 %y, 1
   %7 = sext i32 %6 to i64
   %8 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %7
   %9 = addrspacecast float addrspace(3)* %8 to float*
-  %10 = load float* %9, align 4
+  %10 = load float, float* %9, align 4
   %11 = fadd float %5, %10
   %12 = add i32 %x, 1
   %13 = sext i32 %12 to i64
   %14 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %13, i64 %0
   %15 = addrspacecast float addrspace(3)* %14 to float*
-  %16 = load float* %15, align 4
+  %16 = load float, float* %15, align 4
   %17 = fadd float %11, %16
   %18 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %13, i64 %7
   %19 = addrspacecast float addrspace(3)* %18 to float*
-  %20 = load float* %19, align 4
+  %20 = load float, float* %19, align 4
   %21 = fadd float %17, %20
   store float %21, float* %output, align 4
   ret void
@@ -68,21 +68,21 @@
   %1 = sext i32 %x to i64
   %2 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %0
   %3 = addrspacecast float addrspace(3)* %2 to float*
-  %4 = load float* %3, align 4
+  %4 = load float, float* %3, align 4
   %5 = fadd float %4, 0.000000e+00
   %6 = add i64 %0, 1
   %7 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %6
   %8 = addrspacecast float addrspace(3)* %7 to float*
-  %9 = load float* %8, align 4
+  %9 = load float, float* %8, align 4
   %10 = fadd float %5, %9
   %11 = add i64 %1, 1
   %12 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %11, i64 %0
   %13 = addrspacecast float addrspace(3)* %12 to float*
-  %14 = load float* %13, align 4
+  %14 = load float, float* %13, align 4
   %15 = fadd float %10, %14
   %16 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %11, i64 %6
   %17 = addrspacecast float addrspace(3)* %16 to float*
-  %18 = load float* %17, align 4
+  %18 = load float, float* %17, align 4
   %19 = fadd float %15, %18
   store float %19, float* %output, align 4
   ret void
@@ -116,23 +116,23 @@
   %1 = zext i32 %x to i64
   %2 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %0
   %3 = addrspacecast float addrspace(3)* %2 to float*
-  %4 = load float* %3, align 4
+  %4 = load float, float* %3, align 4
   %5 = fadd float %4, 0.000000e+00
   %6 = add nuw i32 %y, 1
   %7 = zext i32 %6 to i64
   %8 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %7
   %9 = addrspacecast float addrspace(3)* %8 to float*
-  %10 = load float* %9, align 4
+  %10 = load float, float* %9, align 4
   %11 = fadd float %5, %10
   %12 = add nuw i32 %x, 1
   %13 = zext i32 %12 to i64
   %14 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %13, i64 %0
   %15 = addrspacecast float addrspace(3)* %14 to float*
-  %16 = load float* %15, align 4
+  %16 = load float, float* %15, align 4
   %17 = fadd float %11, %16
   %18 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %13, i64 %7
   %19 = addrspacecast float addrspace(3)* %18 to float*
-  %20 = load float* %19, align 4
+  %20 = load float, float* %19, align 4
   %21 = fadd float %17, %20
   store float %21, float* %output, align 4
   ret void
@@ -164,21 +164,21 @@
   %1 = zext i32 %x to i64
   %2 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %0
   %3 = addrspacecast float addrspace(3)* %2 to float*
-  %4 = load float* %3, align 4
+  %4 = load float, float* %3, align 4
   %5 = fadd float %4, 0.000000e+00
   %6 = add i64 %0, 1
   %7 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %1, i64 %6
   %8 = addrspacecast float addrspace(3)* %7 to float*
-  %9 = load float* %8, align 4
+  %9 = load float, float* %8, align 4
   %10 = fadd float %5, %9
   %11 = add i64 %1, 1
   %12 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %11, i64 %0
   %13 = addrspacecast float addrspace(3)* %12 to float*
-  %14 = load float* %13, align 4
+  %14 = load float, float* %13, align 4
   %15 = fadd float %10, %14
   %16 = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %11, i64 %6
   %17 = addrspacecast float addrspace(3)* %16 to float*
-  %18 = load float* %17, align 4
+  %18 = load float, float* %17, align 4
   %19 = fadd float %15, %18
   store float %19, float* %output, align 4
   ret void
diff --git a/llvm/test/Transforms/SimplifyCFG/2005-06-16-PHICrash.ll b/llvm/test/Transforms/SimplifyCFG/2005-06-16-PHICrash.ll
index 90be680..8fd1fae 100644
--- a/llvm/test/Transforms/SimplifyCFG/2005-06-16-PHICrash.ll
+++ b/llvm/test/Transforms/SimplifyCFG/2005-06-16-PHICrash.ll
@@ -30,7 +30,7 @@
 	%tmp.14 = sext i8 %tmp.1 to i32		; <i32> [#uses=1]
 	%tmp.16 = zext i8 %l_88173906 to i32		; <i32> [#uses=1]
 	%tmp.17 = icmp sgt i32 %tmp.14, %tmp.16		; <i1> [#uses=1]
-	%tmp.19 = load i32* @g_59182229		; <i32> [#uses=2]
+	%tmp.19 = load i32, i32* @g_59182229		; <i32> [#uses=2]
 	br i1 %tmp.17, label %cond_true, label %cond_false
 cond_true:		; preds = %endif.0
 	%tmp.20 = icmp ne i32 %tmp.19, 1		; <i1> [#uses=1]
@@ -53,7 +53,7 @@
 	%tmp.29 = icmp sgt i32 %i.1.1, 99		; <i1> [#uses=1]
 	br i1 %tmp.29, label %endif.2, label %no_exit.1
 no_exit.1:		; preds = %loopentry.1
-	%tmp.30 = load i32* @g_38098584		; <i32> [#uses=1]
+	%tmp.30 = load i32, i32* @g_38098584		; <i32> [#uses=1]
 	%tmp.31 = icmp eq i32 %tmp.30, 0		; <i1> [#uses=1]
 	br i1 %tmp.31, label %else.3, label %then.3
 then.3:		; preds = %no_exit.1
diff --git a/llvm/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll b/llvm/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll
index 231d11b..2606e08 100644
--- a/llvm/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll
+++ b/llvm/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll
@@ -36,8 +36,8 @@
 	%guess = alloca %struct.anon*		; <%struct.anon**> [#uses=7]
 	%guess1 = alloca %struct.anon*		; <%struct.anon**> [#uses=7]
 	%point5 = alloca %struct.anon*		; <%struct.anon**> [#uses=3]
-	%tmp = load %struct.anon** %num		; <%struct.anon*> [#uses=1]
-	%tmp1 = load %struct.anon** @_zero_		; <%struct.anon*> [#uses=1]
+	%tmp = load %struct.anon*, %struct.anon** %num		; <%struct.anon*> [#uses=1]
+	%tmp1 = load %struct.anon*, %struct.anon** @_zero_		; <%struct.anon*> [#uses=1]
 	%tmp.upgrd.1 = call i32 @bc_compare( %struct.anon* %tmp, %struct.anon* %tmp1 )		; <i32> [#uses=2]
 	%tmp.upgrd.2 = icmp slt i32 %tmp.upgrd.1, 0		; <i1> [#uses=1]
 	br i1 %tmp.upgrd.2, label %cond_true, label %cond_false
@@ -48,26 +48,26 @@
 	br i1 %tmp5, label %cond_true6, label %cond_next13
 cond_true6:		; preds = %cond_false
 	call void @free_num( %struct.anon** %num )
-	%tmp8 = load %struct.anon** @_zero_		; <%struct.anon*> [#uses=1]
+	%tmp8 = load %struct.anon*, %struct.anon** @_zero_		; <%struct.anon*> [#uses=1]
 	%tmp9 = call %struct.anon* @copy_num( %struct.anon* %tmp8 )		; <%struct.anon*> [#uses=1]
 	store %struct.anon* %tmp9, %struct.anon** %num
 	ret i32 1
 cond_next13:		; preds = %cond_false
-	%tmp15 = load %struct.anon** %num		; <%struct.anon*> [#uses=1]
-	%tmp16 = load %struct.anon** @_one_		; <%struct.anon*> [#uses=1]
+	%tmp15 = load %struct.anon*, %struct.anon** %num		; <%struct.anon*> [#uses=1]
+	%tmp16 = load %struct.anon*, %struct.anon** @_one_		; <%struct.anon*> [#uses=1]
 	%tmp17 = call i32 @bc_compare( %struct.anon* %tmp15, %struct.anon* %tmp16 )		; <i32> [#uses=2]
 	%tmp19 = icmp eq i32 %tmp17, 0		; <i1> [#uses=1]
 	br i1 %tmp19, label %cond_true20, label %cond_next27
 cond_true20:		; preds = %cond_next13
 	call void @free_num( %struct.anon** %num )
-	%tmp22 = load %struct.anon** @_one_		; <%struct.anon*> [#uses=1]
+	%tmp22 = load %struct.anon*, %struct.anon** @_one_		; <%struct.anon*> [#uses=1]
 	%tmp23 = call %struct.anon* @copy_num( %struct.anon* %tmp22 )		; <%struct.anon*> [#uses=1]
 	store %struct.anon* %tmp23, %struct.anon** %num
 	ret i32 1
 cond_next27:		; preds = %cond_next13
-	%tmp29 = load %struct.anon** %num		; <%struct.anon*> [#uses=1]
+	%tmp29 = load %struct.anon*, %struct.anon** %num		; <%struct.anon*> [#uses=1]
 	%tmp30 = getelementptr %struct.anon, %struct.anon* %tmp29, i32 0, i32 2		; <i32*> [#uses=1]
-	%tmp31 = load i32* %tmp30		; <i32> [#uses=2]
+	%tmp31 = load i32, i32* %tmp30		; <i32> [#uses=2]
 	%tmp33 = icmp sge i32 %tmp31, %scale		; <i1> [#uses=1]
 	%max = select i1 %tmp33, i32 %tmp31, i32 %scale		; <i32> [#uses=4]
 	%tmp35 = add i32 %max, 2		; <i32> [#uses=0]
@@ -80,24 +80,24 @@
 	%tmp39 = icmp slt i32 %tmp17, 0		; <i1> [#uses=1]
 	br i1 %tmp39, label %cond_true40, label %cond_false43
 cond_true40:		; preds = %cond_next27
-	%tmp41 = load %struct.anon** @_one_		; <%struct.anon*> [#uses=1]
+	%tmp41 = load %struct.anon*, %struct.anon** @_one_		; <%struct.anon*> [#uses=1]
 	%tmp42 = call %struct.anon* @copy_num( %struct.anon* %tmp41 )		; <%struct.anon*> [#uses=1]
 	store %struct.anon* %tmp42, %struct.anon** %guess
 	br label %bb80.outer
 cond_false43:		; preds = %cond_next27
 	call void @int2num( %struct.anon** %guess, i32 10 )
-	%tmp45 = load %struct.anon** %num		; <%struct.anon*> [#uses=1]
+	%tmp45 = load %struct.anon*, %struct.anon** %num		; <%struct.anon*> [#uses=1]
 	%tmp46 = getelementptr %struct.anon, %struct.anon* %tmp45, i32 0, i32 1		; <i32*> [#uses=1]
-	%tmp47 = load i32* %tmp46		; <i32> [#uses=1]
+	%tmp47 = load i32, i32* %tmp46		; <i32> [#uses=1]
 	call void @int2num( %struct.anon** %guess1, i32 %tmp47 )
-	%tmp48 = load %struct.anon** %guess1		; <%struct.anon*> [#uses=1]
-	%tmp49 = load %struct.anon** %point5		; <%struct.anon*> [#uses=1]
+	%tmp48 = load %struct.anon*, %struct.anon** %guess1		; <%struct.anon*> [#uses=1]
+	%tmp49 = load %struct.anon*, %struct.anon** %point5		; <%struct.anon*> [#uses=1]
 	call void @bc_multiply( %struct.anon* %tmp48, %struct.anon* %tmp49, %struct.anon** %guess1, i32 %max )
-	%tmp51 = load %struct.anon** %guess1		; <%struct.anon*> [#uses=1]
+	%tmp51 = load %struct.anon*, %struct.anon** %guess1		; <%struct.anon*> [#uses=1]
 	%tmp52 = getelementptr %struct.anon, %struct.anon* %tmp51, i32 0, i32 2		; <i32*> [#uses=1]
 	store i32 0, i32* %tmp52
-	%tmp53 = load %struct.anon** %guess		; <%struct.anon*> [#uses=1]
-	%tmp54 = load %struct.anon** %guess1		; <%struct.anon*> [#uses=1]
+	%tmp53 = load %struct.anon*, %struct.anon** %guess		; <%struct.anon*> [#uses=1]
+	%tmp54 = load %struct.anon*, %struct.anon** %guess1		; <%struct.anon*> [#uses=1]
 	call void @bc_raise( %struct.anon* %tmp53, %struct.anon* %tmp54, %struct.anon** %guess, i32 %max )
 	br label %bb80.outer
 bb80.outer:		; preds = %cond_true83, %cond_false43, %cond_true40
@@ -113,8 +113,8 @@
 ; CHECK: bb86
 bb86:		; preds = %bb80
 	call void @free_num( %struct.anon** %num )
-	%tmp88 = load %struct.anon** %guess		; <%struct.anon*> [#uses=1]
-	%tmp89 = load %struct.anon** @_one_		; <%struct.anon*> [#uses=1]
+	%tmp88 = load %struct.anon*, %struct.anon** %guess		; <%struct.anon*> [#uses=1]
+	%tmp89 = load %struct.anon*, %struct.anon** @_one_		; <%struct.anon*> [#uses=1]
 	%tmp92 = call i32 @bc_divide( %struct.anon* %tmp88, %struct.anon* %tmp89, %struct.anon** %num, i32 %max )		; <i32> [#uses=0]
 	call void @free_num( %struct.anon** %guess )
 	call void @free_num( %struct.anon** %guess1 )
diff --git a/llvm/test/Transforms/SimplifyCFG/2006-08-03-Crash.ll b/llvm/test/Transforms/SimplifyCFG/2006-08-03-Crash.ll
index 8fbca35..0820e9c 100644
--- a/llvm/test/Transforms/SimplifyCFG/2006-08-03-Crash.ll
+++ b/llvm/test/Transforms/SimplifyCFG/2006-08-03-Crash.ll
@@ -34,7 +34,7 @@
 
 define void @fold_builtin_classify() {
 entry:
-	%tmp63 = load i32* null		; <i32> [#uses=1]
+	%tmp63 = load i32, i32* null		; <i32> [#uses=1]
 	switch i32 %tmp63, label %bb276 [
 		 i32 414, label %bb145
 		 i32 417, label %bb
@@ -42,54 +42,54 @@
 bb:		; preds = %entry
 	ret void
 bb145:		; preds = %entry
-	%tmp146 = load %struct.tree_node** null		; <%struct.tree_node*> [#uses=1]
+	%tmp146 = load %struct.tree_node*, %struct.tree_node** null		; <%struct.tree_node*> [#uses=1]
 	%tmp148 = getelementptr %struct.tree_node, %struct.tree_node* %tmp146, i32 0, i32 0, i32 0, i32 1		; <%struct.tree_node**> [#uses=1]
-	%tmp149 = load %struct.tree_node** %tmp148		; <%struct.tree_node*> [#uses=1]
+	%tmp149 = load %struct.tree_node*, %struct.tree_node** %tmp148		; <%struct.tree_node*> [#uses=1]
 	%tmp150 = bitcast %struct.tree_node* %tmp149 to %struct.tree_type*		; <%struct.tree_type*> [#uses=1]
 	%tmp151 = getelementptr %struct.tree_type, %struct.tree_type* %tmp150, i32 0, i32 6		; <i16*> [#uses=1]
 	%tmp151.upgrd.1 = bitcast i16* %tmp151 to i32*		; <i32*> [#uses=1]
-	%tmp152 = load i32* %tmp151.upgrd.1		; <i32> [#uses=1]
+	%tmp152 = load i32, i32* %tmp151.upgrd.1		; <i32> [#uses=1]
 	%tmp154 = lshr i32 %tmp152, 16		; <i32> [#uses=1]
 	%tmp154.mask = and i32 %tmp154, 127		; <i32> [#uses=1]
 	%gep.upgrd.2 = zext i32 %tmp154.mask to i64		; <i64> [#uses=1]
 	%tmp155 = getelementptr [35 x i8], [35 x i8]* @mode_class, i32 0, i64 %gep.upgrd.2		; <i8*> [#uses=1]
-	%tmp156 = load i8* %tmp155		; <i8> [#uses=1]
+	%tmp156 = load i8, i8* %tmp155		; <i8> [#uses=1]
 	%tmp157 = icmp eq i8 %tmp156, 4		; <i1> [#uses=1]
 	br i1 %tmp157, label %cond_next241, label %cond_true158
 cond_true158:		; preds = %bb145
-	%tmp172 = load %struct.tree_node** null		; <%struct.tree_node*> [#uses=1]
+	%tmp172 = load %struct.tree_node*, %struct.tree_node** null		; <%struct.tree_node*> [#uses=1]
 	%tmp174 = getelementptr %struct.tree_node, %struct.tree_node* %tmp172, i32 0, i32 0, i32 0, i32 1		; <%struct.tree_node**> [#uses=1]
-	%tmp175 = load %struct.tree_node** %tmp174		; <%struct.tree_node*> [#uses=1]
+	%tmp175 = load %struct.tree_node*, %struct.tree_node** %tmp174		; <%struct.tree_node*> [#uses=1]
 	%tmp176 = bitcast %struct.tree_node* %tmp175 to %struct.tree_type*		; <%struct.tree_type*> [#uses=1]
 	%tmp177 = getelementptr %struct.tree_type, %struct.tree_type* %tmp176, i32 0, i32 6		; <i16*> [#uses=1]
 	%tmp177.upgrd.3 = bitcast i16* %tmp177 to i32*		; <i32*> [#uses=1]
-	%tmp178 = load i32* %tmp177.upgrd.3		; <i32> [#uses=1]
+	%tmp178 = load i32, i32* %tmp177.upgrd.3		; <i32> [#uses=1]
 	%tmp180 = lshr i32 %tmp178, 16		; <i32> [#uses=1]
 	%tmp180.mask = and i32 %tmp180, 127		; <i32> [#uses=1]
 	%gep.upgrd.4 = zext i32 %tmp180.mask to i64		; <i64> [#uses=1]
 	%tmp181 = getelementptr [35 x i8], [35 x i8]* @mode_class, i32 0, i64 %gep.upgrd.4		; <i8*> [#uses=1]
-	%tmp182 = load i8* %tmp181		; <i8> [#uses=1]
+	%tmp182 = load i8, i8* %tmp181		; <i8> [#uses=1]
 	%tmp183 = icmp eq i8 %tmp182, 8		; <i1> [#uses=1]
 	br i1 %tmp183, label %cond_next241, label %cond_true184
 cond_true184:		; preds = %cond_true158
-	%tmp185 = load %struct.tree_node** null		; <%struct.tree_node*> [#uses=1]
+	%tmp185 = load %struct.tree_node*, %struct.tree_node** null		; <%struct.tree_node*> [#uses=1]
 	%tmp187 = getelementptr %struct.tree_node, %struct.tree_node* %tmp185, i32 0, i32 0, i32 0, i32 1		; <%struct.tree_node**> [#uses=1]
-	%tmp188 = load %struct.tree_node** %tmp187		; <%struct.tree_node*> [#uses=1]
+	%tmp188 = load %struct.tree_node*, %struct.tree_node** %tmp187		; <%struct.tree_node*> [#uses=1]
 	%tmp189 = bitcast %struct.tree_node* %tmp188 to %struct.tree_type*		; <%struct.tree_type*> [#uses=1]
 	%tmp190 = getelementptr %struct.tree_type, %struct.tree_type* %tmp189, i32 0, i32 6		; <i16*> [#uses=1]
 	%tmp190.upgrd.5 = bitcast i16* %tmp190 to i32*		; <i32*> [#uses=1]
-	%tmp191 = load i32* %tmp190.upgrd.5		; <i32> [#uses=1]
+	%tmp191 = load i32, i32* %tmp190.upgrd.5		; <i32> [#uses=1]
 	%tmp193 = lshr i32 %tmp191, 16		; <i32> [#uses=1]
 	%tmp193.mask = and i32 %tmp193, 127		; <i32> [#uses=1]
 	%gep.upgrd.6 = zext i32 %tmp193.mask to i64		; <i64> [#uses=1]
 	%tmp194 = getelementptr [35 x i8], [35 x i8]* @mode_class, i32 0, i64 %gep.upgrd.6		; <i8*> [#uses=1]
-	%tmp195 = load i8* %tmp194		; <i8> [#uses=1]
+	%tmp195 = load i8, i8* %tmp194		; <i8> [#uses=1]
 	%tmp196 = icmp eq i8 %tmp195, 4		; <i1> [#uses=1]
 	br i1 %tmp196, label %cond_next241, label %cond_true197
 cond_true197:		; preds = %cond_true184
 	ret void
 cond_next241:		; preds = %cond_true184, %cond_true158, %bb145
-	%tmp245 = load i32* null		; <i32> [#uses=0]
+	%tmp245 = load i32, i32* null		; <i32> [#uses=0]
 	ret void
 bb276:		; preds = %entry
 	ret void
diff --git a/llvm/test/Transforms/SimplifyCFG/2006-12-08-Ptr-ICmp-Branch.ll b/llvm/test/Transforms/SimplifyCFG/2006-12-08-Ptr-ICmp-Branch.ll
index a621761..dcf2412 100644
--- a/llvm/test/Transforms/SimplifyCFG/2006-12-08-Ptr-ICmp-Branch.ll
+++ b/llvm/test/Transforms/SimplifyCFG/2006-12-08-Ptr-ICmp-Branch.ll
@@ -47,40 +47,40 @@
 	store i32 0, i32* %wstate
 	%tmp = getelementptr %struct.charsequence, %struct.charsequence* %cs, i64 0, i32 0		; <i8**> [#uses=1]
 	%tmp1 = getelementptr %struct.charsequence, %struct.charsequence* @C.0.2294, i64 0, i32 0		; <i8**> [#uses=1]
-	%tmp.upgrd.5 = load i8** %tmp1		; <i8*> [#uses=1]
+	%tmp.upgrd.5 = load i8*, i8** %tmp1		; <i8*> [#uses=1]
 	store i8* %tmp.upgrd.5, i8** %tmp
 	%tmp.upgrd.6 = getelementptr %struct.charsequence, %struct.charsequence* %cs, i64 0, i32 1		; <i32*> [#uses=1]
 	%tmp2 = getelementptr %struct.charsequence, %struct.charsequence* @C.0.2294, i64 0, i32 1		; <i32*> [#uses=1]
-	%tmp.upgrd.7 = load i32* %tmp2		; <i32> [#uses=1]
+	%tmp.upgrd.7 = load i32, i32* %tmp2		; <i32> [#uses=1]
 	store i32 %tmp.upgrd.7, i32* %tmp.upgrd.6
 	%tmp3 = getelementptr %struct.charsequence, %struct.charsequence* %cs, i64 0, i32 2		; <i32*> [#uses=1]
 	%tmp4 = getelementptr %struct.charsequence, %struct.charsequence* @C.0.2294, i64 0, i32 2		; <i32*> [#uses=1]
-	%tmp5 = load i32* %tmp4		; <i32> [#uses=1]
+	%tmp5 = load i32, i32* %tmp4		; <i32> [#uses=1]
 	store i32 %tmp5, i32* %tmp3
 	br label %bb33
 bb:		; preds = %bb33
-	%tmp.upgrd.8 = load %struct.FILE** %f_addr		; <%struct.FILE*> [#uses=1]
+	%tmp.upgrd.8 = load %struct.FILE*, %struct.FILE** %f_addr		; <%struct.FILE*> [#uses=1]
 	%tmp.upgrd.9 = call i32 @_IO_getc( %struct.FILE* %tmp.upgrd.8 )		; <i32> [#uses=1]
 	%tmp6 = call i32 @tolower( i32 %tmp.upgrd.9 )		; <i32> [#uses=1]
 	%tmp6.upgrd.10 = trunc i32 %tmp6 to i8		; <i8> [#uses=1]
 	store i8 %tmp6.upgrd.10, i8* %c
-	%tmp7 = load i32* %wstate		; <i32> [#uses=1]
+	%tmp7 = load i32, i32* %wstate		; <i32> [#uses=1]
 	%tmp.upgrd.11 = icmp ne i32 %tmp7, 0		; <i1> [#uses=1]
 	br i1 %tmp.upgrd.11, label %cond_true, label %cond_false
 cond_true:		; preds = %bb
-	%tmp.upgrd.12 = load i8* %c		; <i8> [#uses=1]
+	%tmp.upgrd.12 = load i8, i8* %c		; <i8> [#uses=1]
 	%tmp8 = icmp sle i8 %tmp.upgrd.12, 96		; <i1> [#uses=1]
 	br i1 %tmp8, label %cond_true9, label %cond_next
 cond_true9:		; preds = %cond_true
 	br label %bb16
 cond_next:		; preds = %cond_true
-	%tmp10 = load i8* %c		; <i8> [#uses=1]
+	%tmp10 = load i8, i8* %c		; <i8> [#uses=1]
 	%tmp11 = icmp sgt i8 %tmp10, 122		; <i1> [#uses=1]
 	br i1 %tmp11, label %cond_true12, label %cond_next13
 cond_true12:		; preds = %cond_next
 	br label %bb16
 cond_next13:		; preds = %cond_next
-	%tmp14 = load i8* %c		; <i8> [#uses=1]
+	%tmp14 = load i8, i8* %c		; <i8> [#uses=1]
 	%tmp14.upgrd.13 = sext i8 %tmp14 to i32		; <i32> [#uses=1]
 	%tmp1415 = trunc i32 %tmp14.upgrd.13 to i8		; <i8> [#uses=1]
 	call void @charsequence_push( %struct.charsequence* %cs, i8 %tmp1415 )
@@ -88,26 +88,26 @@
 bb16:		; preds = %cond_true12, %cond_true9
 	%tmp17 = call i8* @charsequence_val( %struct.charsequence* %cs )		; <i8*> [#uses=1]
 	store i8* %tmp17, i8** %str
-	%tmp.upgrd.14 = load %struct.trie_s** %t_addr		; <%struct.trie_s*> [#uses=1]
-	%tmp18 = load i8** %str		; <i8*> [#uses=1]
+	%tmp.upgrd.14 = load %struct.trie_s*, %struct.trie_s** %t_addr		; <%struct.trie_s*> [#uses=1]
+	%tmp18 = load i8*, i8** %str		; <i8*> [#uses=1]
 	%tmp19 = call %struct.trie_s* @trie_insert( %struct.trie_s* %tmp.upgrd.14, i8* %tmp18 )		; <%struct.trie_s*> [#uses=0]
-	%tmp20 = load i8** %str		; <i8*> [#uses=1]
+	%tmp20 = load i8*, i8** %str		; <i8*> [#uses=1]
 	call void @free( i8* %tmp20 )
 	store i32 0, i32* %wstate
 	br label %bb21
 bb21:		; preds = %bb16, %cond_next13
 	br label %cond_next32
 cond_false:		; preds = %bb
-	%tmp22 = load i8* %c		; <i8> [#uses=1]
+	%tmp22 = load i8, i8* %c		; <i8> [#uses=1]
 	%tmp23 = icmp sgt i8 %tmp22, 96		; <i1> [#uses=1]
 	br i1 %tmp23, label %cond_true24, label %cond_next31
 cond_true24:		; preds = %cond_false
-	%tmp25 = load i8* %c		; <i8> [#uses=1]
+	%tmp25 = load i8, i8* %c		; <i8> [#uses=1]
 	%tmp26 = icmp sle i8 %tmp25, 122		; <i1> [#uses=1]
 	br i1 %tmp26, label %cond_true27, label %cond_next30
 cond_true27:		; preds = %cond_true24
 	call void @charsequence_reset( %struct.charsequence* %cs )
-	%tmp28 = load i8* %c		; <i8> [#uses=1]
+	%tmp28 = load i8, i8* %c		; <i8> [#uses=1]
 	%tmp28.upgrd.15 = sext i8 %tmp28 to i32		; <i32> [#uses=1]
 	%tmp2829 = trunc i32 %tmp28.upgrd.15 to i8		; <i8> [#uses=1]
 	call void @charsequence_push( %struct.charsequence* %cs, i8 %tmp2829 )
@@ -120,7 +120,7 @@
 cond_next32:		; preds = %cond_next31, %bb21
 	br label %bb33
 bb33:		; preds = %cond_next32, %entry
-	%tmp34 = load %struct.FILE** %f_addr		; <%struct.FILE*> [#uses=1]
+	%tmp34 = load %struct.FILE*, %struct.FILE** %f_addr		; <%struct.FILE*> [#uses=1]
 	%tmp35 = call i32 @feof( %struct.FILE* %tmp34 )		; <i32> [#uses=1]
 	%tmp36 = icmp eq i32 %tmp35, 0		; <i1> [#uses=1]
 	br i1 %tmp36, label %bb, label %bb37
diff --git a/llvm/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll b/llvm/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
index 8e15637..7625d93 100644
--- a/llvm/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
+++ b/llvm/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll
@@ -8,7 +8,7 @@
 define void @test(i32 %X, i32 %Y, i32 %Z) {
 entry:
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	%tmp = load i32* @G, align 8		; <i32> [#uses=2]
+	%tmp = load i32, i32* @G, align 8		; <i32> [#uses=2]
 	%tmp3 = icmp eq i32 %X, %Y		; <i1> [#uses=1]
 	%tmp34 = zext i1 %tmp3 to i8		; <i8> [#uses=1]
 	%toBool = icmp ne i8 %tmp34, 0		; <i1> [#uses=1]
diff --git a/llvm/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll b/llvm/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll
index 9b6084f..3127931 100644
--- a/llvm/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll
+++ b/llvm/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll
@@ -9,7 +9,7 @@
 
 define i32 @main() nounwind  {
 entry:
-	%l = load i32* @g_37, align 4		; <i32> [#uses=1]
+	%l = load i32, i32* @g_37, align 4		; <i32> [#uses=1]
 	%cmpa = icmp ne i32 %l, 0		; <i1> [#uses=3]
 	br i1 %cmpa, label %func_1.exit, label %mooseblock
 
diff --git a/llvm/test/Transforms/SimplifyCFG/2008-09-08-MultiplePred.ll b/llvm/test/Transforms/SimplifyCFG/2008-09-08-MultiplePred.ll
index ac9622d..6b216f5 100644
--- a/llvm/test/Transforms/SimplifyCFG/2008-09-08-MultiplePred.ll
+++ b/llvm/test/Transforms/SimplifyCFG/2008-09-08-MultiplePred.ll
@@ -4,7 +4,7 @@
 
 define i32 @func_127(i32 %p_129) nounwind {
 entry:
-	load i32* @g_103, align 4		; <i32>:0 [#uses=1]
+	load i32, i32* @g_103, align 4		; <i32>:0 [#uses=1]
 	icmp eq i32 %0, 0		; <i1>:1 [#uses=2]
 	br i1 %1, label %bb6.preheader, label %entry.return_crit_edge
 
diff --git a/llvm/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll b/llvm/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll
index 419feb6..faf3f5f 100644
--- a/llvm/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll
+++ b/llvm/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll
@@ -18,7 +18,7 @@
 	br i1 icmp ne (i32* @i, i32* null), label %bb1, label %bb2
 
 bb1:		; preds = %bb
-	%0 = load i32* @i, align 4		; <i32> [#uses=1]
+	%0 = load i32, i32* @i, align 4		; <i32> [#uses=1]
 	br label %bb3
 
 bb2:		; preds = %bb
diff --git a/llvm/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll b/llvm/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll
index 076cb58..cfbe219 100644
--- a/llvm/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll
+++ b/llvm/test/Transforms/SimplifyCFG/AArch64/prefer-fma.ll
@@ -5,9 +5,9 @@
 ; Function Attrs: nounwind
 define double @_Z3fooRdS_S_S_(double* dereferenceable(8) %x, double* dereferenceable(8) %y, double* dereferenceable(8) %a) #0 {
 entry:
-  %0 = load double* %y, align 8
+  %0 = load double, double* %y, align 8
   %cmp = fcmp oeq double %0, 0.000000e+00
-  %1 = load double* %x, align 8
+  %1 = load double, double* %x, align 8
   br i1 %cmp, label %if.then, label %if.else
 
 ; fadd (const, (fmul x, y))
@@ -15,7 +15,7 @@
 ; CHECK-LABEL: if.then:
 ; CHECK:   %3 = fmul fast double %1, %2
 ; CHECK-NEXT:   %mul = fadd fast double 1.000000e+00, %3
-  %2 = load double* %a, align 8
+  %2 = load double, double* %a, align 8
   %3 = fmul fast double %1, %2
   %mul = fadd fast double 1.000000e+00, %3
   store double %mul, double* %y, align 8
@@ -26,16 +26,16 @@
 ; CHECK-LABEL: if.else:
 ; CHECK:   %mul1 = fmul fast double %1, %2
 ; CHECK-NEXT:   %sub1 = fsub fast double %mul1, %0
-  %4 = load double* %a, align 8
+  %4 = load double, double* %a, align 8
   %mul1 = fmul fast double %1, %4
   %sub1 = fsub fast double %mul1, %0
   store double %sub1, double* %y, align 8
   br label %if.end
 
 if.end:                                           ; preds = %if.else, %if.then
-  %5 = load double* %y, align 8
+  %5 = load double, double* %y, align 8
   %cmp2 = fcmp oeq double %5, 2.000000e+00
-  %6 = load double* %x, align 8
+  %6 = load double, double* %x, align 8
   br i1 %cmp2, label %if.then2, label %if.else2
 
 ; fsub (x, (fmul y, z))
@@ -43,7 +43,7 @@
 ; CHECK-LABEL: if.then2:
 ; CHECK:   %7 = fmul fast double %5, 3.000000e+00
 ; CHECK-NEXT:   %mul2 = fsub fast double %6, %7
-  %7 = load double* %a, align 8
+  %7 = load double, double* %a, align 8
   %8 = fmul fast double %6, 3.0000000e+00
   %mul2 = fsub fast double %7, %8
   store double %mul2, double* %y, align 8
@@ -62,10 +62,10 @@
   br label %if.end2
 
 if.end2:                                           ; preds = %if.else, %if.then
-  %9 = load double* %x, align 8
-  %10 = load double* %y, align 8
+  %9 = load double, double* %x, align 8
+  %10 = load double, double* %y, align 8
   %add = fadd fast double %9, %10
-  %11 = load double* %a, align 8
+  %11 = load double, double* %a, align 8
   %add2 = fadd fast double %add, %11
   ret double %add2
 }
diff --git a/llvm/test/Transforms/SimplifyCFG/EmptyBlockMerge.ll b/llvm/test/Transforms/SimplifyCFG/EmptyBlockMerge.ll
index aba08dc..32a0202 100644
--- a/llvm/test/Transforms/SimplifyCFG/EmptyBlockMerge.ll
+++ b/llvm/test/Transforms/SimplifyCFG/EmptyBlockMerge.ll
@@ -6,7 +6,7 @@
 
 define void @cprop_test12(i32* %data) {
 bb0:
-	%reg108 = load i32* %data		; <i32> [#uses=2]
+	%reg108 = load i32, i32* %data		; <i32> [#uses=2]
 	%cond218 = icmp ne i32 %reg108, 5		; <i1> [#uses=1]
 	br i1 %cond218, label %bb3, label %bb2
 bb2:		; preds = %bb0
diff --git a/llvm/test/Transforms/SimplifyCFG/PR17073.ll b/llvm/test/Transforms/SimplifyCFG/PR17073.ll
index 8dc9fb2..e6e98b2 100644
--- a/llvm/test/Transforms/SimplifyCFG/PR17073.ll
+++ b/llvm/test/Transforms/SimplifyCFG/PR17073.ll
@@ -18,7 +18,7 @@
 ; CHECK-NOT: select i1 %tobool, i32* null, i32* select (i1 icmp eq (i64 urem (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a) 
 define i32* @can_trap1() {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %exit, label %block1
 
@@ -38,7 +38,7 @@
 ; CHECK-NOT: select i1 %tobool, i32* select (i1 icmp eq (i64 urem (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a), i32* null
 define i32* @can_trap2() {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %exit, label %block1
 
@@ -57,7 +57,7 @@
 ; CHECK: select i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a), i32* select (i1 icmp eq (i64 add (i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64), i64 2), i64 0), i32* null, i32* @a), i32* null
 define i32* @cannot_trap() {
 entry:
-  %0 = load i32* @a, align 4
+  %0 = load i32, i32* @a, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %exit, label %block1
 
diff --git a/llvm/test/Transforms/SimplifyCFG/SpeculativeExec.ll b/llvm/test/Transforms/SimplifyCFG/SpeculativeExec.ll
index 31de3c8b..c23a96d 100644
--- a/llvm/test/Transforms/SimplifyCFG/SpeculativeExec.ll
+++ b/llvm/test/Transforms/SimplifyCFG/SpeculativeExec.ll
@@ -34,11 +34,11 @@
 ; CHECK-LABEL: @test4(
 
 entry:
-  %cond1 = load volatile i1* %dummy
+  %cond1 = load volatile i1, i1* %dummy
   br i1 %cond1, label %if, label %end
 
 if:
-  %cond2 = load volatile i1* %dummy
+  %cond2 = load volatile i1, i1* %dummy
   br i1 %cond2, label %then, label %end
 
 then:
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
index 0df7963..f3e5506 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-covered-bug.ll
@@ -10,7 +10,7 @@
 ; CHECK-NEXT: sub i3 %arg, -4
 ; CHECK-NEXT: zext i3 %switch.tableidx to i4
 ; CHECK-NEXT: getelementptr inbounds [8 x i64], [8 x i64]* @switch.table, i32 0, i4 %switch.tableidx.zext
-; CHECK-NEXT: load i64* %switch.gep
+; CHECK-NEXT: load i64, i64* %switch.gep
 ; CHECK-NEXT: add i64
 ; CHECK-NEXT: ret i64
 define i64 @test(i3 %arg) {
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
index 18e04f6..2600870 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-table-bug.ll
@@ -9,7 +9,7 @@
 ; CHECK-NEXT: sub i2 %0, -2
 ; CHECK-NEXT: zext i2 %switch.tableidx to i3
 ; CHECK-NEXT: getelementptr inbounds [4 x i64], [4 x i64]* @switch.table, i32 0, i3 %switch.tableidx.zext
-; CHECK-NEXT: load i64* %switch.gep
+; CHECK-NEXT: load i64, i64* %switch.gep
 ; CHECK-NEXT: ret i64 %switch.load
 define i64 @_TFO6reduce1E5toRawfS0_FT_Si(i2) {
 entry:
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index 85a3680..6bdd649 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -59,7 +59,7 @@
 ; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
 ; CHECK: switch.lookup:
 ; CHECK-NEXT: %switch.gep = getelementptr inbounds [7 x i32], [7 x i32]* @switch.table, i32 0, i32 %switch.tableidx
-; CHECK-NEXT: %switch.load = load i32* %switch.gep
+; CHECK-NEXT: %switch.load = load i32, i32* %switch.gep
 ; CHECK-NEXT: ret i32 %switch.load
 ; CHECK: return:
 ; CHECK-NEXT: ret i32 15
@@ -98,7 +98,7 @@
 ; CHECK-NEXT: %switch.downshift = lshr i32 89655594, %switch.shiftamt
 ; CHECK-NEXT: %switch.masked = trunc i32 %switch.downshift to i8
 ; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x float], [4 x float]* @switch.table1, i32 0, i32 %switch.tableidx
-; CHECK-NEXT: %switch.load = load float* %switch.gep
+; CHECK-NEXT: %switch.load = load float, float* %switch.gep
 ; CHECK-NEXT: br label %sw.epilog
 ; CHECK: sw.epilog:
 ; CHECK-NEXT: %a.0 = phi i8 [ %switch.masked, %switch.lookup ], [ 7, %entry ]
@@ -145,7 +145,7 @@
 ; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
 ; CHECK: switch.lookup:
 ; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*], [4 x i8*]* @switch.table2, i32 0, i32 %switch.tableidx
-; CHECK-NEXT: %switch.load = load i8** %switch.gep
+; CHECK-NEXT: %switch.load = load i8*, i8** %switch.gep
 ; CHECK-NEXT: ret i8* %switch.load
 }
 
@@ -174,7 +174,7 @@
 ; CHECK-LABEL: @earlyreturncrash(
 ; CHECK: switch.lookup:
 ; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table3, i32 0, i32 %switch.tableidx
-; CHECK-NEXT: %switch.load = load i32* %switch.gep
+; CHECK-NEXT: %switch.load = load i32, i32* %switch.gep
 ; CHECK-NEXT: ret i32 %switch.load
 ; CHECK: sw.epilog:
 ; CHECK-NEXT: ret i32 7
@@ -806,7 +806,7 @@
 ; CHECK-NOT: icmp
; CHECK-NOT: br i1
 ; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32], [4 x i32]* @switch.table7, i32 0, i32 %switch.tableidx
-; CHECK-NEXT: %switch.load = load i32* %switch.gep
+; CHECK-NEXT: %switch.load = load i32, i32* %switch.gep
 ; CHECK-NEXT: ret i32 %switch.load
 }
 
diff --git a/llvm/test/Transforms/SimplifyCFG/basictest.ll b/llvm/test/Transforms/SimplifyCFG/basictest.ll
index 5d9dad4..d228499 100644
--- a/llvm/test/Transforms/SimplifyCFG/basictest.ll
+++ b/llvm/test/Transforms/SimplifyCFG/basictest.ll
@@ -50,7 +50,7 @@
 ; CHECK: alloca i8, align 1
 ; CHECK-NEXT: call i8 @test6g
 ; CHECK-NEXT: icmp eq i8 %tmp, 0
-; CHECK-NEXT: load i8* %r, align 1{{$}}
+; CHECK-NEXT: load i8, i8* %r, align 1{{$}}
 
 bb0:
   %r = alloca i8, align 1
@@ -58,7 +58,7 @@
   %tmp1 = icmp eq i8 %tmp, 0
   br i1 %tmp1, label %bb2, label %bb1
 bb1:
-  %tmp3 = load i8* %r, align 1, !range !2, !tbaa !1
+  %tmp3 = load i8, i8* %r, align 1, !range !2, !tbaa !1
   %tmp4 = icmp eq i8 %tmp3, 1
   br i1 %tmp4, label %bb2, label %bb3
 bb2:
diff --git a/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll b/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll
index 878c0a4..fad5fce 100644
--- a/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll
+++ b/llvm/test/Transforms/SimplifyCFG/branch-fold-threshold.ll
@@ -19,7 +19,7 @@
 ; AGGRESSIVE-NOT: br i1
 
 cond.false:
-  %0 = load i32* %input, align 4
+  %0 = load i32, i32* %input, align 4
   br label %cond.end
 
 cond.end:
diff --git a/llvm/test/Transforms/SimplifyCFG/branch-phi-thread.ll b/llvm/test/Transforms/SimplifyCFG/branch-phi-thread.ll
index c19ba69..4c1b7e6 100644
--- a/llvm/test/Transforms/SimplifyCFG/branch-phi-thread.ll
+++ b/llvm/test/Transforms/SimplifyCFG/branch-phi-thread.ll
@@ -51,7 +51,7 @@
 	br i1 %C, label %T, label %F
 T:		; preds = %A, %E
 	call void @f3( )
-	%XX = load i32* %AP		; <i32> [#uses=1]
+	%XX = load i32, i32* %AP		; <i32> [#uses=1]
 	store i32 %XX, i32* %BP
 	br i1 %C, label %B, label %A
 A:		; preds = %T
diff --git a/llvm/test/Transforms/SimplifyCFG/dbginfo.ll b/llvm/test/Transforms/SimplifyCFG/dbginfo.ll
index 1a9f20a..12aec91 100644
--- a/llvm/test/Transforms/SimplifyCFG/dbginfo.ll
+++ b/llvm/test/Transforms/SimplifyCFG/dbginfo.ll
@@ -58,7 +58,7 @@
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
 	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram947 to { }*))
 	store %struct.__false_type* %this, %struct.__false_type** %this_addr
-	%0 = load %struct.__false_type** %this_addr, align 4		; <%struct.__false_type*> [#uses=1]
+	%0 = load %struct.__false_type*, %struct.__false_type** %this_addr, align 4		; <%struct.__false_type*> [#uses=1]
 	call void @_ZN9__gnu_cxx13new_allocatorIP5SceneED2Ev(%struct.__false_type* %0) nounwind
 	br label %bb
 
diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll b/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll
index 5c83e2a..c1ca605 100644
--- a/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll
+++ b/llvm/test/Transforms/SimplifyCFG/hoist-common-code.ll
@@ -6,12 +6,12 @@
         br i1 %P, label %T, label %F
 T:              ; preds = %0
         store i32 1, i32* %Q
-        %A = load i32* %Q               ; <i32> [#uses=1]
+        %A = load i32, i32* %Q               ; <i32> [#uses=1]
         call void @bar( i32 %A )
         ret void
 F:              ; preds = %0
         store i32 1, i32* %Q
-        %B = load i32* %Q               ; <i32> [#uses=1]
+        %B = load i32, i32* %Q               ; <i32> [#uses=1]
         call void @bar( i32 %B )
         ret void
 }
diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-with-range.ll b/llvm/test/Transforms/SimplifyCFG/hoist-with-range.ll
index 7ca3ff2..0a2b282 100644
--- a/llvm/test/Transforms/SimplifyCFG/hoist-with-range.ll
+++ b/llvm/test/Transforms/SimplifyCFG/hoist-with-range.ll
@@ -2,15 +2,15 @@
 
 define void @foo(i1 %c, i8* %p) {
 ; CHECK: if:
-; CHECK-NEXT: load i8* %p, !range !0
+; CHECK-NEXT: load i8, i8* %p, !range !0
 ; CHECK: !0 = !{i8 0, i8 1, i8 3, i8 5}
 if:
   br i1 %c, label %then, label %else
 then:
-  %t = load i8* %p, !range !0
+  %t = load i8, i8* %p, !range !0
   br label %out
 else:
-  %e = load i8* %p, !range !1
+  %e = load i8, i8* %p, !range !1
   br label %out
 out:
   ret void
diff --git a/llvm/test/Transforms/SimplifyCFG/indirectbr.ll b/llvm/test/Transforms/SimplifyCFG/indirectbr.ll
index 7001a4e..67e23d2 100644
--- a/llvm/test/Transforms/SimplifyCFG/indirectbr.ll
+++ b/llvm/test/Transforms/SimplifyCFG/indirectbr.ll
@@ -17,7 +17,7 @@
   store i8* blockaddress(@indbrtest0, %BB1), i8** %P
   store i8* blockaddress(@indbrtest0, %BB2), i8** %P
   call void @foo()
-  %t = load i8** %Q
+  %t = load i8*, i8** %Q
   indirectbr i8* %t, [label %BB0, label %BB1, label %BB2, label %BB0, label %BB1, label %BB2]
 BB0:
   call void @A()
@@ -42,7 +42,7 @@
 entry:
   store i8* blockaddress(@indbrtest1, %BB0), i8** %P
   call void @foo()
-  %t = load i8** %Q
+  %t = load i8*, i8** %Q
   indirectbr i8* %t, [label %BB0, label %BB0]
 BB0:
   call void @A()
@@ -193,7 +193,7 @@
 xlab8x:                                           ; preds = %xlab5x
   %xvaluex = call i32 @xselectorx()
   %xblkx.x = getelementptr [9 x i8*], [9 x i8*]* @xblkx.bbs, i32 0, i32 %xvaluex
-  %xblkx.load = load i8** %xblkx.x
+  %xblkx.load = load i8*, i8** %xblkx.x
   indirectbr i8* %xblkx.load, [label %xblkx.begin, label %xblkx.begin3, label %xblkx.begin4, label %xblkx.begin5, label %xblkx.begin6, label %xblkx.begin7, label %xblkx.begin8, label %xblkx.begin9, label %xblkx.end]
 
 xblkx.begin:
diff --git a/llvm/test/Transforms/SimplifyCFG/iterative-simplify.ll b/llvm/test/Transforms/SimplifyCFG/iterative-simplify.ll
index a397411..60728b9 100644
--- a/llvm/test/Transforms/SimplifyCFG/iterative-simplify.ll
+++ b/llvm/test/Transforms/SimplifyCFG/iterative-simplify.ll
@@ -17,13 +17,13 @@
 	br label %bb
 
 bb:		; preds = %cond_next, %cond_true
-	%tmp = load i32* %z		; <i32> [#uses=1]
+	%tmp = load i32, i32* %z		; <i32> [#uses=1]
 	%tmp1 = sub i32 %tmp, 16384		; <i32> [#uses=1]
 	store i32 %tmp1, i32* %z
-	%tmp2 = load i32* %i		; <i32> [#uses=1]
+	%tmp2 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp3 = add i32 %tmp2, 1		; <i32> [#uses=1]
 	store i32 %tmp3, i32* %i
-	%tmp4 = load i32* %i		; <i32> [#uses=1]
+	%tmp4 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp5 = icmp sgt i32 %tmp4, 262144		; <i1> [#uses=1]
 	%tmp56 = zext i1 %tmp5 to i8		; <i8> [#uses=1]
 	%toBool7 = icmp ne i8 %tmp56, 0		; <i1> [#uses=1]
@@ -34,7 +34,7 @@
 	unreachable
 
 cond_next:		; preds = %bb
-	%tmp9 = load i32* %z		; <i32> [#uses=1]
+	%tmp9 = load i32, i32* %z		; <i32> [#uses=1]
 	%tmp10 = icmp ne i32 %tmp9, 0		; <i1> [#uses=1]
 	%tmp1011 = zext i1 %tmp10 to i8		; <i8> [#uses=1]
 	%toBool12 = icmp ne i8 %tmp1011, 0		; <i1> [#uses=1]
@@ -53,13 +53,13 @@
 	br label %bb17
 
 bb17:		; preds = %cond_next27, %cond_true15
-	%tmp18 = load i32* %z16		; <i32> [#uses=1]
+	%tmp18 = load i32, i32* %z16		; <i32> [#uses=1]
 	%tmp19 = sub i32 %tmp18, 16384		; <i32> [#uses=1]
 	store i32 %tmp19, i32* %z16
-	%tmp20 = load i32* %i		; <i32> [#uses=1]
+	%tmp20 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp21 = add i32 %tmp20, 1		; <i32> [#uses=1]
 	store i32 %tmp21, i32* %i
-	%tmp22 = load i32* %i		; <i32> [#uses=1]
+	%tmp22 = load i32, i32* %i		; <i32> [#uses=1]
 	%tmp23 = icmp sgt i32 %tmp22, 262144		; <i1> [#uses=1]
 	%tmp2324 = zext i1 %tmp23 to i8		; <i8> [#uses=1]
 	%toBool25 = icmp ne i8 %tmp2324, 0		; <i1> [#uses=1]
@@ -70,7 +70,7 @@
 	unreachable
 
 cond_next27:		; preds = %bb17
-	%tmp28 = load i32* %z16		; <i32> [#uses=1]
+	%tmp28 = load i32, i32* %z16		; <i32> [#uses=1]
 	%tmp29 = icmp ne i32 %tmp28, 0		; <i1> [#uses=1]
 	%tmp2930 = zext i1 %tmp29 to i8		; <i8> [#uses=1]
 	%toBool31 = icmp ne i8 %tmp2930, 0		; <i1> [#uses=1]
@@ -91,7 +91,7 @@
 	br label %return
 
 return:		; preds = %cond_next35
-	%retval36 = load i32* %retval		; <i32> [#uses=1]
+	%retval36 = load i32, i32* %retval		; <i32> [#uses=1]
 	ret i32 %retval36
 }
 
diff --git a/llvm/test/Transforms/SimplifyCFG/multiple-phis.ll b/llvm/test/Transforms/SimplifyCFG/multiple-phis.ll
index 3ecce2e..a6eef09 100644
--- a/llvm/test/Transforms/SimplifyCFG/multiple-phis.ll
+++ b/llvm/test/Transforms/SimplifyCFG/multiple-phis.ll
@@ -23,7 +23,7 @@
   %div = udiv i32 %add, 2
   %idxprom = zext i32 %div to i64
   %arrayidx = getelementptr inbounds i32, i32* %r, i64 %idxprom
-  %0 = load i32* %arrayidx
+  %0 = load i32, i32* %arrayidx
   %cmp1 = icmp ult i32 %k, %0
   br i1 %cmp1, label %if.then, label %if.else
 
diff --git a/llvm/test/Transforms/SimplifyCFG/no_speculative_loads_with_tsan.ll b/llvm/test/Transforms/SimplifyCFG/no_speculative_loads_with_tsan.ll
index b388cc5..4792e95 100644
--- a/llvm/test/Transforms/SimplifyCFG/no_speculative_loads_with_tsan.ll
+++ b/llvm/test/Transforms/SimplifyCFG/no_speculative_loads_with_tsan.ll
@@ -8,7 +8,7 @@
   br i1 %tobool, label %return, label %if.then
 
 if.then:                                          ; preds = %entry
-  %0 = load i32* @g, align 4
+  %0 = load i32, i32* @g, align 4
   br label %return
 
 return:                                           ; preds = %entry, %if.then
@@ -26,7 +26,7 @@
   br i1 %tobool, label %return, label %if.then
 
 if.then:                                          ; preds = %entry
-  %0 = load i32* @g, align 4
+  %0 = load i32, i32* @g, align 4
   br label %return
 
 return:                                           ; preds = %entry, %if.then
@@ -34,7 +34,7 @@
   ret i32 %retval
 ; CHECK-LABEL: @TestTsan
 ; CHECK: br i1
-; CHECK: load i32* @g
+; CHECK: load i32, i32* @g
 ; CHECK: br label
 ; CHECK: ret i32
 }
diff --git a/llvm/test/Transforms/SimplifyCFG/phi-undef-loadstore.ll b/llvm/test/Transforms/SimplifyCFG/phi-undef-loadstore.ll
index ad73f14..c0f0046a 100644
--- a/llvm/test/Transforms/SimplifyCFG/phi-undef-loadstore.ll
+++ b/llvm/test/Transforms/SimplifyCFG/phi-undef-loadstore.ll
@@ -21,7 +21,7 @@
 
 if.end7:                                          ; preds = %if.else, %if.then4, %if.then
   %x.0 = phi i32* [ %a, %if.then ], [ %c, %if.then4 ], [ null, %if.else ]
-  %tmp9 = load i32* %x.0
+  %tmp9 = load i32, i32* %x.0
   ret i32 %tmp9
 
 ; CHECK-LABEL: @test1(
@@ -50,7 +50,7 @@
 
 if.end7:                                          ; preds = %if.else, %if.then4, %if.then
   %x.0 = phi i32* [ %a, %if.then ], [ null, %if.then4 ], [ null, %if.else ]
-  %tmp9 = load i32* %x.0
+  %tmp9 = load i32, i32* %x.0
   ret i32 %tmp9
 ; CHECK-LABEL: @test2(
 ; CHECK: if.else:
@@ -79,7 +79,7 @@
 if.end7:                                          ; preds = %if.else, %if.then4, %if.then
   %x.0 = phi i32* [ %a, %if.then ], [ null, %if.then4 ], [ null, %if.else ]
   tail call void @bar() nounwind
-  %tmp9 = load i32* %x.0
+  %tmp9 = load i32, i32* %x.0
   ret i32 %tmp9
 ; CHECK-LABEL: @test3(
 ; CHECK: if.end7:
@@ -106,7 +106,7 @@
 if.end7:                                          ; preds = %if.else, %if.then4, %if.then
   %x.0 = phi i32* [ %a, %if.then ], [ null, %if.then4 ], [ null, %if.else ]
   %gep = getelementptr i32, i32* %x.0, i32 10
-  %tmp9 = load i32* %gep
+  %tmp9 = load i32, i32* %gep
   %tmp10 = or i32 %tmp9, 1
   store i32 %tmp10, i32* %gep
   ret i32 %tmp9
diff --git a/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll b/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll
index 7802a05..ae1794b 100644
--- a/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll
+++ b/llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll
@@ -353,7 +353,7 @@
   %tobool = icmp eq i32 %bit.0, 0
   br i1 %tobool, label %for.exit, label %for.body3, !prof !10
 for.body3:
-  %v3 = load i32* @max_regno, align 4
+  %v3 = load i32, i32* @max_regno, align 4
   %cmp4 = icmp eq i32 %i.1, %v3
   br i1 %cmp4, label %for.exit, label %for.inc, !prof !11
 for.inc:
diff --git a/llvm/test/Transforms/SimplifyCFG/speculate-store.ll b/llvm/test/Transforms/SimplifyCFG/speculate-store.ll
index c082f2c..c1ac7bc 100644
--- a/llvm/test/Transforms/SimplifyCFG/speculate-store.ll
+++ b/llvm/test/Transforms/SimplifyCFG/speculate-store.ll
@@ -3,14 +3,14 @@
 define void @ifconvertstore(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
 entry:
   %arrayidx = getelementptr inbounds i32, i32* %B, i64 0
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %C
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 0
 
 ; First store to the location.
   store i32 %add, i32* %arrayidx2, align 4
   %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 1
-  %1 = load i32* %arrayidx4, align 4
+  %1 = load i32, i32* %arrayidx4, align 4
   %add5 = add nsw i32 %1, %D
   %cmp6 = icmp sgt i32 %add5, %C
   br i1 %cmp6, label %if.then, label %ret.end
@@ -31,14 +31,14 @@
 define void @noifconvertstore1(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
 entry:
   %arrayidx = getelementptr inbounds i32, i32* %B, i64 0
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %C
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 0
 
 ; Store to a different location.
   store i32 %add, i32* %arrayidx, align 4
   %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 1
-  %1 = load i32* %arrayidx4, align 4
+  %1 = load i32, i32* %arrayidx4, align 4
   %add5 = add nsw i32 %1, %D
   %cmp6 = icmp sgt i32 %add5, %C
   br i1 %cmp6, label %if.then, label %ret.end
@@ -58,7 +58,7 @@
 define void @noifconvertstore2(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
 entry:
   %arrayidx = getelementptr inbounds i32, i32* %B, i64 0
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %C
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 0
 
@@ -66,7 +66,7 @@
   store i32 %add, i32* %arrayidx2, align 4
   call void @unknown_fun()
   %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 1
-  %1 = load i32* %arrayidx4, align 4
+  %1 = load i32, i32* %arrayidx4, align 4
   %add5 = add nsw i32 %1, %D
   %cmp6 = icmp sgt i32 %add5, %C
   br i1 %cmp6, label %if.then, label %ret.end
@@ -84,14 +84,14 @@
 define void @noifconvertstore_volatile(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
 entry:
   %arrayidx = getelementptr inbounds i32, i32* %B, i64 0
-  %0 = load i32* %arrayidx, align 4
+  %0 = load i32, i32* %arrayidx, align 4
   %add = add nsw i32 %0, %C
   %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 0
 
 ; First store to the location.
   store i32 %add, i32* %arrayidx2, align 4
   %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 1
-  %1 = load i32* %arrayidx4, align 4
+  %1 = load i32, i32* %arrayidx4, align 4
   %add5 = add nsw i32 %1, %D
   %cmp6 = icmp sgt i32 %add5, %C
   br i1 %cmp6, label %if.then, label %ret.end
diff --git a/llvm/test/Transforms/SimplifyCFG/speculate-with-offset.ll b/llvm/test/Transforms/SimplifyCFG/speculate-with-offset.ll
index f92e316..65ebb5c 100644
--- a/llvm/test/Transforms/SimplifyCFG/speculate-with-offset.ll
+++ b/llvm/test/Transforms/SimplifyCFG/speculate-with-offset.ll
@@ -17,7 +17,7 @@
   br label %return
 
 if.end:                                           ; preds = %entry
-  %tmp5 = load i64** %__a.addr, align 8
+  %tmp5 = load i64*, i64** %__a.addr, align 8
   br label %return
 
 return:                                           ; preds = %if.end, %if.then
@@ -39,7 +39,7 @@
   br label %return
 
 if.end:                                           ; preds = %entry
-  %tmp5 = load i64** %__a.addr, align 8
+  %tmp5 = load i64*, i64** %__a.addr, align 8
   br label %return
 
 return:                                           ; preds = %if.end, %if.then
@@ -61,7 +61,7 @@
   br label %return
 
 if.end:                                           ; preds = %entry
-  %tmp5 = load i64** %__a.addr, align 8
+  %tmp5 = load i64*, i64** %__a.addr, align 8
   br label %return
 
 return:                                           ; preds = %if.end, %if.then
@@ -83,7 +83,7 @@
   br label %return
 
 if.end:                                           ; preds = %entry
-  %tmp5 = load i64** %__a.addr, align 8
+  %tmp5 = load i64*, i64** %__a.addr, align 8
   br label %return
 
 return:                                           ; preds = %if.end, %if.then
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-to-select-multiple-edge-per-block-phi.ll b/llvm/test/Transforms/SimplifyCFG/switch-to-select-multiple-edge-per-block-phi.ll
index ddf5d1f..f5359b5 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-to-select-multiple-edge-per-block-phi.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-to-select-multiple-edge-per-block-phi.ll
@@ -20,12 +20,12 @@
 ; CHECK: %switch.selectcmp1 = icmp eq i32 %1, 5
 ; CHECK: %switch.select2 = select i1 %switch.selectcmp1, i32 5, i32 %switch.select
 entry:
-  %0 = load i32* @b, align 4
+  %0 = load i32, i32* @b, align 4
   %tobool = icmp eq i32 %0, 0
   br i1 %tobool, label %if.end3, label %if.then
 
 if.then:
-  %1 = load i32* @a, align 4
+  %1 = load i32, i32* @a, align 4
   switch i32 %1, label %if.end3 [
     i32 5, label %return
     i32 0, label %return
diff --git a/llvm/test/Transforms/SimplifyCFG/switch_create.ll b/llvm/test/Transforms/SimplifyCFG/switch_create.ll
index f141931..490b751 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch_create.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch_create.ll
@@ -155,7 +155,7 @@
 define i1 @test6({ i32, i32 }* %I) {
 entry:
         %tmp.1.i = getelementptr { i32, i32 }, { i32, i32 }* %I, i64 0, i32 1         ; <i32*> [#uses=1]
-        %tmp.2.i = load i32* %tmp.1.i           ; <i32> [#uses=6]
+        %tmp.2.i = load i32, i32* %tmp.1.i           ; <i32> [#uses=6]
         %tmp.2 = icmp eq i32 %tmp.2.i, 14               ; <i1> [#uses=1]
         br i1 %tmp.2, label %shortcirc_done.4, label %shortcirc_next.0
 shortcirc_next.0:               ; preds = %entry
diff --git a/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll b/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll
index 5ae62af..5881367 100644
--- a/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll
+++ b/llvm/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll
@@ -11,7 +11,7 @@
         br i1 %0, label %bb, label %return
 
 bb:             ; preds = %entry
-        %1 = load volatile i32* null
+        %1 = load volatile i32, i32* null
         unreachable
         
         br label %return
diff --git a/llvm/test/Transforms/SimplifyCFG/unreachable-blocks.ll b/llvm/test/Transforms/SimplifyCFG/unreachable-blocks.ll
index 0aaafec..87a64ad 100644
--- a/llvm/test/Transforms/SimplifyCFG/unreachable-blocks.ll
+++ b/llvm/test/Transforms/SimplifyCFG/unreachable-blocks.ll
@@ -15,7 +15,7 @@
   br i1 %test, label %and_if1, label %and_if_cont2
 
 and_if1:                                          ; preds = %while_block
-  %char = load i8* %newptr
+  %char = load i8, i8* %newptr
   %test2 = icmp ule i8 %char, 32
   br label %and_if_cont2
 
diff --git a/llvm/test/Transforms/Sink/basic.ll b/llvm/test/Transforms/Sink/basic.ll
index c56922a..1bbf161 100644
--- a/llvm/test/Transforms/Sink/basic.ll
+++ b/llvm/test/Transforms/Sink/basic.ll
@@ -8,11 +8,11 @@
 
 ;      CHECK-LABEL: @foo(
 ;      CHECK: true:
-; CHECK-NEXT: %l = load i32* @A
+; CHECK-NEXT: %l = load i32, i32* @A
 ; CHECK-NEXT: ret i32 %l
 
 define i32 @foo(i1 %z) {
-  %l = load i32* @A
+  %l = load i32, i32* @A
   store i32 0, i32* @B
   br i1 %z, label %true, label %false
 true:
@@ -28,7 +28,7 @@
 ; CHECK-NEXT: store i32
 
 define i32 @foo2(i1 %z) {
-  %l = load volatile i32* @A
+  %l = load volatile i32, i32* @A
   store i32 0, i32* @B
   br i1 %z, label %true, label %false
 true:
@@ -79,7 +79,7 @@
   store i32 0, i32* %0
   store i32 1, i32* %2
   %3 = getelementptr i32, i32* %0, i32 %b
-  %4 = load i32* %3
+  %4 = load i32, i32* %3
   ret i32 %4
 
 endif:
@@ -104,7 +104,7 @@
   store i32 0, i32* %0
   store i32 1, i32* %2
   %3 = getelementptr i32, i32* %0, i32 %b
-  %4 = load i32* %3
+  %4 = load i32, i32* %3
   ret i32 %4
 
 endif:
@@ -135,7 +135,7 @@
   store i32 0, i32* %0
   store i32 1, i32* %2
   %3 = getelementptr i32, i32* %0, i32 %b
-  %4 = load i32* %3
+  %4 = load i32, i32* %3
   ret i32 %4
 
 endif:
diff --git a/llvm/test/Transforms/StripSymbols/strip-dead-debug-info.ll b/llvm/test/Transforms/StripSymbols/strip-dead-debug-info.ll
index aca7cd6..722d2b7 100644
--- a/llvm/test/Transforms/StripSymbols/strip-dead-debug-info.ll
+++ b/llvm/test/Transforms/StripSymbols/strip-dead-debug-info.ll
@@ -19,7 +19,7 @@
 define i32 @foo(i32 %i) #2 {
 entry:
   tail call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !15, metadata !{}), !dbg !20
-  %.0 = load i32* @xyz, align 4
+  %.0 = load i32, i32* @xyz, align 4
   ret i32 %.0, !dbg !21
 }
 
diff --git a/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll b/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
index 8aed08b..ba9aa29 100644
--- a/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
+++ b/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
@@ -29,10 +29,10 @@
 ; CHECK: br label %Flow
 lor.lhs.false:                                    ; preds = %for.body
   %arrayidx = getelementptr inbounds float, float* %nr, i64 %indvars.iv
-  %tmp1 = load float* %arrayidx, align 4
+  %tmp1 = load float, float* %arrayidx, align 4
   %tmp2 = add nsw i64 %indvars.iv, -1
   %arrayidx2 = getelementptr inbounds float, float* %nr, i64 %tmp2
-  %tmp3 = load float* %arrayidx2, align 4
+  %tmp3 = load float, float* %arrayidx2, align 4
   %cmp3 = fcmp une float %tmp1, %tmp3
   br i1 %cmp3, label %if.then, label %for.body.1
 
@@ -45,7 +45,7 @@
   %sub4 = sub nsw i32 %tmp0, %prev_start.026
   %tmp4 = add nsw i64 %indvars.iv, -1
   %arrayidx8 = getelementptr inbounds float, float* %nr, i64 %tmp4
-  %tmp5 = load float* %arrayidx8, align 4
+  %tmp5 = load float, float* %arrayidx8, align 4
   br i1 %cmp1, label %for.end, label %for.body.1
 
 ; CHECK: for.end:
@@ -84,7 +84,7 @@
 ; CHECK: br label %for.body.backedge
 if.then6.6:                                       ; preds = %for.body.6
   %arrayidx8.6 = getelementptr inbounds float, float* %nr, i64 %indvars.iv.next.454
-  %tmp29 = load float* %arrayidx8.6, align 4
+  %tmp29 = load float, float* %arrayidx8.6, align 4
   br label %for.body.backedge
 
 ; CHECK: Flow3:
diff --git a/llvm/test/Transforms/TailCallElim/basic.ll b/llvm/test/Transforms/TailCallElim/basic.ll
index 8e9814b..2488b55 100644
--- a/llvm/test/Transforms/TailCallElim/basic.ll
+++ b/llvm/test/Transforms/TailCallElim/basic.ll
@@ -183,7 +183,7 @@
   %a = alloca i8*
   %b = alloca i8
   call void @test11_helper1(i8** %a, i8* %b)  ; a = &b
-  %c = load i8** %a
+  %c = load i8*, i8** %a
   call void @test11_helper2(i8* %c)
 ; CHECK: call void @test11_helper2
   ret void
diff --git a/llvm/test/Transforms/TailCallElim/dont_reorder_load.ll b/llvm/test/Transforms/TailCallElim/dont_reorder_load.ll
index a29b72e..ac399a1 100644
--- a/llvm/test/Transforms/TailCallElim/dont_reorder_load.ll
+++ b/llvm/test/Transforms/TailCallElim/dont_reorder_load.ll
@@ -21,7 +21,7 @@
 else:		; preds = %entry
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
 	%tmp8 = call fastcc i32 @no_tailrecelim_1(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
-	%tmp9 = load i32* @extern_weak_global		; <i32> [#uses=1]
+	%tmp9 = load i32, i32* @extern_weak_global		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
 	ret i32 %tmp10
 }
@@ -40,7 +40,7 @@
 else:		; preds = %entry
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
 	%tmp8 = call fastcc i32 @no_tailrecelim_2(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
-	%tmp9 = load i32* %a_arg		; <i32> [#uses=1]
+	%tmp9 = load i32, i32* %a_arg		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
 	ret i32 %tmp10
 }
@@ -58,7 +58,7 @@
 else:		; preds = %entry
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
 	%tmp8 = call fastcc i32 @no_tailrecelim_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
-	%tmp9 = load volatile i32* %a_arg		; <i32> [#uses=1]
+	%tmp9 = load volatile i32, i32* %a_arg		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
 	ret i32 %tmp10
 }
diff --git a/llvm/test/Transforms/TailCallElim/reorder_load.ll b/llvm/test/Transforms/TailCallElim/reorder_load.ll
index 2e350d6..b989bbf 100644
--- a/llvm/test/Transforms/TailCallElim/reorder_load.ll
+++ b/llvm/test/Transforms/TailCallElim/reorder_load.ll
@@ -16,7 +16,7 @@
 define fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly {
 ; CHECK-LABEL: @raise_load_1(
 ; CHECK-NOT: call
-; CHECK: load i32*
+; CHECK: load i32, i32*
 ; CHECK-NOT: call
 ; CHECK: }
 entry:
@@ -29,7 +29,7 @@
 else:		; preds = %entry
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
 	%tmp8 = call fastcc i32 @raise_load_1(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
-	%tmp9 = load i32* %a_arg		; <i32> [#uses=1]
+	%tmp9 = load i32, i32* %a_arg		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
 	ret i32 %tmp10
 }
@@ -40,7 +40,7 @@
 define fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
 ; CHECK-LABEL: @raise_load_2(
 ; CHECK-NOT: call
-; CHECK: load i32*
+; CHECK: load i32, i32*
 ; CHECK-NOT: call
 ; CHECK: }
 entry:
@@ -60,7 +60,7 @@
 recurse:		; preds = %else
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
 	%tmp8 = call fastcc i32 @raise_load_2(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
-	%tmp9 = load i32* @global		; <i32> [#uses=1]
+	%tmp9 = load i32, i32* @global		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
 	ret i32 %tmp10
 }
@@ -71,7 +71,7 @@
 define fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) nounwind readonly {
 ; CHECK-LABEL: @raise_load_3(
 ; CHECK-NOT: call
-; CHECK: load i32*
+; CHECK: load i32, i32*
 ; CHECK-NOT: call
 ; CHECK: }
 entry:
@@ -84,7 +84,7 @@
 else:		; preds = %entry
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
 	%tmp8 = call fastcc i32 @raise_load_3(i32* %a_arg, i32 %a_len_arg, i32 %tmp7)		; <i32> [#uses=1]
-	%tmp9 = load i32* @extern_weak_global		; <i32> [#uses=1]
+	%tmp9 = load i32, i32* @extern_weak_global		; <i32> [#uses=1]
 	%tmp10 = add i32 %tmp9, %tmp8		; <i32> [#uses=1]
 	ret i32 %tmp10
 }
@@ -96,8 +96,8 @@
 define fastcc i32 @raise_load_4(i32* %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
 ; CHECK-LABEL: @raise_load_4(
 ; CHECK-NOT: call
-; CHECK: load i32*
-; CHECK-NEXT: load i32*
+; CHECK: load i32, i32*
+; CHECK-NEXT: load i32, i32*
 ; CHECK-NOT: call
 ; CHECK: }
 entry:
@@ -116,9 +116,9 @@
 
 recurse:		; preds = %else
 	%tmp7 = add i32 %start_arg, 1		; <i32> [#uses=1]
-	%first = load i32* %a_arg		; <i32> [#uses=1]
+	%first = load i32, i32* %a_arg		; <i32> [#uses=1]
 	%tmp8 = call fastcc i32 @raise_load_4(i32* %a_arg, i32 %first, i32 %tmp7)		; <i32> [#uses=1]
-	%second = load i32* %a_arg		; <i32> [#uses=1]
+	%second = load i32, i32* %a_arg		; <i32> [#uses=1]
 	%tmp10 = add i32 %second, %tmp8		; <i32> [#uses=1]
 	ret i32 %tmp10
 }
diff --git a/llvm/test/Verifier/2006-10-15-AddrLabel.ll b/llvm/test/Verifier/2006-10-15-AddrLabel.ll
index decbf5b..bd2c40d 100644
--- a/llvm/test/Verifier/2006-10-15-AddrLabel.ll
+++ b/llvm/test/Verifier/2006-10-15-AddrLabel.ll
@@ -5,6 +5,6 @@
 define i32 @main() {
          %foo  = call i8* %llvm.stacksave()
          %foop = bitcast i8* %foo to label*
-         %nret = load label* %foop
+         %nret = load label, label* %foop
          br label %nret
 }
diff --git a/llvm/test/Verifier/range-1.ll b/llvm/test/Verifier/range-1.ll
index fda65cb..170badc 100644
--- a/llvm/test/Verifier/range-1.ll
+++ b/llvm/test/Verifier/range-1.ll
@@ -11,7 +11,7 @@
 
 define i8 @f2(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !1
+  %y = load i8, i8* %x, align 1, !range !1
   ret i8 %y
 }
 !1 = !{}
@@ -19,7 +19,7 @@
 
 define i8 @f3(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !2
+  %y = load i8, i8* %x, align 1, !range !2
   ret i8 %y
 }
 !2 = !{i8 0}
@@ -27,7 +27,7 @@
 
 define i8 @f4(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !3
+  %y = load i8, i8* %x, align 1, !range !3
   ret i8 %y
 }
 !3 = !{double 0.0, i8 0}
@@ -35,7 +35,7 @@
 
 define i8 @f5(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !4
+  %y = load i8, i8* %x, align 1, !range !4
   ret i8 %y
 }
 !4 = !{i8 0, double 0.0}
@@ -43,7 +43,7 @@
 
 define i8 @f6(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !5
+  %y = load i8, i8* %x, align 1, !range !5
   ret i8 %y
 }
 !5 = !{i32 0, i8 0}
@@ -52,7 +52,7 @@
 
 define i8 @f7(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !6
+  %y = load i8, i8* %x, align 1, !range !6
   ret i8 %y
 }
 !6 = !{i8 0, i32 0}
@@ -61,7 +61,7 @@
 
 define i8 @f8(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !7
+  %y = load i8, i8* %x, align 1, !range !7
   ret i8 %y
 }
 !7 = !{i32 0, i32 0}
@@ -70,7 +70,7 @@
 
 define i8 @f9(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !8
+  %y = load i8, i8* %x, align 1, !range !8
   ret i8 %y
 }
 !8 = !{i8 0, i8 0}
@@ -78,7 +78,7 @@
 
 define i8 @f10(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !9
+  %y = load i8, i8* %x, align 1, !range !9
   ret i8 %y
 }
 !9 = !{i8 0, i8 2, i8 1, i8 3}
@@ -86,7 +86,7 @@
 
 define i8 @f11(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !10
+  %y = load i8, i8* %x, align 1, !range !10
   ret i8 %y
 }
 !10 = !{i8 0, i8 2, i8 2, i8 3}
@@ -94,7 +94,7 @@
 
 define i8 @f12(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !11
+  %y = load i8, i8* %x, align 1, !range !11
   ret i8 %y
 }
 !11 = !{i8 1, i8 2, i8 -1, i8 0}
@@ -102,7 +102,7 @@
 
 define i8 @f13(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !12
+  %y = load i8, i8* %x, align 1, !range !12
   ret i8 %y
 }
 !12 = !{i8 1, i8 3, i8 5, i8 1}
@@ -110,7 +110,7 @@
 
 define i8 @f14(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !13
+  %y = load i8, i8* %x, align 1, !range !13
   ret i8 %y
 }
 !13 = !{i8 1, i8 3, i8 5, i8 2}
@@ -118,7 +118,7 @@
 
 define i8 @f15(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !14
+  %y = load i8, i8* %x, align 1, !range !14
   ret i8 %y
 }
 !14 = !{i8 10, i8 1, i8 12, i8 13}
@@ -126,7 +126,7 @@
 
 define i8 @f16(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !16
+  %y = load i8, i8* %x, align 1, !range !16
   ret i8 %y
 }
 !16 = !{i8 1, i8 3, i8 4, i8 5, i8 6, i8 2}
@@ -134,7 +134,7 @@
 
 define i8 @f17(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !17
+  %y = load i8, i8* %x, align 1, !range !17
   ret i8 %y
 }
 !17 = !{i8 1, i8 3, i8 4, i8 5, i8 6, i8 1}
diff --git a/llvm/test/Verifier/range-2.ll b/llvm/test/Verifier/range-2.ll
index f8891c8..b7c9a6e 100644
--- a/llvm/test/Verifier/range-2.ll
+++ b/llvm/test/Verifier/range-2.ll
@@ -2,35 +2,35 @@
 
 define i8 @f1(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !0
+  %y = load i8, i8* %x, align 1, !range !0
   ret i8 %y
 }
 !0 = !{i8 0, i8 1}
 
 define i8 @f2(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !1
+  %y = load i8, i8* %x, align 1, !range !1
   ret i8 %y
 }
 !1 = !{i8 255, i8 1}
 
 define i8 @f3(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !2
+  %y = load i8, i8* %x, align 1, !range !2
   ret i8 %y
 }
 !2 = !{i8 1, i8 3, i8 5, i8 42}
 
 define i8 @f4(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !3
+  %y = load i8, i8* %x, align 1, !range !3
   ret i8 %y
 }
 !3 = !{i8 -1, i8 0, i8 1, i8 2}
 
 define i8 @f5(i8* %x) {
 entry:
-  %y = load i8* %x, align 1, !range !4
+  %y = load i8, i8* %x, align 1, !range !4
   ret i8 %y
 }
 !4 = !{i8 -1, i8 0, i8 1, i8 -2}
diff --git a/llvm/test/tools/gold/slp-vectorize.ll b/llvm/test/tools/gold/slp-vectorize.ll
index 242f255..a75f0b3 100644
--- a/llvm/test/tools/gold/slp-vectorize.ll
+++ b/llvm/test/tools/gold/slp-vectorize.ll
@@ -11,19 +11,19 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
 define void @f(float* nocapture %x) {
-  %tmp = load float* %x, align 4
+  %tmp = load float, float* %x, align 4
   %add = fadd float %tmp, 1.000000e+00
   store float %add, float* %x, align 4
   %arrayidx1 = getelementptr inbounds float, float* %x, i64 1
-  %tmp1 = load float* %arrayidx1, align 4
+  %tmp1 = load float, float* %arrayidx1, align 4
   %add2 = fadd float %tmp1, 1.000000e+00
   store float %add2, float* %arrayidx1, align 4
   %arrayidx3 = getelementptr inbounds float, float* %x, i64 2
-  %tmp2 = load float* %arrayidx3, align 4
+  %tmp2 = load float, float* %arrayidx3, align 4
   %add4 = fadd float %tmp2, 1.000000e+00
   store float %add4, float* %arrayidx3, align 4
   %arrayidx5 = getelementptr inbounds float, float* %x, i64 3
-  %tmp3 = load float* %arrayidx5, align 4
+  %tmp3 = load float, float* %arrayidx5, align 4
   %add6 = fadd float %tmp3, 1.000000e+00
   store float %add6, float* %arrayidx5, align 4
   ret void
diff --git a/llvm/test/tools/gold/vectorize.ll b/llvm/test/tools/gold/vectorize.ll
index e58ce79..121c96e 100644
--- a/llvm/test/tools/gold/vectorize.ll
+++ b/llvm/test/tools/gold/vectorize.ll
@@ -18,7 +18,7 @@
 bb1:
   %i.0 = phi i64 [ 0, %bb ], [ %tmp4, %bb1 ]
   %tmp = getelementptr inbounds float, float* %x, i64 %i.0
-  %tmp2 = load float* %tmp, align 4
+  %tmp2 = load float, float* %tmp, align 4
   %tmp3 = fadd float %tmp2, 1.000000e+00
   store float %tmp3, float* %tmp, align 4
   %tmp4 = add nsw i64 %i.0, 1