[opaque pointer type] Add textual IR support for explicit type parameter to load instruction

Essentially the same as the GEP change in r230786.

A similar migration script can be used to update test cases, though a few more
test case improvements/changes were required this time around (r229269-r229278):

import re
import sys

# Match the old single-type form:  load [atomic] [volatile] <ty> addrspace(N)* <ptr>
# (the addrspace is optional). \1 = everything through the pointee type, \2 = the
# pointee type itself, \3 = the optional address space, \4 = the pointer operand.
pat = re.compile(r"((?:=|:|^)\s*load (?:atomic )?(?:volatile )?(.*?))(| addrspace\(\d+\) *)\*($| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$)")

for line in sys.stdin:
  # Rewrite to the new two-type form:  load <ty>, <ty> addrspace(N)* <ptr>
  sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line))
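
As a quick illustration (a hypothetical sample, not part of the original change; the
input line and variable names below are made up), running the same substitution over
a single pre-migration load line produces the new two-type syntax:

import re

pat = re.compile(r"((?:=|:|^)\s*load (?:atomic )?(?:volatile )?(.*?))(| addrspace\(\d+\) *)\*($| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$)")

old = "  %a = load i32 addrspace(1)* %in"
print(re.sub(pat, r"\1, \2\3*\4", old))
# prints:   %a = load i32, i32 addrspace(1)* %in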

Reviewers: rafael, dexonsmith, grosser

Differential Revision: http://reviews.llvm.org/D7649

llvm-svn: 230794
diff --git a/llvm/test/CodeGen/R600/32-bit-local-address-space.ll b/llvm/test/CodeGen/R600/32-bit-local-address-space.ll
index ee0c4f0..5a6ce2f5 100644
--- a/llvm/test/CodeGen/R600/32-bit-local-address-space.ll
+++ b/llvm/test/CodeGen/R600/32-bit-local-address-space.ll
@@ -15,7 +15,7 @@
 ; SI: ds_read_b32 v{{[0-9]+}}, [[PTR]]
 define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 entry:
-  %0 = load i32 addrspace(3)* %in
+  %0 = load i32, i32 addrspace(3)* %in
   store i32 %0, i32 addrspace(1)* %out
   ret void
 }
@@ -27,7 +27,7 @@
 define void @local_address_gep(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %offset) {
 entry:
   %0 = getelementptr i32, i32 addrspace(3)* %in, i32 %offset
-  %1 = load i32 addrspace(3)* %0
+  %1 = load i32, i32 addrspace(3)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -38,7 +38,7 @@
 define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 entry:
   %0 = getelementptr i32, i32 addrspace(3)* %in, i32 1
-  %1 = load i32 addrspace(3)* %0
+  %1 = load i32, i32 addrspace(3)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -51,7 +51,7 @@
 define void @local_address_gep_large_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 entry:
   %0 = getelementptr i32, i32 addrspace(3)* %in, i32 16385
-  %1 = load i32 addrspace(3)* %0
+  %1 = load i32, i32 addrspace(3)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -73,7 +73,7 @@
 ; SI: ds_read_b32
 define void @mul_32bit_ptr(float addrspace(1)* %out, [3 x float] addrspace(3)* %lds, i32 %tid) {
   %ptr = getelementptr [3 x float], [3 x float] addrspace(3)* %lds, i32 %tid, i32 0
-  %val = load float addrspace(3)* %ptr
+  %val = load float, float addrspace(3)* %ptr
   store float %val, float addrspace(1)* %out
   ret void
 }
@@ -84,7 +84,7 @@
 ; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0
 ; SI: ds_read_b32 v{{[0-9]+}}, [[REG]]
 define void @infer_ptr_alignment_global_offset(float addrspace(1)* %out, i32 %tid) {
-  %val = load float addrspace(3)* @g_lds
+  %val = load float, float addrspace(3)* @g_lds
   store float %val, float addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/add-debug.ll b/llvm/test/CodeGen/R600/add-debug.ll
index a83c689..529905d 100644
--- a/llvm/test/CodeGen/R600/add-debug.ll
+++ b/llvm/test/CodeGen/R600/add-debug.ll
@@ -9,7 +9,7 @@
   br i1 %0, label %if, label %else
 
 if:
-  %1 = load i64 addrspace(1)* %in
+  %1 = load i64, i64 addrspace(1)* %in
   br label %endif
 
 else:
diff --git a/llvm/test/CodeGen/R600/add.ll b/llvm/test/CodeGen/R600/add.ll
index ca95af3..7027161 100644
--- a/llvm/test/CodeGen/R600/add.ll
+++ b/llvm/test/CodeGen/R600/add.ll
@@ -10,8 +10,8 @@
 ;SI: buffer_store_dword [[REG]],
 define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32 addrspace(1)* %in
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %in
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %result = add i32 %a, %b
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -26,8 +26,8 @@
 
 define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1)* %in
-  %b = load <2 x i32> addrspace(1)* %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
   %result = add <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -46,8 +46,8 @@
 
 define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1)* %in
-  %b = load <4 x i32> addrspace(1)* %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
   %result = add <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -136,7 +136,7 @@
 ; SI-NOT: v_addc_u32_e32 s
 define void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
 entry:
-  %0 = load i64 addrspace(1)* %in
+  %0 = load i64, i64 addrspace(1)* %in
   %1 = add i64 %a, %0
   store i64 %1, i64 addrspace(1)* %out
   ret void
@@ -152,7 +152,7 @@
   br i1 %0, label %if, label %else
 
 if:
-  %1 = load i64 addrspace(1)* %in
+  %1 = load i64, i64 addrspace(1)* %in
   br label %endif
 
 else:
diff --git a/llvm/test/CodeGen/R600/add_i64.ll b/llvm/test/CodeGen/R600/add_i64.ll
index 1e1065a..8346add 100644
--- a/llvm/test/CodeGen/R600/add_i64.ll
+++ b/llvm/test/CodeGen/R600/add_i64.ll
@@ -10,8 +10,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() readnone
   %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
   %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
-  %a = load i64 addrspace(1)* %a_ptr
-  %b = load i64 addrspace(1)* %b_ptr
+  %a = load i64, i64 addrspace(1)* %a_ptr
+  %b = load i64, i64 addrspace(1)* %b_ptr
   %result = add i64 %a, %b
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -22,7 +22,7 @@
 ; SI: v_add_i32
 ; SI: v_addc_u32
 define void @sgpr_operand(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 addrspace(1)* noalias %in_bar, i64 %a) {
-  %foo = load i64 addrspace(1)* %in, align 8
+  %foo = load i64, i64 addrspace(1)* %in, align 8
   %result = add i64 %foo, %a
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -35,7 +35,7 @@
 ; SI: v_add_i32
 ; SI: v_addc_u32
 define void @sgpr_operand_reversed(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %a) {
-  %foo = load i64 addrspace(1)* %in, align 8
+  %foo = load i64, i64 addrspace(1)* %in, align 8
   %result = add i64 %a, %foo
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -62,8 +62,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() readnone
   %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
   %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
-  %a = load <2 x i64> addrspace(1)* %a_ptr
-  %b = load <2 x i64> addrspace(1)* %b_ptr
+  %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr
+  %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
   %result = add <2 x i64> %a, %b
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/address-space.ll b/llvm/test/CodeGen/R600/address-space.ll
index 97e3d93..4be8c58 100644
--- a/llvm/test/CodeGen/R600/address-space.ll
+++ b/llvm/test/CodeGen/R600/address-space.ll
@@ -21,8 +21,8 @@
   br label %bb32
 
 bb32:
-  %a = load float addrspace(3)* %x, align 4
-  %b = load float addrspace(3)* %y, align 4
+  %a = load float, float addrspace(3)* %x, align 4
+  %b = load float, float addrspace(3)* %y, align 4
   %cmp = fcmp one float %a, %b
   br i1 %cmp, label %bb34, label %bb33
 
diff --git a/llvm/test/CodeGen/R600/and.ll b/llvm/test/CodeGen/R600/and.ll
index 54aaba7..5672d47 100644
--- a/llvm/test/CodeGen/R600/and.ll
+++ b/llvm/test/CodeGen/R600/and.ll
@@ -11,8 +11,8 @@
 
 define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1) * %in
-  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
   %result = and <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -31,8 +31,8 @@
 
 define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
   %result = and <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -57,8 +57,8 @@
 ; FUNC-LABEL: {{^}}v_and_i32:
 ; SI: v_and_b32
 define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) {
-  %a = load i32 addrspace(1)* %aptr, align 4
-  %b = load i32 addrspace(1)* %bptr, align 4
+  %a = load i32, i32 addrspace(1)* %aptr, align 4
+  %b = load i32, i32 addrspace(1)* %bptr, align 4
   %and = and i32 %a, %b
   store i32 %and, i32 addrspace(1)* %out, align 4
   ret void
@@ -67,7 +67,7 @@
 ; FUNC-LABEL: {{^}}v_and_constant_i32
 ; SI: v_and_b32_e32 v{{[0-9]+}}, 0x12d687, v{{[0-9]+}}
 define void @v_and_constant_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
-  %a = load i32 addrspace(1)* %aptr, align 4
+  %a = load i32, i32 addrspace(1)* %aptr, align 4
   %and = and i32 %a, 1234567
   store i32 %and, i32 addrspace(1)* %out, align 4
   ret void
@@ -76,7 +76,7 @@
 ; FUNC-LABEL: {{^}}v_and_inline_imm_64_i32
 ; SI: v_and_b32_e32 v{{[0-9]+}}, 64, v{{[0-9]+}}
 define void @v_and_inline_imm_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
-  %a = load i32 addrspace(1)* %aptr, align 4
+  %a = load i32, i32 addrspace(1)* %aptr, align 4
   %and = and i32 %a, 64
   store i32 %and, i32 addrspace(1)* %out, align 4
   ret void
@@ -85,7 +85,7 @@
 ; FUNC-LABEL: {{^}}v_and_inline_imm_neg_16_i32
 ; SI: v_and_b32_e32 v{{[0-9]+}}, -16, v{{[0-9]+}}
 define void @v_and_inline_imm_neg_16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr) {
-  %a = load i32 addrspace(1)* %aptr, align 4
+  %a = load i32, i32 addrspace(1)* %aptr, align 4
   %and = and i32 %a, -16
   store i32 %and, i32 addrspace(1)* %out, align 4
   ret void
@@ -120,8 +120,8 @@
 ; SI: v_and_b32
 ; SI: v_and_b32
 define void @v_and_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
-  %a = load i64 addrspace(1)* %aptr, align 8
-  %b = load i64 addrspace(1)* %bptr, align 8
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
+  %b = load i64, i64 addrspace(1)* %bptr, align 8
   %and = and i64 %a, %b
   store i64 %and, i64 addrspace(1)* %out, align 8
   ret void
@@ -136,8 +136,8 @@
   br i1 %tmp0, label %if, label %endif
 
 if:
-  %a = load i64 addrspace(1)* %aptr, align 8
-  %b = load i64 addrspace(1)* %bptr, align 8
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
+  %b = load i64, i64 addrspace(1)* %bptr, align 8
   %and = and i64 %a, %b
   br label %endif
 
@@ -151,7 +151,7 @@
 ; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 ; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
-  %a = load i64 addrspace(1)* %aptr, align 8
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
   %and = and i64 %a, 1234567
   store i64 %and, i64 addrspace(1)* %out, align 8
   ret void
@@ -162,7 +162,7 @@
 ; SI: v_and_b32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
 ; SI: v_and_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
 define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
-  %a = load i64 addrspace(1)* %aptr, align 8
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
   %and = and i64 %a, 64
   store i64 %and, i64 addrspace(1)* %out, align 8
   ret void
diff --git a/llvm/test/CodeGen/R600/array-ptr-calc-i32.ll b/llvm/test/CodeGen/R600/array-ptr-calc-i32.ll
index e588e29..8c2a079 100644
--- a/llvm/test/CodeGen/R600/array-ptr-calc-i32.ll
+++ b/llvm/test/CodeGen/R600/array-ptr-calc-i32.ll
@@ -29,14 +29,14 @@
   %tid = call i32 @llvm.SI.tid() readnone
   %a_ptr = getelementptr i32, i32 addrspace(1)* %inA, i32 %tid
   %b_ptr = getelementptr i32, i32 addrspace(1)* %inB, i32 %tid
-  %a = load i32 addrspace(1)* %a_ptr
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %a_ptr
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %result = add i32 %a, %b
   %alloca_ptr = getelementptr [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b
   store i32 %result, i32* %alloca_ptr, align 4
   ; Dummy call
   call void @llvm.AMDGPU.barrier.local() nounwind noduplicate
-  %reload = load i32* %alloca_ptr, align 4
+  %reload = load i32, i32* %alloca_ptr, align 4
   %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   store i32 %reload, i32 addrspace(1)* %out_ptr, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/array-ptr-calc-i64.ll b/llvm/test/CodeGen/R600/array-ptr-calc-i64.ll
index f3db9d7..eae095e 100644
--- a/llvm/test/CodeGen/R600/array-ptr-calc-i64.ll
+++ b/llvm/test/CodeGen/R600/array-ptr-calc-i64.ll
@@ -9,8 +9,8 @@
   %tid = call i32 @llvm.SI.tid() readnone
   %a_ptr = getelementptr [1025 x i32], [1025 x i32] addrspace(1)* %inA, i32 %tid, i32 0
   %b_ptr = getelementptr i32, i32 addrspace(1)* %inB, i32 %tid
-  %a = load i32 addrspace(1)* %a_ptr
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %a_ptr
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %result = add i32 %a, %b
   store i32 %result, i32 addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/big_alu.ll b/llvm/test/CodeGen/R600/big_alu.ll
index 28be216..8206f33 100644
--- a/llvm/test/CodeGen/R600/big_alu.ll
+++ b/llvm/test/CodeGen/R600/big_alu.ll
@@ -51,29 +51,29 @@
   %43 = extractelement <4 x float> %reg7, i32 1
   %44 = extractelement <4 x float> %reg7, i32 2
   %45 = extractelement <4 x float> %reg7, i32 3
-  %46 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %46 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
   %47 = extractelement <4 x float> %46, i32 0
-  %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
   %49 = extractelement <4 x float> %48, i32 1
-  %50 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %50 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
   %51 = extractelement <4 x float> %50, i32 2
-  %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+  %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
   %53 = extractelement <4 x float> %52, i32 0
-  %54 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %54 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
   %55 = extractelement <4 x float> %54, i32 0
-  %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
   %57 = extractelement <4 x float> %56, i32 1
-  %58 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %58 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
   %59 = extractelement <4 x float> %58, i32 2
-  %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
   %61 = extractelement <4 x float> %60, i32 3
-  %62 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %62 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
   %63 = extractelement <4 x float> %62, i32 0
-  %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
   %65 = extractelement <4 x float> %64, i32 1
-  %66 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %66 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
   %67 = extractelement <4 x float> %66, i32 2
-  %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
   %69 = extractelement <4 x float> %68, i32 0
   %70 = fcmp oge float %69, 3.500000e+00
   %71 = sext i1 %70 to i32
@@ -81,7 +81,7 @@
   %73 = bitcast float %72 to i32
   %74 = icmp ne i32 %73, 0
   %. = select i1 %74, float 0.000000e+00, float 0.000000e+00
-  %75 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %75 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
   %76 = extractelement <4 x float> %75, i32 0
   %77 = fcmp oge float %76, 2.000000e+00
   %78 = sext i1 %77 to i32
@@ -135,7 +135,7 @@
   %123 = insertelement <4 x float> %122, float 0.000000e+00, i32 3
   %124 = call float @llvm.AMDGPU.dp4(<4 x float> %119, <4 x float> %123)
   %125 = fdiv float 1.000000e+00, %124
-  %126 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %126 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
   %127 = extractelement <4 x float> %126, i32 0
   %128 = fmul float %127, %125
   %129 = fmul float %103, %128
@@ -347,15 +347,15 @@
   %329 = fmul float %314, %328
   %330 = fmul float %316, %328
   %331 = fmul float %318, %328
-  %332 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %332 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
   %333 = extractelement <4 x float> %332, i32 0
   %334 = fsub float -0.000000e+00, %333
   %335 = fadd float 1.000000e+00, %334
-  %336 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %336 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
   %337 = extractelement <4 x float> %336, i32 0
   %338 = fsub float -0.000000e+00, %337
   %339 = fadd float 1.000000e+00, %338
-  %340 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %340 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
   %341 = extractelement <4 x float> %340, i32 0
   %342 = fsub float -0.000000e+00, %341
   %343 = fadd float 1.000000e+00, %342
@@ -1018,7 +1018,7 @@
   %temp92.11 = phi float [ %877, %IF176 ], [ %temp92.10, %ENDIF172 ]
   %temp93.5 = phi float [ %878, %IF176 ], [ %temp93.4, %ENDIF172 ]
   %temp94.5 = phi float [ %879, %IF176 ], [ %temp94.4, %ENDIF172 ]
-  %880 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %880 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
   %881 = extractelement <4 x float> %880, i32 0
   %882 = fcmp olt float %881, %179
   %883 = sext i1 %882 to i32
@@ -1114,12 +1114,12 @@
   %960 = fmul float %temp87.6, %956
   %961 = fmul float %2, -2.000000e+00
   %962 = fadd float %961, 1.000000e+00
-  %963 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
+  %963 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
   %964 = extractelement <4 x float> %963, i32 2
   %965 = fsub float -0.000000e+00, %964
   %966 = fadd float %962, %965
   %967 = fdiv float 1.000000e+00, %966
-  %968 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 24)
+  %968 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 24)
   %969 = extractelement <4 x float> %968, i32 2
   %970 = fmul float %969, %967
   %971 = fsub float -0.000000e+00, %53
diff --git a/llvm/test/CodeGen/R600/bitcast.ll b/llvm/test/CodeGen/R600/bitcast.ll
index 1ba64af..fd56d95 100644
--- a/llvm/test/CodeGen/R600/bitcast.ll
+++ b/llvm/test/CodeGen/R600/bitcast.ll
@@ -9,7 +9,7 @@
 ; SI: s_endpgm
 define void @v32i8_to_v8i32(<32 x i8> addrspace(2)* inreg) #0 {
 entry:
-  %1 = load <32 x i8> addrspace(2)* %0
+  %1 = load <32 x i8>, <32 x i8> addrspace(2)* %0
   %2 = bitcast <32 x i8> %1 to <8 x i32>
   %3 = extractelement <8 x i32> %2, i32 1
   %4 = icmp ne i32 %3, 0
@@ -23,34 +23,34 @@
 define void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
 entry:
   %0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
-  %1 = load <16 x i8> addrspace(1)* %0
+  %1 = load <16 x i8>, <16 x i8> addrspace(1)* %0
   store <16 x i8> %1, <16 x i8> addrspace(1)* %out
   ret void
 }
 
 define void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
-  %load = load float addrspace(1)* %in, align 4
+  %load = load float, float addrspace(1)* %in, align 4
   %bc = bitcast float %load to <2 x i16>
   store <2 x i16> %bc, <2 x i16> addrspace(1)* %out, align 4
   ret void
 }
 
 define void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
-  %load = load <2 x i16> addrspace(1)* %in, align 4
+  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
   %bc = bitcast <2 x i16> %load to float
   store float %bc, float addrspace(1)* %out, align 4
   ret void
 }
 
 define void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
-  %load = load <4 x i8> addrspace(1)* %in, align 4
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   %bc = bitcast <4 x i8> %load to i32
   store i32 %bc, i32 addrspace(1)* %out, align 4
   ret void
 }
 
 define void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %bc = bitcast i32 %load to <4 x i8>
   store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
   ret void
@@ -59,7 +59,7 @@
 ; FUNC-LABEL: {{^}}bitcast_v2i32_to_f64:
 ; SI: s_endpgm
 define void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
-  %val = load <2 x i32> addrspace(1)* %in, align 8
+  %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
   %add = add <2 x i32> %val, <i32 4, i32 9>
   %bc = bitcast <2 x i32> %add to double
   store double %bc, double addrspace(1)* %out, align 8
@@ -69,7 +69,7 @@
 ; FUNC-LABEL: {{^}}bitcast_f64_to_v2i32:
 ; SI: s_endpgm
 define void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) {
-  %val = load double addrspace(1)* %in, align 8
+  %val = load double, double addrspace(1)* %in, align 8
   %add = fadd double %val, 4.0
   %bc = bitcast double %add to <2 x i32>
   store <2 x i32> %bc, <2 x i32> addrspace(1)* %out, align 8
diff --git a/llvm/test/CodeGen/R600/bswap.ll b/llvm/test/CodeGen/R600/bswap.ll
index e93543d..4cf8e4b 100644
--- a/llvm/test/CodeGen/R600/bswap.ll
+++ b/llvm/test/CodeGen/R600/bswap.ll
@@ -18,7 +18,7 @@
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
 define void @test_bswap_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %bswap = call i32 @llvm.bswap.i32(i32 %val) nounwind readnone
   store i32 %bswap, i32 addrspace(1)* %out, align 4
   ret void
@@ -33,7 +33,7 @@
 ; SI-DAG: v_bfi_b32
 ; SI: s_endpgm
 define void @test_bswap_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) nounwind {
-  %val = load <2 x i32> addrspace(1)* %in, align 8
+  %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
   %bswap = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %val) nounwind readnone
   store <2 x i32> %bswap, <2 x i32> addrspace(1)* %out, align 8
   ret void
@@ -54,7 +54,7 @@
 ; SI-DAG: v_bfi_b32
 ; SI: s_endpgm
 define void @test_bswap_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) nounwind {
-  %val = load <4 x i32> addrspace(1)* %in, align 16
+  %val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
   %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val) nounwind readnone
   store <4 x i32> %bswap, <4 x i32> addrspace(1)* %out, align 16
   ret void
@@ -87,28 +87,28 @@
 ; SI-DAG: v_bfi_b32
 ; SI: s_endpgm
 define void @test_bswap_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) nounwind {
-  %val = load <8 x i32> addrspace(1)* %in, align 32
+  %val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32
   %bswap = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %val) nounwind readnone
   store <8 x i32> %bswap, <8 x i32> addrspace(1)* %out, align 32
   ret void
 }
 
 define void @test_bswap_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) nounwind {
-  %val = load i64 addrspace(1)* %in, align 8
+  %val = load i64, i64 addrspace(1)* %in, align 8
   %bswap = call i64 @llvm.bswap.i64(i64 %val) nounwind readnone
   store i64 %bswap, i64 addrspace(1)* %out, align 8
   ret void
 }
 
 define void @test_bswap_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) nounwind {
-  %val = load <2 x i64> addrspace(1)* %in, align 16
+  %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
   %bswap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val) nounwind readnone
   store <2 x i64> %bswap, <2 x i64> addrspace(1)* %out, align 16
   ret void
 }
 
 define void @test_bswap_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) nounwind {
-  %val = load <4 x i64> addrspace(1)* %in, align 32
+  %val = load <4 x i64>, <4 x i64> addrspace(1)* %in, align 32
   %bswap = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %val) nounwind readnone
   store <4 x i64> %bswap, <4 x i64> addrspace(1)* %out, align 32
   ret void
diff --git a/llvm/test/CodeGen/R600/call.ll b/llvm/test/CodeGen/R600/call.ll
index 4cc7501..eb71649 100644
--- a/llvm/test/CodeGen/R600/call.ll
+++ b/llvm/test/CodeGen/R600/call.ll
@@ -14,8 +14,8 @@
 
 define void @test_call(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32 addrspace(1)* %in
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %in
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %c = call i32 @defined_function(i32 %b) nounwind
   %result = add i32 %a, %c
   store i32 %result, i32 addrspace(1)* %out
@@ -24,8 +24,8 @@
 
 define void @test_call_external(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32 addrspace(1)* %in
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %in
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %c = call i32 @external_function(i32 %b) nounwind
   %result = add i32 %a, %c
   store i32 %result, i32 addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/combine_vloads.ll b/llvm/test/CodeGen/R600/combine_vloads.ll
index d8c9c04..01572af 100644
--- a/llvm/test/CodeGen/R600/combine_vloads.ll
+++ b/llvm/test/CodeGen/R600/combine_vloads.ll
@@ -23,7 +23,7 @@
   %i.01 = phi i32 [ 0, %entry ], [ %tmp19, %for.body ]
   %arrayidx_v4 = bitcast <8 x i8> addrspace(1)* %src to <32 x i8> addrspace(1)*
   %0 = bitcast <32 x i8> addrspace(1)* %arrayidx_v4 to <8 x i32> addrspace(1)*
-  %vecload2 = load <8 x i32> addrspace(1)* %0, align 32
+  %vecload2 = load <8 x i32>, <8 x i32> addrspace(1)* %0, align 32
   %1 = bitcast <8 x i32> %vecload2 to <32 x i8>
   %tmp5 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %tmp8 = shufflevector <32 x i8> %1, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
diff --git a/llvm/test/CodeGen/R600/commute_modifiers.ll b/llvm/test/CodeGen/R600/commute_modifiers.ll
index cccc08e..7fc36ea 100644
--- a/llvm/test/CodeGen/R600/commute_modifiers.ll
+++ b/llvm/test/CodeGen/R600/commute_modifiers.ll
@@ -11,7 +11,7 @@
 define void @commute_add_imm_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %x = load float addrspace(1)* %gep.0
+  %x = load float, float addrspace(1)* %gep.0
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
   %z = fadd float 2.0, %x.fabs
   store float %z, float addrspace(1)* %out
@@ -25,7 +25,7 @@
 define void @commute_mul_imm_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %x = load float addrspace(1)* %gep.0
+  %x = load float, float addrspace(1)* %gep.0
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
   %x.fneg.fabs = fsub float -0.000000e+00, %x.fabs
   %z = fmul float 4.0, %x.fneg.fabs
@@ -40,7 +40,7 @@
 define void @commute_mul_imm_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %x = load float addrspace(1)* %gep.0
+  %x = load float, float addrspace(1)* %gep.0
   %x.fneg = fsub float -0.000000e+00, %x
   %z = fmul float 4.0, %x.fneg
   store float %z, float addrspace(1)* %out
@@ -56,7 +56,7 @@
 define void @commute_add_lit_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %x = load float addrspace(1)* %gep.0
+  %x = load float, float addrspace(1)* %gep.0
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
   %z = fadd float 1024.0, %x.fabs
   store float %z, float addrspace(1)* %out
@@ -72,8 +72,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %x = load float addrspace(1)* %gep.0
-  %y = load float addrspace(1)* %gep.1
+  %x = load float, float addrspace(1)* %gep.0
+  %y = load float, float addrspace(1)* %gep.1
   %y.fabs = call float @llvm.fabs.f32(float %y) #1
   %z = fadd float %x, %y.fabs
   store float %z, float addrspace(1)* %out
@@ -89,8 +89,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %x = load float addrspace(1)* %gep.0
-  %y = load float addrspace(1)* %gep.1
+  %x = load float, float addrspace(1)* %gep.0
+  %y = load float, float addrspace(1)* %gep.1
   %y.fneg = fsub float -0.000000e+00, %y
   %z = fmul float %x, %y.fneg
   store float %z, float addrspace(1)* %out
@@ -106,8 +106,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %x = load float addrspace(1)* %gep.0
-  %y = load float addrspace(1)* %gep.1
+  %x = load float, float addrspace(1)* %gep.0
+  %y = load float, float addrspace(1)* %gep.1
   %y.fabs = call float @llvm.fabs.f32(float %y) #1
   %y.fabs.fneg = fsub float -0.000000e+00, %y.fabs
   %z = fmul float %x, %y.fabs.fneg
@@ -125,8 +125,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %x = load float addrspace(1)* %gep.0
-  %y = load float addrspace(1)* %gep.1
+  %x = load float, float addrspace(1)* %gep.0
+  %y = load float, float addrspace(1)* %gep.1
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
   %y.fabs = call float @llvm.fabs.f32(float %y) #1
   %z = fmul float %x.fabs, %y.fabs
@@ -143,8 +143,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
-  %x = load float addrspace(1)* %gep.0
-  %y = load float addrspace(1)* %gep.1
+  %x = load float, float addrspace(1)* %gep.0
+  %y = load float, float addrspace(1)* %gep.1
   %x.fabs = call float @llvm.fabs.f32(float %x) #1
   %y.fabs = call float @llvm.fabs.f32(float %y) #1
   %y.fabs.fneg = fsub float -0.000000e+00, %y.fabs
@@ -167,8 +167,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r1 = load float addrspace(1)* %gep.0
-  %r2 = load float addrspace(1)* %gep.1
+  %r1 = load float, float addrspace(1)* %gep.0
+  %r2 = load float, float addrspace(1)* %gep.1
 
   %r2.fabs = call float @llvm.fabs.f32(float %r2)
 
diff --git a/llvm/test/CodeGen/R600/copy-illegal-type.ll b/llvm/test/CodeGen/R600/copy-illegal-type.ll
index 56c43d2..8b39756 100644
--- a/llvm/test/CodeGen/R600/copy-illegal-type.ll
+++ b/llvm/test/CodeGen/R600/copy-illegal-type.ll
@@ -6,7 +6,7 @@
 ; SI: buffer_store_dword [[REG]]
 ; SI: s_endpgm
 define void @test_copy_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
-  %val = load <4 x i8> addrspace(1)* %in, align 4
+  %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
   ret void
 }
@@ -17,7 +17,7 @@
 ; SI: buffer_store_dword [[REG]]
 ; SI: s_endpgm
 define void @test_copy_v4i8_x2(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
-  %val = load <4 x i8> addrspace(1)* %in, align 4
+  %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
   ret void
@@ -30,7 +30,7 @@
 ; SI: buffer_store_dword [[REG]]
 ; SI: s_endpgm
 define void @test_copy_v4i8_x3(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
-  %val = load <4 x i8> addrspace(1)* %in, align 4
+  %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out2, align 4
@@ -45,7 +45,7 @@
 ; SI: buffer_store_dword [[REG]]
 ; SI: s_endpgm
 define void @test_copy_v4i8_x4(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %out3, <4 x i8> addrspace(1)* %in) nounwind {
-  %val = load <4 x i8> addrspace(1)* %in, align 4
+  %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out1, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out2, align 4
@@ -82,7 +82,7 @@
 
 ; SI: s_endpgm
 define void @test_copy_v4i8_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %in) nounwind {
-  %val = load <4 x i8> addrspace(1)* %in, align 4
+  %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   %add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
   store <4 x i8> %add, <4 x i8> addrspace(1)* %out1, align 4
@@ -120,7 +120,7 @@
 
 ; SI: s_endpgm
 define void @test_copy_v4i8_x2_extra_use(<4 x i8> addrspace(1)* %out0, <4 x i8> addrspace(1)* %out1, <4 x i8> addrspace(1)* %out2, <4 x i8> addrspace(1)* %in) nounwind {
-  %val = load <4 x i8> addrspace(1)* %in, align 4
+  %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   %add = add <4 x i8> %val, <i8 9, i8 9, i8 9, i8 9>
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out0, align 4
   store <4 x i8> %add, <4 x i8> addrspace(1)* %out1, align 4
@@ -133,7 +133,7 @@
 ; SI-NOT: bfi
 ; SI: s_endpgm
 define void @test_copy_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) nounwind {
-  %val = load <3 x i8> addrspace(1)* %in, align 4
+  %val = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 4
   store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 4
   ret void
 }
@@ -145,7 +145,7 @@
 ; SI: buffer_load_ubyte
 ; SI: s_endpgm
 define void @test_copy_v4i8_volatile_load(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
-  %val = load volatile <4 x i8> addrspace(1)* %in, align 4
+  %val = load volatile <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   store <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
   ret void
 }
@@ -161,7 +161,7 @@
 ; SI: buffer_store_byte
 ; SI: s_endpgm
 define void @test_copy_v4i8_volatile_store(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
-  %val = load <4 x i8> addrspace(1)* %in, align 4
+  %val = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   store volatile <4 x i8> %val, <4 x i8> addrspace(1)* %out, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/copy-to-reg.ll b/llvm/test/CodeGen/R600/copy-to-reg.ll
index 784d2d0..fc875f6 100644
--- a/llvm/test/CodeGen/R600/copy-to-reg.ll
+++ b/llvm/test/CodeGen/R600/copy-to-reg.ll
@@ -21,7 +21,7 @@
 
 done:
   %tmp0 = getelementptr [16 x i32], [16 x i32]* %alloca, i32 0, i32 0
-  %tmp1 = load i32* %tmp0
+  %tmp1 = load i32, i32* %tmp0
   store i32 %tmp1, i32 addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/ctlz_zero_undef.ll b/llvm/test/CodeGen/R600/ctlz_zero_undef.ll
index 1a4317b..bd26c30 100644
--- a/llvm/test/CodeGen/R600/ctlz_zero_undef.ll
+++ b/llvm/test/CodeGen/R600/ctlz_zero_undef.ll
@@ -28,7 +28,7 @@
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
 define void @v_ctlz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
-  %val = load i32 addrspace(1)* %valptr, align 4
+  %val = load i32, i32 addrspace(1)* %valptr, align 4
   %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
   store i32 %ctlz, i32 addrspace(1)* %out, align 4
   ret void
@@ -44,7 +44,7 @@
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
 define void @v_ctlz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
-  %val = load <2 x i32> addrspace(1)* %valptr, align 8
+  %val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr, align 8
   %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
   store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8
   ret void
@@ -64,7 +64,7 @@
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
 ; EG: FFBH_UINT {{\*? *}}[[RESULT]]
 define void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
-  %val = load <4 x i32> addrspace(1)* %valptr, align 16
+  %val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr, align 16
   %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
   store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16
   ret void
diff --git a/llvm/test/CodeGen/R600/ctpop.ll b/llvm/test/CodeGen/R600/ctpop.ll
index c0e8e6d..0a031c5 100644
--- a/llvm/test/CodeGen/R600/ctpop.ll
+++ b/llvm/test/CodeGen/R600/ctpop.ll
@@ -31,7 +31,7 @@
 
 ; EG: BCNT_INT
 define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   store i32 %ctpop, i32 addrspace(1)* %out, align 4
   ret void
@@ -49,8 +49,8 @@
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1) nounwind {
-  %val0 = load i32 addrspace(1)* %in0, align 4
-  %val1 = load i32 addrspace(1)* %in1, align 4
+  %val0 = load i32, i32 addrspace(1)* %in0, align 4
+  %val1 = load i32, i32 addrspace(1)* %in1, align 4
   %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
   %ctpop1 = call i32 @llvm.ctpop.i32(i32 %val1) nounwind readnone
   %add = add i32 %ctpop0, %ctpop1
@@ -65,7 +65,7 @@
 ; GCN-NEXT: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
 define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
-  %val0 = load i32 addrspace(1)* %in0, align 4
+  %val0 = load i32, i32 addrspace(1)* %in0, align 4
   %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
   %add = add i32 %ctpop0, %sval
   store i32 %add, i32 addrspace(1)* %out, align 4
@@ -80,7 +80,7 @@
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define void @v_ctpop_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) nounwind {
-  %val = load <2 x i32> addrspace(1)* %in, align 8
+  %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
   %ctpop = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %val) nounwind readnone
   store <2 x i32> %ctpop, <2 x i32> addrspace(1)* %out, align 8
   ret void
@@ -98,7 +98,7 @@
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define void @v_ctpop_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %in) nounwind {
-  %val = load <4 x i32> addrspace(1)* %in, align 16
+  %val = load <4 x i32>, <4 x i32> addrspace(1)* %in, align 16
   %ctpop = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %val) nounwind readnone
   store <4 x i32> %ctpop, <4 x i32> addrspace(1)* %out, align 16
   ret void
@@ -124,7 +124,7 @@
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define void @v_ctpop_v8i32(<8 x i32> addrspace(1)* noalias %out, <8 x i32> addrspace(1)* noalias %in) nounwind {
-  %val = load <8 x i32> addrspace(1)* %in, align 32
+  %val = load <8 x i32>, <8 x i32> addrspace(1)* %in, align 32
   %ctpop = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %val) nounwind readnone
   store <8 x i32> %ctpop, <8 x i32> addrspace(1)* %out, align 32
   ret void
@@ -166,7 +166,7 @@
 ; EG: BCNT_INT
 ; EG: BCNT_INT
 define void @v_ctpop_v16i32(<16 x i32> addrspace(1)* noalias %out, <16 x i32> addrspace(1)* noalias %in) nounwind {
-  %val = load <16 x i32> addrspace(1)* %in, align 32
+  %val = load <16 x i32>, <16 x i32> addrspace(1)* %in, align 32
   %ctpop = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %val) nounwind readnone
   store <16 x i32> %ctpop, <16 x i32> addrspace(1)* %out, align 32
   ret void
@@ -180,7 +180,7 @@
 
 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_inline_constant(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   %add = add i32 %ctpop, 4
   store i32 %add, i32 addrspace(1)* %out, align 4
@@ -195,7 +195,7 @@
 
 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_inline_constant_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   %add = add i32 4, %ctpop
   store i32 %add, i32 addrspace(1)* %out, align 4
@@ -210,7 +210,7 @@
 ; GCN: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
 define void @v_ctpop_i32_add_literal(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   %add = add i32 %ctpop, 99999
   store i32 %add, i32 addrspace(1)* %out, align 4
@@ -226,7 +226,7 @@
 
 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_var(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   %add = add i32 %ctpop, %const
   store i32 %add, i32 addrspace(1)* %out, align 4
@@ -242,7 +242,7 @@
 
 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_var_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %const) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   %add = add i32 %const, %ctpop
   store i32 %add, i32 addrspace(1)* %out, align 4
@@ -259,10 +259,10 @@
 
 ; EG: BCNT_INT
 define void @v_ctpop_i32_add_vvar_inv(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 addrspace(1)* noalias %constptr) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %ctpop = call i32 @llvm.ctpop.i32(i32 %val) nounwind readnone
   %gep = getelementptr i32, i32 addrspace(1)* %constptr, i32 4
-  %const = load i32 addrspace(1)* %gep, align 4
+  %const = load i32, i32 addrspace(1)* %gep, align 4
   %add = add i32 %const, %ctpop
   store i32 %add, i32 addrspace(1)* %out, align 4
   ret void
@@ -290,7 +290,7 @@
 
 else:
   %tmp3 = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %tmp4 = load i32 addrspace(1)* %tmp3
+  %tmp4 = load i32, i32 addrspace(1)* %tmp3
   br label %endif
 
 endif:
diff --git a/llvm/test/CodeGen/R600/ctpop64.ll b/llvm/test/CodeGen/R600/ctpop64.ll
index 9841319..e1a0ee3 100644
--- a/llvm/test/CodeGen/R600/ctpop64.ll
+++ b/llvm/test/CodeGen/R600/ctpop64.ll
@@ -29,7 +29,7 @@
 ; GCN: buffer_store_dword [[RESULT]],
 ; GCN: s_endpgm
 define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
-  %val = load i64 addrspace(1)* %in, align 8
+  %val = load i64, i64 addrspace(1)* %in, align 8
   %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
   %truncctpop = trunc i64 %ctpop to i32
   store i32 %truncctpop, i32 addrspace(1)* %out, align 4
@@ -67,7 +67,7 @@
 ; GCN: v_bcnt_u32_b32
 ; GCN: s_endpgm
 define void @v_ctpop_v2i64(<2 x i32> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in) nounwind {
-  %val = load <2 x i64> addrspace(1)* %in, align 16
+  %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
   %ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
   %truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
   store <2 x i32> %truncctpop, <2 x i32> addrspace(1)* %out, align 8
@@ -85,7 +85,7 @@
 ; GCN: v_bcnt_u32_b32
 ; GCN: s_endpgm
 define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrspace(1)* noalias %in) nounwind {
-  %val = load <4 x i64> addrspace(1)* %in, align 32
+  %val = load <4 x i64>, <4 x i64> addrspace(1)* %in, align 32
   %ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
   %truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
   store <4 x i32> %truncctpop, <4 x i32> addrspace(1)* %out, align 16
@@ -114,7 +114,7 @@
 
 else:
   %tmp3 = getelementptr i64, i64 addrspace(1)* %in, i32 1
-  %tmp4 = load i64 addrspace(1)* %tmp3
+  %tmp4 = load i64, i64 addrspace(1)* %tmp3
   br label %endif
 
 endif:
diff --git a/llvm/test/CodeGen/R600/cttz_zero_undef.ll b/llvm/test/CodeGen/R600/cttz_zero_undef.ll
index d9d284c..56fcb51 100644
--- a/llvm/test/CodeGen/R600/cttz_zero_undef.ll
+++ b/llvm/test/CodeGen/R600/cttz_zero_undef.ll
@@ -28,7 +28,7 @@
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
 define void @v_cttz_zero_undef_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
-  %val = load i32 addrspace(1)* %valptr, align 4
+  %val = load i32, i32 addrspace(1)* %valptr, align 4
   %cttz = call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
   store i32 %cttz, i32 addrspace(1)* %out, align 4
   ret void
@@ -44,7 +44,7 @@
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
 define void @v_cttz_zero_undef_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
-  %val = load <2 x i32> addrspace(1)* %valptr, align 8
+  %val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr, align 8
   %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %val, i1 true) nounwind readnone
   store <2 x i32> %cttz, <2 x i32> addrspace(1)* %out, align 8
   ret void
@@ -64,7 +64,7 @@
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
 ; EG: FFBL_INT {{\*? *}}[[RESULT]]
 define void @v_cttz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
-  %val = load <4 x i32> addrspace(1)* %valptr, align 16
+  %val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr, align 16
   %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %val, i1 true) nounwind readnone
   store <4 x i32> %cttz, <4 x i32> addrspace(1)* %out, align 16
   ret void
diff --git a/llvm/test/CodeGen/R600/cvt_f32_ubyte.ll b/llvm/test/CodeGen/R600/cvt_f32_ubyte.ll
index 4d4bf93..3399d9d 100644
--- a/llvm/test/CodeGen/R600/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/R600/cvt_f32_ubyte.ll
@@ -8,7 +8,7 @@
 ; SI: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[LOADREG]]
 ; SI: buffer_store_dword [[CONV]],
 define void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
-  %load = load i8 addrspace(1)* %in, align 1
+  %load = load i8, i8 addrspace(1)* %in, align 1
   %cvt = uitofp i8 %load to float
   store float %cvt, float addrspace(1)* %out, align 4
   ret void
@@ -23,7 +23,7 @@
 ; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
 ; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
 define void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
-  %load = load <2 x i8> addrspace(1)* %in, align 2
+  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in, align 2
   %cvt = uitofp <2 x i8> %load to <2 x float>
   store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
   ret void
@@ -37,7 +37,7 @@
 ; SI-DAG: v_cvt_f32_ubyte0_e32
 ; SI: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
 define void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
-  %load = load <3 x i8> addrspace(1)* %in, align 4
+  %load = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 4
   %cvt = uitofp <3 x i8> %load to <3 x float>
   store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16
   ret void
@@ -53,7 +53,7 @@
 ; SI-DAG: v_cvt_f32_ubyte0_e32 v[[LORESULT:[0-9]+]], [[LOADREG]]
 ; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
 define void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
-  %load = load <4 x i8> addrspace(1)* %in, align 4
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   %cvt = uitofp <4 x i8> %load to <4 x float>
   store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
   ret void
@@ -77,7 +77,7 @@
 
 ; SI: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
 define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
-  %load = load <4 x i8> addrspace(1)* %in, align 1
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 1
   %cvt = uitofp <4 x i8> %load to <4 x float>
   store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
   ret void
@@ -105,7 +105,7 @@
 ; XSI: v_cvt_f32_u32_e32
 ; SI: s_endpgm
 define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind {
-  %load = load <4 x i8> addrspace(1)* %in, align 4
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
   %cvt = uitofp <4 x i8> %load to <4 x float>
   store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
   %add = add <4 x i8> %load, <i8 9, i8 9, i8 9, i8 9> ; Second use of %load
@@ -117,7 +117,7 @@
 ; SI-LABEL: {{^}}load_v7i8_to_v7f32:
 ; SI: s_endpgm
 define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
-  %load = load <7 x i8> addrspace(1)* %in, align 1
+  %load = load <7 x i8>, <7 x i8> addrspace(1)* %in, align 1
   %cvt = uitofp <7 x i8> %load to <7 x float>
   store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16
   ret void
@@ -146,7 +146,7 @@
 ; SI: buffer_store_dword
 ; SI: buffer_store_dword
 define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
-  %load = load <8 x i8> addrspace(1)* %in, align 8
+  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in, align 8
   %cvt = uitofp <8 x i8> %load to <8 x float>
   store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16
   ret void
@@ -158,7 +158,7 @@
 ; SI-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
 ; SI: buffer_store_dword [[CONV]],
 define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 2
   %inreg = and i32 %add, 255
   %cvt = uitofp i32 %inreg to float
@@ -168,7 +168,7 @@
 
 ; SI-LABEL: {{^}}i8_zext_inreg_hi1_to_f32:
 define void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %inreg = and i32 %load, 65280
   %shr = lshr i32 %inreg, 8
   %cvt = uitofp i32 %shr to float
@@ -180,7 +180,7 @@
 ; We don't get these ones because of the zext, but instcombine removes
 ; them so it shouldn't really matter.
 define void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
-  %load = load i8 addrspace(1)* %in, align 1
+  %load = load i8, i8 addrspace(1)* %in, align 1
   %ext = zext i8 %load to i32
   %cvt = uitofp i32 %ext to float
   store float %cvt, float addrspace(1)* %out, align 4
@@ -188,7 +188,7 @@
 }
 
 define void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
-  %load = load <4 x i8> addrspace(1)* %in, align 1
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 1
   %ext = zext <4 x i8> %load to <4 x i32>
   %cvt = uitofp <4 x i32> %ext to <4 x float>
   store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
diff --git a/llvm/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll b/llvm/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
index f51cdc1..fb43ff4 100644
--- a/llvm/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
+++ b/llvm/test/CodeGen/R600/dagcombiner-bug-illegal-vec4-int-to-fp.ll
@@ -13,7 +13,7 @@
 define void @sint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %sint = load i32 addrspace(1) * %in
+  %sint = load i32, i32 addrspace(1) * %in
   %conv = sitofp i32 %sint to float
   %0 = insertelement <4 x float> undef, float %conv, i32 0
   %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
@@ -27,7 +27,7 @@
 define void @uint(<4 x float> addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %uint = load i32 addrspace(1) * %in
+  %uint = load i32, i32 addrspace(1) * %in
   %conv = uitofp i32 %uint to float
   %0 = insertelement <4 x float> undef, float %conv, i32 0
   %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/R600/dot4-folding.ll b/llvm/test/CodeGen/R600/dot4-folding.ll
index dca6a59..d8975f6 100644
--- a/llvm/test/CodeGen/R600/dot4-folding.ll
+++ b/llvm/test/CodeGen/R600/dot4-folding.ll
@@ -14,8 +14,8 @@
 
 define void @main(float addrspace(1)* %out) {
 main_body:
-  %0 = load <4 x float> addrspace(8)* null
-  %1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %0 = load <4 x float>, <4 x float> addrspace(8)* null
+  %1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %2 = call float @llvm.AMDGPU.dp4(<4 x float> %0,<4 x float> %1)
   %3 = insertelement <4 x float> undef, float %2, i32 0
   call void @llvm.R600.store.swizzle(<4 x float> %3, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll b/llvm/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
index 6ec87a8..c381fc4 100644
--- a/llvm/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
+++ b/llvm/test/CodeGen/R600/ds-negative-offset-addressing-mode-loop.ll
@@ -34,19 +34,19 @@
   %k.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
   tail call void @llvm.AMDGPU.barrier.local() #1
   %arrayidx = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %offset.02
-  %tmp = load float addrspace(3)* %arrayidx, align 4
+  %tmp = load float, float addrspace(3)* %arrayidx, align 4
   %add1 = add nsw i32 %offset.02, 1
   %arrayidx2 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add1
-  %tmp1 = load float addrspace(3)* %arrayidx2, align 4
+  %tmp1 = load float, float addrspace(3)* %arrayidx2, align 4
   %add3 = add nsw i32 %offset.02, 32
   %arrayidx4 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add3
-  %tmp2 = load float addrspace(3)* %arrayidx4, align 4
+  %tmp2 = load float, float addrspace(3)* %arrayidx4, align 4
   %add5 = add nsw i32 %offset.02, 33
   %arrayidx6 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add5
-  %tmp3 = load float addrspace(3)* %arrayidx6, align 4
+  %tmp3 = load float, float addrspace(3)* %arrayidx6, align 4
   %add7 = add nsw i32 %offset.02, 64
   %arrayidx8 = getelementptr inbounds float, float addrspace(3)* %lptr, i32 %add7
-  %tmp4 = load float addrspace(3)* %arrayidx8, align 4
+  %tmp4 = load float, float addrspace(3)* %arrayidx8, align 4
   %add9 = fadd float %tmp, %tmp1
   %add10 = fadd float %add9, %tmp2
   %add11 = fadd float %add10, %tmp3
diff --git a/llvm/test/CodeGen/R600/ds_read2.ll b/llvm/test/CodeGen/R600/ds_read2.ll
index 5901e85..f53b6c0 100644
--- a/llvm/test/CodeGen/R600/ds_read2.ll
+++ b/llvm/test/CodeGen/R600/ds_read2.ll
@@ -15,10 +15,10 @@
 define void @simple_read2_f32(float addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -34,10 +34,10 @@
 define void @simple_read2_f32_max_offset(float addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 255
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -52,10 +52,10 @@
 define void @simple_read2_f32_too_far(float addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 257
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -70,20 +70,20 @@
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %idx.0 = add nsw i32 %tid.x, 0
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
 
   %idx.1 = add nsw i32 %tid.x, 8
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum.0 = fadd float %val0, %val1
 
   %idx.2 = add nsw i32 %tid.x, 11
   %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
-  %val2 = load float addrspace(3)* %arrayidx2, align 4
+  %val2 = load float, float addrspace(3)* %arrayidx2, align 4
 
   %idx.3 = add nsw i32 %tid.x, 27
   %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
-  %val3 = load float addrspace(3)* %arrayidx3, align 4
+  %val3 = load float, float addrspace(3)* %arrayidx3, align 4
   %sum.1 = fadd float %val2, %val3
 
   %sum = fadd float %sum.0, %sum.1
@@ -102,22 +102,22 @@
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %idx.0 = add nsw i32 %tid.x, 0
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
 
   %idx.1 = add nsw i32 %tid.x, 8
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum.0 = fadd float %val0, %val1
 
   call void @llvm.AMDGPU.barrier.local() #2
 
   %idx.2 = add nsw i32 %tid.x, 11
   %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
-  %val2 = load float addrspace(3)* %arrayidx2, align 4
+  %val2 = load float, float addrspace(3)* %arrayidx2, align 4
 
   %idx.3 = add nsw i32 %tid.x, 27
   %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
-  %val3 = load float addrspace(3)* %arrayidx3, align 4
+  %val3 = load float, float addrspace(3)* %arrayidx3, align 4
   %sum.1 = fadd float %val2, %val3
 
   %sum = fadd float %sum.0, %sum.1
@@ -137,20 +137,20 @@
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
 
   %idx.1 = add nsw i32 %tid.x, 8
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.1
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum.0 = fadd float %val0, %val1
 
   %idx.2 = add nsw i32 %tid.x, 11
   %arrayidx2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.2
-  %val2 = load float addrspace(3)* %arrayidx2, align 4
+  %val2 = load float, float addrspace(3)* %arrayidx2, align 4
 
   %idx.3 = add nsw i32 %tid.x, 27
   %arrayidx3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.3
-  %val3 = load float addrspace(3)* %arrayidx3, align 4
+  %val3 = load float, float addrspace(3)* %arrayidx3, align 4
   %sum.1 = fadd float %val2, %val3
 
   %sum = fadd float %sum.0, %sum.1
@@ -177,8 +177,8 @@
   %gep = getelementptr inbounds float, <2 x float addrspace(3)*> %lds.ptr, <2 x i32> %index.1
   %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
   %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
-  %val0 = load float addrspace(3)* %gep.0, align 4
-  %val1 = load float addrspace(3)* %gep.1, align 4
+  %val0 = load float, float addrspace(3)* %gep.0, align 4
+  %val1 = load float, float addrspace(3)* %gep.1, align 4
   %add.x = add nsw i32 %x.i, 8
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
@@ -207,8 +207,8 @@
   ; Apply an additional offset after the vector that will be more obviously folded.
   %gep.1.offset = getelementptr float, float addrspace(3)* %gep.1, i32 8
 
-  %val0 = load float addrspace(3)* %gep.0, align 4
-  %val1 = load float addrspace(3)* %gep.1.offset, align 4
+  %val0 = load float, float addrspace(3)* %gep.0, align 4
+  %val1 = load float, float addrspace(3)* %gep.1.offset, align 4
   %add.x = add nsw i32 %x.i, 8
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
@@ -231,8 +231,8 @@
   %gep = getelementptr inbounds [512 x float], <2 x [512 x float] addrspace(3)*> %ptr.1, <2 x i32> <i32 0, i32 0>, <2 x i32> %idx
   %gep.0 = extractelement <2 x float addrspace(3)*> %gep, i32 0
   %gep.1 = extractelement <2 x float addrspace(3)*> %gep, i32 1
-  %val0 = load float addrspace(3)* %gep.0, align 4
-  %val1 = load float addrspace(3)* %gep.1, align 4
+  %val0 = load float, float addrspace(3)* %gep.0, align 4
+  %val1 = load float, float addrspace(3)* %gep.1, align 4
   %add.x = add nsw i32 %x.i, 8
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
@@ -248,10 +248,10 @@
 define void @simple_read2_f32_volatile_0(float addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
-  %val0 = load volatile float addrspace(3)* %arrayidx0, align 4
+  %val0 = load volatile float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -266,10 +266,10 @@
 define void @simple_read2_f32_volatile_1(float addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
-  %val1 = load volatile float addrspace(3)* %arrayidx1, align 4
+  %val1 = load volatile float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -285,10 +285,10 @@
 define void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
-  %val0 = load float addrspace(3)* %arrayidx0, align 1
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 1
   %add.x = add nsw i32 %x.i, 8
   %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x
-  %val1 = load float addrspace(3)* %arrayidx1, align 1
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 1
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -301,10 +301,10 @@
 define void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
-  %val0 = load float addrspace(3)* %arrayidx0, align 2
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 2
   %add.x = add nsw i32 %x.i, 8
   %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x
-  %val1 = load float addrspace(3)* %arrayidx1, align 2
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 2
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -320,10 +320,10 @@
 define void @simple_read2_f64(double addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
-  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 8
   %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
-  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 8
@@ -336,10 +336,10 @@
 define void @simple_read2_f64_max_offset(double addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
-  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 255
   %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
-  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 8
@@ -354,10 +354,10 @@
 define void @simple_read2_f64_too_far(double addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
-  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 257
   %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
-  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 8
@@ -372,10 +372,10 @@
 define void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
-  %val0 = load double addrspace(3)* %arrayidx0, align 4
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 7
   %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
-  %val1 = load double addrspace(3)* %arrayidx1, align 4
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 4
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 4
@@ -388,8 +388,8 @@
 ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
 define void @load_constant_adjacent_offsets(i32 addrspace(1)* %out) {
-  %val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
-  %val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
+  %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+  %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 1), align 4
   %sum = add i32 %val0, %val1
   store i32 %sum, i32 addrspace(1)* %out, align 4
   ret void
@@ -399,8 +399,8 @@
 ; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:2
 define void @load_constant_disjoint_offsets(i32 addrspace(1)* %out) {
-  %val0 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
-  %val1 = load i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
+  %val0 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 0), align 4
+  %val1 = load i32, i32 addrspace(3)* getelementptr inbounds ([4 x i32] addrspace(3)* @foo, i32 0, i32 2), align 4
   %sum = add i32 %val0, %val1
   store i32 %sum, i32 addrspace(1)* %out, align 4
   ret void
@@ -413,8 +413,8 @@
 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:0 offset1:1
 ; SI: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] offset0:2 offset1:3
 define void @load_misaligned64_constant_offsets(i64 addrspace(1)* %out) {
-  %val0 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
-  %val1 = load i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
+  %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
+  %val1 = load i64, i64 addrspace(3)* getelementptr inbounds ([4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
   %sum = add i64 %val0, %val1
   store i64 %sum, i64 addrspace(1)* %out, align 8
   ret void
@@ -429,8 +429,8 @@
 ; SI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[BASE1]] offset0:0 offset1:1
 ; SI: s_endpgm
 define void @load_misaligned64_constant_large_offsets(i64 addrspace(1)* %out) {
-  %val0 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
-  %val1 = load i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
+  %val0 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 2048), align 4
+  %val1 = load i64, i64 addrspace(3)* getelementptr inbounds ([4096 x i64] addrspace(3)* @bar.large, i32 0, i32 4095), align 4
   %sum = add i64 %val0, %val1
   store i64 %sum, i64 addrspace(1)* %out, align 8
   ret void
@@ -443,33 +443,33 @@
   %x.i = tail call i32 @llvm.r600.read.tgid.x() #1
   %y.i = tail call i32 @llvm.r600.read.tidig.y() #1
   %arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
-  %tmp16 = load float addrspace(3)* %arrayidx44, align 4
+  %tmp16 = load float, float addrspace(3)* %arrayidx44, align 4
   %add47 = add nsw i32 %x.i, 1
   %arrayidx48 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add47
-  %tmp17 = load float addrspace(3)* %arrayidx48, align 4
+  %tmp17 = load float, float addrspace(3)* %arrayidx48, align 4
   %add51 = add nsw i32 %x.i, 16
   %arrayidx52 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add51
-  %tmp18 = load float addrspace(3)* %arrayidx52, align 4
+  %tmp18 = load float, float addrspace(3)* %arrayidx52, align 4
   %add55 = add nsw i32 %x.i, 17
   %arrayidx56 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %add55
-  %tmp19 = load float addrspace(3)* %arrayidx56, align 4
+  %tmp19 = load float, float addrspace(3)* %arrayidx56, align 4
   %arrayidx60 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %y.i
-  %tmp20 = load float addrspace(3)* %arrayidx60, align 4
+  %tmp20 = load float, float addrspace(3)* %arrayidx60, align 4
   %add63 = add nsw i32 %y.i, 1
   %arrayidx64 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add63
-  %tmp21 = load float addrspace(3)* %arrayidx64, align 4
+  %tmp21 = load float, float addrspace(3)* %arrayidx64, align 4
   %add67 = add nsw i32 %y.i, 32
   %arrayidx68 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add67
-  %tmp22 = load float addrspace(3)* %arrayidx68, align 4
+  %tmp22 = load float, float addrspace(3)* %arrayidx68, align 4
   %add71 = add nsw i32 %y.i, 33
   %arrayidx72 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add71
-  %tmp23 = load float addrspace(3)* %arrayidx72, align 4
+  %tmp23 = load float, float addrspace(3)* %arrayidx72, align 4
   %add75 = add nsw i32 %y.i, 64
   %arrayidx76 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add75
-  %tmp24 = load float addrspace(3)* %arrayidx76, align 4
+  %tmp24 = load float, float addrspace(3)* %arrayidx76, align 4
   %add79 = add nsw i32 %y.i, 65
   %arrayidx80 = getelementptr inbounds [776 x float], [776 x float] addrspace(3)* @sgemm.lB, i32 0, i32 %add79
-  %tmp25 = load float addrspace(3)* %arrayidx80, align 4
+  %tmp25 = load float, float addrspace(3)* %arrayidx80, align 4
   %sum.0 = fadd float %tmp16, %tmp17
   %sum.1 = fadd float %sum.0, %tmp18
   %sum.2 = fadd float %sum.1, %tmp19
@@ -484,13 +484,13 @@
 }
 
 define void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(3)* %in) #0 {
-  %load = load <2 x i32> addrspace(3)* %in, align 4
+  %load = load <2 x i32>, <2 x i32> addrspace(3)* %in, align 4
   store <2 x i32> %load, <2 x i32> addrspace(1)* %out, align 8
   ret void
 }
 
 define void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) #0 {
-  %load = load i64 addrspace(3)* %in, align 4
+  %load = load i64, i64 addrspace(3)* %in, align 4
   store i64 %load, i64 addrspace(1)* %out, align 8
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/ds_read2_offset_order.ll b/llvm/test/CodeGen/R600/ds_read2_offset_order.ll
index 107c861..9ea9a5a 100644
--- a/llvm/test/CodeGen/R600/ds_read2_offset_order.ll
+++ b/llvm/test/CodeGen/R600/ds_read2_offset_order.ll
@@ -15,30 +15,30 @@
 define void @offset_order(float addrspace(1)* %out) {
 entry:
   %ptr0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 0
-  %val0 = load float addrspace(3)* %ptr0
+  %val0 = load float, float addrspace(3)* %ptr0
 
   %ptr1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 256
-  %val1 = load float addrspace(3)* %ptr1
+  %val1 = load float, float addrspace(3)* %ptr1
   %add1 = fadd float %val0, %val1
 
   %ptr2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 3
-  %val2 = load float addrspace(3)* %ptr2
+  %val2 = load float, float addrspace(3)* %ptr2
   %add2 = fadd float %add1, %val2
 
   %ptr3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 2
-  %val3 = load float addrspace(3)* %ptr3
+  %val3 = load float, float addrspace(3)* %ptr3
   %add3 = fadd float %add2, %val3
 
   %ptr4 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 12
-  %val4 = load float addrspace(3)* %ptr4
+  %val4 = load float, float addrspace(3)* %ptr4
   %add4 = fadd float %add3, %val4
 
   %ptr5 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 14
-  %val5 = load float addrspace(3)* %ptr5
+  %val5 = load float, float addrspace(3)* %ptr5
   %add5 = fadd float %add4, %val5
 
   %ptr6 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 11
-  %val6 = load float addrspace(3)* %ptr6
+  %val6 = load float, float addrspace(3)* %ptr6
   %add6 = fadd float %add5, %val6
   store float %add6, float addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/ds_read2st64.ll b/llvm/test/CodeGen/R600/ds_read2st64.ll
index 163c687..482debb 100644
--- a/llvm/test/CodeGen/R600/ds_read2st64.ll
+++ b/llvm/test/CodeGen/R600/ds_read2st64.ll
@@ -13,10 +13,10 @@
 define void @simple_read2st64_f32_0_1(float addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 64
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -33,10 +33,10 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x.1 = add nsw i32 %x.i, 128
   %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -53,10 +53,10 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x.1 = add nsw i32 %x.i, 16320
   %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -73,10 +73,10 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x.1 = add nsw i32 %x.i, 16384
   %arrayidx1 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %add.x.1
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -89,10 +89,10 @@
 define void @odd_invalid_read2st64_f32_0(float addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 63
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -106,10 +106,10 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %add.x.1 = add nsw i32 %x.i, 127
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.1
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i32 %x.i
   store float %sum, float addrspace(1)* %out.gep, align 4
@@ -125,10 +125,10 @@
 define void @simple_read2st64_f64_0_1(double addrspace(1)* %out) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
-  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 64
   %arrayidx1 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %add.x
-  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 8
@@ -145,10 +145,10 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
-  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x.1 = add nsw i32 %x.i, 128
   %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
-  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 8
@@ -164,10 +164,10 @@
 define void @misaligned_read2st64_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
-  %val0 = load double addrspace(3)* %arrayidx0, align 4
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 64
   %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
-  %val1 = load double addrspace(3)* %arrayidx1, align 4
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 4
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 4
@@ -185,10 +185,10 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %add.x.0 = add nsw i32 %x.i, 256
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
-  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x.1 = add nsw i32 %x.i, 8128
   %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
-  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 8
@@ -205,10 +205,10 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
-  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x.1 = add nsw i32 %x.i, 8192
   %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
-  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 8
@@ -222,10 +222,10 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %add.x.0 = add nsw i32 %x.i, 64
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
-  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x.1 = add nsw i32 %x.i, 8129
   %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.1
-  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 8
@@ -242,10 +242,10 @@
 define void @byte_size_only_divisible_64_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
-  %val0 = load double addrspace(3)* %arrayidx0, align 8
+  %val0 = load double, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 8
   %arrayidx1 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x
-  %val1 = load double addrspace(3)* %arrayidx1, align 8
+  %val1 = load double, double addrspace(3)* %arrayidx1, align 8
   %sum = fadd double %val0, %val1
   %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i32 %x.i
   store double %sum, double addrspace(1)* %out.gep, align 4
diff --git a/llvm/test/CodeGen/R600/ds_write2.ll b/llvm/test/CodeGen/R600/ds_write2.ll
index aaa3f59..d06f780 100644
--- a/llvm/test/CodeGen/R600/ds_write2.ll
+++ b/llvm/test/CodeGen/R600/ds_write2.ll
@@ -12,7 +12,7 @@
 define void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i
-  %val = load float addrspace(1)* %in.gep, align 4
+  %val = load float, float addrspace(1)* %in.gep, align 4
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   store float %val, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
@@ -31,8 +31,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
   %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
-  %val0 = load float addrspace(1)* %in.gep.0, align 4
-  %val1 = load float addrspace(1)* %in.gep.1, align 4
+  %val0 = load float, float addrspace(1)* %in.gep.0, align 4
+  %val1 = load float, float addrspace(1)* %in.gep.1, align 4
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   store float %val0, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
@@ -50,8 +50,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
   %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
-  %val0 = load float addrspace(1)* %in0.gep, align 4
-  %val1 = load float addrspace(1)* %in1.gep, align 4
+  %val0 = load float, float addrspace(1)* %in0.gep, align 4
+  %val1 = load float, float addrspace(1)* %in1.gep, align 4
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   store volatile float %val0, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
@@ -69,8 +69,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
   %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
-  %val0 = load float addrspace(1)* %in0.gep, align 4
-  %val1 = load float addrspace(1)* %in1.gep, align 4
+  %val0 = load float, float addrspace(1)* %in0.gep, align 4
+  %val1 = load float, float addrspace(1)* %in1.gep, align 4
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   store float %val0, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 8
@@ -90,8 +90,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i
   %in.gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in.gep.0, i32 1
-  %val0 = load <2 x float> addrspace(1)* %in.gep.0, align 8
-  %val1 = load <2 x float> addrspace(1)* %in.gep.1, align 8
+  %val0 = load <2 x float>, <2 x float> addrspace(1)* %in.gep.0, align 8
+  %val1 = load <2 x float>, <2 x float> addrspace(1)* %in.gep.1, align 8
   %val0.0 = extractelement <2 x float> %val0, i32 0
   %val1.1 = extractelement <2 x float> %val1, i32 1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -110,7 +110,7 @@
 define void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %x.i
-  %val = load <2 x float> addrspace(1)* %in.gep, align 8
+  %val = load <2 x float>, <2 x float> addrspace(1)* %in.gep, align 8
   %val0 = extractelement <2 x float> %val, i32 0
   %val1 = extractelement <2 x float> %val, i32 1
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -129,7 +129,7 @@
 define void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 %x.i
-  %val = load <4 x float> addrspace(1)* %in.gep, align 16
+  %val = load <4 x float>, <4 x float> addrspace(1)* %in.gep, align 16
   %val0 = extractelement <4 x float> %val, i32 0
   %val1 = extractelement <4 x float> %val, i32 3
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
@@ -150,8 +150,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
   %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
-  %val0 = load float addrspace(1)* %in.gep.0, align 4
-  %val1 = load float addrspace(1)* %in.gep.1, align 4
+  %val0 = load float, float addrspace(1)* %in.gep.0, align 4
+  %val1 = load float, float addrspace(1)* %in.gep.1, align 4
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   store float %val0, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 255
@@ -168,8 +168,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
   %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
-  %val0 = load float addrspace(1)* %in0.gep, align 4
-  %val1 = load float addrspace(1)* %in1.gep, align 4
+  %val0 = load float, float addrspace(1)* %in0.gep, align 4
+  %val1 = load float, float addrspace(1)* %in1.gep, align 4
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   store float %val0, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 257
@@ -186,8 +186,8 @@
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x
   %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x
-  %val0 = load float addrspace(1)* %in0.gep, align 4
-  %val1 = load float addrspace(1)* %in1.gep, align 4
+  %val0 = load float, float addrspace(1)* %in0.gep, align 4
+  %val1 = load float, float addrspace(1)* %in1.gep, align 4
 
   %idx.0 = add nsw i32 %tid.x, 0
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
@@ -216,8 +216,8 @@
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %tid.x
   %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %tid.x
-  %val0 = load float addrspace(1)* %in0.gep, align 4
-  %val1 = load float addrspace(1)* %in1.gep, align 4
+  %val0 = load float, float addrspace(1)* %in0.gep, align 4
+  %val1 = load float, float addrspace(1)* %in1.gep, align 4
 
   %idx.0 = add nsw i32 %tid.x, 3
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %idx.0
@@ -247,8 +247,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in0.gep = getelementptr float, float addrspace(1)* %in0, i32 %x.i
   %in1.gep = getelementptr float, float addrspace(1)* %in1, i32 %x.i
-  %val0 = load float addrspace(1)* %in0.gep, align 4
-  %val1 = load float addrspace(1)* %in1.gep, align 4
+  %val0 = load float, float addrspace(1)* %in0.gep, align 4
+  %val1 = load float, float addrspace(1)* %in1.gep, align 4
 
   %index.0 = insertelement <2 x i32> undef, i32 %x.i, i32 0
   %index.1 = insertelement <2 x i32> %index.0, i32 8, i32 0
@@ -273,7 +273,7 @@
 define void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
-  %val = load double addrspace(1)* %in.gep, align 8
+  %val = load double, double addrspace(1)* %in.gep, align 8
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
   store double %val, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 8
@@ -291,7 +291,7 @@
 define void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
-  %val = load double addrspace(1)* %in.gep, align 8
+  %val = load double, double addrspace(1)* %in.gep, align 8
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
   store double %val, double addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 7
@@ -310,8 +310,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i
   %in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1
-  %val0 = load double addrspace(1)* %in.gep.0, align 8
-  %val1 = load double addrspace(1)* %in.gep.1, align 8
+  %val0 = load double, double addrspace(1)* %in.gep.0, align 8
+  %val1 = load double, double addrspace(1)* %in.gep.1, align 8
   %arrayidx0 = getelementptr inbounds [512 x double], [512 x double] addrspace(3)* @lds.f64, i32 0, i32 %x.i
   store double %val0, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 8
@@ -373,7 +373,7 @@
 define void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, float addrspace(1)* %in) #0 {
   %x.i = tail call i32 @llvm.r600.read.tgid.x() #1
   %y.i = tail call i32 @llvm.r600.read.tidig.y() #1
-  %val = load float addrspace(1)* %in
+  %val = load float, float addrspace(1)* %in
   %arrayidx44 = getelementptr inbounds [264 x float], [264 x float] addrspace(3)* @sgemm.lA, i32 0, i32 %x.i
   store float %val, float addrspace(3)* %arrayidx44, align 4
   %add47 = add nsw i32 %x.i, 1
diff --git a/llvm/test/CodeGen/R600/ds_write2st64.ll b/llvm/test/CodeGen/R600/ds_write2st64.ll
index 3070771..2044df2 100644
--- a/llvm/test/CodeGen/R600/ds_write2st64.ll
+++ b/llvm/test/CodeGen/R600/ds_write2st64.ll
@@ -12,7 +12,7 @@
 define void @simple_write2st64_one_val_f32_0_1(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep = getelementptr float, float addrspace(1)* %in, i32 %x.i
-  %val = load float addrspace(1)* %in.gep, align 4
+  %val = load float, float addrspace(1)* %in.gep, align 4
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %x.i
   store float %val, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 64
@@ -31,8 +31,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
   %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
-  %val0 = load float addrspace(1)* %in.gep.0, align 4
-  %val1 = load float addrspace(1)* %in.gep.1, align 4
+  %val0 = load float, float addrspace(1)* %in.gep.0, align 4
+  %val1 = load float, float addrspace(1)* %in.gep.1, align 4
   %add.x.0 = add nsw i32 %x.i, 128
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 %add.x.0
   store float %val0, float addrspace(3)* %arrayidx0, align 4
@@ -52,8 +52,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep.0 = getelementptr float, float addrspace(1)* %in, i32 %x.i
   %in.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
-  %val0 = load float addrspace(1)* %in.gep.0, align 4
-  %val1 = load float addrspace(1)* %in.gep.1, align 4
+  %val0 = load float, float addrspace(1)* %in.gep.0, align 4
+  %val1 = load float, float addrspace(1)* %in.gep.1, align 4
   %arrayidx0 = getelementptr inbounds float, float addrspace(3)* %lds, i32 %x.i
   store float %val0, float addrspace(3)* %arrayidx0, align 4
   %add.x = add nsw i32 %x.i, 16320
@@ -72,8 +72,8 @@
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep.0 = getelementptr double, double addrspace(1)* %in, i32 %x.i
   %in.gep.1 = getelementptr double, double addrspace(1)* %in.gep.0, i32 1
-  %val0 = load double addrspace(1)* %in.gep.0, align 8
-  %val1 = load double addrspace(1)* %in.gep.1, align 8
+  %val0 = load double, double addrspace(1)* %in.gep.0, align 8
+  %val1 = load double, double addrspace(1)* %in.gep.1, align 8
   %add.x.0 = add nsw i32 %x.i, 256
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %add.x.0
   store double %val0, double addrspace(3)* %arrayidx0, align 8
@@ -90,7 +90,7 @@
 define void @byte_size_only_divisible_64_write2st64_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
   %x.i = tail call i32 @llvm.r600.read.tidig.x() #1
   %in.gep = getelementptr double, double addrspace(1)* %in, i32 %x.i
-  %val = load double addrspace(1)* %in.gep, align 8
+  %val = load double, double addrspace(1)* %in.gep, align 8
   %arrayidx0 = getelementptr inbounds double, double addrspace(3)* %lds, i32 %x.i
   store double %val, double addrspace(3)* %arrayidx0, align 8
   %add.x = add nsw i32 %x.i, 8
diff --git a/llvm/test/CodeGen/R600/extload-private.ll b/llvm/test/CodeGen/R600/extload-private.ll
index fec8682..294c3a9 100644
--- a/llvm/test/CodeGen/R600/extload-private.ll
+++ b/llvm/test/CodeGen/R600/extload-private.ll
@@ -6,7 +6,7 @@
 define void @load_i8_sext_private(i32 addrspace(1)* %out) {
 entry:
   %tmp0 = alloca i8
-  %tmp1 = load i8* %tmp0
+  %tmp1 = load i8, i8* %tmp0
   %tmp2 = sext i8 %tmp1 to i32
   store i32 %tmp2, i32 addrspace(1)* %out
   ret void
@@ -17,7 +17,7 @@
 define void @load_i8_zext_private(i32 addrspace(1)* %out) {
 entry:
   %tmp0 = alloca i8
-  %tmp1 = load i8* %tmp0
+  %tmp1 = load i8, i8* %tmp0
   %tmp2 = zext i8 %tmp1 to i32
   store i32 %tmp2, i32 addrspace(1)* %out
   ret void
@@ -28,7 +28,7 @@
 define void @load_i16_sext_private(i32 addrspace(1)* %out) {
 entry:
   %tmp0 = alloca i16
-  %tmp1 = load i16* %tmp0
+  %tmp1 = load i16, i16* %tmp0
   %tmp2 = sext i16 %tmp1 to i32
   store i32 %tmp2, i32 addrspace(1)* %out
   ret void
@@ -39,7 +39,7 @@
 define void @load_i16_zext_private(i32 addrspace(1)* %out) {
 entry:
   %tmp0 = alloca i16
-  %tmp1 = load i16* %tmp0
+  %tmp1 = load i16, i16* %tmp0
   %tmp2 = zext i16 %tmp1 to i32
   store i32 %tmp2, i32 addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/extload.ll b/llvm/test/CodeGen/R600/extload.ll
index 77e5dc3..662eb7a 100644
--- a/llvm/test/CodeGen/R600/extload.ll
+++ b/llvm/test/CodeGen/R600/extload.ll
@@ -8,7 +8,7 @@
 
 define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
   %cast = bitcast i8 addrspace(1)* %src to i32 addrspace(1)*
-  %load = load i32 addrspace(1)* %cast, align 1
+  %load = load i32, i32 addrspace(1)* %cast, align 1
   %x = bitcast i32 %load to <4 x i8>
   %castOut = bitcast i8 addrspace(1)* %out to <4 x i8> addrspace(1)*
   store <4 x i8> %x, <4 x i8> addrspace(1)* %castOut, align 1
@@ -21,7 +21,7 @@
 
 define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrspace(1)* nocapture noalias %src) nounwind {
   %cast = bitcast i16 addrspace(1)* %src to i32 addrspace(1)*
-  %load = load i32 addrspace(1)* %cast, align 1
+  %load = load i32, i32 addrspace(1)* %cast, align 1
   %x = bitcast i32 %load to <2 x i16>
   %castOut = bitcast i16 addrspace(1)* %out to <2 x i16> addrspace(1)*
   store <2 x i16> %x, <2 x i16> addrspace(1)* %castOut, align 1
@@ -33,7 +33,7 @@
 ; EG: LDS_WRITE * [[VAL]]
 define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
   %cast = bitcast i8 addrspace(3)* %src to i32 addrspace(3)*
-  %load = load i32 addrspace(3)* %cast, align 1
+  %load = load i32, i32 addrspace(3)* %cast, align 1
   %x = bitcast i32 %load to <4 x i8>
   %castOut = bitcast i8 addrspace(3)* %out to <4 x i8> addrspace(3)*
   store <4 x i8> %x, <4 x i8> addrspace(3)* %castOut, align 1
@@ -45,7 +45,7 @@
 ; EG: LDS_WRITE * [[VAL]]
 define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 addrspace(3)* nocapture noalias %src) nounwind {
   %cast = bitcast i16 addrspace(3)* %src to i32 addrspace(3)*
-  %load = load i32 addrspace(3)* %cast, align 1
+  %load = load i32, i32 addrspace(3)* %cast, align 1
   %x = bitcast i32 %load to <2 x i16>
   %castOut = bitcast i16 addrspace(3)* %out to <2 x i16> addrspace(3)*
   store <2 x i16> %x, <2 x i16> addrspace(3)* %castOut, align 1
diff --git a/llvm/test/CodeGen/R600/fabs.f64.ll b/llvm/test/CodeGen/R600/fabs.f64.ll
index 30c8952..3c6136c 100644
--- a/llvm/test/CodeGen/R600/fabs.f64.ll
+++ b/llvm/test/CodeGen/R600/fabs.f64.ll
@@ -14,7 +14,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %tidext = sext i32 %tid to i64
   %gep = getelementptr double, double addrspace(1)* %in, i64 %tidext
-  %val = load double addrspace(1)* %gep, align 8
+  %val = load double, double addrspace(1)* %gep, align 8
   %fabs = call double @llvm.fabs.f64(double %val)
   store double %fabs, double addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/fadd.ll b/llvm/test/CodeGen/R600/fadd.ll
index 647eee4..5fac328 100644
--- a/llvm/test/CodeGen/R600/fadd.ll
+++ b/llvm/test/CodeGen/R600/fadd.ll
@@ -33,8 +33,8 @@
 ; SI: v_add_f32
 define void @fadd_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float> addrspace(1)* %in, align 16
-  %b = load <4 x float> addrspace(1)* %b_ptr, align 16
+  %a = load <4 x float>, <4 x float> addrspace(1)* %in, align 16
+  %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr, align 16
   %result = fadd <4 x float> %a, %b
   store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
   ret void
diff --git a/llvm/test/CodeGen/R600/fadd64.ll b/llvm/test/CodeGen/R600/fadd64.ll
index f1f6fef..485c558 100644
--- a/llvm/test/CodeGen/R600/fadd64.ll
+++ b/llvm/test/CodeGen/R600/fadd64.ll
@@ -6,8 +6,8 @@
 
 define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                       double addrspace(1)* %in2) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
    %r2 = fadd double %r0, %r1
    store double %r2, double addrspace(1)* %out
    ret void
diff --git a/llvm/test/CodeGen/R600/fcmp-cnd.ll b/llvm/test/CodeGen/R600/fcmp-cnd.ll
index 1d4e323..530274f 100644
--- a/llvm/test/CodeGen/R600/fcmp-cnd.ll
+++ b/llvm/test/CodeGen/R600/fcmp-cnd.ll
@@ -6,7 +6,7 @@
 
 define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
 entry:
-  %0 = load float addrspace(1)* %in
+  %0 = load float, float addrspace(1)* %in
   %cmp = fcmp oeq float %0, 0.000000e+00
   %value = select i1 %cmp, i32 2, i32 3 
   store i32 %value, i32 addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/fcmp-cnde-int-args.ll b/llvm/test/CodeGen/R600/fcmp-cnde-int-args.ll
index 55aba0d..c402805 100644
--- a/llvm/test/CodeGen/R600/fcmp-cnde-int-args.ll
+++ b/llvm/test/CodeGen/R600/fcmp-cnde-int-args.ll
@@ -8,7 +8,7 @@
 
 define void @test(i32 addrspace(1)* %out, float addrspace(1)* %in) {
 entry:
-  %0 = load float addrspace(1)* %in
+  %0 = load float, float addrspace(1)* %in
   %cmp = fcmp oeq float %0, 0.000000e+00
   %value = select i1 %cmp, i32 -1, i32 0
   store i32 %value, i32 addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/fcmp.ll b/llvm/test/CodeGen/R600/fcmp.ll
index 718f3e8..5207ab5 100644
--- a/llvm/test/CodeGen/R600/fcmp.ll
+++ b/llvm/test/CodeGen/R600/fcmp.ll
@@ -5,9 +5,9 @@
 
 define void @fcmp_sext(i32 addrspace(1)* %out, float addrspace(1)* %in) {
 entry:
-  %0 = load float addrspace(1)* %in
+  %0 = load float, float addrspace(1)* %in
   %arrayidx1 = getelementptr inbounds float, float addrspace(1)* %in, i32 1
-  %1 = load float addrspace(1)* %arrayidx1
+  %1 = load float, float addrspace(1)* %arrayidx1
   %cmp = fcmp oeq float %0, %1
   %sext = sext i1 %cmp to i32
   store i32 %sext, i32 addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/fcmp64.ll b/llvm/test/CodeGen/R600/fcmp64.ll
index 9dc8b50..053ab0e 100644
--- a/llvm/test/CodeGen/R600/fcmp64.ll
+++ b/llvm/test/CodeGen/R600/fcmp64.ll
@@ -5,8 +5,8 @@
 ; CHECK: v_cmp_nge_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 define void @flt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
    %r2 = fcmp ult double %r0, %r1
    %r3 = zext i1 %r2 to i32
    store i32 %r3, i32 addrspace(1)* %out
@@ -17,8 +17,8 @@
 ; CHECK: v_cmp_ngt_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 define void @fle_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
    %r2 = fcmp ule double %r0, %r1
    %r3 = zext i1 %r2 to i32
    store i32 %r3, i32 addrspace(1)* %out
@@ -29,8 +29,8 @@
 ; CHECK: v_cmp_nle_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 define void @fgt_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
    %r2 = fcmp ugt double %r0, %r1
    %r3 = zext i1 %r2 to i32
    store i32 %r3, i32 addrspace(1)* %out
@@ -41,8 +41,8 @@
 ; CHECK: v_cmp_nlt_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 define void @fge_f64(i32 addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
    %r2 = fcmp uge double %r0, %r1
    %r3 = zext i1 %r2 to i32
    store i32 %r3, i32 addrspace(1)* %out
@@ -53,8 +53,8 @@
 ; CHECK: v_cmp_neq_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 define void @fne_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
    %r2 = fcmp une double %r0, %r1
    %r3 = select i1 %r2, double %r0, double %r1
    store double %r3, double addrspace(1)* %out
@@ -65,8 +65,8 @@
 ; CHECK: v_cmp_nlg_f64_e32 vcc, {{v[[0-9]+:[0-9]+], v[[0-9]+:[0-9]+]}}
 define void @feq_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
    %r2 = fcmp ueq double %r0, %r1
    %r3 = select i1 %r2, double %r0, double %r1
    store double %r3, double addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/fconst64.ll b/llvm/test/CodeGen/R600/fconst64.ll
index 28e0c90..89af375 100644
--- a/llvm/test/CodeGen/R600/fconst64.ll
+++ b/llvm/test/CodeGen/R600/fconst64.ll
@@ -6,7 +6,7 @@
 ; CHECK-DAG: s_mov_b32 {{s[0-9]+}}, 0
 
 define void @fconst_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
-   %r1 = load double addrspace(1)* %in
+   %r1 = load double, double addrspace(1)* %in
    %r2 = fadd double %r1, 5.000000e+00
    store double %r2, double addrspace(1)* %out
    ret void
diff --git a/llvm/test/CodeGen/R600/fdiv.f64.ll b/llvm/test/CodeGen/R600/fdiv.f64.ll
index c96f141b..7c022e3 100644
--- a/llvm/test/CodeGen/R600/fdiv.f64.ll
+++ b/llvm/test/CodeGen/R600/fdiv.f64.ll
@@ -31,8 +31,8 @@
 ; COMMON: s_endpgm
 define void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %in) nounwind {
   %gep.1 = getelementptr double, double addrspace(1)* %in, i32 1
-  %num = load double addrspace(1)* %in
-  %den = load double addrspace(1)* %gep.1
+  %num = load double, double addrspace(1)* %in
+  %den = load double, double addrspace(1)* %gep.1
   %result = fdiv double %num, %den
   store double %result, double addrspace(1)* %out
   ret void
@@ -40,7 +40,7 @@
 
 ; COMMON-LABEL: {{^}}fdiv_f64_s_v:
 define void @fdiv_f64_s_v(double addrspace(1)* %out, double addrspace(1)* %in, double %num) nounwind {
-  %den = load double addrspace(1)* %in
+  %den = load double, double addrspace(1)* %in
   %result = fdiv double %num, %den
   store double %result, double addrspace(1)* %out
   ret void
@@ -48,7 +48,7 @@
 
 ; COMMON-LABEL: {{^}}fdiv_f64_v_s:
 define void @fdiv_f64_v_s(double addrspace(1)* %out, double addrspace(1)* %in, double %den) nounwind {
-  %num = load double addrspace(1)* %in
+  %num = load double, double addrspace(1)* %in
   %result = fdiv double %num, %den
   store double %result, double addrspace(1)* %out
   ret void
@@ -64,8 +64,8 @@
 ; COMMON-LABEL: {{^}}v_fdiv_v2f64:
 define void @v_fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in) nounwind {
   %gep.1 = getelementptr <2 x double>, <2 x double> addrspace(1)* %in, i32 1
-  %num = load <2 x double> addrspace(1)* %in
-  %den = load <2 x double> addrspace(1)* %gep.1
+  %num = load <2 x double>, <2 x double> addrspace(1)* %in
+  %den = load <2 x double>, <2 x double> addrspace(1)* %gep.1
   %result = fdiv <2 x double> %num, %den
   store <2 x double> %result, <2 x double> addrspace(1)* %out
   ret void
@@ -81,8 +81,8 @@
 ; COMMON-LABEL: {{^}}v_fdiv_v4f64:
 define void @v_fdiv_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) nounwind {
   %gep.1 = getelementptr <4 x double>, <4 x double> addrspace(1)* %in, i32 1
-  %num = load <4 x double> addrspace(1)* %in
-  %den = load <4 x double> addrspace(1)* %gep.1
+  %num = load <4 x double>, <4 x double> addrspace(1)* %in
+  %den = load <4 x double>, <4 x double> addrspace(1)* %gep.1
   %result = fdiv <4 x double> %num, %den
   store <4 x double> %result, <4 x double> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/fdiv.ll b/llvm/test/CodeGen/R600/fdiv.ll
index 43ead29..7cbf873 100644
--- a/llvm/test/CodeGen/R600/fdiv.ll
+++ b/llvm/test/CodeGen/R600/fdiv.ll
@@ -60,8 +60,8 @@
 ; SI-DAG: v_mul_f32
 define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float> addrspace(1) * %in
-  %b = load <4 x float> addrspace(1) * %b_ptr
+  %a = load <4 x float>, <4 x float> addrspace(1) * %in
+  %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
   %result = fdiv <4 x float> %a, %b
   store <4 x float> %result, <4 x float> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/fetch-limits.r600.ll b/llvm/test/CodeGen/R600/fetch-limits.r600.ll
index d35573e..821760c 100644
--- a/llvm/test/CodeGen/R600/fetch-limits.r600.ll
+++ b/llvm/test/CodeGen/R600/fetch-limits.r600.ll
@@ -9,15 +9,15 @@
 
 define void @fetch_limits_r600() #0 {
 entry:
-  %0 = load <4 x float> addrspace(8)* null
-  %1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
-  %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
-  %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
-  %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
-  %5 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
-  %6 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
-  %7 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
-  %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %0 = load <4 x float>, <4 x float> addrspace(8)* null
+  %1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %6 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
   %res0 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %0, i32 0, i32 0, i32 1)
   %res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %1, i32 0, i32 0, i32 1)
   %res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %2, i32 0, i32 0, i32 1)
diff --git a/llvm/test/CodeGen/R600/fetch-limits.r700+.ll b/llvm/test/CodeGen/R600/fetch-limits.r700+.ll
index 17760a0..2e1916e 100644
--- a/llvm/test/CodeGen/R600/fetch-limits.r700+.ll
+++ b/llvm/test/CodeGen/R600/fetch-limits.r700+.ll
@@ -18,23 +18,23 @@
 
 define void @fetch_limits_r700() #0 {
 entry:
-  %0 = load <4 x float> addrspace(8)* null
-  %1 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
-  %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
-  %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
-  %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
-  %5 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
-  %6 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
-  %7 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
-  %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
-  %9 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
-  %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
-  %11 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
-  %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
-  %13 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
-  %14 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
-  %15 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
-  %16 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %0 = load <4 x float>, <4 x float> addrspace(8)* null
+  %1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %6 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %9 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %11 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+  %13 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
+  %14 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %15 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
+  %16 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
   %res0 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %0, i32 0, i32 0, i32 1)
   %res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %1, i32 0, i32 0, i32 1)
   %res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %2, i32 0, i32 0, i32 1)
diff --git a/llvm/test/CodeGen/R600/flat-address-space.ll b/llvm/test/CodeGen/R600/flat-address-space.ll
index 22ad576..425d67d 100644
--- a/llvm/test/CodeGen/R600/flat-address-space.ll
+++ b/llvm/test/CodeGen/R600/flat-address-space.ll
@@ -26,7 +26,7 @@
 end:
   %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
   store i32 %x, i32 addrspace(4)* %fptr, align 4
-;  %val = load i32 addrspace(4)* %fptr, align 4
+;  %val = load i32, i32 addrspace(4)* %fptr, align 4
 ;  store i32 %val, i32 addrspace(1)* %out, align 4
   ret void
 }
@@ -87,7 +87,7 @@
 ; CHECK: flat_load_dword
 define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
   %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
-  %fload = load i32 addrspace(4)* %fptr, align 4
+  %fload = load i32, i32 addrspace(4)* %fptr, align 4
   store i32 %fload, i32 addrspace(1)* %out, align 4
   ret void
 }
@@ -96,7 +96,7 @@
 ; CHECK: flat_load_dwordx2
 define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
   %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
-  %fload = load i64 addrspace(4)* %fptr, align 4
+  %fload = load i64, i64 addrspace(4)* %fptr, align 4
   store i64 %fload, i64 addrspace(1)* %out, align 8
   ret void
 }
@@ -105,7 +105,7 @@
 ; CHECK: flat_load_dwordx4
 define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
   %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
-  %fload = load <4 x i32> addrspace(4)* %fptr, align 4
+  %fload = load <4 x i32>, <4 x i32> addrspace(4)* %fptr, align 4
   store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8
   ret void
 }
@@ -114,7 +114,7 @@
 ; CHECK: flat_load_sbyte
 define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
   %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
-  %fload = load i8 addrspace(4)* %fptr, align 4
+  %fload = load i8, i8 addrspace(4)* %fptr, align 4
   %ext = sext i8 %fload to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
@@ -124,7 +124,7 @@
 ; CHECK: flat_load_ubyte
 define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
   %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
-  %fload = load i8 addrspace(4)* %fptr, align 4
+  %fload = load i8, i8 addrspace(4)* %fptr, align 4
   %ext = zext i8 %fload to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
@@ -134,7 +134,7 @@
 ; CHECK: flat_load_sshort
 define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
   %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
-  %fload = load i16 addrspace(4)* %fptr, align 4
+  %fload = load i16, i16 addrspace(4)* %fptr, align 4
   %ext = sext i16 %fload to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
@@ -144,7 +144,7 @@
 ; CHECK: flat_load_ushort
 define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
   %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
-  %fload = load i16 addrspace(4)* %fptr, align 4
+  %fload = load i16, i16 addrspace(4)* %fptr, align 4
   %ext = zext i16 %fload to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
@@ -171,7 +171,7 @@
   store i32 %x, i32 addrspace(4)* %fptr
   ; Dummy call
   call void @llvm.AMDGPU.barrier.local() #1
-  %reload = load i32 addrspace(4)* %fptr, align 4
+  %reload = load i32, i32 addrspace(4)* %fptr, align 4
   store i32 %reload, i32 addrspace(1)* %out, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/fma-combine.ll b/llvm/test/CodeGen/R600/fma-combine.ll
index 9c77c15..bd574b8 100644
--- a/llvm/test/CodeGen/R600/fma-combine.ll
+++ b/llvm/test/CodeGen/R600/fma-combine.ll
@@ -20,9 +20,9 @@
   %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
 
   %mul = fmul double %a, %b
   %fma = fadd double %mul, %c
@@ -50,10 +50,10 @@
   %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
-  %d = load double addrspace(1)* %gep.3
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
+  %d = load double, double addrspace(1)* %gep.3
 
   %mul = fmul double %a, %b
   %fma0 = fadd double %mul, %c
@@ -77,9 +77,9 @@
   %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
 
   %mul = fmul double %a, %b
   %fma = fadd double %c, %mul
@@ -101,9 +101,9 @@
   %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
 
   %mul = fmul double %a, %b
   %fma = fsub double %mul, %c
@@ -131,10 +131,10 @@
   %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
-  %d = load double addrspace(1)* %gep.3
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
+  %d = load double, double addrspace(1)* %gep.3
 
   %mul = fmul double %a, %b
   %fma0 = fsub double %mul, %c
@@ -158,9 +158,9 @@
   %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
 
   %mul = fmul double %a, %b
   %fma = fsub double %c, %mul
@@ -188,10 +188,10 @@
   %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
-  %d = load double addrspace(1)* %gep.3
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
+  %d = load double, double addrspace(1)* %gep.3
 
   %mul = fmul double %a, %b
   %fma0 = fsub double %c, %mul
@@ -215,9 +215,9 @@
   %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
 
   %mul = fmul double %a, %b
   %mul.neg = fsub double -0.0, %mul
@@ -246,10 +246,10 @@
   %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
-  %d = load double addrspace(1)* %gep.3
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
+  %d = load double, double addrspace(1)* %gep.3
 
   %mul = fmul double %a, %b
   %mul.neg = fsub double -0.0, %mul
@@ -280,10 +280,10 @@
   %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0
-  %b = load double addrspace(1)* %gep.1
-  %c = load double addrspace(1)* %gep.2
-  %d = load double addrspace(1)* %gep.3
+  %a = load double, double addrspace(1)* %gep.0
+  %b = load double, double addrspace(1)* %gep.1
+  %c = load double, double addrspace(1)* %gep.2
+  %d = load double, double addrspace(1)* %gep.3
 
   %mul = fmul double %a, %b
   %mul.neg = fsub double -0.0, %mul
@@ -315,11 +315,11 @@
   %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
   %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
 
-  %x = load double addrspace(1)* %gep.0
-  %y = load double addrspace(1)* %gep.1
-  %z = load double addrspace(1)* %gep.2
-  %u = load double addrspace(1)* %gep.3
-  %v = load double addrspace(1)* %gep.4
+  %x = load double, double addrspace(1)* %gep.0
+  %y = load double, double addrspace(1)* %gep.1
+  %z = load double, double addrspace(1)* %gep.2
+  %u = load double, double addrspace(1)* %gep.3
+  %v = load double, double addrspace(1)* %gep.4
 
   %tmp0 = fmul double %u, %v
   %tmp1 = call double @llvm.fma.f64(double %x, double %y, double %tmp0) #0
@@ -350,11 +350,11 @@
   %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
   %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
 
-  %x = load double addrspace(1)* %gep.0
-  %y = load double addrspace(1)* %gep.1
-  %z = load double addrspace(1)* %gep.2
-  %u = load double addrspace(1)* %gep.3
-  %v = load double addrspace(1)* %gep.4
+  %x = load double, double addrspace(1)* %gep.0
+  %y = load double, double addrspace(1)* %gep.1
+  %z = load double, double addrspace(1)* %gep.2
+  %u = load double, double addrspace(1)* %gep.3
+  %v = load double, double addrspace(1)* %gep.4
 
   %tmp0 = fmul double %u, %v
   %tmp1 = call double @llvm.fma.f64(double %y, double %z, double %tmp0) #0
diff --git a/llvm/test/CodeGen/R600/fma.f64.ll b/llvm/test/CodeGen/R600/fma.f64.ll
index bca312b..0a55ef7 100644
--- a/llvm/test/CodeGen/R600/fma.f64.ll
+++ b/llvm/test/CodeGen/R600/fma.f64.ll
@@ -10,9 +10,9 @@
 ; SI: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 define void @fma_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                      double addrspace(1)* %in2, double addrspace(1)* %in3) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
-   %r2 = load double addrspace(1)* %in3
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
+   %r2 = load double, double addrspace(1)* %in3
    %r3 = tail call double @llvm.fma.f64(double %r0, double %r1, double %r2)
    store double %r3, double addrspace(1)* %out
    ret void
@@ -23,9 +23,9 @@
 ; SI: v_fma_f64
 define void @fma_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
                        <2 x double> addrspace(1)* %in2, <2 x double> addrspace(1)* %in3) {
-   %r0 = load <2 x double> addrspace(1)* %in1
-   %r1 = load <2 x double> addrspace(1)* %in2
-   %r2 = load <2 x double> addrspace(1)* %in3
+   %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1
+   %r1 = load <2 x double>, <2 x double> addrspace(1)* %in2
+   %r2 = load <2 x double>, <2 x double> addrspace(1)* %in3
    %r3 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %r0, <2 x double> %r1, <2 x double> %r2)
    store <2 x double> %r3, <2 x double> addrspace(1)* %out
    ret void
@@ -38,9 +38,9 @@
 ; SI: v_fma_f64
 define void @fma_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
                        <4 x double> addrspace(1)* %in2, <4 x double> addrspace(1)* %in3) {
-   %r0 = load <4 x double> addrspace(1)* %in1
-   %r1 = load <4 x double> addrspace(1)* %in2
-   %r2 = load <4 x double> addrspace(1)* %in3
+   %r0 = load <4 x double>, <4 x double> addrspace(1)* %in1
+   %r1 = load <4 x double>, <4 x double> addrspace(1)* %in2
+   %r2 = load <4 x double>, <4 x double> addrspace(1)* %in3
    %r3 = tail call <4 x double> @llvm.fma.v4f64(<4 x double> %r0, <4 x double> %r1, <4 x double> %r2)
    store <4 x double> %r3, <4 x double> addrspace(1)* %out
    ret void
diff --git a/llvm/test/CodeGen/R600/fma.ll b/llvm/test/CodeGen/R600/fma.ll
index 3c874b2..d6024aa 100644
--- a/llvm/test/CodeGen/R600/fma.ll
+++ b/llvm/test/CodeGen/R600/fma.ll
@@ -14,9 +14,9 @@
 ; EG: FMA {{\*? *}}[[RES]]
 define void @fma_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
                      float addrspace(1)* %in2, float addrspace(1)* %in3) {
-  %r0 = load float addrspace(1)* %in1
-  %r1 = load float addrspace(1)* %in2
-  %r2 = load float addrspace(1)* %in3
+  %r0 = load float, float addrspace(1)* %in1
+  %r1 = load float, float addrspace(1)* %in2
+  %r2 = load float, float addrspace(1)* %in3
   %r3 = tail call float @llvm.fma.f32(float %r0, float %r1, float %r2)
   store float %r3, float addrspace(1)* %out
   ret void
@@ -31,9 +31,9 @@
 ; EG-DAG: FMA {{\*? *}}[[RES]].[[CHHI]]
 define void @fma_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1,
                        <2 x float> addrspace(1)* %in2, <2 x float> addrspace(1)* %in3) {
-  %r0 = load <2 x float> addrspace(1)* %in1
-  %r1 = load <2 x float> addrspace(1)* %in2
-  %r2 = load <2 x float> addrspace(1)* %in3
+  %r0 = load <2 x float>, <2 x float> addrspace(1)* %in1
+  %r1 = load <2 x float>, <2 x float> addrspace(1)* %in2
+  %r2 = load <2 x float>, <2 x float> addrspace(1)* %in3
   %r3 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %r0, <2 x float> %r1, <2 x float> %r2)
   store <2 x float> %r3, <2 x float> addrspace(1)* %out
   ret void
@@ -52,9 +52,9 @@
 ; EG-DAG: FMA {{\*? *}}[[RES]].W
 define void @fma_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1,
                        <4 x float> addrspace(1)* %in2, <4 x float> addrspace(1)* %in3) {
-  %r0 = load <4 x float> addrspace(1)* %in1
-  %r1 = load <4 x float> addrspace(1)* %in2
-  %r2 = load <4 x float> addrspace(1)* %in3
+  %r0 = load <4 x float>, <4 x float> addrspace(1)* %in1
+  %r1 = load <4 x float>, <4 x float> addrspace(1)* %in2
+  %r2 = load <4 x float>, <4 x float> addrspace(1)* %in3
   %r3 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %r0, <4 x float> %r1, <4 x float> %r2)
   store <4 x float> %r3, <4 x float> addrspace(1)* %out
   ret void
@@ -68,8 +68,8 @@
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %in.a.gep, align 4
-  %b = load float addrspace(1)* %in.b.gep, align 4
+  %a = load float, float addrspace(1)* %in.a.gep, align 4
+  %b = load float, float addrspace(1)* %in.b.gep, align 4
 
   %fma = call float @llvm.fma.f32(float %a, float 2.0, float %b)
   store float %fma, float addrspace(1)* %out.gep, align 4
@@ -83,8 +83,8 @@
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %in.a.gep, align 4
-  %c = load float addrspace(1)* %in.b.gep, align 4
+  %a = load float, float addrspace(1)* %in.a.gep, align 4
+  %c = load float, float addrspace(1)* %in.b.gep, align 4
 
   %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
   store float %fma, float addrspace(1)* %out.gep, align 4
diff --git a/llvm/test/CodeGen/R600/fmax3.ll b/llvm/test/CodeGen/R600/fmax3.ll
index 629c032..c3028a6 100644
--- a/llvm/test/CodeGen/R600/fmax3.ll
+++ b/llvm/test/CodeGen/R600/fmax3.ll
@@ -11,9 +11,9 @@
 ; SI: buffer_store_dword [[RESULT]],
 ; SI: s_endpgm
 define void @test_fmax3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
-  %a = load float addrspace(1)* %aptr, align 4
-  %b = load float addrspace(1)* %bptr, align 4
-  %c = load float addrspace(1)* %cptr, align 4
+  %a = load float, float addrspace(1)* %aptr, align 4
+  %b = load float, float addrspace(1)* %bptr, align 4
+  %c = load float, float addrspace(1)* %cptr, align 4
   %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
   %f1 = call float @llvm.maxnum.f32(float %f0, float %c) nounwind readnone
   store float %f1, float addrspace(1)* %out, align 4
@@ -29,9 +29,9 @@
 ; SI: buffer_store_dword [[RESULT]],
 ; SI: s_endpgm
 define void @test_fmax3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
-  %a = load float addrspace(1)* %aptr, align 4
-  %b = load float addrspace(1)* %bptr, align 4
-  %c = load float addrspace(1)* %cptr, align 4
+  %a = load float, float addrspace(1)* %aptr, align 4
+  %b = load float, float addrspace(1)* %bptr, align 4
+  %c = load float, float addrspace(1)* %cptr, align 4
   %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
   %f1 = call float @llvm.maxnum.f32(float %c, float %f0) nounwind readnone
   store float %f1, float addrspace(1)* %out, align 4
diff --git a/llvm/test/CodeGen/R600/fmax_legacy.f64.ll b/llvm/test/CodeGen/R600/fmax_legacy.f64.ll
index 0168900..8282438 100644
--- a/llvm/test/CodeGen/R600/fmax_legacy.f64.ll
+++ b/llvm/test/CodeGen/R600/fmax_legacy.f64.ll
@@ -9,8 +9,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %cmp = fcmp uge double %a, %b
   %val = select i1 %cmp, double %a, double %b
@@ -24,8 +24,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %cmp = fcmp oge double %a, %b
   %val = select i1 %cmp, double %a, double %b
@@ -39,8 +39,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %cmp = fcmp ugt double %a, %b
   %val = select i1 %cmp, double %a, double %b
@@ -54,8 +54,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %cmp = fcmp ogt double %a, %b
   %val = select i1 %cmp, double %a, double %b
diff --git a/llvm/test/CodeGen/R600/fmax_legacy.ll b/llvm/test/CodeGen/R600/fmax_legacy.ll
index 310aff8..413957d 100644
--- a/llvm/test/CodeGen/R600/fmax_legacy.ll
+++ b/llvm/test/CodeGen/R600/fmax_legacy.ll
@@ -18,8 +18,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp uge float %a, %b
   %val = select i1 %cmp, float %a, float %b
@@ -38,8 +38,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp oge float %a, %b
   %val = select i1 %cmp, float %a, float %b
@@ -58,8 +58,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp ugt float %a, %b
   %val = select i1 %cmp, float %a, float %b
@@ -78,8 +78,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp ogt float %a, %b
   %val = select i1 %cmp, float %a, float %b
@@ -102,8 +102,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp ogt float %a, %b
   %val = select i1 %cmp, float %a, float %b
diff --git a/llvm/test/CodeGen/R600/fmin3.ll b/llvm/test/CodeGen/R600/fmin3.ll
index e3acb31..0a76699 100644
--- a/llvm/test/CodeGen/R600/fmin3.ll
+++ b/llvm/test/CodeGen/R600/fmin3.ll
@@ -12,9 +12,9 @@
 ; SI: buffer_store_dword [[RESULT]],
 ; SI: s_endpgm
 define void @test_fmin3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
-  %a = load float addrspace(1)* %aptr, align 4
-  %b = load float addrspace(1)* %bptr, align 4
-  %c = load float addrspace(1)* %cptr, align 4
+  %a = load float, float addrspace(1)* %aptr, align 4
+  %b = load float, float addrspace(1)* %bptr, align 4
+  %c = load float, float addrspace(1)* %cptr, align 4
   %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
   %f1 = call float @llvm.minnum.f32(float %f0, float %c) nounwind readnone
   store float %f1, float addrspace(1)* %out, align 4
@@ -30,9 +30,9 @@
 ; SI: buffer_store_dword [[RESULT]],
 ; SI: s_endpgm
 define void @test_fmin3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
-  %a = load float addrspace(1)* %aptr, align 4
-  %b = load float addrspace(1)* %bptr, align 4
-  %c = load float addrspace(1)* %cptr, align 4
+  %a = load float, float addrspace(1)* %aptr, align 4
+  %b = load float, float addrspace(1)* %bptr, align 4
+  %c = load float, float addrspace(1)* %cptr, align 4
   %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
   %f1 = call float @llvm.minnum.f32(float %c, float %f0) nounwind readnone
   store float %f1, float addrspace(1)* %out, align 4
diff --git a/llvm/test/CodeGen/R600/fmin_legacy.f64.ll b/llvm/test/CodeGen/R600/fmin_legacy.f64.ll
index 395d927..e19a48f 100644
--- a/llvm/test/CodeGen/R600/fmin_legacy.f64.ll
+++ b/llvm/test/CodeGen/R600/fmin_legacy.f64.ll
@@ -19,8 +19,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %cmp = fcmp ule double %a, %b
   %val = select i1 %cmp, double %a, double %b
@@ -34,8 +34,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %cmp = fcmp ole double %a, %b
   %val = select i1 %cmp, double %a, double %b
@@ -49,8 +49,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %cmp = fcmp olt double %a, %b
   %val = select i1 %cmp, double %a, double %b
@@ -64,8 +64,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %cmp = fcmp ult double %a, %b
   %val = select i1 %cmp, double %a, double %b
diff --git a/llvm/test/CodeGen/R600/fmin_legacy.ll b/llvm/test/CodeGen/R600/fmin_legacy.ll
index dc24383..6a625c2 100644
--- a/llvm/test/CodeGen/R600/fmin_legacy.ll
+++ b/llvm/test/CodeGen/R600/fmin_legacy.ll
@@ -30,8 +30,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp ule float %a, %b
   %val = select i1 %cmp, float %a, float %b
@@ -49,8 +49,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp ole float %a, %b
   %val = select i1 %cmp, float %a, float %b
@@ -68,8 +68,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp olt float %a, %b
   %val = select i1 %cmp, float %a, float %b
@@ -87,8 +87,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp ult float %a, %b
   %val = select i1 %cmp, float %a, float %b
@@ -109,8 +109,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %cmp = fcmp ole float %a, %b
   %val0 = select i1 %cmp, float %a, float %b
diff --git a/llvm/test/CodeGen/R600/fmul.ll b/llvm/test/CodeGen/R600/fmul.ll
index 29b4c48..68ebc4d 100644
--- a/llvm/test/CodeGen/R600/fmul.ll
+++ b/llvm/test/CodeGen/R600/fmul.ll
@@ -43,8 +43,8 @@
 ; SI: v_mul_f32
 define void @fmul_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float> addrspace(1) * %in
-  %b = load <4 x float> addrspace(1) * %b_ptr
+  %a = load <4 x float>, <4 x float> addrspace(1) * %in
+  %b = load <4 x float>, <4 x float> addrspace(1) * %b_ptr
   %result = fmul <4 x float> %a, %b
   store <4 x float> %result, <4 x float> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/fmul64.ll b/llvm/test/CodeGen/R600/fmul64.ll
index 9d7787c..3c222ea 100644
--- a/llvm/test/CodeGen/R600/fmul64.ll
+++ b/llvm/test/CodeGen/R600/fmul64.ll
@@ -5,8 +5,8 @@
 ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 define void @fmul_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                       double addrspace(1)* %in2) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
    %r2 = fmul double %r0, %r1
    store double %r2, double addrspace(1)* %out
    ret void
@@ -17,8 +17,8 @@
 ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 define void @fmul_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
                         <2 x double> addrspace(1)* %in2) {
-   %r0 = load <2 x double> addrspace(1)* %in1
-   %r1 = load <2 x double> addrspace(1)* %in2
+   %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1
+   %r1 = load <2 x double>, <2 x double> addrspace(1)* %in2
    %r2 = fmul <2 x double> %r0, %r1
    store <2 x double> %r2, <2 x double> addrspace(1)* %out
    ret void
@@ -31,8 +31,8 @@
 ; SI: v_mul_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 define void @fmul_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in1,
                         <4 x double> addrspace(1)* %in2) {
-   %r0 = load <4 x double> addrspace(1)* %in1
-   %r1 = load <4 x double> addrspace(1)* %in2
+   %r0 = load <4 x double>, <4 x double> addrspace(1)* %in1
+   %r1 = load <4 x double>, <4 x double> addrspace(1)* %in2
    %r2 = fmul <4 x double> %r0, %r1
    store <4 x double> %r2, <4 x double> addrspace(1)* %out
    ret void
diff --git a/llvm/test/CodeGen/R600/fmuladd.ll b/llvm/test/CodeGen/R600/fmuladd.ll
index 7297b27..ae84d84 100644
--- a/llvm/test/CodeGen/R600/fmuladd.ll
+++ b/llvm/test/CodeGen/R600/fmuladd.ll
@@ -10,9 +10,9 @@
 
 define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
                          float addrspace(1)* %in2, float addrspace(1)* %in3) {
-   %r0 = load float addrspace(1)* %in1
-   %r1 = load float addrspace(1)* %in2
-   %r2 = load float addrspace(1)* %in3
+   %r0 = load float, float addrspace(1)* %in1
+   %r1 = load float, float addrspace(1)* %in2
+   %r2 = load float, float addrspace(1)* %in3
    %r3 = tail call float @llvm.fmuladd.f32(float %r0, float %r1, float %r2)
    store float %r3, float addrspace(1)* %out
    ret void
@@ -23,9 +23,9 @@
 
 define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                          double addrspace(1)* %in2, double addrspace(1)* %in3) {
-   %r0 = load double addrspace(1)* %in1
-   %r1 = load double addrspace(1)* %in2
-   %r2 = load double addrspace(1)* %in3
+   %r0 = load double, double addrspace(1)* %in1
+   %r1 = load double, double addrspace(1)* %in2
+   %r2 = load double, double addrspace(1)* %in3
    %r3 = tail call double @llvm.fmuladd.f64(double %r0, double %r1, double %r2)
    store double %r3, double addrspace(1)* %out
    ret void
@@ -42,8 +42,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r1 = load float addrspace(1)* %gep.0
-  %r2 = load float addrspace(1)* %gep.1
+  %r1 = load float, float addrspace(1)* %gep.0
+  %r2 = load float, float addrspace(1)* %gep.1
 
   %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2)
   store float %r3, float addrspace(1)* %gep.out
@@ -61,8 +61,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r1 = load float addrspace(1)* %gep.0
-  %r2 = load float addrspace(1)* %gep.1
+  %r1 = load float, float addrspace(1)* %gep.0
+  %r2 = load float, float addrspace(1)* %gep.1
 
   %r3 = tail call float @llvm.fmuladd.f32(float %r1, float 2.0, float %r2)
   store float %r3, float addrspace(1)* %gep.out
@@ -82,8 +82,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r0 = load float addrspace(1)* %gep.0
-  %r1 = load float addrspace(1)* %gep.1
+  %r0 = load float, float addrspace(1)* %gep.0
+  %r1 = load float, float addrspace(1)* %gep.1
 
   %add.0 = fadd float %r0, %r0
   %add.1 = fadd float %add.0, %r1
@@ -104,8 +104,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r0 = load float addrspace(1)* %gep.0
-  %r1 = load float addrspace(1)* %gep.1
+  %r0 = load float, float addrspace(1)* %gep.0
+  %r1 = load float, float addrspace(1)* %gep.1
 
   %add.0 = fadd float %r0, %r0
   %add.1 = fadd float %r1, %add.0
@@ -124,8 +124,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r1 = load float addrspace(1)* %gep.0
-  %r2 = load float addrspace(1)* %gep.1
+  %r1 = load float, float addrspace(1)* %gep.0
+  %r2 = load float, float addrspace(1)* %gep.1
 
   %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1, float %r2)
   store float %r3, float addrspace(1)* %gep.out
@@ -144,8 +144,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r1 = load float addrspace(1)* %gep.0
-  %r2 = load float addrspace(1)* %gep.1
+  %r1 = load float, float addrspace(1)* %gep.0
+  %r2 = load float, float addrspace(1)* %gep.1
 
   %r1.fneg = fsub float -0.000000e+00, %r1
 
@@ -166,8 +166,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r1 = load float addrspace(1)* %gep.0
-  %r2 = load float addrspace(1)* %gep.1
+  %r1 = load float, float addrspace(1)* %gep.0
+  %r2 = load float, float addrspace(1)* %gep.1
 
   %r1.fneg = fsub float -0.000000e+00, %r1
 
@@ -188,8 +188,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r1 = load float addrspace(1)* %gep.0
-  %r2 = load float addrspace(1)* %gep.1
+  %r1 = load float, float addrspace(1)* %gep.0
+  %r2 = load float, float addrspace(1)* %gep.1
 
   %r2.fneg = fsub float -0.000000e+00, %r2
 
diff --git a/llvm/test/CodeGen/R600/fneg-fabs.f64.ll b/llvm/test/CodeGen/R600/fneg-fabs.f64.ll
index 7e6ede6..8830e82 100644
--- a/llvm/test/CodeGen/R600/fneg-fabs.f64.ll
+++ b/llvm/test/CodeGen/R600/fneg-fabs.f64.ll
@@ -15,8 +15,8 @@
 }
 
 define void @v_fneg_fabs_fadd_f64(double addrspace(1)* %out, double addrspace(1)* %xptr, double addrspace(1)* %yptr) {
-  %x = load double addrspace(1)* %xptr, align 8
-  %y = load double addrspace(1)* %xptr, align 8
+  %x = load double, double addrspace(1)* %xptr, align 8
+  %y = load double, double addrspace(1)* %xptr, align 8
   %fabs = call double @llvm.fabs.f64(double %x)
   %fsub = fsub double -0.000000e+00, %fabs
   %fadd = fadd double %y, %fsub
diff --git a/llvm/test/CodeGen/R600/fneg-fabs.ll b/llvm/test/CodeGen/R600/fneg-fabs.ll
index 4fde048..3b4930d 100644
--- a/llvm/test/CodeGen/R600/fneg-fabs.ll
+++ b/llvm/test/CodeGen/R600/fneg-fabs.ll
@@ -72,7 +72,7 @@
 ; FUNC-LABEL: {{^}}v_fneg_fabs_f32:
 ; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
 define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
-  %val = load float addrspace(1)* %in, align 4
+  %val = load float, float addrspace(1)* %in, align 4
   %fabs = call float @llvm.fabs.f32(float %val)
   %fsub = fsub float -0.000000e+00, %fabs
   store float %fsub, float addrspace(1)* %out, align 4
diff --git a/llvm/test/CodeGen/R600/fp16_to_fp.ll b/llvm/test/CodeGen/R600/fp16_to_fp.ll
index da78f61..5a79ca8 100644
--- a/llvm/test/CodeGen/R600/fp16_to_fp.ll
+++ b/llvm/test/CodeGen/R600/fp16_to_fp.ll
@@ -9,7 +9,7 @@
 ; SI: v_cvt_f32_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
 ; SI: buffer_store_dword [[RESULT]]
 define void @test_convert_fp16_to_fp32(float addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
-  %val = load i16 addrspace(1)* %in, align 2
+  %val = load i16, i16 addrspace(1)* %in, align 2
   %cvt = call float @llvm.convert.from.fp16.f32(i16 %val) nounwind readnone
   store float %cvt, float addrspace(1)* %out, align 4
   ret void
@@ -22,7 +22,7 @@
 ; SI: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[RESULT32]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
 define void @test_convert_fp16_to_fp64(double addrspace(1)* noalias %out, i16 addrspace(1)* noalias %in) nounwind {
-  %val = load i16 addrspace(1)* %in, align 2
+  %val = load i16, i16 addrspace(1)* %in, align 2
   %cvt = call double @llvm.convert.from.fp16.f64(i16 %val) nounwind readnone
   store double %cvt, double addrspace(1)* %out, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/fp32_to_fp16.ll b/llvm/test/CodeGen/R600/fp32_to_fp16.ll
index c3c65ae..67925eb 100644
--- a/llvm/test/CodeGen/R600/fp32_to_fp16.ll
+++ b/llvm/test/CodeGen/R600/fp32_to_fp16.ll
@@ -8,7 +8,7 @@
 ; SI: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[VAL]]
 ; SI: buffer_store_short [[RESULT]]
 define void @test_convert_fp32_to_fp16(i16 addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
-  %val = load float addrspace(1)* %in, align 4
+  %val = load float, float addrspace(1)* %in, align 4
   %cvt = call i16 @llvm.convert.to.fp16.f32(float %val) nounwind readnone
   store i16 %cvt, i16 addrspace(1)* %out, align 2
   ret void
diff --git a/llvm/test/CodeGen/R600/fp_to_sint.f64.ll b/llvm/test/CodeGen/R600/fp_to_sint.f64.ll
index 93fc847..12df660 100644
--- a/llvm/test/CodeGen/R600/fp_to_sint.f64.ll
+++ b/llvm/test/CodeGen/R600/fp_to_sint.f64.ll
@@ -49,7 +49,7 @@
 define void @fp_to_sint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %val = load double addrspace(1)* %gep, align 8
+  %val = load double, double addrspace(1)* %gep, align 8
   %cast = fptosi double %val to i64
   store i64 %cast, i64 addrspace(1)* %out, align 8
   ret void
diff --git a/llvm/test/CodeGen/R600/fp_to_sint.ll b/llvm/test/CodeGen/R600/fp_to_sint.ll
index 16549c3..301a94b 100644
--- a/llvm/test/CodeGen/R600/fp_to_sint.ll
+++ b/llvm/test/CodeGen/R600/fp_to_sint.ll
@@ -44,7 +44,7 @@
 ; SI: v_cvt_i32_f32_e32
 ; SI: v_cvt_i32_f32_e32
 define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-  %value = load <4 x float> addrspace(1) * %in
+  %value = load <4 x float>, <4 x float> addrspace(1) * %in
   %result = fptosi <4 x float> %value to <4 x i32>
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/fp_to_uint.f64.ll b/llvm/test/CodeGen/R600/fp_to_uint.f64.ll
index 472c378..41bc2a7 100644
--- a/llvm/test/CodeGen/R600/fp_to_uint.f64.ll
+++ b/llvm/test/CodeGen/R600/fp_to_uint.f64.ll
@@ -49,7 +49,7 @@
 define void @fp_to_uint_i64_f64(i64 addrspace(1)* %out, double addrspace(1)* %in) {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %val = load double addrspace(1)* %gep, align 8
+  %val = load double, double addrspace(1)* %gep, align 8
   %cast = fptoui double %val to i64
   store i64 %cast, i64 addrspace(1)* %out, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/fp_to_uint.ll b/llvm/test/CodeGen/R600/fp_to_uint.ll
index 804d90f..b7b6ccc 100644
--- a/llvm/test/CodeGen/R600/fp_to_uint.ll
+++ b/llvm/test/CodeGen/R600/fp_to_uint.ll
@@ -36,7 +36,7 @@
 ; SI: v_cvt_u32_f32_e32
 
 define void @fp_to_uint_v4f32_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-  %value = load <4 x float> addrspace(1) * %in
+  %value = load <4 x float>, <4 x float> addrspace(1) * %in
   %result = fptoui <4 x float> %value to <4 x i32>
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/frem.ll b/llvm/test/CodeGen/R600/frem.ll
index 9bc0542..f245ef0 100644
--- a/llvm/test/CodeGen/R600/frem.ll
+++ b/llvm/test/CodeGen/R600/frem.ll
@@ -16,8 +16,8 @@
 define void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
                       float addrspace(1)* %in2) #0 {
    %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4
-   %r0 = load float addrspace(1)* %in1, align 4
-   %r1 = load float addrspace(1)* %gep2, align 4
+   %r0 = load float, float addrspace(1)* %in1, align 4
+   %r1 = load float, float addrspace(1)* %gep2, align 4
    %r2 = frem float %r0, %r1
    store float %r2, float addrspace(1)* %out, align 4
    ret void
@@ -35,8 +35,8 @@
 define void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
                              float addrspace(1)* %in2) #1 {
    %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4
-   %r0 = load float addrspace(1)* %in1, align 4
-   %r1 = load float addrspace(1)* %gep2, align 4
+   %r0 = load float, float addrspace(1)* %in1, align 4
+   %r1 = load float, float addrspace(1)* %gep2, align 4
    %r2 = frem float %r0, %r1
    store float %r2, float addrspace(1)* %out, align 4
    ret void
@@ -55,8 +55,8 @@
 ; GCN: s_endpgm
 define void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                       double addrspace(1)* %in2) #0 {
-   %r0 = load double addrspace(1)* %in1, align 8
-   %r1 = load double addrspace(1)* %in2, align 8
+   %r0 = load double, double addrspace(1)* %in1, align 8
+   %r1 = load double, double addrspace(1)* %in2, align 8
    %r2 = frem double %r0, %r1
    store double %r2, double addrspace(1)* %out, align 8
    ret void
@@ -71,8 +71,8 @@
 ; GCN: s_endpgm
 define void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                              double addrspace(1)* %in2) #1 {
-   %r0 = load double addrspace(1)* %in1, align 8
-   %r1 = load double addrspace(1)* %in2, align 8
+   %r0 = load double, double addrspace(1)* %in1, align 8
+   %r1 = load double, double addrspace(1)* %in2, align 8
    %r2 = frem double %r0, %r1
    store double %r2, double addrspace(1)* %out, align 8
    ret void
@@ -81,8 +81,8 @@
 define void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1,
                         <2 x float> addrspace(1)* %in2) #0 {
    %gep2 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in2, i32 4
-   %r0 = load <2 x float> addrspace(1)* %in1, align 8
-   %r1 = load <2 x float> addrspace(1)* %gep2, align 8
+   %r0 = load <2 x float>, <2 x float> addrspace(1)* %in1, align 8
+   %r1 = load <2 x float>, <2 x float> addrspace(1)* %gep2, align 8
    %r2 = frem <2 x float> %r0, %r1
    store <2 x float> %r2, <2 x float> addrspace(1)* %out, align 8
    ret void
@@ -91,8 +91,8 @@
 define void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1,
                         <4 x float> addrspace(1)* %in2) #0 {
    %gep2 = getelementptr <4 x float>, <4 x float> addrspace(1)* %in2, i32 4
-   %r0 = load <4 x float> addrspace(1)* %in1, align 16
-   %r1 = load <4 x float> addrspace(1)* %gep2, align 16
+   %r0 = load <4 x float>, <4 x float> addrspace(1)* %in1, align 16
+   %r1 = load <4 x float>, <4 x float> addrspace(1)* %gep2, align 16
    %r2 = frem <4 x float> %r0, %r1
    store <4 x float> %r2, <4 x float> addrspace(1)* %out, align 16
    ret void
@@ -101,8 +101,8 @@
 define void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
                         <2 x double> addrspace(1)* %in2) #0 {
    %gep2 = getelementptr <2 x double>, <2 x double> addrspace(1)* %in2, i32 4
-   %r0 = load <2 x double> addrspace(1)* %in1, align 16
-   %r1 = load <2 x double> addrspace(1)* %gep2, align 16
+   %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1, align 16
+   %r1 = load <2 x double>, <2 x double> addrspace(1)* %gep2, align 16
    %r2 = frem <2 x double> %r0, %r1
    store <2 x double> %r2, <2 x double> addrspace(1)* %out, align 16
    ret void
diff --git a/llvm/test/CodeGen/R600/fsqrt.ll b/llvm/test/CodeGen/R600/fsqrt.ll
index 1fdf3e4..0410134 100644
--- a/llvm/test/CodeGen/R600/fsqrt.ll
+++ b/llvm/test/CodeGen/R600/fsqrt.ll
@@ -9,7 +9,7 @@
 ; CHECK: v_sqrt_f32_e32 {{v[0-9]+, v[0-9]+}}
 
 define void @fsqrt_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
-   %r0 = load float addrspace(1)* %in
+   %r0 = load float, float addrspace(1)* %in
    %r1 = call float @llvm.sqrt.f32(float %r0)
    store float %r1, float addrspace(1)* %out
    ret void
@@ -19,7 +19,7 @@
 ; CHECK: v_sqrt_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 
 define void @fsqrt_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
-   %r0 = load double addrspace(1)* %in
+   %r0 = load double, double addrspace(1)* %in
    %r1 = call double @llvm.sqrt.f64(double %r0)
    store double %r1, double addrspace(1)* %out
    ret void
diff --git a/llvm/test/CodeGen/R600/fsub.ll b/llvm/test/CodeGen/R600/fsub.ll
index 59866a0..dfe41cb 100644
--- a/llvm/test/CodeGen/R600/fsub.ll
+++ b/llvm/test/CodeGen/R600/fsub.ll
@@ -7,8 +7,8 @@
 ; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
 define void @v_fsub_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
   %b_ptr = getelementptr float, float addrspace(1)* %in, i32 1
-  %a = load float addrspace(1)* %in, align 4
-  %b = load float addrspace(1)* %b_ptr, align 4
+  %a = load float, float addrspace(1)* %in, align 4
+  %b = load float, float addrspace(1)* %b_ptr, align 4
   %result = fsub float %a, %b
   store float %result, float addrspace(1)* %out, align 4
   ret void
@@ -53,8 +53,8 @@
 ; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
 define void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
-  %a = load <4 x float> addrspace(1)* %in, align 16
-  %b = load <4 x float> addrspace(1)* %b_ptr, align 16
+  %a = load <4 x float>, <4 x float> addrspace(1)* %in, align 16
+  %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr, align 16
   %result = fsub <4 x float> %a, %b
   store <4 x float> %result, <4 x float> addrspace(1)* %out, align 16
   ret void
diff --git a/llvm/test/CodeGen/R600/fsub64.ll b/llvm/test/CodeGen/R600/fsub64.ll
index d18863f..f34a48e 100644
--- a/llvm/test/CodeGen/R600/fsub64.ll
+++ b/llvm/test/CodeGen/R600/fsub64.ll
@@ -7,8 +7,8 @@
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
 define void @fsub_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                       double addrspace(1)* %in2) {
-  %r0 = load double addrspace(1)* %in1
-  %r1 = load double addrspace(1)* %in2
+  %r0 = load double, double addrspace(1)* %in1
+  %r1 = load double, double addrspace(1)* %in2
   %r2 = fsub double %r0, %r1
   store double %r2, double addrspace(1)* %out
   ret void
@@ -18,8 +18,8 @@
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -\|v\[[0-9]+:[0-9]+\]\|}}
 define void @fsub_fabs_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                            double addrspace(1)* %in2) {
-  %r0 = load double addrspace(1)* %in1
-  %r1 = load double addrspace(1)* %in2
+  %r0 = load double, double addrspace(1)* %in1
+  %r1 = load double, double addrspace(1)* %in2
   %r1.fabs = call double @llvm.fabs.f64(double %r1) #0
   %r2 = fsub double %r0, %r1.fabs
   store double %r2, double addrspace(1)* %out
@@ -30,8 +30,8 @@
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], |v\[[0-9]+:[0-9]+\]|, -v\[[0-9]+:[0-9]+\]}}
 define void @fsub_fabs_inv_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                                double addrspace(1)* %in2) {
-  %r0 = load double addrspace(1)* %in1
-  %r1 = load double addrspace(1)* %in2
+  %r0 = load double, double addrspace(1)* %in1
+  %r1 = load double, double addrspace(1)* %in2
   %r0.fabs = call double @llvm.fabs.f64(double %r0) #0
   %r2 = fsub double %r0.fabs, %r1
   store double %r2, double addrspace(1)* %out
@@ -86,8 +86,8 @@
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
 define void @fsub_v4f64(<4 x double> addrspace(1)* %out, <4 x double> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x double>, <4 x double> addrspace(1)* %in, i32 1
-  %a = load <4 x double> addrspace(1)* %in
-  %b = load <4 x double> addrspace(1)* %b_ptr
+  %a = load <4 x double>, <4 x double> addrspace(1)* %in
+  %b = load <4 x double>, <4 x double> addrspace(1)* %b_ptr
   %result = fsub <4 x double> %a, %b
   store <4 x double> %result, <4 x double> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/ftrunc.f64.ll b/llvm/test/CodeGen/R600/ftrunc.f64.ll
index 21399a8..dd51f64 100644
--- a/llvm/test/CodeGen/R600/ftrunc.f64.ll
+++ b/llvm/test/CodeGen/R600/ftrunc.f64.ll
@@ -14,7 +14,7 @@
 ; SI: v_bfe_u32 {{v[0-9]+}}, {{v[0-9]+}}, 20, 11
 ; SI: s_endpgm
 define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
-  %x = load double addrspace(1)* %in, align 8
+  %x = load double, double addrspace(1)* %in, align 8
   %y = call double @llvm.trunc.f64(double %x) nounwind readnone
   store double %y, double addrspace(1)* %out, align 8
   ret void
diff --git a/llvm/test/CodeGen/R600/global-directive.ll b/llvm/test/CodeGen/R600/global-directive.ll
index 67ba4b6..be775cf 100644
--- a/llvm/test/CodeGen/R600/global-directive.ll
+++ b/llvm/test/CodeGen/R600/global-directive.ll
@@ -7,8 +7,8 @@
 ; SI: {{^}}foo:
 define void @foo(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32 addrspace(1)* %in
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %in
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %result = add i32 %a, %b
   store i32 %result, i32 addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/global-extload-i1.ll b/llvm/test/CodeGen/R600/global-extload-i1.ll
index 67d36ce..bd9557d 100644
--- a/llvm/test/CodeGen/R600/global-extload-i1.ll
+++ b/llvm/test/CodeGen/R600/global-extload-i1.ll
@@ -8,7 +8,7 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @zextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %a = load i1 addrspace(1)* %in
+  %a = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %a to i32
   store i32 %ext, i32 addrspace(1)* %out
   ret void
@@ -20,7 +20,7 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @sextload_global_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %a = load i1 addrspace(1)* %in
+  %a = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %a to i32
   store i32 %ext, i32 addrspace(1)* %out
   ret void
@@ -29,7 +29,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i32:
 ; SI: s_endpgm
 define void @zextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i1> addrspace(1)* %in
+  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
   %ext = zext <1 x i1> %load to <1 x i32>
   store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
   ret void
@@ -38,7 +38,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i32:
 ; SI: s_endpgm
 define void @sextload_global_v1i1_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i1> addrspace(1)* %in
+  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
   %ext = sext <1 x i1> %load to <1 x i32>
   store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
   ret void
@@ -47,7 +47,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i32:
 ; SI: s_endpgm
 define void @zextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i1> addrspace(1)* %in
+  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
   %ext = zext <2 x i1> %load to <2 x i32>
   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
   ret void
@@ -56,7 +56,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i32:
 ; SI: s_endpgm
 define void @sextload_global_v2i1_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i1> addrspace(1)* %in
+  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
   %ext = sext <2 x i1> %load to <2 x i32>
   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
   ret void
@@ -65,7 +65,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i32:
 ; SI: s_endpgm
 define void @zextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i1> addrspace(1)* %in
+  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
   %ext = zext <4 x i1> %load to <4 x i32>
   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
   ret void
@@ -74,7 +74,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i32:
 ; SI: s_endpgm
 define void @sextload_global_v4i1_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i1> addrspace(1)* %in
+  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
   %ext = sext <4 x i1> %load to <4 x i32>
   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
   ret void
@@ -83,7 +83,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i32:
 ; SI: s_endpgm
 define void @zextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i1> addrspace(1)* %in
+  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
   %ext = zext <8 x i1> %load to <8 x i32>
   store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
   ret void
@@ -92,7 +92,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i32:
 ; SI: s_endpgm
 define void @sextload_global_v8i1_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i1> addrspace(1)* %in
+  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
   %ext = sext <8 x i1> %load to <8 x i32>
   store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
   ret void
@@ -101,7 +101,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i32:
 ; SI: s_endpgm
 define void @zextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i1> addrspace(1)* %in
+  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
   %ext = zext <16 x i1> %load to <16 x i32>
   store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
   ret void
@@ -110,7 +110,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i32:
 ; SI: s_endpgm
 define void @sextload_global_v16i1_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i1> addrspace(1)* %in
+  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
   %ext = sext <16 x i1> %load to <16 x i32>
   store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
   ret void
@@ -119,7 +119,7 @@
 ; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i32:
 ; XSI: s_endpgm
 ; define void @zextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i1> addrspace(1)* %in
+;   %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
 ;   %ext = zext <32 x i1> %load to <32 x i32>
 ;   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
 ;   ret void
@@ -128,7 +128,7 @@
 ; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i32:
 ; XSI: s_endpgm
 ; define void @sextload_global_v32i1_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i1> addrspace(1)* %in
+;   %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
 ;   %ext = sext <32 x i1> %load to <32 x i32>
 ;   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
 ;   ret void
@@ -137,7 +137,7 @@
 ; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i32:
 ; XSI: s_endpgm
 ; define void @zextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i1> addrspace(1)* %in
+;   %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
 ;   %ext = zext <64 x i1> %load to <64 x i32>
 ;   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
 ;   ret void
@@ -146,7 +146,7 @@
 ; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i32:
 ; XSI: s_endpgm
 ; define void @sextload_global_v64i1_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i1> addrspace(1)* %in
+;   %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
 ;   %ext = sext <64 x i1> %load to <64 x i32>
 ;   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
 ;   ret void
@@ -157,7 +157,7 @@
 ; SI: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
 ; SI: buffer_store_dwordx2
 define void @zextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %a = load i1 addrspace(1)* %in
+  %a = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %a to i64
   store i64 %ext, i64 addrspace(1)* %out
   ret void
@@ -169,7 +169,7 @@
 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[BFE]]
 ; SI: buffer_store_dwordx2
 define void @sextload_global_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %a = load i1 addrspace(1)* %in
+  %a = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %a to i64
   store i64 %ext, i64 addrspace(1)* %out
   ret void
@@ -178,7 +178,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v1i1_to_v1i64:
 ; SI: s_endpgm
 define void @zextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i1> addrspace(1)* %in
+  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
   %ext = zext <1 x i1> %load to <1 x i64>
   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
   ret void
@@ -187,7 +187,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v1i1_to_v1i64:
 ; SI: s_endpgm
 define void @sextload_global_v1i1_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i1> addrspace(1)* %in
+  %load = load <1 x i1>, <1 x i1> addrspace(1)* %in
   %ext = sext <1 x i1> %load to <1 x i64>
   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
   ret void
@@ -196,7 +196,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v2i1_to_v2i64:
 ; SI: s_endpgm
 define void @zextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i1> addrspace(1)* %in
+  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
   %ext = zext <2 x i1> %load to <2 x i64>
   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
   ret void
@@ -205,7 +205,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v2i1_to_v2i64:
 ; SI: s_endpgm
 define void @sextload_global_v2i1_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i1> addrspace(1)* %in
+  %load = load <2 x i1>, <2 x i1> addrspace(1)* %in
   %ext = sext <2 x i1> %load to <2 x i64>
   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
   ret void
@@ -214,7 +214,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v4i1_to_v4i64:
 ; SI: s_endpgm
 define void @zextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i1> addrspace(1)* %in
+  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
   %ext = zext <4 x i1> %load to <4 x i64>
   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
   ret void
@@ -223,7 +223,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v4i1_to_v4i64:
 ; SI: s_endpgm
 define void @sextload_global_v4i1_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i1> addrspace(1)* %in
+  %load = load <4 x i1>, <4 x i1> addrspace(1)* %in
   %ext = sext <4 x i1> %load to <4 x i64>
   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
   ret void
@@ -232,7 +232,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v8i1_to_v8i64:
 ; SI: s_endpgm
 define void @zextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i1> addrspace(1)* %in
+  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
   %ext = zext <8 x i1> %load to <8 x i64>
   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
   ret void
@@ -241,7 +241,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v8i1_to_v8i64:
 ; SI: s_endpgm
 define void @sextload_global_v8i1_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i1> addrspace(1)* %in
+  %load = load <8 x i1>, <8 x i1> addrspace(1)* %in
   %ext = sext <8 x i1> %load to <8 x i64>
   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
   ret void
@@ -250,7 +250,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v16i1_to_v16i64:
 ; SI: s_endpgm
 define void @zextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i1> addrspace(1)* %in
+  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
   %ext = zext <16 x i1> %load to <16 x i64>
   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
   ret void
@@ -259,7 +259,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v16i1_to_v16i64:
 ; SI: s_endpgm
 define void @sextload_global_v16i1_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i1> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i1> addrspace(1)* %in
+  %load = load <16 x i1>, <16 x i1> addrspace(1)* %in
   %ext = sext <16 x i1> %load to <16 x i64>
   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
   ret void
@@ -268,7 +268,7 @@
 ; XFUNC-LABEL: {{^}}zextload_global_v32i1_to_v32i64:
 ; XSI: s_endpgm
 ; define void @zextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i1> addrspace(1)* %in
+;   %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
 ;   %ext = zext <32 x i1> %load to <32 x i64>
 ;   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
 ;   ret void
@@ -277,7 +277,7 @@
 ; XFUNC-LABEL: {{^}}sextload_global_v32i1_to_v32i64:
 ; XSI: s_endpgm
 ; define void @sextload_global_v32i1_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i1> addrspace(1)* %in
+;   %load = load <32 x i1>, <32 x i1> addrspace(1)* %in
 ;   %ext = sext <32 x i1> %load to <32 x i64>
 ;   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
 ;   ret void
@@ -286,7 +286,7 @@
 ; XFUNC-LABEL: {{^}}zextload_global_v64i1_to_v64i64:
 ; XSI: s_endpgm
 ; define void @zextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i1> addrspace(1)* %in
+;   %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
 ;   %ext = zext <64 x i1> %load to <64 x i64>
 ;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
 ;   ret void
@@ -295,7 +295,7 @@
 ; XFUNC-LABEL: {{^}}sextload_global_v64i1_to_v64i64:
 ; XSI: s_endpgm
 ; define void @sextload_global_v64i1_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i1> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i1> addrspace(1)* %in
+;   %load = load <64 x i1>, <64 x i1> addrspace(1)* %in
 ;   %ext = sext <64 x i1> %load to <64 x i64>
 ;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
 ;   ret void
diff --git a/llvm/test/CodeGen/R600/global-extload-i16.ll b/llvm/test/CodeGen/R600/global-extload-i16.ll
index f3e3312..103a40d 100644
--- a/llvm/test/CodeGen/R600/global-extload-i16.ll
+++ b/llvm/test/CodeGen/R600/global-extload-i16.ll
@@ -8,7 +8,7 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @zextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
-  %a = load i16 addrspace(1)* %in
+  %a = load i16, i16 addrspace(1)* %in
   %ext = zext i16 %a to i32
   store i32 %ext, i32 addrspace(1)* %out
   ret void
@@ -19,7 +19,7 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @sextload_global_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
-  %a = load i16 addrspace(1)* %in
+  %a = load i16, i16 addrspace(1)* %in
   %ext = sext i16 %a to i32
   store i32 %ext, i32 addrspace(1)* %out
   ret void
@@ -29,7 +29,7 @@
 ; SI: buffer_load_ushort
 ; SI: s_endpgm
 define void @zextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i16> addrspace(1)* %in
+  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
   %ext = zext <1 x i16> %load to <1 x i32>
   store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
   ret void
@@ -39,7 +39,7 @@
 ; SI: buffer_load_sshort
 ; SI: s_endpgm
 define void @sextload_global_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i16> addrspace(1)* %in
+  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
   %ext = sext <1 x i16> %load to <1 x i32>
   store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
   ret void
@@ -48,7 +48,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i32:
 ; SI: s_endpgm
 define void @zextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i16> addrspace(1)* %in
+  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
   %ext = zext <2 x i16> %load to <2 x i32>
   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
   ret void
@@ -57,7 +57,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i32:
 ; SI: s_endpgm
 define void @sextload_global_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i16> addrspace(1)* %in
+  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
   %ext = sext <2 x i16> %load to <2 x i32>
   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
   ret void
@@ -66,7 +66,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i32:
 ; SI: s_endpgm
 define void @zextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i16> addrspace(1)* %in
+  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
   %ext = zext <4 x i16> %load to <4 x i32>
   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
   ret void
@@ -75,7 +75,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i32:
 ; SI: s_endpgm
 define void @sextload_global_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i16> addrspace(1)* %in
+  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
   %ext = sext <4 x i16> %load to <4 x i32>
   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
   ret void
@@ -84,7 +84,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i32:
 ; SI: s_endpgm
 define void @zextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i16> addrspace(1)* %in
+  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
   %ext = zext <8 x i16> %load to <8 x i32>
   store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
   ret void
@@ -93,7 +93,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i32:
 ; SI: s_endpgm
 define void @sextload_global_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i16> addrspace(1)* %in
+  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
   %ext = sext <8 x i16> %load to <8 x i32>
   store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
   ret void
@@ -102,7 +102,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i32:
 ; SI: s_endpgm
 define void @zextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i16> addrspace(1)* %in
+  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
   %ext = zext <16 x i16> %load to <16 x i32>
   store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
   ret void
@@ -111,7 +111,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i32:
 ; SI: s_endpgm
 define void @sextload_global_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i16> addrspace(1)* %in
+  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
   %ext = sext <16 x i16> %load to <16 x i32>
   store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
   ret void
@@ -120,7 +120,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i32:
 ; SI: s_endpgm
 define void @zextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i16> addrspace(1)* %in
+  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
   %ext = zext <32 x i16> %load to <32 x i32>
   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
   ret void
@@ -129,7 +129,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i32:
 ; SI: s_endpgm
 define void @sextload_global_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i16> addrspace(1)* %in
+  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
   %ext = sext <32 x i16> %load to <32 x i32>
   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
   ret void
@@ -138,7 +138,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i32:
 ; SI: s_endpgm
 define void @zextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <64 x i16> addrspace(1)* %in
+  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
   %ext = zext <64 x i16> %load to <64 x i32>
   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
   ret void
@@ -147,7 +147,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i32:
 ; SI: s_endpgm
 define void @sextload_global_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <64 x i16> addrspace(1)* %in
+  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
   %ext = sext <64 x i16> %load to <64 x i32>
   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
   ret void
@@ -158,7 +158,7 @@
 ; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
 ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
 define void @zextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
-  %a = load i16 addrspace(1)* %in
+  %a = load i16, i16 addrspace(1)* %in
   %ext = zext i16 %a to i64
   store i64 %ext, i64 addrspace(1)* %out
   ret void
@@ -169,7 +169,7 @@
 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
 ; SI: buffer_store_dwordx2
 define void @sextload_global_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) nounwind {
-  %a = load i16 addrspace(1)* %in
+  %a = load i16, i16 addrspace(1)* %in
   %ext = sext i16 %a to i64
   store i64 %ext, i64 addrspace(1)* %out
   ret void
@@ -178,7 +178,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v1i16_to_v1i64:
 ; SI: s_endpgm
 define void @zextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i16> addrspace(1)* %in
+  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
   %ext = zext <1 x i16> %load to <1 x i64>
   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
   ret void
@@ -187,7 +187,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v1i16_to_v1i64:
 ; SI: s_endpgm
 define void @sextload_global_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i16> addrspace(1)* %in
+  %load = load <1 x i16>, <1 x i16> addrspace(1)* %in
   %ext = sext <1 x i16> %load to <1 x i64>
   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
   ret void
@@ -196,7 +196,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v2i16_to_v2i64:
 ; SI: s_endpgm
 define void @zextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i16> addrspace(1)* %in
+  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
   %ext = zext <2 x i16> %load to <2 x i64>
   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
   ret void
@@ -205,7 +205,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v2i16_to_v2i64:
 ; SI: s_endpgm
 define void @sextload_global_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i16> addrspace(1)* %in
+  %load = load <2 x i16>, <2 x i16> addrspace(1)* %in
   %ext = sext <2 x i16> %load to <2 x i64>
   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
   ret void
@@ -214,7 +214,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v4i16_to_v4i64:
 ; SI: s_endpgm
 define void @zextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i16> addrspace(1)* %in
+  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
   %ext = zext <4 x i16> %load to <4 x i64>
   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
   ret void
@@ -223,7 +223,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v4i16_to_v4i64:
 ; SI: s_endpgm
 define void @sextload_global_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i16> addrspace(1)* %in
+  %load = load <4 x i16>, <4 x i16> addrspace(1)* %in
   %ext = sext <4 x i16> %load to <4 x i64>
   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
   ret void
@@ -232,7 +232,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v8i16_to_v8i64:
 ; SI: s_endpgm
 define void @zextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i16> addrspace(1)* %in
+  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
   %ext = zext <8 x i16> %load to <8 x i64>
   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
   ret void
@@ -241,7 +241,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v8i16_to_v8i64:
 ; SI: s_endpgm
 define void @sextload_global_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i16> addrspace(1)* %in
+  %load = load <8 x i16>, <8 x i16> addrspace(1)* %in
   %ext = sext <8 x i16> %load to <8 x i64>
   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
   ret void
@@ -250,7 +250,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v16i16_to_v16i64:
 ; SI: s_endpgm
 define void @zextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i16> addrspace(1)* %in
+  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
   %ext = zext <16 x i16> %load to <16 x i64>
   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
   ret void
@@ -259,7 +259,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v16i16_to_v16i64:
 ; SI: s_endpgm
 define void @sextload_global_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i16> addrspace(1)* %in
+  %load = load <16 x i16>, <16 x i16> addrspace(1)* %in
   %ext = sext <16 x i16> %load to <16 x i64>
   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
   ret void
@@ -268,7 +268,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v32i16_to_v32i64:
 ; SI: s_endpgm
 define void @zextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i16> addrspace(1)* %in
+  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
   %ext = zext <32 x i16> %load to <32 x i64>
   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
   ret void
@@ -277,7 +277,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v32i16_to_v32i64:
 ; SI: s_endpgm
 define void @sextload_global_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i16> addrspace(1)* %in
+  %load = load <32 x i16>, <32 x i16> addrspace(1)* %in
   %ext = sext <32 x i16> %load to <32 x i64>
   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
   ret void
@@ -286,7 +286,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v64i16_to_v64i64:
 ; SI: s_endpgm
 define void @zextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <64 x i16> addrspace(1)* %in
+  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
   %ext = zext <64 x i16> %load to <64 x i64>
   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
   ret void
@@ -295,7 +295,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v64i16_to_v64i64:
 ; SI: s_endpgm
 define void @sextload_global_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* nocapture %in) nounwind {
-  %load = load <64 x i16> addrspace(1)* %in
+  %load = load <64 x i16>, <64 x i16> addrspace(1)* %in
   %ext = sext <64 x i16> %load to <64 x i64>
   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/global-extload-i32.ll b/llvm/test/CodeGen/R600/global-extload-i32.ll
index b3d5438..79b8345 100644
--- a/llvm/test/CodeGen/R600/global-extload-i32.ll
+++ b/llvm/test/CodeGen/R600/global-extload-i32.ll
@@ -7,7 +7,7 @@
 ; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
 ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
 define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %a = load i32 addrspace(1)* %in
+  %a = load i32, i32 addrspace(1)* %in
   %ext = zext i32 %a to i64
   store i64 %ext, i64 addrspace(1)* %out
   ret void
@@ -18,7 +18,7 @@
 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
 ; SI: buffer_store_dwordx2
 define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %a = load i32 addrspace(1)* %in
+  %a = load i32, i32 addrspace(1)* %in
   %ext = sext i32 %a to i64
   store i64 %ext, i64 addrspace(1)* %out
   ret void
@@ -29,7 +29,7 @@
 ; SI: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i32> addrspace(1)* %in
+  %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
   %ext = zext <1 x i32> %load to <1 x i64>
   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
   ret void
@@ -41,7 +41,7 @@
 ; SI: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i32> addrspace(1)* %in
+  %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
   %ext = sext <1 x i32> %load to <1 x i64>
   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
   ret void
@@ -53,7 +53,7 @@
 ; SI: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i32> addrspace(1)* %in
+  %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
   %ext = zext <2 x i32> %load to <2 x i64>
   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
   ret void
@@ -67,7 +67,7 @@
 ; SI-DAG: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i32> addrspace(1)* %in
+  %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
   %ext = sext <2 x i32> %load to <2 x i64>
   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
   ret void
@@ -81,7 +81,7 @@
 ; SI: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i32> addrspace(1)* %in
+  %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
   %ext = zext <4 x i32> %load to <4 x i64>
   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
   ret void
@@ -99,7 +99,7 @@
 ; SI-DAG: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i32> addrspace(1)* %in
+  %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
   %ext = sext <4 x i32> %load to <4 x i64>
   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
   ret void
@@ -124,7 +124,7 @@
 ; SI-DAG: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i32> addrspace(1)* %in
+  %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
   %ext = zext <8 x i32> %load to <8 x i64>
   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
   ret void
@@ -159,7 +159,7 @@
 
 ; SI: s_endpgm
 define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i32> addrspace(1)* %in
+  %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
   %ext = sext <8 x i32> %load to <8 x i64>
   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
   ret void
@@ -212,7 +212,7 @@
 ; SI-DAG: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i32> addrspace(1)* %in
+  %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
   %ext = sext <16 x i32> %load to <16 x i64>
   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
   ret void
@@ -255,7 +255,7 @@
 
 ; SI: s_endpgm
 define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i32> addrspace(1)* %in
+  %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
   %ext = zext <16 x i32> %load to <16 x i64>
   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
   ret void
@@ -369,7 +369,7 @@
 
 ; SI: s_endpgm
 define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i32> addrspace(1)* %in
+  %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
   %ext = sext <32 x i32> %load to <32 x i64>
   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
   ret void
@@ -450,7 +450,7 @@
 
 ; SI: s_endpgm
 define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
-  %load = load <32 x i32> addrspace(1)* %in
+  %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
   %ext = zext <32 x i32> %load to <32 x i64>
   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/global-extload-i8.ll b/llvm/test/CodeGen/R600/global-extload-i8.ll
index 4c37f3f..b31d5361 100644
--- a/llvm/test/CodeGen/R600/global-extload-i8.ll
+++ b/llvm/test/CodeGen/R600/global-extload-i8.ll
@@ -7,7 +7,7 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @zextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
-  %a = load i8 addrspace(1)* %in
+  %a = load i8, i8 addrspace(1)* %in
   %ext = zext i8 %a to i32
   store i32 %ext, i32 addrspace(1)* %out
   ret void
@@ -18,7 +18,7 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @sextload_global_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
-  %a = load i8 addrspace(1)* %in
+  %a = load i8, i8 addrspace(1)* %in
   %ext = sext i8 %a to i32
   store i32 %ext, i32 addrspace(1)* %out
   ret void
@@ -27,7 +27,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i32:
 ; SI: s_endpgm
 define void @zextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i8> addrspace(1)* %in
+  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
   %ext = zext <1 x i8> %load to <1 x i32>
   store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
   ret void
@@ -36,7 +36,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i32:
 ; SI: s_endpgm
 define void @sextload_global_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i8> addrspace(1)* %in
+  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
   %ext = sext <1 x i8> %load to <1 x i32>
   store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
   ret void
@@ -45,7 +45,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i32:
 ; SI: s_endpgm
 define void @zextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i8> addrspace(1)* %in
+  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
   %ext = zext <2 x i8> %load to <2 x i32>
   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
   ret void
@@ -54,7 +54,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i32:
 ; SI: s_endpgm
 define void @sextload_global_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i8> addrspace(1)* %in
+  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
   %ext = sext <2 x i8> %load to <2 x i32>
   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
   ret void
@@ -63,7 +63,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i32:
 ; SI: s_endpgm
 define void @zextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i8> addrspace(1)* %in
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
   %ext = zext <4 x i8> %load to <4 x i32>
   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
   ret void
@@ -72,7 +72,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i32:
 ; SI: s_endpgm
 define void @sextload_global_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i8> addrspace(1)* %in
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
   %ext = sext <4 x i8> %load to <4 x i32>
   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
   ret void
@@ -81,7 +81,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i32:
 ; SI: s_endpgm
 define void @zextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i8> addrspace(1)* %in
+  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
   %ext = zext <8 x i8> %load to <8 x i32>
   store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
   ret void
@@ -90,7 +90,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i32:
 ; SI: s_endpgm
 define void @sextload_global_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i8> addrspace(1)* %in
+  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
   %ext = sext <8 x i8> %load to <8 x i32>
   store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
   ret void
@@ -99,7 +99,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i32:
 ; SI: s_endpgm
 define void @zextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i8> addrspace(1)* %in
+  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
   %ext = zext <16 x i8> %load to <16 x i32>
   store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
   ret void
@@ -108,7 +108,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i32:
 ; SI: s_endpgm
 define void @sextload_global_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i8> addrspace(1)* %in
+  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
   %ext = sext <16 x i8> %load to <16 x i32>
   store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
   ret void
@@ -117,7 +117,7 @@
 ; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i32:
 ; XSI: s_endpgm
 ; define void @zextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i8> addrspace(1)* %in
+;   %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
 ;   %ext = zext <32 x i8> %load to <32 x i32>
 ;   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
 ;   ret void
@@ -126,7 +126,7 @@
 ; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i32:
 ; XSI: s_endpgm
 ; define void @sextload_global_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i8> addrspace(1)* %in
+;   %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
 ;   %ext = sext <32 x i8> %load to <32 x i32>
 ;   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
 ;   ret void
@@ -135,7 +135,7 @@
 ; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i32:
 ; XSI: s_endpgm
 ; define void @zextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i8> addrspace(1)* %in
+;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
 ;   %ext = zext <64 x i8> %load to <64 x i32>
 ;   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
 ;   ret void
@@ -144,7 +144,7 @@
 ; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i32:
 ; XSI: s_endpgm
 ; define void @sextload_global_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i8> addrspace(1)* %in
+;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
 ;   %ext = sext <64 x i8> %load to <64 x i32>
 ;   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
 ;   ret void
@@ -155,7 +155,7 @@
 ; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
 ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
 define void @zextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
-  %a = load i8 addrspace(1)* %in
+  %a = load i8, i8 addrspace(1)* %in
   %ext = zext i8 %a to i64
   store i64 %ext, i64 addrspace(1)* %out
   ret void
@@ -166,7 +166,7 @@
 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
 ; SI: buffer_store_dwordx2
 define void @sextload_global_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
-  %a = load i8 addrspace(1)* %in
+  %a = load i8, i8 addrspace(1)* %in
   %ext = sext i8 %a to i64
   store i64 %ext, i64 addrspace(1)* %out
   ret void
@@ -175,7 +175,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v1i8_to_v1i64:
 ; SI: s_endpgm
 define void @zextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i8> addrspace(1)* %in
+  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
   %ext = zext <1 x i8> %load to <1 x i64>
   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
   ret void
@@ -184,7 +184,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v1i8_to_v1i64:
 ; SI: s_endpgm
 define void @sextload_global_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <1 x i8> addrspace(1)* %in
+  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
   %ext = sext <1 x i8> %load to <1 x i64>
   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
   ret void
@@ -193,7 +193,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v2i8_to_v2i64:
 ; SI: s_endpgm
 define void @zextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i8> addrspace(1)* %in
+  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
   %ext = zext <2 x i8> %load to <2 x i64>
   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
   ret void
@@ -202,7 +202,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v2i8_to_v2i64:
 ; SI: s_endpgm
 define void @sextload_global_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <2 x i8> addrspace(1)* %in
+  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
   %ext = sext <2 x i8> %load to <2 x i64>
   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
   ret void
@@ -211,7 +211,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v4i8_to_v4i64:
 ; SI: s_endpgm
 define void @zextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i8> addrspace(1)* %in
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
   %ext = zext <4 x i8> %load to <4 x i64>
   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
   ret void
@@ -220,7 +220,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v4i8_to_v4i64:
 ; SI: s_endpgm
 define void @sextload_global_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <4 x i8> addrspace(1)* %in
+  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
   %ext = sext <4 x i8> %load to <4 x i64>
   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
   ret void
@@ -229,7 +229,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v8i8_to_v8i64:
 ; SI: s_endpgm
 define void @zextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i8> addrspace(1)* %in
+  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
   %ext = zext <8 x i8> %load to <8 x i64>
   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
   ret void
@@ -238,7 +238,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v8i8_to_v8i64:
 ; SI: s_endpgm
 define void @sextload_global_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <8 x i8> addrspace(1)* %in
+  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
   %ext = sext <8 x i8> %load to <8 x i64>
   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
   ret void
@@ -247,7 +247,7 @@
 ; FUNC-LABEL: {{^}}zextload_global_v16i8_to_v16i64:
 ; SI: s_endpgm
 define void @zextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i8> addrspace(1)* %in
+  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
   %ext = zext <16 x i8> %load to <16 x i64>
   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
   ret void
@@ -256,7 +256,7 @@
 ; FUNC-LABEL: {{^}}sextload_global_v16i8_to_v16i64:
 ; SI: s_endpgm
 define void @sextload_global_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* nocapture %in) nounwind {
-  %load = load <16 x i8> addrspace(1)* %in
+  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
   %ext = sext <16 x i8> %load to <16 x i64>
   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
   ret void
@@ -265,7 +265,7 @@
 ; XFUNC-LABEL: {{^}}zextload_global_v32i8_to_v32i64:
 ; XSI: s_endpgm
 ; define void @zextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i8> addrspace(1)* %in
+;   %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
 ;   %ext = zext <32 x i8> %load to <32 x i64>
 ;   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
 ;   ret void
@@ -274,7 +274,7 @@
 ; XFUNC-LABEL: {{^}}sextload_global_v32i8_to_v32i64:
 ; XSI: s_endpgm
 ; define void @sextload_global_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <32 x i8> addrspace(1)* %in
+;   %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
 ;   %ext = sext <32 x i8> %load to <32 x i64>
 ;   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
 ;   ret void
@@ -283,7 +283,7 @@
 ; XFUNC-LABEL: {{^}}zextload_global_v64i8_to_v64i64:
 ; XSI: s_endpgm
 ; define void @zextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i8> addrspace(1)* %in
+;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
 ;   %ext = zext <64 x i8> %load to <64 x i64>
 ;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
 ;   ret void
@@ -292,7 +292,7 @@
 ; XFUNC-LABEL: {{^}}sextload_global_v64i8_to_v64i64:
 ; XSI: s_endpgm
 ; define void @sextload_global_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* nocapture %in) nounwind {
-;   %load = load <64 x i8> addrspace(1)* %in
+;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
 ;   %ext = sext <64 x i8> %load to <64 x i64>
 ;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
 ;   ret void
diff --git a/llvm/test/CodeGen/R600/global-zero-initializer.ll b/llvm/test/CodeGen/R600/global-zero-initializer.ll
index 0a54248..45aa8bf 100644
--- a/llvm/test/CodeGen/R600/global-zero-initializer.ll
+++ b/llvm/test/CodeGen/R600/global-zero-initializer.ll
@@ -7,7 +7,7 @@
 
 define void @load_init_global_global(i32 addrspace(1)* %out, i1 %p) {
  %gep = getelementptr [256 x i32], [256 x i32] addrspace(1)* @lds, i32 0, i32 10
-  %ld = load i32 addrspace(1)* %gep
+  %ld = load i32, i32 addrspace(1)* %gep
   store i32 %ld, i32 addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/gv-const-addrspace-fail.ll b/llvm/test/CodeGen/R600/gv-const-addrspace-fail.ll
index 18062f0..014b0a5 100644
--- a/llvm/test/CodeGen/R600/gv-const-addrspace-fail.ll
+++ b/llvm/test/CodeGen/R600/gv-const-addrspace-fail.ll
@@ -10,7 +10,7 @@
 ; SI: s_endpgm
 define void @test_i8( i32 %s, i8 addrspace(1)* %out) #3 {
   %arrayidx = getelementptr inbounds [1 x i8], [1 x i8] addrspace(2)* @a, i32 0, i32 %s
-  %1 = load i8 addrspace(2)* %arrayidx, align 1
+  %1 = load i8, i8 addrspace(2)* %arrayidx, align 1
   store i8 %1, i8 addrspace(1)* %out
   ret void
 }
@@ -23,7 +23,7 @@
 ; SI: s_endpgm
 define void @test_i16( i32 %s, i16 addrspace(1)* %out) #3 {
   %arrayidx = getelementptr inbounds [1 x i16], [1 x i16] addrspace(2)* @b, i32 0, i32 %s
-  %1 = load i16 addrspace(2)* %arrayidx, align 2
+  %1 = load i16, i16 addrspace(2)* %arrayidx, align 2
   store i16 %1, i16 addrspace(1)* %out
   ret void
 }
@@ -36,7 +36,7 @@
 ; FUNC-LABEL: {{^}}struct_bar_gv_load:
 define void @struct_bar_gv_load(i8 addrspace(1)* %out, i32 %index) {
   %gep = getelementptr inbounds [1 x %struct.bar], [1 x %struct.bar] addrspace(2)* @struct_bar_gv, i32 0, i32 0, i32 1, i32 %index
-  %load = load i8 addrspace(2)* %gep, align 1
+  %load = load i8, i8 addrspace(2)* %gep, align 1
   store i8 %load, i8 addrspace(1)* %out, align 1
   ret void
 }
@@ -51,7 +51,7 @@
 ; FUNC-LABEL: {{^}}array_vector_gv_load:
 define void @array_vector_gv_load(<4 x i32> addrspace(1)* %out, i32 %index) {
   %gep = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>] addrspace(2)* @array_vector_gv, i32 0, i32 %index
-  %load = load <4 x i32> addrspace(2)* %gep, align 16
+  %load = load <4 x i32>, <4 x i32> addrspace(2)* %gep, align 16
   store <4 x i32> %load, <4 x i32> addrspace(1)* %out, align 16
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/gv-const-addrspace.ll b/llvm/test/CodeGen/R600/gv-const-addrspace.ll
index 2fb6672..3c1fc6c 100644
--- a/llvm/test/CodeGen/R600/gv-const-addrspace.ll
+++ b/llvm/test/CodeGen/R600/gv-const-addrspace.ll
@@ -22,7 +22,7 @@
 define void @float(float addrspace(1)* %out, i32 %index) {
 entry:
   %0 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
-  %1 = load float addrspace(2)* %0
+  %1 = load float, float addrspace(2)* %0
   store float %1, float addrspace(1)* %out
   ret void
 }
@@ -45,7 +45,7 @@
 define void @i32(i32 addrspace(1)* %out, i32 %index) {
 entry:
   %0 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(2)* @i32_gv, i32 0, i32 %index
-  %1 = load i32 addrspace(2)* %0
+  %1 = load i32, i32 addrspace(2)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -60,7 +60,7 @@
 
 define void @struct_foo_gv_load(i32 addrspace(1)* %out, i32 %index) {
   %gep = getelementptr inbounds [1 x %struct.foo], [1 x %struct.foo] addrspace(2)* @struct_foo_gv, i32 0, i32 0, i32 1, i32 %index
-  %load = load i32 addrspace(2)* %gep, align 4
+  %load = load i32, i32 addrspace(2)* %gep, align 4
   store i32 %load, i32 addrspace(1)* %out, align 4
   ret void
 }
@@ -76,7 +76,7 @@
 ; VI: s_load_dword
 define void @array_v1_gv_load(<1 x i32> addrspace(1)* %out, i32 %index) {
   %gep = getelementptr inbounds [4 x <1 x i32>], [4 x <1 x i32>] addrspace(2)* @array_v1_gv, i32 0, i32 %index
-  %load = load <1 x i32> addrspace(2)* %gep, align 4
+  %load = load <1 x i32>, <1 x i32> addrspace(2)* %gep, align 4
   store <1 x i32> %load, <1 x i32> addrspace(1)* %out, align 4
   ret void
 }
@@ -88,7 +88,7 @@
 
 if:
   %1 = getelementptr inbounds [5 x float], [5 x float] addrspace(2)* @float_gv, i32 0, i32 %index
-  %2 = load float addrspace(2)* %1
+  %2 = load float, float addrspace(2)* %1
   store float %2, float addrspace(1)* %out
   br label %endif
 
diff --git a/llvm/test/CodeGen/R600/half.ll b/llvm/test/CodeGen/R600/half.ll
index 35a41c5..42ee788 100644
--- a/llvm/test/CodeGen/R600/half.ll
+++ b/llvm/test/CodeGen/R600/half.ll
@@ -5,7 +5,7 @@
 ; CHECK-LABEL: {{^}}test_load_store:
 ; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
 ; CHECK: buffer_store_short [[TMP]]
-  %val = load half addrspace(1)* %in
+  %val = load half, half addrspace(1)* %in
   store half %val, half addrspace(1) * %out
   ret void
 }
@@ -14,7 +14,7 @@
 ; CHECK-LABEL: {{^}}test_bitcast_from_half:
 ; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
 ; CHECK: buffer_store_short [[TMP]]
-  %val = load half addrspace(1) * %in
+  %val = load half, half addrspace(1) * %in
   %val_int = bitcast half %val to i16
   store i16 %val_int, i16 addrspace(1)* %out
   ret void
@@ -24,7 +24,7 @@
 ; CHECK-LABEL: {{^}}test_bitcast_to_half:
 ; CHECK: buffer_load_ushort [[TMP:v[0-9]+]]
 ; CHECK: buffer_store_short [[TMP]]
-  %val = load i16 addrspace(1)* %in
+  %val = load i16, i16 addrspace(1)* %in
   %val_fp = bitcast i16 %val to half
   store half %val_fp, half addrspace(1)* %out
   ret void
@@ -34,7 +34,7 @@
 ; CHECK-LABEL: {{^}}test_extend32:
 ; CHECK: v_cvt_f32_f16_e32
 
-  %val16 = load half addrspace(1)* %in
+  %val16 = load half, half addrspace(1)* %in
   %val32 = fpext half %val16 to float
   store float %val32, float addrspace(1)* %out
   ret void
@@ -45,7 +45,7 @@
 ; CHECK: v_cvt_f32_f16_e32
 ; CHECK: v_cvt_f64_f32_e32
 
-  %val16 = load half addrspace(1)* %in
+  %val16 = load half, half addrspace(1)* %in
   %val64 = fpext half %val16 to double
   store double %val64, double addrspace(1)* %out
   ret void
@@ -55,7 +55,7 @@
 ; CHECK-LABEL: {{^}}test_trunc32:
 ; CHECK: v_cvt_f16_f32_e32
 
-  %val32 = load float addrspace(1)* %in
+  %val32 = load float, float addrspace(1)* %in
   %val16 = fptrunc float %val32 to half
   store half %val16, half addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/i8-to-double-to-float.ll b/llvm/test/CodeGen/R600/i8-to-double-to-float.ll
index 6047466..c218e19 100644
--- a/llvm/test/CodeGen/R600/i8-to-double-to-float.ll
+++ b/llvm/test/CodeGen/R600/i8-to-double-to-float.ll
@@ -3,7 +3,7 @@
 ;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
 define void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) {
-  %1 = load i8 addrspace(1)* %in
+  %1 = load i8, i8 addrspace(1)* %in
   %2 = uitofp i8 %1 to double
   %3 = fptrunc double %2 to float
   store float %3, float addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/icmp-select-sete-reverse-args.ll b/llvm/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
index 0cde06c..60e59a5 100644
--- a/llvm/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
+++ b/llvm/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
@@ -8,9 +8,9 @@
 
 define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
-  %0 = load i32 addrspace(1)* %in
+  %0 = load i32, i32 addrspace(1)* %in
   %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
-  %1 = load i32 addrspace(1)* %arrayidx1
+  %1 = load i32, i32 addrspace(1)* %arrayidx1
   %cmp = icmp eq i32 %0, %1
   %value = select i1 %cmp, i32 0, i32 -1
   store i32 %value, i32 addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/imm.ll b/llvm/test/CodeGen/R600/imm.ll
index 9b95fd6..8917cd6 100644
--- a/llvm/test/CodeGen/R600/imm.ll
+++ b/llvm/test/CodeGen/R600/imm.ll
@@ -225,7 +225,7 @@
 ; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
 ; CHECK: buffer_store_dword [[REG]]
 define void @commute_add_inline_imm_0.5_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
-  %x = load float addrspace(1)* %in
+  %x = load float, float addrspace(1)* %in
   %y = fadd float %x, 0.5
   store float %y, float addrspace(1)* %out
   ret void
@@ -236,7 +236,7 @@
 ; CHECK: v_add_f32_e32 [[REG:v[0-9]+]], 0x44800000, [[VAL]]
 ; CHECK: buffer_store_dword [[REG]]
 define void @commute_add_literal_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
-  %x = load float addrspace(1)* %in
+  %x = load float, float addrspace(1)* %in
   %y = fadd float %x, 1024.0
   store float %y, float addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/indirect-private-64.ll b/llvm/test/CodeGen/R600/indirect-private-64.ll
index 378db4d..d63e1b6 100644
--- a/llvm/test/CodeGen/R600/indirect-private-64.ll
+++ b/llvm/test/CodeGen/R600/indirect-private-64.ll
@@ -14,12 +14,12 @@
 ; SI-PROMOTE: ds_write_b64
 ; SI-PROMOTE: ds_read_b64
 define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind {
-  %val = load double addrspace(1)* %in, align 8
+  %val = load double, double addrspace(1)* %in, align 8
   %array = alloca double, i32 16, align 8
   %ptr = getelementptr double, double* %array, i32 %b
   store double %val, double* %ptr, align 8
   call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
-  %result = load double* %ptr, align 8
+  %result = load double, double* %ptr, align 8
   store double %result, double addrspace(1)* %out, align 8
   ret void
 }
@@ -38,12 +38,12 @@
 ; SI-PROMOTE: ds_read_b32
 ; SI-PROMOTE: ds_read_b32
 define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind {
-  %val = load <2 x double> addrspace(1)* %in, align 16
+  %val = load <2 x double>, <2 x double> addrspace(1)* %in, align 16
   %array = alloca <2 x double>, i32 16, align 16
   %ptr = getelementptr <2 x double>, <2 x double>* %array, i32 %b
   store <2 x double> %val, <2 x double>* %ptr, align 16
   call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
-  %result = load <2 x double>* %ptr, align 16
+  %result = load <2 x double>, <2 x double>* %ptr, align 16
   store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16
   ret void
 }
@@ -56,12 +56,12 @@
 ; SI-PROMOTE: ds_write_b64
 ; SI-PROMOTE: ds_read_b64
 define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i32 %b) nounwind {
-  %val = load i64 addrspace(1)* %in, align 8
+  %val = load i64, i64 addrspace(1)* %in, align 8
   %array = alloca i64, i32 16, align 8
   %ptr = getelementptr i64, i64* %array, i32 %b
   store i64 %val, i64* %ptr, align 8
   call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
-  %result = load i64* %ptr, align 8
+  %result = load i64, i64* %ptr, align 8
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
 }
@@ -80,12 +80,12 @@
 ; SI-PROMOTE: ds_read_b32
 ; SI-PROMOTE: ds_read_b32
 define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind {
-  %val = load <2 x i64> addrspace(1)* %in, align 16
+  %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
   %array = alloca <2 x i64>, i32 16, align 16
   %ptr = getelementptr <2 x i64>, <2 x i64>* %array, i32 %b
   store <2 x i64> %val, <2 x i64>* %ptr, align 16
   call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
-  %result = load <2 x i64>* %ptr, align 16
+  %result = load <2 x i64>, <2 x i64>* %ptr, align 16
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out, align 16
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/insert_vector_elt.ll b/llvm/test/CodeGen/R600/insert_vector_elt.ll
index 67b0a7a..6de3d40 100644
--- a/llvm/test/CodeGen/R600/insert_vector_elt.ll
+++ b/llvm/test/CodeGen/R600/insert_vector_elt.ll
@@ -185,13 +185,13 @@
   br i1 %1, label %if, label %else
 
 if:
-  %2 = load i32 addrspace(1)* %in
+  %2 = load i32, i32 addrspace(1)* %in
   %3 = insertelement <2 x i32> %0, i32 %2, i32 1
   br label %endif
 
 else:
   %4 = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %5 = load i32 addrspace(1)* %4
+  %5 = load i32, i32 addrspace(1)* %4
   %6 = insertelement <2 x i32> %0, i32 %5, i32 1
   br label %endif
 
diff --git a/llvm/test/CodeGen/R600/jump-address.ll b/llvm/test/CodeGen/R600/jump-address.ll
index a1cd388..9dbc21c 100644
--- a/llvm/test/CodeGen/R600/jump-address.ll
+++ b/llvm/test/CodeGen/R600/jump-address.ll
@@ -6,7 +6,7 @@
 
 define void @main() #0 {
 main_body:
-  %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %1 = extractelement <4 x float> %0, i32 0
   %2 = bitcast float %1 to i32
   %3 = icmp eq i32 %2, 0
@@ -17,7 +17,7 @@
   br i1 %7, label %ENDIF, label %ELSE
 
 ELSE:                                             ; preds = %main_body
-  %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %9 = extractelement <4 x float> %8, i32 0
   %10 = bitcast float %9 to i32
   %11 = icmp eq i32 %10, 1
@@ -40,7 +40,7 @@
   ret void
 
 IF13:                                             ; preds = %ELSE
-  %20 = load <4 x float> addrspace(8)* null
+  %20 = load <4 x float>, <4 x float> addrspace(8)* null
   %21 = extractelement <4 x float> %20, i32 0
   %22 = fsub float -0.000000e+00, %21
   %23 = fadd float 0xFFF8000000000000, %22
diff --git a/llvm/test/CodeGen/R600/kcache-fold.ll b/llvm/test/CodeGen/R600/kcache-fold.ll
index 27840b2..6c405fa 100644
--- a/llvm/test/CodeGen/R600/kcache-fold.ll
+++ b/llvm/test/CodeGen/R600/kcache-fold.ll
@@ -4,35 +4,35 @@
 ; CHECK: MOV * T{{[0-9]+\.[XYZW], KC0}}
 define void @main1() {
 main_body:
-  %0 = load <4 x float> addrspace(8)* null
+  %0 = load <4 x float>, <4 x float> addrspace(8)* null
   %1 = extractelement <4 x float> %0, i32 0
-  %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %3 = extractelement <4 x float> %2, i32 0
-  %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %5 = extractelement <4 x float> %4, i32 0
   %6 = fcmp ogt float %1, 0.000000e+00
   %7 = select i1 %6, float %3, float %5
-  %8 = load <4 x float> addrspace(8)* null
+  %8 = load <4 x float>, <4 x float> addrspace(8)* null
   %9 = extractelement <4 x float> %8, i32 1
-  %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %11 = extractelement <4 x float> %10, i32 1
-  %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %13 = extractelement <4 x float> %12, i32 1
   %14 = fcmp ogt float %9, 0.000000e+00
   %15 = select i1 %14, float %11, float %13
-  %16 = load <4 x float> addrspace(8)* null
+  %16 = load <4 x float>, <4 x float> addrspace(8)* null
   %17 = extractelement <4 x float> %16, i32 2
-  %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %18 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %19 = extractelement <4 x float> %18, i32 2
-  %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %21 = extractelement <4 x float> %20, i32 2
   %22 = fcmp ogt float %17, 0.000000e+00
   %23 = select i1 %22, float %19, float %21
-  %24 = load <4 x float> addrspace(8)* null
+  %24 = load <4 x float>, <4 x float> addrspace(8)* null
   %25 = extractelement <4 x float> %24, i32 3
-  %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %26 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %27 = extractelement <4 x float> %26, i32 3
-  %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %29 = extractelement <4 x float> %28, i32 3
   %30 = fcmp ogt float %25, 0.000000e+00
   %31 = select i1 %30, float %27, float %29
@@ -52,35 +52,35 @@
 ; CHECK-NOT: MOV
 define void @main2() {
 main_body:
-  %0 = load <4 x float> addrspace(8)* null
+  %0 = load <4 x float>, <4 x float> addrspace(8)* null
   %1 = extractelement <4 x float> %0, i32 0
-  %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %3 = extractelement <4 x float> %2, i32 0
-  %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %5 = extractelement <4 x float> %4, i32 1
   %6 = fcmp ogt float %1, 0.000000e+00
   %7 = select i1 %6, float %3, float %5
-  %8 = load <4 x float> addrspace(8)* null
+  %8 = load <4 x float>, <4 x float> addrspace(8)* null
   %9 = extractelement <4 x float> %8, i32 1
-  %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %11 = extractelement <4 x float> %10, i32 0
-  %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %13 = extractelement <4 x float> %12, i32 1
   %14 = fcmp ogt float %9, 0.000000e+00
   %15 = select i1 %14, float %11, float %13
-  %16 = load <4 x float> addrspace(8)* null
+  %16 = load <4 x float>, <4 x float> addrspace(8)* null
   %17 = extractelement <4 x float> %16, i32 2
-  %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %18 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %19 = extractelement <4 x float> %18, i32 3
-  %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %21 = extractelement <4 x float> %20, i32 2
   %22 = fcmp ogt float %17, 0.000000e+00
   %23 = select i1 %22, float %19, float %21
-  %24 = load <4 x float> addrspace(8)* null
+  %24 = load <4 x float>, <4 x float> addrspace(8)* null
   %25 = extractelement <4 x float> %24, i32 3
-  %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %26 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %27 = extractelement <4 x float> %26, i32 3
-  %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %29 = extractelement <4 x float> %28, i32 2
   %30 = fcmp ogt float %25, 0.000000e+00
   %31 = select i1 %30, float %27, float %29
diff --git a/llvm/test/CodeGen/R600/large-alloca.ll b/llvm/test/CodeGen/R600/large-alloca.ll
index 26ae217..671833d 100644
--- a/llvm/test/CodeGen/R600/large-alloca.ll
+++ b/llvm/test/CodeGen/R600/large-alloca.ll
@@ -8,7 +8,7 @@
   %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191
   store i32 %x, i32* %gep
   %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y
-  %0 = load i32* %gep1
+  %0 = load i32, i32* %gep1
   store i32 %0, i32 addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/large-constant-initializer.ll b/llvm/test/CodeGen/R600/large-constant-initializer.ll
index c8671ef..81c09ae 100644
--- a/llvm/test/CodeGen/R600/large-constant-initializer.ll
+++ b/llvm/test/CodeGen/R600/large-constant-initializer.ll
@@ -5,7 +5,7 @@
 @gv = external unnamed_addr addrspace(2) constant [239 x i32], align 4
 
 define void @opencv_cvtfloat_crash(i32 addrspace(1)* %out, i32 %x) nounwind {
-  %val = load i32 addrspace(2)* getelementptr ([239 x i32] addrspace(2)* @gv, i64 0, i64 239), align 4
+  %val = load i32, i32 addrspace(2)* getelementptr ([239 x i32] addrspace(2)* @gv, i64 0, i64 239), align 4
   %mul12 = mul nsw i32 %val, 7
   br i1 undef, label %exit, label %bb
 
diff --git a/llvm/test/CodeGen/R600/lds-initializer.ll b/llvm/test/CodeGen/R600/lds-initializer.ll
index 1a80a57..bf8df63 100644
--- a/llvm/test/CodeGen/R600/lds-initializer.ll
+++ b/llvm/test/CodeGen/R600/lds-initializer.ll
@@ -7,7 +7,7 @@
 
 define void @load_init_lds_global(i32 addrspace(1)* %out, i1 %p) {
  %gep = getelementptr [8 x i32], [8 x i32] addrspace(3)* @lds, i32 0, i32 10
-  %ld = load i32 addrspace(3)* %gep
+  %ld = load i32, i32 addrspace(3)* %gep
   store i32 %ld, i32 addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/lds-oqap-crash.ll b/llvm/test/CodeGen/R600/lds-oqap-crash.ll
index fbcd778..6ff6fc3 100644
--- a/llvm/test/CodeGen/R600/lds-oqap-crash.ll
+++ b/llvm/test/CodeGen/R600/lds-oqap-crash.ll
@@ -12,7 +12,7 @@
 ; CHECK: {{^}}lds_crash:
 define void @lds_crash(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %a, i32 %b, i32 %c) {
 entry:
-  %0 = load i32 addrspace(3)* %in
+  %0 = load i32, i32 addrspace(3)* %in
   ; This block needs to be > 115 ISA instructions to hit the bug,
   ; so we'll use udiv instructions.
   %div0 = udiv i32 %0, %b
diff --git a/llvm/test/CodeGen/R600/lds-output-queue.ll b/llvm/test/CodeGen/R600/lds-output-queue.ll
index f34cad4..44ffc36 100644
--- a/llvm/test/CodeGen/R600/lds-output-queue.ll
+++ b/llvm/test/CodeGen/R600/lds-output-queue.ll
@@ -13,11 +13,11 @@
 define void @lds_input_queue(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %index) {
 entry:
   %0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
-  %1 = load i32 addrspace(3)* %0
+  %1 = load i32, i32 addrspace(3)* %0
   call void @llvm.AMDGPU.barrier.local()
 
   ; This will start a new clause for the vertex fetch
-  %2 = load i32 addrspace(1)* %in
+  %2 = load i32, i32 addrspace(1)* %in
   %3 = add i32 %1, %2
   store i32 %3, i32 addrspace(1)* %out
   ret void
@@ -41,8 +41,8 @@
 ; has been declared in the local memory space:
 ;
 ;  %0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
-;  %1 = load i32 addrspace(3)* %0
-;  %2 = load i32 addrspace(1)* %in
+;  %1 = load i32, i32 addrspace(3)* %0
+;  %2 = load i32, i32 addrspace(1)* %in
 ;
 ; The instruction selection phase will generate ISA that looks like this:
 ; %OQAP = LDS_READ_RET
@@ -91,8 +91,8 @@
 define void @local_global_alias(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 0
-  %1 = load i32 addrspace(3)* %0
-  %2 = load i32 addrspace(1)* %in
+  %1 = load i32, i32 addrspace(3)* %0
+  %2 = load i32, i32 addrspace(1)* %in
   %3 = add i32 %2, %1
   store i32 %3, i32 addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/lds-zero-initializer.ll b/llvm/test/CodeGen/R600/lds-zero-initializer.ll
index f18462e..fb51bc0 100644
--- a/llvm/test/CodeGen/R600/lds-zero-initializer.ll
+++ b/llvm/test/CodeGen/R600/lds-zero-initializer.ll
@@ -7,7 +7,7 @@
 
 define void @load_zeroinit_lds_global(i32 addrspace(1)* %out, i1 %p) {
  %gep = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds, i32 0, i32 10
-  %ld = load i32 addrspace(3)* %gep
+  %ld = load i32, i32 addrspace(3)* %gep
   store i32 %ld, i32 addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.abs.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.abs.ll
index 8bc2583..8bf094b8 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.abs.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.abs.ll
@@ -28,7 +28,7 @@
 ; EG: SUB_INT
 ; EG: MAX_INT
 define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
-  %val = load i32 addrspace(1)* %src, align 4
+  %val = load i32, i32 addrspace(1)* %src, align 4
   %abs = call i32 @llvm.AMDGPU.abs(i32 %val) nounwind readnone
   store i32 %abs, i32 addrspace(1)* %out, align 4
   ret void
@@ -42,7 +42,7 @@
 ; EG: SUB_INT
 ; EG: MAX_INT
 define void @abs_i32_legacy_amdil(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
-  %val = load i32 addrspace(1)* %src, align 4
+  %val = load i32, i32 addrspace(1)* %src, align 4
   %abs = call i32 @llvm.AMDIL.abs.i32(i32 %val) nounwind readnone
   store i32 %abs, i32 addrspace(1)* %out, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
index 0268e5b..db88397 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.barrier.global.ll
@@ -17,7 +17,7 @@
   %3 = sub i32 %2, 1
   %4 = sub i32 %3, %0
   %5 = getelementptr i32, i32 addrspace(1)* %out, i32 %4
-  %6 = load i32 addrspace(1)* %5
+  %6 = load i32, i32 addrspace(1)* %5
   store i32 %6, i32 addrspace(1)* %1
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
index 3ca9f3e..48fb2e0 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.barrier.local.ll
@@ -18,7 +18,7 @@
   %3 = sub i32 %2, 1
   %4 = sub i32 %3, %0
   %5 = getelementptr i32, i32 addrspace(1)* %out, i32 %4
-  %6 = load i32 addrspace(1)* %5
+  %6 = load i32, i32 addrspace(1)* %5
   store i32 %6, i32 addrspace(1)* %1
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
index 2ec2546..ffd3d6c 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.i32.ll
@@ -44,7 +44,7 @@
 ; FUNC-LABEL: {{^}}v_bfe_print_arg:
 ; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
 define void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) nounwind {
-  %load = load i32 addrspace(1)* %src0, align 4
+  %load = load i32, i32 addrspace(1)* %src0, align 4
   %bfe_i32 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 2, i32 8) nounwind readnone
   store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
   ret void
@@ -75,7 +75,7 @@
 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
 ; SI: s_endpgm
 define void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 1, i32 31)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -89,7 +89,7 @@
 ; SI: buffer_store_dword [[VREG]],
 ; SI: s_endpgm
 define void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 0, i32 31)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -102,7 +102,7 @@
 ; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
 ; SI: s_endpgm
 define void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -115,7 +115,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 31, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -127,7 +127,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 1, i32 31)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -139,7 +139,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 8, i32 24)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -151,7 +151,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 24, i32 8)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -162,7 +162,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = ashr i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
@@ -173,7 +173,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = lshr i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shl, i32 31, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
@@ -418,7 +418,7 @@
 ; XSI-NOT: SHR
 ; XSI: buffer_store_dword [[BFE]],
 define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %x, i32 0, i32 24)
   %shl = shl i32 %bfe, 8
   %ashr = ashr i32 %shl, 8
@@ -434,7 +434,7 @@
 ; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
 ; SI: buffer_store_dword [[TMP2]]
 define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %src = load i32 addrspace(1)* %in, align 4
+  %src = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %src, i32 1, i32 16) nounwind readnone
   %div = sdiv i32 %bfe, 2
   store i32 %div, i32 addrspace(1)* %out, align 4
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
index 6cd0108..83bdb15 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.bfe.u32.ll
@@ -65,7 +65,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
-  %load = load i8 addrspace(1)* %in
+  %load = load i8, i8 addrspace(1)* %in
   %ext = zext i8 %load to i32
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -79,7 +79,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 1
   %ext = and i32 %add, 255
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 8)
@@ -94,7 +94,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 1
   %ext = and i32 %add, 65535
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 0, i32 16)
@@ -108,7 +108,7 @@
 ; SI: bfe
 ; SI: s_endpgm
 define void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 1
   %ext = and i32 %add, 255
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 1, i32 8)
@@ -123,7 +123,7 @@
 ; SI-NEXT: bfe
 ; SI: s_endpgm
 define void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 1
   %ext = and i32 %add, 255
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 3, i32 8)
@@ -138,7 +138,7 @@
 ; SI-NEXT: bfe
 ; SI: s_endpgm
 define void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 1
   %ext = and i32 %add, 255
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 7, i32 8)
@@ -152,7 +152,7 @@
 ; SI-NEXT: bfe
 ; SI: s_endpgm
 define void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %add = add i32 %load, 1
   %ext = and i32 %add, 65535
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %ext, i32 8, i32 8)
@@ -166,14 +166,14 @@
 ; SI: s_endpgm
 ; EG: AND_INT T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, 1,
 define void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 0, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
 }
 
 define void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 8)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -181,7 +181,7 @@
 }
 
 define void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -196,7 +196,7 @@
 ; SI: buffer_store_dword [[VREG]],
 ; SI: s_endpgm
 define void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %shr = lshr i32 %shl, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 31, i32 1)
@@ -211,7 +211,7 @@
 ; SI: v_bfe_i32 {{v[0-9]+}}, {{v[0-9]+}}, 0, 1
 ; SI: s_endpgm
 define void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %shr = ashr i32 %shl, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shr, i32 0, i32 1)
@@ -224,7 +224,7 @@
 ; SI: v_lshrrev_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
 ; SI: s_endpgm
 define void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 1, i32 31)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -236,7 +236,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 0, i32 31)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -249,7 +249,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
@@ -262,7 +262,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 31, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -274,7 +274,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 1, i32 31)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -286,7 +286,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 8, i32 24)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -298,7 +298,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %x, i32 24, i32 8)
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -309,7 +309,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = ashr i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
@@ -320,7 +320,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = lshr i32 %x, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.u32(i32 %shl, i32 31, i32 1)
   store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
@@ -568,7 +568,7 @@
 define void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
                                             i32 addrspace(1)* %out1,
                                             i32 addrspace(1)* %in) nounwind {
-  %src = load i32 addrspace(1)* %in, align 4
+  %src = load i32, i32 addrspace(1)* %in, align 4
   %and = and i32 %src, 63
   %bfe_u32 = call i32 @llvm.AMDGPU.bfe.u32(i32 %and, i32 2, i32 2) nounwind readnone
   store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.brev.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.brev.ll
index 3973f53..301de4b 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.brev.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.brev.ll
@@ -21,7 +21,7 @@
 ; SI: buffer_store_dword [[RESULT]],
 ; SI: s_endpgm
 define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
-  %val = load i32 addrspace(1)* %valptr, align 4
+  %val = load i32, i32 addrspace(1)* %valptr, align 4
   %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone
   store i32 %ctlz, i32 addrspace(1)* %out, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.class.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.class.ll
index 5f31289f..805a88b 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.class.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.class.ll
@@ -136,7 +136,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float addrspace(1)* %gep.in
+  %a = load float, float addrspace(1)* %gep.in
 
   %result = call i1 @llvm.AMDGPU.class.f32(float %a, i32 511) #1
   %sext = sext i1 %result to i32
@@ -154,7 +154,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %b = load i32 addrspace(1)* %gep.in
+  %b = load i32, i32 addrspace(1)* %gep.in
 
   %result = call i1 @llvm.AMDGPU.class.f32(float 1.0, i32 %b) #1
   %sext = sext i1 %result to i32
@@ -174,7 +174,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %b = load i32 addrspace(1)* %gep.in
+  %b = load i32, i32 addrspace(1)* %gep.in
 
   %result = call i1 @llvm.AMDGPU.class.f32(float 1024.0, i32 %b) #1
   %sext = sext i1 %result to i32
@@ -292,7 +292,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load double addrspace(1)* %in
+  %a = load double, double addrspace(1)* %in
 
   %result = call i1 @llvm.AMDGPU.class.f64(double %a, i32 511) #1
   %sext = sext i1 %result to i32
@@ -308,7 +308,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %b = load i32 addrspace(1)* %gep.in
+  %b = load i32, i32 addrspace(1)* %gep.in
 
   %result = call i1 @llvm.AMDGPU.class.f64(double 1.0, i32 %b) #1
   %sext = sext i1 %result to i32
@@ -323,7 +323,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %b = load i32 addrspace(1)* %gep.in
+  %b = load i32, i32 addrspace(1)* %gep.in
 
   %result = call i1 @llvm.AMDGPU.class.f64(double 1024.0, i32 %b) #1
   %sext = sext i1 %result to i32
@@ -340,7 +340,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float addrspace(1)* %gep.in
+  %a = load float, float addrspace(1)* %gep.in
 
   %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
   %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 3) #1
@@ -360,7 +360,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float addrspace(1)* %gep.in
+  %a = load float, float addrspace(1)* %gep.in
 
   %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
   %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1
@@ -383,7 +383,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float addrspace(1)* %gep.in
+  %a = load float, float addrspace(1)* %gep.in
 
   %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 1) #1
   %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 2) #1
@@ -418,7 +418,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float addrspace(1)* %gep.in
+  %a = load float, float addrspace(1)* %gep.in
 
   %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
   %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 8) #1
@@ -438,7 +438,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float addrspace(1)* %gep.in
+  %a = load float, float addrspace(1)* %gep.in
 
   %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1
   %class1 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 7) #1
@@ -458,7 +458,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float addrspace(1)* %gep.in
+  %a = load float, float addrspace(1)* %gep.in
 
   %class0 = call i1 @llvm.AMDGPU.class.f32(float %a, i32 4) #1
   %class1 = call i1 @llvm.AMDGPU.class.f32(float %b, i32 8) #1
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.cube.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.cube.ll
index aa07afd..be3e0a4 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.cube.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.cube.ll
@@ -8,15 +8,15 @@
 ; CHECK: CUBE * T{{[0-9]}}.W
 define void @cube() #0 {
 main_body:
-  %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
   %1 = extractelement <4 x float> %0, i32 3
-  %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
   %3 = extractelement <4 x float> %2, i32 0
   %4 = fdiv float %3, %1
-  %5 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
   %6 = extractelement <4 x float> %5, i32 1
   %7 = fdiv float %6, %1
-  %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
   %9 = extractelement <4 x float> %8, i32 2
   %10 = fdiv float %9, %1
   %11 = insertelement <4 x float> undef, float %4, i32 0
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
index 799817e..8b32f69 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.cvt_f32_ubyte.ll
@@ -9,7 +9,7 @@
 ; SI-LABEL: {{^}}test_unpack_byte0_to_float:
 ; SI: v_cvt_f32_ubyte0
 define void @test_unpack_byte0_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte0(i32 %val) nounwind readnone
   store float %cvt, float addrspace(1)* %out, align 4
   ret void
@@ -18,7 +18,7 @@
 ; SI-LABEL: {{^}}test_unpack_byte1_to_float:
 ; SI: v_cvt_f32_ubyte1
 define void @test_unpack_byte1_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte1(i32 %val) nounwind readnone
   store float %cvt, float addrspace(1)* %out, align 4
   ret void
@@ -27,7 +27,7 @@
 ; SI-LABEL: {{^}}test_unpack_byte2_to_float:
 ; SI: v_cvt_f32_ubyte2
 define void @test_unpack_byte2_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte2(i32 %val) nounwind readnone
   store float %cvt, float addrspace(1)* %out, align 4
   ret void
@@ -36,7 +36,7 @@
 ; SI-LABEL: {{^}}test_unpack_byte3_to_float:
 ; SI: v_cvt_f32_ubyte3
 define void @test_unpack_byte3_to_float(float addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %cvt = call float @llvm.AMDGPU.cvt.f32.ubyte3(i32 %val) nounwind readnone
   store float %cvt, float addrspace(1)* %out, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
index f93d5f0..48a4af1 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.div_fmas.ll
@@ -122,9 +122,9 @@
   %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 2
 
-  %a = load float addrspace(1)* %gep.a
-  %b = load float addrspace(1)* %gep.b
-  %c = load float addrspace(1)* %gep.c
+  %a = load float, float addrspace(1)* %gep.a
+  %b = load float, float addrspace(1)* %gep.b
+  %c = load float, float addrspace(1)* %gep.c
 
   %cmp0 = icmp eq i32 %tid, 0
   %cmp1 = icmp ne i32 %d, 0
@@ -159,15 +159,15 @@
   %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1
   %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2
 
-  %a = load float addrspace(1)* %gep.a
-  %b = load float addrspace(1)* %gep.b
-  %c = load float addrspace(1)* %gep.c
+  %a = load float, float addrspace(1)* %gep.a
+  %b = load float, float addrspace(1)* %gep.b
+  %c = load float, float addrspace(1)* %gep.c
 
   %cmp0 = icmp eq i32 %tid, 0
   br i1 %cmp0, label %bb, label %exit
 
 bb:
-  %val = load i32 addrspace(1)* %dummy
+  %val = load i32, i32 addrspace(1)* %dummy
   %cmp1 = icmp ne i32 %val, 0
   br label %exit
 
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
index 7be97c2..de830de 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.div_scale.ll
@@ -16,8 +16,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -36,8 +36,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -56,8 +56,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
@@ -76,8 +76,8 @@
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
 
-  %a = load double addrspace(1)* %gep.0, align 8
-  %b = load double addrspace(1)* %gep.1, align 8
+  %a = load double, double addrspace(1)* %gep.0, align 8
+  %b = load double, double addrspace(1)* %gep.1, align 8
 
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
@@ -95,7 +95,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
 
-  %b = load float addrspace(1)* %gep, align 4
+  %b = load float, float addrspace(1)* %gep, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -113,7 +113,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
 
-  %b = load float addrspace(1)* %gep, align 4
+  %b = load float, float addrspace(1)* %gep, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -131,7 +131,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
 
-  %a = load float addrspace(1)* %gep, align 4
+  %a = load float, float addrspace(1)* %gep, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 false) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -149,7 +149,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr float, float addrspace(1)* %in, i32 %tid
 
-  %a = load float addrspace(1)* %gep, align 4
+  %a = load float, float addrspace(1)* %gep, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float %b, i1 true) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -167,7 +167,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
 
-  %b = load double addrspace(1)* %gep, align 8
+  %b = load double, double addrspace(1)* %gep, align 8
 
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
@@ -185,7 +185,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
 
-  %b = load double addrspace(1)* %gep, align 8
+  %b = load double, double addrspace(1)* %gep, align 8
 
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
@@ -203,7 +203,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
 
-  %a = load double addrspace(1)* %gep, align 8
+  %a = load double, double addrspace(1)* %gep, align 8
 
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 false) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
@@ -221,7 +221,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
 
-  %a = load double addrspace(1)* %gep, align 8
+  %a = load double, double addrspace(1)* %gep, align 8
 
   %result = call { double, i1 } @llvm.AMDGPU.div.scale.f64(double %a, double %b, i1 true) nounwind readnone
   %result0 = extractvalue { double, i1 } %result, 0
@@ -295,7 +295,7 @@
 define void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %a = load float addrspace(1)* %gep.0, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -311,7 +311,7 @@
 define void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %a = load float addrspace(1)* %gep.0, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
 
   %result = call { float, i1 } @llvm.AMDGPU.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone
   %result0 = extractvalue { float, i1 } %result, 0
@@ -330,8 +330,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
 
@@ -352,8 +352,8 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
 
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.fract.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.fract.ll
index 7d15300..f4cf7fc 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.fract.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.fract.ll
@@ -11,7 +11,7 @@
 ; SI: v_fract_f32
 ; EG: FRACT
 define void @fract_f32(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
-  %val = load float addrspace(1)* %src, align 4
+  %val = load float, float addrspace(1)* %src, align 4
   %fract = call float @llvm.AMDGPU.fract.f32(float %val) nounwind readnone
   store float %fract, float addrspace(1)* %out, align 4
   ret void
@@ -21,7 +21,7 @@
 ; SI: v_fract_f32
 ; EG: FRACT
 define void @fract_f32_legacy_amdil(float addrspace(1)* %out, float addrspace(1)* %src) nounwind {
-  %val = load float addrspace(1)* %src, align 4
+  %val = load float, float addrspace(1)* %src, align 4
   %fract = call float @llvm.AMDIL.fraction.f32(float %val) nounwind readnone
   store float %fract, float addrspace(1)* %out, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.imax.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.imax.ll
index ce7fca0..46662f9 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.imax.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.imax.ll
@@ -5,7 +5,7 @@
 ; SI: v_max_i32_e32
 define void @vector_imax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
 main_body:
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %max = call i32 @llvm.AMDGPU.imax(i32 %p0, i32 %load)
   %bc = bitcast i32 %max to float
   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.imin.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.imin.ll
index 15cd38b..34b454e 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.imin.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.imin.ll
@@ -5,7 +5,7 @@
 ; SI: v_min_i32_e32
 define void @vector_imin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
 main_body:
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %min = call i32 @llvm.AMDGPU.imin(i32 %p0, i32 %load)
   %bc = bitcast i32 %min to float
   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.tex.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.tex.ll
index aac014b..1020660 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.tex.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.tex.ll
@@ -18,7 +18,7 @@
 ;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
 
 define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-   %addr = load <4 x float> addrspace(1)* %in
+   %addr = load <4 x float>, <4 x float> addrspace(1)* %in
    %res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %addr, i32 0, i32 0, i32 1)
    %res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res1, i32 0, i32 0, i32 2)
    %res3 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %res2, i32 0, i32 0, i32 3)
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
index 5829f73..6b546a7 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.trig_preop.ll
@@ -10,8 +10,8 @@
 ; SI: buffer_store_dwordx2 [[RESULT]],
 ; SI: s_endpgm
 define void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
-  %a = load double addrspace(1)* %aptr, align 8
-  %b = load i32 addrspace(1)* %bptr, align 4
+  %a = load double, double addrspace(1)* %aptr, align 8
+  %b = load i32, i32 addrspace(1)* %bptr, align 4
   %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 %b) nounwind readnone
   store double %result, double addrspace(1)* %out, align 8
   ret void
@@ -23,7 +23,7 @@
 ; SI: buffer_store_dwordx2 [[RESULT]],
 ; SI: s_endpgm
 define void @test_trig_preop_f64_imm_segment(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
-  %a = load double addrspace(1)* %aptr, align 8
+  %a = load double, double addrspace(1)* %aptr, align 8
   %result = call double @llvm.AMDGPU.trig.preop.f64(double %a, i32 7) nounwind readnone
   store double %result, double addrspace(1)* %out, align 8
   ret void
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
index ea02d3f..77a073b 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.umad24.ll
@@ -29,8 +29,8 @@
   %src0.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %src2.gep = getelementptr i32, i32 addrspace(1)* %src0.gep, i32 1
 
-  %src0 = load i32 addrspace(1)* %src0.gep, align 4
-  %src2 = load i32 addrspace(1)* %src2.gep, align 4
+  %src0 = load i32, i32 addrspace(1)* %src0.gep, align 4
+  %src2 = load i32, i32 addrspace(1)* %src2.gep, align 4
   %mad = call i32 @llvm.AMDGPU.umad24(i32 %src0, i32 4, i32 %src2) nounwind readnone
   store i32 %mad, i32 addrspace(1)* %out.gep, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.umax.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.umax.ll
index 4320dfe..a97d103 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.umax.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.umax.ll
@@ -5,7 +5,7 @@
 ; SI: v_max_u32_e32
 define void @vector_umax(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
 main_body:
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %max = call i32 @llvm.AMDGPU.umax(i32 %p0, i32 %load)
   %bc = bitcast i32 %max to float
   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
@@ -28,7 +28,7 @@
 ; SI-NOT: and
 ; SI: buffer_store_short [[RESULT]],
 define void @trunc_zext_umax(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
-  %tmp5 = load i8 addrspace(1)* %src, align 1
+  %tmp5 = load i8, i8 addrspace(1)* %src, align 1
   %tmp2 = zext i8 %tmp5 to i32
   %tmp3 = tail call i32 @llvm.AMDGPU.umax(i32 %tmp2, i32 0) nounwind readnone
   %tmp4 = trunc i32 %tmp3 to i8
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.umin.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.umin.ll
index e4cac33..2acd10e 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.umin.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.umin.ll
@@ -5,7 +5,7 @@
 ; SI: v_min_u32_e32
 define void @vector_umin(i32 %p0, i32 %p1, i32 addrspace(1)* %in) #0 {
 main_body:
-  %load = load i32 addrspace(1)* %in, align 4
+  %load = load i32, i32 addrspace(1)* %in, align 4
   %min = call i32 @llvm.AMDGPU.umin(i32 %p0, i32 %load)
   %bc = bitcast i32 %min to float
   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %bc, float %bc, float %bc, float %bc)
@@ -28,7 +28,7 @@
 ; SI-NOT: and
 ; SI: buffer_store_short [[RESULT]],
 define void @trunc_zext_umin(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
-  %tmp5 = load i8 addrspace(1)* %src, align 1
+  %tmp5 = load i8, i8 addrspace(1)* %src, align 1
   %tmp2 = zext i8 %tmp5 to i32
   %tmp3 = tail call i32 @llvm.AMDGPU.umin(i32 %tmp2, i32 0) nounwind readnone
   %tmp4 = trunc i32 %tmp3 to i8
diff --git a/llvm/test/CodeGen/R600/llvm.SI.imageload.ll b/llvm/test/CodeGen/R600/llvm.SI.imageload.ll
index 14db226..b67716c 100644
--- a/llvm/test/CodeGen/R600/llvm.SI.imageload.ll
+++ b/llvm/test/CodeGen/R600/llvm.SI.imageload.ll
@@ -89,15 +89,15 @@
 define void @vgpr_coords(float addrspace(2)* addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr float addrspace(2)*, float addrspace(2)* addrspace(2)* %0, i32 0
-  %21 = load float addrspace(2)* addrspace(2)* %20, !tbaa !2
+  %21 = load float addrspace(2)*, float addrspace(2)* addrspace(2)* %20, !tbaa !2
   %22 = getelementptr float, float addrspace(2)* %21, i32 0
-  %23 = load float addrspace(2)* %22, !tbaa !2, !invariant.load !1
+  %23 = load float, float addrspace(2)* %22, !tbaa !2, !invariant.load !1
   %24 = getelementptr float, float addrspace(2)* %21, i32 1
-  %25 = load float addrspace(2)* %24, !tbaa !2, !invariant.load !1
+  %25 = load float, float addrspace(2)* %24, !tbaa !2, !invariant.load !1
   %26 = getelementptr float, float addrspace(2)* %21, i32 4
-  %27 = load float addrspace(2)* %26, !tbaa !2, !invariant.load !1
+  %27 = load float, float addrspace(2)* %26, !tbaa !2, !invariant.load !1
   %28 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %2, i32 0
-  %29 = load <32 x i8> addrspace(2)* %28, !tbaa !2
+  %29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, !tbaa !2
   %30 = bitcast float %27 to i32
   %31 = bitcast float %23 to i32
   %32 = bitcast float %25 to i32
diff --git a/llvm/test/CodeGen/R600/llvm.SI.load.dword.ll b/llvm/test/CodeGen/R600/llvm.SI.load.dword.ll
index f8f4520..f6c2585 100644
--- a/llvm/test/CodeGen/R600/llvm.SI.load.dword.ll
+++ b/llvm/test/CodeGen/R600/llvm.SI.load.dword.ll
@@ -17,7 +17,7 @@
 define void @main([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <32 x i8>] addrspace(2)* byval %arg2, [2 x <16 x i8>] addrspace(2)* byval %arg3, [17 x <16 x i8>] addrspace(2)* inreg %arg4, [17 x <16 x i8>] addrspace(2)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) #0 {
 main_body:
   %tmp = getelementptr [2 x <16 x i8>], [2 x <16 x i8>] addrspace(2)* %arg3, i64 0, i32 1
-  %tmp10 = load <16 x i8> addrspace(2)* %tmp, !tbaa !0
+  %tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
   %tmp11 = shl i32 %arg6, 2
   %tmp12 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
   %tmp13 = bitcast i32 %tmp12 to float
diff --git a/llvm/test/CodeGen/R600/llvm.amdgpu.dp4.ll b/llvm/test/CodeGen/R600/llvm.amdgpu.dp4.ll
index 812b6a4..036cd2c 100644
--- a/llvm/test/CodeGen/R600/llvm.amdgpu.dp4.ll
+++ b/llvm/test/CodeGen/R600/llvm.amdgpu.dp4.ll
@@ -3,8 +3,8 @@
 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) nounwind readnone
 
 define void @test_dp4(float addrspace(1)* %out, <4 x float> addrspace(1)* %a, <4 x float> addrspace(1)* %b) nounwind {
-  %src0 = load <4 x float> addrspace(1)* %a, align 16
-  %src1 = load <4 x float> addrspace(1)* %b, align 16
+  %src0 = load <4 x float>, <4 x float> addrspace(1)* %a, align 16
+  %src1 = load <4 x float>, <4 x float> addrspace(1)* %b, align 16
   %dp4 = call float @llvm.AMDGPU.dp4(<4 x float> %src0, <4 x float> %src1) nounwind readnone
   store float %dp4, float addrspace(1)* %out, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/llvm.round.f64.ll b/llvm/test/CodeGen/R600/llvm.round.f64.ll
index 0d39a45..7d082a2 100644
--- a/llvm/test/CodeGen/R600/llvm.round.f64.ll
+++ b/llvm/test/CodeGen/R600/llvm.round.f64.ll
@@ -33,7 +33,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() #1
   %gep = getelementptr double, double addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
-  %x = load double addrspace(1)* %gep
+  %x = load double, double addrspace(1)* %gep
   %result = call double @llvm.round.f64(double %x) #1
   store double %result, double addrspace(1)* %out.gep
   ret void
diff --git a/llvm/test/CodeGen/R600/load-i1.ll b/llvm/test/CodeGen/R600/load-i1.ll
index 315c0a3..0ca49fd 100644
--- a/llvm/test/CodeGen/R600/load-i1.ll
+++ b/llvm/test/CodeGen/R600/load-i1.ll
@@ -11,7 +11,7 @@
 ; EG: VTX_READ_8
 ; EG: AND_INT
 define void @global_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   store i1 %load, i1 addrspace(1)* %out, align 1
   ret void
 }
@@ -26,7 +26,7 @@
 ; EG: AND_INT
 ; EG: LDS_BYTE_WRITE
 define void @local_copy_i1_to_i1(i1 addrspace(3)* %out, i1 addrspace(3)* %in) nounwind {
-  %load = load i1 addrspace(3)* %in
+  %load = load i1, i1 addrspace(3)* %in
   store i1 %load, i1 addrspace(3)* %out, align 1
   ret void
 }
@@ -40,7 +40,7 @@
 ; EG: VTX_READ_8
 ; EG: AND_INT
 define void @constant_copy_i1_to_i1(i1 addrspace(1)* %out, i1 addrspace(2)* %in) nounwind {
-  %load = load i1 addrspace(2)* %in
+  %load = load i1, i1 addrspace(2)* %in
   store i1 %load, i1 addrspace(1)* %out, align 1
   ret void
 }
@@ -54,7 +54,7 @@
 ; EG: VTX_READ_8
 ; EG: BFE_INT
 define void @global_sextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %load to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
@@ -66,7 +66,7 @@
 ; SI: s_endpgm
 
 define void @global_zextload_i1_to_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %load to i32
   store i32 %ext, i32 addrspace(1)* %out, align 4
   ret void
@@ -78,7 +78,7 @@
 ; SI: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @global_sextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %load to i64
   store i64 %ext, i64 addrspace(1)* %out, align 4
   ret void
@@ -90,7 +90,7 @@
 ; SI: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @global_zextload_i1_to_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %load to i64
   store i64 %ext, i64 addrspace(1)* %out, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/load-input-fold.ll b/llvm/test/CodeGen/R600/load-input-fold.ll
index 265fa9b..e45fb78 100644
--- a/llvm/test/CodeGen/R600/load-input-fold.ll
+++ b/llvm/test/CodeGen/R600/load-input-fold.ll
@@ -14,71 +14,71 @@
   %9 = extractelement <4 x float> %reg3, i32 1
   %10 = extractelement <4 x float> %reg3, i32 2
   %11 = extractelement <4 x float> %reg3, i32 3
-  %12 = load <4 x float> addrspace(8)* null
+  %12 = load <4 x float>, <4 x float> addrspace(8)* null
   %13 = extractelement <4 x float> %12, i32 0
   %14 = fmul float %0, %13
-  %15 = load <4 x float> addrspace(8)* null
+  %15 = load <4 x float>, <4 x float> addrspace(8)* null
   %16 = extractelement <4 x float> %15, i32 1
   %17 = fmul float %0, %16
-  %18 = load <4 x float> addrspace(8)* null
+  %18 = load <4 x float>, <4 x float> addrspace(8)* null
   %19 = extractelement <4 x float> %18, i32 2
   %20 = fmul float %0, %19
-  %21 = load <4 x float> addrspace(8)* null
+  %21 = load <4 x float>, <4 x float> addrspace(8)* null
   %22 = extractelement <4 x float> %21, i32 3
   %23 = fmul float %0, %22
-  %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %24 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %25 = extractelement <4 x float> %24, i32 0
   %26 = fmul float %1, %25
   %27 = fadd float %26, %14
-  %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %29 = extractelement <4 x float> %28, i32 1
   %30 = fmul float %1, %29
   %31 = fadd float %30, %17
-  %32 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %32 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %33 = extractelement <4 x float> %32, i32 2
   %34 = fmul float %1, %33
   %35 = fadd float %34, %20
-  %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %36 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %37 = extractelement <4 x float> %36, i32 3
   %38 = fmul float %1, %37
   %39 = fadd float %38, %23
-  %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %40 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %41 = extractelement <4 x float> %40, i32 0
   %42 = fmul float %2, %41
   %43 = fadd float %42, %27
-  %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %44 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %45 = extractelement <4 x float> %44, i32 1
   %46 = fmul float %2, %45
   %47 = fadd float %46, %31
-  %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %49 = extractelement <4 x float> %48, i32 2
   %50 = fmul float %2, %49
   %51 = fadd float %50, %35
-  %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %53 = extractelement <4 x float> %52, i32 3
   %54 = fmul float %2, %53
   %55 = fadd float %54, %39
-  %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %57 = extractelement <4 x float> %56, i32 0
   %58 = fmul float %3, %57
   %59 = fadd float %58, %43
-  %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %61 = extractelement <4 x float> %60, i32 1
   %62 = fmul float %3, %61
   %63 = fadd float %62, %47
-  %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %65 = extractelement <4 x float> %64, i32 2
   %66 = fmul float %3, %65
   %67 = fadd float %66, %51
-  %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %69 = extractelement <4 x float> %68, i32 3
   %70 = fmul float %3, %69
   %71 = fadd float %70, %55
-  %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %73 = extractelement <4 x float> %72, i32 0
-  %74 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %74 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %75 = extractelement <4 x float> %74, i32 1
-  %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %77 = extractelement <4 x float> %76, i32 2
   %78 = insertelement <4 x float> undef, float %4, i32 0
   %79 = insertelement <4 x float> %78, float %5, i32 1
diff --git a/llvm/test/CodeGen/R600/load.ll b/llvm/test/CodeGen/R600/load.ll
index 8145ee4..e285831 100644
--- a/llvm/test/CodeGen/R600/load.ll
+++ b/llvm/test/CodeGen/R600/load.ll
@@ -13,7 +13,7 @@
 
 ; SI: buffer_load_ubyte v{{[0-9]+}},
 define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
-  %1 = load i8 addrspace(1)* %in
+  %1 = load i8, i8 addrspace(1)* %in
   %2 = zext i8 %1 to i32
   store i32 %2, i32 addrspace(1)* %out
   ret void
@@ -28,7 +28,7 @@
 ; SI: buffer_load_sbyte
 define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
 entry:
-  %0 = load i8 addrspace(1)* %in
+  %0 = load i8, i8 addrspace(1)* %in
   %1 = sext i8 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
@@ -41,7 +41,7 @@
 ; SI: buffer_load_ubyte
 define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
 entry:
-  %0 = load <2 x i8> addrspace(1)* %in
+  %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
   %1 = zext <2 x i8> %0 to <2 x i32>
   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
   ret void
@@ -62,7 +62,7 @@
 ; SI: buffer_load_sbyte
 define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
 entry:
-  %0 = load <2 x i8> addrspace(1)* %in
+  %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
   %1 = sext <2 x i8> %0 to <2 x i32>
   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
   ret void
@@ -79,7 +79,7 @@
 ; SI: buffer_load_ubyte
 define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
 entry:
-  %0 = load <4 x i8> addrspace(1)* %in
+  %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
   %1 = zext <4 x i8> %0 to <4 x i32>
   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
   ret void
@@ -112,7 +112,7 @@
 ; SI: buffer_load_sbyte
 define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
 entry:
-  %0 = load <4 x i8> addrspace(1)* %in
+  %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
   %1 = sext <4 x i8> %0 to <4 x i32>
   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
   ret void
@@ -124,7 +124,7 @@
 ; SI: buffer_load_ushort
 define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
 entry:
-  %0 = load i16	 addrspace(1)* %in
+  %0 = load i16	, i16	 addrspace(1)* %in
   %1 = zext i16 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
@@ -139,7 +139,7 @@
 ; SI: buffer_load_sshort
 define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
 entry:
-  %0 = load i16 addrspace(1)* %in
+  %0 = load i16, i16 addrspace(1)* %in
   %1 = sext i16 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
@@ -152,7 +152,7 @@
 ; SI: buffer_load_ushort
 define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
 entry:
-  %0 = load <2 x i16> addrspace(1)* %in
+  %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
   %1 = zext <2 x i16> %0 to <2 x i32>
   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
   ret void
@@ -173,7 +173,7 @@
 ; SI: buffer_load_sshort
 define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
 entry:
-  %0 = load <2 x i16> addrspace(1)* %in
+  %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
   %1 = sext <2 x i16> %0 to <2 x i32>
   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
   ret void
@@ -190,7 +190,7 @@
 ; SI: buffer_load_ushort
 define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
 entry:
-  %0 = load <4 x i16> addrspace(1)* %in
+  %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
   %1 = zext <4 x i16> %0 to <4 x i32>
   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
   ret void
@@ -223,7 +223,7 @@
 ; SI: buffer_load_sshort
 define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
 entry:
-  %0 = load <4 x i16> addrspace(1)* %in
+  %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
   %1 = sext <4 x i16> %0 to <4 x i32>
   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
   ret void
@@ -236,7 +236,7 @@
 ; SI: buffer_load_dword v{{[0-9]+}}
 define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
-  %0 = load i32 addrspace(1)* %in
+  %0 = load i32, i32 addrspace(1)* %in
   store i32 %0, i32 addrspace(1)* %out
   ret void
 }
@@ -248,7 +248,7 @@
 ; SI: buffer_load_dword v{{[0-9]+}}
 define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
 entry:
-  %0 = load float addrspace(1)* %in
+  %0 = load float, float addrspace(1)* %in
   store float %0, float addrspace(1)* %out
   ret void
 }
@@ -260,7 +260,7 @@
 ; SI: buffer_load_dwordx2
 define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
 entry:
-  %0 = load <2 x float> addrspace(1)* %in
+  %0 = load <2 x float>, <2 x float> addrspace(1)* %in
   store <2 x float> %0, <2 x float> addrspace(1)* %out
   ret void
 }
@@ -270,7 +270,7 @@
 ; SI: buffer_load_dwordx2
 define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
 entry:
-  %0 = load i64 addrspace(1)* %in
+  %0 = load i64, i64 addrspace(1)* %in
   store i64 %0, i64 addrspace(1)* %out
   ret void
 }
@@ -284,7 +284,7 @@
 
 define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
-  %0 = load i32 addrspace(1)* %in
+  %0 = load i32, i32 addrspace(1)* %in
   %1 = sext i32 %0 to i64
   store i64 %1, i64 addrspace(1)* %out
   ret void
@@ -295,7 +295,7 @@
 ; R600: MEM_RAT
 define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
-  %0 = load i32 addrspace(1)* %in
+  %0 = load i32, i32 addrspace(1)* %in
   %1 = zext i32 %0 to i64
   store i64 %1, i64 addrspace(1)* %out
   ret void
@@ -315,7 +315,7 @@
 ; SI: buffer_load_dword
 define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
 entry:
-  %0 = load <8 x i32> addrspace(1)* %in
+  %0 = load <8 x i32>, <8 x i32> addrspace(1)* %in
   store <8 x i32> %0, <8 x i32> addrspace(1)* %out
   ret void
 }
@@ -344,7 +344,7 @@
 ; SI: buffer_load_dword
 define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
 entry:
-  %0 = load <16 x i32> addrspace(1)* %in
+  %0 = load <16 x i32>, <16 x i32> addrspace(1)* %in
   store <16 x i32> %0, <16 x i32> addrspace(1)* %out
   ret void
 }
@@ -363,7 +363,7 @@
 ; SI: buffer_load_sbyte v{{[0-9]+}},
 define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
 entry:
-  %0 = load i8 addrspace(2)* %in
+  %0 = load i8, i8 addrspace(2)* %in
   %1 = sext i8 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
@@ -375,7 +375,7 @@
 ; SI: buffer_load_ubyte v{{[0-9]+}},
 define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
 entry:
-  %0 = load i8 addrspace(2)* %in
+  %0 = load i8, i8 addrspace(2)* %in
   %1 = zext i8 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
@@ -388,7 +388,7 @@
 define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
 entry:
   %0 = getelementptr i8, i8 addrspace(2)* %in, i32 1
-  %1 = load i8 addrspace(2)* %0
+  %1 = load i8, i8 addrspace(2)* %0
   %2 = zext i8 %1 to i32
   store i32 %2, i32 addrspace(1)* %out
   ret void
@@ -404,7 +404,7 @@
 ; SI: buffer_load_sshort
 define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
 entry:
-  %0 = load i16 addrspace(2)* %in
+  %0 = load i16, i16 addrspace(2)* %in
   %1 = sext i16 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
@@ -416,7 +416,7 @@
 ; SI: buffer_load_ushort
 define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
 entry:
-  %0 = load i16 addrspace(2)* %in
+  %0 = load i16, i16 addrspace(2)* %in
   %1 = zext i16 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
@@ -429,7 +429,7 @@
 define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
 entry:
   %0 = getelementptr i16, i16 addrspace(2)* %in, i32 1
-  %1 = load i16 addrspace(2)* %0
+  %1 = load i16, i16 addrspace(2)* %0
   %2 = zext i16 %1 to i32
   store i32 %2, i32 addrspace(1)* %out
   ret void
@@ -442,7 +442,7 @@
 ; SI: s_load_dword s{{[0-9]+}}
 define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
 entry:
-  %0 = load i32 addrspace(2)* %in
+  %0 = load i32, i32 addrspace(2)* %in
   store i32 %0, i32 addrspace(1)* %out
   ret void
 }
@@ -453,7 +453,7 @@
 
 ; SI: s_load_dword s{{[0-9]+}}
 define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
-  %1 = load float addrspace(2)* %in
+  %1 = load float, float addrspace(2)* %in
   store float %1, float addrspace(1)* %out
   ret void
 }
@@ -469,7 +469,7 @@
 ; SI: s_mov_b32 m0
 ; SI: ds_read_u8
 define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
-  %1 = load i8 addrspace(3)* %in
+  %1 = load i8, i8 addrspace(3)* %in
   %2 = zext i8 %1 to i32
   store i32 %2, i32 addrspace(1)* %out
   ret void
@@ -483,7 +483,7 @@
 ; SI: ds_read_i8
 define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
 entry:
-  %0 = load i8 addrspace(3)* %in
+  %0 = load i8, i8 addrspace(3)* %in
   %1 = sext i8 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
@@ -498,7 +498,7 @@
 ; SI: ds_read_u8
 define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
 entry:
-  %0 = load <2 x i8> addrspace(3)* %in
+  %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
   %1 = zext <2 x i8> %0 to <2 x i32>
   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
   ret void
@@ -515,7 +515,7 @@
 ; SI: ds_read_i8
 define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
 entry:
-  %0 = load <2 x i8> addrspace(3)* %in
+  %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
   %1 = sext <2 x i8> %0 to <2 x i32>
   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
   ret void
@@ -534,7 +534,7 @@
 ; SI: ds_read_u8
 define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
 entry:
-  %0 = load <4 x i8> addrspace(3)* %in
+  %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
   %1 = zext <4 x i8> %0 to <4 x i32>
   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
   ret void
@@ -557,7 +557,7 @@
 ; SI: ds_read_i8
 define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
 entry:
-  %0 = load <4 x i8> addrspace(3)* %in
+  %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
   %1 = sext <4 x i8> %0 to <4 x i32>
   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
   ret void
@@ -571,7 +571,7 @@
 ; SI: ds_read_u16
 define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
 entry:
-  %0 = load i16	 addrspace(3)* %in
+  %0 = load i16	, i16	 addrspace(3)* %in
   %1 = zext i16 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
@@ -585,7 +585,7 @@
 ; SI: ds_read_i16
 define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
 entry:
-  %0 = load i16 addrspace(3)* %in
+  %0 = load i16, i16 addrspace(3)* %in
   %1 = sext i16 %0 to i32
   store i32 %1, i32 addrspace(1)* %out
   ret void
@@ -600,7 +600,7 @@
 ; SI: ds_read_u16
 define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
 entry:
-  %0 = load <2 x i16> addrspace(3)* %in
+  %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
   %1 = zext <2 x i16> %0 to <2 x i32>
   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
   ret void
@@ -617,7 +617,7 @@
 ; SI: ds_read_i16
 define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
 entry:
-  %0 = load <2 x i16> addrspace(3)* %in
+  %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
   %1 = sext <2 x i16> %0 to <2 x i32>
   store <2 x i32> %1, <2 x i32> addrspace(1)* %out
   ret void
@@ -636,7 +636,7 @@
 ; SI: ds_read_u16
 define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
 entry:
-  %0 = load <4 x i16> addrspace(3)* %in
+  %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
   %1 = zext <4 x i16> %0 to <4 x i32>
   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
   ret void
@@ -659,7 +659,7 @@
 ; SI: ds_read_i16
 define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
 entry:
-  %0 = load <4 x i16> addrspace(3)* %in
+  %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
   %1 = sext <4 x i16> %0 to <4 x i32>
   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
   ret void
@@ -673,7 +673,7 @@
 ; SI: ds_read_b32
 define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 entry:
-  %0 = load i32 addrspace(3)* %in
+  %0 = load i32, i32 addrspace(3)* %in
   store i32 %0, i32 addrspace(1)* %out
   ret void
 }
@@ -685,7 +685,7 @@
 ; SI: ds_read_b32
 define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
 entry:
-  %0 = load float addrspace(3)* %in
+  %0 = load float, float addrspace(3)* %in
   store float %0, float addrspace(1)* %out
   ret void
 }
@@ -698,7 +698,7 @@
 ; SI: ds_read_b64
 define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
 entry:
-  %0 = load <2 x float> addrspace(3)* %in
+  %0 = load <2 x float>, <2 x float> addrspace(3)* %in
   store <2 x float> %0, <2 x float> addrspace(1)* %out
   ret void
 }
@@ -711,10 +711,10 @@
 ; SI-DAG: ds_read_b32
 ; SI-DAG: ds_read2_b32
 define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
-  %scalar = load i32 addrspace(3)* %in
+  %scalar = load i32, i32 addrspace(3)* %in
   %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
   %vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2
-  %vec0 = load <2 x i32> addrspace(3)* %vec_ptr, align 4
+  %vec0 = load <2 x i32>, <2 x i32> addrspace(3)* %vec_ptr, align 4
   %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
   %vec = add <2 x i32> %vec0, %vec1
   store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
@@ -733,7 +733,7 @@
 define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
 entry:
   %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
-  %tmp1 = load i32 addrspace(3)* %tmp0
+  %tmp1 = load i32, i32 addrspace(3)* %tmp0
   %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1
   store i32 %tmp1, i32 addrspace(1)* %tmp2
   ret void
diff --git a/llvm/test/CodeGen/R600/load.vec.ll b/llvm/test/CodeGen/R600/load.vec.ll
index 346d8dc..02f883c 100644
--- a/llvm/test/CodeGen/R600/load.vec.ll
+++ b/llvm/test/CodeGen/R600/load.vec.ll
@@ -8,7 +8,7 @@
 ; SI: {{^}}load_v2i32:
 ; SI: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
 define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
-  %a = load <2 x i32> addrspace(1) * %in
+  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
   store <2 x i32> %a, <2 x i32> addrspace(1)* %out
   ret void
 }
@@ -19,7 +19,7 @@
 ; SI: {{^}}load_v4i32:
 ; SI: buffer_load_dwordx4 v[{{[0-9]+:[0-9]+}}]
 define void @load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %a = load <4 x i32> addrspace(1) * %in
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
   store <4 x i32> %a, <4 x i32> addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/load64.ll b/llvm/test/CodeGen/R600/load64.ll
index cb3d654..74beabd 100644
--- a/llvm/test/CodeGen/R600/load64.ll
+++ b/llvm/test/CodeGen/R600/load64.ll
@@ -6,7 +6,7 @@
 ; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
 ; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
 define void @load_f64(double addrspace(1)* %out, double addrspace(1)* %in) {
-  %1 = load double addrspace(1)* %in
+  %1 = load double, double addrspace(1)* %in
   store double %1, double addrspace(1)* %out
   ret void
 }
@@ -15,7 +15,7 @@
 ; CHECK: buffer_load_dwordx2 v[{{[0-9]+:[0-9]+}}]
 ; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
 define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
-  %tmp = load i64 addrspace(1)* %in
+  %tmp = load i64, i64 addrspace(1)* %in
   store i64 %tmp, i64 addrspace(1)* %out, align 8
   ret void
 }
@@ -25,7 +25,7 @@
 ; CHECK: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}]
 ; CHECK: buffer_store_dwordx2 v[{{[0-9]+:[0-9]+}}]
 define void @load_const_addrspace_f64(double addrspace(1)* %out, double addrspace(2)* %in) {
-  %1 = load double addrspace(2)* %in
+  %1 = load double, double addrspace(2)* %in
   store double %1, double addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/local-64.ll b/llvm/test/CodeGen/R600/local-64.ll
index 768b038..33f3159 100644
--- a/llvm/test/CodeGen/R600/local-64.ll
+++ b/llvm/test/CodeGen/R600/local-64.ll
@@ -7,7 +7,7 @@
 ; BOTH: buffer_store_dword [[REG]],
 define void @local_i32_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
   %gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
-  %val = load i32 addrspace(3)* %gep, align 4
+  %val = load i32, i32 addrspace(3)* %gep, align 4
   store i32 %val, i32 addrspace(1)* %out, align 4
   ret void
 }
@@ -16,7 +16,7 @@
 ; BOTH: ds_read_b32 [[REG:v[0-9]+]], v{{[0-9]+}}
 ; BOTH: buffer_store_dword [[REG]],
 define void @local_i32_load_0_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) nounwind {
-  %val = load i32 addrspace(3)* %in, align 4
+  %val = load i32, i32 addrspace(3)* %in, align 4
   store i32 %val, i32 addrspace(1)* %out, align 4
   ret void
 }
@@ -27,7 +27,7 @@
 ; BOTH: buffer_store_byte [[REG]],
 define void @local_i8_load_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
   %gep = getelementptr i8, i8 addrspace(3)* %in, i32 65535
-  %val = load i8 addrspace(3)* %gep, align 4
+  %val = load i8, i8 addrspace(3)* %gep, align 4
   store i8 %val, i8 addrspace(1)* %out, align 4
   ret void
 }
@@ -42,7 +42,7 @@
 ; BOTH: buffer_store_byte [[REG]],
 define void @local_i8_load_over_i16_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %in) nounwind {
   %gep = getelementptr i8, i8 addrspace(3)* %in, i32 65536
-  %val = load i8 addrspace(3)* %gep, align 4
+  %val = load i8, i8 addrspace(3)* %gep, align 4
   store i8 %val, i8 addrspace(1)* %out, align 4
   ret void
 }
@@ -53,7 +53,7 @@
 ; BOTH: buffer_store_dwordx2 [[REG]],
 define void @local_i64_load(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
   %gep = getelementptr i64, i64 addrspace(3)* %in, i32 7
-  %val = load i64 addrspace(3)* %gep, align 8
+  %val = load i64, i64 addrspace(3)* %gep, align 8
   store i64 %val, i64 addrspace(1)* %out, align 8
   ret void
 }
@@ -62,7 +62,7 @@
 ; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
 ; BOTH: buffer_store_dwordx2 [[REG]],
 define void @local_i64_load_0_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %in) nounwind {
-  %val = load i64 addrspace(3)* %in, align 8
+  %val = load i64, i64 addrspace(3)* %in, align 8
   store i64 %val, i64 addrspace(1)* %out, align 8
   ret void
 }
@@ -73,7 +73,7 @@
 ; BOTH: buffer_store_dwordx2 [[REG]],
 define void @local_f64_load(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
   %gep = getelementptr double, double addrspace(3)* %in, i32 7
-  %val = load double addrspace(3)* %gep, align 8
+  %val = load double, double addrspace(3)* %gep, align 8
   store double %val, double addrspace(1)* %out, align 8
   ret void
 }
@@ -82,7 +82,7 @@
 ; BOTH: ds_read_b64 [[REG:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}
 ; BOTH: buffer_store_dwordx2 [[REG]],
 define void @local_f64_load_0_offset(double addrspace(1)* %out, double addrspace(3)* %in) nounwind {
-  %val = load double addrspace(3)* %in, align 8
+  %val = load double, double addrspace(3)* %in, align 8
   store double %val, double addrspace(1)* %out, align 8
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/local-memory-two-objects.ll b/llvm/test/CodeGen/R600/local-memory-two-objects.ll
index 1d38570..75f8d54 100644
--- a/llvm/test/CodeGen/R600/local-memory-two-objects.ll
+++ b/llvm/test/CodeGen/R600/local-memory-two-objects.ll
@@ -45,11 +45,11 @@
   %sub = sub nsw i32 3, %x.i
   call void @llvm.AMDGPU.barrier.local()
   %arrayidx2 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %sub
-  %0 = load i32 addrspace(3)* %arrayidx2, align 4
+  %0 = load i32, i32 addrspace(3)* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %x.i
   store i32 %0, i32 addrspace(1)* %arrayidx3, align 4
   %arrayidx4 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %sub
-  %1 = load i32 addrspace(3)* %arrayidx4, align 4
+  %1 = load i32, i32 addrspace(3)* %arrayidx4, align 4
   %add = add nsw i32 %x.i, 4
   %arrayidx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %add
   store i32 %1, i32 addrspace(1)* %arrayidx5, align 4
diff --git a/llvm/test/CodeGen/R600/local-memory.ll b/llvm/test/CodeGen/R600/local-memory.ll
index 2c082da..4ec0418 100644
--- a/llvm/test/CodeGen/R600/local-memory.ll
+++ b/llvm/test/CodeGen/R600/local-memory.ll
@@ -36,7 +36,7 @@
   %.add = select i1 %cmp, i32 0, i32 %add
   call void @llvm.AMDGPU.barrier.local()
   %arrayidx1 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %.add
-  %0 = load i32 addrspace(3)* %arrayidx1, align 4
+  %0 = load i32, i32 addrspace(3)* %arrayidx1, align 4
   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %y.i
   store i32 %0, i32 addrspace(1)* %arrayidx2, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/loop-idiom.ll b/llvm/test/CodeGen/R600/loop-idiom.ll
index 4c30b69..810b34f 100644
--- a/llvm/test/CodeGen/R600/loop-idiom.ll
+++ b/llvm/test/CodeGen/R600/loop-idiom.ll
@@ -22,7 +22,7 @@
   %0 = phi i32 [0, %entry], [%4, %for.body]
   %1 = getelementptr i8, i8 addrspace(3)* %in, i32 %0
   %2 = getelementptr i8, i8* %dest, i32 %0
-  %3 = load i8 addrspace(3)* %1
+  %3 = load i8, i8 addrspace(3)* %1
   store i8 %3, i8* %2
   %4 = add i32 %0, 1
   %5 = icmp eq i32 %4, %size
diff --git a/llvm/test/CodeGen/R600/m0-spill.ll b/llvm/test/CodeGen/R600/m0-spill.ll
index 37bc10a..1dddc85 100644
--- a/llvm/test/CodeGen/R600/m0-spill.ll
+++ b/llvm/test/CodeGen/R600/m0-spill.ll
@@ -13,7 +13,7 @@
 
 if:
   %lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
-  %lds_data = load float addrspace(3)* %lds_ptr
+  %lds_data = load float, float addrspace(3)* %lds_ptr
   br label %endif
 
 else:
diff --git a/llvm/test/CodeGen/R600/mad-combine.ll b/llvm/test/CodeGen/R600/mad-combine.ll
index 435efe0..bc07162 100644
--- a/llvm/test/CodeGen/R600/mad-combine.ll
+++ b/llvm/test/CodeGen/R600/mad-combine.ll
@@ -37,9 +37,9 @@
   %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
 
   %mul = fmul float %a, %b
   %fma = fadd float %mul, %c
@@ -76,10 +76,10 @@
   %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
-  %d = load float addrspace(1)* %gep.3
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
+  %d = load float, float addrspace(1)* %gep.3
 
   %mul = fmul float %a, %b
   %fma0 = fadd float %mul, %c
@@ -110,9 +110,9 @@
   %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
 
   %mul = fmul float %a, %b
   %fma = fadd float %c, %mul
@@ -140,9 +140,9 @@
   %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
 
   %mul = fmul float %a, %b
   %fma = fsub float %mul, %c
@@ -179,10 +179,10 @@
   %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
-  %d = load float addrspace(1)* %gep.3
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
+  %d = load float, float addrspace(1)* %gep.3
 
   %mul = fmul float %a, %b
   %fma0 = fsub float %mul, %c
@@ -212,9 +212,9 @@
   %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
 
   %mul = fmul float %a, %b
   %fma = fsub float %c, %mul
@@ -250,10 +250,10 @@
   %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
-  %d = load float addrspace(1)* %gep.3
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
+  %d = load float, float addrspace(1)* %gep.3
 
   %mul = fmul float %a, %b
   %fma0 = fsub float %c, %mul
@@ -284,9 +284,9 @@
   %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
 
   %mul = fmul float %a, %b
   %mul.neg = fsub float -0.0, %mul
@@ -324,10 +324,10 @@
   %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
-  %d = load float addrspace(1)* %gep.3
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
+  %d = load float, float addrspace(1)* %gep.3
 
   %mul = fmul float %a, %b
   %mul.neg = fsub float -0.0, %mul
@@ -367,10 +367,10 @@
   %gep.out.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
   %gep.out.1 = getelementptr float, float addrspace(1)* %gep.out.0, i32 1
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
-  %d = load float addrspace(1)* %gep.3
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
+  %d = load float, float addrspace(1)* %gep.3
 
   %mul = fmul float %a, %b
   %mul.neg = fsub float -0.0, %mul
@@ -412,11 +412,11 @@
   %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %x = load float addrspace(1)* %gep.0
-  %y = load float addrspace(1)* %gep.1
-  %z = load float addrspace(1)* %gep.2
-  %u = load float addrspace(1)* %gep.3
-  %v = load float addrspace(1)* %gep.4
+  %x = load float, float addrspace(1)* %gep.0
+  %y = load float, float addrspace(1)* %gep.1
+  %z = load float, float addrspace(1)* %gep.2
+  %u = load float, float addrspace(1)* %gep.3
+  %v = load float, float addrspace(1)* %gep.4
 
   %tmp0 = fmul float %u, %v
   %tmp1 = call float @llvm.fma.f32(float %x, float %y, float %tmp0) #0
@@ -458,11 +458,11 @@
   %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %x = load float addrspace(1)* %gep.0
-  %y = load float addrspace(1)* %gep.1
-  %z = load float addrspace(1)* %gep.2
-  %u = load float addrspace(1)* %gep.3
-  %v = load float addrspace(1)* %gep.4
+  %x = load float, float addrspace(1)* %gep.0
+  %y = load float, float addrspace(1)* %gep.1
+  %z = load float, float addrspace(1)* %gep.2
+  %u = load float, float addrspace(1)* %gep.3
+  %v = load float, float addrspace(1)* %gep.4
 
   %tmp0 = fmul float %u, %v
   %tmp1 = call float @llvm.fma.f32(float %y, float %z, float %tmp0) #0
@@ -503,11 +503,11 @@
   %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %x = load float addrspace(1)* %gep.0
-  %y = load float addrspace(1)* %gep.1
-  %z = load float addrspace(1)* %gep.2
-  %u = load float addrspace(1)* %gep.3
-  %v = load float addrspace(1)* %gep.4
+  %x = load float, float addrspace(1)* %gep.0
+  %y = load float, float addrspace(1)* %gep.1
+  %z = load float, float addrspace(1)* %gep.2
+  %u = load float, float addrspace(1)* %gep.3
+  %v = load float, float addrspace(1)* %gep.4
 
   %tmp0 = fmul float %u, %v
   %tmp1 = call float @llvm.fmuladd.f32(float %x, float %y, float %tmp0) #0
@@ -549,11 +549,11 @@
   %gep.4 = getelementptr float, float addrspace(1)* %gep.0, i32 4
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %x = load float addrspace(1)* %gep.0
-  %y = load float addrspace(1)* %gep.1
-  %z = load float addrspace(1)* %gep.2
-  %u = load float addrspace(1)* %gep.3
-  %v = load float addrspace(1)* %gep.4
+  %x = load float, float addrspace(1)* %gep.0
+  %y = load float, float addrspace(1)* %gep.1
+  %z = load float, float addrspace(1)* %gep.2
+  %u = load float, float addrspace(1)* %gep.3
+  %v = load float, float addrspace(1)* %gep.4
 
   %tmp0 = fmul float %u, %v
   %tmp1 = call float @llvm.fmuladd.f32(float %y, float %z, float %tmp0) #0
diff --git a/llvm/test/CodeGen/R600/mad-sub.ll b/llvm/test/CodeGen/R600/mad-sub.ll
index 6ea6771..aa4194f 100644
--- a/llvm/test/CodeGen/R600/mad-sub.ll
+++ b/llvm/test/CodeGen/R600/mad-sub.ll
@@ -18,9 +18,9 @@
   %add2 = add i64 %tid.ext, 2
   %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
   %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load float addrspace(1)* %gep0, align 4
-  %b = load float addrspace(1)* %gep1, align 4
-  %c = load float addrspace(1)* %gep2, align 4
+  %a = load float, float addrspace(1)* %gep0, align 4
+  %b = load float, float addrspace(1)* %gep1, align 4
+  %c = load float, float addrspace(1)* %gep2, align 4
   %mul = fmul float %a, %b
   %sub = fsub float %mul, %c
   store float %sub, float addrspace(1)* %outgep, align 4
@@ -42,9 +42,9 @@
   %add2 = add i64 %tid.ext, 2
   %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
   %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load float addrspace(1)* %gep0, align 4
-  %b = load float addrspace(1)* %gep1, align 4
-  %c = load float addrspace(1)* %gep2, align 4
+  %a = load float, float addrspace(1)* %gep0, align 4
+  %b = load float, float addrspace(1)* %gep1, align 4
+  %c = load float, float addrspace(1)* %gep2, align 4
   %mul = fmul float %a, %b
   %sub = fsub float %c, %mul
   store float %sub, float addrspace(1)* %outgep, align 4
@@ -63,9 +63,9 @@
   %add2 = add i64 %tid.ext, 2
   %gep2 = getelementptr double, double addrspace(1)* %ptr, i64 %add2
   %outgep = getelementptr double, double addrspace(1)* %out, i64 %tid.ext
-  %a = load double addrspace(1)* %gep0, align 8
-  %b = load double addrspace(1)* %gep1, align 8
-  %c = load double addrspace(1)* %gep2, align 8
+  %a = load double, double addrspace(1)* %gep0, align 8
+  %b = load double, double addrspace(1)* %gep1, align 8
+  %c = load double, double addrspace(1)* %gep2, align 8
   %mul = fmul double %a, %b
   %sub = fsub double %mul, %c
   store double %sub, double addrspace(1)* %outgep, align 8
@@ -87,9 +87,9 @@
   %add2 = add i64 %tid.ext, 2
   %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
   %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load float addrspace(1)* %gep0, align 4
-  %b = load float addrspace(1)* %gep1, align 4
-  %c = load float addrspace(1)* %gep2, align 4
+  %a = load float, float addrspace(1)* %gep0, align 4
+  %b = load float, float addrspace(1)* %gep1, align 4
+  %c = load float, float addrspace(1)* %gep2, align 4
   %c.abs = call float @llvm.fabs.f32(float %c) #0
   %mul = fmul float %a, %b
   %sub = fsub float %mul, %c.abs
@@ -112,9 +112,9 @@
   %add2 = add i64 %tid.ext, 2
   %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
   %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load float addrspace(1)* %gep0, align 4
-  %b = load float addrspace(1)* %gep1, align 4
-  %c = load float addrspace(1)* %gep2, align 4
+  %a = load float, float addrspace(1)* %gep0, align 4
+  %b = load float, float addrspace(1)* %gep1, align 4
+  %c = load float, float addrspace(1)* %gep2, align 4
   %c.abs = call float @llvm.fabs.f32(float %c) #0
   %mul = fmul float %a, %b
   %sub = fsub float %c.abs, %mul
@@ -133,9 +133,9 @@
   %add2 = add i64 %tid.ext, 2
   %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
   %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load float addrspace(1)* %gep0, align 4
-  %b = load float addrspace(1)* %gep1, align 4
-  %c = load float addrspace(1)* %gep2, align 4
+  %a = load float, float addrspace(1)* %gep0, align 4
+  %b = load float, float addrspace(1)* %gep1, align 4
+  %c = load float, float addrspace(1)* %gep2, align 4
   %nega = fsub float -0.000000e+00, %a
   %negb = fsub float -0.000000e+00, %b
   %mul = fmul float %nega, %negb
@@ -159,9 +159,9 @@
   %add2 = add i64 %tid.ext, 2
   %gep2 = getelementptr float, float addrspace(1)* %ptr, i64 %add2
   %outgep = getelementptr float, float addrspace(1)* %out, i64 %tid.ext
-  %a = load float addrspace(1)* %gep0, align 4
-  %b = load float addrspace(1)* %gep1, align 4
-  %c = load float addrspace(1)* %gep2, align 4
+  %a = load float, float addrspace(1)* %gep0, align 4
+  %b = load float, float addrspace(1)* %gep1, align 4
+  %c = load float, float addrspace(1)* %gep2, align 4
   %b.abs = call float @llvm.fabs.f32(float %b) #0
   %mul = fmul float %a, %b.abs
   %sub = fsub float %mul, %c
@@ -180,8 +180,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r1 = load float addrspace(1)* %gep.0
-  %r2 = load float addrspace(1)* %gep.1
+  %r1 = load float, float addrspace(1)* %gep.0
+  %r2 = load float, float addrspace(1)* %gep.1
 
   %add = fadd float %r1, %r1
   %r3 = fsub float %r2, %add
@@ -201,8 +201,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %r1 = load float addrspace(1)* %gep.0
-  %r2 = load float addrspace(1)* %gep.1
+  %r1 = load float, float addrspace(1)* %gep.0
+  %r2 = load float, float addrspace(1)* %gep.1
 
   %add = fadd float %r1, %r1
   %r3 = fsub float %add, %r2
diff --git a/llvm/test/CodeGen/R600/madak.ll b/llvm/test/CodeGen/R600/madak.ll
index f958783..cc3e91e 100644
--- a/llvm/test/CodeGen/R600/madak.ll
+++ b/llvm/test/CodeGen/R600/madak.ll
@@ -16,8 +16,8 @@
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %in.a.gep, align 4
-  %b = load float addrspace(1)* %in.b.gep, align 4
+  %a = load float, float addrspace(1)* %in.a.gep, align 4
+  %b = load float, float addrspace(1)* %in.b.gep, align 4
 
   %mul = fmul float %a, %b
   %madak = fadd float %mul, 10.0
@@ -47,9 +47,9 @@
   %out.gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
   %out.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
 
-  %a = load float addrspace(1)* %in.gep.0, align 4
-  %b = load float addrspace(1)* %in.gep.1, align 4
-  %c = load float addrspace(1)* %in.gep.2, align 4
+  %a = load float, float addrspace(1)* %in.gep.0, align 4
+  %b = load float, float addrspace(1)* %in.gep.1, align 4
+  %c = load float, float addrspace(1)* %in.gep.2, align 4
 
   %mul0 = fmul float %a, %b
   %mul1 = fmul float %a, %c
@@ -69,7 +69,7 @@
   %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %in.a.gep, align 4
+  %a = load float, float addrspace(1)* %in.a.gep, align 4
 
   %mul = fmul float 4.0, %a
   %madak = fadd float %mul, 10.0
@@ -90,8 +90,8 @@
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %in.a.gep, align 4
-  %b = load float addrspace(1)* %in.b.gep, align 4
+  %a = load float, float addrspace(1)* %in.a.gep, align 4
+  %b = load float, float addrspace(1)* %in.b.gep, align 4
 
   %mul = fmul float %a, %b
   %madak = fadd float %mul, 4.0
@@ -111,7 +111,7 @@
   %in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %in.a.gep, align 4
+  %a = load float, float addrspace(1)* %in.a.gep, align 4
 
   %mul = fmul float %a, %b
   %madak = fadd float %mul, 10.0
@@ -130,7 +130,7 @@
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %b = load float addrspace(1)* %in.b.gep, align 4
+  %b = load float, float addrspace(1)* %in.b.gep, align 4
 
   %mul = fmul float %a, %b
   %madak = fadd float %mul, 10.0
@@ -159,8 +159,8 @@
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %in.a.gep, align 4
-  %b = load float addrspace(1)* %in.b.gep, align 4
+  %a = load float, float addrspace(1)* %in.a.gep, align 4
+  %b = load float, float addrspace(1)* %in.b.gep, align 4
 
   %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
 
@@ -181,8 +181,8 @@
   %in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %in.a.gep, align 4
-  %b = load float addrspace(1)* %in.b.gep, align 4
+  %a = load float, float addrspace(1)* %in.a.gep, align 4
+  %b = load float, float addrspace(1)* %in.b.gep, align 4
 
   %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
 
diff --git a/llvm/test/CodeGen/R600/madmk.ll b/llvm/test/CodeGen/R600/madmk.ll
index ffd5a94..17d1b8a 100644
--- a/llvm/test/CodeGen/R600/madmk.ll
+++ b/llvm/test/CodeGen/R600/madmk.ll
@@ -14,8 +14,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %mul = fmul float %a, 10.0
   %madmk = fadd float %mul, %b
@@ -41,9 +41,9 @@
   %out.gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
   %out.gep.1 = getelementptr float, float addrspace(1)* %in.gep.0, i32 1
 
-  %a = load float addrspace(1)* %in.gep.0, align 4
-  %b = load float addrspace(1)* %in.gep.1, align 4
-  %c = load float addrspace(1)* %in.gep.2, align 4
+  %a = load float, float addrspace(1)* %in.gep.0, align 4
+  %b = load float, float addrspace(1)* %in.gep.1, align 4
+  %c = load float, float addrspace(1)* %in.gep.2, align 4
 
   %mul0 = fmul float %a, 10.0
   %mul1 = fmul float %a, 10.0
@@ -66,8 +66,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %mul = fmul float %a, 4.0
   %madmk = fadd float %mul, %b
@@ -97,7 +97,7 @@
   %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %a = load float addrspace(1)* %gep.0, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
 
   %mul = fmul float %a, 10.0
   %madmk = fadd float %mul, %b
@@ -113,7 +113,7 @@
   %tid = tail call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
-  %b = load float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.0, align 4
 
   %mul = fmul float %a, 10.0
   %madmk = fadd float %mul, %b
@@ -131,8 +131,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone
 
@@ -152,8 +152,8 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0, align 4
-  %b = load float addrspace(1)* %gep.1, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
+  %b = load float, float addrspace(1)* %gep.1, align 4
 
   %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone
 
@@ -172,7 +172,7 @@
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 
-  %a = load float addrspace(1)* %gep.0, align 4
+  %a = load float, float addrspace(1)* %gep.0, align 4
 
   %mul = fmul float %a, 10.0
   %madmk = fadd float %mul, 2.0
diff --git a/llvm/test/CodeGen/R600/max.ll b/llvm/test/CodeGen/R600/max.ll
index 90931c2..e6ab96c 100644
--- a/llvm/test/CodeGen/R600/max.ll
+++ b/llvm/test/CodeGen/R600/max.ll
@@ -9,8 +9,8 @@
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
   %cmp = icmp sge i32 %a, %b
   %val = select i1 %cmp, i32 %a, i32 %b
   store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -33,8 +33,8 @@
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
   %cmp = icmp sgt i32 %a, %b
   %val = select i1 %cmp, i32 %a, i32 %b
   store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -57,8 +57,8 @@
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
   %cmp = icmp uge i32 %a, %b
   %val = select i1 %cmp, i32 %a, i32 %b
   store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -81,8 +81,8 @@
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
   %cmp = icmp ugt i32 %a, %b
   %val = select i1 %cmp, i32 %a, i32 %b
   store i32 %val, i32 addrspace(1)* %outgep, align 4
diff --git a/llvm/test/CodeGen/R600/max3.ll b/llvm/test/CodeGen/R600/max3.ll
index 8b53584..cfb94b2 100644
--- a/llvm/test/CodeGen/R600/max3.ll
+++ b/llvm/test/CodeGen/R600/max3.ll
@@ -10,9 +10,9 @@
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
-  %c = load i32 addrspace(1)* %gep2, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
+  %c = load i32, i32 addrspace(1)* %gep2, align 4
   %icmp0 = icmp sgt i32 %a, %b
   %i0 = select i1 %icmp0, i32 %a, i32 %b
   %icmp1 = icmp sgt i32 %i0, %c
@@ -29,9 +29,9 @@
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
-  %c = load i32 addrspace(1)* %gep2, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
+  %c = load i32, i32 addrspace(1)* %gep2, align 4
   %icmp0 = icmp ugt i32 %a, %b
   %i0 = select i1 %icmp0, i32 %a, i32 %b
   %icmp1 = icmp ugt i32 %i0, %c
diff --git a/llvm/test/CodeGen/R600/min.ll b/llvm/test/CodeGen/R600/min.ll
index 9f85356..d1febf5 100644
--- a/llvm/test/CodeGen/R600/min.ll
+++ b/llvm/test/CodeGen/R600/min.ll
@@ -9,8 +9,8 @@
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
   %cmp = icmp sle i32 %a, %b
   %val = select i1 %cmp, i32 %a, i32 %b
   store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -33,8 +33,8 @@
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
   %cmp = icmp slt i32 %a, %b
   %val = select i1 %cmp, i32 %a, i32 %b
   store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -57,8 +57,8 @@
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
   %cmp = icmp ule i32 %a, %b
   %val = select i1 %cmp, i32 %a, i32 %b
   store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -81,8 +81,8 @@
   %gep0 = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
   %cmp = icmp ult i32 %a, %b
   %val = select i1 %cmp, i32 %a, i32 %b
   store i32 %val, i32 addrspace(1)* %outgep, align 4
@@ -110,8 +110,8 @@
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %outgep0 = getelementptr i32, i32 addrspace(1)* %out0, i32 %tid
   %outgep1 = getelementptr i1, i1 addrspace(1)* %out1, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
   %cmp = icmp ult i32 %a, %b
   %val = select i1 %cmp, i32 %a, i32 %b
   store i32 %val, i32 addrspace(1)* %outgep0, align 4
diff --git a/llvm/test/CodeGen/R600/min3.ll b/llvm/test/CodeGen/R600/min3.ll
index f14e28c..38ef46d 100644
--- a/llvm/test/CodeGen/R600/min3.ll
+++ b/llvm/test/CodeGen/R600/min3.ll
@@ -10,9 +10,9 @@
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
-  %c = load i32 addrspace(1)* %gep2, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
+  %c = load i32, i32 addrspace(1)* %gep2, align 4
   %icmp0 = icmp slt i32 %a, %b
   %i0 = select i1 %icmp0, i32 %a, i32 %b
   %icmp1 = icmp slt i32 %i0, %c
@@ -29,9 +29,9 @@
   %gep1 = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid
   %gep2 = getelementptr i32, i32 addrspace(1)* %cptr, i32 %tid
   %outgep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
-  %c = load i32 addrspace(1)* %gep2, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
+  %c = load i32, i32 addrspace(1)* %gep2, align 4
   %icmp0 = icmp ult i32 %a, %b
   %i0 = select i1 %icmp0, i32 %a, i32 %b
   %icmp1 = icmp ult i32 %i0, %c
@@ -57,10 +57,10 @@
   %outgep0 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %outgep1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid2
 
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
-  %c = load i32 addrspace(1)* %gep2, align 4
-  %d = load i32 addrspace(1)* %gep3, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
+  %c = load i32, i32 addrspace(1)* %gep2, align 4
+  %d = load i32, i32 addrspace(1)* %gep3, align 4
 
   %icmp0 = icmp slt i32 %a, %b
   %i0 = select i1 %icmp0, i32 %a, i32 %b
@@ -91,10 +91,10 @@
   %outgep0 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
   %outgep1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tid2
 
-  %a = load i32 addrspace(1)* %gep0, align 4
-  %b = load i32 addrspace(1)* %gep1, align 4
-  %c = load i32 addrspace(1)* %gep2, align 4
-  %d = load i32 addrspace(1)* %gep3, align 4
+  %a = load i32, i32 addrspace(1)* %gep0, align 4
+  %b = load i32, i32 addrspace(1)* %gep1, align 4
+  %c = load i32, i32 addrspace(1)* %gep2, align 4
+  %d = load i32, i32 addrspace(1)* %gep3, align 4
 
   %icmp0 = icmp slt i32 %a, %b
   %i0 = select i1 %icmp0, i32 %a, i32 %b
diff --git a/llvm/test/CodeGen/R600/missing-store.ll b/llvm/test/CodeGen/R600/missing-store.ll
index 162fe97..4af9cdf 100644
--- a/llvm/test/CodeGen/R600/missing-store.ll
+++ b/llvm/test/CodeGen/R600/missing-store.ll
@@ -12,11 +12,11 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @missing_store_reduced(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
-  %ptr0 = load i32 addrspace(2)* addrspace(3)* @ptr_load, align 8
+  %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @ptr_load, align 8
   %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
 
   store i32 99, i32 addrspace(1)* %gptr, align 4
-  %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
 
   store i32 %tmp2, i32 addrspace(1)* %out, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/mubuf.ll b/llvm/test/CodeGen/R600/mubuf.ll
index ee4b80d..b19163f 100644
--- a/llvm/test/CodeGen/R600/mubuf.ll
+++ b/llvm/test/CodeGen/R600/mubuf.ll
@@ -12,7 +12,7 @@
 define void @mubuf_load0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1
-  %1 = load i32 addrspace(1)* %0
+  %1 = load i32, i32 addrspace(1)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -23,7 +23,7 @@
 define void @mubuf_load1(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i8, i8 addrspace(1)* %in, i64 4095
-  %1 = load i8 addrspace(1)* %0
+  %1 = load i8, i8 addrspace(1)* %0
   store i8 %1, i8 addrspace(1)* %out
   ret void
 }
@@ -35,7 +35,7 @@
 define void @mubuf_load2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
   %0 = getelementptr i32, i32 addrspace(1)* %in, i64 1024
-  %1 = load i32 addrspace(1)* %0
+  %1 = load i32, i32 addrspace(1)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -48,7 +48,7 @@
 entry:
   %0 = getelementptr i32, i32 addrspace(1)* %in, i64 %offset
   %1 = getelementptr i32, i32 addrspace(1)* %0, i64 1
-  %2 = load i32 addrspace(1)* %1
+  %2 = load i32, i32 addrspace(1)* %1
   store i32 %2, i32 addrspace(1)* %out
   ret void
 }
@@ -58,7 +58,7 @@
 define void @soffset_max_imm([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
 main_body:
   %tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
-  %tmp1 = load <16 x i8> addrspace(2)* %tmp0
+  %tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
   %tmp2 = shl i32 %6, 2
   %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
   %tmp4 = add i32 %6, 16
@@ -77,7 +77,7 @@
 define void @soffset_no_fold([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, [32 x <8 x i32>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32, i32, i32) #1 {
 main_body:
   %tmp0 = getelementptr [6 x <16 x i8>], [6 x <16 x i8>] addrspace(2)* %0, i32 0, i32 0
-  %tmp1 = load <16 x i8> addrspace(2)* %tmp0
+  %tmp1 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp0
   %tmp2 = shl i32 %6, 2
   %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
   %tmp4 = add i32 %6, 16
diff --git a/llvm/test/CodeGen/R600/mul.ll b/llvm/test/CodeGen/R600/mul.ll
index 119a4c0..94e0f96 100644
--- a/llvm/test/CodeGen/R600/mul.ll
+++ b/llvm/test/CodeGen/R600/mul.ll
@@ -13,8 +13,8 @@
 
 define void @test_mul_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1) * %in
-  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
   %result = mul <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -33,8 +33,8 @@
 
 define void @v_mul_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
   %result = mul <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -58,8 +58,8 @@
 ; SI: v_mul_lo_i32
 ; SI: buffer_store_dword
 define void @v_trunc_i64_mul_to_i32(i32 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
-  %a = load i64 addrspace(1)* %aptr, align 8
-  %b = load i64 addrspace(1)* %bptr, align 8
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
+  %b = load i64, i64 addrspace(1)* %bptr, align 8
   %mul = mul i64 %b, %a
   %trunc = trunc i64 %mul to i32
   store i32 %trunc, i32 addrspace(1)* %out, align 8
@@ -88,7 +88,7 @@
 ; SI-DAG: v_mul_hi_i32
 ; SI: s_endpgm
 define void @v_mul64_sext_c(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %ext = sext i32 %val to i64
   %mul = mul i64 %ext, 80
   store i64 %mul, i64 addrspace(1)* %out, align 8
@@ -100,7 +100,7 @@
 ; SI-DAG: v_mul_hi_i32 v{{[0-9]+}}, 9, v{{[0-9]+}}
 ; SI: s_endpgm
 define void @v_mul64_sext_inline_imm(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %ext = sext i32 %val to i64
   %mul = mul i64 %ext, 9
   store i64 %mul, i64 addrspace(1)* %out, align 8
@@ -124,8 +124,8 @@
 ; SI: v_mul_lo_i32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define void @v_mul_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32 addrspace(1)* %in
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %in
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %result = mul i32 %a, %b
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -148,8 +148,8 @@
 ; FUNC-LABEL: {{^}}v_mul_i64:
 ; SI: v_mul_lo_i32
 define void @v_mul_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) {
-  %a = load i64 addrspace(1)* %aptr, align 8
-  %b = load i64 addrspace(1)* %bptr, align 8
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
+  %b = load i64, i64 addrspace(1)* %bptr, align 8
   %mul = mul i64 %a, %b
   store i64 %mul, i64 addrspace(1)* %out, align 8
   ret void
@@ -163,7 +163,7 @@
   br i1 %0, label %if, label %else
 
 if:
-  %1 = load i32 addrspace(1)* %in
+  %1 = load i32, i32 addrspace(1)* %in
   br label %endif
 
 else:
@@ -186,7 +186,7 @@
   br i1 %0, label %if, label %else
 
 if:
-  %1 = load i64 addrspace(1)* %in
+  %1 = load i64, i64 addrspace(1)* %in
   br label %endif
 
 else:
diff --git a/llvm/test/CodeGen/R600/no-initializer-constant-addrspace.ll b/llvm/test/CodeGen/R600/no-initializer-constant-addrspace.ll
index 532edf0..ef0cb0c 100644
--- a/llvm/test/CodeGen/R600/no-initializer-constant-addrspace.ll
+++ b/llvm/test/CodeGen/R600/no-initializer-constant-addrspace.ll
@@ -6,7 +6,7 @@
 
 ; FUNC-LABEL: {{^}}load_extern_const_init:
 define void @load_extern_const_init(i32 addrspace(1)* %out) nounwind {
-  %val = load i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @extern_const_addrspace, i64 0, i64 3), align 4
+  %val = load i32, i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @extern_const_addrspace, i64 0, i64 3), align 4
   store i32 %val, i32 addrspace(1)* %out, align 4
   ret void
 }
@@ -15,7 +15,7 @@
 
 ; FUNC-LABEL: {{^}}load_undef_const_init:
 define void @load_undef_const_init(i32 addrspace(1)* %out) nounwind {
-  %val = load i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @undef_const_addrspace, i64 0, i64 3), align 4
+  %val = load i32, i32 addrspace(2)* getelementptr ([5 x i32] addrspace(2)* @undef_const_addrspace, i64 0, i64 3), align 4
   store i32 %val, i32 addrspace(1)* %out, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/no-shrink-extloads.ll b/llvm/test/CodeGen/R600/no-shrink-extloads.ll
index 2bd1a86..e4328ec 100644
--- a/llvm/test/CodeGen/R600/no-shrink-extloads.ll
+++ b/llvm/test/CodeGen/R600/no-shrink-extloads.ll
@@ -25,7 +25,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i16, i16 addrspace(1)* %out, i32 %tid
-  %load = load i32 addrspace(1)* %gep.in
+  %load = load i32, i32 addrspace(1)* %gep.in
   %trunc = trunc i32 %load to i16
   store i16 %trunc, i16 addrspace(1)* %gep.out
   ret void
@@ -47,7 +47,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
-  %load = load i32 addrspace(1)* %gep.in
+  %load = load i32, i32 addrspace(1)* %gep.in
   %trunc = trunc i32 %load to i8
   store i8 %trunc, i8 addrspace(1)* %gep.out
   ret void
@@ -69,7 +69,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i1, i1 addrspace(1)* %out, i32 %tid
-  %load = load i32 addrspace(1)* %gep.in
+  %load = load i32, i32 addrspace(1)* %gep.in
   %trunc = trunc i32 %load to i1
   store i1 %trunc, i1 addrspace(1)* %gep.out
   ret void
@@ -91,7 +91,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %load = load i64 addrspace(1)* %gep.in
+  %load = load i64, i64 addrspace(1)* %gep.in
   %trunc = trunc i64 %load to i32
   store i32 %trunc, i32 addrspace(1)* %gep.out
   ret void
@@ -114,7 +114,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %load = load i64 addrspace(1)* %gep.in
+  %load = load i64, i64 addrspace(1)* %gep.in
   %srl = lshr i64 %load, 32
   %trunc = trunc i64 %srl to i32
   store i32 %trunc, i32 addrspace(1)* %gep.out
@@ -138,7 +138,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.in = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
-  %load = load i16 addrspace(1)* %gep.in
+  %load = load i16, i16 addrspace(1)* %gep.in
   %trunc = trunc i16 %load to i8
   store i8 %trunc, i8 addrspace(1)* %gep.out
   ret void
@@ -161,7 +161,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
-  %load = load i64 addrspace(1)* %gep.in
+  %load = load i64, i64 addrspace(1)* %gep.in
   %srl = lshr i64 %load, 32
   %trunc = trunc i64 %srl to i8
   store i8 %trunc, i8 addrspace(1)* %gep.out
@@ -184,7 +184,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %gep.out = getelementptr i8, i8 addrspace(1)* %out, i32 %tid
-  %load = load i64 addrspace(1)* %gep.in
+  %load = load i64, i64 addrspace(1)* %gep.in
   %trunc = trunc i64 %load to i8
   store i8 %trunc, i8 addrspace(1)* %gep.out
   ret void
diff --git a/llvm/test/CodeGen/R600/or.ll b/llvm/test/CodeGen/R600/or.ll
index c62ef37..1b1cb9a 100644
--- a/llvm/test/CodeGen/R600/or.ll
+++ b/llvm/test/CodeGen/R600/or.ll
@@ -11,8 +11,8 @@
 ; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 define void @or_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1) * %in
-  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
   %result = or <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -30,8 +30,8 @@
 ; SI: v_or_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 define void @or_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
   %result = or <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -48,7 +48,7 @@
 ; FUNC-LABEL: {{^}}vector_or_i32:
 ; SI: v_or_b32_e32 v{{[0-9]}}
 define void @vector_or_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 %b) {
-  %loada = load i32 addrspace(1)* %a
+  %loada = load i32, i32 addrspace(1)* %a
   %or = or i32 %loada, %b
   store i32 %or, i32 addrspace(1)* %out
   ret void
@@ -65,7 +65,7 @@
 ; FUNC-LABEL: {{^}}vector_or_literal_i32:
 ; SI: v_or_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
 define void @vector_or_literal_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
-  %loada = load i32 addrspace(1)* %a, align 4
+  %loada = load i32, i32 addrspace(1)* %a, align 4
   %or = or i32 %loada, 65535
   store i32 %or, i32 addrspace(1)* %out, align 4
   ret void
@@ -74,7 +74,7 @@
 ; FUNC-LABEL: {{^}}vector_or_inline_immediate_i32:
 ; SI: v_or_b32_e32 v{{[0-9]+}}, 4, v{{[0-9]+}}
 define void @vector_or_inline_immediate_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) {
-  %loada = load i32 addrspace(1)* %a, align 4
+  %loada = load i32, i32 addrspace(1)* %a, align 4
   %or = or i32 %loada, 4
   store i32 %or, i32 addrspace(1)* %out, align 4
   ret void
@@ -95,8 +95,8 @@
 ; SI: v_or_b32_e32 v{{[0-9]}}
 ; SI: v_or_b32_e32 v{{[0-9]}}
 define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 8
-  %loadb = load i64 addrspace(1)* %a, align 8
+  %loada = load i64, i64 addrspace(1)* %a, align 8
+  %loadb = load i64, i64 addrspace(1)* %a, align 8
   %or = or i64 %loada, %loadb
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -106,7 +106,7 @@
 ; SI: v_or_b32_e32 v{{[0-9]}}
 ; SI: v_or_b32_e32 v{{[0-9]}}
 define void @scalar_vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 %b) {
-  %loada = load i64 addrspace(1)* %a
+  %loada = load i64, i64 addrspace(1)* %a
   %or = or i64 %loada, %b
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -120,7 +120,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @vector_or_i64_loadimm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 8
+  %loada = load i64, i64 addrspace(1)* %a, align 8
   %or = or i64 %loada, 22470723082367
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -133,7 +133,7 @@
 ; SI: v_or_b32_e32 {{v[0-9]+}}, 0, {{.*}}
 ; SI: s_endpgm
 define void @vector_or_i64_imm(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 8
+  %loada = load i64, i64 addrspace(1)* %a, align 8
   %or = or i64 %loada, 8
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -157,8 +157,8 @@
 
 ; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
 define void @or_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
-  %a = load float addrspace(1)* %in0
-  %b = load float addrspace(1)* %in1
+  %a = load float, float addrspace(1)* %in0
+  %b = load float, float addrspace(1)* %in1
   %acmp = fcmp oge float %a, 0.000000e+00
   %bcmp = fcmp oge float %b, 0.000000e+00
   %or = or i1 %acmp, %bcmp
diff --git a/llvm/test/CodeGen/R600/parallelandifcollapse.ll b/llvm/test/CodeGen/R600/parallelandifcollapse.ll
index 82b1150..f32b044 100644
--- a/llvm/test/CodeGen/R600/parallelandifcollapse.ll
+++ b/llvm/test/CodeGen/R600/parallelandifcollapse.ll
@@ -23,14 +23,14 @@
   %c1 = alloca i32, align 4
   %d1 = alloca i32, align 4
   %data = alloca i32, align 4
-  %0 = load i32* %a0, align 4
-  %1 = load i32* %b0, align 4
+  %0 = load i32, i32* %a0, align 4
+  %1 = load i32, i32* %b0, align 4
   %cmp = icmp ne i32 %0, %1
   br i1 %cmp, label %land.lhs.true, label %if.end
 
 land.lhs.true:                                    ; preds = %entry
-  %2 = load i32* %c0, align 4
-  %3 = load i32* %d0, align 4
+  %2 = load i32, i32* %c0, align 4
+  %3 = load i32, i32* %d0, align 4
   %cmp1 = icmp ne i32 %2, %3
   br i1 %cmp1, label %if.then, label %if.end
 
@@ -39,14 +39,14 @@
   br label %if.end
 
 if.end:                                           ; preds = %if.then, %land.lhs.true, %entry
-  %4 = load i32* %a1, align 4
-  %5 = load i32* %b1, align 4
+  %4 = load i32, i32* %a1, align 4
+  %5 = load i32, i32* %b1, align 4
   %cmp2 = icmp ne i32 %4, %5
   br i1 %cmp2, label %land.lhs.true3, label %if.end6
 
 land.lhs.true3:                                   ; preds = %if.end
-  %6 = load i32* %c1, align 4
-  %7 = load i32* %d1, align 4
+  %6 = load i32, i32* %c1, align 4
+  %7 = load i32, i32* %d1, align 4
   %cmp4 = icmp ne i32 %6, %7
   br i1 %cmp4, label %if.then5, label %if.end6
 
diff --git a/llvm/test/CodeGen/R600/parallelorifcollapse.ll b/llvm/test/CodeGen/R600/parallelorifcollapse.ll
index feca688..1da1e91b8 100644
--- a/llvm/test/CodeGen/R600/parallelorifcollapse.ll
+++ b/llvm/test/CodeGen/R600/parallelorifcollapse.ll
@@ -23,14 +23,14 @@
   %c1 = alloca i32, align 4
   %d1 = alloca i32, align 4
   %data = alloca i32, align 4
-  %0 = load i32* %a0, align 4
-  %1 = load i32* %b0, align 4
+  %0 = load i32, i32* %a0, align 4
+  %1 = load i32, i32* %b0, align 4
   %cmp = icmp ne i32 %0, %1
   br i1 %cmp, label %land.lhs.true, label %if.else
 
 land.lhs.true:                                    ; preds = %entry
-  %2 = load i32* %c0, align 4
-  %3 = load i32* %d0, align 4
+  %2 = load i32, i32* %c0, align 4
+  %3 = load i32, i32* %d0, align 4
   %cmp1 = icmp ne i32 %2, %3
   br i1 %cmp1, label %if.then, label %if.else
 
@@ -42,14 +42,14 @@
   br label %if.end
 
 if.end:                                           ; preds = %if.else, %if.then
-  %4 = load i32* %a1, align 4
-  %5 = load i32* %b1, align 4
+  %4 = load i32, i32* %a1, align 4
+  %5 = load i32, i32* %b1, align 4
   %cmp2 = icmp ne i32 %4, %5
   br i1 %cmp2, label %land.lhs.true3, label %if.else6
 
 land.lhs.true3:                                   ; preds = %if.end
-  %6 = load i32* %c1, align 4
-  %7 = load i32* %d1, align 4
+  %6 = load i32, i32* %c1, align 4
+  %7 = load i32, i32* %d1, align 4
   %cmp4 = icmp ne i32 %6, %7
   br i1 %cmp4, label %if.then5, label %if.else6
 
diff --git a/llvm/test/CodeGen/R600/private-memory.ll b/llvm/test/CodeGen/R600/private-memory.ll
index 881baf3..1c56297 100644
--- a/llvm/test/CodeGen/R600/private-memory.ll
+++ b/llvm/test/CodeGen/R600/private-memory.ll
@@ -23,18 +23,18 @@
 define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
 entry:
   %stack = alloca [5 x i32], align 4
-  %0 = load i32 addrspace(1)* %in, align 4
+  %0 = load i32, i32 addrspace(1)* %in, align 4
   %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %0
   store i32 4, i32* %arrayidx1, align 4
   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
-  %1 = load i32 addrspace(1)* %arrayidx2, align 4
+  %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
   %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 %1
   store i32 5, i32* %arrayidx3, align 4
   %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 0
-  %2 = load i32* %arrayidx10, align 4
+  %2 = load i32, i32* %arrayidx10, align 4
   store i32 %2, i32 addrspace(1)* %out, align 4
   %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32]* %stack, i32 0, i32 1
-  %3 = load i32* %arrayidx12
+  %3 = load i32, i32* %arrayidx12
   %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
   store i32 %3, i32 addrspace(1)* %arrayidx13
   ret void
@@ -67,8 +67,8 @@
   store i32 3, i32* %b.y.ptr
   %a.indirect.ptr = getelementptr %struct.point, %struct.point* %a, i32 0, i32 0
   %b.indirect.ptr = getelementptr %struct.point, %struct.point* %b, i32 0, i32 0
-  %a.indirect = load i32* %a.indirect.ptr
-  %b.indirect = load i32* %b.indirect.ptr
+  %a.indirect = load i32, i32* %a.indirect.ptr
+  %b.indirect = load i32, i32* %b.indirect.ptr
   %0 = add i32 %a.indirect, %b.indirect
   store i32 %0, i32 addrspace(1)* %out
   ret void
@@ -86,9 +86,9 @@
 entry:
   %prv_array_const = alloca [2 x i32]
   %prv_array = alloca [2 x i32]
-  %a = load i32 addrspace(1)* %in
+  %a = load i32, i32 addrspace(1)* %in
   %b_src_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %b = load i32 addrspace(1)* %b_src_ptr
+  %b = load i32, i32 addrspace(1)* %b_src_ptr
   %a_dst_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
   store i32 %a, i32* %a_dst_ptr
   %b_dst_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 1
@@ -98,9 +98,9 @@
 for.body:
   %inc = phi i32 [0, %entry], [%count, %for.body]
   %x_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array_const, i32 0, i32 0
-  %x = load i32* %x_ptr
+  %x = load i32, i32* %x_ptr
   %y_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
-  %y = load i32* %y_ptr
+  %y = load i32, i32* %y_ptr
   %xy = add i32 %x, %y
   store i32 %xy, i32* %y_ptr
   %count = add i32 %inc, 1
@@ -109,7 +109,7 @@
 
 for.end:
   %value_ptr = getelementptr [2 x i32], [2 x i32]* %prv_array, i32 0, i32 0
-  %value = load i32* %value_ptr
+  %value = load i32, i32* %value_ptr
   store i32 %value, i32 addrspace(1)* %out
   ret void
 }
@@ -129,7 +129,7 @@
   store i16 0, i16* %1
   store i16 1, i16* %2
   %3 = getelementptr [2 x i16], [2 x i16]* %0, i32 0, i32 %index
-  %4 = load i16* %3
+  %4 = load i16, i16* %3
   %5 = sext i16 %4 to i32
   store i32 %5, i32 addrspace(1)* %out
   ret void
@@ -149,7 +149,7 @@
   store i8 0, i8* %1
   store i8 1, i8* %2
   %3 = getelementptr [2 x i8], [2 x i8]* %0, i32 0, i32 %index
-  %4 = load i8* %3
+  %4 = load i8, i8* %3
   %5 = sext i8 %4 to i32
   store i32 %5, i32 addrspace(1)* %out
   ret void
@@ -172,7 +172,7 @@
   store i32 0, i32* %1
   store i32 1, i32* %2
   %3 = getelementptr [2 x i32], [2 x i32]* %0, i32 0, i32 %in
-  %4 = load i32* %3
+  %4 = load i32, i32* %3
   %5 = call i32 @llvm.r600.read.tidig.x()
   %6 = add i32 %4, %5
   store i32 %6, i32 addrspace(1)* %out
@@ -202,8 +202,8 @@
   store i8 0, i8* %6
   %7 = getelementptr [3 x i8], [3 x i8]* %0, i32 0, i32 %in
   %8 = getelementptr [2 x i8], [2 x i8]* %1, i32 0, i32 %in
-  %9 = load i8* %7
-  %10 = load i8* %8
+  %9 = load i8, i8* %7
+  %10 = load i8, i8* %8
   %11 = add i8 %9, %10
   %12 = sext i8 %11 to i32
   store i32 %12, i32 addrspace(1)* %out
@@ -218,7 +218,7 @@
   store i8 0, i8* %gep0
   store i8 1, i8* %gep1
   %gep2 = getelementptr [2 x [2 x i8]], [2 x [2 x i8]]* %alloca, i32 0, i32 0, i32 %index
-  %load = load i8* %gep2
+  %load = load i8, i8* %gep2
   %sext = sext i8 %load to i32
   store i32 %sext, i32 addrspace(1)* %out
   ret void
@@ -232,7 +232,7 @@
   store i32 0, i32* %gep0
   store i32 1, i32* %gep1
   %gep2 = getelementptr [2 x [2 x i32]], [2 x [2 x i32]]* %alloca, i32 0, i32 0, i32 %index
-  %load = load i32* %gep2
+  %load = load i32, i32* %gep2
   store i32 %load, i32 addrspace(1)* %out
   ret void
 }
@@ -245,7 +245,7 @@
   store i64 0, i64* %gep0
   store i64 1, i64* %gep1
   %gep2 = getelementptr [2 x [2 x i64]], [2 x [2 x i64]]* %alloca, i32 0, i32 0, i32 %index
-  %load = load i64* %gep2
+  %load = load i64, i64* %gep2
   store i64 %load, i64 addrspace(1)* %out
   ret void
 }
@@ -260,7 +260,7 @@
   store i32 0, i32* %gep0
   store i32 1, i32* %gep1
   %gep2 = getelementptr [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]]* %alloca, i32 0, i32 0, i32 %index, i32 0
-  %load = load i32* %gep2
+  %load = load i32, i32* %gep2
   store i32 %load, i32 addrspace(1)* %out
   ret void
 }
@@ -273,7 +273,7 @@
   store i32 0, i32* %gep0
   store i32 1, i32* %gep1
   %gep2 = getelementptr [2 x %struct.pair32], [2 x %struct.pair32]* %alloca, i32 0, i32 %index, i32 0
-  %load = load i32* %gep2
+  %load = load i32, i32* %gep2
   store i32 %load, i32 addrspace(1)* %out
   ret void
 }
@@ -287,7 +287,7 @@
   store i32 1, i32* %tmp2
   %cmp = icmp eq i32 %in, 0
   %sel = select i1 %cmp, i32* %tmp1, i32* %tmp2
-  %load = load i32* %sel
+  %load = load i32, i32* %sel
   store i32 %load, i32 addrspace(1)* %out
   ret void
 }
@@ -307,7 +307,7 @@
   %tmp2 = add i32 %tmp1, 5
   %tmp3 = inttoptr i32 %tmp2 to i32*
   %tmp4 = getelementptr i32, i32* %tmp3, i32 %b
-  %tmp5 = load i32* %tmp4
+  %tmp5 = load i32, i32* %tmp4
   store i32 %tmp5, i32 addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/pv-packing.ll b/llvm/test/CodeGen/R600/pv-packing.ll
index e5615b9..445c0bf 100644
--- a/llvm/test/CodeGen/R600/pv-packing.ll
+++ b/llvm/test/CodeGen/R600/pv-packing.ll
@@ -14,8 +14,8 @@
   %6 = extractelement <4 x float> %reg3, i32 0
   %7 = extractelement <4 x float> %reg3, i32 1
   %8 = extractelement <4 x float> %reg3, i32 2
-  %9 = load <4 x float> addrspace(8)* null
-  %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %9 = load <4 x float>, <4 x float> addrspace(8)* null
+  %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %11 = call float @llvm.AMDGPU.dp4(<4 x float> %9, <4 x float> %9)
   %12 = fmul float %0, %3
   %13 = fadd float %12, %6
diff --git a/llvm/test/CodeGen/R600/pv.ll b/llvm/test/CodeGen/R600/pv.ll
index 1908f15..26bfa0d 100644
--- a/llvm/test/CodeGen/R600/pv.ll
+++ b/llvm/test/CodeGen/R600/pv.ll
@@ -33,63 +33,63 @@
   %25 = extractelement <4 x float> %reg7, i32 1
   %26 = extractelement <4 x float> %reg7, i32 2
   %27 = extractelement <4 x float> %reg7, i32 3
-  %28 = load <4 x float> addrspace(8)* null
+  %28 = load <4 x float>, <4 x float> addrspace(8)* null
   %29 = extractelement <4 x float> %28, i32 0
   %30 = fmul float %0, %29
-  %31 = load <4 x float> addrspace(8)* null
+  %31 = load <4 x float>, <4 x float> addrspace(8)* null
   %32 = extractelement <4 x float> %31, i32 1
   %33 = fmul float %0, %32
-  %34 = load <4 x float> addrspace(8)* null
+  %34 = load <4 x float>, <4 x float> addrspace(8)* null
   %35 = extractelement <4 x float> %34, i32 2
   %36 = fmul float %0, %35
-  %37 = load <4 x float> addrspace(8)* null
+  %37 = load <4 x float>, <4 x float> addrspace(8)* null
   %38 = extractelement <4 x float> %37, i32 3
   %39 = fmul float %0, %38
-  %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %40 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %41 = extractelement <4 x float> %40, i32 0
   %42 = fmul float %1, %41
   %43 = fadd float %42, %30
-  %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %44 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %45 = extractelement <4 x float> %44, i32 1
   %46 = fmul float %1, %45
   %47 = fadd float %46, %33
-  %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %49 = extractelement <4 x float> %48, i32 2
   %50 = fmul float %1, %49
   %51 = fadd float %50, %36
-  %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %53 = extractelement <4 x float> %52, i32 3
   %54 = fmul float %1, %53
   %55 = fadd float %54, %39
-  %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %57 = extractelement <4 x float> %56, i32 0
   %58 = fmul float %2, %57
   %59 = fadd float %58, %43
-  %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %61 = extractelement <4 x float> %60, i32 1
   %62 = fmul float %2, %61
   %63 = fadd float %62, %47
-  %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %65 = extractelement <4 x float> %64, i32 2
   %66 = fmul float %2, %65
   %67 = fadd float %66, %51
-  %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %69 = extractelement <4 x float> %68, i32 3
   %70 = fmul float %2, %69
   %71 = fadd float %70, %55
-  %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %73 = extractelement <4 x float> %72, i32 0
   %74 = fmul float %3, %73
   %75 = fadd float %74, %59
-  %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %77 = extractelement <4 x float> %76, i32 1
   %78 = fmul float %3, %77
   %79 = fadd float %78, %63
-  %80 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %80 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %81 = extractelement <4 x float> %80, i32 2
   %82 = fmul float %3, %81
   %83 = fadd float %82, %67
-  %84 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %84 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %85 = extractelement <4 x float> %84, i32 3
   %86 = fmul float %3, %85
   %87 = fadd float %86, %71
@@ -107,15 +107,15 @@
   %99 = fmul float %4, %98
   %100 = fmul float %5, %98
   %101 = fmul float %6, %98
-  %102 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %102 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %103 = extractelement <4 x float> %102, i32 0
   %104 = fmul float %103, %8
   %105 = fadd float %104, %20
-  %106 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %106 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %107 = extractelement <4 x float> %106, i32 1
   %108 = fmul float %107, %9
   %109 = fadd float %108, %21
-  %110 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %110 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %111 = extractelement <4 x float> %110, i32 2
   %112 = fmul float %111, %10
   %113 = fadd float %112, %22
@@ -123,11 +123,11 @@
   %115 = call float @llvm.AMDIL.clamp.(float %109, float 0.000000e+00, float 1.000000e+00)
   %116 = call float @llvm.AMDIL.clamp.(float %113, float 0.000000e+00, float 1.000000e+00)
   %117 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
-  %118 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %118 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
   %119 = extractelement <4 x float> %118, i32 0
-  %120 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %120 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
   %121 = extractelement <4 x float> %120, i32 1
-  %122 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %122 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
   %123 = extractelement <4 x float> %122, i32 2
   %124 = insertelement <4 x float> undef, float %99, i32 0
   %125 = insertelement <4 x float> %124, float %100, i32 1
@@ -138,11 +138,11 @@
   %130 = insertelement <4 x float> %129, float %123, i32 2
   %131 = insertelement <4 x float> %130, float 0.000000e+00, i32 3
   %132 = call float @llvm.AMDGPU.dp4(<4 x float> %127, <4 x float> %131)
-  %133 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %133 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
   %134 = extractelement <4 x float> %133, i32 0
-  %135 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %135 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
   %136 = extractelement <4 x float> %135, i32 1
-  %137 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %137 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
   %138 = extractelement <4 x float> %137, i32 2
   %139 = insertelement <4 x float> undef, float %99, i32 0
   %140 = insertelement <4 x float> %139, float %100, i32 1
@@ -153,31 +153,31 @@
   %145 = insertelement <4 x float> %144, float %138, i32 2
   %146 = insertelement <4 x float> %145, float 0.000000e+00, i32 3
   %147 = call float @llvm.AMDGPU.dp4(<4 x float> %142, <4 x float> %146)
-  %148 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %148 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
   %149 = extractelement <4 x float> %148, i32 0
   %150 = fmul float %149, %8
-  %151 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %151 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
   %152 = extractelement <4 x float> %151, i32 1
   %153 = fmul float %152, %9
-  %154 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %154 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
   %155 = extractelement <4 x float> %154, i32 2
   %156 = fmul float %155, %10
-  %157 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %157 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
   %158 = extractelement <4 x float> %157, i32 0
   %159 = fmul float %158, %12
-  %160 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %160 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
   %161 = extractelement <4 x float> %160, i32 1
   %162 = fmul float %161, %13
-  %163 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %163 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
   %164 = extractelement <4 x float> %163, i32 2
   %165 = fmul float %164, %14
-  %166 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %166 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
   %167 = extractelement <4 x float> %166, i32 0
   %168 = fmul float %167, %16
-  %169 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %169 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
   %170 = extractelement <4 x float> %169, i32 1
   %171 = fmul float %170, %17
-  %172 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %172 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
   %173 = extractelement <4 x float> %172, i32 2
   %174 = fmul float %173, %18
   %175 = fcmp uge float %132, 0.000000e+00
diff --git a/llvm/test/CodeGen/R600/r600-export-fix.ll b/llvm/test/CodeGen/R600/r600-export-fix.ll
index 7d72856..fd789b0 100644
--- a/llvm/test/CodeGen/R600/r600-export-fix.ll
+++ b/llvm/test/CodeGen/R600/r600-export-fix.ll
@@ -16,83 +16,83 @@
   %1 = extractelement <4 x float> %reg1, i32 1
   %2 = extractelement <4 x float> %reg1, i32 2
   %3 = extractelement <4 x float> %reg1, i32 3
-  %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %5 = extractelement <4 x float> %4, i32 0
   %6 = fmul float %5, %0
-  %7 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %8 = extractelement <4 x float> %7, i32 1
   %9 = fmul float %8, %0
-  %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %11 = extractelement <4 x float> %10, i32 2
   %12 = fmul float %11, %0
-  %13 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
+  %13 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
   %14 = extractelement <4 x float> %13, i32 3
   %15 = fmul float %14, %0
-  %16 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %16 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
   %17 = extractelement <4 x float> %16, i32 0
   %18 = fmul float %17, %1
   %19 = fadd float %18, %6
-  %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
   %21 = extractelement <4 x float> %20, i32 1
   %22 = fmul float %21, %1
   %23 = fadd float %22, %9
-  %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %24 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
   %25 = extractelement <4 x float> %24, i32 2
   %26 = fmul float %25, %1
   %27 = fadd float %26, %12
-  %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
   %29 = extractelement <4 x float> %28, i32 3
   %30 = fmul float %29, %1
   %31 = fadd float %30, %15
-  %32 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %32 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
   %33 = extractelement <4 x float> %32, i32 0
   %34 = fmul float %33, %2
   %35 = fadd float %34, %19
-  %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %36 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
   %37 = extractelement <4 x float> %36, i32 1
   %38 = fmul float %37, %2
   %39 = fadd float %38, %23
-  %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %40 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
   %41 = extractelement <4 x float> %40, i32 2
   %42 = fmul float %41, %2
   %43 = fadd float %42, %27
-  %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %44 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
   %45 = extractelement <4 x float> %44, i32 3
   %46 = fmul float %45, %2
   %47 = fadd float %46, %31
-  %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
   %49 = extractelement <4 x float> %48, i32 0
   %50 = fmul float %49, %3
   %51 = fadd float %50, %35
-  %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
   %53 = extractelement <4 x float> %52, i32 1
   %54 = fmul float %53, %3
   %55 = fadd float %54, %39
-  %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
   %57 = extractelement <4 x float> %56, i32 2
   %58 = fmul float %57, %3
   %59 = fadd float %58, %43
-  %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
   %61 = extractelement <4 x float> %60, i32 3
   %62 = fmul float %61, %3
   %63 = fadd float %62, %47
-  %64 = load <4 x float> addrspace(8)* null
+  %64 = load <4 x float>, <4 x float> addrspace(8)* null
   %65 = extractelement <4 x float> %64, i32 0
-  %66 = load <4 x float> addrspace(8)* null
+  %66 = load <4 x float>, <4 x float> addrspace(8)* null
   %67 = extractelement <4 x float> %66, i32 1
-  %68 = load <4 x float> addrspace(8)* null
+  %68 = load <4 x float>, <4 x float> addrspace(8)* null
   %69 = extractelement <4 x float> %68, i32 2
-  %70 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %70 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %71 = extractelement <4 x float> %70, i32 0
-  %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %73 = extractelement <4 x float> %72, i32 1
-  %74 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %74 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %75 = extractelement <4 x float> %74, i32 2
-  %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %77 = extractelement <4 x float> %76, i32 0
-  %78 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %78 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %79 = extractelement <4 x float> %78, i32 1
-  %80 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
+  %80 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
   %81 = extractelement <4 x float> %80, i32 2
   %82 = insertelement <4 x float> undef, float %51, i32 0
   %83 = insertelement <4 x float> %82, float %55, i32 1
diff --git a/llvm/test/CodeGen/R600/r600cfg.ll b/llvm/test/CodeGen/R600/r600cfg.ll
index dddc9de..c7b9d65 100644
--- a/llvm/test/CodeGen/R600/r600cfg.ll
+++ b/llvm/test/CodeGen/R600/r600cfg.ll
@@ -83,7 +83,7 @@
 ENDIF43:                                          ; preds = %ELSE45, %IF44
   %.sink = phi i32 [ %49, %IF44 ], [ %51, %ELSE45 ]
   %52 = bitcast i32 %.sink to float
-  %53 = load <4 x float> addrspace(8)* null
+  %53 = load <4 x float>, <4 x float> addrspace(8)* null
   %54 = extractelement <4 x float> %53, i32 0
   %55 = bitcast float %54 to i32
   br label %LOOP47
diff --git a/llvm/test/CodeGen/R600/register-count-comments.ll b/llvm/test/CodeGen/R600/register-count-comments.ll
index 7f36fd2..de6bfb3 100644
--- a/llvm/test/CodeGen/R600/register-count-comments.ll
+++ b/llvm/test/CodeGen/R600/register-count-comments.ll
@@ -12,8 +12,8 @@
   %aptr = getelementptr i32, i32 addrspace(1)* %abase, i32 %tid
   %bptr = getelementptr i32, i32 addrspace(1)* %bbase, i32 %tid
   %outptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load i32 addrspace(1)* %aptr, align 4
-  %b = load i32 addrspace(1)* %bptr, align 4
+  %a = load i32, i32 addrspace(1)* %aptr, align 4
+  %b = load i32, i32 addrspace(1)* %bptr, align 4
   %result = add i32 %a, %b
   store i32 %result, i32 addrspace(1)* %outptr, align 4
   ret void
diff --git a/llvm/test/CodeGen/R600/reorder-stores.ll b/llvm/test/CodeGen/R600/reorder-stores.ll
index ea50d5e..187650f 100644
--- a/llvm/test/CodeGen/R600/reorder-stores.ll
+++ b/llvm/test/CodeGen/R600/reorder-stores.ll
@@ -12,8 +12,8 @@
 ; SI: buffer_store_dwordx2
 ; SI: s_endpgm
 define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind {
-  %tmp1 = load <2 x double> addrspace(1)* %x, align 16
-  %tmp4 = load <2 x double> addrspace(1)* %y, align 16
+  %tmp1 = load <2 x double>, <2 x double> addrspace(1)* %x, align 16
+  %tmp4 = load <2 x double>, <2 x double> addrspace(1)* %y, align 16
   store <2 x double> %tmp4, <2 x double> addrspace(1)* %x, align 16
   store <2 x double> %tmp1, <2 x double> addrspace(1)* %y, align 16
   ret void
@@ -26,8 +26,8 @@
 ; SI: ds_write_b64
 ; SI: s_endpgm
 define void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace(3)* nocapture %x, <2 x double> addrspace(3)* nocapture %y) nounwind {
-  %tmp1 = load <2 x double> addrspace(3)* %x, align 16
-  %tmp4 = load <2 x double> addrspace(3)* %y, align 16
+  %tmp1 = load <2 x double>, <2 x double> addrspace(3)* %x, align 16
+  %tmp4 = load <2 x double>, <2 x double> addrspace(3)* %y, align 16
   store <2 x double> %tmp4, <2 x double> addrspace(3)* %x, align 16
   store <2 x double> %tmp1, <2 x double> addrspace(3)* %y, align 16
   ret void
@@ -76,8 +76,8 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind {
-  %tmp1 = load <8 x i32> addrspace(1)* %x, align 32
-  %tmp4 = load <8 x i32> addrspace(1)* %y, align 32
+  %tmp1 = load <8 x i32>, <8 x i32> addrspace(1)* %x, align 32
+  %tmp4 = load <8 x i32>, <8 x i32> addrspace(1)* %y, align 32
   store <8 x i32> %tmp4, <8 x i32> addrspace(1)* %x, align 32
   store <8 x i32> %tmp1, <8 x i32> addrspace(1)* %y, align 32
   ret void
@@ -91,8 +91,8 @@
 ; SI: ds_write_b64
 ; SI: s_endpgm
 define void @no_reorder_extload_64(<2 x i32> addrspace(3)* nocapture %x, <2 x i32> addrspace(3)* nocapture %y) nounwind {
-  %tmp1 = load <2 x i32> addrspace(3)* %x, align 8
-  %tmp4 = load <2 x i32> addrspace(3)* %y, align 8
+  %tmp1 = load <2 x i32>, <2 x i32> addrspace(3)* %x, align 8
+  %tmp4 = load <2 x i32>, <2 x i32> addrspace(3)* %y, align 8
   %tmp1ext = zext <2 x i32> %tmp1 to <2 x i64>
   %tmp4ext = zext <2 x i32> %tmp4 to <2 x i64>
   %tmp7 = add <2 x i64> %tmp1ext, <i64 1, i64 1>
diff --git a/llvm/test/CodeGen/R600/rotl.i64.ll b/llvm/test/CodeGen/R600/rotl.i64.ll
index 6da17a4..3f4ceb7 100644
--- a/llvm/test/CodeGen/R600/rotl.i64.ll
+++ b/llvm/test/CodeGen/R600/rotl.i64.ll
@@ -28,8 +28,8 @@
 ; BOTH: s_endpgm
 define void @v_rotl_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
 entry:
-  %x = load i64 addrspace(1)* %xptr, align 8
-  %y = load i64 addrspace(1)* %yptr, align 8
+  %x = load i64, i64 addrspace(1)* %xptr, align 8
+  %y = load i64, i64 addrspace(1)* %yptr, align 8
   %tmp0 = shl i64 %x, %y
   %tmp1 = sub i64 64, %y
   %tmp2 = lshr i64 %x, %tmp1
diff --git a/llvm/test/CodeGen/R600/rotr.i64.ll b/llvm/test/CodeGen/R600/rotr.i64.ll
index f1d1d26..586de44 100644
--- a/llvm/test/CodeGen/R600/rotr.i64.ll
+++ b/llvm/test/CodeGen/R600/rotr.i64.ll
@@ -26,8 +26,8 @@
 ; BOTH: v_or_b32
 define void @v_rotr_i64(i64 addrspace(1)* %in, i64 addrspace(1)* %xptr, i64 addrspace(1)* %yptr) {
 entry:
-  %x = load i64 addrspace(1)* %xptr, align 8
-  %y = load i64 addrspace(1)* %yptr, align 8
+  %x = load i64, i64 addrspace(1)* %xptr, align 8
+  %y = load i64, i64 addrspace(1)* %yptr, align 8
   %tmp0 = sub i64 64, %y
   %tmp1 = shl i64 %x, %tmp0
   %tmp2 = lshr i64 %x, %y
@@ -50,8 +50,8 @@
 ; BOTH-LABEL: {{^}}v_rotr_v2i64:
 define void @v_rotr_v2i64(<2 x i64> addrspace(1)* %in, <2 x i64> addrspace(1)* %xptr, <2 x i64> addrspace(1)* %yptr) {
 entry:
-  %x = load <2 x i64> addrspace(1)* %xptr, align 8
-  %y = load <2 x i64> addrspace(1)* %yptr, align 8
+  %x = load <2 x i64>, <2 x i64> addrspace(1)* %xptr, align 8
+  %y = load <2 x i64>, <2 x i64> addrspace(1)* %yptr, align 8
   %tmp0 = sub <2 x i64> <i64 64, i64 64>, %y
   %tmp1 = shl <2 x i64> %x, %tmp0
   %tmp2 = lshr <2 x i64> %x, %y
diff --git a/llvm/test/CodeGen/R600/rsq.ll b/llvm/test/CodeGen/R600/rsq.ll
index 183c717..b67b800 100644
--- a/llvm/test/CodeGen/R600/rsq.ll
+++ b/llvm/test/CodeGen/R600/rsq.ll
@@ -9,7 +9,7 @@
 ; SI: v_rsq_f32_e32
 ; SI: s_endpgm
 define void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind {
-  %val = load float addrspace(1)* %in, align 4
+  %val = load float, float addrspace(1)* %in, align 4
   %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
   %div = fdiv float 1.0, %sqrt
   store float %div, float addrspace(1)* %out, align 4
@@ -21,7 +21,7 @@
 ; SI-SAFE: v_sqrt_f64_e32
 ; SI: s_endpgm
 define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind {
-  %val = load double addrspace(1)* %in, align 4
+  %val = load double, double addrspace(1)* %in, align 4
   %sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone
   %div = fdiv double 1.0, %sqrt
   store double %div, double addrspace(1)* %out, align 4
@@ -62,9 +62,9 @@
   %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
   %gep.2 = getelementptr float, float addrspace(1)* %gep.0, i32 2
 
-  %a = load float addrspace(1)* %gep.0
-  %b = load float addrspace(1)* %gep.1
-  %c = load float addrspace(1)* %gep.2
+  %a = load float, float addrspace(1)* %gep.0
+  %b = load float, float addrspace(1)* %gep.1
+  %c = load float, float addrspace(1)* %gep.2
 
   %x = call float @llvm.sqrt.f32(float %a)
   %y = fmul float %x, %b
diff --git a/llvm/test/CodeGen/R600/s_movk_i32.ll b/llvm/test/CodeGen/R600/s_movk_i32.ll
index 8be2d1d..6b1a36c 100644
--- a/llvm/test/CodeGen/R600/s_movk_i32.ll
+++ b/llvm/test/CodeGen/R600/s_movk_i32.ll
@@ -9,7 +9,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 4295032831 ; ((1 << 16) - 1) | (1 << 32)
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -23,7 +23,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 4295000063 ; ((1 << 15) - 1) | (1 << 32)
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -37,7 +37,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 274877939711 ; ((1 << 15) - 1) | (64 << 32)
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -51,7 +51,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 4295000064 ; (1 << 15) | (1 << 32)
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -65,7 +65,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 4295098368 ; (1 << 17) | (1 << 32)
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -79,7 +79,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 18374967954648334319 ; -17 & 0xff00ffffffffffff
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -93,7 +93,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 270582939713 ; 65 | (63 << 32)
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -107,7 +107,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 70368744185856; ((1 << 13)) | ((1 << 14) << 32)
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -122,7 +122,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 1229782942255906816 ; 0x11111111ffff8000
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -136,7 +136,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 1229782942255906817 ; 0x11111111ffff8001
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -150,7 +150,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 1229782942255909000 ; 0x11111111ffff8888
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -164,7 +164,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 1229782942255910911 ; 0x11111111ffff8fff
   store i64 %or, i64 addrspace(1)* %out
   ret void
@@ -178,7 +178,7 @@
 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
 ; SI: s_endpgm
 define void @s_movk_i32_k12(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
-  %loada = load i64 addrspace(1)* %a, align 4
+  %loada = load i64, i64 addrspace(1)* %a, align 4
   %or = or i64 %loada, 1229782942255902721 ; 0x11111111ffff7001
   store i64 %or, i64 addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/saddo.ll b/llvm/test/CodeGen/R600/saddo.ll
index 8e625c1..f8ced79 100644
--- a/llvm/test/CodeGen/R600/saddo.ll
+++ b/llvm/test/CodeGen/R600/saddo.ll
@@ -28,8 +28,8 @@
 
 ; FUNC-LABEL: {{^}}v_saddo_i32:
 define void @v_saddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
-  %a = load i32 addrspace(1)* %aptr, align 4
-  %b = load i32 addrspace(1)* %bptr, align 4
+  %a = load i32, i32 addrspace(1)* %aptr, align 4
+  %b = load i32, i32 addrspace(1)* %bptr, align 4
   %sadd = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) nounwind
   %val = extractvalue { i32, i1 } %sadd, 0
   %carry = extractvalue { i32, i1 } %sadd, 1
@@ -52,8 +52,8 @@
 ; SI: v_add_i32
 ; SI: v_addc_u32
 define void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
-  %a = load i64 addrspace(1)* %aptr, align 4
-  %b = load i64 addrspace(1)* %bptr, align 4
+  %a = load i64, i64 addrspace(1)* %aptr, align 4
+  %b = load i64, i64 addrspace(1)* %bptr, align 4
   %sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind
   %val = extractvalue { i64, i1 } %sadd, 0
   %carry = extractvalue { i64, i1 } %sadd, 1
diff --git a/llvm/test/CodeGen/R600/salu-to-valu.ll b/llvm/test/CodeGen/R600/salu-to-valu.ll
index e9c4228..0b964957 100644
--- a/llvm/test/CodeGen/R600/salu-to-valu.ll
+++ b/llvm/test/CodeGen/R600/salu-to-valu.ll
@@ -28,10 +28,10 @@
   %4 = phi i64 [0, %entry], [%5, %loop]
   %5 = add i64 %2, %4
   %6 = getelementptr i8, i8 addrspace(1)* %in, i64 %5
-  %7 = load i8 addrspace(1)* %6, align 1
+  %7 = load i8, i8 addrspace(1)* %6, align 1
   %8 = or i64 %5, 1
   %9 = getelementptr i8, i8 addrspace(1)* %in, i64 %8
-  %10 = load i8 addrspace(1)* %9, align 1
+  %10 = load i8, i8 addrspace(1)* %9, align 1
   %11 = add i8 %7, %10
   %12 = sext i8 %11 to i32
   store i32 %12, i32 addrspace(1)* %out
@@ -59,18 +59,18 @@
   br i1 %0, label %if, label %else
 
 if:
-  %1 = load i32 addrspace(2)* addrspace(1)* %in
+  %1 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
   br label %endif
 
 else:
   %2 = getelementptr i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in
-  %3 = load i32 addrspace(2)* addrspace(1)* %2
+  %3 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %2
   br label %endif
 
 endif:
   %4 = phi i32 addrspace(2)*  [%1, %if], [%3, %else]
   %5 = getelementptr i32, i32 addrspace(2)* %4, i32 3000
-  %6 = load i32 addrspace(2)* %5
+  %6 = load i32, i32 addrspace(2)* %5
   store i32 %6, i32 addrspace(1)* %out
   ret void
 }
@@ -84,7 +84,7 @@
   %0 = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %1 = add i32 %0, 4
   %2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %0, i32 4
-  %3 = load i32 addrspace(2)* %2
+  %3 = load i32, i32 addrspace(2)* %2
   store i32 %3, i32 addrspace(1)* %out
   ret void
 }
@@ -97,7 +97,7 @@
   %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
   %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
   %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)*
-  %tmp3 = load <8 x i32> addrspace(2)* %tmp2, align 4
+  %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4
   store <8 x i32> %tmp3, <8 x i32> addrspace(1)* %out, align 32
   ret void
 }
@@ -112,7 +112,7 @@
   %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1
   %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0
   %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)*
-  %tmp3 = load <16 x i32> addrspace(2)* %tmp2, align 4
+  %tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4
   store <16 x i32> %tmp3, <16 x i32> addrspace(1)* %out, align 32
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/scalar_to_vector.ll b/llvm/test/CodeGen/R600/scalar_to_vector.ll
index b82e552..0970e5d 100644
--- a/llvm/test/CodeGen/R600/scalar_to_vector.ll
+++ b/llvm/test/CodeGen/R600/scalar_to_vector.ll
@@ -11,7 +11,7 @@
 ; SI: buffer_store_short [[RESULT]]
 ; SI: s_endpgm
 define void @scalar_to_vector_v2i32(<4 x i16> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %tmp1 = load i32 addrspace(1)* %in, align 4
+  %tmp1 = load i32, i32 addrspace(1)* %in, align 4
   %bc = bitcast i32 %tmp1 to <2 x i16>
   %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8
@@ -27,7 +27,7 @@
 ; SI: buffer_store_short [[RESULT]]
 ; SI: s_endpgm
 define void @scalar_to_vector_v2f32(<4 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
-  %tmp1 = load float addrspace(1)* %in, align 4
+  %tmp1 = load float, float addrspace(1)* %in, align 4
   %bc = bitcast float %tmp1 to <2 x i16>
   %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
   store <4 x i16> %tmp2, <4 x i16> addrspace(1)* %out, align 8
@@ -39,7 +39,7 @@
 
 
 ; define void @scalar_to_vector_test2(<8 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-;   %tmp1 = load i32 addrspace(1)* %in, align 4
+;   %tmp1 = load i32, i32 addrspace(1)* %in, align 4
 ;   %bc = bitcast i32 %tmp1 to <4 x i8>
 
 ;   %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
diff --git a/llvm/test/CodeGen/R600/schedule-fs-loop-nested.ll b/llvm/test/CodeGen/R600/schedule-fs-loop-nested.ll
index b917ec6..759197c 100644
--- a/llvm/test/CodeGen/R600/schedule-fs-loop-nested.ll
+++ b/llvm/test/CodeGen/R600/schedule-fs-loop-nested.ll
@@ -3,7 +3,7 @@
 
 define void @main() {
 main_body:
-  %0 = load <4 x float> addrspace(9)* null
+  %0 = load <4 x float>, <4 x float> addrspace(9)* null
   %1 = extractelement <4 x float> %0, i32 3
   %2 = fptosi float %1 to i32
   %3 = bitcast i32 %2 to float
@@ -20,11 +20,11 @@
   %14 = bitcast float %12 to i32
   %15 = add i32 %13, %14
   %16 = bitcast i32 %15 to float
-  %17 = load <4 x float> addrspace(9)* null
+  %17 = load <4 x float>, <4 x float> addrspace(9)* null
   %18 = extractelement <4 x float> %17, i32 0
-  %19 = load <4 x float> addrspace(9)* null
+  %19 = load <4 x float>, <4 x float> addrspace(9)* null
   %20 = extractelement <4 x float> %19, i32 1
-  %21 = load <4 x float> addrspace(9)* null
+  %21 = load <4 x float>, <4 x float> addrspace(9)* null
   %22 = extractelement <4 x float> %21, i32 2
   br label %LOOP
 
diff --git a/llvm/test/CodeGen/R600/schedule-fs-loop.ll b/llvm/test/CodeGen/R600/schedule-fs-loop.ll
index d6c194b..28cc08a 100644
--- a/llvm/test/CodeGen/R600/schedule-fs-loop.ll
+++ b/llvm/test/CodeGen/R600/schedule-fs-loop.ll
@@ -3,15 +3,15 @@
 
 define void @main() {
 main_body:
-  %0 = load <4 x float> addrspace(9)* null
+  %0 = load <4 x float>, <4 x float> addrspace(9)* null
   %1 = extractelement <4 x float> %0, i32 3
   %2 = fptosi float %1 to i32
   %3 = bitcast i32 %2 to float
-  %4 = load <4 x float> addrspace(9)* null
+  %4 = load <4 x float>, <4 x float> addrspace(9)* null
   %5 = extractelement <4 x float> %4, i32 0
-  %6 = load <4 x float> addrspace(9)* null
+  %6 = load <4 x float>, <4 x float> addrspace(9)* null
   %7 = extractelement <4 x float> %6, i32 1
-  %8 = load <4 x float> addrspace(9)* null
+  %8 = load <4 x float>, <4 x float> addrspace(9)* null
   %9 = extractelement <4 x float> %8, i32 2
   br label %LOOP
 
diff --git a/llvm/test/CodeGen/R600/schedule-global-loads.ll b/llvm/test/CodeGen/R600/schedule-global-loads.ll
index 3763237..3f728fd 100644
--- a/llvm/test/CodeGen/R600/schedule-global-loads.ll
+++ b/llvm/test/CodeGen/R600/schedule-global-loads.ll
@@ -14,9 +14,9 @@
 ; SI: buffer_store_dword [[REG0]]
 ; SI: buffer_store_dword [[REG1]]
 define void @cluster_global_arg_loads(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %ptr) #0 {
-  %load0 = load i32 addrspace(1)* %ptr, align 4
+  %load0 = load i32, i32 addrspace(1)* %ptr, align 4
   %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 1
-  %load1 = load i32 addrspace(1)* %gep, align 4
+  %load1 = load i32, i32 addrspace(1)* %gep, align 4
   store i32 %load0, i32 addrspace(1)* %out0, align 4
   store i32 %load1, i32 addrspace(1)* %out1, align 4
   ret void
@@ -30,8 +30,8 @@
 define void @same_base_ptr_crash(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
 entry:
   %out1 = getelementptr i32, i32 addrspace(1)* %out, i32 %offset
-  %tmp0 = load i32 addrspace(1)* %out
-  %tmp1 = load i32 addrspace(1)* %out1
+  %tmp0 = load i32, i32 addrspace(1)* %out
+  %tmp1 = load i32, i32 addrspace(1)* %out1
   %tmp2 = add i32 %tmp0, %tmp1
   store i32 %tmp2, i32 addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/schedule-if-2.ll b/llvm/test/CodeGen/R600/schedule-if-2.ll
index 38aad18..b1a8879 100644
--- a/llvm/test/CodeGen/R600/schedule-if-2.ll
+++ b/llvm/test/CodeGen/R600/schedule-if-2.ll
@@ -3,10 +3,10 @@
 
 define void @main() {
 main_body:
-  %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %1 = extractelement <4 x float> %0, i32 0
   %2 = fadd float 1.000000e+03, %1
-  %3 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %4 = extractelement <4 x float> %3, i32 0
   %5 = bitcast float %4 to i32
   %6 = icmp eq i32 %5, 0
@@ -47,7 +47,7 @@
   br label %ENDIF
 
 ELSE:                                             ; preds = %main_body
-  %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %36 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %37 = extractelement <4 x float> %36, i32 0
   %38 = bitcast float %37 to i32
   %39 = icmp eq i32 %38, 1
@@ -80,7 +80,7 @@
   %.28 = select i1 %54, float 0x36A0000000000000, float 0.000000e+00
   %55 = bitcast float %.28 to i32
   %56 = sitofp i32 %55 to float
-  %57 = load <4 x float> addrspace(8)* null
+  %57 = load <4 x float>, <4 x float> addrspace(8)* null
   %58 = extractelement <4 x float> %57, i32 0
   %59 = fsub float -0.000000e+00, %58
   %60 = fadd float %2, %59
diff --git a/llvm/test/CodeGen/R600/schedule-if.ll b/llvm/test/CodeGen/R600/schedule-if.ll
index f960c93..4fcb040 100644
--- a/llvm/test/CodeGen/R600/schedule-if.ll
+++ b/llvm/test/CodeGen/R600/schedule-if.ll
@@ -3,7 +3,7 @@
 
 define void @main() {
 main_body:
-  %0 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %0 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %1 = extractelement <4 x float> %0, i32 0
   %2 = bitcast float %1 to i32
   %3 = icmp eq i32 %2, 0
@@ -14,7 +14,7 @@
   br i1 %7, label %ENDIF, label %ELSE
 
 ELSE:                                             ; preds = %main_body
-  %8 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %9 = extractelement <4 x float> %8, i32 0
   %10 = bitcast float %9 to i32
   %11 = icmp eq i32 %10, 1
@@ -36,7 +36,7 @@
   ret void
 
 IF13:                                             ; preds = %ELSE
-  %20 = load <4 x float> addrspace(8)* null
+  %20 = load <4 x float>, <4 x float> addrspace(8)* null
   %21 = extractelement <4 x float> %20, i32 0
   %22 = fsub float -0.000000e+00, %21
   %23 = fadd float 1.000000e+03, %22
diff --git a/llvm/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll b/llvm/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
index 76b655d..9eb9c13 100644
--- a/llvm/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
+++ b/llvm/test/CodeGen/R600/schedule-vs-if-nested-loop-failure.ll
@@ -39,63 +39,63 @@
   %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %101, %Flow2 ]
   %15 = extractelement <4 x float> %reg1, i32 1
   %16 = extractelement <4 x float> %reg1, i32 3
-  %17 = load <4 x float> addrspace(9)* null
+  %17 = load <4 x float>, <4 x float> addrspace(9)* null
   %18 = extractelement <4 x float> %17, i32 0
   %19 = fmul float %18, %0
-  %20 = load <4 x float> addrspace(9)* null
+  %20 = load <4 x float>, <4 x float> addrspace(9)* null
   %21 = extractelement <4 x float> %20, i32 1
   %22 = fmul float %21, %0
-  %23 = load <4 x float> addrspace(9)* null
+  %23 = load <4 x float>, <4 x float> addrspace(9)* null
   %24 = extractelement <4 x float> %23, i32 2
   %25 = fmul float %24, %0
-  %26 = load <4 x float> addrspace(9)* null
+  %26 = load <4 x float>, <4 x float> addrspace(9)* null
   %27 = extractelement <4 x float> %26, i32 3
   %28 = fmul float %27, %0
-  %29 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %29 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
   %30 = extractelement <4 x float> %29, i32 0
   %31 = fmul float %30, %15
   %32 = fadd float %31, %19
-  %33 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %33 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
   %34 = extractelement <4 x float> %33, i32 1
   %35 = fmul float %34, %15
   %36 = fadd float %35, %22
-  %37 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %37 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
   %38 = extractelement <4 x float> %37, i32 2
   %39 = fmul float %38, %15
   %40 = fadd float %39, %25
-  %41 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %41 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
   %42 = extractelement <4 x float> %41, i32 3
   %43 = fmul float %42, %15
   %44 = fadd float %43, %28
-  %45 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %45 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
   %46 = extractelement <4 x float> %45, i32 0
   %47 = fmul float %46, %1
   %48 = fadd float %47, %32
-  %49 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %49 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
   %50 = extractelement <4 x float> %49, i32 1
   %51 = fmul float %50, %1
   %52 = fadd float %51, %36
-  %53 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %53 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
   %54 = extractelement <4 x float> %53, i32 2
   %55 = fmul float %54, %1
   %56 = fadd float %55, %40
-  %57 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %57 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
   %58 = extractelement <4 x float> %57, i32 3
   %59 = fmul float %58, %1
   %60 = fadd float %59, %44
-  %61 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+  %61 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
   %62 = extractelement <4 x float> %61, i32 0
   %63 = fmul float %62, %16
   %64 = fadd float %63, %48
-  %65 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+  %65 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
   %66 = extractelement <4 x float> %65, i32 1
   %67 = fmul float %66, %16
   %68 = fadd float %67, %52
-  %69 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+  %69 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
   %70 = extractelement <4 x float> %69, i32 2
   %71 = fmul float %70, %16
   %72 = fadd float %71, %56
-  %73 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+  %73 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
   %74 = extractelement <4 x float> %73, i32 3
   %75 = fmul float %74, %16
   %76 = fadd float %75, %60
diff --git a/llvm/test/CodeGen/R600/schedule-vs-if-nested-loop.ll b/llvm/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
index 33b20d3..bcecb15 100644
--- a/llvm/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
+++ b/llvm/test/CodeGen/R600/schedule-vs-if-nested-loop.ll
@@ -21,63 +21,63 @@
   %temp1.0 = phi float [ 1.000000e+00, %main_body ], [ %temp1.1, %LOOP ], [ %temp1.1, %ENDIF16 ]
   %temp2.0 = phi float [ 0.000000e+00, %main_body ], [ %temp2.1, %LOOP ], [ %temp2.1, %ENDIF16 ]
   %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %temp3.1, %LOOP ], [ %temp3.1, %ENDIF16 ]
-  %11 = load <4 x float> addrspace(9)* null
+  %11 = load <4 x float>, <4 x float> addrspace(9)* null
   %12 = extractelement <4 x float> %11, i32 0
   %13 = fmul float %12, %0
-  %14 = load <4 x float> addrspace(9)* null
+  %14 = load <4 x float>, <4 x float> addrspace(9)* null
   %15 = extractelement <4 x float> %14, i32 1
   %16 = fmul float %15, %0
-  %17 = load <4 x float> addrspace(9)* null
+  %17 = load <4 x float>, <4 x float> addrspace(9)* null
   %18 = extractelement <4 x float> %17, i32 2
   %19 = fmul float %18, %0
-  %20 = load <4 x float> addrspace(9)* null
+  %20 = load <4 x float>, <4 x float> addrspace(9)* null
   %21 = extractelement <4 x float> %20, i32 3
   %22 = fmul float %21, %0
-  %23 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %23 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
   %24 = extractelement <4 x float> %23, i32 0
   %25 = fmul float %24, %1
   %26 = fadd float %25, %13
-  %27 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %27 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
   %28 = extractelement <4 x float> %27, i32 1
   %29 = fmul float %28, %1
   %30 = fadd float %29, %16
-  %31 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %31 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
   %32 = extractelement <4 x float> %31, i32 2
   %33 = fmul float %32, %1
   %34 = fadd float %33, %19
-  %35 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
+  %35 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 1)
   %36 = extractelement <4 x float> %35, i32 3
   %37 = fmul float %36, %1
   %38 = fadd float %37, %22
-  %39 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %39 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
   %40 = extractelement <4 x float> %39, i32 0
   %41 = fmul float %40, %2
   %42 = fadd float %41, %26
-  %43 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %43 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
   %44 = extractelement <4 x float> %43, i32 1
   %45 = fmul float %44, %2
   %46 = fadd float %45, %30
-  %47 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %47 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
   %48 = extractelement <4 x float> %47, i32 2
   %49 = fmul float %48, %2
   %50 = fadd float %49, %34
-  %51 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
+  %51 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 2)
   %52 = extractelement <4 x float> %51, i32 3
   %53 = fmul float %52, %2
   %54 = fadd float %53, %38
-  %55 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+  %55 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
   %56 = extractelement <4 x float> %55, i32 0
   %57 = fmul float %56, %3
   %58 = fadd float %57, %42
-  %59 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+  %59 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
   %60 = extractelement <4 x float> %59, i32 1
   %61 = fmul float %60, %3
   %62 = fadd float %61, %46
-  %63 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+  %63 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
   %64 = extractelement <4 x float> %63, i32 2
   %65 = fmul float %64, %3
   %66 = fadd float %65, %50
-  %67 = load <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
+  %67 = load <4 x float>, <4 x float> addrspace(9)* getelementptr ([1024 x <4 x float>] addrspace(9)* null, i64 0, i32 3)
   %68 = extractelement <4 x float> %67, i32 3
   %69 = fmul float %68, %3
   %70 = fadd float %69, %54
diff --git a/llvm/test/CodeGen/R600/scratch-buffer.ll b/llvm/test/CodeGen/R600/scratch-buffer.ll
index 838a7f9..5608871 100644
--- a/llvm/test/CodeGen/R600/scratch-buffer.ll
+++ b/llvm/test/CodeGen/R600/scratch-buffer.ll
@@ -30,12 +30,12 @@
 
 if:
   %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset
-  %if_value = load i32* %if_ptr
+  %if_value = load i32, i32* %if_ptr
   br label %done
 
 else:
   %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset
-  %else_value = load i32* %else_ptr
+  %else_value = load i32, i32* %else_ptr
   br label %done
 
 done:
@@ -57,12 +57,12 @@
   %scratch0 = alloca [8192 x i32]
   %scratch1 = alloca [8192 x i32]
 
-  %offset0 = load i32 addrspace(1)* %offsets
+  %offset0 = load i32, i32 addrspace(1)* %offsets
   %scratchptr0 = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %offset0
   store i32 %offset0, i32* %scratchptr0
 
   %offsetptr1 = getelementptr i32, i32 addrspace(1)* %offsets, i32 1
-  %offset1 = load i32 addrspace(1)* %offsetptr1
+  %offset1 = load i32, i32 addrspace(1)* %offsetptr1
   %scratchptr1 = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %offset1
   store i32 %offset1, i32* %scratchptr1
 
@@ -71,12 +71,12 @@
 
 if:
   %if_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch0, i32 0, i32 %if_offset
-  %if_value = load i32* %if_ptr
+  %if_value = load i32, i32* %if_ptr
   br label %done
 
 else:
   %else_ptr = getelementptr [8192 x i32], [8192 x i32]* %scratch1, i32 0, i32 %else_offset
-  %else_value = load i32* %else_ptr
+  %else_value = load i32, i32* %else_ptr
   br label %done
 
 done:
diff --git a/llvm/test/CodeGen/R600/sdiv.ll b/llvm/test/CodeGen/R600/sdiv.ll
index 0805ca6..de64535 100644
--- a/llvm/test/CodeGen/R600/sdiv.ll
+++ b/llvm/test/CodeGen/R600/sdiv.ll
@@ -15,8 +15,8 @@
 ; EG: CF_END
 define void @sdiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in
-  %den = load i32 addrspace(1) * %den_ptr
+  %num = load i32, i32 addrspace(1) * %in
+  %den = load i32, i32 addrspace(1) * %den_ptr
   %result = sdiv i32 %num, %den
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -24,7 +24,7 @@
 
 ; FUNC-LABEL: {{^}}sdiv_i32_4:
 define void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %num = load i32 addrspace(1) * %in
+  %num = load i32, i32 addrspace(1) * %in
   %result = sdiv i32 %num, 4
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -44,7 +44,7 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %num = load i32 addrspace(1) * %in
+  %num = load i32, i32 addrspace(1) * %in
   %result = sdiv i32 %num, 3435
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -52,15 +52,15 @@
 
 define void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %den_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %num = load <2 x i32> addrspace(1) * %in
-  %den = load <2 x i32> addrspace(1) * %den_ptr
+  %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %den = load <2 x i32>, <2 x i32> addrspace(1) * %den_ptr
   %result = sdiv <2 x i32> %num, %den
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
 }
 
 define void @sdiv_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
-  %num = load <2 x i32> addrspace(1) * %in
+  %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
   %result = sdiv <2 x i32> %num, <i32 4, i32 4>
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -68,15 +68,15 @@
 
 define void @sdiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %den_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %num = load <4 x i32> addrspace(1) * %in
-  %den = load <4 x i32> addrspace(1) * %den_ptr
+  %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %den = load <4 x i32>, <4 x i32> addrspace(1) * %den_ptr
   %result = sdiv <4 x i32> %num, %den
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
 }
 
 define void @sdiv_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %num = load <4 x i32> addrspace(1) * %in
+  %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
   %result = sdiv <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4>
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/sdivrem24.ll b/llvm/test/CodeGen/R600/sdivrem24.ll
index 56c15e3..ad5df39 100644
--- a/llvm/test/CodeGen/R600/sdivrem24.ll
+++ b/llvm/test/CodeGen/R600/sdivrem24.ll
@@ -14,8 +14,8 @@
 ; EG: FLT_TO_INT
 define void @sdiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
   %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
-  %num = load i8 addrspace(1) * %in
-  %den = load i8 addrspace(1) * %den_ptr
+  %num = load i8, i8 addrspace(1) * %in
+  %den = load i8, i8 addrspace(1) * %den_ptr
   %result = sdiv i8 %num, %den
   store i8 %result, i8 addrspace(1)* %out
   ret void
@@ -33,8 +33,8 @@
 ; EG: FLT_TO_INT
 define void @sdiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
   %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
-  %num = load i16 addrspace(1) * %in, align 2
-  %den = load i16 addrspace(1) * %den_ptr, align 2
+  %num = load i16, i16 addrspace(1) * %in, align 2
+  %den = load i16, i16 addrspace(1) * %den_ptr, align 2
   %result = sdiv i16 %num, %den
   store i16 %result, i16 addrspace(1)* %out, align 2
   ret void
@@ -52,8 +52,8 @@
 ; EG: FLT_TO_INT
 define void @sdiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 8
   %den.i24.0 = shl i32 %den, 8
   %num.i24 = ashr i32 %num.i24.0, 8
@@ -71,8 +71,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @sdiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 7
   %den.i24.0 = shl i32 %den, 7
   %num.i24 = ashr i32 %num.i24.0, 7
@@ -90,8 +90,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @test_no_sdiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 8
   %den.i24.0 = shl i32 %den, 7
   %num.i24 = ashr i32 %num.i24.0, 8
@@ -109,8 +109,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @test_no_sdiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 7
   %den.i24.0 = shl i32 %den, 8
   %num.i24 = ashr i32 %num.i24.0, 7
@@ -132,8 +132,8 @@
 ; EG: FLT_TO_INT
 define void @srem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
   %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
-  %num = load i8 addrspace(1) * %in
-  %den = load i8 addrspace(1) * %den_ptr
+  %num = load i8, i8 addrspace(1) * %in
+  %den = load i8, i8 addrspace(1) * %den_ptr
   %result = srem i8 %num, %den
   store i8 %result, i8 addrspace(1)* %out
   ret void
@@ -151,8 +151,8 @@
 ; EG: FLT_TO_INT
 define void @srem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
   %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
-  %num = load i16 addrspace(1) * %in, align 2
-  %den = load i16 addrspace(1) * %den_ptr, align 2
+  %num = load i16, i16 addrspace(1) * %in, align 2
+  %den = load i16, i16 addrspace(1) * %den_ptr, align 2
   %result = srem i16 %num, %den
   store i16 %result, i16 addrspace(1)* %out, align 2
   ret void
@@ -170,8 +170,8 @@
 ; EG: FLT_TO_INT
 define void @srem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 8
   %den.i24.0 = shl i32 %den, 8
   %num.i24 = ashr i32 %num.i24.0, 8
@@ -189,8 +189,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @srem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 7
   %den.i24.0 = shl i32 %den, 7
   %num.i24 = ashr i32 %num.i24.0, 7
@@ -208,8 +208,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @test_no_srem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 8
   %den.i24.0 = shl i32 %den, 7
   %num.i24 = ashr i32 %num.i24.0, 8
@@ -227,8 +227,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @test_no_srem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 7
   %den.i24.0 = shl i32 %den, 8
   %num.i24 = ashr i32 %num.i24.0, 7
diff --git a/llvm/test/CodeGen/R600/select64.ll b/llvm/test/CodeGen/R600/select64.ll
index 0245dae..5cebb30 100644
--- a/llvm/test/CodeGen/R600/select64.ll
+++ b/llvm/test/CodeGen/R600/select64.ll
@@ -42,8 +42,8 @@
 ; CHECK-NOT: v_cndmask_b32
 define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
   %cmp = icmp ugt i32 %cond, 5
-  %a = load i64 addrspace(1)* %aptr, align 8
-  %b = load i64 addrspace(1)* %bptr, align 8
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
+  %b = load i64, i64 addrspace(1)* %bptr, align 8
   %sel = select i1 %cmp, i64 %a, i64 %b
   %trunc = trunc i64 %sel to i32
   store i32 %trunc, i32 addrspace(1)* %out, align 4
@@ -60,8 +60,8 @@
 ; CHECK: s_endpgm
 define void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
   %cmp = icmp ugt i32 %cond, 5
-  %a = load i64 addrspace(1)* %aptr, align 8
-  %b = load i64 addrspace(1)* %bptr, align 8
+  %a = load i64, i64 addrspace(1)* %aptr, align 8
+  %b = load i64, i64 addrspace(1)* %bptr, align 8
   %sel = select i1 %cmp, i64 %a, i64 270582939648 ; 63 << 32
   store i64 %sel, i64 addrspace(1)* %out, align 8
   ret void
diff --git a/llvm/test/CodeGen/R600/selectcc-cnd.ll b/llvm/test/CodeGen/R600/selectcc-cnd.ll
index 0bfca69..94d0ace 100644
--- a/llvm/test/CodeGen/R600/selectcc-cnd.ll
+++ b/llvm/test/CodeGen/R600/selectcc-cnd.ll
@@ -4,7 +4,7 @@
 ;CHECK: CNDE {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1.0, literal.x,
 ;CHECK: 1073741824
 define void @test(float addrspace(1)* %out, float addrspace(1)* %in) {
-  %1 = load float addrspace(1)* %in
+  %1 = load float, float addrspace(1)* %in
   %2 = fcmp oeq float %1, 0.0
   %3 = select i1 %2, float 1.0, float 2.0
   store float %3, float addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/selectcc-cnde-int.ll b/llvm/test/CodeGen/R600/selectcc-cnde-int.ll
index d568888..58a4ee7 100644
--- a/llvm/test/CodeGen/R600/selectcc-cnde-int.ll
+++ b/llvm/test/CodeGen/R600/selectcc-cnde-int.ll
@@ -4,7 +4,7 @@
 ;CHECK: CNDE_INT {{\*?}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, 1, literal.x,
 ;CHECK-NEXT: 2
 define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %1 = load i32 addrspace(1)* %in
+  %1 = load i32, i32 addrspace(1)* %in
   %2 = icmp eq i32 %1, 0
   %3 = select i1 %2, i32 1, i32 2
   store i32 %3, i32 addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/selectcc-icmp-select-float.ll b/llvm/test/CodeGen/R600/selectcc-icmp-select-float.ll
index 6743800..e870ee8 100644
--- a/llvm/test/CodeGen/R600/selectcc-icmp-select-float.ll
+++ b/llvm/test/CodeGen/R600/selectcc-icmp-select-float.ll
@@ -8,7 +8,7 @@
 
 define void @test(float addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
-  %0 = load i32 addrspace(1)* %in
+  %0 = load i32, i32 addrspace(1)* %in
   %1 = icmp sge i32 %0, 0
   %2 = select i1 %1, float 1.0, float 0.0
   store float %2, float addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/setcc-opt.ll b/llvm/test/CodeGen/R600/setcc-opt.ll
index 93860f5..0219cdb 100644
--- a/llvm/test/CodeGen/R600/setcc-opt.ll
+++ b/llvm/test/CodeGen/R600/setcc-opt.ll
@@ -162,7 +162,7 @@
 ; GCN-NEXT: buffer_store_byte [[RESULT]]
 ; GCN: s_endpgm
 define void @cmp_sext_k_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %b.ptr) nounwind {
-  %b = load i8 addrspace(1)* %b.ptr
+  %b = load i8, i8 addrspace(1)* %b.ptr
   %b.ext = sext i8 %b to i32
   %icmp0 = icmp ne i32 %b.ext, -1
   store i1 %icmp0, i1 addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/setcc.ll b/llvm/test/CodeGen/R600/setcc.ll
index 0867e83..f33a82d 100644
--- a/llvm/test/CodeGen/R600/setcc.ll
+++ b/llvm/test/CodeGen/R600/setcc.ll
@@ -22,8 +22,8 @@
 
 define void @setcc_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
   %result = icmp eq <4 x i32> %a, %b
   %sext = sext <4 x i1> %result to <4 x i32>
   store <4 x i32> %sext, <4 x i32> addrspace(1)* %out
@@ -347,8 +347,8 @@
   %gep.a = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptra, i32 %tid
   %gep.b = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %ptrb, i32 %tid
   %gep.out = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid
-  %a = load <3 x i32> addrspace(1)* %gep.a
-  %b = load <3 x i32> addrspace(1)* %gep.b
+  %a = load <3 x i32>, <3 x i32> addrspace(1)* %gep.a
+  %b = load <3 x i32>, <3 x i32> addrspace(1)* %gep.b
   %cmp = icmp eq <3 x i32> %a, %b
   %ext = sext <3 x i1> %cmp to <3 x i32>
   store <3 x i32> %ext, <3 x i32> addrspace(1)* %gep.out
@@ -368,8 +368,8 @@
   %gep.a = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptra, i32 %tid
   %gep.b = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %ptrb, i32 %tid
   %gep.out = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %out, i32 %tid
-  %a = load <3 x i8> addrspace(1)* %gep.a
-  %b = load <3 x i8> addrspace(1)* %gep.b
+  %a = load <3 x i8>, <3 x i8> addrspace(1)* %gep.a
+  %b = load <3 x i8>, <3 x i8> addrspace(1)* %gep.b
   %cmp = icmp eq <3 x i8> %a, %b
   %ext = sext <3 x i1> %cmp to <3 x i8>
   store <3 x i8> %ext, <3 x i8> addrspace(1)* %gep.out
diff --git a/llvm/test/CodeGen/R600/sext-in-reg.ll b/llvm/test/CodeGen/R600/sext-in-reg.ll
index 0668e1e..e8d1428 100644
--- a/llvm/test/CodeGen/R600/sext-in-reg.ll
+++ b/llvm/test/CodeGen/R600/sext-in-reg.ll
@@ -190,8 +190,8 @@
   %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
-  %a = load i64 addrspace(1)* %a.gep, align 8
-  %b = load i64 addrspace(1)* %b.gep, align 8
+  %a = load i64, i64 addrspace(1)* %a.gep, align 8
+  %b = load i64, i64 addrspace(1)* %b.gep, align 8
 
   %c = shl i64 %a, %b
   %shl = shl i64 %c, 63
@@ -211,8 +211,8 @@
   %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
-  %a = load i64 addrspace(1)* %a.gep, align 8
-  %b = load i64 addrspace(1)* %b.gep, align 8
+  %a = load i64, i64 addrspace(1)* %a.gep, align 8
+  %b = load i64, i64 addrspace(1)* %b.gep, align 8
 
   %c = shl i64 %a, %b
   %shl = shl i64 %c, 56
@@ -232,8 +232,8 @@
   %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
-  %a = load i64 addrspace(1)* %a.gep, align 8
-  %b = load i64 addrspace(1)* %b.gep, align 8
+  %a = load i64, i64 addrspace(1)* %a.gep, align 8
+  %b = load i64, i64 addrspace(1)* %b.gep, align 8
 
   %c = shl i64 %a, %b
   %shl = shl i64 %c, 48
@@ -252,8 +252,8 @@
   %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid
   %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
-  %a = load i64 addrspace(1)* %a.gep, align 8
-  %b = load i64 addrspace(1)* %b.gep, align 8
+  %a = load i64, i64 addrspace(1)* %a.gep, align 8
+  %b = load i64, i64 addrspace(1)* %b.gep, align 8
 
   %c = shl i64 %a, %b
   %shl = shl i64 %c, 32
@@ -428,8 +428,8 @@
 ; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
 ; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 8
 define void @vgpr_sext_in_reg_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
-  %loada = load <4 x i32> addrspace(1)* %a, align 16
-  %loadb = load <4 x i32> addrspace(1)* %b, align 16
+  %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
+  %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
   %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
   %shl = shl <4 x i32> %c, <i32 24, i32 24, i32 24, i32 24>
   %ashr = ashr <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
@@ -441,8 +441,8 @@
 ; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
 ; SI: v_bfe_i32 [[EXTRACT:v[0-9]+]], {{v[0-9]+}}, 0, 16
 define void @vgpr_sext_in_reg_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %a, <4 x i32> addrspace(1)* %b) nounwind {
-  %loada = load <4 x i32> addrspace(1)* %a, align 16
-  %loadb = load <4 x i32> addrspace(1)* %b, align 16
+  %loada = load <4 x i32>, <4 x i32> addrspace(1)* %a, align 16
+  %loadb = load <4 x i32>, <4 x i32> addrspace(1)* %b, align 16
   %c = add <4 x i32> %loada, %loadb ; add to prevent folding into extload
   %shl = shl <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
   %ashr = ashr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
@@ -459,7 +459,7 @@
 ; SI: v_bfe_i32
 ; SI: buffer_store_short
 define void @sext_in_reg_to_illegal_type(i16 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture %src) nounwind {
-  %tmp5 = load i8 addrspace(1)* %src, align 1
+  %tmp5 = load i8, i8 addrspace(1)* %src, align 1
   %tmp2 = sext i8 %tmp5 to i32
   %tmp3 = tail call i32 @llvm.AMDGPU.imax(i32 %tmp2, i32 0) nounwind readnone
   %tmp4 = trunc i32 %tmp3 to i8
@@ -474,7 +474,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
-  %load = load i32 addrspace(1)* %ptr, align 4
+  %load = load i32, i32 addrspace(1)* %ptr, align 4
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 8, i32 0) nounwind readnone
   store i32 %bfe, i32 addrspace(1)* %out, align 4
   ret void
@@ -485,7 +485,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
-  %load = load i32 addrspace(1)* %ptr, align 4
+  %load = load i32, i32 addrspace(1)* %ptr, align 4
   %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
   %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
   store i32 %bfe1, i32 addrspace(1)* %out, align 4
@@ -496,7 +496,7 @@
 ; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
 ; SI: s_endpgm
 define void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
-  %load = load i32 addrspace(1)* %ptr, align 4
+  %load = load i32, i32 addrspace(1)* %ptr, align 4
   %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 8) nounwind readnone
   %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 16) nounwind readnone
   store i32 %bfe1, i32 addrspace(1)* %out, align 4
@@ -509,7 +509,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) nounwind {
-  %load = load i32 addrspace(1)* %ptr, align 4
+  %load = load i32, i32 addrspace(1)* %ptr, align 4
   %bfe0 = call i32 @llvm.AMDGPU.bfe.i32(i32 %load, i32 0, i32 16) nounwind readnone
   %bfe1 = call i32 @llvm.AMDGPU.bfe.i32(i32 %bfe0, i32 0, i32 8) nounwind readnone
   store i32 %bfe1, i32 addrspace(1)* %out, align 4
@@ -545,7 +545,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
-  %load = load i8 addrspace(1)* %ptr, align 1
+  %load = load i8, i8 addrspace(1)* %ptr, align 1
   %sext = sext i8 %load to i32
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 0, i32 8) nounwind readnone
   %shl = shl i32 %bfe, 24
@@ -559,7 +559,7 @@
 ; SI-NOT: {{[^@]}}bfe
 ; SI: s_endpgm
 define void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) nounwind {
-  %load = load i8 addrspace(1)* %ptr, align 1
+  %load = load i8, i8 addrspace(1)* %ptr, align 1
   %sext = sext i8 %load to i32
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %sext, i32 8, i32 0) nounwind readnone
   %shl = shl i32 %bfe, 24
@@ -574,7 +574,7 @@
 ; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
 ; SI: s_endpgm
 define void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 31
   %shr = ashr i32 %shl, 31
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 0, i32 1)
@@ -589,7 +589,7 @@
 ; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1
 ; SI: s_endpgm
 define void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 30
   %shr = ashr i32 %shl, 30
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 1)
@@ -604,7 +604,7 @@
 ; SI: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2
 ; SI: s_endpgm
 define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %x = load i32 addrspace(1)* %in, align 4
+  %x = load i32, i32 addrspace(1)* %in, align 4
   %shl = shl i32 %x, 30
   %shr = ashr i32 %shl, 30
   %bfe = call i32 @llvm.AMDGPU.bfe.i32(i32 %shr, i32 1, i32 2)
diff --git a/llvm/test/CodeGen/R600/sgpr-control-flow.ll b/llvm/test/CodeGen/R600/sgpr-control-flow.ll
index ba479d5..fae7cd2 100644
--- a/llvm/test/CodeGen/R600/sgpr-control-flow.ll
+++ b/llvm/test/CodeGen/R600/sgpr-control-flow.ll
@@ -83,13 +83,13 @@
 
 if:
   %gep.if = getelementptr i32, i32 addrspace(1)* %a, i32 %tid
-  %a.val = load i32 addrspace(1)* %gep.if
+  %a.val = load i32, i32 addrspace(1)* %gep.if
   %cmp.if = icmp eq i32 %a.val, 0
   br label %endif
 
 else:
   %gep.else = getelementptr i32, i32 addrspace(1)* %b, i32 %tid
-  %b.val = load i32 addrspace(1)* %gep.else
+  %b.val = load i32, i32 addrspace(1)* %gep.else
   %cmp.else = icmp slt i32 %b.val, 0
   br label %endif
 
diff --git a/llvm/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll b/llvm/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
index 893f5a3..df67fcc 100644
--- a/llvm/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
+++ b/llvm/test/CodeGen/R600/sgpr-copy-duplicate-operand.ll
@@ -7,7 +7,7 @@
 ; SI-LABEL: {{^}}test_dup_operands:
 ; SI: v_add_i32_e32
 define void @test_dup_operands(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) {
-  %a = load <2 x i32> addrspace(1)* %in
+  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
   %lo = extractelement <2 x i32> %a, i32 0
   %hi = extractelement <2 x i32> %a, i32 1
   %add = add i32 %lo, %lo
diff --git a/llvm/test/CodeGen/R600/sgpr-copy.ll b/llvm/test/CodeGen/R600/sgpr-copy.ll
index eb57b1a..b849c40 100644
--- a/llvm/test/CodeGen/R600/sgpr-copy.ll
+++ b/llvm/test/CodeGen/R600/sgpr-copy.ll
@@ -10,7 +10,7 @@
 define void @phi1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
-  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+  %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
   %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
   %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
   %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
@@ -34,7 +34,7 @@
 define void @phi2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
-  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+  %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
   %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
   %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 32)
   %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 36)
@@ -51,9 +51,9 @@
   %35 = call float @llvm.SI.load.const(<16 x i8> %21, i32 88)
   %36 = call float @llvm.SI.load.const(<16 x i8> %21, i32 92)
   %37 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %2, i32 0
-  %38 = load <32 x i8> addrspace(2)* %37, !tbaa !1
+  %38 = load <32 x i8>, <32 x i8> addrspace(2)* %37, !tbaa !1
   %39 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %1, i32 0
-  %40 = load <16 x i8> addrspace(2)* %39, !tbaa !1
+  %40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, !tbaa !1
   %41 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
   %42 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
   %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %3, <2 x i32> %5)
@@ -155,7 +155,7 @@
 define void @loop(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
-  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+  %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
   %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 0)
   %23 = call float @llvm.SI.load.const(<16 x i8> %21, i32 4)
   %24 = call float @llvm.SI.load.const(<16 x i8> %21, i32 8)
@@ -237,12 +237,12 @@
 
 entry:
   %21 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
-  %22 = load <16 x i8> addrspace(2)* %21, !tbaa !2
+  %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, !tbaa !2
   %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 16)
   %24 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
-  %25 = load <32 x i8> addrspace(2)* %24, !tbaa !2
+  %25 = load <32 x i8>, <32 x i8> addrspace(2)* %24, !tbaa !2
   %26 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
-  %27 = load <16 x i8> addrspace(2)* %26, !tbaa !2
+  %27 = load <16 x i8>, <16 x i8> addrspace(2)* %26, !tbaa !2
   %28 = fcmp oeq float %23, 0.0
   br i1 %28, label %if, label %else
 
@@ -276,7 +276,7 @@
 ; CHECK: s_endpgm
 define void @copy1(float addrspace(1)* %out, float addrspace(1)* %in0) {
 entry:
-  %0 = load float addrspace(1)* %in0
+  %0 = load float, float addrspace(1)* %in0
   %1 = fcmp oeq float %0, 0.0
   br i1 %1, label %if0, label %endif
 
@@ -335,12 +335,12 @@
 define void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
 bb:
   %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i32 0, i32 0
-  %tmp22 = load <16 x i8> addrspace(2)* %tmp, !tbaa !0
+  %tmp22 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
   %tmp23 = call float @llvm.SI.load.const(<16 x i8> %tmp22, i32 16)
   %tmp25 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %arg3, i32 0, i32 0
-  %tmp26 = load <8 x i32> addrspace(2)* %tmp25, !tbaa !0
+  %tmp26 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp25, !tbaa !0
   %tmp27 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %arg2, i32 0, i32 0
-  %tmp28 = load <4 x i32> addrspace(2)* %tmp27, !tbaa !0
+  %tmp28 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp27, !tbaa !0
   %tmp29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %arg5, <2 x i32> %arg7)
   %tmp30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %arg5, <2 x i32> %arg7)
   %tmp31 = bitcast float %tmp23 to i32
diff --git a/llvm/test/CodeGen/R600/shl.ll b/llvm/test/CodeGen/R600/shl.ll
index 18293a8..53b63dc 100644
--- a/llvm/test/CodeGen/R600/shl.ll
+++ b/llvm/test/CodeGen/R600/shl.ll
@@ -16,8 +16,8 @@
 
 define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1) * %in
-  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
   %result = shl <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -43,8 +43,8 @@
 
 define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
   %result = shl <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -70,8 +70,8 @@
 
 define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
   %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
-  %a = load i64 addrspace(1) * %in
-  %b = load i64 addrspace(1) * %b_ptr
+  %a = load i64, i64 addrspace(1) * %in
+  %b = load i64, i64 addrspace(1) * %b_ptr
   %result = shl i64 %a, %b
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -109,8 +109,8 @@
 
 define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
-  %a = load <2 x i64> addrspace(1) * %in
-  %b = load <2 x i64> addrspace(1) * %b_ptr
+  %a = load <2 x i64>, <2 x i64> addrspace(1) * %in
+  %b = load <2 x i64>, <2 x i64> addrspace(1) * %b_ptr
   %result = shl <2 x i64> %a, %b
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out
   ret void
@@ -172,8 +172,8 @@
 
 define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
-  %a = load <4 x i64> addrspace(1) * %in
-  %b = load <4 x i64> addrspace(1) * %b_ptr
+  %a = load <4 x i64>, <4 x i64> addrspace(1) * %in
+  %b = load <4 x i64>, <4 x i64> addrspace(1) * %b_ptr
   %result = shl <4 x i64> %a, %b
   store <4 x i64> %result, <4 x i64> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/shl_add_constant.ll b/llvm/test/CodeGen/R600/shl_add_constant.ll
index a62b6c9..b1485bf 100644
--- a/llvm/test/CodeGen/R600/shl_add_constant.ll
+++ b/llvm/test/CodeGen/R600/shl_add_constant.ll
@@ -12,7 +12,7 @@
 define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
-  %val = load i32 addrspace(1)* %ptr, align 4
+  %val = load i32, i32 addrspace(1)* %ptr, align 4
   %add = add i32 %val, 9
   %result = shl i32 %add, 2
   store i32 %result, i32 addrspace(1)* %out, align 4
@@ -28,7 +28,7 @@
 define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
-  %val = load i32 addrspace(1)* %ptr, align 4
+  %val = load i32, i32 addrspace(1)* %ptr, align 4
   %add = add i32 %val, 9
   %result = shl i32 %add, 2
   store i32 %result, i32 addrspace(1)* %out0, align 4
@@ -46,7 +46,7 @@
 define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
-  %val = load i32 addrspace(1)* %ptr, align 4
+  %val = load i32, i32 addrspace(1)* %ptr, align 4
   %shl = add i32 %val, 999
   %result = shl i32 %shl, 2
   store i32 %result, i32 addrspace(1)* %out, align 4
diff --git a/llvm/test/CodeGen/R600/shl_add_ptr.ll b/llvm/test/CodeGen/R600/shl_add_ptr.ll
index 15565fc..066dafb 100644
--- a/llvm/test/CodeGen/R600/shl_add_ptr.ll
+++ b/llvm/test/CodeGen/R600/shl_add_ptr.ll
@@ -23,7 +23,7 @@
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
   store float %val0, float addrspace(1)* %out
   ret void
@@ -43,7 +43,7 @@
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %shl_add_use = shl i32 %idx.0, 2
   store i32 %shl_add_use, i32 addrspace(1)* %add_use, align 4
   store float %val0, float addrspace(1)* %out
@@ -59,7 +59,7 @@
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %idx.0 = add nsw i32 %tid.x, 65535
   %arrayidx0 = getelementptr inbounds [65536 x i8], [65536 x i8] addrspace(3)* @maxlds, i32 0, i32 %idx.0
-  %val0 = load i8 addrspace(3)* %arrayidx0
+  %val0 = load i8, i8 addrspace(3)* %arrayidx0
   store i32 %idx.0, i32 addrspace(1)* %add_use
   store i8 %val0, i8 addrspace(1)* %out
   ret void
@@ -77,9 +77,9 @@
   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
   %idx.0 = add nsw i32 %tid.x, 64
   %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
-  %val0 = load float addrspace(3)* %arrayidx0, align 4
+  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
   %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
-  %val1 = load float addrspace(3)* %arrayidx1, align 4
+  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
   %sum = fadd float %val0, %val1
   store float %sum, float addrspace(1)* %out, align 4
   ret void
@@ -108,7 +108,7 @@
 ;   %tid.x = tail call i32 @llvm.r600.read.tidig.x() #1
 ;   %idx.0 = add nsw i32 %tid.x, 2
 ;   %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
-;   %val = load atomic i32 addrspace(3)* %arrayidx0 seq_cst, align 4
+;   %val = load atomic i32, i32 addrspace(3)* %arrayidx0 seq_cst, align 4
 ;   store i32 %val, i32 addrspace(1)* %out, align 4
 ;   store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
 ;   ret void
diff --git a/llvm/test/CodeGen/R600/si-lod-bias.ll b/llvm/test/CodeGen/R600/si-lod-bias.ll
index cdcc119..944499a 100644
--- a/llvm/test/CodeGen/R600/si-lod-bias.ll
+++ b/llvm/test/CodeGen/R600/si-lod-bias.ll
@@ -10,12 +10,12 @@
 define void @main(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
-  %21 = load <16 x i8> addrspace(2)* %20, !tbaa !1
+  %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20, !tbaa !1
   %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
   %23 = getelementptr <32 x i8>, <32 x i8> addrspace(2)* %2, i32 0
-  %24 = load <32 x i8> addrspace(2)* %23, !tbaa !1
+  %24 = load <32 x i8>, <32 x i8> addrspace(2)* %23, !tbaa !1
   %25 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %1, i32 0
-  %26 = load <16 x i8> addrspace(2)* %25, !tbaa !1
+  %26 = load <16 x i8>, <16 x i8> addrspace(2)* %25, !tbaa !1
   %27 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %3, <2 x i32> %5)
   %28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %3, <2 x i32> %5)
   %29 = bitcast float %22 to i32
diff --git a/llvm/test/CodeGen/R600/si-sgpr-spill.ll b/llvm/test/CodeGen/R600/si-sgpr-spill.ll
index 781be58..8465270 100644
--- a/llvm/test/CodeGen/R600/si-sgpr-spill.ll
+++ b/llvm/test/CodeGen/R600/si-sgpr-spill.ll
@@ -14,7 +14,7 @@
 define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %21 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
-  %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0
+  %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, !tbaa !0
   %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 96)
   %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 100)
   %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 104)
@@ -54,37 +54,37 @@
   %59 = call float @llvm.SI.load.const(<16 x i8> %22, i32 376)
   %60 = call float @llvm.SI.load.const(<16 x i8> %22, i32 384)
   %61 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
-  %62 = load <32 x i8> addrspace(2)* %61, !tbaa !0
+  %62 = load <32 x i8>, <32 x i8> addrspace(2)* %61, !tbaa !0
   %63 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
-  %64 = load <16 x i8> addrspace(2)* %63, !tbaa !0
+  %64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, !tbaa !0
   %65 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1
-  %66 = load <32 x i8> addrspace(2)* %65, !tbaa !0
+  %66 = load <32 x i8>, <32 x i8> addrspace(2)* %65, !tbaa !0
   %67 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1
-  %68 = load <16 x i8> addrspace(2)* %67, !tbaa !0
+  %68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, !tbaa !0
   %69 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2
-  %70 = load <32 x i8> addrspace(2)* %69, !tbaa !0
+  %70 = load <32 x i8>, <32 x i8> addrspace(2)* %69, !tbaa !0
   %71 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2
-  %72 = load <16 x i8> addrspace(2)* %71, !tbaa !0
+  %72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, !tbaa !0
   %73 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3
-  %74 = load <32 x i8> addrspace(2)* %73, !tbaa !0
+  %74 = load <32 x i8>, <32 x i8> addrspace(2)* %73, !tbaa !0
   %75 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3
-  %76 = load <16 x i8> addrspace(2)* %75, !tbaa !0
+  %76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, !tbaa !0
   %77 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4
-  %78 = load <32 x i8> addrspace(2)* %77, !tbaa !0
+  %78 = load <32 x i8>, <32 x i8> addrspace(2)* %77, !tbaa !0
   %79 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4
-  %80 = load <16 x i8> addrspace(2)* %79, !tbaa !0
+  %80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, !tbaa !0
   %81 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5
-  %82 = load <32 x i8> addrspace(2)* %81, !tbaa !0
+  %82 = load <32 x i8>, <32 x i8> addrspace(2)* %81, !tbaa !0
   %83 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5
-  %84 = load <16 x i8> addrspace(2)* %83, !tbaa !0
+  %84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, !tbaa !0
   %85 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6
-  %86 = load <32 x i8> addrspace(2)* %85, !tbaa !0
+  %86 = load <32 x i8>, <32 x i8> addrspace(2)* %85, !tbaa !0
   %87 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6
-  %88 = load <16 x i8> addrspace(2)* %87, !tbaa !0
+  %88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, !tbaa !0
   %89 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7
-  %90 = load <32 x i8> addrspace(2)* %89, !tbaa !0
+  %90 = load <32 x i8>, <32 x i8> addrspace(2)* %89, !tbaa !0
   %91 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7
-  %92 = load <16 x i8> addrspace(2)* %91, !tbaa !0
+  %92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, !tbaa !0
   %93 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %4, <2 x i32> %6)
   %94 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %4, <2 x i32> %6)
   %95 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %4, <2 x i32> %6)
@@ -116,16 +116,16 @@
   %119 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %118
   %120 = bitcast float %93 to i32
   store i32 %120, i32 addrspace(3)* %115
-  %121 = load i32 addrspace(3)* %117
+  %121 = load i32, i32 addrspace(3)* %117
   %122 = bitcast i32 %121 to float
-  %123 = load i32 addrspace(3)* %119
+  %123 = load i32, i32 addrspace(3)* %119
   %124 = bitcast i32 %123 to float
   %125 = fsub float %124, %122
   %126 = bitcast float %94 to i32
   store i32 %126, i32 addrspace(3)* %115
-  %127 = load i32 addrspace(3)* %117
+  %127 = load i32, i32 addrspace(3)* %117
   %128 = bitcast i32 %127 to float
-  %129 = load i32 addrspace(3)* %119
+  %129 = load i32, i32 addrspace(3)* %119
   %130 = bitcast i32 %129 to float
   %131 = fsub float %130, %128
   %132 = insertelement <4 x float> undef, float %125, i32 0
@@ -156,30 +156,30 @@
   %153 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i32 0, i32 %152
   %154 = bitcast float %138 to i32
   store i32 %154, i32 addrspace(3)* %149
-  %155 = load i32 addrspace(3)* %151
+  %155 = load i32, i32 addrspace(3)* %151
   %156 = bitcast i32 %155 to float
-  %157 = load i32 addrspace(3)* %153
+  %157 = load i32, i32 addrspace(3)* %153
   %158 = bitcast i32 %157 to float
   %159 = fsub float %158, %156
   %160 = bitcast float %139 to i32
   store i32 %160, i32 addrspace(3)* %149
-  %161 = load i32 addrspace(3)* %151
+  %161 = load i32, i32 addrspace(3)* %151
   %162 = bitcast i32 %161 to float
-  %163 = load i32 addrspace(3)* %153
+  %163 = load i32, i32 addrspace(3)* %153
   %164 = bitcast i32 %163 to float
   %165 = fsub float %164, %162
   %166 = bitcast float %140 to i32
   store i32 %166, i32 addrspace(3)* %149
-  %167 = load i32 addrspace(3)* %151
+  %167 = load i32, i32 addrspace(3)* %151
   %168 = bitcast i32 %167 to float
-  %169 = load i32 addrspace(3)* %153
+  %169 = load i32, i32 addrspace(3)* %153
   %170 = bitcast i32 %169 to float
   %171 = fsub float %170, %168
   %172 = bitcast float %141 to i32
   store i32 %172, i32 addrspace(3)* %149
-  %173 = load i32 addrspace(3)* %151
+  %173 = load i32, i32 addrspace(3)* %151
   %174 = bitcast i32 %173 to float
-  %175 = load i32 addrspace(3)* %153
+  %175 = load i32, i32 addrspace(3)* %153
   %176 = bitcast i32 %175 to float
   %177 = fsub float %176, %174
   %178 = insertelement <4 x float> undef, float %159, i32 0
@@ -695,7 +695,7 @@
 define void @main1([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %21 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0
-  %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0
+  %22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, !tbaa !0
   %23 = call float @llvm.SI.load.const(<16 x i8> %22, i32 0)
   %24 = call float @llvm.SI.load.const(<16 x i8> %22, i32 4)
   %25 = call float @llvm.SI.load.const(<16 x i8> %22, i32 8)
@@ -800,41 +800,41 @@
   %124 = call float @llvm.SI.load.const(<16 x i8> %22, i32 864)
   %125 = call float @llvm.SI.load.const(<16 x i8> %22, i32 868)
   %126 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0
-  %127 = load <32 x i8> addrspace(2)* %126, !tbaa !0
+  %127 = load <32 x i8>, <32 x i8> addrspace(2)* %126, !tbaa !0
   %128 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0
-  %129 = load <16 x i8> addrspace(2)* %128, !tbaa !0
+  %129 = load <16 x i8>, <16 x i8> addrspace(2)* %128, !tbaa !0
   %130 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1
-  %131 = load <32 x i8> addrspace(2)* %130, !tbaa !0
+  %131 = load <32 x i8>, <32 x i8> addrspace(2)* %130, !tbaa !0
   %132 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1
-  %133 = load <16 x i8> addrspace(2)* %132, !tbaa !0
+  %133 = load <16 x i8>, <16 x i8> addrspace(2)* %132, !tbaa !0
   %134 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2
-  %135 = load <32 x i8> addrspace(2)* %134, !tbaa !0
+  %135 = load <32 x i8>, <32 x i8> addrspace(2)* %134, !tbaa !0
   %136 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2
-  %137 = load <16 x i8> addrspace(2)* %136, !tbaa !0
+  %137 = load <16 x i8>, <16 x i8> addrspace(2)* %136, !tbaa !0
   %138 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3
-  %139 = load <32 x i8> addrspace(2)* %138, !tbaa !0
+  %139 = load <32 x i8>, <32 x i8> addrspace(2)* %138, !tbaa !0
   %140 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3
-  %141 = load <16 x i8> addrspace(2)* %140, !tbaa !0
+  %141 = load <16 x i8>, <16 x i8> addrspace(2)* %140, !tbaa !0
   %142 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4
-  %143 = load <32 x i8> addrspace(2)* %142, !tbaa !0
+  %143 = load <32 x i8>, <32 x i8> addrspace(2)* %142, !tbaa !0
   %144 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4
-  %145 = load <16 x i8> addrspace(2)* %144, !tbaa !0
+  %145 = load <16 x i8>, <16 x i8> addrspace(2)* %144, !tbaa !0
   %146 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5
-  %147 = load <32 x i8> addrspace(2)* %146, !tbaa !0
+  %147 = load <32 x i8>, <32 x i8> addrspace(2)* %146, !tbaa !0
   %148 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5
-  %149 = load <16 x i8> addrspace(2)* %148, !tbaa !0
+  %149 = load <16 x i8>, <16 x i8> addrspace(2)* %148, !tbaa !0
   %150 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6
-  %151 = load <32 x i8> addrspace(2)* %150, !tbaa !0
+  %151 = load <32 x i8>, <32 x i8> addrspace(2)* %150, !tbaa !0
   %152 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6
-  %153 = load <16 x i8> addrspace(2)* %152, !tbaa !0
+  %153 = load <16 x i8>, <16 x i8> addrspace(2)* %152, !tbaa !0
   %154 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7
-  %155 = load <32 x i8> addrspace(2)* %154, !tbaa !0
+  %155 = load <32 x i8>, <32 x i8> addrspace(2)* %154, !tbaa !0
   %156 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7
-  %157 = load <16 x i8> addrspace(2)* %156, !tbaa !0
+  %157 = load <16 x i8>, <16 x i8> addrspace(2)* %156, !tbaa !0
   %158 = getelementptr [16 x <32 x i8>], [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 8
-  %159 = load <32 x i8> addrspace(2)* %158, !tbaa !0
+  %159 = load <32 x i8>, <32 x i8> addrspace(2)* %158, !tbaa !0
   %160 = getelementptr [32 x <16 x i8>], [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 8
-  %161 = load <16 x i8> addrspace(2)* %160, !tbaa !0
+  %161 = load <16 x i8>, <16 x i8> addrspace(2)* %160, !tbaa !0
   %162 = fcmp ugt float %17, 0.000000e+00
   %163 = select i1 %162, float 1.000000e+00, float 0.000000e+00
   %164 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %4, <2 x i32> %6)
diff --git a/llvm/test/CodeGen/R600/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
index bb49a5b..5a6129a 100644
--- a/llvm/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
+++ b/llvm/test/CodeGen/R600/si-triv-disjoint-mem-access.ll
@@ -14,14 +14,14 @@
 ; CI-NEXT: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
 ; CI: buffer_store_dword
 define void @reorder_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
-  %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+  %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
 
   %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
   %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
 
-  %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+  %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
   store i32 99, i32 addrspace(1)* %gptr, align 4
-  %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
 
   %add = add nsw i32 %tmp1, %tmp2
 
@@ -34,14 +34,14 @@
 ; CI: buffer_store_dword
 ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
 define void @no_reorder_local_load_volatile_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
-  %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+  %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
 
   %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
   %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
 
-  %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+  %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
   store volatile i32 99, i32 addrspace(1)* %gptr, align 4
-  %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
 
   %add = add nsw i32 %tmp1, %tmp2
 
@@ -54,15 +54,15 @@
 ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:8
 ; CI: buffer_store_dword
 define void @no_reorder_barrier_local_load_global_store_local_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
-  %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+  %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
 
   %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
   %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
 
-  %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+  %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
   store i32 99, i32 addrspace(1)* %gptr, align 4
   call void @llvm.AMDGPU.barrier.local() #2
-  %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
 
   %add = add nsw i32 %tmp1, %tmp2
 
@@ -79,14 +79,14 @@
 ; CI: buffer_load_dword
 ; CI: buffer_store_dword
 define void @no_reorder_constant_load_global_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(1)* %gptr) #0 {
-  %ptr0 = load i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
+  %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
 
   %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
   %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
 
-  %tmp1 = load i32 addrspace(2)* %ptr1, align 4
+  %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
   store i32 99, i32 addrspace(1)* %gptr, align 4
-  %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
 
   %add = add nsw i32 %tmp1, %tmp2
 
@@ -100,14 +100,14 @@
 ; CI: ds_write_b32
 ; CI: buffer_store_dword
 define void @reorder_constant_load_local_store_constant_load(i32 addrspace(1)* %out, i32 addrspace(3)* %lptr) #0 {
-  %ptr0 = load i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
+  %ptr0 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(3)* @stored_constant_ptr, align 8
 
   %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
   %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
 
-  %tmp1 = load i32 addrspace(2)* %ptr1, align 4
+  %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
   store i32 99, i32 addrspace(3)* %lptr, align 4
-  %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
 
   %add = add nsw i32 %tmp1, %tmp2
 
@@ -125,9 +125,9 @@
   %ptr1 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 1
   %ptr2 = getelementptr inbounds i32, i32 addrspace(2)* %ptr0, i64 2
 
-  %tmp1 = load i32 addrspace(2)* %ptr1, align 4
+  %tmp1 = load i32, i32 addrspace(2)* %ptr1, align 4
   store i32 99, i32 addrspace(3)* %lptr, align 4
-  %tmp2 = load i32 addrspace(2)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(2)* %ptr2, align 4
 
   %add = add nsw i32 %tmp1, %tmp2
 
@@ -144,9 +144,9 @@
   %ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i64 1
   %ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i64 2
 
-  %tmp1 = load i32 addrspace(1)* %ptr1, align 4
+  %tmp1 = load i32, i32 addrspace(1)* %ptr1, align 4
   store i32 99, i32 addrspace(3)* %lptr, align 4
-  %tmp2 = load i32 addrspace(1)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(1)* %ptr2, align 4
 
   %add = add nsw i32 %tmp1, %tmp2
 
@@ -168,10 +168,10 @@
   %ptr3 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 101
 
   store i32 123, i32 addrspace(3)* %ptr1, align 4
-  %tmp1 = load i32 addrspace(3)* %ptr2, align 4
-  %tmp2 = load i32 addrspace(3)* %ptr3, align 4
+  %tmp1 = load i32, i32 addrspace(3)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(3)* %ptr3, align 4
   store i32 123, i32 addrspace(3)* %ptr2, align 4
-  %tmp3 = load i32 addrspace(3)* %ptr1, align 4
+  %tmp3 = load i32, i32 addrspace(3)* %ptr1, align 4
   store i32 789, i32 addrspace(3)* %ptr3, align 4
 
   %add.0 = add nsw i32 %tmp2, %tmp1
@@ -194,10 +194,10 @@
   %ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %ptr0, i32 101
 
   store i32 123, i32 addrspace(1)* %ptr1, align 4
-  %tmp1 = load i32 addrspace(1)* %ptr2, align 4
-  %tmp2 = load i32 addrspace(1)* %ptr3, align 4
+  %tmp1 = load i32, i32 addrspace(1)* %ptr2, align 4
+  %tmp2 = load i32, i32 addrspace(1)* %ptr3, align 4
   store i32 123, i32 addrspace(1)* %ptr2, align 4
-  %tmp3 = load i32 addrspace(1)* %ptr1, align 4
+  %tmp3 = load i32, i32 addrspace(1)* %ptr1, align 4
   store i32 789, i32 addrspace(1)* %ptr3, align 4
 
   %add.0 = add nsw i32 %tmp2, %tmp1
@@ -211,19 +211,19 @@
 ; XCI: TBUFFER_STORE_FORMAT
 ; XCI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}}, 0x8
 ; define void @reorder_local_load_tbuffer_store_local_load(i32 addrspace(1)* %out, i32 %a1, i32 %vaddr) #1 {
-;   %ptr0 = load i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
+;   %ptr0 = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(3)* @stored_lds_ptr, align 4
 
 ;   %ptr1 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 1
 ;   %ptr2 = getelementptr inbounds i32, i32 addrspace(3)* %ptr0, i32 2
 
-;   %tmp1 = load i32 addrspace(3)* %ptr1, align 4
+;   %tmp1 = load i32, i32 addrspace(3)* %ptr1, align 4
 
 ;   %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0
 ;   call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata,
 ;         i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1,
 ;         i32 1, i32 0)
 
-;   %tmp2 = load i32 addrspace(3)* %ptr2, align 4
+;   %tmp2 = load i32, i32 addrspace(3)* %ptr2, align 4
 
 ;   %add = add nsw i32 %tmp1, %tmp2
 
diff --git a/llvm/test/CodeGen/R600/si-vector-hang.ll b/llvm/test/CodeGen/R600/si-vector-hang.ll
index a26f973..94c47fe 100644
--- a/llvm/test/CodeGen/R600/si-vector-hang.ll
+++ b/llvm/test/CodeGen/R600/si-vector-hang.ll
@@ -17,52 +17,52 @@
 ; Function Attrs: nounwind
 define void @test_8_min_char(i8 addrspace(1)* nocapture %out, i8 addrspace(1)* nocapture readonly %in0, i8 addrspace(1)* nocapture readonly %in1) #0 {
 entry:
-  %0 = load i8 addrspace(1)* %in0, align 1
+  %0 = load i8, i8 addrspace(1)* %in0, align 1
   %1 = insertelement <8 x i8> undef, i8 %0, i32 0
   %arrayidx2.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 1
-  %2 = load i8 addrspace(1)* %arrayidx2.i.i, align 1
+  %2 = load i8, i8 addrspace(1)* %arrayidx2.i.i, align 1
   %3 = insertelement <8 x i8> %1, i8 %2, i32 1
   %arrayidx6.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 2
-  %4 = load i8 addrspace(1)* %arrayidx6.i.i, align 1
+  %4 = load i8, i8 addrspace(1)* %arrayidx6.i.i, align 1
   %5 = insertelement <8 x i8> %3, i8 %4, i32 2
   %arrayidx10.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 3
-  %6 = load i8 addrspace(1)* %arrayidx10.i.i, align 1
+  %6 = load i8, i8 addrspace(1)* %arrayidx10.i.i, align 1
   %7 = insertelement <8 x i8> %5, i8 %6, i32 3
   %arrayidx.i.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 4
-  %8 = load i8 addrspace(1)* %arrayidx.i.i, align 1
+  %8 = load i8, i8 addrspace(1)* %arrayidx.i.i, align 1
   %9 = insertelement <8 x i8> undef, i8 %8, i32 0
   %arrayidx2.i9.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 5
-  %10 = load i8 addrspace(1)* %arrayidx2.i9.i, align 1
+  %10 = load i8, i8 addrspace(1)* %arrayidx2.i9.i, align 1
   %11 = insertelement <8 x i8> %9, i8 %10, i32 1
   %arrayidx6.i11.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 6
-  %12 = load i8 addrspace(1)* %arrayidx6.i11.i, align 1
+  %12 = load i8, i8 addrspace(1)* %arrayidx6.i11.i, align 1
   %13 = insertelement <8 x i8> %11, i8 %12, i32 2
   %arrayidx10.i13.i = getelementptr inbounds i8, i8 addrspace(1)* %in0, i64 7
-  %14 = load i8 addrspace(1)* %arrayidx10.i13.i, align 1
+  %14 = load i8, i8 addrspace(1)* %arrayidx10.i13.i, align 1
   %15 = insertelement <8 x i8> %13, i8 %14, i32 3
   %vecinit5.i = shufflevector <8 x i8> %7, <8 x i8> %15, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
-  %16 = load i8 addrspace(1)* %in1, align 1
+  %16 = load i8, i8 addrspace(1)* %in1, align 1
   %17 = insertelement <8 x i8> undef, i8 %16, i32 0
   %arrayidx2.i.i4 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 1
-  %18 = load i8 addrspace(1)* %arrayidx2.i.i4, align 1
+  %18 = load i8, i8 addrspace(1)* %arrayidx2.i.i4, align 1
   %19 = insertelement <8 x i8> %17, i8 %18, i32 1
   %arrayidx6.i.i5 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 2
-  %20 = load i8 addrspace(1)* %arrayidx6.i.i5, align 1
+  %20 = load i8, i8 addrspace(1)* %arrayidx6.i.i5, align 1
   %21 = insertelement <8 x i8> %19, i8 %20, i32 2
   %arrayidx10.i.i6 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 3
-  %22 = load i8 addrspace(1)* %arrayidx10.i.i6, align 1
+  %22 = load i8, i8 addrspace(1)* %arrayidx10.i.i6, align 1
   %23 = insertelement <8 x i8> %21, i8 %22, i32 3
   %arrayidx.i.i7 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 4
-  %24 = load i8 addrspace(1)* %arrayidx.i.i7, align 1
+  %24 = load i8, i8 addrspace(1)* %arrayidx.i.i7, align 1
   %25 = insertelement <8 x i8> undef, i8 %24, i32 0
   %arrayidx2.i9.i8 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 5
-  %26 = load i8 addrspace(1)* %arrayidx2.i9.i8, align 1
+  %26 = load i8, i8 addrspace(1)* %arrayidx2.i9.i8, align 1
   %27 = insertelement <8 x i8> %25, i8 %26, i32 1
   %arrayidx6.i11.i9 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 6
-  %28 = load i8 addrspace(1)* %arrayidx6.i11.i9, align 1
+  %28 = load i8, i8 addrspace(1)* %arrayidx6.i11.i9, align 1
   %29 = insertelement <8 x i8> %27, i8 %28, i32 2
   %arrayidx10.i13.i10 = getelementptr inbounds i8, i8 addrspace(1)* %in1, i64 7
-  %30 = load i8 addrspace(1)* %arrayidx10.i13.i10, align 1
+  %30 = load i8, i8 addrspace(1)* %arrayidx10.i13.i10, align 1
   %31 = insertelement <8 x i8> %29, i8 %30, i32 3
   %vecinit5.i11 = shufflevector <8 x i8> %23, <8 x i8> %31, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
   %cmp.i = icmp slt <8 x i8> %vecinit5.i, %vecinit5.i11
diff --git a/llvm/test/CodeGen/R600/sign_extend.ll b/llvm/test/CodeGen/R600/sign_extend.ll
index f194759..06bee11 100644
--- a/llvm/test/CodeGen/R600/sign_extend.ll
+++ b/llvm/test/CodeGen/R600/sign_extend.ll
@@ -48,7 +48,7 @@
 ; SI: v_ashr
 ; SI: s_endpgm
 define void @v_sext_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
-  %val = load i32 addrspace(1)* %in, align 4
+  %val = load i32, i32 addrspace(1)* %in, align 4
   %sext = sext i32 %val to i64
   store i64 %sext, i64 addrspace(1)* %out, align 8
   ret void
diff --git a/llvm/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll b/llvm/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
index c75b846..dffee70 100644
--- a/llvm/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
+++ b/llvm/test/CodeGen/R600/simplify-demanded-bits-build-pair.ll
@@ -31,7 +31,7 @@
   store i64 3935, i64* %gep2, align 8
   store i64 9342, i64* %gep3, align 8
   %gep = getelementptr i64, i64* %alloca, i32 %idx
-  %load = load i64* %gep, align 8
+  %load = load i64, i64* %gep, align 8
   %mask = and i64 %load, 4294967296
   %add = add i64 %mask, -1
   store i64 %add, i64 addrspace(1)* %out, align 4
diff --git a/llvm/test/CodeGen/R600/sint_to_fp.f64.ll b/llvm/test/CodeGen/R600/sint_to_fp.f64.ll
index f6f1e13..da4e91d 100644
--- a/llvm/test/CodeGen/R600/sint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/R600/sint_to_fp.f64.ll
@@ -54,7 +54,7 @@
 define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
-  %val = load i64 addrspace(1)* %gep, align 8
+  %val = load i64, i64 addrspace(1)* %gep, align 8
   %result = sitofp i64 %val to double
   store double %result, double addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/sint_to_fp.ll b/llvm/test/CodeGen/R600/sint_to_fp.ll
index 6a291cf..8506441 100644
--- a/llvm/test/CodeGen/R600/sint_to_fp.ll
+++ b/llvm/test/CodeGen/R600/sint_to_fp.ll
@@ -35,7 +35,7 @@
 ; SI: v_cvt_f32_i32_e32
 ; SI: v_cvt_f32_i32_e32
 define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %value = load <4 x i32> addrspace(1) * %in
+  %value = load <4 x i32>, <4 x i32> addrspace(1) * %in
   %result = sitofp <4 x i32> %value to <4 x float>
   store <4 x float> %result, <4 x float> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/smrd.ll b/llvm/test/CodeGen/R600/smrd.ll
index 46cbe1b..b0c18ca 100644
--- a/llvm/test/CodeGen/R600/smrd.ll
+++ b/llvm/test/CodeGen/R600/smrd.ll
@@ -8,7 +8,7 @@
 define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
   %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 1
-  %1 = load i32 addrspace(2)* %0
+  %1 = load i32, i32 addrspace(2)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -20,7 +20,7 @@
 define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
   %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 255
-  %1 = load i32 addrspace(2)* %0
+  %1 = load i32, i32 addrspace(2)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -34,7 +34,7 @@
 define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
   %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 256
-  %1 = load i32 addrspace(2)* %0
+  %1 = load i32, i32 addrspace(2)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -55,7 +55,7 @@
 define void @smrd3(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
   %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 4294967296 ; 2 ^ 32
-  %1 = load i32 addrspace(2)* %0
+  %1 = load i32, i32 addrspace(2)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
@@ -67,7 +67,7 @@
 define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
-  %21 = load <16 x i8> addrspace(2)* %20
+  %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
   %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 16)
   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
   ret void
@@ -81,7 +81,7 @@
 define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
-  %21 = load <16 x i8> addrspace(2)* %20
+  %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
   %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1020)
   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
   ret void
@@ -96,7 +96,7 @@
 define void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
   %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
-  %21 = load <16 x i8> addrspace(2)* %20
+  %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
   %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1024)
   call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
   ret void
diff --git a/llvm/test/CodeGen/R600/split-scalar-i64-add.ll b/llvm/test/CodeGen/R600/split-scalar-i64-add.ll
index 7826dd1..46409cd 100644
--- a/llvm/test/CodeGen/R600/split-scalar-i64-add.ll
+++ b/llvm/test/CodeGen/R600/split-scalar-i64-add.ll
@@ -38,7 +38,7 @@
 define void @imp_def_vcc_split_i64_add_2(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
   %tid = call i32 @llvm.r600.read.tidig.x() readnone
   %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
-  %load = load i32 addrspace(1)* %gep
+  %load = load i32, i32 addrspace(1)* %gep
   %vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
   %vec.1 = insertelement <2 x i32> %vec.0, i32 %load, i32 1
   %bc = bitcast <2 x i32> %vec.1 to i64
diff --git a/llvm/test/CodeGen/R600/sra.ll b/llvm/test/CodeGen/R600/sra.ll
index a64544e..bcbc32f 100644
--- a/llvm/test/CodeGen/R600/sra.ll
+++ b/llvm/test/CodeGen/R600/sra.ll
@@ -16,8 +16,8 @@
 
 define void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1) * %in
-  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
   %result = ashr <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -43,8 +43,8 @@
 
 define void @ashr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
   %result = ashr <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -90,8 +90,8 @@
 define void @ashr_i64_2(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
 entry:
   %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
-  %a = load i64 addrspace(1) * %in
-  %b = load i64 addrspace(1) * %b_ptr
+  %a = load i64, i64 addrspace(1) * %in
+  %b = load i64, i64 addrspace(1) * %b_ptr
   %result = ashr i64 %a, %b
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -133,8 +133,8 @@
 
 define void @ashr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
-  %a = load <2 x i64> addrspace(1) * %in
-  %b = load <2 x i64> addrspace(1) * %b_ptr
+  %a = load <2 x i64>, <2 x i64> addrspace(1) * %in
+  %b = load <2 x i64>, <2 x i64> addrspace(1) * %b_ptr
   %result = ashr <2 x i64> %a, %b
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out
   ret void
@@ -204,8 +204,8 @@
 
 define void @ashr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
-  %a = load <4 x i64> addrspace(1) * %in
-  %b = load <4 x i64> addrspace(1) * %b_ptr
+  %a = load <4 x i64>, <4 x i64> addrspace(1) * %in
+  %b = load <4 x i64>, <4 x i64> addrspace(1) * %b_ptr
   %result = ashr <4 x i64> %a, %b
   store <4 x i64> %result, <4 x i64> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/srem.ll b/llvm/test/CodeGen/R600/srem.ll
index e0f627a..c78fd54 100644
--- a/llvm/test/CodeGen/R600/srem.ll
+++ b/llvm/test/CodeGen/R600/srem.ll
@@ -4,15 +4,15 @@
 
 define void @srem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in
-  %den = load i32 addrspace(1) * %den_ptr
+  %num = load i32, i32 addrspace(1) * %in
+  %den = load i32, i32 addrspace(1) * %den_ptr
   %result = srem i32 %num, %den
   store i32 %result, i32 addrspace(1)* %out
   ret void
 }
 
 define void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %num = load i32 addrspace(1) * %in
+  %num = load i32, i32 addrspace(1) * %in
   %result = srem i32 %num, 4
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -25,7 +25,7 @@
 ; SI: v_sub_i32
 ; SI: s_endpgm
 define void @srem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %num = load i32 addrspace(1) * %in
+  %num = load i32, i32 addrspace(1) * %in
   %result = srem i32 %num, 7
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -33,15 +33,15 @@
 
 define void @srem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %den_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %num = load <2 x i32> addrspace(1) * %in
-  %den = load <2 x i32> addrspace(1) * %den_ptr
+  %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %den = load <2 x i32>, <2 x i32> addrspace(1) * %den_ptr
   %result = srem <2 x i32> %num, %den
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
 }
 
 define void @srem_v2i32_4(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
-  %num = load <2 x i32> addrspace(1) * %in
+  %num = load <2 x i32>, <2 x i32> addrspace(1) * %in
   %result = srem <2 x i32> %num, <i32 4, i32 4>
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -49,15 +49,15 @@
 
 define void @srem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %den_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %num = load <4 x i32> addrspace(1) * %in
-  %den = load <4 x i32> addrspace(1) * %den_ptr
+  %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %den = load <4 x i32>, <4 x i32> addrspace(1) * %den_ptr
   %result = srem <4 x i32> %num, %den
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
 }
 
 define void @srem_v4i32_4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %num = load <4 x i32> addrspace(1) * %in
+  %num = load <4 x i32>, <4 x i32> addrspace(1) * %in
   %result = srem <4 x i32> %num, <i32 4, i32 4, i32 4, i32 4>
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -65,15 +65,15 @@
 
 define void @srem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
   %den_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
-  %num = load i64 addrspace(1) * %in
-  %den = load i64 addrspace(1) * %den_ptr
+  %num = load i64, i64 addrspace(1) * %in
+  %den = load i64, i64 addrspace(1) * %den_ptr
   %result = srem i64 %num, %den
   store i64 %result, i64 addrspace(1)* %out
   ret void
 }
 
 define void @srem_i64_4(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
-  %num = load i64 addrspace(1) * %in
+  %num = load i64, i64 addrspace(1) * %in
   %result = srem i64 %num, 4
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -81,15 +81,15 @@
 
 define void @srem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
   %den_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
-  %num = load <2 x i64> addrspace(1) * %in
-  %den = load <2 x i64> addrspace(1) * %den_ptr
+  %num = load <2 x i64>, <2 x i64> addrspace(1) * %in
+  %den = load <2 x i64>, <2 x i64> addrspace(1) * %den_ptr
   %result = srem <2 x i64> %num, %den
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out
   ret void
 }
 
 define void @srem_v2i64_4(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
-  %num = load <2 x i64> addrspace(1) * %in
+  %num = load <2 x i64>, <2 x i64> addrspace(1) * %in
   %result = srem <2 x i64> %num, <i64 4, i64 4>
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out
   ret void
@@ -97,15 +97,15 @@
 
 define void @srem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
   %den_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
-  %num = load <4 x i64> addrspace(1) * %in
-  %den = load <4 x i64> addrspace(1) * %den_ptr
+  %num = load <4 x i64>, <4 x i64> addrspace(1) * %in
+  %den = load <4 x i64>, <4 x i64> addrspace(1) * %den_ptr
   %result = srem <4 x i64> %num, %den
   store <4 x i64> %result, <4 x i64> addrspace(1)* %out
   ret void
 }
 
 define void @srem_v4i64_4(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
-  %num = load <4 x i64> addrspace(1) * %in
+  %num = load <4 x i64>, <4 x i64> addrspace(1) * %in
   %result = srem <4 x i64> %num, <i64 4, i64 4, i64 4, i64 4>
   store <4 x i64> %result, <4 x i64> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/srl.ll b/llvm/test/CodeGen/R600/srl.ll
index 5594161..4904d7f 100644
--- a/llvm/test/CodeGen/R600/srl.ll
+++ b/llvm/test/CodeGen/R600/srl.ll
@@ -8,8 +8,8 @@
 ; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 define void @lshr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32 addrspace(1)* %in
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %in
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %result = lshr i32 %a, %b
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -26,8 +26,8 @@
 ; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 define void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1)* %in
-  %b = load <2 x i32> addrspace(1)* %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
   %result = lshr <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -50,8 +50,8 @@
 ; EG: LSHR {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1)* %in
-  %b = load <4 x i32> addrspace(1)* %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
   %result = lshr <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -74,8 +74,8 @@
 ; EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
 define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
   %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
-  %a = load i64 addrspace(1)* %in
-  %b = load i64 addrspace(1)* %b_ptr
+  %a = load i64, i64 addrspace(1)* %in
+  %b = load i64, i64 addrspace(1)* %b_ptr
   %result = lshr i64 %a, %b
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -112,8 +112,8 @@
 ; EG-DAG: CNDE_INT
 define void @lshr_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
-  %a = load <2 x i64> addrspace(1)* %in
-  %b = load <2 x i64> addrspace(1)* %b_ptr
+  %a = load <2 x i64>, <2 x i64> addrspace(1)* %in
+  %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
   %result = lshr <2 x i64> %a, %b
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out
   ret void
@@ -178,8 +178,8 @@
 ; EG-DAG: CNDE_INT
 define void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
-  %a = load <4 x i64> addrspace(1)* %in
-  %b = load <4 x i64> addrspace(1)* %b_ptr
+  %a = load <4 x i64>, <4 x i64> addrspace(1)* %in
+  %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
   %result = lshr <4 x i64> %a, %b
   store <4 x i64> %result, <4 x i64> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/ssubo.ll b/llvm/test/CodeGen/R600/ssubo.ll
index 09d3959..26884a1 100644
--- a/llvm/test/CodeGen/R600/ssubo.ll
+++ b/llvm/test/CodeGen/R600/ssubo.ll
@@ -28,8 +28,8 @@
 
 ; FUNC-LABEL: {{^}}v_ssubo_i32:
 define void @v_ssubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
-  %a = load i32 addrspace(1)* %aptr, align 4
-  %b = load i32 addrspace(1)* %bptr, align 4
+  %a = load i32, i32 addrspace(1)* %aptr, align 4
+  %b = load i32, i32 addrspace(1)* %bptr, align 4
   %ssub = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind
   %val = extractvalue { i32, i1 } %ssub, 0
   %carry = extractvalue { i32, i1 } %ssub, 1
@@ -54,8 +54,8 @@
 ; SI: v_sub_i32_e32
 ; SI: v_subb_u32_e32
 define void @v_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
-  %a = load i64 addrspace(1)* %aptr, align 4
-  %b = load i64 addrspace(1)* %bptr, align 4
+  %a = load i64, i64 addrspace(1)* %aptr, align 4
+  %b = load i64, i64 addrspace(1)* %bptr, align 4
   %ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
   %val = extractvalue { i64, i1 } %ssub, 0
   %carry = extractvalue { i64, i1 } %ssub, 1
diff --git a/llvm/test/CodeGen/R600/store-barrier.ll b/llvm/test/CodeGen/R600/store-barrier.ll
index c14383b..4a72b4d0 100644
--- a/llvm/test/CodeGen/R600/store-barrier.ll
+++ b/llvm/test/CodeGen/R600/store-barrier.ll
@@ -15,22 +15,22 @@
 define void @test(<2 x i8> addrspace(3)* nocapture %arg, <2 x i8> addrspace(1)* nocapture readonly %arg1, i32 addrspace(1)* nocapture readonly %arg2, <2 x i8> addrspace(1)* nocapture %arg3, i32 %arg4, i64 %tmp9) {
 bb:
   %tmp10 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp9
-  %tmp13 = load i32 addrspace(1)* %tmp10, align 2
+  %tmp13 = load i32, i32 addrspace(1)* %tmp10, align 2
   %tmp14 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp13
-  %tmp15 = load <2 x i8> addrspace(3)* %tmp14, align 2
+  %tmp15 = load <2 x i8>, <2 x i8> addrspace(3)* %tmp14, align 2
   %tmp16 = add i32 %tmp13, 1
   %tmp17 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp16
   store <2 x i8> %tmp15, <2 x i8> addrspace(3)* %tmp17, align 2
   tail call void @llvm.AMDGPU.barrier.local() #2
-  %tmp25 = load i32 addrspace(1)* %tmp10, align 4
+  %tmp25 = load i32, i32 addrspace(1)* %tmp10, align 4
   %tmp26 = sext i32 %tmp25 to i64
   %tmp27 = sext i32 %arg4 to i64
   %tmp28 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 %arg4
-  %tmp29 = load i8 addrspace(3)* %tmp28, align 1
+  %tmp29 = load i8, i8 addrspace(3)* %tmp28, align 1
   %tmp30 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 %tmp27
   store i8 %tmp29, i8 addrspace(1)* %tmp30, align 1
   %tmp32 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(3)* %arg, i32 %tmp25, i32 0
-  %tmp33 = load i8 addrspace(3)* %tmp32, align 1
+  %tmp33 = load i8, i8 addrspace(3)* %tmp32, align 1
   %tmp35 = getelementptr inbounds <2 x i8>, <2 x i8> addrspace(1)* %arg3, i64 %tmp26, i64 0
   store i8 %tmp33, i8 addrspace(1)* %tmp35, align 1
   ret void
diff --git a/llvm/test/CodeGen/R600/store.ll b/llvm/test/CodeGen/R600/store.ll
index fdce4f3..b5d68d2 100644
--- a/llvm/test/CodeGen/R600/store.ll
+++ b/llvm/test/CodeGen/R600/store.ll
@@ -334,9 +334,9 @@
 ; SI: buffer_store_dwordx2
 define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
 entry:
-  %0 = load i32 addrspace(2)* %mem, align 4
+  %0 = load i32, i32 addrspace(2)* %mem, align 4
   %arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
-  %1 = load i32 addrspace(2)* %arrayidx1.i, align 4
+  %1 = load i32, i32 addrspace(2)* %arrayidx1.i, align 4
   store i32 %0, i32 addrspace(1)* %out, align 4
   %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
   store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
diff --git a/llvm/test/CodeGen/R600/store.r600.ll b/llvm/test/CodeGen/R600/store.r600.ll
index 2197260..696fb03 100644
--- a/llvm/test/CodeGen/R600/store.r600.ll
+++ b/llvm/test/CodeGen/R600/store.r600.ll
@@ -7,7 +7,7 @@
 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
 
 define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %1 = load <4 x i32> addrspace(1) * %in
+  %1 = load <4 x i32>, <4 x i32> addrspace(1) * %in
   store <4 x i32> %1, <4 x i32> addrspace(1)* %out
   ret void
 }
@@ -16,7 +16,7 @@
 ; EG: {{^}}store_v4f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
 define void @store_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
-  %1 = load <4 x float> addrspace(1) * %in
+  %1 = load <4 x float>, <4 x float> addrspace(1) * %in
   store <4 x float> %1, <4 x float> addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/sub.ll b/llvm/test/CodeGen/R600/sub.ll
index b8ef279..03303f5 100644
--- a/llvm/test/CodeGen/R600/sub.ll
+++ b/llvm/test/CodeGen/R600/sub.ll
@@ -10,8 +10,8 @@
 ; SI: v_subrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32 addrspace(1)* %in
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %in
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %result = sub i32 %a, %b
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -27,8 +27,8 @@
 
 define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1) * %in
-  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
   %result = sub <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -47,8 +47,8 @@
 
 define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
   %result = sub <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -82,8 +82,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() readnone
   %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
   %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
-  %a = load i64 addrspace(1)* %a_ptr
-  %b = load i64 addrspace(1)* %b_ptr
+  %a = load i64, i64 addrspace(1)* %a_ptr
+  %b = load i64, i64 addrspace(1)* %b_ptr
   %result = sub i64 %a, %b
   store i64 %result, i64 addrspace(1)* %out, align 8
   ret void
@@ -98,8 +98,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() readnone
   %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
   %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
-  %a = load <2 x i64> addrspace(1)* %a_ptr
-  %b = load <2 x i64> addrspace(1)* %b_ptr
+  %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr
+  %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
   %result = sub <2 x i64> %a, %b
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out
   ret void
@@ -118,8 +118,8 @@
   %tid = call i32 @llvm.r600.read.tidig.x() readnone
   %a_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inA, i32 %tid
   %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inB, i32 %tid
-  %a = load <4 x i64> addrspace(1)* %a_ptr
-  %b = load <4 x i64> addrspace(1)* %b_ptr
+  %a = load <4 x i64>, <4 x i64> addrspace(1)* %a_ptr
+  %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
   %result = sub <4 x i64> %a, %b
   store <4 x i64> %result, <4 x i64> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/swizzle-export.ll b/llvm/test/CodeGen/R600/swizzle-export.ll
index 5eaca76..7010e93 100644
--- a/llvm/test/CodeGen/R600/swizzle-export.ll
+++ b/llvm/test/CodeGen/R600/swizzle-export.ll
@@ -12,56 +12,56 @@
   %1 = extractelement <4 x float> %reg1, i32 1
   %2 = extractelement <4 x float> %reg1, i32 2
   %3 = extractelement <4 x float> %reg1, i32 3
-  %4 = load <4 x float> addrspace(8)* null
+  %4 = load <4 x float>, <4 x float> addrspace(8)* null
   %5 = extractelement <4 x float> %4, i32 1
-  %6 = load <4 x float> addrspace(8)* null
+  %6 = load <4 x float>, <4 x float> addrspace(8)* null
   %7 = extractelement <4 x float> %6, i32 2
-  %8 = load <4 x float> addrspace(8)* null
+  %8 = load <4 x float>, <4 x float> addrspace(8)* null
   %9 = extractelement <4 x float> %8, i32 0
   %10 = fmul float 0.000000e+00, %9
-  %11 = load <4 x float> addrspace(8)* null
+  %11 = load <4 x float>, <4 x float> addrspace(8)* null
   %12 = extractelement <4 x float> %11, i32 0
   %13 = fmul float %5, %12
-  %14 = load <4 x float> addrspace(8)* null
+  %14 = load <4 x float>, <4 x float> addrspace(8)* null
   %15 = extractelement <4 x float> %14, i32 0
   %16 = fmul float 0.000000e+00, %15
-  %17 = load <4 x float> addrspace(8)* null
+  %17 = load <4 x float>, <4 x float> addrspace(8)* null
   %18 = extractelement <4 x float> %17, i32 0
   %19 = fmul float 0.000000e+00, %18
-  %20 = load <4 x float> addrspace(8)* null
+  %20 = load <4 x float>, <4 x float> addrspace(8)* null
   %21 = extractelement <4 x float> %20, i32 0
   %22 = fmul float %7, %21
-  %23 = load <4 x float> addrspace(8)* null
+  %23 = load <4 x float>, <4 x float> addrspace(8)* null
   %24 = extractelement <4 x float> %23, i32 0
   %25 = fmul float 0.000000e+00, %24
-  %26 = load <4 x float> addrspace(8)* null
+  %26 = load <4 x float>, <4 x float> addrspace(8)* null
   %27 = extractelement <4 x float> %26, i32 0
   %28 = fmul float 0.000000e+00, %27
-  %29 = load <4 x float> addrspace(8)* null
+  %29 = load <4 x float>, <4 x float> addrspace(8)* null
   %30 = extractelement <4 x float> %29, i32 0
   %31 = fmul float 0.000000e+00, %30
-  %32 = load <4 x float> addrspace(8)* null
+  %32 = load <4 x float>, <4 x float> addrspace(8)* null
   %33 = extractelement <4 x float> %32, i32 0
   %34 = fmul float 0.000000e+00, %33
-  %35 = load <4 x float> addrspace(8)* null
+  %35 = load <4 x float>, <4 x float> addrspace(8)* null
   %36 = extractelement <4 x float> %35, i32 0
   %37 = fmul float 0.000000e+00, %36
-  %38 = load <4 x float> addrspace(8)* null
+  %38 = load <4 x float>, <4 x float> addrspace(8)* null
   %39 = extractelement <4 x float> %38, i32 0
   %40 = fmul float 1.000000e+00, %39
-  %41 = load <4 x float> addrspace(8)* null
+  %41 = load <4 x float>, <4 x float> addrspace(8)* null
   %42 = extractelement <4 x float> %41, i32 0
   %43 = fmul float 0.000000e+00, %42
-  %44 = load <4 x float> addrspace(8)* null
+  %44 = load <4 x float>, <4 x float> addrspace(8)* null
   %45 = extractelement <4 x float> %44, i32 0
   %46 = fmul float 0.000000e+00, %45
-  %47 = load <4 x float> addrspace(8)* null
+  %47 = load <4 x float>, <4 x float> addrspace(8)* null
   %48 = extractelement <4 x float> %47, i32 0
   %49 = fmul float 0.000000e+00, %48
-  %50 = load <4 x float> addrspace(8)* null
+  %50 = load <4 x float>, <4 x float> addrspace(8)* null
   %51 = extractelement <4 x float> %50, i32 0
   %52 = fmul float 0.000000e+00, %51
-  %53 = load <4 x float> addrspace(8)* null
+  %53 = load <4 x float>, <4 x float> addrspace(8)* null
   %54 = extractelement <4 x float> %53, i32 0
   %55 = fmul float 1.000000e+00, %54
   %56 = insertelement <4 x float> undef, float %0, i32 0
@@ -102,12 +102,12 @@
   %1 = extractelement <4 x float> %reg1, i32 1
   %2 = fadd float %0, 2.5
   %3 = fmul float %1, 3.5
-  %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %5 = extractelement <4 x float> %4, i32 0
   %6 = call float @llvm.cos.f32(float %5)
-  %7 = load <4 x float> addrspace(8)* null
+  %7 = load <4 x float>, <4 x float> addrspace(8)* null
   %8 = extractelement <4 x float> %7, i32 0
-  %9 = load <4 x float> addrspace(8)* null
+  %9 = load <4 x float>, <4 x float> addrspace(8)* null
   %10 = extractelement <4 x float> %9, i32 1
   %11 = insertelement <4 x float> undef, float %2, i32 0
   %12 = insertelement <4 x float> %11, float %3, i32 1
diff --git a/llvm/test/CodeGen/R600/trunc-cmp-constant.ll b/llvm/test/CodeGen/R600/trunc-cmp-constant.ll
index a097ab0..21dfade 100644
--- a/llvm/test/CodeGen/R600/trunc-cmp-constant.ll
+++ b/llvm/test/CodeGen/R600/trunc-cmp-constant.ll
@@ -9,7 +9,7 @@
 ; SI: v_cndmask_b32_e64
 ; SI: buffer_store_byte
 define void @sextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %load to i32
   %cmp = icmp eq i32 %ext, 0
   store i1 %cmp, i1 addrspace(1)* %out
@@ -25,7 +25,7 @@
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
 ; SI-NEXT: buffer_store_byte [[RESULT]]
 define void @zextload_i1_to_i32_trunc_cmp_eq_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %load to i32
   %cmp = icmp eq i32 %ext, 0
   store i1 %cmp, i1 addrspace(1)* %out
@@ -36,7 +36,7 @@
 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
 ; SI: buffer_store_byte [[RESULT]]
 define void @sextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %load to i32
   %cmp = icmp eq i32 %ext, 1
   store i1 %cmp, i1 addrspace(1)* %out
@@ -48,7 +48,7 @@
 ; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
 ; SI-NEXT: buffer_store_byte [[RESULT]]
 define void @zextload_i1_to_i32_trunc_cmp_eq_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %load to i32
   %cmp = icmp eq i32 %ext, 1
   store i1 %cmp, i1 addrspace(1)* %out
@@ -60,7 +60,7 @@
 ; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[LOAD]]
 ; SI-NEXT: buffer_store_byte [[RESULT]]
 define void @sextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %load to i32
   %cmp = icmp eq i32 %ext, -1
   store i1 %cmp, i1 addrspace(1)* %out
@@ -71,7 +71,7 @@
 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
 ; SI: buffer_store_byte [[RESULT]]
 define void @zextload_i1_to_i32_trunc_cmp_eq_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %load to i32
   %cmp = icmp eq i32 %ext, -1
   store i1 %cmp, i1 addrspace(1)* %out
@@ -84,7 +84,7 @@
 ; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
 ; SI-NEXT: buffer_store_byte [[RESULT]]
 define void @sextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %load to i32
   %cmp = icmp ne i32 %ext, 0
   store i1 %cmp, i1 addrspace(1)* %out
@@ -96,7 +96,7 @@
 ; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
 ; SI-NEXT: buffer_store_byte [[RESULT]]
 define void @zextload_i1_to_i32_trunc_cmp_ne_0(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %load to i32
   %cmp = icmp ne i32 %ext, 0
   store i1 %cmp, i1 addrspace(1)* %out
@@ -107,7 +107,7 @@
 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
 ; SI: buffer_store_byte [[RESULT]]
 define void @sextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %load to i32
   %cmp = icmp ne i32 %ext, 1
   store i1 %cmp, i1 addrspace(1)* %out
@@ -122,7 +122,7 @@
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[NEG]]
 ; SI-NEXT: buffer_store_byte [[RESULT]]
 define void @zextload_i1_to_i32_trunc_cmp_ne_1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %load to i32
   %cmp = icmp ne i32 %ext, 1
   store i1 %cmp, i1 addrspace(1)* %out
@@ -137,7 +137,7 @@
 ; XSI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CMP0]]
 ; XSI-NEXT: buffer_store_byte [[RESULT]]
 define void @sextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = sext i1 %load to i32
   %cmp = icmp ne i32 %ext, -1
   store i1 %cmp, i1 addrspace(1)* %out
@@ -148,7 +148,7 @@
 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
 ; SI: buffer_store_byte [[RESULT]]
 define void @zextload_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i1 addrspace(1)* %in) nounwind {
-  %load = load i1 addrspace(1)* %in
+  %load = load i1, i1 addrspace(1)* %in
   %ext = zext i1 %load to i32
   %cmp = icmp ne i32 %ext, -1
   store i1 %cmp, i1 addrspace(1)* %out
@@ -161,7 +161,7 @@
 ; SI-NEXT: v_cndmask_b32_e64
 ; SI-NEXT: buffer_store_byte
 define void @masked_load_i1_to_i32_trunc_cmp_ne_neg1(i1 addrspace(1)* %out, i8 addrspace(1)* %in) nounwind {
-  %load = load i8 addrspace(1)* %in
+  %load = load i8, i8 addrspace(1)* %in
   %masked = and i8 %load, 255
   %ext = sext i8 %masked to i32
   %cmp = icmp ne i32 %ext, -1
diff --git a/llvm/test/CodeGen/R600/trunc.ll b/llvm/test/CodeGen/R600/trunc.ll
index 5d557ab..5580bd3 100644
--- a/llvm/test/CodeGen/R600/trunc.ll
+++ b/llvm/test/CodeGen/R600/trunc.ll
@@ -53,7 +53,7 @@
 ; SI: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
 ; SI: v_cmp_eq_i32
 define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) {
-  %a = load i32 addrspace(1)* %ptr, align 4
+  %a = load i32, i32 addrspace(1)* %ptr, align 4
   %trunc = trunc i32 %a to i1
   %result = select i1 %trunc, i32 1, i32 0
   store i32 %result, i32 addrspace(1)* %out, align 4
@@ -91,7 +91,7 @@
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
   %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %x = load i64 addrspace(1)* %gep
+  %x = load i64, i64 addrspace(1)* %gep
 
   %trunc = trunc i64 %x to i1
   %sel = select i1 %trunc, i32 63, i32 -12
diff --git a/llvm/test/CodeGen/R600/uaddo.ll b/llvm/test/CodeGen/R600/uaddo.ll
index 57d7835..9f38365 100644
--- a/llvm/test/CodeGen/R600/uaddo.ll
+++ b/llvm/test/CodeGen/R600/uaddo.ll
@@ -33,8 +33,8 @@
 ; FUNC-LABEL: {{^}}v_uaddo_i32:
 ; SI: v_add_i32
 define void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
-  %a = load i32 addrspace(1)* %aptr, align 4
-  %b = load i32 addrspace(1)* %bptr, align 4
+  %a = load i32, i32 addrspace(1)* %aptr, align 4
+  %b = load i32, i32 addrspace(1)* %bptr, align 4
   %uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) nounwind
   %val = extractvalue { i32, i1 } %uadd, 0
   %carry = extractvalue { i32, i1 } %uadd, 1
@@ -59,8 +59,8 @@
 ; SI: v_add_i32
 ; SI: v_addc_u32
 define void @v_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
-  %a = load i64 addrspace(1)* %aptr, align 4
-  %b = load i64 addrspace(1)* %bptr, align 4
+  %a = load i64, i64 addrspace(1)* %aptr, align 4
+  %b = load i64, i64 addrspace(1)* %bptr, align 4
   %uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) nounwind
   %val = extractvalue { i64, i1 } %uadd, 0
   %carry = extractvalue { i64, i1 } %uadd, 1
diff --git a/llvm/test/CodeGen/R600/udiv.ll b/llvm/test/CodeGen/R600/udiv.ll
index e350ecb..de22a22 100644
--- a/llvm/test/CodeGen/R600/udiv.ll
+++ b/llvm/test/CodeGen/R600/udiv.ll
@@ -8,8 +8,8 @@
 
 define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32 addrspace(1) * %in
-  %b = load i32 addrspace(1) * %b_ptr
+  %a = load i32, i32 addrspace(1) * %in
+  %b = load i32, i32 addrspace(1) * %b_ptr
   %result = udiv i32 %a, %b
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -26,8 +26,8 @@
 
 define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1) * %in
-  %b = load <2 x i32> addrspace(1) * %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
   %result = udiv <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -40,8 +40,8 @@
 
 define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1) * %in
-  %b = load <4 x i32> addrspace(1) * %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
   %result = udiv <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/udivrem24.ll b/llvm/test/CodeGen/R600/udivrem24.ll
index bbb0108..4de881b 100644
--- a/llvm/test/CodeGen/R600/udivrem24.ll
+++ b/llvm/test/CodeGen/R600/udivrem24.ll
@@ -14,8 +14,8 @@
 ; EG: FLT_TO_UINT
 define void @udiv24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
   %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
-  %num = load i8 addrspace(1) * %in
-  %den = load i8 addrspace(1) * %den_ptr
+  %num = load i8, i8 addrspace(1) * %in
+  %den = load i8, i8 addrspace(1) * %den_ptr
   %result = udiv i8 %num, %den
   store i8 %result, i8 addrspace(1)* %out
   ret void
@@ -33,8 +33,8 @@
 ; EG: FLT_TO_UINT
 define void @udiv24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
   %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
-  %num = load i16 addrspace(1) * %in, align 2
-  %den = load i16 addrspace(1) * %den_ptr, align 2
+  %num = load i16, i16 addrspace(1) * %in, align 2
+  %den = load i16, i16 addrspace(1) * %den_ptr, align 2
   %result = udiv i16 %num, %den
   store i16 %result, i16 addrspace(1)* %out, align 2
   ret void
@@ -52,8 +52,8 @@
 ; EG: FLT_TO_UINT
 define void @udiv24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 8
   %den.i24.0 = shl i32 %den, 8
   %num.i24 = lshr i32 %num.i24.0, 8
@@ -72,8 +72,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @udiv25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 7
   %den.i24.0 = shl i32 %den, 7
   %num.i24 = lshr i32 %num.i24.0, 7
@@ -92,8 +92,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @test_no_udiv24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 8
   %den.i24.0 = shl i32 %den, 7
   %num.i24 = lshr i32 %num.i24.0, 8
@@ -112,8 +112,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @test_no_udiv24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 7
   %den.i24.0 = shl i32 %den, 8
   %num.i24 = lshr i32 %num.i24.0, 7
@@ -135,8 +135,8 @@
 ; EG: FLT_TO_UINT
 define void @urem24_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) {
   %den_ptr = getelementptr i8, i8 addrspace(1)* %in, i8 1
-  %num = load i8 addrspace(1) * %in
-  %den = load i8 addrspace(1) * %den_ptr
+  %num = load i8, i8 addrspace(1) * %in
+  %den = load i8, i8 addrspace(1) * %den_ptr
   %result = urem i8 %num, %den
   store i8 %result, i8 addrspace(1)* %out
   ret void
@@ -154,8 +154,8 @@
 ; EG: FLT_TO_UINT
 define void @urem24_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
   %den_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
-  %num = load i16 addrspace(1) * %in, align 2
-  %den = load i16 addrspace(1) * %den_ptr, align 2
+  %num = load i16, i16 addrspace(1) * %in, align 2
+  %den = load i16, i16 addrspace(1) * %den_ptr, align 2
   %result = urem i16 %num, %den
   store i16 %result, i16 addrspace(1)* %out, align 2
   ret void
@@ -173,8 +173,8 @@
 ; EG: FLT_TO_UINT
 define void @urem24_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 8
   %den.i24.0 = shl i32 %den, 8
   %num.i24 = lshr i32 %num.i24.0, 8
@@ -193,8 +193,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @urem25_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 7
   %den.i24.0 = shl i32 %den, 7
   %num.i24 = lshr i32 %num.i24.0, 7
@@ -213,8 +213,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @test_no_urem24_i32_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 8
   %den.i24.0 = shl i32 %den, 7
   %num.i24 = lshr i32 %num.i24.0, 8
@@ -233,8 +233,8 @@
 ; EG-NOT: RECIP_IEEE
 define void @test_no_urem24_i32_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %den_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %num = load i32 addrspace(1) * %in, align 4
-  %den = load i32 addrspace(1) * %den_ptr, align 4
+  %num = load i32, i32 addrspace(1) * %in, align 4
+  %den = load i32, i32 addrspace(1) * %den_ptr, align 4
   %num.i24.0 = shl i32 %num, 7
   %den.i24.0 = shl i32 %den, 8
   %num.i24 = lshr i32 %num.i24.0, 7
diff --git a/llvm/test/CodeGen/R600/uint_to_fp.f64.ll b/llvm/test/CodeGen/R600/uint_to_fp.f64.ll
index e79bdd5..dfec8eb 100644
--- a/llvm/test/CodeGen/R600/uint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/R600/uint_to_fp.f64.ll
@@ -12,7 +12,7 @@
 define void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
   %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
   %gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
-  %val = load i64 addrspace(1)* %gep, align 8
+  %val = load i64, i64 addrspace(1)* %gep, align 8
   %result = uitofp i64 %val to double
   store double %result, double addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/uint_to_fp.ll b/llvm/test/CodeGen/R600/uint_to_fp.ll
index 1c8a175..d3f0281 100644
--- a/llvm/test/CodeGen/R600/uint_to_fp.ll
+++ b/llvm/test/CodeGen/R600/uint_to_fp.ll
@@ -38,7 +38,7 @@
 ; SI: v_cvt_f32_u32_e32
 ; SI: s_endpgm
 define void @uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %value = load <4 x i32> addrspace(1) * %in
+  %value = load <4 x i32>, <4 x i32> addrspace(1) * %in
   %result = uitofp <4 x i32> %value to <4 x float>
   store <4 x float> %result, <4 x float> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/unaligned-load-store.ll b/llvm/test/CodeGen/R600/unaligned-load-store.ll
index 79ff5c9..efb1de2 100644
--- a/llvm/test/CodeGen/R600/unaligned-load-store.ll
+++ b/llvm/test/CodeGen/R600/unaligned-load-store.ll
@@ -8,7 +8,7 @@
 ; SI: ds_write_b8
 ; SI: s_endpgm
 define void @unaligned_load_store_i16_local(i16 addrspace(3)* %p, i16 addrspace(3)* %r) nounwind {
-  %v = load i16 addrspace(3)* %p, align 1
+  %v = load i16, i16 addrspace(3)* %p, align 1
   store i16 %v, i16 addrspace(3)* %r, align 1
   ret void
 }
@@ -20,7 +20,7 @@
 ; SI: buffer_store_byte
 ; SI: s_endpgm
 define void @unaligned_load_store_i16_global(i16 addrspace(1)* %p, i16 addrspace(1)* %r) nounwind {
-  %v = load i16 addrspace(1)* %p, align 1
+  %v = load i16, i16 addrspace(1)* %p, align 1
   store i16 %v, i16 addrspace(1)* %r, align 1
   ret void
 }
@@ -36,7 +36,7 @@
 ; SI: ds_write_b8
 ; SI: s_endpgm
 define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind {
-  %v = load i32 addrspace(3)* %p, align 1
+  %v = load i32, i32 addrspace(3)* %p, align 1
   store i32 %v, i32 addrspace(3)* %r, align 1
   ret void
 }
@@ -51,7 +51,7 @@
 ; SI: buffer_store_byte
 ; SI: buffer_store_byte
 define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind {
-  %v = load i32 addrspace(1)* %p, align 1
+  %v = load i32, i32 addrspace(1)* %p, align 1
   store i32 %v, i32 addrspace(1)* %r, align 1
   ret void
 }
@@ -75,7 +75,7 @@
 ; SI: ds_write_b8
 ; SI: s_endpgm
 define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) {
-  %v = load i64 addrspace(3)* %p, align 1
+  %v = load i64, i64 addrspace(3)* %p, align 1
   store i64 %v, i64 addrspace(3)* %r, align 1
   ret void
 }
@@ -98,7 +98,7 @@
 ; SI: buffer_store_byte
 ; SI: buffer_store_byte
 define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) {
-  %v = load i64 addrspace(1)* %p, align 1
+  %v = load i64, i64 addrspace(1)* %p, align 1
   store i64 %v, i64 addrspace(1)* %r, align 1
   ret void
 }
@@ -145,7 +145,7 @@
 ; SI: ds_write_b8
 ; SI: s_endpgm
 define void @unaligned_load_store_v4i32_local(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind {
-  %v = load <4 x i32> addrspace(3)* %p, align 1
+  %v = load <4 x i32>, <4 x i32> addrspace(3)* %p, align 1
   store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
   ret void
 }
@@ -169,7 +169,7 @@
 ; FIXME-SI: buffer_load_ubyte
 ; FIXME-SI: buffer_load_ubyte
 define void @unaligned_load_store_v4i32_global(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) nounwind {
-  %v = load <4 x i32> addrspace(1)* %p, align 1
+  %v = load <4 x i32>, <4 x i32> addrspace(1)* %p, align 1
   store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1
   ret void
 }
@@ -178,7 +178,7 @@
 ; SI: ds_read2_b32
 ; SI: s_endpgm
 define void @load_lds_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
-  %val = load i64 addrspace(3)* %in, align 4
+  %val = load i64, i64 addrspace(3)* %in, align 4
   store i64 %val, i64 addrspace(1)* %out, align 8
   ret void
 }
@@ -188,7 +188,7 @@
 ; SI: s_endpgm
 define void @load_lds_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
   %ptr = getelementptr i64, i64 addrspace(3)* %in, i32 4
-  %val = load i64 addrspace(3)* %ptr, align 4
+  %val = load i64, i64 addrspace(3)* %ptr, align 4
   store i64 %val, i64 addrspace(1)* %out, align 8
   ret void
 }
@@ -201,7 +201,7 @@
   %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
   %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
   %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
-  %val = load i64 addrspace(3)* %ptri64, align 4
+  %val = load i64, i64 addrspace(3)* %ptri64, align 4
   store i64 %val, i64 addrspace(1)* %out, align 8
   ret void
 }
@@ -219,7 +219,7 @@
 ; SI: s_endpgm
 
 define void @load_lds_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
-  %val = load i64 addrspace(3)* %in, align 1
+  %val = load i64, i64 addrspace(3)* %in, align 1
   store i64 %val, i64 addrspace(1)* %out, align 8
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/unhandled-loop-condition-assertion.ll b/llvm/test/CodeGen/R600/unhandled-loop-condition-assertion.ll
index 5c6d7ff..036a7e9 100644
--- a/llvm/test/CodeGen/R600/unhandled-loop-condition-assertion.ll
+++ b/llvm/test/CodeGen/R600/unhandled-loop-condition-assertion.ll
@@ -20,19 +20,19 @@
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
   %main.addr.011 = phi i8 addrspace(1)* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
   %0 = bitcast i8 addrspace(1)* %main.addr.011 to i32 addrspace(1)*
-  %1 = load i32 addrspace(1)* %0, align 4
+  %1 = load i32, i32 addrspace(1)* %0, align 4
   %add.ptr = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %main_stride
   %2 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
-  %3 = load i32 addrspace(1)* %2, align 4
+  %3 = load i32, i32 addrspace(1)* %2, align 4
   %add.ptr1 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
   %4 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
-  %5 = load i32 addrspace(1)* %4, align 4
+  %5 = load i32, i32 addrspace(1)* %4, align 4
   %add.ptr2 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
   %6 = bitcast i8 addrspace(1)* %add.ptr2 to i32 addrspace(1)*
-  %7 = load i32 addrspace(1)* %6, align 4
+  %7 = load i32, i32 addrspace(1)* %6, align 4
   %add.ptr3 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
   %8 = bitcast i8 addrspace(1)* %add.ptr3 to i32 addrspace(1)*
-  %9 = load i32 addrspace(1)* %8, align 4
+  %9 = load i32, i32 addrspace(1)* %8, align 4
   %add.ptr6 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 undef
   br i1 undef, label %for.end, label %for.body
 
@@ -56,19 +56,19 @@
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
   %main.addr.011 = phi i8 addrspace(1)* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
   %0 = bitcast i8 addrspace(1)* %main.addr.011 to i32 addrspace(1)*
-  %1 = load i32 addrspace(1)* %0, align 4
+  %1 = load i32, i32 addrspace(1)* %0, align 4
   %add.ptr = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %main_stride
   %2 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
-  %3 = load i32 addrspace(1)* %2, align 4
+  %3 = load i32, i32 addrspace(1)* %2, align 4
   %add.ptr1 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
   %4 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
-  %5 = load i32 addrspace(1)* %4, align 4
+  %5 = load i32, i32 addrspace(1)* %4, align 4
   %add.ptr2 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
   %6 = bitcast i8 addrspace(1)* %add.ptr2 to i32 addrspace(1)*
-  %7 = load i32 addrspace(1)* %6, align 4
+  %7 = load i32, i32 addrspace(1)* %6, align 4
   %add.ptr3 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
   %8 = bitcast i8 addrspace(1)* %add.ptr3 to i32 addrspace(1)*
-  %9 = load i32 addrspace(1)* %8, align 4
+  %9 = load i32, i32 addrspace(1)* %8, align 4
   %add.ptr6 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 undef
   br i1 undef, label %for.end, label %for.body
 
@@ -92,19 +92,19 @@
 for.body:                                         ; preds = %for.body, %for.body.lr.ph
   %main.addr.011 = phi i8 addrspace(1)* [ %main, %for.body.lr.ph ], [ %add.ptr6, %for.body ]
   %0 = bitcast i8 addrspace(1)* %main.addr.011 to i32 addrspace(1)*
-  %1 = load i32 addrspace(1)* %0, align 4
+  %1 = load i32, i32 addrspace(1)* %0, align 4
   %add.ptr = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %main_stride
   %2 = bitcast i8 addrspace(1)* %add.ptr to i32 addrspace(1)*
-  %3 = load i32 addrspace(1)* %2, align 4
+  %3 = load i32, i32 addrspace(1)* %2, align 4
   %add.ptr1 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr.sum
   %4 = bitcast i8 addrspace(1)* %add.ptr1 to i32 addrspace(1)*
-  %5 = load i32 addrspace(1)* %4, align 4
+  %5 = load i32, i32 addrspace(1)* %4, align 4
   %add.ptr2 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr1.sum
   %6 = bitcast i8 addrspace(1)* %add.ptr2 to i32 addrspace(1)*
-  %7 = load i32 addrspace(1)* %6, align 4
+  %7 = load i32, i32 addrspace(1)* %6, align 4
   %add.ptr3 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 %add.ptr4.sum
   %8 = bitcast i8 addrspace(1)* %add.ptr3 to i32 addrspace(1)*
-  %9 = load i32 addrspace(1)* %8, align 4
+  %9 = load i32, i32 addrspace(1)* %8, align 4
   %add.ptr6 = getelementptr inbounds i8, i8 addrspace(1)* %main.addr.011, i32 undef
   br i1 undef, label %for.end, label %for.body
 
diff --git a/llvm/test/CodeGen/R600/unroll.ll b/llvm/test/CodeGen/R600/unroll.ll
index 23ff71c..ca8d822 100644
--- a/llvm/test/CodeGen/R600/unroll.ll
+++ b/llvm/test/CodeGen/R600/unroll.ll
@@ -31,7 +31,7 @@
 
 exit:
   %2 = getelementptr [32 x i32], [32 x i32]* %0, i32 0, i32 5
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   store i32 %3, i32 addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/urem.ll b/llvm/test/CodeGen/R600/urem.ll
index ab5ba93..62841ec 100644
--- a/llvm/test/CodeGen/R600/urem.ll
+++ b/llvm/test/CodeGen/R600/urem.ll
@@ -11,8 +11,8 @@
 ; EG: CF_END
 define void @test_urem_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
-  %a = load i32 addrspace(1)* %in
-  %b = load i32 addrspace(1)* %b_ptr
+  %a = load i32, i32 addrspace(1)* %in
+  %b = load i32, i32 addrspace(1)* %b_ptr
   %result = urem i32 %a, %b
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -27,7 +27,7 @@
 ; SI: buffer_store_dword
 ; SI: s_endpgm
 define void @test_urem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %num = load i32 addrspace(1) * %in
+  %num = load i32, i32 addrspace(1) * %in
   %result = urem i32 %num, 7
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -38,8 +38,8 @@
 ; EG: CF_END
 define void @test_urem_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
-  %a = load <2 x i32> addrspace(1)* %in
-  %b = load <2 x i32> addrspace(1)* %b_ptr
+  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
+  %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
   %result = urem <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -50,8 +50,8 @@
 ; EG: CF_END
 define void @test_urem_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
-  %a = load <4 x i32> addrspace(1)* %in
-  %b = load <4 x i32> addrspace(1)* %b_ptr
+  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
+  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
   %result = urem <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -62,8 +62,8 @@
 ; EG: CF_END
 define void @test_urem_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
   %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
-  %a = load i64 addrspace(1)* %in
-  %b = load i64 addrspace(1)* %b_ptr
+  %a = load i64, i64 addrspace(1)* %in
+  %b = load i64, i64 addrspace(1)* %b_ptr
   %result = urem i64 %a, %b
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -74,8 +74,8 @@
 ; EG: CF_END
 define void @test_urem_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
-  %a = load <2 x i64> addrspace(1)* %in
-  %b = load <2 x i64> addrspace(1)* %b_ptr
+  %a = load <2 x i64>, <2 x i64> addrspace(1)* %in
+  %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
   %result = urem <2 x i64> %a, %b
   store <2 x i64> %result, <2 x i64> addrspace(1)* %out
   ret void
@@ -86,8 +86,8 @@
 ; EG: CF_END
 define void @test_urem_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
-  %a = load <4 x i64> addrspace(1)* %in
-  %b = load <4 x i64> addrspace(1)* %b_ptr
+  %a = load <4 x i64>, <4 x i64> addrspace(1)* %in
+  %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
   %result = urem <4 x i64> %a, %b
   store <4 x i64> %result, <4 x i64> addrspace(1)* %out
   ret void
diff --git a/llvm/test/CodeGen/R600/usubo.ll b/llvm/test/CodeGen/R600/usubo.ll
index be1e666..a753ca4 100644
--- a/llvm/test/CodeGen/R600/usubo.ll
+++ b/llvm/test/CodeGen/R600/usubo.ll
@@ -30,8 +30,8 @@
 ; FUNC-LABEL: {{^}}v_usubo_i32:
 ; SI: v_subrev_i32_e32
 define void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* %carryout, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
-  %a = load i32 addrspace(1)* %aptr, align 4
-  %b = load i32 addrspace(1)* %bptr, align 4
+  %a = load i32, i32 addrspace(1)* %aptr, align 4
+  %b = load i32, i32 addrspace(1)* %bptr, align 4
   %usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) nounwind
   %val = extractvalue { i32, i1 } %usub, 0
   %carry = extractvalue { i32, i1 } %usub, 1
@@ -56,8 +56,8 @@
 ; SI: v_sub_i32
 ; SI: v_subb_u32
 define void @v_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind {
-  %a = load i64 addrspace(1)* %aptr, align 4
-  %b = load i64 addrspace(1)* %bptr, align 4
+  %a = load i64, i64 addrspace(1)* %aptr, align 4
+  %b = load i64, i64 addrspace(1)* %bptr, align 4
   %usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) nounwind
   %val = extractvalue { i64, i1 } %usub, 0
   %carry = extractvalue { i64, i1 } %usub, 1
diff --git a/llvm/test/CodeGen/R600/v_cndmask.ll b/llvm/test/CodeGen/R600/v_cndmask.ll
index 9bd96c2..c368c5a 100644
--- a/llvm/test/CodeGen/R600/v_cndmask.ll
+++ b/llvm/test/CodeGen/R600/v_cndmask.ll
@@ -11,7 +11,7 @@
 define void @v_cnd_nan_nosgpr(float addrspace(1)* %out, i32 %c, float addrspace(1)* %fptr) #0 {
   %idx = call i32 @llvm.r600.read.tidig.x() #1
   %f.gep = getelementptr float, float addrspace(1)* %fptr, i32 %idx
-  %f = load float addrspace(1)* %fptr
+  %f = load float, float addrspace(1)* %fptr
   %setcc = icmp ne i32 %c, 0
   %select = select i1 %setcc, float 0xFFFFFFFFE0000000, float %f
   store float %select, float addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/valu-i1.ll b/llvm/test/CodeGen/R600/valu-i1.ll
index 8e30972..ef4f3ef 100644
--- a/llvm/test/CodeGen/R600/valu-i1.ll
+++ b/llvm/test/CodeGen/R600/valu-i1.ll
@@ -95,7 +95,7 @@
   %i = phi i32 [%tid, %entry], [%i.inc, %loop]
   %gep.src = getelementptr i32, i32 addrspace(1)* %src, i32 %i
   %gep.dst = getelementptr i32, i32 addrspace(1)* %dst, i32 %i
-  %load = load i32 addrspace(1)* %src
+  %load = load i32, i32 addrspace(1)* %src
   store i32 %load, i32 addrspace(1)* %gep.dst
   %i.inc = add nsw i32 %i, 1
   %cmp = icmp eq i32 %limit, %i.inc
@@ -155,7 +155,7 @@
   %tmp = tail call i32 @llvm.r600.read.tidig.x() #0
   %tmp4 = sext i32 %tmp to i64
   %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg3, i64 %tmp4
-  %tmp6 = load i32 addrspace(1)* %tmp5, align 4
+  %tmp6 = load i32, i32 addrspace(1)* %tmp5, align 4
   %tmp7 = icmp sgt i32 %tmp6, 0
   %tmp8 = sext i32 %tmp6 to i64
   br i1 %tmp7, label %bb10, label %bb26
@@ -164,9 +164,9 @@
   %tmp11 = phi i64 [ %tmp23, %bb20 ], [ 0, %bb ]
   %tmp12 = add nsw i64 %tmp11, %tmp4
   %tmp13 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp12
-  %tmp14 = load i32 addrspace(1)* %tmp13, align 4
+  %tmp14 = load i32, i32 addrspace(1)* %tmp13, align 4
   %tmp15 = getelementptr inbounds i32, i32 addrspace(1)* %arg2, i64 %tmp12
-  %tmp16 = load i32 addrspace(1)* %tmp15, align 4
+  %tmp16 = load i32, i32 addrspace(1)* %tmp15, align 4
   %tmp17 = icmp ne i32 %tmp14, -1
   %tmp18 = icmp ne i32 %tmp16, -1
   %tmp19 = and i1 %tmp17, %tmp18
diff --git a/llvm/test/CodeGen/R600/vector-alloca.ll b/llvm/test/CodeGen/R600/vector-alloca.ll
index 81441ee..6f3b484 100644
--- a/llvm/test/CodeGen/R600/vector-alloca.ll
+++ b/llvm/test/CodeGen/R600/vector-alloca.ll
@@ -22,7 +22,7 @@
   store i32 2, i32* %z
   store i32 3, i32* %w
   %1 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 %index
-  %2 = load i32* %1
+  %2 = load i32, i32* %1
   store i32 %2, i32 addrspace(1)* %out
   ret void
 }
@@ -48,7 +48,7 @@
   %1 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 %w_index
   store i32 1, i32* %1
   %2 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 %r_index
-  %3 = load i32* %2
+  %3 = load i32, i32* %2
   store i32 %3, i32 addrspace(1)* %out
   ret void
 }
@@ -71,7 +71,7 @@
   %1 = getelementptr [4 x i32], [4 x i32]* %0, i32 0, i32 1
   %2 = bitcast i32* %1 to [4 x i32]*
   %3 = getelementptr [4 x i32], [4 x i32]* %2, i32 0, i32 0
-  %4 = load i32* %3
+  %4 = load i32, i32* %3
   store i32 %4, i32 addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/vertex-fetch-encoding.ll b/llvm/test/CodeGen/R600/vertex-fetch-encoding.ll
index e4d117f..fb6a17e 100644
--- a/llvm/test/CodeGen/R600/vertex-fetch-encoding.ll
+++ b/llvm/test/CodeGen/R600/vertex-fetch-encoding.ll
@@ -8,7 +8,7 @@
 
 define void @vtx_fetch32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 entry:
-  %0 = load i32 addrspace(1)* %in
+  %0 = load i32, i32 addrspace(1)* %in
   store i32 %0, i32 addrspace(1)* %out
   ret void
 }
@@ -19,7 +19,7 @@
 
 define void @vtx_fetch128(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
 entry:
-  %0 = load <4 x i32> addrspace(1)* %in
+  %0 = load <4 x i32>, <4 x i32> addrspace(1)* %in
   store <4 x i32> %0, <4 x i32> addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/vselect.ll b/llvm/test/CodeGen/R600/vselect.ll
index a6152f7..a3014b0 100644
--- a/llvm/test/CodeGen/R600/vselect.ll
+++ b/llvm/test/CodeGen/R600/vselect.ll
@@ -12,8 +12,8 @@
 
 define void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
 entry:
-  %0 = load <2 x i32> addrspace(1)* %in0
-  %1 = load <2 x i32> addrspace(1)* %in1
+  %0 = load <2 x i32>, <2 x i32> addrspace(1)* %in0
+  %1 = load <2 x i32>, <2 x i32> addrspace(1)* %in1
   %cmp = icmp ne <2 x i32> %0, %1
   %result = select <2 x i1> %cmp, <2 x i32> %0, <2 x i32> %1
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
@@ -30,8 +30,8 @@
 
 define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in0, <2 x float> addrspace(1)* %in1) {
 entry:
-  %0 = load <2 x float> addrspace(1)* %in0
-  %1 = load <2 x float> addrspace(1)* %in1
+  %0 = load <2 x float>, <2 x float> addrspace(1)* %in0
+  %1 = load <2 x float>, <2 x float> addrspace(1)* %in1
   %cmp = fcmp une <2 x float> %0, %1
   %result = select <2 x i1> %cmp, <2 x float> %0, <2 x float> %1
   store <2 x float> %result, <2 x float> addrspace(1)* %out
@@ -52,8 +52,8 @@
 
 define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
 entry:
-  %0 = load <4 x i32> addrspace(1)* %in0
-  %1 = load <4 x i32> addrspace(1)* %in1
+  %0 = load <4 x i32>, <4 x i32> addrspace(1)* %in0
+  %1 = load <4 x i32>, <4 x i32> addrspace(1)* %in1
   %cmp = icmp ne <4 x i32> %0, %1
   %result = select <4 x i1> %cmp, <4 x i32> %0, <4 x i32> %1
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
@@ -68,8 +68,8 @@
 
 define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in0, <4 x float> addrspace(1)* %in1) {
 entry:
-  %0 = load <4 x float> addrspace(1)* %in0
-  %1 = load <4 x float> addrspace(1)* %in1
+  %0 = load <4 x float>, <4 x float> addrspace(1)* %in0
+  %1 = load <4 x float>, <4 x float> addrspace(1)* %in1
   %cmp = fcmp une <4 x float> %0, %1
   %result = select <4 x i1> %cmp, <4 x float> %0, <4 x float> %1
   store <4 x float> %result, <4 x float> addrspace(1)* %out
diff --git a/llvm/test/CodeGen/R600/vtx-fetch-branch.ll b/llvm/test/CodeGen/R600/vtx-fetch-branch.ll
index bcbe34e..4584d6e 100644
--- a/llvm/test/CodeGen/R600/vtx-fetch-branch.ll
+++ b/llvm/test/CodeGen/R600/vtx-fetch-branch.ll
@@ -16,7 +16,7 @@
   br i1 %0, label %endif, label %if
 
 if:
-  %1 = load i32 addrspace(1)* %in
+  %1 = load i32, i32 addrspace(1)* %in
   br label %endif
 
 endif:
diff --git a/llvm/test/CodeGen/R600/vtx-schedule.ll b/llvm/test/CodeGen/R600/vtx-schedule.ll
index 8254c99..912e258 100644
--- a/llvm/test/CodeGen/R600/vtx-schedule.ll
+++ b/llvm/test/CodeGen/R600/vtx-schedule.ll
@@ -11,8 +11,8 @@
 ; CHECK: VTX_READ_32 [[IN1:T[0-9]+\.X]], [[IN1]], 0
 define void @test(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* addrspace(1)* nocapture %in0) {
 entry:
-  %0 = load i32 addrspace(1)* addrspace(1)* %in0
-  %1 = load i32 addrspace(1)* %0
+  %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in0
+  %1 = load i32, i32 addrspace(1)* %0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
diff --git a/llvm/test/CodeGen/R600/wait.ll b/llvm/test/CodeGen/R600/wait.ll
index 36b96a2..5cc7577 100644
--- a/llvm/test/CodeGen/R600/wait.ll
+++ b/llvm/test/CodeGen/R600/wait.ll
@@ -9,16 +9,16 @@
 define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 {
 main_body:
   %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 0
-  %tmp10 = load <16 x i8> addrspace(2)* %tmp, !tbaa !0
+  %tmp10 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
   %tmp11 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp10, i32 0, i32 %arg6)
   %tmp12 = extractelement <4 x float> %tmp11, i32 0
   %tmp13 = extractelement <4 x float> %tmp11, i32 1
   call void @llvm.AMDGPU.barrier.global() #1
   %tmp14 = extractelement <4 x float> %tmp11, i32 2
 ;  %tmp15 = extractelement <4 x float> %tmp11, i32 3
-  %tmp15 = load float addrspace(2)* %constptr, align 4 ; Force waiting for expcnt and lgkmcnt
+  %tmp15 = load float, float addrspace(2)* %constptr, align 4 ; Force waiting for expcnt and lgkmcnt
   %tmp16 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 1
-  %tmp17 = load <16 x i8> addrspace(2)* %tmp16, !tbaa !0
+  %tmp17 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp16, !tbaa !0
   %tmp18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp17, i32 0, i32 %arg6)
   %tmp19 = extractelement <4 x float> %tmp18, i32 0
   %tmp20 = extractelement <4 x float> %tmp18, i32 1
diff --git a/llvm/test/CodeGen/R600/xor.ll b/llvm/test/CodeGen/R600/xor.ll
index 1526e28..ea78cca 100644
--- a/llvm/test/CodeGen/R600/xor.ll
+++ b/llvm/test/CodeGen/R600/xor.ll
@@ -11,8 +11,8 @@
 ; SI: v_xor_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @xor_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
-  %a = load <2 x i32> addrspace(1) * %in0
-  %b = load <2 x i32> addrspace(1) * %in1
+  %a = load <2 x i32>, <2 x i32> addrspace(1) * %in0
+  %b = load <2 x i32>, <2 x i32> addrspace(1) * %in1
   %result = xor <2 x i32> %a, %b
   store <2 x i32> %result, <2 x i32> addrspace(1)* %out
   ret void
@@ -30,8 +30,8 @@
 ; SI: v_xor_b32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
 
 define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
-  %a = load <4 x i32> addrspace(1) * %in0
-  %b = load <4 x i32> addrspace(1) * %in1
+  %a = load <4 x i32>, <4 x i32> addrspace(1) * %in0
+  %b = load <4 x i32>, <4 x i32> addrspace(1) * %in1
   %result = xor <4 x i32> %a, %b
   store <4 x i32> %result, <4 x i32> addrspace(1)* %out
   ret void
@@ -47,8 +47,8 @@
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
 define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
-  %a = load float addrspace(1) * %in0
-  %b = load float addrspace(1) * %in1
+  %a = load float, float addrspace(1) * %in0
+  %b = load float, float addrspace(1) * %in1
   %acmp = fcmp oge float %a, 0.000000e+00
   %bcmp = fcmp oge float %b, 1.000000e+00
   %xor = xor i1 %acmp, %bcmp
@@ -64,8 +64,8 @@
 ; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[XOR]]
 ; SI: buffer_store_byte [[RESULT]]
 define void @v_xor_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in0, i1 addrspace(1)* %in1) {
-  %a = load i1 addrspace(1)* %in0
-  %b = load i1 addrspace(1)* %in1
+  %a = load i1, i1 addrspace(1)* %in0
+  %b = load i1, i1 addrspace(1)* %in1
   %xor = xor i1 %a, %b
   store i1 %xor, i1 addrspace(1)* %out
   ret void
@@ -74,8 +74,8 @@
 ; FUNC-LABEL: {{^}}vector_xor_i32:
 ; SI: v_xor_b32_e32
 define void @vector_xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
-  %a = load i32 addrspace(1)* %in0
-  %b = load i32 addrspace(1)* %in1
+  %a = load i32, i32 addrspace(1)* %in0
+  %b = load i32, i32 addrspace(1)* %in1
   %result = xor i32 %a, %b
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -100,8 +100,8 @@
 ; FUNC-LABEL: {{^}}vector_not_i32:
 ; SI: v_not_b32
 define void @vector_not_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) {
-  %a = load i32 addrspace(1)* %in0
-  %b = load i32 addrspace(1)* %in1
+  %a = load i32, i32 addrspace(1)* %in0
+  %b = load i32, i32 addrspace(1)* %in1
   %result = xor i32 %a, -1
   store i32 %result, i32 addrspace(1)* %out
   ret void
@@ -112,8 +112,8 @@
 ; SI: v_xor_b32_e32
 ; SI: s_endpgm
 define void @vector_xor_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
-  %a = load i64 addrspace(1)* %in0
-  %b = load i64 addrspace(1)* %in1
+  %a = load i64, i64 addrspace(1)* %in0
+  %b = load i64, i64 addrspace(1)* %in1
   %result = xor i64 %a, %b
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -140,8 +140,8 @@
 ; SI: v_not_b32
 ; SI: v_not_b32
 define void @vector_not_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in0, i64 addrspace(1)* %in1) {
-  %a = load i64 addrspace(1)* %in0
-  %b = load i64 addrspace(1)* %in1
+  %a = load i64, i64 addrspace(1)* %in0
+  %b = load i64, i64 addrspace(1)* %in1
   %result = xor i64 %a, -1
   store i64 %result, i64 addrspace(1)* %out
   ret void
@@ -163,7 +163,7 @@
   br label %endif
 
 else:
-  %2 = load i64 addrspace(1)* %in
+  %2 = load i64, i64 addrspace(1)* %in
   br label %endif
 
 endif: