[AMDGPU] Add v3f16/v3i16 support to SDag Fix lowering and instruction selection for v3x16 types and enable InstCombine to emit them. This patch only implements it for the selection dag. GlobalISel tests in GlobalISel/llvm.amdgcn.image.load.1d.d16.ll and GlobalISel/llvm.amdgcn.image.store.2d.d16.ll still don't work. Differential Revision: https://reviews.llvm.org/D84420

commit: 833b3b0d3a2ff4b8243940eef1a960050ec48682 [log] [tgz]
author: Sebastian Neubauer <sebastian.neubauer@amd.com> Thu Jul 23 16:59:00 2020 +0200
committer: Sebastian Neubauer <sebastian.neubauer@amd.com> Wed Sep 16 17:20:27 2020 +0200
tree: f7437717db750897d7827d8636c381c88c1713ba
parent: cd4615120233c54034b42bafc3d2bcc9f29db63d [diff] [blame]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
index 9e6be56..da1174d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll

@@ -23,6 +23,18 @@
   ret float %r
 }
 
+; GCN-LABEL: {{^}}image_load_v3f16:
+; UNPACKED: image_load v[0:2], v[0:1], s[0:7] dmask:0x7 unorm d16{{$}}
+; PACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0x7 unorm d16{{$}}
+; GFX10: image_load v[0:1], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm d16{{$}}
+define amdgpu_ps <2 x float> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+main_body:
+  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  %ext = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %r = bitcast <4 x half> %ext to <2 x float>
+  ret <2 x float> %r
+}
+
 ; GCN-LABEL: {{^}}image_load_v4f16:
 ; UNPACKED: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
 ; PACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
@@ -56,6 +68,14 @@
   ret float %x
 }
 
+define amdgpu_ps <2 x float> @image_load_3d_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
+main_body:
+  %tex = call <3 x half> @llvm.amdgcn.image.load.3d.v3f16.i32(i32 7, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
+  %ext = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %res = bitcast <4 x half> %ext to <2 x float>
+  ret <2 x float> %res
+}
+
 ; GCN-LABEL: {{^}}image_store_f16
 ; GFX89: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}}
 ; GFX10: image_store v2, v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm d16{{$}}
@@ -78,6 +98,14 @@
   ret void
 }
 
+define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x float> %in) {
+main_body:
+  %r = bitcast <2 x float> %in to <4 x half>
+  %data = shufflevector <4 x half> %r, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %data, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
 ; GCN-LABEL: {{^}}image_store_v4f16
 ; UNPACKED: v_lshrrev_b32_e32
 ; UNPACKED: v_and_b32_e32
@@ -110,15 +138,19 @@
 
 declare half @llvm.amdgcn.image.load.2d.f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
 declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
 declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
 declare <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 declare <2 x half> @llvm.amdgcn.image.load.3d.v2f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <3 x half> @llvm.amdgcn.image.load.3d.v3f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
 
 declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32, i32, i32, <8 x i32>, i32, i32) #0
 declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
 declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
 declare void @llvm.amdgcn.image.store.mip.1d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
 declare void @llvm.amdgcn.image.store.3d.v2f16.i32(<2 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.3d.v3f16.i32(<3 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
commit	833b3b0d3a2ff4b8243940eef1a960050ec48682	[log] [tgz]
author	Sebastian Neubauer <sebastian.neubauer@amd.com>	Thu Jul 23 16:59:00 2020 +0200
committer	Sebastian Neubauer <sebastian.neubauer@amd.com>	Wed Sep 16 17:20:27 2020 +0200
tree	f7437717db750897d7827d8636c381c88c1713ba
parent	cd4615120233c54034b42bafc3d2bcc9f29db63d [diff] [blame]