| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 |
| ; RUN: opt -S -passes=amdgpu-image-intrinsic-opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=NO-MSAA %s |
| ; RUN: opt -S -passes=amdgpu-image-intrinsic-opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=NO-MSAA %s |
| ; RUN: opt -S -passes=amdgpu-image-intrinsic-opt -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1150 < %s | FileCheck -check-prefixes=MSAA %s |
| |
| define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; NO-MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| main_body: |
| %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = insertvalue [4 x float] undef, float %i, 0 |
| %i5 = insertvalue [4 x float] %i4, float %i1, 1 |
| %i6 = insertvalue [4 x float] %i5, float %i2, 2 |
| %i7 = insertvalue [4 x float] %i6, float %i3, 3 |
| ret [4 x float] %i7 |
| } |
| |
| define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask2(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask2( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; NO-MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask2( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| main_body: |
| %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 2, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = insertvalue [4 x float] undef, float %i, 0 |
| %i5 = insertvalue [4 x float] %i4, float %i1, 1 |
| %i6 = insertvalue [4 x float] %i5, float %i2, 2 |
| %i7 = insertvalue [4 x float] %i6, float %i3, 3 |
| ret [4 x float] %i7 |
| } |
| |
| define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask4(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask4( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; NO-MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask4( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| main_body: |
| %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 4, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = insertvalue [4 x float] undef, float %i, 0 |
| %i5 = insertvalue [4 x float] %i4, float %i1, 1 |
| %i6 = insertvalue [4 x float] %i5, float %i2, 2 |
| %i7 = insertvalue [4 x float] %i6, float %i3, 3 |
| ret [4 x float] %i7 |
| } |
| |
| define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask8(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask8( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; NO-MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask8( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| main_body: |
| %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 8, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = insertvalue [4 x float] undef, float %i, 0 |
| %i5 = insertvalue [4 x float] %i4, float %i1, 1 |
| %i6 = insertvalue [4 x float] %i5, float %i2, 2 |
| %i7 = insertvalue [4 x float] %i6, float %i3, 3 |
| ret [4 x float] %i7 |
| } |
| |
| define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_reverse(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_reverse( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; NO-MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_reverse( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| main_body: |
| %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = insertvalue [4 x float] undef, float %i, 0 |
| %i5 = insertvalue [4 x float] %i4, float %i1, 1 |
| %i6 = insertvalue [4 x float] %i5, float %i2, 2 |
| %i7 = insertvalue [4 x float] %i6, float %i3, 3 |
| ret [4 x float] %i7 |
| } |
| |
| ; Don't combine because the vaddr inputs are not identical. |
| define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_vaddr(<8 x i32> inreg %rsrc, i32 %s0, i32 %t0, i32 %s1, i32 %t1, i32 %s2, i32 %t2, i32 %s3, i32 %t3) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_vaddr( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S0:%.*]], i32 [[T0:%.*]], i32 [[S1:%.*]], i32 [[T1:%.*]], i32 [[S2:%.*]], i32 [[T2:%.*]], i32 [[S3:%.*]], i32 [[T3:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S0]], i32 [[T0]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S1]], i32 [[T1]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S2]], i32 [[T2]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S3]], i32 [[T3]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; NO-MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_vaddr( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S0:%.*]], i32 [[T0:%.*]], i32 [[S1:%.*]], i32 [[T1:%.*]], i32 [[S2:%.*]], i32 [[T2:%.*]], i32 [[S3:%.*]], i32 [[T3:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S0]], i32 [[T0]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S1]], i32 [[T1]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S2]], i32 [[T2]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S3]], i32 [[T3]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| main_body: |
| %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s0, i32 %t0, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s1, i32 %t1, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s2, i32 %t2, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s3, i32 %t3, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = insertvalue [4 x float] undef, float %i, 0 |
| %i5 = insertvalue [4 x float] %i4, float %i1, 1 |
| %i6 = insertvalue [4 x float] %i5, float %i2, 2 |
| %i7 = insertvalue [4 x float] %i6, float %i3, 3 |
| ret [4 x float] %i7 |
| } |
| |
| define amdgpu_ps [8 x float] @load_2dmsaa_v8f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [8 x float] @load_2dmsaa_v8f32( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I5:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I6:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I7:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I8:%.*]] = insertvalue [8 x float] undef, float [[I]], 0 |
| ; NO-MSAA-NEXT: [[I9:%.*]] = insertvalue [8 x float] [[I8]], float [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I10:%.*]] = insertvalue [8 x float] [[I9]], float [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I11:%.*]] = insertvalue [8 x float] [[I10]], float [[I3]], 3 |
| ; NO-MSAA-NEXT: [[I12:%.*]] = insertvalue [8 x float] [[I11]], float [[I4]], 4 |
| ; NO-MSAA-NEXT: [[I13:%.*]] = insertvalue [8 x float] [[I12]], float [[I5]], 5 |
| ; NO-MSAA-NEXT: [[I14:%.*]] = insertvalue [8 x float] [[I13]], float [[I6]], 6 |
| ; NO-MSAA-NEXT: [[I15:%.*]] = insertvalue [8 x float] [[I14]], float [[I7]], 7 |
| ; NO-MSAA-NEXT: ret [8 x float] [[I15]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [8 x float] @load_2dmsaa_v8f32( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[I4:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[I5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[I6:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[I7:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[I8:%.*]] = insertvalue [8 x float] undef, float [[I]], 0 |
| ; MSAA-NEXT: [[I9:%.*]] = insertvalue [8 x float] [[I8]], float [[I1]], 1 |
| ; MSAA-NEXT: [[I10:%.*]] = insertvalue [8 x float] [[I9]], float [[I2]], 2 |
| ; MSAA-NEXT: [[I11:%.*]] = insertvalue [8 x float] [[I10]], float [[I3]], 3 |
| ; MSAA-NEXT: [[I12:%.*]] = insertvalue [8 x float] [[I11]], float [[I4]], 4 |
| ; MSAA-NEXT: [[I13:%.*]] = insertvalue [8 x float] [[I12]], float [[I5]], 5 |
| ; MSAA-NEXT: [[I14:%.*]] = insertvalue [8 x float] [[I13]], float [[I6]], 6 |
| ; MSAA-NEXT: [[I15:%.*]] = insertvalue [8 x float] [[I14]], float [[I7]], 7 |
| ; MSAA-NEXT: ret [8 x float] [[I15]] |
| ; |
| main_body: |
| %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i5 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i6 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i7 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i8 = insertvalue [8 x float] undef, float %i, 0 |
| %i9 = insertvalue [8 x float] %i8, float %i1, 1 |
| %i10 = insertvalue [8 x float] %i9, float %i2, 2 |
| %i11 = insertvalue [8 x float] %i10, float %i3, 3 |
| %i12 = insertvalue [8 x float] %i11, float %i4, 4 |
| %i13 = insertvalue [8 x float] %i12, float %i5, 5 |
| %i14 = insertvalue [8 x float] %i13, float %i6, 6 |
| %i15 = insertvalue [8 x float] %i14, float %i7, 7 |
| ret [8 x float] %i15 |
| } |
| |
| define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_interleaved(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_interleaved( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = insertvalue [4 x float] [[I1]], float [[I2]], 1 |
| ; NO-MSAA-NEXT: [[I4:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I3]], float [[I4]], 2 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I5]], float [[I6]], 3 |
| ; NO-MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_interleaved( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[I4:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[I6:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[I1:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; MSAA-NEXT: [[I3:%.*]] = insertvalue [4 x float] [[I1]], float [[I2]], 1 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I3]], float [[I4]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I5]], float [[I6]], 3 |
| ; MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| main_body: |
| %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = insertvalue [4 x float] undef, float %i, 0 |
| %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = insertvalue [4 x float] %i1, float %i2, 1 |
| %i4 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i5 = insertvalue [4 x float] %i3, float %i4, 2 |
| %i6 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i7 = insertvalue [4 x float] %i5, float %i6, 3 |
| ret [4 x float] %i7 |
| } |
| |
| define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId01(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId01( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x float] undef, float [[I]], 0 |
| ; NO-MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x float] [[I2]], float [[I1]], 1 |
| ; NO-MSAA-NEXT: ret [2 x float] [[I3]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId01( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x float] undef, float [[I]], 0 |
| ; MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x float] [[I2]], float [[I1]], 1 |
| ; MSAA-NEXT: ret [2 x float] [[I3]] |
| ; |
| main_body: |
| %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = insertvalue [2 x float] undef, float %i, 0 |
| %i3 = insertvalue [2 x float] %i2, float %i1, 1 |
| ret [2 x float] %i3 |
| } |
| |
| define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId23(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId23( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x float] undef, float [[I]], 0 |
| ; NO-MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x float] [[I2]], float [[I1]], 1 |
| ; NO-MSAA-NEXT: ret [2 x float] [[I3]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [2 x float] @load_2dmsaa_v2f32_fragId23( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x float] undef, float [[I]], 0 |
| ; MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x float] [[I2]], float [[I1]], 1 |
| ; MSAA-NEXT: ret [2 x float] [[I3]] |
| ; |
| main_body: |
| %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = insertvalue [2 x float] undef, float %i, 0 |
| %i3 = insertvalue [2 x float] %i2, float %i1, 1 |
| ret [2 x float] %i3 |
| } |
| |
| ; Don't combine because it's not profitable: the resulting msaa loads would |
| ; have 8 vdata outputs. |
| define amdgpu_ps [2 x <2 x float>] @load_2dmsaa_v2v2f32_dmask3(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [2 x <2 x float>] @load_2dmsaa_v2v2f32_dmask3( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [2 x <2 x float>] undef, <2 x float> [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [2 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 |
| ; NO-MSAA-NEXT: ret [2 x <2 x float>] [[I5]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [2 x <2 x float>] @load_2dmsaa_v2v2f32_dmask3( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [2 x <2 x float>] undef, <2 x float> [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [2 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 |
| ; MSAA-NEXT: ret [2 x <2 x float>] [[I5]] |
| ; |
| main_body: |
| %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = insertvalue [2 x <2 x float>] undef, <2 x float> %i, 0 |
| %i5 = insertvalue [2 x <2 x float>] %i4, <2 x float> %i1, 1 |
| ret [2 x <2 x float>] %i5 |
| } |
| |
| |
| |
| |
| define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask3(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask3( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3 |
| ; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask3( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0 |
| ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 |
| ; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1 |
| ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0 |
| ; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1 |
| ; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1 |
| ; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0 |
| ; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 |
| ; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1 |
| ; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0 |
| ; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3 |
| ; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1 |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 |
| ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3 |
| ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]] |
| ; |
| main_body: |
| %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0 |
| %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1 |
| %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2 |
| %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3 |
| ret [4 x <2 x float>] %i7 |
| } |
| |
| |
| |
| |
| define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask5(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask5( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3 |
| ; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask5( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0 |
| ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 |
| ; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1 |
| ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0 |
| ; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1 |
| ; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1 |
| ; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0 |
| ; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 |
| ; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1 |
| ; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0 |
| ; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3 |
| ; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1 |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 |
| ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3 |
| ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]] |
| ; |
| main_body: |
| %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 5, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0 |
| %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1 |
| %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2 |
| %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3 |
| ret [4 x <2 x float>] %i7 |
| } |
| |
| |
| |
| |
| define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask6(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask6( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3 |
| ; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask6( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0 |
| ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 |
| ; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1 |
| ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0 |
| ; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1 |
| ; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1 |
| ; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0 |
| ; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 |
| ; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1 |
| ; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0 |
| ; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3 |
| ; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1 |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 |
| ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3 |
| ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]] |
| ; |
| main_body: |
| %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 6, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0 |
| %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1 |
| %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2 |
| %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3 |
| ret [4 x <2 x float>] %i7 |
| } |
| |
| |
| |
| |
| define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask9(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask9( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3 |
| ; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask9( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0 |
| ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 |
| ; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1 |
| ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0 |
| ; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1 |
| ; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1 |
| ; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0 |
| ; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 |
| ; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1 |
| ; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0 |
| ; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3 |
| ; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1 |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 |
| ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3 |
| ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]] |
| ; |
| main_body: |
| %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 9, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0 |
| %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1 |
| %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2 |
| %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3 |
| ret [4 x <2 x float>] %i7 |
| } |
| |
| |
| |
| |
| define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask10(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask10( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3 |
| ; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask10( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0 |
| ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 |
| ; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1 |
| ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0 |
| ; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1 |
| ; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1 |
| ; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0 |
| ; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 |
| ; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1 |
| ; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0 |
| ; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3 |
| ; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1 |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 |
| ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3 |
| ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]] |
| ; |
| main_body: |
| %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 10, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0 |
| %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1 |
| %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2 |
| %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3 |
| ret [4 x <2 x float>] %i7 |
| } |
| |
| |
| |
| |
| define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask12(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask12( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3 |
| ; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2dmsaa_v4v2f32_dmask12( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 8, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0 |
| ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 |
| ; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1 |
| ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0 |
| ; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1 |
| ; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1 |
| ; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0 |
| ; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 |
| ; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1 |
| ; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0 |
| ; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3 |
| ; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1 |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 |
| ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3 |
| ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]] |
| ; |
| main_body: |
| %i = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = call <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32 12, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0 |
| %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1 |
| %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2 |
| %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3 |
| ret [4 x <2 x float>] %i7 |
| } |
| |
| define amdgpu_ps [2 x half] @load_2dmsaa_v2f16_fragId01(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [2 x half] @load_2dmsaa_v2f16_fragId01( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x half] undef, half [[I]], 0 |
| ; NO-MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x half] [[I2]], half [[I1]], 1 |
| ; NO-MSAA-NEXT: ret [2 x half] [[I3]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [2 x half] @load_2dmsaa_v2f16_fragId01( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x half> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x half> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x half> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[I2:%.*]] = insertvalue [2 x half] undef, half [[I]], 0 |
| ; MSAA-NEXT: [[I3:%.*]] = insertvalue [2 x half] [[I2]], half [[I1]], 1 |
| ; MSAA-NEXT: ret [2 x half] [[I3]] |
| ; |
| main_body: |
| %i = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call half @llvm.amdgcn.image.load.2dmsaa.f16.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = insertvalue [2 x half] undef, half %i, 0 |
| %i3 = insertvalue [2 x half] %i2, half %i1, 1 |
| ret [2 x half] %i3 |
| } |
| |
| define amdgpu_ps [4 x float] @load_2darraymsaa_v4f32_dmask1(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2darraymsaa_v4f32_dmask1( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; NO-MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2darraymsaa_v4f32_dmask1( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| main_body: |
| %i = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = call float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = insertvalue [4 x float] undef, float %i, 0 |
| %i5 = insertvalue [4 x float] %i4, float %i1, 1 |
| %i6 = insertvalue [4 x float] %i5, float %i2, 2 |
| %i7 = insertvalue [4 x float] %i6, float %i3, 3 |
| ret [4 x float] %i7 |
| } |
| |
| |
| |
| |
| define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask3(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask3( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3 |
| ; NO-MSAA-NEXT: ret [4 x <2 x float>] [[I7]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x <2 x float>] @load_2darraymsaa_v4v2f32_dmask3( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SLICE:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 [[SLICE]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i64 0 |
| ; MSAA-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 |
| ; MSAA-NEXT: [[I:%.*]] = insertelement <2 x float> [[TMP3]], float [[TMP4]], i64 1 |
| ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP5]], i64 0 |
| ; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP1]], i64 1 |
| ; MSAA-NEXT: [[I1:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP7]], i64 1 |
| ; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[TMP9:%.*]] = insertelement <2 x float> undef, float [[TMP8]], i64 0 |
| ; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 |
| ; MSAA-NEXT: [[I2:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP10]], i64 1 |
| ; MSAA-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[TMP12:%.*]] = insertelement <2 x float> undef, float [[TMP11]], i64 0 |
| ; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP1]], i64 3 |
| ; MSAA-NEXT: [[I3:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP13]], i64 1 |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <2 x float>] undef, <2 x float> [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <2 x float>] [[I4]], <2 x float> [[I1]], 1 |
| ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <2 x float>] [[I5]], <2 x float> [[I2]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <2 x float>] [[I6]], <2 x float> [[I3]], 3 |
| ; MSAA-NEXT: ret [4 x <2 x float>] [[I7]] |
| ; |
| main_body: |
| %i = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = call <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32 3, i32 %s, i32 %t, i32 %slice, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = insertvalue [4 x <2 x float>] undef, <2 x float> %i, 0 |
| %i5 = insertvalue [4 x <2 x float>] %i4, <2 x float> %i1, 1 |
| %i6 = insertvalue [4 x <2 x float>] %i5, <2 x float> %i2, 2 |
| %i7 = insertvalue [4 x <2 x float>] %i6, <2 x float> %i3, 3 |
| ret [4 x <2 x float>] %i7 |
| } |
| |
| |
| |
| |
| |
| |
| define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[I3]], 3 |
| ; NO-MSAA-NEXT: ret [4 x <3 x float>] [[I7]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[TMP4:%.*]] = insertelement <3 x float> undef, float [[TMP3]], i64 0 |
| ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 |
| ; MSAA-NEXT: [[TMP6:%.*]] = insertelement <3 x float> [[TMP4]], float [[TMP5]], i64 1 |
| ; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP2]], i64 0 |
| ; MSAA-NEXT: [[I:%.*]] = insertelement <3 x float> [[TMP6]], float [[TMP7]], i64 2 |
| ; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[TMP9:%.*]] = insertelement <3 x float> undef, float [[TMP8]], i64 0 |
| ; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 1 |
| ; MSAA-NEXT: [[TMP11:%.*]] = insertelement <3 x float> [[TMP9]], float [[TMP10]], i64 1 |
| ; MSAA-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP2]], i64 1 |
| ; MSAA-NEXT: [[I1:%.*]] = insertelement <3 x float> [[TMP11]], float [[TMP12]], i64 2 |
| ; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[TMP14:%.*]] = insertelement <3 x float> undef, float [[TMP13]], i64 0 |
| ; MSAA-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 |
| ; MSAA-NEXT: [[TMP16:%.*]] = insertelement <3 x float> [[TMP14]], float [[TMP15]], i64 1 |
| ; MSAA-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[TMP2]], i64 2 |
| ; MSAA-NEXT: [[I2:%.*]] = insertelement <3 x float> [[TMP16]], float [[TMP17]], i64 2 |
| ; MSAA-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[TMP19:%.*]] = insertelement <3 x float> undef, float [[TMP18]], i64 0 |
| ; MSAA-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP1]], i64 3 |
| ; MSAA-NEXT: [[TMP21:%.*]] = insertelement <3 x float> [[TMP19]], float [[TMP20]], i64 1 |
| ; MSAA-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP2]], i64 3 |
| ; MSAA-NEXT: [[I3:%.*]] = insertelement <3 x float> [[TMP21]], float [[TMP22]], i64 2 |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1 |
| ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[I3]], 3 |
| ; MSAA-NEXT: ret [4 x <3 x float>] [[I7]] |
| ; |
| main_body: |
| %i = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = insertvalue [4 x <3 x float>] undef, <3 x float> %i, 0 |
| %i5 = insertvalue [4 x <3 x float>] %i4, <3 x float> %i1, 1 |
| %i6 = insertvalue [4 x <3 x float>] %i5, <3 x float> %i2, 2 |
| %i7 = insertvalue [4 x <3 x float>] %i6, <3 x float> %i3, 3 |
| ret [4 x <3 x float>] %i7 |
| } |
| |
| |
| |
| |
| |
| |
| define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7_group1(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7_group1( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 5, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 6, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 [[S]], i32 [[T]], i32 7, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[I3]], 3 |
| ; NO-MSAA-NEXT: ret [4 x <3 x float>] [[I7]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x <3 x float>] @load_2dmsaa_v4v3f32_dmask7_group1( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 2, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 4, i32 [[S]], i32 [[T]], i32 4, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[TMP4:%.*]] = insertelement <3 x float> undef, float [[TMP3]], i64 0 |
| ; MSAA-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 |
| ; MSAA-NEXT: [[TMP6:%.*]] = insertelement <3 x float> [[TMP4]], float [[TMP5]], i64 1 |
| ; MSAA-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP2]], i64 0 |
| ; MSAA-NEXT: [[I:%.*]] = insertelement <3 x float> [[TMP6]], float [[TMP7]], i64 2 |
| ; MSAA-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[TMP9:%.*]] = insertelement <3 x float> undef, float [[TMP8]], i64 0 |
| ; MSAA-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP1]], i64 1 |
| ; MSAA-NEXT: [[TMP11:%.*]] = insertelement <3 x float> [[TMP9]], float [[TMP10]], i64 1 |
| ; MSAA-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP2]], i64 1 |
| ; MSAA-NEXT: [[I1:%.*]] = insertelement <3 x float> [[TMP11]], float [[TMP12]], i64 2 |
| ; MSAA-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[TMP14:%.*]] = insertelement <3 x float> undef, float [[TMP13]], i64 0 |
| ; MSAA-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 |
| ; MSAA-NEXT: [[TMP16:%.*]] = insertelement <3 x float> [[TMP14]], float [[TMP15]], i64 1 |
| ; MSAA-NEXT: [[TMP17:%.*]] = extractelement <4 x float> [[TMP2]], i64 2 |
| ; MSAA-NEXT: [[I2:%.*]] = insertelement <3 x float> [[TMP16]], float [[TMP17]], i64 2 |
| ; MSAA-NEXT: [[TMP18:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[TMP19:%.*]] = insertelement <3 x float> undef, float [[TMP18]], i64 0 |
| ; MSAA-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP1]], i64 3 |
| ; MSAA-NEXT: [[TMP21:%.*]] = insertelement <3 x float> [[TMP19]], float [[TMP20]], i64 1 |
| ; MSAA-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP2]], i64 3 |
| ; MSAA-NEXT: [[I3:%.*]] = insertelement <3 x float> [[TMP21]], float [[TMP22]], i64 2 |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x <3 x float>] undef, <3 x float> [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x <3 x float>] [[I4]], <3 x float> [[I1]], 1 |
| ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x <3 x float>] [[I5]], <3 x float> [[I2]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x <3 x float>] [[I6]], <3 x float> [[I3]], 3 |
| ; MSAA-NEXT: ret [4 x <3 x float>] [[I7]] |
| ; |
| main_body: |
| %i = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 4, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 5, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 6, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = call <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32 7, i32 %s, i32 %t, i32 7, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = insertvalue [4 x <3 x float>] undef, <3 x float> %i, 0 |
| %i5 = insertvalue [4 x <3 x float>] %i4, <3 x float> %i1, 1 |
| %i6 = insertvalue [4 x <3 x float>] %i5, <3 x float> %i2, 2 |
| %i7 = insertvalue [4 x <3 x float>] %i6, <3 x float> %i3, 3 |
| ret [4 x <3 x float>] %i7 |
| } |
| |
| define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_sections(<8 x i32> inreg %rsrc, float %vdata, i32 %s, i32 %t, i32 %fragid) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_sections( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], float [[VDATA:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: call void @llvm.amdgcn.image.store.2dmsaa.f32.i32.v8i32(float [[VDATA]], i32 1, i32 [[S]], i32 [[T]], i32 [[FRAGID]], <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; NO-MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_sections( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], float [[VDATA:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[FRAGID:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: call void @llvm.amdgcn.image.store.2dmsaa.f32.i32.v8i32(float [[VDATA]], i32 1, i32 [[S]], i32 [[T]], i32 [[FRAGID]], <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 |
| ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP1]], i64 3 |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| main_body: |
| %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| call void @llvm.amdgcn.image.store.2dmsaa.f32.i32(float %vdata, i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = insertvalue [4 x float] undef, float %i, 0 |
| %i5 = insertvalue [4 x float] %i4, float %i1, 1 |
| %i6 = insertvalue [4 x float] %i5, float %i2, 2 |
| %i7 = insertvalue [4 x float] %i6, float %i3, 3 |
| ret [4 x float] %i7 |
| } |
| |
| define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_blocks(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %cond) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_blocks( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[COND:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; NO-MSAA-NEXT: [[I8:%.*]] = trunc i32 [[COND]] to i1 |
| ; NO-MSAA-NEXT: br i1 [[I8]], label [[IF_EQUAL:%.*]], label [[IF_UNEQUAL:%.*]] |
| ; NO-MSAA: if_equal: |
| ; NO-MSAA-NEXT: [[I9:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I10:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I11:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I12:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I13:%.*]] = insertvalue [4 x float] undef, float [[I9]], 0 |
| ; NO-MSAA-NEXT: [[I14:%.*]] = insertvalue [4 x float] [[I13]], float [[I10]], 1 |
| ; NO-MSAA-NEXT: [[I15:%.*]] = insertvalue [4 x float] [[I14]], float [[I11]], 2 |
| ; NO-MSAA-NEXT: [[I16:%.*]] = insertvalue [4 x float] [[I15]], float [[I12]], 3 |
| ; NO-MSAA-NEXT: br label [[MERGE:%.*]] |
| ; NO-MSAA: if_unequal: |
| ; NO-MSAA-NEXT: [[I17:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I18:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I19:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 2, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I20:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 3, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I21:%.*]] = insertvalue [4 x float] undef, float [[I17]], 0 |
| ; NO-MSAA-NEXT: [[I22:%.*]] = insertvalue [4 x float] [[I21]], float [[I18]], 1 |
| ; NO-MSAA-NEXT: [[I23:%.*]] = insertvalue [4 x float] [[I22]], float [[I19]], 2 |
| ; NO-MSAA-NEXT: [[I24:%.*]] = insertvalue [4 x float] [[I23]], float [[I20]], 3 |
| ; NO-MSAA-NEXT: br label [[MERGE]] |
| ; NO-MSAA: merge: |
| ; NO-MSAA-NEXT: [[I25:%.*]] = phi [4 x float] [ [[I16]], [[IF_EQUAL]] ], [ [[I24]], [[IF_UNEQUAL]] ] |
| ; NO-MSAA-NEXT: ret [4 x float] [[I25]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_blocks( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC:%.*]], i32 [[S:%.*]], i32 [[T:%.*]], i32 [[COND:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 |
| ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; MSAA-NEXT: [[I8:%.*]] = trunc i32 [[COND]] to i1 |
| ; MSAA-NEXT: br i1 [[I8]], label [[IF_EQUAL:%.*]], label [[IF_UNEQUAL:%.*]] |
| ; MSAA: if_equal: |
| ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I9:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 |
| ; MSAA-NEXT: [[I10:%.*]] = extractelement <4 x float> [[TMP1]], i64 1 |
| ; MSAA-NEXT: [[I11:%.*]] = extractelement <4 x float> [[TMP1]], i64 2 |
| ; MSAA-NEXT: [[I12:%.*]] = extractelement <4 x float> [[TMP1]], i64 3 |
| ; MSAA-NEXT: [[I13:%.*]] = insertvalue [4 x float] undef, float [[I9]], 0 |
| ; MSAA-NEXT: [[I14:%.*]] = insertvalue [4 x float] [[I13]], float [[I10]], 1 |
| ; MSAA-NEXT: [[I15:%.*]] = insertvalue [4 x float] [[I14]], float [[I11]], 2 |
| ; MSAA-NEXT: [[I16:%.*]] = insertvalue [4 x float] [[I15]], float [[I12]], 3 |
| ; MSAA-NEXT: br label [[MERGE:%.*]] |
| ; MSAA: if_unequal: |
| ; MSAA-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I17:%.*]] = extractelement <4 x float> [[TMP2]], i64 0 |
| ; MSAA-NEXT: [[I18:%.*]] = extractelement <4 x float> [[TMP2]], i64 1 |
| ; MSAA-NEXT: [[I19:%.*]] = extractelement <4 x float> [[TMP2]], i64 2 |
| ; MSAA-NEXT: [[I20:%.*]] = extractelement <4 x float> [[TMP2]], i64 3 |
| ; MSAA-NEXT: [[I21:%.*]] = insertvalue [4 x float] undef, float [[I17]], 0 |
| ; MSAA-NEXT: [[I22:%.*]] = insertvalue [4 x float] [[I21]], float [[I18]], 1 |
| ; MSAA-NEXT: [[I23:%.*]] = insertvalue [4 x float] [[I22]], float [[I19]], 2 |
| ; MSAA-NEXT: [[I24:%.*]] = insertvalue [4 x float] [[I23]], float [[I20]], 3 |
| ; MSAA-NEXT: br label [[MERGE]] |
| ; MSAA: merge: |
| ; MSAA-NEXT: [[I25:%.*]] = phi [4 x float] [ [[I16]], [[IF_EQUAL]] ], [ [[I24]], [[IF_UNEQUAL]] ] |
| ; MSAA-NEXT: ret [4 x float] [[I25]] |
| ; |
| main_body: |
| %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i4 = insertvalue [4 x float] undef, float %i, 0 |
| %i5 = insertvalue [4 x float] %i4, float %i1, 1 |
| %i6 = insertvalue [4 x float] %i5, float %i2, 2 |
| %i7 = insertvalue [4 x float] %i6, float %i3, 3 |
| %i8 = trunc i32 %cond to i1 |
| br i1 %i8, label %if_equal, label %if_unequal |
| if_equal: |
| %i9 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i10 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i11 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i12 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i13 = insertvalue [4 x float] undef, float %i9, 0 |
| %i14 = insertvalue [4 x float] %i13, float %i10, 1 |
| %i15 = insertvalue [4 x float] %i14, float %i11, 2 |
| %i16 = insertvalue [4 x float] %i15, float %i12, 3 |
| br label %merge |
| if_unequal: |
| %i17 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) |
| %i18 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc, i32 0, i32 0) |
| %i19 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 2, <8 x i32> %rsrc, i32 0, i32 0) |
| %i20 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 3, <8 x i32> %rsrc, i32 0, i32 0) |
| %i21 = insertvalue [4 x float] undef, float %i17, 0 |
| %i22 = insertvalue [4 x float] %i21, float %i18, 1 |
| %i23 = insertvalue [4 x float] %i22, float %i19, 2 |
| %i24 = insertvalue [4 x float] %i23, float %i20, 3 |
| br label %merge |
| merge: |
| %i25 = phi [4 x float] [%i16, %if_equal], [%i24, %if_unequal] |
| ret [4 x float] %i25 |
| } |
| |
| define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1_different_rsrc(<8 x i32> inreg %rsrc1, <8 x i32> inreg %rsrc2, i32 %s, i32 %t) { |
| ; NO-MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1_different_rsrc( |
| ; NO-MSAA-SAME: <8 x i32> inreg [[RSRC1:%.*]], <8 x i32> inreg [[RSRC2:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; NO-MSAA-NEXT: main_body: |
| ; NO-MSAA-NEXT: [[I:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC1]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I1:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC1]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I2:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC2]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I3:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 1, <8 x i32> [[RSRC2]], i32 0, i32 0) |
| ; NO-MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; NO-MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; NO-MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; NO-MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; NO-MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| ; MSAA-LABEL: define amdgpu_ps [4 x float] @load_2dmsaa_v4f32_dmask1_different_rsrc( |
| ; MSAA-SAME: <8 x i32> inreg [[RSRC1:%.*]], <8 x i32> inreg [[RSRC2:%.*]], i32 [[S:%.*]], i32 [[T:%.*]]) #[[ATTR0]] { |
| ; MSAA-NEXT: main_body: |
| ; MSAA-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC1]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I:%.*]] = extractelement <4 x float> [[TMP0]], i64 0 |
| ; MSAA-NEXT: [[I1:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 |
| ; MSAA-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32.v8i32(i32 1, i32 [[S]], i32 [[T]], i32 0, <8 x i32> [[RSRC2]], i32 0, i32 0) |
| ; MSAA-NEXT: [[I2:%.*]] = extractelement <4 x float> [[TMP1]], i64 0 |
| ; MSAA-NEXT: [[I3:%.*]] = extractelement <4 x float> [[TMP1]], i64 1 |
| ; MSAA-NEXT: [[I4:%.*]] = insertvalue [4 x float] undef, float [[I]], 0 |
| ; MSAA-NEXT: [[I5:%.*]] = insertvalue [4 x float] [[I4]], float [[I1]], 1 |
| ; MSAA-NEXT: [[I6:%.*]] = insertvalue [4 x float] [[I5]], float [[I2]], 2 |
| ; MSAA-NEXT: [[I7:%.*]] = insertvalue [4 x float] [[I6]], float [[I3]], 3 |
| ; MSAA-NEXT: ret [4 x float] [[I7]] |
| ; |
| main_body: |
| %i = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc1, i32 0, i32 0) |
| %i1 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc1, i32 0, i32 0) |
| %i2 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc2, i32 0, i32 0) |
| %i3 = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 %s, i32 %t, i32 1, <8 x i32> %rsrc2, i32 0, i32 0) |
| %i4 = insertvalue [4 x float] undef, float %i, 0 |
| %i5 = insertvalue [4 x float] %i4, float %i1, 1 |
| %i6 = insertvalue [4 x float] %i5, float %i2, 2 |
| %i7 = insertvalue [4 x float] %i6, float %i3, 3 |
| ret [4 x float] %i7 |
| } |
| |
| declare float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0 |
| declare <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0 |
| declare <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0 |
| |
| declare float @llvm.amdgcn.image.load.2darraymsaa.f32.i32.v8i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 |
| declare <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32.v8i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 |
| declare <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32.v8i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0 |
| |
| declare half @llvm.amdgcn.image.load.2dmsaa.f16.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0 |
| |
| declare void @llvm.amdgcn.image.store.2dmsaa.f32.i32.v8i32(float, i32, i32, i32, i32, <8 x i32>, i32, i32) |
| |
| attributes #0 = { nounwind readonly willreturn } |