| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -S -passes=instcombine -mtriple=amdgcn-amd-amdhsa %s | FileCheck %s |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.raw.buffer.load |
| ; -------------------------------------------------------------------- |
| |
| ; Baseline: a scalar f32 load whose result is returned directly; the call is expected to be left unchanged. |
| define amdgpu_ps float @raw_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @raw_buffer_load_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| ret float %data |
| } |
| |
| ; Baseline: a <1 x float> load whose whole result is returned; the call is expected to be left unchanged. |
| define amdgpu_ps <1 x float> @raw_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @raw_buffer_load_v1f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <1 x float> [[DATA]] |
| ; |
| %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| ret <1 x float> %data |
| } |
| |
| ; Baseline: a <2 x float> load whose whole result is returned; the call is expected to be left unchanged. |
| define amdgpu_ps <2 x float> @raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @raw_buffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| ret <2 x float> %data |
| } |
| |
| ; Baseline: a <4 x float> load whose whole result is returned; the call is expected to be left unchanged. |
| define amdgpu_ps <4 x float> @raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @raw_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <4 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| ret <4 x float> %data |
| } |
| |
| ; Only element 0 of the <2 x float> load is used; expected result is a scalar f32 load at the original offset. |
| define amdgpu_ps float @extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only element 1 of the <2 x float> load is used; expected result is a scalar f32 load with 4 added to %ofs. |
| define amdgpu_ps float @extract_elt1_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <2 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only element 0 of the <4 x float> load is used; expected result is a scalar f32 load at the original offset. |
| define amdgpu_ps float @extract_elt0_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only element 1 of the <4 x float> load is used; expected result is a scalar f32 load with 4 added to %ofs. |
| define amdgpu_ps float @extract_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only element 2 of the <4 x float> load is used; expected result is a scalar f32 load with 8 added to %ofs. |
| define amdgpu_ps float @extract_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_raw_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt2 = extractelement <4 x float> %data, i32 2 |
| ret float %elt2 |
| } |
| |
| ; Only element 3 of the <4 x float> load is used; expected result is a scalar f32 load with 12 added to %ofs. |
| define amdgpu_ps float @extract_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt3 = extractelement <4 x float> %data, i32 3 |
| ret float %elt3 |
| } |
| |
| ; Elements 0-1 of the <4 x float> load are selected by a shuffle; expected result is a <2 x float> load at the original offset. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Elements 1-2 of the <4 x float> load are selected by a shuffle; expected result is a <2 x float> load with 4 added to %ofs. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Elements 2-3 of the <4 x float> load are selected by a shuffle; expected result is a <2 x float> load with 8 added to %ofs. |
| define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8 |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ret <2 x float> %shuf |
| } |
| |
| ; Elements 0-2 of the <4 x float> load are selected by a shuffle; expected result is a <3 x float> load at the original offset. |
| define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| ret <3 x float> %shuf |
| } |
| |
| ; Elements 1-3 of the <4 x float> load are selected by a shuffle; expected result is a <3 x float> load with 4 added to %ofs. |
| define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Negative case: the shuffle uses elements 0, 2 and 3 (element 1 is skipped), and the full <4 x float> load plus shuffle are expected to remain. |
| define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Only element 0 of the <3 x float> load is used; expected result is a scalar f32 load at the original offset. |
| define amdgpu_ps float @extract_elt0_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <3 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only element 1 of the <3 x float> load is used; expected result is a scalar f32 load with 4 added to %ofs. |
| define amdgpu_ps float @extract_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only element 2 of the <3 x float> load is used; expected result is a scalar f32 load with 8 added to %ofs. |
| define amdgpu_ps float @extract_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_raw_buffer_load_v3f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt2 = extractelement <3 x float> %data, i32 2 |
| ret float %elt2 |
| } |
| |
| ; Elements 0-1 of the <3 x float> load are selected by a shuffle; expected result is a <2 x float> load at the original offset. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Elements 1-2 of the <3 x float> load are selected by a shuffle; expected result is a <2 x float> load with 4 added to %ofs. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_v3f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Element 0 is extracted after bitcasting the <4 x float> load to <4 x i32>; expected result is a scalar f32 load followed by a scalar bitcast to i32. |
| define i32 @extract0_bitcast_raw_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4f32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32 |
| ; CHECK-NEXT: ret i32 [[VAR2]] |
| ; |
| %var = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %var1 = bitcast <4 x float> %var to <4 x i32> |
| %var2 = extractelement <4 x i32> %var1, i32 0 |
| ret i32 %var2 |
| } |
| |
| ; Element 0 is extracted after bitcasting the <4 x i32> load to <4 x float>; expected result is a scalar i32 load followed by a scalar bitcast to float. |
| define float @extract0_bitcast_raw_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_v4i32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[VAR2:%.*]] = bitcast i32 [[VAR]] to float |
| ; CHECK-NEXT: ret float [[VAR2]] |
| ; |
| %var = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %var1 = bitcast <4 x i32> %var to <4 x float> |
| %var2 = extractelement <4 x float> %var1, i32 0 |
| ret float %var2 |
| } |
| |
| ; The original call carries !fpmath metadata; the narrowed scalar f32 load is expected to still carry an !fpmath attachment. |
| define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0:![0-9]+]] |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Declarations of the llvm.amdgcn.raw.buffer.load overloads (f32 scalar/vector and v4i32) called by the tests above. |
| declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #1 |
| declare <1 x float> @llvm.amdgcn.raw.buffer.load.v1f32(<4 x i32>, i32, i32, i32) #1 |
| declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32) #1 |
| declare <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32>, i32, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #1 |
| |
| declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32) #1 |
| |
| ; Only element 0 of the <2 x half> load is used; expected result is a scalar f16 load at the original offset. |
| define amdgpu_ps half @extract_elt0_raw_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2f16( |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <2 x half> %data, i32 0 |
| ret half %elt0 |
| } |
| |
| ; Only element 1 of the <2 x half> load is used; expected result is a scalar f16 load with 2 added to %ofs. |
| define amdgpu_ps half @extract_elt1_raw_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <2 x half> %data, i32 1 |
| ret half %elt1 |
| } |
| |
| ; Only element 1 of the <3 x half> load is used; expected result is a scalar f16 load with 2 added to %ofs. |
| define amdgpu_ps half @extract_elt1_raw_buffer_load_v3f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <3 x half> @llvm.amdgcn.raw.buffer.load.v3f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x half> %data, i32 1 |
| ret half %elt1 |
| } |
| |
| ; Only element 1 of the <4 x half> load is used; expected result is a scalar f16 load with 2 added to %ofs. |
| define amdgpu_ps half @extract_elt1_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x half> %data, i32 1 |
| ret half %elt1 |
| } |
| |
| ; Only element 3 of the <4 x half> load is used; expected result is a scalar f16 load with 6 added to %ofs. |
| define amdgpu_ps half @extract_elt3_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt3 = extractelement <4 x half> %data, i32 3 |
| ret half %elt3 |
| } |
| |
| ; Elements 0-1 of the <4 x half> load are selected by a shuffle; expected result is a <2 x half> load at the original offset. |
| define amdgpu_ps <2 x half> @extract_elt0_elt1_raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4f16( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x half> [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x half> %shuf |
| } |
| |
| ; Declarations of the half-typed llvm.amdgcn.raw.buffer.load overloads called by the f16 tests above. |
| declare half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32>, i32, i32, i32) #1 |
| declare <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32>, i32, i32, i32) #1 |
| declare <3 x half> @llvm.amdgcn.raw.buffer.load.v3f16(<4 x i32>, i32, i32, i32) #1 |
| declare <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32>, i32, i32, i32) #1 |
| |
| ; Only element 0 of the <2 x i8> load is used; expected result is a scalar i8 load at the original offset. |
| define amdgpu_ps i8 @extract_elt0_raw_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_buffer_load_v2i8( |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <2 x i8> %data, i32 0 |
| ret i8 %elt0 |
| } |
| |
| ; Only element 1 of the <2 x i8> load is used; expected result is a scalar i8 load with 1 added to %ofs. |
| define amdgpu_ps i8 @extract_elt1_raw_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v2i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <2 x i8> %data, i32 1 |
| ret i8 %elt1 |
| } |
| |
| ; Only element 1 of the <3 x i8> load is used; expected result is a scalar i8 load with 1 added to %ofs. |
| define amdgpu_ps i8 @extract_elt1_raw_buffer_load_v3i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v3i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <3 x i8> @llvm.amdgcn.raw.buffer.load.v3i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x i8> %data, i32 1 |
| ret i8 %elt1 |
| } |
| |
| ; Only element 1 of the <4 x i8> load is used; expected result is a scalar i8 load with 1 added to %ofs. |
| define amdgpu_ps i8 @extract_elt1_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_buffer_load_v4i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x i8> %data, i32 1 |
| ret i8 %elt1 |
| } |
| |
| ; Only element 3 of the <4 x i8> load is used; expected result is a scalar i8 load with 3 added to %ofs. |
| define amdgpu_ps i8 @extract_elt3_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt3_raw_buffer_load_v4i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt3 = extractelement <4 x i8> %data, i32 3 |
| ret i8 %elt3 |
| } |
| |
| ; Elements 0-1 of the <4 x i8> load are selected by a shuffle; expected result is a <2 x i8> load at the original offset. |
| define amdgpu_ps <2 x i8> @extract_elt0_elt1_raw_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_v4i8( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x i8> [[DATA]] |
| ; |
| %data = call <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x i8> %shuf |
| } |
| |
| ; Declarations of the i8-typed llvm.amdgcn.raw.buffer.load overloads called by the i8 tests above. |
| declare i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32>, i32, i32, i32) #1 |
| declare <2 x i8> @llvm.amdgcn.raw.buffer.load.v2i8(<4 x i32>, i32, i32, i32) #1 |
| declare <3 x i8> @llvm.amdgcn.raw.buffer.load.v3i8(<4 x i32>, i32, i32, i32) #1 |
| declare <4 x i8> @llvm.amdgcn.raw.buffer.load.v4i8(<4 x i32>, i32, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.raw.ptr.buffer.load |
| ; -------------------------------------------------------------------- |
| |
| ; Baseline for the ptr addrspace(8) resource form: a scalar f32 load returned directly; the call is expected to be left unchanged. |
| define amdgpu_ps float @raw_ptr_buffer_load_f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @raw_ptr_buffer_load_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| ret float %data |
| } |
| |
| ; Baseline: a <1 x float> raw.ptr load whose whole result is returned; the call is expected to be left unchanged. |
| define amdgpu_ps <1 x float> @raw_ptr_buffer_load_v1f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @raw_ptr_buffer_load_v1f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.v1f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <1 x float> [[DATA]] |
| ; |
| %data = call <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.v1f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| ret <1 x float> %data |
| } |
| |
| ; Baseline: a <2 x float> raw.ptr load whose whole result is returned; the call is expected to be left unchanged. |
| define amdgpu_ps <2 x float> @raw_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @raw_ptr_buffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| ret <2 x float> %data |
| } |
| |
| ; Baseline: a <4 x float> raw.ptr load whose whole result is returned; the call is expected to be left unchanged. |
| define amdgpu_ps <4 x float> @raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @raw_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <4 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| ret <4 x float> %data |
| } |
| |
| ; Only element 0 of the <2 x float> raw.ptr load is used; expected result is a scalar f32 load at the original offset. |
| define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only element 1 of the <2 x float> raw.ptr load is used; expected result is a scalar f32 load with 4 added to %ofs. |
| define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v2f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <2 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only element 0 of the <4 x float> raw.ptr load is used; expected result is a scalar f32 load at the original offset. |
| define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only element 1 of the <4 x float> raw.ptr load is used; expected result is a scalar f32 load with 4 added to %ofs. |
| define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only element 2 of the <4 x float> raw.ptr load is used; expected result is a scalar f32 load with 8 added to %ofs. |
| define amdgpu_ps float @extract_elt2_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_raw_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt2 = extractelement <4 x float> %data, i32 2 |
| ret float %elt2 |
| } |
| |
| ; Only element 3 of the <4 x float> raw.ptr load is used; expected result is a scalar f32 load with 12 added to %ofs. |
| define amdgpu_ps float @extract_elt3_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt3_raw_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt3 = extractelement <4 x float> %data, i32 3 |
| ret float %elt3 |
| } |
| |
| ; Elements 0-1 of the <4 x float> raw.ptr load are selected by a shuffle; expected result is a <2 x float> load at the original offset. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Elements 1-2 of the <4 x float> raw.ptr load are selected by a shuffle; expected result is a <2 x float> load with 4 added to %ofs. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Elements 2-3 of the <4 x float> raw.ptr load are selected by a shuffle; expected result is a <2 x float> load with 8 added to %ofs. |
| define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_elt3_raw_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8 |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ret <2 x float> %shuf |
| } |
| |
| ; Elements 0-2 of the <4 x float> raw.ptr load are selected by a shuffle; expected result is a <3 x float> load at the original offset. |
| define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| ret <3 x float> %shuf |
| } |
| |
| ; Elements 1-3 of the <4 x float> raw.ptr load are selected by a shuffle; expected result is a <3 x float> load with 4 added to %ofs. |
| define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Negative test: the used elements (0, 2, 3) are not contiguous, and the checks expect the v4f32 load and shuffle to stay as written. |
| define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Only element 0 of the v3f32 load is used: the checks expect a scalar f32 load at the unchanged offset. |
| define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <3 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only element 1 of the v3f32 load is used: the checks expect a scalar f32 load with 4 added to the offset. |
| define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v3f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only element 2 of the v3f32 load is used: the checks expect a scalar f32 load with 8 added to the offset. |
| define amdgpu_ps float @extract_elt2_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_raw_ptr_buffer_load_v3f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt2 = extractelement <3 x float> %data, i32 2 |
| ret float %elt2 |
| } |
| |
| ; Only elements 0 and 1 of the v3f32 load are used: the checks expect a v2f32 load at the unchanged offset. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Only elements 1 and 2 of the v3f32 load are used: the checks expect a v2f32 load with 4 added to the offset. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_buffer_load_v3f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Element 0 is extracted through a <4 x float> -> <4 x i32> bitcast: the checks expect a scalar f32 load followed by a scalar bitcast to i32. |
| define i32 @extract0_bitcast_raw_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_raw_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32 |
| ; CHECK-NEXT: ret i32 [[VAR2]] |
| ; |
| %var = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %var1 = bitcast <4 x float> %var to <4 x i32> |
| %var2 = extractelement <4 x i32> %var1, i32 0 |
| ret i32 %var2 |
| } |
| |
| ; Element 0 is extracted through a <4 x i32> -> <4 x float> bitcast: the checks expect a scalar i32 load followed by a scalar bitcast to float. |
| define float @extract0_bitcast_raw_ptr_buffer_load_v4i32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_raw_ptr_buffer_load_v4i32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[VAR2:%.*]] = bitcast i32 [[VAR]] to float |
| ; CHECK-NEXT: ret float [[VAR2]] |
| ; |
| %var = call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %var1 = bitcast <4 x i32> %var to <4 x float> |
| %var2 = extractelement <4 x float> %var1, i32 0 |
| ret float %var2 |
| } |
| |
| ; The v2f32 load carries !fpmath metadata: the checks expect the narrowed scalar f32 load to still carry !fpmath. |
| define amdgpu_ps float @preserve_metadata_extract_elt0_raw_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_ptr_buffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]] |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Declarations of the llvm.amdgcn.raw.ptr.buffer.load overloads returning float, float vectors and <4 x i32>. |
| declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #1 |
| declare <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.v1f32(ptr addrspace(8), i32, i32, i32) #1 |
| declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32) #1 |
| declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8), i32, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #1 |
| |
| declare <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32) #1 |
| |
| ; Only element 0 of the v2f16 load is used: the checks expect a scalar f16 load at the unchanged offset. |
| define amdgpu_ps half @extract_elt0_raw_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v2f16( |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <2 x half> %data, i32 0 |
| ret half %elt0 |
| } |
| |
| ; Only element 1 of the v2f16 load is used: the checks expect a scalar f16 load with 2 added to the offset. |
| define amdgpu_ps half @extract_elt1_raw_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v2f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <2 x half> %data, i32 1 |
| ret half %elt1 |
| } |
| |
| ; Only element 1 of the v3f16 load is used: the checks expect a scalar f16 load with 2 added to the offset. |
| define amdgpu_ps half @extract_elt1_raw_ptr_buffer_load_v3f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v3f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.v3f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x half> %data, i32 1 |
| ret half %elt1 |
| } |
| |
| ; Only element 1 of the v4f16 load is used: the checks expect a scalar f16 load with 2 added to the offset. |
| define amdgpu_ps half @extract_elt1_raw_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v4f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x half> %data, i32 1 |
| ret half %elt1 |
| } |
| |
| ; Only element 3 of the v4f16 load is used: the checks expect a scalar f16 load with 6 added to the offset. |
| define amdgpu_ps half @extract_elt3_raw_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt3_raw_ptr_buffer_load_v4f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt3 = extractelement <4 x half> %data, i32 3 |
| ret half %elt3 |
| } |
| |
| ; Only elements 0 and 1 of the v4f16 load are used: the checks expect a v2f16 load at the unchanged offset. |
| define amdgpu_ps <2 x half> @extract_elt0_elt1_raw_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_v4f16( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x half> [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x half> %shuf |
| } |
| |
| ; Declarations of the llvm.amdgcn.raw.ptr.buffer.load overloads returning half and half vectors. |
| declare half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32) #1 |
| declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32) #1 |
| declare <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.v3f16(ptr addrspace(8), i32, i32, i32) #1 |
| declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32) #1 |
| |
| ; Only element 0 of the v2i8 load is used: the checks expect a scalar i8 load at the unchanged offset. |
| define amdgpu_ps i8 @extract_elt0_raw_ptr_buffer_load_v2i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_v2i8( |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <2 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v2i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <2 x i8> %data, i32 0 |
| ret i8 %elt0 |
| } |
| |
| ; Only element 1 of the v2i8 load is used: the checks expect a scalar i8 load with 1 added to the offset. |
| define amdgpu_ps i8 @extract_elt1_raw_ptr_buffer_load_v2i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v2i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <2 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v2i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <2 x i8> %data, i32 1 |
| ret i8 %elt1 |
| } |
| |
| ; Only element 1 of the v3i8 load is used: the checks expect a scalar i8 load with 1 added to the offset. |
| define amdgpu_ps i8 @extract_elt1_raw_ptr_buffer_load_v3i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v3i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <3 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v3i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x i8> %data, i32 1 |
| ret i8 %elt1 |
| } |
| |
| ; Only element 1 of the v4i8 load is used: the checks expect a scalar i8 load with 1 added to the offset. |
| define amdgpu_ps i8 @extract_elt1_raw_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_v4i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <4 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x i8> %data, i32 1 |
| ret i8 %elt1 |
| } |
| |
| ; Only element 3 of the v4i8 load is used: the checks expect a scalar i8 load with 3 added to the offset. |
| define amdgpu_ps i8 @extract_elt3_raw_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt3_raw_ptr_buffer_load_v4i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <4 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt3 = extractelement <4 x i8> %data, i32 3 |
| ret i8 %elt3 |
| } |
| |
| ; Only elements 0 and 1 of the v4i8 load are used: the checks expect a v2i8 load at the unchanged offset. |
| define amdgpu_ps <2 x i8> @extract_elt0_elt1_raw_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_v4i8( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v2i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x i8> [[DATA]] |
| ; |
| %data = call <4 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x i8> %shuf |
| } |
| |
| ; Declarations of the llvm.amdgcn.raw.ptr.buffer.load overloads returning i8 and i8 vectors. |
| declare i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32) #1 |
| declare <2 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v2i8(ptr addrspace(8), i32, i32, i32) #1 |
| declare <3 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v3i8(ptr addrspace(8), i32, i32, i32) #1 |
| declare <4 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v4i8(ptr addrspace(8), i32, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.s.buffer.load |
| ; -------------------------------------------------------------------- |
| |
| ; Baseline: the scalar f32 result is returned directly, and the checks expect the call to stay as written. |
| define amdgpu_ps float @s_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @s_buffer_load_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| ret float %data |
| } |
| |
| ; Baseline: the whole v2f32 result is returned, and the checks expect the call to stay as written. |
| define amdgpu_ps <2 x float> @s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @s_buffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| ret <2 x float> %data |
| } |
| |
| ; Baseline: the whole v4f32 result is returned, and the checks expect the call to stay as written. |
| define amdgpu_ps <4 x float> @s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @s_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <4 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| ret <4 x float> %data |
| } |
| |
| ; Only element 0 of the v2f32 load is used: the checks expect a scalar f32 load at the unchanged offset. |
| define amdgpu_ps float @extract_elt0_s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_s_buffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only element 1 of the v2f32 load is used: the checks expect a scalar f32 load with 4 added to the offset. |
| define amdgpu_ps float @extract_elt1_s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_s_buffer_load_v2f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %elt1 = extractelement <2 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only element 0 of the v4f32 load is used: the checks expect a scalar f32 load at the unchanged offset. |
| define amdgpu_ps float @extract_elt0_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_s_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only element 1 of the v4f32 load is used: the checks expect a scalar f32 load with 4 added to the offset. |
| define amdgpu_ps float @extract_elt1_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_s_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only element 2 of the v4f32 load is used: the checks expect a scalar f32 load with 8 added to the offset. |
| define amdgpu_ps float @extract_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_s_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %elt2 = extractelement <4 x float> %data, i32 2 |
| ret float %elt2 |
| } |
| |
| ; Only element 3 of the v4f32 load is used: the checks expect a scalar f32 load with 12 added to the offset. |
| define amdgpu_ps float @extract_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt3_s_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %elt3 = extractelement <4 x float> %data, i32 3 |
| ret float %elt3 |
| } |
| |
| ; Only elements 0 and 1 of the v4f32 load are used: the checks expect a v2f32 load at the unchanged offset. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Only elements 1 and 2 of the v4f32 load are used: the checks expect a v2f32 load with 4 added to the offset. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_s_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Only elements 2 and 3 of the v4f32 load are used: the checks expect a v2f32 load with 8 added to the offset. |
| define amdgpu_ps <2 x float> @extract_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_elt3_s_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8 |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ret <2 x float> %shuf |
| } |
| |
| ; Only the leading three elements of the v4f32 load are used: the checks expect a v3f32 load at the unchanged offset. |
| define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_elt2_s_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| ret <3 x float> %shuf |
| } |
| |
| ; Negative test: the used elements (0, 2, 3) are not contiguous, and the checks expect the v4f32 load and shuffle to stay as written. |
| define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt2_elt3_s_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Only element 0 of the v3f32 load is used: the checks expect a scalar f32 load at the unchanged offset. |
| define amdgpu_ps float @extract_elt0_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_s_buffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %elt0 = extractelement <3 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only element 1 of the v3f32 load is used: the checks expect a scalar f32 load with 4 added to the offset. |
| define amdgpu_ps float @extract_elt1_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_s_buffer_load_v3f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only element 2 of the v3f32 load is used: the checks expect a scalar f32 load with 8 added to the offset. |
| define amdgpu_ps float @extract_elt2_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_s_buffer_load_v3f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %elt2 = extractelement <3 x float> %data, i32 2 |
| ret float %elt2 |
| } |
| |
| ; Only elements 0 and 1 of the v3f32 load are used: the checks expect a v2f32 load at the unchanged offset. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Only elements 1 and 2 of the v3f32 load are used: the checks expect a v2f32 load with 4 added to the offset. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_s_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_s_buffer_load_v3f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Do not trim to vec3 s_buffer_load in instcombine, as the load will most likely be widened |
| ; to vec4 anyway during lowering. |
| ; Negative test: the trailing three elements are used, and the checks expect the v4f32 load and shuffle to stay as written. |
| define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_elt3_s_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Element 0 is extracted through a <4 x float> -> <4 x i32> bitcast: the checks expect a scalar f32 load followed by a scalar bitcast to i32. |
| define i32 @extract0_bitcast_s_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_s_buffer_load_v4f32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32 |
| ; CHECK-NEXT: ret i32 [[VAR2]] |
| ; |
| %var = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %var1 = bitcast <4 x float> %var to <4 x i32> |
| %var2 = extractelement <4 x i32> %var1, i32 0 |
| ret i32 %var2 |
| } |
| |
| ; Element 0 is extracted through a <4 x i32> -> <4 x float> bitcast: the checks expect a scalar i32 load followed by a scalar bitcast to float. |
| define float @extract0_bitcast_s_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_s_buffer_load_v4i32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[VAR2:%.*]] = bitcast i32 [[VAR]] to float |
| ; CHECK-NEXT: ret float [[VAR2]] |
| ; |
| %var = call <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %var1 = bitcast <4 x i32> %var to <4 x float> |
| %var2 = extractelement <4 x float> %var1, i32 0 |
| ret float %var2 |
| } |
| |
| ; The v2f32 load carries !fpmath metadata: the checks expect the narrowed scalar f32 load to still carry !fpmath. |
| define amdgpu_ps float @preserve_metadata_extract_elt0_s_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @preserve_metadata_extract_elt0_s_buffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0), !fpmath [[META0]] |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 0), !fpmath !0 |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Declarations of the llvm.amdgcn.s.buffer.load overloads returning float, float vectors and <4 x i32>. |
| declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1 |
| declare <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32>, i32, i32) #1 |
| declare <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32>, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32) #1 |
| declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32) #1 |
| |
| ; Only element 0 of the v2f16 load is used: the checks expect a scalar f16 load at the unchanged offset. |
| define amdgpu_ps half @extract_elt0_s_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_s_buffer_load_v2f16( |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %elt0 = extractelement <2 x half> %data, i32 0 |
| ret half %elt0 |
| } |
| |
| ; Only element 1 of the v2f16 load is used: the checks expect a scalar f16 load with 2 added to the offset. |
| define amdgpu_ps half @extract_elt1_s_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_s_buffer_load_v2f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %elt1 = extractelement <2 x half> %data, i32 1 |
| ret half %elt1 |
| } |
| |
| ; Only element 1 of the v3f16 load is used: the checks expect a scalar f16 load with 2 added to the offset. |
| define amdgpu_ps half @extract_elt1_s_buffer_load_v3f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_s_buffer_load_v3f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <3 x half> @llvm.amdgcn.s.buffer.load.v3f16(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %elt1 = extractelement <3 x half> %data, i32 1 |
| ret half %elt1 |
| } |
| |
| ; Only element 1 of the v4f16 load is used: the checks expect a scalar f16 load with 2 added to the offset. |
| define amdgpu_ps half @extract_elt1_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_s_buffer_load_v4f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %elt1 = extractelement <4 x half> %data, i32 1 |
| ret half %elt1 |
| } |
| |
| |
| ; Only element 3 of the v4f16 load is used: the checks expect a scalar f16 load with 6 added to the offset. |
| define amdgpu_ps half @extract_elt3_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt3_s_buffer_load_v4f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.s.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %elt3 = extractelement <4 x half> %data, i32 3 |
| ret half %elt3 |
| } |
| |
| ; Only elements 0 and 1 of the v4f16 load are used: the checks expect a v2f16 load at the unchanged offset. |
| define amdgpu_ps <2 x half> @extract_elt0_elt1_s_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v4f16( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x half> [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x half> %shuf |
| } |
| |
| ; Declarations of the llvm.amdgcn.s.buffer.load overloads returning half and half vectors. |
| declare half @llvm.amdgcn.s.buffer.load.f16(<4 x i32>, i32, i32) #1 |
| declare <2 x half> @llvm.amdgcn.s.buffer.load.v2f16(<4 x i32>, i32, i32) #1 |
| declare <3 x half> @llvm.amdgcn.s.buffer.load.v3f16(<4 x i32>, i32, i32) #1 |
| declare <4 x half> @llvm.amdgcn.s.buffer.load.v4f16(<4 x i32>, i32, i32) #1 |
| |
| ; Extract element 0 of a <2 x i8> s.buffer.load. The CHECK lines expect a |
| ; scalar i8 load at the unchanged offset. |
| define amdgpu_ps i8 @extract_elt0_s_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_s_buffer_load_v2i8( |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %elt0 = extractelement <2 x i8> %data, i32 0 |
| ret i8 %elt0 |
| } |
| |
| ; Extract element 1 of a <2 x i8> s.buffer.load. The CHECK lines expect a |
| ; scalar i8 load whose offset operand is the original offset plus 1. |
| define amdgpu_ps i8 @extract_elt1_s_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_s_buffer_load_v2i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %elt1 = extractelement <2 x i8> %data, i32 1 |
| ret i8 %elt1 |
| } |
| |
| ; Extract element 1 of a <3 x i8> s.buffer.load. The CHECK lines expect a |
| ; scalar i8 load whose offset operand is the original offset plus 1. |
| define amdgpu_ps i8 @extract_elt1_s_buffer_load_v3i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_s_buffer_load_v3i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <3 x i8> @llvm.amdgcn.s.buffer.load.v3i8(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %elt1 = extractelement <3 x i8> %data, i32 1 |
| ret i8 %elt1 |
| } |
| |
| ; Extract element 1 of a <4 x i8> s.buffer.load. The CHECK lines expect a |
| ; scalar i8 load whose offset operand is the original offset plus 1. |
| define amdgpu_ps i8 @extract_elt1_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_s_buffer_load_v4i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %elt1 = extractelement <4 x i8> %data, i32 1 |
| ret i8 %elt1 |
| } |
| |
| ; Extract element 3 of a <4 x i8> s.buffer.load. The CHECK lines expect a |
| ; scalar i8 load whose offset operand is the original offset plus 3. |
| ; Note: the extracted value is named %elt1 even though the index is 3. |
| define amdgpu_ps i8 @extract_elt3_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt3_s_buffer_load_v4i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[TMP1]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %elt1 = extractelement <4 x i8> %data, i32 3 |
| ret i8 %elt1 |
| } |
| |
| ; Keep only elements 0 and 1 of a <4 x i8> s.buffer.load. The CHECK lines |
| ; expect a <2 x i8> load at the unchanged offset and no remaining shuffle. |
| define amdgpu_ps <2 x i8> @extract_elt0_elt1_s_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %ofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_s_buffer_load_v4i8( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x i8> [[DATA]] |
| ; |
| %data = call <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32> %rsrc, i32 %ofs, i32 0) |
| %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x i8> %shuf |
| } |
| |
| ; Declarations of the i8-typed s.buffer.load overloads (scalar and 2-, 3-, |
| ; 4-element vectors). Attribute group #1 is defined outside this part of the file. |
| declare i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32>, i32, i32) #1 |
| declare <2 x i8> @llvm.amdgcn.s.buffer.load.v2i8(<4 x i32>, i32, i32) #1 |
| declare <3 x i8> @llvm.amdgcn.s.buffer.load.v3i8(<4 x i32>, i32, i32) #1 |
| declare <4 x i8> @llvm.amdgcn.s.buffer.load.v4i8(<4 x i32>, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.raw.buffer.load.format |
| ; -------------------------------------------------------------------- |
| |
| ; Baseline: a scalar float format load whose result is returned directly. |
| ; The CHECK lines expect the call to be left unchanged. |
| define amdgpu_ps float @raw_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @raw_buffer_load_format_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| ret float %data |
| } |
| |
| ; Baseline: a <1 x float> format load whose result is returned directly. |
| ; The CHECK lines expect the call to be left unchanged. |
| define amdgpu_ps <1 x float> @raw_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @raw_buffer_load_format_v1f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <1 x float> [[DATA]] |
| ; |
| %data = call <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| ret <1 x float> %data |
| } |
| |
| ; Baseline: a <2 x float> format load whose result is returned directly. |
| ; The CHECK lines expect the call to be left unchanged. |
| define amdgpu_ps <2 x float> @raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @raw_buffer_load_format_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| ret <2 x float> %data |
| } |
| |
| ; Baseline: a <4 x float> format load whose result is returned directly. |
| ; The CHECK lines expect the call to be left unchanged. |
| define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @raw_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <4 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| ret <4 x float> %data |
| } |
| |
| ; Extract element 0 of a <2 x float> format load. The CHECK lines expect the |
| ; load to become a scalar float load with the same operands. |
| define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Extract element 1 of a <2 x float> format load. The CHECK lines expect the |
| ; <2 x float> load and the extractelement to both remain. |
| define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <2 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Extract element 0 of a <4 x float> format load. The CHECK lines expect the |
| ; load to become a scalar float load with the same operands. |
| define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Extract element 1 of a <4 x float> format load. The CHECK lines expect the |
| ; load shortened to <2 x float>, with the extract of element 1 kept. |
| define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Extract element 2 of a <4 x float> format load. The CHECK lines expect the |
| ; load shortened to <3 x float>, with the extract of element 2 kept. |
| ; Note: the extracted value is named %elt1 even though the index is 2. |
| define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 2 |
| ret float %elt1 |
| } |
| |
| ; Extract element 3 (the last one) of a <4 x float> format load. The CHECK |
| ; lines expect the <4 x float> load and the extractelement to both remain. |
| ; Note: the extracted value is named %elt1 even though the index is 3. |
| define amdgpu_ps float @extract_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt3_raw_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 3 |
| ret float %elt1 |
| } |
| |
| ; Keep only elements 0 and 1 of a <4 x float> format load. The CHECK lines |
| ; expect a <2 x float> load and no remaining shuffle. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Keep elements 1 and 2 of a <4 x float> format load. The CHECK lines expect |
| ; the load shortened to <3 x float>, with a <1, 2> shuffle still applied. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Keep elements 2 and 3 of a <4 x float> format load. Because the last element |
| ; is used, the CHECK lines expect the <4 x float> load and the shuffle unchanged. |
| define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_elt3_raw_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ret <2 x float> %shuf |
| } |
| |
| ; Keep elements 0, 1 and 2 of a <4 x float> format load. The CHECK lines |
| ; expect a <3 x float> load and no remaining shuffle. |
| define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| ret <3 x float> %shuf |
| } |
| |
| ; Keep elements 1, 2 and 3 of a <4 x float> format load. Because the last |
| ; element is used, the CHECK lines expect the load and the shuffle unchanged. |
| define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Keep elements 0, 2 and 3 of a <4 x float> format load (element 1 unused). |
| ; The CHECK lines expect the <4 x float> load and the shuffle unchanged. |
| define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Extract element 0 of a <3 x float> format load. The CHECK lines expect the |
| ; load to become a scalar float load with the same operands. |
| define amdgpu_ps float @extract_elt0_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <3 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Extract element 1 of a <3 x float> format load. The CHECK lines expect the |
| ; load shortened to <2 x float>, with the extract of element 1 kept. |
| define amdgpu_ps float @extract_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Extract element 2 (the last one) of a <3 x float> format load. The CHECK |
| ; lines expect the <3 x float> load and the extractelement to both remain. |
| ; Note: the extracted value is named %elt1 even though the index is 2. |
| define amdgpu_ps float @extract_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_raw_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 2 |
| ret float %elt1 |
| } |
| |
| ; Keep only elements 0 and 1 of a <3 x float> format load. The CHECK lines |
| ; expect a <2 x float> load and no remaining shuffle. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_raw_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Keep elements 1 and 2 of a <3 x float> format load. Because the last element |
| ; is used, the CHECK lines expect the <3 x float> load and the shuffle unchanged. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_raw_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Extract element 0 through a bitcast of a <4 x float> format load to <4 x i32>. |
| ; The CHECK lines expect a scalar float load followed by a scalar bitcast to i32. |
| ; Note: unlike the neighbouring tests, this define has no amdgpu_ps calling convention. |
| define i32 @extract0_bitcast_raw_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32 |
| ; CHECK-NEXT: ret i32 [[VAR2]] |
| ; |
| %var = call <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %var1 = bitcast <4 x float> %var to <4 x i32> |
| %var2 = extractelement <4 x i32> %var1, i32 0 |
| ret i32 %var2 |
| } |
| |
| ; Extract element 0 through a bitcast of a <4 x i32> format load to <4 x float>. |
| ; The CHECK lines expect a scalar i32 load followed by a scalar bitcast to float, |
| ; i.e. @extract0_bitcast_raw_buffer_load_format_v4f32 with the two types swapped. |
| define float @extract0_bitcast_raw_buffer_load_format_v4i32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_raw_buffer_load_format_v4i32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call i32 @llvm.amdgcn.raw.buffer.load.format.i32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[VAR2:%.*]] = bitcast i32 [[VAR]] to float |
| ; CHECK-NEXT: ret float [[VAR2]] |
| ; |
| %var = call <4 x i32> @llvm.amdgcn.raw.buffer.load.format.v4i32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %var1 = bitcast <4 x i32> %var to <4 x float> |
| %var2 = extractelement <4 x float> %var1, i32 0 |
| ret float %var2 |
| } |
| |
| ; Declared here because the declaration list for this section only covers the |
| ; float overloads of raw.buffer.load.format. |
| declare <4 x i32> @llvm.amdgcn.raw.buffer.load.format.v4i32(<4 x i32>, i32, i32, i32) #1 |
| |
| ; Extract element 0 of a <2 x float> format load that carries !fpmath metadata. |
| ; The CHECK lines expect the scalar replacement call to keep the !fpmath attachment. |
| ; [[META0]] is used here without a capture pattern, so it presumably is captured by |
| ; an earlier CHECK line in the file; !0 itself is defined outside this section. |
| define amdgpu_ps float @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_buffer_load_format_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]] |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Declarations of the float-typed raw.buffer.load.format overloads (scalar and |
| ; 1-, 2-, 3-, 4-element vectors). Attribute group #1 is defined outside this part of the file. |
| declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32) #1 |
| declare <1 x float> @llvm.amdgcn.raw.buffer.load.format.v1f32(<4 x i32>, i32, i32, i32) #1 |
| declare <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32) #1 |
| declare <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.raw.ptr.buffer.load.format |
| ; -------------------------------------------------------------------- |
| |
| ; Baseline for the ptr addrspace(8) resource form: a scalar float format load |
| ; returned directly. The CHECK lines expect the call to be left unchanged. |
| define amdgpu_ps float @raw_ptr_buffer_load_format_f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @raw_ptr_buffer_load_format_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| ret float %data |
| } |
| |
| ; Baseline for the ptr addrspace(8) resource form: a <1 x float> format load |
| ; returned directly. The CHECK lines expect the call to be left unchanged. |
| define amdgpu_ps <1 x float> @raw_ptr_buffer_load_format_v1f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @raw_ptr_buffer_load_format_v1f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v1f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <1 x float> [[DATA]] |
| ; |
| %data = call <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v1f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| ret <1 x float> %data |
| } |
| |
| ; Baseline for the ptr addrspace(8) resource form: a <2 x float> format load |
| ; returned directly. The CHECK lines expect the call to be left unchanged. |
| define amdgpu_ps <2 x float> @raw_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @raw_ptr_buffer_load_format_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| ret <2 x float> %data |
| } |
| |
| ; Baseline for the ptr addrspace(8) resource form: a <4 x float> format load |
| ; returned directly. The CHECK lines expect the call to be left unchanged. |
| define amdgpu_ps <4 x float> @raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @raw_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <4 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| ret <4 x float> %data |
| } |
| |
| ; Extract element 0 of a <2 x float> raw.ptr format load. The CHECK lines |
| ; expect the load to become a scalar float load with the same operands. |
| define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_format_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Extract element 1 of a <2 x float> raw.ptr format load. The CHECK lines |
| ; expect the <2 x float> load and the extractelement to both remain. |
| define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_format_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <2 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Extract element 0 of a <4 x float> raw.ptr format load. The CHECK lines |
| ; expect the load to become a scalar float load with the same operands. |
| define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Extract element 1 of a <4 x float> raw.ptr format load. The CHECK lines |
| ; expect the load shortened to <2 x float>, with the extract of element 1 kept. |
| define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Extract element 2 of a <4 x float> raw.ptr format load. The CHECK lines |
| ; expect the load shortened to <3 x float>, with the extract of element 2 kept. |
| ; Note: the extracted value is named %elt1 even though the index is 2. |
| define amdgpu_ps float @extract_elt2_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_raw_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 2 |
| ret float %elt1 |
| } |
| |
| ; Extract element 3 (the last one) of a <4 x float> raw.ptr format load. The |
| ; CHECK lines expect the <4 x float> load and the extractelement to both remain. |
| ; Note: the extracted value is named %elt1 even though the index is 3. |
| define amdgpu_ps float @extract_elt3_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt3_raw_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 3 |
| ret float %elt1 |
| } |
| |
| ; Keep only elements 0 and 1 of a <4 x float> raw.ptr format load. The CHECK |
| ; lines expect a <2 x float> load and no remaining shuffle. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Keep elements 1 and 2 of a <4 x float> raw.ptr format load. The CHECK lines |
| ; expect the load shortened to <3 x float>, with a <1, 2> shuffle still applied. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Keep elements 2 and 3 of a <4 x float> raw.ptr format load. Because the last |
| ; element is used, the CHECK lines expect the load and the shuffle unchanged. |
| define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_elt3_raw_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ret <2 x float> %shuf |
| } |
| |
| ; Keep elements 0, 1 and 2 of a <4 x float> raw.ptr format load. The CHECK |
| ; lines expect a <3 x float> load and no remaining shuffle. |
| define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| ret <3 x float> %shuf |
| } |
| |
| ; Keep elements 1, 2 and 3 of a <4 x float> raw.ptr format load. Because the |
| ; last element is used, the CHECK lines expect the load and the shuffle unchanged. |
| define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Keep elements 0, 2 and 3 of a <4 x float> raw.ptr format load (element 1 |
| ; unused). The CHECK lines expect the <4 x float> load and the shuffle unchanged. |
| define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Extract element 0 of a <3 x float> raw.ptr format load. The CHECK lines |
| ; expect the load to become a scalar float load with the same operands. |
| define amdgpu_ps float @extract_elt0_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_ptr_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <3 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Extract element 1 of a <3 x float> raw.ptr format load. The CHECK lines |
| ; expect the load shortened to <2 x float>, with the extract of element 1 kept. |
| define amdgpu_ps float @extract_elt1_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_ptr_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Extract element 2 (the last one) of a <3 x float> raw.ptr format load. The |
| ; CHECK lines expect the <3 x float> load and the extractelement to both remain. |
| ; Note: the extracted value is named %elt1 even though the index is 2. |
| define amdgpu_ps float @extract_elt2_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_raw_ptr_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 2 |
| ret float %elt1 |
| } |
| |
| ; The shuffle keeps elements 0 and 1; the assertions expect a <2 x float> format load returned directly, with the shufflevector gone. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; The shuffle keeps elements 1 and 2; the assertions expect both the <3 x float> format load and the shufflevector to remain unchanged. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Element 0 is read through a <4 x float> to <4 x i32> bitcast; the assertions expect a scalar f32 format load followed by a scalar float-to-i32 bitcast. |
| define i32 @extract0_bitcast_raw_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_raw_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32 |
| ; CHECK-NEXT: ret i32 [[VAR2]] |
| ; |
| %var = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %var1 = bitcast <4 x float> %var to <4 x i32> |
| %var2 = extractelement <4 x i32> %var1, i32 0 |
| ret i32 %var2 |
| } |
| |
| ; Element 0 of a <4 x float> format load is extracted directly; the assertions expect a scalar f32 format load. |
| ; NOTE(review): despite "bitcast" and "v4i32" in the name, this body has no bitcast and no <4 x i32> value (compare extract0_bitcast_struct_buffer_load_v4i32, which loads <4 x i32> and bitcasts). Only float overloads of the format intrinsic are declared in this section -- confirm whether the mismatch is intentional. |
| define float @extract0_bitcast_raw_ptr_buffer_load_format_v4i32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_raw_ptr_buffer_load_format_v4i32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[VAR]] |
| ; |
| %var = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0) |
| %var1 = extractelement <4 x float> %var, i32 0 |
| ret float %var1 |
| } |
| |
| ; The <2 x float> format load carries !fpmath !0 and only element 0 is read; the assertions expect the narrowed scalar f32 call to still carry !fpmath metadata. |
| define amdgpu_ps float @preserve_metadata_extract_elt0_raw_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_ptr_buffer_load_format_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]] |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Declarations of the llvm.amdgcn.raw.ptr.buffer.load.format overloads (scalar f32 and <1 x float> through <4 x float>), each taking (ptr addrspace(8), i32, i32, i32). |
| declare float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32) #1 |
| declare <1 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v1f32(ptr addrspace(8), i32, i32, i32) #1 |
| declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32) #1 |
| declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8), i32, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.struct.buffer.load |
| ; -------------------------------------------------------------------- |
| |
| ; Baseline: a scalar f32 struct buffer load whose result is returned whole; the assertions expect the call to be unchanged. |
| define amdgpu_ps float @struct_buffer_load_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @struct_buffer_load_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| ret float %data |
| } |
| |
| ; Baseline: a <1 x float> struct buffer load returned whole; the assertions expect the call to be unchanged. |
| define amdgpu_ps <1 x float> @struct_buffer_load_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @struct_buffer_load_v1f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <1 x float> [[DATA]] |
| ; |
| %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| ret <1 x float> %data |
| } |
| |
| ; Baseline: a <2 x float> struct buffer load returned whole; the assertions expect the call to be unchanged. |
| define amdgpu_ps <2 x float> @struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @struct_buffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| ret <2 x float> %data |
| } |
| |
| ; Baseline: a <4 x float> struct buffer load returned whole; the assertions expect the call to be unchanged. |
| define amdgpu_ps <4 x float> @struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @struct_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <4 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| ret <4 x float> %data |
| } |
| |
| ; Only element 0 of the <2 x float> load is read; the assertions expect a scalar f32 load with the original %idx, %ofs and %sofs operands. |
| define amdgpu_ps float @extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only element 1 of the <2 x float> load is read; the assertions expect a scalar f32 load whose offset operand is %ofs plus 4. |
| define amdgpu_ps float @extract_elt1_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <2 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only element 0 of the <4 x float> load is read; the assertions expect a scalar f32 load with the offset operand unchanged. |
| define amdgpu_ps float @extract_elt0_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only element 1 of the <4 x float> load is read; the assertions expect a scalar f32 load whose offset operand is %ofs plus 4. |
| define amdgpu_ps float @extract_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only element 2 of the <4 x float> load is read; the assertions expect a scalar f32 load whose offset operand is %ofs plus 8. |
| ; The extracted value is named %elt1 although the index is 2. |
| define amdgpu_ps float @extract_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_struct_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 2 |
| ret float %elt1 |
| } |
| |
| ; Only element 3 of the <4 x float> load is read; the assertions expect a scalar f32 load whose offset operand is %ofs plus 12. |
| ; The extracted value is named %elt1 although the index is 3. |
| define amdgpu_ps float @extract_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 3 |
| ret float %elt1 |
| } |
| |
| ; The shuffle keeps elements 0 and 1 of the <4 x float> load; the assertions expect a <2 x float> load with the offset operand unchanged and no shufflevector. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; The shuffle keeps elements 1 and 2 of the <4 x float> load; the assertions expect a <2 x float> load whose offset operand is %ofs plus 4, with no shufflevector. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; The shuffle keeps elements 2 and 3 of the <4 x float> load; the assertions expect a <2 x float> load whose offset operand is %ofs plus 8, with no shufflevector. |
| define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8 |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ret <2 x float> %shuf |
| } |
| |
| ; The shuffle keeps elements 0, 1 and 2 of the <4 x float> load; the assertions expect a <3 x float> load with the offset operand unchanged and no shufflevector. |
| define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| ret <3 x float> %shuf |
| } |
| |
| ; The shuffle keeps elements 1, 2 and 3 of the <4 x float> load; the assertions expect a <3 x float> load whose offset operand is %ofs plus 4, with no shufflevector. |
| define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; The shuffle keeps the non-contiguous elements 0, 2 and 3; the assertions expect the <4 x float> load and the shufflevector to remain unchanged. |
| define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Only element 0 of the <3 x float> load is read; the assertions expect a scalar f32 load with the offset operand unchanged. |
| define amdgpu_ps float @extract_elt0_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <3 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only element 1 of the <3 x float> load is read; the assertions expect a scalar f32 load whose offset operand is %ofs plus 4. |
| define amdgpu_ps float @extract_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only element 2 of the <3 x float> load is read; the assertions expect a scalar f32 load whose offset operand is %ofs plus 8. |
| ; The extracted value is named %elt1 although the index is 2. |
| define amdgpu_ps float @extract_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_struct_buffer_load_v3f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 2 |
| ret float %elt1 |
| } |
| |
| ; The shuffle keeps elements 0 and 1 of the <3 x float> load; the assertions expect a <2 x float> load with the offset operand unchanged and no shufflevector. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; The shuffle keeps elements 1 and 2 of the <3 x float> load; the assertions expect a <2 x float> load whose offset operand is %ofs plus 4, with no shufflevector. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_v3f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Element 0 is read through a <4 x float> to <4 x i32> bitcast; the assertions expect a scalar f32 load followed by a scalar float-to-i32 bitcast. |
| define i32 @extract0_bitcast_struct_buffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4f32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32 |
| ; CHECK-NEXT: ret i32 [[VAR2]] |
| ; |
| %var = call <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %var1 = bitcast <4 x float> %var to <4 x i32> |
| %var2 = extractelement <4 x i32> %var1, i32 0 |
| ret i32 %var2 |
| } |
| |
| ; Element 0 is read through a <4 x i32> to <4 x float> bitcast; the assertions expect a scalar i32 load followed by a scalar i32-to-float bitcast. |
| define float @extract0_bitcast_struct_buffer_load_v4i32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_v4i32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[VAR2:%.*]] = bitcast i32 [[VAR]] to float |
| ; CHECK-NEXT: ret float [[VAR2]] |
| ; |
| %var = call <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %var1 = bitcast <4 x i32> %var to <4 x float> |
| %var2 = extractelement <4 x float> %var1, i32 0 |
| ret float %var2 |
| } |
| |
| ; The <2 x float> load carries !fpmath !0 and only element 0 is read; the assertions expect the narrowed scalar f32 call to still carry !fpmath metadata. |
| define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]] |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Declarations of the llvm.amdgcn.struct.buffer.load overloads for f32 and <1 x float> through <4 x float>, each taking (<4 x i32>, i32, i32, i32, i32). |
| declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <1 x float> @llvm.amdgcn.struct.buffer.load.v1f32(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #1 |
| |
| ; The <4 x i32> overload is called by extract0_bitcast_struct_buffer_load_v4i32. |
| declare <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #1 |
| |
| ; Only element 0 of the <2 x half> load is read; the assertions expect a scalar f16 load with the offset operand unchanged. |
| define amdgpu_ps half @extract_elt0_struct_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2f16( |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <2 x half> %data, i32 0 |
| ret half %elt0 |
| } |
| |
| ; Only element 1 of the <2 x half> load is read; the assertions expect a scalar f16 load whose offset operand is %ofs plus 2. |
| define amdgpu_ps half @extract_elt1_struct_buffer_load_v2f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <2 x half> %data, i32 1 |
| ret half %elt1 |
| } |
| |
| ; Only element 1 of the <3 x half> load is read; the assertions expect a scalar f16 load whose offset operand is %ofs plus 2. |
| define amdgpu_ps half @extract_elt1_struct_buffer_load_v3f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <3 x half> @llvm.amdgcn.struct.buffer.load.v3f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x half> %data, i32 1 |
| ret half %elt1 |
| } |
| |
| ; Only element 1 of the <4 x half> load is read; the assertions expect a scalar f16 load whose offset operand is %ofs plus 2. |
| define amdgpu_ps half @extract_elt1_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x half> %data, i32 1 |
| ret half %elt1 |
| } |
| |
| ; Only element 3 of the <4 x half> load is read; the assertions expect a scalar f16 load whose offset operand is %ofs plus 6. |
| ; The extracted value is named %elt1 although the index is 3. |
| define amdgpu_ps half @extract_elt3_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x half> %data, i32 3 |
| ret half %elt1 |
| } |
| |
| ; The shuffle keeps elements 0 and 1 of the <4 x half> load; the assertions expect a <2 x half> load with the offset operand unchanged and no shufflevector. |
| define amdgpu_ps <2 x half> @extract_elt0_elt1_struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4f16( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x half> [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x half> %shuf |
| } |
| |
| ; Declarations of the half-precision llvm.amdgcn.struct.buffer.load overloads (scalar f16 and <2 x half> through <4 x half>) used by the f16 tests above. |
| declare half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <3 x half> @llvm.amdgcn.struct.buffer.load.v3f16(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32>, i32, i32, i32, i32) #1 |
| |
| ; Only element 0 of the <2 x i8> load is read; the assertions expect a scalar i8 load with the offset operand unchanged. |
| define amdgpu_ps i8 @extract_elt0_struct_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_buffer_load_v2i8( |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <2 x i8> %data, i32 0 |
| ret i8 %elt0 |
| } |
| |
| ; Only element 1 of the <2 x i8> load is read; the assertions expect a scalar i8 load whose offset operand is %ofs plus 1. |
| define amdgpu_ps i8 @extract_elt1_struct_buffer_load_v2i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v2i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <2 x i8> %data, i32 1 |
| ret i8 %elt1 |
| } |
| |
| ; Only element 1 of the <3 x i8> load is read; the assertions expect a scalar i8 load whose offset operand is %ofs plus 1. |
| define amdgpu_ps i8 @extract_elt1_struct_buffer_load_v3i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v3i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <3 x i8> @llvm.amdgcn.struct.buffer.load.v3i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x i8> %data, i32 1 |
| ret i8 %elt1 |
| } |
| |
| ; Only element 1 of the <4 x i8> load is read; the assertions expect a scalar i8 load whose offset operand is %ofs plus 1. |
| define amdgpu_ps i8 @extract_elt1_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_buffer_load_v4i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x i8> %data, i32 1 |
| ret i8 %elt1 |
| } |
| |
| ; Only element 3 of the <4 x i8> load is read; the assertions expect a scalar i8 load whose offset operand is %ofs plus 3. |
| ; The extracted value is named %elt1 although the index is 3. |
| define amdgpu_ps i8 @extract_elt3_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt3_struct_buffer_load_v4i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x i8> %data, i32 3 |
| ret i8 %elt1 |
| } |
| |
| ; The shuffle keeps elements 0 and 1 of the <4 x i8> load; the assertions expect a <2 x i8> load with the offset operand unchanged and no shufflevector. |
| define amdgpu_ps <2 x i8> @extract_elt0_elt1_struct_buffer_load_v4i8(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_v4i8( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x i8> [[DATA]] |
| ; |
| %data = call <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x i8> %shuf |
| } |
| |
| ; Declarations of the 8-bit integer llvm.amdgcn.struct.buffer.load overloads (scalar i8 and <2 x i8> through <4 x i8>) used by the i8 tests above. |
| declare i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <2 x i8> @llvm.amdgcn.struct.buffer.load.v2i8(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <3 x i8> @llvm.amdgcn.struct.buffer.load.v3i8(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <4 x i8> @llvm.amdgcn.struct.buffer.load.v4i8(<4 x i32>, i32, i32, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.struct.ptr.buffer.load |
| ; -------------------------------------------------------------------- |
| |
| ; Baseline: a scalar f32 struct.ptr buffer load (resource passed as ptr addrspace(8)) returned whole; the assertions expect the call to be unchanged. |
| define amdgpu_ps float @struct_ptr_buffer_load_f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @struct_ptr_buffer_load_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| ret float %data |
| } |
| |
| ; Baseline: a <1 x float> struct.ptr buffer load returned whole; the assertions expect the call to be unchanged. |
| define amdgpu_ps <1 x float> @struct_ptr_buffer_load_v1f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @struct_ptr_buffer_load_v1f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.v1f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <1 x float> [[DATA]] |
| ; |
| %data = call <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.v1f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| ret <1 x float> %data |
| } |
| |
| ; Baseline: a <2 x float> struct.ptr buffer load returned whole; the assertions expect the call to be unchanged. |
| define amdgpu_ps <2 x float> @struct_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @struct_ptr_buffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| ret <2 x float> %data |
| } |
| |
| ; Baseline: a <4 x float> struct.ptr buffer load returned whole; the assertions expect the call to be unchanged. |
| define amdgpu_ps <4 x float> @struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @struct_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <4 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| ret <4 x float> %data |
| } |
| |
| ; Only element 0 of the <2 x float> struct.ptr load is read; the assertions expect a scalar f32 load with the offset operand unchanged. |
| define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Uses only element 1 of a <2 x float> load. Expected: a scalar float load with %ofs replaced by %ofs + 4. |
| define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v2f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <2 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Uses only element 0 of a <4 x float> load. Expected: a scalar float load with the same operands and no extractelement. |
| define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Uses only element 1 of a <4 x float> load. Expected: a scalar float load with %ofs replaced by %ofs + 4. |
| define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Uses only element 2 of a <4 x float> load (the value is named %elt1 but extracts index 2). |
| ; Expected: a scalar float load with %ofs replaced by %ofs + 8. |
| define amdgpu_ps float @extract_elt2_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_struct_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 2 |
| ret float %elt1 |
| } |
| |
| ; Uses only element 3 of a <4 x float> load (the value is named %elt1 but extracts index 3). |
| ; Expected: a scalar float load with %ofs replaced by %ofs + 12. |
| define amdgpu_ps float @extract_elt3_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt3_struct_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 12 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 3 |
| ret float %elt1 |
| } |
| |
| ; Shuffles elements 0 and 1 out of a <4 x float> load. Expected: a <2 x float> load with the same operands and no shufflevector. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Shuffles elements 1 and 2 out of a <4 x float> load. Expected: a <2 x float> load with %ofs replaced by %ofs + 4. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Shuffles elements 2 and 3 out of a <4 x float> load. Expected: a <2 x float> load with %ofs replaced by %ofs + 8. |
| define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_elt3_struct_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8 |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ret <2 x float> %shuf |
| } |
| |
| ; Shuffles elements 0, 1 and 2 out of a <4 x float> load. Expected: a <3 x float> load with the same operands and no shufflevector. |
| define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| ret <3 x float> %shuf |
| } |
| |
| ; Shuffles elements 1, 2 and 3 out of a <4 x float> load. Expected: a <3 x float> load with %ofs replaced by %ofs + 4. |
| define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Shuffles the non-adjacent elements 0, 2 and 3 out of a <4 x float> load. Expected: the <4 x float> load and the shufflevector are both kept as written. |
| define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Uses only element 0 of a <3 x float> load. Expected: a scalar float load with the same operands and no extractelement. |
| define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <3 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Uses only element 1 of a <3 x float> load. Expected: a scalar float load with %ofs replaced by %ofs + 4. |
| define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v3f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Uses only element 2 of a <3 x float> load (the value is named %elt1 but extracts index 2). |
| ; Expected: a scalar float load with %ofs replaced by %ofs + 8. |
| define amdgpu_ps float @extract_elt2_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_struct_ptr_buffer_load_v3f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 8 |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 2 |
| ret float %elt1 |
| } |
| |
| ; Shuffles elements 0 and 1 out of a <3 x float> load. Expected: a <2 x float> load with the same operands and no shufflevector. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Shuffles elements 1 and 2 out of a <3 x float> load. Expected: a <2 x float> load with %ofs replaced by %ofs + 4. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_buffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_buffer_load_v3f32( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 4 |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Bitcasts a <4 x float> load to <4 x i32> and uses only element 0. |
| ; Expected: a scalar float load followed by a float -> i32 bitcast. |
| ; Unlike the neighbouring tests, this function is defined without the amdgpu_ps calling convention. |
| define i32 @extract0_bitcast_struct_ptr_buffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_struct_ptr_buffer_load_v4f32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32 |
| ; CHECK-NEXT: ret i32 [[VAR2]] |
| ; |
| %var = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %var1 = bitcast <4 x float> %var to <4 x i32> |
| %var2 = extractelement <4 x i32> %var1, i32 0 |
| ret i32 %var2 |
| } |
| |
| ; Bitcasts a <4 x i32> load to <4 x float> and uses only element 0. |
| ; Expected: a scalar i32 load followed by an i32 -> float bitcast. |
| ; Unlike the neighbouring tests, this function is defined without the amdgpu_ps calling convention. |
| define float @extract0_bitcast_struct_ptr_buffer_load_v4i32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_struct_ptr_buffer_load_v4i32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call i32 @llvm.amdgcn.struct.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[VAR2:%.*]] = bitcast i32 [[VAR]] to float |
| ; CHECK-NEXT: ret float [[VAR2]] |
| ; |
| %var = call <4 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v4i32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %var1 = bitcast <4 x i32> %var to <4 x float> |
| %var2 = extractelement <4 x float> %var1, i32 0 |
| ret float %var2 |
| } |
| |
| ; Uses only element 0 of a <2 x float> load that carries !fpmath metadata. |
| ; Expected: a scalar float load that still carries the !fpmath attachment. |
| define amdgpu_ps float @preserve_metadata_extract_elt0_struct_ptr_buffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_ptr_buffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]] |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Declarations of the float and <4 x i32> overloads of llvm.amdgcn.struct.ptr.buffer.load; all use attribute group #1. |
| declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.v1f32(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) #1 |
| |
| declare <4 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32) #1 |
| |
| ; Uses only element 0 of a <2 x half> load. Expected: a scalar half load with the same operands and no extractelement. |
| define amdgpu_ps half @extract_elt0_struct_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v2f16( |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <2 x half> %data, i32 0 |
| ret half %elt0 |
| } |
| |
| ; Uses only element 1 of a <2 x half> load. Expected: a scalar half load with %ofs replaced by %ofs + 2. |
| define amdgpu_ps half @extract_elt1_struct_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v2f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <2 x half> %data, i32 1 |
| ret half %elt1 |
| } |
| |
| ; Uses only element 1 of a <3 x half> load. Expected: a scalar half load with %ofs replaced by %ofs + 2. |
| define amdgpu_ps half @extract_elt1_struct_ptr_buffer_load_v3f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v3f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.v3f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x half> %data, i32 1 |
| ret half %elt1 |
| } |
| |
| ; Uses only element 1 of a <4 x half> load. Expected: a scalar half load with %ofs replaced by %ofs + 2. |
| define amdgpu_ps half @extract_elt1_struct_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v4f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 2 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x half> %data, i32 1 |
| ret half %elt1 |
| } |
| |
| ; Uses only element 3 of a <4 x half> load (the value is named %elt1 but extracts index 3). |
| ; Expected: a scalar half load with %ofs replaced by %ofs + 6. |
| define amdgpu_ps half @extract_elt3_struct_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt3_struct_ptr_buffer_load_v4f16( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 6 |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x half> %data, i32 3 |
| ret half %elt1 |
| } |
| |
| ; Shuffles elements 0 and 1 out of a <4 x half> load. Expected: a <2 x half> load with the same operands and no shufflevector. |
| define amdgpu_ps <2 x half> @extract_elt0_elt1_struct_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_v4f16( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x half> [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x half> %data, <4 x half> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x half> %shuf |
| } |
| |
| ; Declarations of the half overloads of llvm.amdgcn.struct.ptr.buffer.load; all use attribute group #1. |
| declare half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.v3f16(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32) #1 |
| |
| ; Uses only element 0 of a <2 x i8> load. Expected: a scalar i8 load with the same operands and no extractelement. |
| define amdgpu_ps i8 @extract_elt0_struct_ptr_buffer_load_v2i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_v2i8( |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <2 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v2i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <2 x i8> %data, i32 0 |
| ret i8 %elt0 |
| } |
| |
| ; Uses only element 1 of a <2 x i8> load. Expected: a scalar i8 load with %ofs replaced by %ofs + 1. |
| define amdgpu_ps i8 @extract_elt1_struct_ptr_buffer_load_v2i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v2i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <2 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v2i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <2 x i8> %data, i32 1 |
| ret i8 %elt1 |
| } |
| |
| ; Uses only element 1 of a <3 x i8> load. Expected: a scalar i8 load with %ofs replaced by %ofs + 1. |
| define amdgpu_ps i8 @extract_elt1_struct_ptr_buffer_load_v3i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v3i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <3 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v3i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x i8> %data, i32 1 |
| ret i8 %elt1 |
| } |
| |
| ; Uses only element 1 of a <4 x i8> load. Expected: a scalar i8 load with %ofs replaced by %ofs + 1. |
| define amdgpu_ps i8 @extract_elt1_struct_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_v4i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 1 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <4 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x i8> %data, i32 1 |
| ret i8 %elt1 |
| } |
| |
| ; Uses only element 3 of a <4 x i8> load (the value is named %elt1 but extracts index 3). |
| ; Expected: a scalar i8 load with %ofs replaced by %ofs + 3. |
| define amdgpu_ps i8 @extract_elt3_struct_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt3_struct_ptr_buffer_load_v4i8( |
| ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFS:%.*]], 3 |
| ; CHECK-NEXT: [[DATA:%.*]] = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[TMP1]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret i8 [[DATA]] |
| ; |
| %data = call <4 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x i8> %data, i32 3 |
| ret i8 %elt1 |
| } |
| |
| ; Shuffles elements 0 and 1 out of a <4 x i8> load. Expected: a <2 x i8> load with the same operands and no shufflevector. |
| define amdgpu_ps <2 x i8> @extract_elt0_elt1_struct_ptr_buffer_load_v4i8(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_v4i8( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v2i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x i8> [[DATA]] |
| ; |
| %data = call <4 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v4i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x i8> %data, <4 x i8> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x i8> %shuf |
| } |
| |
| ; Declarations of the i8 overloads of llvm.amdgcn.struct.ptr.buffer.load; all use attribute group #1. |
| declare i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <2 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v2i8(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <3 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v3i8(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <4 x i8> @llvm.amdgcn.struct.ptr.buffer.load.v4i8(ptr addrspace(8), i32, i32, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.struct.buffer.load.format |
| ; -------------------------------------------------------------------- |
| |
| ; Returns the scalar float result of struct.buffer.load.format. Expected: the call is left unchanged. |
| define amdgpu_ps float @struct_buffer_load_format_f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @struct_buffer_load_format_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| ret float %data |
| } |
| |
| ; Returns the whole <1 x float> result of struct.buffer.load.format. Expected: the call is left unchanged. |
| define amdgpu_ps <1 x float> @struct_buffer_load_format_v1f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @struct_buffer_load_format_v1f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <1 x float> [[DATA]] |
| ; |
| %data = call <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| ret <1 x float> %data |
| } |
| |
| ; Returns the whole <2 x float> result of struct.buffer.load.format. Expected: the call is left unchanged. |
| define amdgpu_ps <2 x float> @struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @struct_buffer_load_format_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| ret <2 x float> %data |
| } |
| |
| ; Returns the whole <4 x float> result of struct.buffer.load.format. Expected: the call is left unchanged. |
| define amdgpu_ps <4 x float> @struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @struct_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <4 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| ret <4 x float> %data |
| } |
| |
| ; Uses only element 0 of a <2 x float> format load. Expected: a scalar float format load with the same operands and no extractelement. |
| define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Uses only element 1 of a <2 x float> format load. |
| ; Expected: the <2 x float> load is kept with unchanged operands and the extractelement remains (index printed as i64). |
| define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <2 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Uses only element 0 of a <4 x float> format load. Expected: a scalar float format load with the same operands and no extractelement. |
| define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Uses only element 1 of a <4 x float> format load. |
| ; Expected: the load is narrowed to <2 x float> with unchanged operands, and element 1 is still extracted from it. |
| define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Uses only element 2 of a <4 x float> format load (the value is named %elt1 but extracts index 2). |
| ; Expected: the load is narrowed to <3 x float> with unchanged operands, and element 2 is still extracted from it. |
| define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 2 |
| ret float %elt1 |
| } |
| |
| ; Uses only element 3 of a <4 x float> format load (the value is named %elt1 but extracts index 3). |
| ; Expected: the <4 x float> load is kept with unchanged operands and the extractelement remains (index printed as i64). |
| define amdgpu_ps float @extract_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt3_struct_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 3 |
| ret float %elt1 |
| } |
| |
| ; Shuffles elements 0 and 1 out of a <4 x float> format load. Expected: a <2 x float> format load with the same operands and no shufflevector. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Shuffles elements 1 and 2 out of a <4 x float> format load. |
| ; Expected: the load is narrowed to <3 x float> with unchanged operands, and the shufflevector still selects indices 1 and 2. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Shuffles elements 2 and 3 out of a <4 x float> format load. Expected: the <4 x float> load and the shufflevector are both kept as written. |
| define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_elt3_struct_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ret <2 x float> %shuf |
| } |
| |
| ; The first three lanes of a <4 x float> struct.buffer.load.format result are |
| ; used. The checks expect a direct v3f32 call with the shufflevector removed. |
| define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| ret <3 x float> %shuf |
| } |
| |
| ; Lanes 1, 2 and 3 of a <4 x float> struct.buffer.load.format result are used. |
| ; The checks expect the v4f32 call and the shufflevector to stay as written. |
| define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Lanes 0, 2 and 3 of a <4 x float> struct.buffer.load.format result are used |
| ; (lane 1 is skipped). The checks expect the v4f32 call and the shufflevector |
| ; to stay as written. |
| define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Only lane 0 of a <3 x float> struct.buffer.load.format result is extracted. |
| ; The checks expect a scalar f32 call with the extractelement removed. |
| define amdgpu_ps float @extract_elt0_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <3 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only lane 1 of a <3 x float> struct.buffer.load.format result is extracted. |
| ; The checks expect the call narrowed to v2f32 and the extractelement kept |
| ; at index 1 (printed with an i64 index in the output). |
| define amdgpu_ps float @extract_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only lane 2 (the last lane) of a <3 x float> struct.buffer.load.format |
| ; result is extracted. The checks expect the v3f32 call to be kept. |
| ; Note: the extracted value is named %elt1 even though it reads lane 2. |
| define amdgpu_ps float @extract_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_struct_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 2 |
| ret float %elt1 |
| } |
| |
| ; The first two lanes of a <3 x float> struct.buffer.load.format result are |
| ; used. The checks expect a direct v2f32 call with the shufflevector removed. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_struct_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Lanes 1 and 2 of a <3 x float> struct.buffer.load.format result are used. |
| ; The checks expect the v3f32 call and the shufflevector to stay as written. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_buffer_load_format_v3f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_struct_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Lane 0 is extracted after bitcasting the <4 x float> load result to |
| ; <4 x i32>. The checks expect a scalar f32 call followed by a scalar |
| ; float-to-i32 bitcast. Unlike its neighbours, this function is declared |
| ; without the amdgpu_ps calling convention. |
| define i32 @extract0_bitcast_struct_buffer_load_format_v4f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_struct_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32 |
| ; CHECK-NEXT: ret i32 [[VAR2]] |
| ; |
| %var = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %var1 = bitcast <4 x float> %var to <4 x i32> |
| %var2 = extractelement <4 x i32> %var1, i32 0 |
| ret i32 %var2 |
| } |
| |
| ; The <2 x float> load carries !fpmath metadata and only lane 0 is extracted. |
| ; The checks expect the narrowed scalar f32 call to still carry !fpmath. |
| ; NOTE(review): [[META0]] is used but not bound in this function's checks; |
| ; presumably it is captured by an earlier check in the file - confirm. |
| define amdgpu_ps float @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32(<4 x i32> inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_buffer_load_format_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]] |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <1 x float> @llvm.amdgcn.struct.buffer.load.format.v1f32(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) #1 |
| |
| declare <4 x i32> @llvm.amdgcn.struct.buffer.load.format.v4i32(<4 x i32>, i32, i32, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.struct.ptr.buffer.load.format |
| ; -------------------------------------------------------------------- |
| |
| ; Baseline: a scalar f32 struct.ptr.buffer.load.format result is returned |
| ; directly. The checks expect the call to be left unchanged. |
| define amdgpu_ps float @struct_ptr_buffer_load_format_f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @struct_ptr_buffer_load_format_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| ret float %data |
| } |
| |
| ; Baseline: a <1 x float> struct.ptr.buffer.load.format result is returned |
| ; directly. The checks expect the v1f32 call to be left unchanged. |
| define amdgpu_ps <1 x float> @struct_ptr_buffer_load_format_v1f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @struct_ptr_buffer_load_format_v1f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v1f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <1 x float> [[DATA]] |
| ; |
| %data = call <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v1f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| ret <1 x float> %data |
| } |
| |
| ; Baseline: a <2 x float> struct.ptr.buffer.load.format result is returned |
| ; directly. The checks expect the v2f32 call to be left unchanged. |
| define amdgpu_ps <2 x float> @struct_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @struct_ptr_buffer_load_format_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| ret <2 x float> %data |
| } |
| |
| ; Baseline: a <4 x float> struct.ptr.buffer.load.format result is returned |
| ; directly. The checks expect the v4f32 call to be left unchanged. |
| define amdgpu_ps <4 x float> @struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @struct_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <4 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| ret <4 x float> %data |
| } |
| |
| ; Only lane 0 of a <2 x float> struct.ptr.buffer.load.format result is |
| ; extracted. The checks expect a scalar f32 call with no extractelement. |
| define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_format_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only lane 1 (the last lane) of a <2 x float> struct.ptr.buffer.load.format |
| ; result is extracted. The checks expect the v2f32 call and the |
| ; extractelement to be kept. |
| define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_format_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <2 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only lane 0 of a <4 x float> struct.ptr.buffer.load.format result is |
| ; extracted. The checks expect a scalar f32 call with no extractelement. |
| define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only lane 1 of a <4 x float> struct.ptr.buffer.load.format result is |
| ; extracted. The checks expect the call narrowed to v2f32 with the |
| ; extractelement kept at index 1. |
| define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only lane 2 of a <4 x float> struct.ptr.buffer.load.format result is |
| ; extracted. The checks expect the call narrowed to v3f32 with the |
| ; extractelement kept at index 2. |
| ; Note: the extracted value is named %elt1 even though it reads lane 2. |
| define amdgpu_ps float @extract_elt2_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_struct_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 2 |
| ret float %elt1 |
| } |
| |
| ; Only lane 3 (the last lane) of a <4 x float> struct.ptr.buffer.load.format |
| ; result is extracted. The checks expect the v4f32 call to be kept. |
| ; Note: the extracted value is named %elt1 even though it reads lane 3. |
| define amdgpu_ps float @extract_elt3_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt3_struct_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 3 |
| ret float %elt1 |
| } |
| |
| ; The first two lanes of a <4 x float> struct.ptr.buffer.load.format result |
| ; are used. The checks expect a direct v2f32 call with no shufflevector. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Lanes 1 and 2 of a <4 x float> struct.ptr.buffer.load.format result are |
| ; used. The checks expect the call narrowed to v3f32, with the shufflevector |
| ; kept to pick lanes 1 and 2 out of the narrower result. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Lanes 2 and 3 of a <4 x float> struct.ptr.buffer.load.format result are |
| ; used. The checks expect the v4f32 call and the shufflevector to stay as |
| ; written. |
| define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_elt3_struct_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ret <2 x float> %shuf |
| } |
| |
| ; The first three lanes of a <4 x float> struct.ptr.buffer.load.format result |
| ; are used. The checks expect a direct v3f32 call with no shufflevector. |
| define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| ret <3 x float> %shuf |
| } |
| |
| ; Lanes 1, 2 and 3 of a <4 x float> struct.ptr.buffer.load.format result are |
| ; used. The checks expect the v4f32 call and the shufflevector to stay as |
| ; written. |
| define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Lanes 0, 2 and 3 of a <4 x float> struct.ptr.buffer.load.format result are |
| ; used (lane 1 is skipped). The checks expect the v4f32 call and the |
| ; shufflevector to stay as written. |
| define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Only lane 0 of a <3 x float> struct.ptr.buffer.load.format result is |
| ; extracted. The checks expect a scalar f32 call with no extractelement. |
| define amdgpu_ps float @extract_elt0_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_ptr_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt0 = extractelement <3 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only lane 1 of a <3 x float> struct.ptr.buffer.load.format result is |
| ; extracted. The checks expect the call narrowed to v2f32 with the |
| ; extractelement kept at index 1. |
| define amdgpu_ps float @extract_elt1_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_ptr_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only lane 2 (the last lane) of a <3 x float> struct.ptr.buffer.load.format |
| ; result is extracted. The checks expect the v3f32 call to be kept. |
| ; Note: the extracted value is named %elt1 even though it reads lane 2. |
| define amdgpu_ps float @extract_elt2_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt2_struct_ptr_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 2 |
| ret float %elt1 |
| } |
| |
| ; The first two lanes of a <3 x float> struct.ptr.buffer.load.format result |
| ; are used. The checks expect a direct v2f32 call with no shufflevector. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Lanes 1 and 2 of a <3 x float> struct.ptr.buffer.load.format result are |
| ; used. The checks expect the v3f32 call and the shufflevector to stay as |
| ; written. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_buffer_load_format_v3f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_buffer_load_format_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Lane 0 is extracted after bitcasting the <4 x float> load result to |
| ; <4 x i32>. The checks expect a scalar f32 call followed by a scalar |
| ; float-to-i32 bitcast. Unlike its neighbours, this function is declared |
| ; without the amdgpu_ps calling convention. |
| define i32 @extract0_bitcast_struct_ptr_buffer_load_format_v4f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_struct_ptr_buffer_load_format_v4f32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0) |
| ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32 |
| ; CHECK-NEXT: ret i32 [[VAR2]] |
| ; |
| %var = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0) |
| %var1 = bitcast <4 x float> %var to <4 x i32> |
| %var2 = extractelement <4 x i32> %var1, i32 0 |
| ret i32 %var2 |
| } |
| |
| ; The <2 x float> load carries !fpmath metadata and only lane 0 is extracted. |
| ; The checks expect the narrowed scalar f32 call to still carry !fpmath. |
| ; NOTE(review): [[META0]] is used but not bound in this function's checks; |
| ; presumably it is captured by an earlier check in the file - confirm. |
| define amdgpu_ps float @preserve_metadata_extract_elt0_struct_ptr_buffer_load_format_v2f32(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs, i32 %sofs) #0 { |
| ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_ptr_buffer_load_format_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[IDX:%.*]], i32 [[OFS:%.*]], i32 [[SOFS:%.*]], i32 0), !fpmath [[META0]] |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 %sofs, i32 0), !fpmath !0 |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <1 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v1f32(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) #1 |
| |
| declare <4 x i32> @llvm.amdgcn.struct.ptr.buffer.load.format.v4i32(ptr addrspace(8), i32, i32, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.raw.tbuffer.load |
| ; -------------------------------------------------------------------- |
| |
| ; Baseline: a scalar f32 raw.tbuffer.load result is returned directly. The |
| ; checks expect the call, including its immediate operands 78 and 0, to be |
| ; left unchanged. |
| define amdgpu_ps float @raw_tbuffer_load_f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @raw_tbuffer_load_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| ret float %data |
| } |
| |
| ; Baseline: a <2 x float> raw.tbuffer.load result is returned directly. The |
| ; checks expect the v2f32 call to be left unchanged. |
| define amdgpu_ps <2 x float> @raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @raw_tbuffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| ret <2 x float> %data |
| } |
| |
| ; Baseline: a <4 x float> raw.tbuffer.load result is returned directly. The |
| ; checks expect the v4f32 call to be left unchanged. |
| define amdgpu_ps <4 x float> @raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @raw_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <4 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| ret <4 x float> %data |
| } |
| |
| ; Only lane 0 of a <2 x float> raw.tbuffer.load result is extracted. The |
| ; checks expect a scalar f32 call with no extractelement. |
| define amdgpu_ps float @extract_elt0_raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only lane 1 (the last lane) of a <2 x float> raw.tbuffer.load result is |
| ; extracted. The checks expect the v2f32 call and the extractelement to be |
| ; kept. |
| define amdgpu_ps float @extract_elt1_raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <2 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only lane 0 of a <4 x float> raw.tbuffer.load result is extracted. The |
| ; checks expect a scalar f32 call with no extractelement. |
| define amdgpu_ps float @extract_elt0_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only lane 1 of a <4 x float> raw.tbuffer.load result is extracted. The |
| ; checks expect the call narrowed to v2f32 with the extractelement kept at |
| ; index 1. |
| define amdgpu_ps float @extract_elt1_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only lane 2 of a <4 x float> raw.tbuffer.load result is extracted. The |
| ; checks expect the call narrowed to v3f32 with the extractelement kept at |
| ; index 2. |
| ; Note: the extracted value is named %elt1 even though it reads lane 2. |
| define amdgpu_ps float @extract_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 2 |
| ret float %elt1 |
| } |
| |
| ; Only lane 3 (the last lane) of a <4 x float> raw.tbuffer.load result is |
| ; extracted. The checks expect the v4f32 call to be kept. |
| ; Note: the extracted value is named %elt1 even though it reads lane 3. |
| define amdgpu_ps float @extract_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt3_raw_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 3 |
| ret float %elt1 |
| } |
| |
| ; The first two lanes of a <4 x float> raw.tbuffer.load result are used. The |
| ; checks expect a direct v2f32 call with no shufflevector. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_raw_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Lanes 1 and 2 of a <4 x float> raw.tbuffer.load result are used. The checks |
| ; expect the call narrowed to v3f32, with the shufflevector kept to pick |
| ; lanes 1 and 2 out of the narrower result. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_raw_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Elements 2 and 3 of a <4 x float> raw.tbuffer.load are used: the expected output is identical to the input (the load is not narrowed). |
| define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt2_elt3_raw_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ret <2 x float> %shuf |
| } |
| |
| ; Elements 0..2 of a <4 x float> raw.tbuffer.load are selected by a shuffle: the expected output is a <3 x float> load with the shuffle removed. |
| define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| ret <3 x float> %shuf |
| } |
| |
| ; Elements 1..3 of a <4 x float> raw.tbuffer.load are used: the expected output is identical to the input (the load is not narrowed). |
| define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Elements 0, 2 and 3 of a <4 x float> raw.tbuffer.load are used (element 1 is skipped): the expected output is identical to the input. |
| define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Only element 0 of a <3 x float> raw.tbuffer.load is used: the expected output is a scalar float load with the extract removed. |
| define amdgpu_ps float @extract_elt0_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt0 = extractelement <3 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only element 1 of a <3 x float> raw.tbuffer.load is used: the expected output narrows the load to <2 x float> and extracts index 1 from it. |
| define amdgpu_ps float @extract_elt1_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only element 2 (the last) of a <3 x float> raw.tbuffer.load is used: the expected output keeps the <3 x float> load and just rewrites the extract index as i64. |
| define amdgpu_ps float @extract_elt2_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 2 |
| ret float %elt1 |
| } |
| |
| ; Elements 0 and 1 of a <3 x float> raw.tbuffer.load are selected by a shuffle: the expected output is a <2 x float> load with the shuffle removed. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_raw_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Elements 1 and 2 of a <3 x float> raw.tbuffer.load are used: the expected output is identical to the input (the load is not narrowed). |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_raw_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; The <4 x float> load result is bitcast to <4 x i32> before element 0 is extracted: the expected output is a scalar float load followed by a scalar float-to-i32 bitcast. |
| ; Unlike the neighbouring tests, this function is defined without the amdgpu_ps calling convention. |
| define i32 @extract0_bitcast_raw_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_raw_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32 |
| ; CHECK-NEXT: ret i32 [[VAR2]] |
| ; |
| %var = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %var1 = bitcast <4 x float> %var to <4 x i32> |
| %var2 = extractelement <4 x i32> %var1, i32 0 |
| ret i32 %var2 |
| } |
| |
| ; The input <2 x float> load carries !fpmath metadata; the expected output is a scalar float load that still has the !fpmath attachment. |
| define amdgpu_ps float @preserve_metadata_extract_elt0_raw_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_tbuffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0), !fpmath [[META0]] |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0), !fpmath !0 |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Declarations of the float, <2 x float>, <3 x float> and <4 x float> overloads of llvm.amdgcn.raw.tbuffer.load referenced by the tests in this section. |
| declare float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #1 |
| |
| ; <4 x i32> overload. NOTE(review): no call to it appears in the nearby tests - confirm whether it is used elsewhere in the file. |
| declare <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #1 |
| |
| ; Only element 3 of a <4 x half> raw.tbuffer.load is used: the expected output keeps the full <4 x half> load and just rewrites the extract index as i64. |
| define amdgpu_ps half @extract_elt3_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt3_raw_tbuffer_load_v4f16( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[DATA]], i64 3 |
| ; CHECK-NEXT: ret half [[ELT1]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <4 x half> %data, i32 3 |
| ret half %elt1 |
| } |
| |
| ; Only element 2 of a <4 x half> raw.tbuffer.load is used: the expected output narrows the load to <3 x half> and extracts index 2 from it. |
| define amdgpu_ps half @extract_elt2_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt2_raw_tbuffer_load_v4f16( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x half> [[DATA]], i64 2 |
| ; CHECK-NEXT: ret half [[ELT1]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <4 x half> %data, i32 2 |
| ret half %elt1 |
| } |
| |
| ; Only element 1 of a <4 x half> raw.tbuffer.load is used: the expected output narrows the load to <2 x half> and extracts index 1 from it. |
| define amdgpu_ps half @extract_elt1_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_tbuffer_load_v4f16( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret half [[ELT1]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <4 x half> %data, i32 1 |
| ret half %elt1 |
| } |
| |
| ; Only element 0 of a <4 x half> raw.tbuffer.load is used: the expected output is a scalar half load with the extract removed. |
| ; Note: the input value is named %elt1 even though it extracts index 0. |
| define amdgpu_ps half @extract_elt0_raw_tbuffer_load_v4f16(<4 x i32> inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_tbuffer_load_v4f16( |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <4 x half> %data, i32 0 |
| ret half %elt1 |
| } |
| |
| ; Declarations of the half, <2 x half>, <3 x half> and <4 x half> overloads of llvm.amdgcn.raw.tbuffer.load referenced by the half-precision tests above. |
| declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32) #1 |
| declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.raw.ptr.tbuffer.load |
| ; -------------------------------------------------------------------- |
| |
| ; Baseline: a scalar float raw.ptr.tbuffer.load whose result is returned directly; the expected output is the same call. |
| define amdgpu_ps float @raw_ptr_tbuffer_load_f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @raw_ptr_tbuffer_load_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| ret float %data |
| } |
| |
| ; Baseline: a <2 x float> raw.ptr.tbuffer.load whose whole result is returned; the expected output is the same call. |
| define amdgpu_ps <2 x float> @raw_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @raw_ptr_tbuffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| ret <2 x float> %data |
| } |
| |
| ; Baseline: a <4 x float> raw.ptr.tbuffer.load whose whole result is returned; the expected output is the same call. |
| define amdgpu_ps <4 x float> @raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @raw_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <4 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| ret <4 x float> %data |
| } |
| |
| ; Only element 0 of a <2 x float> raw.ptr.tbuffer.load is used: the expected output is a scalar float load with the extract removed. |
| define amdgpu_ps float @extract_elt0_raw_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_ptr_tbuffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only element 1 (the last) of a <2 x float> raw.ptr.tbuffer.load is used: the expected output keeps the <2 x float> load and just rewrites the extract index as i64. |
| define amdgpu_ps float @extract_elt1_raw_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_ptr_tbuffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <2 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only element 0 of a <4 x float> raw.ptr.tbuffer.load is used: the expected output is a scalar float load with the extract removed. |
| define amdgpu_ps float @extract_elt0_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only element 1 of a <4 x float> raw.ptr.tbuffer.load is used: the expected output narrows the load to <2 x float> and extracts index 1 from it. |
| define amdgpu_ps float @extract_elt1_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only element 2 of a <4 x float> raw.ptr.tbuffer.load is used: the expected output narrows the load to <3 x float> and extracts index 2 from it. |
| define amdgpu_ps float @extract_elt2_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt2_raw_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 2 |
| ret float %elt1 |
| } |
| |
| ; Only element 3 of a <4 x float> raw.ptr.tbuffer.load is used: the expected output keeps the full <4 x float> load and just rewrites the extract index as i64. |
| define amdgpu_ps float @extract_elt3_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt3_raw_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x float> [[DATA]], i64 3 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 3 |
| ret float %elt1 |
| } |
| |
| ; Elements 0 and 1 of a <4 x float> raw.ptr.tbuffer.load are selected by a shuffle: the expected output is a <2 x float> load with the shuffle removed. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Elements 1 and 2 of a <4 x float> raw.ptr.tbuffer.load are used: the expected output narrows the load to <3 x float> and keeps the shuffle with the same <1, 2> mask. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Elements 2 and 3 of a <4 x float> raw.ptr.tbuffer.load are used: the expected output is identical to the input (the load is not narrowed). |
| define amdgpu_ps <2 x float> @extract_elt2_elt3_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt2_elt3_raw_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ret <2 x float> %shuf |
| } |
| |
| ; Elements 0..2 of a <4 x float> raw.ptr.tbuffer.load are selected by a shuffle: the expected output is a <3 x float> load with the shuffle removed. |
| define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_elt2_raw_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| ret <3 x float> %shuf |
| } |
| |
| ; Elements 1..3 of a <4 x float> raw.ptr.tbuffer.load are used: the expected output is identical to the input (the load is not narrowed). |
| define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_elt3_raw_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Elements 0, 2 and 3 of a <4 x float> raw.ptr.tbuffer.load are used (element 1 is skipped): the expected output is identical to the input. |
| define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt2_elt3_raw_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Only element 0 of a <3 x float> raw.ptr.tbuffer.load is used: the expected output is a scalar float load with the extract removed. |
| define amdgpu_ps float @extract_elt0_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_ptr_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt0 = extractelement <3 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Only element 1 of a <3 x float> raw.ptr.tbuffer.load is used: the expected output narrows the load to <2 x float> and extracts index 1 from it. |
| define amdgpu_ps float @extract_elt1_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_ptr_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only element 2 (the last) of a <3 x float> raw.ptr.tbuffer.load is used: the expected output keeps the <3 x float> load and just rewrites the extract index as i64. |
| define amdgpu_ps float @extract_elt2_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt2_raw_ptr_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x float> [[DATA]], i64 2 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 2 |
| ret float %elt1 |
| } |
| |
| ; Elements 0 and 1 of a <3 x float> raw.ptr.tbuffer.load are selected by a shuffle: the expected output is a <2 x float> load with the shuffle removed. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_raw_ptr_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Elements 1 and 2 of a <3 x float> raw.ptr.tbuffer.load are used: the expected output is identical to the input (the load is not narrowed). |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_raw_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_raw_ptr_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; The <4 x float> load result is bitcast to <4 x i32> before element 0 is extracted: the expected output is a scalar float load followed by a scalar float-to-i32 bitcast. |
| ; Unlike the neighbouring tests, this function is defined without the amdgpu_ps calling convention. |
| define i32 @extract0_bitcast_raw_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_raw_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32 |
| ; CHECK-NEXT: ret i32 [[VAR2]] |
| ; |
| %var = call <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %var1 = bitcast <4 x float> %var to <4 x i32> |
| %var2 = extractelement <4 x i32> %var1, i32 0 |
| ret i32 %var2 |
| } |
| |
| ; The input <2 x float> load carries !fpmath metadata; the expected output is a scalar float load that still has the !fpmath attachment. |
| define amdgpu_ps float @preserve_metadata_extract_elt0_raw_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @preserve_metadata_extract_elt0_raw_ptr_tbuffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0), !fpmath [[META0]] |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0), !fpmath !0 |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Declarations of the float, <2 x float>, <3 x float> and <4 x float> overloads of llvm.amdgcn.raw.ptr.tbuffer.load referenced by the tests in this section. |
| declare float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) #1 |
| |
| ; <4 x i32> overload. NOTE(review): no call to it appears in the nearby tests - confirm whether it is used elsewhere in the file. |
| declare <4 x i32> @llvm.amdgcn.raw.ptr.tbuffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32) #1 |
| |
| ; Only element 3 of a <4 x half> raw.ptr.tbuffer.load is used: the expected output keeps the full <4 x half> load and just rewrites the extract index as i64. |
| define amdgpu_ps half @extract_elt3_raw_ptr_tbuffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt3_raw_ptr_tbuffer_load_v4f16( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <4 x half> [[DATA]], i64 3 |
| ; CHECK-NEXT: ret half [[ELT1]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <4 x half> %data, i32 3 |
| ret half %elt1 |
| } |
| |
| ; Only element 2 of a <4 x half> raw.ptr.tbuffer.load is used: the expected output narrows the load to <3 x half> and extracts index 2 from it. |
| define amdgpu_ps half @extract_elt2_raw_ptr_tbuffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt2_raw_ptr_tbuffer_load_v4f16( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <3 x half> [[DATA]], i64 2 |
| ; CHECK-NEXT: ret half [[ELT1]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <4 x half> %data, i32 2 |
| ret half %elt1 |
| } |
| |
| ; Only element 1 of a <4 x half> raw.ptr.tbuffer.load is used: the expected output narrows the load to <2 x half> and extracts index 1 from it. |
| define amdgpu_ps half @extract_elt1_raw_ptr_tbuffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt1_raw_ptr_tbuffer_load_v4f16( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x half> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret half [[ELT1]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <4 x half> %data, i32 1 |
| ret half %elt1 |
| } |
| |
| ; Only element 0 of a <4 x half> raw.ptr.tbuffer.load is used: the expected output is a scalar half load with the extract removed. |
| ; Note: the input value is named %elt1 even though it extracts index 0. |
| define amdgpu_ps half @extract_elt0_raw_ptr_tbuffer_load_v4f16(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 inreg %arg1) #0 { |
| ; CHECK-LABEL: @extract_elt0_raw_ptr_tbuffer_load_v4f16( |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.raw.ptr.tbuffer.load.f16(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 78, i32 0) |
| %elt1 = extractelement <4 x half> %data, i32 0 |
| ret half %elt1 |
| } |
| |
| ; Declarations of the half, <2 x half>, <3 x half> and <4 x half> overloads of llvm.amdgcn.raw.ptr.tbuffer.load referenced by the half-precision tests above. |
| declare half @llvm.amdgcn.raw.ptr.tbuffer.load.f16(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <2 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <3 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16(ptr addrspace(8), i32, i32, i32, i32) #1 |
| declare <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.struct.tbuffer.load |
| ; -------------------------------------------------------------------- |
| |
| ; Baseline: a scalar float struct.tbuffer.load (one more i32 operand than the raw form) whose result is returned directly; the expected output is the same call. |
| define amdgpu_ps float @struct_tbuffer_load_f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @struct_tbuffer_load_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| ret float %data |
| } |
| |
| ; Baseline: a <2 x float> struct.tbuffer.load whose whole result is returned; the expected output is the same call. |
| define amdgpu_ps <2 x float> @struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @struct_tbuffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| ret <2 x float> %data |
| } |
| |
| ; Baseline: a <4 x float> struct.tbuffer.load whose whole result is returned; the expected output is the same call. |
| define amdgpu_ps <4 x float> @struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @struct_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <4 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| ret <4 x float> %data |
| } |
| |
| ; Only element 0 of the <2 x float> load is used; the checks expect the call rewritten to the scalar f32 overload with no extractelement left. |
| define amdgpu_ps float @extract_elt0_struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_tbuffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Element 1 of the <2 x float> load is used; the checks expect the v2f32 call to stay, followed by an extractelement with an i64 index. |
| define amdgpu_ps float @extract_elt1_struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_tbuffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %elt1 = extractelement <2 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only element 0 of the <4 x float> load is used; the checks expect the call rewritten to the scalar f32 overload with no extractelement left. |
| define amdgpu_ps float @extract_elt0_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Element 1 of the <4 x float> load is used; the checks expect the call narrowed to the v2f32 overload, still extracting index 1. |
| define amdgpu_ps float @extract_elt1_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Element 2 of the <4 x float> load is used; the checks expect the call narrowed to the v3f32 overload, still extracting index 2. |
| define amdgpu_ps float @extract_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt2_struct_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT2:%.*]] = extractelement <3 x float> [[DATA]], i64 2 |
| ; CHECK-NEXT: ret float [[ELT2]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %elt2 = extractelement <4 x float> %data, i32 2 |
| ret float %elt2 |
| } |
| |
| ; Element 3 (the last one) of the <4 x float> load is used; the checks expect the v4f32 call to stay at full width. |
| define amdgpu_ps float @extract_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt3_struct_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT3:%.*]] = extractelement <4 x float> [[DATA]], i64 3 |
| ; CHECK-NEXT: ret float [[ELT3]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %elt3 = extractelement <4 x float> %data, i32 3 |
| ret float %elt3 |
| } |
| |
| ; A shufflevector selects elements 0 and 1 of the <4 x float> load; the checks expect the call narrowed to v2f32 and returned directly, with no shuffle left. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_struct_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; A shufflevector selects elements 1 and 2 of the <4 x float> load; the checks expect the call narrowed to v3f32 with the shuffle kept on the <3 x float> value. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_struct_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; A shufflevector selects elements 2 and 3 of the <4 x float> load; the checks expect both the v4f32 call and the shuffle to be left unchanged. |
| define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt2_elt3_struct_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ret <2 x float> %shuf |
| } |
| |
| ; A shufflevector selects elements 0, 1 and 2 of the <4 x float> load; the checks expect the call narrowed to v3f32 and returned directly, with no shuffle left. |
| define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| ret <3 x float> %shuf |
| } |
| |
| ; A shufflevector selects elements 1, 2 and 3 of the <4 x float> load; the checks expect both the v4f32 call and the shuffle to be left unchanged. |
| define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; A shufflevector selects elements 0, 2 and 3 of the <4 x float> load; the checks expect both the v4f32 call and the shuffle to be left unchanged. |
| define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Only element 0 of the <3 x float> load is used; the checks expect the call rewritten to the scalar f32 overload with no extractelement left. |
| define amdgpu_ps float @extract_elt0_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %elt0 = extractelement <3 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Element 1 of the <3 x float> load is used; the checks expect the call narrowed to the v2f32 overload, still extracting index 1. |
| define amdgpu_ps float @extract_elt1_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Element 2 (the last one) of the <3 x float> load is used; the checks expect the v3f32 call to stay at full width. |
| define amdgpu_ps float @extract_elt2_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt2_struct_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT2:%.*]] = extractelement <3 x float> [[DATA]], i64 2 |
| ; CHECK-NEXT: ret float [[ELT2]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %elt2 = extractelement <3 x float> %data, i32 2 |
| ret float %elt2 |
| } |
| |
| ; A shufflevector selects elements 0 and 1 of the <3 x float> load; the checks expect the call narrowed to v2f32 and returned directly, with no shuffle left. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_struct_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; A shufflevector selects elements 1 and 2 of the <3 x float> load; the checks expect both the v3f32 call and the shuffle to be left unchanged. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_tbuffer_load_v3f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_struct_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Element 0 is extracted after bitcasting the <4 x float> load to <4 x i32>; the checks expect a scalar f32 load followed by a bitcast of that float to i32. |
| ; Note that this function is defined without the amdgpu_ps calling convention. |
| define i32 @extract0_bitcast_struct_tbuffer_load_v4f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_struct_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32 |
| ; CHECK-NEXT: ret i32 [[VAR2]] |
| ; |
| %var = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %var1 = bitcast <4 x float> %var to <4 x i32> |
| %var2 = extractelement <4 x i32> %var1, i32 0 |
| ret i32 %var2 |
| } |
| |
| ; The <2 x float> load carries !fpmath !0 and only element 0 is used; the checks expect the narrowed scalar f32 call to still carry an !fpmath attachment. |
| define amdgpu_ps float @preserve_metadata_extract_elt0_struct_tbuffer_load_v2f32(<4 x i32> inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_tbuffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32> [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0), !fpmath [[META0]] |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32> %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0), !fpmath !0 |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Declarations of the llvm.amdgcn.struct.tbuffer.load overloads for this section. |
| ; The v4i32 overload is declared but not called by any test in this section. |
| declare float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32, i32) #1 |
| declare <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32, i32) #1 |
| declare <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32, i32) #1 |
| |
| declare <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.struct.ptr.tbuffer.load |
| ; -------------------------------------------------------------------- |
| |
| ; Baseline for the ptr addrspace(8) resource form: the scalar f32 load result is returned directly, and the checks expect the call to be left unchanged. |
| define amdgpu_ps float @struct_ptr_tbuffer_load_f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @struct_ptr_tbuffer_load_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| ret float %data |
| } |
| |
| ; Baseline: the whole <2 x float> result is returned, so the checks expect the v2f32 call to be left unchanged. |
| define amdgpu_ps <2 x float> @struct_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @struct_ptr_tbuffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| ret <2 x float> %data |
| } |
| |
| ; Baseline: the whole <4 x float> result is returned, so the checks expect the v4f32 call to be left unchanged. |
| define amdgpu_ps <4 x float> @struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @struct_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <4 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| ret <4 x float> %data |
| } |
| |
| ; Only element 0 of the <2 x float> load is used; the checks expect the call rewritten to the scalar f32 overload with no extractelement left. |
| define amdgpu_ps float @extract_elt0_struct_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_ptr_tbuffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Element 1 of the <2 x float> load is used; the checks expect the v2f32 call to stay, followed by an extractelement with an i64 index. |
| define amdgpu_ps float @extract_elt1_struct_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_ptr_tbuffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %elt1 = extractelement <2 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Only element 0 of the <4 x float> load is used; the checks expect the call rewritten to the scalar f32 overload with no extractelement left. |
| define amdgpu_ps float @extract_elt0_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Element 1 of the <4 x float> load is used; the checks expect the call narrowed to the v2f32 overload, still extracting index 1. |
| define amdgpu_ps float @extract_elt1_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Element 2 of the <4 x float> load is used; the checks expect the call narrowed to the v3f32 overload, still extracting index 2. |
| define amdgpu_ps float @extract_elt2_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt2_struct_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT2:%.*]] = extractelement <3 x float> [[DATA]], i64 2 |
| ; CHECK-NEXT: ret float [[ELT2]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %elt2 = extractelement <4 x float> %data, i32 2 |
| ret float %elt2 |
| } |
| |
| ; Element 3 (the last one) of the <4 x float> load is used; the checks expect the v4f32 call to stay at full width. |
| define amdgpu_ps float @extract_elt3_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt3_struct_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT3:%.*]] = extractelement <4 x float> [[DATA]], i64 3 |
| ; CHECK-NEXT: ret float [[ELT3]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %elt3 = extractelement <4 x float> %data, i32 3 |
| ret float %elt3 |
| } |
| |
| ; A shufflevector selects elements 0 and 1 of the <4 x float> load; the checks expect the call narrowed to v2f32 and returned directly, with no shuffle left. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; A shufflevector selects elements 1 and 2 of the <4 x float> load; the checks expect the call narrowed to v3f32 with the shuffle kept on the <3 x float> value. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; A shufflevector selects elements 2 and 3 of the <4 x float> load; the checks expect both the v4f32 call and the shuffle to be left unchanged. |
| define amdgpu_ps <2 x float> @extract_elt2_elt3_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt2_elt3_struct_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 2, i32 3> |
| ret <2 x float> %shuf |
| } |
| |
| ; A shufflevector selects elements 0, 1 and 2 of the <4 x float> load; the checks expect the call narrowed to v3f32 and returned directly, with no shuffle left. |
| define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_elt2_struct_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| ret <3 x float> %shuf |
| } |
| |
| ; A shufflevector selects elements 1, 2 and 3 of the <4 x float> load; the checks expect both the v4f32 call and the shuffle to be left unchanged. |
| define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_elt3_struct_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; A shufflevector selects elements 0, 2 and 3 of the <4 x float> load; the checks expect both the v4f32 call and the shuffle to be left unchanged. |
| define amdgpu_ps <3 x float> @extract_elt0_elt2_elt3_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt2_elt3_struct_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[DATA]], <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| ; Only element 0 of the <3 x float> load is used; the checks expect the call rewritten to the scalar f32 overload with no extractelement left. |
| define amdgpu_ps float @extract_elt0_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt0_struct_ptr_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %elt0 = extractelement <3 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Element 1 of the <3 x float> load is used; the checks expect the call narrowed to the v2f32 overload, still extracting index 1. |
| define amdgpu_ps float @extract_elt1_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt1_struct_ptr_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT1:%.*]] = extractelement <2 x float> [[DATA]], i64 1 |
| ; CHECK-NEXT: ret float [[ELT1]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %elt1 = extractelement <3 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| ; Element 2 (the last one) of the <3 x float> load is used; the checks expect the v3f32 call to stay at full width. |
| define amdgpu_ps float @extract_elt2_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt2_struct_ptr_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[ELT2:%.*]] = extractelement <3 x float> [[DATA]], i64 2 |
| ; CHECK-NEXT: ret float [[ELT2]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %elt2 = extractelement <3 x float> %data, i32 2 |
| ret float %elt2 |
| } |
| |
| ; A shufflevector selects elements 0 and 1 of the <3 x float> load; the checks expect the call narrowed to v2f32 and returned directly, with no shuffle left. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_struct_ptr_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; A shufflevector selects elements 1 and 2 of the <3 x float> load; the checks expect both the v3f32 call and the shuffle to be left unchanged. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_struct_ptr_tbuffer_load_v3f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_struct_ptr_tbuffer_load_v3f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <3 x float> [[DATA]], <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %shuf = shufflevector <3 x float> %data, <3 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| ; Element 0 is extracted after bitcasting the <4 x float> load to <4 x i32>; the checks expect a scalar f32 load followed by a bitcast of that float to i32. |
| ; Note that this function is defined without the amdgpu_ps calling convention. |
| define i32 @extract0_bitcast_struct_ptr_tbuffer_load_v4f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @extract0_bitcast_struct_ptr_tbuffer_load_v4f32( |
| ; CHECK-NEXT: [[VAR:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0) |
| ; CHECK-NEXT: [[VAR2:%.*]] = bitcast float [[VAR]] to i32 |
| ; CHECK-NEXT: ret i32 [[VAR2]] |
| ; |
| %var = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0) |
| %var1 = bitcast <4 x float> %var to <4 x i32> |
| %var2 = extractelement <4 x i32> %var1, i32 0 |
| ret i32 %var2 |
| } |
| |
| ; The <2 x float> load carries !fpmath !0 and only element 0 is used; the checks expect the narrowed scalar f32 call to still carry an !fpmath attachment. |
| define amdgpu_ps float @preserve_metadata_extract_elt0_struct_ptr_tbuffer_load_v2f32(ptr addrspace(8) inreg %rsrc, i32 %arg0, i32 %arg1, i32 inreg %arg2) #0 { |
| ; CHECK-LABEL: @preserve_metadata_extract_elt0_struct_ptr_tbuffer_load_v2f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8) [[RSRC:%.*]], i32 [[ARG0:%.*]], i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 78, i32 0), !fpmath [[META0]] |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %arg0, i32 %arg1, i32 %arg2, i32 78, i32 0), !fpmath !0 |
| %elt0 = extractelement <2 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Declarations of the llvm.amdgcn.struct.ptr.tbuffer.load overloads for this section. |
| ; The v4i32 overload is declared but not called by any test in this section. |
| declare float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8), i32, i32, i32, i32, i32) #1 |
| declare <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32, i32) #1 |
| declare <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8), i32, i32, i32, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32, i32) #1 |
| |
| declare <4 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample |
| ; -------------------------------------------------------------------- |
| |
| ; Only element 0 of the <4 x float> sample is used; the checks expect the call rewritten to the scalar f32 overload with its leading i32 operand changed from 15 to 1. |
| define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Check that the intrinsic remains unchanged in the presence of TFE or LWE |
| ; This variant returns { <4 x float>, i32 } and passes i32 1 as the second-to-last operand |
| ; (the TFE case, going by the test name). Although only element 0 of the vector member is |
| ; used, the call must keep its v4f32 struct return type and dmask 15. |
| define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_tfe(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32_tfe( |
| ; CHECK-NEXT: [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 1, i32 0) |
| ; CHECK-NEXT: [[DATA_VEC:%.*]] = extractvalue { <4 x float>, i32 } [[DATA]], 0 |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA_VEC]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 1, i32 0) |
| %data.vec = extractvalue {<4 x float>,i32} %data, 0 |
| %elt0 = extractelement <4 x float> %data.vec, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Check that the intrinsic remains unchanged in the presence of TFE or LWE |
| ; This variant returns { <4 x float>, i32 } and passes i32 2 as the second-to-last operand |
| ; (the LWE case, going by the test name). Although only element 0 of the vector member is |
| ; used, the call must keep its v4f32 struct return type and dmask 15. |
| define amdgpu_ps float @extract_elt0_image_sample_1d_v4f32_f32_lwe(float %vaddr, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_1d_v4f32_f32_lwe( |
| ; CHECK-NEXT: [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float [[VADDR:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 2, i32 0) |
| ; CHECK-NEXT: [[DATA_VEC:%.*]] = extractvalue { <4 x float>, i32 } [[DATA]], 0 |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA_VEC]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32 15, float %vaddr, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 2, i32 0) |
| %data.vec = extractvalue {<4 x float>,i32} %data, 0 |
| %elt0 = extractelement <4 x float> %data.vec, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Same narrowing as the 1d case, for image.sample.2d with two coordinate operands: dmask 15 |
| ; with only element 0 used is expected to become a scalar f32 call with dmask 1. |
| define amdgpu_ps float @extract_elt0_image_sample_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_2d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.2d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32 15, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; With dmask 0 the call is expected to be left as-is: it keeps the v4f32 return type and the |
| ; extractelement of element 0 stays in the output. |
| define amdgpu_ps float @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32(float %s, float %t, float %r, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_dmask_0000_image_sample_3d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 0, float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32 0, float %s, float %t, float %r, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; dmask 1 (0b0001) on a v4f32 image.sample.1darray with only element 0 used: the call is |
| ; expected to become scalar f32 and the dmask stays 1. |
| define amdgpu_ps float @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32(float %s, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_dmask_0001_image_sample_1darray_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1darray.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[SLICE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32 1, float %s, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; dmask 2 (0b0010) with only result element 0 used: the call is expected to become scalar f32 |
| ; and the single set dmask bit is kept, so the dmask stays 2. |
| define amdgpu_ps float @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_dmask_0010_image_sample_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 2, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 2, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; dmask 4 (0b0100) with only result element 0 used: the call is expected to become scalar f32 |
| ; and the single set dmask bit is kept, so the dmask stays 4. |
| define amdgpu_ps float @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_dmask_0100_image_sample_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 4, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 4, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; dmask 8 (0b1000) with only result element 0 used: the call is expected to become scalar f32 |
| ; and the single set dmask bit is kept, so the dmask stays 8. |
| define amdgpu_ps float @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_dmask_1000_image_sample_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 8, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 8, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; dmask 9 (0b1001) with only result element 0 used: the expected scalar f32 call keeps only |
| ; the lowest set dmask bit, giving dmask 1. |
| define amdgpu_ps float @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_dmask_1001_image_sample_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 9, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; dmask 3 (0b0011) with only result element 0 used: the expected scalar f32 call keeps only |
| ; the lowest set dmask bit, giving dmask 1. |
| define amdgpu_ps float @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_dmask_0011_image_sample_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; dmask 7 (0b0111) with only result element 0 used: the expected scalar f32 call keeps only |
| ; the lowest set dmask bit, giving dmask 1. |
| define amdgpu_ps float @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_dmask_0111_image_sample_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; Elements 0 and 1 are requested but dmask 1 has a single bit set: the expected output is a |
| ; scalar f32 call (dmask 1) inserted into lane 0 of a poison <2 x float>. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_dmask_0001_image_sample_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = insertelement <2 x float> poison, float [[DATA]], i64 0 |
| ; CHECK-NEXT: ret <2 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Elements 0 and 1 are used with dmask 3 (0b0011): the call is expected to be narrowed to |
| ; v2f32 with the dmask unchanged, and the shufflevector disappears. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_dmask_0011_image_sample_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Elements 0 and 1 are used with dmask 7 (0b0111): the call is expected to be narrowed to |
| ; v2f32 and the dmask reduced to its two lowest set bits, giving 3. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_dmask_0111_image_sample_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Elements 0 and 1 are used with the non-contiguous dmask 5 (0b0101): the call is expected to |
| ; be narrowed to v2f32 with the dmask left at 5. |
| define amdgpu_ps <2 x float> @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_dmask_0101_image_sample_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 5, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 0, i32 1> |
| ret <2 x float> %shuf |
| } |
| |
| ; Elements 0..2 are requested but dmask 1 has a single bit set: the expected output is a |
| ; scalar f32 call (dmask 1) inserted into lane 0 of a poison <3 x float>. |
| define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0001_image_sample_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = insertelement <3 x float> poison, float [[DATA]], i64 0 |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 1, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| ret <3 x float> %shuf |
| } |
| |
| ; Elements 0..2 are requested but dmask 3 has only two bits set: the expected output is a |
| ; v2f32 call (dmask 3) widened to <3 x float> by a shufflevector whose last lane is poison. |
| define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0011_image_sample_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 3, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 poison> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 3, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| ret <3 x float> %shuf |
| } |
| |
| ; Elements 0..2 are requested with the non-contiguous dmask 5 (two bits set): the expected |
| ; output is a v2f32 call (dmask 5) widened to <3 x float> with a poison last lane. |
| define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0101_image_sample_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32.v8i32.v4i32(i32 5, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x float> [[DATA]], <2 x float> poison, <3 x i32> <i32 0, i32 1, i32 poison> |
| ; CHECK-NEXT: ret <3 x float> [[SHUF]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 5, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| ret <3 x float> %shuf |
| } |
| |
| ; Elements 0..2 are used with dmask 7 (0b0111): the call is expected to be narrowed to v3f32 |
| ; with the dmask unchanged, and the shufflevector disappears. |
| define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_0111_image_sample_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32.v8i32.v4i32(i32 7, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 7, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| ret <3 x float> %shuf |
| } |
| |
| ; Elements 0..2 are used with dmask 15 (0b1111): the call is expected to be narrowed to v3f32 |
| ; and the dmask reduced to its three lowest bits, giving 7. |
| define amdgpu_ps <3 x float> @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_elt1_elt2_dmask_1111_image_sample_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.1d.v3f32.f32.v8i32.v4i32(i32 7, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2> |
| ret <3 x float> %shuf |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.sl_v4f32i32s.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.cl |
| ; -------------------------------------------------------------------- |
| |
| ; Only element 1 of a dmask-15 v4f32 image.sample.cl.2darray result is used: the call is |
| ; expected to become scalar f32 with dmask 2 (the bit for element 1). |
| define amdgpu_ps float @extract_elt1_image_sample_cl_2darray_v4f32_f32(float %s, float %t, float %slice, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt1_image_sample_cl_2darray_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cl.2darray.f32.f32.v8i32.v4i32(i32 2, float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32.v8i32.v4i32(i32 15, float %s, float %t, float %slice, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.d |
| ; -------------------------------------------------------------------- |
| |
| ; Only element 2 of a dmask-15 v4f32 image.sample.d.cube result is used: the call is expected |
| ; to become scalar f32 with dmask 4 (the bit for element 2). |
| define amdgpu_ps float @extract_elt2_image_sample_d_cube_v4f32_f32_f32(float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt2_image_sample_d_cube_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cube.f32.f32.f32.v8i32.v4i32(i32 4, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[FACE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %face, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt2 = extractelement <4 x float> %data, i32 2 |
| ret float %elt2 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.d.cube.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.d.cl |
| ; -------------------------------------------------------------------- |
| |
| ; Only element 3 of a dmask-15 v4f32 image.sample.d.cl.1d result is used: the call is expected |
| ; to become scalar f32 with dmask 8 (the bit for element 3). |
| define amdgpu_ps float @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt3_image_sample_d_cl_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cl.1d.f32.f32.f32.v8i32.v4i32(i32 8, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt3 = extractelement <4 x float> %data, i32 3 |
| ret float %elt3 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.l |
| ; -------------------------------------------------------------------- |
| |
| ; A v2f32 image.sample.l.1d with dmask 6 (0b0110) of which only result element 1 is used: the |
| ; call is expected to become scalar f32 keeping the second set dmask bit, giving dmask 4. |
| define amdgpu_ps float @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32(float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt1_dmask_0110_image_sample_l_1d_v2f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.l.1d.f32.f32.v8i32.v4i32(i32 4, float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32.v8i32.v4i32(i32 6, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt1 = extractelement <2 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| declare <2 x float> @llvm.amdgcn.image.sample.l.1d.v2f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.b |
| ; -------------------------------------------------------------------- |
| |
| ; A v4f32 image.sample.b.1d with dmask 9 (0b1001) of which only result element 1 is used: the |
| ; call is expected to become scalar f32 keeping the second set dmask bit, giving dmask 8. |
| define amdgpu_ps float @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32(float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt1_dmask_1001_image_sample_b_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.1d.f32.f32.f32.v8i32.v4i32(i32 8, float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 9, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt1 = extractelement <4 x float> %data, i32 1 |
| ret float %elt1 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.b.cl |
| ; -------------------------------------------------------------------- |
| |
| ; Result elements 1 and 2 of a dmask-13 (0b1101) v4f32 image.sample.b.cl.1d are used: the call |
| ; is expected to be narrowed to v2f32 keeping the second and third set dmask bits, giving 12. |
| define amdgpu_ps <2 x float> @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32(float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_dmask_1101_image_sample_b_cl_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.b.cl.1d.v2f32.f32.f32.v8i32.v4i32(i32 12, float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 13, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 2> |
| ret <2 x float> %shuf |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.lz |
| ; -------------------------------------------------------------------- |
| |
| ; Elements 1 and 3 of a dmask-15 v4f32 image.sample.lz.1d are used: the call is expected to be |
| ; narrowed to v2f32 with dmask 10 (0b1010), and the shufflevector disappears. |
| define amdgpu_ps <2 x float> @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt3_image_sample_lz_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.lz.1d.v2f32.f32.v8i32.v4i32(i32 10, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret <2 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <2 x i32> <i32 1, i32 3> |
| ret <2 x float> %shuf |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.cd |
| ; -------------------------------------------------------------------- |
| |
| ; Elements 1, 2 and 3 of a dmask-15 v4f32 image.sample.cd.1d are used: the call is expected to |
| ; be narrowed to v3f32 with dmask 14 (0b1110), and the shufflevector disappears. |
| define amdgpu_ps <3 x float> @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32(float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt1_elt2_elt3_image_sample_cd_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x float> @llvm.amdgcn.image.sample.cd.1d.v3f32.f32.f32.v8i32.v4i32(i32 14, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret <3 x float> [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %shuf = shufflevector <4 x float> %data, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3> |
| ret <3 x float> %shuf |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.cd.cl |
| ; -------------------------------------------------------------------- |
| |
| ; Half-precision variant: only element 3 of a dmask-15 v4f16 image.sample.cd.cl.1d result is |
| ; used, so the call is expected to become scalar f16 with dmask 8. |
| define amdgpu_ps half @extract_elt3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt3_image_sample_cd_cl_1d_v4f16_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 8, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt3 = extractelement <4 x half> %data, i32 3 |
| ret half %elt3 |
| } |
| |
| ; Half-precision variant: only element 2 of a dmask-15 v4f16 image.sample.cd.cl.1d result is |
| ; used, so the call is expected to become scalar f16 with dmask 4. |
| define amdgpu_ps half @extract_elt2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt2_image_sample_cd_cl_1d_v4f16_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 4, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt2 = extractelement <4 x half> %data, i32 2 |
| ret half %elt2 |
| } |
| |
| ; Half-precision variant: only element 1 of a dmask-15 v4f16 image.sample.cd.cl.1d result is |
| ; used, so the call is expected to become scalar f16 with dmask 2. |
| define amdgpu_ps half @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt1_image_sample_cd_cl_1d_v4f16_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 2, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt1 = extractelement <4 x half> %data, i32 1 |
| ret half %elt1 |
| } |
| |
| ; The shuffle keeps lanes 0..2 of the load and takes lane 3 from the poison operand (mask |
| ; index 4). Expected output: a v3f16 call with dmask 7, widened back to <4 x half> by a |
| ; shufflevector whose last lane is poison. |
| define amdgpu_ps <4 x half> @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt_to3_image_sample_cd_cl_1d_v4f16_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <3 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v3f16.f32.f32.v8i32.v4i32(i32 7, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[RES:%.*]] = shufflevector <3 x half> [[DATA]], <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> |
| ; CHECK-NEXT: ret <4 x half> [[RES]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 4> |
| ret <4 x half> %res |
| } |
| |
| ; The shuffle keeps lanes 0..1 of the load and takes lanes 2..3 from the poison operand (mask |
| ; indices 4 and 5). Expected output: a v2f16 call with dmask 3, widened back to <4 x half> by |
| ; a shufflevector whose last two lanes are poison. |
| define amdgpu_ps <4 x half> @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt_to2_image_sample_cd_cl_1d_v4f16_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <2 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v2f16.f32.f32.v8i32.v4i32(i32 3, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x half> [[DATA]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> |
| ; CHECK-NEXT: ret <4 x half> [[RES]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> <i32 0, i32 1, i32 4, i32 5> |
| ret <4 x half> %res |
| } |
| |
| ; The shuffle keeps only lane 0 of the load; lanes 1..3 come from the poison operand (mask |
| ; indices 4, 5 and 6). Expected output: a scalar f16 call with dmask 1 inserted into lane 0 |
| ; of a poison <4 x half>. |
| define amdgpu_ps <4 x half> @extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt_to1_image_sample_cd_cl_1d_v4f16_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 1, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[RES:%.*]] = insertelement <4 x half> poison, half [[DATA]], i64 0 |
| ; CHECK-NEXT: ret <4 x half> [[RES]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %res = shufflevector <4 x half> %data, <4 x half> poison, <4 x i32> <i32 0, i32 4, i32 5, i32 6> |
| ret <4 x half> %res |
| } |
| |
| ; image.sample.cd.cl.1d (v4f16): the input call yields <4 x half> (leading i32 |
| ; operand 15) but only element 0 is extracted. The CHECK lines expect a call to |
| ; the scalar half overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps half @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32(float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_1d_v4f16_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call half @llvm.amdgcn.image.sample.cd.cl.1d.f16.f32.f32.v8i32.v4i32(i32 1, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret half [[DATA]] |
| ; |
| %data = call <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x half> %data, i32 0 |
| ret half %elt0 |
| } |
| |
| ; <4 x half> overload called by the image.sample.cd.cl.1d v4f16 tests above. |
| declare <4 x half> @llvm.amdgcn.image.sample.cd.cl.1d.v4f16.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.c.1d: the input call yields <4 x float> (leading i32 operand 15) |
| ; but only element 0 is extracted. The CHECK lines expect a call to the scalar |
| ; float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_c_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c.cl |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.c.cl.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_c_cl_1d_v4f32_f32(float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_cl_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cl.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c.d |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.c.d.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_d_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c.d.cl |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.c.d.cl.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.cl.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c.l |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.c.l.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_c_l_1d_v4f32_f32(float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_l_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.l.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c.b |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.c.b.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_b_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c.b.cl |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.c.b.cl.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32(float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.cl.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c.lz |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.c.lz.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_c_lz_1d_v4f32_f32(float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_lz_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.lz.1d.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32.v8i32.v4i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c.cd |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.c.cd.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c.cd.cl |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.c.cd.cl.1d: the input call yields <4 x float> (leading i32 |
| ; operand 15) but only element 0 is extracted. The CHECK lines expect a call to |
| ; the scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32(float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.cl.1d.f32.f32.f32.v8i32.v4i32(i32 1, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32.v8i32.v4i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.o |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.o.1d: the input call yields <4 x float> (leading i32 operand 15) |
| ; but only element 0 is extracted. The CHECK lines expect a call to the scalar |
| ; float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_o_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.cl.o |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.cl.o.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_cl_o_1d_v4f32_f32(i32 %offset, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_cl_o_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cl.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.d.o |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.d.o.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_d_o_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.d.cl.o |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.d.cl.o.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_d_cl_o_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.d.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.l.o |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.l.o.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_l_o_1d_v4f32_f32(i32 %offset, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_l_o_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.l.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.b.o |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.b.o.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_b_o_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.b.cl.o |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.b.cl.o.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_b_cl_o_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.b.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.lz.o |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.lz.o.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_lz_o_1d_v4f32_f32(i32 %offset, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_lz_o_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.lz.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.cd.o |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.cd.o.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_cd_o_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cd.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.cd.cl.o |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.cd.cl.o.1d: the input call yields <4 x float> (leading i32 |
| ; operand 15) but only element 0 is extracted. The CHECK lines expect a call to |
| ; the scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_cd_cl_o_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.cd.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c.o |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.c.o.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_c_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_o_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c.cl.o |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.c.cl.o.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_cl_o_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cl.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c.d.o |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.c.d.o.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_d_o_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c.d.cl.o |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.c.d.cl.o.1d: the input call yields <4 x float> (leading i32 |
| ; operand 15) but only element 0 is extracted. The CHECK lines expect a call to |
| ; the scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_d_cl_o_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.d.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c.l.o |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.c.l.o.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_c_l_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_l_o_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.l.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c.b.o |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.c.b.o.1d: the input call yields <4 x float> (leading i32 operand |
| ; 15) but only element 0 is extracted. The CHECK lines expect a call to the |
| ; scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_b_o_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c.b.cl.o |
| ; -------------------------------------------------------------------- |
| |
| ; image.sample.c.b.cl.o.1d: the input call yields <4 x float> (leading i32 |
| ; operand 15) but only element 0 is extracted. The CHECK lines expect a call to |
| ; the scalar float overload with that operand reduced to 1, returned directly. |
| define amdgpu_ps float @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_b_cl_o_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.b.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| ; <4 x float> overload called by the test above. |
| declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c.lz.o |
| ; -------------------------------------------------------------------- |
| |
| ; Only element 0 of the <4 x float> result is used. The expected output narrows the |
| ; call to a scalar float return and changes the leading i32 operand from 15 to 1. |
| define amdgpu_ps float @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32(i32 %offset, float %zcompare, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_lz_o_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.lz.o.1d.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32.v8i32.v4i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c.cd.o |
| ; -------------------------------------------------------------------- |
| |
| ; Only element 0 of the <4 x float> result is used. The expected output narrows the |
| ; call to a scalar float return and changes the leading i32 operand from 15 to 1. |
| define amdgpu_ps float @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_o_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.sample.c.cd.cl.o |
| ; -------------------------------------------------------------------- |
| |
| ; Only element 0 of the <4 x float> result is used. The expected output narrows the |
| ; call to a scalar float return and changes the leading i32 operand from 15 to 1. |
| define amdgpu_ps float @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32(i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_sample_c_cd_cl_o_1d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.sample.c.cd.cl.o.1d.f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32.v8i32.v4i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.gather4 |
| ; -------------------------------------------------------------------- |
| |
| ; Don't handle gather4* |
| |
| ; Negative test: only element 0 is used, but the expected output keeps the <4 x float> |
| ; call and its leading i32 operand (1) unchanged. The extractelement index is printed as |
| ; i64, and the expected callee name gains a .v4i32 suffix relative to the input name. |
| define amdgpu_ps float @extract_elt0_image_gather4_2d_v4f32_f32(float %s, float %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_gather4_2d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32(i32 1, float %s, float %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32.v8i32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.gather4.cl |
| ; -------------------------------------------------------------------- |
| |
| ; Negative test: only element 0 is used, but the expected output keeps the <4 x float> |
| ; call and its leading i32 operand (2) unchanged. The extractelement index is printed as |
| ; i64, and the expected callee name gains a .v4i32 suffix relative to the input name. |
| define amdgpu_ps float @extract_elt0_image_gather4_cl_2d_v4f32_f32(float %s, float %t, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_gather4_cl_2d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32.v8i32.v4i32(i32 2, float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32.v8i32(i32 2, float %s, float %t, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32.v8i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.gather4.l |
| ; -------------------------------------------------------------------- |
| |
| ; Negative test: only element 0 is used, but the expected output keeps the <4 x float> |
| ; call and its leading i32 operand (4) unchanged. The extractelement index is printed as |
| ; i64, and the expected callee name gains a .v4i32 suffix relative to the input name. |
| define amdgpu_ps float @extract_elt0_image_gather4_l_2d_v4f32_f32(float %s, float %t, float %lod, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_gather4_l_2d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32.v4i32(i32 4, float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32(i32 4, float %s, float %t, float %lod, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32.v8i32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.gather4.b |
| ; -------------------------------------------------------------------- |
| |
| ; Negative test: only element 0 is used, but the expected output keeps the <4 x float> |
| ; call and its leading i32 operand (8) unchanged. The extractelement index is printed as |
| ; i64, and the expected callee name gains a .v4i32 suffix relative to the input name. |
| define amdgpu_ps float @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32(float %bias, float %s, float %t, float %slice, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_gather4_b_2darray_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32.v8i32.v4i32(i32 8, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32.v8i32(i32 8, float %bias, float %s, float %t, float %slice, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.gather4.b.2darray.v4f32.f32.f32.v8i32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.gather4.b.cl |
| ; -------------------------------------------------------------------- |
| |
| ; Negative test: only element 0 is used, but the expected output keeps the <4 x float> |
| ; call and its leading i32 operand (1) unchanged. The extractelement index is printed as |
| ; i64, and the expected callee name gains a .v4i32 suffix relative to the input name. |
| define amdgpu_ps float @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32(float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_cube_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[FACE:%.*]], float [[CLAMP:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32.v8i32(i32 1, float %bias, float %s, float %t, float %face, float %clamp, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.cube.v4f32.f32.f32.v8i32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.gather4.lz |
| ; -------------------------------------------------------------------- |
| |
| ; Negative test with half-typed coordinates: only element 0 is used, but the expected |
| ; output keeps the <4 x float> call and its leading i32 operand (1) unchanged. The |
| ; extractelement index is printed as i64, and the callee name gains a .v4i32 suffix. |
| define amdgpu_ps float @extract_elt0_image_gather4_lz_2d_v4f32_f16(half %s, half %t, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_gather4_lz_2d_v4f32_f16( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16.v8i32.v4i32(i32 1, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16.v8i32(i32 1, half %s, half %t, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16.v8i32(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.gather4.o |
| ; -------------------------------------------------------------------- |
| |
| ; Negative test: only element 0 is used, but the expected output keeps the <4 x float> |
| ; call and its leading i32 operand (1) unchanged. The extractelement index is printed as |
| ; i64, and the expected callee name gains a .v4i32 suffix relative to the input name. |
| ; NOTE(review): the <8 x i32> argument is named %gather4r where the non-offset gather4 |
| ; tests name it %sampler; this looks like a rename artifact. Confirm before changing, |
| ; since the autogenerated assertions capture it as GATHER4R. |
| define amdgpu_ps float @extract_elt0_image_gather4_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_gather4_o_2d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32.v8i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.gather4.cl.o |
| ; -------------------------------------------------------------------- |
| |
| ; Negative test: only element 0 is used, but the expected output keeps the <4 x float> |
| ; call and its leading i32 operand (1) unchanged. The extractelement index is printed as |
| ; i64, and the expected callee name gains a .v4i32 suffix relative to the input name. |
| ; NOTE(review): %gather4r looks like a rename artifact of %sampler; confirm. |
| define amdgpu_ps float @extract_elt0_image_gather4_cl_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_gather4_cl_o_2d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.gather4.l.o |
| ; -------------------------------------------------------------------- |
| |
| ; Negative test: only element 0 is used, but the expected output keeps the <4 x float> |
| ; call and its leading i32 operand (1) unchanged. The extractelement index is printed as |
| ; i64, and the expected callee name gains a .v4i32 suffix relative to the input name. |
| ; NOTE(review): %gather4r looks like a rename artifact of %sampler; confirm. |
| define amdgpu_ps float @extract_elt0_image_gather4_l_o_2d_v4f32_f32(i32 %offset, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_gather4_l_o_2d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.gather4.b.o |
| ; -------------------------------------------------------------------- |
| |
| ; Negative test: only element 0 is used, but the expected output keeps the <4 x float> |
| ; call and its leading i32 operand (1) unchanged. The extractelement index is printed as |
| ; i64, and the expected callee name gains a .v4i32 suffix relative to the input name. |
| ; NOTE(review): %gather4r looks like a rename artifact of %sampler; confirm. |
| define amdgpu_ps float @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_gather4_b_o_2d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.gather4.b.cl.o |
| ; -------------------------------------------------------------------- |
| |
| ; Negative test: only element 0 is used, but the expected output keeps the <4 x float> |
| ; call and its leading i32 operand (1) unchanged. The extractelement index is printed as |
| ; i64, and the expected callee name gains a .v4i32 suffix relative to the input name. |
| ; NOTE(review): %gather4r looks like a rename artifact of %sampler; confirm. |
| define amdgpu_ps float @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_gather4_b_cl_o_2d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.gather4.lz.o |
| ; -------------------------------------------------------------------- |
| |
| ; Negative test: only element 0 is used, but the expected output keeps the <4 x float> |
| ; call and its leading i32 operand (1) unchanged. The extractelement index is printed as |
| ; i64, and the expected callee name gains a .v4i32 suffix relative to the input name. |
| ; NOTE(review): %gather4r looks like a rename artifact of %sampler; confirm. |
| define amdgpu_ps float @extract_elt0_image_gather4_lz_o_2d_v4f32_f32(i32 %offset, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_gather4_lz_o_2d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32.v8i32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.gather4.c.o |
| ; -------------------------------------------------------------------- |
| |
| ; Negative test: only element 0 is used, but the expected output keeps the <4 x float> |
| ; call and its leading i32 operand (1) unchanged. The extractelement index is printed as |
| ; i64, and the expected callee name gains a .v4i32 suffix relative to the input name. |
| ; NOTE(review): %gather4r looks like a rename artifact of %sampler; confirm. |
| define amdgpu_ps float @extract_elt0_image_gather4_c_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_gather4_c_o_2d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.gather4.c.cl.o |
| ; -------------------------------------------------------------------- |
| |
| ; Negative test: only element 0 is used, but the expected output keeps the <4 x float> |
| ; call and its leading i32 operand (1) unchanged. The extractelement index is printed as |
| ; i64, and the expected callee name gains a .v4i32 suffix relative to the input name. |
| ; NOTE(review): %gather4r looks like a rename artifact of %sampler; confirm. |
| define amdgpu_ps float @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_gather4_c_cl_o_2d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.gather4.c.l.o |
| ; -------------------------------------------------------------------- |
| |
| ; Negative test: only element 0 is used, but the expected output keeps the <4 x float> |
| ; call and its leading i32 operand (1) unchanged. The extractelement index is printed as |
| ; i64, and the expected callee name gains a .v4i32 suffix relative to the input name. |
| ; NOTE(review): %gather4r looks like a rename artifact of %sampler; confirm. |
| define amdgpu_ps float @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_gather4_c_l_o_2d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.gather4.c.b.o |
| ; -------------------------------------------------------------------- |
| |
| ; Negative test: only element 0 is used, but the expected output keeps the <4 x float> |
| ; call and its leading i32 operand (1) unchanged. The extractelement index is printed as |
| ; i64, and the expected callee name gains a .v4i32 suffix relative to the input name. |
| ; NOTE(review): %gather4r looks like a rename artifact of %sampler; confirm. |
| define amdgpu_ps float @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_gather4_c_b_o_2d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.gather4.c.b.cl.o |
| ; -------------------------------------------------------------------- |
| |
| ; Negative test: only element 0 is used, but the expected output keeps the <4 x float> |
| ; call and its leading i32 operand (1) unchanged. The extractelement index is printed as |
| ; i64, and the expected callee name gains a .v4i32 suffix relative to the input name. |
| ; NOTE(review): %gather4r looks like a rename artifact of %sampler; confirm. |
| define amdgpu_ps float @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32(i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_gather4_c_b_cl_o_2d_v4f32_f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32.v8i32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32.v8i32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.gather4.c.lz.o |
| ; -------------------------------------------------------------------- |
| |
| ; Negative test: only element 0 is used, but the expected output keeps the <4 x float> |
| ; call and its leading i32 operand (1) unchanged. The extractelement index is printed as |
| ; i64, and the expected callee name gains a .v4i32 suffix relative to the input name. |
| ; NOTE(review): %gather4r looks like a rename artifact of %sampler; confirm. |
| define amdgpu_ps float @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32(i32 %offset, float %zcompare, float %s, float %t, <8 x i32> inreg %gather4r, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_gather4_c_lz_o_2d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32.v4i32(i32 1, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[GATHER4R:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[DATA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %gather4r, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32.v8i32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.getlod |
| ; -------------------------------------------------------------------- |
| |
| ; Only element 0 of the <4 x float> result is used. The expected output narrows the |
| ; call to a scalar float return and changes the leading i32 operand from 15 to 1. |
| ; The expected callee name also gains a .v4i32 suffix relative to the input name. |
| define amdgpu_ps float @extract_elt0_image_getlod_1d_v4f32_f32(float %s, <8 x i32> inreg %sampler, <4 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_getlod_1d_v4f32_f32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.getlod.1d.f32.f32.v8i32.v4i32(i32 1, float [[S:%.*]], <8 x i32> [[SAMPLER:%.*]], <4 x i32> [[RSRC:%.*]], i1 false, i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32.v8i32(i32 15, float %s, <8 x i32> %sampler, <4 x i32> %rsrc, i1 false, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32.v8i32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.load |
| ; -------------------------------------------------------------------- |
| |
| ; Only element 0 of the <4 x float> result is used. The expected output narrows the |
| ; call to a scalar float return and changes the leading i32 operand from 15 to 1. |
| ; This intrinsic takes a single <8 x i32> vector operand (named %sampler here). |
| define amdgpu_ps float @extract_elt0_image_load_2dmsaa_v4f32_i32(i32 %s, i32 %t, i32 %sample, <8 x i32> inreg %sampler) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_load_2dmsaa_v4f32_i32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.load.2dmsaa.f32.i32.v8i32(i32 1, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[SAMPLE:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32.v8i32(i32 15, i32 %s, i32 %t, i32 %sample, <8 x i32> %sampler, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32.v8i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.load.mip |
| ; -------------------------------------------------------------------- |
| |
| ; Only element 0 of the <4 x float> result is used. The expected output narrows the |
| ; call to a scalar float return and changes the leading i32 operand from 15 to 1. |
| define amdgpu_ps float @extract_elt0_image_load_mip_1d_v4f32_i32(i32 %s, i32 %mip, <8 x i32> inreg %sampler) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_load_mip_1d_v4f32_i32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.load.mip.1d.f32.i32.v8i32(i32 1, i32 [[S:%.*]], i32 [[MIP:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32 15, i32 %s, i32 %mip, <8 x i32> %sampler, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32.v8i32(i32, i32, i32, <8 x i32>, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; llvm.amdgcn.image.getresinfo |
| ; -------------------------------------------------------------------- |
| |
| ; Only element 0 of the <4 x float> result is used. The expected output narrows the |
| ; call to a scalar float return and changes the leading i32 operand from 15 to 1. |
| define amdgpu_ps float @extract_elt0_image_getresinfo_1d_v4f32_i32(i32 %mip, <8 x i32> inreg %sampler) #0 { |
| ; CHECK-LABEL: @extract_elt0_image_getresinfo_1d_v4f32_i32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.getresinfo.1d.f32.i32.v8i32(i32 1, i32 [[MIP:%.*]], <8 x i32> [[SAMPLER:%.*]], i32 0, i32 0) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32.v8i32(i32 15, i32 %mip, <8 x i32> %sampler, i32 0, i32 0) |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #1 |
| |
| ; -------------------------------------------------------------------- |
| ; TFE / LWE |
| ; -------------------------------------------------------------------- |
| |
| ; The intrinsic here returns a { <4 x float>, i32 } struct and its last operand is 1. |
| ; Element 0 is reached through an extractvalue followed by an extractelement. |
| ; The expected output leaves the call unchanged (leading i32 operand stays 15) and |
| ; keeps both extracts; only the extractelement index is printed as i64. |
| define amdgpu_ps float @extract_elt0_tfe_image_load_1d_v4f32i32_i32(i32 %s, <8 x i32> inreg %rsrc) #0 { |
| ; CHECK-LABEL: @extract_elt0_tfe_image_load_1d_v4f32i32_i32( |
| ; CHECK-NEXT: [[DATA:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 1) |
| ; CHECK-NEXT: [[RGBA:%.*]] = extractvalue { <4 x float>, i32 } [[DATA]], 0 |
| ; CHECK-NEXT: [[ELT0:%.*]] = extractelement <4 x float> [[RGBA]], i64 0 |
| ; CHECK-NEXT: ret float [[ELT0]] |
| ; |
| %data = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1) |
| %rgba = extractvalue { <4 x float>, i32 } %data, 0 |
| %elt0 = extractelement <4 x float> %rgba, i32 0 |
| ret float %elt0 |
| } |
| |
| declare {<4 x float>, i32} @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32.v8i32(i32, i32, <8 x i32>, i32, i32) #1 |
| |
| ; Plain <4 x float> image load with undef operands and a last operand of 1, in an |
| ; amdgpu_hs function. The expected output narrows the call to a scalar float return |
| ; with the leading i32 operand changed from 15 to 1; the other operands are unchanged. |
| ; NOTE(review): the call carries an nsz flag that does not appear in the expected |
| ; output, and it references attribute group #2, which is not among the groups defined |
| ; at the end of this file (#0 and #1). Confirm both are intentional. |
| define amdgpu_hs float @tfe_check_assert() #0 { |
| ; CHECK-LABEL: @tfe_check_assert( |
| ; CHECK-NEXT: [[DATA:%.*]] = call float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 1, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1) |
| ; CHECK-NEXT: ret float [[DATA]] |
| ; |
| %data = call nsz <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 15, i32 undef, i32 undef, <8 x i32> undef, i32 0, i32 1) #2 |
| %elt0 = extractelement <4 x float> %data, i32 0 |
| ret float %elt0 |
| } |
| |
| declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1 |
| |
| ; Attribute groups: #0 is attached to the test function definitions above, |
| ; #1 to the intrinsic declarations. |
| attributes #0 = { nounwind } |
| attributes #1 = { nounwind readonly } |
| |
| ; NOTE(review): !0 is not referenced in the final part of this file; presumably an |
| ; earlier test uses it. Confirm before removing. |
| !0 = !{float 2.500000e+00} |