| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare -amdgpu-codegenprepare-widen-constant-loads < %s | FileCheck -check-prefix=OPT %s |
| |
| ; Intrinsic returning a ptr addrspace(4); @use_dispatch_ptr below loads through its result. |
| declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0 |
| |
| ; i1 load from addrspace(4) with no explicit alignment (printed as align 1 in the |
| ; expected output): the load is expected to stay an i1 load, i.e. it is not widened. |
| define amdgpu_kernel void @constant_load_i1(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_i1( |
| ; OPT-NEXT: [[VAL:%.*]] = load i1, ptr addrspace(4) [[IN:%.*]], align 1 |
| ; OPT-NEXT: store i1 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 1 |
| ; OPT-NEXT: ret void |
| ; |
| %val = load i1, ptr addrspace(4) %in |
| store i1 %val, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; i1 load from addrspace(4) with align 2: the expected output keeps the i1 load unchanged. |
| define amdgpu_kernel void @constant_load_i1_align2(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_i1_align2( |
| ; OPT-NEXT: [[VAL:%.*]] = load i1, ptr addrspace(4) [[IN:%.*]], align 2 |
| ; OPT-NEXT: store i1 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 2 |
| ; OPT-NEXT: ret void |
| ; |
| %val = load i1, ptr addrspace(4) %in, align 2 |
| store i1 %val, ptr addrspace(1) %out, align 2 |
| ret void |
| } |
| |
| ; i1 load from addrspace(4) with align 4: the expected output replaces it with an |
| ; i32 load followed by a trunc back to i1, and the store uses the truncated value. |
| define amdgpu_kernel void @constant_load_i1_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_i1_align4( |
| ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4 |
| ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i1 |
| ; OPT-NEXT: store i1 [[TMP3]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; OPT-NEXT: ret void |
| ; |
| %val = load i1, ptr addrspace(4) %in, align 4 |
| store i1 %val, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; i8 load from addrspace(4) with no explicit alignment (printed as align 1): the |
| ; expected output keeps the i8 load unchanged. |
| define amdgpu_kernel void @constant_load_i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_i8( |
| ; OPT-NEXT: [[VAL:%.*]] = load i8, ptr addrspace(4) [[IN:%.*]], align 1 |
| ; OPT-NEXT: store i8 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 1 |
| ; OPT-NEXT: ret void |
| ; |
| %val = load i8, ptr addrspace(4) %in |
| store i8 %val, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; i8 load from addrspace(4) with align 2: the expected output keeps the i8 load unchanged. |
| define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_i8_align2( |
| ; OPT-NEXT: [[VAL:%.*]] = load i8, ptr addrspace(4) [[IN:%.*]], align 2 |
| ; OPT-NEXT: store i8 [[VAL]], ptr addrspace(1) [[OUT:%.*]], align 2 |
| ; OPT-NEXT: ret void |
| ; |
| %val = load i8, ptr addrspace(4) %in, align 2 |
| store i8 %val, ptr addrspace(1) %out, align 2 |
| ret void |
| } |
| |
| ; i8 load from addrspace(4) with align 4: the expected output replaces it with an |
| ; i32 load followed by a trunc back to i8. |
| define amdgpu_kernel void @constant_load_i8align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_i8align4( |
| ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4 |
| ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8 |
| ; OPT-NEXT: store i8 [[TMP3]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; OPT-NEXT: ret void |
| ; |
| %val = load i8, ptr addrspace(4) %in, align 4 |
| store i8 %val, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; <2 x i8> load from addrspace(4) with no explicit alignment (printed as align 2): |
| ; the expected output keeps the vector load unchanged. |
| define amdgpu_kernel void @constant_load_v2i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_v2i8( |
| ; OPT-NEXT: [[LD:%.*]] = load <2 x i8>, ptr addrspace(4) [[IN:%.*]], align 2 |
| ; OPT-NEXT: store <2 x i8> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2 |
| ; OPT-NEXT: ret void |
| ; |
| %ld = load <2 x i8>, ptr addrspace(4) %in |
| store <2 x i8> %ld, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; <2 x i8> load from addrspace(4) with align 4: the expected output loads an i32, |
| ; truncates it to i16 and bitcasts that to <2 x i8> before the store. |
| define amdgpu_kernel void @constant_load_v2i8_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_v2i8_align4( |
| ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4 |
| ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16 |
| ; OPT-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP3]] to <2 x i8> |
| ; OPT-NEXT: store <2 x i8> [[TMP4]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; OPT-NEXT: ret void |
| ; |
| %ld = load <2 x i8>, ptr addrspace(4) %in, align 4 |
| store <2 x i8> %ld, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; <3 x i8> load from addrspace(4) with no explicit alignment: the expected output |
| ; shows align 4 and a widened form (i32 load, trunc to i24, bitcast to <3 x i8>). |
| define amdgpu_kernel void @constant_load_v3i8(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_v3i8( |
| ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4 |
| ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i24 |
| ; OPT-NEXT: [[TMP4:%.*]] = bitcast i24 [[TMP3]] to <3 x i8> |
| ; OPT-NEXT: store <3 x i8> [[TMP4]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; OPT-NEXT: ret void |
| ; |
| %ld = load <3 x i8>, ptr addrspace(4) %in |
| store <3 x i8> %ld, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Same as @constant_load_v3i8 but with align 4 written explicitly on the load and |
| ; store; the expected output is the same i32 load / trunc to i24 / bitcast sequence. |
| define amdgpu_kernel void @constant_load_v3i8_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_v3i8_align4( |
| ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4 |
| ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i24 |
| ; OPT-NEXT: [[TMP4:%.*]] = bitcast i24 [[TMP3]] to <3 x i8> |
| ; OPT-NEXT: store <3 x i8> [[TMP4]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; OPT-NEXT: ret void |
| ; |
| %ld = load <3 x i8>, ptr addrspace(4) %in, align 4 |
| store <3 x i8> %ld, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; i16 load from addrspace(4) with no explicit alignment (printed as align 2), whose |
| ; result is sign-extended to i32: the expected output keeps the i16 load and sext as written. |
| define amdgpu_kernel void @constant_load_i16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_i16( |
| ; OPT-NEXT: [[LD:%.*]] = load i16, ptr addrspace(4) [[IN:%.*]], align 2 |
| ; OPT-NEXT: [[EXT:%.*]] = sext i16 [[LD]] to i32 |
| ; OPT-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; OPT-NEXT: ret void |
| ; |
| %ld = load i16, ptr addrspace(4) %in |
| %ext = sext i16 %ld to i32 |
| store i32 %ext, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; i16 load from addrspace(4) with align 4, sign-extended to i32: the expected output |
| ; loads an i32, truncates to i16, and the original sext then consumes the trunc result. |
| define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_i16_align4( |
| ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4 |
| ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16 |
| ; OPT-NEXT: [[EXT:%.*]] = sext i16 [[TMP3]] to i32 |
| ; OPT-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; OPT-NEXT: ret void |
| ; |
| %ld = load i16, ptr addrspace(4) %in, align 4 |
| %ext = sext i16 %ld to i32 |
| store i32 %ext, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; half load from addrspace(4) with no explicit alignment (printed as align 2): the |
| ; expected output keeps the half load unchanged. |
| define amdgpu_kernel void @constant_load_f16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_f16( |
| ; OPT-NEXT: [[LD:%.*]] = load half, ptr addrspace(4) [[IN:%.*]], align 2 |
| ; OPT-NEXT: store half [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2 |
| ; OPT-NEXT: ret void |
| ; |
| %ld = load half, ptr addrspace(4) %in |
| store half %ld, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; <2 x half> load from addrspace(4) with no explicit alignment (printed as align 4): |
| ; the expected output keeps the vector load unchanged. |
| define amdgpu_kernel void @constant_load_v2f16(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_v2f16( |
| ; OPT-NEXT: [[LD:%.*]] = load <2 x half>, ptr addrspace(4) [[IN:%.*]], align 4 |
| ; OPT-NEXT: store <2 x half> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; OPT-NEXT: ret void |
| ; |
| %ld = load <2 x half>, ptr addrspace(4) %in |
| store <2 x half> %ld, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Volatile i16 load from addrspace(4): the expected output keeps the volatile load |
| ; unchanged. Note that this kernel carries no attribute group. |
| define amdgpu_kernel void @load_volatile(ptr addrspace(1) %out, ptr addrspace(4) %in) { |
| ; OPT-LABEL: @load_volatile( |
| ; OPT-NEXT: [[A:%.*]] = load volatile i16, ptr addrspace(4) [[IN:%.*]], align 2 |
| ; OPT-NEXT: store i16 [[A]], ptr addrspace(1) [[OUT:%.*]], align 2 |
| ; OPT-NEXT: ret void |
| ; |
| %a = load volatile i16, ptr addrspace(4) %in |
| store i16 %a, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Volatile <2 x i8> load from addrspace(4): the expected output keeps the volatile |
| ; vector load unchanged. |
| define amdgpu_kernel void @constant_load_v2i8_volatile(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_v2i8_volatile( |
| ; OPT-NEXT: [[LD:%.*]] = load volatile <2 x i8>, ptr addrspace(4) [[IN:%.*]], align 2 |
| ; OPT-NEXT: store <2 x i8> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2 |
| ; OPT-NEXT: ret void |
| ; |
| %ld = load volatile <2 x i8>, ptr addrspace(4) %in |
| store <2 x i8> %ld, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; <2 x i8> load where %in is a ptr addrspace(1) rather than addrspace(4): the |
| ; expected output keeps the load unchanged. |
| define amdgpu_kernel void @constant_load_v2i8_addrspace1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { |
| ; OPT-LABEL: @constant_load_v2i8_addrspace1( |
| ; OPT-NEXT: [[LD:%.*]] = load <2 x i8>, ptr addrspace(1) [[IN:%.*]], align 2 |
| ; OPT-NEXT: store <2 x i8> [[LD]], ptr addrspace(1) [[OUT:%.*]], align 2 |
| ; OPT-NEXT: ret void |
| ; |
| %ld = load <2 x i8>, ptr addrspace(1) %in |
| store <2 x i8> %ld, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; i8 load with align 4 through the pointer returned by @llvm.amdgcn.dispatch.ptr, |
| ; zero-extended to i32: the expected output loads an i32, truncates it to i8, and the |
| ; original zext then consumes the trunc result. |
| define amdgpu_kernel void @use_dispatch_ptr(ptr addrspace(1) %ptr) #0 { |
| ; OPT-LABEL: @use_dispatch_ptr( |
| ; OPT-NEXT: [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[DISPATCH_PTR]], align 4 |
| ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8 |
| ; OPT-NEXT: [[LD:%.*]] = zext i8 [[TMP3]] to i32 |
| ; OPT-NEXT: store i32 [[LD]], ptr addrspace(1) [[PTR:%.*]], align 4 |
| ; OPT-NEXT: ret void |
| ; |
| %dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() |
| %val = load i8, ptr addrspace(4) %dispatch.ptr, align 4 |
| %ld = zext i8 %val to i32 |
| store i32 %ld, ptr addrspace(1) %ptr |
| ret void |
| } |
| |
| ; Align-4 i16 load carrying !range !0: the expected output widens it to an i32 load |
| ; that still has a !range attachment (captured as RNG0), followed by trunc and sext. |
| define amdgpu_kernel void @constant_load_i16_align4_range(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_i16_align4_range( |
| ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG0:![0-9]+]] |
| ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16 |
| ; OPT-NEXT: [[EXT:%.*]] = sext i16 [[TMP3]] to i32 |
| ; OPT-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; OPT-NEXT: ret void |
| ; |
| %ld = load i16, ptr addrspace(4) %in, align 4, !range !0 |
| %ext = sext i16 %ld to i32 |
| store i32 %ext, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Align-4 i16 load carrying !range !1 (upper bound -1): the expected widened i32 load |
| ; reuses the same range node as @constant_load_i16_align4_range (RNG0 is referenced, |
| ; not redefined). |
| define amdgpu_kernel void @constant_load_i16_align4_range_max(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_i16_align4_range_max( |
| ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG0]] |
| ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16 |
| ; OPT-NEXT: [[EXT:%.*]] = sext i16 [[TMP3]] to i32 |
| ; OPT-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; OPT-NEXT: ret void |
| ; |
| %ld = load i16, ptr addrspace(4) %in, align 4, !range !1 |
| %ext = sext i16 %ld to i32 |
| store i32 %ext, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Align-4 i16 load carrying the two-interval !range !2: the expected widened i32 |
| ; load has a !range attachment captured as a new node (RNG1). |
| define amdgpu_kernel void @constant_load_i16_align4_complex_range(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_i16_align4_complex_range( |
| ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG1:![0-9]+]] |
| ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16 |
| ; OPT-NEXT: [[EXT:%.*]] = sext i16 [[TMP3]] to i32 |
| ; OPT-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; OPT-NEXT: ret void |
| ; |
| %ld = load i16, ptr addrspace(4) %in, align 4, !range !2 |
| %ext = sext i16 %ld to i32 |
| store i32 %ext, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Align-4 i16 load carrying !range !3 (lower bound 0): the recorded output for the |
| ; widened i32 load shows no !range attachment. |
| define amdgpu_kernel void @constant_load_i16_align4_range_from_0(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_i16_align4_range_from_0( |
| ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4 |
| ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16 |
| ; OPT-NEXT: [[EXT:%.*]] = sext i16 [[TMP3]] to i32 |
| ; OPT-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; OPT-NEXT: ret void |
| ; |
| %ld = load i16, ptr addrspace(4) %in, align 4, !range !3 |
| %ext = sext i16 %ld to i32 |
| store i32 %ext, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Align-4 i16 load carrying !range !4 (negative lower bound): the expected widened |
| ; i32 load has a !range attachment captured as a new node (RNG2). |
| define amdgpu_kernel void @constant_load_i16_align4_range_from_neg(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_i16_align4_range_from_neg( |
| ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG2:![0-9]+]] |
| ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16 |
| ; OPT-NEXT: [[EXT:%.*]] = sext i16 [[TMP3]] to i32 |
| ; OPT-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; OPT-NEXT: ret void |
| ; |
| %ld = load i16, ptr addrspace(4) %in, align 4, !range !4 |
| %ext = sext i16 %ld to i32 |
| store i32 %ext, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Align-4 i16 load carrying !range !5 (negative lower bound, upper bound 0): the |
| ; expected widened i32 load reuses the range node of |
| ; @constant_load_i16_align4_range_from_neg (RNG2 is referenced, not redefined). |
| define amdgpu_kernel void @constant_load_i16_align4_range_from_neg_to_0(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_i16_align4_range_from_neg_to_0( |
| ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !range [[RNG2]] |
| ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16 |
| ; OPT-NEXT: [[EXT:%.*]] = sext i16 [[TMP3]] to i32 |
| ; OPT-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; OPT-NEXT: ret void |
| ; |
| %ld = load i16, ptr addrspace(4) %in, align 4, !range !5 |
| %ext = sext i16 %ld to i32 |
| store i32 %ext, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Align-4 i16 load carrying !invariant.load: the expected widened i32 load keeps an |
| ; !invariant.load attachment. The expected line names the output node by its literal |
| ; number (!3) rather than through a captured variable. |
| define amdgpu_kernel void @constant_load_i16_align4_invariant(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 { |
| ; OPT-LABEL: @constant_load_i16_align4_invariant( |
| ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[IN:%.*]], align 4, !invariant.load !3 |
| ; OPT-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16 |
| ; OPT-NEXT: [[EXT:%.*]] = sext i16 [[TMP3]] to i32 |
| ; OPT-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT:%.*]], align 4 |
| ; OPT-NEXT: ret void |
| ; |
| %ld = load i16, ptr addrspace(4) %in, align 4, !invariant.load !6 |
| %ext = sext i16 %ld to i32 |
| store i32 %ext, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Attribute group referenced as #0 by the intrinsic declaration and most kernels in this file. |
| attributes #0 = { nounwind } |
| |
| ; Metadata nodes expected in the pass output (i32-typed, attached to the widened loads). |
| ; OPT: !0 = !{i32 5, i32 0} |
| ; OPT: !1 = !{i32 8, i32 0} |
| ; OPT: !2 = !{i32 65520, i32 0} |
| ; OPT: !3 = !{} |
| |
| ; Input metadata attached to the i16 loads above: |
| ;   !0 .. !5 are the i16 !range operands used by the *_range* kernels, |
| ;   !6 is the empty node used by !invariant.load. |
| !0 = !{i16 5, i16 500} |
| !1 = !{i16 5, i16 -1} |
| !2 = !{i16 8, i16 12, i16 42, i16 99} |
| !3 = !{i16 0, i16 255} |
| !4 = !{i16 -16, i16 16} |
| !5 = !{i16 -16, i16 0} |
| !6 = !{} |