| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 |
| ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-promote-alloca -amdgpu-promote-alloca-to-vector-limit=512 < %s | FileCheck %s |
| |
| target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5" |
| |
| ; Three overlapping <2 x i64> stores into a [4 x i64] addrspace(5) alloca, at |
| ; element offsets 0, 1 and 2, followed by a <2 x i64> reload from each offset. |
| ; The expected output contains no alloca: every store becomes a pair of |
| ; extractelement/insertelement operations on a <4 x i64> value, and every |
| ; reload is rebuilt from the scalars most recently written to its two elements. |
| define void @test_trivial_subvector(<2 x i64> %val.0, <2 x i64> %val.1) { |
| ; CHECK-LABEL: define void @test_trivial_subvector |
| ; CHECK-SAME: (<2 x i64> [[VAL_0:%.*]], <2 x i64> [[VAL_1:%.*]]) { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[VAL_0]], i64 0 |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> undef, i64 [[TMP0]], i32 0 |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[VAL_0]], i64 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[TMP2]], i32 1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[VAL_1]], i64 0 |
| ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[TMP4]], i32 1 |
| ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[VAL_1]], i64 1 |
| ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i64> [[TMP5]], i64 [[TMP6]], i32 2 |
| ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[VAL_1]], i64 0 |
| ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i64> [[TMP7]], i64 [[TMP8]], i32 2 |
| ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[VAL_1]], i64 1 |
| ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> [[TMP9]], i64 [[TMP10]], i32 3 |
| ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0 |
| ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP4]], i64 1 |
| ; CHECK-NEXT: [[DUMMYUSER:%.*]] = freeze <2 x i64> [[TMP13]] |
| ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0 |
| ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i64> [[TMP14]], i64 [[TMP8]], i64 1 |
| ; CHECK-NEXT: [[DUMMYUSER_1:%.*]] = freeze <2 x i64> [[TMP15]] |
| ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i64 0 |
| ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i64> [[TMP16]], i64 [[TMP10]], i64 1 |
| ; CHECK-NEXT: [[DUMMYUSER_2:%.*]] = freeze <2 x i64> [[TMP17]] |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %stack = alloca [4 x i64], align 4, addrspace(5) |
| %stack.1 = getelementptr inbounds [4 x i64], ptr addrspace(5) %stack, i32 0, i32 1 |
| %stack.2 = getelementptr inbounds [4 x i64], ptr addrspace(5) %stack, i32 0, i32 2 |
| |
| ; The three stores cover elements 0-1, 1-2 and 2-3, so each later store |
| ; overwrites the upper element written by the previous one. |
| store <2 x i64> %val.0, ptr addrspace(5) %stack |
| store <2 x i64> %val.1, ptr addrspace(5) %stack.1 |
| store <2 x i64> %val.1, ptr addrspace(5) %stack.2 |
| |
| ; Each reload is passed to a freeze so that it has a user. |
| %reload = load <2 x i64>, ptr addrspace(5) %stack |
| %dummyuser = freeze <2 x i64> %reload |
| |
| %reload.1 = load <2 x i64>, ptr addrspace(5) %stack.1 |
| %dummyuser.1 = freeze <2 x i64> %reload.1 |
| |
| %reload.2 = load <2 x i64>, ptr addrspace(5) %stack.2 |
| %dummyuser.2 = freeze <2 x i64> %reload.2 |
| ret void |
| } |
| |
| ; Stores and reloads 128-bit vectors of several integer element types |
| ; (<4 x i32>, <8 x i16>, <16 x i8>, <128 x i1>) through a [4 x i64] alloca. |
| ; In the expected output each stored value is bitcast to <2 x i64> before its |
| ; elements are inserted, and each reload is rebuilt as <2 x i64> and bitcast to |
| ; the loaded type. |
| define void @test_different_type_subvector(<4 x i32> %val.0, <8 x i16> %val.1, <16 x i8> %val.2, <128 x i1> %val.3) { |
| ; CHECK-LABEL: define void @test_different_type_subvector |
| ; CHECK-SAME: (<4 x i32> [[VAL_0:%.*]], <8 x i16> [[VAL_1:%.*]], <16 x i8> [[VAL_2:%.*]], <128 x i1> [[VAL_3:%.*]]) { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[VAL_0]] to <2 x i64> |
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i64 0 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> undef, i64 [[TMP1]], i32 0 |
| ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[TMP3]], i32 1 |
| ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i64 0 |
| ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[TMP3]], i64 1 |
| ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <16 x i8> |
| ; CHECK-NEXT: [[DUMMYUSER:%.*]] = freeze <16 x i8> [[TMP7]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[VAL_1]] to <2 x i64> |
| ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP8]], i64 0 |
| ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i64> [[TMP4]], i64 [[TMP9]], i32 1 |
| ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP8]], i64 1 |
| ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i64> [[TMP10]], i64 [[TMP11]], i32 2 |
| ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i64 0 |
| ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i64> [[TMP13]], i64 [[TMP11]], i64 1 |
| ; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i64> [[TMP14]] to <8 x i16> |
| ; CHECK-NEXT: [[DUMMYUSE_1:%.*]] = freeze <8 x i16> [[TMP15]] |
| ; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i8> [[VAL_2]] to <2 x i64> |
| ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i64> [[TMP16]], i64 0 |
| ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> [[TMP12]], i64 [[TMP17]], i32 2 |
| ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i64> [[TMP16]], i64 1 |
| ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP19]], i32 3 |
| ; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i64> poison, i64 [[TMP17]], i64 0 |
| ; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i64> [[TMP21]], i64 [[TMP19]], i64 1 |
| ; CHECK-NEXT: [[TMP23:%.*]] = bitcast <2 x i64> [[TMP22]] to <4 x i32> |
| ; CHECK-NEXT: [[DUMMYUSE_2:%.*]] = freeze <4 x i32> [[TMP23]] |
| ; CHECK-NEXT: [[TMP24:%.*]] = bitcast <128 x i1> [[VAL_3]] to <2 x i64> |
| ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i64> [[TMP24]], i64 0 |
| ; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP25]], i32 2 |
| ; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x i64> [[TMP24]], i64 1 |
| ; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i64> [[TMP26]], i64 [[TMP27]], i32 3 |
| ; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i64 0 |
| ; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x i64> [[TMP29]], i64 [[TMP25]], i64 1 |
| ; CHECK-NEXT: [[TMP31:%.*]] = bitcast <2 x i64> [[TMP30]] to <128 x i1> |
| ; CHECK-NEXT: [[DUMMYUSE_I1:%.*]] = freeze <128 x i1> [[TMP31]] |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %stack = alloca [4 x i64], align 4, addrspace(5) |
| %stack.1 = getelementptr inbounds [4 x i64], ptr addrspace(5) %stack, i32 0, i32 1 |
| %stack.2 = getelementptr inbounds [4 x i64], ptr addrspace(5) %stack, i32 0, i32 2 |
| |
| ; Store as <4 x i32>, reload the same bytes as <16 x i8>. |
| store <4 x i32> %val.0, ptr addrspace(5) %stack |
| %reload = load <16 x i8>, ptr addrspace(5) %stack |
| %dummyuser = freeze <16 x i8> %reload |
| |
| ; Store and reload with the same type, starting at element 1. |
| store <8 x i16> %val.1, ptr addrspace(5) %stack.1 |
| %reload.1 = load <8 x i16>, ptr addrspace(5) %stack.1 |
| %dummyuse.1 = freeze <8 x i16> %reload.1 |
| |
| ; Store as <16 x i8>, reload as <4 x i32>, starting at element 2. |
| store <16 x i8> %val.2, ptr addrspace(5) %stack.2 |
| %reload.2 = load <4 x i32>, ptr addrspace(5) %stack.2 |
| %dummyuse.2 = freeze <4 x i32> %reload.2 |
| |
| ; The store goes to %stack.2 but the reload reads from %stack.1, so the |
| ; reloaded value combines element 1 (written by the %val.1 store) with |
| ; element 2 (written by this store). |
| store <128 x i1> %val.3, ptr addrspace(5) %stack.2 |
| %reload.i1 = load <128 x i1>, ptr addrspace(5) %stack.1 |
| %dummyuse.i1 = freeze <128 x i1> %reload.i1 |
| |
| ret void |
| } |
| |
| ; Not vectorized, >16 elts is not supported. |
| ; The alloca here is [256 x i1]. The expected output is the input IR unchanged: |
| ; the alloca, both GEPs and every load/store remain, with only explicit |
| ; alignments printed on the memory accesses. |
| define void @test_different_type_subvector_i1alloca(<4 x i32> %val.0, <8 x i16> %val.1, <16 x i8> %val.2, <128 x i1> %val.3) { |
| ; CHECK-LABEL: define void @test_different_type_subvector_i1alloca |
| ; CHECK-SAME: (<4 x i32> [[VAL_0:%.*]], <8 x i16> [[VAL_1:%.*]], <16 x i8> [[VAL_2:%.*]], <128 x i1> [[VAL_3:%.*]]) { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[STACK:%.*]] = alloca [256 x i1], align 4, addrspace(5) |
| ; CHECK-NEXT: [[STACK_1:%.*]] = getelementptr inbounds [256 x i1], ptr addrspace(5) [[STACK]], i32 0, i32 63 |
| ; CHECK-NEXT: [[STACK_2:%.*]] = getelementptr inbounds [256 x i1], ptr addrspace(5) [[STACK]], i32 0, i32 127 |
| ; CHECK-NEXT: store <4 x i32> [[VAL_0]], ptr addrspace(5) [[STACK]], align 16 |
| ; CHECK-NEXT: [[RELOAD:%.*]] = load <16 x i8>, ptr addrspace(5) [[STACK]], align 16 |
| ; CHECK-NEXT: [[DUMMYUSER:%.*]] = freeze <16 x i8> [[RELOAD]] |
| ; CHECK-NEXT: store <8 x i16> [[VAL_1]], ptr addrspace(5) [[STACK_1]], align 16 |
| ; CHECK-NEXT: [[RELOAD_1:%.*]] = load <8 x i16>, ptr addrspace(5) [[STACK_1]], align 16 |
| ; CHECK-NEXT: [[DUMMYUSE_1:%.*]] = freeze <8 x i16> [[RELOAD_1]] |
| ; CHECK-NEXT: store <16 x i8> [[VAL_2]], ptr addrspace(5) [[STACK_2]], align 16 |
| ; CHECK-NEXT: [[RELOAD_2:%.*]] = load <4 x i32>, ptr addrspace(5) [[STACK_2]], align 16 |
| ; CHECK-NEXT: [[DUMMYUSE_2:%.*]] = freeze <4 x i32> [[RELOAD_2]] |
| ; CHECK-NEXT: store <128 x i1> [[VAL_3]], ptr addrspace(5) [[STACK_2]], align 16 |
| ; CHECK-NEXT: [[RELOAD_I1:%.*]] = load <128 x i1>, ptr addrspace(5) [[STACK_1]], align 16 |
| ; CHECK-NEXT: [[DUMMYUSE_I1:%.*]] = freeze <128 x i1> [[RELOAD_I1]] |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %stack = alloca [256 x i1], align 4, addrspace(5) |
| %stack.1 = getelementptr inbounds [256 x i1], ptr addrspace(5) %stack, i32 0, i32 63 |
| %stack.2 = getelementptr inbounds [256 x i1], ptr addrspace(5) %stack, i32 0, i32 127 |
| |
| ; Store as <4 x i32>, reload as <16 x i8>, both at the alloca base. |
| store <4 x i32> %val.0, ptr addrspace(5) %stack |
| %reload = load <16 x i8>, ptr addrspace(5) %stack |
| %dummyuser = freeze <16 x i8> %reload |
| |
| ; Store and reload <8 x i16> through the GEP at index 63. |
| store <8 x i16> %val.1, ptr addrspace(5) %stack.1 |
| %reload.1 = load <8 x i16>, ptr addrspace(5) %stack.1 |
| %dummyuse.1 = freeze <8 x i16> %reload.1 |
| |
| ; Store as <16 x i8>, reload as <4 x i32>, through the GEP at index 127. |
| store <16 x i8> %val.2, ptr addrspace(5) %stack.2 |
| %reload.2 = load <4 x i32>, ptr addrspace(5) %stack.2 |
| %dummyuse.2 = freeze <4 x i32> %reload.2 |
| |
| ; Store through %stack.2 but reload through %stack.1. |
| store <128 x i1> %val.3, ptr addrspace(5) %stack.2 |
| %reload.i1 = load <128 x i1>, ptr addrspace(5) %stack.1 |
| %dummyuse.i1 = freeze <128 x i1> %reload.i1 |
| |
| ret void |
| } |
| |
| ; Floating-point variant: 128-bit vectors of half, float and double (plus one |
| ; constant <2 x i64>) are stored to the base of a [4 x double] alloca and |
| ; reloaded with a different vector type each time. The expected output goes |
| ; through <2 x double> bitcasts; the constant store shows up as two double |
| ; constants and the final <4 x i32> reload is a constant vector. |
| define void @test_different_type_subvector_fp(<2 x double> %val.0, <4 x float> %val.1, <8 x half> %val.2) { |
| ; CHECK-LABEL: define void @test_different_type_subvector_fp |
| ; CHECK-SAME: (<2 x double> [[VAL_0:%.*]], <4 x float> [[VAL_1:%.*]], <8 x half> [[VAL_2:%.*]]) { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VAL_2]] to <2 x double> |
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[TMP0]], i64 0 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> undef, double [[TMP1]], i32 0 |
| ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP0]], i64 1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP2]], double [[TMP3]], i32 1 |
| ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i64 0 |
| ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[TMP3]], i64 1 |
| ; CHECK-NEXT: [[DUMMYUSER:%.*]] = freeze <2 x double> [[TMP6]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x float> [[VAL_1]] to <2 x double> |
| ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i64 0 |
| ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x double> [[TMP4]], double [[TMP8]], i32 0 |
| ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP7]], i64 1 |
| ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x double> [[TMP9]], double [[TMP10]], i32 1 |
| ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x double> poison, double [[TMP8]], i64 0 |
| ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> [[TMP12]], double [[TMP10]], i64 1 |
| ; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x double> [[TMP13]] to <4 x float> |
| ; CHECK-NEXT: [[DUMMYUSE_1:%.*]] = freeze <4 x float> [[TMP14]] |
| ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[VAL_0]], i64 0 |
| ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x double> [[TMP11]], double [[TMP15]], i32 0 |
| ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x double> [[VAL_0]], i64 1 |
| ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x double> [[TMP16]], double [[TMP17]], i32 1 |
| ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> poison, double [[TMP15]], i64 0 |
| ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x double> [[TMP19]], double [[TMP17]], i64 1 |
| ; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x double> [[TMP20]] to <8 x half> |
| ; CHECK-NEXT: [[DUMMYUSE_2:%.*]] = freeze <8 x half> [[TMP21]] |
| ; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x double> [[TMP18]], double 2.075080e-322, i32 0 |
| ; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x double> [[TMP22]], double 3.162020e-322, i32 1 |
| ; CHECK-NEXT: [[DUMMYUSE_3:%.*]] = freeze <4 x i32> <i32 42, i32 0, i32 64, i32 0> |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %stack = alloca [4 x double], align 4, addrspace(5) |
| |
| ; Store as <8 x half>, reload as <2 x double>. |
| store <8 x half> %val.2, ptr addrspace(5) %stack |
| %reload = load <2 x double>, ptr addrspace(5) %stack |
| %dummyuser = freeze <2 x double> %reload |
| |
| ; Store and reload with the same <4 x float> type. |
| store <4 x float> %val.1, ptr addrspace(5) %stack |
| %reload.1 = load <4 x float>, ptr addrspace(5) %stack |
| %dummyuse.1 = freeze <4 x float> %reload.1 |
| |
| ; Store as <2 x double>, reload as <8 x half>. |
| store <2 x double> %val.0, ptr addrspace(5) %stack |
| %reload.2 = load <8 x half>, ptr addrspace(5) %stack |
| %dummyuse.2 = freeze <8 x half> %reload.2 |
| |
| ; Constant integer vector stored into the double alloca and reloaded as |
| ; <4 x i32>; the expected reload is <i32 42, i32 0, i32 64, i32 0>. |
| store <2 x i64> <i64 42, i64 64>, ptr addrspace(5) %stack |
| %reload.3 = load <4 x i32>, ptr addrspace(5) %stack |
| %dummyuse.3 = freeze <4 x i32> %reload.3 |
| |
| ret void |
| } |
| |
| ; Vectors of pointers stored to and reloaded from a [4 x i64] alloca. |
| ; Per the datalayout above, addrspace(1) pointers are 64 bits wide and |
| ; addrspace(3) pointers are 32 bits wide, so both vectors occupy 128 bits. |
| ; The expected output converts with ptrtoint/inttoptr (plus a bitcast between |
| ; <4 x i32> and <2 x i64> for the 32-bit pointers). |
| define void @test_different_type_subvector_ptrs(<2 x ptr addrspace(1)> %val.0, <4 x ptr addrspace(3)> %val.1) { |
| ; CHECK-LABEL: define void @test_different_type_subvector_ptrs |
| ; CHECK-SAME: (<2 x ptr addrspace(1)> [[VAL_0:%.*]], <4 x ptr addrspace(3)> [[VAL_1:%.*]]) { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint <2 x ptr addrspace(1)> [[VAL_0]] to <2 x i64> |
| ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i64 0 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> undef, i64 [[TMP1]], i32 0 |
| ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[TMP3]], i32 1 |
| ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i64 0 |
| ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[TMP3]], i64 1 |
| ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x ptr addrspace(1)> |
| ; CHECK-NEXT: [[DUMMYUSER:%.*]] = freeze <2 x ptr addrspace(1)> [[TMP7]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint <4 x ptr addrspace(3)> [[VAL_1]] to <4 x i32> |
| ; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <2 x i64> |
| ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i64 0 |
| ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> [[TMP4]], i64 [[TMP10]], i32 0 |
| ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP9]], i64 1 |
| ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i64> [[TMP11]], i64 [[TMP12]], i32 1 |
| ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i64 0 |
| ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i64> [[TMP14]], i64 [[TMP12]], i64 1 |
| ; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i64> [[TMP15]] to <4 x i32> |
| ; CHECK-NEXT: [[TMP17:%.*]] = inttoptr <4 x i32> [[TMP16]] to <4 x ptr addrspace(3)> |
| ; CHECK-NEXT: [[DUMMYUSER_1:%.*]] = freeze <4 x ptr addrspace(3)> [[TMP17]] |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %stack = alloca [4 x i64], align 4, addrspace(5) |
| |
| ; Two 64-bit pointers: same width as the i64 alloca elements. |
| store <2 x ptr addrspace(1)> %val.0, ptr addrspace(5) %stack |
| %reload = load <2 x ptr addrspace(1)>, ptr addrspace(5) %stack |
| %dummyuser = freeze <2 x ptr addrspace(1)> %reload |
| |
| ; Four 32-bit pointers: two pointers per i64 alloca element. |
| store <4 x ptr addrspace(3)> %val.1, ptr addrspace(5) %stack |
| %reload.1 = load <4 x ptr addrspace(3)>, ptr addrspace(5) %stack |
| %dummyuser.1 = freeze <4 x ptr addrspace(3)> %reload.1 |
| |
| ret void |
| } |
| |
| ; Reverse of the pointer-vector case: the alloca element type is itself a |
| ; pointer ([8 x ptr addrspace(5)], 32-bit per the datalayout above) while the |
| ; accesses use integer vectors and a vector of addrspace(3) pointers. |
| ; The expected output converts each stored value to a vector of addrspace(5) |
| ; pointers with inttoptr (after a bitcast or ptrtoint to i32 elements) and |
| ; converts each reload back with ptrtoint. |
| define void @test_different_type_subvector_ptralloca(<2 x i64> %val.0, <8 x i16> %val.1, <2 x ptr addrspace(3)> %val.2) { |
| ; CHECK-LABEL: define void @test_different_type_subvector_ptralloca |
| ; CHECK-SAME: (<2 x i64> [[VAL_0:%.*]], <8 x i16> [[VAL_1:%.*]], <2 x ptr addrspace(3)> [[VAL_2:%.*]]) { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[VAL_0]] to <4 x i32> |
| ; CHECK-NEXT: [[TMP1:%.*]] = inttoptr <4 x i32> [[TMP0]] to <4 x ptr addrspace(5)> |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP1]], i64 0 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr addrspace(5)> undef, ptr addrspace(5) [[TMP2]], i32 0 |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP1]], i64 1 |
| ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP3]], ptr addrspace(5) [[TMP4]], i32 1 |
| ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP1]], i64 2 |
| ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP5]], ptr addrspace(5) [[TMP6]], i32 2 |
| ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP1]], i64 3 |
| ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP7]], ptr addrspace(5) [[TMP8]], i32 3 |
| ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x ptr addrspace(5)> poison, ptr addrspace(5) [[TMP2]], i64 0 |
| ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x ptr addrspace(5)> [[TMP10]], ptr addrspace(5) [[TMP4]], i64 1 |
| ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x ptr addrspace(5)> [[TMP11]], ptr addrspace(5) [[TMP6]], i64 2 |
| ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x ptr addrspace(5)> [[TMP12]], ptr addrspace(5) [[TMP8]], i64 3 |
| ; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint <4 x ptr addrspace(5)> [[TMP13]] to <4 x i32> |
| ; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i32> [[TMP14]] to <2 x i64> |
| ; CHECK-NEXT: [[DUMMYUSER_1:%.*]] = freeze <2 x i64> [[TMP15]] |
| ; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i16> [[VAL_1]] to <4 x i32> |
| ; CHECK-NEXT: [[TMP17:%.*]] = inttoptr <4 x i32> [[TMP16]] to <4 x ptr addrspace(5)> |
| ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP17]], i64 0 |
| ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP9]], ptr addrspace(5) [[TMP18]], i32 0 |
| ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP17]], i64 1 |
| ; CHECK-NEXT: [[TMP21:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP19]], ptr addrspace(5) [[TMP20]], i32 1 |
| ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP17]], i64 2 |
| ; CHECK-NEXT: [[TMP23:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP21]], ptr addrspace(5) [[TMP22]], i32 2 |
| ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x ptr addrspace(5)> [[TMP17]], i64 3 |
| ; CHECK-NEXT: [[TMP25:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP23]], ptr addrspace(5) [[TMP24]], i32 3 |
| ; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x ptr addrspace(5)> poison, ptr addrspace(5) [[TMP18]], i64 0 |
| ; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x ptr addrspace(5)> [[TMP26]], ptr addrspace(5) [[TMP20]], i64 1 |
| ; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x ptr addrspace(5)> [[TMP27]], ptr addrspace(5) [[TMP22]], i64 2 |
| ; CHECK-NEXT: [[TMP29:%.*]] = insertelement <4 x ptr addrspace(5)> [[TMP28]], ptr addrspace(5) [[TMP24]], i64 3 |
| ; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint <4 x ptr addrspace(5)> [[TMP29]] to <4 x i32> |
| ; CHECK-NEXT: [[TMP31:%.*]] = bitcast <4 x i32> [[TMP30]] to <8 x i16> |
| ; CHECK-NEXT: [[DUMMYUSER_2:%.*]] = freeze <8 x i16> [[TMP31]] |
| ; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint <2 x ptr addrspace(3)> [[VAL_2]] to <2 x i32> |
| ; CHECK-NEXT: [[TMP33:%.*]] = inttoptr <2 x i32> [[TMP32]] to <2 x ptr addrspace(5)> |
| ; CHECK-NEXT: [[TMP34:%.*]] = extractelement <2 x ptr addrspace(5)> [[TMP33]], i64 0 |
| ; CHECK-NEXT: [[TMP35:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP25]], ptr addrspace(5) [[TMP34]], i32 0 |
| ; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x ptr addrspace(5)> [[TMP33]], i64 1 |
| ; CHECK-NEXT: [[TMP37:%.*]] = insertelement <8 x ptr addrspace(5)> [[TMP35]], ptr addrspace(5) [[TMP36]], i32 1 |
| ; CHECK-NEXT: [[TMP38:%.*]] = insertelement <2 x ptr addrspace(5)> poison, ptr addrspace(5) [[TMP34]], i64 0 |
| ; CHECK-NEXT: [[TMP39:%.*]] = insertelement <2 x ptr addrspace(5)> [[TMP38]], ptr addrspace(5) [[TMP36]], i64 1 |
| ; CHECK-NEXT: [[TMP40:%.*]] = ptrtoint <2 x ptr addrspace(5)> [[TMP39]] to <2 x i32> |
| ; CHECK-NEXT: [[TMP41:%.*]] = inttoptr <2 x i32> [[TMP40]] to <2 x ptr addrspace(3)> |
| ; CHECK-NEXT: [[DUMMYUSER_3:%.*]] = freeze <2 x ptr addrspace(3)> [[TMP41]] |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %stack = alloca [8 x ptr addrspace(5)], align 4, addrspace(5) |
| |
| ; 128-bit integer vector with 64-bit elements: covers four 32-bit pointer slots. |
| store <2 x i64> %val.0, ptr addrspace(5) %stack |
| %reload = load <2 x i64>, ptr addrspace(5) %stack |
| %dummyuser.1 = freeze <2 x i64> %reload |
| |
| ; 128-bit integer vector with 16-bit elements: also covers four pointer slots. |
| store <8 x i16> %val.1, ptr addrspace(5) %stack |
| %reload.1 = load <8 x i16>, ptr addrspace(5) %stack |
| %dummyuser.2 = freeze <8 x i16> %reload.1 |
| |
| ; Pointer vector in a different address space: covers two pointer slots. |
| store <2 x ptr addrspace(3)> %val.2, ptr addrspace(5) %stack |
| %reload.2 = load <2 x ptr addrspace(3)>, ptr addrspace(5) %stack |
| %dummyuser.3= freeze <2 x ptr addrspace(3)> %reload.2 |
| |
| ret void |
| } |
| |
| ; A <2 x i64> store that starts at the last element (index 3) of a [4 x i64] |
| ; alloca, so its second element lies past the end of the array. |
| ; In the expected output only element 0 of the value is inserted (at index 3); |
| ; element 1 is extracted but no insertelement is emitted for it. |
| define void @test_out_of_bounds_subvec(<2 x i64> %val) { |
| ; CHECK-LABEL: define void @test_out_of_bounds_subvec |
| ; CHECK-SAME: (<2 x i64> [[VAL:%.*]]) { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[VAL]], i64 0 |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> undef, i64 [[TMP0]], i32 3 |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[VAL]], i64 1 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %stack = alloca [4 x i64], align 4, addrspace(5) |
| %stack.3 = getelementptr inbounds [4 x i64], ptr addrspace(5) %stack, i32 0, i32 3 |
| ; Writes elements 3 and 4 of a 4-element array. |
| store <2 x i64> %val, ptr addrspace(5) %stack.3 |
| ret void |
| } |
| |
| ; Negative test: a <3 x i32> store (96 bits of data, which is not a multiple of |
| ; the 64-bit alloca element size) into a [4 x i64] alloca. |
| ; The expected output keeps the alloca and the store as they are. |
| define void @test_different_type_subvector_not_divisible(<3 x i32> %val) { |
| ; CHECK-LABEL: define void @test_different_type_subvector_not_divisible |
| ; CHECK-SAME: (<3 x i32> [[VAL:%.*]]) { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5) |
| ; CHECK-NEXT: store <3 x i32> [[VAL]], ptr addrspace(5) [[STACK]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %stack = alloca [4 x i64], align 4, addrspace(5) |
| store <3 x i32> %val, ptr addrspace(5) %stack |
| ret void |
| } |
| |
| ; Negative test: a <2 x i16> store (32 bits) into a [4 x i64] alloca, i.e. the |
| ; accessed vector is narrower than a single 64-bit alloca element. |
| ; The expected output keeps the alloca and the store as they are. |
| define void @test_accessty_too_small(<2 x i16> %val) { |
| ; CHECK-LABEL: define void @test_accessty_too_small |
| ; CHECK-SAME: (<2 x i16> [[VAL:%.*]]) { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[STACK:%.*]] = alloca [4 x i64], align 4, addrspace(5) |
| ; CHECK-NEXT: store <2 x i16> [[VAL]], ptr addrspace(5) [[STACK]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %stack = alloca [4 x i64], align 4, addrspace(5) |
| store <2 x i16> %val, ptr addrspace(5) %stack |
| ret void |
| } |
| |
| ; Constant i32 vectors stored into a [9 x double] alloca and read back as |
| ; scalar doubles. The expected output has no alloca, loads or stores: each |
| ; frozen value is a double constant whose bit pattern packs two of the stored |
| ; i32 values (for example 0x5F0000005E holds 95 in the high half and 94 in the |
| ; low half). |
| ; The function has no explicit entry label, so the expected output has no |
| ; label line either. |
| define void @store_2xi32_into_double(double %foo) { |
| ; CHECK-LABEL: define void @store_2xi32_into_double |
| ; CHECK-SAME: (double [[FOO:%.*]]) { |
| ; CHECK-NEXT: [[DUMMYUSER0:%.*]] = freeze double 0x5F0000005E |
| ; CHECK-NEXT: [[DUMMYUSER1:%.*]] = freeze double 0x6700000066 |
| ; CHECK-NEXT: [[DUMMYUSER2:%.*]] = freeze double 0x6900000068 |
| ; CHECK-NEXT: [[DUMMYUSER3:%.*]] = freeze double 0x6F0000006E |
| ; CHECK-NEXT: ret void |
| ; |
| %alloca = alloca [9 x double], align 8, addrspace(5) |
| |
| ; One <2 x i32> store fills exactly one double at element 0. |
| store <2 x i32> <i32 94, i32 95>, ptr addrspace(5) %alloca, align 8 |
| %load0 = load double, ptr addrspace(5) %alloca, align 8 |
| %dummyuser0 = freeze double %load0 |
| |
| ; One <4 x i32> store fills two consecutive doubles (elements 4 and 5), |
| ; which are then read back individually. |
| %idx4 = getelementptr inbounds [9 x double], ptr addrspace(5) %alloca, i32 0, i32 4 |
| %idx5 = getelementptr inbounds [9 x double], ptr addrspace(5) %alloca, i32 0, i32 5 |
| store <4 x i32> <i32 102, i32 103, i32 104, i32 105>, ptr addrspace(5) %idx4, align 8 |
| %load1 = load double, ptr addrspace(5) %idx4, align 8 |
| %dummyuser1 = freeze double %load1 |
| %load2 = load double, ptr addrspace(5) %idx5, align 8 |
| %dummyuser2 = freeze double %load2 |
| |
| ; <2 x i32> store into the last element (index 8) of the array. |
| %idx8 = getelementptr inbounds [9 x double], ptr addrspace(5) %alloca, i32 0, i32 8 |
| store <2 x i32> <i32 110, i32 111>, ptr addrspace(5) %idx8, align 8 |
| %load3 = load double, ptr addrspace(5) %idx8, align 8 |
| %dummyuser3 = freeze double %load3 |
| |
| ret void |
| } |
| |
| ; Check we handle loading/storing a subvector using non-constant indexes. |
| ; Both GEP indexes come from phi nodes, so they are not compile-time constants. |
| ; In the expected output each of the four stored/loaded elements is addressed |
| ; with an explicit "add i32 <index>, K" feeding insertelement/extractelement |
| ; on a <16 x i16> value. |
| ; NOTE(review): the function references attribute group #0, but no |
| ; "attributes #0" definition is visible in this part of the file, and the |
| ; expected signature line carries no attribute reference - confirm intended. |
| define <4 x i16> @nonconst_indexes(i1 %cond, i32 %otheridx, <4 x i16> %store) #0 { |
| ; CHECK-LABEL: define <4 x i16> @nonconst_indexes |
| ; CHECK-SAME: (i1 [[COND:%.*]], i32 [[OTHERIDX:%.*]], <4 x i16> [[STORE:%.*]]) { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br i1 [[COND]], label [[THEN:%.*]], label [[ELSE:%.*]] |
| ; CHECK: then: |
| ; CHECK-NEXT: br label [[FINALLY:%.*]] |
| ; CHECK: else: |
| ; CHECK-NEXT: br label [[FINALLY]] |
| ; CHECK: finally: |
| ; CHECK-NEXT: [[INDEX_1:%.*]] = phi i32 [ 0, [[THEN]] ], [ [[OTHERIDX]], [[ELSE]] ] |
| ; CHECK-NEXT: [[INDEX_2:%.*]] = phi i32 [ 2, [[THEN]] ], [ [[OTHERIDX]], [[ELSE]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i16> [[STORE]], i64 0 |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i16> undef, i16 [[TMP0]], i32 [[INDEX_1]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX_1]], 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[STORE]], i64 1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i16> [[TMP1]], i16 [[TMP3]], i32 [[TMP2]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX_1]], 2 |
| ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i16> [[STORE]], i64 2 |
| ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i16> [[TMP4]], i16 [[TMP6]], i32 [[TMP5]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX_1]], 3 |
| ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i16> [[STORE]], i64 3 |
| ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i16> [[TMP7]], i16 [[TMP9]], i32 [[TMP8]] |
| ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i16> [[TMP10]], i32 [[INDEX_2]] |
| ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i16> poison, i16 [[TMP11]], i64 0 |
| ; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX_2]], 1 |
| ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i16> [[TMP10]], i32 [[TMP13]] |
| ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i16> [[TMP12]], i16 [[TMP14]], i64 1 |
| ; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[INDEX_2]], 2 |
| ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i16> [[TMP10]], i32 [[TMP16]] |
| ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP17]], i64 2 |
| ; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[INDEX_2]], 3 |
| ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i16> [[TMP10]], i32 [[TMP19]] |
| ; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i16> [[TMP18]], i16 [[TMP20]], i64 3 |
| ; CHECK-NEXT: ret <4 x i16> [[TMP21]] |
| ; |
| entry: |
| %data = alloca [16 x i16], addrspace(5) |
| br i1 %cond, label %then, label %else |
| |
| then: |
| br label %finally |
| |
| else: |
| br label %finally |
| |
| finally: |
| ; Store index is 0 and load index is 2 on the "then" path; both equal |
| ; %otheridx on the "else" path. |
| %index.1 = phi i32 [ 0, %then ], [ %otheridx, %else ] |
| %index.2 = phi i32 [ 2, %then ], [ %otheridx, %else ] |
| %ptr.1 = getelementptr inbounds [16 x i16], ptr addrspace(5) %data, i32 0, i32 %index.1 |
| %ptr.2 = getelementptr inbounds [16 x i16], ptr addrspace(5) %data, i32 0, i32 %index.2 |
| store <4 x i16> %store, ptr addrspace(5) %ptr.1, align 2 |
| %load = load <4 x i16>, ptr addrspace(5) %ptr.2, align 2 |
| ret <4 x i16> %load |
| } |
| |
| |
| ; Check the case when the alloca is smaller than the vector size. |
| ; The alloca is a <3 x i32> while both stores write a <4 x i32>. In the expected |
| ; output only elements 0-2 of each stored value are extracted and inserted |
| ; into a <3 x i32>; element 3 of each stored value is never referenced. |
| define void @test_smaller_alloca_store(<4 x i32> %store1, <4 x i32> %store2) { |
| ; CHECK-LABEL: define void @test_smaller_alloca_store |
| ; CHECK-SAME: (<4 x i32> [[STORE1:%.*]], <4 x i32> [[STORE2:%.*]]) { |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i32> [[STORE1]], i64 0 |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[TMP0]], i32 0 |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[STORE1]], i64 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x i32> [[TMP1]], i32 [[TMP2]], i32 1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[STORE1]], i64 2 |
| ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <3 x i32> [[TMP3]], i32 [[TMP4]], i32 2 |
| ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[STORE2]], i64 0 |
| ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <3 x i32> [[TMP5]], i32 [[TMP6]], i32 0 |
| ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[STORE2]], i64 1 |
| ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <3 x i32> [[TMP7]], i32 [[TMP8]], i32 1 |
| ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[STORE2]], i64 2 |
| ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <3 x i32> [[TMP9]], i32 [[TMP10]], i32 2 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %res = alloca <3 x i32>, align 16, addrspace(5) |
| ; Two back-to-back full-width stores to the same address; nothing is loaded. |
| store <4 x i32> %store1, ptr addrspace(5) %res, align 16 |
| store <4 x i32> %store2, ptr addrspace(5) %res, align 16 |
| ret void |
| } |