Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 1 | ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
pvanhout | 3890a3b | 2023-06-28 12:22:26 +0200 | [diff] [blame] | 2 | ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=sroa,amdgpu-promote-alloca < %s | FileCheck %s |
Simon Pilgrim | 337b2d0 | 2017-09-14 10:30:54 +0000 | [diff] [blame] | 3 | |
| 4 | ; Make sure that array allocas loaded and stored as multi-element aggregates are handled correctly. |
| 5 | ; Strictly speaking, the promote-alloca pass shouldn't have to deal with this case, as it is non-canonical, but |
| 6 | ; the pass should handle it gracefully if it does encounter it. |
| 7 | ; The checks look for lines that previously caused issues in PromoteAlloca (non-canonical). Opt |
| 8 | ; should now leave these unchanged. |
| 9 | |
; Shared types and external addrspace(1) globals.
;   %Block / @block       - input read by @promote_1d_aggr: a [1 x float] payload
;                           followed by an i32 that the test uses as an array index.
;   %gl_PerVertex / @pv   - output location that several tests below store to.
;   %struct               - not referenced in the visible portion of this file.
Simon Pilgrim | 337b2d0 | 2017-09-14 10:30:54 +0000 | [diff] [blame] | 10 | %Block = type { [1 x float], i32 } |
| 11 | %gl_PerVertex = type { <4 x float>, float, [1 x float], [1 x float] } |
Matt Arsenault | 37ab4cf | 2017-09-14 18:02:29 +0000 | [diff] [blame] | 12 | %struct = type { i32, i32 } |
Simon Pilgrim | 337b2d0 | 2017-09-14 10:30:54 +0000 | [diff] [blame] | 13 | |
| 14 | @block = external addrspace(1) global %Block |
| 15 | @pv = external addrspace(1) global %gl_PerVertex |
| 16 | |
; promote_1d_aggr: a whole [1 x float] aggregate is loaded from @block and
; stored into the alloca %f1; one element is then read back through a GEP whose
; index comes from field 1 of @block (round-tripped through the scalar alloca
; %i). The loaded float is inserted into all four lanes of a <4 x float> and
; stored to @pv. The base vector %foo8 is loaded from %foo7, an alloca that is
; never stored to, which is why the expected output starts from an undef vector.
; Per the autogenerated assertions, %f1 is expected to remain an alloca here,
; while %i and %foo7 disappear.
| 17 | define amdgpu_vs void @promote_1d_aggr() #0 { |
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 18 | ; CHECK-LABEL: @promote_1d_aggr( |
Matt Arsenault | c1710e7 | 2022-11-28 10:01:20 -0500 | [diff] [blame] | 19 | ; CHECK-NEXT: [[F1:%.*]] = alloca [1 x float], align 4, addrspace(5) |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 20 | ; CHECK-NEXT: [[FOO:%.*]] = getelementptr [[BLOCK:%.*]], ptr addrspace(1) @block, i32 0, i32 1 |
| 21 | ; CHECK-NEXT: [[FOO1:%.*]] = load i32, ptr addrspace(1) [[FOO]], align 4 |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 22 | ; CHECK-NEXT: [[FOO3:%.*]] = load [1 x float], ptr addrspace(1) @block, align 4 |
pvanhout | 3890a3b | 2023-06-28 12:22:26 +0200 | [diff] [blame] | 23 | ; CHECK-NEXT: [[FOO3_FCA_0_EXTRACT:%.*]] = extractvalue [1 x float] [[FOO3]], 0 |
| 24 | ; CHECK-NEXT: [[FOO3_FCA_0_GEP:%.*]] = getelementptr inbounds [1 x float], ptr addrspace(5) [[F1]], i32 0, i32 0 |
| 25 | ; CHECK-NEXT: store float [[FOO3_FCA_0_EXTRACT]], ptr addrspace(5) [[FOO3_FCA_0_GEP]], align 4 |
| 26 | ; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [1 x float], ptr addrspace(5) [[F1]], i32 0, i32 [[FOO1]] |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 27 | ; CHECK-NEXT: [[FOO6:%.*]] = load float, ptr addrspace(5) [[FOO5]], align 4 |
pvanhout | 3890a3b | 2023-06-28 12:22:26 +0200 | [diff] [blame] | 28 | ; CHECK-NEXT: [[FOO9:%.*]] = insertelement <4 x float> undef, float [[FOO6]], i32 0 |
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 29 | ; CHECK-NEXT: [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[FOO6]], i32 1 |
| 30 | ; CHECK-NEXT: [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[FOO6]], i32 2 |
| 31 | ; CHECK-NEXT: [[FOO12:%.*]] = insertelement <4 x float> [[FOO11]], float [[FOO6]], i32 3 |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 32 | ; CHECK-NEXT: store <4 x float> [[FOO12]], ptr addrspace(1) @pv, align 16 |
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 33 | ; CHECK-NEXT: ret void |
| 34 | ; |
Matt Arsenault | c1710e7 | 2022-11-28 10:01:20 -0500 | [diff] [blame] | 35 | %i = alloca i32, addrspace(5) |
| 36 | %f1 = alloca [1 x float], addrspace(5) |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 37 | %foo = getelementptr %Block, ptr addrspace(1) @block, i32 0, i32 1 |
| 38 | %foo1 = load i32, ptr addrspace(1) %foo |
| 39 | store i32 %foo1, ptr addrspace(5) %i |
| 40 | %foo3 = load [1 x float], ptr addrspace(1) @block |
| 41 | store [1 x float] %foo3, ptr addrspace(5) %f1 |
| 42 | %foo4 = load i32, ptr addrspace(5) %i |
| 43 | %foo5 = getelementptr [1 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4 |
| 44 | %foo6 = load float, ptr addrspace(5) %foo5 |
Matt Arsenault | c1710e7 | 2022-11-28 10:01:20 -0500 | [diff] [blame] | 45 | %foo7 = alloca <4 x float>, addrspace(5) |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 46 | %foo8 = load <4 x float>, ptr addrspace(5) %foo7 |
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 47 | %foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0 |
| 48 | %foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1 |
| 49 | %foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2 |
| 50 | %foo12 = insertelement <4 x float> %foo11, float %foo6, i32 3 |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 51 | store <4 x float> %foo12, ptr addrspace(1) @pv |
Simon Pilgrim | 337b2d0 | 2017-09-14 10:30:54 +0000 | [diff] [blame] | 52 | ret void |
| 53 | } |
| 54 | |
; %Block2 / @block2: external addrspace(1) global used by @promote_store_aggr,
; which reads the leading i32 and writes the trailing [2 x float] field.
Simon Pilgrim | 337b2d0 | 2017-09-14 10:30:54 +0000 | [diff] [blame] | 55 | %Block2 = type { i32, [2 x float] } |
| 56 | @block2 = external addrspace(1) global %Block2 |
| 57 | |
; promote_store_aggr: the [2 x float] alloca %f1 is filled element by element
; (element 0 = the i32 from @block2 converted with sitofp, element 1 = 2.0),
; then loaded back as one whole aggregate and stored to field 1 of @block2.
; A constant <4 x float> of 1.0 is finally stored to @pv.
; Per the autogenerated assertions, no alloca is expected to remain; the
; aggregate is rebuilt with insertvalue instead.
| 58 | define amdgpu_vs void @promote_store_aggr() #0 { |
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 59 | ; CHECK-LABEL: @promote_store_aggr( |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 60 | ; CHECK-NEXT: [[FOO1:%.*]] = load i32, ptr addrspace(1) @block2, align 4 |
pvanhout | 3890a3b | 2023-06-28 12:22:26 +0200 | [diff] [blame] | 61 | ; CHECK-NEXT: [[FOO3:%.*]] = sitofp i32 [[FOO1]] to float |
| 62 | ; CHECK-NEXT: [[FOO6_FCA_0_INSERT:%.*]] = insertvalue [2 x float] poison, float [[FOO3]], 0 |
| 63 | ; CHECK-NEXT: [[FOO6_FCA_1_INSERT:%.*]] = insertvalue [2 x float] [[FOO6_FCA_0_INSERT]], float 2.000000e+00, 1 |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 64 | ; CHECK-NEXT: [[FOO7:%.*]] = getelementptr [[BLOCK2:%.*]], ptr addrspace(1) @block2, i32 0, i32 1 |
pvanhout | 3890a3b | 2023-06-28 12:22:26 +0200 | [diff] [blame] | 65 | ; CHECK-NEXT: store [2 x float] [[FOO6_FCA_1_INSERT]], ptr addrspace(1) [[FOO7]], align 4 |
Paul Walker | 38fffa6 | 2024-11-06 11:53:33 +0000 | [diff] [blame] | 66 | ; CHECK-NEXT: store <4 x float> splat (float 1.000000e+00), ptr addrspace(1) @pv, align 16 |
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 67 | ; CHECK-NEXT: ret void |
| 68 | ; |
Matt Arsenault | c1710e7 | 2022-11-28 10:01:20 -0500 | [diff] [blame] | 69 | %i = alloca i32, addrspace(5) |
| 70 | %f1 = alloca [2 x float], addrspace(5) |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 71 | %foo1 = load i32, ptr addrspace(1) @block2 |
| 72 | store i32 %foo1, ptr addrspace(5) %i |
| 73 | %foo2 = load i32, ptr addrspace(5) %i |
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 74 | %foo3 = sitofp i32 %foo2 to float |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 75 | store float %foo3, ptr addrspace(5) %f1 |
| 76 | %foo5 = getelementptr [2 x float], ptr addrspace(5) %f1, i32 0, i32 1 |
| 77 | store float 2.000000e+00, ptr addrspace(5) %foo5 |
| 78 | %foo6 = load [2 x float], ptr addrspace(5) %f1 |
| 79 | %foo7 = getelementptr %Block2, ptr addrspace(1) @block2, i32 0, i32 1 |
| 80 | store [2 x float] %foo6, ptr addrspace(1) %foo7 |
| 81 | store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, ptr addrspace(1) @pv |
Simon Pilgrim | 337b2d0 | 2017-09-14 10:30:54 +0000 | [diff] [blame] | 82 | ret void |
| 83 | } |
| 84 | |
; %Block3 / @block3: external addrspace(1) global used by
; @promote_load_from_store_aggr and by the memcpy tests further down, which
; load their dynamic index from it.
Simon Pilgrim | 337b2d0 | 2017-09-14 10:30:54 +0000 | [diff] [blame] | 85 | %Block3 = type { [2 x float], i32 } |
| 86 | @block3 = external addrspace(1) global %Block3 |
| 87 | |
; promote_load_from_store_aggr: same shape as @promote_1d_aggr but with a
; two-element array. A whole [2 x float] aggregate from @block3 is stored into
; the alloca %f1, then a single float is read back at a dynamic index taken
; from field 1 of @block3 and inserted into all four lanes of the vector
; stored to @pv.
; Per the autogenerated assertions, %f1 is expected to become a <2 x float>
; value (seeded from a frozen poison) that is filled with insertelement and
; read with a variable-index extractelement.
| 88 | define amdgpu_vs void @promote_load_from_store_aggr() #0 { |
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 89 | ; CHECK-LABEL: @promote_load_from_store_aggr( |
Matt Arsenault | c5fe075 | 2025-03-18 17:27:02 +0700 | [diff] [blame] | 90 | ; CHECK-NEXT: [[F1:%.*]] = freeze <2 x float> poison |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 91 | ; CHECK-NEXT: [[FOO:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 1 |
| 92 | ; CHECK-NEXT: [[FOO1:%.*]] = load i32, ptr addrspace(1) [[FOO]], align 4 |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 93 | ; CHECK-NEXT: [[FOO3:%.*]] = load [2 x float], ptr addrspace(1) @block3, align 4 |
pvanhout | 3890a3b | 2023-06-28 12:22:26 +0200 | [diff] [blame] | 94 | ; CHECK-NEXT: [[FOO3_FCA_0_EXTRACT:%.*]] = extractvalue [2 x float] [[FOO3]], 0 |
Matt Arsenault | c5fe075 | 2025-03-18 17:27:02 +0700 | [diff] [blame] | 95 | ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> [[F1]], float [[FOO3_FCA_0_EXTRACT]], i32 0 |
pvanhout | 3890a3b | 2023-06-28 12:22:26 +0200 | [diff] [blame] | 96 | ; CHECK-NEXT: [[FOO3_FCA_1_EXTRACT:%.*]] = extractvalue [2 x float] [[FOO3]], 1 |
Alex Richardson | e39f6c1 | 2023-10-25 15:12:01 -0700 | [diff] [blame] | 97 | ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[FOO3_FCA_1_EXTRACT]], i32 1 |
pvanhout | 3890a3b | 2023-06-28 12:22:26 +0200 | [diff] [blame] | 98 | ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 [[FOO1]] |
| 99 | ; CHECK-NEXT: [[FOO9:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0 |
| 100 | ; CHECK-NEXT: [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[TMP3]], i32 1 |
| 101 | ; CHECK-NEXT: [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[TMP3]], i32 2 |
| 102 | ; CHECK-NEXT: [[FOO12:%.*]] = insertelement <4 x float> [[FOO11]], float [[TMP3]], i32 3 |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 103 | ; CHECK-NEXT: store <4 x float> [[FOO12]], ptr addrspace(1) @pv, align 16 |
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 104 | ; CHECK-NEXT: ret void |
| 105 | ; |
Matt Arsenault | c1710e7 | 2022-11-28 10:01:20 -0500 | [diff] [blame] | 106 | %i = alloca i32, addrspace(5) |
| 107 | %f1 = alloca [2 x float], addrspace(5) |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 108 | %foo = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 1 |
| 109 | %foo1 = load i32, ptr addrspace(1) %foo |
| 110 | store i32 %foo1, ptr addrspace(5) %i |
| 111 | %foo3 = load [2 x float], ptr addrspace(1) @block3 |
| 112 | store [2 x float] %foo3, ptr addrspace(5) %f1 |
| 113 | %foo4 = load i32, ptr addrspace(5) %i |
| 114 | %foo5 = getelementptr [2 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4 |
| 115 | %foo6 = load float, ptr addrspace(5) %foo5 |
Matt Arsenault | c1710e7 | 2022-11-28 10:01:20 -0500 | [diff] [blame] | 116 | %foo7 = alloca <4 x float>, addrspace(5) |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 117 | %foo8 = load <4 x float>, ptr addrspace(5) %foo7 |
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 118 | %foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0 |
| 119 | %foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1 |
| 120 | %foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2 |
| 121 | %foo12 = insertelement <4 x float> %foo11, float %foo6, i32 3 |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 122 | store <4 x float> %foo12, ptr addrspace(1) @pv |
Simon Pilgrim | 337b2d0 | 2017-09-14 10:30:54 +0000 | [diff] [blame] | 123 | ret void |
| 124 | } |
| 125 | |
; Integer counterparts of the types above, used by
; @promote_load_from_store_aggr_varoff: @block4 is the input (read at byte
; offset 8) and @pv1 is where the resulting <4 x i32> is stored.
Sumanth Gundapaneni | 4c9e14b | 2025-02-24 13:36:30 -0600 | [diff] [blame] | 126 | %Block4 = type { [2 x i32], i32 } |
| 127 | @block4 = external addrspace(1) global %Block4 |
| 128 | %gl_PV = type { <4 x i32>, i32, [1 x i32], [1 x i32] } |
| 129 | @pv1 = external addrspace(1) global %gl_PV |
| 130 | |
| 131 | ; This should not crash on an aliased variable offset that can be |
| 132 | ; optimized out (variable %aliasTofoo3 in the test) |
; promote_load_from_store_aggr_varoff: an i32 loaded from @block4 (byte offset
; 8) is stored into the [3 x i32] alloca %f1 at byte offset 8, immediately
; reloaded from the same address as %aliasTofoo3, and then used as the dynamic
; index of a GEP into %f1. The element read that way replaces lane 3 of %input
; and the vector is stored to @pv1.
; Per the autogenerated assertions, the reload is expected to fold away so the
; loaded value itself serves both as the element inserted at index 2 and as
; the extractelement index of the <3 x i32> that replaces %f1.
| 133 | define amdgpu_vs void @promote_load_from_store_aggr_varoff(<4 x i32> %input) { |
| 134 | ; CHECK-LABEL: @promote_load_from_store_aggr_varoff( |
Matt Arsenault | c5fe075 | 2025-03-18 17:27:02 +0700 | [diff] [blame] | 135 | ; CHECK-NEXT: [[F1:%.*]] = freeze <3 x i32> poison |
Sumanth Gundapaneni | 4c9e14b | 2025-02-24 13:36:30 -0600 | [diff] [blame] | 136 | ; CHECK-NEXT: [[FOO3_UNPACK2:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4 |
Matt Arsenault | c5fe075 | 2025-03-18 17:27:02 +0700 | [diff] [blame] | 137 | ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i32> [[F1]], i32 [[FOO3_UNPACK2]], i32 2 |
Sumanth Gundapaneni | 4c9e14b | 2025-02-24 13:36:30 -0600 | [diff] [blame] | 138 | ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i32> [[TMP1]], i32 [[FOO3_UNPACK2]] |
Matt Arsenault | 1d0dd76 | 2025-03-12 20:36:46 +0700 | [diff] [blame] | 139 | ; CHECK-NEXT: [[FOO12:%.*]] = insertelement <4 x i32> [[INPUT:%.*]], i32 [[TMP2]], i64 3 |
Sumanth Gundapaneni | 4c9e14b | 2025-02-24 13:36:30 -0600 | [diff] [blame] | 140 | ; CHECK-NEXT: store <4 x i32> [[FOO12]], ptr addrspace(1) @pv1, align 16 |
| 141 | ; CHECK-NEXT: ret void |
| 142 | ; |
| 143 | %f1 = alloca [3 x i32], align 4, addrspace(5) |
| 144 | %G1 = getelementptr inbounds i8, ptr addrspace(5) %f1, i32 8 |
| 145 | %foo3.unpack2 = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4 |
| 146 | store i32 %foo3.unpack2, ptr addrspace(5) %G1, align 4 |
| 147 | %aliasTofoo3 = load i32, ptr addrspace(5) %G1, align 4 |
| 148 | %foo5 = getelementptr [3 x i32], ptr addrspace(5) %f1, i32 0, i32 %aliasTofoo3 |
| 149 | %foo6 = load i32, ptr addrspace(5) %foo5, align 4 |
| 150 | %foo12 = insertelement <4 x i32> %input, i32 %foo6, i64 3 |
| 151 | store <4 x i32> %foo12, ptr addrspace(1) @pv1, align 16 |
| 152 | ret void |
| 153 | } |
| 154 | |
; promote_memmove_aggr: the [5 x float] alloca %f1 is zero-initialised with a
; whole-aggregate store, elements 1 and 3 are overwritten with 1.0 and 2.0,
; and a 16-byte memmove then copies from element 1 to element 0 of the same
; alloca (source and destination overlap). Element 0 is finally loaded and
; stored to @pv.
; Per the autogenerated assertions, the memmove is expected to become a
; shufflevector with mask <1, 2, 3, 4, 4> and the stored value to fold to the
; constant 1.0.
Ruiling Song | 5d0ff92 | 2022-12-20 08:58:46 +0800 | [diff] [blame] | 155 | define amdgpu_vs void @promote_memmove_aggr() #0 { |
| 156 | ; CHECK-LABEL: @promote_memmove_aggr( |
Matt Arsenault | c5fe075 | 2025-03-18 17:27:02 +0700 | [diff] [blame] | 157 | ; CHECK-NEXT: [[F1:%.*]] = freeze <5 x float> poison |
| 158 | ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <5 x float> [[F1]], float 0.000000e+00, i32 0 |
| 159 | ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <5 x float> [[TMP1]], float 0.000000e+00, i32 1 |
| 160 | ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <5 x float> [[TMP2]], float 0.000000e+00, i32 2 |
| 161 | ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <5 x float> [[TMP3]], float 0.000000e+00, i32 3 |
| 162 | ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <5 x float> [[TMP4]], float 0.000000e+00, i32 4 |
| 163 | ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <5 x float> [[TMP5]], float 1.000000e+00, i32 1 |
| 164 | ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <5 x float> [[TMP6]], float 2.000000e+00, i32 3 |
| 165 | ; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <5 x float> [[TMP7]], <5 x float> poison, <5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 4> |
pvanhout | 3890a3b | 2023-06-28 12:22:26 +0200 | [diff] [blame] | 166 | ; CHECK-NEXT: store float 1.000000e+00, ptr addrspace(1) @pv, align 4 |
Ruiling Song | 5d0ff92 | 2022-12-20 08:58:46 +0800 | [diff] [blame] | 167 | ; CHECK-NEXT: ret void |
| 168 | ; |
| 169 | %f1 = alloca [5 x float], addrspace(5) |
| 170 | store [5 x float] zeroinitializer, ptr addrspace(5) %f1 |
| 171 | %foo1 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 1 |
| 172 | store float 1.0, ptr addrspace(5) %foo1 |
| 173 | %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3 |
| 174 | store float 2.0, ptr addrspace(5) %foo2 |
Nikita Popov | 00a4e24 | 2024-02-05 12:41:37 +0100 | [diff] [blame] | 175 | call void @llvm.memmove.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %foo1, i32 16, i1 false) |
Ruiling Song | 5d0ff92 | 2022-12-20 08:58:46 +0800 | [diff] [blame] | 176 | %foo3 = load float, ptr addrspace(5) %f1 |
| 177 | store float %foo3, ptr addrspace(1) @pv |
| 178 | ret void |
| 179 | } |
| 180 | |
; promote_memcpy_aggr: the [5 x float] alloca %f1 is zero-initialised, element
; 3 is set to 2.0, and 3.0 is stored at a dynamic index. That index is an i32
; loaded from the address of field 0 of @block3 (a field declared as
; [2 x float]). An 8-byte memcpy then copies elements 3..4 onto elements 0..1
; of the same alloca, and element 0 is loaded and stored to @pv.
; Per the autogenerated assertions, the memcpy is expected to become a
; shufflevector with mask <3, 4, 2, 3, 4> followed by an extractelement of
; lane 0.
| 181 | define amdgpu_vs void @promote_memcpy_aggr() #0 { |
| 182 | ; CHECK-LABEL: @promote_memcpy_aggr( |
Matt Arsenault | c5fe075 | 2025-03-18 17:27:02 +0700 | [diff] [blame] | 183 | ; CHECK-NEXT: [[F1:%.*]] = freeze <5 x float> poison |
| 184 | ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <5 x float> [[F1]], float 0.000000e+00, i32 0 |
| 185 | ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <5 x float> [[TMP7]], float 0.000000e+00, i32 1 |
| 186 | ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <5 x float> [[TMP8]], float 0.000000e+00, i32 2 |
| 187 | ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <5 x float> [[TMP9]], float 0.000000e+00, i32 3 |
| 188 | ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <5 x float> [[TMP4]], float 0.000000e+00, i32 4 |
| 189 | ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <5 x float> [[TMP5]], float 2.000000e+00, i32 3 |
Ruiling Song | 5d0ff92 | 2022-12-20 08:58:46 +0800 | [diff] [blame] | 190 | ; CHECK-NEXT: [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0 |
| 191 | ; CHECK-NEXT: [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4 |
Matt Arsenault | c5fe075 | 2025-03-18 17:27:02 +0700 | [diff] [blame] | 192 | ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <5 x float> [[TMP6]], float 3.000000e+00, i32 [[FOO4]] |
pvanhout | 3890a3b | 2023-06-28 12:22:26 +0200 | [diff] [blame] | 193 | ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <5 x float> [[TMP1]], <5 x float> poison, <5 x i32> <i32 3, i32 4, i32 2, i32 3, i32 4> |
| 194 | ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <5 x float> [[TMP2]], i32 0 |
| 195 | ; CHECK-NEXT: store float [[TMP3]], ptr addrspace(1) @pv, align 4 |
Ruiling Song | 5d0ff92 | 2022-12-20 08:58:46 +0800 | [diff] [blame] | 196 | ; CHECK-NEXT: ret void |
| 197 | ; |
| 198 | %f1 = alloca [5 x float], addrspace(5) |
| 199 | store [5 x float] zeroinitializer, ptr addrspace(5) %f1 |
| 200 | |
| 201 | %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3 |
| 202 | store float 2.0, ptr addrspace(5) %foo2 |
| 203 | |
| 204 | %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0 |
| 205 | %foo4 = load i32, ptr addrspace(1) %foo3 |
| 206 | %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4 |
| 207 | store float 3.0, ptr addrspace(5) %foo5 |
| 208 | |
Nikita Popov | 00a4e24 | 2024-02-05 12:41:37 +0100 | [diff] [blame] | 209 | call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %foo2, i32 8, i1 false) |
Ruiling Song | 5d0ff92 | 2022-12-20 08:58:46 +0800 | [diff] [blame] | 210 | %foo6 = load float, ptr addrspace(5) %f1 |
| 211 | store float %foo6, ptr addrspace(1) @pv |
| 212 | ret void |
| 213 | } |
| 214 | |
; promote_memcpy_identity_aggr: after zero-initialising the [5 x float] alloca
; %f1 and overwriting elements 1 and 3, a 20-byte memcpy is issued with %f1 as
; both source and destination. Element 0 is then loaded and stored to @pv.
; Per the autogenerated assertions, the whole function is expected to reduce
; to a single store of 0.0 to @pv.
| 215 | define amdgpu_vs void @promote_memcpy_identity_aggr() #0 { |
| 216 | ; CHECK-LABEL: @promote_memcpy_identity_aggr( |
pvanhout | 3890a3b | 2023-06-28 12:22:26 +0200 | [diff] [blame] | 217 | ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(1) @pv, align 4 |
Ruiling Song | 5d0ff92 | 2022-12-20 08:58:46 +0800 | [diff] [blame] | 218 | ; CHECK-NEXT: ret void |
| 219 | ; |
| 220 | %f1 = alloca [5 x float], addrspace(5) |
| 221 | store [5 x float] zeroinitializer, ptr addrspace(5) %f1 |
| 222 | %foo1 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 1 |
| 223 | store float 1.0, ptr addrspace(5) %foo1 |
| 224 | %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3 |
| 225 | store float 2.0, ptr addrspace(5) %foo2 |
Nikita Popov | 00a4e24 | 2024-02-05 12:41:37 +0100 | [diff] [blame] | 226 | call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %f1, i32 20, i1 false) |
Ruiling Song | 5d0ff92 | 2022-12-20 08:58:46 +0800 | [diff] [blame] | 227 | %foo3 = load float, ptr addrspace(5) %f1 |
| 228 | store float %foo3, ptr addrspace(1) @pv |
| 229 | ret void |
| 230 | } |
| 231 | |
| 232 | ; TODO: promote the alloca even if there is a memcpy between different allocas. |
; promote_memcpy_two_aggrs: two separate [5 x float] allocas, %f1 and %f2, are
; zero-initialised; 3.0 is stored into %f1 at a dynamic index loaded from
; @block3; 8 bytes are then copied from %f1 to %f2 with memcpy, and %f2 is read
; back at the same dynamic index and stored to @pv.
; Per the autogenerated assertions, this case is currently expected to stay
; unpromoted: both allocas and the memcpy call remain in the output.
| 233 | define amdgpu_vs void @promote_memcpy_two_aggrs() #0 { |
| 234 | ; CHECK-LABEL: @promote_memcpy_two_aggrs( |
| 235 | ; CHECK-NEXT: [[F1:%.*]] = alloca [5 x float], align 4, addrspace(5) |
| 236 | ; CHECK-NEXT: [[F2:%.*]] = alloca [5 x float], align 4, addrspace(5) |
pvanhout | 3890a3b | 2023-06-28 12:22:26 +0200 | [diff] [blame] | 237 | ; CHECK-NEXT: [[DOTFCA_0_GEP1:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 0 |
| 238 | ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_0_GEP1]], align 4 |
| 239 | ; CHECK-NEXT: [[DOTFCA_1_GEP2:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 1 |
| 240 | ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_1_GEP2]], align 4 |
| 241 | ; CHECK-NEXT: [[DOTFCA_2_GEP3:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 2 |
| 242 | ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_2_GEP3]], align 4 |
| 243 | ; CHECK-NEXT: [[DOTFCA_3_GEP4:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 3 |
| 244 | ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_3_GEP4]], align 4 |
| 245 | ; CHECK-NEXT: [[DOTFCA_4_GEP5:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 4 |
| 246 | ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_4_GEP5]], align 4 |
| 247 | ; CHECK-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 0 |
| 248 | ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_0_GEP]], align 4 |
| 249 | ; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 1 |
| 250 | ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_1_GEP]], align 4 |
| 251 | ; CHECK-NEXT: [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 2 |
| 252 | ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_2_GEP]], align 4 |
| 253 | ; CHECK-NEXT: [[DOTFCA_3_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 3 |
| 254 | ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_3_GEP]], align 4 |
| 255 | ; CHECK-NEXT: [[DOTFCA_4_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 4 |
| 256 | ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_4_GEP]], align 4 |
Ruiling Song | 5d0ff92 | 2022-12-20 08:58:46 +0800 | [diff] [blame] | 257 | ; CHECK-NEXT: [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0 |
| 258 | ; CHECK-NEXT: [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4 |
| 259 | ; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 [[FOO4]] |
| 260 | ; CHECK-NEXT: store float 3.000000e+00, ptr addrspace(5) [[FOO5]], align 4 |
| 261 | ; CHECK-NEXT: call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 [[F2]], ptr addrspace(5) align 4 [[F1]], i32 8, i1 false) |
| 262 | ; CHECK-NEXT: [[FOO6:%.*]] = getelementptr [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 [[FOO4]] |
| 263 | ; CHECK-NEXT: [[FOO7:%.*]] = load float, ptr addrspace(5) [[FOO6]], align 4 |
| 264 | ; CHECK-NEXT: store float [[FOO7]], ptr addrspace(1) @pv, align 4 |
| 265 | ; CHECK-NEXT: ret void |
| 266 | ; |
| 267 | %f1 = alloca [5 x float], addrspace(5) |
| 268 | %f2 = alloca [5 x float], addrspace(5) |
| 269 | |
| 270 | store [5 x float] zeroinitializer, ptr addrspace(5) %f1 |
| 271 | store [5 x float] zeroinitializer, ptr addrspace(5) %f2 |
| 272 | |
| 273 | %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0 |
| 274 | %foo4 = load i32, ptr addrspace(1) %foo3 |
| 275 | %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4 |
| 276 | store float 3.0, ptr addrspace(5) %foo5 |
| 277 | |
Nikita Popov | 00a4e24 | 2024-02-05 12:41:37 +0100 | [diff] [blame] | 278 | call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 %f2, ptr addrspace(5) align 4 %f1, i32 8, i1 false) |
Ruiling Song | 5d0ff92 | 2022-12-20 08:58:46 +0800 | [diff] [blame] | 279 | |
| 280 | %foo6 = getelementptr [5 x float], ptr addrspace(5) %f2, i32 0, i32 %foo4 |
| 281 | %foo7 = load float, ptr addrspace(5) %foo6 |
| 282 | store float %foo7, ptr addrspace(1) @pv |
| 283 | ret void |
| 284 | } |
| 285 | |
| 286 | ; TODO: promote the alloca even if there is a memcpy between the alloca and another memory space. |
; promote_memcpy_p1p5_aggr: the [5 x float] alloca %f1 (addrspace 5) is
; zero-initialised, 3.0 is stored at a dynamic index loaded from @block3, and
; 8 bytes are copied from %f1 to the addrspace(1) global @pv with memcpy.
; The %src argument is not used by the body.
; Per the autogenerated assertions, this case is currently expected to stay
; unpromoted: the alloca and the memcpy call remain in the output.
| 287 | define amdgpu_vs void @promote_memcpy_p1p5_aggr(ptr addrspace(1) inreg %src) #0 { |
| 288 | ; CHECK-LABEL: @promote_memcpy_p1p5_aggr( |
| 289 | ; CHECK-NEXT: [[F1:%.*]] = alloca [5 x float], align 4, addrspace(5) |
pvanhout | 3890a3b | 2023-06-28 12:22:26 +0200 | [diff] [blame] | 290 | ; CHECK-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 0 |
| 291 | ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_0_GEP]], align 4 |
| 292 | ; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 1 |
| 293 | ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_1_GEP]], align 4 |
| 294 | ; CHECK-NEXT: [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 2 |
| 295 | ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_2_GEP]], align 4 |
| 296 | ; CHECK-NEXT: [[DOTFCA_3_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 3 |
| 297 | ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_3_GEP]], align 4 |
| 298 | ; CHECK-NEXT: [[DOTFCA_4_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 4 |
| 299 | ; CHECK-NEXT: store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_4_GEP]], align 4 |
Ruiling Song | 5d0ff92 | 2022-12-20 08:58:46 +0800 | [diff] [blame] | 300 | ; CHECK-NEXT: [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0 |
| 301 | ; CHECK-NEXT: [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4 |
| 302 | ; CHECK-NEXT: [[FOO5:%.*]] = getelementptr [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 [[FOO4]] |
| 303 | ; CHECK-NEXT: store float 3.000000e+00, ptr addrspace(5) [[FOO5]], align 4 |
| 304 | ; CHECK-NEXT: call void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) align 4 @pv, ptr addrspace(5) align 4 [[F1]], i32 8, i1 false) |
| 305 | ; CHECK-NEXT: ret void |
| 306 | ; |
| 307 | %f1 = alloca [5 x float], addrspace(5) |
| 308 | store [5 x float] zeroinitializer, ptr addrspace(5) %f1 |
| 309 | |
| 310 | %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0 |
| 311 | %foo4 = load i32, ptr addrspace(1) %foo3 |
| 312 | %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4 |
| 313 | store float 3.0, ptr addrspace(5) %foo5 |
| 314 | |
Nikita Popov | 00a4e24 | 2024-02-05 12:41:37 +0100 | [diff] [blame] | 315 | call void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) align 4 @pv, ptr addrspace(5) align 4 %f1, i32 8, i1 false) |
Ruiling Song | 5d0ff92 | 2022-12-20 08:58:46 +0800 | [diff] [blame] | 316 | ret void |
| 317 | } |
| 318 | |
; promote_memcpy_inline_aggr: variant of @promote_memcpy_aggr that uses the
; llvm.memcpy.inline intrinsic and omits the constant store of 2.0 to element
; 3. The [5 x float] alloca %f1 is zero-initialised, 3.0 is stored at a dynamic
; index loaded from @block3, 8 bytes are copied from element 3 to element 0 of
; the same alloca, and element 0 is loaded and stored to @pv.
; Per the autogenerated assertions, the copy is expected to become a
; shufflevector with mask <3, 4, 2, 3, 4> followed by an extractelement of
; lane 0.
| 319 | define amdgpu_vs void @promote_memcpy_inline_aggr() #0 { |
| 320 | ; CHECK-LABEL: @promote_memcpy_inline_aggr( |
Matt Arsenault | c5fe075 | 2025-03-18 17:27:02 +0700 | [diff] [blame] | 321 | ; CHECK-NEXT: [[F1:%.*]] = freeze <5 x float> poison |
| 322 | ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <5 x float> [[F1]], float 0.000000e+00, i32 0 |
| 323 | ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <5 x float> [[TMP6]], float 0.000000e+00, i32 1 |
| 324 | ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <5 x float> [[TMP7]], float 0.000000e+00, i32 2 |
| 325 | ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <5 x float> [[TMP8]], float 0.000000e+00, i32 3 |
| 326 | ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <5 x float> [[TMP4]], float 0.000000e+00, i32 4 |
Ruiling Song | 5d0ff92 | 2022-12-20 08:58:46 +0800 | [diff] [blame] | 327 | ; CHECK-NEXT: [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0 |
| 328 | ; CHECK-NEXT: [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4 |
Matt Arsenault | c5fe075 | 2025-03-18 17:27:02 +0700 | [diff] [blame] | 329 | ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <5 x float> [[TMP5]], float 3.000000e+00, i32 [[FOO4]] |
pvanhout | 3890a3b | 2023-06-28 12:22:26 +0200 | [diff] [blame] | 330 | ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <5 x float> [[TMP1]], <5 x float> poison, <5 x i32> <i32 3, i32 4, i32 2, i32 3, i32 4> |
| 331 | ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <5 x float> [[TMP2]], i32 0 |
| 332 | ; CHECK-NEXT: store float [[TMP3]], ptr addrspace(1) @pv, align 4 |
Ruiling Song | 5d0ff92 | 2022-12-20 08:58:46 +0800 | [diff] [blame] | 333 | ; CHECK-NEXT: ret void |
| 334 | ; |
| 335 | %f1 = alloca [5 x float], addrspace(5) |
| 336 | store [5 x float] zeroinitializer, ptr addrspace(5) %f1 |
| 337 | |
| 338 | %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3 |
| 339 | %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0 |
| 340 | %foo4 = load i32, ptr addrspace(1) %foo3 |
| 341 | %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4 |
| 342 | store float 3.0, ptr addrspace(5) %foo5 |
| 343 | |
Nikita Popov | 00a4e24 | 2024-02-05 12:41:37 +0100 | [diff] [blame] | 344 | call void @llvm.memcpy.inline.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %foo2, i32 8, i1 false) |
Ruiling Song | 5d0ff92 | 2022-12-20 08:58:46 +0800 | [diff] [blame] | 345 | %foo6 = load float, ptr addrspace(5) %f1 |
| 346 | store float %foo6, ptr addrspace(1) @pv |
| 347 | ret void |
| 348 | } |
| 349 | |
; Declarations of the memory-transfer intrinsics called by the memcpy/memmove
; tests above: alloca-to-alloca (p5.p5) memcpy, memcpy.inline and memmove, plus
; the alloca-to-global (p1.p5) memcpy. All take an i32 length.
Nikita Popov | 00a4e24 | 2024-02-05 12:41:37 +0100 | [diff] [blame] | 350 | declare void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg) |
| 351 | declare void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg) |
| 352 | declare void @llvm.memcpy.inline.p5.p5.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg) |
| 353 | declare void @llvm.memmove.p5.p5.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg) |
Ruiling Song | 5d0ff92 | 2022-12-20 08:58:46 +0800 | [diff] [blame] | 354 | |
; External addrspace(1) globals for @promote_double_aggr: @tmp_g supplies the
; double inputs (read from its leading [4 x double] field) and @frag_color
; receives the resulting <4 x float>.
Simon Pilgrim | 337b2d0 | 2017-09-14 10:30:54 +0000 | [diff] [blame] | 355 | @tmp_g = external addrspace(1) global { [4 x double], <2 x double>, <3 x double>, <4 x double> } |
| 356 | @frag_color = external addrspace(1) global <4 x float> |
| 357 | |
; Stores a whole [2 x double] aggregate into an addrspace(5) alloca and then
; reads and writes that alloca element-wise as scalar doubles.
; In the expected output no alloca remains: the aggregate is taken apart with
; extractvalue, and every load of element 1 is replaced by the extracted value.
| 358 | define amdgpu_ps void @promote_double_aggr() #0 {
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 359 | ; CHECK-LABEL: @promote_double_aggr(
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 360 | ; CHECK-NEXT: [[FOO:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 0 |
| 361 | ; CHECK-NEXT: [[FOO1:%.*]] = load double, ptr addrspace(1) [[FOO]], align 8 |
| 362 | ; CHECK-NEXT: [[FOO2:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 1 |
| 363 | ; CHECK-NEXT: [[FOO3:%.*]] = load double, ptr addrspace(1) [[FOO2]], align 8 |
Matt Arsenault | 5a0a2f8 | 2025-03-12 16:11:11 +0700 | [diff] [blame] | 364 | ; CHECK-NEXT: [[FOO4:%.*]] = insertvalue [2 x double] poison, double [[FOO1]], 0 |
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 365 | ; CHECK-NEXT: [[FOO5:%.*]] = insertvalue [2 x double] [[FOO4]], double [[FOO3]], 1 |
pvanhout | 3890a3b | 2023-06-28 12:22:26 +0200 | [diff] [blame] | 366 | ; CHECK-NEXT: [[FOO5_FCA_0_EXTRACT:%.*]] = extractvalue [2 x double] [[FOO5]], 0 |
| 367 | ; CHECK-NEXT: [[FOO5_FCA_1_EXTRACT:%.*]] = extractvalue [2 x double] [[FOO5]], 1 |
| 368 | ; CHECK-NEXT: [[FOO10:%.*]] = fadd double [[FOO5_FCA_1_EXTRACT]], [[FOO5_FCA_1_EXTRACT]] |
| 369 | ; CHECK-NEXT: [[FOO16:%.*]] = fadd double [[FOO10]], [[FOO5_FCA_1_EXTRACT]] |
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 370 | ; CHECK-NEXT: [[FOO17:%.*]] = fptrunc double [[FOO16]] to float |
Matt Arsenault | 1d0dd76 | 2025-03-12 20:36:46 +0700 | [diff] [blame] | 371 | ; CHECK-NEXT: [[FOO18:%.*]] = insertelement <4 x float> poison, float [[FOO17]], i32 0 |
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 372 | ; CHECK-NEXT: [[FOO19:%.*]] = insertelement <4 x float> [[FOO18]], float [[FOO17]], i32 1 |
| 373 | ; CHECK-NEXT: [[FOO20:%.*]] = insertelement <4 x float> [[FOO19]], float [[FOO17]], i32 2 |
| 374 | ; CHECK-NEXT: [[FOO21:%.*]] = insertelement <4 x float> [[FOO20]], float [[FOO17]], i32 3 |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 375 | ; CHECK-NEXT: store <4 x float> [[FOO21]], ptr addrspace(1) @frag_color, align 16 |
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 376 | ; CHECK-NEXT: ret void |
| 377 | ; |
; The array alloca under test (no explicit alignment is given).
Matt Arsenault | c1710e7 | 2022-11-28 10:01:20 -0500 | [diff] [blame] | 378 | %s = alloca [2 x double], addrspace(5) |
; Read elements 0 and 1 of the [4 x double] field of @tmp_g.
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 379 | %foo = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 0 |
| 380 | %foo1 = load double, ptr addrspace(1) %foo |
| 381 | %foo2 = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 1 |
| 382 | %foo3 = load double, ptr addrspace(1) %foo2 |
; Assemble both values into one [2 x double] and store it as a single aggregate;
; this multi-element aggregate store is the access pattern this test exercises.
Matt Arsenault | 5a0a2f8 | 2025-03-12 16:11:11 +0700 | [diff] [blame] | 383 | %foo4 = insertvalue [2 x double] poison, double %foo1, 0 |
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 384 | %foo5 = insertvalue [2 x double] %foo4, double %foo3, 1 |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 385 | store [2 x double] %foo5, ptr addrspace(5) %s |
; Element 1 is loaded twice through two identical GEPs and added to itself.
| 386 | %foo6 = getelementptr [2 x double], ptr addrspace(5) %s, i32 0, i32 1 |
| 387 | %foo7 = load double, ptr addrspace(5) %foo6 |
| 388 | %foo8 = getelementptr [2 x double], ptr addrspace(5) %s, i32 0, i32 1 |
| 389 | %foo9 = load double, ptr addrspace(5) %foo8 |
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 390 | %foo10 = fadd double %foo7, %foo9
; The sum overwrites element 0: the store goes through the base pointer %s.
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 391 | store double %foo10, ptr addrspace(5) %s |
; Reload element 0 (the sum just stored) and element 1, then add them.
| 392 | %foo13 = load double, ptr addrspace(5) %s |
| 393 | %foo14 = getelementptr [2 x double], ptr addrspace(5) %s, i32 0, i32 1 |
| 394 | %foo15 = load double, ptr addrspace(5) %foo14 |
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 395 | %foo16 = fadd double %foo13, %foo15 |
; Narrow the result to float and place it in all four lanes of the vector that
; is written to @frag_color.
| 396 | %foo17 = fptrunc double %foo16 to float |
Matt Arsenault | 1d0dd76 | 2025-03-12 20:36:46 +0700 | [diff] [blame] | 397 | %foo18 = insertelement <4 x float> poison, float %foo17, i32 0 |
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 398 | %foo19 = insertelement <4 x float> %foo18, float %foo17, i32 1 |
| 399 | %foo20 = insertelement <4 x float> %foo19, float %foo17, i32 2 |
| 400 | %foo21 = insertelement <4 x float> %foo20, float %foo17, i32 3 |
Matt Arsenault | 50caf69 | 2022-11-28 10:36:38 -0500 | [diff] [blame] | 401 | store <4 x float> %foo21, ptr addrspace(1) @frag_color |
Simon Pilgrim | 337b2d0 | 2017-09-14 10:30:54 +0000 | [diff] [blame] | 402 | ret void |
| 403 | } |
Matt Arsenault | 37ab4cf | 2017-09-14 18:02:29 +0000 | [diff] [blame] | 404 | |
| 405 | ; Don't crash on a type that isn't a valid vector element.
; The array element type here is %struct ({ i32, i32 }) rather than a scalar.
; The alloca has no users; the expected output contains only the entry label
; followed by `ret void`.
Matt Arsenault | 37ab4cf | 2017-09-14 18:02:29 +0000 | [diff] [blame] | 406 | define amdgpu_kernel void @alloca_struct() #0 {
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 407 | ; CHECK-LABEL: @alloca_struct(
| 408 | ; CHECK-NEXT: entry: |
Nikita Popov | eaac348 | 2022-03-10 11:14:45 +0100 | [diff] [blame] | 409 | ; CHECK-NEXT: ret void |
| 410 | ; |
Matt Arsenault | 37ab4cf | 2017-09-14 18:02:29 +0000 | [diff] [blame] | 411 | entry: |
Matt Arsenault | c1710e7 | 2022-11-28 10:01:20 -0500 | [diff] [blame] | 412 | %alloca = alloca [2 x %struct], align 4, addrspace(5) |
Matt Arsenault | 37ab4cf | 2017-09-14 18:02:29 +0000 | [diff] [blame] | 413 | ret void |
| 414 | } |