| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=aggressive-instcombine %s -S | FileCheck %s |
| |
| ; Test that consecutive load folding works even when the final assembled value |
| ; has multiple uses. |
| |
| ; Simple case: 4 bytes assembled to i32, result used twice |
| define i32 @fold_i32_multiple_uses(ptr %input, ptr %output) { |
| ; CHECK-LABEL: define i32 @fold_i32_multiple_uses( |
| ; CHECK-SAME: ptr [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[B0:%.*]] = load i32, ptr [[INPUT]], align 1 |
| ; CHECK-NEXT: store i32 [[B0]], ptr [[OUTPUT]], align 4 |
| ; CHECK-NEXT: ret i32 [[B0]] |
| ; |
| ; Input pattern, step 1: four separate i8 loads from %input at byte |
| ; offsets 0, 1, 2 and 3, each with align 1. The assertions above expect |
| ; the whole pattern to collapse into one i32 load from %input (align 1). |
| %b0 = load i8, ptr %input, align 1 |
| %ptr1 = getelementptr inbounds i8, ptr %input, i64 1 |
| %b1 = load i8, ptr %ptr1, align 1 |
| %ptr2 = getelementptr inbounds i8, ptr %input, i64 2 |
| %b2 = load i8, ptr %ptr2, align 1 |
| %ptr3 = getelementptr inbounds i8, ptr %input, i64 3 |
| %b3 = load i8, ptr %ptr3, align 1 |
| |
| ; Input pattern, step 2: assemble the bytes into an i32. The byte at |
| ; offset k is zero-extended and shifted left by 8*k bits, then OR-ed in, |
| ; so offset 0 ends up in the lowest 8 bits and offset 3 in the highest. |
| %b0_32 = zext i8 %b0 to i32 |
| %b1_32 = zext i8 %b1 to i32 |
| %b1_shift = shl i32 %b1_32, 8 |
| %val01 = or i32 %b1_shift, %b0_32 |
| |
| %b2_32 = zext i8 %b2 to i32 |
| %b2_shift = shl i32 %b2_32, 16 |
| %val012 = or i32 %val01, %b2_shift |
| |
| %b3_32 = zext i8 %b3 to i32 |
| %b3_shift = shl i32 %b3_32, 24 |
| %val = or i32 %val012, %b3_shift |
| |
| ; The assembled value %val has two users: it is stored to %output and |
| ; also returned. The expected output reuses the single wide load for both. |
| store i32 %val, ptr %output, align 4 |
| ret i32 %val |
| } |