blob: 8f04c2565f2d4e13c6064f5387366ec757bd2596 [file]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=aggressive-instcombine %s -S | FileCheck %s
; Test that consecutive load folding works even when the final assembled value
; has multiple uses.
; Simple case: 4 bytes assembled to i32, result used twice
; Reads four consecutive bytes from %input (offsets 0 through 3), combines
; them into one i32 with the byte at offset 0 in the least significant bits,
; then stores that i32 to %output and also returns it.
; The autogenerated assertions inside the function expect the four i8 loads
; and the zext/shl/or chain to be replaced by one `load i32` from %input with
; align 1, whose result feeds both the store and the return.
define i32 @fold_i32_multiple_uses(ptr %input, ptr %output) {
; CHECK-LABEL: define i32 @fold_i32_multiple_uses(
; CHECK-SAME: ptr [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[B0:%.*]] = load i32, ptr [[INPUT]], align 1
; CHECK-NEXT: store i32 [[B0]], ptr [[OUTPUT]], align 4
; CHECK-NEXT: ret i32 [[B0]]
;
; Four byte-sized loads from adjacent addresses: %input + 0, 1, 2 and 3.
%b0 = load i8, ptr %input, align 1
%ptr1 = getelementptr inbounds i8, ptr %input, i64 1
%b1 = load i8, ptr %ptr1, align 1
%ptr2 = getelementptr inbounds i8, ptr %input, i64 2
%b2 = load i8, ptr %ptr2, align 1
%ptr3 = getelementptr inbounds i8, ptr %input, i64 3
%b3 = load i8, ptr %ptr3, align 1
; Assemble bytes to i32
; Each byte is zero-extended and shifted left by 8 * (its byte offset), then
; OR-ed into the running value, so the byte at offset 0 lands in bits 0-7 and
; the byte at offset 3 lands in bits 24-31.
%b0_32 = zext i8 %b0 to i32
%b1_32 = zext i8 %b1 to i32
%b1_shift = shl i32 %b1_32, 8
%val01 = or i32 %b1_shift, %b0_32
%b2_32 = zext i8 %b2 to i32
%b2_shift = shl i32 %b2_32, 16
%val012 = or i32 %val01, %b2_shift
%b3_32 = zext i8 %b3 to i32
%b3_shift = shl i32 %b3_32, 24
%val = or i32 %val012, %b3_shift
; Multiple uses of the assembled value
; %val has two users (the store below and the return), which is the
; multi-use situation this test is meant to cover.
store i32 %val, ptr %output, align 4
ret i32 %val
}