; blob: 802795da4789448e0202331d4780901780dbedf9 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck %s
; Mixes `<1 x i32>` and `i32` loads that read the same address, at byte
; offsets 0, 16 and 32 from %ptr. The expected output keeps one `i32` load per
; address and recreates every `<1 x i32>` value with a bitcast of that scalar.
define void @onevec(ptr %ptr) {
; CHECK-LABEL: define void @onevec(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[PTR]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to <1 x i32>
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i32 16
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP1]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i32 32
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[GEP2]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to <1 x i32>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP5]] to <1 x i32>
; CHECK-NEXT: ret void
;
; Offset 0: one-element vector load followed by a scalar load.
%ld0 = load <1 x i32>, ptr %ptr, align 4
%ld1 = load i32, ptr %ptr, align 4
; Offset 16: same pair of types, scalar load first this time.
%gep1 = getelementptr inbounds i8, ptr %ptr, i32 16
%ld2 = load i32, ptr %gep1, align 4
%ld3 = load <1 x i32>, ptr %gep1, align 4
; Offset 32: both loads are one-element vectors.
%gep2 = getelementptr inbounds i8, ptr %ptr, i32 32
%ld4 = load <1 x i32>, ptr %gep2, align 4
%ld5 = load <1 x i32>, ptr %gep2, align 4
ret void
}
; Overlapping loads of mixed width from %ptr: an i32 at byte offset 0,
; `<2 x i32>` at offsets 4 and 8, and another i32 at offset 8. The expected
; output is a single `<4 x i32>` load from %ptr; scalar results are recovered
; with extractelement and two-element results with shufflevector.
define void @test(ptr %ptr) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[PTR:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR]], align 4
; CHECK-NEXT: [[LD01:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
; CHECK-NEXT: [[LD12:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: [[LD23:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[LD34:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
; CHECK-NEXT: ret void
;
%ld0 = load i32, ptr %ptr, align 4
; Bytes 4..11: shares its upper half with the next vector load.
%gep1 = getelementptr inbounds i8, ptr %ptr, i32 4
%ld1 = load <2 x i32>, ptr %gep1, align 4
; Bytes 8..15.
%gep2 = getelementptr inbounds i8, ptr %ptr, i32 8
%ld2 = load <2 x i32>, ptr %gep2, align 4
; %gep3 computes the same address as %gep2 (offset 8); the scalar read
; overlaps the first element of %ld2.
%gep3 = getelementptr inbounds i8, ptr %ptr, i32 8
%ld3 = load i32, ptr %gep3, align 4
ret void
}
; External `<8 x float>` global in address space 1, read by @test2 below.
@ptr = external local_unnamed_addr addrspace(1) global <8 x float>, align 4
; Loads the whole `<8 x float>` from @ptr, then reloads lanes 1..7 as separate
; floats (byte offsets 4..28) and inserts each one back into the matching
; lane. The expected output has a single `<8 x float>` load: an identity
; shufflevector stands in for %vecins, the scalar loads become extractelement
; instructions, and the insertelement chain is kept.
define void @test2() {
; CHECK-LABEL: define void @test2() {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr addrspace(1) @ptr, align 4
; CHECK-NEXT: [[VECINS1:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 1
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 2
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP1]], i32 3
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x float> [[TMP1]], i32 4
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x float> [[TMP1]], i32 5
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[TMP1]], i32 6
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[TMP1]], i32 7
; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <8 x float> [[VECINS1]], float [[TMP2]], i64 1
; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <8 x float> [[VECINS_1]], float [[TMP3]], i64 2
; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <8 x float> [[VECINS_2]], float [[TMP4]], i64 3
; CHECK-NEXT: [[VECINS_4:%.*]] = insertelement <8 x float> [[VECINS_3]], float [[TMP5]], i64 4
; CHECK-NEXT: [[VECINS_5:%.*]] = insertelement <8 x float> [[VECINS_4]], float [[TMP6]], i64 5
; CHECK-NEXT: [[VECINS_6:%.*]] = insertelement <8 x float> [[VECINS_5]], float [[TMP7]], i64 6
; CHECK-NEXT: [[VECINS_7:%.*]] = insertelement <8 x float> [[VECINS_6]], float [[TMP8]], i64 7
; CHECK-NEXT: ret void
;
; Full-width load covering bytes 0..31 of @ptr.
%vecins = load <8 x float>, ptr addrspace(1) @ptr, align 4
; Each pair below reloads one float at byte offset 4*N through a constant GEP
; expression and writes it into lane N of the running vector.
%5 = load float, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @ptr, i32 4), align 4
%vecins.1 = insertelement <8 x float> %vecins, float %5, i64 1
%6 = load float, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @ptr, i32 8), align 4
%vecins.2 = insertelement <8 x float> %vecins.1, float %6, i64 2
%7 = load float, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @ptr, i32 12), align 4
%vecins.3 = insertelement <8 x float> %vecins.2, float %7, i64 3
%8 = load float, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @ptr, i32 16), align 4
%vecins.4 = insertelement <8 x float> %vecins.3, float %8, i64 4
%9 = load float, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @ptr, i32 20), align 4
%vecins.5 = insertelement <8 x float> %vecins.4, float %9, i64 5
%10 = load float, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @ptr, i32 24), align 4
%vecins.6 = insertelement <8 x float> %vecins.5, float %10, i64 6
%11 = load float, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @ptr, i32 28), align 4
%vecins.7 = insertelement <8 x float> %vecins.6, float %11, i64 7
ret void
}
; Five i32 loads addressed as %arg1 + zext(%base + K) for K = 0, 4, 6, 8, 12,
; each index built with `add nuw` followed by `zext` to i64. The expected
; output pairs K = 0/4 and K = 8/12 into `<2 x i32>` loads, while the K = 6
; load remains a scalar i32 load between them.
define void @vect_zext_bitcast_i8_st4_to_i32_idx(ptr addrspace(1) %arg1, i32 %base) {
; CHECK-LABEL: define void @vect_zext_bitcast_i8_st4_to_i32_idx(
; CHECK-SAME: ptr addrspace(1) [[ARG1:%.*]], i32 [[BASE:%.*]]) {
; CHECK-NEXT: [[ADD1:%.*]] = add nuw i32 [[BASE]], 0
; CHECK-NEXT: [[ZEXT1:%.*]] = zext i32 [[ADD1]] to i64
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 [[ZEXT1]]
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[GEP1]], align 4
; CHECK-NEXT: [[LOAD11:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
; CHECK-NEXT: [[LOAD22:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
; CHECK-NEXT: [[ADD25:%.*]] = add nuw i32 [[BASE]], 6
; CHECK-NEXT: [[ZEXT25:%.*]] = zext i32 [[ADD25]] to i64
; CHECK-NEXT: [[GEP25:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 [[ZEXT25]]
; CHECK-NEXT: [[LOAD25:%.*]] = load i32, ptr addrspace(1) [[GEP25]], align 4
; CHECK-NEXT: [[ADD3:%.*]] = add nuw i32 [[BASE]], 8
; CHECK-NEXT: [[ZEXT3:%.*]] = zext i32 [[ADD3]] to i64
; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 [[ZEXT3]]
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(1) [[GEP3]], align 4
; CHECK-NEXT: [[LOAD33:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0
; CHECK-NEXT: [[LOAD44:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
; CHECK-NEXT: ret void
;
; K = 0 and K = 4: two adjacent 4-byte loads.
%add1 = add nuw i32 %base, 0
%zext1 = zext i32 %add1 to i64
%gep1 = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 %zext1
%load1 = load i32, ptr addrspace(1) %gep1, align 4
%add2 = add nuw i32 %base, 4
%zext2 = zext i32 %add2 to i64
%gep2 = getelementptr inbounds i8,ptr addrspace(1) %arg1, i64 %zext2
%load2 = load i32, ptr addrspace(1) %gep2, align 4
; A load with 2-byte overlap breaks continuity.
; (K = 6 covers bytes 6..9, straddling the K = 4 and K = 8 loads.)
%add25 = add nuw i32 %base, 6
%zext25 = zext i32 %add25 to i64
%gep25 = getelementptr inbounds i8,ptr addrspace(1) %arg1, i64 %zext25
%load25 = load i32, ptr addrspace(1) %gep25, align 4
; K = 8 and K = 12: a second pair of adjacent 4-byte loads.
%add3 = add nuw i32 %base, 8
%zext3 = zext i32 %add3 to i64
%gep3 = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 %zext3
%load3 = load i32, ptr addrspace(1) %gep3, align 4
%add4 = add nuw i32 %base, 12
%zext4 = zext i32 %add4 to i64
%gep4 = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 %zext4
%load4 = load i32, ptr addrspace(1) %gep4, align 4
ret void
}