| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck %s |
| |
; @onevec: three pairs of loads, each pair reading the same address (%ptr,
; %ptr+16 and %ptr+32) while mixing `i32` and `<1 x i32>` result types.
; The expected output keeps a single `i32` load per address and rebuilds each
; `<1 x i32>` value from it with a bitcast.
| define void @onevec(ptr %ptr) { |
| ; CHECK-LABEL: define void @onevec( |
| ; CHECK-SAME: ptr [[PTR:%.*]]) { |
| ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[PTR]], align 4 |
| ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to <1 x i32> |
| ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i32 16 |
| ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP1]], align 4 |
| ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32> |
| ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i32 32 |
| ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[GEP2]], align 4 |
| ; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP5]] to <1 x i32> |
| ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32 [[TMP5]] to <1 x i32> |
| ; CHECK-NEXT: ret void |
| ; |
; Pair 1: same address, one-element vector result first, scalar result second.
| %ld0 = load <1 x i32>, ptr %ptr, align 4 |
| %ld1 = load i32, ptr %ptr, align 4 |
| |
; Pair 2: same address, scalar result first, one-element vector result second.
| %gep1 = getelementptr inbounds i8, ptr %ptr, i32 16 |
| %ld2 = load i32, ptr %gep1, align 4 |
| %ld3 = load <1 x i32>, ptr %gep1, align 4 |
| |
; Pair 3: same address, both results are one-element vectors; the expected
; output has one load feeding two bitcasts.
| %gep2 = getelementptr inbounds i8, ptr %ptr, i32 32 |
| %ld4 = load <1 x i32>, ptr %gep2, align 4 |
| %ld5 = load <1 x i32>, ptr %gep2, align 4 |
| ret void |
| } |
| |
; @test: four loads with overlapping byte ranges inside bytes 0-15 of %ptr:
; an i32 at offset 0, a <2 x i32> at offset 4, a <2 x i32> at offset 8 and an
; i32 at offset 8. The expected output is a single <4 x i32> load; scalar
; results are taken from it with extractelement and two-element results with
; shufflevector.
| define void @test(ptr %ptr) { |
| ; CHECK-LABEL: define void @test( |
| ; CHECK-SAME: ptr [[PTR:%.*]]) { |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR]], align 4 |
| ; CHECK-NEXT: [[LD01:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0 |
| ; CHECK-NEXT: [[LD12:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 1, i32 2> |
| ; CHECK-NEXT: [[LD23:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 2, i32 3> |
| ; CHECK-NEXT: [[LD34:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %ld0 = load i32, ptr %ptr, align 4 |
; %ld1 (bytes 4-11) and %ld2 (bytes 8-15) overlap in bytes 8-11.
| %gep1 = getelementptr inbounds i8, ptr %ptr, i32 4 |
| %ld1 = load <2 x i32>, ptr %gep1, align 4 |
| %gep2 = getelementptr inbounds i8, ptr %ptr, i32 8 |
| %ld2 = load <2 x i32>, ptr %gep2, align 4 |
; %gep3 uses the same offset as %gep2, so %ld3 reads the first element of the
; range covered by %ld2 (lane 2 of the combined vector in the expected output).
| %gep3 = getelementptr inbounds i8, ptr %ptr, i32 8 |
| %ld3 = load i32, ptr %gep3, align 4 |
| ret void |
| } |
| |
; External <8 x float> global in address space 1 (align 4); @test2 loads from it.
| @ptr = external local_unnamed_addr addrspace(1) global <8 x float>, align 4 |
| |
; @test2: a full <8 x float> load of @ptr followed by seven scalar float loads
; at byte offsets 4, 8, ..., 28 of the same global, each inserted back into the
; vector at lanes 1 through 7. The expected output contains a single
; <8 x float> load; the scalar loads become extractelement instructions for
; lanes 1-7 that feed the original insertelement chain.
| define void @test2() { |
| ; CHECK-LABEL: define void @test2() { |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr addrspace(1) @ptr, align 4 |
| ; CHECK-NEXT: [[VECINS1:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x float> [[TMP1]], i32 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x float> [[TMP1]], i32 2 |
| ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x float> [[TMP1]], i32 3 |
| ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x float> [[TMP1]], i32 4 |
| ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x float> [[TMP1]], i32 5 |
| ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x float> [[TMP1]], i32 6 |
| ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x float> [[TMP1]], i32 7 |
| ; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <8 x float> [[VECINS1]], float [[TMP2]], i64 1 |
| ; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <8 x float> [[VECINS_1]], float [[TMP3]], i64 2 |
| ; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <8 x float> [[VECINS_2]], float [[TMP4]], i64 3 |
| ; CHECK-NEXT: [[VECINS_4:%.*]] = insertelement <8 x float> [[VECINS_3]], float [[TMP5]], i64 4 |
| ; CHECK-NEXT: [[VECINS_5:%.*]] = insertelement <8 x float> [[VECINS_4]], float [[TMP6]], i64 5 |
| ; CHECK-NEXT: [[VECINS_6:%.*]] = insertelement <8 x float> [[VECINS_5]], float [[TMP7]], i64 6 |
| ; CHECK-NEXT: [[VECINS_7:%.*]] = insertelement <8 x float> [[VECINS_6]], float [[TMP8]], i64 7 |
| ; CHECK-NEXT: ret void |
| ; |
; Whole-vector load covering bytes 0-31 of @ptr.
| %vecins = load <8 x float>, ptr addrspace(1) @ptr, align 4 |
; Each scalar load below re-reads one 4-byte lane already covered by %vecins
; (byte offset 4*k for lane k) and writes it back into the same lane.
| %5 = load float, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @ptr, i32 4), align 4 |
| %vecins.1 = insertelement <8 x float> %vecins, float %5, i64 1 |
| %6 = load float, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @ptr, i32 8), align 4 |
| %vecins.2 = insertelement <8 x float> %vecins.1, float %6, i64 2 |
| %7 = load float, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @ptr, i32 12), align 4 |
| %vecins.3 = insertelement <8 x float> %vecins.2, float %7, i64 3 |
| %8 = load float, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @ptr, i32 16), align 4 |
| %vecins.4 = insertelement <8 x float> %vecins.3, float %8, i64 4 |
| %9 = load float, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @ptr, i32 20), align 4 |
| %vecins.5 = insertelement <8 x float> %vecins.4, float %9, i64 5 |
| %10 = load float, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @ptr, i32 24), align 4 |
| %vecins.6 = insertelement <8 x float> %vecins.5, float %10, i64 6 |
| %11 = load float, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @ptr, i32 28), align 4 |
| %vecins.7 = insertelement <8 x float> %vecins.6, float %11, i64 7 |
| ret void |
| } |
| |
; @vect_zext_bitcast_i8_st4_to_i32_idx: five i32 loads whose addresses are
; formed as %arg1 + zext(%base + C) for C = 0, 4, 6, 8 and 12 (byte offsets).
; The expected output pairs the loads at C = 0/4 and at C = 8/12 into two
; <2 x i32> loads and leaves the load at C = 6 as a scalar i32.
| define void @vect_zext_bitcast_i8_st4_to_i32_idx(ptr addrspace(1) %arg1, i32 %base) { |
| ; CHECK-LABEL: define void @vect_zext_bitcast_i8_st4_to_i32_idx( |
| ; CHECK-SAME: ptr addrspace(1) [[ARG1:%.*]], i32 [[BASE:%.*]]) { |
| ; CHECK-NEXT: [[ADD1:%.*]] = add nuw i32 [[BASE]], 0 |
| ; CHECK-NEXT: [[ZEXT1:%.*]] = zext i32 [[ADD1]] to i64 |
| ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 [[ZEXT1]] |
| ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[GEP1]], align 4 |
| ; CHECK-NEXT: [[LOAD11:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 |
| ; CHECK-NEXT: [[LOAD22:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 |
| ; CHECK-NEXT: [[ADD25:%.*]] = add nuw i32 [[BASE]], 6 |
| ; CHECK-NEXT: [[ZEXT25:%.*]] = zext i32 [[ADD25]] to i64 |
| ; CHECK-NEXT: [[GEP25:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 [[ZEXT25]] |
| ; CHECK-NEXT: [[LOAD25:%.*]] = load i32, ptr addrspace(1) [[GEP25]], align 4 |
| ; CHECK-NEXT: [[ADD3:%.*]] = add nuw i32 [[BASE]], 8 |
| ; CHECK-NEXT: [[ZEXT3:%.*]] = zext i32 [[ADD3]] to i64 |
| ; CHECK-NEXT: [[GEP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 [[ZEXT3]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(1) [[GEP3]], align 4 |
| ; CHECK-NEXT: [[LOAD33:%.*]] = extractelement <2 x i32> [[TMP2]], i32 0 |
| ; CHECK-NEXT: [[LOAD44:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1 |
| ; CHECK-NEXT: ret void |
| ; |
; First adjacent pair: i32 loads at %base + 0 and %base + 4.
| %add1 = add nuw i32 %base, 0 |
| %zext1 = zext i32 %add1 to i64 |
| %gep1 = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 %zext1 |
| %load1 = load i32, ptr addrspace(1) %gep1, align 4 |
| %add2 = add nuw i32 %base, 4 |
| %zext2 = zext i32 %add2 to i64 |
| %gep2 = getelementptr inbounds i8,ptr addrspace(1) %arg1, i64 %zext2 |
| %load2 = load i32, ptr addrspace(1) %gep2, align 4 |
| |
| ; A load with 2-byte overlap breaks continuity. |
; It reads bytes 6-9 past %arg1 + %base, sharing two bytes with %load2
; (bytes 4-7) and two bytes with %load3 (bytes 8-11).
| %add25 = add nuw i32 %base, 6 |
| %zext25 = zext i32 %add25 to i64 |
| %gep25 = getelementptr inbounds i8,ptr addrspace(1) %arg1, i64 %zext25 |
| %load25 = load i32, ptr addrspace(1) %gep25, align 4 |
| |
; Second adjacent pair: i32 loads at %base + 8 and %base + 12.
| %add3 = add nuw i32 %base, 8 |
| %zext3 = zext i32 %add3 to i64 |
| %gep3 = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 %zext3 |
| %load3 = load i32, ptr addrspace(1) %gep3, align 4 |
| %add4 = add nuw i32 %base, 12 |
| %zext4 = zext i32 %add4 to i64 |
| %gep4 = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 %zext4 |
| %load4 = load i32, ptr addrspace(1) %gep4, align 4 |
| ret void |
| } |