| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu -pass-remarks-output=%t < %s | FileCheck %s |
| ; RUN: FileCheck --input-file=%t %s --check-prefix=YAML |
| |
| ; Externally defined function with no body in this file; its only call site |
| ; is in %call.block of @test_spillcost_backedge. |
| declare void @external_call() |
| |
| ; Test input: @test_spillcost_backedge. |
| ; %entry computes two scalar reciprocals (1.0 / %x and 1.0 / %y) once, ahead of |
| ; the loop. %loop.body loads two adjacent doubles from %in, subtracts one |
| ; reciprocal from each, and stores the results to two adjacent slots of %res. |
| ; When %iv < 50 the body branches to %call.block, which calls @external_call |
| ; and rejoins the loop at %loop.latch; %loop.latch branches back to |
| ; %loop.header. |
| ; |
| ; The remark assertions below expect one "StoresVectorized" remark for this |
| ; function, reporting cost -99 and tree size 6. The IR assertions inside the |
| ; function expect the two reciprocals to become a single <2 x double> fdiv in |
| ; the entry block and the loop body to become a <2 x double> load/fsub/store. |
| ; YAML: --- !Passed |
| ; YAML-NEXT: Pass: slp-vectorizer |
| ; YAML-NEXT: Name: StoresVectorized |
| ; YAML-NEXT: Function: test_spillcost_backedge |
| ; YAML-NEXT: Args: |
| ; YAML-NEXT: - String: 'Stores SLP vectorized with cost ' |
| ; YAML-NEXT: - Cost: '-99' |
| ; YAML-NEXT: - String: ' and with tree size ' |
| ; YAML-NEXT: - TreeSize: '6' |
| ; YAML-NEXT: ... |
| define void @test_spillcost_backedge(ptr noalias %res, ptr noalias %in, double %x, double %y) { |
| ; CHECK-LABEL: define void @test_spillcost_backedge( |
| ; CHECK-SAME: ptr noalias [[RES:%.*]], ptr noalias [[IN:%.*]], double [[X:%.*]], double [[Y:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[X]], i32 0 |
| ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[Y]], i32 1 |
| ; CHECK-NEXT: [[TMP2:%.*]] = fdiv <2 x double> splat (double 1.000000e+00), [[TMP1]] |
| ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] |
| ; CHECK: [[LOOP_HEADER]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV]], 100 |
| ; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_BODY:.*]], label %[[EXIT:.*]] |
| ; CHECK: [[LOOP_BODY]]: |
| ; CHECK-NEXT: [[GEP_IN_0:%.*]] = getelementptr double, ptr [[IN]], i64 [[IV]] |
| ; CHECK-NEXT: [[GEP_OUT_0:%.*]] = getelementptr double, ptr [[RES]], i64 [[IV]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[GEP_IN_0]], align 8 |
| ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], [[TMP2]] |
| ; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[GEP_OUT_0]], align 8 |
| ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[IV]], 50 |
| ; CHECK-NEXT: br i1 [[COND]], label %[[CALL_BLOCK:.*]], label %[[LOOP_LATCH]] |
| ; CHECK: [[CALL_BLOCK]]: |
| ; CHECK-NEXT: call void @external_call() |
| ; CHECK-NEXT: br label %[[LOOP_LATCH]] |
| ; CHECK: [[LOOP_LATCH]]: |
| ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 |
| ; CHECK-NEXT: br label %[[LOOP_HEADER]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| ; Reciprocals of %x and %y, defined once outside the loop and used by the |
| ; fsub pair in %loop.body. |
| %a = fdiv double 1.000000e+00, %x |
| %b = fdiv double 1.000000e+00, %y |
| br label %loop.header |
| |
| loop.header: ; preds = %entry, %loop.latch |
| ; %iv starts at 0 and the loop continues while %iv < 100 (signed compare). |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] |
| %cmp = icmp slt i64 %iv, 100 |
| br i1 %cmp, label %loop.body, label %exit |
| |
| loop.body: ; preds = %loop.header |
| ; Two adjacent double loads from %in at element indices %iv and %iv + 1. |
| %gep.in.0 = getelementptr double, ptr %in, i64 %iv |
| %gep.in.1 = getelementptr inbounds double, ptr %gep.in.0, i64 1 |
| %v0 = load double, ptr %gep.in.0, align 8 |
| %v1 = load double, ptr %gep.in.1, align 8 |
| ; Lane-wise subtraction of the reciprocals computed in %entry. |
| %r1 = fsub double %v0, %a |
| %r2 = fsub double %v1, %b |
| ; Two adjacent double stores to %res at element indices %iv and %iv + 1. |
| %gep.out.0 = getelementptr double, ptr %res, i64 %iv |
| %gep.out.1 = getelementptr inbounds double, ptr %gep.out.0, i64 1 |
| store double %r1, ptr %gep.out.0, align 8 |
| store double %r2, ptr %gep.out.1, align 8 |
| ; Take the call path only while %iv < 50; otherwise go straight to the latch. |
| %cond = icmp slt i64 %iv, 50 |
| br i1 %cond, label %call.block, label %loop.latch |
| |
| call.block: ; preds = %loop.body |
| ; This block is strictly dominated by %loop.body. The backward BFS in |
| ; getSpillCost must not traverse here when analyzing the edge from the |
| ; fsub pair in %loop.body to the fdiv pair in %entry: in forward |
| ; execution %call.block runs *after* the use in %loop.body, so |
| ; @external_call is not between Op's def and Entry's first use. |
| call void @external_call() |
| br label %loop.latch |
| |
| loop.latch: ; preds = %loop.body, %call.block |
| ; Advance the induction variable by 1 and take the backedge to %loop.header. |
| %iv.next = add i64 %iv, 1 |
| br label %loop.header |
| |
| exit: ; preds = %loop.header |
| ret void |
| } |