| ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "Cost.*(EMIT-SCALAR|CLONE).*(trunc|sext|zext)" --filter "Cost.*WIDEN-CAST" --filter-out-after "LV: Selecting VF" --version 6 |
| ; REQUIRES: asserts |
| ; RUN: opt -p loop-vectorize -force-target-supports-scalable-vectors -S -disable-output -debug-only=loop-vectorize %s 2>&1 | FileCheck %s |
| |
| ; Stores the induction variable, truncated from i64 to i8, to %dst[%iv] on |
| ; each of 7 iterations. The assertions below expect the EMIT-SCALAR trunc to |
| ; be costed at 0 for both VF 2 and VF 4. |
| define void @trunc_store(ptr %dst) { |
| ; CHECK-LABEL: 'trunc_store' |
| ; CHECK: Cost of 0 for VF 2: EMIT-SCALAR vp<[[VP5:%[0-9]+]]> = trunc vp<[[VP4:%[0-9]+]]> to i8 |
| ; CHECK: Cost of 0 for VF 4: EMIT-SCALAR vp<[[VP5]]> = trunc vp<[[VP4]]> to i8 |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| ; The truncated induction variable is the value being stored. |
| %iv.trunc = trunc i64 %iv to i8 |
| %gep = getelementptr i8, ptr %dst, i64 %iv |
| store i8 %iv.trunc, ptr %gep, align 1 |
| %iv.next = add i64 %iv, 1 |
| ; Exit once the incremented induction variable reaches 7. |
| %ec = icmp eq i64 %iv.next, 7 |
| br i1 %ec, label %exit, label %loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Add reduction whose recurrence is %red.next = (%red & 1) + -1, starting from |
| ; %y; the induction variable starts at %x. The final %red.next is returned. |
| ; The assertions below expect a WIDEN-CAST trunc of %red.next to i1 and a |
| ; WIDEN-CAST sext back to i32 (each costed at 1), plus an EMIT-SCALAR sext |
| ; costed at 0, for both VF 2 and VF 4. |
| define i32 @sext_reduc(i32 %x, i32 %y) { |
| ; CHECK-LABEL: 'sext_reduc' |
| ; CHECK: Cost of 1 for VF 2: WIDEN-CAST vp<[[VP6:%[0-9]+]]> = trunc ir<%red.next> to i1 |
| ; CHECK: Cost of 1 for VF 2: WIDEN-CAST vp<[[VP7:%[0-9]+]]> = sext vp<[[VP6]]> to i32 |
| ; CHECK: Cost of 0 for VF 2: EMIT-SCALAR vp<[[VP10:%[0-9]+]]> = sext vp<[[VP9:%[0-9]+]]> to i32 |
| ; CHECK: Cost of 1 for VF 4: WIDEN-CAST vp<[[VP6]]> = trunc ir<%red.next> to i1 |
| ; CHECK: Cost of 1 for VF 4: WIDEN-CAST vp<[[VP7]]> = sext vp<[[VP6]]> to i32 |
| ; CHECK: Cost of 0 for VF 4: EMIT-SCALAR vp<[[VP10]]> = sext vp<[[VP9]]> to i32 |
| ; |
| entry: |
| br label %for.body |
| |
| for.body: |
| %iv = phi i32 [ %x, %entry ], [ %iv.next, %for.body ] |
| %red = phi i32 [ %y, %entry ], [ %red.next, %for.body ] |
| ; Only the lowest bit of the running value feeds the next iteration. |
| %t0 = and i32 %red, 1 |
| %red.next = add i32 %t0, -1 |
| %iv.next = add nsw i32 %iv, 1 |
| ; Signed comparison on the pre-increment induction variable: exit when %iv > 77. |
| %cond = icmp sgt i32 %iv, 77 |
| br i1 %cond, label %for.end, label %for.body |
| |
| for.end: |
| ret i32 %red.next |
| } |
| |
| ; Add reduction over 256 bytes loaded from %A. The running i32 value starts at |
| ; 255, is masked with 255 at the top of every iteration, and the final sum is |
| ; truncated to i8 after the loop. |
| ; The assertions below expect, for both VF 2 and VF 4: the zext of the load |
| ; costed at 0, a WIDEN-CAST trunc of %red.next to i8 and a WIDEN-CAST zext back |
| ; to i32 (each costed at 1), and an EMIT-SCALAR zext costed at 0. |
| define i8 @reduc_add_trunc(ptr %A) { |
| ; CHECK-LABEL: 'reduc_add_trunc' |
| ; CHECK: Cost of 0 for VF 2: WIDEN-CAST ir<%zext> = zext ir<%load> to i32 |
| ; CHECK: Cost of 1 for VF 2: WIDEN-CAST vp<[[VP7:%[0-9]+]]> = trunc ir<%red.next> to i8 |
| ; CHECK: Cost of 1 for VF 2: WIDEN-CAST vp<[[VP8:%[0-9]+]]> = zext vp<[[VP7]]> to i32 |
| ; CHECK: Cost of 0 for VF 2: EMIT-SCALAR vp<[[VP11:%[0-9]+]]> = zext vp<[[VP10:%[0-9]+]]> to i32 |
| ; CHECK: Cost of 0 for VF 4: WIDEN-CAST ir<%zext> = zext ir<%load> to i32 |
| ; CHECK: Cost of 1 for VF 4: WIDEN-CAST vp<[[VP7]]> = trunc ir<%red.next> to i8 |
| ; CHECK: Cost of 1 for VF 4: WIDEN-CAST vp<[[VP8]]> = zext vp<[[VP7]]> to i32 |
| ; CHECK: Cost of 0 for VF 4: EMIT-SCALAR vp<[[VP11]]> = zext vp<[[VP10]]> to i32 |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ] |
| %red = phi i32 [ %red.next, %loop ], [ 255, %entry ] |
| ; Keep only the low 8 bits of the running value before accumulating. |
| %red.trunc = and i32 %red, 255 |
| %gep = getelementptr inbounds i8, ptr %A, i32 %iv |
| %load = load i8, ptr %gep, align 4 |
| %zext = zext i8 %load to i32 |
| %red.next = add i32 %red.trunc, %zext |
| %iv.next = add i32 %iv, 1 |
| ; Exit once the incremented induction variable reaches 256. |
| %exitcond = icmp eq i32 %iv.next, 256 |
| br i1 %exitcond, label %exit, label %loop |
| |
| exit: |
| ; Only the low 8 bits of the reduction result are returned. |
| %ret = trunc i32 %red.next to i8 |
| ret i8 %ret |
| } |
| |
| ; Stores i32 0 to %A[zext(%iv)] while the i32 induction variable is not 1000. |
| ; The back-edge branch carries loop metadata !0, which enables scalable |
| ; vectorization for this loop. The assertion below expects the CLONE zext to |
| ; be costed at 1 for VF vscale x 1. |
| ; NOTE: %n is not referenced anywhere in the body. |
| define void @replicate_zext(ptr %A, i32 %n) { |
| ; CHECK-LABEL: 'replicate_zext' |
| ; CHECK: Cost of 1 for VF vscale x 1: CLONE ir<%iv.ext> = zext vp<[[VP4:%[0-9]+]]> |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] |
| ; Widen the i32 induction variable to i64 for use as the GEP index. |
| %iv.ext = zext i32 %iv to i64 |
| %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv.ext |
| store i32 0, ptr %gep.A, align 4 |
| ; The comparison uses the pre-increment induction variable. |
| %cmp = icmp ne i32 %iv, 1000 |
| %iv.next = add nsw i32 %iv, 1 |
| br i1 %cmp, label %loop, label %exit, !llvm.loop !0 |
| |
| exit: |
| ret void |
| } |
| |
| ; Loop metadata referenced by the back-edge branch in @replicate_zext: sets |
| ; llvm.loop.vectorize.scalable.enable to true. |
| !0 = distinct !{!0, !1} |
| !1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} |