| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-50 < %s | FileCheck %s %} |
| ; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu -slp-threshold=-50 < %s | FileCheck %s %} |
| |
| ; Profitability is irrelevant for these tests (hence -slp-threshold=-50 in the RUN lines); |
| ; they only demonstrate that the SLP vectorizer can combine different opcodes into one vector op. |
| |
| ; Lane 0 is `sub x, 0` and lanes 1-3 are `mul` by a constant. The expected output |
| ; is a single <4 x i16> mul in which lane 0 uses the multiplier 1. |
| define void @sub_mul(ptr %p, ptr %s) { |
| ; CHECK-LABEL: define void @sub_mul( |
| ; CHECK-SAME: ptr [[P:%.*]], ptr [[S:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[P]], align 2 |
| ; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i16> [[TMP0]], <i16 1, i16 5, i16 2, i16 3> |
| ; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[S]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| ; Source addresses for elements 1..3 (element 0 is %p itself). |
| %src1 = getelementptr i16, ptr %p, i64 1 |
| %src2 = getelementptr i16, ptr %p, i64 2 |
| %src3 = getelementptr i16, ptr %p, i64 3 |
| |
| ; Four consecutive i16 loads. |
| %v0 = load i16, ptr %p |
| %v1 = load i16, ptr %src1 |
| %v2 = load i16, ptr %src2 |
| %v3 = load i16, ptr %src3 |
| |
| ; Lane 0 uses a different opcode (sub) from the other lanes (mul). |
| %r0 = sub i16 %v0, 0 |
| %r1 = mul i16 %v1, 5 |
| %r2 = mul i16 %v2, 2 |
| %r3 = mul i16 %v3, 3 |
| |
| ; Destination addresses for elements 1..3 (element 0 is %s itself). |
| %dst1 = getelementptr i16, ptr %s, i64 1 |
| %dst2 = getelementptr i16, ptr %s, i64 2 |
| %dst3 = getelementptr i16, ptr %s, i64 3 |
| |
| ; Four consecutive i16 stores. |
| store i16 %r0, ptr %s |
| store i16 %r1, ptr %dst1 |
| store i16 %r2, ptr %dst2 |
| store i16 %r3, ptr %dst3 |
| ret void |
| } |
| |
| ; Lane 0 is `add x, 0` and lanes 1-3 are `mul` by a constant. The expected output |
| ; is a single <4 x i16> mul in which lane 0 uses the multiplier 1. |
| define void @add_mul(ptr %p, ptr %s) { |
| ; CHECK-LABEL: define void @add_mul( |
| ; CHECK-SAME: ptr [[P:%.*]], ptr [[S:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[P]], align 2 |
| ; CHECK-NEXT: [[TMP1:%.*]] = mul <4 x i16> [[TMP0]], <i16 1, i16 5, i16 2, i16 3> |
| ; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[S]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| ; Source addresses for elements 1..3 (element 0 is %p itself). |
| %src1 = getelementptr i16, ptr %p, i64 1 |
| %src2 = getelementptr i16, ptr %p, i64 2 |
| %src3 = getelementptr i16, ptr %p, i64 3 |
| |
| ; Four consecutive i16 loads. |
| %v0 = load i16, ptr %p |
| %v1 = load i16, ptr %src1 |
| %v2 = load i16, ptr %src2 |
| %v3 = load i16, ptr %src3 |
| |
| ; Lane 0 uses a different opcode (add) from the other lanes (mul). |
| %r0 = add i16 %v0, 0 |
| %r1 = mul i16 %v1, 5 |
| %r2 = mul i16 %v2, 2 |
| %r3 = mul i16 %v3, 3 |
| |
| ; Destination addresses for elements 1..3 (element 0 is %s itself). |
| %dst1 = getelementptr i16, ptr %s, i64 1 |
| %dst2 = getelementptr i16, ptr %s, i64 2 |
| %dst3 = getelementptr i16, ptr %s, i64 3 |
| |
| ; Four consecutive i16 stores. |
| store i16 %r0, ptr %s |
| store i16 %r1, ptr %dst1 |
| store i16 %r2, ptr %dst2 |
| store i16 %r3, ptr %dst3 |
| ret void |
| } |
| |
| ; Lane 0 is `sub x, 0` and lanes 1-3 are `and` with a constant. The expected output |
| ; is a single <4 x i16> and in which lane 0 uses the all-ones mask (-1). |
| define void @sub_and(ptr %p, ptr %s) { |
| ; CHECK-LABEL: define void @sub_and( |
| ; CHECK-SAME: ptr [[P:%.*]], ptr [[S:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr [[P]], align 2 |
| ; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i16> [[TMP0]], <i16 -1, i16 5, i16 2, i16 3> |
| ; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[S]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| ; Source addresses for elements 1..3 (element 0 is %p itself). |
| %p1 = getelementptr i16, ptr %p, i64 1 |
| %p2 = getelementptr i16, ptr %p, i64 2 |
| %p3 = getelementptr i16, ptr %p, i64 3 |
| |
| ; Four consecutive i16 loads. |
| %l0 = load i16, ptr %p |
| %l1 = load i16, ptr %p1 |
| %l2 = load i16, ptr %p2 |
| %l3 = load i16, ptr %p3 |
| |
| ; Lane 0 must be a `sub` so that this function matches its name and exercises |
| ; the sub/and pairing rather than an add/and pairing. |
| %r0 = sub i16 %l0, 0 |
| %r1 = and i16 %l1, 5 |
| %r2 = and i16 %l2, 2 |
| %r3 = and i16 %l3, 3 |
| |
| ; Destination addresses for elements 1..3 (element 0 is %s itself). |
| %s1 = getelementptr i16, ptr %s, i64 1 |
| %s2 = getelementptr i16, ptr %s, i64 2 |
| %s3 = getelementptr i16, ptr %s, i64 3 |
| |
| ; Four consecutive i16 stores. |
| store i16 %r0, ptr %s |
| store i16 %r1, ptr %s1 |
| store i16 %r2, ptr %s2 |
| store i16 %r3, ptr %s3 |
| ret void |
| } |