| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 |
| ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,SSE |
| ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX |
| |
| define <4 x float> @shuffle_fma_const_chain(<4 x float> %a0) { |
| ; SSE-LABEL: define <4 x float> @shuffle_fma_const_chain( |
| ; SSE-SAME: <4 x float> [[A0:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; SSE-NEXT: [[F:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A0]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) |
| ; SSE-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[F]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> |
| ; SSE-NEXT: ret <4 x float> [[RES]] |
| ; |
| ; AVX-LABEL: define <4 x float> @shuffle_fma_const_chain( |
| ; AVX-SAME: <4 x float> [[A0:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> |
| ; AVX-NEXT: [[RES:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP1]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) |
| ; AVX-NEXT: ret <4 x float> [[RES]] |
| ; |
| %f = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) |
| %res = shufflevector <4 x float> %f, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> |
| ret <4 x float> %res |
| } |
| |
| define <8 x float> @concat_fma_const_chain(<4 x float> %a0, <4 x float> %a1) { |
| ; CHECK-LABEL: define <8 x float> @concat_fma_const_chain( |
| ; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP1]], <8 x float> splat (float 0x3F8DE8D040000000), <8 x float> splat (float 0xBFB3715EE0000000)) |
| ; CHECK-NEXT: ret <8 x float> [[RES]] |
| ; |
| %l = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) |
| %h = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a1, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) |
| %res = shufflevector <4 x float> %l, <4 x float> %h, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> |
| ret <8 x float> %res |
| } |
| |
| define <8 x float> @interleave_fma_const_chain(<4 x float> %a0, <4 x float> %a1) { |
| ; SSE-LABEL: define <8 x float> @interleave_fma_const_chain( |
| ; SSE-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] { |
| ; SSE-NEXT: [[L:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A0]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) |
| ; SSE-NEXT: [[H:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A1]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) |
| ; SSE-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[L]], <4 x float> [[H]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| ; SSE-NEXT: ret <8 x float> [[RES]] |
| ; |
| ; AVX-LABEL: define <8 x float> @interleave_fma_const_chain( |
| ; AVX-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] { |
| ; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| ; AVX-NEXT: [[RES:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP1]], <8 x float> splat (float 0x3F8DE8D040000000), <8 x float> splat (float 0xBFB3715EE0000000)) |
| ; AVX-NEXT: ret <8 x float> [[RES]] |
| ; |
| %l = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) |
| %h = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a1, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) |
| %res = shufflevector <4 x float> %l, <4 x float> %h, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> |
| ret <8 x float> %res |
| } |
| |
| ; Negative test - multiple uses make the transformation unprofitable |
| define <4 x float> @shuffle_fma_const_chain_multiuse(<4 x float> %a0, ptr %p) { |
| ; CHECK-LABEL: define <4 x float> @shuffle_fma_const_chain_multiuse( |
| ; CHECK-SAME: <4 x float> [[A0:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[F:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A0]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) |
| ; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[F]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> |
| ; CHECK-NEXT: store <4 x float> [[F]], ptr [[P]], align 16 |
| ; CHECK-NEXT: ret <4 x float> [[RES]] |
| ; |
| %f = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) |
| %res = shufflevector <4 x float> %f, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> |
| store <4 x float> %f, ptr %p, align 16 |
| ret <4 x float> %res |
| } |
| |
| ; Negative test - intrinsic used by shuffle and arithmetic |
| define <4 x float> @shuffle_fma_multiuse_with_arith(<4 x float> %a0, <4 x float> %b) { |
| ; CHECK-LABEL: define <4 x float> @shuffle_fma_multiuse_with_arith( |
| ; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[B:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[F:%.*]] = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> [[A0]], <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) |
| ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[F]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> |
| ; CHECK-NEXT: [[ADD:%.*]] = fadd <4 x float> [[F]], [[B]] |
| ; CHECK-NEXT: [[RES:%.*]] = fadd <4 x float> [[SHUF]], [[ADD]] |
| ; CHECK-NEXT: ret <4 x float> [[RES]] |
| ; |
| %f = tail call noundef <4 x float> @llvm.fma.v4f32(<4 x float> %a0, <4 x float> splat (float 0x3F8DE8D040000000), <4 x float> splat (float 0xBFB3715EE0000000)) |
| %shuf = shufflevector <4 x float> %f, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> |
| %add = fadd <4 x float> %f, %b |
| %res = fadd <4 x float> %shuf, %add |
| ret <4 x float> %res |
| } |